ia64/xen-unstable

changeset 12213:cf3d69ba5633

[HVM] Move shadow initialisation into domain-creation hypercall.
Allocate HVM guest memory in the libxc builder function rather
than in xend. Clean up fallout from these changes.

Todo: Fix ia64. Move PV builder to same model (it should allocate the
memory rather than xend doing so -- then it can avoid using
xc_get_pfn_list()).

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Nov 01 18:37:23 2006 +0000 (2006-11-01)
parents 96f51a000ed0
children aac5c1fcde6e
files tools/ioemu/vl.c tools/libxc/xc_hvm_build.c tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/image.py xen/arch/x86/domain.c xen/arch/x86/domctl.c xen/arch/x86/mm/shadow/common.c xen/include/asm-x86/shadow.h
line diff
     1.1 --- a/tools/ioemu/vl.c	Wed Nov 01 18:32:45 2006 +0000
     1.2 +++ b/tools/ioemu/vl.c	Wed Nov 01 18:37:23 2006 +0000
     1.3 @@ -6420,15 +6420,14 @@ int main(int argc, char **argv)
     1.4      }
     1.5  
     1.6  #if defined(__i386__) || defined(__x86_64__)
     1.7 -    if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
     1.8 +    for ( i = 0; i < tmp_nr_pages; i++)
     1.9 +        page_array[i] = i;
    1.10 +    if (xc_domain_translate_gpfn_list(xc_handle, domid, tmp_nr_pages,
    1.11 +                                      page_array, page_array)) {
    1.12          fprintf(logfile, "xc_get_pfn_list returned error %d\n", errno);
    1.13          exit(-1);
    1.14      }
    1.15  
    1.16 -    if (ram_size > HVM_BELOW_4G_RAM_END)
    1.17 -        for (i = 0; i < nr_pages - (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT); i++)
    1.18 -            page_array[tmp_nr_pages - 1 - i] = page_array[nr_pages - 1 - i];
    1.19 -
    1.20      phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
    1.21                                           PROT_READ|PROT_WRITE, page_array,
    1.22                                           tmp_nr_pages);
     2.1 --- a/tools/libxc/xc_hvm_build.c	Wed Nov 01 18:32:45 2006 +0000
     2.2 +++ b/tools/libxc/xc_hvm_build.c	Wed Nov 01 18:37:23 2006 +0000
     2.3 @@ -196,7 +196,6 @@ static int set_hvm_info(int xc_handle, u
     2.4  static int setup_guest(int xc_handle,
     2.5                         uint32_t dom, int memsize,
     2.6                         char *image, unsigned long image_size,
     2.7 -                       unsigned long nr_pages,
     2.8                         vcpu_guest_context_t *ctxt,
     2.9                         unsigned long shared_info_frame,
    2.10                         unsigned int vcpus,
    2.11 @@ -207,18 +206,13 @@ static int setup_guest(int xc_handle,
    2.12                         unsigned long *store_mfn)
    2.13  {
    2.14      xen_pfn_t *page_array = NULL;
    2.15 -    unsigned long count, i;
    2.16 -    unsigned long long ptr;
    2.17 -    xc_mmu_t *mmu = NULL;
    2.18 -
    2.19 +    unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
    2.20 +    unsigned long shared_page_nr;
    2.21      shared_info_t *shared_info;
    2.22      void *e820_page;
    2.23 -
    2.24      struct domain_setup_info dsi;
    2.25      uint64_t v_end;
    2.26  
    2.27 -    unsigned long shared_page_nr;
    2.28 -
    2.29      memset(&dsi, 0, sizeof(struct domain_setup_info));
    2.30  
    2.31      if ( (parseelfimage(image, image_size, &dsi)) != 0 )
    2.32 @@ -230,7 +224,6 @@ static int setup_guest(int xc_handle,
    2.33          goto error_out;
    2.34      }
    2.35  
    2.36 -    /* memsize is in megabytes */
    2.37      v_end = (unsigned long long)memsize << 20;
    2.38  
    2.39      IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
    2.40 @@ -255,53 +248,27 @@ static int setup_guest(int xc_handle,
    2.41          goto error_out;
    2.42      }
    2.43  
    2.44 -    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    2.45 +    for ( i = 0; i < nr_pages; i++ )
    2.46 +        page_array[i] = i;
    2.47 +    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
    2.48 +        page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
    2.49 +
    2.50 +    if ( xc_domain_memory_populate_physmap(xc_handle, dom, nr_pages,
    2.51 +                                           0, 0, page_array) )
    2.52      {
    2.53 -        PERROR("Could not get the page frame list.\n");
    2.54 +        PERROR("Could not allocate memory for HVM guest.\n");
    2.55          goto error_out;
    2.56      }
    2.57  
    2.58 -    /* HVM domains must be put into shadow mode at the start of day. */
    2.59 -    /* XXX *After* xc_get_pfn_list()!! */
    2.60 -    if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_ENABLE,
    2.61 -                           NULL, 0, NULL, 
    2.62 -                           XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT  |
    2.63 -                           XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE |
    2.64 -                           XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL, 
    2.65 -                           NULL) )
    2.66 +    if ( xc_domain_translate_gpfn_list(xc_handle, dom, nr_pages,
    2.67 +                                       page_array, page_array) )
    2.68      {
    2.69 -        PERROR("Could not enable shadow paging for domain.\n");
    2.70 +        PERROR("Could not translate addresses of HVM guest.\n");
    2.71          goto error_out;
    2.72 -    }        
    2.73 +    }
    2.74  
    2.75      loadelfimage(image, xc_handle, dom, page_array, &dsi);
    2.76  
    2.77 -    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
    2.78 -        goto error_out;
    2.79 -
    2.80 -    /* Write the machine->phys table entries. */
    2.81 -    for ( count = 0; count < nr_pages; count++ )
    2.82 -    {
    2.83 -        unsigned long gpfn_count_skip;
    2.84 -
    2.85 -        ptr = (unsigned long long)page_array[count] << PAGE_SHIFT;
    2.86 -
    2.87 -        gpfn_count_skip = 0;
    2.88 -
    2.89 -        /*
    2.90 -         * physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
    2.91 -         * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
    2.92 -         * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
    2.93 -         */
    2.94 -        if ( count >= (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) )
    2.95 -            gpfn_count_skip = HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
    2.96 -
    2.97 -        if ( xc_add_mmu_update(xc_handle, mmu,
    2.98 -                               ptr | MMU_MACHPHYS_UPDATE,
    2.99 -                               count + gpfn_count_skip) )
   2.100 -            goto error_out;
   2.101 -    }
   2.102 -
   2.103      if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi) )
   2.104      {
   2.105          ERROR("Couldn't set hvm info for HVM guest.\n");
   2.106 @@ -352,22 +319,13 @@ static int setup_guest(int xc_handle,
   2.107      if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
   2.108          goto error_out;
   2.109  
   2.110 -    /* Send the page update requests down to the hypervisor. */
   2.111 -    if ( xc_finish_mmu_updates(xc_handle, mmu) )
   2.112 -        goto error_out;
   2.113 -
   2.114 -    free(mmu);
   2.115      free(page_array);
   2.116  
   2.117 -    /*
   2.118 -     * Initial register values:
   2.119 -     */
   2.120      ctxt->user_regs.eip = dsi.v_kernentry;
   2.121  
   2.122      return 0;
   2.123  
   2.124   error_out:
   2.125 -    free(mmu);
   2.126      free(page_array);
   2.127      return -1;
   2.128  }
   2.129 @@ -387,8 +345,6 @@ static int xc_hvm_build_internal(int xc_
   2.130      struct xen_domctl launch_domctl, domctl;
   2.131      int rc, i;
   2.132      vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
   2.133 -    unsigned long nr_pages;
   2.134 -    xen_capabilities_info_t xen_caps;
   2.135  
   2.136      if ( (image == NULL) || (image_size == 0) )
   2.137      {
   2.138 @@ -396,25 +352,6 @@ static int xc_hvm_build_internal(int xc_
   2.139          goto error_out;
   2.140      }
   2.141  
   2.142 -    if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
   2.143 -    {
   2.144 -        PERROR("Failed to get xen version info");
   2.145 -        goto error_out;
   2.146 -    }
   2.147 -
   2.148 -    if ( !strstr(xen_caps, "hvm") )
   2.149 -    {
   2.150 -        PERROR("CPU doesn't support HVM extensions or "
   2.151 -               "the extensions are not enabled");
   2.152 -        goto error_out;
   2.153 -    }
   2.154 -
   2.155 -    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
   2.156 -    {
   2.157 -        PERROR("Could not find total pages for domain");
   2.158 -        goto error_out;
   2.159 -    }
   2.160 -
   2.161      if ( lock_pages(&st_ctxt, sizeof(st_ctxt) ) )
   2.162      {
   2.163          PERROR("%s: ctxt mlock failed", __func__);
   2.164 @@ -430,24 +367,10 @@ static int xc_hvm_build_internal(int xc_
   2.165          goto error_out;
   2.166      }
   2.167  
   2.168 -#if 0
   2.169 -    /* HVM domains must be put into shadow mode at the start of day */
   2.170 -    if ( xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_ENABLE,
   2.171 -                           NULL, 0, NULL, 
   2.172 -                           XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT  |
   2.173 -                           XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE |
   2.174 -                           XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL, 
   2.175 -                           NULL) )
   2.176 -    {
   2.177 -        PERROR("Could not enable shadow paging for domain.\n");
   2.178 -        goto error_out;
   2.179 -    }        
   2.180 -#endif
   2.181 +    memset(ctxt, 0, sizeof(*ctxt));
   2.182 +    ctxt->flags = VGCF_HVM_GUEST;
   2.183  
   2.184 -    memset(ctxt, 0, sizeof(*ctxt));
   2.185 -
   2.186 -    ctxt->flags = VGCF_HVM_GUEST;
   2.187 -    if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
   2.188 +    if ( setup_guest(xc_handle, domid, memsize, image, image_size,
   2.189                       ctxt, domctl.u.getdomaininfo.shared_info_frame,
   2.190                       vcpus, pae, acpi, apic, store_evtchn, store_mfn) < 0)
   2.191      {
     3.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Wed Nov 01 18:32:45 2006 +0000
     3.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Wed Nov 01 18:37:23 2006 +0000
     3.3 @@ -1295,9 +1295,11 @@ class XendDomainInfo:
     3.4              shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
     3.5              self.info['shadow_memory'] = shadow_cur
     3.6  
     3.7 -            # initial memory reservation
     3.8 -            xc.domain_memory_increase_reservation(self.domid, reservation, 0,
     3.9 -                                                  0)
    3.10 +            # Initial memory reservation
    3.11 +            if not (self._infoIsSet('image') and
    3.12 +                    sxp.name(self.info['image']) == "hvm"):
    3.13 +                xc.domain_memory_increase_reservation(
    3.14 +                    self.domid, reservation, 0, 0)
    3.15  
    3.16              self._createChannels()
    3.17  
     4.1 --- a/tools/python/xen/xend/image.py	Wed Nov 01 18:32:45 2006 +0000
     4.2 +++ b/tools/python/xen/xend/image.py	Wed Nov 01 18:37:23 2006 +0000
     4.3 @@ -478,22 +478,12 @@ class X86_HVM_ImageHandler(HVMImageHandl
     4.4  
     4.5      def getRequiredAvailableMemory(self, mem_kb):
     4.6          # Add 8 MiB overhead for QEMU's video RAM.
     4.7 -        return self.getRequiredInitialReservation(mem_kb) + 8192
     4.8 +        return mem_kb + 8192
     4.9  
    4.10      def getRequiredInitialReservation(self, mem_kb):
    4.11 -        page_kb = 4
    4.12 -        # This was derived emperically:
    4.13 -        #   2.4 MB overhead per 1024 MB RAM
    4.14 -        #   + 4 to avoid low-memory condition
    4.15 -        extra_mb = (2.4/1024) * (mem_kb/1024.0) + 4;
    4.16 -        extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
    4.17 -        return mem_kb + extra_pages * page_kb
    4.18 +        return mem_kb
    4.19  
    4.20      def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
    4.21 -        # The given value is the configured value -- we need to include the
    4.22 -        # overhead due to getRequiredInitialReservation.
    4.23 -        maxmem_kb = self.getRequiredInitialReservation(maxmem_kb)
    4.24 -
    4.25          # 256 pages (1MB) per vcpu,
    4.26          # plus 1 page per MiB of RAM for the P2M map,
    4.27          # plus 1 page per MiB of RAM to shadow the resident processes.  
     5.1 --- a/xen/arch/x86/domain.c	Wed Nov 01 18:32:45 2006 +0000
     5.2 +++ b/xen/arch/x86/domain.c	Wed Nov 01 18:37:23 2006 +0000
     5.3 @@ -155,19 +155,12 @@ int arch_domain_create(struct domain *d)
     5.4  {
     5.5      l1_pgentry_t gdt_l1e;
     5.6      int vcpuid, pdpt_order;
     5.7 -    int i;
     5.8 -
     5.9 -    if ( is_hvm_domain(d) && !hvm_enabled )
    5.10 -    {
    5.11 -        gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest "
    5.12 -                 "on a non-VT/AMDV platform.\n");
    5.13 -        return -EINVAL;
    5.14 -    }
    5.15 +    int i, rc = -ENOMEM;
    5.16  
    5.17      pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
    5.18      d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
    5.19      if ( d->arch.mm_perdomain_pt == NULL )
    5.20 -        goto fail_nomem;
    5.21 +        goto fail;
    5.22      memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
    5.23  
    5.24      /*
    5.25 @@ -192,7 +185,7 @@ int arch_domain_create(struct domain *d)
    5.26      d->arch.mm_perdomain_l3 = alloc_xenheap_page();
    5.27      if ( (d->arch.mm_perdomain_l2 == NULL) ||
    5.28           (d->arch.mm_perdomain_l3 == NULL) )
    5.29 -        goto fail_nomem;
    5.30 +        goto fail;
    5.31  
    5.32      memset(d->arch.mm_perdomain_l2, 0, PAGE_SIZE);
    5.33      for ( i = 0; i < (1 << pdpt_order); i++ )
    5.34 @@ -219,26 +212,41 @@ int arch_domain_create(struct domain *d)
    5.35          d->arch.ioport_caps = 
    5.36              rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
    5.37          if ( d->arch.ioport_caps == NULL )
    5.38 -            goto fail_nomem;
    5.39 +            goto fail;
    5.40  
    5.41          if ( (d->shared_info = alloc_xenheap_page()) == NULL )
    5.42 -            goto fail_nomem;
    5.43 +            goto fail;
    5.44  
    5.45          memset(d->shared_info, 0, PAGE_SIZE);
    5.46          share_xen_page_with_guest(
    5.47              virt_to_page(d->shared_info), d, XENSHARE_writable);
    5.48      }
    5.49  
    5.50 +    if ( is_hvm_domain(d) )
    5.51 +    {
    5.52 +        if ( !hvm_enabled )
    5.53 +        {
    5.54 +            gdprintk(XENLOG_WARNING, "Attempt to create a HVM guest "
    5.55 +                     "on a non-VT/AMDV platform.\n");
    5.56 +            rc = -EINVAL;
    5.57 +            goto fail;
    5.58 +        }
    5.59 +
    5.60 +        rc = shadow_enable(d, SHM2_refcounts|SHM2_translate|SHM2_external);
    5.61 +        if ( rc != 0 )
    5.62 +            goto fail;
    5.63 +    }
    5.64 +
    5.65      return 0;
    5.66  
    5.67 - fail_nomem:
    5.68 + fail:
    5.69      free_xenheap_page(d->shared_info);
    5.70  #ifdef __x86_64__
    5.71      free_xenheap_page(d->arch.mm_perdomain_l2);
    5.72      free_xenheap_page(d->arch.mm_perdomain_l3);
    5.73  #endif
    5.74      free_xenheap_pages(d->arch.mm_perdomain_pt, pdpt_order);
    5.75 -    return -ENOMEM;
    5.76 +    return rc;
    5.77  }
    5.78  
    5.79  void arch_domain_destroy(struct domain *d)
     6.1 --- a/xen/arch/x86/domctl.c	Wed Nov 01 18:32:45 2006 +0000
     6.2 +++ b/xen/arch/x86/domctl.c	Wed Nov 01 18:37:23 2006 +0000
     6.3 @@ -224,45 +224,18 @@ long arch_do_domctl(
     6.4  
     6.5              spin_lock(&d->page_alloc_lock);
     6.6  
     6.7 -            if ( is_hvm_domain(d) && shadow_mode_translate(d) )
     6.8 +            list_ent = d->page_list.next;
     6.9 +            for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
    6.10              {
    6.11 -                /* HVM domain: scan P2M to get guaranteed physmap order. */
    6.12 -                for ( i = 0, gmfn = 0;
    6.13 -                      (i < max_pfns) && (i < d->tot_pages); 
    6.14 -                      i++, gmfn++ )
    6.15 +                mfn = page_to_mfn(list_entry(
    6.16 +                    list_ent, struct page_info, list));
    6.17 +                if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
    6.18 +                                          i, &mfn, 1) )
    6.19                  {
    6.20 -                    if ( unlikely(i == (HVM_BELOW_4G_MMIO_START>>PAGE_SHIFT)) )
    6.21 -                    {
    6.22 -                        /* skip MMIO range */
    6.23 -                        gmfn += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
    6.24 -                    }
    6.25 -                    mfn = gmfn_to_mfn(d, gmfn);
    6.26 -                    if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
    6.27 -                                              i, &mfn, 1) )
    6.28 -                    {
    6.29 -                        ret = -EFAULT;
    6.30 -                        break;
    6.31 -                    }
    6.32 +                    ret = -EFAULT;
    6.33 +                    break;
    6.34                  }
    6.35 -            }
    6.36 -            else 
    6.37 -            {        
    6.38 -                /* Other guests: return in order of ownership list. */
    6.39 -                list_ent = d->page_list.next;
    6.40 -                for ( i = 0;
    6.41 -                      (i < max_pfns) && (list_ent != &d->page_list);
    6.42 -                      i++ )
    6.43 -                {
    6.44 -                    mfn = page_to_mfn(list_entry(
    6.45 -                        list_ent, struct page_info, list));
    6.46 -                    if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
    6.47 -                                              i, &mfn, 1) )
    6.48 -                    {
    6.49 -                        ret = -EFAULT;
    6.50 -                        break;
    6.51 -                    }
    6.52 -                    list_ent = mfn_to_page(mfn)->list.next;
    6.53 -                }
    6.54 +                list_ent = mfn_to_page(mfn)->list.next;
    6.55              }
    6.56              
    6.57              spin_unlock(&d->page_alloc_lock);
     7.1 --- a/xen/arch/x86/mm/shadow/common.c	Wed Nov 01 18:32:45 2006 +0000
     7.2 +++ b/xen/arch/x86/mm/shadow/common.c	Wed Nov 01 18:37:23 2006 +0000
     7.3 @@ -2461,7 +2461,7 @@ static void sh_new_mode(struct domain *d
     7.4          sh_update_paging_modes(v);
     7.5  }
     7.6  
     7.7 -static int shadow_enable(struct domain *d, u32 mode)
     7.8 +int shadow_enable(struct domain *d, u32 mode)
     7.9  /* Turn on "permanent" shadow features: external, translate, refcount.
    7.10   * Can only be called once on a domain, and these features cannot be
    7.11   * disabled. 
    7.12 @@ -3092,6 +3092,8 @@ int shadow_domctl(struct domain *d,
    7.13          if ( shadow_mode_log_dirty(d) )
    7.14              if ( (rc = shadow_log_dirty_disable(d)) != 0 ) 
    7.15                  return rc;
    7.16 +        if ( is_hvm_domain(d) )
    7.17 +            return -EINVAL;
    7.18          if ( d->arch.shadow.mode & SHM2_enable )
    7.19              if ( (rc = shadow_test_disable(d)) != 0 ) 
    7.20                  return rc;
     8.1 --- a/xen/include/asm-x86/shadow.h	Wed Nov 01 18:32:45 2006 +0000
     8.2 +++ b/xen/include/asm-x86/shadow.h	Wed Nov 01 18:37:23 2006 +0000
     8.3 @@ -313,6 +313,9 @@ static inline int shadow_guest_paging_le
     8.4  /**************************************************************************/
     8.5  /* Entry points into the shadow code */
     8.6  
     8.7 +/* Enable arbitrary shadow mode. */
     8.8 +int shadow_enable(struct domain *d, u32 mode);
     8.9 +
    8.10  /* Turning on shadow test mode */
    8.11  int shadow_test_enable(struct domain *d);
    8.12