ia64/xen-unstable

changeset 19797:8440fc9f7a25

x86-64: do not pass unmanageable amounts of memory to Dom0

Due to address space restrictions it is not possible to successfully
pass more than about 500GB to a Linux Dom0 unless its kernel specifies
a non-default phys-to-machine map location via XEN_ELFNOTE_INIT_P2M.

For non-Linux Dom0 kernels I can't say whether the limit could be set
to close to 1TB, but since passing such huge amounts of memory isn't
very useful anyway (and can be enforced via dom0_mem=), the patch
doesn't attempt to guess the kernel type and restricts the memory
amount in all cases.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 18 10:31:17 2009 +0100 (2009-06-18)
parents 1c01814f9a25
children af06333d4c5d
files xen/arch/x86/domain_build.c
line diff
     1.1 --- a/xen/arch/x86/domain_build.c	Thu Jun 18 10:30:28 2009 +0100
     1.2 +++ b/xen/arch/x86/domain_build.c	Thu Jun 18 10:31:17 2009 +0100
     1.3 @@ -146,33 +146,65 @@ static struct page_info * __init alloc_c
     1.4      return page;
     1.5  }
     1.6  
     1.7 -static unsigned long __init compute_dom0_nr_pages(void)
     1.8 +static unsigned long __init compute_dom0_nr_pages(
     1.9 +#ifdef __x86_64__
    1.10 +    unsigned long vstart, unsigned long vend, size_t sizeof_long)
    1.11 +#else
    1.12 +    void)
    1.13 +#endif
    1.14  {
    1.15      unsigned long avail = avail_domheap_pages() + initial_images_nrpages();
    1.16 +    unsigned long nr_pages = dom0_nrpages;
    1.17 +    unsigned long min_pages = dom0_min_nrpages;
    1.18 +    unsigned long max_pages = dom0_max_nrpages;
    1.19  
    1.20      /*
    1.21       * If domain 0 allocation isn't specified, reserve 1/16th of available
    1.22       * memory for things like DMA buffers. This reservation is clamped to 
    1.23       * a maximum of 128MB.
    1.24       */
    1.25 -    if ( dom0_nrpages == 0 )
    1.26 -    {
    1.27 -        dom0_nrpages = avail;
    1.28 -        dom0_nrpages = min(dom0_nrpages / 16, 128L << (20 - PAGE_SHIFT));
    1.29 -        dom0_nrpages = -dom0_nrpages;
    1.30 -    }
    1.31 +    if ( nr_pages == 0 )
    1.32 +        nr_pages = -min(avail / 16, 128UL << (20 - PAGE_SHIFT));
    1.33  
    1.34      /* Negative memory specification means "all memory - specified amount". */
    1.35 -    if ( dom0_nrpages     < 0 ) dom0_nrpages     += avail;
    1.36 -    if ( dom0_min_nrpages < 0 ) dom0_min_nrpages += avail;
    1.37 -    if ( dom0_max_nrpages < 0 ) dom0_max_nrpages += avail;
    1.38 +    if ( (long)nr_pages  < 0 ) nr_pages  += avail;
    1.39 +    if ( (long)min_pages < 0 ) min_pages += avail;
    1.40 +    if ( (long)max_pages < 0 ) max_pages += avail;
    1.41  
    1.42      /* Clamp dom0 memory according to min/max limits and available memory. */
    1.43 -    dom0_nrpages = max(dom0_nrpages, dom0_min_nrpages);
    1.44 -    dom0_nrpages = min(dom0_nrpages, dom0_max_nrpages);
    1.45 -    dom0_nrpages = min(dom0_nrpages, (long)avail);
    1.46 +    nr_pages = max(nr_pages, min_pages);
    1.47 +    nr_pages = min(nr_pages, max_pages);
    1.48 +    nr_pages = min(nr_pages, avail);
    1.49  
    1.50 -    return dom0_nrpages;
    1.51 +#ifdef __x86_64__
    1.52 +    if ( vstart && dom0_nrpages <= 0 &&
    1.53 +         (dom0_min_nrpages <= 0 || nr_pages > min_pages) )
    1.54 +    {
    1.55 +        /*
    1.56 +         * Legacy Linux kernels (i.e. such without a XEN_ELFNOTE_INIT_P2M
    1.57 +         * note) require that there is enough virtual space beyond the initial
    1.58 +         * allocation to set up their initial page tables. This space is
    1.59 +         * roughly the same size as the p2m table, so make sure the initial
    1.60 +         * allocation doesn't consume more than about half the space that's
    1.61 +         * available between params.virt_base and the address space end.
    1.62 +         */
    1.63 +        unsigned long end = vend + nr_pages * sizeof_long;
    1.64 +
    1.65 +        if ( end > vstart )
    1.66 +            end += end - vstart;
    1.67 +        if ( end <= vstart ||
    1.68 +             (sizeof_long < sizeof(end) && end > (1UL << (8 * sizeof_long))) )
    1.69 +        {
    1.70 +            end = sizeof_long >= sizeof(end) ? 0 : 1UL << (8 * sizeof_long);
    1.71 +            nr_pages = (end - vend) / (2 * sizeof_long);
    1.72 +            if ( dom0_min_nrpages > 0 && nr_pages < min_pages )
    1.73 +                nr_pages = min_pages;
    1.74 +            printk("Dom0 memory clipped to %lu pages\n", nr_pages);
    1.75 +        }
    1.76 +    }
    1.77 +#endif
    1.78 +
    1.79 +    return nr_pages;
    1.80  }
    1.81  
    1.82  static void __init process_dom0_ioports_disable(void)
    1.83 @@ -282,8 +314,6 @@ int __init construct_dom0(
    1.84  
    1.85      d->max_pages = ~0U;
    1.86  
    1.87 -    nr_pages = compute_dom0_nr_pages();
    1.88 -
    1.89      if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
    1.90          return rc;
    1.91  
    1.92 @@ -342,9 +372,18 @@ int __init construct_dom0(
    1.93          d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
    1.94          v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0];
    1.95  
    1.96 -        if ( nr_pages != (unsigned int)nr_pages )
    1.97 -            nr_pages = UINT_MAX;
    1.98 +        nr_pages = compute_dom0_nr_pages(parms.virt_base,
    1.99 +            round_pgup(parms.virt_kend) + round_pgup(initrd_len),
   1.100 +            sizeof(unsigned int));
   1.101      }
   1.102 +    else if (parms.p2m_base != UNSET_ADDR)
   1.103 +        nr_pages = compute_dom0_nr_pages(0, 0, 0);
   1.104 +    else
   1.105 +        nr_pages = compute_dom0_nr_pages(parms.virt_base,
   1.106 +            round_pgup(parms.virt_kend) + round_pgup(initrd_len),
   1.107 +            sizeof(unsigned long));
   1.108 +#else
   1.109 +    nr_pages = compute_dom0_nr_pages();
   1.110  #endif
   1.111  
   1.112      if ( parms.pae == PAEKERN_extended_cr3 )