ia64/xen-unstable

changeset 10541:234939c0ec3a

[BALLOON] Make the domain0 ballooning logic in xend aware of pages that are on the page scrub list.
This fixes the case where crashing/restarting a domain can cause dom0 to balloon more than
necessary.

I changed the physinfo dictionary in Python to be in KiB, rather than
MiB, to avoid accumulating ugly rounding errors. I tried to avoid
changing units anywhere else.

Signed-off-by: Charles Coffing <ccoffing@novell.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Jun 27 12:03:46 2006 +0100 (2006-06-27)
parents 7154e0416313
children 02b0ed160e8e
files tools/python/xen/lowlevel/xc/xc.c tools/python/xen/xend/XendNode.py tools/python/xen/xend/balloon.py xen/arch/x86/dom0_ops.c xen/common/page_alloc.c xen/include/public/dom0_ops.h xen/include/xen/mm.h
line diff
     1.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Tue Jun 27 11:50:57 2006 +0100
     1.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Tue Jun 27 12:03:46 2006 +0100
     1.3 @@ -582,6 +582,12 @@ static PyObject *pyxc_readconsolering(Xc
     1.4  }
     1.5  
     1.6  
     1.7 +static unsigned long pages_to_kib(unsigned long pages)
     1.8 +{
     1.9 +    return pages * (XC_PAGE_SIZE / 1024);
    1.10 +}
    1.11 +
    1.12 +
    1.13  static PyObject *pyxc_pages_to_kib(XcObject *self, PyObject *args)
    1.14  {
    1.15      unsigned long pages;
    1.16 @@ -589,13 +595,7 @@ static PyObject *pyxc_pages_to_kib(XcObj
    1.17      if (!PyArg_ParseTuple(args, "l", &pages))
    1.18          return NULL;
    1.19  
    1.20 -    return PyLong_FromUnsignedLong(pages * (XC_PAGE_SIZE / 1024));
    1.21 -}
    1.22 -
    1.23 -
    1.24 -static unsigned long pages_to_mb(unsigned long pages)
    1.25 -{
    1.26 -    return (pages * (XC_PAGE_SIZE / 1024) + 1023) / 1024;
    1.27 +    return PyLong_FromUnsignedLong(pages_to_kib(pages));
    1.28  }
    1.29  
    1.30  
    1.31 @@ -618,13 +618,14 @@ static PyObject *pyxc_physinfo(XcObject 
    1.32      if(q>cpu_cap)
    1.33          *(q-1)=0;
    1.34  
    1.35 -    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:i,s:s}",
    1.36 +    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
    1.37                           "threads_per_core", info.threads_per_core,
    1.38                           "cores_per_socket", info.cores_per_socket,
    1.39                           "sockets_per_node", info.sockets_per_node,
    1.40                           "nr_nodes",         info.nr_nodes,
    1.41 -                         "total_memory",     pages_to_mb(info.total_pages),
    1.42 -                         "free_memory",      pages_to_mb(info.free_pages),
    1.43 +                         "total_memory",     pages_to_kib(info.total_pages),
    1.44 +                         "free_memory",      pages_to_kib(info.free_pages),
    1.45 +                         "scrub_memory",     pages_to_kib(info.scrub_pages),
    1.46                           "cpu_khz",          info.cpu_khz,
    1.47                           "hw_caps",          cpu_cap);
    1.48  }
     2.1 --- a/tools/python/xen/xend/XendNode.py	Tue Jun 27 11:50:57 2006 +0100
     2.2 +++ b/tools/python/xen/xend/XendNode.py	Tue Jun 27 12:03:46 2006 +0100
     2.3 @@ -64,6 +64,9 @@ class XendNode:
     2.4                             info['cores_per_socket'] *
     2.5                             info['threads_per_core'])
     2.6          info['cpu_mhz'] = info['cpu_khz'] / 1000
     2.7 +        # physinfo is in KiB
     2.8 +        info['total_memory'] = info['total_memory'] / 1024
     2.9 +        info['free_memory']  = info['free_memory'] / 1024
    2.10  
    2.11          ITEM_ORDER = ['nr_cpus',
    2.12                        'nr_nodes',
     3.1 --- a/tools/python/xen/xend/balloon.py	Tue Jun 27 11:50:57 2006 +0100
     3.2 +++ b/tools/python/xen/xend/balloon.py	Tue Jun 27 12:03:46 2006 +0100
     3.3 @@ -29,8 +29,6 @@ from XendError import VmError
     3.4  
     3.5  PROC_XEN_BALLOON = '/proc/xen/balloon'
     3.6  
     3.7 -BALLOON_OUT_SLACK = 1 # MiB.  We need this because the physinfo details are
     3.8 -                      # rounded.
     3.9  RETRY_LIMIT = 20
    3.10  RETRY_LIMIT_INCR = 5
    3.11  ##
    3.12 @@ -68,22 +66,22 @@ def _get_proc_balloon(label):
    3.13          f.close()
    3.14  
    3.15  def get_dom0_current_alloc():
    3.16 -    """Returns the current memory allocation (in MiB) of dom0."""
    3.17 +    """Returns the current memory allocation (in KiB) of dom0."""
    3.18  
    3.19      kb = _get_proc_balloon(labels['current'])
    3.20      if kb == None:
    3.21          raise VmError('Failed to query current memory allocation of dom0.')
    3.22 -    return kb / 1024
    3.23 +    return kb
    3.24  
    3.25  def get_dom0_target_alloc():
    3.26 -    """Returns the target memory allocation (in MiB) of dom0."""
    3.27 +    """Returns the target memory allocation (in KiB) of dom0."""
    3.28  
    3.29      kb = _get_proc_balloon(labels['target'])
    3.30      if kb == None:
    3.31          raise VmError('Failed to query target memory allocation of dom0.')
    3.32 -    return kb / 1024
    3.33 +    return kb
    3.34  
    3.35 -def free(required):
    3.36 +def free(need_mem):
    3.37      """Balloon out memory from the privileged domain so that there is the
    3.38      specified required amount (in KiB) free.
    3.39      """
    3.40 @@ -92,9 +90,10 @@ def free(required):
    3.41      # to balloon out to free some up.  Memory freed by a destroyed domain may
    3.42      # not appear in the free_memory field immediately, because it needs to be
    3.43      # scrubbed before it can be released to the free list, which is done
    3.44 -    # asynchronously by Xen; ballooning is asynchronous also.  No matter where
    3.45 -    # we expect the free memory to come from, therefore, we need to wait for
    3.46 -    # it to become available.
    3.47 +    # asynchronously by Xen; ballooning is asynchronous also.  Such memory
    3.48 +    # does, however, need to be accounted for when calculating how much dom0
    3.49 +    # needs to balloon.  No matter where we expect the free memory to come
    3.50 +    # from, we need to wait for it to become available.
    3.51      #
    3.52      # We are not allowed to balloon below dom0_min_mem, or if dom0_min_mem
    3.53      # is 0, we cannot balloon at all.  Memory can still become available
    3.54 @@ -108,43 +107,49 @@ def free(required):
    3.55      # usage, so we recheck the required alloc each time around the loop, but
    3.56      # track the last used value so that we don't trigger too many watches.
    3.57  
    3.58 -    need_mem = (required + 1023) / 1024 + BALLOON_OUT_SLACK
    3.59 -
    3.60      xroot = XendRoot.instance()
    3.61      xc = xen.lowlevel.xc.xc()
    3.62  
    3.63      try:
    3.64 -        dom0_min_mem = xroot.get_dom0_min_mem()
    3.65 +        dom0_min_mem = xroot.get_dom0_min_mem() * 1024
    3.66  
    3.67          retries = 0
    3.68          sleep_time = SLEEP_TIME_GROWTH
    3.69          last_new_alloc = None
    3.70          rlimit = RETRY_LIMIT
    3.71          while retries < rlimit:
    3.72 -            free_mem = xc.physinfo()['free_memory']
    3.73 +            physinfo = xc.physinfo()
    3.74 +            free_mem = physinfo['free_memory']
    3.75 +            scrub_mem = physinfo['scrub_memory']
    3.76  
    3.77              if free_mem >= need_mem:
    3.78 -                log.debug("Balloon: free %d; need %d; done.", free_mem,
    3.79 -                          need_mem)
    3.80 +                log.debug("Balloon: %d KiB free; need %d; done.",
    3.81 +                          free_mem, need_mem)
    3.82                  return
    3.83  
    3.84              if retries == 0:
    3.85 -                rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
    3.86 -                log.debug("Balloon: free %d; need %d; retries: %d.", 
    3.87 -                          free_mem, need_mem, rlimit)
    3.88 +                rlimit += ((need_mem - free_mem)/1024/1024) * RETRY_LIMIT_INCR
    3.89 +                log.debug("Balloon: %d KiB free; %d to scrub; need %d; retries: %d.",
    3.90 +                          free_mem, scrub_mem, need_mem, rlimit)
    3.91  
    3.92              if dom0_min_mem > 0:
    3.93                  dom0_alloc = get_dom0_current_alloc()
    3.94 -                new_alloc = dom0_alloc - (need_mem - free_mem)
    3.95 +                new_alloc = dom0_alloc - (need_mem - free_mem - scrub_mem)
    3.96  
    3.97 -                if (new_alloc >= dom0_min_mem and
    3.98 -                    new_alloc != last_new_alloc):
    3.99 -                    log.debug("Balloon: setting dom0 target to %d.",
   3.100 -                              new_alloc)
   3.101 -                    dom0 = XendDomain.instance().privilegedDomain()
   3.102 -                    dom0.setMemoryTarget(new_alloc)
   3.103 -                    last_new_alloc = new_alloc
   3.104 -                    # Continue to retry, waiting for ballooning.
   3.105 +                if free_mem + scrub_mem >= need_mem:
   3.106 +                    if last_new_alloc == None:
   3.107 +                        log.debug("Balloon: waiting on scrubbing")
   3.108 +                        last_new_alloc = dom0_alloc
   3.109 +                else:
   3.110 +                    if (new_alloc >= dom0_min_mem and
   3.111 +                        new_alloc != last_new_alloc):
   3.112 +                        new_alloc_mb = new_alloc / 1024  # Round down
   3.113 +                        log.debug("Balloon: setting dom0 target to %d MiB.",
   3.114 +                                  new_alloc_mb)
   3.115 +                        dom0 = XendDomain.instance().privilegedDomain()
   3.116 +                        dom0.setMemoryTarget(new_alloc_mb)
   3.117 +                        last_new_alloc = new_alloc
   3.118 +                # Continue to retry, waiting for ballooning or scrubbing.
   3.119  
   3.120              time.sleep(sleep_time)
   3.121              if retries < 2 * RETRY_LIMIT:
   3.122 @@ -154,15 +159,15 @@ def free(required):
   3.123          # Not enough memory; diagnose the problem.
   3.124          if dom0_min_mem == 0:
   3.125              raise VmError(('Not enough free memory and dom0_min_mem is 0, so '
   3.126 -                           'I cannot release any more.  I need %d MiB but '
   3.127 +                           'I cannot release any more.  I need %d KiB but '
   3.128                             'only have %d.') %
   3.129                            (need_mem, free_mem))
   3.130          elif new_alloc < dom0_min_mem:
   3.131              raise VmError(
   3.132 -                ('I need %d MiB, but dom0_min_mem is %d and shrinking to '
   3.133 -                 '%d MiB would leave only %d MiB free.') %
   3.134 +                ('I need %d KiB, but dom0_min_mem is %d and shrinking to '
   3.135 +                 '%d KiB would leave only %d KiB free.') %
   3.136                  (need_mem, dom0_min_mem, dom0_min_mem,
   3.137 -                 free_mem + dom0_alloc - dom0_min_mem))
   3.138 +                 free_mem + scrub_mem + dom0_alloc - dom0_min_mem))
   3.139          else:
   3.140              raise VmError('The privileged domain did not balloon!')
   3.141  
     4.1 --- a/xen/arch/x86/dom0_ops.c	Tue Jun 27 11:50:57 2006 +0100
     4.2 +++ b/xen/arch/x86/dom0_ops.c	Tue Jun 27 12:03:46 2006 +0100
     4.3 @@ -194,6 +194,7 @@ long arch_do_dom0_op(struct dom0_op *op,
     4.4          pi->nr_nodes         = 1;
     4.5          pi->total_pages      = total_pages;
     4.6          pi->free_pages       = avail_domheap_pages();
     4.7 +        pi->scrub_pages      = avail_scrub_pages();
     4.8          pi->cpu_khz          = cpu_khz;
     4.9          memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
    4.10          memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
     5.1 --- a/xen/common/page_alloc.c	Tue Jun 27 11:50:57 2006 +0100
     5.2 +++ b/xen/common/page_alloc.c	Tue Jun 27 12:03:46 2006 +0100
     5.3 @@ -61,6 +61,7 @@ custom_param("lowmem_emergency_pool", pa
     5.4  
     5.5  static DEFINE_SPINLOCK(page_scrub_lock);
     5.6  LIST_HEAD(page_scrub_list);
     5.7 +static unsigned long scrub_pages;
     5.8  
     5.9  /*********************
    5.10   * ALLOCATION BITMAP
    5.11 @@ -696,6 +697,7 @@ void free_domheap_pages(struct page_info
    5.12              {
    5.13                  spin_lock(&page_scrub_lock);
    5.14                  list_add(&pg[i].list, &page_scrub_list);
    5.15 +                scrub_pages++;
    5.16                  spin_unlock(&page_scrub_lock);
    5.17              }
    5.18          }
    5.19 @@ -784,9 +786,10 @@ static void page_scrub_softirq(void)
    5.20          /* Remove peeled pages from the list. */
    5.21          ent->next->prev = &page_scrub_list;
    5.22          page_scrub_list.next = ent->next;
    5.23 -        
    5.24 +        scrub_pages -= (i+1);
    5.25 +
    5.26          spin_unlock(&page_scrub_lock);
    5.27 -        
    5.28 +
    5.29          /* Working backwards, scrub each page in turn. */
    5.30          while ( ent != &page_scrub_list )
    5.31          {
    5.32 @@ -800,6 +803,11 @@ static void page_scrub_softirq(void)
    5.33      } while ( (NOW() - start) < MILLISECS(1) );
    5.34  }
    5.35  
    5.36 +unsigned long avail_scrub_pages(void)
    5.37 +{
    5.38 +    return scrub_pages;
    5.39 +}
    5.40 +
    5.41  static __init int page_scrub_init(void)
    5.42  {
    5.43      open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
     6.1 --- a/xen/include/public/dom0_ops.h	Tue Jun 27 11:50:57 2006 +0100
     6.2 +++ b/xen/include/public/dom0_ops.h	Tue Jun 27 12:03:46 2006 +0100
     6.3 @@ -231,6 +231,7 @@ struct dom0_physinfo {
     6.4      uint32_t cpu_khz;
     6.5      uint64_t total_pages;
     6.6      uint64_t free_pages;
     6.7 +    uint64_t scrub_pages;
     6.8      uint32_t hw_cap[8];
     6.9  };
    6.10  typedef struct dom0_physinfo dom0_physinfo_t;
     7.1 --- a/xen/include/xen/mm.h	Tue Jun 27 11:50:57 2006 +0100
     7.2 +++ b/xen/include/xen/mm.h	Tue Jun 27 12:03:46 2006 +0100
     7.3 @@ -91,6 +91,7 @@ extern struct list_head page_scrub_list;
     7.4          if ( !list_empty(&page_scrub_list) )    \
     7.5              raise_softirq(PAGE_SCRUB_SOFTIRQ);  \
     7.6      } while ( 0 )
     7.7 +unsigned long avail_scrub_pages(void);
     7.8  
     7.9  #include <asm/mm.h>
    7.10