ia64/xen-unstable
changeset 10541:234939c0ec3a
[BALLOON] Make the domain0 ballooning logic in xend aware of pages that are on the page scrub list.
This fixes the case where crashing/restarting a domain can cause dom0 to balloon more than necessary.
I changed the physinfo dictionary in Python to be in KiB, rather than
MiB, to avoid accumulating ugly rounding errors. I tried to avoid
changing units anywhere else.
Signed-off-by: Charles Coffing <ccoffing@novell.com>
author | kaf24@firebug.cl.cam.ac.uk |
---|---|
date | Tue Jun 27 12:03:46 2006 +0100 (2006-06-27) |
parents | 7154e0416313 |
children | 02b0ed160e8e |
files | tools/python/xen/lowlevel/xc/xc.c tools/python/xen/xend/XendNode.py tools/python/xen/xend/balloon.py xen/arch/x86/dom0_ops.c xen/common/page_alloc.c xen/include/public/dom0_ops.h xen/include/xen/mm.h |
line diff
1.1 --- a/tools/python/xen/lowlevel/xc/xc.c Tue Jun 27 11:50:57 2006 +0100 1.2 +++ b/tools/python/xen/lowlevel/xc/xc.c Tue Jun 27 12:03:46 2006 +0100 1.3 @@ -582,6 +582,12 @@ static PyObject *pyxc_readconsolering(Xc 1.4 } 1.5 1.6 1.7 +static unsigned long pages_to_kib(unsigned long pages) 1.8 +{ 1.9 + return pages * (XC_PAGE_SIZE / 1024); 1.10 +} 1.11 + 1.12 + 1.13 static PyObject *pyxc_pages_to_kib(XcObject *self, PyObject *args) 1.14 { 1.15 unsigned long pages; 1.16 @@ -589,13 +595,7 @@ static PyObject *pyxc_pages_to_kib(XcObj 1.17 if (!PyArg_ParseTuple(args, "l", &pages)) 1.18 return NULL; 1.19 1.20 - return PyLong_FromUnsignedLong(pages * (XC_PAGE_SIZE / 1024)); 1.21 -} 1.22 - 1.23 - 1.24 -static unsigned long pages_to_mb(unsigned long pages) 1.25 -{ 1.26 - return (pages * (XC_PAGE_SIZE / 1024) + 1023) / 1024; 1.27 + return PyLong_FromUnsignedLong(pages_to_kib(pages)); 1.28 } 1.29 1.30 1.31 @@ -618,13 +618,14 @@ static PyObject *pyxc_physinfo(XcObject 1.32 if(q>cpu_cap) 1.33 *(q-1)=0; 1.34 1.35 - return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:i,s:s}", 1.36 + return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}", 1.37 "threads_per_core", info.threads_per_core, 1.38 "cores_per_socket", info.cores_per_socket, 1.39 "sockets_per_node", info.sockets_per_node, 1.40 "nr_nodes", info.nr_nodes, 1.41 - "total_memory", pages_to_mb(info.total_pages), 1.42 - "free_memory", pages_to_mb(info.free_pages), 1.43 + "total_memory", pages_to_kib(info.total_pages), 1.44 + "free_memory", pages_to_kib(info.free_pages), 1.45 + "scrub_memory", pages_to_kib(info.scrub_pages), 1.46 "cpu_khz", info.cpu_khz, 1.47 "hw_caps", cpu_cap); 1.48 }
2.1 --- a/tools/python/xen/xend/XendNode.py Tue Jun 27 11:50:57 2006 +0100 2.2 +++ b/tools/python/xen/xend/XendNode.py Tue Jun 27 12:03:46 2006 +0100 2.3 @@ -64,6 +64,9 @@ class XendNode: 2.4 info['cores_per_socket'] * 2.5 info['threads_per_core']) 2.6 info['cpu_mhz'] = info['cpu_khz'] / 1000 2.7 + # physinfo is in KiB 2.8 + info['total_memory'] = info['total_memory'] / 1024 2.9 + info['free_memory'] = info['free_memory'] / 1024 2.10 2.11 ITEM_ORDER = ['nr_cpus', 2.12 'nr_nodes',
3.1 --- a/tools/python/xen/xend/balloon.py Tue Jun 27 11:50:57 2006 +0100 3.2 +++ b/tools/python/xen/xend/balloon.py Tue Jun 27 12:03:46 2006 +0100 3.3 @@ -29,8 +29,6 @@ from XendError import VmError 3.4 3.5 PROC_XEN_BALLOON = '/proc/xen/balloon' 3.6 3.7 -BALLOON_OUT_SLACK = 1 # MiB. We need this because the physinfo details are 3.8 - # rounded. 3.9 RETRY_LIMIT = 20 3.10 RETRY_LIMIT_INCR = 5 3.11 ## 3.12 @@ -68,22 +66,22 @@ def _get_proc_balloon(label): 3.13 f.close() 3.14 3.15 def get_dom0_current_alloc(): 3.16 - """Returns the current memory allocation (in MiB) of dom0.""" 3.17 + """Returns the current memory allocation (in KiB) of dom0.""" 3.18 3.19 kb = _get_proc_balloon(labels['current']) 3.20 if kb == None: 3.21 raise VmError('Failed to query current memory allocation of dom0.') 3.22 - return kb / 1024 3.23 + return kb 3.24 3.25 def get_dom0_target_alloc(): 3.26 - """Returns the target memory allocation (in MiB) of dom0.""" 3.27 + """Returns the target memory allocation (in KiB) of dom0.""" 3.28 3.29 kb = _get_proc_balloon(labels['target']) 3.30 if kb == None: 3.31 raise VmError('Failed to query target memory allocation of dom0.') 3.32 - return kb / 1024 3.33 + return kb 3.34 3.35 -def free(required): 3.36 +def free(need_mem): 3.37 """Balloon out memory from the privileged domain so that there is the 3.38 specified required amount (in KiB) free. 3.39 """ 3.40 @@ -92,9 +90,10 @@ def free(required): 3.41 # to balloon out to free some up. Memory freed by a destroyed domain may 3.42 # not appear in the free_memory field immediately, because it needs to be 3.43 # scrubbed before it can be released to the free list, which is done 3.44 - # asynchronously by Xen; ballooning is asynchronous also. No matter where 3.45 - # we expect the free memory to come from, therefore, we need to wait for 3.46 - # it to become available. 3.47 + # asynchronously by Xen; ballooning is asynchronous also. 
Such memory 3.48 + # does, however, need to be accounted for when calculating how much dom0 3.49 + # needs to balloon. No matter where we expect the free memory to come 3.50 + # from, we need to wait for it to become available. 3.51 # 3.52 # We are not allowed to balloon below dom0_min_mem, or if dom0_min_mem 3.53 # is 0, we cannot balloon at all. Memory can still become available 3.54 @@ -108,43 +107,49 @@ def free(required): 3.55 # usage, so we recheck the required alloc each time around the loop, but 3.56 # track the last used value so that we don't trigger too many watches. 3.57 3.58 - need_mem = (required + 1023) / 1024 + BALLOON_OUT_SLACK 3.59 - 3.60 xroot = XendRoot.instance() 3.61 xc = xen.lowlevel.xc.xc() 3.62 3.63 try: 3.64 - dom0_min_mem = xroot.get_dom0_min_mem() 3.65 + dom0_min_mem = xroot.get_dom0_min_mem() * 1024 3.66 3.67 retries = 0 3.68 sleep_time = SLEEP_TIME_GROWTH 3.69 last_new_alloc = None 3.70 rlimit = RETRY_LIMIT 3.71 while retries < rlimit: 3.72 - free_mem = xc.physinfo()['free_memory'] 3.73 + physinfo = xc.physinfo() 3.74 + free_mem = physinfo['free_memory'] 3.75 + scrub_mem = physinfo['scrub_memory'] 3.76 3.77 if free_mem >= need_mem: 3.78 - log.debug("Balloon: free %d; need %d; done.", free_mem, 3.79 - need_mem) 3.80 + log.debug("Balloon: %d KiB free; need %d; done.", 3.81 + free_mem, need_mem) 3.82 return 3.83 3.84 if retries == 0: 3.85 - rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR 3.86 - log.debug("Balloon: free %d; need %d; retries: %d.", 3.87 - free_mem, need_mem, rlimit) 3.88 + rlimit += ((need_mem - free_mem)/1024/1024) * RETRY_LIMIT_INCR 3.89 + log.debug("Balloon: %d KiB free; %d to scrub; need %d; retries: %d.", 3.90 + free_mem, scrub_mem, need_mem, rlimit) 3.91 3.92 if dom0_min_mem > 0: 3.93 dom0_alloc = get_dom0_current_alloc() 3.94 - new_alloc = dom0_alloc - (need_mem - free_mem) 3.95 + new_alloc = dom0_alloc - (need_mem - free_mem - scrub_mem) 3.96 3.97 - if (new_alloc >= dom0_min_mem and 3.98 - new_alloc != 
last_new_alloc): 3.99 - log.debug("Balloon: setting dom0 target to %d.", 3.100 - new_alloc) 3.101 - dom0 = XendDomain.instance().privilegedDomain() 3.102 - dom0.setMemoryTarget(new_alloc) 3.103 - last_new_alloc = new_alloc 3.104 - # Continue to retry, waiting for ballooning. 3.105 + if free_mem + scrub_mem >= need_mem: 3.106 + if last_new_alloc == None: 3.107 + log.debug("Balloon: waiting on scrubbing") 3.108 + last_new_alloc = dom0_alloc 3.109 + else: 3.110 + if (new_alloc >= dom0_min_mem and 3.111 + new_alloc != last_new_alloc): 3.112 + new_alloc_mb = new_alloc / 1024 # Round down 3.113 + log.debug("Balloon: setting dom0 target to %d MiB.", 3.114 + new_alloc_mb) 3.115 + dom0 = XendDomain.instance().privilegedDomain() 3.116 + dom0.setMemoryTarget(new_alloc_mb) 3.117 + last_new_alloc = new_alloc 3.118 + # Continue to retry, waiting for ballooning or scrubbing. 3.119 3.120 time.sleep(sleep_time) 3.121 if retries < 2 * RETRY_LIMIT: 3.122 @@ -154,15 +159,15 @@ def free(required): 3.123 # Not enough memory; diagnose the problem. 3.124 if dom0_min_mem == 0: 3.125 raise VmError(('Not enough free memory and dom0_min_mem is 0, so ' 3.126 - 'I cannot release any more. I need %d MiB but ' 3.127 + 'I cannot release any more. I need %d KiB but ' 3.128 'only have %d.') % 3.129 (need_mem, free_mem)) 3.130 elif new_alloc < dom0_min_mem: 3.131 raise VmError( 3.132 - ('I need %d MiB, but dom0_min_mem is %d and shrinking to ' 3.133 - '%d MiB would leave only %d MiB free.') % 3.134 + ('I need %d KiB, but dom0_min_mem is %d and shrinking to ' 3.135 + '%d KiB would leave only %d KiB free.') % 3.136 (need_mem, dom0_min_mem, dom0_min_mem, 3.137 - free_mem + dom0_alloc - dom0_min_mem)) 3.138 + free_mem + scrub_mem + dom0_alloc - dom0_min_mem)) 3.139 else: 3.140 raise VmError('The privileged domain did not balloon!') 3.141
4.1 --- a/xen/arch/x86/dom0_ops.c Tue Jun 27 11:50:57 2006 +0100 4.2 +++ b/xen/arch/x86/dom0_ops.c Tue Jun 27 12:03:46 2006 +0100 4.3 @@ -194,6 +194,7 @@ long arch_do_dom0_op(struct dom0_op *op, 4.4 pi->nr_nodes = 1; 4.5 pi->total_pages = total_pages; 4.6 pi->free_pages = avail_domheap_pages(); 4.7 + pi->scrub_pages = avail_scrub_pages(); 4.8 pi->cpu_khz = cpu_khz; 4.9 memset(pi->hw_cap, 0, sizeof(pi->hw_cap)); 4.10 memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
5.1 --- a/xen/common/page_alloc.c Tue Jun 27 11:50:57 2006 +0100 5.2 +++ b/xen/common/page_alloc.c Tue Jun 27 12:03:46 2006 +0100 5.3 @@ -61,6 +61,7 @@ custom_param("lowmem_emergency_pool", pa 5.4 5.5 static DEFINE_SPINLOCK(page_scrub_lock); 5.6 LIST_HEAD(page_scrub_list); 5.7 +static unsigned long scrub_pages; 5.8 5.9 /********************* 5.10 * ALLOCATION BITMAP 5.11 @@ -696,6 +697,7 @@ void free_domheap_pages(struct page_info 5.12 { 5.13 spin_lock(&page_scrub_lock); 5.14 list_add(&pg[i].list, &page_scrub_list); 5.15 + scrub_pages++; 5.16 spin_unlock(&page_scrub_lock); 5.17 } 5.18 } 5.19 @@ -784,9 +786,10 @@ static void page_scrub_softirq(void) 5.20 /* Remove peeled pages from the list. */ 5.21 ent->next->prev = &page_scrub_list; 5.22 page_scrub_list.next = ent->next; 5.23 - 5.24 + scrub_pages -= (i+1); 5.25 + 5.26 spin_unlock(&page_scrub_lock); 5.27 - 5.28 + 5.29 /* Working backwards, scrub each page in turn. */ 5.30 while ( ent != &page_scrub_list ) 5.31 { 5.32 @@ -800,6 +803,11 @@ static void page_scrub_softirq(void) 5.33 } while ( (NOW() - start) < MILLISECS(1) ); 5.34 } 5.35 5.36 +unsigned long avail_scrub_pages(void) 5.37 +{ 5.38 + return scrub_pages; 5.39 +} 5.40 + 5.41 static __init int page_scrub_init(void) 5.42 { 5.43 open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
6.1 --- a/xen/include/public/dom0_ops.h Tue Jun 27 11:50:57 2006 +0100 6.2 +++ b/xen/include/public/dom0_ops.h Tue Jun 27 12:03:46 2006 +0100 6.3 @@ -231,6 +231,7 @@ struct dom0_physinfo { 6.4 uint32_t cpu_khz; 6.5 uint64_t total_pages; 6.6 uint64_t free_pages; 6.7 + uint64_t scrub_pages; 6.8 uint32_t hw_cap[8]; 6.9 }; 6.10 typedef struct dom0_physinfo dom0_physinfo_t;
7.1 --- a/xen/include/xen/mm.h Tue Jun 27 11:50:57 2006 +0100 7.2 +++ b/xen/include/xen/mm.h Tue Jun 27 12:03:46 2006 +0100 7.3 @@ -91,6 +91,7 @@ extern struct list_head page_scrub_list; 7.4 if ( !list_empty(&page_scrub_list) ) \ 7.5 raise_softirq(PAGE_SCRUB_SOFTIRQ); \ 7.6 } while ( 0 ) 7.7 +unsigned long avail_scrub_pages(void); 7.8 7.9 #include <asm/mm.h> 7.10