ia64/xen-unstable

xen/common/page_alloc.c @ 14092:eb3ebf11abfe

xen: Fix dumb boot allocator breakage that prevents loop termination
in some cases. :-)
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Fri Feb 23 14:36:33 2007 +0000 (2007-02-23)
parents 3746b3d4f301
children c64aa7fb7712
/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/spinlock.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/shadow.h>
#include <xen/domain_page.h>
#include <xen/keyhandler.h>
#include <xen/perfc.h>
#include <xen/numa.h>
#include <xen/nodemask.h>
#include <asm/page.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

/*
 * Bit width of the DMA heap.
 */
unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
unsigned long max_dma_mfn = (1UL << (CONFIG_DMA_BITSIZE - PAGE_SHIFT)) - 1;
static void parse_dma_bits(char *s)
{
    unsigned int v = simple_strtol(s, NULL, 0);
    if ( v >= (sizeof(long)*8 + PAGE_SHIFT) )
    {
        dma_bitsize = sizeof(long)*8 + PAGE_SHIFT;
        max_dma_mfn = ~0UL;
    }
    else
    {
        dma_bitsize = v;
        max_dma_mfn = (1UL << (dma_bitsize - PAGE_SHIFT)) - 1;
    }
}
custom_param("dma_bits", parse_dma_bits);
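
/*
 * Example (assuming 4kB pages, i.e. PAGE_SHIFT == 12): booting with
 * "dma_bits=30" yields max_dma_mfn = (1UL << (30 - 12)) - 1 = 0x3ffff,
 * i.e. the DMA heap covers machine frames below the 1GB boundary.
 */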

/*
 * Amount of memory to reserve in a low-memory (<4GB) pool for specific
 * allocation requests. Ordinary requests will not fall back to the
 * lowmem emergency pool.
 */
static unsigned long dma_emergency_pool_pages;
static void parse_dma_emergency_pool(char *s)
{
    unsigned long long bytes;
    bytes = parse_size_and_unit(s, NULL);
    dma_emergency_pool_pages = bytes >> PAGE_SHIFT;
}
custom_param("dma_emergency_pool", parse_dma_emergency_pool);

#define round_pgdown(_p) ((_p)&PAGE_MASK)
#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

static DEFINE_SPINLOCK(page_scrub_lock);
LIST_HEAD(page_scrub_list);
static unsigned long scrub_pages;

/*********************
 * ALLOCATION BITMAP
 * One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn)                     \
({  unsigned long ___pn = (_pn);                  \
    !!(alloc_bitmap[___pn/PAGES_PER_MAPWORD] &    \
       (1UL<<(___pn&(PAGES_PER_MAPWORD-1)))); })

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n) sets all bits >= n.
 *  (1<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
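
/*
 * Worked example (assuming 64-bit longs, so PAGES_PER_MAPWORD == 64):
 * marking first_page = 70, nr_pages = 3 gives curr_idx = end_idx = 1,
 * start_off = 6, end_off = 9, so map_alloc() ORs word 1 with
 * ((1UL<<9)-1) & -(1UL<<6), i.e. bits 6-8, covering pages 70-72.
 */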

static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}


/*************************
 * BOOT-TIME ALLOCATOR
 */

static unsigned long first_valid_mfn = ~0UL;

/* Initialise allocator to handle up to @max_page pages. */
paddr_t init_boot_allocator(paddr_t bitmap_start)
{
    unsigned long bitmap_size;

    bitmap_start = round_pgup(bitmap_start);

    /*
     * Allocate space for the allocation bitmap. Include an extra longword
     * of padding for possible overrun in map_alloc and map_free.
     */
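    /*
     * For example (assuming 4kB pages): max_page = 0x100000 (4GB of RAM)
     * needs 0x100000/8 = 128kB of bitmap; adding the padding longword and
     * rounding up to a page boundary reserves 132kB after bitmap_start.
     */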
    bitmap_size = max_page / 8;
    bitmap_size += sizeof(unsigned long);
    bitmap_size = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}

void init_boot_pages(paddr_t ps, paddr_t pe)
{
    unsigned long bad_spfn, bad_epfn, i;
    const char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    first_valid_mfn = min_t(unsigned long, ps >> PAGE_SHIFT, first_valid_mfn);

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_spfn = simple_strtoul(p, &p, 0);
        bad_epfn = bad_spfn;

        if ( *p == '-' )
        {
            p++;
            bad_epfn = simple_strtoul(p, &p, 0);
            if ( bad_epfn < bad_spfn )
                bad_epfn = bad_spfn;
        }

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( bad_epfn == bad_spfn )
            printk("Marking page %lx as bad\n", bad_spfn);
        else
            printk("Marking pages %lx through %lx as bad\n",
                   bad_spfn, bad_epfn);

        for ( i = bad_spfn; i <= bad_epfn; i++ )
            if ( (i < max_page) && !allocated_in_map(i) )
                map_alloc(i, 1);
    }
}

int reserve_boot_pages(unsigned long first_pfn, unsigned long nr_pfns)
{
    unsigned long i;

    for ( i = 0; i < nr_pfns; i++ )
        if ( allocated_in_map(first_pfn + i) )
            break;

    if ( i != nr_pfns )
        return 0;

    map_alloc(first_pfn, nr_pfns);
    return 1;
}

unsigned long alloc_boot_low_pages(
    unsigned long nr_pfns, unsigned long pfn_align)
{
    unsigned long pg, i;

    /* Search forwards to obtain lowest available range. */
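    /* (On finding an allocated page at pg+i, the scan resumes at the first
     *  pfn_align-aligned frame beyond it.) */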
    for ( pg = first_valid_mfn & ~(pfn_align - 1);
          (pg + nr_pfns) <= max_page;
          pg = (pg + i + pfn_align) & ~(pfn_align - 1) )
    {
        for ( i = 0; i < nr_pfns; i++ )
            if ( allocated_in_map(pg+i) )
                break;
        if ( i == nr_pfns )
        {
            map_alloc(pg, nr_pfns);
            return pg;
        }
    }

    return 0;
}

unsigned long alloc_boot_pages(
    unsigned long nr_pfns, unsigned long pfn_align)
{
    unsigned long pg, i;

    /* Search backwards to obtain highest available range. */
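    /* (On finding an allocated page at pg+i, the next candidate is aligned
     *  down so that its range ends at or below that conflicting frame.) */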
    for ( pg = (max_page - nr_pfns) & ~(pfn_align - 1);
          pg >= first_valid_mfn;
          pg = (pg + i - nr_pfns) & ~(pfn_align - 1) )
    {
        for ( i = 0; i < nr_pfns; i++ )
            if ( allocated_in_map(pg+i) )
                break;
        if ( i == nr_pfns )
        {
            map_alloc(pg, nr_pfns);
            return pg;
        }
    }

    return 0;
}


/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#define MEMZONE_DOM 1
#define MEMZONE_DMADOM 2
#define NR_ZONES 3

#define pfn_dom_zone_type(_pfn) \
    (((_pfn) <= max_dma_mfn) ? MEMZONE_DMADOM : MEMZONE_DOM)

static struct list_head heap[NR_ZONES][MAX_NUMNODES][MAX_ORDER+1];

static unsigned long avail[NR_ZONES][MAX_NUMNODES];

static DEFINE_SPINLOCK(heap_lock);

void end_boot_allocator(void)
{
    unsigned long i, j, k;
    int curr_free, next_free;

    memset(avail, 0, sizeof(avail));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j < MAX_NUMNODES; j++ )
            for ( k = 0; k <= MAX_ORDER; k++ )
                INIT_LIST_HEAD(&heap[i][j][k]);

    /* Pages that are free now go to the domain sub-allocator. */
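    /* (Each free page is handed over one at a time; the page after it is
     *  temporarily marked allocated so that free_heap_pages() does not try
     *  to merge with a buddy that has not been initialised yet.) */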
    if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
        map_alloc(first_valid_mfn, 1);
    for ( i = first_valid_mfn; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = !allocated_in_map(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
    }

    printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
}

/*
 * Hand the specified arbitrary page range to the specified heap zone,
 * checking the node_id of each page against that of the page before it.
 * If the two differ and the current page is not on a MAX_ORDER boundary,
 * we reserve the page by not freeing it to the buddy allocator.
 */
#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
void init_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned long nr_pages)
{
    unsigned int nid_curr, nid_prev;
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    if ( likely(page_to_mfn(pg) != 0) )
        nid_prev = phys_to_nid(page_to_maddr(pg-1));
    else
        nid_prev = phys_to_nid(page_to_maddr(pg));

    for ( i = 0; i < nr_pages; i++ )
    {
        nid_curr = phys_to_nid(page_to_maddr(pg+i));

        /*
         * Free pages of the same node, or if they differ, but are on a
         * MAX_ORDER alignment boundary (which already get reserved).
         */
        if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
                                        MAX_ORDER_ALIGNED) )
            free_heap_pages(zone, pg+i, 0);
        else
            printk("Reserving non-aligned node boundary @ mfn %lu\n",
                   page_to_mfn(pg+i));

        nid_prev = nid_curr;
    }
}

/* Allocate 2^@order contiguous pages. */
struct page_info *alloc_heap_pages(unsigned int zone, unsigned int cpu,
                                   unsigned int order)
{
    unsigned int i,j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
    unsigned int request = (1UL << order);
    struct page_info *pg;

    ASSERT(node >= 0);
    ASSERT(node < num_nodes);
    ASSERT(zone < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /* Start with the requested node, but exhaust all node memory in the
     * requested zone before failing. Compute a new node value only if we
     * fail to find memory in the target node; this avoids needless
     * computation on the fast path. */
    for ( i = 0; i < num_nodes; i++ )
    {
        /* check if target node can support the allocation */
        if ( avail[zone][node] >= request )
        {
            /* Find smallest order which can satisfy the request. */
            for ( j = order; j <= MAX_ORDER; j++ )
            {
                if ( !list_empty(&heap[zone][node][j]) )
                    goto found;
            }
        }
        /* pick next node, wrapping around if needed */
        if ( ++node == num_nodes )
            node = 0;
    }

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap[zone][node][j].next, struct page_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
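    /* (For example, an order-0 request satisfied from an order-3 chunk puts
     *  sub-chunks of order 2, 1 and 0 back on the free lists and returns the
     *  final page of the original chunk.) */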
    while ( j != order )
    {
        PFN_ORDER(pg) = --j;
        list_add_tail(&pg->list, &heap[zone][node][j]);
        pg += 1 << j;
    }

    map_alloc(page_to_mfn(pg), request);
    ASSERT(avail[zone][node] >= request);
    avail[zone][node] -= request;

    spin_unlock(&heap_lock);

    return pg;
}


/* Free 2^@order set of pages. */
void free_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned int order)
{
    unsigned long mask;
    int node = phys_to_nid(page_to_maddr(pg));

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);
    ASSERT(node >= 0);
    ASSERT(node < num_online_nodes());

    spin_lock(&heap_lock);

    map_free(page_to_mfn(pg), 1 << order);
    avail[zone][node] += 1 << order;

    /* Merge chunks as far as possible. */
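    /* (Bit `order' of the mfn says whether this chunk is the upper or lower
     *  half of its order-(order+1) buddy pair, i.e. whether its buddy lies
     *  below or above it.) */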
    while ( order < MAX_ORDER )
    {
        mask = 1 << order;

        if ( (page_to_mfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_mfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_mfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;

        /* after merging, pg should be in the same node */
        ASSERT(phys_to_nid(page_to_maddr(pg)) == node );
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap[zone][node][order]);

    spin_unlock(&heap_lock);
}


/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock or disable interrupts while scrubbing very large memory areas.
 */
void scrub_heap_pages(void)
{
    void *p;
    unsigned long mfn;

    printk("Scrubbing Free RAM: ");

    for ( mfn = first_valid_mfn; mfn < max_page; mfn++ )
    {
        process_pending_timers();

        /* Quick lock-free check. */
        if ( allocated_in_map(mfn) )
            continue;

        /* Every 100MB, print a progress dot. */
        if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
            printk(".");

        spin_lock_irq(&heap_lock);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(mfn) )
        {
            if ( IS_XEN_HEAP_FRAME(mfn_to_page(mfn)) )
            {
                p = page_to_virt(mfn_to_page(mfn));
                memguard_unguard_range(p, PAGE_SIZE);
                clear_page(p);
                memguard_guard_range(p, PAGE_SIZE);
            }
            else
            {
                p = map_domain_page(mfn);
                clear_page(p);
                unmap_domain_page(p);
            }
        }

        spin_unlock_irq(&heap_lock);
    }

    printk("done.\n");
}


/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long flags;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    memguard_guard_range(maddr_to_virt(ps), pe - ps);

    /*
     * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
     * prevent merging of power-of-two blocks across the zone boundary.
     */
    if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
        pe -= PAGE_SIZE;

    local_irq_save(flags);
    init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
    local_irq_restore(flags);
}


void *alloc_xenheap_pages(unsigned int order)
{
    unsigned long flags;
    struct page_info *pg;
    int i;

    local_irq_save(flags);
    pg = alloc_heap_pages(MEMZONE_XEN, smp_processor_id(), order);
    local_irq_restore(flags);

    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].count_info = 0;
        pg[i].u.inuse._domain = 0;
        pg[i].u.inuse.type_info = 0;
    }

    return page_to_virt(pg);

 no_memory:
    printk("Cannot handle page request order %d!\n", order);
    return NULL;
}


void free_xenheap_pages(void *v, unsigned int order)
{
    unsigned long flags;

    if ( v == NULL )
        return;

    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));

    local_irq_save(flags);
    free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
    local_irq_restore(flags);
}


/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */
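
/*
 * init_domheap_pages() splits [ps,pe) at the DMA boundary. For example,
 * with max_dma_mfn == 0x3ffff, a range covering frames [0x30000,0x50000)
 * is handed out as [0x30000,0x40000) to MEMZONE_DMADOM and
 * [0x40000,0x50000) to MEMZONE_DOM.
 */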
void init_domheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long s_tot, e_tot, s_dma, e_dma, s_nrm, e_nrm;

    ASSERT(!in_irq());

    s_tot = round_pgup(ps) >> PAGE_SHIFT;
    e_tot = round_pgdown(pe) >> PAGE_SHIFT;

    s_dma = min(s_tot, max_dma_mfn + 1);
    e_dma = min(e_tot, max_dma_mfn + 1);
    if ( s_dma < e_dma )
        init_heap_pages(MEMZONE_DMADOM, mfn_to_page(s_dma), e_dma - s_dma);

    s_nrm = max(s_tot, max_dma_mfn + 1);
    e_nrm = max(e_tot, max_dma_mfn + 1);
    if ( s_nrm < e_nrm )
        init_heap_pages(MEMZONE_DOM, mfn_to_page(s_nrm), e_nrm - s_nrm);
}


int assign_pages(
    struct domain *d,
    struct page_info *pg,
    unsigned int order,
    unsigned int memflags)
{
    unsigned long i;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) )
    {
        gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
                 d->domain_id);
        goto fail;
    }

    if ( !(memflags & MEMF_no_refcount) )
    {
        if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
        {
            gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n",
                     d->domain_id, d->tot_pages + (1 << order), d->max_pages);
            goto fail;
        }

        if ( unlikely(d->tot_pages == 0) )
            get_knownalive_domain(d);

        d->tot_pages += 1 << order;
    }

    for ( i = 0; i < (1 << order); i++ )
    {
        ASSERT(page_get_owner(&pg[i]) == NULL);
        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
        page_set_owner(&pg[i], d);
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info = PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);
    return 0;

 fail:
    spin_unlock(&d->page_alloc_lock);
    return -1;
}


struct page_info *__alloc_domheap_pages(
    struct domain *d, unsigned int cpu, unsigned int order,
    unsigned int memflags)
{
    struct page_info *pg = NULL;
    cpumask_t mask;
    unsigned long i;

    ASSERT(!in_irq());

    if ( !(memflags & MEMF_dma) )
    {
        pg = alloc_heap_pages(MEMZONE_DOM, cpu, order);
        /* Failure? Then check if we can fall back to the DMA pool. */
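        /* (Ordinary requests may dip into the DMA pool only if doing so
         *  still leaves at least dma_emergency_pool_pages free there.) */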
        if ( unlikely(pg == NULL) &&
             ((order > MAX_ORDER) ||
              (avail_heap_pages(MEMZONE_DMADOM,-1) <
               (dma_emergency_pool_pages + (1UL << order)))) )
            return NULL;
    }

    if ( pg == NULL )
        if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, cpu, order)) == NULL )
            return NULL;

    mask = pg->u.free.cpumask;
    tlbflush_filter(mask, pg->tlbflush_timestamp);

    pg->count_info = 0;
    pg->u.inuse._domain = 0;
    pg->u.inuse.type_info = 0;

    for ( i = 1; i < (1 << order); i++ )
    {
        /* Add in any extra CPUs that need flushing because of this page. */
        cpumask_t extra_cpus_mask;
        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
        cpus_or(mask, mask, extra_cpus_mask);

        pg[i].count_info = 0;
        pg[i].u.inuse._domain = 0;
        pg[i].u.inuse.type_info = 0;
        page_set_owner(&pg[i], NULL);
    }

    if ( unlikely(!cpus_empty(mask)) )
    {
        perfc_incrc(need_flush_tlb_flush);
        flush_tlb_mask(mask);
    }

    if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
    {
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        return NULL;
    }

    return pg;
}

inline struct page_info *alloc_domheap_pages(
    struct domain *d, unsigned int order, unsigned int flags)
{
    return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
}

void free_domheap_pages(struct page_info *pg, unsigned int order)
{
    int i, drop_dom_ref;
    struct domain *d = page_get_owner(pg);

    ASSERT(!in_irq());

    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
        {
            shadow_drop_references(d, &pg[i]);
            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
            pg[i].tlbflush_timestamp = tlbflush_current_time();
            pg[i].u.free.cpumask = d->domain_dirty_cpumask;
            list_del(&pg[i].list);
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
        {
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        }
        else
        {
            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            for ( i = 0; i < (1 << order); i++ )
            {
                spin_lock(&page_scrub_lock);
                list_add(&pg[i].list, &page_scrub_list);
                scrub_pages++;
                spin_unlock(&page_scrub_lock);
            }
        }
    }
    else
    {
        /* Freeing anonymous domain-heap pages. */
        for ( i = 0; i < (1 << order); i++ )
            cpus_clear(pg[i].u.free.cpumask);
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}


unsigned long avail_heap_pages(int zone, int node)
{
    int i,j, num_nodes = num_online_nodes();
    unsigned long free_pages = 0;

    for (i=0; i<NR_ZONES; i++)
        if ( (zone == -1) || (zone == i) )
            for (j=0; j < num_nodes; j++)
                if ( (node == -1) || (node == j) )
                    free_pages += avail[i][j];

    return free_pages;
}

unsigned long avail_domheap_pages(void)
{
    unsigned long avail_nrm, avail_dma;

    avail_nrm = avail_heap_pages(MEMZONE_DOM,-1);

    avail_dma = avail_heap_pages(MEMZONE_DMADOM,-1);
    if ( avail_dma > dma_emergency_pool_pages )
        avail_dma -= dma_emergency_pool_pages;
    else
        avail_dma = 0;

    return avail_nrm + avail_dma;
}

unsigned long avail_nodeheap_pages(int node)
{
    return avail_heap_pages(-1, node);
}

static void pagealloc_keyhandler(unsigned char key)
{
    printk("Physical memory information:\n");
    printk(" Xen heap: %lukB free\n"
           " DMA heap: %lukB free\n"
           " Dom heap: %lukB free\n",
           avail_heap_pages(MEMZONE_XEN, -1) << (PAGE_SHIFT-10),
           avail_heap_pages(MEMZONE_DMADOM, -1) <<(PAGE_SHIFT-10),
           avail_heap_pages(MEMZONE_DOM, -1) <<(PAGE_SHIFT-10));
}


static __init int pagealloc_keyhandler_init(void)
{
    register_keyhandler('m', pagealloc_keyhandler, "memory info");
    return 0;
}
__initcall(pagealloc_keyhandler_init);


/*************************
 * PAGE SCRUBBING
 */

static void page_scrub_softirq(void)
{
    struct list_head *ent;
    struct page_info *pg;
    void *p;
    int i;
    s_time_t start = NOW();

    /* Aim to do 1ms of work (ten percent of a 10ms jiffy). */
    do {
        spin_lock(&page_scrub_lock);

        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
        {
            spin_unlock(&page_scrub_lock);
            return;
        }

        /* Peel up to 16 pages from the list. */
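        /* (ent ends up pointing at the last page peeled; between 1 and 16
         *  pages, i.e. i+1 of them, are detached below.) */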
        for ( i = 0; i < 16; i++ )
        {
            if ( ent->next == &page_scrub_list )
                break;
            ent = ent->next;
        }

        /* Remove peeled pages from the list. */
        ent->next->prev = &page_scrub_list;
        page_scrub_list.next = ent->next;
        scrub_pages -= (i+1);

        spin_unlock(&page_scrub_lock);

        /* Working backwards, scrub each page in turn. */
        while ( ent != &page_scrub_list )
        {
            pg = list_entry(ent, struct page_info, list);
            ent = ent->prev;
            p = map_domain_page(page_to_mfn(pg));
            clear_page(p);
            unmap_domain_page(p);
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
        }
    } while ( (NOW() - start) < MILLISECS(1) );
}

unsigned long avail_scrub_pages(void)
{
    return scrub_pages;
}

static unsigned long count_bucket(struct list_head* l, int order)
{
    unsigned long total_pages = 0;
    int pages = 1 << order;
    struct page_info *pg;

    list_for_each_entry(pg, l, list)
        total_pages += pages;

    return total_pages;
}

static void dump_heap(unsigned char key)
{
    s_time_t now = NOW();
    int i,j,k;
    unsigned long total;

    printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
           (u32)(now>>32), (u32)now);

    for (i=0; i<NR_ZONES; i++ )
        for (j=0;j<MAX_NUMNODES;j++)
            for (k=0;k<=MAX_ORDER;k++)
                if ( !list_empty(&heap[i][j][k]) )
                {
                    total = count_bucket(&heap[i][j][k], k);
                    printk("heap[%d][%d][%d]-> %lu pages\n",
                           i, j, k, total);
                }
}

static __init int register_heap_trigger(void)
{
    register_keyhandler('H', dump_heap, "dump heap info");
    return 0;
}
__initcall(register_heap_trigger);

static __init int page_scrub_init(void)
{
    open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
    return 0;
}
__initcall(page_scrub_init);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */