direct-io.hg: view xen/common/page_alloc.c @ 13079:8752418b58d8

[XEN] Only print progress dots when scrubbing actual memory.
Signed-off-by: Jes Sorensen <jes@sgi.com>

author    kfraser@localhost.localdomain
date      Mon Dec 18 14:38:38 2006 +0000 (2006-12-18)
parents   7b6aba313aac
children  3e2d3d737624

line source
1 /******************************************************************************
2 * page_alloc.c
3 *
4 * Simple buddy heap allocator for Xen.
5 *
6 * Copyright (c) 2002-2004 K A Fraser
7 * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
24 #include <xen/config.h>
25 #include <xen/init.h>
26 #include <xen/types.h>
27 #include <xen/lib.h>
28 #include <xen/sched.h>
29 #include <xen/spinlock.h>
30 #include <xen/mm.h>
31 #include <xen/irq.h>
32 #include <xen/softirq.h>
33 #include <xen/shadow.h>
34 #include <xen/domain_page.h>
35 #include <xen/keyhandler.h>
36 #include <xen/perfc.h>
37 #include <xen/numa.h>
38 #include <xen/nodemask.h>
39 #include <asm/page.h>
41 /*
42 * Comma-separated list of hexadecimal page numbers containing bad bytes.
43 * e.g. 'badpage=0x3f45,0x8a321'.
44 */
45 static char opt_badpage[100] = "";
46 string_param("badpage", opt_badpage);
48 /*
49 * Bit width of the DMA heap.
50 */
51 unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
52 unsigned long max_dma_mfn = (1UL << (CONFIG_DMA_BITSIZE - PAGE_SHIFT)) - 1;
53 static void parse_dma_bits(char *s)
54 {
55 unsigned int v = simple_strtol(s, NULL, 0);
56 if ( v >= (sizeof(long)*8 + PAGE_SHIFT) )
57 {
58 dma_bitsize = sizeof(long)*8 + PAGE_SHIFT;
59 max_dma_mfn = ~0UL;
60 }
61 else
62 {
63 dma_bitsize = v;
64 max_dma_mfn = (1UL << (dma_bitsize - PAGE_SHIFT)) - 1;
65 }
66 }
67 custom_param("dma_bits", parse_dma_bits);
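/*
 * Example (illustrative, assuming a 4kB-page build, PAGE_SHIFT == 12):
 * booting with "dma_bits=30" yields max_dma_mfn == (1UL<<18)-1 == 0x3ffff,
 * i.e. only memory below the 1GB boundary is treated as DMA-heap memory.
 */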
69 /*
70 * Amount of memory to reserve in a low-memory (<4GB) pool for specific
71 * allocation requests. Ordinary requests will not fall back to the
72 * lowmem emergency pool.
73 */
74 static unsigned long dma_emergency_pool_pages;
75 static void parse_dma_emergency_pool(char *s)
76 {
77 unsigned long long bytes;
78 bytes = parse_size_and_unit(s, NULL);
79 dma_emergency_pool_pages = bytes >> PAGE_SHIFT;
80 }
81 custom_param("dma_emergency_pool", parse_dma_emergency_pool);
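/*
 * Example (illustrative, assuming 4kB pages): "dma_emergency_pool=16M"
 * reserves 4096 pages; ordinary allocations in __alloc_domheap_pages()
 * then refuse to drain the DMA heap below that reserve.
 */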
83 #define round_pgdown(_p) ((_p)&PAGE_MASK)
84 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
86 static DEFINE_SPINLOCK(page_scrub_lock);
87 LIST_HEAD(page_scrub_list);
88 static unsigned long scrub_pages;
90 /*********************
91 * ALLOCATION BITMAP
92 * One bit per page of memory. Bit set => page is allocated.
93 */
95 static unsigned long *alloc_bitmap;
96 #define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
98 #define allocated_in_map(_pn) \
99 ( !! (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & \
100 (1UL<<((_pn)&(PAGES_PER_MAPWORD-1)))) )
102 /*
103 * Hint regarding bitwise arithmetic in map_{alloc,free}:
104 * -(1<<n) sets all bits >= n.
105 * (1<<n)-1 sets all bits < n.
106 * Variable names in map_{alloc,free}:
107 * *_idx == Index into `alloc_bitmap' array.
108 * *_off == Bit offset within an element of the `alloc_bitmap' array.
109 */
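/*
 * Worked example (64-bit maps, PAGES_PER_MAPWORD == 64): map_alloc(6, 4)
 * has curr_idx == end_idx == 0, start_off == 6, end_off == 10, so it ORs
 * in ((1UL<<10)-1) & -(1UL<<6) == 0x3c0, i.e. exactly bits 6..9.
 */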
111 static void map_alloc(unsigned long first_page, unsigned long nr_pages)
112 {
113 unsigned long start_off, end_off, curr_idx, end_idx;
115 #ifndef NDEBUG
116 unsigned long i;
117 /* Check that the block isn't already allocated. */
118 for ( i = 0; i < nr_pages; i++ )
119 ASSERT(!allocated_in_map(first_page + i));
120 #endif
122 curr_idx = first_page / PAGES_PER_MAPWORD;
123 start_off = first_page & (PAGES_PER_MAPWORD-1);
124 end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
125 end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
127 if ( curr_idx == end_idx )
128 {
129 alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
130 }
131 else
132 {
133 alloc_bitmap[curr_idx] |= -(1UL<<start_off);
134 while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
135 alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
136 }
137 }
139 static void map_free(unsigned long first_page, unsigned long nr_pages)
140 {
141 unsigned long start_off, end_off, curr_idx, end_idx;
143 #ifndef NDEBUG
144 unsigned long i;
145 /* Check that the block isn't already freed. */
146 for ( i = 0; i < nr_pages; i++ )
147 ASSERT(allocated_in_map(first_page + i));
148 #endif
150 curr_idx = first_page / PAGES_PER_MAPWORD;
151 start_off = first_page & (PAGES_PER_MAPWORD-1);
152 end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
153 end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
155 if ( curr_idx == end_idx )
156 {
157 alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
158 }
159 else
160 {
161 alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
162 while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
163 alloc_bitmap[curr_idx] &= -(1UL<<end_off);
164 }
165 }
169 /*************************
170 * BOOT-TIME ALLOCATOR
171 */
173 static unsigned long first_valid_mfn = ~0UL;
175 /* Initialise allocator to handle up to @max_page pages. */
176 paddr_t init_boot_allocator(paddr_t bitmap_start)
177 {
178 unsigned long bitmap_size;
180 bitmap_start = round_pgup(bitmap_start);
182 /*
183 * Allocate space for the allocation bitmap. Include an extra longword
184 * of padding for possible overrun in map_alloc and map_free.
185 */
186 bitmap_size = max_page / 8;
187 bitmap_size += sizeof(unsigned long);
188 bitmap_size = round_pgup(bitmap_size);
189 alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);
191 /* All allocated by default. */
192 memset(alloc_bitmap, ~0, bitmap_size);
194 return bitmap_start + bitmap_size;
195 }
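/*
 * Example: for max_page == 1UL<<20 (4GB of 4kB pages) the bitmap needs
 * (1UL<<20)/8 == 128kB; the extra longword of padding plus page rounding
 * grow that to 132kB, and the bitmap starts out fully set (all allocated).
 */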
197 void init_boot_pages(paddr_t ps, paddr_t pe)
198 {
199 unsigned long bad_spfn, bad_epfn, i;
200 char *p;
202 ps = round_pgup(ps);
203 pe = round_pgdown(pe);
204 if ( pe <= ps )
205 return;
207 first_valid_mfn = min_t(unsigned long, ps >> PAGE_SHIFT, first_valid_mfn);
209 map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);
211 /* Check new pages against the bad-page list. */
212 p = opt_badpage;
213 while ( *p != '\0' )
214 {
215 bad_spfn = simple_strtoul(p, &p, 0);
216 bad_epfn = bad_spfn;
218 if ( *p == '-' )
219 {
220 p++;
221 bad_epfn = simple_strtoul(p, &p, 0);
222 if ( bad_epfn < bad_spfn )
223 bad_epfn = bad_spfn;
224 }
226 if ( *p == ',' )
227 p++;
228 else if ( *p != '\0' )
229 break;
231 if ( bad_epfn == bad_spfn )
232 printk("Marking page %lx as bad\n", bad_spfn);
233 else
234 printk("Marking pages %lx through %lx as bad\n",
235 bad_spfn, bad_epfn);
237 for ( i = bad_spfn; i <= bad_epfn; i++ )
238 if ( (i < max_page) && !allocated_in_map(i) )
239 map_alloc(i, 1);
240 }
241 }
243 unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at)
244 {
245 unsigned long i;
247 for ( i = 0; i < nr_pfns; i++ )
248 if ( allocated_in_map(pfn_at + i) )
249 break;
251 if ( i == nr_pfns )
252 {
253 map_alloc(pfn_at, nr_pfns);
254 return pfn_at;
255 }
257 return 0;
258 }
260 unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align)
261 {
262 unsigned long pg;
264 pg = first_valid_mfn & ~(pfn_align-1);
265 while ( (pg + nr_pfns) < max_page )
266 {
267 if ( alloc_boot_pages_at(nr_pfns, pg) != 0 )
268 break;
269 pg += pfn_align;
270 }
272 return pg;
273 }
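/*
 * e.g. alloc_boot_pages(1, 1) scans upwards from first_valid_mfn and
 * returns the first MFN at which a single free page is found.
 */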
277 /*************************
278 * BINARY BUDDY ALLOCATOR
279 */
281 #define MEMZONE_XEN 0
282 #define MEMZONE_DOM 1
283 #define MEMZONE_DMADOM 2
284 #define NR_ZONES 3
286 #define pfn_dom_zone_type(_pfn) \
287 (((_pfn) <= max_dma_mfn) ? MEMZONE_DMADOM : MEMZONE_DOM)
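/*
 * e.g. with dma_bitsize == 30 and 4kB pages, MFN 0x3ffff (the last page
 * below 1GB) selects MEMZONE_DMADOM and MFN 0x40000 selects MEMZONE_DOM.
 */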
289 static struct list_head heap[NR_ZONES][MAX_NUMNODES][MAX_ORDER+1];
291 static unsigned long avail[NR_ZONES][MAX_NUMNODES];
293 static DEFINE_SPINLOCK(heap_lock);
295 void end_boot_allocator(void)
296 {
297 unsigned long i, j, k;
298 int curr_free, next_free;
300 memset(avail, 0, sizeof(avail));
302 for ( i = 0; i < NR_ZONES; i++ )
303 for ( j = 0; j < MAX_NUMNODES; j++ )
304 for ( k = 0; k <= MAX_ORDER; k++ )
305 INIT_LIST_HEAD(&heap[i][j][k]);
307 /* Pages that are free now go to the domain sub-allocator. */
308 if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
309 map_alloc(first_valid_mfn, 1);
310 for ( i = first_valid_mfn; i < max_page; i++ )
311 {
312 curr_free = next_free;
313 next_free = !allocated_in_map(i+1);
314 if ( next_free )
315 map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
316 if ( curr_free )
317 init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
318 }
320 printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
321 }
323 /*
324 * Hand the specified arbitrary page range to the specified heap zone,
325 * checking each page's node_id against that of the previous page. If
326 * they differ and the page is not on a MAX_ORDER boundary, then we
327 * reserve the page by not freeing it to the buddy allocator.
328 */
329 #define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
330 void init_heap_pages(
331 unsigned int zone, struct page_info *pg, unsigned long nr_pages)
332 {
333 unsigned int nid_curr, nid_prev;
334 unsigned long i;
336 ASSERT(zone < NR_ZONES);
338 if ( likely(page_to_mfn(pg) != 0) )
339 nid_prev = phys_to_nid(page_to_maddr(pg-1));
340 else
341 nid_prev = phys_to_nid(page_to_maddr(pg));
343 for ( i = 0; i < nr_pages; i++ )
344 {
345 nid_curr = phys_to_nid(page_to_maddr(pg+i));
347 /*
348 * Free pages of the same node, or, if they differ, those that lie on
349 * a MAX_ORDER alignment boundary (which already get reserved)
350 */
351 if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
352 MAX_ORDER_ALIGNED) )
353 free_heap_pages(zone, pg+i, 0);
354 else
355 printk("Reserving non-aligned node boundary @ mfn %lu\n",
356 page_to_mfn(pg+i));
358 nid_prev = nid_curr;
359 }
360 }
362 /* Allocate 2^@order contiguous pages. */
363 struct page_info *alloc_heap_pages(unsigned int zone, unsigned int cpu,
364 unsigned int order)
365 {
366 unsigned int i,j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
367 unsigned int request = (1UL << order);
368 struct page_info *pg;
370 ASSERT(node >= 0);
371 ASSERT(node < num_nodes);
372 ASSERT(zone < NR_ZONES);
374 if ( unlikely(order > MAX_ORDER) )
375 return NULL;
377 spin_lock(&heap_lock);
379 /* Start with the requested node, but exhaust all node memory in the
380 * requested zone before failing. Only compute a new node value if we
381 * fail to find memory in the target node; this avoids needless
382 * computation on the fast path. */
383 for ( i = 0; i < num_nodes; i++ )
384 {
385 /* check if target node can support the allocation */
386 if ( avail[zone][node] >= request )
387 {
388 /* Find smallest order which can satisfy the request. */
389 for ( j = order; j <= MAX_ORDER; j++ )
390 {
391 if ( !list_empty(&heap[zone][node][j]) )
392 goto found;
393 }
394 }
395 /* pick next node, wrapping around if needed */
396 if ( ++node == num_nodes )
397 node = 0;
398 }
400 /* No suitable memory blocks. Fail the request. */
401 spin_unlock(&heap_lock);
402 return NULL;
404 found:
405 pg = list_entry(heap[zone][node][j].next, struct page_info, list);
406 list_del(&pg->list);
408 /* We may have to halve the chunk a number of times. */
409 while ( j != order )
410 {
411 PFN_ORDER(pg) = --j;
412 list_add_tail(&pg->list, &heap[zone][node][j]);
413 pg += 1 << j;
414 }
416 map_alloc(page_to_mfn(pg), request);
417 ASSERT(avail[zone][node] >= request);
418 avail[zone][node] -= request;
420 spin_unlock(&heap_lock);
422 return pg;
423 }
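/*
 * Splitting example: an order-0 request served from an order-2 block puts
 * the lower half of the block back on heap[zone][node][1], the next page
 * on heap[zone][node][0], and returns the block's final page.
 */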
426 /* Free 2^@order set of pages. */
427 void free_heap_pages(
428 unsigned int zone, struct page_info *pg, unsigned int order)
429 {
430 unsigned long mask;
431 int node = phys_to_nid(page_to_maddr(pg));
433 ASSERT(zone < NR_ZONES);
434 ASSERT(order <= MAX_ORDER);
435 ASSERT(node >= 0);
436 ASSERT(node < num_online_nodes());
438 spin_lock(&heap_lock);
440 map_free(page_to_mfn(pg), 1 << order);
441 avail[zone][node] += 1 << order;
443 /* Merge chunks as far as possible. */
444 while ( order < MAX_ORDER )
445 {
446 mask = 1 << order;
448 if ( (page_to_mfn(pg) & mask) )
449 {
450 /* Merge with predecessor block? */
451 if ( allocated_in_map(page_to_mfn(pg)-mask) ||
452 (PFN_ORDER(pg-mask) != order) )
453 break;
454 list_del(&(pg-mask)->list);
455 pg -= mask;
456 }
457 else
458 {
459 /* Merge with successor block? */
460 if ( allocated_in_map(page_to_mfn(pg)+mask) ||
461 (PFN_ORDER(pg+mask) != order) )
462 break;
463 list_del(&(pg+mask)->list);
464 }
466 order++;
468 /* After merging, pg should still be in the same node. */
469 ASSERT(phys_to_nid(page_to_maddr(pg)) == node);
470 }
472 PFN_ORDER(pg) = order;
473 list_add_tail(&pg->list, &heap[zone][node][order]);
475 spin_unlock(&heap_lock);
476 }
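/*
 * Merging example: freeing an order-0 page whose buddy (MFN ^ 1) is free
 * and still of order 0 coalesces the pair into an order-1 block; the loop
 * then retries at each higher order until it reaches MAX_ORDER or meets
 * an allocated or differently-sized buddy.
 */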
479 /*
480 * Scrub all unallocated pages in all heap zones. This function is more
481 * convoluted than appears necessary because we do not want to continuously
482 * hold the lock or disable interrupts while scrubbing very large memory areas.
483 */
484 void scrub_heap_pages(void)
485 {
486 void *p;
487 unsigned long mfn;
489 printk("Scrubbing Free RAM: ");
491 for ( mfn = first_valid_mfn; mfn < max_page; mfn++ )
492 {
493 process_pending_timers();
495 /* Quick lock-free check. */
496 if ( allocated_in_map(mfn) )
497 continue;
499 /* Every 100MB, print a progress dot. */
500 if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
501 printk(".");
503 spin_lock_irq(&heap_lock);
505 /* Re-check page status with lock held. */
506 if ( !allocated_in_map(mfn) )
507 {
508 if ( IS_XEN_HEAP_FRAME(mfn_to_page(mfn)) )
509 {
510 p = page_to_virt(mfn_to_page(mfn));
511 memguard_unguard_range(p, PAGE_SIZE);
512 clear_page(p);
513 memguard_guard_range(p, PAGE_SIZE);
514 }
515 else
516 {
517 p = map_domain_page(mfn);
518 clear_page(p);
519 unmap_domain_page(p);
520 }
521 }
523 spin_unlock_irq(&heap_lock);
524 }
526 printk("done.\n");
527 }
531 /*************************
532 * XEN-HEAP SUB-ALLOCATOR
533 */
535 void init_xenheap_pages(paddr_t ps, paddr_t pe)
536 {
537 unsigned long flags;
539 ps = round_pgup(ps);
540 pe = round_pgdown(pe);
541 if ( pe <= ps )
542 return;
544 memguard_guard_range(maddr_to_virt(ps), pe - ps);
546 /*
547 * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
548 * prevent merging of power-of-two blocks across the zone boundary.
549 */
550 if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
551 pe -= PAGE_SIZE;
553 local_irq_save(flags);
554 init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
555 local_irq_restore(flags);
556 }
559 void *alloc_xenheap_pages(unsigned int order)
560 {
561 unsigned long flags;
562 struct page_info *pg;
563 int i;
565 local_irq_save(flags);
566 pg = alloc_heap_pages(MEMZONE_XEN, smp_processor_id(), order);
567 local_irq_restore(flags);
569 if ( unlikely(pg == NULL) )
570 goto no_memory;
572 memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
574 for ( i = 0; i < (1 << order); i++ )
575 {
576 pg[i].count_info = 0;
577 pg[i].u.inuse._domain = 0;
578 pg[i].u.inuse.type_info = 0;
579 }
581 return page_to_virt(pg);
583 no_memory:
584 printk("Cannot handle page request order %d!\n", order);
585 return NULL;
586 }
589 void free_xenheap_pages(void *v, unsigned int order)
590 {
591 unsigned long flags;
593 if ( v == NULL )
594 return;
596 memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
598 local_irq_save(flags);
599 free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
600 local_irq_restore(flags);
601 }
605 /*************************
606 * DOMAIN-HEAP SUB-ALLOCATOR
607 */
609 void init_domheap_pages(paddr_t ps, paddr_t pe)
610 {
611 unsigned long s_tot, e_tot, s_dma, e_dma, s_nrm, e_nrm;
613 ASSERT(!in_irq());
615 s_tot = round_pgup(ps) >> PAGE_SHIFT;
616 e_tot = round_pgdown(pe) >> PAGE_SHIFT;
618 s_dma = min(s_tot, max_dma_mfn + 1);
619 e_dma = min(e_tot, max_dma_mfn + 1);
620 if ( s_dma < e_dma )
621 init_heap_pages(MEMZONE_DMADOM, mfn_to_page(s_dma), e_dma - s_dma);
623 s_nrm = max(s_tot, max_dma_mfn + 1);
624 e_nrm = max(e_tot, max_dma_mfn + 1);
625 if ( s_nrm < e_nrm )
626 init_heap_pages(MEMZONE_DOM, mfn_to_page(s_nrm), e_nrm - s_nrm);
627 }
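/*
 * Example: with max_dma_mfn == 0x3ffff, a range spanning MFNs 0x30000 to
 * 0x4ffff is split so that 0x30000-0x3ffff goes to MEMZONE_DMADOM and
 * 0x40000-0x4ffff goes to MEMZONE_DOM.
 */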
630 int assign_pages(
631 struct domain *d,
632 struct page_info *pg,
633 unsigned int order,
634 unsigned int memflags)
635 {
636 unsigned long i;
638 spin_lock(&d->page_alloc_lock);
640 if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) )
641 {
642 gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
643 d->domain_id);
644 goto fail;
645 }
647 if ( !(memflags & MEMF_no_refcount) )
648 {
649 if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
650 {
651 gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n",
652 d->domain_id, d->tot_pages + (1 << order), d->max_pages);
653 goto fail;
654 }
656 if ( unlikely(d->tot_pages == 0) )
657 get_knownalive_domain(d);
659 d->tot_pages += 1 << order;
660 }
662 for ( i = 0; i < (1 << order); i++ )
663 {
664 ASSERT(page_get_owner(&pg[i]) == NULL);
665 ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
666 page_set_owner(&pg[i], d);
667 wmb(); /* Domain pointer must be visible before updating refcnt. */
668 pg[i].count_info = PGC_allocated | 1;
669 list_add_tail(&pg[i].list, &d->page_list);
670 }
672 spin_unlock(&d->page_alloc_lock);
673 return 0;
675 fail:
676 spin_unlock(&d->page_alloc_lock);
677 return -1;
678 }
681 struct page_info *__alloc_domheap_pages(
682 struct domain *d, unsigned int cpu, unsigned int order,
683 unsigned int memflags)
684 {
685 struct page_info *pg = NULL;
686 cpumask_t mask;
687 unsigned long i;
689 ASSERT(!in_irq());
691 if ( !(memflags & MEMF_dma) )
692 {
693 pg = alloc_heap_pages(MEMZONE_DOM, cpu, order);
694 /* Failure? Then check if we can fall back to the DMA pool. */
695 if ( unlikely(pg == NULL) &&
696 ((order > MAX_ORDER) ||
697 (avail_heap_pages(MEMZONE_DMADOM,-1) <
698 (dma_emergency_pool_pages + (1UL << order)))) )
699 return NULL;
700 }
702 if ( pg == NULL )
703 if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, cpu, order)) == NULL )
704 return NULL;
706 mask = pg->u.free.cpumask;
707 tlbflush_filter(mask, pg->tlbflush_timestamp);
709 pg->count_info = 0;
710 pg->u.inuse._domain = 0;
711 pg->u.inuse.type_info = 0;
713 for ( i = 1; i < (1 << order); i++ )
714 {
715 /* Add in any extra CPUs that need flushing because of this page. */
716 cpumask_t extra_cpus_mask;
717 cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
718 tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
719 cpus_or(mask, mask, extra_cpus_mask);
721 pg[i].count_info = 0;
722 pg[i].u.inuse._domain = 0;
723 pg[i].u.inuse.type_info = 0;
724 page_set_owner(&pg[i], NULL);
725 }
727 if ( unlikely(!cpus_empty(mask)) )
728 {
729 perfc_incrc(need_flush_tlb_flush);
730 flush_tlb_mask(mask);
731 }
733 if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
734 {
735 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
736 return NULL;
737 }
739 return pg;
740 }
742 inline struct page_info *alloc_domheap_pages(
743 struct domain *d, unsigned int order, unsigned int flags)
744 {
745 return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
746 }
748 void free_domheap_pages(struct page_info *pg, unsigned int order)
749 {
750 int i, drop_dom_ref;
751 struct domain *d = page_get_owner(pg);
753 ASSERT(!in_irq());
755 if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
756 {
757 /* NB. May recursively lock from relinquish_memory(). */
758 spin_lock_recursive(&d->page_alloc_lock);
760 for ( i = 0; i < (1 << order); i++ )
761 list_del(&pg[i].list);
763 d->xenheap_pages -= 1 << order;
764 drop_dom_ref = (d->xenheap_pages == 0);
766 spin_unlock_recursive(&d->page_alloc_lock);
767 }
768 else if ( likely(d != NULL) )
769 {
770 /* NB. May recursively lock from relinquish_memory(). */
771 spin_lock_recursive(&d->page_alloc_lock);
773 for ( i = 0; i < (1 << order); i++ )
774 {
775 shadow_drop_references(d, &pg[i]);
776 ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
777 pg[i].tlbflush_timestamp = tlbflush_current_time();
778 pg[i].u.free.cpumask = d->domain_dirty_cpumask;
779 list_del(&pg[i].list);
780 }
782 d->tot_pages -= 1 << order;
783 drop_dom_ref = (d->tot_pages == 0);
785 spin_unlock_recursive(&d->page_alloc_lock);
787 if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
788 {
789 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
790 }
791 else
792 {
793 /*
794 * Normally we expect a domain to clear pages before freeing them,
795 * if it cares about the secrecy of their contents. However, after
796 * a domain has died we assume responsibility for erasure.
797 */
798 for ( i = 0; i < (1 << order); i++ )
799 {
800 spin_lock(&page_scrub_lock);
801 list_add(&pg[i].list, &page_scrub_list);
802 scrub_pages++;
803 spin_unlock(&page_scrub_lock);
804 }
805 }
806 }
807 else
808 {
809 /* Freeing anonymous domain-heap pages. */
810 for ( i = 0; i < (1 << order); i++ )
811 cpus_clear(pg[i].u.free.cpumask);
812 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
813 drop_dom_ref = 0;
814 }
816 if ( drop_dom_ref )
817 put_domain(d);
818 }
821 unsigned long avail_heap_pages(int zone, int node)
822 {
823 int i,j, num_nodes = num_online_nodes();
824 unsigned long free_pages = 0;
826 for (i=0; i<NR_ZONES; i++)
827 if ( (zone == -1) || (zone == i) )
828 for (j=0; j < num_nodes; j++)
829 if ( (node == -1) || (node == j) )
830 free_pages += avail[i][j];
832 return free_pages;
833 }
835 unsigned long avail_domheap_pages(void)
836 {
837 unsigned long avail_nrm, avail_dma;
839 avail_nrm = avail_heap_pages(MEMZONE_DOM,-1);
841 avail_dma = avail_heap_pages(MEMZONE_DMADOM,-1);
842 if ( avail_dma > dma_emergency_pool_pages )
843 avail_dma -= dma_emergency_pool_pages;
844 else
845 avail_dma = 0;
847 return avail_nrm + avail_dma;
848 }
850 unsigned long avail_nodeheap_pages(int node)
851 {
852 return avail_heap_pages(-1, node);
853 }
855 static void pagealloc_keyhandler(unsigned char key)
856 {
857 printk("Physical memory information:\n");
858 printk(" Xen heap: %lukB free\n"
859 " DMA heap: %lukB free\n"
860 " Dom heap: %lukB free\n",
861 avail_heap_pages(MEMZONE_XEN, -1) << (PAGE_SHIFT-10),
862 avail_heap_pages(MEMZONE_DMADOM, -1) <<(PAGE_SHIFT-10),
863 avail_heap_pages(MEMZONE_DOM, -1) <<(PAGE_SHIFT-10));
864 }
867 static __init int pagealloc_keyhandler_init(void)
868 {
869 register_keyhandler('m', pagealloc_keyhandler, "memory info");
870 return 0;
871 }
872 __initcall(pagealloc_keyhandler_init);
876 /*************************
877 * PAGE SCRUBBING
878 */
880 static void page_scrub_softirq(void)
881 {
882 struct list_head *ent;
883 struct page_info *pg;
884 void *p;
885 int i;
886 s_time_t start = NOW();
888 /* Aim to do 1ms of work (ten percent of a 10ms jiffy). */
889 do {
890 spin_lock(&page_scrub_lock);
892 if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
893 {
894 spin_unlock(&page_scrub_lock);
895 return;
896 }
898 /* Peel up to 16 pages from the list. */
899 for ( i = 0; i < 16; i++ )
900 {
901 if ( ent->next == &page_scrub_list )
902 break;
903 ent = ent->next;
904 }
906 /* Remove peeled pages from the list. */
907 ent->next->prev = &page_scrub_list;
908 page_scrub_list.next = ent->next;
909 scrub_pages -= (i+1);
911 spin_unlock(&page_scrub_lock);
913 /* Working backwards, scrub each page in turn. */
914 while ( ent != &page_scrub_list )
915 {
916 pg = list_entry(ent, struct page_info, list);
917 ent = ent->prev;
918 p = map_domain_page(page_to_mfn(pg));
919 clear_page(p);
920 unmap_domain_page(p);
921 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
922 }
923 } while ( (NOW() - start) < MILLISECS(1) );
924 }
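/*
 * e.g. with 1000 pages queued, each pass of the outer loop detaches at
 * most 16 of them, scrubs those with page_scrub_lock dropped, and the
 * loop repeats until roughly 1ms of wall-clock time has been spent.
 */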
926 unsigned long avail_scrub_pages(void)
927 {
928 return scrub_pages;
929 }
931 static unsigned long count_bucket(struct list_head* l, int order)
932 {
933 unsigned long total_pages = 0;
934 int pages = 1 << order;
935 struct page_info *pg;
937 list_for_each_entry(pg, l, list)
938 total_pages += pages;
940 return total_pages;
941 }
943 static void dump_heap(unsigned char key)
944 {
945 s_time_t now = NOW();
946 int i,j,k;
947 unsigned long total;
949 printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
950 (u32)(now>>32), (u32)now);
952 for (i=0; i<NR_ZONES; i++ )
953 for (j=0;j<MAX_NUMNODES;j++)
954 for (k=0;k<=MAX_ORDER;k++)
955 if ( !list_empty(&heap[i][j][k]) )
956 {
957 total = count_bucket(&heap[i][j][k], k);
958 printk("heap[%d][%d][%d]-> %lu pages\n",
959 i, j, k, total);
960 }
961 }
963 static __init int register_heap_trigger(void)
964 {
965 register_keyhandler('H', dump_heap, "dump heap info");
966 return 0;
967 }
968 __initcall(register_heap_trigger);
971 static __init int page_scrub_init(void)
972 {
973 open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
974 return 0;
975 }
976 __initcall(page_scrub_init);
978 /*
979 * Local variables:
980 * mode: C
981 * c-set-style: "BSD"
982 * c-basic-offset: 4
983 * tab-width: 4
984 * indent-tabs-mode: nil
985 * End:
986 */