ia64/xen-unstable

view xen/common/page_alloc.c @ 15709:cb3e6fcb7f34

On debug builds, scrub pages with non-zero poison.
Will flush out guests which are relying on zeroed memory.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Fri Aug 03 12:22:33 2007 +0100 (2007-08-03)
parents 3c28bc13a3f8
children 256160ff19b7
line source
1 /******************************************************************************
2 * page_alloc.c
3 *
4 * Simple buddy heap allocator for Xen.
5 *
6 * Copyright (c) 2002-2004 K A Fraser
7 * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
24 #include <xen/config.h>
25 #include <xen/init.h>
26 #include <xen/types.h>
27 #include <xen/lib.h>
28 #include <xen/sched.h>
29 #include <xen/spinlock.h>
30 #include <xen/mm.h>
31 #include <xen/irq.h>
32 #include <xen/softirq.h>
33 #include <xen/domain_page.h>
34 #include <xen/keyhandler.h>
35 #include <xen/perfc.h>
36 #include <xen/numa.h>
37 #include <xen/nodemask.h>
38 #include <asm/page.h>
39 #include <asm/flushtlb.h>
41 /*
42 * Comma-separated list of hexadecimal page numbers containing bad bytes.
43 * e.g. 'badpage=0x3f45,0x8a321'.
44 */
45 static char opt_badpage[100] = "";
46 string_param("badpage", opt_badpage);
48 /*
49 * no-bootscrub -> Free pages are not zeroed during boot.
50 */
51 static int opt_bootscrub __initdata = 1;
52 boolean_param("bootscrub", opt_bootscrub);
54 /*
55 * Bit width of the DMA heap.
56 */
57 static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
58 static unsigned long max_dma_mfn = (1UL<<(CONFIG_DMA_BITSIZE-PAGE_SHIFT))-1;
59 static void __init parse_dma_bits(char *s)
60 {
61 unsigned int v = simple_strtol(s, NULL, 0);
62 if ( v >= (BITS_PER_LONG + PAGE_SHIFT) )
63 {
64 dma_bitsize = BITS_PER_LONG + PAGE_SHIFT;
65 max_dma_mfn = ~0UL;
66 }
67 else if ( v > PAGE_SHIFT + 1 )
68 {
69 dma_bitsize = v;
70 max_dma_mfn = (1UL << (dma_bitsize - PAGE_SHIFT)) - 1;
71 }
72 else
73 printk("Invalid dma_bits value of %u ignored.\n", v);
74 }
75 custom_param("dma_bits", parse_dma_bits);
77 /*
78 * Amount of memory to reserve in a low-memory (<4GB) pool for specific
79 * allocation requests. Ordinary requests will not fall back to the
80 * lowmem emergency pool.
81 */
82 static unsigned long dma_emergency_pool_pages;
83 static void __init parse_dma_emergency_pool(char *s)
84 {
85 unsigned long long bytes;
86 bytes = parse_size_and_unit(s, NULL);
87 dma_emergency_pool_pages = bytes >> PAGE_SHIFT;
88 }
89 custom_param("dma_emergency_pool", parse_dma_emergency_pool);
91 #define round_pgdown(_p) ((_p)&PAGE_MASK)
92 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
94 #ifndef NDEBUG
95 /* Avoid callers relying on allocations returning zeroed pages. */
96 #define scrub_page(p) memset((p), 0xc2, PAGE_SIZE)
97 #else
98 /* For a production build, clear_page() is the fastest way to scrub. */
99 #define scrub_page(p) clear_page(p)
100 #endif
102 static DEFINE_SPINLOCK(page_scrub_lock);
103 LIST_HEAD(page_scrub_list);
104 static unsigned long scrub_pages;
106 /*********************
107 * ALLOCATION BITMAP
108 * One bit per page of memory. Bit set => page is allocated.
109 */
111 static unsigned long *alloc_bitmap;
112 #define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
114 #define allocated_in_map(_pn) \
115 ({ unsigned long ___pn = (_pn); \
116 !!(alloc_bitmap[___pn/PAGES_PER_MAPWORD] & \
117 (1UL<<(___pn&(PAGES_PER_MAPWORD-1)))); })
119 /*
120 * Hint regarding bitwise arithmetic in map_{alloc,free}:
121 * -(1<<n) sets all bits >= n.
122 * (1<<n)-1 sets all bits < n.
123 * Variable names in map_{alloc,free}:
124 * *_idx == Index into `alloc_bitmap' array.
125 * *_off == Bit offset within an element of the `alloc_bitmap' array.
126 */
128 static void map_alloc(unsigned long first_page, unsigned long nr_pages)
129 {
130 unsigned long start_off, end_off, curr_idx, end_idx;
132 #ifndef NDEBUG
133 unsigned long i;
134 /* Check that the block isn't already allocated. */
135 for ( i = 0; i < nr_pages; i++ )
136 ASSERT(!allocated_in_map(first_page + i));
137 #endif
139 curr_idx = first_page / PAGES_PER_MAPWORD;
140 start_off = first_page & (PAGES_PER_MAPWORD-1);
141 end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
142 end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
144 if ( curr_idx == end_idx )
145 {
146 alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
147 }
148 else
149 {
150 alloc_bitmap[curr_idx] |= -(1UL<<start_off);
151 while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
152 alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
153 }
154 }
156 static void map_free(unsigned long first_page, unsigned long nr_pages)
157 {
158 unsigned long start_off, end_off, curr_idx, end_idx;
160 #ifndef NDEBUG
161 unsigned long i;
162 /* Check that the block isn't already freed. */
163 for ( i = 0; i < nr_pages; i++ )
164 ASSERT(allocated_in_map(first_page + i));
165 #endif
167 curr_idx = first_page / PAGES_PER_MAPWORD;
168 start_off = first_page & (PAGES_PER_MAPWORD-1);
169 end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
170 end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
172 if ( curr_idx == end_idx )
173 {
174 alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
175 }
176 else
177 {
178 alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
179 while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
180 alloc_bitmap[curr_idx] &= -(1UL<<end_off);
181 }
182 }
186 /*************************
187 * BOOT-TIME ALLOCATOR
188 */
190 static unsigned long first_valid_mfn = ~0UL;
192 /* Initialise allocator to handle up to @max_page pages. */
193 paddr_t __init init_boot_allocator(paddr_t bitmap_start)
194 {
195 unsigned long bitmap_size;
197 bitmap_start = round_pgup(bitmap_start);
199 /*
200 * Allocate space for the allocation bitmap. Include an extra longword
201 * of padding for possible overrun in map_alloc and map_free.
202 */
203 bitmap_size = max_page / 8;
204 bitmap_size += sizeof(unsigned long);
205 bitmap_size = round_pgup(bitmap_size);
206 alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);
208 /* All allocated by default. */
209 memset(alloc_bitmap, ~0, bitmap_size);
211 return bitmap_start + bitmap_size;
212 }
214 void __init init_boot_pages(paddr_t ps, paddr_t pe)
215 {
216 unsigned long bad_spfn, bad_epfn, i;
217 const char *p;
219 ps = round_pgup(ps);
220 pe = round_pgdown(pe);
221 if ( pe <= ps )
222 return;
224 first_valid_mfn = min_t(unsigned long, ps >> PAGE_SHIFT, first_valid_mfn);
226 map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);
228 /* Check new pages against the bad-page list. */
229 p = opt_badpage;
230 while ( *p != '\0' )
231 {
232 bad_spfn = simple_strtoul(p, &p, 0);
233 bad_epfn = bad_spfn;
235 if ( *p == '-' )
236 {
237 p++;
238 bad_epfn = simple_strtoul(p, &p, 0);
239 if ( bad_epfn < bad_spfn )
240 bad_epfn = bad_spfn;
241 }
243 if ( *p == ',' )
244 p++;
245 else if ( *p != '\0' )
246 break;
248 if ( bad_epfn == bad_spfn )
249 printk("Marking page %lx as bad\n", bad_spfn);
250 else
251 printk("Marking pages %lx through %lx as bad\n",
252 bad_spfn, bad_epfn);
254 for ( i = bad_spfn; i <= bad_epfn; i++ )
255 if ( (i < max_page) && !allocated_in_map(i) )
256 map_alloc(i, 1);
257 }
258 }
260 unsigned long __init alloc_boot_pages(
261 unsigned long nr_pfns, unsigned long pfn_align)
262 {
263 unsigned long pg, i;
265 /* Search backwards to obtain highest available range. */
266 for ( pg = (max_page - nr_pfns) & ~(pfn_align - 1);
267 pg >= first_valid_mfn;
268 pg = (pg + i - nr_pfns) & ~(pfn_align - 1) )
269 {
270 for ( i = 0; i < nr_pfns; i++ )
271 if ( allocated_in_map(pg+i) )
272 break;
273 if ( i == nr_pfns )
274 {
275 map_alloc(pg, nr_pfns);
276 return pg;
277 }
278 }
280 return 0;
281 }
285 /*************************
286 * BINARY BUDDY ALLOCATOR
287 */
289 #define MEMZONE_XEN 0
290 #ifdef PADDR_BITS
291 #define NR_ZONES (PADDR_BITS - PAGE_SHIFT)
292 #else
293 #define NR_ZONES (BITS_PER_LONG - PAGE_SHIFT)
294 #endif
296 #define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1)
298 typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
299 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
300 #define heap(node, zone, order) ((*_heap[node])[zone][order])
302 static unsigned long *avail[MAX_NUMNODES];
304 static DEFINE_SPINLOCK(heap_lock);
306 static void init_node_heap(int node)
307 {
308 /* First node to be discovered has its heap metadata statically alloced. */
309 static heap_by_zone_and_order_t _heap_static;
310 static unsigned long avail_static[NR_ZONES];
311 static unsigned long first_node_initialised;
313 int i, j;
315 if ( !test_and_set_bit(0, &first_node_initialised) )
316 {
317 _heap[node] = &_heap_static;
318 avail[node] = avail_static;
319 }
320 else
321 {
322 _heap[node] = xmalloc(heap_by_zone_and_order_t);
323 avail[node] = xmalloc_array(unsigned long, NR_ZONES);
324 BUG_ON(!_heap[node] || !avail[node]);
325 }
327 memset(avail[node], 0, NR_ZONES * sizeof(long));
329 for ( i = 0; i < NR_ZONES; i++ )
330 for ( j = 0; j <= MAX_ORDER; j++ )
331 INIT_LIST_HEAD(&(*_heap[node])[i][j]);
332 }
334 /* Allocate 2^@order contiguous pages. */
335 static struct page_info *alloc_heap_pages(
336 unsigned int zone_lo, unsigned int zone_hi,
337 unsigned int cpu, unsigned int order)
338 {
339 unsigned int i, j, zone;
340 unsigned int node = cpu_to_node(cpu), num_nodes = num_online_nodes();
341 unsigned long request = 1UL << order;
342 cpumask_t extra_cpus_mask, mask;
343 struct page_info *pg;
345 ASSERT(node >= 0);
346 ASSERT(node < num_nodes);
347 ASSERT(zone_lo <= zone_hi);
348 ASSERT(zone_hi < NR_ZONES);
350 if ( unlikely(order > MAX_ORDER) )
351 return NULL;
353 spin_lock(&heap_lock);
355 /*
356 * Start with requested node, but exhaust all node memory in requested
357 * zone before failing, only calc new node value if we fail to find memory
358 * in target node, this avoids needless computation on fast-path.
359 */
360 for ( i = 0; i < num_nodes; i++ )
361 {
362 zone = zone_hi;
363 do {
364 /* Check if target node can support the allocation. */
365 if ( !avail[node] || (avail[node][zone] < request) )
366 continue;
368 /* Find smallest order which can satisfy the request. */
369 for ( j = order; j <= MAX_ORDER; j++ )
370 if ( !list_empty(&heap(node, zone, j)) )
371 goto found;
372 } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
374 /* Pick next node, wrapping around if needed. */
375 if ( ++node == num_nodes )
376 node = 0;
377 }
379 /* No suitable memory blocks. Fail the request. */
380 spin_unlock(&heap_lock);
381 return NULL;
383 found:
384 pg = list_entry(heap(node, zone, j).next, struct page_info, list);
385 list_del(&pg->list);
387 /* We may have to halve the chunk a number of times. */
388 while ( j != order )
389 {
390 PFN_ORDER(pg) = --j;
391 list_add_tail(&pg->list, &heap(node, zone, j));
392 pg += 1 << j;
393 }
395 map_alloc(page_to_mfn(pg), request);
396 ASSERT(avail[node][zone] >= request);
397 avail[node][zone] -= request;
399 spin_unlock(&heap_lock);
401 cpus_clear(mask);
403 for ( i = 0; i < (1 << order); i++ )
404 {
405 /* Reference count must continuously be zero for free pages. */
406 BUG_ON(pg[i].count_info != 0);
408 /* Add in any extra CPUs that need flushing because of this page. */
409 cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
410 tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
411 cpus_or(mask, mask, extra_cpus_mask);
413 /* Initialise fields which have other uses for free pages. */
414 pg[i].u.inuse.type_info = 0;
415 page_set_owner(&pg[i], NULL);
416 }
418 if ( unlikely(!cpus_empty(mask)) )
419 {
420 perfc_incr(need_flush_tlb_flush);
421 flush_tlb_mask(mask);
422 }
424 return pg;
425 }
427 /* Free 2^@order set of pages. */
428 static void free_heap_pages(
429 unsigned int zone, struct page_info *pg, unsigned int order)
430 {
431 unsigned long mask;
432 unsigned int i, node = phys_to_nid(page_to_maddr(pg));
433 struct domain *d;
435 ASSERT(zone < NR_ZONES);
436 ASSERT(order <= MAX_ORDER);
437 ASSERT(node >= 0);
438 ASSERT(node < num_online_nodes());
440 for ( i = 0; i < (1 << order); i++ )
441 {
442 /*
443 * Cannot assume that count_info == 0, as there are some corner cases
444 * where it isn't the case and yet it isn't a bug:
445 * 1. page_get_owner() is NULL
446 * 2. page_get_owner() is a domain that was never accessible by
447 * its domid (e.g., failed to fully construct the domain).
448 * 3. page was never addressable by the guest (e.g., it's an
449 * auto-translate-physmap guest and the page was never included
450 * in its pseudophysical address space).
451 * In all the above cases there can be no guest mappings of this page.
452 */
453 pg[i].count_info = 0;
455 if ( (d = page_get_owner(&pg[i])) != NULL )
456 {
457 pg[i].tlbflush_timestamp = tlbflush_current_time();
458 pg[i].u.free.cpumask = d->domain_dirty_cpumask;
459 }
460 else
461 {
462 cpus_clear(pg[i].u.free.cpumask);
463 }
464 }
466 spin_lock(&heap_lock);
468 map_free(page_to_mfn(pg), 1 << order);
469 avail[node][zone] += 1 << order;
471 /* Merge chunks as far as possible. */
472 while ( order < MAX_ORDER )
473 {
474 mask = 1UL << order;
476 if ( (page_to_mfn(pg) & mask) )
477 {
478 /* Merge with predecessor block? */
479 if ( allocated_in_map(page_to_mfn(pg)-mask) ||
480 (PFN_ORDER(pg-mask) != order) )
481 break;
482 list_del(&(pg-mask)->list);
483 pg -= mask;
484 }
485 else
486 {
487 /* Merge with successor block? */
488 if ( allocated_in_map(page_to_mfn(pg)+mask) ||
489 (PFN_ORDER(pg+mask) != order) )
490 break;
491 list_del(&(pg+mask)->list);
492 }
494 order++;
496 /* After merging, pg should remain in the same node. */
497 ASSERT(phys_to_nid(page_to_maddr(pg)) == node);
498 }
500 PFN_ORDER(pg) = order;
501 list_add_tail(&pg->list, &heap(node, zone, order));
503 spin_unlock(&heap_lock);
504 }
506 /*
507 * Hand the specified arbitrary page range to the specified heap zone
508 * checking the node_id of the previous page. If they differ and the
509 * latter is not on a MAX_ORDER boundary, then we reserve the page by
510 * not freeing it to the buddy allocator.
511 */
512 #define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
513 void init_heap_pages(
514 unsigned int zone, struct page_info *pg, unsigned long nr_pages)
515 {
516 unsigned int nid_curr, nid_prev;
517 unsigned long i;
519 ASSERT(zone < NR_ZONES);
521 if ( likely(page_to_mfn(pg) != 0) )
522 nid_prev = phys_to_nid(page_to_maddr(pg-1));
523 else
524 nid_prev = phys_to_nid(page_to_maddr(pg));
526 for ( i = 0; i < nr_pages; i++ )
527 {
528 nid_curr = phys_to_nid(page_to_maddr(pg+i));
530 if ( unlikely(!avail[nid_curr]) )
531 init_node_heap(nid_curr);
533 /*
534 * free pages of the same node, or if they differ, but are on a
535 * MAX_ORDER alignement boundary (which already get reserved)
536 */
537 if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
538 MAX_ORDER_ALIGNED) )
539 free_heap_pages(zone, pg+i, 0);
540 else
541 printk("Reserving non-aligned node boundary @ mfn %lu\n",
542 page_to_mfn(pg+i));
544 nid_prev = nid_curr;
545 }
546 }
548 static unsigned long avail_heap_pages(
549 unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
550 {
551 unsigned int i, zone, num_nodes = num_online_nodes();
552 unsigned long free_pages = 0;
554 if ( zone_hi >= NR_ZONES )
555 zone_hi = NR_ZONES - 1;
557 for ( i = 0; i < num_nodes; i++ )
558 {
559 if ( !avail[i] )
560 continue;
561 for ( zone = zone_lo; zone <= zone_hi; zone++ )
562 if ( (node == -1) || (node == i) )
563 free_pages += avail[i][zone];
564 }
566 return free_pages;
567 }
569 #define avail_for_domheap(mfn) \
570 (!allocated_in_map(mfn) && !is_xen_heap_frame(mfn_to_page(mfn)))
571 void __init end_boot_allocator(void)
572 {
573 unsigned long i;
574 int curr_free, next_free;
576 /* Pages that are free now go to the domain sub-allocator. */
577 if ( (curr_free = next_free = avail_for_domheap(first_valid_mfn)) )
578 map_alloc(first_valid_mfn, 1);
579 for ( i = first_valid_mfn; i < max_page; i++ )
580 {
581 curr_free = next_free;
582 next_free = avail_for_domheap(i+1);
583 if ( next_free )
584 map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
585 if ( curr_free )
586 init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
587 }
589 printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
590 }
591 #undef avail_for_domheap
593 /*
594 * Scrub all unallocated pages in all heap zones. This function is more
595 * convoluted than appears necessary because we do not want to continuously
596 * hold the lock while scrubbing very large memory areas.
597 */
598 void __init scrub_heap_pages(void)
599 {
600 void *p;
601 unsigned long mfn;
603 if ( !opt_bootscrub )
604 return;
606 printk("Scrubbing Free RAM: ");
608 for ( mfn = first_valid_mfn; mfn < max_page; mfn++ )
609 {
610 process_pending_timers();
612 /* Quick lock-free check. */
613 if ( allocated_in_map(mfn) )
614 continue;
616 /* Every 100MB, print a progress dot. */
617 if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
618 printk(".");
620 spin_lock(&heap_lock);
622 /* Re-check page status with lock held. */
623 if ( !allocated_in_map(mfn) )
624 {
625 if ( is_xen_heap_frame(mfn_to_page(mfn)) )
626 {
627 p = page_to_virt(mfn_to_page(mfn));
628 memguard_unguard_range(p, PAGE_SIZE);
629 scrub_page(p);
630 memguard_guard_range(p, PAGE_SIZE);
631 }
632 else
633 {
634 p = map_domain_page(mfn);
635 scrub_page(p);
636 unmap_domain_page(p);
637 }
638 }
640 spin_unlock(&heap_lock);
641 }
643 printk("done.\n");
644 }
648 /*************************
649 * XEN-HEAP SUB-ALLOCATOR
650 */
652 void init_xenheap_pages(paddr_t ps, paddr_t pe)
653 {
654 ps = round_pgup(ps);
655 pe = round_pgdown(pe);
656 if ( pe <= ps )
657 return;
659 memguard_guard_range(maddr_to_virt(ps), pe - ps);
661 /*
662 * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
663 * prevent merging of power-of-two blocks across the zone boundary.
664 */
665 if ( ps && !is_xen_heap_frame(maddr_to_page(ps)-1) )
666 ps += PAGE_SIZE;
667 if ( !is_xen_heap_frame(maddr_to_page(pe)) )
668 pe -= PAGE_SIZE;
670 init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
671 }
674 void *alloc_xenheap_pages(unsigned int order)
675 {
676 struct page_info *pg;
678 ASSERT(!in_irq());
680 pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order);
681 if ( unlikely(pg == NULL) )
682 goto no_memory;
684 memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
686 return page_to_virt(pg);
688 no_memory:
689 printk("Cannot handle page request order %d!\n", order);
690 return NULL;
691 }
694 void free_xenheap_pages(void *v, unsigned int order)
695 {
696 ASSERT(!in_irq());
698 if ( v == NULL )
699 return;
701 memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
703 free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
704 }
708 /*************************
709 * DOMAIN-HEAP SUB-ALLOCATOR
710 */
712 void init_domheap_pages(paddr_t ps, paddr_t pe)
713 {
714 unsigned long s_tot, e_tot;
715 unsigned int zone;
717 ASSERT(!in_irq());
719 s_tot = round_pgup(ps) >> PAGE_SHIFT;
720 e_tot = round_pgdown(pe) >> PAGE_SHIFT;
722 zone = fls(s_tot);
723 BUG_ON(zone <= MEMZONE_XEN + 1);
724 for ( --zone; s_tot < e_tot; ++zone )
725 {
726 unsigned long end = e_tot;
728 BUILD_BUG_ON(NR_ZONES > BITS_PER_LONG);
729 if ( zone < BITS_PER_LONG - 1 && end > 1UL << (zone + 1) )
730 end = 1UL << (zone + 1);
731 init_heap_pages(zone, mfn_to_page(s_tot), end - s_tot);
732 s_tot = end;
733 }
734 }
737 int assign_pages(
738 struct domain *d,
739 struct page_info *pg,
740 unsigned int order,
741 unsigned int memflags)
742 {
743 unsigned long i;
745 spin_lock(&d->page_alloc_lock);
747 if ( unlikely(d->is_dying) )
748 {
749 gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
750 d->domain_id);
751 goto fail;
752 }
754 if ( !(memflags & MEMF_no_refcount) )
755 {
756 if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
757 {
758 gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n",
759 d->domain_id, d->tot_pages + (1 << order), d->max_pages);
760 goto fail;
761 }
763 if ( unlikely(d->tot_pages == 0) )
764 get_knownalive_domain(d);
766 d->tot_pages += 1 << order;
767 }
769 for ( i = 0; i < (1 << order); i++ )
770 {
771 ASSERT(page_get_owner(&pg[i]) == NULL);
772 ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
773 page_set_owner(&pg[i], d);
774 wmb(); /* Domain pointer must be visible before updating refcnt. */
775 pg[i].count_info = PGC_allocated | 1;
776 list_add_tail(&pg[i].list, &d->page_list);
777 }
779 spin_unlock(&d->page_alloc_lock);
780 return 0;
782 fail:
783 spin_unlock(&d->page_alloc_lock);
784 return -1;
785 }
788 struct page_info *__alloc_domheap_pages(
789 struct domain *d, unsigned int cpu, unsigned int order,
790 unsigned int memflags)
791 {
792 struct page_info *pg = NULL;
793 unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
795 ASSERT(!in_irq());
797 if ( bits )
798 {
799 bits = domain_clamp_alloc_bitsize(d, bits);
800 if ( bits <= (PAGE_SHIFT + 1) )
801 return NULL;
802 bits -= PAGE_SHIFT + 1;
803 if ( bits < zone_hi )
804 zone_hi = bits;
805 }
807 if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
808 {
809 pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, cpu, order);
811 /* Failure? Then check if we can fall back to the DMA pool. */
812 if ( unlikely(pg == NULL) &&
813 ((order > MAX_ORDER) ||
814 (avail_heap_pages(MEMZONE_XEN + 1,
815 dma_bitsize - PAGE_SHIFT - 1,
816 -1) <
817 (dma_emergency_pool_pages + (1UL << order)))) )
818 return NULL;
819 }
821 if ( (pg == NULL) &&
822 ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
823 cpu, order)) == NULL) )
824 return NULL;
826 if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
827 {
828 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
829 return NULL;
830 }
832 return pg;
833 }
835 struct page_info *alloc_domheap_pages(
836 struct domain *d, unsigned int order, unsigned int flags)
837 {
838 return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
839 }
841 void free_domheap_pages(struct page_info *pg, unsigned int order)
842 {
843 int i, drop_dom_ref;
844 struct domain *d = page_get_owner(pg);
846 ASSERT(!in_irq());
848 if ( unlikely(is_xen_heap_frame(pg)) )
849 {
850 /* NB. May recursively lock from relinquish_memory(). */
851 spin_lock_recursive(&d->page_alloc_lock);
853 for ( i = 0; i < (1 << order); i++ )
854 list_del(&pg[i].list);
856 d->xenheap_pages -= 1 << order;
857 drop_dom_ref = (d->xenheap_pages == 0);
859 spin_unlock_recursive(&d->page_alloc_lock);
860 }
861 else if ( likely(d != NULL) )
862 {
863 /* NB. May recursively lock from relinquish_memory(). */
864 spin_lock_recursive(&d->page_alloc_lock);
866 for ( i = 0; i < (1 << order); i++ )
867 {
868 BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
869 list_del(&pg[i].list);
870 }
872 d->tot_pages -= 1 << order;
873 drop_dom_ref = (d->tot_pages == 0);
875 spin_unlock_recursive(&d->page_alloc_lock);
877 if ( likely(!d->is_dying) )
878 {
879 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
880 }
881 else
882 {
883 /*
884 * Normally we expect a domain to clear pages before freeing them,
885 * if it cares about the secrecy of their contents. However, after
886 * a domain has died we assume responsibility for erasure.
887 */
888 for ( i = 0; i < (1 << order); i++ )
889 {
890 page_set_owner(&pg[i], NULL);
891 spin_lock(&page_scrub_lock);
892 list_add(&pg[i].list, &page_scrub_list);
893 scrub_pages++;
894 spin_unlock(&page_scrub_lock);
895 }
896 }
897 }
898 else
899 {
900 /* Freeing anonymous domain-heap pages. */
901 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
902 drop_dom_ref = 0;
903 }
905 if ( drop_dom_ref )
906 put_domain(d);
907 }
909 unsigned long avail_domheap_pages_region(
910 unsigned int node, unsigned int min_width, unsigned int max_width)
911 {
912 int zone_lo, zone_hi;
914 zone_lo = min_width ? (min_width - (PAGE_SHIFT + 1)) : (MEMZONE_XEN + 1);
915 zone_lo = max_t(int, MEMZONE_XEN + 1, zone_lo);
916 zone_lo = min_t(int, NR_ZONES - 1, zone_lo);
918 zone_hi = max_width ? (max_width - (PAGE_SHIFT + 1)) : (NR_ZONES - 1);
919 zone_hi = max_t(int, MEMZONE_XEN + 1, zone_hi);
920 zone_hi = min_t(int, NR_ZONES - 1, zone_hi);
922 return avail_heap_pages(zone_lo, zone_hi, node);
923 }
925 unsigned long avail_domheap_pages(void)
926 {
927 unsigned long avail_nrm, avail_dma;
929 avail_nrm = avail_heap_pages(dma_bitsize - PAGE_SHIFT,
930 NR_ZONES - 1,
931 -1);
933 avail_dma = avail_heap_pages(MEMZONE_XEN + 1,
934 dma_bitsize - PAGE_SHIFT - 1,
935 -1);
937 if ( avail_dma > dma_emergency_pool_pages )
938 avail_dma -= dma_emergency_pool_pages;
939 else
940 avail_dma = 0;
942 return avail_nrm + avail_dma;
943 }
945 static void pagealloc_keyhandler(unsigned char key)
946 {
947 unsigned int zone = MEMZONE_XEN;
948 unsigned long total = 0;
950 printk("Physical memory information:\n");
951 printk(" Xen heap: %lukB free\n",
952 avail_heap_pages(zone, zone, -1) << (PAGE_SHIFT-10));
954 while ( ++zone < NR_ZONES )
955 {
956 unsigned long n;
958 if ( zone == dma_bitsize - PAGE_SHIFT )
959 {
960 printk(" DMA heap: %lukB free\n", total << (PAGE_SHIFT-10));
961 total = 0;
962 }
964 if ( (n = avail_heap_pages(zone, zone, -1)) != 0 )
965 {
966 total += n;
967 printk(" heap[%02u]: %lukB free\n", zone, n << (PAGE_SHIFT-10));
968 }
969 }
971 printk(" Dom heap: %lukB free\n", total << (PAGE_SHIFT-10));
972 }
975 static __init int pagealloc_keyhandler_init(void)
976 {
977 register_keyhandler('m', pagealloc_keyhandler, "memory info");
978 return 0;
979 }
980 __initcall(pagealloc_keyhandler_init);
984 /*************************
985 * PAGE SCRUBBING
986 */
988 static DEFINE_PER_CPU(struct timer, page_scrub_timer);
990 static void page_scrub_softirq(void)
991 {
992 struct list_head *ent;
993 struct page_info *pg;
994 void *p;
995 int i;
996 s_time_t start = NOW();
998 /* Aim to do 1ms of work every 10ms. */
999 do {
1000 spin_lock(&page_scrub_lock);
1002 if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
1004 spin_unlock(&page_scrub_lock);
1005 return;
1008 /* Peel up to 16 pages from the list. */
1009 for ( i = 0; i < 16; i++ )
1011 if ( ent->next == &page_scrub_list )
1012 break;
1013 ent = ent->next;
1016 /* Remove peeled pages from the list. */
1017 ent->next->prev = &page_scrub_list;
1018 page_scrub_list.next = ent->next;
1019 scrub_pages -= (i+1);
1021 spin_unlock(&page_scrub_lock);
1023 /* Working backwards, scrub each page in turn. */
1024 while ( ent != &page_scrub_list )
1026 pg = list_entry(ent, struct page_info, list);
1027 ent = ent->prev;
1028 p = map_domain_page(page_to_mfn(pg));
1029 scrub_page(p);
1030 unmap_domain_page(p);
1031 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
1033 } while ( (NOW() - start) < MILLISECS(1) );
1035 set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10));
1038 static void page_scrub_timer_fn(void *unused)
1040 page_scrub_schedule_work();
1043 unsigned long avail_scrub_pages(void)
1045 return scrub_pages;
1048 static void dump_heap(unsigned char key)
1050 s_time_t now = NOW();
1051 int i, j;
1053 printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
1054 (u32)(now>>32), (u32)now);
1056 for ( i = 0; i < MAX_NUMNODES; i++ )
1058 if ( !avail[i] )
1059 continue;
1060 for ( j = 0; j < NR_ZONES; j++ )
1061 printk("heap[node=%d][zone=%d] -> %lu pages\n",
1062 i, j, avail[i][j]);
1066 static __init int register_heap_trigger(void)
1068 register_keyhandler('H', dump_heap, "dump heap info");
1069 return 0;
1071 __initcall(register_heap_trigger);
1074 static __init int page_scrub_init(void)
1076 int cpu;
1077 for_each_cpu ( cpu )
1078 init_timer(&per_cpu(page_scrub_timer, cpu),
1079 page_scrub_timer_fn, NULL, cpu);
1080 open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
1081 return 0;
1083 __initcall(page_scrub_init);
1085 /*
1086 * Local variables:
1087 * mode: C
1088 * c-set-style: "BSD"
1089 * c-basic-offset: 4
1090 * tab-width: 4
1091 * indent-tabs-mode: nil
1092 * End:
1093 */