ia64/xen-unstable

view xen/common/page_alloc.c @ 19091:fa5887d90110

x86_64: No restriction on Xen heap address width.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jan 26 16:21:21 2009 +0000 (2009-01-26)
parents 108b45539cda
children 696351cde9a4
line source
1 /******************************************************************************
2 * page_alloc.c
3 *
4 * Simple buddy heap allocator for Xen.
5 *
6 * Copyright (c) 2002-2004 K A Fraser
7 * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
24 #include <xen/config.h>
25 #include <xen/init.h>
26 #include <xen/types.h>
27 #include <xen/lib.h>
28 #include <xen/sched.h>
29 #include <xen/spinlock.h>
30 #include <xen/mm.h>
31 #include <xen/irq.h>
32 #include <xen/softirq.h>
33 #include <xen/domain_page.h>
34 #include <xen/keyhandler.h>
35 #include <xen/perfc.h>
36 #include <xen/numa.h>
37 #include <xen/nodemask.h>
38 #include <asm/page.h>
39 #include <asm/numa.h>
40 #include <asm/flushtlb.h>
42 /*
43 * Comma-separated list of hexadecimal page numbers containing bad bytes.
44 * e.g. 'badpage=0x3f45,0x8a321'.
45 */
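/*
 * A hyphenated range is also accepted by the parser in init_boot_pages()
 * below, e.g. 'badpage=0x3f45-0x3f9a,0x8a321'.
 */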
46 static char opt_badpage[100] = "";
47 string_param("badpage", opt_badpage);
49 /*
50 * no-bootscrub -> Free pages are not zeroed during boot.
51 */
52 static int opt_bootscrub __initdata = 1;
53 boolean_param("bootscrub", opt_bootscrub);
55 /*
56 * Bit width of the DMA heap -- used to override the NUMA-node-first
57 * allocation strategy, which can otherwise exhaust low memory.
58 */
59 static unsigned int dma_bitsize;
60 integer_param("dma_bits", dma_bitsize);
62 #define round_pgdown(_p) ((_p)&PAGE_MASK)
63 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
65 #ifndef NDEBUG
66 /* Avoid callers relying on allocations returning zeroed pages. */
67 #define scrub_page(p) memset((p), 0xc2, PAGE_SIZE)
68 #else
69 /* For a production build, clear_page() is the fastest way to scrub. */
70 #define scrub_page(p) clear_page(p)
71 #endif
73 static DEFINE_SPINLOCK(page_scrub_lock);
74 LIST_HEAD(page_scrub_list);
75 static unsigned long scrub_pages;
77 /*********************
78 * ALLOCATION BITMAP
79 * One bit per page of memory. Bit set => page is allocated.
80 */
82 unsigned long *alloc_bitmap;
83 #define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
85 #define allocated_in_map(_pn) \
86 ({ unsigned long ___pn = (_pn); \
87 !!(alloc_bitmap[___pn/PAGES_PER_MAPWORD] & \
88 (1UL<<(___pn&(PAGES_PER_MAPWORD-1)))); })
90 /*
91 * Hint regarding bitwise arithmetic in map_{alloc,free}:
92 * -(1<<n) sets all bits >= n.
93 * (1<<n)-1 sets all bits < n.
94 * Variable names in map_{alloc,free}:
95 * *_idx == Index into `alloc_bitmap' array.
96 * *_off == Bit offset within an element of the `alloc_bitmap' array.
97 */
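/*
 * Worked example (64-bit longs, so PAGES_PER_MAPWORD == 64): marking pages
 * 3..7 as allocated stays within word 0 of the bitmap, and the mask computed
 * by map_alloc() below is
 *     ((1UL<<8)-1) & -(1UL<<3)  ==  0xff & ~0x7UL  ==  0xf8,
 * i.e. bits 3,4,5,6,7 -- exactly the five requested pages.
 */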
99 static void map_alloc(unsigned long first_page, unsigned long nr_pages)
100 {
101 unsigned long start_off, end_off, curr_idx, end_idx;
103 #ifndef NDEBUG
104 unsigned long i;
105 /* Check that the block isn't already allocated. */
106 for ( i = 0; i < nr_pages; i++ )
107 ASSERT(!allocated_in_map(first_page + i));
108 #endif
110 curr_idx = first_page / PAGES_PER_MAPWORD;
111 start_off = first_page & (PAGES_PER_MAPWORD-1);
112 end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
113 end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
115 if ( curr_idx == end_idx )
116 {
117 alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
118 }
119 else
120 {
121 alloc_bitmap[curr_idx] |= -(1UL<<start_off);
122 while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
123 alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
124 }
125 }
127 static void map_free(unsigned long first_page, unsigned long nr_pages)
128 {
129 unsigned long start_off, end_off, curr_idx, end_idx;
131 #ifndef NDEBUG
132 unsigned long i;
133 /* Check that the block isn't already freed. */
134 for ( i = 0; i < nr_pages; i++ )
135 ASSERT(allocated_in_map(first_page + i));
136 #endif
138 curr_idx = first_page / PAGES_PER_MAPWORD;
139 start_off = first_page & (PAGES_PER_MAPWORD-1);
140 end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
141 end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
143 if ( curr_idx == end_idx )
144 {
145 alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
146 }
147 else
148 {
149 alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
150 while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
151 alloc_bitmap[curr_idx] &= -(1UL<<end_off);
152 }
153 }
157 /*************************
158 * BOOT-TIME ALLOCATOR
159 */
161 static unsigned long first_valid_mfn = ~0UL;
163 /* Initialise allocator to handle up to @max_page pages. */
164 paddr_t __init init_boot_allocator(paddr_t bitmap_start)
165 {
166 unsigned long bitmap_size;
168 bitmap_start = round_pgup(bitmap_start);
170 /*
171 * Allocate space for the allocation bitmap. Include an extra longword
172 * of padding for possible overrun in map_alloc and map_free.
173 */
174 bitmap_size = max_page / 8;
175 bitmap_size += sizeof(unsigned long);
176 bitmap_size = round_pgup(bitmap_size);
177 alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);
179 /* All allocated by default. */
180 memset(alloc_bitmap, ~0, bitmap_size);
182 return bitmap_start + bitmap_size;
183 }
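/*
 * Sizing example for the bitmap above, assuming 4KiB pages: a machine with
 * 4GiB of RAM has max_page == 1<<20, so bitmap_size starts at 1<<20 / 8 ==
 * 128KiB, gains one unsigned long of padding, and is rounded up to 132KiB
 * (33 pages).
 */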
185 void __init init_boot_pages(paddr_t ps, paddr_t pe)
186 {
187 unsigned long bad_spfn, bad_epfn, i;
188 const char *p;
190 ps = round_pgup(ps);
191 pe = round_pgdown(pe);
192 if ( pe <= ps )
193 return;
195 first_valid_mfn = min_t(unsigned long, ps >> PAGE_SHIFT, first_valid_mfn);
197 map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);
199 /* Check new pages against the bad-page list. */
200 p = opt_badpage;
201 while ( *p != '\0' )
202 {
203 bad_spfn = simple_strtoul(p, &p, 0);
204 bad_epfn = bad_spfn;
206 if ( *p == '-' )
207 {
208 p++;
209 bad_epfn = simple_strtoul(p, &p, 0);
210 if ( bad_epfn < bad_spfn )
211 bad_epfn = bad_spfn;
212 }
214 if ( *p == ',' )
215 p++;
216 else if ( *p != '\0' )
217 break;
219 if ( bad_epfn == bad_spfn )
220 printk("Marking page %lx as bad\n", bad_spfn);
221 else
222 printk("Marking pages %lx through %lx as bad\n",
223 bad_spfn, bad_epfn);
225 for ( i = bad_spfn; i <= bad_epfn; i++ )
226 if ( (i < max_page) && !allocated_in_map(i) )
227 map_alloc(i, 1);
228 }
229 }
231 unsigned long __init alloc_boot_pages(
232 unsigned long nr_pfns, unsigned long pfn_align)
233 {
234 unsigned long pg, i;
236 /* Search backwards to obtain highest available range. */
237 for ( pg = (max_page - nr_pfns) & ~(pfn_align - 1);
238 pg >= first_valid_mfn;
239 pg = (pg + i - nr_pfns) & ~(pfn_align - 1) )
240 {
241 for ( i = 0; i < nr_pfns; i++ )
242 if ( allocated_in_map(pg+i) )
243 break;
244 if ( i == nr_pfns )
245 {
246 map_alloc(pg, nr_pfns);
247 return pg;
248 }
249 }
251 return 0;
252 }
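A minimal usage sketch of the boot allocator (the caller, its table and its panic message are hypothetical): request 16 frames aligned to a 16-frame boundary, and treat a return value of 0 as failure, since that is the only failure value the search above can return.

static unsigned long __init example_table_init(void)
{
    /* 16 contiguous frames, aligned to a 16-frame boundary. */
    unsigned long pfn = alloc_boot_pages(16, 16);

    if ( pfn == 0 )
        panic("Not enough boot memory for example table\n");

    return pfn;
}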
256 /*************************
257 * BINARY BUDDY ALLOCATOR
258 */
260 #define MEMZONE_XEN 0
261 #define NR_ZONES (PADDR_BITS - PAGE_SHIFT)
263 #define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 0 : ((b) - PAGE_SHIFT - 1))
264 #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN : \
265 (fls(page_to_mfn(pg)) - 1))
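/*
 * Zone numbering example (PAGE_SHIFT == 12): a page at mfn 0x5000 has
 * fls(0x5000) == 15 and therefore lives in zone 14, which holds the frames
 * between 64MiB and 128MiB (Xen-heap pages aside).  Conversely,
 * bits_to_zone(32) == 32 - 12 - 1 == 19, so a 32-bit address-width limit
 * selects zones up to 19, i.e. all frames below 4GiB.
 */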
267 typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
268 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
269 #define heap(node, zone, order) ((*_heap[node])[zone][order])
271 static unsigned long *avail[MAX_NUMNODES];
273 static DEFINE_SPINLOCK(heap_lock);
275 static void init_node_heap(int node)
276 {
277 /* First node to be discovered has its heap metadata statically allocated. */
278 static heap_by_zone_and_order_t _heap_static;
279 static unsigned long avail_static[NR_ZONES];
280 static int first_node_initialised;
282 int i, j;
284 if ( !first_node_initialised )
285 {
286 _heap[node] = &_heap_static;
287 avail[node] = avail_static;
288 first_node_initialised = 1;
289 }
290 else
291 {
292 _heap[node] = xmalloc(heap_by_zone_and_order_t);
293 avail[node] = xmalloc_array(unsigned long, NR_ZONES);
294 BUG_ON(!_heap[node] || !avail[node]);
295 }
297 memset(avail[node], 0, NR_ZONES * sizeof(long));
299 for ( i = 0; i < NR_ZONES; i++ )
300 for ( j = 0; j <= MAX_ORDER; j++ )
301 INIT_LIST_HEAD(&(*_heap[node])[i][j]);
302 }
304 /* Allocate 2^@order contiguous pages. */
305 static struct page_info *alloc_heap_pages(
306 unsigned int zone_lo, unsigned int zone_hi,
307 unsigned int node, unsigned int order)
308 {
309 unsigned int i, j, zone;
310 unsigned int num_nodes = num_online_nodes();
311 unsigned long request = 1UL << order;
312 cpumask_t extra_cpus_mask, mask;
313 struct page_info *pg;
315 if ( node == NUMA_NO_NODE )
316 node = cpu_to_node(smp_processor_id());
318 ASSERT(node >= 0);
319 ASSERT(node < num_nodes);
320 ASSERT(zone_lo <= zone_hi);
321 ASSERT(zone_hi < NR_ZONES);
323 if ( unlikely(order > MAX_ORDER) )
324 return NULL;
326 spin_lock(&heap_lock);
328 /*
329 * Start with the requested node, but exhaust all node memory in the
330 * requested zone before failing. Only compute a new node value if we fail
331 * to find memory in the target node; this avoids needless work on the fast path.
332 */
333 for ( i = 0; i < num_nodes; i++ )
334 {
335 zone = zone_hi;
336 do {
337 /* Check if target node can support the allocation. */
338 if ( !avail[node] || (avail[node][zone] < request) )
339 continue;
341 /* Find smallest order which can satisfy the request. */
342 for ( j = order; j <= MAX_ORDER; j++ )
343 if ( !list_empty(&heap(node, zone, j)) )
344 goto found;
345 } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
347 /* Pick next node, wrapping around if needed. */
348 if ( ++node == num_nodes )
349 node = 0;
350 }
352 /* No suitable memory blocks. Fail the request. */
353 spin_unlock(&heap_lock);
354 return NULL;
356 found:
357 pg = list_entry(heap(node, zone, j).next, struct page_info, list);
358 list_del(&pg->list);
360 /* We may have to halve the chunk a number of times. */
361 while ( j != order )
362 {
363 PFN_ORDER(pg) = --j;
364 list_add_tail(&pg->list, &heap(node, zone, j));
365 pg += 1 << j;
366 }
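/*
 * Example of the halving loop above: satisfying an order-0 request from a
 * free order-3 chunk at mfn 0x100 returns an order-2 block to the free list
 * at 0x100, an order-1 block at 0x104 and an order-0 block at 0x106, and
 * hands back the single page at mfn 0x107 (the tail of the original chunk).
 */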
368 map_alloc(page_to_mfn(pg), request);
369 ASSERT(avail[node][zone] >= request);
370 avail[node][zone] -= request;
372 spin_unlock(&heap_lock);
374 cpus_clear(mask);
376 for ( i = 0; i < (1 << order); i++ )
377 {
378 /* Reference count must continuously be zero for free pages. */
379 BUG_ON(pg[i].count_info != 0);
381 /* Add in any extra CPUs that need flushing because of this page. */
382 cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
383 tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
384 cpus_or(mask, mask, extra_cpus_mask);
386 /* Initialise fields which have other uses for free pages. */
387 pg[i].u.inuse.type_info = 0;
388 page_set_owner(&pg[i], NULL);
389 }
391 if ( unlikely(!cpus_empty(mask)) )
392 {
393 perfc_incr(need_flush_tlb_flush);
394 flush_tlb_mask(mask);
395 }
397 return pg;
398 }
400 /* Free 2^@order set of pages. */
401 static void free_heap_pages(
402 struct page_info *pg, unsigned int order)
403 {
404 unsigned long mask;
405 unsigned int i, node = phys_to_nid(page_to_maddr(pg));
406 unsigned int zone = page_to_zone(pg);
407 struct domain *d;
409 ASSERT(order <= MAX_ORDER);
410 ASSERT(node >= 0);
411 ASSERT(node < num_online_nodes());
413 for ( i = 0; i < (1 << order); i++ )
414 {
415 /*
416 * Cannot assume that count_info == 0, as there are some corner cases
417 * where it isn't the case and yet it isn't a bug:
418 * 1. page_get_owner() is NULL
419 * 2. page_get_owner() is a domain that was never accessible by
420 * its domid (e.g., failed to fully construct the domain).
421 * 3. page was never addressable by the guest (e.g., it's an
422 * auto-translate-physmap guest and the page was never included
423 * in its pseudophysical address space).
424 * In all the above cases there can be no guest mappings of this page.
425 */
426 pg[i].count_info = 0;
428 if ( (d = page_get_owner(&pg[i])) != NULL )
429 {
430 pg[i].tlbflush_timestamp = tlbflush_current_time();
431 pg[i].u.free.cpumask = d->domain_dirty_cpumask;
432 }
433 else
434 {
435 cpus_clear(pg[i].u.free.cpumask);
436 }
437 }
439 spin_lock(&heap_lock);
441 map_free(page_to_mfn(pg), 1 << order);
442 avail[node][zone] += 1 << order;
444 /* Merge chunks as far as possible. */
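/*
 * Example: freeing the order-0 page at mfn 0x107 while 0x106 (order 0),
 * 0x104 (order 1) and 0x100 (order 2) are all free coalesces with each
 * predecessor buddy in turn, rebuilding an order-3 chunk at 0x100 before
 * the loop below looks for yet larger buddies.
 */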
445 while ( order < MAX_ORDER )
446 {
447 mask = 1UL << order;
449 if ( (page_to_mfn(pg) & mask) )
450 {
451 /* Merge with predecessor block? */
452 if ( allocated_in_map(page_to_mfn(pg)-mask) ||
453 (PFN_ORDER(pg-mask) != order) )
454 break;
455 list_del(&(pg-mask)->list);
456 pg -= mask;
457 }
458 else
459 {
460 /* Merge with successor block? */
461 if ( allocated_in_map(page_to_mfn(pg)+mask) ||
462 (PFN_ORDER(pg+mask) != order) )
463 break;
464 list_del(&(pg+mask)->list);
465 }
467 order++;
469 /* After merging, pg should remain in the same node. */
470 ASSERT(phys_to_nid(page_to_maddr(pg)) == node);
471 }
473 PFN_ORDER(pg) = order;
474 list_add_tail(&pg->list, &heap(node, zone, order));
476 spin_unlock(&heap_lock);
477 }
479 /*
480 * Hand the specified arbitrary page range to the specified heap zone,
481 * checking the node_id of each page against that of the previous page.
482 * If they differ and the page is not on a MAX_ORDER boundary, we reserve
483 * the page by not freeing it to the buddy allocator.
484 */
485 #define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
486 static void init_heap_pages(
487 struct page_info *pg, unsigned long nr_pages)
488 {
489 unsigned int nid_curr, nid_prev;
490 unsigned long i;
492 nid_prev = phys_to_nid(page_to_maddr(pg-1));
494 for ( i = 0; i < nr_pages; i++ )
495 {
496 nid_curr = phys_to_nid(page_to_maddr(pg+i));
498 if ( unlikely(!avail[nid_curr]) )
499 init_node_heap(nid_curr);
501 /*
502 * Free pages of the same node or, if the nodes differ, pages that sit on
503 * a MAX_ORDER alignment boundary (across which chunks are never merged).
504 */
505 if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
506 MAX_ORDER_ALIGNED) )
507 free_heap_pages(pg+i, 0);
508 else
509 printk("Reserving non-aligned node boundary @ mfn %lu\n",
510 page_to_mfn(pg+i));
512 nid_prev = nid_curr;
513 }
514 }
516 static unsigned long avail_heap_pages(
517 unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
518 {
519 unsigned int i, zone, num_nodes = num_online_nodes();
520 unsigned long free_pages = 0;
522 if ( zone_hi >= NR_ZONES )
523 zone_hi = NR_ZONES - 1;
525 for ( i = 0; i < num_nodes; i++ )
526 {
527 if ( !avail[i] )
528 continue;
529 for ( zone = zone_lo; zone <= zone_hi; zone++ )
530 if ( (node == -1) || (node == i) )
531 free_pages += avail[i][zone];
532 }
534 return free_pages;
535 }
537 #define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn))
538 void __init end_boot_allocator(void)
539 {
540 unsigned long i;
541 int curr_free, next_free;
543 /* Pages that are free now go to the domain sub-allocator. */
544 if ( (curr_free = next_free = avail_for_domheap(first_valid_mfn)) )
545 map_alloc(first_valid_mfn, 1);
546 for ( i = first_valid_mfn; i < max_page; i++ )
547 {
548 curr_free = next_free;
549 next_free = avail_for_domheap(i+1);
550 if ( next_free )
551 map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
552 if ( curr_free )
553 init_heap_pages(mfn_to_page(i), 1);
554 }
556 if ( !dma_bitsize && (num_online_nodes() > 1) )
557 {
558 #ifdef CONFIG_X86
559 dma_bitsize = min_t(unsigned int,
560 fls(NODE_DATA(0)->node_spanned_pages) - 1
561 + PAGE_SHIFT - 2,
562 32);
563 #else
564 dma_bitsize = 32;
565 #endif
566 }
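/*
 * Example of the calculation above, with PAGE_SHIFT == 12: if node 0 spans
 * 4GiB of 4KiB frames (node_spanned_pages == 1<<20), then fls(1<<20) - 1 ==
 * 20 and 20 + 12 - 2 == 30, so dma_bitsize becomes min(30, 32) == 30 and
 * the DMA heap covers the low 1GiB -- a quarter of node 0.
 */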
568 printk("Domain heap initialised");
569 if ( dma_bitsize )
570 printk(" DMA width %u bits", dma_bitsize);
571 printk("\n");
572 }
573 #undef avail_for_domheap
575 /*
576 * Scrub all unallocated pages in all heap zones. This function is more
577 * convoluted than appears necessary because we do not want to continuously
578 * hold the lock while scrubbing very large memory areas.
579 */
580 void __init scrub_heap_pages(void)
581 {
582 void *p;
583 unsigned long mfn;
585 if ( !opt_bootscrub )
586 return;
588 printk("Scrubbing Free RAM: ");
590 for ( mfn = first_valid_mfn; mfn < max_page; mfn++ )
591 {
592 process_pending_timers();
594 /* Quick lock-free check. */
595 if ( allocated_in_map(mfn) )
596 continue;
598 /* Every 100MB, print a progress dot. */
599 if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
600 printk(".");
602 spin_lock(&heap_lock);
604 /* Re-check page status with lock held. */
605 if ( !allocated_in_map(mfn) )
606 {
607 if ( is_xen_heap_mfn(mfn) )
608 {
609 p = page_to_virt(mfn_to_page(mfn));
610 memguard_unguard_range(p, PAGE_SIZE);
611 scrub_page(p);
612 memguard_guard_range(p, PAGE_SIZE);
613 }
614 else
615 {
616 p = map_domain_page(mfn);
617 scrub_page(p);
618 unmap_domain_page(p);
619 }
620 }
622 spin_unlock(&heap_lock);
623 }
625 printk("done.\n");
626 }
630 /*************************
631 * XEN-HEAP SUB-ALLOCATOR
632 */
634 #ifndef __x86_64__
636 void init_xenheap_pages(paddr_t ps, paddr_t pe)
637 {
638 ps = round_pgup(ps);
639 pe = round_pgdown(pe);
640 if ( pe <= ps )
641 return;
643 memguard_guard_range(maddr_to_virt(ps), pe - ps);
645 /*
646 * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
647 * prevent merging of power-of-two blocks across the zone boundary.
648 */
649 if ( ps && !is_xen_heap_mfn(paddr_to_pfn(ps)-1) )
650 ps += PAGE_SIZE;
651 if ( !is_xen_heap_mfn(paddr_to_pfn(pe)) )
652 pe -= PAGE_SIZE;
654 init_heap_pages(maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
655 }
658 void *alloc_xenheap_pages(unsigned int order)
659 {
660 struct page_info *pg;
662 ASSERT(!in_irq());
664 pg = alloc_heap_pages(
665 MEMZONE_XEN, MEMZONE_XEN, cpu_to_node(smp_processor_id()), order);
666 if ( unlikely(pg == NULL) )
667 goto no_memory;
669 memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
671 return page_to_virt(pg);
673 no_memory:
674 printk("Cannot handle page request order %d!\n", order);
675 return NULL;
676 }
679 void free_xenheap_pages(void *v, unsigned int order)
680 {
681 ASSERT(!in_irq());
683 if ( v == NULL )
684 return;
686 memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
688 free_heap_pages(virt_to_page(v), order);
689 }
691 #else
693 void init_xenheap_pages(paddr_t ps, paddr_t pe)
694 {
695 init_domheap_pages(ps, pe);
696 }
698 void *alloc_xenheap_pages(unsigned int order)
699 {
700 struct page_info *pg;
701 unsigned int i;
703 ASSERT(!in_irq());
705 pg = alloc_heap_pages(
706 MEMZONE_XEN+1, NR_ZONES-1, cpu_to_node(smp_processor_id()), order);
707 if ( unlikely(pg == NULL) )
708 goto no_memory;
710 for ( i = 0; i < (1u << order); i++ )
711 pg[i].count_info |= PGC_xen_heap;
713 return page_to_virt(pg);
715 no_memory:
716 printk("Cannot handle page request order %d!\n", order);
717 return NULL;
718 }
720 void free_xenheap_pages(void *v, unsigned int order)
721 {
722 struct page_info *pg;
723 unsigned int i;
725 ASSERT(!in_irq());
727 if ( v == NULL )
728 return;
730 pg = virt_to_page(v);
732 for ( i = 0; i < (1u << order); i++ )
733 pg[i].count_info &= ~PGC_xen_heap;
735 free_heap_pages(pg, order);
736 }
738 #endif
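A usage sketch that applies to either variant above (the scratch-buffer caller is hypothetical): Xen-heap pages are directly addressable through the returned virtual address and must be freed with the same order they were allocated with.

static void example_scratch_page(void)
{
    void *buf = alloc_xenheap_pages(0);   /* a single page (order 0) */

    if ( buf == NULL )
        return;

    memset(buf, 0, PAGE_SIZE);
    /* ... use buf as a temporary scratch page ... */

    free_xenheap_pages(buf, 0);
}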
742 /*************************
743 * DOMAIN-HEAP SUB-ALLOCATOR
744 */
746 void init_domheap_pages(paddr_t ps, paddr_t pe)
747 {
748 unsigned long smfn, emfn;
750 ASSERT(!in_irq());
752 smfn = round_pgup(ps) >> PAGE_SHIFT;
753 emfn = round_pgdown(pe) >> PAGE_SHIFT;
755 init_heap_pages(mfn_to_page(smfn), emfn - smfn);
756 }
759 int assign_pages(
760 struct domain *d,
761 struct page_info *pg,
762 unsigned int order,
763 unsigned int memflags)
764 {
765 unsigned long i;
767 spin_lock(&d->page_alloc_lock);
769 if ( unlikely(d->is_dying) )
770 {
771 gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
772 d->domain_id);
773 goto fail;
774 }
776 if ( !(memflags & MEMF_no_refcount) )
777 {
778 if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
779 {
780 gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n",
781 d->domain_id, d->tot_pages + (1 << order), d->max_pages);
782 goto fail;
783 }
785 if ( unlikely(d->tot_pages == 0) )
786 get_knownalive_domain(d);
788 d->tot_pages += 1 << order;
789 }
791 for ( i = 0; i < (1 << order); i++ )
792 {
793 ASSERT(page_get_owner(&pg[i]) == NULL);
794 ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
795 page_set_owner(&pg[i], d);
796 wmb(); /* Domain pointer must be visible before updating refcnt. */
797 pg[i].count_info = PGC_allocated | 1;
798 list_add_tail(&pg[i].list, &d->page_list);
799 }
801 spin_unlock(&d->page_alloc_lock);
802 return 0;
804 fail:
805 spin_unlock(&d->page_alloc_lock);
806 return -1;
807 }
810 struct page_info *alloc_domheap_pages(
811 struct domain *d, unsigned int order, unsigned int memflags)
812 {
813 struct page_info *pg = NULL;
814 unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
815 unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1), dma_zone;
817 ASSERT(!in_irq());
819 if ( (node == NUMA_NO_NODE) && (d != NULL) )
820 node = domain_to_node(d);
822 bits = domain_clamp_alloc_bitsize(d, bits ? : (BITS_PER_LONG+PAGE_SHIFT));
823 if ( (zone_hi = min_t(unsigned int, bits_to_zone(bits), zone_hi)) == 0 )
824 return NULL;
826 if ( dma_bitsize && ((dma_zone = bits_to_zone(dma_bitsize)) < zone_hi) )
827 pg = alloc_heap_pages(dma_zone + 1, zone_hi, node, order);
829 if ( (pg == NULL) &&
830 ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
831 node, order)) == NULL) )
832 return NULL;
834 if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
835 {
836 free_heap_pages(pg, order);
837 return NULL;
838 }
840 return pg;
841 }
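A usage sketch (the caller and its purpose are hypothetical): an anonymous allocation that is not assigned to any domain, restricted to frames below 4GiB via MEMF_bits() from xen/mm.h, and released with the matching order.

static void example_low_frames(void)
{
    /* Four frames (order 2), below the 4GiB boundary, owned by no domain. */
    struct page_info *pg = alloc_domheap_pages(NULL, 2, MEMF_bits(32));

    if ( pg == NULL )
        return;

    /* ... the frames starting at page_to_mfn(pg) may be used here ... */

    free_domheap_pages(pg, 2);
}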
843 void free_domheap_pages(struct page_info *pg, unsigned int order)
844 {
845 int i, drop_dom_ref;
846 struct domain *d = page_get_owner(pg);
848 ASSERT(!in_irq());
850 if ( unlikely(is_xen_heap_page(pg)) )
851 {
852 /* NB. May recursively lock from relinquish_memory(). */
853 spin_lock_recursive(&d->page_alloc_lock);
855 for ( i = 0; i < (1 << order); i++ )
856 list_del(&pg[i].list);
858 d->xenheap_pages -= 1 << order;
859 drop_dom_ref = (d->xenheap_pages == 0);
861 spin_unlock_recursive(&d->page_alloc_lock);
862 }
863 else if ( likely(d != NULL) )
864 {
865 /* NB. May recursively lock from relinquish_memory(). */
866 spin_lock_recursive(&d->page_alloc_lock);
868 for ( i = 0; i < (1 << order); i++ )
869 {
870 BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
871 list_del(&pg[i].list);
872 }
874 d->tot_pages -= 1 << order;
875 drop_dom_ref = (d->tot_pages == 0);
877 spin_unlock_recursive(&d->page_alloc_lock);
879 if ( likely(!d->is_dying) )
880 {
881 free_heap_pages(pg, order);
882 }
883 else
884 {
885 /*
886 * Normally we expect a domain to clear pages before freeing them,
887 * if it cares about the secrecy of their contents. However, after
888 * a domain has died we assume responsibility for erasure.
889 */
890 for ( i = 0; i < (1 << order); i++ )
891 {
892 page_set_owner(&pg[i], NULL);
893 spin_lock(&page_scrub_lock);
894 list_add(&pg[i].list, &page_scrub_list);
895 scrub_pages++;
896 spin_unlock(&page_scrub_lock);
897 }
898 }
899 }
900 else
901 {
902 /* Freeing anonymous domain-heap pages. */
903 free_heap_pages(pg, order);
904 drop_dom_ref = 0;
905 }
907 if ( drop_dom_ref )
908 put_domain(d);
909 }
911 unsigned long avail_domheap_pages_region(
912 unsigned int node, unsigned int min_width, unsigned int max_width)
913 {
914 int zone_lo, zone_hi;
916 zone_lo = min_width ? bits_to_zone(min_width) : (MEMZONE_XEN + 1);
917 zone_lo = max_t(int, MEMZONE_XEN + 1, min_t(int, NR_ZONES - 1, zone_lo));
919 zone_hi = max_width ? bits_to_zone(max_width) : (NR_ZONES - 1);
920 zone_hi = max_t(int, MEMZONE_XEN + 1, min_t(int, NR_ZONES - 1, zone_hi));
922 return avail_heap_pages(zone_lo, zone_hi, node);
923 }
925 unsigned long avail_domheap_pages(void)
926 {
927 return avail_heap_pages(MEMZONE_XEN + 1,
928 NR_ZONES - 1,
929 -1);
930 }
932 static void pagealloc_keyhandler(unsigned char key)
933 {
934 unsigned int zone = MEMZONE_XEN;
935 unsigned long n, total = 0;
937 printk("Physical memory information:\n");
938 printk(" Xen heap: %lukB free\n",
939 avail_heap_pages(zone, zone, -1) << (PAGE_SHIFT-10));
941 while ( ++zone < NR_ZONES )
942 {
943 if ( (zone + PAGE_SHIFT) == dma_bitsize )
944 {
945 printk(" DMA heap: %lukB free\n", total << (PAGE_SHIFT-10));
946 total = 0;
947 }
949 if ( (n = avail_heap_pages(zone, zone, -1)) != 0 )
950 {
951 total += n;
952 printk(" heap[%02u]: %lukB free\n", zone, n << (PAGE_SHIFT-10));
953 }
954 }
956 printk(" Dom heap: %lukB free\n", total << (PAGE_SHIFT-10));
957 }
960 static __init int pagealloc_keyhandler_init(void)
961 {
962 register_keyhandler('m', pagealloc_keyhandler, "memory info");
963 return 0;
964 }
965 __initcall(pagealloc_keyhandler_init);
969 /*************************
970 * PAGE SCRUBBING
971 */
973 static DEFINE_PER_CPU(struct timer, page_scrub_timer);
975 static void page_scrub_softirq(void)
976 {
977 struct list_head *ent;
977 struct list_head *ent;
978 struct page_info *pg;
979 void *p;
980 int i;
981 s_time_t start = NOW();
982 static spinlock_t serialise_lock = SPIN_LOCK_UNLOCKED;
984 /* free_heap_pages() does not parallelise well. Serialise this function. */
985 if ( !spin_trylock(&serialise_lock) )
986 {
987 set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(1));
988 return;
989 }
991 /* Aim to do 1ms of work every 10ms. */
992 do {
993 spin_lock(&page_scrub_lock);
995 if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
996 {
997 spin_unlock(&page_scrub_lock);
998 goto out;
999 }
1001 /* Peel up to 16 pages from the list. */
1002 for ( i = 0; i < 16; i++ )
1003 {
1004 if ( ent->next == &page_scrub_list )
1005 break;
1006 ent = ent->next;
1007 }
1009 /* Remove peeled pages from the list. */
1010 ent->next->prev = &page_scrub_list;
1011 page_scrub_list.next = ent->next;
1012 scrub_pages -= (i+1);
1014 spin_unlock(&page_scrub_lock);
1016 /* Working backwards, scrub each page in turn. */
1017 while ( ent != &page_scrub_list )
1018 {
1019 pg = list_entry(ent, struct page_info, list);
1020 ent = ent->prev;
1021 p = map_domain_page(page_to_mfn(pg));
1022 scrub_page(p);
1023 unmap_domain_page(p);
1024 free_heap_pages(pg, 0);
1025 }
1026 } while ( (NOW() - start) < MILLISECS(1) );
1028 set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10));
1030 out:
1031 spin_unlock(&serialise_lock);
1032 }
1034 static void page_scrub_timer_fn(void *unused)
1035 {
1036 page_scrub_schedule_work();
1037 }
1039 unsigned long avail_scrub_pages(void)
1040 {
1041 return scrub_pages;
1042 }
1044 static void dump_heap(unsigned char key)
1045 {
1046 s_time_t now = NOW();
1047 int i, j;
1049 printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
1050 (u32)(now>>32), (u32)now);
1052 for ( i = 0; i < MAX_NUMNODES; i++ )
1053 {
1054 if ( !avail[i] )
1055 continue;
1056 for ( j = 0; j < NR_ZONES; j++ )
1057 printk("heap[node=%d][zone=%d] -> %lu pages\n",
1058 i, j, avail[i][j]);
1059 }
1060 }
1062 static __init int register_heap_trigger(void)
1063 {
1064 register_keyhandler('H', dump_heap, "dump heap info");
1065 return 0;
1066 }
1067 __initcall(register_heap_trigger);
1070 static __init int page_scrub_init(void)
1071 {
1072 int cpu;
1073 for_each_cpu ( cpu )
1074 init_timer(&per_cpu(page_scrub_timer, cpu),
1075 page_scrub_timer_fn, NULL, cpu);
1076 open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
1077 return 0;
1078 }
1079 __initcall(page_scrub_init);
1081 /*
1082 * Local variables:
1083 * mode: C
1084 * c-set-style: "BSD"
1085 * c-basic-offset: 4
1086 * tab-width: 4
1087 * indent-tabs-mode: nil
1088 * End:
1089 */