
xen/common/page_alloc.c @ 14103:ee4850bc895b

xen memory allocator: remove bit width restrictions

Hide the (default or user specified) DMA width from anything outside
the heap allocator. I/O-capable guests can now request any width for
the memory they want exchanged/added.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kfraser@localhost.localdomain
date Fri Feb 23 17:02:58 2007 +0000 (2007-02-23)
parents 70098102f84d
children 8afe591c272b
/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/spinlock.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/shadow.h>
#include <xen/domain_page.h>
#include <xen/keyhandler.h>
#include <xen/perfc.h>
#include <xen/numa.h>
#include <xen/nodemask.h>
#include <asm/page.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

/*
 * Bit width of the DMA heap.
 */
static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
static unsigned long max_dma_mfn = (1UL << (CONFIG_DMA_BITSIZE - PAGE_SHIFT)) - 1;
static void parse_dma_bits(char *s)
{
    unsigned int v = simple_strtol(s, NULL, 0);
    if ( v >= (BITS_PER_LONG + PAGE_SHIFT) )
    {
        dma_bitsize = BITS_PER_LONG + PAGE_SHIFT;
        max_dma_mfn = ~0UL;
    }
    else if ( v > PAGE_SHIFT + 1 )
    {
        dma_bitsize = v;
        max_dma_mfn = (1UL << (dma_bitsize - PAGE_SHIFT)) - 1;
    }
    else
        printk("Invalid dma_bits value of %u ignored.\n", v);
}
custom_param("dma_bits", parse_dma_bits);
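
/*
 * Illustrative example (not part of the original source): with
 * PAGE_SHIFT == 12, booting with "dma_bits=32" gives dma_bitsize = 32 and
 * max_dma_mfn = (1UL << (32 - 12)) - 1 = 0xfffff, i.e. the DMA heap covers
 * exactly the machine frames below the 4GB boundary.
 */
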
/*
 * Amount of memory to reserve in a low-memory (<4GB) pool for specific
 * allocation requests. Ordinary requests will not fall back to the
 * lowmem emergency pool.
 */
static unsigned long dma_emergency_pool_pages;
static void parse_dma_emergency_pool(char *s)
{
    unsigned long long bytes;
    bytes = parse_size_and_unit(s, NULL);
    dma_emergency_pool_pages = bytes >> PAGE_SHIFT;
}
custom_param("dma_emergency_pool", parse_dma_emergency_pool);

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

static DEFINE_SPINLOCK(page_scrub_lock);
LIST_HEAD(page_scrub_list);
static unsigned long scrub_pages;

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn)                       \
({  unsigned long ___pn = (_pn);                    \
    !!(alloc_bitmap[___pn/PAGES_PER_MAPWORD] &      \
       (1UL<<(___pn&(PAGES_PER_MAPWORD-1)))); })

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n) sets all bits >= n.
 *  (1<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
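
/*
 * Worked example of the hint above (illustration only, assuming 64-bit
 * longs, so PAGES_PER_MAPWORD == 64): marking pages 67..71 as allocated
 * touches a single map word (index 1) with start_off = 3 and end_off = 8:
 *   -(1UL<<3)      = ...1111111111111000   (bits >= 3)
 *   (1UL<<8)-1     = ...0000000011111111   (bits <  8)
 *   AND of the two = bits 3..7             (pages 67..71)
 * and map_alloc() ORs exactly that mask into alloc_bitmap[1].
 */
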
static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}

/*************************
 * BOOT-TIME ALLOCATOR
 */

static unsigned long first_valid_mfn = ~0UL;

/* Initialise allocator to handle up to @max_page pages. */
paddr_t init_boot_allocator(paddr_t bitmap_start)
{
    unsigned long bitmap_size;

    bitmap_start = round_pgup(bitmap_start);

    /*
     * Allocate space for the allocation bitmap. Include an extra longword
     * of padding for possible overrun in map_alloc and map_free.
     */
    bitmap_size  = max_page / 8;
    bitmap_size += sizeof(unsigned long);
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}

void init_boot_pages(paddr_t ps, paddr_t pe)
{
    unsigned long bad_spfn, bad_epfn, i;
    const char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    first_valid_mfn = min_t(unsigned long, ps >> PAGE_SHIFT, first_valid_mfn);

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_spfn = simple_strtoul(p, &p, 0);
        bad_epfn = bad_spfn;

        if ( *p == '-' )
        {
            p++;
            bad_epfn = simple_strtoul(p, &p, 0);
            if ( bad_epfn < bad_spfn )
                bad_epfn = bad_spfn;
        }

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( bad_epfn == bad_spfn )
            printk("Marking page %lx as bad\n", bad_spfn);
        else
            printk("Marking pages %lx through %lx as bad\n",
                   bad_spfn, bad_epfn);

        for ( i = bad_spfn; i <= bad_epfn; i++ )
            if ( (i < max_page) && !allocated_in_map(i) )
                map_alloc(i, 1);
    }
}

int reserve_boot_pages(unsigned long first_pfn, unsigned long nr_pfns)
{
    unsigned long i;

    for ( i = 0; i < nr_pfns; i++ )
        if ( allocated_in_map(first_pfn + i) )
            break;

    if ( i != nr_pfns )
        return 0;

    map_alloc(first_pfn, nr_pfns);
    return 1;
}

unsigned long alloc_boot_low_pages(
    unsigned long nr_pfns, unsigned long pfn_align)
{
    unsigned long pg, i;

    /* Search forwards to obtain lowest available range. */
    for ( pg = first_valid_mfn & ~(pfn_align - 1);
          (pg + nr_pfns) <= max_page;
          pg = (pg + i + pfn_align) & ~(pfn_align - 1) )
    {
        for ( i = 0; i < nr_pfns; i++ )
            if ( allocated_in_map(pg+i) )
                break;
        if ( i == nr_pfns )
        {
            map_alloc(pg, nr_pfns);
            return pg;
        }
    }

    return 0;
}

unsigned long alloc_boot_pages(
    unsigned long nr_pfns, unsigned long pfn_align)
{
    unsigned long pg, i;

    /* Search backwards to obtain highest available range. */
    for ( pg = (max_page - nr_pfns) & ~(pfn_align - 1);
          pg >= first_valid_mfn;
          pg = (pg + i - nr_pfns) & ~(pfn_align - 1) )
    {
        for ( i = 0; i < nr_pfns; i++ )
            if ( allocated_in_map(pg+i) )
                break;
        if ( i == nr_pfns )
        {
            map_alloc(pg, nr_pfns);
            return pg;
        }
    }

    return 0;
}

/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#ifdef PADDR_BITS
#define NR_ZONES    (PADDR_BITS - PAGE_SHIFT)
#else
#define NR_ZONES    (BITS_PER_LONG - PAGE_SHIFT)
#endif

#define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1)
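
/*
 * Illustration (not part of the original source): zone z holds the pages
 * whose frame number has its highest set bit at position z, i.e. pfns in
 * [2^z, 2^(z+1)). With PAGE_SHIFT == 12, zones 0..19 together cover all
 * memory below 2^32, which is why an address width of b bits is translated
 * into zone_hi = b - PAGE_SHIFT - 1 elsewhere in this file.
 */
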
static struct list_head heap[NR_ZONES][MAX_NUMNODES][MAX_ORDER+1];

static unsigned long avail[NR_ZONES][MAX_NUMNODES];

static DEFINE_SPINLOCK(heap_lock);

/* Allocate 2^@order contiguous pages. */
static struct page_info *alloc_heap_pages(
    unsigned int zone_lo, unsigned zone_hi,
    unsigned int cpu, unsigned int order)
{
    unsigned int i, j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
    unsigned int zone, request = (1UL << order);
    struct page_info *pg;

    ASSERT(node >= 0);
    ASSERT(node < num_nodes);
    ASSERT(zone_lo <= zone_hi);
    ASSERT(zone_hi < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /*
     * Start with the requested node, but exhaust all node memory in the
     * requested zone before failing. Only compute a new node value if we
     * fail to find memory in the target node; this avoids needless
     * computation on the fast path.
     */
    for ( i = 0; i < num_nodes; i++ )
    {
        for ( zone = zone_hi; zone >= zone_lo; --zone )
        {
            /* Check if the target node can support the allocation. */
            if ( avail[zone][node] >= request )
            {
                /* Find smallest order which can satisfy the request. */
                for ( j = order; j <= MAX_ORDER; j++ )
                {
                    if ( !list_empty(&heap[zone][node][j]) )
                        goto found;
                }
            }
        }
        /* Pick the next node, wrapping around if needed. */
        if ( ++node == num_nodes )
            node = 0;
    }

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap[zone][node][j].next, struct page_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
    while ( j != order )
    {
        PFN_ORDER(pg) = --j;
        list_add_tail(&pg->list, &heap[zone][node][j]);
        pg += 1 << j;
    }

    map_alloc(page_to_mfn(pg), request);
    ASSERT(avail[zone][node] >= request);
    avail[zone][node] -= request;

    spin_unlock(&heap_lock);

    return pg;
}
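
/*
 * Example of the halving loop above (illustration only): an order-2 request
 * (4 pages) satisfied from an order-4 chunk (16 pages) first puts the front
 * order-3 half back on its free list, then an order-2 quarter, and returns
 * the remaining 4 pages: 16 = 8 + 4 + 4 (returned).
 */
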
/* Free 2^@order set of pages. */
static void free_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned int order)
{
    unsigned long mask;
    unsigned int node = phys_to_nid(page_to_maddr(pg));

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);
    ASSERT(node >= 0);
    ASSERT(node < num_online_nodes());

    spin_lock(&heap_lock);

    map_free(page_to_mfn(pg), 1 << order);
    avail[zone][node] += 1 << order;

    /* Merge chunks as far as possible. */
    while ( order < MAX_ORDER )
    {
        mask = 1 << order;

        if ( (page_to_mfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_mfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_mfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;

        /* After merging, pg should be in the same node. */
        ASSERT(phys_to_nid(page_to_maddr(pg)) == node);
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap[zone][node][order]);

    spin_unlock(&heap_lock);
}
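
/*
 * Worked example of the merge loop (illustration only): freeing an order-0
 * page at mfn 0x1235 gives mask = 1 and, since bit 0 of the mfn is set, the
 * candidate buddy is the predecessor at mfn 0x1234. If that page is free and
 * also of order 0, the pair becomes one order-1 chunk at 0x1234; the loop
 * then retries with mask = 2 against the chunk at 0x1236, and so on up to
 * MAX_ORDER.
 */
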
/*
 * Hand the specified arbitrary page range to the specified heap zone,
 * checking the node_id of the previous page. If they differ and the
 * latter is not on a MAX_ORDER boundary, then we reserve the page by
 * not freeing it to the buddy allocator.
 */
#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
void init_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned long nr_pages)
{
    unsigned int nid_curr, nid_prev;
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    if ( likely(page_to_mfn(pg) != 0) )
        nid_prev = phys_to_nid(page_to_maddr(pg-1));
    else
        nid_prev = phys_to_nid(page_to_maddr(pg));

    for ( i = 0; i < nr_pages; i++ )
    {
        nid_curr = phys_to_nid(page_to_maddr(pg+i));

        /*
         * Free pages of the same node, or pages whose node differs but
         * which sit on a MAX_ORDER alignment boundary (which already get
         * reserved).
         */
        if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
                                        MAX_ORDER_ALIGNED) )
            free_heap_pages(zone, pg+i, 0);
        else
            printk("Reserving non-aligned node boundary @ mfn %lu\n",
                   page_to_mfn(pg+i));

        nid_prev = nid_curr;
    }
}

static unsigned long avail_heap_pages(
    unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
{
    unsigned int i, zone, num_nodes = num_online_nodes();
    unsigned long free_pages = 0;

    if ( zone_hi >= NR_ZONES )
        zone_hi = NR_ZONES - 1;
    for ( zone = zone_lo; zone <= zone_hi; zone++ )
        for ( i = 0; i < num_nodes; i++ )
            if ( (node == -1) || (node == i) )
                free_pages += avail[zone][i];

    return free_pages;
}

void end_boot_allocator(void)
{
    unsigned long i, j, k;
    int curr_free, next_free;

    memset(avail, 0, sizeof(avail));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j < MAX_NUMNODES; j++ )
            for ( k = 0; k <= MAX_ORDER; k++ )
                INIT_LIST_HEAD(&heap[i][j][k]);

    /* Pages that are free now go to the domain sub-allocator. */
    if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
        map_alloc(first_valid_mfn, 1);
    for ( i = first_valid_mfn; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = !allocated_in_map(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
    }

    printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
}

/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock or disable interrupts while scrubbing very large memory areas.
 */
void scrub_heap_pages(void)
{
    void *p;
    unsigned long mfn;

    printk("Scrubbing Free RAM: ");

    for ( mfn = first_valid_mfn; mfn < max_page; mfn++ )
    {
        process_pending_timers();

        /* Quick lock-free check. */
        if ( allocated_in_map(mfn) )
            continue;

        /* Every 100MB, print a progress dot. */
        if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
            printk(".");

        spin_lock_irq(&heap_lock);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(mfn) )
        {
            if ( IS_XEN_HEAP_FRAME(mfn_to_page(mfn)) )
            {
                p = page_to_virt(mfn_to_page(mfn));
                memguard_unguard_range(p, PAGE_SIZE);
                clear_page(p);
                memguard_guard_range(p, PAGE_SIZE);
            }
            else
            {
                p = map_domain_page(mfn);
                clear_page(p);
                unmap_domain_page(p);
            }
        }

        spin_unlock_irq(&heap_lock);
    }

    printk("done.\n");
}

/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long flags;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    memguard_guard_range(maddr_to_virt(ps), pe - ps);

    /*
     * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
     * prevent merging of power-of-two blocks across the zone boundary.
     */
    if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
        pe -= PAGE_SIZE;

    local_irq_save(flags);
    init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
    local_irq_restore(flags);
}

void *alloc_xenheap_pages(unsigned int order)
{
    unsigned long flags;
    struct page_info *pg;
    int i;

    local_irq_save(flags);
    pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order);
    local_irq_restore(flags);

    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
    }

    return page_to_virt(pg);

 no_memory:
    printk("Cannot handle page request order %d!\n", order);
    return NULL;
}

void free_xenheap_pages(void *v, unsigned int order)
{
    unsigned long flags;

    if ( v == NULL )
        return;

    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));

    local_irq_save(flags);
    free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
    local_irq_restore(flags);
}

/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

void init_domheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long s_tot, e_tot;
    unsigned int zone;

    ASSERT(!in_irq());

    s_tot = round_pgup(ps) >> PAGE_SHIFT;
    e_tot = round_pgdown(pe) >> PAGE_SHIFT;

    zone = fls(s_tot);
    BUG_ON(zone <= MEMZONE_XEN + 1);
    for ( --zone; s_tot < e_tot; ++zone )
    {
        unsigned long end = e_tot;

        BUILD_BUG_ON(NR_ZONES > BITS_PER_LONG);
        if ( zone < BITS_PER_LONG - 1 && end > 1UL << (zone + 1) )
            end = 1UL << (zone + 1);
        init_heap_pages(zone, mfn_to_page(s_tot), end - s_tot);
        s_tot = end;
    }
}
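
/*
 * Illustration (not part of the original source): handing the pfn range
 * [0x9000, 0x30000) to the domain heap starts in zone fls(0x9000) - 1 = 15,
 * so [0x9000, 0x10000) goes to zone 15, [0x10000, 0x20000) to zone 16 and
 * [0x20000, 0x30000) to zone 17, each chunk ending on the next power-of-two
 * pfn boundary.
 */
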
int assign_pages(
    struct domain *d,
    struct page_info *pg,
    unsigned int order,
    unsigned int memflags)
{
    unsigned long i;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) )
    {
        gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
                 d->domain_id);
        goto fail;
    }

    if ( !(memflags & MEMF_no_refcount) )
    {
        if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
        {
            gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n",
                     d->domain_id, d->tot_pages + (1 << order), d->max_pages);
            goto fail;
        }

        if ( unlikely(d->tot_pages == 0) )
            get_knownalive_domain(d);

        d->tot_pages += 1 << order;
    }

    for ( i = 0; i < (1 << order); i++ )
    {
        ASSERT(page_get_owner(&pg[i]) == NULL);
        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
        page_set_owner(&pg[i], d);
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info = PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);
    return 0;

 fail:
    spin_unlock(&d->page_alloc_lock);
    return -1;
}

struct page_info *__alloc_domheap_pages(
    struct domain *d, unsigned int cpu, unsigned int order,
    unsigned int memflags)
{
    struct page_info *pg = NULL;
    cpumask_t mask;
    unsigned long i;
    unsigned int bits = memflags >> _MEMF_bits, zone_hi;

    ASSERT(!in_irq());

    if ( bits && bits <= PAGE_SHIFT + 1 )
        return NULL;

    zone_hi = bits - PAGE_SHIFT - 1;
    if ( zone_hi >= NR_ZONES )
        zone_hi = NR_ZONES - 1;

    if ( NR_ZONES + PAGE_SHIFT > dma_bitsize &&
         (!bits || bits > dma_bitsize) )
    {
        pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, cpu, order);

        /* Failure? Then check if we can fall back to the DMA pool. */
        if ( unlikely(pg == NULL) &&
             ((order > MAX_ORDER) ||
              (avail_heap_pages(MEMZONE_XEN + 1,
                                dma_bitsize - PAGE_SHIFT - 1,
                                -1) <
               (dma_emergency_pool_pages + (1UL << order)))) )
            return NULL;
    }

    if ( pg == NULL )
        if ( (pg = alloc_heap_pages(MEMZONE_XEN + 1,
                                    zone_hi,
                                    cpu, order)) == NULL )
            return NULL;

    mask = pg->u.free.cpumask;
    tlbflush_filter(mask, pg->tlbflush_timestamp);

    pg->count_info        = 0;
    pg->u.inuse._domain   = 0;
    pg->u.inuse.type_info = 0;

    for ( i = 1; i < (1 << order); i++ )
    {
        /* Add in any extra CPUs that need flushing because of this page. */
        cpumask_t extra_cpus_mask;
        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
        cpus_or(mask, mask, extra_cpus_mask);

        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
        page_set_owner(&pg[i], NULL);
    }

    if ( unlikely(!cpus_empty(mask)) )
    {
        perfc_incrc(need_flush_tlb_flush);
        flush_tlb_mask(mask);
    }

    if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
    {
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        return NULL;
    }

    return pg;
}
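
/*
 * Usage sketch (illustration only, not part of this file): with the width
 * restrictions removed, a caller that needs memory addressable with, say,
 * 32 bits encodes that width in memflags instead of picking a DMA zone
 * itself. Assuming the MEMF_bits() helper from xen/mm.h:
 *
 *     pg = alloc_domheap_pages(d, order, MEMF_bits(32));
 *
 * The width is recovered above as memflags >> _MEMF_bits; a value of 0
 * (no restriction) lets the allocation come from any zone.
 */
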
struct page_info *alloc_domheap_pages(
    struct domain *d, unsigned int order, unsigned int flags)
{
    return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
}

void free_domheap_pages(struct page_info *pg, unsigned int order)
{
    int i, drop_dom_ref;
    struct domain *d = page_get_owner(pg);

    ASSERT(!in_irq());

    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
        {
            shadow_drop_references(d, &pg[i]);
            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
            pg[i].tlbflush_timestamp = tlbflush_current_time();
            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
            list_del(&pg[i].list);
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
        {
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        }
        else
        {
            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            for ( i = 0; i < (1 << order); i++ )
            {
                spin_lock(&page_scrub_lock);
                list_add(&pg[i].list, &page_scrub_list);
                scrub_pages++;
                spin_unlock(&page_scrub_lock);
            }
        }
    }
    else
    {
        /* Freeing anonymous domain-heap pages. */
        for ( i = 0; i < (1 << order); i++ )
            cpus_clear(pg[i].u.free.cpumask);
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}

unsigned long avail_domheap_pages(void)
{
    unsigned long avail_nrm, avail_dma;

    avail_nrm = avail_heap_pages(dma_bitsize - PAGE_SHIFT,
                                 NR_ZONES - 1,
                                 -1);

    avail_dma = avail_heap_pages(MEMZONE_XEN + 1,
                                 dma_bitsize - PAGE_SHIFT - 1,
                                 -1);

    if ( avail_dma > dma_emergency_pool_pages )
        avail_dma -= dma_emergency_pool_pages;
    else
        avail_dma = 0;

    return avail_nrm + avail_dma;
}

unsigned long avail_nodeheap_pages(int node)
{
    return avail_heap_pages(0, NR_ZONES - 1, node);
}

static void pagealloc_keyhandler(unsigned char key)
{
    unsigned int zone = MEMZONE_XEN;
    unsigned long total = 0;

    printk("Physical memory information:\n");
    printk(" Xen heap: %lukB free\n",
           avail_heap_pages(zone, zone, -1) << (PAGE_SHIFT-10));

    while ( ++zone < NR_ZONES )
    {
        unsigned long n;

        if ( zone == dma_bitsize - PAGE_SHIFT )
        {
            printk(" DMA heap: %lukB free\n", total << (PAGE_SHIFT-10));
            total = 0;
        }

        if ( (n = avail_heap_pages(zone, zone, -1)) != 0 )
        {
            total += n;
            printk(" heap[%02u]: %lukB free\n", zone, n << (PAGE_SHIFT-10));
        }
    }

    printk(" Dom heap: %lukB free\n", total << (PAGE_SHIFT-10));
}

static __init int pagealloc_keyhandler_init(void)
{
    register_keyhandler('m', pagealloc_keyhandler, "memory info");
    return 0;
}
__initcall(pagealloc_keyhandler_init);

/*************************
 * PAGE SCRUBBING
 */

static void page_scrub_softirq(void)
{
    struct list_head *ent;
    struct page_info *pg;
    void *p;
    int i;
    s_time_t start = NOW();

    /* Aim to do 1ms of work (ten percent of a 10ms jiffy). */
    do {
        spin_lock(&page_scrub_lock);

        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
        {
            spin_unlock(&page_scrub_lock);
            return;
        }

        /* Peel up to 16 pages from the list. */
        for ( i = 0; i < 16; i++ )
        {
            if ( ent->next == &page_scrub_list )
                break;
            ent = ent->next;
        }

        /* Remove peeled pages from the list. */
        ent->next->prev = &page_scrub_list;
        page_scrub_list.next = ent->next;
        scrub_pages -= (i+1);

        spin_unlock(&page_scrub_lock);

        /* Working backwards, scrub each page in turn. */
        while ( ent != &page_scrub_list )
        {
            pg = list_entry(ent, struct page_info, list);
            ent = ent->prev;
            p = map_domain_page(page_to_mfn(pg));
            clear_page(p);
            unmap_domain_page(p);
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
        }
    } while ( (NOW() - start) < MILLISECS(1) );
}

unsigned long avail_scrub_pages(void)
{
    return scrub_pages;
}

static unsigned long count_bucket(struct list_head* l, int order)
{
    unsigned long total_pages = 0;
    int pages = 1 << order;
    struct page_info *pg;

    list_for_each_entry(pg, l, list)
        total_pages += pages;

    return total_pages;
}

static void dump_heap(unsigned char key)
{
    s_time_t now = NOW();
    int i, j, k;
    unsigned long total;

    printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
           (u32)(now>>32), (u32)now);

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j < MAX_NUMNODES; j++ )
            for ( k = 0; k <= MAX_ORDER; k++ )
                if ( !list_empty(&heap[i][j][k]) )
                {
                    total = count_bucket(&heap[i][j][k], k);
                    printk("heap[%d][%d][%d]-> %lu pages\n",
                           i, j, k, total);
                }
}

static __init int register_heap_trigger(void)
{
    register_keyhandler('H', dump_heap, "dump heap info");
    return 0;
}
__initcall(register_heap_trigger);

static __init int page_scrub_init(void)
{
    open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
    return 0;
}
__initcall(page_scrub_init);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */