ia64/xen-unstable

view xen/common/page_alloc.c @ 9065:e77ea156075c

Add 'm' debug key to print free-memory info.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Feb 28 11:07:18 2006 +0100
parents eaeb26494a39
children 1e5788066d1f

/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/spinlock.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/shadow.h>
#include <xen/domain_page.h>
#include <xen/keyhandler.h>
#include <asm/page.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

#define round_pgdown(_p) ((_p)&PAGE_MASK)
#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

static spinlock_t page_scrub_lock = SPIN_LOCK_UNLOCKED;
LIST_HEAD(page_scrub_list);

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn)                 \
( !! (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & \
      (1UL<<((_pn)&(PAGES_PER_MAPWORD-1)))) )

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n) sets all bits >= n.
 *  (1<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
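/*
 * Worked example, assuming 64-bit longs (PAGES_PER_MAPWORD == 64):
 * map_alloc(3, 4) gives start_off == 3, end_off == 7 and
 * curr_idx == end_idx == 0, so the single-word case applies:
 *   ((1UL<<7)-1) & -(1UL<<3)  ==  0x7f & ~0x7  ==  0x78
 * i.e. exactly bits 3-6 (pages 3-6) are set in alloc_bitmap[0].
 */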

static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}

/*************************
 * BOOT-TIME ALLOCATOR
 */

/* Initialise allocator to handle up to @max_page pages. */
paddr_t init_boot_allocator(paddr_t bitmap_start)
{
    unsigned long bitmap_size;

    bitmap_start = round_pgup(bitmap_start);

    /*
     * Allocate space for the allocation bitmap. Include an extra longword
     * of padding for possible overrun in map_alloc and map_free.
     */
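    /*
     * e.g. with 4KiB pages and max_page == 1<<20 (4GiB of RAM) this comes
     * to 128KiB of bitmap plus one longword, rounded up to 33 pages.
     */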
    bitmap_size  = max_page / 8;
    bitmap_size += sizeof(unsigned long);
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}

void init_boot_pages(paddr_t ps, paddr_t pe)
{
    unsigned long bad_pfn;
    char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_pfn = simple_strtoul(p, &p, 0);

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( (bad_pfn < max_page) && !allocated_in_map(bad_pfn) )
        {
            printk("Marking page %lx as bad\n", bad_pfn);
            map_alloc(bad_pfn, 1);
        }
    }
}

unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align)
{
    unsigned long pg, i;

    for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align )
    {
        for ( i = 0; i < nr_pfns; i++ )
            if ( allocated_in_map(pg + i) )
                break;

        if ( i == nr_pfns )
        {
            map_alloc(pg, nr_pfns);
            return pg;
        }
    }

    return 0;
}


/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN    0
#define MEMZONE_DOM    1
#define MEMZONE_DMADOM 2
#define NR_ZONES       3

#define pfn_dom_zone_type(_pfn)                                 \
    (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)

/* Up to 2^20 pages can be allocated at once. */
#define MAX_ORDER 20
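/* With 4KiB pages this caps a single request at 2^20 * 4KiB == 4GiB. */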
static struct list_head heap[NR_ZONES][MAX_ORDER+1];

static unsigned long avail[NR_ZONES];

static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED;

void end_boot_allocator(void)
{
    unsigned long i, j;
    int curr_free = 0, next_free = 0;

    memset(avail, 0, sizeof(avail));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j <= MAX_ORDER; j++ )
            INIT_LIST_HEAD(&heap[i][j]);

    /* Pages that are free now go to the domain sub-allocator. */
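    /*
     * Each free page is passed to free_heap_pages() only after its free
     * successor (if any) has been temporarily marked as allocated below,
     * so a just-freed page can never merge with a neighbour that has not
     * yet been handed to the buddy allocator itself.
     */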
    for ( i = 0; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = !allocated_in_map(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            free_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 0);
    }
}

/* Hand the specified arbitrary page range to the specified heap zone. */
void init_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned long nr_pages)
{
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    for ( i = 0; i < nr_pages; i++ )
        free_heap_pages(zone, pg+i, 0);
}

/* Allocate 2^@order contiguous pages. */
struct page_info *alloc_heap_pages(unsigned int zone, unsigned int order)
{
    int i;
    struct page_info *pg;

    ASSERT(zone < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /* Find smallest order which can satisfy the request. */
    for ( i = order; i <= MAX_ORDER; i++ )
        if ( !list_empty(&heap[zone][i]) )
            goto found;

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap[zone][i].next, struct page_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
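    /*
     * e.g. serving an order-2 request from an order-5 block at MFN X puts
     * order-4, order-3 and order-2 remainders at X, X+16 and X+24 back on
     * the free lists and returns the final order-2 piece at X+28.
     */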
    while ( i != order )
    {
        PFN_ORDER(pg) = --i;
        list_add_tail(&pg->list, &heap[zone][i]);
        pg += 1 << i;
    }

    map_alloc(page_to_mfn(pg), 1 << order);
    avail[zone] -= 1 << order;

    spin_unlock(&heap_lock);

    return pg;
}

/* Free 2^@order set of pages. */
void free_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned int order)
{
    unsigned long mask;

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);

    spin_lock(&heap_lock);

    map_free(page_to_mfn(pg), 1 << order);
    avail[zone] += 1 << order;

    /* Merge chunks as far as possible. */
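    /*
     * e.g. freeing a single page at MFN 0x11: its bit 0 is set, so the
     * candidate buddy is the predecessor 0x10; if 0x10 is a free order-0
     * block the two merge into an order-1 block at 0x10, whose next
     * candidate buddy is then the successor at 0x12, and so on upwards.
     */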
    while ( order < MAX_ORDER )
    {
        mask = 1 << order;

        if ( (page_to_mfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_mfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_mfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap[zone][order]);

    spin_unlock(&heap_lock);
}

/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock or disable interrupts while scrubbing very large memory areas.
 */
void scrub_heap_pages(void)
{
    void *p;
    unsigned long pfn;
    int cpu = smp_processor_id();

    printk("Scrubbing Free RAM: ");

    for ( pfn = 0; pfn < max_page; pfn++ )
    {
        /* Every 100MB, print a progress dot. */
        if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
            printk(".");

        if ( unlikely(softirq_pending(cpu)) )
            do_softirq();

        /* Quick lock-free check. */
        if ( allocated_in_map(pfn) )
            continue;

        spin_lock_irq(&heap_lock);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(pfn) )
        {
            if ( IS_XEN_HEAP_FRAME(mfn_to_page(pfn)) )
            {
                p = page_to_virt(mfn_to_page(pfn));
                memguard_unguard_range(p, PAGE_SIZE);
                clear_page(p);
                memguard_guard_range(p, PAGE_SIZE);
            }
            else
            {
                p = map_domain_page(pfn);
                clear_page(p);
                unmap_domain_page(p);
            }
        }

        spin_unlock_irq(&heap_lock);
    }

    printk("done.\n");
}


/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long flags;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    memguard_guard_range(maddr_to_virt(ps), pe - ps);

    /*
     * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
     * prevent merging of power-of-two blocks across the zone boundary.
     */
    if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
        pe -= PAGE_SIZE;

    local_irq_save(flags);
    init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
    local_irq_restore(flags);
}

void *alloc_xenheap_pages(unsigned int order)
{
    unsigned long flags;
    struct page_info *pg;
    int i;

    local_irq_save(flags);
    pg = alloc_heap_pages(MEMZONE_XEN, order);
    local_irq_restore(flags);

    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
    }

    return page_to_virt(pg);

 no_memory:
    printk("Cannot handle page request order %d!\n", order);
    return NULL;
}


void free_xenheap_pages(void *v, unsigned int order)
{
    unsigned long flags;

    if ( v == NULL )
        return;

    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));

    local_irq_save(flags);
    free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
    local_irq_restore(flags);
}


/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

void init_domheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long s_tot, e_tot, s_dma, e_dma, s_nrm, e_nrm;

    ASSERT(!in_irq());

    s_tot = round_pgup(ps) >> PAGE_SHIFT;
    e_tot = round_pgdown(pe) >> PAGE_SHIFT;

    s_dma = min(s_tot, MAX_DMADOM_PFN + 1);
    e_dma = min(e_tot, MAX_DMADOM_PFN + 1);
    if ( s_dma < e_dma )
        init_heap_pages(MEMZONE_DMADOM, mfn_to_page(s_dma), e_dma - s_dma);

    s_nrm = max(s_tot, MAX_DMADOM_PFN + 1);
    e_nrm = max(e_tot, MAX_DMADOM_PFN + 1);
    if ( s_nrm < e_nrm )
        init_heap_pages(MEMZONE_DOM, mfn_to_page(s_nrm), e_nrm - s_nrm);
}

struct page_info *alloc_domheap_pages(
    struct domain *d, unsigned int order, unsigned int flags)
{
    struct page_info *pg = NULL;
    cpumask_t mask;
    int i;

    ASSERT(!in_irq());

    if ( !(flags & ALLOC_DOM_DMA) )
        pg = alloc_heap_pages(MEMZONE_DOM, order);

    if ( pg == NULL )
        if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL )
            return NULL;
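
    /*
     * u.free.cpumask records the CPUs that may still hold stale TLB
     * entries for this page from its previous owner; tlbflush_filter()
     * drops any CPU that has flushed its TLB since the page was freed,
     * so only genuinely stale CPUs get flushed below.
     */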
    mask = pg->u.free.cpumask;
    tlbflush_filter(mask, pg->tlbflush_timestamp);

    pg->count_info        = 0;
    pg->u.inuse._domain   = 0;
    pg->u.inuse.type_info = 0;

    for ( i = 1; i < (1 << order); i++ )
    {
        /* Add in any extra CPUs that need flushing because of this page. */
        cpumask_t extra_cpus_mask;
        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
        cpus_or(mask, mask, extra_cpus_mask);

        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
        page_set_owner(&pg[i], NULL);
    }

    if ( unlikely(!cpus_empty(mask)) )
    {
        perfc_incrc(need_flush_tlb_flush);
        flush_tlb_mask(mask);
    }

    if ( d == NULL )
        return pg;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) ||
         unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    {
        DPRINTK("Over-allocation for domain %u: %u > %u\n",
                d->domain_id, d->tot_pages + (1 << order), d->max_pages);
        DPRINTK("...or the domain is dying (%d)\n",
                !!test_bit(_DOMF_dying, &d->domain_flags));
        spin_unlock(&d->page_alloc_lock);
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        return NULL;
    }

    if ( unlikely(d->tot_pages == 0) )
        get_knownalive_domain(d);

    d->tot_pages += 1 << order;

    for ( i = 0; i < (1 << order); i++ )
    {
        page_set_owner(&pg[i], d);
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info |= PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);

    return pg;
}

void free_domheap_pages(struct page_info *pg, unsigned int order)
{
    int i, drop_dom_ref;
    struct domain *d = page_get_owner(pg);

    ASSERT(!in_irq());

    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
        {
            shadow_drop_references(d, &pg[i]);
            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
            pg[i].tlbflush_timestamp = tlbflush_current_time();
            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
            list_del(&pg[i].list);
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
        {
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        }
        else
        {
            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            for ( i = 0; i < (1 << order); i++ )
            {
                spin_lock(&page_scrub_lock);
                list_add(&pg[i].list, &page_scrub_list);
                spin_unlock(&page_scrub_lock);
            }
        }
    }
    else
    {
        /* Freeing anonymous domain-heap pages. */
        for ( i = 0; i < (1 << order); i++ )
            pg[i].u.free.cpumask = CPU_MASK_NONE;
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}

unsigned long avail_domheap_pages(void)
{
    return avail[MEMZONE_DOM] + avail[MEMZONE_DMADOM];
}


static void pagealloc_keyhandler(unsigned char key)
{
    printk("Physical memory information:\n");
    printk(" Xen heap: %lukB free\n"
           " DMA heap: %lukB free\n"
           " Dom heap: %lukB free\n",
           avail[MEMZONE_XEN]<<(PAGE_SHIFT-10),
           avail[MEMZONE_DMADOM]<<(PAGE_SHIFT-10),
           avail[MEMZONE_DOM]<<(PAGE_SHIFT-10));
}


static __init int pagealloc_keyhandler_init(void)
{
    register_keyhandler('m', pagealloc_keyhandler, "memory info");
    return 0;
}
__initcall(pagealloc_keyhandler_init);


/*************************
 * PAGE SCRUBBING
 */

static void page_scrub_softirq(void)
{
    struct list_head *ent;
    struct page_info *pg;
    void *p;
    int i;
    s_time_t start = NOW();

    /* Aim to do 1ms of work (ten percent of a 10ms jiffy). */
    do {
        spin_lock(&page_scrub_lock);

        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
        {
            spin_unlock(&page_scrub_lock);
            return;
        }

        /* Peel up to 16 pages from the list. */
        for ( i = 0; i < 16; i++ )
        {
            if ( ent->next == &page_scrub_list )
                break;
            ent = ent->next;
        }

        /* Remove peeled pages from the list. */
        ent->next->prev = &page_scrub_list;
        page_scrub_list.next = ent->next;

        spin_unlock(&page_scrub_lock);

        /* Working backwards, scrub each page in turn. */
        while ( ent != &page_scrub_list )
        {
            pg = list_entry(ent, struct page_info, list);
            ent = ent->prev;
            p = map_domain_page(page_to_mfn(pg));
            clear_page(p);
            unmap_domain_page(p);
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
        }
    } while ( (NOW() - start) < MILLISECS(1) );
}

static __init int page_scrub_init(void)
{
    open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
    return 0;
}
__initcall(page_scrub_init);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */