xen/common/page_alloc.c @ 5074:07bd52c0f8de (ia64/xen-unstable)

bitkeeper revision 1.1504 (428f10aaD4iY_Mj4uu6RLDmF65qx-w)

Fix memory scrubbing to deal with memguarded Xen heap pages.
Signed-off-by: Keir Fraser <keir@xensource.com>

author    kaf24@firebug.cl.cam.ac.uk
date      Sat May 21 10:42:50 2005 +0000 (2005-05-21)
parents   77586a4591eb
children  de3abc161c24

/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/spinlock.h>
#include <xen/slab.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/shadow.h>
#include <asm/domain_page.h>
#include <asm/page.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

static spinlock_t page_scrub_lock;
struct list_head page_scrub_list;

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long  bitmap_size; /* in bytes */
static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn)                 \
( !! (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & \
      (1UL<<((_pn)&(PAGES_PER_MAPWORD-1)))) )

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n) sets all bits >= n.
 *  (1<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */

static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}


/*************************
 * BOOT-TIME ALLOCATOR
 */

/* Initialise allocator to handle up to @max_page pages. */
unsigned long init_boot_allocator(unsigned long bitmap_start)
{
    bitmap_start = round_pgup(bitmap_start);

    /* Allocate space for the allocation bitmap. */
    bitmap_size  = max_page / 8;
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)phys_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}

void init_boot_pages(unsigned long ps, unsigned long pe)
{
    unsigned long bad_pfn;
    char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_pfn = simple_strtoul(p, &p, 0);

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( (bad_pfn < (bitmap_size*8)) && !allocated_in_map(bad_pfn) )
        {
            printk("Marking page %lx as bad\n", bad_pfn);
            map_alloc(bad_pfn, 1);
        }
    }
}

unsigned long alloc_boot_pages(unsigned long size, unsigned long align)
{
    unsigned long pg, i;

    size  = round_pgup(size) >> PAGE_SHIFT;
    align = round_pgup(align) >> PAGE_SHIFT;

    for ( pg = 0; (pg + size) < (bitmap_size*8); pg += align )
    {
        for ( i = 0; i < size; i++ )
            if ( allocated_in_map(pg + i) )
                break;

        if ( i == size )
        {
            map_alloc(pg, size);
            return pg << PAGE_SHIFT;
        }
    }

    return 0;
}
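
/*
 * Editorial note (illustrative, not part of the original source): a boot-time
 * caller needing two page-aligned pages might do
 *     unsigned long pa = alloc_boot_pages(2 << PAGE_SHIFT, 1 << PAGE_SHIFT);
 * The return value is a physical address, with 0 indicating that no suitable
 * free run was found in the allocation bitmap.
 */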

/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#define MEMZONE_DOM 1
#define NR_ZONES    2

/* Up to 2^20 pages can be allocated at once. */
#define MAX_ORDER 20
static struct list_head heap[NR_ZONES][MAX_ORDER+1];

static unsigned long avail[NR_ZONES];

static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED;

void end_boot_allocator(void)
{
    unsigned long i, j;
    int curr_free = 0, next_free = 0;

    memset(avail, 0, sizeof(avail));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j <= MAX_ORDER; j++ )
            INIT_LIST_HEAD(&heap[i][j]);

    /* Pages that are free now go to the domain sub-allocator. */
    for ( i = 0; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = !allocated_in_map(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            free_heap_pages(MEMZONE_DOM, pfn_to_page(i), 0);
    }
}

/* Hand the specified arbitrary page range to the specified heap zone. */
void init_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned long nr_pages)
{
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    for ( i = 0; i < nr_pages; i++ )
        free_heap_pages(zone, pg+i, 0);
}


/* Allocate 2^@order contiguous pages. */
struct pfn_info *alloc_heap_pages(unsigned int zone, unsigned int order)
{
    int i;
    struct pfn_info *pg;

    ASSERT(zone < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /* Find smallest order which can satisfy the request. */
    for ( i = order; i <= MAX_ORDER; i++ )
        if ( !list_empty(&heap[zone][i]) )
            goto found;

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap[zone][i].next, struct pfn_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
    while ( i != order )
    {
        PFN_ORDER(pg) = --i;
        list_add_tail(&pg->list, &heap[zone][i]);
        pg += 1 << i;
    }

    map_alloc(page_to_pfn(pg), 1 << order);
    avail[zone] -= 1 << order;

    spin_unlock(&heap_lock);

    return pg;
}
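
/*
 * Editorial worked example (not part of the original source): if an order-0
 * request finds its smallest non-empty free list at order 2, the halving
 * loop above runs twice: the front half (2 pages) of the 4-page block goes
 * back on heap[zone][1] and pg advances by 2 pages, then the next page goes
 * back on heap[zone][0] and pg advances by 1. The final page of the original
 * block is the one marked allocated and returned.
 */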

/* Free 2^@order set of pages. */
void free_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned int order)
{
    unsigned long mask;

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);

    spin_lock(&heap_lock);

    map_free(page_to_pfn(pg), 1 << order);
    avail[zone] += 1 << order;

    /* Merge chunks as far as possible. */
    while ( order < MAX_ORDER )
    {
        mask = 1 << order;

        if ( (page_to_pfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_pfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_pfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap[zone][order]);

    spin_unlock(&heap_lock);
}
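
/*
 * Editorial worked example (not part of the original source): freeing a
 * single page at pfn 0x101 gives mask == 1 and (pfn & mask) != 0, so its
 * buddy is the predecessor at pfn 0x100. If that buddy is free and of order
 * 0, the two coalesce into an order-1 block at 0x100; the loop then retries
 * with mask == 2 against the buddy at 0x102, and so on until a buddy is
 * allocated, is of a different order, or MAX_ORDER is reached.
 */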

/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock or disable interrupts while scrubbing very large memory areas.
 */
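
/*
 * Editorial note (not part of the original source): per this changeset, a
 * free Xen-heap frame may be memguarded (its mapping removed or protected),
 * so it is unguarded around clear_page() and re-guarded afterwards, whereas
 * an ordinary domain-heap frame is simply mapped, cleared and unmapped.
 */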
void scrub_heap_pages(void)
{
    void *p;
    unsigned long pfn, flags;

    printk("Scrubbing Free RAM: ");

    watchdog_disable();

    for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
    {
        /* Every 100MB, print a progress dot. */
        if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
            printk(".");

        /* Quick lock-free check. */
        if ( allocated_in_map(pfn) )
            continue;

        spin_lock_irqsave(&heap_lock, flags);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(pfn) )
        {
            if ( IS_XEN_HEAP_FRAME(pfn_to_page(pfn)) )
            {
                p = page_to_virt(pfn_to_page(pfn));
                memguard_unguard_range(p, PAGE_SIZE);
                clear_page(p);
                memguard_guard_range(p, PAGE_SIZE);
            }
            else
            {
                p = map_domain_mem(pfn << PAGE_SHIFT);
                clear_page(p);
                unmap_domain_mem(p);
            }
        }

        spin_unlock_irqrestore(&heap_lock, flags);
    }

    watchdog_enable();

    printk("done.\n");
}


/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(unsigned long ps, unsigned long pe)
{
    unsigned long flags;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    memguard_guard_range(__va(ps), pe - ps);

    /*
     * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
     * prevent merging of power-of-two blocks across the zone boundary.
     */
    if ( !IS_XEN_HEAP_FRAME(phys_to_page(pe)) )
        pe -= PAGE_SIZE;

    local_irq_save(flags);
    init_heap_pages(MEMZONE_XEN, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
    local_irq_restore(flags);
}


unsigned long alloc_xenheap_pages(unsigned int order)
{
    unsigned long flags;
    struct pfn_info *pg;
    int i;

    local_irq_save(flags);
    pg = alloc_heap_pages(MEMZONE_XEN, order);
    local_irq_restore(flags);

    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
    }

    return (unsigned long)page_to_virt(pg);

 no_memory:
    printk("Cannot handle page request order %d!\n", order);
    return 0;
}


void free_xenheap_pages(unsigned long p, unsigned int order)
{
    unsigned long flags;

    memguard_guard_range((void *)p, 1 << (order + PAGE_SHIFT));

    local_irq_save(flags);
    free_heap_pages(MEMZONE_XEN, virt_to_page(p), order);
    local_irq_restore(flags);
}


/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

void init_domheap_pages(unsigned long ps, unsigned long pe)
{
    ASSERT(!in_irq());

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    init_heap_pages(MEMZONE_DOM, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
}


struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
{
    struct pfn_info *pg;
    unsigned long mask = 0;
    int i;

    ASSERT(!in_irq());

    if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) )
        return NULL;

    for ( i = 0; i < (1 << order); i++ )
    {
        mask |= tlbflush_filter_cpuset(
            pg[i].u.free.cpu_mask & ~mask, pg[i].tlbflush_timestamp);

        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
    }

    if ( unlikely(mask != 0) )
    {
        perfc_incrc(need_flush_tlb_flush);
        flush_tlb_mask(mask);
    }

    if ( d == NULL )
        return pg;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) ||
         unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    {
        DPRINTK("Over-allocation for domain %u: %u > %u\n",
                d->domain_id, d->tot_pages + (1 << order), d->max_pages);
        DPRINTK("...or the domain is dying (%d)\n",
                !!test_bit(_DOMF_dying, &d->domain_flags));
        spin_unlock(&d->page_alloc_lock);
        free_heap_pages(MEMZONE_DOM, pg, order);
        return NULL;
    }

    if ( unlikely(d->tot_pages == 0) )
        get_knownalive_domain(d);

    d->tot_pages += 1 << order;

    for ( i = 0; i < (1 << order); i++ )
    {
        page_set_owner(&pg[i], d);
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info |= PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);

    return pg;
}
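
/*
 * Editorial note (illustrative, not part of the original source): a typical
 * caller allocating a single frame for domain d would do
 *     struct pfn_info *pg = alloc_domheap_pages(d, 0);
 *     if ( pg == NULL )
 *         return -ENOMEM;
 *     ... use the frame, then ...
 *     free_domheap_pages(pg, 0);
 * Passing d == NULL returns an anonymous frame that is not accounted to any
 * domain.
 */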

void free_domheap_pages(struct pfn_info *pg, unsigned int order)
{
    int            i, drop_dom_ref;
    struct domain *d = page_get_owner(pg);

    ASSERT(!in_irq());

    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
        {
            shadow_drop_references(d, &pg[i]);
            ASSERT(((pg[i].u.inuse.type_info & PGT_count_mask) == 0) ||
                   shadow_tainted_refcnts(d));
            pg[i].tlbflush_timestamp = tlbflush_current_time();
            pg[i].u.free.cpu_mask    = d->cpuset;
            list_del(&pg[i].list);
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
        {
            free_heap_pages(MEMZONE_DOM, pg, order);
        }
        else
        {
            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            for ( i = 0; i < (1 << order); i++ )
            {
                spin_lock(&page_scrub_lock);
                list_add(&pg[i].list, &page_scrub_list);
                spin_unlock(&page_scrub_lock);
            }
        }
    }
    else
    {
        /* Freeing an anonymous domain-heap page. */
        free_heap_pages(MEMZONE_DOM, pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}


unsigned long avail_domheap_pages(void)
{
    return avail[MEMZONE_DOM];
}


/*************************
 * PAGE SCRUBBING
 */

static void page_scrub_softirq(void)
{
    struct list_head *ent;
    struct pfn_info  *pg;
    void             *p;
    int               i;
    s_time_t          start = NOW();

    /* Aim to do 1ms of work (ten percent of a 10ms jiffy). */
    do {
        spin_lock(&page_scrub_lock);

        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
        {
            spin_unlock(&page_scrub_lock);
            return;
        }

        /* Peel up to 16 pages from the list. */
        for ( i = 0; i < 16; i++ )
        {
            if ( ent->next == &page_scrub_list )
                break;
            ent = ent->next;
        }

        /* Remove peeled pages from the list. */
        ent->next->prev = &page_scrub_list;
        page_scrub_list.next = ent->next;

        spin_unlock(&page_scrub_lock);

        /* Working backwards, scrub each page in turn. */
        while ( ent != &page_scrub_list )
        {
            pg = list_entry(ent, struct pfn_info, list);
            ent = ent->prev;
            p = map_domain_mem(page_to_phys(pg));
            clear_page(p);
            unmap_domain_mem(p);
            free_heap_pages(MEMZONE_DOM, pg, 0);
        }
    } while ( (NOW() - start) < MILLISECS(1) );
}
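
/*
 * Editorial note (not part of the original source): the list surgery in
 * page_scrub_softirq() detaches everything from the old list head up to and
 * including `ent' in O(1); because the first detached entry's ->prev still
 * points at &page_scrub_list, the backwards scrubbing walk terminates there
 * without needing a separate end marker.
 */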

static __init int page_scrub_init(void)
{
    spin_lock_init(&page_scrub_lock);
    INIT_LIST_HEAD(&page_scrub_list);
    open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
    return 0;
}
__initcall(page_scrub_init);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */