ia64/xen-unstable: xen/common/page_alloc.c @ 12390:e28beea6d228

[IA64] Fix time services of EFI emulation

This patch serializes the execution of the following EFI runtime services:
 - GetTime
 - SetTime
 - GetWakeTime
 - SetWakeTime

Linux/ia64 uses similar spinlocks in the EFI RTC driver.

Signed-off-by: Masaki Kanno <kanno.masaki@jp.fujitsu.com>
author    awilliam@xenbuild.aw
date      Fri Nov 10 12:03:19 2006 -0700
parents   444496ecb14e
children  39e40ccf7df5

/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/spinlock.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/shadow.h>
#include <xen/domain_page.h>
#include <xen/keyhandler.h>
#include <xen/perfc.h>
#include <xen/numa.h>
#include <xen/nodemask.h>
#include <asm/page.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

/*
 * Amount of memory to reserve in a low-memory (<4GB) pool for specific
 * allocation requests. Ordinary requests will not fall back to the
 * lowmem emergency pool.
 */
static unsigned long lowmem_emergency_pool_pages;
static void parse_lowmem_emergency_pool(char *s)
{
    unsigned long long bytes;
    bytes = parse_size_and_unit(s, NULL);
    lowmem_emergency_pool_pages = bytes >> PAGE_SHIFT;
}
custom_param("lowmem_emergency_pool", parse_lowmem_emergency_pool);

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
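
/*
 * Worked example of the rounding macros, assuming 4KB pages
 * (PAGE_SHIFT == 12, PAGE_MASK == ~0xfff):
 *   round_pgdown(0x12345) == 0x12000
 *   round_pgup(0x12345)   == 0x13000
 *   round_pgup(0x12000)   == 0x12000   (already page-aligned)
 */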

static DEFINE_SPINLOCK(page_scrub_lock);
LIST_HEAD(page_scrub_list);
static unsigned long scrub_pages;

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn)                 \
( !! (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & \
      (1UL<<((_pn)&(PAGES_PER_MAPWORD-1)))) )

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n) sets all bits >= n.
 *  (1<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
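
/*
 * Worked example of the hint above, assuming 64-bit words
 * (PAGES_PER_MAPWORD == 64): to mark pages with offsets 4..11 within a
 * single map word as allocated, map_alloc() ORs in
 *     ((1UL<<12)-1) & -(1UL<<4)  ==  0xfff & ~0xf  ==  0xff0,
 * i.e. exactly bits 4 through 11.
 */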

static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}


static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}


/*************************
 * BOOT-TIME ALLOCATOR
 */

/* Initialise allocator to handle up to @max_page pages. */
paddr_t init_boot_allocator(paddr_t bitmap_start)
{
    unsigned long bitmap_size;

    bitmap_start = round_pgup(bitmap_start);

    /*
     * Allocate space for the allocation bitmap. Include an extra longword
     * of padding for possible overrun in map_alloc and map_free.
     */
    bitmap_size  = max_page / 8;
    bitmap_size += sizeof(unsigned long);
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}
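
/*
 * Sizing illustration, assuming 4KB pages and 64-bit longs: with
 * max_page == 0x100000 (4GB of memory) the bitmap needs 0x100000/8 == 128KB,
 * plus one unsigned long of slack, rounded up to a page boundary, so 132KB
 * are reserved starting at bitmap_start.
 */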

void init_boot_pages(paddr_t ps, paddr_t pe)
{
    unsigned long bad_spfn, bad_epfn, i;
    char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_spfn = simple_strtoul(p, &p, 0);
        bad_epfn = bad_spfn;

        if ( *p == '-' )
        {
            p++;
            bad_epfn = simple_strtoul(p, &p, 0);
            if ( bad_epfn < bad_spfn )
                bad_epfn = bad_spfn;
        }

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( bad_epfn == bad_spfn )
            printk("Marking page %lx as bad\n", bad_spfn);
        else
            printk("Marking pages %lx through %lx as bad\n",
                   bad_spfn, bad_epfn);

        for ( i = bad_spfn; i <= bad_epfn; i++ )
            if ( (i < max_page) && !allocated_in_map(i) )
                map_alloc(i, 1);
    }
}
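
/*
 * As parsed above, the "badpage=" option also accepts hexadecimal ranges:
 * e.g. 'badpage=0x3f45,0x100-0x1ff' marks page 0x3f45 and pages 0x100
 * through 0x1ff inclusive as allocated, so they are never handed out.
 */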

unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align)
{
    unsigned long pg, i;

    for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align )
    {
        for ( i = 0; i < nr_pfns; i++ )
            if ( allocated_in_map(pg + i) )
                break;

        if ( i == nr_pfns )
        {
            map_alloc(pg, nr_pfns);
            return pg;
        }
    }

    return 0;
}
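
/*
 * Note: the boot allocator scans upwards from pfn 0 in steps of pfn_align
 * and hands back the first aligned run of nr_pfns free pages, or 0 when no
 * such run exists.
 */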


/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#define MEMZONE_DOM 1
#define MEMZONE_DMADOM 2
#define NR_ZONES 3

#define pfn_dom_zone_type(_pfn)                                 \
    (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)

static struct list_head heap[NR_ZONES][MAX_NUMNODES][MAX_ORDER+1];

static unsigned long avail[NR_ZONES][MAX_NUMNODES];

static DEFINE_SPINLOCK(heap_lock);

void end_boot_allocator(void)
{
    unsigned long i, j, k;
    int curr_free = 0, next_free = 0;

    memset(avail, 0, sizeof(avail));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j < MAX_NUMNODES; j++ )
            for ( k = 0; k <= MAX_ORDER; k++ )
                INIT_LIST_HEAD(&heap[i][j][k]);

    /* Pages that are free now go to the domain sub-allocator. */
    for ( i = 0; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = !allocated_in_map(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
    }
}
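
/*
 * The look-ahead above inspects page i+1 one iteration early and temporarily
 * marks it allocated, so that freeing page i cannot buddy-merge upwards into
 * pages that have not yet been handed to the heap; reading bit max_page on
 * the final iteration stays within the extra longword of bitmap padding
 * reserved by init_boot_allocator().
 */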

/*
 * Hand the specified arbitrary page range to the specified heap zone,
 * checking the node id of each page against that of the previous page.
 * If they differ and the current page is not on a MAX_ORDER boundary,
 * we reserve the page by not freeing it to the buddy allocator.
 */
#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
void init_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned long nr_pages)
{
    unsigned int nid_curr, nid_prev;
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    if ( likely(page_to_mfn(pg) != 0) )
        nid_prev = phys_to_nid(page_to_maddr(pg-1));
    else
        nid_prev = phys_to_nid(page_to_maddr(pg));

    for ( i = 0; i < nr_pages; i++ )
    {
        nid_curr = phys_to_nid(page_to_maddr(pg+i));

        /*
         * Free pages of the same node, or, if the nodes differ, pages that
         * sit on a MAX_ORDER alignment boundary (which already get
         * reserved).
         */
        if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
                                        MAX_ORDER_ALIGNED) )
            free_heap_pages(zone, pg+i, 0);
        else
            printk("Reserving non-aligned node boundary @ mfn %lu\n",
                   page_to_mfn(pg+i));

        nid_prev = nid_curr;
    }
}
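
/*
 * Example of the reservation described above: if node 0 ends and node 1
 * begins at an mfn that is not on a MAX_ORDER boundary, the first page of
 * node 1 is withheld from the buddy lists, so later coalescing cannot form
 * a free block that spans both nodes.
 */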

/* Allocate 2^@order contiguous pages. */
struct page_info *alloc_heap_pages(unsigned int zone, unsigned int cpu,
                                   unsigned int order)
{
    unsigned int i, j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
    unsigned int request = (1UL << order);
    struct page_info *pg;

    ASSERT(node >= 0);
    ASSERT(node < num_nodes);
    ASSERT(zone < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /*
     * Start with the requested node, but exhaust all node memory in the
     * requested zone before failing. A new node value is only computed if
     * we fail to find memory on the target node; this avoids needless
     * computation on the fast path.
     */
    for ( i = 0; i < num_nodes; i++ )
    {
        /* Check if the target node can support the allocation. */
        if ( avail[zone][node] >= request )
        {
            /* Find smallest order which can satisfy the request. */
            for ( j = order; j <= MAX_ORDER; j++ )
            {
                if ( !list_empty(&heap[zone][node][j]) )
                    goto found;
            }
        }
        /* Pick the next node, wrapping around if needed. */
        if ( ++node == num_nodes )
            node = 0;
    }

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap[zone][node][j].next, struct page_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
    while ( j != order )
    {
        PFN_ORDER(pg) = --j;
        list_add_tail(&pg->list, &heap[zone][node][j]);
        pg += 1 << j;
    }

    map_alloc(page_to_mfn(pg), request);
    ASSERT(avail[zone][node] >= request);
    avail[zone][node] -= request;

    spin_unlock(&heap_lock);

    return pg;
}
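
/*
 * Splitting example: a request for order 2 (4 pages) that is satisfied from
 * an order-5 (32-page) block peels off and re-lists an order-4, an order-3
 * and an order-2 buddy in turn, then returns the final 4 pages of the
 * original block.
 */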

/* Free 2^@order set of pages. */
void free_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned int order)
{
    unsigned long mask;
    int node = phys_to_nid(page_to_maddr(pg));

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);
    ASSERT(node >= 0);
    ASSERT(node < num_online_nodes());

    spin_lock(&heap_lock);

    map_free(page_to_mfn(pg), 1 << order);
    avail[zone][node] += 1 << order;

    /* Merge chunks as far as possible. */
    while ( order < MAX_ORDER )
    {
        mask = 1 << order;

        if ( (page_to_mfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_mfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_mfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;

        /* After merging, pg should be in the same node. */
        ASSERT(phys_to_nid(page_to_maddr(pg)) == node);
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap[zone][node][order]);

    spin_unlock(&heap_lock);
}
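
/*
 * Merging example: freeing an order-0 page at mfn 0x1001 gives mask == 1 and
 * (mfn & mask) != 0, so the candidate buddy is the predecessor at mfn 0x1000;
 * if that page is free and of order 0, the two merge into an order-1 block at
 * 0x1000, after which mask == 2 and the next candidate buddy is the successor
 * block at mfn 0x1002, and so on up to MAX_ORDER.
 */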

/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock or disable interrupts while scrubbing very large memory areas.
 */
void scrub_heap_pages(void)
{
    void *p;
    unsigned long pfn;

    printk("Scrubbing Free RAM: ");

    for ( pfn = 0; pfn < max_page; pfn++ )
    {
        /* Every 100MB, print a progress dot. */
        if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
            printk(".");

        process_pending_timers();

        /* Quick lock-free check. */
        if ( allocated_in_map(pfn) )
            continue;

        spin_lock_irq(&heap_lock);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(pfn) )
        {
            if ( IS_XEN_HEAP_FRAME(mfn_to_page(pfn)) )
            {
                p = page_to_virt(mfn_to_page(pfn));
                memguard_unguard_range(p, PAGE_SIZE);
                clear_page(p);
                memguard_guard_range(p, PAGE_SIZE);
            }
            else
            {
                p = map_domain_page(pfn);
                clear_page(p);
                unmap_domain_page(p);
            }
        }

        spin_unlock_irq(&heap_lock);
    }

    printk("done.\n");
}


/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long flags;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    memguard_guard_range(maddr_to_virt(ps), pe - ps);

    /*
     * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
     * prevent merging of power-of-two blocks across the zone boundary.
     */
    if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
        pe -= PAGE_SIZE;

    local_irq_save(flags);
    init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
    local_irq_restore(flags);
}

void *alloc_xenheap_pages(unsigned int order)
{
    unsigned long flags;
    struct page_info *pg;
    int i;

    local_irq_save(flags);
    pg = alloc_heap_pages(MEMZONE_XEN, smp_processor_id(), order);
    local_irq_restore(flags);

    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
    }

    return page_to_virt(pg);

 no_memory:
    printk("Cannot handle page request order %d!\n", order);
    return NULL;
}


void free_xenheap_pages(void *v, unsigned int order)
{
    unsigned long flags;

    if ( v == NULL )
        return;

    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));

    local_irq_save(flags);
    free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
    local_irq_restore(flags);
}
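
/*
 * Minimal usage sketch for a hypothetical caller elsewhere in the
 * hypervisor: a single xenheap page is order 0, so
 *     void *buf = alloc_xenheap_pages(0);
 *     ...
 *     free_xenheap_pages(buf, 0);
 * allocates and later releases one page of Xen-heap memory; the allocation
 * returns NULL on failure.
 */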


/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

void init_domheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long s_tot, e_tot, s_dma, e_dma, s_nrm, e_nrm;

    ASSERT(!in_irq());

    s_tot = round_pgup(ps) >> PAGE_SHIFT;
    e_tot = round_pgdown(pe) >> PAGE_SHIFT;

    s_dma = min(s_tot, MAX_DMADOM_PFN + 1);
    e_dma = min(e_tot, MAX_DMADOM_PFN + 1);
    if ( s_dma < e_dma )
        init_heap_pages(MEMZONE_DMADOM, mfn_to_page(s_dma), e_dma - s_dma);

    s_nrm = max(s_tot, MAX_DMADOM_PFN + 1);
    e_nrm = max(e_tot, MAX_DMADOM_PFN + 1);
    if ( s_nrm < e_nrm )
        init_heap_pages(MEMZONE_DOM, mfn_to_page(s_nrm), e_nrm - s_nrm);
}
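
/*
 * Split illustration: writing B for MAX_DMADOM_PFN + 1, a range with
 * s_tot < B < e_tot is handed out as [s_tot, B) to MEMZONE_DMADOM and
 * [B, e_tot) to MEMZONE_DOM; a range lying entirely on one side of B
 * collapses the other piece to empty via the min/max clamping above.
 */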

int assign_pages(
    struct domain *d,
    struct page_info *pg,
    unsigned int order,
    unsigned int memflags)
{
    unsigned long i;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) )
    {
        gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
                 d->domain_id);
        goto fail;
    }

    if ( !(memflags & MEMF_no_refcount) )
    {
        if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
        {
            gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n",
                     d->domain_id, d->tot_pages + (1 << order), d->max_pages);
            goto fail;
        }

        if ( unlikely(d->tot_pages == 0) )
            get_knownalive_domain(d);

        d->tot_pages += 1 << order;
    }

    for ( i = 0; i < (1 << order); i++ )
    {
        ASSERT(page_get_owner(&pg[i]) == NULL);
        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
        page_set_owner(&pg[i], d);
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info = PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);
    return 0;

 fail:
    spin_unlock(&d->page_alloc_lock);
    return -1;
}


struct page_info *__alloc_domheap_pages(
    struct domain *d, unsigned int cpu, unsigned int order,
    unsigned int memflags)
{
    struct page_info *pg = NULL;
    cpumask_t mask;
    unsigned long i;

    ASSERT(!in_irq());

    if ( !(memflags & MEMF_dma) )
    {
        pg = alloc_heap_pages(MEMZONE_DOM, cpu, order);
        /* Failure? Then check if we can fall back to the DMA pool. */
        if ( unlikely(pg == NULL) &&
             ((order > MAX_ORDER) ||
              (avail_heap_pages(MEMZONE_DMADOM, -1) <
               (lowmem_emergency_pool_pages + (1UL << order)))) )
            return NULL;
    }

    if ( pg == NULL )
        if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, cpu, order)) == NULL )
            return NULL;

    mask = pg->u.free.cpumask;
    tlbflush_filter(mask, pg->tlbflush_timestamp);

    pg->count_info        = 0;
    pg->u.inuse._domain   = 0;
    pg->u.inuse.type_info = 0;

    for ( i = 1; i < (1 << order); i++ )
    {
        /* Add in any extra CPUs that need flushing because of this page. */
        cpumask_t extra_cpus_mask;
        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
        cpus_or(mask, mask, extra_cpus_mask);

        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
        page_set_owner(&pg[i], NULL);
    }

    if ( unlikely(!cpus_empty(mask)) )
    {
        perfc_incrc(need_flush_tlb_flush);
        flush_tlb_mask(mask);
    }

    if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
    {
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        return NULL;
    }

    return pg;
}

inline struct page_info *alloc_domheap_pages(
    struct domain *d, unsigned int order, unsigned int flags)
{
    return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
}
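
/*
 * Usage sketch (hypothetical caller): alloc_domheap_pages(d, 0, 0) gives
 * domain d one page from the normal zone, falling back to the DMA zone only
 * if that leaves the lowmem emergency pool intact; passing MEMF_dma forces
 * the DMA zone, and passing d == NULL yields anonymous pages assigned to no
 * domain. Pages are returned with free_domheap_pages() below.
 */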

void free_domheap_pages(struct page_info *pg, unsigned int order)
{
    int i, drop_dom_ref;
    struct domain *d = page_get_owner(pg);

    ASSERT(!in_irq());

    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
        {
            shadow_drop_references(d, &pg[i]);
            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
            pg[i].tlbflush_timestamp = tlbflush_current_time();
            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
            list_del(&pg[i].list);
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
        {
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        }
        else
        {
            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            for ( i = 0; i < (1 << order); i++ )
            {
                spin_lock(&page_scrub_lock);
                list_add(&pg[i].list, &page_scrub_list);
                scrub_pages++;
                spin_unlock(&page_scrub_lock);
            }
        }
    }
    else
    {
        /* Freeing anonymous domain-heap pages. */
        for ( i = 0; i < (1 << order); i++ )
            cpus_clear(pg[i].u.free.cpumask);
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}


unsigned long avail_heap_pages(int zone, int node)
{
    int i, j, num_nodes = num_online_nodes();
    unsigned long free_pages = 0;

    for ( i = 0; i < NR_ZONES; i++ )
        if ( (zone == -1) || (zone == i) )
            for ( j = 0; j < num_nodes; j++ )
                if ( (node == -1) || (node == j) )
                    free_pages += avail[i][j];

    return free_pages;
}

unsigned long avail_domheap_pages(void)
{
    unsigned long avail_nrm, avail_dma;

    avail_nrm = avail_heap_pages(MEMZONE_DOM, -1);

    avail_dma = avail_heap_pages(MEMZONE_DMADOM, -1);
    if ( avail_dma > lowmem_emergency_pool_pages )
        avail_dma -= lowmem_emergency_pool_pages;
    else
        avail_dma = 0;

    return avail_nrm + avail_dma;
}

unsigned long avail_nodeheap_pages(int node)
{
    return avail_heap_pages(-1, node);
}

static void pagealloc_keyhandler(unsigned char key)
{
    printk("Physical memory information:\n");
    printk("    Xen heap: %lukB free\n"
           "    DMA heap: %lukB free\n"
           "    Dom heap: %lukB free\n",
           avail_heap_pages(MEMZONE_XEN, -1) << (PAGE_SHIFT-10),
           avail_heap_pages(MEMZONE_DMADOM, -1) << (PAGE_SHIFT-10),
           avail_heap_pages(MEMZONE_DOM, -1) << (PAGE_SHIFT-10));
}


static __init int pagealloc_keyhandler_init(void)
{
    register_keyhandler('m', pagealloc_keyhandler, "memory info");
    return 0;
}
__initcall(pagealloc_keyhandler_init);


/*************************
 * PAGE SCRUBBING
 */

static void page_scrub_softirq(void)
{
    struct list_head *ent;
    struct page_info *pg;
    void *p;
    int i;
    s_time_t start = NOW();

    /* Aim to do 1ms of work (ten percent of a 10ms jiffy). */
    do {
        spin_lock(&page_scrub_lock);

        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
        {
            spin_unlock(&page_scrub_lock);
            return;
        }

        /* Peel up to 16 pages from the list. */
        for ( i = 0; i < 16; i++ )
        {
            if ( ent->next == &page_scrub_list )
                break;
            ent = ent->next;
        }

        /* Remove peeled pages from the list. */
        ent->next->prev = &page_scrub_list;
        page_scrub_list.next = ent->next;
        scrub_pages -= (i+1);

        spin_unlock(&page_scrub_lock);

        /* Working backwards, scrub each page in turn. */
        while ( ent != &page_scrub_list )
        {
            pg = list_entry(ent, struct page_info, list);
            ent = ent->prev;
            p = map_domain_page(page_to_mfn(pg));
            clear_page(p);
            unmap_domain_page(p);
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
        }
    } while ( (NOW() - start) < MILLISECS(1) );
}

unsigned long avail_scrub_pages(void)
{
    return scrub_pages;
}

static unsigned long count_bucket(struct list_head *l, int order)
{
    unsigned long total_pages = 0;
    int pages = 1 << order;
    struct page_info *pg;

    list_for_each_entry( pg, l, list )
        total_pages += pages;

    return total_pages;
}

static void dump_heap(unsigned char key)
{
    s_time_t now = NOW();
    int i, j, k;
    unsigned long total;

    printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
           (u32)(now>>32), (u32)now);

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j < MAX_NUMNODES; j++ )
            for ( k = 0; k <= MAX_ORDER; k++ )
                if ( !list_empty(&heap[i][j][k]) )
                {
                    total = count_bucket(&heap[i][j][k], k);
                    printk("heap[%d][%d][%d]-> %lu pages\n",
                           i, j, k, total);
                }
}

static __init int register_heap_trigger(void)
{
    register_keyhandler('H', dump_heap, "dump heap info");
    return 0;
}
__initcall(register_heap_trigger);


static __init int page_scrub_init(void)
{
    open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
    return 0;
}
__initcall(page_scrub_init);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */