ia64/xen-unstable

view xen/common/page_alloc.c @ 17062:0769835cf50f

x86 shadow: Reduce scope of shadow lock.

emulate_map_dest doesn't require holding the shadow lock, since the
only shadow-related operation possibly involved is removing a shadow,
which is less frequent and can acquire the lock internally. The rest
are either guest table walks or per-vcpu monitor table manipulation.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Feb 14 10:33:12 2008 +0000 (2008-02-14)
parents 40812c9d96e7
children 57febe0264e1
line source
1 /******************************************************************************
2 * page_alloc.c
3 *
4 * Simple buddy heap allocator for Xen.
5 *
6 * Copyright (c) 2002-2004 K A Fraser
7 * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
24 #include <xen/config.h>
25 #include <xen/init.h>
26 #include <xen/types.h>
27 #include <xen/lib.h>
28 #include <xen/sched.h>
29 #include <xen/spinlock.h>
30 #include <xen/mm.h>
31 #include <xen/irq.h>
32 #include <xen/softirq.h>
33 #include <xen/domain_page.h>
34 #include <xen/keyhandler.h>
35 #include <xen/perfc.h>
36 #include <xen/numa.h>
37 #include <xen/nodemask.h>
38 #include <asm/page.h>
39 #include <asm/flushtlb.h>
41 /*
42 * Comma-separated list of hexadecimal page numbers containing bad bytes.
43 * e.g. 'badpage=0x3f45,0x8a321'.
44 */
45 static char opt_badpage[100] = "";
46 string_param("badpage", opt_badpage);
48 /*
49 * no-bootscrub -> Free pages are not zeroed during boot.
50 */
51 static int opt_bootscrub __initdata = 1;
52 boolean_param("bootscrub", opt_bootscrub);
54 /*
55 * Bit width of the DMA heap.
56 */
57 static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
58 static void __init parse_dma_bits(char *s)
59 {
60 unsigned int v = simple_strtol(s, NULL, 0);
61 if ( v >= (BITS_PER_LONG + PAGE_SHIFT) )
62 dma_bitsize = BITS_PER_LONG + PAGE_SHIFT;
63 else if ( v > PAGE_SHIFT + 1 )
64 dma_bitsize = v;
65 else
66 printk("Invalid dma_bits value of %u ignored.\n", v);
67 }
68 custom_param("dma_bits", parse_dma_bits);
70 /*
71 * Amount of memory to reserve in a low-memory (<4GB) pool for specific
72 * allocation requests. Ordinary requests will not fall back to the
73 * lowmem emergency pool.
74 */
75 static unsigned long dma_emergency_pool_pages;
76 static void __init parse_dma_emergency_pool(char *s)
77 {
78 unsigned long long bytes;
79 bytes = parse_size_and_unit(s, NULL);
80 dma_emergency_pool_pages = bytes >> PAGE_SHIFT;
81 }
82 custom_param("dma_emergency_pool", parse_dma_emergency_pool);
84 #define round_pgdown(_p) ((_p)&PAGE_MASK)
85 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
87 #ifndef NDEBUG
88 /* Avoid callers relying on allocations returning zeroed pages. */
89 #define scrub_page(p) memset((p), 0xc2, PAGE_SIZE)
90 #else
91 /* For a production build, clear_page() is the fastest way to scrub. */
92 #define scrub_page(p) clear_page(p)
93 #endif
95 static DEFINE_SPINLOCK(page_scrub_lock);
96 LIST_HEAD(page_scrub_list);
97 static unsigned long scrub_pages;
99 /*********************
100 * ALLOCATION BITMAP
101 * One bit per page of memory. Bit set => page is allocated.
102 */
104 static unsigned long *alloc_bitmap;
105 #define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
107 #define allocated_in_map(_pn) \
108 ({ unsigned long ___pn = (_pn); \
109 !!(alloc_bitmap[___pn/PAGES_PER_MAPWORD] & \
110 (1UL<<(___pn&(PAGES_PER_MAPWORD-1)))); })
112 /*
113 * Hint regarding bitwise arithmetic in map_{alloc,free}:
114 * -(1<<n) sets all bits >= n.
115 * (1<<n)-1 sets all bits < n.
116 * Variable names in map_{alloc,free}:
117 * *_idx == Index into `alloc_bitmap' array.
118 * *_off == Bit offset within an element of the `alloc_bitmap' array.
119 */
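
The hint above can be checked with a small standalone program (not part of the Xen source); start_off and end_off are hypothetical and a 64-bit unsigned long is assumed:

#include <stdio.h>

/* Illustration only: the single-word case of map_alloc(), marking pages
 * [start_off, end_off) within one bitmap word. Offsets are hypothetical. */
int main(void)
{
    unsigned long start_off = 3, end_off = 9;
    unsigned long ge_start = -(1UL << start_off);   /* all bits >= start_off */
    unsigned long lt_end   = (1UL << end_off) - 1;  /* all bits <  end_off   */
    unsigned long word = 0;

    word |= lt_end & ge_start;                      /* as in map_alloc() */
    printf("word = %#lx (bits 3..8 set)\n", word);  /* prints 0x1f8 */
    return 0;
}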
121 static void map_alloc(unsigned long first_page, unsigned long nr_pages)
122 {
123 unsigned long start_off, end_off, curr_idx, end_idx;
125 #ifndef NDEBUG
126 unsigned long i;
127 /* Check that the block isn't already allocated. */
128 for ( i = 0; i < nr_pages; i++ )
129 ASSERT(!allocated_in_map(first_page + i));
130 #endif
132 curr_idx = first_page / PAGES_PER_MAPWORD;
133 start_off = first_page & (PAGES_PER_MAPWORD-1);
134 end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
135 end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
137 if ( curr_idx == end_idx )
138 {
139 alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
140 }
141 else
142 {
143 alloc_bitmap[curr_idx] |= -(1UL<<start_off);
144 while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
145 alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
146 }
147 }
149 static void map_free(unsigned long first_page, unsigned long nr_pages)
150 {
151 unsigned long start_off, end_off, curr_idx, end_idx;
153 #ifndef NDEBUG
154 unsigned long i;
155 /* Check that the block isn't already freed. */
156 for ( i = 0; i < nr_pages; i++ )
157 ASSERT(allocated_in_map(first_page + i));
158 #endif
160 curr_idx = first_page / PAGES_PER_MAPWORD;
161 start_off = first_page & (PAGES_PER_MAPWORD-1);
162 end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
163 end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
165 if ( curr_idx == end_idx )
166 {
167 alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
168 }
169 else
170 {
171 alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
172 while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
173 alloc_bitmap[curr_idx] &= -(1UL<<end_off);
174 }
175 }
179 /*************************
180 * BOOT-TIME ALLOCATOR
181 */
183 static unsigned long first_valid_mfn = ~0UL;
185 /* Initialise allocator to handle up to @max_page pages. */
186 paddr_t __init init_boot_allocator(paddr_t bitmap_start)
187 {
188 unsigned long bitmap_size;
190 bitmap_start = round_pgup(bitmap_start);
192 /*
193 * Allocate space for the allocation bitmap. Include an extra longword
194 * of padding for possible overrun in map_alloc and map_free.
195 */
196 bitmap_size = max_page / 8;
197 bitmap_size += sizeof(unsigned long);
198 bitmap_size = round_pgup(bitmap_size);
199 alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);
201 /* All allocated by default. */
202 memset(alloc_bitmap, ~0, bitmap_size);
204 return bitmap_start + bitmap_size;
205 }
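
For a concrete sense of the bitmap footprint, the sizing above works out to 33 pages for a hypothetical machine with 2^20 frames (4GiB of 4KiB pages). This is a sketch, not part of this file; ex_round_pgup is a local stand-in for round_pgup:

#include <stdio.h>

#define EX_PAGE_SHIFT 12
#define EX_PAGE_SIZE  (1UL << EX_PAGE_SHIFT)
#define ex_round_pgup(p) (((p) + (EX_PAGE_SIZE - 1)) & ~(EX_PAGE_SIZE - 1))

/* Illustration only: allocation-bitmap size for max_page == 2^20,
 * mirroring init_boot_allocator(). */
int main(void)
{
    unsigned long max_page = 1UL << 20;
    unsigned long size = max_page / 8;          /* one bit per frame    */
    size += sizeof(unsigned long);              /* overrun padding word */
    size = ex_round_pgup(size);
    printf("bitmap: %lu bytes (%lu pages)\n", size, size / EX_PAGE_SIZE);
    return 0;
}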
207 void __init init_boot_pages(paddr_t ps, paddr_t pe)
208 {
209 unsigned long bad_spfn, bad_epfn, i;
210 const char *p;
212 ps = round_pgup(ps);
213 pe = round_pgdown(pe);
214 if ( pe <= ps )
215 return;
217 first_valid_mfn = min_t(unsigned long, ps >> PAGE_SHIFT, first_valid_mfn);
219 map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);
221 /* Check new pages against the bad-page list. */
222 p = opt_badpage;
223 while ( *p != '\0' )
224 {
225 bad_spfn = simple_strtoul(p, &p, 0);
226 bad_epfn = bad_spfn;
228 if ( *p == '-' )
229 {
230 p++;
231 bad_epfn = simple_strtoul(p, &p, 0);
232 if ( bad_epfn < bad_spfn )
233 bad_epfn = bad_spfn;
234 }
236 if ( *p == ',' )
237 p++;
238 else if ( *p != '\0' )
239 break;
241 if ( bad_epfn == bad_spfn )
242 printk("Marking page %lx as bad\n", bad_spfn);
243 else
244 printk("Marking pages %lx through %lx as bad\n",
245 bad_spfn, bad_epfn);
247 for ( i = bad_spfn; i <= bad_epfn; i++ )
248 if ( (i < max_page) && !allocated_in_map(i) )
249 map_alloc(i, 1);
250 }
251 }
253 unsigned long __init alloc_boot_pages(
254 unsigned long nr_pfns, unsigned long pfn_align)
255 {
256 unsigned long pg, i;
258 /* Search backwards to obtain highest available range. */
259 for ( pg = (max_page - nr_pfns) & ~(pfn_align - 1);
260 pg >= first_valid_mfn;
261 pg = (pg + i - nr_pfns) & ~(pfn_align - 1) )
262 {
263 for ( i = 0; i < nr_pfns; i++ )
264 if ( allocated_in_map(pg+i) )
265 break;
266 if ( i == nr_pfns )
267 {
268 map_alloc(pg, nr_pfns);
269 return pg;
270 }
271 }
273 return 0;
274 }
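
A toy model of the backwards search (a sketch, not part of this file) shows how the candidate start is realigned after a partial match; the 64-frame allocated[] array and all values are hypothetical, and the toy assumes a free aligned range exists:

#include <stdio.h>

/* Illustration only: a toy version of the backwards search in
 * alloc_boot_pages(), over a pretend 64-frame bitmap. */
static int allocated[64];

int main(void)
{
    unsigned long max_page = 64, first_valid = 16;
    unsigned long nr_pfns = 4, pfn_align = 4, pg, i;

    allocated[58] = allocated[61] = 1;     /* pretend these frames are taken */

    for ( pg = (max_page - nr_pfns) & ~(pfn_align - 1);
          pg >= first_valid;
          pg = (pg + i - nr_pfns) & ~(pfn_align - 1) )
    {
        for ( i = 0; i < nr_pfns; i++ )
            if ( allocated[pg + i] )
                break;
        printf("candidate [%lu, %lu): %s\n", pg, pg + nr_pfns,
               (i == nr_pfns) ? "free, take it" : "occupied, step down");
        if ( i == nr_pfns )
            return 0;
    }
    return 1;
}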
278 /*************************
279 * BINARY BUDDY ALLOCATOR
280 */
282 #define MEMZONE_XEN 0
283 #ifdef PADDR_BITS
284 #define NR_ZONES (PADDR_BITS - PAGE_SHIFT)
285 #else
286 #define NR_ZONES (BITS_PER_LONG - PAGE_SHIFT)
287 #endif
289 #define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1)
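
In other words, zone z holds frames in the range [2^z, 2^(z+1)). A standalone sketch for a few hypothetical frame numbers (not part of this file; fls_example is a local stand-in for fls):

#include <stdio.h>

/* Illustration only: which zone a frame number maps to under the
 * fls()-based scheme above. */
static int fls_example(unsigned long x)
{
    int bit = 0;
    while ( x )
    {
        bit++;
        x >>= 1;
    }
    return bit;          /* position of the highest set bit, 1-based */
}

int main(void)
{
    unsigned long pfns[] = { 0x1, 0x800, 0xfffff, 0x100000 };
    for ( int i = 0; i < 4; i++ )
        printf("pfn %#lx -> zone %d\n", pfns[i], fls_example(pfns[i]) - 1);
    return 0;
}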
291 typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
292 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
293 #define heap(node, zone, order) ((*_heap[node])[zone][order])
295 static unsigned long *avail[MAX_NUMNODES];
297 static DEFINE_SPINLOCK(heap_lock);
299 static void init_node_heap(int node)
300 {
301 /* First node to be discovered has its heap metadata statically alloced. */
302 static heap_by_zone_and_order_t _heap_static;
303 static unsigned long avail_static[NR_ZONES];
304 static int first_node_initialised;
306 int i, j;
308 if ( !first_node_initialised )
309 {
310 _heap[node] = &_heap_static;
311 avail[node] = avail_static;
312 first_node_initialised = 1;
313 }
314 else
315 {
316 _heap[node] = xmalloc(heap_by_zone_and_order_t);
317 avail[node] = xmalloc_array(unsigned long, NR_ZONES);
318 BUG_ON(!_heap[node] || !avail[node]);
319 }
321 memset(avail[node], 0, NR_ZONES * sizeof(long));
323 for ( i = 0; i < NR_ZONES; i++ )
324 for ( j = 0; j <= MAX_ORDER; j++ )
325 INIT_LIST_HEAD(&(*_heap[node])[i][j]);
326 }
328 /* Allocate 2^@order contiguous pages. */
329 static struct page_info *alloc_heap_pages(
330 unsigned int zone_lo, unsigned int zone_hi,
331 unsigned int cpu, unsigned int order)
332 {
333 unsigned int i, j, zone;
334 unsigned int node = cpu_to_node(cpu), num_nodes = num_online_nodes();
335 unsigned long request = 1UL << order;
336 cpumask_t extra_cpus_mask, mask;
337 struct page_info *pg;
339 ASSERT(node >= 0);
340 ASSERT(node < num_nodes);
341 ASSERT(zone_lo <= zone_hi);
342 ASSERT(zone_hi < NR_ZONES);
344 if ( unlikely(order > MAX_ORDER) )
345 return NULL;
347 spin_lock(&heap_lock);
349 /*
350 * Start with the requested node, but exhaust all node memory in the
351 * requested zone before failing. Only compute a new node value if we fail
352 * to find memory in the target node; this avoids needless work on the fast path.
353 */
354 for ( i = 0; i < num_nodes; i++ )
355 {
356 zone = zone_hi;
357 do {
358 /* Check if target node can support the allocation. */
359 if ( !avail[node] || (avail[node][zone] < request) )
360 continue;
362 /* Find smallest order which can satisfy the request. */
363 for ( j = order; j <= MAX_ORDER; j++ )
364 if ( !list_empty(&heap(node, zone, j)) )
365 goto found;
366 } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
368 /* Pick next node, wrapping around if needed. */
369 if ( ++node == num_nodes )
370 node = 0;
371 }
373 /* No suitable memory blocks. Fail the request. */
374 spin_unlock(&heap_lock);
375 return NULL;
377 found:
378 pg = list_entry(heap(node, zone, j).next, struct page_info, list);
379 list_del(&pg->list);
381 /* We may have to halve the chunk a number of times. */
382 while ( j != order )
383 {
384 PFN_ORDER(pg) = --j;
385 list_add_tail(&pg->list, &heap(node, zone, j));
386 pg += 1 << j;
387 }
389 map_alloc(page_to_mfn(pg), request);
390 ASSERT(avail[node][zone] >= request);
391 avail[node][zone] -= request;
393 spin_unlock(&heap_lock);
395 cpus_clear(mask);
397 for ( i = 0; i < (1 << order); i++ )
398 {
399 /* Reference count must continuously be zero for free pages. */
400 BUG_ON(pg[i].count_info != 0);
402 /* Add in any extra CPUs that need flushing because of this page. */
403 cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
404 tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
405 cpus_or(mask, mask, extra_cpus_mask);
407 /* Initialise fields which have other uses for free pages. */
408 pg[i].u.inuse.type_info = 0;
409 page_set_owner(&pg[i], NULL);
410 }
412 if ( unlikely(!cpus_empty(mask)) )
413 {
414 perfc_incr(need_flush_tlb_flush);
415 flush_tlb_mask(mask);
416 }
418 return pg;
419 }
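
The halving loop near the end of alloc_heap_pages() can be traced with a small sketch (not part of this file); the order-3 chunk at frame 0x100 and the order-0 request are hypothetical. Each iteration returns the lower half to the free list and keeps the upper half:

#include <stdio.h>

/* Illustration only: the halving loop in alloc_heap_pages(), shown for a
 * hypothetical order-0 request satisfied from an order-3 chunk starting
 * at frame 0x100. */
int main(void)
{
    unsigned int order = 0, j = 3;       /* requested order; order found */
    unsigned long mfn = 0x100;           /* start of the chunk we took   */

    while ( j != order )
    {
        j--;
        printf("free [%#lx, %#lx) back at order %u\n",
               mfn, mfn + (1UL << j), j);
        mfn += 1UL << j;
    }
    printf("caller receives [%#lx, %#lx)\n", mfn, mfn + (1UL << order));
    return 0;
}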
421 /* Free 2^@order set of pages. */
422 static void free_heap_pages(
423 unsigned int zone, struct page_info *pg, unsigned int order)
424 {
425 unsigned long mask;
426 unsigned int i, node = phys_to_nid(page_to_maddr(pg));
427 struct domain *d;
429 ASSERT(zone < NR_ZONES);
430 ASSERT(order <= MAX_ORDER);
431 ASSERT(node >= 0);
432 ASSERT(node < num_online_nodes());
434 for ( i = 0; i < (1 << order); i++ )
435 {
436 /*
437 * Cannot assume that count_info == 0, as there are some corner cases
438 * where it isn't the case and yet it isn't a bug:
439 * 1. page_get_owner() is NULL
440 * 2. page_get_owner() is a domain that was never accessible by
441 * its domid (e.g., failed to fully construct the domain).
442 * 3. page was never addressable by the guest (e.g., it's an
443 * auto-translate-physmap guest and the page was never included
444 * in its pseudophysical address space).
445 * In all the above cases there can be no guest mappings of this page.
446 */
447 pg[i].count_info = 0;
449 if ( (d = page_get_owner(&pg[i])) != NULL )
450 {
451 pg[i].tlbflush_timestamp = tlbflush_current_time();
452 pg[i].u.free.cpumask = d->domain_dirty_cpumask;
453 }
454 else
455 {
456 cpus_clear(pg[i].u.free.cpumask);
457 }
458 }
460 spin_lock(&heap_lock);
462 map_free(page_to_mfn(pg), 1 << order);
463 avail[node][zone] += 1 << order;
465 /* Merge chunks as far as possible. */
466 while ( order < MAX_ORDER )
467 {
468 mask = 1UL << order;
470 if ( (page_to_mfn(pg) & mask) )
471 {
472 /* Merge with predecessor block? */
473 if ( allocated_in_map(page_to_mfn(pg)-mask) ||
474 (PFN_ORDER(pg-mask) != order) )
475 break;
476 list_del(&(pg-mask)->list);
477 pg -= mask;
478 }
479 else
480 {
481 /* Merge with successor block? */
482 if ( allocated_in_map(page_to_mfn(pg)+mask) ||
483 (PFN_ORDER(pg+mask) != order) )
484 break;
485 list_del(&(pg+mask)->list);
486 }
488 order++;
490 /* After merging, pg should remain in the same node. */
491 ASSERT(phys_to_nid(page_to_maddr(pg)) == node);
492 }
494 PFN_ORDER(pg) = order;
495 list_add_tail(&pg->list, &heap(node, zone, order));
497 spin_unlock(&heap_lock);
498 }
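
The buddy test in the merge loop above picks a direction from a single address bit: at order o, a block whose start frame has bit o set merges downwards, otherwise upwards. A sketch with hypothetical frame numbers (not part of this file):

#include <stdio.h>

/* Illustration only: how free_heap_pages() locates the buddy of a block. */
int main(void)
{
    unsigned long mfns[] = { 0x1c, 0x18 };
    unsigned int order = 2;
    unsigned long mask = 1UL << order;

    for ( int i = 0; i < 2; i++ )
    {
        if ( mfns[i] & mask )
            printf("block at %#lx, order %u: buddy below at %#lx\n",
                   mfns[i], order, mfns[i] - mask);
        else
            printf("block at %#lx, order %u: buddy above at %#lx\n",
                   mfns[i], order, mfns[i] + mask);
    }
    return 0;
}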
500 /*
501 * Hand the specified arbitrary page range to the specified heap zone,
502 * checking each page's node_id against that of the previous page. If they
503 * differ and the current page is not on a MAX_ORDER boundary, we reserve
504 * the page by not freeing it to the buddy allocator.
505 */
506 #define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
507 static void init_heap_pages(
508 unsigned int zone, struct page_info *pg, unsigned long nr_pages)
509 {
510 unsigned int nid_curr, nid_prev;
511 unsigned long i;
513 ASSERT(zone < NR_ZONES);
515 if ( likely(page_to_mfn(pg) != 0) )
516 nid_prev = phys_to_nid(page_to_maddr(pg-1));
517 else
518 nid_prev = phys_to_nid(page_to_maddr(pg));
520 for ( i = 0; i < nr_pages; i++ )
521 {
522 nid_curr = phys_to_nid(page_to_maddr(pg+i));
524 if ( unlikely(!avail[nid_curr]) )
525 init_node_heap(nid_curr);
527 /*
528 * Free pages of the same node, or, if the nodes differ, pages that lie on
529 * a MAX_ORDER alignment boundary (which already forms a reservation point).
530 */
531 if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
532 MAX_ORDER_ALIGNED) )
533 free_heap_pages(zone, pg+i, 0);
534 else
535 printk("Reserving non-aligned node boundary @ mfn %lu\n",
536 page_to_mfn(pg+i));
538 nid_prev = nid_curr;
539 }
540 }
542 static unsigned long avail_heap_pages(
543 unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
544 {
545 unsigned int i, zone, num_nodes = num_online_nodes();
546 unsigned long free_pages = 0;
548 if ( zone_hi >= NR_ZONES )
549 zone_hi = NR_ZONES - 1;
551 for ( i = 0; i < num_nodes; i++ )
552 {
553 if ( !avail[i] )
554 continue;
555 for ( zone = zone_lo; zone <= zone_hi; zone++ )
556 if ( (node == -1) || (node == i) )
557 free_pages += avail[i][zone];
558 }
560 return free_pages;
561 }
563 #define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn))
564 void __init end_boot_allocator(void)
565 {
566 unsigned long i;
567 int curr_free, next_free;
569 /* Pages that are free now go to the domain sub-allocator. */
570 if ( (curr_free = next_free = avail_for_domheap(first_valid_mfn)) )
571 map_alloc(first_valid_mfn, 1);
572 for ( i = first_valid_mfn; i < max_page; i++ )
573 {
574 curr_free = next_free;
575 next_free = avail_for_domheap(i+1);
576 if ( next_free )
577 map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
578 if ( curr_free )
579 init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
580 }
582 printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
583 }
584 #undef avail_for_domheap
586 /*
587 * Scrub all unallocated pages in all heap zones. This function is more
588 * convoluted than appears necessary because we do not want to continuously
589 * hold the lock while scrubbing very large memory areas.
590 */
591 void __init scrub_heap_pages(void)
592 {
593 void *p;
594 unsigned long mfn;
596 if ( !opt_bootscrub )
597 return;
599 printk("Scrubbing Free RAM: ");
601 for ( mfn = first_valid_mfn; mfn < max_page; mfn++ )
602 {
603 process_pending_timers();
605 /* Quick lock-free check. */
606 if ( allocated_in_map(mfn) )
607 continue;
609 /* Every 100MB, print a progress dot. */
610 if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
611 printk(".");
613 spin_lock(&heap_lock);
615 /* Re-check page status with lock held. */
616 if ( !allocated_in_map(mfn) )
617 {
618 if ( is_xen_heap_mfn(mfn) )
619 {
620 p = page_to_virt(mfn_to_page(mfn));
621 memguard_unguard_range(p, PAGE_SIZE);
622 scrub_page(p);
623 memguard_guard_range(p, PAGE_SIZE);
624 }
625 else
626 {
627 p = map_domain_page(mfn);
628 scrub_page(p);
629 unmap_domain_page(p);
630 }
631 }
633 spin_unlock(&heap_lock);
634 }
636 printk("done.\n");
637 }
641 /*************************
642 * XEN-HEAP SUB-ALLOCATOR
643 */
645 void init_xenheap_pages(paddr_t ps, paddr_t pe)
646 {
647 ps = round_pgup(ps);
648 pe = round_pgdown(pe);
649 if ( pe <= ps )
650 return;
652 memguard_guard_range(maddr_to_virt(ps), pe - ps);
654 /*
655 * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
656 * prevent merging of power-of-two blocks across the zone boundary.
657 */
658 if ( ps && !is_xen_heap_mfn(paddr_to_pfn(ps)-1) )
659 ps += PAGE_SIZE;
660 if ( !is_xen_heap_mfn(paddr_to_pfn(pe)) )
661 pe -= PAGE_SIZE;
663 init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
664 }
667 void *alloc_xenheap_pages(unsigned int order)
668 {
669 struct page_info *pg;
671 ASSERT(!in_irq());
673 pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order);
674 if ( unlikely(pg == NULL) )
675 goto no_memory;
677 memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
679 return page_to_virt(pg);
681 no_memory:
682 printk("Cannot handle page request order %d!\n", order);
683 return NULL;
684 }
687 void free_xenheap_pages(void *v, unsigned int order)
688 {
689 ASSERT(!in_irq());
691 if ( v == NULL )
692 return;
694 memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
696 free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
697 }
701 /*************************
702 * DOMAIN-HEAP SUB-ALLOCATOR
703 */
705 void init_domheap_pages(paddr_t ps, paddr_t pe)
706 {
707 unsigned long s_tot, e_tot;
708 unsigned int zone;
710 ASSERT(!in_irq());
712 s_tot = round_pgup(ps) >> PAGE_SHIFT;
713 e_tot = round_pgdown(pe) >> PAGE_SHIFT;
715 zone = fls(s_tot);
716 BUG_ON(zone <= MEMZONE_XEN + 1);
717 for ( --zone; s_tot < e_tot; ++zone )
718 {
719 unsigned long end = e_tot;
721 BUILD_BUG_ON(NR_ZONES > BITS_PER_LONG);
722 if ( zone < BITS_PER_LONG - 1 && end > 1UL << (zone + 1) )
723 end = 1UL << (zone + 1);
724 init_heap_pages(zone, mfn_to_page(s_tot), end - s_tot);
725 s_tot = end;
726 }
727 }
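
The zone-splitting loop above can be traced with a small sketch (not part of this file); the frame range [0x500, 0x3000) is hypothetical, fls_example stands in for fls, and the BITS_PER_LONG guard is omitted:

#include <stdio.h>

/* Illustration only: how init_domheap_pages() splits a frame range at
 * power-of-two boundaries so each piece lands in a single zone. */
static int fls_example(unsigned long x)
{
    int bit = 0;
    while ( x ) { bit++; x >>= 1; }
    return bit;
}

int main(void)
{
    unsigned long s = 0x500, e = 0x3000, end;
    unsigned int zone = fls_example(s);

    for ( --zone; s < e; ++zone )
    {
        end = e;
        if ( end > (1UL << (zone + 1)) )
            end = 1UL << (zone + 1);
        printf("zone %2u gets frames [%#lx, %#lx)\n", zone, s, end);
        s = end;
    }
    return 0;
}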
730 int assign_pages(
731 struct domain *d,
732 struct page_info *pg,
733 unsigned int order,
734 unsigned int memflags)
735 {
736 unsigned long i;
738 spin_lock(&d->page_alloc_lock);
740 if ( unlikely(d->is_dying) )
741 {
742 gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
743 d->domain_id);
744 goto fail;
745 }
747 if ( !(memflags & MEMF_no_refcount) )
748 {
749 if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
750 {
751 gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n",
752 d->domain_id, d->tot_pages + (1 << order), d->max_pages);
753 goto fail;
754 }
756 if ( unlikely(d->tot_pages == 0) )
757 get_knownalive_domain(d);
759 d->tot_pages += 1 << order;
760 }
762 for ( i = 0; i < (1 << order); i++ )
763 {
764 ASSERT(page_get_owner(&pg[i]) == NULL);
765 ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
766 page_set_owner(&pg[i], d);
767 wmb(); /* Domain pointer must be visible before updating refcnt. */
768 pg[i].count_info = PGC_allocated | 1;
769 list_add_tail(&pg[i].list, &d->page_list);
770 }
772 spin_unlock(&d->page_alloc_lock);
773 return 0;
775 fail:
776 spin_unlock(&d->page_alloc_lock);
777 return -1;
778 }
781 struct page_info *__alloc_domheap_pages(
782 struct domain *d, unsigned int cpu, unsigned int order,
783 unsigned int memflags)
784 {
785 struct page_info *pg = NULL;
786 unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
788 ASSERT(!in_irq());
790 bits = domain_clamp_alloc_bitsize(d, bits ? : (BITS_PER_LONG+PAGE_SHIFT));
791 if ( bits <= (PAGE_SHIFT + 1) )
792 return NULL;
794 bits -= PAGE_SHIFT + 1;
795 if ( bits < zone_hi )
796 zone_hi = bits;
798 if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
799 {
800 pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, cpu, order);
802 /* Failure? Then check if we can fall back to the DMA pool. */
803 if ( unlikely(pg == NULL) &&
804 ((order > MAX_ORDER) ||
805 (avail_heap_pages(MEMZONE_XEN + 1,
806 dma_bitsize - PAGE_SHIFT - 1,
807 -1) <
808 (dma_emergency_pool_pages + (1UL << order)))) )
809 return NULL;
810 }
812 if ( (pg == NULL) &&
813 ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
814 cpu, order)) == NULL) )
815 return NULL;
817 if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
818 {
819 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
820 return NULL;
821 }
823 return pg;
824 }
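
The width-to-zone conversion above can be illustrated with a short sketch (not part of this file); PAGE_SHIFT is assumed to be 12, and the real code additionally clamps against NR_ZONES and domain_clamp_alloc_bitsize():

#include <stdio.h>

/* Illustration only: how an address-width restriction in memflags turns
 * into a zone ceiling in __alloc_domheap_pages(). */
int main(void)
{
    unsigned int widths[] = { 32, 36, 64 };
    unsigned int page_shift = 12;

    for ( int i = 0; i < 3; i++ )
    {
        unsigned int zone_hi = widths[i] - (page_shift + 1);
        printf("%u-bit-addressable request -> zone_hi %u "
               "(frames below 2^%u)\n",
               widths[i], zone_hi, zone_hi + 1);
    }
    return 0;
}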
826 struct page_info *alloc_domheap_pages(
827 struct domain *d, unsigned int order, unsigned int flags)
828 {
829 return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
830 }
832 void free_domheap_pages(struct page_info *pg, unsigned int order)
833 {
834 int i, drop_dom_ref;
835 struct domain *d = page_get_owner(pg);
837 ASSERT(!in_irq());
839 if ( unlikely(is_xen_heap_page(pg)) )
840 {
841 /* NB. May recursively lock from relinquish_memory(). */
842 spin_lock_recursive(&d->page_alloc_lock);
844 for ( i = 0; i < (1 << order); i++ )
845 list_del(&pg[i].list);
847 d->xenheap_pages -= 1 << order;
848 drop_dom_ref = (d->xenheap_pages == 0);
850 spin_unlock_recursive(&d->page_alloc_lock);
851 }
852 else if ( likely(d != NULL) )
853 {
854 /* NB. May recursively lock from relinquish_memory(). */
855 spin_lock_recursive(&d->page_alloc_lock);
857 for ( i = 0; i < (1 << order); i++ )
858 {
859 BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
860 list_del(&pg[i].list);
861 }
863 d->tot_pages -= 1 << order;
864 drop_dom_ref = (d->tot_pages == 0);
866 spin_unlock_recursive(&d->page_alloc_lock);
868 if ( likely(!d->is_dying) )
869 {
870 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
871 }
872 else
873 {
874 /*
875 * Normally we expect a domain to clear pages before freeing them,
876 * if it cares about the secrecy of their contents. However, after
877 * a domain has died we assume responsibility for erasure.
878 */
879 for ( i = 0; i < (1 << order); i++ )
880 {
881 page_set_owner(&pg[i], NULL);
882 spin_lock(&page_scrub_lock);
883 list_add(&pg[i].list, &page_scrub_list);
884 scrub_pages++;
885 spin_unlock(&page_scrub_lock);
886 }
887 }
888 }
889 else
890 {
891 /* Freeing anonymous domain-heap pages. */
892 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
893 drop_dom_ref = 0;
894 }
896 if ( drop_dom_ref )
897 put_domain(d);
898 }
900 unsigned long avail_domheap_pages_region(
901 unsigned int node, unsigned int min_width, unsigned int max_width)
902 {
903 int zone_lo, zone_hi;
905 zone_lo = min_width ? (min_width - (PAGE_SHIFT + 1)) : (MEMZONE_XEN + 1);
906 zone_lo = max_t(int, MEMZONE_XEN + 1, zone_lo);
907 zone_lo = min_t(int, NR_ZONES - 1, zone_lo);
909 zone_hi = max_width ? (max_width - (PAGE_SHIFT + 1)) : (NR_ZONES - 1);
910 zone_hi = max_t(int, MEMZONE_XEN + 1, zone_hi);
911 zone_hi = min_t(int, NR_ZONES - 1, zone_hi);
913 return avail_heap_pages(zone_lo, zone_hi, node);
914 }
916 unsigned long avail_domheap_pages(void)
917 {
918 unsigned long avail_nrm, avail_dma;
920 avail_nrm = avail_heap_pages(dma_bitsize - PAGE_SHIFT,
921 NR_ZONES - 1,
922 -1);
924 avail_dma = avail_heap_pages(MEMZONE_XEN + 1,
925 dma_bitsize - PAGE_SHIFT - 1,
926 -1);
928 if ( avail_dma > dma_emergency_pool_pages )
929 avail_dma -= dma_emergency_pool_pages;
930 else
931 avail_dma = 0;
933 return avail_nrm + avail_dma;
934 }
936 static void pagealloc_keyhandler(unsigned char key)
937 {
938 unsigned int zone = MEMZONE_XEN;
939 unsigned long total = 0;
941 printk("Physical memory information:\n");
942 printk(" Xen heap: %lukB free\n",
943 avail_heap_pages(zone, zone, -1) << (PAGE_SHIFT-10));
945 while ( ++zone < NR_ZONES )
946 {
947 unsigned long n;
949 if ( zone == dma_bitsize - PAGE_SHIFT )
950 {
951 printk(" DMA heap: %lukB free\n", total << (PAGE_SHIFT-10));
952 total = 0;
953 }
955 if ( (n = avail_heap_pages(zone, zone, -1)) != 0 )
956 {
957 total += n;
958 printk(" heap[%02u]: %lukB free\n", zone, n << (PAGE_SHIFT-10));
959 }
960 }
962 printk(" Dom heap: %lukB free\n", total << (PAGE_SHIFT-10));
963 }
966 static __init int pagealloc_keyhandler_init(void)
967 {
968 register_keyhandler('m', pagealloc_keyhandler, "memory info");
969 return 0;
970 }
971 __initcall(pagealloc_keyhandler_init);
975 /*************************
976 * PAGE SCRUBBING
977 */
979 static DEFINE_PER_CPU(struct timer, page_scrub_timer);
981 static void page_scrub_softirq(void)
982 {
983 struct list_head *ent;
984 struct page_info *pg;
985 void *p;
986 int i;
987 s_time_t start = NOW();
989 /* Aim to do 1ms of work every 10ms. */
990 do {
991 spin_lock(&page_scrub_lock);
993 if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
994 {
995 spin_unlock(&page_scrub_lock);
996 return;
997 }
999 /* Peel up to 16 pages from the list. */
1000 for ( i = 0; i < 16; i++ )
1001 {
1002 if ( ent->next == &page_scrub_list )
1003 break;
1004 ent = ent->next;
1005 }
1007 /* Remove peeled pages from the list. */
1008 ent->next->prev = &page_scrub_list;
1009 page_scrub_list.next = ent->next;
1010 scrub_pages -= (i+1);
1012 spin_unlock(&page_scrub_lock);
1014 /* Working backwards, scrub each page in turn. */
1015 while ( ent != &page_scrub_list )
1016 {
1017 pg = list_entry(ent, struct page_info, list);
1018 ent = ent->prev;
1019 p = map_domain_page(page_to_mfn(pg));
1020 scrub_page(p);
1021 unmap_domain_page(p);
1022 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
1023 }
1024 } while ( (NOW() - start) < MILLISECS(1) );
1026 set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10));
1027 }
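
The "1ms of work every 10ms" budget above amounts to roughly a 10% duty cycle per CPU while the scrub list is non-empty; a trivial standalone check (not part of this file):

#include <stdio.h>

/* Illustration only: the scrub budget implied by doing 1ms of work per
 * 10ms timer period. */
int main(void)
{
    double work_ms = 1.0, period_ms = 10.0;
    printf("scrub duty cycle: %.0f%%\n", 100.0 * work_ms / period_ms);
    return 0;
}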
1029 static void page_scrub_timer_fn(void *unused)
1030 {
1031 page_scrub_schedule_work();
1032 }
1034 unsigned long avail_scrub_pages(void)
1035 {
1036 return scrub_pages;
1037 }
1039 static void dump_heap(unsigned char key)
1040 {
1041 s_time_t now = NOW();
1042 int i, j;
1044 printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
1045 (u32)(now>>32), (u32)now);
1047 for ( i = 0; i < MAX_NUMNODES; i++ )
1048 {
1049 if ( !avail[i] )
1050 continue;
1051 for ( j = 0; j < NR_ZONES; j++ )
1052 printk("heap[node=%d][zone=%d] -> %lu pages\n",
1053 i, j, avail[i][j]);
1054 }
1055 }
1057 static __init int register_heap_trigger(void)
1058 {
1059 register_keyhandler('H', dump_heap, "dump heap info");
1060 return 0;
1061 }
1062 __initcall(register_heap_trigger);
1065 static __init int page_scrub_init(void)
1066 {
1067 int cpu;
1068 for_each_cpu ( cpu )
1069 init_timer(&per_cpu(page_scrub_timer, cpu),
1070 page_scrub_timer_fn, NULL, cpu);
1071 open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
1072 return 0;
1073 }
1074 __initcall(page_scrub_init);
1076 /*
1077 * Local variables:
1078 * mode: C
1079 * c-set-style: "BSD"
1080 * c-basic-offset: 4
1081 * tab-width: 4
1082 * indent-tabs-mode: nil
1083 * End:
1084 */