ia64/xen-unstable: view xen/common/page_alloc.c @ 15757:256160ff19b7

Remove unused max_dma_mfn variable.
Signed-off-by: Keir Fraser <keir@xensource.com>

author    kfraser@localhost.localdomain
date      Thu Aug 16 13:27:59 2007 +0100
parents   cb3e6fcb7f34
children  5b8730c78454

/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/spinlock.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/keyhandler.h>
#include <xen/perfc.h>
#include <xen/numa.h>
#include <xen/nodemask.h>
#include <asm/page.h>
#include <asm/flushtlb.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

/*
 * no-bootscrub -> Free pages are not zeroed during boot.
 */
static int opt_bootscrub __initdata = 1;
boolean_param("bootscrub", opt_bootscrub);

/*
 * Bit width of the DMA heap.
 */
static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
static void __init parse_dma_bits(char *s)
{
    unsigned int v = simple_strtol(s, NULL, 0);
    if ( v >= (BITS_PER_LONG + PAGE_SHIFT) )
        dma_bitsize = BITS_PER_LONG + PAGE_SHIFT;
    else if ( v > PAGE_SHIFT + 1 )
        dma_bitsize = v;
    else
        printk("Invalid dma_bits value of %u ignored.\n", v);
}
custom_param("dma_bits", parse_dma_bits);

/*
 * Amount of memory to reserve in a low-memory (<4GB) pool for specific
 * allocation requests. Ordinary requests will not fall back to the
 * lowmem emergency pool.
 */
static unsigned long dma_emergency_pool_pages;
static void __init parse_dma_emergency_pool(char *s)
{
    unsigned long long bytes;
    bytes = parse_size_and_unit(s, NULL);
    dma_emergency_pool_pages = bytes >> PAGE_SHIFT;
}
custom_param("dma_emergency_pool", parse_dma_emergency_pool);

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

#ifndef NDEBUG
/* Avoid callers relying on allocations returning zeroed pages. */
#define scrub_page(p) memset((p), 0xc2, PAGE_SIZE)
#else
/* For a production build, clear_page() is the fastest way to scrub. */
#define scrub_page(p) clear_page(p)
#endif

static DEFINE_SPINLOCK(page_scrub_lock);
LIST_HEAD(page_scrub_list);
static unsigned long scrub_pages;

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn)                       \
({  unsigned long ___pn = (_pn);                    \
    !!(alloc_bitmap[___pn/PAGES_PER_MAPWORD] &      \
       (1UL<<(___pn&(PAGES_PER_MAPWORD-1)))); })

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n) sets all bits >= n.
 *  (1<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
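/*
 * Worked example (assuming 64-bit longs, so PAGES_PER_MAPWORD == 64):
 * map_alloc(70, 3) yields curr_idx == end_idx == 1, start_off == 6 and
 * end_off == 9, so ((1UL<<9)-1) & -(1UL<<6) sets bits 6..8 of
 * alloc_bitmap[1], i.e. exactly pages 70..72.
 */
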
static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}



/*************************
 * BOOT-TIME ALLOCATOR
 */

static unsigned long first_valid_mfn = ~0UL;

/* Initialise allocator to handle up to @max_page pages. */
paddr_t __init init_boot_allocator(paddr_t bitmap_start)
{
    unsigned long bitmap_size;

    bitmap_start = round_pgup(bitmap_start);

    /*
     * Allocate space for the allocation bitmap. Include an extra longword
     * of padding for possible overrun in map_alloc and map_free.
     */
    bitmap_size  = max_page / 8;
    bitmap_size += sizeof(unsigned long);
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}

void __init init_boot_pages(paddr_t ps, paddr_t pe)
{
    unsigned long bad_spfn, bad_epfn, i;
    const char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    first_valid_mfn = min_t(unsigned long, ps >> PAGE_SHIFT, first_valid_mfn);

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_spfn = simple_strtoul(p, &p, 0);
        bad_epfn = bad_spfn;

        if ( *p == '-' )
        {
            p++;
            bad_epfn = simple_strtoul(p, &p, 0);
            if ( bad_epfn < bad_spfn )
                bad_epfn = bad_spfn;
        }

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( bad_epfn == bad_spfn )
            printk("Marking page %lx as bad\n", bad_spfn);
        else
            printk("Marking pages %lx through %lx as bad\n",
                   bad_spfn, bad_epfn);

        for ( i = bad_spfn; i <= bad_epfn; i++ )
            if ( (i < max_page) && !allocated_in_map(i) )
                map_alloc(i, 1);
    }
}

unsigned long __init alloc_boot_pages(
    unsigned long nr_pfns, unsigned long pfn_align)
{
    unsigned long pg, i;

    /* Search backwards to obtain highest available range. */
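    /*
     * If a candidate range fails at offset i, the next candidate is chosen
     * so that it ends below the conflicting page (pg + i) and is then
     * aligned down to pfn_align.
     */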
    for ( pg = (max_page - nr_pfns) & ~(pfn_align - 1);
          pg >= first_valid_mfn;
          pg = (pg + i - nr_pfns) & ~(pfn_align - 1) )
    {
        for ( i = 0; i < nr_pfns; i++ )
            if ( allocated_in_map(pg+i) )
                break;
        if ( i == nr_pfns )
        {
            map_alloc(pg, nr_pfns);
            return pg;
        }
    }

    return 0;
}



/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#ifdef PADDR_BITS
#define NR_ZONES    (PADDR_BITS - PAGE_SHIFT)
#else
#define NR_ZONES    (BITS_PER_LONG - PAGE_SHIFT)
#endif

#define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1)
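/*
 * Zone z thus holds frames with fls(pfn) == z + 1, i.e. frame numbers in
 * [2^z, 2^(z+1)), so every page in zone z has a physical address that fits
 * in (z + 1 + PAGE_SHIFT) bits.
 */
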
typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
#define heap(node, zone, order) ((*_heap[node])[zone][order])

static unsigned long *avail[MAX_NUMNODES];

static DEFINE_SPINLOCK(heap_lock);

static void init_node_heap(int node)
{
    /* First node to be discovered has its heap metadata statically alloced. */
    static heap_by_zone_and_order_t _heap_static;
    static unsigned long avail_static[NR_ZONES];
    static unsigned long first_node_initialised;

    int i, j;

    if ( !test_and_set_bit(0, &first_node_initialised) )
    {
        _heap[node] = &_heap_static;
        avail[node] = avail_static;
    }
    else
    {
        _heap[node] = xmalloc(heap_by_zone_and_order_t);
        avail[node] = xmalloc_array(unsigned long, NR_ZONES);
        BUG_ON(!_heap[node] || !avail[node]);
    }

    memset(avail[node], 0, NR_ZONES * sizeof(long));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j <= MAX_ORDER; j++ )
            INIT_LIST_HEAD(&(*_heap[node])[i][j]);
}

/* Allocate 2^@order contiguous pages. */
static struct page_info *alloc_heap_pages(
    unsigned int zone_lo, unsigned int zone_hi,
    unsigned int cpu, unsigned int order)
{
    unsigned int i, j, zone;
    unsigned int node = cpu_to_node(cpu), num_nodes = num_online_nodes();
    unsigned long request = 1UL << order;
    cpumask_t extra_cpus_mask, mask;
    struct page_info *pg;

    ASSERT(node >= 0);
    ASSERT(node < num_nodes);
    ASSERT(zone_lo <= zone_hi);
    ASSERT(zone_hi < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /*
     * Start with the requested node, but exhaust all node memory in the
     * requested zone before failing. Only compute the next node if we fail
     * to find memory in the target node; this avoids needless computation
     * on the fast path.
     */
    for ( i = 0; i < num_nodes; i++ )
    {
        zone = zone_hi;
        do {
            /* Check if target node can support the allocation. */
            if ( !avail[node] || (avail[node][zone] < request) )
                continue;

            /* Find smallest order which can satisfy the request. */
            for ( j = order; j <= MAX_ORDER; j++ )
                if ( !list_empty(&heap(node, zone, j)) )
                    goto found;
        } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */

        /* Pick next node, wrapping around if needed. */
        if ( ++node == num_nodes )
            node = 0;
    }

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap(node, zone, j).next, struct page_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
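    /*
     * Each pass returns the lower half of the current chunk to the order-j
     * free list and keeps the upper half, so the block finally handed out
     * is the highest-addressed 2^@order piece of the chunk found above.
     */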
    while ( j != order )
    {
        PFN_ORDER(pg) = --j;
        list_add_tail(&pg->list, &heap(node, zone, j));
        pg += 1 << j;
    }

    map_alloc(page_to_mfn(pg), request);
    ASSERT(avail[node][zone] >= request);
    avail[node][zone] -= request;

    spin_unlock(&heap_lock);

    cpus_clear(mask);

    for ( i = 0; i < (1 << order); i++ )
    {
        /* Reference count must continuously be zero for free pages. */
        BUG_ON(pg[i].count_info != 0);

        /* Add in any extra CPUs that need flushing because of this page. */
        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
        cpus_or(mask, mask, extra_cpus_mask);

        /* Initialise fields which have other uses for free pages. */
        pg[i].u.inuse.type_info = 0;
        page_set_owner(&pg[i], NULL);
    }

    if ( unlikely(!cpus_empty(mask)) )
    {
        perfc_incr(need_flush_tlb_flush);
        flush_tlb_mask(mask);
    }

    return pg;
}

/* Free 2^@order set of pages. */
static void free_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned int order)
{
    unsigned long mask;
    unsigned int i, node = phys_to_nid(page_to_maddr(pg));
    struct domain *d;

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);
    ASSERT(node >= 0);
    ASSERT(node < num_online_nodes());

    for ( i = 0; i < (1 << order); i++ )
    {
        /*
         * Cannot assume that count_info == 0, as there are some corner cases
         * where it isn't the case and yet it isn't a bug:
         *  1. page_get_owner() is NULL
         *  2. page_get_owner() is a domain that was never accessible by
         *     its domid (e.g., failed to fully construct the domain).
         *  3. page was never addressable by the guest (e.g., it's an
         *     auto-translate-physmap guest and the page was never included
         *     in its pseudophysical address space).
         * In all the above cases there can be no guest mappings of this page.
         */
        pg[i].count_info = 0;

        if ( (d = page_get_owner(&pg[i])) != NULL )
        {
            pg[i].tlbflush_timestamp = tlbflush_current_time();
            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
        }
        else
        {
            cpus_clear(pg[i].u.free.cpumask);
        }
    }

    spin_lock(&heap_lock);

    map_free(page_to_mfn(pg), 1 << order);
    avail[node][zone] += 1 << order;

    /* Merge chunks as far as possible. */
    while ( order < MAX_ORDER )
    {
        mask = 1UL << order;

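        /*
         * The buddy of this 2^@order block lives at mfn ^ mask: if bit
         * @order of our mfn is set we are the upper half and the buddy
         * precedes us, otherwise the buddy follows us.
         */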
        if ( (page_to_mfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_mfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_mfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;

        /* After merging, pg should remain in the same node. */
        ASSERT(phys_to_nid(page_to_maddr(pg)) == node);
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap(node, zone, order));

    spin_unlock(&heap_lock);
}

/*
 * Hand the specified arbitrary page range to the specified heap zone,
 * checking the node_id of the previous page. If they differ and the
 * latter is not on a MAX_ORDER boundary, then we reserve the page by
 * not freeing it to the buddy allocator.
 */
#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
void init_heap_pages(
    unsigned int zone, struct page_info *pg, unsigned long nr_pages)
{
    unsigned int nid_curr, nid_prev;
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    if ( likely(page_to_mfn(pg) != 0) )
        nid_prev = phys_to_nid(page_to_maddr(pg-1));
    else
        nid_prev = phys_to_nid(page_to_maddr(pg));

    for ( i = 0; i < nr_pages; i++ )
    {
        nid_curr = phys_to_nid(page_to_maddr(pg+i));

        if ( unlikely(!avail[nid_curr]) )
            init_node_heap(nid_curr);

        /*
         * Free pages of the same node, or if the nodes differ but the page
         * is on a MAX_ORDER alignment boundary (which already gets reserved).
         */
        if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
                                        MAX_ORDER_ALIGNED) )
            free_heap_pages(zone, pg+i, 0);
        else
            printk("Reserving non-aligned node boundary @ mfn %lu\n",
                   page_to_mfn(pg+i));

        nid_prev = nid_curr;
    }
}

static unsigned long avail_heap_pages(
    unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
{
    unsigned int i, zone, num_nodes = num_online_nodes();
    unsigned long free_pages = 0;

    if ( zone_hi >= NR_ZONES )
        zone_hi = NR_ZONES - 1;

    for ( i = 0; i < num_nodes; i++ )
    {
        if ( !avail[i] )
            continue;
        for ( zone = zone_lo; zone <= zone_hi; zone++ )
            if ( (node == -1) || (node == i) )
                free_pages += avail[i][zone];
    }

    return free_pages;
}

#define avail_for_domheap(mfn) \
    (!allocated_in_map(mfn) && !is_xen_heap_frame(mfn_to_page(mfn)))
void __init end_boot_allocator(void)
{
    unsigned long i;
    int curr_free, next_free;

    /* Pages that are free now go to the domain sub-allocator. */
    if ( (curr_free = next_free = avail_for_domheap(first_valid_mfn)) )
        map_alloc(first_valid_mfn, 1);
    for ( i = first_valid_mfn; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = avail_for_domheap(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
    }

    printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
}
#undef avail_for_domheap

/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock while scrubbing very large memory areas.
 */
void __init scrub_heap_pages(void)
{
    void *p;
    unsigned long mfn;

    if ( !opt_bootscrub )
        return;

    printk("Scrubbing Free RAM: ");

    for ( mfn = first_valid_mfn; mfn < max_page; mfn++ )
    {
        process_pending_timers();

        /* Quick lock-free check. */
        if ( allocated_in_map(mfn) )
            continue;

        /* Every 100MB, print a progress dot. */
        if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
            printk(".");

        spin_lock(&heap_lock);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(mfn) )
        {
            if ( is_xen_heap_frame(mfn_to_page(mfn)) )
            {
                p = page_to_virt(mfn_to_page(mfn));
                memguard_unguard_range(p, PAGE_SIZE);
                scrub_page(p);
                memguard_guard_range(p, PAGE_SIZE);
            }
            else
            {
                p = map_domain_page(mfn);
                scrub_page(p);
                unmap_domain_page(p);
            }
        }

        spin_unlock(&heap_lock);
    }

    printk("done.\n");
}



/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    memguard_guard_range(maddr_to_virt(ps), pe - ps);

    /*
     * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
     * prevent merging of power-of-two blocks across the zone boundary.
     */
    if ( ps && !is_xen_heap_frame(maddr_to_page(ps)-1) )
        ps += PAGE_SIZE;
    if ( !is_xen_heap_frame(maddr_to_page(pe)) )
        pe -= PAGE_SIZE;

    init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
}


void *alloc_xenheap_pages(unsigned int order)
{
    struct page_info *pg;

    ASSERT(!in_irq());

    pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order);
    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    return page_to_virt(pg);

 no_memory:
    printk("Cannot handle page request order %d!\n", order);
    return NULL;
}


void free_xenheap_pages(void *v, unsigned int order)
{
    ASSERT(!in_irq());

    if ( v == NULL )
        return;

    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));

    free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
}



/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

void init_domheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long s_tot, e_tot;
    unsigned int zone;

    ASSERT(!in_irq());

    s_tot = round_pgup(ps) >> PAGE_SHIFT;
    e_tot = round_pgdown(pe) >> PAGE_SHIFT;
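
    /*
     * The range [s_tot, e_tot) is carved into one piece per zone; each piece
     * ends at the next power-of-two frame number (1UL << (zone + 1)), which
     * is exactly where pfn_dom_zone_type() changes value.
     */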
    zone = fls(s_tot);
    BUG_ON(zone <= MEMZONE_XEN + 1);
    for ( --zone; s_tot < e_tot; ++zone )
    {
        unsigned long end = e_tot;

        BUILD_BUG_ON(NR_ZONES > BITS_PER_LONG);
        if ( zone < BITS_PER_LONG - 1 && end > 1UL << (zone + 1) )
            end = 1UL << (zone + 1);
        init_heap_pages(zone, mfn_to_page(s_tot), end - s_tot);
        s_tot = end;
    }
}


int assign_pages(
    struct domain *d,
    struct page_info *pg,
    unsigned int order,
    unsigned int memflags)
{
    unsigned long i;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(d->is_dying) )
    {
        gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
                 d->domain_id);
        goto fail;
    }

    if ( !(memflags & MEMF_no_refcount) )
    {
        if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
        {
            gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n",
                     d->domain_id, d->tot_pages + (1 << order), d->max_pages);
            goto fail;
        }

        if ( unlikely(d->tot_pages == 0) )
            get_knownalive_domain(d);

        d->tot_pages += 1 << order;
    }

    for ( i = 0; i < (1 << order); i++ )
    {
        ASSERT(page_get_owner(&pg[i]) == NULL);
        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
        page_set_owner(&pg[i], d);
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info = PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);
    return 0;

 fail:
    spin_unlock(&d->page_alloc_lock);
    return -1;
}


struct page_info *__alloc_domheap_pages(
    struct domain *d, unsigned int cpu, unsigned int order,
    unsigned int memflags)
{
    struct page_info *pg = NULL;
    unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;

    ASSERT(!in_irq());

    if ( bits )
    {
        bits = domain_clamp_alloc_bitsize(d, bits);
        if ( bits <= (PAGE_SHIFT + 1) )
            return NULL;
        bits -= PAGE_SHIFT + 1;
        if ( bits < zone_hi )
            zone_hi = bits;
    }

    if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
    {
        pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, cpu, order);

        /* Failure? Then check if we can fall back to the DMA pool. */
        if ( unlikely(pg == NULL) &&
             ((order > MAX_ORDER) ||
              (avail_heap_pages(MEMZONE_XEN + 1,
                                dma_bitsize - PAGE_SHIFT - 1,
                                -1) <
               (dma_emergency_pool_pages + (1UL << order)))) )
            return NULL;
    }
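
    /*
     * Reaching this point with pg == NULL means either the caller is
     * restricted to the low zones, or the first attempt above failed but
     * the DMA pool can spare the pages; in both cases retry over all
     * non-Xen zones, including the DMA zones.
     */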
    if ( (pg == NULL) &&
         ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
                                 cpu, order)) == NULL) )
        return NULL;

    if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
    {
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        return NULL;
    }

    return pg;
}

struct page_info *alloc_domheap_pages(
    struct domain *d, unsigned int order, unsigned int flags)
{
    return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
}

void free_domheap_pages(struct page_info *pg, unsigned int order)
{
    int i, drop_dom_ref;
    struct domain *d = page_get_owner(pg);

    ASSERT(!in_irq());

    if ( unlikely(is_xen_heap_frame(pg)) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
        {
            BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
            list_del(&pg[i].list);
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        if ( likely(!d->is_dying) )
        {
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        }
        else
        {
            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            for ( i = 0; i < (1 << order); i++ )
            {
                page_set_owner(&pg[i], NULL);
                spin_lock(&page_scrub_lock);
                list_add(&pg[i].list, &page_scrub_list);
                scrub_pages++;
                spin_unlock(&page_scrub_lock);
            }
        }
    }
    else
    {
        /* Freeing anonymous domain-heap pages. */
        free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}

unsigned long avail_domheap_pages_region(
    unsigned int node, unsigned int min_width, unsigned int max_width)
{
    int zone_lo, zone_hi;
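
    /*
     * An address width of w bits maps to zone (w - PAGE_SHIFT - 1), the
     * highest zone whose frames fit within w bits (see pfn_dom_zone_type()),
     * clamped below to the lowest domain zone and above to NR_ZONES - 1.
     */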
    zone_lo = min_width ? (min_width - (PAGE_SHIFT + 1)) : (MEMZONE_XEN + 1);
    zone_lo = max_t(int, MEMZONE_XEN + 1, zone_lo);
    zone_lo = min_t(int, NR_ZONES - 1, zone_lo);

    zone_hi = max_width ? (max_width - (PAGE_SHIFT + 1)) : (NR_ZONES - 1);
    zone_hi = max_t(int, MEMZONE_XEN + 1, zone_hi);
    zone_hi = min_t(int, NR_ZONES - 1, zone_hi);

    return avail_heap_pages(zone_lo, zone_hi, node);
}

unsigned long avail_domheap_pages(void)
{
    unsigned long avail_nrm, avail_dma;

    avail_nrm = avail_heap_pages(dma_bitsize - PAGE_SHIFT,
                                 NR_ZONES - 1,
                                 -1);

    avail_dma = avail_heap_pages(MEMZONE_XEN + 1,
                                 dma_bitsize - PAGE_SHIFT - 1,
                                 -1);

    if ( avail_dma > dma_emergency_pool_pages )
        avail_dma -= dma_emergency_pool_pages;
    else
        avail_dma = 0;

    return avail_nrm + avail_dma;
}

static void pagealloc_keyhandler(unsigned char key)
{
    unsigned int zone = MEMZONE_XEN;
    unsigned long total = 0;

    printk("Physical memory information:\n");
    printk(" Xen heap: %lukB free\n",
           avail_heap_pages(zone, zone, -1) << (PAGE_SHIFT-10));

    while ( ++zone < NR_ZONES )
    {
        unsigned long n;

        if ( zone == dma_bitsize - PAGE_SHIFT )
        {
            printk(" DMA heap: %lukB free\n", total << (PAGE_SHIFT-10));
            total = 0;
        }

        if ( (n = avail_heap_pages(zone, zone, -1)) != 0 )
        {
            total += n;
            printk(" heap[%02u]: %lukB free\n", zone, n << (PAGE_SHIFT-10));
        }
    }

    printk(" Dom heap: %lukB free\n", total << (PAGE_SHIFT-10));
}


static __init int pagealloc_keyhandler_init(void)
{
    register_keyhandler('m', pagealloc_keyhandler, "memory info");
    return 0;
}
__initcall(pagealloc_keyhandler_init);



/*************************
 * PAGE SCRUBBING
 */

static DEFINE_PER_CPU(struct timer, page_scrub_timer);

static void page_scrub_softirq(void)
{
    struct list_head *ent;
    struct page_info *pg;
    void *p;
    int i;
    s_time_t start = NOW();

    /* Aim to do 1ms of work every 10ms. */
    do {
        spin_lock(&page_scrub_lock);

        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
        {
            spin_unlock(&page_scrub_lock);
            return;
        }

        /* Peel up to 16 pages from the list. */
        for ( i = 0; i < 16; i++ )
        {
            if ( ent->next == &page_scrub_list )
                break;
            ent = ent->next;
        }

        /* Remove peeled pages from the list. */
        ent->next->prev = &page_scrub_list;
        page_scrub_list.next = ent->next;
        scrub_pages -= (i+1);

        spin_unlock(&page_scrub_lock);

        /* Working backwards, scrub each page in turn. */
        while ( ent != &page_scrub_list )
        {
            pg = list_entry(ent, struct page_info, list);
            ent = ent->prev;
            p = map_domain_page(page_to_mfn(pg));
            scrub_page(p);
            unmap_domain_page(p);
            free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
        }
    } while ( (NOW() - start) < MILLISECS(1) );

    set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10));
}

static void page_scrub_timer_fn(void *unused)
{
    page_scrub_schedule_work();
}

unsigned long avail_scrub_pages(void)
{
    return scrub_pages;
}

static void dump_heap(unsigned char key)
{
    s_time_t now = NOW();
    int i, j;

    printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
           (u32)(now>>32), (u32)now);

    for ( i = 0; i < MAX_NUMNODES; i++ )
    {
        if ( !avail[i] )
            continue;
        for ( j = 0; j < NR_ZONES; j++ )
            printk("heap[node=%d][zone=%d] -> %lu pages\n",
                   i, j, avail[i][j]);
    }
}

static __init int register_heap_trigger(void)
{
    register_keyhandler('H', dump_heap, "dump heap info");
    return 0;
}
__initcall(register_heap_trigger);


static __init int page_scrub_init(void)
{
    int cpu;
    for_each_cpu ( cpu )
        init_timer(&per_cpu(page_scrub_timer, cpu),
                   page_scrub_timer_fn, NULL, cpu);
    open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
    return 0;
}
__initcall(page_scrub_init);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */