ia64/xen-unstable

view xen/arch/x86/mm/p2m.c @ 19801:c3a94ac4c031

P2M: check whether hap mode is enabled before using 2mb pages

This small patch checks whether hap mode is enabled when a guest is
trying to allocate 2MB pages inside P2M. This prevents potential
errors when hap is disabled.
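
For reference, the guard this change describes boils down to the order
selection in set_p2m_entry() further down in this file, essentially:

    if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled )
        order = (((gfn | mfn_x(mfn) | todo) & ((1ul << 9) - 1)) == 0) ?
            9 : 0;
    else
        order = 0;    /* without hap, never install 2MB (order-9) entries */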

Cc: Chris Lalancette <clalance@redhat.com>
Cc: Sarathy, Bhavna <Bhavna.Sarathy@amd.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 19 08:42:58 2009 +0100 (2009-06-19)
parents 6e286d08c4a9
children
line source
1 /******************************************************************************
2 * arch/x86/mm/p2m.c
3 *
4 * physical-to-machine mappings for automatically-translated domains.
5 *
6 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
7 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
8 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
9 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
26 #include <asm/domain.h>
27 #include <asm/page.h>
28 #include <asm/paging.h>
29 #include <asm/p2m.h>
30 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
31 #include <xen/iommu.h>
33 /* Debugging and auditing of the P2M code? */
34 #define P2M_AUDIT 0
35 #define P2M_DEBUGGING 0
37 /*
38 * The P2M lock. This protects all updates to the p2m table.
39 * Updates are expected to be safe against concurrent reads,
40 * which do *not* require the lock.
41 *
42 * Locking discipline: always acquire this lock before the shadow or HAP one
43 */
45 #define p2m_lock_init(_p2m) \
46 do { \
47 spin_lock_init(&(_p2m)->lock); \
48 (_p2m)->locker = -1; \
49 (_p2m)->locker_function = "nobody"; \
50 } while (0)
52 #define p2m_lock(_p2m) \
53 do { \
54 if ( unlikely((_p2m)->locker == current->processor) ) \
55 { \
56 printk("Error: p2m lock held by %s\n", \
57 (_p2m)->locker_function); \
58 BUG(); \
59 } \
60 spin_lock(&(_p2m)->lock); \
61 ASSERT((_p2m)->locker == -1); \
62 (_p2m)->locker = current->processor; \
63 (_p2m)->locker_function = __func__; \
64 } while (0)
66 #define p2m_unlock(_p2m) \
67 do { \
68 ASSERT((_p2m)->locker == current->processor); \
69 (_p2m)->locker = -1; \
70 (_p2m)->locker_function = "nobody"; \
71 spin_unlock(&(_p2m)->lock); \
72 } while (0)
74 #define p2m_locked_by_me(_p2m) \
75 (current->processor == (_p2m)->locker)
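/* A minimal usage sketch of the discipline described above (illustrative
 * only; real examples are the functions below, e.g. guest_physmap_remove_page()):
 *
 *     p2m_lock(d->arch.p2m);
 *     audit_p2m(d);
 *     ... update entries; the shadow/HAP lock, if needed, is taken inside ...
 *     p2m_unlock(d->arch.p2m);
 */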
77 /* Printouts */
78 #define P2M_PRINTK(_f, _a...) \
79 debugtrace_printk("p2m: %s(): " _f, __func__, ##_a)
80 #define P2M_ERROR(_f, _a...) \
81 printk("pg error: %s(): " _f, __func__, ##_a)
82 #if P2M_DEBUGGING
83 #define P2M_DEBUG(_f, _a...) \
84 debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a)
85 #else
86 #define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0)
87 #endif
90 /* Override macros from asm/page.h to make them work with mfn_t */
91 #undef mfn_to_page
92 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
93 #undef mfn_valid
94 #define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
95 #undef page_to_mfn
96 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
99 /* PTE flags for the various types of p2m entry */
100 #define P2M_BASE_FLAGS \
101 (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
103 static unsigned long p2m_type_to_flags(p2m_type_t t)
104 {
105 unsigned long flags = (t & 0x7UL) << 9;
106 switch(t)
107 {
108 case p2m_invalid:
109 default:
110 return flags;
111 case p2m_ram_rw:
112 return flags | P2M_BASE_FLAGS | _PAGE_RW;
113 case p2m_ram_logdirty:
114 return flags | P2M_BASE_FLAGS;
115 case p2m_ram_ro:
116 return flags | P2M_BASE_FLAGS;
117 case p2m_mmio_dm:
118 return flags;
119 case p2m_mmio_direct:
120 return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
121 case p2m_populate_on_demand:
122 return flags;
123 }
124 }
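/* Note: the constructor above stashes the p2m type in PTE bits 9-11 (the
 * "available" bits) via (t & 0x7UL) << 9. Its inverse, p2m_flags_to_type(),
 * used throughout this file, is defined in asm/p2m.h; a minimal sketch of
 * what it is assumed to do:
 *
 *     return (flags >> 9) & 0x7;
 *
 * (illustration only; see the header for the authoritative definition.) */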
126 #if P2M_AUDIT
127 static void audit_p2m(struct domain *d);
128 #else
129 # define audit_p2m(_d) do { (void)(_d); } while(0)
130 #endif /* P2M_AUDIT */
132 // Find the next level's P2M entry, checking for out-of-range gfn's...
133 // Returns NULL on error.
134 //
135 static l1_pgentry_t *
136 p2m_find_entry(void *table, unsigned long *gfn_remainder,
137 unsigned long gfn, u32 shift, u32 max)
138 {
139 u32 index;
141 index = *gfn_remainder >> shift;
142 if ( index >= max )
143 {
144 P2M_DEBUG("gfn=0x%lx out of range "
145 "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
146 gfn, *gfn_remainder, shift, index, max);
147 return NULL;
148 }
149 *gfn_remainder &= (1 << shift) - 1;
150 return (l1_pgentry_t *)table + index;
151 }
153 // Walk one level of the P2M table, allocating a new table if required.
154 // Returns 0 on error.
155 //
156 static int
157 p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
158 unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
159 u32 max, unsigned long type)
160 {
161 l1_pgentry_t *l1_entry;
162 l1_pgentry_t *p2m_entry;
163 l1_pgentry_t new_entry;
164 void *next;
165 int i;
166 ASSERT(d->arch.p2m->alloc_page);
168 if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
169 shift, max)) )
170 return 0;
172 /* PoD: Not present doesn't imply empty. */
173 if ( !l1e_get_flags(*p2m_entry) )
174 {
175 struct page_info *pg = d->arch.p2m->alloc_page(d);
176 if ( pg == NULL )
177 return 0;
178 page_list_add_tail(pg, &d->arch.p2m->pages);
179 pg->u.inuse.type_info = type | 1 | PGT_validated;
180 pg->count_info |= 1;
182 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
183 __PAGE_HYPERVISOR|_PAGE_USER);
185 switch ( type ) {
186 case PGT_l3_page_table:
187 paging_write_p2m_entry(d, gfn,
188 p2m_entry, *table_mfn, new_entry, 4);
189 break;
190 case PGT_l2_page_table:
191 #if CONFIG_PAGING_LEVELS == 3
192 /* for PAE mode, PDPE only has PCD/PWT/P bits available */
193 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), _PAGE_PRESENT);
194 #endif
195 paging_write_p2m_entry(d, gfn,
196 p2m_entry, *table_mfn, new_entry, 3);
197 break;
198 case PGT_l1_page_table:
199 paging_write_p2m_entry(d, gfn,
200 p2m_entry, *table_mfn, new_entry, 2);
201 break;
202 default:
203 BUG();
204 break;
205 }
206 }
208 ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
210 /* Split a single large page into 4KB pages in the P2M table */
211 if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
212 {
213 unsigned long flags, pfn;
214 struct page_info *pg = d->arch.p2m->alloc_page(d);
215 if ( pg == NULL )
216 return 0;
217 page_list_add_tail(pg, &d->arch.p2m->pages);
218 pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
219 pg->count_info |= 1;
221 /* New splintered mappings inherit the flags of the old superpage,
222 * with a little reorganisation for the _PAGE_PSE_PAT bit. */
223 flags = l1e_get_flags(*p2m_entry);
224 pfn = l1e_get_pfn(*p2m_entry);
225 if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
226 pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
227 else
228 flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
230 l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
231 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
232 {
233 new_entry = l1e_from_pfn(pfn + i, flags);
234 paging_write_p2m_entry(d, gfn,
235 l1_entry+i, *table_mfn, new_entry, 1);
236 }
237 unmap_domain_page(l1_entry);
239 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
240 __PAGE_HYPERVISOR|_PAGE_USER);
241 paging_write_p2m_entry(d, gfn,
242 p2m_entry, *table_mfn, new_entry, 2);
243 }
245 *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
246 next = map_domain_page(mfn_x(*table_mfn));
247 unmap_domain_page(*table);
248 *table = next;
250 return 1;
251 }
253 /*
254 * Populate-on-demand functionality
255 */
256 static
257 int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
258 unsigned int page_order, p2m_type_t p2mt);
260 int
261 p2m_pod_cache_add(struct domain *d,
262 struct page_info *page,
263 unsigned long order)
264 {
265 int i;
266 struct page_info *p;
267 struct p2m_domain *p2md = d->arch.p2m;
269 #ifndef NDEBUG
270 mfn_t mfn;
272 mfn = page_to_mfn(page);
274 /* Check to make sure this is a contiguous region */
275 if( mfn_x(mfn) & ((1 << order) - 1) )
276 {
277 printk("%s: mfn %lx not aligned order %lu! (mask %lx)\n",
278 __func__, mfn_x(mfn), order, ((1UL << order) - 1));
279 return -1;
280 }
282 for(i=0; i < 1 << order ; i++) {
283 struct domain * od;
285 p = mfn_to_page(_mfn(mfn_x(mfn) + i));
286 od = page_get_owner(p);
287 if(od != d)
288 {
289 printk("%s: mfn %lx expected owner d%d, got owner d%d!\n",
290 __func__, mfn_x(mfn), d->domain_id,
291 od?od->domain_id:-1);
292 return -1;
293 }
294 }
295 #endif
297 spin_lock(&d->page_alloc_lock);
299 /* First, take all pages off the domain list */
300 for(i=0; i < 1 << order ; i++)
301 {
302 p = page + i;
303 page_list_del(p, &d->page_list);
304 }
306 /* Then add the first one to the appropriate populate-on-demand list */
307 switch(order)
308 {
309 case 9:
310 page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */
311 p2md->pod.count += 1 << order;
312 break;
313 case 0:
314 page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */
315 p2md->pod.count += 1 ;
316 break;
317 default:
318 BUG();
319 }
321 spin_unlock(&d->page_alloc_lock);
323 return 0;
324 }
326 /* Get a page of size order from the populate-on-demand cache. Will break
327 * down 2-meg pages into singleton pages automatically. Returns NULL if
328 * a superpage is requested and no superpages are available. Must be called
329 * with the d->page_alloc_lock held. */
330 static struct page_info * p2m_pod_cache_get(struct domain *d,
331 unsigned long order)
332 {
333 struct p2m_domain *p2md = d->arch.p2m;
334 struct page_info *p = NULL;
335 int i;
337 if ( order == 9 && page_list_empty(&p2md->pod.super) )
338 {
339 return NULL;
340 }
341 else if ( order == 0 && page_list_empty(&p2md->pod.single) )
342 {
343 unsigned long mfn;
344 struct page_info *q;
346 BUG_ON( page_list_empty(&p2md->pod.super) );
348 /* Break up a superpage to make single pages. NB count doesn't
349 * need to be adjusted. */
350 printk("%s: Breaking up superpage.\n", __func__);
351 p = page_list_remove_head(&p2md->pod.super);
352 mfn = mfn_x(page_to_mfn(p));
354 for ( i=0; i<(1<<9); i++ )
355 {
356 q = mfn_to_page(_mfn(mfn+i));
357 page_list_add_tail(q, &p2md->pod.single);
358 }
359 }
361 switch ( order )
362 {
363 case 9:
364 BUG_ON( page_list_empty(&p2md->pod.super) );
365 p = page_list_remove_head(&p2md->pod.super);
366 p2md->pod.count -= 1 << order; /* Lock: page_alloc */
367 break;
368 case 0:
369 BUG_ON( page_list_empty(&p2md->pod.single) );
370 p = page_list_remove_head(&p2md->pod.single);
371 p2md->pod.count -= 1;
372 break;
373 default:
374 BUG();
375 }
377 /* Put the pages back on the domain page_list */
378 for ( i = 0 ; i < (1 << order) ; i++ )
379 {
380 BUG_ON(page_get_owner(p + i) != d);
381 page_list_add_tail(p + i, &d->page_list);
382 }
384 return p;
385 }
387 /* Set the size of the cache, allocating or freeing as necessary. */
388 static int
389 p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
390 {
391 struct p2m_domain *p2md = d->arch.p2m;
392 int ret = 0;
394 /* Increasing the target */
395 while ( pod_target > p2md->pod.count )
396 {
397 struct page_info * page;
398 int order;
400 if ( (pod_target - p2md->pod.count) >= (1 << 9) )
401 order = 9;
402 else
403 order = 0;
405 page = alloc_domheap_pages(d, order, 0);
406 if ( unlikely(page == NULL) )
407 goto out;
409 p2m_pod_cache_add(d, page, order);
410 }
412 /* Decreasing the target */
413 /* We hold the p2m lock here, so we don't need to worry about
414 * cache disappearing under our feet. */
415 while ( pod_target < p2md->pod.count )
416 {
417 struct page_info * page;
418 int order, i;
420 /* Grab the lock before checking that pod.super is empty, or the last
421 * entries may disappear before we grab the lock. */
422 spin_lock(&d->page_alloc_lock);
424 if ( (p2md->pod.count - pod_target) > (1 << 9)
425 && !page_list_empty(&p2md->pod.super) )
426 order = 9;
427 else
428 order = 0;
430 page = p2m_pod_cache_get(d, order);
432 ASSERT(page != NULL);
434 spin_unlock(&d->page_alloc_lock);
436 /* Then free them */
437 for ( i = 0 ; i < (1 << order) ; i++ )
438 {
439 /* Copied from common/memory.c:guest_remove_page() */
440 if ( unlikely(!get_page(page+i, d)) )
441 {
442 gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
443 ret = -EINVAL;
444 goto out;
445 }
447 if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
448 put_page_and_type(page+i);
450 if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
451 put_page(page+i);
453 put_page(page+i);
454 }
455 }
457 out:
458 return ret;
459 }
461 /*
462 * The "right behavior" here requires some careful thought. First, some
463 * definitions:
464 * + M: static_max
465 * + B: number of pages the balloon driver has ballooned down to.
466 * + P: Number of populated pages.
467 * + T: Old target
468 * + T': New target
469 *
470 * The following equations should hold:
471 * 0 <= P <= T <= B <= M
472 * d->arch.p2m->pod.entry_count == B - P
473 * d->tot_pages == P + d->arch.p2m->pod.count
474 *
475 * Now we have the following potential cases to cover:
476 * B < T': Set the PoD cache size equal to the number of outstanding PoD
477 * entries. The balloon driver will deflate the balloon to give back
478 * the remainder of the RAM to the guest OS.
479 * T < T' < B: Increase the PoD cache size.
480 * T' < T <= B: Here we have a choice. We could decrease the size of the cache
481 * and get the memory back right away. However, that means every time we
482 * reduce the memory target we risk the guest attempting to populate the
483 * memory before the balloon driver has reached its new target. It is safer to
484 * never reduce the cache size here, but only when the balloon driver frees
485 * PoD ranges.
486 *
487 * If there are many zero pages, we could reach the target also by doing
488 * zero sweeps and marking the ranges PoD; but the balloon driver will have
489 * to free this memory eventually anyway, so we don't actually gain that much
490 * by doing so.
491 *
492 * NB that the equation (B<T') may require adjustment to the cache
493 * size as PoD pages are freed as well; i.e., freeing a PoD-backed
494 * entry when pod.entry_count == pod.count requires us to reduce both
495 * pod.entry_count and pod.count.
496 */
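/* Worked example of the invariants above (illustrative numbers only):
 * suppose M = 1024, B = 768, P = 256 and pod.count = 300. Then
 * pod.entry_count = B - P = 512 and d->tot_pages = P + pod.count = 556.
 * For a new target T' = 1000 (greater than d->tot_pages, so we do not bail
 * out early), p2m_pod_set_mem_target() computes populated = 556 - 300 = 256
 * and pod_target = 1000 - 256 = 744, which is then clamped to
 * pod.entry_count = 512: the cache grows from 300 to 512 pages and the
 * balloon driver is left to hand the remaining memory back to the guest. */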
497 int
498 p2m_pod_set_mem_target(struct domain *d, unsigned long target)
499 {
500 unsigned pod_target;
501 struct p2m_domain *p2md = d->arch.p2m;
502 int ret = 0;
503 unsigned long populated;
505 /* P == B: Nothing to do. */
506 if ( p2md->pod.entry_count == 0 )
507 goto out;
509 /* T' < B: Don't reduce the cache size; let the balloon driver
510 * take care of it. */
511 if ( target < d->tot_pages )
512 goto out;
514 populated = d->tot_pages - p2md->pod.count;
516 pod_target = target - populated;
518 /* B < T': Set the cache size equal to # of outstanding entries,
519 * let the balloon driver fill in the rest. */
520 if ( pod_target > p2md->pod.entry_count )
521 pod_target = p2md->pod.entry_count;
523 ASSERT( pod_target > p2md->pod.count );
525 ret = p2m_pod_set_cache_target(d, pod_target);
527 out:
528 return ret;
529 }
531 void
532 p2m_pod_empty_cache(struct domain *d)
533 {
534 struct p2m_domain *p2md = d->arch.p2m;
535 struct page_info *page;
537 spin_lock(&d->page_alloc_lock);
539 while ( (page = page_list_remove_head(&p2md->pod.super)) )
540 {
541 int i;
543 for ( i = 0 ; i < (1 << 9) ; i++ )
544 {
545 BUG_ON(page_get_owner(page + i) != d);
546 page_list_add_tail(page + i, &d->page_list);
547 }
549 p2md->pod.count -= 1<<9;
550 }
552 while ( (page = page_list_remove_head(&p2md->pod.single)) )
553 {
554 BUG_ON(page_get_owner(page) != d);
555 page_list_add_tail(page, &d->page_list);
557 p2md->pod.count -= 1;
558 }
560 BUG_ON(p2md->pod.count != 0);
562 spin_unlock(&d->page_alloc_lock);
563 }
565 /* This function is needed for two reasons:
566 * + To properly handle clearing of PoD entries
567 * + To "steal back" memory being freed for the PoD cache, rather than
568 * releasing it.
569 *
570 * Once both of these tasks have been completed, we can return and
571 * allow decrease_reservation() to handle everything else.
572 */
573 int
574 p2m_pod_decrease_reservation(struct domain *d,
575 xen_pfn_t gpfn,
576 unsigned int order)
577 {
578 struct p2m_domain *p2md = d->arch.p2m;
579 int ret=0;
580 int i;
582 int steal_for_cache = 0;
583 int pod = 0, nonpod = 0, ram = 0;
586 /* If we don't have any outstanding PoD entries, let things take their
587 * course */
588 if ( p2md->pod.entry_count == 0 )
589 goto out;
591 /* Figure out if we need to steal some freed memory for our cache */
592 steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
594 p2m_lock(p2md);
595 audit_p2m(d);
597 /* See what's in here. */
598 /* FIXME: Add contiguous; query for PSE entries? */
599 for ( i=0; i<(1<<order); i++)
600 {
601 p2m_type_t t;
603 gfn_to_mfn_query(d, gpfn + i, &t);
605 if ( t == p2m_populate_on_demand )
606 pod++;
607 else
608 {
609 nonpod++;
610 if ( p2m_is_ram(t) )
611 ram++;
612 }
613 }
615 /* No populate-on-demand? Don't need to steal anything? Then we're done!*/
616 if(!pod && !steal_for_cache)
617 goto out_unlock;
619 if ( !nonpod )
620 {
621 /* All PoD: Mark the whole region invalid and tell caller
622 * we're done. */
623 set_p2m_entry(d, gpfn, _mfn(INVALID_MFN), order, p2m_invalid);
624 p2md->pod.entry_count-=(1<<order); /* Lock: p2m */
625 BUG_ON(p2md->pod.entry_count < 0);
626 ret = 1;
627 goto out_unlock;
628 }
630 /* FIXME: Steal contig 2-meg regions for cache */
632 /* Process as long as:
633 * + There are PoD entries to handle, or
634 * + There is ram left, and we want to steal it
635 */
636 for ( i=0;
637 i<(1<<order) && (pod>0 || (steal_for_cache && ram > 0));
638 i++)
639 {
640 mfn_t mfn;
641 p2m_type_t t;
643 mfn = gfn_to_mfn_query(d, gpfn + i, &t);
644 if ( t == p2m_populate_on_demand )
645 {
646 set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
647 p2md->pod.entry_count--; /* Lock: p2m */
648 BUG_ON(p2md->pod.entry_count < 0);
649 pod--;
650 }
651 else if ( steal_for_cache && p2m_is_ram(t) )
652 {
653 struct page_info *page;
655 ASSERT(mfn_valid(mfn));
657 page = mfn_to_page(mfn);
659 set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
660 set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
662 p2m_pod_cache_add(d, page, 0);
664 steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
666 nonpod--;
667 ram--;
668 }
669 }
671 /* If we've reduced our "liabilities" beyond our "assets", free some */
672 if ( p2md->pod.entry_count < p2md->pod.count )
673 {
674 printk("b %d\n", p2md->pod.entry_count);
675 p2m_pod_set_cache_target(d, p2md->pod.entry_count);
676 }
678 /* If there are no more non-PoD entries, tell decrease_reservation() that
679 * there's nothing left to do. */
680 if ( nonpod == 0 )
681 ret = 1;
683 out_unlock:
684 audit_p2m(d);
685 p2m_unlock(p2md);
687 out:
688 return ret;
689 }
691 void
692 p2m_pod_dump_data(struct domain *d)
693 {
694 struct p2m_domain *p2md = d->arch.p2m;
696 printk(" PoD entries=%d cachesize=%d\n",
697 p2md->pod.entry_count, p2md->pod.count);
698 }
700 #define superpage_aligned(_x) (((_x)&((1<<9)-1))==0)
702 /* Search for all-zero superpages to be reclaimed as superpages for the
703 * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */
704 static int
705 p2m_pod_zero_check_superpage(struct domain *d, unsigned long gfn)
706 {
707 mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
708 p2m_type_t type, type0 = 0;
709 unsigned long * map = NULL;
710 int ret=0, reset = 0;
711 int i, j;
712 int max_ref = 1;
714 if ( !superpage_aligned(gfn) )
715 goto out;
717 /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
718 if ( paging_mode_shadow(d) )
719 max_ref++;
721 /* Look up the mfns, checking to make sure they're the same mfn
722 * and aligned, and mapping them. */
723 for ( i=0; i<(1<<9); i++ )
724 {
726 mfn = gfn_to_mfn_query(d, gfn + i, &type);
728 if ( i == 0 )
729 {
730 mfn0 = mfn;
731 type0 = type;
732 }
734 /* Conditions that must be met to reclaim this range as a superpage:
735 * + All gfns are ram types
736 * + All gfns have the same type
737 * + All of the mfns are allocated to a domain
738 * + None of the mfns are used as pagetables
739 * + The first mfn is 2-meg aligned
740 * + All the other mfns are in sequence
741 * Adding for good measure:
742 * + None of the mfns are likely to be mapped elsewhere (refcount
743 * 2 or less for shadow, 1 for hap)
744 */
745 if ( !p2m_is_ram(type)
746 || type != type0
747 || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
748 || ( (mfn_to_page(mfn)->count_info & PGC_page_table) != 0 )
749 || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
750 || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
751 || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
752 goto out;
753 }
755 /* Now, do a quick check to see if it may be zero before unmapping. */
756 for ( i=0; i<(1<<9); i++ )
757 {
758 /* Quick zero-check */
759 map = map_domain_page(mfn_x(mfn0) + i);
761 for ( j=0; j<16; j++ )
762 if( *(map+j) != 0 )
763 break;
765 unmap_domain_page(map);
767 if ( j < 16 )
768 goto out;
770 }
772 /* Try to remove the page, restoring old mapping if it fails. */
773 set_p2m_entry(d, gfn,
774 _mfn(POPULATE_ON_DEMAND_MFN), 9,
775 p2m_populate_on_demand);
777 /* Make sure none of the MFNs are used elsewhere... for example, mapped
778 * via the grant table interface, or by qemu. Allow one refcount for
779 * being allocated to the domain. */
780 for ( i=0; i < (1<<9); i++ )
781 {
782 mfn = _mfn(mfn_x(mfn0) + i);
783 if ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > 1 )
784 {
785 reset = 1;
786 goto out_reset;
787 }
788 }
790 /* Finally, do a full zero-check */
791 for ( i=0; i < (1<<9); i++ )
792 {
793 map = map_domain_page(mfn_x(mfn0) + i);
795 for ( j=0; j<PAGE_SIZE/sizeof(*map); j++ )
796 if( *(map+j) != 0 )
797 {
798 reset = 1;
799 break;
800 }
802 unmap_domain_page(map);
804 if ( reset )
805 goto out_reset;
806 }
808 /* Finally! We've passed all the checks, and can add the mfn superpage
809 * back on the PoD cache, and account for the new p2m PoD entries */
810 p2m_pod_cache_add(d, mfn_to_page(mfn0), 9);
811 d->arch.p2m->pod.entry_count += (1<<9);
813 out_reset:
814 if ( reset )
815 set_p2m_entry(d, gfn, mfn0, 9, type0);
817 out:
818 return ret;
819 }
821 static void
822 p2m_pod_zero_check(struct domain *d, unsigned long *gfns, int count)
823 {
824 mfn_t mfns[count];
825 p2m_type_t types[count];
826 unsigned long * map[count];
828 int i, j;
829 int max_ref = 1;
831 /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
832 if ( paging_mode_shadow(d) )
833 max_ref++;
835 /* First, get the gfn list, translate to mfns, and map the pages. */
836 for ( i=0; i<count; i++ )
837 {
838 mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i);
839 /* If this is ram, and not a pagetable, and probably not mapped
840 elsewhere, map it; otherwise, skip. */
841 if ( p2m_is_ram(types[i])
842 && ( (mfn_to_page(mfns[i])->count_info & PGC_allocated) != 0 )
843 && ( (mfn_to_page(mfns[i])->count_info & PGC_page_table) == 0 )
844 && ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) <= max_ref ) )
845 map[i] = map_domain_page(mfn_x(mfns[i]));
846 else
847 map[i] = NULL;
848 }
850 /* Then, go through and check for zeroed pages, removing write permission
851 * for those with zeroes. */
852 for ( i=0; i<count; i++ )
853 {
854 if(!map[i])
855 continue;
857 /* Quick zero-check */
858 for ( j=0; j<16; j++ )
859 if( *(map[i]+j) != 0 )
860 break;
862 if ( j < 16 )
863 {
864 unmap_domain_page(map[i]);
865 map[i] = NULL;
866 continue;
867 }
869 /* Try to remove the page, restoring old mapping if it fails. */
870 set_p2m_entry(d, gfns[i],
871 _mfn(POPULATE_ON_DEMAND_MFN), 0,
872 p2m_populate_on_demand);
874 /* See if the page was successfully unmapped. (Allow one refcount
875 * for being allocated to a domain.) */
876 if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 )
877 {
878 unmap_domain_page(map[i]);
879 map[i] = NULL;
881 set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
883 continue;
884 }
885 }
887 /* Now check each page for real */
888 for ( i=0; i < count; i++ )
889 {
890 if(!map[i])
891 continue;
893 for ( j=0; j<PAGE_SIZE/sizeof(*map[i]); j++ )
894 if( *(map[i]+j) != 0 )
895 break;
897 /* See comment in p2m_pod_zero_check_superpage() re gnttab
898 * check timing. */
899 if ( j < PAGE_SIZE/sizeof(*map[i]) )
900 {
901 set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
902 continue;
903 }
904 else
905 {
906 /* Add to cache, and account for the new p2m PoD entry */
907 p2m_pod_cache_add(d, mfn_to_page(mfns[i]), 0);
908 d->arch.p2m->pod.entry_count++;
909 }
911 unmap_domain_page(map[i]);
912 map[i] = NULL;
913 }
915 }
917 #define POD_SWEEP_LIMIT 1024
918 static void
919 p2m_pod_emergency_sweep_super(struct domain *d)
920 {
921 struct p2m_domain *p2md = d->arch.p2m;
922 unsigned long i, start, limit;
924 if ( p2md->pod.reclaim_super == 0 )
925 {
926 p2md->pod.reclaim_super = (p2md->pod.max_guest>>9)<<9;
927 p2md->pod.reclaim_super -= (1<<9);
928 }
930 start = p2md->pod.reclaim_super;
931 limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
933 for ( i=p2md->pod.reclaim_super ; i > 0 ; i-=(1<<9) )
934 {
935 p2m_pod_zero_check_superpage(d, i);
936 /* Stop if we're past our limit and we have found *something*.
937 *
938 * NB that this is a zero-sum game; we're increasing our cache size
939 * by increasing our 'debt'. Since we hold the p2m lock,
940 * (entry_count - count) must remain the same. */
941 if ( !page_list_empty(&p2md->pod.super) && i < limit )
942 break;
943 }
945 p2md->pod.reclaim_super = i ? i - (1<<9) : 0;
947 }
949 #define POD_SWEEP_STRIDE 16
950 static void
951 p2m_pod_emergency_sweep(struct domain *d)
952 {
953 struct p2m_domain *p2md = d->arch.p2m;
954 unsigned long gfns[POD_SWEEP_STRIDE];
955 unsigned long i, j=0, start, limit;
956 p2m_type_t t;
959 if ( p2md->pod.reclaim_single == 0 )
960 p2md->pod.reclaim_single = p2md->pod.max_guest;
962 start = p2md->pod.reclaim_single;
963 limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
965 /* FIXME: Figure out how to avoid superpages */
966 for ( i=p2md->pod.reclaim_single ; i > 0 ; i-- )
967 {
968 gfn_to_mfn_query(d, i, &t );
969 if ( p2m_is_ram(t) )
970 {
971 gfns[j] = i;
972 j++;
973 BUG_ON(j > POD_SWEEP_STRIDE);
974 if ( j == POD_SWEEP_STRIDE )
975 {
976 p2m_pod_zero_check(d, gfns, j);
977 j = 0;
978 }
979 }
980 /* Stop if we're past our limit and we have found *something*.
981 *
982 * NB that this is a zero-sum game; we're increasing our cache size
983 * by re-increasing our 'debt'. Since we hold the p2m lock,
984 * (entry_count - count) must remain the same. */
985 if ( p2md->pod.count > 0 && i < limit )
986 break;
987 }
989 if ( j )
990 p2m_pod_zero_check(d, gfns, j);
992 p2md->pod.reclaim_single = i ? i - 1 : i;
994 }
996 static int
997 p2m_pod_demand_populate(struct domain *d, unsigned long gfn,
998 mfn_t table_mfn,
999 l1_pgentry_t *p2m_entry,
1000 unsigned int order,
1001 p2m_query_t q)
1003 struct page_info *p = NULL; /* Compiler warnings */
1004 unsigned long gfn_aligned;
1005 mfn_t mfn;
1006 l1_pgentry_t entry_content = l1e_empty();
1007 struct p2m_domain *p2md = d->arch.p2m;
1008 int i;
1010 /* We need to grab the p2m lock here and re-check the entry to make
1011 * sure that someone else hasn't populated it for us, then hold it
1012 * until we're done. */
1013 p2m_lock(p2md);
1014 audit_p2m(d);
1016 /* Check to make sure this is still PoD */
1017 if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
1019 p2m_unlock(p2md);
1020 return 0;
1023 /* If we're low, start a sweep */
1024 if ( order == 9 && page_list_empty(&p2md->pod.super) )
1025 p2m_pod_emergency_sweep_super(d);
1027 if ( page_list_empty(&p2md->pod.single) &&
1028 ( ( order == 0 )
1029 || (order == 9 && page_list_empty(&p2md->pod.super) ) ) )
1030 p2m_pod_emergency_sweep(d);
1032 /* Keep track of the highest gfn demand-populated by a guest fault */
1033 if ( q == p2m_guest && gfn > p2md->pod.max_guest )
1034 p2md->pod.max_guest = gfn;
1036 spin_lock(&d->page_alloc_lock);
1038 if ( p2md->pod.count == 0 )
1039 goto out_of_memory;
1041 /* Get a page from the cache. A NULL return value indicates that the
1042 * 2-meg range should be marked singleton PoD, and retried */
1043 if ( (p = p2m_pod_cache_get(d, order)) == NULL )
1044 goto remap_and_retry;
1046 mfn = page_to_mfn(p);
1048 BUG_ON((mfn_x(mfn) & ((1 << order)-1)) != 0);
1050 spin_unlock(&d->page_alloc_lock);
1052 /* Fill in the entry in the p2m */
1053 switch ( order )
1055 case 9:
1057 l2_pgentry_t l2e_content;
1059 l2e_content = l2e_from_pfn(mfn_x(mfn),
1060 p2m_type_to_flags(p2m_ram_rw) | _PAGE_PSE);
1062 entry_content.l1 = l2e_content.l2;
1064 break;
1065 case 0:
1066 entry_content = l1e_from_pfn(mfn_x(mfn),
1067 p2m_type_to_flags(p2m_ram_rw));
1068 break;
1072 gfn_aligned = (gfn >> order) << order;
1074 paging_write_p2m_entry(d, gfn_aligned, p2m_entry, table_mfn,
1075 entry_content, (order==9)?2:1);
1077 for( i = 0 ; i < (1UL << order) ; i++ )
1078 set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_aligned + i);
1080 p2md->pod.entry_count -= (1 << order); /* Lock: p2m */
1081 BUG_ON(p2md->pod.entry_count < 0);
1082 audit_p2m(d);
1083 p2m_unlock(p2md);
1085 return 0;
1086 out_of_memory:
1087 spin_unlock(&d->page_alloc_lock);
1088 audit_p2m(d);
1089 p2m_unlock(p2md);
1090 printk("%s: Out of populate-on-demand memory!\n", __func__);
1091 domain_crash(d);
1092 return -1;
1093 remap_and_retry:
1094 BUG_ON(order != 9);
1095 spin_unlock(&d->page_alloc_lock);
1097 /* Remap this 2-meg region in singleton chunks */
1098 gfn_aligned = (gfn>>order)<<order;
1099 for(i=0; i<(1<<order); i++)
1100 set_p2m_entry(d, gfn_aligned+i, _mfn(POPULATE_ON_DEMAND_MFN), 0,
1101 p2m_populate_on_demand);
1102 audit_p2m(d);
1103 p2m_unlock(p2md);
1104 return 0;
1107 // Returns 0 on error (out of memory)
1108 static int
1109 p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1110 unsigned int page_order, p2m_type_t p2mt)
1112 // XXX -- this might be able to be faster iff current->domain == d
1113 mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
1114 void *table = map_domain_page(mfn_x(table_mfn));
1115 unsigned long i, gfn_remainder = gfn;
1116 l1_pgentry_t *p2m_entry;
1117 l1_pgentry_t entry_content;
1118 l2_pgentry_t l2e_content;
1119 int rv=0;
1121 #if CONFIG_PAGING_LEVELS >= 4
1122 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1123 L4_PAGETABLE_SHIFT - PAGE_SHIFT,
1124 L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
1125 goto out;
1126 #endif
1127 /*
1128 * When using PAE Xen, we only allow 33 bits of pseudo-physical
1129 * address in translated guests (i.e. 8 GBytes). This restriction
1130 * comes from wanting to map the P2M table into the 16MB RO_MPT hole
1131 * in Xen's address space for translated PV guests.
1132 * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
1133 */
1134 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1135 L3_PAGETABLE_SHIFT - PAGE_SHIFT,
1136 ((CONFIG_PAGING_LEVELS == 3)
1137 ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
1138 : L3_PAGETABLE_ENTRIES),
1139 PGT_l2_page_table) )
1140 goto out;
1142 if ( page_order == 0 )
1144 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1145 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
1146 L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
1147 goto out;
1149 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
1150 0, L1_PAGETABLE_ENTRIES);
1151 ASSERT(p2m_entry);
1153 if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
1154 entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
1155 else
1156 entry_content = l1e_empty();
1158 /* level 1 entry */
1159 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
1161 else
1163 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
1164 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
1165 L2_PAGETABLE_ENTRIES);
1166 ASSERT(p2m_entry);
1168 /* FIXME: Deal with 4k replaced by 2meg pages */
1169 if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
1170 !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
1172 P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
1173 domain_crash(d);
1174 goto out;
1177 if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )
1178 l2e_content = l2e_from_pfn(mfn_x(mfn),
1179 p2m_type_to_flags(p2mt) | _PAGE_PSE);
1180 else
1181 l2e_content = l2e_empty();
1183 entry_content.l1 = l2e_content.l2;
1184 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
1187 /* Track the highest gfn for which we have ever had a valid mapping */
1188 if ( mfn_valid(mfn)
1189 && (gfn + (1UL << page_order) - 1 > d->arch.p2m->max_mapped_pfn) )
1190 d->arch.p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
1192 if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) )
1194 if ( p2mt == p2m_ram_rw )
1195 for ( i = 0; i < (1UL << page_order); i++ )
1196 iommu_map_page(d, gfn+i, mfn_x(mfn)+i );
1197 else
1198 for ( i = 0; i < (1UL << page_order); i++ )
1199 iommu_unmap_page(d, gfn+i);
1202 /* Success */
1203 rv = 1;
1205 out:
1206 unmap_domain_page(table);
1207 return rv;
1210 static mfn_t
1211 p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t,
1212 p2m_query_t q)
1214 mfn_t mfn;
1215 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
1216 l2_pgentry_t *l2e;
1217 l1_pgentry_t *l1e;
1219 ASSERT(paging_mode_translate(d));
1221 /* XXX This is for compatibility with the old model, where anything not
1222 * XXX marked as RAM was considered to be emulated MMIO space.
1223 * XXX Once we start explicitly registering MMIO regions in the p2m
1224 * XXX we will return p2m_invalid for unmapped gfns */
1225 *t = p2m_mmio_dm;
1227 mfn = pagetable_get_mfn(d->arch.phys_table);
1229 if ( gfn > d->arch.p2m->max_mapped_pfn )
1230 /* This pfn is higher than the highest the p2m map currently holds */
1231 return _mfn(INVALID_MFN);
1233 #if CONFIG_PAGING_LEVELS >= 4
1235 l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
1236 l4e += l4_table_offset(addr);
1237 if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
1239 unmap_domain_page(l4e);
1240 return _mfn(INVALID_MFN);
1242 mfn = _mfn(l4e_get_pfn(*l4e));
1243 unmap_domain_page(l4e);
1245 #endif
1247 l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
1248 #if CONFIG_PAGING_LEVELS == 3
1249 /* On PAE hosts the p2m has eight l3 entries, not four (see
1250 * shadow_set_p2m_entry()) so we can't use l3_table_offset.
1251 * Instead, just count the number of l3es from zero. It's safe
1252 * to do this because we already checked that the gfn is within
1253 * the bounds of the p2m. */
1254 l3e += (addr >> L3_PAGETABLE_SHIFT);
1255 #else
1256 l3e += l3_table_offset(addr);
1257 #endif
1258 if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
1260 unmap_domain_page(l3e);
1261 return _mfn(INVALID_MFN);
1263 mfn = _mfn(l3e_get_pfn(*l3e));
1264 unmap_domain_page(l3e);
1267 l2e = map_domain_page(mfn_x(mfn));
1268 l2e += l2_table_offset(addr);
1270 pod_retry_l2:
1271 if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
1273 /* PoD: Try to populate a 2-meg chunk */
1274 if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand )
1276 if ( q != p2m_query ) {
1277 if( !p2m_pod_demand_populate(d, gfn, mfn,
1278 (l1_pgentry_t *)l2e, 9, q) )
1279 goto pod_retry_l2;
1280 } else
1281 *t = p2m_populate_on_demand;
1284 unmap_domain_page(l2e);
1285 return _mfn(INVALID_MFN);
1287 else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
1289 mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
1290 *t = p2m_flags_to_type(l2e_get_flags(*l2e));
1291 unmap_domain_page(l2e);
1293 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
1294 return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
1297 mfn = _mfn(l2e_get_pfn(*l2e));
1298 unmap_domain_page(l2e);
1300 l1e = map_domain_page(mfn_x(mfn));
1301 l1e += l1_table_offset(addr);
1302 pod_retry_l1:
1303 if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
1305 /* PoD: Try to populate */
1306 if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
1308 if ( q != p2m_query ) {
1309 if( !p2m_pod_demand_populate(d, gfn, mfn,
1310 (l1_pgentry_t *)l1e, 0, q) )
1311 goto pod_retry_l1;
1312 } else
1313 *t = p2m_populate_on_demand;
1316 unmap_domain_page(l1e);
1317 return _mfn(INVALID_MFN);
1319 mfn = _mfn(l1e_get_pfn(*l1e));
1320 *t = p2m_flags_to_type(l1e_get_flags(*l1e));
1321 unmap_domain_page(l1e);
1323 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
1324 return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
1327 /* Read the current domain's p2m table (through the linear mapping). */
1328 static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t,
1329 p2m_query_t q)
1331 mfn_t mfn = _mfn(INVALID_MFN);
1332 p2m_type_t p2mt = p2m_mmio_dm;
1333 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
1334 /* XXX This is for compatibility with the old model, where anything not
1335 * XXX marked as RAM was considered to be emulated MMIO space.
1336 * XXX Once we start explicitly registering MMIO regions in the p2m
1337 * XXX we will return p2m_invalid for unmapped gfns */
1339 if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
1341 l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
1342 l2_pgentry_t l2e = l2e_empty();
1343 int ret;
1345 ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
1346 / sizeof(l1_pgentry_t));
1348 /*
1349 * Read & process L2
1350 */
1351 p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
1352 + l2_linear_offset(addr)];
1354 pod_retry_l2:
1355 ret = __copy_from_user(&l2e,
1356 p2m_entry,
1357 sizeof(l2e));
1358 if ( ret != 0
1359 || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1361 if( (l2e_get_flags(l2e) & _PAGE_PSE)
1362 && ( p2m_flags_to_type(l2e_get_flags(l2e))
1363 == p2m_populate_on_demand ) )
1365 /* The read has succeeded, so we know that the mapping
1366 * exists at this point. */
1367 if ( q != p2m_query )
1369 if( !p2m_pod_demand_populate(current->domain, gfn, mfn,
1370 p2m_entry, 9, q) )
1371 goto pod_retry_l2;
1373 /* Allocate failed. */
1374 p2mt = p2m_invalid;
1375 printk("%s: Allocate failed!\n", __func__);
1376 goto out;
1378 else
1380 p2mt = p2m_populate_on_demand;
1381 goto out;
1385 goto pod_retry_l1;
1388 if (l2e_get_flags(l2e) & _PAGE_PSE)
1390 p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
1391 ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
1393 if ( p2m_is_valid(p2mt) )
1394 mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
1395 else
1396 p2mt = p2m_mmio_dm;
1398 goto out;
1401 /*
1402 * Read and process L1
1403 */
1405 /* Need to __copy_from_user because the p2m is sparse and this
1406 * part might not exist */
1407 pod_retry_l1:
1408 p2m_entry = &phys_to_machine_mapping[gfn];
1410 ret = __copy_from_user(&l1e,
1411 p2m_entry,
1412 sizeof(l1e));
1414 if ( ret == 0 ) {
1415 p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
1416 ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
1418 if ( p2m_flags_to_type(l1e_get_flags(l1e))
1419 == p2m_populate_on_demand )
1421 /* The read has succeeded, so we know that the mapping
1422 * exists at this point. */
1423 if ( q != p2m_query )
1425 if( !p2m_pod_demand_populate(current->domain, gfn, mfn,
1426 (l1_pgentry_t *)p2m_entry, 0,
1427 q) )
1428 goto pod_retry_l1;
1430 /* Allocate failed. */
1431 p2mt = p2m_invalid;
1432 goto out;
1434 else
1436 p2mt = p2m_populate_on_demand;
1437 goto out;
1441 if ( p2m_is_valid(p2mt) )
1442 mfn = _mfn(l1e_get_pfn(l1e));
1443 else
1444 /* XXX see above */
1445 p2mt = p2m_mmio_dm;
1448 out:
1449 *t = p2mt;
1450 return mfn;
1453 /* Init the datastructures for later use by the p2m code */
1454 int p2m_init(struct domain *d)
1456 struct p2m_domain *p2m;
1458 p2m = xmalloc(struct p2m_domain);
1459 if ( p2m == NULL )
1460 return -ENOMEM;
1462 d->arch.p2m = p2m;
1464 memset(p2m, 0, sizeof(*p2m));
1465 p2m_lock_init(p2m);
1466 INIT_PAGE_LIST_HEAD(&p2m->pages);
1467 INIT_PAGE_LIST_HEAD(&p2m->pod.super);
1468 INIT_PAGE_LIST_HEAD(&p2m->pod.single);
1470 p2m->set_entry = p2m_set_entry;
1471 p2m->get_entry = p2m_gfn_to_mfn;
1472 p2m->get_entry_current = p2m_gfn_to_mfn_current;
1473 p2m->change_entry_type_global = p2m_change_type_global;
1475 if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
1476 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
1477 ept_p2m_init(d);
1479 return 0;
1482 void p2m_change_entry_type_global(struct domain *d,
1483 p2m_type_t ot, p2m_type_t nt)
1485 struct p2m_domain *p2m = d->arch.p2m;
1487 p2m_lock(p2m);
1488 p2m->change_entry_type_global(d, ot, nt);
1489 p2m_unlock(p2m);
1492 static
1493 int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1494 unsigned int page_order, p2m_type_t p2mt)
1496 unsigned long todo = 1ul << page_order;
1497 unsigned int order;
1498 int rc = 0;
1500 while ( todo )
1502 if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled )
1503 order = (((gfn | mfn_x(mfn) | todo) & ((1ul << 9) - 1)) == 0) ?
1504 9 : 0;
1505 else
1506 order = 0;
1507 rc = d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt);
1508 gfn += 1ul << order;
1509 if ( mfn_x(mfn) != INVALID_MFN )
1510 mfn = _mfn(mfn_x(mfn) + (1ul << order));
1511 todo -= 1ul << order;
1514 return rc;
1517 // Allocate a new p2m table for a domain.
1518 //
1519 // The structure of the p2m table is that of a pagetable for xen (i.e. it is
1520 // controlled by CONFIG_PAGING_LEVELS).
1521 //
1522 // The alloc_page and free_page functions will be used to get memory to
1523 // build the p2m, and to release it again at the end of day.
1524 //
1525 // Returns 0 for success or -errno.
1526 //
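// An illustrative (hypothetical) call from a paging mode that owns a page
// pool might look like:
//
//     rc = p2m_alloc_table(d, my_alloc_p2m_page, my_free_p2m_page);
//
// where my_alloc_p2m_page()/my_free_p2m_page() are placeholder names for
// that mode's pool allocator and releaser; the real callers live in the
// paging-mode code, not in this file.
//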
1527 int p2m_alloc_table(struct domain *d,
1528 struct page_info * (*alloc_page)(struct domain *d),
1529 void (*free_page)(struct domain *d, struct page_info *pg))
1532 mfn_t mfn = _mfn(INVALID_MFN);
1533 struct page_info *page, *p2m_top;
1534 unsigned int page_count = 0;
1535 unsigned long gfn = -1UL;
1536 struct p2m_domain *p2m = d->arch.p2m;
1538 p2m_lock(p2m);
1540 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
1542 P2M_ERROR("p2m already allocated for this domain\n");
1543 p2m_unlock(p2m);
1544 return -EINVAL;
1547 P2M_PRINTK("allocating p2m table\n");
1549 p2m->alloc_page = alloc_page;
1550 p2m->free_page = free_page;
1552 p2m_top = p2m->alloc_page(d);
1553 if ( p2m_top == NULL )
1555 p2m_unlock(p2m);
1556 return -ENOMEM;
1558 page_list_add_tail(p2m_top, &p2m->pages);
1560 p2m_top->count_info = 1;
1561 p2m_top->u.inuse.type_info =
1562 #if CONFIG_PAGING_LEVELS == 4
1563 PGT_l4_page_table
1564 #else
1565 PGT_l3_page_table
1566 #endif
1567 | 1 | PGT_validated;
1569 d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));
1571 P2M_PRINTK("populating p2m table\n");
1573 /* Initialise physmap tables for slot zero. Other code assumes this. */
1574 if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,
1575 p2m_invalid) )
1576 goto error;
1578 /* Copy all existing mappings from the page list and m2p */
1579 page_list_for_each(page, &d->page_list)
1581 mfn = page_to_mfn(page);
1582 gfn = get_gpfn_from_mfn(mfn_x(mfn));
1583 page_count++;
1584 if (
1585 #ifdef __x86_64__
1586 (gfn != 0x5555555555555555L)
1587 #else
1588 (gfn != 0x55555555L)
1589 #endif
1590 && gfn != INVALID_M2P_ENTRY
1591 && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
1592 goto error;
1595 P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
1596 p2m_unlock(p2m);
1597 return 0;
1599 error:
1600 P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
1601 PRI_mfn "\n", gfn, mfn_x(mfn));
1602 p2m_unlock(p2m);
1603 return -ENOMEM;
1606 void p2m_teardown(struct domain *d)
1607 /* Return all the p2m pages to Xen.
1608 * We know we don't have any extra mappings to these pages */
1610 struct page_info *pg;
1611 struct p2m_domain *p2m = d->arch.p2m;
1613 p2m_lock(p2m);
1614 d->arch.phys_table = pagetable_null();
1616 while ( (pg = page_list_remove_head(&p2m->pages)) )
1617 p2m->free_page(d, pg);
1618 p2m_unlock(p2m);
1621 void p2m_final_teardown(struct domain *d)
1623 xfree(d->arch.p2m);
1624 d->arch.p2m = NULL;
1627 #if P2M_AUDIT
1628 static void audit_p2m(struct domain *d)
1630 struct page_info *page;
1631 struct domain *od;
1632 unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
1633 int entry_count = 0;
1634 mfn_t p2mfn;
1635 unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
1636 int test_linear;
1637 p2m_type_t type;
1639 if ( !paging_mode_translate(d) )
1640 return;
1642 //P2M_PRINTK("p2m audit starts\n");
1644 test_linear = ( (d == current->domain)
1645 && !pagetable_is_null(current->arch.monitor_table) );
1646 if ( test_linear )
1647 flush_tlb_local();
1649 spin_lock(&d->page_alloc_lock);
1651 /* Audit part one: walk the domain's page allocation list, checking
1652 * the m2p entries. */
1653 page_list_for_each ( page, &d->page_list )
1655 mfn = mfn_x(page_to_mfn(page));
1657 // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
1659 od = page_get_owner(page);
1661 if ( od != d )
1663 P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
1664 mfn, od, (od?od->domain_id:-1), d, d->domain_id);
1665 continue;
1668 gfn = get_gpfn_from_mfn(mfn);
1669 if ( gfn == INVALID_M2P_ENTRY )
1671 orphans_i++;
1672 //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
1673 // mfn);
1674 continue;
1677 if ( gfn == 0x55555555 )
1679 orphans_d++;
1680 //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
1681 // mfn);
1682 continue;
1685 p2mfn = gfn_to_mfn_type_foreign(d, gfn, &type, p2m_query);
1686 if ( mfn_x(p2mfn) != mfn )
1688 mpbad++;
1689 P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
1690 " (-> gfn %#lx)\n",
1691 mfn, gfn, mfn_x(p2mfn),
1692 (mfn_valid(p2mfn)
1693 ? get_gpfn_from_mfn(mfn_x(p2mfn))
1694 : -1u));
1695 /* This m2p entry is stale: the domain has another frame in
1696 * this physical slot. No great disaster, but for neatness,
1697 * blow away the m2p entry. */
1698 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
1701 if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
1703 lp2mfn = mfn_x(gfn_to_mfn_query(d, gfn, &type));
1704 if ( lp2mfn != mfn_x(p2mfn) )
1706 P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
1707 "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn));
1711 // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
1712 // mfn, gfn, p2mfn, lp2mfn);
1715 spin_unlock(&d->page_alloc_lock);
1717 /* Audit part two: walk the domain's p2m table, checking the entries. */
1718 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
1720 l2_pgentry_t *l2e;
1721 l1_pgentry_t *l1e;
1722 int i1, i2;
1724 #if CONFIG_PAGING_LEVELS == 4
1725 l4_pgentry_t *l4e;
1726 l3_pgentry_t *l3e;
1727 int i3, i4;
1728 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1729 #else /* CONFIG_PAGING_LEVELS == 3 */
1730 l3_pgentry_t *l3e;
1731 int i3;
1732 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1733 #endif
1735 gfn = 0;
1736 #if CONFIG_PAGING_LEVELS >= 4
1737 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
1739 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
1741 gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
1742 continue;
1744 l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
1745 #endif
1746 for ( i3 = 0;
1747 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
1748 i3++ )
1750 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
1752 gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
1753 continue;
1755 l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
1756 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
1758 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
1760 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
1761 && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
1762 == p2m_populate_on_demand ) )
1763 entry_count+=(1<<9);
1764 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1765 continue;
1768 /* check for super page */
1769 if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
1771 mfn = l2e_get_pfn(l2e[i2]);
1772 ASSERT(mfn_valid(_mfn(mfn)));
1773 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
1775 m2pfn = get_gpfn_from_mfn(mfn+i1);
1776 if ( m2pfn != (gfn + i1) )
1778 pmbad++;
1779 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1780 " -> gfn %#lx\n", gfn+i1, mfn+i1,
1781 m2pfn);
1782 BUG();
1785 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1786 continue;
1789 l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
1791 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
1793 if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
1795 if ( p2m_flags_to_type(l1e_get_flags(l1e[i1]))
1796 == p2m_populate_on_demand )
1797 entry_count++;
1798 continue;
1800 mfn = l1e_get_pfn(l1e[i1]);
1801 ASSERT(mfn_valid(_mfn(mfn)));
1802 m2pfn = get_gpfn_from_mfn(mfn);
1803 if ( m2pfn != gfn &&
1804 p2m_flags_to_type(l1e_get_flags(l1e[i1])) != p2m_mmio_direct )
1806 pmbad++;
1807 printk("mismatch: gfn %#lx -> mfn %#lx"
1808 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1809 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1810 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1811 BUG();
1814 unmap_domain_page(l1e);
1816 unmap_domain_page(l2e);
1818 #if CONFIG_PAGING_LEVELS >= 4
1819 unmap_domain_page(l3e);
1821 #endif
1823 #if CONFIG_PAGING_LEVELS == 4
1824 unmap_domain_page(l4e);
1825 #else /* CONFIG_PAGING_LEVELS == 3 */
1826 unmap_domain_page(l3e);
1827 #endif
1831 if ( entry_count != d->arch.p2m->pod.entry_count )
1833 printk("%s: refcounted entry count %d, audit count %d!\n",
1834 __func__,
1835 d->arch.p2m->pod.entry_count,
1836 entry_count);
1837 BUG();
1840 //P2M_PRINTK("p2m audit complete\n");
1841 //if ( orphans_i | orphans_d | mpbad | pmbad )
1842 // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
1843 // orphans_i + orphans_d, orphans_i, orphans_d,
1844 if ( mpbad | pmbad )
1845 P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
1846 pmbad, mpbad);
1848 #endif /* P2M_AUDIT */
1852 static void
1853 p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
1854 unsigned int page_order)
1856 unsigned long i;
1858 if ( !paging_mode_translate(d) )
1860 if ( need_iommu(d) )
1861 for ( i = 0; i < (1 << page_order); i++ )
1862 iommu_unmap_page(d, mfn + i);
1863 return;
1866 P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
1868 set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
1869 for ( i = 0; i < (1UL << page_order); i++ )
1870 set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
1873 void
1874 guest_physmap_remove_page(struct domain *d, unsigned long gfn,
1875 unsigned long mfn, unsigned int page_order)
1877 p2m_lock(d->arch.p2m);
1878 audit_p2m(d);
1879 p2m_remove_page(d, gfn, mfn, page_order);
1880 audit_p2m(d);
1881 p2m_unlock(d->arch.p2m);
1884 #if CONFIG_PAGING_LEVELS == 3
1885 static int gfn_check_limit(
1886 struct domain *d, unsigned long gfn, unsigned int order)
1888 /*
1889 * 32bit AMD nested paging does not support guests above 4GB due to a
1890 * hardware translation limit. This limitation is checked by comparing
1891 * gfn with 0xfffffUL.
1892 */
1893 if ( !paging_mode_hap(d) || ((gfn + (1ul << order)) <= 0x100000UL) ||
1894 (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) )
1895 return 0;
1897 if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
1898 dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
1899 " 4GB: specify 'hap=0' domain config option.\n",
1900 d->domain_id);
1902 return -EINVAL;
1904 #else
1905 #define gfn_check_limit(d, g, o) 0
1906 #endif
1908 int
1909 guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
1910 unsigned int order)
1912 struct p2m_domain *p2md = d->arch.p2m;
1913 unsigned long i;
1914 p2m_type_t ot;
1915 mfn_t omfn;
1916 int pod_count = 0;
1917 int rc = 0;
1919 BUG_ON(!paging_mode_translate(d));
1921 rc = gfn_check_limit(d, gfn, order);
1922 if ( rc != 0 )
1923 return rc;
1925 p2m_lock(p2md);
1926 audit_p2m(d);
1928 P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
1930 /* Make sure all gpfns are unused */
1931 for ( i = 0; i < (1UL << order); i++ )
1933 omfn = gfn_to_mfn_query(d, gfn + i, &ot);
1934 if ( p2m_is_ram(ot) )
1936 printk("%s: gfn_to_mfn returned type %d!\n",
1937 __func__, ot);
1938 rc = -EBUSY;
1939 goto out;
1941 else if ( ot == p2m_populate_on_demand )
1943 /* Count how many PoD entries we'll be replacing if successful */
1944 pod_count++;
1948 /* Now, actually do the two-way mapping */
1949 if ( !set_p2m_entry(d, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order,
1950 p2m_populate_on_demand) )
1951 rc = -EINVAL;
1952 else
1954 p2md->pod.entry_count += 1 << order; /* Lock: p2m */
1955 p2md->pod.entry_count -= pod_count;
1956 BUG_ON(p2md->pod.entry_count < 0);
1959 audit_p2m(d);
1960 p2m_unlock(p2md);
1962 out:
1963 return rc;
1967 int
1968 guest_physmap_add_entry(struct domain *d, unsigned long gfn,
1969 unsigned long mfn, unsigned int page_order,
1970 p2m_type_t t)
1972 unsigned long i, ogfn;
1973 p2m_type_t ot;
1974 mfn_t omfn;
1975 int pod_count = 0;
1976 int rc = 0;
1978 if ( !paging_mode_translate(d) )
1980 if ( need_iommu(d) && t == p2m_ram_rw )
1982 for ( i = 0; i < (1 << page_order); i++ )
1983 if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 )
1985 while ( i-- > 0 )
1986 iommu_unmap_page(d, mfn + i);
1987 return rc;
1990 return 0;
1993 rc = gfn_check_limit(d, gfn, page_order);
1994 if ( rc != 0 )
1995 return rc;
1997 p2m_lock(d->arch.p2m);
1998 audit_p2m(d);
2000 P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
2002 /* First, remove m->p mappings for existing p->m mappings */
2003 for ( i = 0; i < (1UL << page_order); i++ )
2005 omfn = gfn_to_mfn_query(d, gfn + i, &ot);
2006 if ( p2m_is_ram(ot) )
2008 ASSERT(mfn_valid(omfn));
2009 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
2011 else if ( ot == p2m_populate_on_demand )
2013 /* Count how many PoD entries we'll be replacing if successful */
2014 pod_count++;
2018 /* Then, look for m->p mappings for this range and deal with them */
2019 for ( i = 0; i < (1UL << page_order); i++ )
2021 ogfn = mfn_to_gfn(d, _mfn(mfn+i));
2022 if (
2023 #ifdef __x86_64__
2024 (ogfn != 0x5555555555555555L)
2025 #else
2026 (ogfn != 0x55555555L)
2027 #endif
2028 && (ogfn != INVALID_M2P_ENTRY)
2029 && (ogfn != gfn + i) )
2031 /* This machine frame is already mapped at another physical
2032 * address */
2033 P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
2034 mfn + i, ogfn, gfn + i);
2035 omfn = gfn_to_mfn_query(d, ogfn, &ot);
2036 if ( p2m_is_ram(ot) )
2038 ASSERT(mfn_valid(omfn));
2039 P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
2040 ogfn , mfn_x(omfn));
2041 if ( mfn_x(omfn) == (mfn + i) )
2042 p2m_remove_page(d, ogfn, mfn + i, 0);
2047 /* Now, actually do the two-way mapping */
2048 if ( mfn_valid(_mfn(mfn)) )
2050 if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
2051 rc = -EINVAL;
2052 for ( i = 0; i < (1UL << page_order); i++ )
2053 set_gpfn_from_mfn(mfn+i, gfn+i);
2055 else
2057 gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
2058 gfn, mfn);
2059 if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
2060 p2m_invalid) )
2061 rc = -EINVAL;
2062 else
2064 d->arch.p2m->pod.entry_count -= pod_count; /* Lock: p2m */
2065 BUG_ON(d->arch.p2m->pod.entry_count < 0);
2069 audit_p2m(d);
2070 p2m_unlock(d->arch.p2m);
2072 return rc;
2075 /* Walk the whole p2m table, changing any entries of the old type
2076 * to the new type. This is used in hardware-assisted paging to
2077 * quickly enable or disable log-dirty tracking */
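/* Usage sketch (callers live outside this file): turning log-dirty mode on
 * would typically go through the locked wrapper p2m_change_entry_type_global()
 * above, roughly
 *
 *     p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
 *
 * and the same call with the two types swapped turns it back off. */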
2078 void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
2080 unsigned long mfn, gfn, flags;
2081 l1_pgentry_t l1e_content;
2082 l1_pgentry_t *l1e;
2083 l2_pgentry_t *l2e;
2084 mfn_t l1mfn, l2mfn;
2085 int i1, i2;
2086 l3_pgentry_t *l3e;
2087 int i3;
2088 #if CONFIG_PAGING_LEVELS == 4
2089 l4_pgentry_t *l4e;
2090 int i4;
2091 #endif /* CONFIG_PAGING_LEVELS == 4 */
2093 if ( !paging_mode_translate(d) )
2094 return;
2096 if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
2097 return;
2099 ASSERT(p2m_locked_by_me(d->arch.p2m));
2101 #if CONFIG_PAGING_LEVELS == 4
2102 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
2103 #else /* CONFIG_PAGING_LEVELS == 3 */
2104 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
2105 #endif
2107 #if CONFIG_PAGING_LEVELS >= 4
2108 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
2110 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
2112 continue;
2114 l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
2115 #endif
2116 for ( i3 = 0;
2117 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
2118 i3++ )
2120 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
2122 continue;
2124 l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
2125 l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
2126 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
2128 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
2130 continue;
2133 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
2135 flags = l2e_get_flags(l2e[i2]);
2136 if ( p2m_flags_to_type(flags) != ot )
2137 continue;
2138 mfn = l2e_get_pfn(l2e[i2]);
2139 gfn = get_gpfn_from_mfn(mfn);
2140 flags = p2m_type_to_flags(nt);
2141 l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
2142 paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
2143 l2mfn, l1e_content, 2);
2144 continue;
2147 l1mfn = _mfn(l2e_get_pfn(l2e[i2]));
2148 l1e = map_domain_page(mfn_x(l1mfn));
2150 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
2152 flags = l1e_get_flags(l1e[i1]);
2153 if ( p2m_flags_to_type(flags) != ot )
2154 continue;
2155 mfn = l1e_get_pfn(l1e[i1]);
2156 gfn = get_gpfn_from_mfn(mfn);
2157 /* create a new l1e entry with the new type */
2158 flags = p2m_type_to_flags(nt);
2159 l1e_content = l1e_from_pfn(mfn, flags);
2160 paging_write_p2m_entry(d, gfn, &l1e[i1],
2161 l1mfn, l1e_content, 1);
2163 unmap_domain_page(l1e);
2165 unmap_domain_page(l2e);
2167 #if CONFIG_PAGING_LEVELS >= 4
2168 unmap_domain_page(l3e);
2170 #endif
2172 #if CONFIG_PAGING_LEVELS == 4
2173 unmap_domain_page(l4e);
2174 #else /* CONFIG_PAGING_LEVELS == 3 */
2175 unmap_domain_page(l3e);
2176 #endif
2180 /* Modify the p2m type of a single gfn from ot to nt, returning the
2181 * entry's previous type */
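/* For instance (sketch): temporarily write-protecting a RAM page and later
 * restoring it could be done with
 *
 *     p2m_type_t old = p2m_change_type(d, gfn, p2m_ram_rw, p2m_ram_ro);
 *     ...
 *     p2m_change_type(d, gfn, p2m_ram_ro, p2m_ram_rw);
 *
 * if the entry is not currently of the expected old type, nothing is
 * changed and the current type is simply returned. */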
2182 p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
2183 p2m_type_t ot, p2m_type_t nt)
2185 p2m_type_t pt;
2186 mfn_t mfn;
2188 p2m_lock(d->arch.p2m);
2190 mfn = gfn_to_mfn(d, gfn, &pt);
2191 if ( pt == ot )
2192 set_p2m_entry(d, gfn, mfn, 0, nt);
2194 p2m_unlock(d->arch.p2m);
2196 return pt;
2199 int
2200 set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
2202 int rc = 0;
2203 p2m_type_t ot;
2204 mfn_t omfn;
2206 if ( !paging_mode_translate(d) )
2207 return 0;
2209 omfn = gfn_to_mfn_query(d, gfn, &ot);
2210 if ( p2m_is_ram(ot) )
2212 ASSERT(mfn_valid(omfn));
2213 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
2216 P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
2217 rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
2218 if ( 0 == rc )
2219 gdprintk(XENLOG_ERR,
2220 "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
2221 gmfn_to_mfn(d, gfn));
2222 return rc;
2225 int
2226 clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
2228 int rc = 0;
2229 unsigned long mfn;
2231 if ( !paging_mode_translate(d) )
2232 return 0;
2234 mfn = gmfn_to_mfn(d, gfn);
2235 if ( INVALID_MFN == mfn )
2237 gdprintk(XENLOG_ERR,
2238 "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
2239 return 0;
2241 rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
2243 return rc;
2246 /*
2247 * Local variables:
2248 * mode: C
2249 * c-set-style: "BSD"
2250 * c-basic-offset: 4
2251 * indent-tabs-mode: nil
2252 * End:
2253 */