ia64/xen-unstable

view xen/arch/x86/mm/p2m.c @ 19122:916ca93a8658

x86: Fix the build.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jan 29 23:40:37 2009 +0000 (2009-01-29)
parents e1352667df95
children 5848b49b74fc
1 /******************************************************************************
2 * arch/x86/mm/p2m.c
3 *
4 * physical-to-machine mappings for automatically-translated domains.
5 *
6 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
7 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
8 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
9 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
26 #include <asm/domain.h>
27 #include <asm/page.h>
28 #include <asm/paging.h>
29 #include <asm/p2m.h>
30 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
31 #include <xen/iommu.h>
33 /* Debugging and auditing of the P2M code? */
34 #define P2M_AUDIT 0
35 #define P2M_DEBUGGING 0
37 /*
38 * The P2M lock. This protects all updates to the p2m table.
39 * Updates are expected to be safe against concurrent reads,
40 * which do *not* require the lock.
41 *
42 * Locking discipline: always acquire this lock before the shadow or HAP one
43 */
45 #define p2m_lock_init(_p2m) \
46 do { \
47 spin_lock_init(&(_p2m)->lock); \
48 (_p2m)->locker = -1; \
49 (_p2m)->locker_function = "nobody"; \
50 } while (0)
52 #define p2m_lock(_p2m) \
53 do { \
54 if ( unlikely((_p2m)->locker == current->processor) ) \
55 { \
56 printk("Error: p2m lock held by %s\n", \
57 (_p2m)->locker_function); \
58 BUG(); \
59 } \
60 spin_lock(&(_p2m)->lock); \
61 ASSERT((_p2m)->locker == -1); \
62 (_p2m)->locker = current->processor; \
63 (_p2m)->locker_function = __func__; \
64 } while (0)
66 #define p2m_unlock(_p2m) \
67 do { \
68 ASSERT((_p2m)->locker == current->processor); \
69 (_p2m)->locker = -1; \
70 (_p2m)->locker_function = "nobody"; \
71 spin_unlock(&(_p2m)->lock); \
72 } while (0)
74 #define p2m_locked_by_me(_p2m) \
75 (current->processor == (_p2m)->locker)
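/* Illustrative sketch (added; not part of the original file): the intended
 * usage pattern for the locking macros above.  The function name is
 * hypothetical -- the real callers are the p2m update paths further down. */
#if 0
static void example_p2m_update(struct domain *d)
{
    struct p2m_domain *p2m = d->arch.p2m;

    p2m_lock(p2m);                 /* records current->processor and __func__ */
    ASSERT(p2m_locked_by_me(p2m)); /* cheap check that we are the holder */
    /* ... read-modify-write of the p2m table goes here ... */
    p2m_unlock(p2m);               /* locker reset to -1 / "nobody" */
}
#endif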
77 /* Printouts */
78 #define P2M_PRINTK(_f, _a...) \
79 debugtrace_printk("p2m: %s(): " _f, __func__, ##_a)
80 #define P2M_ERROR(_f, _a...) \
81 printk("pg error: %s(): " _f, __func__, ##_a)
82 #if P2M_DEBUGGING
83 #define P2M_DEBUG(_f, _a...) \
84 debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a)
85 #else
86 #define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0)
87 #endif
90 /* Override macros from asm/page.h to make them work with mfn_t */
91 #undef mfn_to_page
92 #define mfn_to_page(_m) (frame_table + mfn_x(_m))
93 #undef mfn_valid
94 #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
95 #undef page_to_mfn
96 #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
99 /* PTE flags for the various types of p2m entry */
100 #define P2M_BASE_FLAGS \
101 (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
103 static unsigned long p2m_type_to_flags(p2m_type_t t)
104 {
105 unsigned long flags = (t & 0x7UL) << 9;
106 switch(t)
107 {
108 case p2m_invalid:
109 default:
110 return flags;
111 case p2m_ram_rw:
112 return flags | P2M_BASE_FLAGS | _PAGE_RW;
113 case p2m_ram_logdirty:
114 return flags | P2M_BASE_FLAGS;
115 case p2m_ram_ro:
116 return flags | P2M_BASE_FLAGS;
117 case p2m_mmio_dm:
118 return flags;
119 case p2m_mmio_direct:
120 return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
121 case p2m_populate_on_demand:
122 return flags;
123 }
124 }
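/* Worked example (added for illustration): the p2m type is stashed in the
 * three software-available PTE bits 9-11 by the "(t & 0x7UL) << 9" above,
 * and p2m_flags_to_type() (in asm/p2m.h) masks those bits back out.  So if
 * p2m_ram_logdirty is, say, 2, its entries carry 0x400 in the flags together
 * with P2M_BASE_FLAGS but *without* _PAGE_RW -- which is exactly what makes
 * guest writes fault while log-dirty mode is active. */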
126 #if P2M_AUDIT
127 static void audit_p2m(struct domain *d);
128 #else
129 # define audit_p2m(_d) do { (void)(_d); } while(0)
130 #endif /* P2M_AUDIT */
132 // Find the next level's P2M entry, checking for out-of-range gfn's...
133 // Returns NULL on error.
134 //
135 static l1_pgentry_t *
136 p2m_find_entry(void *table, unsigned long *gfn_remainder,
137 unsigned long gfn, u32 shift, u32 max)
138 {
139 u32 index;
141 index = *gfn_remainder >> shift;
142 if ( index >= max )
143 {
144 P2M_DEBUG("gfn=0x%lx out of range "
145 "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
146 gfn, *gfn_remainder, shift, index, max);
147 return NULL;
148 }
149 *gfn_remainder &= (1 << shift) - 1;
150 return (l1_pgentry_t *)table + index;
151 }
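/* Worked example (added for illustration): at the L2 step of a lookup,
 * shift = L2_PAGETABLE_SHIFT - PAGE_SHIFT = 9 and max = 512.  With
 * *gfn_remainder == 0x12345, index = 0x12345 >> 9 = 0x91 (in range), and
 * *gfn_remainder becomes 0x12345 & 0x1ff = 0x145, which is then the index
 * used at the L1 level on the next call. */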
153 // Walk one level of the P2M table, allocating a new table if required.
154 // Returns 0 on error.
155 //
156 static int
157 p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
158 unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
159 u32 max, unsigned long type)
160 {
161 l1_pgentry_t *l1_entry;
162 l1_pgentry_t *p2m_entry;
163 l1_pgentry_t new_entry;
164 void *next;
165 int i;
166 ASSERT(d->arch.p2m->alloc_page);
168 if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
169 shift, max)) )
170 return 0;
172 /* PoD: Not present doesn't imply empty. */
173 if ( !l1e_get_flags(*p2m_entry) )
174 {
175 struct page_info *pg = d->arch.p2m->alloc_page(d);
176 if ( pg == NULL )
177 return 0;
178 list_add_tail(&pg->list, &d->arch.p2m->pages);
179 pg->u.inuse.type_info = type | 1 | PGT_validated;
180 pg->count_info = 1;
182 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
183 __PAGE_HYPERVISOR|_PAGE_USER);
185 switch ( type ) {
186 case PGT_l3_page_table:
187 paging_write_p2m_entry(d, gfn,
188 p2m_entry, *table_mfn, new_entry, 4);
189 break;
190 case PGT_l2_page_table:
191 #if CONFIG_PAGING_LEVELS == 3
192 /* for PAE mode, PDPE only has PCD/PWT/P bits available */
193 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), _PAGE_PRESENT);
194 #endif
195 paging_write_p2m_entry(d, gfn,
196 p2m_entry, *table_mfn, new_entry, 3);
197 break;
198 case PGT_l1_page_table:
199 paging_write_p2m_entry(d, gfn,
200 p2m_entry, *table_mfn, new_entry, 2);
201 break;
202 default:
203 BUG();
204 break;
205 }
206 }
208 ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
210 /* split single large page into 4KB page in P2M table */
211 if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
212 {
213 unsigned long flags, pfn;
214 struct page_info *pg = d->arch.p2m->alloc_page(d);
215 if ( pg == NULL )
216 return 0;
217 list_add_tail(&pg->list, &d->arch.p2m->pages);
218 pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
219 pg->count_info = 1;
221 /* New splintered mappings inherit the flags of the old superpage,
222 * with a little reorganisation for the _PAGE_PSE_PAT bit. */
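/* Worked example (added for illustration): in a 2MB superpage entry the PAT
 * bit occupies bit 12, which l1e_get_pfn() hands back as bit 0 of the pfn,
 * so a superpage for frame 0x80000 with PAT set reads back as pfn 0x80001.
 * The code below strips that low bit (pfn -= 1) and keeps _PAGE_PSE in the
 * flags, because in the resulting 4KB entries that same bit position means
 * _PAGE_PAT; if PAT was not set, _PAGE_PSE is cleared instead so the small
 * entries do not accidentally request it. */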
223 flags = l1e_get_flags(*p2m_entry);
224 pfn = l1e_get_pfn(*p2m_entry);
225 if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
226 pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
227 else
228 flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
230 l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
231 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
232 {
233 new_entry = l1e_from_pfn(pfn + i, flags);
234 paging_write_p2m_entry(d, gfn,
235 l1_entry+i, *table_mfn, new_entry, 1);
236 }
237 unmap_domain_page(l1_entry);
239 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
240 __PAGE_HYPERVISOR|_PAGE_USER);
241 paging_write_p2m_entry(d, gfn,
242 p2m_entry, *table_mfn, new_entry, 2);
243 }
245 *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
246 next = map_domain_page(mfn_x(*table_mfn));
247 unmap_domain_page(*table);
248 *table = next;
250 return 1;
251 }
253 /*
254 * Populate-on-demand functionality
255 */
256 static
257 int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
258 unsigned int page_order, p2m_type_t p2mt);
260 int
261 p2m_pod_cache_add(struct domain *d,
262 struct page_info *page,
263 unsigned long order)
264 {
265 int i;
266 struct page_info *p;
267 struct p2m_domain *p2md = d->arch.p2m;
269 #ifndef NDEBUG
270 mfn_t mfn;
272 mfn = page_to_mfn(page);
274 /* Check to make sure this is a contiguous region */
275 if( mfn_x(mfn) & ((1 << order) - 1) )
276 {
277 printk("%s: mfn %lx not aligned order %lu! (mask %lx)\n",
278 __func__, mfn_x(mfn), order, ((1UL << order) - 1));
279 return -1;
280 }
282 for(i=0; i < 1 << order ; i++) {
283 struct domain * od;
285 p = mfn_to_page(_mfn(mfn_x(mfn) + i));
286 od = page_get_owner(p);
287 if(od != d)
288 {
289 printk("%s: mfn %lx expected owner d%d, got owner d%d!\n",
290 __func__, mfn_x(mfn), d->domain_id,
291 od?od->domain_id:-1);
292 return -1;
293 }
294 }
295 #endif
297 spin_lock(&d->page_alloc_lock);
299 /* First, take all pages off the domain list */
300 for(i=0; i < 1 << order ; i++)
301 {
302 p = page + i;
303 list_del(&p->list);
304 }
306 /* Then add the first one to the appropriate populate-on-demand list */
307 switch(order)
308 {
309 case 9:
310 list_add_tail(&page->list, &p2md->pod.super); /* lock: page_alloc */
311 p2md->pod.count += 1 << order;
312 break;
313 case 0:
314 list_add_tail(&page->list, &p2md->pod.single); /* lock: page_alloc */
315 p2md->pod.count += 1 ;
316 break;
317 default:
318 BUG();
319 }
321 spin_unlock(&d->page_alloc_lock);
323 return 0;
324 }
326 /* Get a page of size order from the populate-on-demand cache. Will break
327 * down 2-meg pages into singleton pages automatically. Returns null if
328 * a superpage is requested and no superpages are available. Must be called
329 * with the d->page_alloc_lock held. */
330 static struct page_info * p2m_pod_cache_get(struct domain *d,
331 unsigned long order)
332 {
333 struct p2m_domain *p2md = d->arch.p2m;
334 struct page_info *p = NULL;
335 int i;
337 if ( order == 9 && list_empty(&p2md->pod.super) )
338 {
339 return NULL;
340 }
341 else if ( order == 0 && list_empty(&p2md->pod.single) )
342 {
343 unsigned long mfn;
344 struct page_info *q;
346 BUG_ON( list_empty(&p2md->pod.super) );
348 /* Break up a superpage to make single pages. NB count doesn't
349 * need to be adjusted. */
350 printk("%s: Breaking up superpage.\n", __func__);
351 p = list_entry(p2md->pod.super.next, struct page_info, list);
352 list_del(&p->list);
353 mfn = mfn_x(page_to_mfn(p));
355 for ( i=0; i<(1<<9); i++ )
356 {
357 q = mfn_to_page(_mfn(mfn+i));
358 list_add_tail(&q->list, &p2md->pod.single);
359 }
360 }
362 switch ( order )
363 {
364 case 9:
365 BUG_ON( list_empty(&p2md->pod.super) );
366 p = list_entry(p2md->pod.super.next, struct page_info, list);
367 p2md->pod.count -= 1 << order; /* Lock: page_alloc */
368 break;
369 case 0:
370 BUG_ON( list_empty(&p2md->pod.single) );
371 p = list_entry(p2md->pod.single.next, struct page_info, list);
372 p2md->pod.count -= 1;
373 break;
374 default:
375 BUG();
376 }
378 list_del(&p->list);
380 /* Put the pages back on the domain page_list */
381 for ( i = 0 ; i < (1 << order) ; i++ )
382 {
383 BUG_ON(page_get_owner(p + i) != d);
384 list_add_tail(&p[i].list, &d->page_list);
385 }
387 return p;
388 }
390 /* Set the size of the cache, allocating or freeing as necessary. */
391 static int
392 p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
393 {
394 struct p2m_domain *p2md = d->arch.p2m;
395 int ret = 0;
397 /* Increasing the target */
398 while ( pod_target > p2md->pod.count )
399 {
400 struct page_info * page;
401 int order;
403 if ( (pod_target - p2md->pod.count) >= (1<<9) )
404 order = 9;
405 else
406 order = 0;
408 page = alloc_domheap_pages(d, order, 0);
409 if ( unlikely(page == NULL) )
410 goto out;
412 p2m_pod_cache_add(d, page, order);
413 }
415 /* Decreasing the target */
416 /* We hold the p2m lock here, so we don't need to worry about
417 * the cache disappearing under our feet. */
418 while ( pod_target < p2md->pod.count )
419 {
420 struct page_info * page;
421 int order, i;
423 /* Grab the lock before checking that pod.super is empty, or the last
424 * entries may disappear before we grab the lock. */
425 spin_lock(&d->page_alloc_lock);
427 if ( (p2md->pod.count - pod_target) > (1<<9)
428 && !list_empty(&p2md->pod.super) )
429 order = 9;
430 else
431 order = 0;
433 page = p2m_pod_cache_get(d, order);
435 ASSERT(page != NULL);
437 spin_unlock(&d->page_alloc_lock);
439 /* Then free them */
440 for ( i = 0 ; i < (1 << order) ; i++ )
441 {
442 /* Copied from common/memory.c:guest_remove_page() */
443 if ( unlikely(!get_page(page+i, d)) )
444 {
445 gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
446 ret = -EINVAL;
447 goto out;
448 }
450 if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
451 put_page_and_type(page+i);
453 if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
454 put_page(page+i);
456 put_page(page+i);
457 }
458 }
460 out:
461 return ret;
462 }
464 /*
465 * The "right behavior" here requires some careful thought. First, some
466 * definitions:
467 * + M: static_max
468 * + B: number of pages the balloon driver has ballooned down to.
469 * + P: Number of populated pages.
470 * + T: Old target
471 * + T': New target
472 *
473 * The following equations should hold:
474 * 0 <= P <= T <= B <= M
475 * d->arch.p2m->pod.entry_count == B - P
476 * d->tot_pages == P + d->arch.p2m->pod.count
477 *
478 * Now we have the following potential cases to cover:
479 * B <T': Set the PoD cache size equal to the number of outstanding PoD
480 * entries. The balloon driver will deflate the balloon to give back
481 * the remainder of the ram to the guest OS.
482 * T <T'<B : Increase PoD cache size.
483 * T'<T<=B : Here we have a choice. We can decrease the size of the cache,
484 * and get the memory back right away. However, that means every time we
485 * reduce the memory target we risk the guest attempting to populate the
486 * memory before the balloon driver has reached its new target. Safer to
487 * never reduce the cache size here, but only when the balloon driver frees
488 * PoD ranges.
489 *
490 * If there are many zero pages, we could reach the target also by doing
491 * zero sweeps and marking the ranges PoD; but the balloon driver will have
492 * to free this memory eventually anyway, so we don't actually gain that much
493 * by doing so.
494 *
495 * NB that the B < T' case may require adjustment to the cache
496 * size as PoD pages are freed as well; i.e., freeing a PoD-backed
497 * entry when pod.entry_count == pod.count requires us to reduce both
498 * pod.entry_count and pod.count.
499 */
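/* Worked example (added for illustration; numbers are arbitrary): with
 * B = 512 and P = 400, entry_count = B - P = 112.  If the new target is
 * T' = 700 (> B), then populated = 400, so the raw pod_target is 300, which
 * gets clamped to entry_count = 112 below: the cache only ever needs to
 * cover the outstanding PoD entries, and the balloon driver deflates to
 * supply the rest of the new target. */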
500 int
501 p2m_pod_set_mem_target(struct domain *d, unsigned long target)
502 {
503 unsigned pod_target;
504 struct p2m_domain *p2md = d->arch.p2m;
505 int ret = 0;
506 unsigned long populated;
508 /* P == B: Nothing to do. */
509 if ( p2md->pod.entry_count == 0 )
510 goto out;
512 /* T' < B: Don't reduce the cache size; let the balloon driver
513 * take care of it. */
514 if ( target < d->tot_pages )
515 goto out;
517 populated = d->tot_pages - p2md->pod.count;
519 pod_target = target - populated;
521 /* B < T': Set the cache size equal to # of outstanding entries,
522 * let the balloon driver fill in the rest. */
523 if ( pod_target > p2md->pod.entry_count )
524 pod_target = p2md->pod.entry_count;
526 ASSERT( pod_target > p2md->pod.count );
528 ret = p2m_pod_set_cache_target(d, pod_target);
530 out:
531 return ret;
532 }
534 void
535 p2m_pod_empty_cache(struct domain *d)
536 {
537 struct p2m_domain *p2md = d->arch.p2m;
538 struct list_head *q, *p;
540 spin_lock(&d->page_alloc_lock);
542 list_for_each_safe(p, q, &p2md->pod.super) /* lock: page_alloc */
543 {
544 int i;
545 struct page_info *page;
547 list_del(p);
549 page = list_entry(p, struct page_info, list);
551 for ( i = 0 ; i < (1 << 9) ; i++ )
552 {
553 BUG_ON(page_get_owner(page + i) != d);
554 list_add_tail(&page[i].list, &d->page_list);
555 }
557 p2md->pod.count -= 1<<9;
558 }
560 list_for_each_safe(p, q, &p2md->pod.single)
561 {
562 struct page_info *page;
564 list_del(p);
566 page = list_entry(p, struct page_info, list);
568 BUG_ON(page_get_owner(page) != d);
569 list_add_tail(&page->list, &d->page_list);
571 p2md->pod.count -= 1;
572 }
574 BUG_ON(p2md->pod.count != 0);
576 spin_unlock(&d->page_alloc_lock);
577 }
579 /* This function is needed for two reasons:
580 * + To properly handle clearing of PoD entries
581 * + To "steal back" memory being freed for the PoD cache, rather than
582 * releasing it.
583 *
584 * Once both of these functions have been completed, we can return and
585 * allow decrease_reservation() to handle everything else.
586 */
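/* Sketch of the expected call site (an assumption based on the usual
 * decrease_reservation path in common/memory.c; not code from this file):
 *
 *     if ( p2m_pod_decrease_reservation(d, gpfn, extent_order) )
 *         continue;    // PoD code handled the whole extent
 *     // otherwise fall through to the normal guest_remove_page() loop
 */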
587 int
588 p2m_pod_decrease_reservation(struct domain *d,
589 xen_pfn_t gpfn,
590 unsigned int order)
591 {
592 struct p2m_domain *p2md = d->arch.p2m;
593 int ret=0;
594 int i;
596 int steal_for_cache = 0;
597 int pod = 0, nonpod = 0, ram = 0;
600 /* If we don't have any outstanding PoD entries, let things take their
601 * course */
602 if ( p2md->pod.entry_count == 0 )
603 goto out;
605 /* Figure out if we need to steal some freed memory for our cache */
606 steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
608 p2m_lock(p2md);
609 audit_p2m(d);
611 /* See what's in here. */
612 /* FIXME: Add contiguous; query for PSE entries? */
613 for ( i=0; i<(1<<order); i++)
614 {
615 p2m_type_t t;
617 gfn_to_mfn_query(d, gpfn + i, &t);
619 if ( t == p2m_populate_on_demand )
620 pod++;
621 else
622 {
623 nonpod++;
624 if ( p2m_is_ram(t) )
625 ram++;
626 }
627 }
629 /* No populate-on-demand? Don't need to steal anything? Then we're done!*/
630 if(!pod && !steal_for_cache)
631 goto out_unlock;
633 if ( !nonpod )
634 {
635 /* All PoD: Mark the whole region invalid and tell caller
636 * we're done. */
637 set_p2m_entry(d, gpfn, _mfn(INVALID_MFN), order, p2m_invalid);
638 p2md->pod.entry_count-=(1<<order); /* Lock: p2m */
639 BUG_ON(p2md->pod.entry_count < 0);
640 ret = 1;
641 goto out_unlock;
642 }
644 /* FIXME: Steal contig 2-meg regions for cache */
646 /* Process as long as:
647 * + There are PoD entries to handle, or
648 * + There is ram left, and we want to steal it
649 */
650 for ( i=0;
651 i<(1<<order) && (pod>0 || (steal_for_cache && ram > 0));
652 i++)
653 {
654 mfn_t mfn;
655 p2m_type_t t;
657 mfn = gfn_to_mfn_query(d, gpfn + i, &t);
658 if ( t == p2m_populate_on_demand )
659 {
660 set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
661 p2md->pod.entry_count--; /* Lock: p2m */
662 BUG_ON(p2md->pod.entry_count < 0);
663 pod--;
664 }
665 else if ( steal_for_cache && p2m_is_ram(t) )
666 {
667 struct page_info *page;
669 ASSERT(mfn_valid(mfn));
671 page = mfn_to_page(mfn);
673 set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
674 set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
676 p2m_pod_cache_add(d, page, 0);
678 steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
680 nonpod--;
681 ram--;
682 }
683 }
685 /* If we've reduced our "liabilities" below our "assets", free some */
686 if ( p2md->pod.entry_count < p2md->pod.count )
687 {
688 printk("b %d\n", p2md->pod.entry_count);
689 p2m_pod_set_cache_target(d, p2md->pod.entry_count);
690 }
692 /* If there are no more non-PoD entries, tell decrease_reservation() that
693 * there's nothing left to do. */
694 if ( nonpod == 0 )
695 ret = 1;
697 out_unlock:
698 audit_p2m(d);
699 p2m_unlock(p2md);
701 out:
702 return ret;
703 }
705 void
706 p2m_pod_dump_data(struct domain *d)
707 {
708 struct p2m_domain *p2md = d->arch.p2m;
710 printk(" PoD entries=%d cachesize=%d\n",
711 p2md->pod.entry_count, p2md->pod.count);
712 }
714 #define superpage_aligned(_x) (((_x)&((1<<9)-1))==0)
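/* e.g. superpage_aligned(0x12200) is true (the low nine bits are clear)
 * while superpage_aligned(0x12345) is not -- gfns are considered in
 * 512-page (2MB) units.  (Example added for illustration.) */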
716 /* Search for all-zero superpages to be reclaimed as superpages for the
717 * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */
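/* Summary of the stages below (comment added for clarity):
 *  1. check that the 512 gfns map a contiguous, aligned, same-type RAM run;
 *  2. quick scan of the first few words of each page for non-zero data;
 *  3. tentatively replace the whole range with a PoD superpage entry;
 *  4. re-check refcounts and do a full zero scan, rolling the original
 *     mapping back (out_reset) if anything fails. */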
718 static int
719 p2m_pod_zero_check_superpage(struct domain *d, unsigned long gfn)
720 {
721 mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
722 p2m_type_t type, type0 = 0;
723 unsigned long * map = NULL;
724 int ret=0, reset = 0;
725 int i, j;
726 int max_ref = 1;
728 if ( !superpage_aligned(gfn) )
729 goto out;
731 /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
732 if ( paging_mode_shadow(d) )
733 max_ref++;
735 /* Look up the mfns, checking that they form a contiguous, suitably
736 * aligned run of pages of the same RAM type. */
737 for ( i=0; i<(1<<9); i++ )
738 {
740 mfn = gfn_to_mfn_query(d, gfn + i, &type);
742 if ( i == 0 )
743 {
744 mfn0 = mfn;
745 type0 = type;
746 }
748 /* Conditions that must be met to reclaim this superpage as a superpage:
749 * + All gfns are ram types
750 * + All gfns have the same type
751 * + All of the mfns are allocated to a domain
752 * + None of the mfns are used as pagetables
753 * + The first mfn is 2-meg aligned
754 * + All the other mfns are in sequence
755 * Adding for good measure:
756 * + None of the mfns are likely to be mapped elsewhere (refcount
757 * 2 or less for shadow, 1 for hap)
758 */
759 if ( !p2m_is_ram(type)
760 || type != type0
761 || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
762 || ( (mfn_to_page(mfn)->count_info & PGC_page_table) != 0 )
763 || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
764 || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
765 || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
766 goto out;
767 }
769 /* Now, do a quick check to see if it may be zero before unmapping. */
770 for ( i=0; i<(1<<9); i++ )
771 {
772 /* Quick zero-check */
773 map = map_domain_page(mfn_x(mfn0) + i);
775 for ( j=0; j<16; j++ )
776 if( *(map+j) != 0 )
777 break;
779 unmap_domain_page(map);
781 if ( j < 16 )
782 goto out;
784 }
786 /* Try to remove the page, restoring old mapping if it fails. */
787 set_p2m_entry(d, gfn,
788 _mfn(POPULATE_ON_DEMAND_MFN), 9,
789 p2m_populate_on_demand);
791 /* Make sure none of the MFNs are used elsewhere... for example, mapped
792 * via the grant table interface, or by qemu. Allow one refcount for
793 * being allocated to the domain. */
794 for ( i=0; i < (1<<9); i++ )
795 {
796 mfn = _mfn(mfn_x(mfn0) + i);
797 if ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > 1 )
798 {
799 reset = 1;
800 goto out_reset;
801 }
802 }
804 /* Finally, do a full zero-check */
805 for ( i=0; i < (1<<9); i++ )
806 {
807 map = map_domain_page(mfn_x(mfn0) + i);
809 for ( j=0; j<PAGE_SIZE/sizeof(*map); j++ )
810 if( *(map+j) != 0 )
811 {
812 reset = 1;
813 break;
814 }
816 unmap_domain_page(map);
818 if ( reset )
819 goto out_reset;
820 }
822 /* Finally! We've passed all the checks, and can add the mfn superpage
823 * back on the PoD cache, and account for the new p2m PoD entries */
824 p2m_pod_cache_add(d, mfn_to_page(mfn0), 9);
825 d->arch.p2m->pod.entry_count += (1<<9);
827 out_reset:
828 if ( reset )
829 set_p2m_entry(d, gfn, mfn0, 9, type0);
831 out:
832 return ret;
833 }
835 static void
836 p2m_pod_zero_check(struct domain *d, unsigned long *gfns, int count)
837 {
838 mfn_t mfns[count];
839 p2m_type_t types[count];
840 unsigned long * map[count];
842 int i, j;
843 int max_ref = 1;
845 /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
846 if ( paging_mode_shadow(d) )
847 max_ref++;
849 /* First, get the gfn list, translate to mfns, and map the pages. */
850 for ( i=0; i<count; i++ )
851 {
852 mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i);
853 /* If this is ram, and not a pagetable, and probably not mapped
854 elsewhere, map it; otherwise, skip. */
855 if ( p2m_is_ram(types[i])
856 && ( (mfn_to_page(mfns[i])->count_info & PGC_allocated) != 0 )
857 && ( (mfn_to_page(mfns[i])->count_info & PGC_page_table) == 0 )
858 && ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) <= max_ref ) )
859 map[i] = map_domain_page(mfn_x(mfns[i]));
860 else
861 map[i] = NULL;
862 }
864 /* Then, go through and check for zeroed pages, removing write permission
865 * for those with zeroes. */
866 for ( i=0; i<count; i++ )
867 {
868 if(!map[i])
869 continue;
871 /* Quick zero-check */
872 for ( j=0; j<16; j++ )
873 if( *(map[i]+j) != 0 )
874 break;
876 if ( j < 16 )
877 {
878 unmap_domain_page(map[i]);
879 map[i] = NULL;
880 continue;
881 }
883 /* Try to remove the page, restoring old mapping if it fails. */
884 set_p2m_entry(d, gfns[i],
885 _mfn(POPULATE_ON_DEMAND_MFN), 0,
886 p2m_populate_on_demand);
888 /* See if the page was successfully unmapped. (Allow one refcount
889 * for being allocated to a domain.) */
890 if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 )
891 {
892 unmap_domain_page(map[i]);
893 map[i] = NULL;
895 set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
897 continue;
898 }
899 }
901 /* Now check each page for real */
902 for ( i=0; i < count; i++ )
903 {
904 if(!map[i])
905 continue;
907 for ( j=0; j<PAGE_SIZE/sizeof(*map[i]); j++ )
908 if( *(map[i]+j) != 0 )
909 break;
911 /* See comment in p2m_pod_zero_check_superpage() re gnttab
912 * check timing. */
913 if ( j < PAGE_SIZE/sizeof(*map[i]) )
914 {
915 set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
916 continue;
917 }
918 else
919 {
920 /* Add to cache, and account for the new p2m PoD entry */
921 p2m_pod_cache_add(d, mfn_to_page(mfns[i]), 0);
922 d->arch.p2m->pod.entry_count++;
923 }
925 unmap_domain_page(map[i]);
926 map[i] = NULL;
927 }
929 }
931 #define POD_SWEEP_LIMIT 1024
932 static void
933 p2m_pod_emergency_sweep_super(struct domain *d)
934 {
935 struct p2m_domain *p2md = d->arch.p2m;
936 unsigned long i, start, limit;
938 if ( p2md->pod.reclaim_super == 0 )
939 {
940 p2md->pod.reclaim_super = (p2md->pod.max_guest>>9)<<9;
941 p2md->pod.reclaim_super -= (1<<9);
942 }
944 start = p2md->pod.reclaim_super;
945 limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
947 for ( i=p2md->pod.reclaim_super ; i > 0 ; i-=(1<<9) )
948 {
949 p2m_pod_zero_check_superpage(d, i);
950 /* Stop if we're past our limit and we have found *something*.
951 *
952 * NB that this is a zero-sum game; we're increasing our cache size
953 * by increasing our 'debt'. Since we hold the p2m lock,
954 * (entry_count - count) must remain the same. */
955 if ( !list_empty(&p2md->pod.super) && i < limit )
956 break;
957 }
959 p2md->pod.reclaim_super = i ? i - (1<<9) : 0;
961 }
963 #define POD_SWEEP_STRIDE 16
964 static void
965 p2m_pod_emergency_sweep(struct domain *d)
966 {
967 struct p2m_domain *p2md = d->arch.p2m;
968 unsigned long gfns[POD_SWEEP_STRIDE];
969 unsigned long i, j=0, start, limit;
970 p2m_type_t t;
973 if ( p2md->pod.reclaim_single == 0 )
974 p2md->pod.reclaim_single = p2md->pod.max_guest;
976 start = p2md->pod.reclaim_single;
977 limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
979 /* FIXME: Figure out how to avoid superpages */
980 for ( i=p2md->pod.reclaim_single ; i > 0 ; i-- )
981 {
982 gfn_to_mfn_query(d, i, &t );
983 if ( p2m_is_ram(t) )
984 {
985 gfns[j] = i;
986 j++;
987 BUG_ON(j > POD_SWEEP_STRIDE);
988 if ( j == POD_SWEEP_STRIDE )
989 {
990 p2m_pod_zero_check(d, gfns, j);
991 j = 0;
992 }
993 }
994 /* Stop if we're past our limit and we have found *something*.
995 *
996 * NB that this is a zero-sum game; we're increasing our cache size
997 * by re-increasing our 'debt'. Since we hold the p2m lock,
998 * (entry_count - count) must remain the same. */
999 if ( p2md->pod.count > 0 && i < limit )
1000 break;
1003 if ( j )
1004 p2m_pod_zero_check(d, gfns, j);
1006 p2md->pod.reclaim_single = i ? i - 1 : i;
1010 static int
1011 p2m_pod_demand_populate(struct domain *d, unsigned long gfn,
1012 mfn_t table_mfn,
1013 l1_pgentry_t *p2m_entry,
1014 unsigned int order,
1015 p2m_query_t q)
1017 struct page_info *p = NULL; /* Compiler warnings */
1018 unsigned long gfn_aligned;
1019 mfn_t mfn;
1020 l1_pgentry_t entry_content = l1e_empty();
1021 struct p2m_domain *p2md = d->arch.p2m;
1022 int i;
1024 /* We need to grab the p2m lock here and re-check the entry to make
1025 * sure that someone else hasn't populated it for us, then hold it
1026 * until we're done. */
1027 p2m_lock(p2md);
1028 audit_p2m(d);
1030 /* Check to make sure this is still PoD */
1031 if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
1033 p2m_unlock(p2md);
1034 return 0;
1037 /* If we're low, start a sweep */
1038 if ( order == 9 && list_empty(&p2md->pod.super) )
1039 p2m_pod_emergency_sweep_super(d);
1041 if ( list_empty(&p2md->pod.single) &&
1042 ( ( order == 0 )
1043 || (order == 9 && list_empty(&p2md->pod.super) ) ) )
1044 p2m_pod_emergency_sweep(d);
1046 /* Keep track of the highest gfn demand-populated by a guest fault */
1047 if ( q == p2m_guest && gfn > p2md->pod.max_guest )
1048 p2md->pod.max_guest = gfn;
1050 spin_lock(&d->page_alloc_lock);
1052 if ( p2md->pod.count == 0 )
1053 goto out_of_memory;
1055 /* Get a page from the cache. A NULL return value indicates that the
1056 * 2-meg range should be marked singleton PoD, and retried */
1057 if ( (p = p2m_pod_cache_get(d, order)) == NULL )
1058 goto remap_and_retry;
1060 mfn = page_to_mfn(p);
1062 BUG_ON((mfn_x(mfn) & ((1 << order)-1)) != 0);
1064 spin_unlock(&d->page_alloc_lock);
1066 /* Fill in the entry in the p2m */
1067 switch ( order )
1069 case 9:
1071 l2_pgentry_t l2e_content;
1073 l2e_content = l2e_from_pfn(mfn_x(mfn),
1074 p2m_type_to_flags(p2m_ram_rw) | _PAGE_PSE);
1076 entry_content.l1 = l2e_content.l2;
1078 break;
1079 case 0:
1080 entry_content = l1e_from_pfn(mfn_x(mfn),
1081 p2m_type_to_flags(p2m_ram_rw));
1082 break;
1086 gfn_aligned = (gfn >> order) << order;
1088 paging_write_p2m_entry(d, gfn_aligned, p2m_entry, table_mfn,
1089 entry_content, (order==9)?2:1);
1091 for( i = 0 ; i < (1UL << order) ; i++ )
1092 set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_aligned + i);
1094 p2md->pod.entry_count -= (1 << order); /* Lock: p2m */
1095 BUG_ON(p2md->pod.entry_count < 0);
1096 audit_p2m(d);
1097 p2m_unlock(p2md);
1099 return 0;
1100 out_of_memory:
1101 spin_unlock(&d->page_alloc_lock);
1102 audit_p2m(d);
1103 p2m_unlock(p2md);
1104 printk("%s: Out of populate-on-demand memory!\n", __func__);
1105 domain_crash(d);
1106 return -1;
1107 remap_and_retry:
1108 BUG_ON(order != 9);
1109 spin_unlock(&d->page_alloc_lock);
1111 /* Remap this 2-meg region in singleton chunks */
1112 gfn_aligned = (gfn>>order)<<order;
1113 for(i=0; i<(1<<order); i++)
1114 set_p2m_entry(d, gfn_aligned+i, _mfn(POPULATE_ON_DEMAND_MFN), 0,
1115 p2m_populate_on_demand);
1116 audit_p2m(d);
1117 p2m_unlock(p2md);
1118 return 0;
1121 // Returns 0 on error (out of memory)
1122 static int
1123 p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1124 unsigned int page_order, p2m_type_t p2mt)
1126 // XXX -- this might be able to be faster iff current->domain == d
1127 mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
1128 void *table = map_domain_page(mfn_x(table_mfn));
1129 unsigned long i, gfn_remainder = gfn;
1130 l1_pgentry_t *p2m_entry;
1131 l1_pgentry_t entry_content;
1132 l2_pgentry_t l2e_content;
1133 int rv=0;
1135 #if CONFIG_PAGING_LEVELS >= 4
1136 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1137 L4_PAGETABLE_SHIFT - PAGE_SHIFT,
1138 L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
1139 goto out;
1140 #endif
1141 /*
1142 * When using PAE Xen, we only allow 33 bits of pseudo-physical
1143 * address in translated guests (i.e. 8 GBytes). This restriction
1144 * comes from wanting to map the P2M table into the 16MB RO_MPT hole
1145 * in Xen's address space for translated PV guests.
1146 * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
1147 */
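/* Arithmetic behind those limits (comment added for clarity): each L3 slot
 * of the p2m covers 512 * 512 * 4KB = 1GB of guest-physical space, so
 * allowing 8 slots gives the 8GB (33-bit) limit and 4 slots gives the 4GB
 * NPT limit -- which is what the (hap_enabled ? 4 : 8) argument below
 * encodes. */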
1148 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1149 L3_PAGETABLE_SHIFT - PAGE_SHIFT,
1150 ((CONFIG_PAGING_LEVELS == 3)
1151 ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
1152 : L3_PAGETABLE_ENTRIES),
1153 PGT_l2_page_table) )
1154 goto out;
1156 if ( page_order == 0 )
1158 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
1159 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
1160 L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
1161 goto out;
1163 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
1164 0, L1_PAGETABLE_ENTRIES);
1165 ASSERT(p2m_entry);
1167 if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
1168 entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
1169 else
1170 entry_content = l1e_empty();
1172 /* level 1 entry */
1173 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
1175 else
1177 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
1178 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
1179 L2_PAGETABLE_ENTRIES);
1180 ASSERT(p2m_entry);
1182 /* FIXME: Deal with 4k replaced by 2meg pages */
1183 if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
1184 !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
1186 P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
1187 domain_crash(d);
1188 goto out;
1191 if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )
1192 l2e_content = l2e_from_pfn(mfn_x(mfn),
1193 p2m_type_to_flags(p2mt) | _PAGE_PSE);
1194 else
1195 l2e_content = l2e_empty();
1197 entry_content.l1 = l2e_content.l2;
1198 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
1201 /* Track the highest gfn for which we have ever had a valid mapping */
1202 if ( mfn_valid(mfn)
1203 && (gfn + (1UL << page_order) - 1 > d->arch.p2m->max_mapped_pfn) )
1204 d->arch.p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
1206 if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) )
1208 if ( p2mt == p2m_ram_rw )
1209 for ( i = 0; i < (1UL << page_order); i++ )
1210 iommu_map_page(d, gfn+i, mfn_x(mfn)+i );
1211 else
1212 for ( int i = 0; i < (1UL << page_order); i++ )
1213 iommu_unmap_page(d, gfn+i);
1216 /* Success */
1217 rv = 1;
1219 out:
1220 unmap_domain_page(table);
1221 return rv;
1224 static mfn_t
1225 p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t,
1226 p2m_query_t q)
1228 mfn_t mfn;
1229 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
1230 l2_pgentry_t *l2e;
1231 l1_pgentry_t *l1e;
1233 ASSERT(paging_mode_translate(d));
1235 /* XXX This is for compatibility with the old model, where anything not
1236 * XXX marked as RAM was considered to be emulated MMIO space.
1237 * XXX Once we start explicitly registering MMIO regions in the p2m
1238 * XXX we will return p2m_invalid for unmapped gfns */
1239 *t = p2m_mmio_dm;
1241 mfn = pagetable_get_mfn(d->arch.phys_table);
1243 if ( gfn > d->arch.p2m->max_mapped_pfn )
1244 /* This pfn is higher than the highest the p2m map currently holds */
1245 return _mfn(INVALID_MFN);
1247 #if CONFIG_PAGING_LEVELS >= 4
1249 l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
1250 l4e += l4_table_offset(addr);
1251 if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
1253 unmap_domain_page(l4e);
1254 return _mfn(INVALID_MFN);
1256 mfn = _mfn(l4e_get_pfn(*l4e));
1257 unmap_domain_page(l4e);
1259 #endif
1261 l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
1262 #if CONFIG_PAGING_LEVELS == 3
1263 /* On PAE hosts the p2m has eight l3 entries, not four (see
1264 * shadow_set_p2m_entry()) so we can't use l3_table_offset.
1265 * Instead, just count the number of l3es from zero. It's safe
1266 * to do this because we already checked that the gfn is within
1267 * the bounds of the p2m. */
1268 l3e += (addr >> L3_PAGETABLE_SHIFT);
1269 #else
1270 l3e += l3_table_offset(addr);
1271 #endif
1272 if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
1274 unmap_domain_page(l3e);
1275 return _mfn(INVALID_MFN);
1277 mfn = _mfn(l3e_get_pfn(*l3e));
1278 unmap_domain_page(l3e);
1281 l2e = map_domain_page(mfn_x(mfn));
1282 l2e += l2_table_offset(addr);
1284 pod_retry_l2:
1285 if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
1287 /* PoD: Try to populate a 2-meg chunk */
1288 if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand )
1290 if ( q != p2m_query ) {
1291 if( !p2m_pod_demand_populate(d, gfn, mfn,
1292 (l1_pgentry_t *)l2e, 9, q) )
1293 goto pod_retry_l2;
1294 } else
1295 *t = p2m_populate_on_demand;
1298 unmap_domain_page(l2e);
1299 return _mfn(INVALID_MFN);
1301 else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
1303 mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
1304 *t = p2m_flags_to_type(l2e_get_flags(*l2e));
1305 unmap_domain_page(l2e);
1307 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
1308 return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
1311 mfn = _mfn(l2e_get_pfn(*l2e));
1312 unmap_domain_page(l2e);
1314 l1e = map_domain_page(mfn_x(mfn));
1315 l1e += l1_table_offset(addr);
1316 pod_retry_l1:
1317 if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
1319 /* PoD: Try to populate */
1320 if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
1322 if ( q != p2m_query ) {
1323 if( !p2m_pod_demand_populate(d, gfn, mfn,
1324 (l1_pgentry_t *)l1e, 0, q) )
1325 goto pod_retry_l1;
1326 } else
1327 *t = p2m_populate_on_demand;
1330 unmap_domain_page(l1e);
1331 return _mfn(INVALID_MFN);
1333 mfn = _mfn(l1e_get_pfn(*l1e));
1334 *t = p2m_flags_to_type(l1e_get_flags(*l1e));
1335 unmap_domain_page(l1e);
1337 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
1338 return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
1341 /* Read the current domain's p2m table (through the linear mapping). */
1342 static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t,
1343 p2m_query_t q)
1345 mfn_t mfn = _mfn(INVALID_MFN);
1346 p2m_type_t p2mt = p2m_mmio_dm;
1347 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
1348 /* XXX This is for compatibility with the old model, where anything not
1349 * XXX marked as RAM was considered to be emulated MMIO space.
1350 * XXX Once we start explicitly registering MMIO regions in the p2m
1351 * XXX we will return p2m_invalid for unmapped gfns */
1353 if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
1355 l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
1356 l2_pgentry_t l2e = l2e_empty();
1357 int ret;
1359 ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
1360 / sizeof(l1_pgentry_t));
1362 /*
1363 * Read & process L2
1364 */
1365 p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
1366 + l2_linear_offset(addr)];
1368 pod_retry_l2:
1369 ret = __copy_from_user(&l2e,
1370 p2m_entry,
1371 sizeof(l2e));
1372 if ( ret != 0
1373 || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1375 if( (l2e_get_flags(l2e) & _PAGE_PSE)
1376 && ( p2m_flags_to_type(l2e_get_flags(l2e))
1377 == p2m_populate_on_demand ) )
1379 /* The read has succeeded, so we know that the mapping
1380 * exists at this point. */
1381 if ( q != p2m_query )
1383 if( !p2m_pod_demand_populate(current->domain, gfn, mfn,
1384 p2m_entry, 9, q) )
1385 goto pod_retry_l2;
1387 /* Allocate failed. */
1388 p2mt = p2m_invalid;
1389 printk("%s: Allocate failed!\n", __func__);
1390 goto out;
1392 else
1394 p2mt = p2m_populate_on_demand;
1395 goto out;
1399 goto pod_retry_l1;
1402 if (l2e_get_flags(l2e) & _PAGE_PSE)
1404 p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
1405 ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
1407 if ( p2m_is_valid(p2mt) )
1408 mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
1409 else
1410 p2mt = p2m_mmio_dm;
1412 goto out;
1415 /*
1416 * Read and process L1
1417 */
1419 /* Need to __copy_from_user because the p2m is sparse and this
1420 * part might not exist */
1421 pod_retry_l1:
1422 p2m_entry = &phys_to_machine_mapping[gfn];
1424 ret = __copy_from_user(&l1e,
1425 p2m_entry,
1426 sizeof(l1e));
1428 if ( ret == 0 ) {
1429 p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
1430 ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
1432 if ( p2m_flags_to_type(l1e_get_flags(l1e))
1433 == p2m_populate_on_demand )
1435 /* The read has succeeded, so we know that the mapping
1436 * exists at this point. */
1437 if ( q != p2m_query )
1439 if( !p2m_pod_demand_populate(current->domain, gfn, mfn,
1440 (l1_pgentry_t *)p2m_entry, 0,
1441 q) )
1442 goto pod_retry_l1;
1444 /* Allocate failed. */
1445 p2mt = p2m_invalid;
1446 goto out;
1448 else
1450 p2mt = p2m_populate_on_demand;
1451 goto out;
1455 if ( p2m_is_valid(p2mt) )
1456 mfn = _mfn(l1e_get_pfn(l1e));
1457 else
1458 /* XXX see above */
1459 p2mt = p2m_mmio_dm;
1462 out:
1463 *t = p2mt;
1464 return mfn;
1467 /* Init the data structures for later use by the p2m code */
1468 int p2m_init(struct domain *d)
1470 struct p2m_domain *p2m;
1472 p2m = xmalloc(struct p2m_domain);
1473 if ( p2m == NULL )
1474 return -ENOMEM;
1476 d->arch.p2m = p2m;
1478 memset(p2m, 0, sizeof(*p2m));
1479 p2m_lock_init(p2m);
1480 INIT_LIST_HEAD(&p2m->pages);
1481 INIT_LIST_HEAD(&p2m->pod.super);
1482 INIT_LIST_HEAD(&p2m->pod.single);
1484 p2m->set_entry = p2m_set_entry;
1485 p2m->get_entry = p2m_gfn_to_mfn;
1486 p2m->get_entry_current = p2m_gfn_to_mfn_current;
1487 p2m->change_entry_type_global = p2m_change_type_global;
1489 if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
1490 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
1491 ept_p2m_init(d);
1493 return 0;
1496 void p2m_change_entry_type_global(struct domain *d,
1497 p2m_type_t ot, p2m_type_t nt)
1499 struct p2m_domain *p2m = d->arch.p2m;
1501 p2m_lock(p2m);
1502 p2m->change_entry_type_global(d, ot, nt);
1503 p2m_unlock(p2m);
1506 static
1507 int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1508 unsigned int page_order, p2m_type_t p2mt)
1510 unsigned long todo = 1ul << page_order;
1511 unsigned int order;
1512 int rc = 0;
1514 while ( todo )
1516 order = (((gfn | mfn_x(mfn) | todo) & ((1ul << 9) - 1)) == 0) ? 9 : 0;
1517 rc = d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt);
1518 gfn += 1ul << order;
1519 if ( mfn_x(mfn) != INVALID_MFN )
1520 mfn = _mfn(mfn_x(mfn) + (1ul << order));
1521 todo -= 1ul << order;
1524 return rc;
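/* Worked example (added for illustration): set_p2m_entry(d, 0x400,
 * _mfn(0x7fe00), 9, ...) finds gfn|mfn|todo all have the low nine bits
 * clear, so a single 2MB entry is written; with mfn 0x7fe01 instead, the
 * alignment test fails on every pass and 512 separate 4KB entries are
 * written. */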
1527 // Allocate a new p2m table for a domain.
1528 //
1529 // The structure of the p2m table is that of a pagetable for xen (i.e. it is
1530 // controlled by CONFIG_PAGING_LEVELS).
1531 //
1532 // The alloc_page and free_page functions will be used to get memory to
1533 // build the p2m, and to release it again at the end of day.
1534 //
1535 // Returns 0 for success or -errno.
1536 //
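// Sketch of a typical caller (an assumption: the paging code passes its own
// page-pool helpers; the hap_* names below follow the HAP code of this era
// and are not defined in this file):
//
//     rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
//     if ( rv != 0 )
//         return rv;
//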
1537 int p2m_alloc_table(struct domain *d,
1538 struct page_info * (*alloc_page)(struct domain *d),
1539 void (*free_page)(struct domain *d, struct page_info *pg))
1542 mfn_t mfn = _mfn(INVALID_MFN);
1543 struct list_head *entry;
1544 struct page_info *page, *p2m_top;
1545 unsigned int page_count = 0;
1546 unsigned long gfn = -1UL;
1547 struct p2m_domain *p2m = d->arch.p2m;
1549 p2m_lock(p2m);
1551 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
1553 P2M_ERROR("p2m already allocated for this domain\n");
1554 p2m_unlock(p2m);
1555 return -EINVAL;
1558 P2M_PRINTK("allocating p2m table\n");
1560 p2m->alloc_page = alloc_page;
1561 p2m->free_page = free_page;
1563 p2m_top = p2m->alloc_page(d);
1564 if ( p2m_top == NULL )
1566 p2m_unlock(p2m);
1567 return -ENOMEM;
1569 list_add_tail(&p2m_top->list, &p2m->pages);
1571 p2m_top->count_info = 1;
1572 p2m_top->u.inuse.type_info =
1573 #if CONFIG_PAGING_LEVELS == 4
1574 PGT_l4_page_table
1575 #else
1576 PGT_l3_page_table
1577 #endif
1578 | 1 | PGT_validated;
1580 d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));
1582 P2M_PRINTK("populating p2m table\n");
1584 /* Initialise physmap tables for slot zero. Other code assumes this. */
1585 if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,
1586 p2m_invalid) )
1587 goto error;
1589 /* Copy all existing mappings from the page list and m2p */
1590 for ( entry = d->page_list.next;
1591 entry != &d->page_list;
1592 entry = entry->next )
1594 page = list_entry(entry, struct page_info, list);
1595 mfn = page_to_mfn(page);
1596 gfn = get_gpfn_from_mfn(mfn_x(mfn));
1597 page_count++;
1598 if (
1599 #ifdef __x86_64__
1600 (gfn != 0x5555555555555555L)
1601 #else
1602 (gfn != 0x55555555L)
1603 #endif
1604 && gfn != INVALID_M2P_ENTRY
1605 && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
1606 goto error;
1609 P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
1610 p2m_unlock(p2m);
1611 return 0;
1613 error:
1614 P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
1615 PRI_mfn "\n", gfn, mfn_x(mfn));
1616 p2m_unlock(p2m);
1617 return -ENOMEM;
1620 void p2m_teardown(struct domain *d)
1621 /* Return all the p2m pages to Xen.
1622 * We know we don't have any extra mappings to these pages */
1624 struct list_head *entry, *n;
1625 struct page_info *pg;
1626 struct p2m_domain *p2m = d->arch.p2m;
1628 p2m_lock(p2m);
1629 d->arch.phys_table = pagetable_null();
1631 list_for_each_safe(entry, n, &p2m->pages)
1633 pg = list_entry(entry, struct page_info, list);
1634 list_del(entry);
1635 p2m->free_page(d, pg);
1637 p2m_unlock(p2m);
1640 void p2m_final_teardown(struct domain *d)
1642 xfree(d->arch.p2m);
1643 d->arch.p2m = NULL;
1646 #if P2M_AUDIT
1647 static void audit_p2m(struct domain *d)
1649 struct list_head *entry;
1650 struct page_info *page;
1651 struct domain *od;
1652 unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
1653 int entry_count = 0;
1654 mfn_t p2mfn;
1655 unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
1656 int test_linear;
1657 p2m_type_t type;
1659 if ( !paging_mode_translate(d) )
1660 return;
1662 //P2M_PRINTK("p2m audit starts\n");
1664 test_linear = ( (d == current->domain)
1665 && !pagetable_is_null(current->arch.monitor_table) );
1666 if ( test_linear )
1667 flush_tlb_local();
1669 spin_lock(&d->page_alloc_lock);
1671 /* Audit part one: walk the domain's page allocation list, checking
1672 * the m2p entries. */
1673 for ( entry = d->page_list.next;
1674 entry != &d->page_list;
1675 entry = entry->next )
1677 page = list_entry(entry, struct page_info, list);
1678 mfn = mfn_x(page_to_mfn(page));
1680 // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
1682 od = page_get_owner(page);
1684 if ( od != d )
1686 P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
1687 mfn, od, (od?od->domain_id:-1), d, d->domain_id);
1688 continue;
1691 gfn = get_gpfn_from_mfn(mfn);
1692 if ( gfn == INVALID_M2P_ENTRY )
1694 orphans_i++;
1695 //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
1696 // mfn);
1697 continue;
1700 if ( gfn == 0x55555555 )
1702 orphans_d++;
1703 //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
1704 // mfn);
1705 continue;
1708 p2mfn = gfn_to_mfn_type_foreign(d, gfn, &type, p2m_query);
1709 if ( mfn_x(p2mfn) != mfn )
1711 mpbad++;
1712 P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
1713 " (-> gfn %#lx)\n",
1714 mfn, gfn, mfn_x(p2mfn),
1715 (mfn_valid(p2mfn)
1716 ? get_gpfn_from_mfn(mfn_x(p2mfn))
1717 : -1u));
1718 /* This m2p entry is stale: the domain has another frame in
1719 * this physical slot. No great disaster, but for neatness,
1720 * blow away the m2p entry. */
1721 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
1724 if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
1726 lp2mfn = mfn_x(gfn_to_mfn_query(d, gfn, &type));
1727 if ( lp2mfn != mfn_x(p2mfn) )
1729 P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
1730 "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn));
1734 // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
1735 // mfn, gfn, p2mfn, lp2mfn);
1738 spin_unlock(&d->page_alloc_lock);
1740 /* Audit part two: walk the domain's p2m table, checking the entries. */
1741 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
1743 l2_pgentry_t *l2e;
1744 l1_pgentry_t *l1e;
1745 int i1, i2;
1747 #if CONFIG_PAGING_LEVELS == 4
1748 l4_pgentry_t *l4e;
1749 l3_pgentry_t *l3e;
1750 int i3, i4;
1751 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1752 #else /* CONFIG_PAGING_LEVELS == 3 */
1753 l3_pgentry_t *l3e;
1754 int i3;
1755 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1756 #endif
1758 gfn = 0;
1759 #if CONFIG_PAGING_LEVELS >= 4
1760 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
1762 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
1764 gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
1765 continue;
1767 l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
1768 #endif
1769 for ( i3 = 0;
1770 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
1771 i3++ )
1773 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
1775 gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
1776 continue;
1778 l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
1779 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
1781 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
1783 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
1784 && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
1785 == p2m_populate_on_demand ) )
1786 entry_count+=(1<<9);
1787 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1788 continue;
1791 /* check for super page */
1792 if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
1794 mfn = l2e_get_pfn(l2e[i2]);
1795 ASSERT(mfn_valid(_mfn(mfn)));
1796 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
1798 m2pfn = get_gpfn_from_mfn(mfn+i1);
1799 if ( m2pfn != (gfn + i1) )
1801 pmbad++;
1802 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1803 " -> gfn %#lx\n", gfn+i, mfn+i,
1804 m2pfn);
1805 BUG();
1808 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1809 continue;
1812 l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
1814 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
1816 if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
1818 if ( p2m_flags_to_type(l1e_get_flags(l1e[i1]))
1819 == p2m_populate_on_demand )
1820 entry_count++;
1821 continue;
1823 mfn = l1e_get_pfn(l1e[i1]);
1824 ASSERT(mfn_valid(_mfn(mfn)));
1825 m2pfn = get_gpfn_from_mfn(mfn);
1826 if ( m2pfn != gfn )
1828 pmbad++;
1829 printk("mismatch: gfn %#lx -> mfn %#lx"
1830 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1831 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1832 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1833 BUG();
1836 unmap_domain_page(l1e);
1838 unmap_domain_page(l2e);
1840 #if CONFIG_PAGING_LEVELS >= 4
1841 unmap_domain_page(l3e);
1843 #endif
1845 #if CONFIG_PAGING_LEVELS == 4
1846 unmap_domain_page(l4e);
1847 #else /* CONFIG_PAGING_LEVELS == 3 */
1848 unmap_domain_page(l3e);
1849 #endif
1853 if ( entry_count != d->arch.p2m->pod.entry_count )
1855 printk("%s: refcounted entry count %d, audit count %d!\n",
1856 __func__,
1857 d->arch.p2m->pod.entry_count,
1858 entry_count);
1859 BUG();
1862 //P2M_PRINTK("p2m audit complete\n");
1863 //if ( orphans_i | orphans_d | mpbad | pmbad )
1864 // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
1865 // orphans_i + orphans_d, orphans_i, orphans_d,
1866 if ( mpbad | pmbad )
1867 P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
1868 pmbad, mpbad);
1870 #endif /* P2M_AUDIT */
1874 static void
1875 p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
1876 unsigned int page_order)
1878 unsigned long i;
1880 if ( !paging_mode_translate(d) )
1882 if ( need_iommu(d) )
1883 for ( i = 0; i < (1 << page_order); i++ )
1884 iommu_unmap_page(d, mfn + i);
1885 return;
1888 P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
1890 set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
1891 for ( i = 0; i < (1UL << page_order); i++ )
1892 set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
1895 void
1896 guest_physmap_remove_page(struct domain *d, unsigned long gfn,
1897 unsigned long mfn, unsigned int page_order)
1899 p2m_lock(d->arch.p2m);
1900 audit_p2m(d);
1901 p2m_remove_page(d, gfn, mfn, page_order);
1902 audit_p2m(d);
1903 p2m_unlock(d->arch.p2m);
1906 #if CONFIG_PAGING_LEVELS == 3
1907 static int gfn_check_limit(
1908 struct domain *d, unsigned long gfn, unsigned int order)
1910 /*
1911 * 32bit AMD nested paging does not support over 4GB guest due to
1912 * hardware translation limit. This limitation is checked by comparing
1913 * gfn with 0xfffffUL.
1914 */
1915 if ( !paging_mode_hap(d) || ((gfn + (1ul << order)) <= 0x100000UL) ||
1916 (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) )
1917 return 0;
1919 if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
1920 dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
1921 " 4GB: specify 'hap=0' domain config option.\n",
1922 d->domain_id);
1924 return -EINVAL;
1926 #else
1927 #define gfn_check_limit(d, g, o) 0
1928 #endif
1930 int
1931 guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
1932 unsigned int order)
1934 struct p2m_domain *p2md = d->arch.p2m;
1935 unsigned long i;
1936 p2m_type_t ot;
1937 mfn_t omfn;
1938 int pod_count = 0;
1939 int rc = 0;
1941 BUG_ON(!paging_mode_translate(d));
1943 rc = gfn_check_limit(d, gfn, order);
1944 if ( rc != 0 )
1945 return rc;
1947 p2m_lock(p2md);
1948 audit_p2m(d);
1950 P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
1952 /* Make sure all gpfns are unused */
1953 for ( i = 0; i < (1UL << order); i++ )
1955 omfn = gfn_to_mfn_query(d, gfn + i, &ot);
1956 if ( p2m_is_ram(ot) )
1958 printk("%s: gfn_to_mfn returned type %d!\n",
1959 __func__, ot);
1960 rc = -EBUSY;
1961 goto out;
1963 else if ( ot == p2m_populate_on_demand )
1965 /* Count how many PoD entries we'll be replacing if successful */
1966 pod_count++;
1970 /* Now, actually do the two-way mapping */
1971 if ( !set_p2m_entry(d, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order,
1972 p2m_populate_on_demand) )
1973 rc = -EINVAL;
1974 else
1976 p2md->pod.entry_count += 1 << order; /* Lock: p2m */
1977 p2md->pod.entry_count -= pod_count;
1978 BUG_ON(p2md->pod.entry_count < 0);
1981 audit_p2m(d);
1982 p2m_unlock(p2md);
1984 out:
1985 return rc;
1989 int
1990 guest_physmap_add_entry(struct domain *d, unsigned long gfn,
1991 unsigned long mfn, unsigned int page_order,
1992 p2m_type_t t)
1994 unsigned long i, ogfn;
1995 p2m_type_t ot;
1996 mfn_t omfn;
1997 int pod_count = 0;
1998 int rc = 0;
2000 if ( !paging_mode_translate(d) )
2002 if ( need_iommu(d) && t == p2m_ram_rw )
2004 for ( i = 0; i < (1 << page_order); i++ )
2005 if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 )
2007 while ( i-- > 0 )
2008 iommu_unmap_page(d, mfn + i);
2009 return rc;
2012 return 0;
2015 rc = gfn_check_limit(d, gfn, page_order);
2016 if ( rc != 0 )
2017 return rc;
2019 p2m_lock(d->arch.p2m);
2020 audit_p2m(d);
2022 P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
2024 /* First, remove m->p mappings for existing p->m mappings */
2025 for ( i = 0; i < (1UL << page_order); i++ )
2027 omfn = gfn_to_mfn_query(d, gfn + i, &ot);
2028 if ( p2m_is_ram(ot) )
2030 ASSERT(mfn_valid(omfn));
2031 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
2033 else if ( ot == p2m_populate_on_demand )
2035 /* Count how many PoD entries we'll be replacing if successful */
2036 pod_count++;
2040 /* Then, look for m->p mappings for this range and deal with them */
2041 for ( i = 0; i < (1UL << page_order); i++ )
2043 ogfn = mfn_to_gfn(d, _mfn(mfn+i));
2044 if (
2045 #ifdef __x86_64__
2046 (ogfn != 0x5555555555555555L)
2047 #else
2048 (ogfn != 0x55555555L)
2049 #endif
2050 && (ogfn != INVALID_M2P_ENTRY)
2051 && (ogfn != gfn + i) )
2053 /* This machine frame is already mapped at another physical
2054 * address */
2055 P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
2056 mfn + i, ogfn, gfn + i);
2057 omfn = gfn_to_mfn_query(d, ogfn, &ot);
2058 if ( p2m_is_ram(ot) )
2060 ASSERT(mfn_valid(omfn));
2061 P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
2062 ogfn , mfn_x(omfn));
2063 if ( mfn_x(omfn) == (mfn + i) )
2064 p2m_remove_page(d, ogfn, mfn + i, 0);
2069 /* Now, actually do the two-way mapping */
2070 if ( mfn_valid(_mfn(mfn)) )
2072 if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
2073 rc = -EINVAL;
2074 for ( i = 0; i < (1UL << page_order); i++ )
2075 set_gpfn_from_mfn(mfn+i, gfn+i);
2077 else
2079 gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
2080 gfn, mfn);
2081 if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
2082 p2m_invalid) )
2083 rc = -EINVAL;
2084 else
2086 d->arch.p2m->pod.entry_count -= pod_count; /* Lock: p2m */
2087 BUG_ON(d->arch.p2m->pod.entry_count < 0);
2091 audit_p2m(d);
2092 p2m_unlock(d->arch.p2m);
2094 return rc;
2097 /* Walk the whole p2m table, changing any entries of the old type
2098 * to the new type. This is used in hardware-assisted paging to
2099 * quickly enable or disable log-dirty tracking */
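/* Illustrative usage (the real caller lives in the HAP log-dirty code, not
 * in this file): callers go through p2m_change_entry_type_global(), which
 * takes the p2m lock before dispatching here, e.g.
 *
 *     p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
 *
 * to make all RAM mappings fault on write for dirty tracking, and the
 * reverse call to switch tracking back off. */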
2100 void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
2102 unsigned long mfn, gfn, flags;
2103 l1_pgentry_t l1e_content;
2104 l1_pgentry_t *l1e;
2105 l2_pgentry_t *l2e;
2106 mfn_t l1mfn, l2mfn;
2107 int i1, i2;
2108 l3_pgentry_t *l3e;
2109 int i3;
2110 #if CONFIG_PAGING_LEVELS == 4
2111 l4_pgentry_t *l4e;
2112 int i4;
2113 #endif /* CONFIG_PAGING_LEVELS == 4 */
2115 if ( !paging_mode_translate(d) )
2116 return;
2118 if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
2119 return;
2121 ASSERT(p2m_locked_by_me(d->arch.p2m));
2123 #if CONFIG_PAGING_LEVELS == 4
2124 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
2125 #else /* CONFIG_PAGING_LEVELS == 3 */
2126 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
2127 #endif
2129 #if CONFIG_PAGING_LEVELS >= 4
2130 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
2132 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
2134 continue;
2136 l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
2137 #endif
2138 for ( i3 = 0;
2139 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
2140 i3++ )
2142 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
2144 continue;
2146 l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
2147 l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
2148 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
2150 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
2152 continue;
2155 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
2157 flags = l2e_get_flags(l2e[i2]);
2158 if ( p2m_flags_to_type(flags) != ot )
2159 continue;
2160 mfn = l2e_get_pfn(l2e[i2]);
2161 gfn = get_gpfn_from_mfn(mfn);
2162 flags = p2m_type_to_flags(nt);
2163 l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
2164 paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
2165 l2mfn, l1e_content, 2);
2166 continue;
2169 l1mfn = _mfn(l2e_get_pfn(l2e[i2]));
2170 l1e = map_domain_page(mfn_x(l1mfn));
2172 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
2174 flags = l1e_get_flags(l1e[i1]);
2175 if ( p2m_flags_to_type(flags) != ot )
2176 continue;
2177 mfn = l1e_get_pfn(l1e[i1]);
2178 gfn = get_gpfn_from_mfn(mfn);
2179 /* create a new 1le entry with the new type */
2180 flags = p2m_type_to_flags(nt);
2181 l1e_content = l1e_from_pfn(mfn, flags);
2182 paging_write_p2m_entry(d, gfn, &l1e[i1],
2183 l1mfn, l1e_content, 1);
2185 unmap_domain_page(l1e);
2187 unmap_domain_page(l2e);
2189 #if CONFIG_PAGING_LEVELS >= 4
2190 unmap_domain_page(l3e);
2192 #endif
2194 #if CONFIG_PAGING_LEVELS == 4
2195 unmap_domain_page(l4e);
2196 #else /* CONFIG_PAGING_LEVELS == 3 */
2197 unmap_domain_page(l3e);
2198 #endif
2202 /* Modify the p2m type of a single gfn from ot to nt, returning the
2203 * entry's previous type */
2204 p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
2205 p2m_type_t ot, p2m_type_t nt)
2207 p2m_type_t pt;
2208 mfn_t mfn;
2210 p2m_lock(d->arch.p2m);
2212 mfn = gfn_to_mfn(d, gfn, &pt);
2213 if ( pt == ot )
2214 set_p2m_entry(d, gfn, mfn, 0, nt);
2216 p2m_unlock(d->arch.p2m);
2218 return pt;
2221 int
2222 set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
2224 int rc = 0;
2225 p2m_type_t ot;
2226 mfn_t omfn;
2228 if ( !paging_mode_translate(d) )
2229 return 0;
2231 omfn = gfn_to_mfn_query(d, gfn, &ot);
2232 if ( p2m_is_ram(ot) )
2234 ASSERT(mfn_valid(omfn));
2235 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
2238 rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
2239 if ( 0 == rc )
2240 gdprintk(XENLOG_ERR,
2241 "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
2242 gmfn_to_mfn(d, gfn));
2243 return rc;
2246 int
2247 clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
2249 int rc = 0;
2250 unsigned long mfn;
2252 if ( !paging_mode_translate(d) )
2253 return 0;
2255 mfn = gmfn_to_mfn(d, gfn);
2256 if ( INVALID_MFN == mfn )
2258 gdprintk(XENLOG_ERR,
2259 "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
2260 return 0;
2262 rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
2264 return rc;
2267 /*
2268 * Local variables:
2269 * mode: C
2270 * c-set-style: "BSD"
2271 * c-basic-offset: 4
2272 * indent-tabs-mode: nil
2273 * End:
2274 */