ia64/xen-unstable

xen/arch/x86/mm/p2m.c @ 18006:f3afb8625a92

p2m: Support page orders other than 0 (4kB) and 9 (2MB)
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

author   Keir Fraser <keir.fraser@citrix.com>
date     Wed Jul 09 11:04:18 2008 +0100 (2008-07-09)
parents  02b6977de4b5
children 7a32c2325fdc
1 /******************************************************************************
2 * arch/x86/mm/p2m.c
3 *
4 * physical-to-machine mappings for automatically-translated domains.
5 *
6 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
7 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
8 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
9 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
26 #include <asm/domain.h>
27 #include <asm/page.h>
28 #include <asm/paging.h>
29 #include <asm/p2m.h>
30 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
31 #include <xen/iommu.h>
33 /* Debugging and auditing of the P2M code? */
34 #define P2M_AUDIT 0
35 #define P2M_DEBUGGING 0
37 /*
38 * The P2M lock. This protects all updates to the p2m table.
39 * Updates are expected to be safe against concurrent reads,
40 * which do *not* require the lock.
41 *
42 * Locking discipline: always acquire this lock before the shadow or HAP one
43 */
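/* Illustrative sketch (not part of the original file): a caller that must
 * update both the p2m and the shadow/HAP state follows the discipline above
 * by taking the p2m lock first and releasing it last.  The shadow_lock()/
 * shadow_unlock() names are assumptions for illustration; the real lock is
 * whichever paging-mode lock applies to the domain.
 *
 *     p2m_lock(d->arch.p2m);        // p2m lock first
 *     shadow_lock(d);               // then the shadow (or HAP) lock
 *     ... update p2m and paging structures ...
 *     shadow_unlock(d);
 *     p2m_unlock(d->arch.p2m);      // release in reverse order
 */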
45 #define p2m_lock_init(_p2m) \
46 do { \
47 spin_lock_init(&(_p2m)->lock); \
48 (_p2m)->locker = -1; \
49 (_p2m)->locker_function = "nobody"; \
50 } while (0)
52 #define p2m_lock(_p2m) \
53 do { \
54 if ( unlikely((_p2m)->locker == current->processor) ) \
55 { \
56 printk("Error: p2m lock held by %s\n", \
57 (_p2m)->locker_function); \
58 BUG(); \
59 } \
60 spin_lock(&(_p2m)->lock); \
61 ASSERT((_p2m)->locker == -1); \
62 (_p2m)->locker = current->processor; \
63 (_p2m)->locker_function = __func__; \
64 } while (0)
66 #define p2m_unlock(_p2m) \
67 do { \
68 ASSERT((_p2m)->locker == current->processor); \
69 (_p2m)->locker = -1; \
70 (_p2m)->locker_function = "nobody"; \
71 spin_unlock(&(_p2m)->lock); \
72 } while (0)
74 #define p2m_locked_by_me(_p2m) \
75 (current->processor == (_p2m)->locker)
77 /* Printouts */
78 #define P2M_PRINTK(_f, _a...) \
79 debugtrace_printk("p2m: %s(): " _f, __func__, ##_a)
80 #define P2M_ERROR(_f, _a...) \
81 printk("pg error: %s(): " _f, __func__, ##_a)
82 #if P2M_DEBUGGING
83 #define P2M_DEBUG(_f, _a...) \
84 debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a)
85 #else
86 #define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0)
87 #endif
90 /* Override macros from asm/page.h to make them work with mfn_t */
91 #undef mfn_to_page
92 #define mfn_to_page(_m) (frame_table + mfn_x(_m))
93 #undef mfn_valid
94 #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
95 #undef page_to_mfn
96 #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
99 /* PTE flags for the various types of p2m entry */
100 #define P2M_BASE_FLAGS \
101 (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
103 static unsigned long p2m_type_to_flags(p2m_type_t t)
104 {
105 unsigned long flags = (t & 0x7UL) << 9;
106 switch(t)
107 {
108 case p2m_invalid:
109 default:
110 return flags;
111 case p2m_ram_rw:
112 return flags | P2M_BASE_FLAGS | _PAGE_RW;
113 case p2m_ram_logdirty:
114 return flags | P2M_BASE_FLAGS;
115 case p2m_ram_ro:
116 return flags | P2M_BASE_FLAGS;
117 case p2m_mmio_dm:
118 return flags;
119 case p2m_mmio_direct:
120 return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
121 }
122 }
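/* For reference (added comment, not in the original file): the three
 * software-available PTE bits 9-11 hold the p2m_type_t value, which is how
 * the p2m_flags_to_type() helper used later in this file (defined in
 * asm/p2m.h) recovers the type from an entry.  A minimal sketch of that
 * inverse mapping, assuming the 3-bit encoding used by p2m_type_to_flags()
 * above:
 *
 *     static inline p2m_type_t example_flags_to_type(unsigned long flags)
 *     {
 *         return (flags >> 9) & 0x7;   // bits 9-11 carry the p2m type
 *     }
 */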
125 // Find the next level's P2M entry, checking for out-of-range gfn's...
126 // Returns NULL on error.
127 //
128 static l1_pgentry_t *
129 p2m_find_entry(void *table, unsigned long *gfn_remainder,
130 unsigned long gfn, u32 shift, u32 max)
131 {
132 u32 index;
134 index = *gfn_remainder >> shift;
135 if ( index >= max )
136 {
137 P2M_DEBUG("gfn=0x%lx out of range "
138 "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
139 gfn, *gfn_remainder, shift, index, max);
140 return NULL;
141 }
142 *gfn_remainder &= (1 << shift) - 1;
143 return (l1_pgentry_t *)table + index;
144 }
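/* Worked example (illustrative, not in the original file): an L2-level lookup
 * on a 64-bit build passes shift = L2_PAGETABLE_SHIFT - PAGE_SHIFT = 9 and
 * max = L2_PAGETABLE_ENTRIES = 512.  If *gfn_remainder is 0x345 on entry,
 * index = 0x345 >> 9 = 1, and *gfn_remainder is reduced to
 * 0x345 & 0x1ff = 0x145 for the next level down.
 */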
146 // Walk one level of the P2M table, allocating a new table if required.
147 // Returns 0 on error.
148 //
149 static int
150 p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
151 unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
152 u32 max, unsigned long type)
153 {
154 l1_pgentry_t *l1_entry;
155 l1_pgentry_t *p2m_entry;
156 l1_pgentry_t new_entry;
157 void *next;
158 int i;
159 ASSERT(d->arch.p2m->alloc_page);
161 if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
162 shift, max)) )
163 return 0;
165 if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
166 {
167 struct page_info *pg = d->arch.p2m->alloc_page(d);
168 if ( pg == NULL )
169 return 0;
170 list_add_tail(&pg->list, &d->arch.p2m->pages);
171 pg->u.inuse.type_info = type | 1 | PGT_validated;
172 pg->count_info = 1;
174 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
175 __PAGE_HYPERVISOR|_PAGE_USER);
177 switch ( type ) {
178 case PGT_l3_page_table:
179 paging_write_p2m_entry(d, gfn,
180 p2m_entry, *table_mfn, new_entry, 4);
181 break;
182 case PGT_l2_page_table:
183 #if CONFIG_PAGING_LEVELS == 3
184 /* for PAE mode, PDPE only has PCD/PWT/P bits available */
185 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), _PAGE_PRESENT);
186 #endif
187 paging_write_p2m_entry(d, gfn,
188 p2m_entry, *table_mfn, new_entry, 3);
189 break;
190 case PGT_l1_page_table:
191 paging_write_p2m_entry(d, gfn,
192 p2m_entry, *table_mfn, new_entry, 2);
193 break;
194 default:
195 BUG();
196 break;
197 }
198 }
200 ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
202 /* split a single large page into 4KB pages in the P2M table */
203 if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
204 {
205 unsigned long flags, pfn;
206 struct page_info *pg = d->arch.p2m->alloc_page(d);
207 if ( pg == NULL )
208 return 0;
209 list_add_tail(&pg->list, &d->arch.p2m->pages);
210 pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
211 pg->count_info = 1;
213 /* New splintered mappings inherit the flags of the old superpage,
214 * with a little reorganisation for the _PAGE_PSE_PAT bit. */
215 flags = l1e_get_flags(*p2m_entry);
216 pfn = l1e_get_pfn(*p2m_entry);
217 if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
218 pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
219 else
220 flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
222 l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
223 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
224 {
225 new_entry = l1e_from_pfn(pfn + i, flags);
226 paging_write_p2m_entry(d, gfn,
227 l1_entry+i, *table_mfn, new_entry, 1);
228 }
229 unmap_domain_page(l1_entry);
231 new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
232 __PAGE_HYPERVISOR|_PAGE_USER);
233 paging_write_p2m_entry(d, gfn,
234 p2m_entry, *table_mfn, new_entry, 2);
235 }
237 *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
238 next = map_domain_page(mfn_x(*table_mfn));
239 unmap_domain_page(*table);
240 *table = next;
242 return 1;
243 }
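/* Note on the superpage-splintering code in p2m_next_level() above (added
 * comment, not in the original file): in a 2MB PDE the PAT bit sits at bit 12,
 * which l1e_get_pfn() reports as bit 0 of the pfn, whereas in a 4KB PTE the
 * PAT bit is bit 7, the same position as _PAGE_PSE.  Hence the "pfn & 1"
 * test: if the superpage had _PAGE_PSE_PAT set, bit 0 of the pfn is cleared
 * and _PAGE_PSE is kept so the new L1 entries carry PAT; otherwise _PAGE_PSE
 * is stripped so they do not.
 */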
245 // Returns 0 on error (out of memory)
246 static int
247 p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
248 unsigned int page_order, p2m_type_t p2mt)
249 {
250 // XXX -- this might be able to be faster iff current->domain == d
251 mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
252 void *table = map_domain_page(mfn_x(table_mfn));
253 unsigned long i, gfn_remainder = gfn;
254 l1_pgentry_t *p2m_entry;
255 l1_pgentry_t entry_content;
256 l2_pgentry_t l2e_content;
257 int rv = 0;
259 #if CONFIG_PAGING_LEVELS >= 4
260 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
261 L4_PAGETABLE_SHIFT - PAGE_SHIFT,
262 L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
263 goto out;
264 #endif
265 /*
266 * When using PAE Xen, we only allow 33 bits of pseudo-physical
267 * address in translated guests (i.e. 8 GBytes). This restriction
268 * comes from wanting to map the P2M table into the 16MB RO_MPT hole
269 * in Xen's address space for translated PV guests.
270 * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
271 */
272 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
273 L3_PAGETABLE_SHIFT - PAGE_SHIFT,
274 ((CONFIG_PAGING_LEVELS == 3)
275 ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
276 : L3_PAGETABLE_ENTRIES),
277 PGT_l2_page_table) )
278 goto out;
280 if ( page_order == 0 )
281 {
282 if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
283 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
284 L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
285 goto out;
287 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
288 0, L1_PAGETABLE_ENTRIES);
289 ASSERT(p2m_entry);
291 if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
292 entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
293 else
294 entry_content = l1e_empty();
296 /* level 1 entry */
297 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
298 }
299 else
300 {
301 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
302 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
303 L2_PAGETABLE_ENTRIES);
304 ASSERT(p2m_entry);
306 if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
307 !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
308 {
309 P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
310 domain_crash(d);
311 goto out;
312 }
314 if ( mfn_valid(mfn) )
315 l2e_content = l2e_from_pfn(mfn_x(mfn),
316 p2m_type_to_flags(p2mt) | _PAGE_PSE);
317 else
318 l2e_content = l2e_empty();
320 entry_content.l1 = l2e_content.l2;
321 paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
322 }
324 /* Track the highest gfn for which we have ever had a valid mapping */
325 if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
326 d->arch.p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
328 if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) )
329 {
330 if ( p2mt == p2m_ram_rw )
331 for ( i = 0; i < (1UL << page_order); i++ )
332 iommu_map_page(d, gfn+i, mfn_x(mfn)+i);
333 else
334 for ( i = 0; i < (1UL << page_order); i++ )
335 iommu_unmap_page(d, gfn+i);
336 }
338 /* Success */
339 rv = 1;
341 out:
342 unmap_domain_page(table);
343 return rv;
344 }
346 static mfn_t
347 p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t)
348 {
349 mfn_t mfn;
350 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
351 l2_pgentry_t *l2e;
352 l1_pgentry_t *l1e;
354 ASSERT(paging_mode_translate(d));
356 /* XXX This is for compatibility with the old model, where anything not
357 * XXX marked as RAM was considered to be emulated MMIO space.
358 * XXX Once we start explicitly registering MMIO regions in the p2m
359 * XXX we will return p2m_invalid for unmapped gfns */
360 *t = p2m_mmio_dm;
362 mfn = pagetable_get_mfn(d->arch.phys_table);
364 if ( gfn > d->arch.p2m->max_mapped_pfn )
365 /* This pfn is higher than the highest the p2m map currently holds */
366 return _mfn(INVALID_MFN);
368 #if CONFIG_PAGING_LEVELS >= 4
369 {
370 l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
371 l4e += l4_table_offset(addr);
372 if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
373 {
374 unmap_domain_page(l4e);
375 return _mfn(INVALID_MFN);
376 }
377 mfn = _mfn(l4e_get_pfn(*l4e));
378 unmap_domain_page(l4e);
379 }
380 #endif
381 {
382 l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
383 #if CONFIG_PAGING_LEVELS == 3
384 /* On PAE hosts the p2m has eight l3 entries, not four (see
385 * shadow_set_p2m_entry()) so we can't use l3_table_offset.
386 * Instead, just count the number of l3es from zero. It's safe
387 * to do this because we already checked that the gfn is within
388 * the bounds of the p2m. */
389 l3e += (addr >> L3_PAGETABLE_SHIFT);
390 #else
391 l3e += l3_table_offset(addr);
392 #endif
393 if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
394 {
395 unmap_domain_page(l3e);
396 return _mfn(INVALID_MFN);
397 }
398 mfn = _mfn(l3e_get_pfn(*l3e));
399 unmap_domain_page(l3e);
400 }
402 l2e = map_domain_page(mfn_x(mfn));
403 l2e += l2_table_offset(addr);
404 if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
405 {
406 unmap_domain_page(l2e);
407 return _mfn(INVALID_MFN);
408 }
409 else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
410 {
411 mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
412 *t = p2m_flags_to_type(l2e_get_flags(*l2e));
413 unmap_domain_page(l2e);
415 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
416 return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
417 }
419 mfn = _mfn(l2e_get_pfn(*l2e));
420 unmap_domain_page(l2e);
422 l1e = map_domain_page(mfn_x(mfn));
423 l1e += l1_table_offset(addr);
424 if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
425 {
426 unmap_domain_page(l1e);
427 return _mfn(INVALID_MFN);
428 }
429 mfn = _mfn(l1e_get_pfn(*l1e));
430 *t = p2m_flags_to_type(l1e_get_flags(*l1e));
431 unmap_domain_page(l1e);
433 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
434 return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
435 }
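/* Usage sketch (illustrative, not in the original file): callers normally
 * reach this lookup through the gfn_to_mfn()/gfn_to_mfn_foreign() wrappers in
 * asm/p2m.h and must check the returned type before using the mfn, e.g.:
 *
 *     p2m_type_t t;
 *     mfn_t mfn = gfn_to_mfn(d, gfn, &t);
 *     if ( !p2m_is_ram(t) || !mfn_valid(mfn) )
 *         return -EINVAL;    // hole or emulated MMIO, not ordinary RAM
 */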
437 /* Read the current domain's p2m table (through the linear mapping). */
438 static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
439 {
440 mfn_t mfn = _mfn(INVALID_MFN);
441 p2m_type_t p2mt = p2m_mmio_dm;
442 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
443 /* XXX This is for compatibility with the old model, where anything not
444 * XXX marked as RAM was considered to be emulated MMIO space.
445 * XXX Once we start explicitly registering MMIO regions in the p2m
446 * XXX we will return p2m_invalid for unmapped gfns */
448 if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
449 {
450 l1_pgentry_t l1e = l1e_empty();
451 l2_pgentry_t l2e = l2e_empty();
452 int ret;
454 ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
455 / sizeof(l1_pgentry_t));
457 ret = __copy_from_user(&l2e,
458 &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
459 sizeof(l2e));
461 if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
462 (l2e_get_flags(l2e) & _PAGE_PSE) )
463 {
464 p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
465 ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
466 if ( p2m_is_valid(p2mt) )
467 mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
468 else
469 p2mt = p2m_mmio_dm;
470 }
471 else
472 {
474 /* Need to __copy_from_user because the p2m is sparse and this
475 * part might not exist */
476 ret = __copy_from_user(&l1e,
477 &phys_to_machine_mapping[gfn],
478 sizeof(l1e));
480 if ( ret == 0 ) {
481 p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
482 ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
483 if ( p2m_is_valid(p2mt) )
484 mfn = _mfn(l1e_get_pfn(l1e));
485 else
486 /* XXX see above */
487 p2mt = p2m_mmio_dm;
488 }
489 }
490 }
492 *t = p2mt;
493 return mfn;
494 }
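/* Note (added comment, not in the original file): this fast path works
 * because a translated guest's own p2m table is mapped read-only at
 * RO_MPT_VIRT_START, so phys_to_machine_mapping[gfn] is the L1 p2m entry for
 * gfn read through that linear mapping.  __copy_from_user() is used because
 * the p2m is sparse: unpopulated slots fault instead of reading as zero, and
 * that fault must be tolerated rather than taken fatally.
 */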
496 /* Init the datastructures for later use by the p2m code */
497 int p2m_init(struct domain *d)
498 {
499 struct p2m_domain *p2m;
501 p2m = xmalloc(struct p2m_domain);
502 if ( p2m == NULL )
503 return -ENOMEM;
505 d->arch.p2m = p2m;
507 memset(p2m, 0, sizeof(*p2m));
508 p2m_lock_init(p2m);
509 INIT_LIST_HEAD(&p2m->pages);
511 p2m->set_entry = p2m_set_entry;
512 p2m->get_entry = p2m_gfn_to_mfn;
513 p2m->get_entry_current = p2m_gfn_to_mfn_current;
514 p2m->change_entry_type_global = p2m_change_type_global;
516 if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
517 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
518 ept_p2m_init(d);
520 return 0;
521 }
523 void p2m_change_entry_type_global(struct domain *d,
524 p2m_type_t ot, p2m_type_t nt)
525 {
526 struct p2m_domain *p2m = d->arch.p2m;
528 p2m_lock(p2m);
529 p2m->change_entry_type_global(d, ot, nt);
530 p2m_unlock(p2m);
531 }
533 static
534 int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
535 unsigned int page_order, p2m_type_t p2mt)
536 {
537 unsigned long todo = 1ul << page_order;
538 unsigned int order;
539 int rc = 0;
541 while ( todo )
542 {
543 order = (((gfn | mfn_x(mfn) | todo) & ((1ul << 9) - 1)) == 0) ? 9 : 0;
544 rc = d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt);
545 gfn += 1ul << order;
546 if ( mfn_x(mfn) != INVALID_MFN )
547 mfn = _mfn(mfn_x(mfn) + (1ul << order));
548 todo -= 1ul << order;
549 }
551 return rc;
552 }
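/* Worked example for the order selection above (illustrative, not in the
 * original file): with gfn = 0x1000, mfn = 0x2000 and page_order = 10,
 * (gfn | mfn | todo) stays 512-page aligned on every pass, so the range is
 * installed as two 2MB (order 9) entries.  With gfn = 0x1001 and the same
 * page_order, the low bits never clear, so the loop falls back to 1024
 * individual 4kB (order 0) entries.
 */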
554 // Allocate a new p2m table for a domain.
555 //
556 // The structure of the p2m table is that of a pagetable for xen (i.e. it is
557 // controlled by CONFIG_PAGING_LEVELS).
558 //
559 // The alloc_page and free_page functions will be used to get memory to
560 // build the p2m, and to release it again at the end of the day.
561 //
562 // Returns 0 for success or -errno.
563 //
564 int p2m_alloc_table(struct domain *d,
565 struct page_info * (*alloc_page)(struct domain *d),
566 void (*free_page)(struct domain *d, struct page_info *pg))
568 {
569 mfn_t mfn = _mfn(INVALID_MFN);
570 struct list_head *entry;
571 struct page_info *page, *p2m_top;
572 unsigned int page_count = 0;
573 unsigned long gfn = -1UL;
574 struct p2m_domain *p2m = d->arch.p2m;
576 p2m_lock(p2m);
578 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
579 {
580 P2M_ERROR("p2m already allocated for this domain\n");
581 p2m_unlock(p2m);
582 return -EINVAL;
583 }
585 P2M_PRINTK("allocating p2m table\n");
587 p2m->alloc_page = alloc_page;
588 p2m->free_page = free_page;
590 p2m_top = p2m->alloc_page(d);
591 if ( p2m_top == NULL )
592 {
593 p2m_unlock(p2m);
594 return -ENOMEM;
595 }
596 list_add_tail(&p2m_top->list, &p2m->pages);
598 p2m_top->count_info = 1;
599 p2m_top->u.inuse.type_info =
600 #if CONFIG_PAGING_LEVELS == 4
601 PGT_l4_page_table
602 #else
603 PGT_l3_page_table
604 #endif
605 | 1 | PGT_validated;
607 d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));
609 P2M_PRINTK("populating p2m table\n");
611 /* Initialise physmap tables for slot zero. Other code assumes this. */
612 if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,
613 p2m_invalid) )
614 goto error;
616 /* Copy all existing mappings from the page list and m2p */
617 for ( entry = d->page_list.next;
618 entry != &d->page_list;
619 entry = entry->next )
620 {
621 page = list_entry(entry, struct page_info, list);
622 mfn = page_to_mfn(page);
623 gfn = get_gpfn_from_mfn(mfn_x(mfn));
624 page_count++;
625 if (
626 #ifdef __x86_64__
627 (gfn != 0x5555555555555555L)
628 #else
629 (gfn != 0x55555555L)
630 #endif
631 && gfn != INVALID_M2P_ENTRY
632 && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
633 goto error;
634 }
636 P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
637 p2m_unlock(p2m);
638 return 0;
640 error:
641 P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
642 PRI_mfn "\n", gfn, mfn_x(mfn));
643 p2m_unlock(p2m);
644 return -ENOMEM;
645 }
647 void p2m_teardown(struct domain *d)
648 /* Return all the p2m pages to Xen.
649 * We know we don't have any extra mappings to these pages */
650 {
651 struct list_head *entry, *n;
652 struct page_info *pg;
653 struct p2m_domain *p2m = d->arch.p2m;
655 p2m_lock(p2m);
656 d->arch.phys_table = pagetable_null();
658 list_for_each_safe(entry, n, &p2m->pages)
659 {
660 pg = list_entry(entry, struct page_info, list);
661 list_del(entry);
662 p2m->free_page(d, pg);
663 }
664 p2m_unlock(p2m);
665 }
667 void p2m_final_teardown(struct domain *d)
668 {
669 xfree(d->arch.p2m);
670 d->arch.p2m = NULL;
671 }
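/* Lifecycle sketch (illustrative, not part of the original file): the paging
 * code is expected to drive these entry points roughly as below.  The
 * example_alloc()/example_free() callbacks are placeholders for the real
 * shadow or HAP pool allocators.
 *
 *     static struct page_info *example_alloc(struct domain *d)
 *     {
 *         return take_a_page_from_the_paging_pool(d);   // hypothetical helper
 *     }
 *     static void example_free(struct domain *d, struct page_info *pg)
 *     {
 *         return_page_to_the_paging_pool(d, pg);        // hypothetical helper
 *     }
 *
 *     // Domain build:       p2m_init(d); p2m_alloc_table(d, example_alloc, example_free);
 *     // Domain destruction: p2m_teardown(d); ...free the paging pool...; p2m_final_teardown(d);
 */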
673 #if P2M_AUDIT
674 static void audit_p2m(struct domain *d)
675 {
676 struct list_head *entry;
677 struct page_info *page;
678 struct domain *od;
679 unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
680 mfn_t p2mfn;
681 unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
682 int test_linear;
683 p2m_type_t type;
685 if ( !paging_mode_translate(d) )
686 return;
688 //P2M_PRINTK("p2m audit starts\n");
690 test_linear = ( (d == current->domain)
691 && !pagetable_is_null(current->arch.monitor_table) );
692 if ( test_linear )
693 flush_tlb_local();
695 /* Audit part one: walk the domain's page allocation list, checking
696 * the m2p entries. */
697 for ( entry = d->page_list.next;
698 entry != &d->page_list;
699 entry = entry->next )
700 {
701 page = list_entry(entry, struct page_info, list);
702 mfn = mfn_x(page_to_mfn(page));
704 // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
706 od = page_get_owner(page);
708 if ( od != d )
709 {
710 P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
711 mfn, od, (od?od->domain_id:-1), d, d->domain_id);
712 continue;
713 }
715 gfn = get_gpfn_from_mfn(mfn);
716 if ( gfn == INVALID_M2P_ENTRY )
717 {
718 orphans_i++;
719 //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
720 // mfn);
721 continue;
722 }
724 if ( gfn == 0x55555555 )
725 {
726 orphans_d++;
727 //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
728 // mfn);
729 continue;
730 }
732 p2mfn = gfn_to_mfn_foreign(d, gfn, &type);
733 if ( mfn_x(p2mfn) != mfn )
734 {
735 mpbad++;
736 P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
737 " (-> gfn %#lx)\n",
738 mfn, gfn, mfn_x(p2mfn),
739 (mfn_valid(p2mfn)
740 ? get_gpfn_from_mfn(mfn_x(p2mfn))
741 : -1u));
742 /* This m2p entry is stale: the domain has another frame in
743 * this physical slot. No great disaster, but for neatness,
744 * blow away the m2p entry. */
745 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
746 }
748 if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
749 {
750 lp2mfn = mfn_x(gfn_to_mfn_current(gfn, &type));
751 if ( lp2mfn != mfn_x(p2mfn) )
752 {
753 P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
754 "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn));
755 }
756 }
758 // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
759 // mfn, gfn, p2mfn, lp2mfn);
760 }
762 /* Audit part two: walk the domain's p2m table, checking the entries. */
763 if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
764 {
765 l2_pgentry_t *l2e;
766 l1_pgentry_t *l1e;
767 int i1, i2;
769 #if CONFIG_PAGING_LEVELS == 4
770 l4_pgentry_t *l4e;
771 l3_pgentry_t *l3e;
772 int i3, i4;
773 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
774 #else /* CONFIG_PAGING_LEVELS == 3 */
775 l3_pgentry_t *l3e;
776 int i3;
777 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
778 #endif
780 gfn = 0;
781 #if CONFIG_PAGING_LEVELS >= 4
782 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
783 {
784 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
785 {
786 gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
787 continue;
788 }
789 l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
790 #endif
791 for ( i3 = 0;
792 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
793 i3++ )
794 {
795 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
796 {
797 gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
798 continue;
799 }
800 l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
801 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
802 {
803 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
804 {
805 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
806 continue;
807 }
809 /* check for super page */
810 if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
811 {
812 mfn = l2e_get_pfn(l2e[i2]);
813 ASSERT(mfn_valid(_mfn(mfn)));
814 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
815 {
816 m2pfn = get_gpfn_from_mfn(mfn+i1);
817 if ( m2pfn != (gfn + i1) )
818 {
819 pmbad++;
820 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
821 " -> gfn %#lx\n", gfn+i1, mfn+i1,
822 m2pfn);
823 BUG();
824 }
825 }
826 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
827 continue;
828 }
830 l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
832 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
833 {
834 if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
835 continue;
836 mfn = l1e_get_pfn(l1e[i1]);
837 ASSERT(mfn_valid(_mfn(mfn)));
838 m2pfn = get_gpfn_from_mfn(mfn);
839 if ( m2pfn != gfn )
840 {
841 pmbad++;
842 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
843 " -> gfn %#lx\n", gfn, mfn, m2pfn);
844 BUG();
845 }
846 }
847 unmap_domain_page(l1e);
848 }
849 unmap_domain_page(l2e);
850 }
851 #if CONFIG_PAGING_LEVELS >= 4
852 unmap_domain_page(l3e);
853 }
854 #endif
856 #if CONFIG_PAGING_LEVELS == 4
857 unmap_domain_page(l4e);
858 #else /* CONFIG_PAGING_LEVELS == 3 */
859 unmap_domain_page(l3e);
860 #endif
862 }
864 //P2M_PRINTK("p2m audit complete\n");
865 //if ( orphans_i | orphans_d | mpbad | pmbad )
866 // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
867 // orphans_i + orphans_d, orphans_i, orphans_d,
868 if ( mpbad | pmbad )
869 P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
870 pmbad, mpbad);
871 }
872 #else
873 #define audit_p2m(_d) do { (void)(_d); } while(0)
874 #endif /* P2M_AUDIT */
878 static void
879 p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
880 unsigned int page_order)
881 {
882 unsigned long i;
884 if ( !paging_mode_translate(d) )
885 {
886 if ( need_iommu(d) )
887 for ( i = 0; i < (1 << page_order); i++ )
888 iommu_unmap_page(d, mfn + i);
889 return;
890 }
892 P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
894 set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
895 for ( i = 0; i < (1UL << page_order); i++ )
896 set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
897 }
899 void
900 guest_physmap_remove_page(struct domain *d, unsigned long gfn,
901 unsigned long mfn, unsigned int page_order)
902 {
903 p2m_lock(d->arch.p2m);
904 audit_p2m(d);
905 p2m_remove_page(d, gfn, mfn, page_order);
906 audit_p2m(d);
907 p2m_unlock(d->arch.p2m);
908 }
910 int
911 guest_physmap_add_entry(struct domain *d, unsigned long gfn,
912 unsigned long mfn, unsigned int page_order,
913 p2m_type_t t)
914 {
915 unsigned long i, ogfn;
916 p2m_type_t ot;
917 mfn_t omfn;
918 int rc = 0;
920 if ( !paging_mode_translate(d) )
921 {
922 if ( need_iommu(d) && t == p2m_ram_rw )
923 {
924 for ( i = 0; i < (1 << page_order); i++ )
925 if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 )
926 {
927 while ( i-- > 0 )
928 iommu_unmap_page(d, mfn + i);
929 return rc;
930 }
931 }
932 return 0;
933 }
935 #if CONFIG_PAGING_LEVELS == 3
936 /*
937 * 32bit PAE nested paging does not support over 4GB guest due to
938 * hardware translation limit. This limitation is checked by comparing
939 * gfn with 0xfffffUL.
940 */
941 if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
942 {
943 if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
944 dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
945 " 4GB: specify 'hap=0' domain config option.\n",
946 d->domain_id);
947 return -EINVAL;
948 }
949 #endif
951 p2m_lock(d->arch.p2m);
952 audit_p2m(d);
954 P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
956 omfn = gfn_to_mfn(d, gfn, &ot);
957 if ( p2m_is_ram(ot) )
958 {
959 ASSERT(mfn_valid(omfn));
960 for ( i = 0; i < (1UL << page_order); i++ )
961 set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
962 }
964 ogfn = mfn_to_gfn(d, _mfn(mfn));
965 if (
966 #ifdef __x86_64__
967 (ogfn != 0x5555555555555555L)
968 #else
969 (ogfn != 0x55555555L)
970 #endif
971 && (ogfn != INVALID_M2P_ENTRY)
972 && (ogfn != gfn) )
973 {
974 /* This machine frame is already mapped at another physical address */
975 P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
976 mfn, ogfn, gfn);
977 omfn = gfn_to_mfn(d, ogfn, &ot);
978 if ( p2m_is_ram(ot) )
979 {
980 ASSERT(mfn_valid(omfn));
981 P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
982 ogfn , mfn_x(omfn));
983 if ( mfn_x(omfn) == mfn )
984 p2m_remove_page(d, ogfn, mfn, page_order);
985 }
986 }
988 if ( mfn_valid(_mfn(mfn)) )
989 {
990 if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
991 rc = -EINVAL;
992 for ( i = 0; i < (1UL << page_order); i++ )
993 set_gpfn_from_mfn(mfn+i, gfn+i);
994 }
995 else
996 {
997 gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
998 gfn, mfn);
999 if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
1000 p2m_invalid) )
1001 rc = -EINVAL;
1002 }
1004 audit_p2m(d);
1005 p2m_unlock(d->arch.p2m);
1007 return rc;
1008 }
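/* Usage sketch (illustrative, not in the original file): most callers are
 * expected to go through a guest_physmap_add_page() wrapper in asm/p2m.h that
 * supplies p2m_ram_rw as the type (the wrapper name and exact signature are
 * assumptions here), e.g. when populating a 2MB extent:
 *
 *     if ( guest_physmap_add_page(d, gfn, mfn, 9) != 0 )   // order 9 = 2MB
 *         ... handle failure ...
 */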
1010 /* Walk the whole p2m table, changing any entries of the old type
1011 * to the new type. This is used in hardware-assisted paging to
1012 * quickly enable or disable log-dirty tracking */
1013 void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
1014 {
1015 unsigned long mfn, gfn, flags;
1016 l1_pgentry_t l1e_content;
1017 l1_pgentry_t *l1e;
1018 l2_pgentry_t *l2e;
1019 mfn_t l1mfn, l2mfn;
1020 int i1, i2;
1021 l3_pgentry_t *l3e;
1022 int i3;
1023 #if CONFIG_PAGING_LEVELS == 4
1024 l4_pgentry_t *l4e;
1025 int i4;
1026 #endif /* CONFIG_PAGING_LEVELS == 4 */
1028 if ( !paging_mode_translate(d) )
1029 return;
1031 if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
1032 return;
1034 ASSERT(p2m_locked_by_me(d->arch.p2m));
1036 #if CONFIG_PAGING_LEVELS == 4
1037 l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1038 #else /* CONFIG_PAGING_LEVELS == 3 */
1039 l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
1040 #endif
1042 #if CONFIG_PAGING_LEVELS >= 4
1043 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
1044 {
1045 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
1046 {
1047 continue;
1048 }
1049 l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
1050 #endif
1051 for ( i3 = 0;
1052 i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
1053 i3++ )
1054 {
1055 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
1056 {
1057 continue;
1058 }
1059 l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
1060 l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
1061 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
1062 {
1063 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
1064 {
1065 continue;
1066 }
1068 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
1069 {
1070 flags = l2e_get_flags(l2e[i2]);
1071 if ( p2m_flags_to_type(flags) != ot )
1072 continue;
1073 mfn = l2e_get_pfn(l2e[i2]);
1074 gfn = get_gpfn_from_mfn(mfn);
1075 flags = p2m_type_to_flags(nt);
1076 l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
1077 paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
1078 l2mfn, l1e_content, 2);
1079 continue;
1080 }
1082 l1mfn = _mfn(l2e_get_pfn(l2e[i2]));
1083 l1e = map_domain_page(mfn_x(l1mfn));
1085 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
1086 {
1087 flags = l1e_get_flags(l1e[i1]);
1088 if ( p2m_flags_to_type(flags) != ot )
1089 continue;
1090 mfn = l1e_get_pfn(l1e[i1]);
1091 gfn = get_gpfn_from_mfn(mfn);
1092 /* create a new 1le entry with the new type */
1093 flags = p2m_type_to_flags(nt);
1094 l1e_content = l1e_from_pfn(mfn, flags);
1095 paging_write_p2m_entry(d, gfn, &l1e[i1],
1096 l1mfn, l1e_content, 1);
1097 }
1098 unmap_domain_page(l1e);
1099 }
1100 unmap_domain_page(l2e);
1101 }
1102 #if CONFIG_PAGING_LEVELS >= 4
1103 unmap_domain_page(l3e);
1104 }
1105 #endif
1107 #if CONFIG_PAGING_LEVELS == 4
1108 unmap_domain_page(l4e);
1109 #else /* CONFIG_PAGING_LEVELS == 3 */
1110 unmap_domain_page(l3e);
1111 #endif
1112 }
1115 /* Modify the p2m type of a single gfn from ot to nt, returning the
1116 * entry's previous type */
1117 p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
1118 p2m_type_t ot, p2m_type_t nt)
1119 {
1120 p2m_type_t pt;
1121 mfn_t mfn;
1123 p2m_lock(d->arch.p2m);
1125 mfn = gfn_to_mfn(d, gfn, &pt);
1126 if ( pt == ot )
1127 set_p2m_entry(d, gfn, mfn, 0, nt);
1129 p2m_unlock(d->arch.p2m);
1131 return pt;
1132 }
1134 int
1135 set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
1136 {
1137 int rc = 0;
1138 p2m_type_t ot;
1139 mfn_t omfn;
1141 if ( !paging_mode_translate(d) )
1142 return 0;
1144 omfn = gfn_to_mfn(d, gfn, &ot);
1145 if ( p2m_is_ram(ot) )
1146 {
1147 ASSERT(mfn_valid(omfn));
1148 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
1149 }
1151 rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
1152 if ( 0 == rc )
1153 gdprintk(XENLOG_ERR,
1154 "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
1155 gmfn_to_mfn(d, gfn));
1156 return rc;
1157 }
1159 int
1160 clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
1161 {
1162 int rc = 0;
1163 unsigned long mfn;
1165 if ( !paging_mode_translate(d) )
1166 return 0;
1168 mfn = gmfn_to_mfn(d, gfn);
1169 if ( INVALID_MFN == mfn )
1170 {
1171 gdprintk(XENLOG_ERR,
1172 "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
1173 return 0;
1174 }
1175 rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
1177 return rc;
1178 }
1180 /*
1181 * Local variables:
1182 * mode: C
1183 * c-set-style: "BSD"
1184 * c-basic-offset: 4
1185 * indent-tabs-mode: nil
1186 * End:
1187 */