ia64/xen-unstable: xen/arch/x86/mm/p2m.c @ 17692:70ca37d22895

Handle IOMMU pagetable allocations when set_p2m_entry is called with
non-zero page order.

Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

author    Keir Fraser <keir.fraser@citrix.com>
date      Wed May 21 11:07:23 2008 +0100
parents   8d18e52a1b23
children  c684cf331f94
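
/* Sketch of this changeset's effect (illustration only, not compiled as
 * part of the file below): a single p2m_set_entry() call with
 * page_order == 9 (one 2MB superpage) now also updates the IOMMU, which
 * maps 4kB frames, so every constituent frame is (un)mapped in turn:
 *
 *     for ( i = 0; i < (1UL << page_order); i++ )      // 512 iterations
 *         iommu_map_page(d, gfn + i, mfn_x(mfn) + i);  // one 4kB IOMMU mapping each
 */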
/******************************************************************************
 * arch/x86/mm/p2m.c
 *
 * physical-to-machine mappings for automatically-translated domains.
 *
 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <asm/domain.h>
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
#include <xen/iommu.h>

/* Debugging and auditing of the P2M code? */
#define P2M_AUDIT     0
#define P2M_DEBUGGING 0

/*
 * The P2M lock.  This protects all updates to the p2m table.
 * Updates are expected to be safe against concurrent reads,
 * which do *not* require the lock.
 *
 * Locking discipline: always acquire this lock before the shadow or HAP one
 */

#define p2m_lock_init(_p2m)                     \
    do {                                        \
        spin_lock_init(&(_p2m)->lock);          \
        (_p2m)->locker = -1;                    \
        (_p2m)->locker_function = "nobody";     \
    } while (0)

#define p2m_lock(_p2m)                                          \
    do {                                                        \
        if ( unlikely((_p2m)->locker == current->processor) )   \
        {                                                       \
            printk("Error: p2m lock held by %s\n",              \
                   (_p2m)->locker_function);                    \
            BUG();                                              \
        }                                                       \
        spin_lock(&(_p2m)->lock);                               \
        ASSERT((_p2m)->locker == -1);                           \
        (_p2m)->locker = current->processor;                    \
        (_p2m)->locker_function = __func__;                     \
    } while (0)

#define p2m_unlock(_p2m)                                        \
    do {                                                        \
        ASSERT((_p2m)->locker == current->processor);           \
        (_p2m)->locker = -1;                                    \
        (_p2m)->locker_function = "nobody";                     \
        spin_unlock(&(_p2m)->lock);                             \
    } while (0)

#define p2m_locked_by_me(_p2m) \
    (current->processor == (_p2m)->locker)

/* Printouts */
#define P2M_PRINTK(_f, _a...)                                \
    debugtrace_printk("p2m: %s(): " _f, __func__, ##_a)
#define P2M_ERROR(_f, _a...)                                 \
    printk("pg error: %s(): " _f, __func__, ##_a)
#if P2M_DEBUGGING
#define P2M_DEBUG(_f, _a...)                                 \
    debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a)
#else
#define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0)
#endif


/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) (frame_table + mfn_x(_m))
#undef mfn_valid
#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
#undef page_to_mfn
#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))


/* PTE flags for the various types of p2m entry */
#define P2M_BASE_FLAGS \
    (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
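
/* Encode a p2m_type_t in a pagetable entry: the type itself is stored in
 * the three software-available PTE bits (bits 9-11), and the access rights
 * are derived from the type.  p2m_flags_to_type() performs the reverse
 * lookup. */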
static unsigned long p2m_type_to_flags(p2m_type_t t)
{
    unsigned long flags = (t & 0x7UL) << 9;
    switch(t)
    {
    case p2m_invalid:
    default:
        return flags;
    case p2m_ram_rw:
        return flags | P2M_BASE_FLAGS | _PAGE_RW;
    case p2m_ram_logdirty:
        return flags | P2M_BASE_FLAGS;
    case p2m_ram_ro:
        return flags | P2M_BASE_FLAGS;
    case p2m_mmio_dm:
        return flags;
    case p2m_mmio_direct:
        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
    }
}


// Find the next level's P2M entry, checking for out-of-range gfn's...
// Returns NULL on error.
//
static l1_pgentry_t *
p2m_find_entry(void *table, unsigned long *gfn_remainder,
               unsigned long gfn, u32 shift, u32 max)
{
    u32 index;

    index = *gfn_remainder >> shift;
    if ( index >= max )
    {
        P2M_DEBUG("gfn=0x%lx out of range "
                  "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
                  gfn, *gfn_remainder, shift, index, max);
        return NULL;
    }
    *gfn_remainder &= (1 << shift) - 1;
    return (l1_pgentry_t *)table + index;
}

// Walk one level of the P2M table, allocating a new table if required.
// Returns 0 on error.
//
static int
p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
               unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
               u32 max, unsigned long type)
{
    l1_pgentry_t *l1_entry;
    l1_pgentry_t *p2m_entry;
    l1_pgentry_t new_entry;
    void *next;
    int i;
    ASSERT(d->arch.p2m->alloc_page);

    if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
                                      shift, max)) )
        return 0;

    if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
    {
        struct page_info *pg = d->arch.p2m->alloc_page(d);
        if ( pg == NULL )
            return 0;
        list_add_tail(&pg->list, &d->arch.p2m->pages);
        pg->u.inuse.type_info = type | 1 | PGT_validated;
        pg->count_info = 1;

        new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
                                 __PAGE_HYPERVISOR|_PAGE_USER);

        switch ( type ) {
        case PGT_l3_page_table:
            paging_write_p2m_entry(d, gfn,
                                   p2m_entry, *table_mfn, new_entry, 4);
            break;
        case PGT_l2_page_table:
#if CONFIG_PAGING_LEVELS == 3
            /* for PAE mode, PDPE only has PCD/PWT/P bits available */
            new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), _PAGE_PRESENT);
#endif
            paging_write_p2m_entry(d, gfn,
                                   p2m_entry, *table_mfn, new_entry, 3);
            break;
        case PGT_l1_page_table:
            paging_write_p2m_entry(d, gfn,
                                   p2m_entry, *table_mfn, new_entry, 2);
            break;
        default:
            BUG();
            break;
        }
    }

    ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);

    /* Split a single large page into 4kB pages in the P2M table */
    if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
    {
        unsigned long flags, pfn;
        struct page_info *pg = d->arch.p2m->alloc_page(d);
        if ( pg == NULL )
            return 0;
        list_add_tail(&pg->list, &d->arch.p2m->pages);
        pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
        pg->count_info = 1;

        /* New splintered mappings inherit the flags of the old superpage,
         * with a little reorganisation for the _PAGE_PSE_PAT bit. */
        flags = l1e_get_flags(*p2m_entry);
        pfn = l1e_get_pfn(*p2m_entry);
        if ( pfn & 1 )           /* ==> _PAGE_PSE_PAT was set */
            pfn -= 1;            /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
        else
            flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */

        l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
        {
            new_entry = l1e_from_pfn(pfn + i, flags);
            paging_write_p2m_entry(d, gfn,
                                   l1_entry+i, *table_mfn, new_entry, 1);
        }
        unmap_domain_page(l1_entry);

        new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
                                 __PAGE_HYPERVISOR|_PAGE_USER);
        paging_write_p2m_entry(d, gfn,
                               p2m_entry, *table_mfn, new_entry, 2);
    }

    *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
    next = map_domain_page(mfn_x(*table_mfn));
    unmap_domain_page(*table);
    *table = next;

    return 1;
}

// Returns 0 on error (out of memory)
static int
p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
              unsigned int page_order, p2m_type_t p2mt)
{
    // XXX -- this might be able to be faster iff current->domain == d
    mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
    void *table = map_domain_page(mfn_x(table_mfn));
    unsigned long i, gfn_remainder = gfn;
    l1_pgentry_t *p2m_entry;
    l1_pgentry_t entry_content;
    l2_pgentry_t l2e_content;
    int rv=0;

#if CONFIG_PAGING_LEVELS >= 4
    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
                         L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
        goto out;
#endif
    /*
     * When using PAE Xen, we only allow 33 bits of pseudo-physical
     * address in translated guests (i.e. 8 GBytes).  This restriction
     * comes from wanting to map the P2M table into the 16MB RO_MPT hole
     * in Xen's address space for translated PV guests.
     * When using AMD's NPT on PAE Xen, we are restricted to 4GB.
     */
    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
                         L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                         ((CONFIG_PAGING_LEVELS == 3)
                          ? (d->arch.hvm_domain.hap_enabled ? 4 : 8)
                          : L3_PAGETABLE_ENTRIES),
                         PGT_l2_page_table) )
        goto out;

    if ( page_order == 0 )
    {
        if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
                             L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                             L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
            goto out;

        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   0, L1_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);

        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
            entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
        else
            entry_content = l1e_empty();

        /* level 1 entry */
        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
    }
    else
    {
        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L2_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);

        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
        {
            P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
            domain_crash(d);
            goto out;
        }

        if ( mfn_valid(mfn) )
            l2e_content = l2e_from_pfn(mfn_x(mfn),
                                       p2m_type_to_flags(p2mt) | _PAGE_PSE);
        else
            l2e_content = l2e_empty();

        entry_content.l1 = l2e_content.l2;
        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
    }

    /* Track the highest gfn for which we have ever had a valid mapping */
    if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
        d->arch.p2m->max_mapped_pfn = gfn;
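
    /* Keep the IOMMU pagetables in sync with the p2m.  The IOMMU mappings
     * are always 4kB, so a superpage update (page_order != 0) is pushed
     * out as (1 << page_order) individual page mappings. */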
    if ( iommu_enabled && is_hvm_domain(d) )
    {
        if ( p2mt == p2m_ram_rw )
            for ( i = 0; i < (1UL << page_order); i++ )
                iommu_map_page(d, gfn+i, mfn_x(mfn)+i);
        else
            for ( i = 0; i < (1UL << page_order); i++ )
                iommu_unmap_page(d, gfn+i);
    }

    /* Success */
    rv = 1;

 out:
    unmap_domain_page(table);
    return rv;
}

static mfn_t
p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t)
{
    mfn_t mfn;
    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
    l2_pgentry_t *l2e;
    l1_pgentry_t *l1e;

    ASSERT(paging_mode_translate(d));

    /* XXX This is for compatibility with the old model, where anything not
     * XXX marked as RAM was considered to be emulated MMIO space.
     * XXX Once we start explicitly registering MMIO regions in the p2m
     * XXX we will return p2m_invalid for unmapped gfns */
    *t = p2m_mmio_dm;

    mfn = pagetable_get_mfn(d->arch.phys_table);

    if ( gfn > d->arch.p2m->max_mapped_pfn )
        /* This pfn is higher than the highest the p2m map currently holds */
        return _mfn(INVALID_MFN);

#if CONFIG_PAGING_LEVELS >= 4
    {
        l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
        l4e += l4_table_offset(addr);
        if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
        {
            unmap_domain_page(l4e);
            return _mfn(INVALID_MFN);
        }
        mfn = _mfn(l4e_get_pfn(*l4e));
        unmap_domain_page(l4e);
    }
#endif
    {
        l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
#if CONFIG_PAGING_LEVELS == 3
        /* On PAE hosts the p2m has eight l3 entries, not four (see
         * shadow_set_p2m_entry()) so we can't use l3_table_offset.
         * Instead, just count the number of l3es from zero.  It's safe
         * to do this because we already checked that the gfn is within
         * the bounds of the p2m. */
        l3e += (addr >> L3_PAGETABLE_SHIFT);
#else
        l3e += l3_table_offset(addr);
#endif
        if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
        {
            unmap_domain_page(l3e);
            return _mfn(INVALID_MFN);
        }
        mfn = _mfn(l3e_get_pfn(*l3e));
        unmap_domain_page(l3e);
    }

    l2e = map_domain_page(mfn_x(mfn));
    l2e += l2_table_offset(addr);
    if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
    {
        unmap_domain_page(l2e);
        return _mfn(INVALID_MFN);
    }
    else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
    {
        mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
        *t = p2m_flags_to_type(l2e_get_flags(*l2e));
        unmap_domain_page(l2e);

        ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
        return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
    }

    mfn = _mfn(l2e_get_pfn(*l2e));
    unmap_domain_page(l2e);

    l1e = map_domain_page(mfn_x(mfn));
    l1e += l1_table_offset(addr);
    if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
    {
        unmap_domain_page(l1e);
        return _mfn(INVALID_MFN);
    }
    mfn = _mfn(l1e_get_pfn(*l1e));
    *t = p2m_flags_to_type(l1e_get_flags(*l1e));
    unmap_domain_page(l1e);

    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
    return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
}

/* Read the current domain's p2m table (through the linear mapping). */
static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
{
    mfn_t mfn = _mfn(INVALID_MFN);
    p2m_type_t p2mt = p2m_mmio_dm;
    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
    /* XXX This is for compatibility with the old model, where anything not
     * XXX marked as RAM was considered to be emulated MMIO space.
     * XXX Once we start explicitly registering MMIO regions in the p2m
     * XXX we will return p2m_invalid for unmapped gfns */

    if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
    {
        l1_pgentry_t l1e = l1e_empty();
        l2_pgentry_t l2e = l2e_empty();
        int ret;

        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
               / sizeof(l1_pgentry_t));
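
        /* Read the l2 entry through Xen's linear mapping of the p2m.
         * Use __copy_from_user() because the p2m is sparse and this part
         * of the table might not be present. */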
        ret = __copy_from_user(&l2e,
                               &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
                               sizeof(l2e));

        if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
             (l2e_get_flags(l2e) & _PAGE_PSE) )
        {
            p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
            ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
            if ( p2m_is_valid(p2mt) )
                mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
            else
                p2mt = p2m_mmio_dm;
        }
        else
        {

            /* Need to __copy_from_user because the p2m is sparse and this
             * part might not exist */
            ret = __copy_from_user(&l1e,
                                   &phys_to_machine_mapping[gfn],
                                   sizeof(l1e));

            if ( ret == 0 ) {
                p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
                ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
                if ( p2m_is_valid(p2mt) )
                    mfn = _mfn(l1e_get_pfn(l1e));
                else
                    /* XXX see above */
                    p2mt = p2m_mmio_dm;
            }
        }
    }

    *t = p2mt;
    return mfn;
}

/* Init the datastructures for later use by the p2m code */
int p2m_init(struct domain *d)
{
    struct p2m_domain *p2m;

    p2m = xmalloc(struct p2m_domain);
    if ( p2m == NULL )
        return -ENOMEM;

    d->arch.p2m = p2m;

    memset(p2m, 0, sizeof(*p2m));
    p2m_lock_init(p2m);
    INIT_LIST_HEAD(&p2m->pages);

    p2m->set_entry = p2m_set_entry;
    p2m->get_entry = p2m_gfn_to_mfn;
    p2m->get_entry_current = p2m_gfn_to_mfn_current;
    p2m->change_entry_type_global = p2m_change_type_global;

    if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
         (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
        ept_p2m_init(d);

    return 0;
}
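
/* Change the type of all p2m entries of type ot to type nt.  Takes the
 * p2m lock and dispatches through the per-domain hook installed by
 * p2m_init(). */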
void p2m_change_entry_type_global(struct domain *d,
                                  p2m_type_t ot, p2m_type_t nt)
{
    struct p2m_domain *p2m = d->arch.p2m;

    p2m_lock(p2m);
    p2m->change_entry_type_global(d, ot, nt);
    p2m_unlock(p2m);
}

static inline
int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
                  unsigned int page_order, p2m_type_t p2mt)
{
    return d->arch.p2m->set_entry(d, gfn, mfn, page_order, p2mt);
}

// Allocate a new p2m table for a domain.
//
// The structure of the p2m table is that of a pagetable for xen (i.e. it is
// controlled by CONFIG_PAGING_LEVELS).
//
// The alloc_page and free_page functions will be used to get memory to
// build the p2m, and to release it again at the end of day.
//
// Returns 0 for success or -errno.
//
int p2m_alloc_table(struct domain *d,
                    struct page_info * (*alloc_page)(struct domain *d),
                    void (*free_page)(struct domain *d, struct page_info *pg))

{
    mfn_t mfn = _mfn(INVALID_MFN);
    struct list_head *entry;
    struct page_info *page, *p2m_top;
    unsigned int page_count = 0;
    unsigned long gfn = -1UL;
    struct p2m_domain *p2m = d->arch.p2m;

    p2m_lock(p2m);

    if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
    {
        P2M_ERROR("p2m already allocated for this domain\n");
        p2m_unlock(p2m);
        return -EINVAL;
    }

    P2M_PRINTK("allocating p2m table\n");

    p2m->alloc_page = alloc_page;
    p2m->free_page = free_page;

    p2m_top = p2m->alloc_page(d);
    if ( p2m_top == NULL )
    {
        p2m_unlock(p2m);
        return -ENOMEM;
    }
    list_add_tail(&p2m_top->list, &p2m->pages);

    p2m_top->count_info = 1;
    p2m_top->u.inuse.type_info =
#if CONFIG_PAGING_LEVELS == 4
        PGT_l4_page_table
#else
        PGT_l3_page_table
#endif
        | 1 | PGT_validated;

    d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));

    P2M_PRINTK("populating p2m table\n");

    /* Initialise physmap tables for slot zero. Other code assumes this. */
    if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,
                        p2m_invalid) )
        goto error;

    /* Copy all existing mappings from the page list and m2p */
    for ( entry = d->page_list.next;
          entry != &d->page_list;
          entry = entry->next )
    {
        page = list_entry(entry, struct page_info, list);
        mfn = page_to_mfn(page);
        gfn = get_gpfn_from_mfn(mfn_x(mfn));
        page_count++;
        if (
#ifdef __x86_64__
            (gfn != 0x5555555555555555L)
#else
            (gfn != 0x55555555L)
#endif
             && gfn != INVALID_M2P_ENTRY
             && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
            goto error;
    }

    P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
    p2m_unlock(p2m);
    return 0;

 error:
    P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
               PRI_mfn "\n", gfn, mfn_x(mfn));
    p2m_unlock(p2m);
    return -ENOMEM;
}

void p2m_teardown(struct domain *d)
/* Return all the p2m pages to Xen.
 * We know we don't have any extra mappings to these pages */
{
    struct list_head *entry, *n;
    struct page_info *pg;
    struct p2m_domain *p2m = d->arch.p2m;

    p2m_lock(p2m);
    d->arch.phys_table = pagetable_null();

    list_for_each_safe(entry, n, &p2m->pages)
    {
        pg = list_entry(entry, struct page_info, list);
        list_del(entry);
        p2m->free_page(d, pg);
    }
    p2m_unlock(p2m);
}

void p2m_final_teardown(struct domain *d)
{
    xfree(d->arch.p2m);
    d->arch.p2m = NULL;
}
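
/* When P2M_AUDIT is enabled above, audit_p2m() cross-checks the domain's
 * page list, the m2p table and the p2m table against one another. */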

#if P2M_AUDIT
static void audit_p2m(struct domain *d)
{
    struct list_head *entry;
    struct page_info *page;
    struct domain *od;
    unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
    mfn_t p2mfn;
    unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
    int test_linear;
    p2m_type_t type;

    if ( !paging_mode_translate(d) )
        return;

    //P2M_PRINTK("p2m audit starts\n");

    test_linear = ( (d == current->domain)
                    && !pagetable_is_null(current->arch.monitor_table) );
    if ( test_linear )
        flush_tlb_local();

    /* Audit part one: walk the domain's page allocation list, checking
     * the m2p entries. */
    for ( entry = d->page_list.next;
          entry != &d->page_list;
          entry = entry->next )
    {
        page = list_entry(entry, struct page_info, list);
        mfn = mfn_x(page_to_mfn(page));

        // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);

        od = page_get_owner(page);

        if ( od != d )
        {
            P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
                       mfn, od, (od?od->domain_id:-1), d, d->domain_id);
            continue;
        }

        gfn = get_gpfn_from_mfn(mfn);
        if ( gfn == INVALID_M2P_ENTRY )
        {
            orphans_i++;
            //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
            //               mfn);
            continue;
        }

        if ( gfn == 0x55555555 )
        {
            orphans_d++;
            //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
            //               mfn);
            continue;
        }

        p2mfn = gfn_to_mfn_foreign(d, gfn, &type);
        if ( mfn_x(p2mfn) != mfn )
        {
            mpbad++;
            P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
                       " (-> gfn %#lx)\n",
                       mfn, gfn, mfn_x(p2mfn),
                       (mfn_valid(p2mfn)
                        ? get_gpfn_from_mfn(mfn_x(p2mfn))
                        : -1u));
            /* This m2p entry is stale: the domain has another frame in
             * this physical slot.  No great disaster, but for neatness,
             * blow away the m2p entry. */
            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
        }

        if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
        {
            lp2mfn = mfn_x(gfn_to_mfn_current(gfn, &type));
            if ( lp2mfn != mfn_x(p2mfn) )
            {
                P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
                           "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn));
            }
        }

        // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
        //                mfn, gfn, p2mfn, lp2mfn);
    }

    /* Audit part two: walk the domain's p2m table, checking the entries. */
    if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
    {
        l2_pgentry_t *l2e;
        l1_pgentry_t *l1e;
        int i1, i2;

#if CONFIG_PAGING_LEVELS == 4
        l4_pgentry_t *l4e;
        l3_pgentry_t *l3e;
        int i3, i4;
        l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
#else /* CONFIG_PAGING_LEVELS == 3 */
        l3_pgentry_t *l3e;
        int i3;
        l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
#endif

        gfn = 0;
#if CONFIG_PAGING_LEVELS >= 4
        for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
        {
            if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
            {
                gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
                continue;
            }
            l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
#endif
            for ( i3 = 0;
                  i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
                  i3++ )
            {
                if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
                {
                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                    continue;
                }
                l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
                for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                {
                    if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
                    {
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }

                    /* check for super page */
                    if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
                    {
                        mfn = l2e_get_pfn(l2e[i2]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
                        {
                            m2pfn = get_gpfn_from_mfn(mfn+i1);
                            if ( m2pfn != (gfn + i1) )
                            {
                                pmbad++;
                                P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                           " -> gfn %#lx\n", gfn+i1, mfn+i1,
                                           m2pfn);
                                BUG();
                            }
                        }
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }

                    l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));

                    for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                    {
                        if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
                            continue;
                        mfn = l1e_get_pfn(l1e[i1]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        m2pfn = get_gpfn_from_mfn(mfn);
                        if ( m2pfn != gfn )
                        {
                            pmbad++;
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            BUG();
                        }
                    }
                    unmap_domain_page(l1e);
                }
                unmap_domain_page(l2e);
            }
#if CONFIG_PAGING_LEVELS >= 4
            unmap_domain_page(l3e);
        }
#endif

#if CONFIG_PAGING_LEVELS == 4
        unmap_domain_page(l4e);
#else /* CONFIG_PAGING_LEVELS == 3 */
        unmap_domain_page(l3e);
#endif

    }

    //P2M_PRINTK("p2m audit complete\n");
    //if ( orphans_i | orphans_d | mpbad | pmbad )
    //    P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
    //                   orphans_i + orphans_d, orphans_i, orphans_d,
    if ( mpbad | pmbad )
        P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
                   pmbad, mpbad);
}
#else
#define audit_p2m(_d) do { (void)(_d); } while(0)
#endif /* P2M_AUDIT */
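
/* Remove a gfn -> mfn mapping of the given order from the p2m, and mark
 * the corresponding m2p entries invalid. */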
static void
p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                unsigned int page_order)
{
    unsigned long i;

    if ( !paging_mode_translate(d) )
        return;

    P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);

    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
    for ( i = 0; i < (1UL << page_order); i++ )
        set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
}

void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
                          unsigned long mfn, unsigned int page_order)
{
    p2m_lock(d->arch.p2m);
    audit_p2m(d);
    p2m_remove_page(d, gfn, mfn, page_order);
    audit_p2m(d);
    p2m_unlock(d->arch.p2m);
}
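
/* Add a gfn -> mfn mapping of the given order and type to the p2m, update
 * the m2p, and drop any conflicting mapping the mfn had before.
 * Returns 0 on success or -EINVAL on failure. */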
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
                        unsigned long mfn, unsigned int page_order,
                        p2m_type_t t)
{
    unsigned long i, ogfn;
    p2m_type_t ot;
    mfn_t omfn;
    int rc = 0;

    if ( !paging_mode_translate(d) )
        return -EINVAL;

#if CONFIG_PAGING_LEVELS == 3
    /*
     * 32bit PAE nested paging does not support over 4GB guest due to
     * hardware translation limit. This limitation is checked by comparing
     * gfn with 0xfffffUL.
     */
    if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
    {
        if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
            dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
                    " 4GB: specify 'hap=0' domain config option.\n",
                    d->domain_id);
        return -EINVAL;
    }
#endif

    p2m_lock(d->arch.p2m);
    audit_p2m(d);

    P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);

    omfn = gfn_to_mfn(d, gfn, &ot);
    if ( p2m_is_ram(ot) )
    {
        ASSERT(mfn_valid(omfn));
        for ( i = 0; i < (1UL << page_order); i++ )
            set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
    }

    ogfn = mfn_to_gfn(d, _mfn(mfn));
    if (
#ifdef __x86_64__
        (ogfn != 0x5555555555555555L)
#else
        (ogfn != 0x55555555L)
#endif
        && (ogfn != INVALID_M2P_ENTRY)
        && (ogfn != gfn) )
    {
        /* This machine frame is already mapped at another physical address */
        P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                  mfn, ogfn, gfn);
        omfn = gfn_to_mfn(d, ogfn, &ot);
        if ( p2m_is_ram(ot) )
        {
            ASSERT(mfn_valid(omfn));
            P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                      ogfn , mfn_x(omfn));
            if ( mfn_x(omfn) == mfn )
                p2m_remove_page(d, ogfn, mfn, page_order);
        }
    }

    if ( mfn_valid(_mfn(mfn)) )
    {
        if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
            rc = -EINVAL;
        for ( i = 0; i < (1UL << page_order); i++ )
            set_gpfn_from_mfn(mfn+i, gfn+i);
    }
    else
    {
        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
                 gfn, mfn);
        if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
                            p2m_invalid) )
            rc = -EINVAL;
    }

    audit_p2m(d);
    p2m_unlock(d->arch.p2m);

    return rc;
}

/* Walk the whole p2m table, changing any entries of the old type
 * to the new type.  This is used in hardware-assisted paging to
 * quickly enable or disable log-dirty tracking */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
{
    unsigned long mfn, gfn, flags;
    l1_pgentry_t l1e_content;
    l1_pgentry_t *l1e;
    l2_pgentry_t *l2e;
    mfn_t l1mfn, l2mfn;
    int i1, i2;
    l3_pgentry_t *l3e;
    int i3;
#if CONFIG_PAGING_LEVELS == 4
    l4_pgentry_t *l4e;
    int i4;
#endif /* CONFIG_PAGING_LEVELS == 4 */

    if ( !paging_mode_translate(d) )
        return;

    if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
        return;

    ASSERT(p2m_locked_by_me(d->arch.p2m));

#if CONFIG_PAGING_LEVELS == 4
    l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
#else /* CONFIG_PAGING_LEVELS == 3 */
    l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
#endif

#if CONFIG_PAGING_LEVELS >= 4
    for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
    {
        if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
        {
            continue;
        }
        l3e = map_domain_page(l4e_get_pfn(l4e[i4]));
#endif
        for ( i3 = 0;
              i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
              i3++ )
        {
            if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
            {
                continue;
            }
            l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
            l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
            for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
            {
                if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
                {
                    continue;
                }

                if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
                {
                    flags = l2e_get_flags(l2e[i2]);
                    if ( p2m_flags_to_type(flags) != ot )
                        continue;
                    mfn = l2e_get_pfn(l2e[i2]);
                    gfn = get_gpfn_from_mfn(mfn);
                    flags = p2m_type_to_flags(nt);
                    l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
                    paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
                                           l2mfn, l1e_content, 2);
                    continue;
                }

                l1mfn = _mfn(l2e_get_pfn(l2e[i2]));
                l1e = map_domain_page(mfn_x(l1mfn));

                for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                {
                    flags = l1e_get_flags(l1e[i1]);
                    if ( p2m_flags_to_type(flags) != ot )
                        continue;
                    mfn = l1e_get_pfn(l1e[i1]);
                    gfn = get_gpfn_from_mfn(mfn);
                    /* create a new l1e entry with the new type */
                    flags = p2m_type_to_flags(nt);
                    l1e_content = l1e_from_pfn(mfn, flags);
                    paging_write_p2m_entry(d, gfn, &l1e[i1],
                                           l1mfn, l1e_content, 1);
                }
                unmap_domain_page(l1e);
            }
            unmap_domain_page(l2e);
        }
#if CONFIG_PAGING_LEVELS >= 4
        unmap_domain_page(l3e);
    }
#endif

#if CONFIG_PAGING_LEVELS == 4
    unmap_domain_page(l4e);
#else /* CONFIG_PAGING_LEVELS == 3 */
    unmap_domain_page(l3e);
#endif

}

/* Modify the p2m type of a single gfn from ot to nt, returning the
 * entry's previous type */
p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
                           p2m_type_t ot, p2m_type_t nt)
{
    p2m_type_t pt;
    mfn_t mfn;

    p2m_lock(d->arch.p2m);

    mfn = gfn_to_mfn(d, gfn, &pt);
    if ( pt == ot )
        set_p2m_entry(d, gfn, mfn, 0, nt);

    p2m_unlock(d->arch.p2m);

    return pt;
}
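
/* Set a single gfn to map a direct-MMIO mfn (type p2m_mmio_direct),
 * dropping any RAM mapping that was previously there. */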
int
set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
{
    int rc = 0;
    p2m_type_t ot;
    mfn_t omfn;

    if ( !paging_mode_translate(d) )
        return 0;

    omfn = gfn_to_mfn(d, gfn, &ot);
    if ( p2m_is_ram(ot) )
    {
        ASSERT(mfn_valid(omfn));
        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
    }

    rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
    if ( 0 == rc )
        gdprintk(XENLOG_ERR,
                 "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
                 gmfn_to_mfn(d, gfn));
    return rc;
}
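
/* Remove a direct-MMIO mapping previously installed with
 * set_mmio_p2m_entry(). */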
int
clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
{
    int rc = 0;
    unsigned long mfn;

    if ( !paging_mode_translate(d) )
        return 0;

    mfn = gmfn_to_mfn(d, gfn);
    if ( INVALID_MFN == mfn )
    {
        gdprintk(XENLOG_ERR,
                 "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
        return 0;
    }
    rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */