xen/arch/x86/mm/shadow/common.c @ 14175:5943a8314d69

[XEN] Make the compat-mode l4 page table look more like a page table
and remove some special-case code in the shadows.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Wed Feb 28 13:17:27 2007 +0000 (2007-02-28)
parents 720afbf74001
children a69d98bf0c55
1 /******************************************************************************
2 * arch/x86/mm/shadow/common.c
3 *
4 * Shadow code that does not need to be multiply compiled.
5 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
6 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
7 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
24 #include <xen/config.h>
25 #include <xen/types.h>
26 #include <xen/mm.h>
27 #include <xen/trace.h>
28 #include <xen/sched.h>
29 #include <xen/perfc.h>
30 #include <xen/irq.h>
31 #include <xen/domain_page.h>
32 #include <xen/guest_access.h>
33 #include <xen/keyhandler.h>
34 #include <asm/event.h>
35 #include <asm/page.h>
36 #include <asm/current.h>
37 #include <asm/flushtlb.h>
38 #include <asm/shadow.h>
39 #include <asm/shared.h>
40 #include "private.h"
43 /* Set up the shadow-specific parts of a domain struct at start of day.
44 * Called for every domain from arch_domain_create() */
45 void shadow_domain_init(struct domain *d)
46 {
47 int i;
48 shadow_lock_init(d);
49 for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
50 INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
51 INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
52 INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
53 }
55 /* Set up the shadow-specific parts of a vcpu struct. Note: the most important
56 * job is to initialize the update_paging_modes() function pointer, which is
57 * used to initialize the rest of the resources. Therefore, it really does not
58 * matter to have v->arch.paging.mode pointing to any mode, as long as it can
59 * be compiled.
60 */
61 void shadow_vcpu_init(struct vcpu *v)
62 {
63 #if CONFIG_PAGING_LEVELS == 4
64 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
65 #elif CONFIG_PAGING_LEVELS == 3
66 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
67 #elif CONFIG_PAGING_LEVELS == 2
68 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
69 #endif
70 }
72 #if SHADOW_AUDIT
73 int shadow_audit_enable = 0;
75 static void shadow_audit_key(unsigned char key)
76 {
77 shadow_audit_enable = !shadow_audit_enable;
78 printk("%s shadow_audit_enable=%d\n",
79 __func__, shadow_audit_enable);
80 }
82 static int __init shadow_audit_key_init(void)
83 {
84 register_keyhandler(
85 'O', shadow_audit_key, "toggle shadow audits");
86 return 0;
87 }
88 __initcall(shadow_audit_key_init);
89 #endif /* SHADOW_AUDIT */
91 static void sh_free_log_dirty_bitmap(struct domain *d);
93 int _shadow_mode_refcounts(struct domain *d)
94 {
95 return shadow_mode_refcounts(d);
96 }
99 /**************************************************************************/
100 /* x86 emulator support for the shadow code
101 */
103 struct segment_register *hvm_get_seg_reg(
104 enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt)
105 {
106 struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg];
107 if ( !__test_and_set_bit(seg, &sh_ctxt->valid_seg_regs) )
108 hvm_get_segment_register(current, seg, seg_reg);
109 return seg_reg;
110 }
112 enum hvm_access_type {
113 hvm_access_insn_fetch, hvm_access_read, hvm_access_write
114 };
116 static int hvm_translate_linear_addr(
117 enum x86_segment seg,
118 unsigned long offset,
119 unsigned int bytes,
120 enum hvm_access_type access_type,
121 struct sh_emulate_ctxt *sh_ctxt,
122 unsigned long *paddr)
123 {
124 struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt);
125 unsigned long limit, addr = offset;
126 uint32_t last_byte;
128 if ( sh_ctxt->ctxt.addr_size != 64 )
129 {
130 /*
131 * COMPATIBILITY MODE: Apply segment checks and add base.
132 */
134 switch ( access_type )
135 {
136 case hvm_access_read:
137 if ( (reg->attr.fields.type & 0xa) == 0x8 )
138 goto gpf; /* execute-only code segment */
139 break;
140 case hvm_access_write:
141 if ( (reg->attr.fields.type & 0xa) != 0x2 )
142 goto gpf; /* not a writable data segment */
143 break;
144 default:
145 break;
146 }
148 /* Calculate the segment limit, including granularity flag. */
149 limit = reg->limit;
150 if ( reg->attr.fields.g )
151 limit = (limit << 12) | 0xfff;
153 last_byte = offset + bytes - 1;
155 /* Is this a grows-down data segment? Special limit check if so. */
156 if ( (reg->attr.fields.type & 0xc) == 0x4 )
157 {
158 /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
159 if ( !reg->attr.fields.db )
160 last_byte = (uint16_t)last_byte;
162 /* Check first byte and last byte against respective bounds. */
163 if ( (offset <= limit) || (last_byte < offset) )
164 goto gpf;
165 }
166 else if ( (last_byte > limit) || (last_byte < offset) )
167 goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */
169 /*
170 * Hardware truncates to 32 bits in compatibility mode.
171 * It does not truncate to 16 bits in 16-bit address-size mode.
172 */
173 addr = (uint32_t)(addr + reg->base);
174 }
175 else
176 {
177 /*
178 * LONG MODE: FS and GS add segment base. Addresses must be canonical.
179 */
181 if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
182 addr += reg->base;
184 if ( !is_canonical_address(addr) )
185 goto gpf;
186 }
188 *paddr = addr;
189 return 0;
191 gpf:
192 /* Inject #GP(0). */
193 hvm_inject_exception(TRAP_gp_fault, 0, 0);
194 return X86EMUL_EXCEPTION;
195 }
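/* [Editor's illustrative sketch; not part of the original file, helper name
 * is hypothetical.] It shows how the effective segment limit used above is
 * derived: when the granularity (G) bit is set, the 20-bit limit field counts
 * 4K pages, so the byte-granular limit becomes (limit << 12) | 0xfff. */
static inline unsigned long example_effective_limit(unsigned long raw_limit,
                                                    int g_bit)
{
    /* e.g. raw_limit 0xfffff with G set yields 0xffffffff: a flat 4GB segment */
    return g_bit ? ((raw_limit << 12) | 0xfff) : raw_limit;
}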
197 static int
198 hvm_read(enum x86_segment seg,
199 unsigned long offset,
200 unsigned long *val,
201 unsigned int bytes,
202 enum hvm_access_type access_type,
203 struct sh_emulate_ctxt *sh_ctxt)
204 {
205 unsigned long addr;
206 int rc, errcode;
208 rc = hvm_translate_linear_addr(
209 seg, offset, bytes, access_type, sh_ctxt, &addr);
210 if ( rc )
211 return rc;
213 *val = 0;
214 // XXX -- this is WRONG.
215 // It entirely ignores the permissions in the page tables.
216 // In this case, that is only a user vs supervisor access check.
217 //
218 if ( (rc = hvm_copy_from_guest_virt(val, addr, bytes)) == 0 )
219 return X86EMUL_OKAY;
221 /* If we got here, there was nothing mapped here, or a bad GFN
222 * was mapped here. This should never happen: we're here because
223 * of a write fault at the end of the instruction we're emulating. */
224 SHADOW_PRINTK("read failed to va %#lx\n", addr);
225 errcode = ring_3(sh_ctxt->ctxt.regs) ? PFEC_user_mode : 0;
226 if ( access_type == hvm_access_insn_fetch )
227 errcode |= PFEC_insn_fetch;
228 hvm_inject_exception(TRAP_page_fault, errcode, addr + bytes - rc);
229 return X86EMUL_EXCEPTION;
230 }
232 static int
233 hvm_emulate_read(enum x86_segment seg,
234 unsigned long offset,
235 unsigned long *val,
236 unsigned int bytes,
237 struct x86_emulate_ctxt *ctxt)
238 {
239 return hvm_read(seg, offset, val, bytes, hvm_access_read,
240 container_of(ctxt, struct sh_emulate_ctxt, ctxt));
241 }
243 static int
244 hvm_emulate_insn_fetch(enum x86_segment seg,
245 unsigned long offset,
246 unsigned long *val,
247 unsigned int bytes,
248 struct x86_emulate_ctxt *ctxt)
249 {
250 struct sh_emulate_ctxt *sh_ctxt =
251 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
252 unsigned int insn_off = offset - ctxt->regs->eip;
254 /* Fall back if requested bytes are not in the prefetch cache. */
255 if ( unlikely((insn_off + bytes) > sh_ctxt->insn_buf_bytes) )
256 return hvm_read(seg, offset, val, bytes,
257 hvm_access_insn_fetch, sh_ctxt);
259 /* Hit the cache. Simple memcpy. */
260 *val = 0;
261 memcpy(val, &sh_ctxt->insn_buf[insn_off], bytes);
262 return X86EMUL_OKAY;
263 }
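/* [Editor's illustrative sketch, not part of the original file.] The
 * cache-hit test above reduces to a bounds check: the fetch can be served
 * from the buffer primed in shadow_init_emulation() only if the whole
 * requested range lies inside it. */
static inline int example_insn_buf_hit(unsigned int insn_off,
                                       unsigned int bytes,
                                       unsigned int buf_bytes)
{
    return (insn_off + bytes) <= buf_bytes;
}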
265 static int
266 hvm_emulate_write(enum x86_segment seg,
267 unsigned long offset,
268 unsigned long val,
269 unsigned int bytes,
270 struct x86_emulate_ctxt *ctxt)
271 {
272 struct sh_emulate_ctxt *sh_ctxt =
273 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
274 struct vcpu *v = current;
275 unsigned long addr;
276 int rc;
278 /* How many emulations could we save if we unshadowed on stack writes? */
279 if ( seg == x86_seg_ss )
280 perfc_incrc(shadow_fault_emulate_stack);
282 rc = hvm_translate_linear_addr(
283 seg, offset, bytes, hvm_access_write, sh_ctxt, &addr);
284 if ( rc )
285 return rc;
287 return v->arch.paging.mode->shadow.x86_emulate_write(
288 v, addr, &val, bytes, sh_ctxt);
289 }
291 static int
292 hvm_emulate_cmpxchg(enum x86_segment seg,
293 unsigned long offset,
294 unsigned long old,
295 unsigned long new,
296 unsigned int bytes,
297 struct x86_emulate_ctxt *ctxt)
298 {
299 struct sh_emulate_ctxt *sh_ctxt =
300 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
301 struct vcpu *v = current;
302 unsigned long addr;
303 int rc;
305 rc = hvm_translate_linear_addr(
306 seg, offset, bytes, hvm_access_write, sh_ctxt, &addr);
307 if ( rc )
308 return rc;
310 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
311 v, addr, old, new, bytes, sh_ctxt);
312 }
314 static int
315 hvm_emulate_cmpxchg8b(enum x86_segment seg,
316 unsigned long offset,
317 unsigned long old_lo,
318 unsigned long old_hi,
319 unsigned long new_lo,
320 unsigned long new_hi,
321 struct x86_emulate_ctxt *ctxt)
322 {
323 struct sh_emulate_ctxt *sh_ctxt =
324 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
325 struct vcpu *v = current;
326 unsigned long addr;
327 int rc;
329 rc = hvm_translate_linear_addr(
330 seg, offset, 8, hvm_access_write, sh_ctxt, &addr);
331 if ( rc )
332 return rc;
334 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
335 v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
336 }
338 static struct x86_emulate_ops hvm_shadow_emulator_ops = {
339 .read = hvm_emulate_read,
340 .insn_fetch = hvm_emulate_insn_fetch,
341 .write = hvm_emulate_write,
342 .cmpxchg = hvm_emulate_cmpxchg,
343 .cmpxchg8b = hvm_emulate_cmpxchg8b,
344 };
346 static int
347 pv_emulate_read(enum x86_segment seg,
348 unsigned long offset,
349 unsigned long *val,
350 unsigned int bytes,
351 struct x86_emulate_ctxt *ctxt)
352 {
353 unsigned int rc;
355 *val = 0;
356 if ( (rc = copy_from_user((void *)val, (void *)offset, bytes)) != 0 )
357 {
358 propagate_page_fault(offset + bytes - rc, 0); /* read fault */
359 return X86EMUL_EXCEPTION;
360 }
362 return X86EMUL_OKAY;
363 }
365 static int
366 pv_emulate_write(enum x86_segment seg,
367 unsigned long offset,
368 unsigned long val,
369 unsigned int bytes,
370 struct x86_emulate_ctxt *ctxt)
371 {
372 struct sh_emulate_ctxt *sh_ctxt =
373 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
374 struct vcpu *v = current;
375 return v->arch.paging.mode->shadow.x86_emulate_write(
376 v, offset, &val, bytes, sh_ctxt);
377 }
379 static int
380 pv_emulate_cmpxchg(enum x86_segment seg,
381 unsigned long offset,
382 unsigned long old,
383 unsigned long new,
384 unsigned int bytes,
385 struct x86_emulate_ctxt *ctxt)
386 {
387 struct sh_emulate_ctxt *sh_ctxt =
388 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
389 struct vcpu *v = current;
390 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
391 v, offset, old, new, bytes, sh_ctxt);
392 }
394 static int
395 pv_emulate_cmpxchg8b(enum x86_segment seg,
396 unsigned long offset,
397 unsigned long old_lo,
398 unsigned long old_hi,
399 unsigned long new_lo,
400 unsigned long new_hi,
401 struct x86_emulate_ctxt *ctxt)
402 {
403 struct sh_emulate_ctxt *sh_ctxt =
404 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
405 struct vcpu *v = current;
406 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
407 v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
408 }
410 static struct x86_emulate_ops pv_shadow_emulator_ops = {
411 .read = pv_emulate_read,
412 .insn_fetch = pv_emulate_read,
413 .write = pv_emulate_write,
414 .cmpxchg = pv_emulate_cmpxchg,
415 .cmpxchg8b = pv_emulate_cmpxchg8b,
416 };
418 struct x86_emulate_ops *shadow_init_emulation(
419 struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs)
420 {
421 struct segment_register *creg, *sreg;
422 struct vcpu *v = current;
423 unsigned long addr;
425 sh_ctxt->ctxt.regs = regs;
427 if ( !is_hvm_vcpu(v) )
428 {
429 sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = BITS_PER_LONG;
430 return &pv_shadow_emulator_ops;
431 }
433 /* Segment cache initialisation. Primed with CS. */
434 sh_ctxt->valid_seg_regs = 0;
435 creg = hvm_get_seg_reg(x86_seg_cs, sh_ctxt);
437 /* Work out the emulation mode. */
438 if ( hvm_long_mode_enabled(v) && creg->attr.fields.l )
439 {
440 sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 64;
441 }
442 else if ( regs->eflags & X86_EFLAGS_VM )
443 {
444 sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 16;
445 }
446 else
447 {
448 sreg = hvm_get_seg_reg(x86_seg_ss, sh_ctxt);
449 sh_ctxt->ctxt.addr_size = creg->attr.fields.db ? 32 : 16;
450 sh_ctxt->ctxt.sp_size = sreg->attr.fields.db ? 32 : 16;
451 }
453 /* Attempt to prefetch whole instruction. */
454 sh_ctxt->insn_buf_bytes =
455 (!hvm_translate_linear_addr(
456 x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
457 hvm_access_insn_fetch, sh_ctxt, &addr) &&
458 !hvm_copy_from_guest_virt(
459 sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
460 ? sizeof(sh_ctxt->insn_buf) : 0;
462 return &hvm_shadow_emulator_ops;
463 }
465 /**************************************************************************/
466 /* Code for "promoting" a guest page to the point where the shadow code is
467 * willing to let it be treated as a guest page table. This generally
468 * involves making sure there are no writable mappings available to the guest
469 * for this page.
470 */
471 void shadow_promote(struct vcpu *v, mfn_t gmfn, unsigned int type)
472 {
473 struct page_info *page = mfn_to_page(gmfn);
475 ASSERT(mfn_valid(gmfn));
477 /* We should never try to promote a gmfn that has writeable mappings */
478 ASSERT(sh_remove_write_access(v, gmfn, 0, 0) == 0);
480 /* Is the page already shadowed? */
481 if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
482 page->shadow_flags = 0;
484 ASSERT(!test_bit(type, &page->shadow_flags));
485 set_bit(type, &page->shadow_flags);
486 }
488 void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
489 {
490 struct page_info *page = mfn_to_page(gmfn);
492 ASSERT(test_bit(_PGC_page_table, &page->count_info));
493 ASSERT(test_bit(type, &page->shadow_flags));
495 clear_bit(type, &page->shadow_flags);
497 if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
498 {
499 /* tlbflush timestamp field is valid again */
500 page->tlbflush_timestamp = tlbflush_current_time();
501 clear_bit(_PGC_page_table, &page->count_info);
502 }
503 }
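/* [Editor's illustrative sketch; standalone and hypothetical, using plain
 * integers rather than struct page_info.] Promote/demote above keep one bit
 * per shadow type in shadow_flags and raise a single "this page is a
 * pagetable" marker while any type bit remains set: */
static inline unsigned long example_promote(unsigned long flags,
                                            unsigned int type,
                                            int *is_pagetable)
{
    flags |= 1UL << type;       /* record the new shadow type */
    *is_pagetable = 1;          /* analogue of setting _PGC_page_table */
    return flags;
}
static inline unsigned long example_demote(unsigned long flags,
                                           unsigned int type,
                                           int *is_pagetable)
{
    flags &= ~(1UL << type);
    if ( flags == 0 )           /* last shadow gone: clear the marker */
        *is_pagetable = 0;
    return flags;
}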
505 /**************************************************************************/
506 /* Validate a pagetable change from the guest and update the shadows.
507 * Returns a bitmask of SHADOW_SET_* flags. */
509 int
510 sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size)
511 {
512 int result = 0;
513 struct page_info *page = mfn_to_page(gmfn);
515 sh_mark_dirty(v->domain, gmfn);
517 // Determine which types of shadows are affected, and update each.
518 //
519 // Always validate L1s before L2s to prevent another cpu with a linear
520 // mapping of this gmfn from seeing a walk that results from
521 // using the new L2 value and the old L1 value. (It is OK for such a
522 // guest to see a walk that uses the old L2 value with the new L1 value,
523 // as hardware could behave this way if one level of the pagewalk occurs
524 // before the store, and the next level of the pagewalk occurs after the
525 // store.)
526 //
527 // Ditto for L2s before L3s, etc.
528 //
530 if ( !(page->count_info & PGC_page_table) )
531 return 0; /* Not shadowed at all */
533 #if CONFIG_PAGING_LEVELS == 2
534 if ( page->shadow_flags & SHF_L1_32 )
535 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 2, 2)
536 (v, gmfn, entry, size);
537 #else
538 if ( page->shadow_flags & SHF_L1_32 )
539 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 2)
540 (v, gmfn, entry, size);
541 #endif
543 #if CONFIG_PAGING_LEVELS == 2
544 if ( page->shadow_flags & SHF_L2_32 )
545 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 2, 2)
546 (v, gmfn, entry, size);
547 #else
548 if ( page->shadow_flags & SHF_L2_32 )
549 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 2)
550 (v, gmfn, entry, size);
551 #endif
553 #if CONFIG_PAGING_LEVELS >= 3
554 if ( page->shadow_flags & SHF_L1_PAE )
555 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 3)
556 (v, gmfn, entry, size);
557 if ( page->shadow_flags & SHF_L2_PAE )
558 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 3)
559 (v, gmfn, entry, size);
560 if ( page->shadow_flags & SHF_L2H_PAE )
561 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
562 (v, gmfn, entry, size);
563 #else /* 32-bit non-PAE hypervisor does not support PAE guests */
564 ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
565 #endif
567 #if CONFIG_PAGING_LEVELS >= 4
568 if ( page->shadow_flags & SHF_L1_64 )
569 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 4, 4)
570 (v, gmfn, entry, size);
571 if ( page->shadow_flags & SHF_L2_64 )
572 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 4, 4)
573 (v, gmfn, entry, size);
574 if ( page->shadow_flags & SHF_L2H_64 )
575 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 4, 4)
576 (v, gmfn, entry, size);
577 if ( page->shadow_flags & SHF_L3_64 )
578 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 4, 4)
579 (v, gmfn, entry, size);
580 if ( page->shadow_flags & SHF_L4_64 )
581 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl4e, 4, 4)
582 (v, gmfn, entry, size);
583 #else /* 32-bit/PAE hypervisor does not support 64-bit guests */
584 ASSERT((page->shadow_flags
585 & (SHF_L4_64|SHF_L3_64|SHF_L2H_64|SHF_L2_64|SHF_L1_64)) == 0);
586 #endif
588 return result;
589 }
592 void
593 sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
594 void *entry, u32 size)
595 /* This is the entry point for emulated writes to pagetables in HVM guests and
596 * PV translated guests.
597 */
598 {
599 struct domain *d = v->domain;
600 int rc;
602 ASSERT(shadow_locked_by_me(v->domain));
603 rc = sh_validate_guest_entry(v, gmfn, entry, size);
604 if ( rc & SHADOW_SET_FLUSH )
605 /* Need to flush TLBs to pick up shadow PT changes */
606 flush_tlb_mask(d->domain_dirty_cpumask);
607 if ( rc & SHADOW_SET_ERROR )
608 {
609 /* This page is probably not a pagetable any more: tear it out of the
610 * shadows, along with any tables that reference it.
611 * Since the validate call above will have made a "safe" (i.e. zero)
612 * shadow entry, we can let the domain live even if we can't fully
613 * unshadow the page. */
614 sh_remove_shadows(v, gmfn, 0, 0);
615 }
616 }
618 int shadow_write_guest_entry(struct vcpu *v, intpte_t *p,
619 intpte_t new, mfn_t gmfn)
620 /* Write a new value into the guest pagetable, and update the shadows
621 * appropriately. Returns 0 if we page-faulted, 1 for success. */
622 {
623 int failed;
624 shadow_lock(v->domain);
625 failed = __copy_to_user(p, &new, sizeof(new));
626 if ( failed != sizeof(new) )
627 sh_validate_guest_entry(v, gmfn, p, sizeof(new));
628 shadow_unlock(v->domain);
629 return (failed == 0);
630 }
632 int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
633 intpte_t *old, intpte_t new, mfn_t gmfn)
634 /* Cmpxchg a new value into the guest pagetable, and update the shadows
635 * appropriately. Returns 0 if we page-faulted, 1 if not.
636 * N.B. caller should check the value of "old" to see if the
637 * cmpxchg itself was successful. */
638 {
639 int failed;
640 intpte_t t = *old;
641 shadow_lock(v->domain);
642 failed = cmpxchg_user(p, t, new);
643 if ( t == *old )
644 sh_validate_guest_entry(v, gmfn, p, sizeof(new));
645 *old = t;
646 shadow_unlock(v->domain);
647 return (failed == 0);
648 }
651 /**************************************************************************/
652 /* Memory management for shadow pages. */
654 /* Allocating shadow pages
655 * -----------------------
656 *
657 * Most shadow pages are allocated singly, but there is one case where
658 * we need to allocate multiple pages together: shadowing 32-bit guest
659 * tables on PAE or 64-bit shadows. A 32-bit guest l1 table covers 4MB
660 * of virtual address space, and needs to be shadowed by two PAE/64-bit
661 * l1 tables (covering 2MB of virtual address space each). Similarly, a
662 * 32-bit guest l2 table (4GB va) needs to be shadowed by four
663 * PAE/64-bit l2 tables (1GB va each). These multi-page shadows are
664 * contiguous and aligned; functions for handling offsets into them are
665 * defined in shadow.c (shadow_l1_index() etc.)
666 *
667 * This table shows the allocation behaviour of the different modes:
668 *
669 * Xen paging 32b pae pae 64b 64b 64b
670 * Guest paging 32b 32b pae 32b pae 64b
671 * PV or HVM * HVM * HVM HVM *
672 * Shadow paging 32b pae pae pae pae 64b
673 *
674 * sl1 size 4k 8k 4k 8k 4k 4k
675 * sl2 size 4k 16k 4k 16k 4k 4k
676 * sl3 size - - - - - 4k
677 * sl4 size - - - - - 4k
678 *
679 * We allocate memory from xen in four-page units and break them down
680 * with a simple buddy allocator. Can't use the xen allocator to handle
681 * this as it only works for contiguous zones, and a domain's shadow
682 * pool is made of fragments.
683 *
684 * In HVM guests, the p2m table is built out of shadow pages, and we provide
685 * a function for the p2m management to steal pages, in max-order chunks, from
686 * the free pool. We don't provide for giving them back, yet.
687 */
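/* [Editor's illustrative sketch, not from the original file; the helper is
 * hypothetical.] The table above follows from simple span arithmetic: a
 * 32-bit guest l1 maps 1024 entries x 4KB = 4MB of VA, while a PAE/64-bit
 * shadow l1 maps 512 entries x 4KB = 2MB, so each guest l1 needs an order-1
 * (two-page, 8k) shadow; a 32-bit guest l2 (4GB) likewise needs four PAE
 * shadow l2s (1GB each), i.e. an order-2 (16k) allocation. */
static inline unsigned int example_shadow_pages_for_32b_l1(void)
{
    unsigned long guest_l1_span  = 1024UL * 4096;  /* 4MB covered by guest l1 */
    unsigned long shadow_l1_span =  512UL * 4096;  /* 2MB covered by PAE/64b l1 */
    return guest_l1_span / shadow_l1_span;         /* == 2 pages, order 1 */
}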
689 /* Figure out the least acceptable quantity of shadow memory.
690 * The minimum memory requirement for always being able to free up a
691 * chunk of memory is very small -- only three max-order chunks per
692 * vcpu to hold the top level shadows and pages with Xen mappings in them.
693 *
694 * But for a guest to be guaranteed to successfully execute a single
695 * instruction, we must be able to map a large number (about thirty) VAs
696 * at the same time, which means that to guarantee progress, we must
697 * allow for more than ninety allocated pages per vcpu. We round that
698 * up to 128 pages, or half a megabyte per vcpu. */
699 unsigned int shadow_min_acceptable_pages(struct domain *d)
700 {
701 u32 vcpu_count = 0;
702 struct vcpu *v;
704 for_each_vcpu(d, v)
705 vcpu_count++;
707 return (vcpu_count * 128);
708 }
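/* [Editor's worked example, not part of the original file.] The 128-page
 * figure above is 128 * 4KB = 512KB per vcpu ("half a megabyte"); e.g. a
 * 4-vcpu domain has a floor of 512 shadow pages, or 2MB. */
static inline unsigned int example_min_shadow_kb(unsigned int vcpus)
{
    return vcpus * 128 * 4;     /* pages per vcpu times KB per page */
}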
710 /* Figure out the order of allocation needed for a given shadow type */
711 static inline u32
712 shadow_order(unsigned int shadow_type)
713 {
714 #if CONFIG_PAGING_LEVELS > 2
715 static const u32 type_to_order[SH_type_unused] = {
716 0, /* SH_type_none */
717 1, /* SH_type_l1_32_shadow */
718 1, /* SH_type_fl1_32_shadow */
719 2, /* SH_type_l2_32_shadow */
720 0, /* SH_type_l1_pae_shadow */
721 0, /* SH_type_fl1_pae_shadow */
722 0, /* SH_type_l2_pae_shadow */
723 0, /* SH_type_l2h_pae_shadow */
724 0, /* SH_type_l1_64_shadow */
725 0, /* SH_type_fl1_64_shadow */
726 0, /* SH_type_l2_64_shadow */
727 0, /* SH_type_l2h_64_shadow */
728 0, /* SH_type_l3_64_shadow */
729 0, /* SH_type_l4_64_shadow */
730 2, /* SH_type_p2m_table */
731 0 /* SH_type_monitor_table */
732 };
733 ASSERT(shadow_type < SH_type_unused);
734 return type_to_order[shadow_type];
735 #else /* 32-bit Xen only ever shadows 32-bit guests on 32-bit shadows. */
736 return 0;
737 #endif
738 }
741 /* Do we have a free chunk of at least this order? */
742 static inline int chunk_is_available(struct domain *d, int order)
743 {
744 int i;
746 for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
747 if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
748 return 1;
749 return 0;
750 }
752 /* Dispatcher function: call the per-mode function that will unhook the
753 * non-Xen mappings in this top-level shadow mfn */
754 void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
755 {
756 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
757 switch ( sp->type )
758 {
759 case SH_type_l2_32_shadow:
760 #if CONFIG_PAGING_LEVELS == 2
761 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,2,2)(v,smfn);
762 #else
763 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,3,2)(v,smfn);
764 #endif
765 break;
766 #if CONFIG_PAGING_LEVELS >= 3
767 case SH_type_l2_pae_shadow:
768 case SH_type_l2h_pae_shadow:
769 SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
770 break;
771 #endif
772 #if CONFIG_PAGING_LEVELS >= 4
773 case SH_type_l4_64_shadow:
774 SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings,4,4)(v,smfn);
775 break;
776 #endif
777 default:
778 SHADOW_PRINTK("top-level shadow has bad type %08x\n", sp->type);
779 BUG();
780 }
781 }
784 /* Make sure there is at least one chunk of the required order available
785 * in the shadow page pool. This must be called before any calls to
786 * shadow_alloc(). Since this will free existing shadows to make room,
787 * it must be called early enough to avoid freeing shadows that the
788 * caller is currently working on. */
789 void shadow_prealloc(struct domain *d, unsigned int order)
790 {
791 /* Need a vcpu for calling unpins; for now, since we don't have
792 * per-vcpu shadows, any will do */
793 struct vcpu *v, *v2;
794 struct list_head *l, *t;
795 struct shadow_page_info *sp;
796 cpumask_t flushmask = CPU_MASK_NONE;
797 mfn_t smfn;
798 int i;
800 if ( chunk_is_available(d, order) ) return;
802 v = current;
803 if ( v->domain != d )
804 v = d->vcpu[0];
805 ASSERT(v != NULL);
807 /* Stage one: walk the list of pinned pages, unpinning them */
808 perfc_incrc(shadow_prealloc_1);
809 list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows)
810 {
811 sp = list_entry(l, struct shadow_page_info, list);
812 smfn = shadow_page_to_mfn(sp);
814 /* Unpin this top-level shadow */
815 sh_unpin(v, smfn);
817 /* See if that freed up a chunk of appropriate size */
818 if ( chunk_is_available(d, order) ) return;
819 }
821 /* Stage two: all shadow pages are in use in hierarchies that are
822 * loaded in cr3 on some vcpu. Walk them, unhooking the non-Xen
823 * mappings. */
824 perfc_incrc(shadow_prealloc_2);
826 for_each_vcpu(d, v2)
827 for ( i = 0 ; i < 4 ; i++ )
828 {
829 if ( !pagetable_is_null(v2->arch.shadow_table[i]) )
830 {
831 shadow_unhook_mappings(v,
832 pagetable_get_mfn(v2->arch.shadow_table[i]));
833 cpus_or(flushmask, v2->vcpu_dirty_cpumask, flushmask);
835 /* See if that freed up a chunk of appropriate size */
836 if ( chunk_is_available(d, order) )
837 {
838 flush_tlb_mask(flushmask);
839 return;
840 }
841 }
842 }
844 /* Nothing more we can do: all remaining shadows are of pages that
845 * hold Xen mappings for some vcpu. This can never happen. */
846 SHADOW_PRINTK("Can't pre-allocate %i shadow pages!\n"
847 " shadow pages total = %u, free = %u, p2m=%u\n",
848 1 << order,
849 d->arch.paging.shadow.total_pages,
850 d->arch.paging.shadow.free_pages,
851 d->arch.paging.shadow.p2m_pages);
852 BUG();
853 }
855 /* Deliberately free all the memory we can: this will tear down all of
856 * this domain's shadows */
857 static void shadow_blow_tables(struct domain *d)
858 {
859 struct list_head *l, *t;
860 struct shadow_page_info *sp;
861 struct vcpu *v = d->vcpu[0];
862 mfn_t smfn;
863 int i;
865 /* Pass one: unpin all pinned pages */
866 list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows)
867 {
868 sp = list_entry(l, struct shadow_page_info, list);
869 smfn = shadow_page_to_mfn(sp);
870 sh_unpin(v, smfn);
871 }
873 /* Second pass: unhook entries of in-use shadows */
874 for_each_vcpu(d, v)
875 for ( i = 0 ; i < 4 ; i++ )
876 if ( !pagetable_is_null(v->arch.shadow_table[i]) )
877 shadow_unhook_mappings(v,
878 pagetable_get_mfn(v->arch.shadow_table[i]));
880 /* Make sure everyone sees the unshadowings */
881 flush_tlb_mask(d->domain_dirty_cpumask);
882 }
885 #ifndef NDEBUG
886 /* Blow all shadows of all shadowed domains: this can be used to cause the
887 * guest's pagetables to be re-shadowed if we suspect that the shadows
888 * have somehow got out of sync */
889 static void shadow_blow_all_tables(unsigned char c)
890 {
891 struct domain *d;
892 printk("'%c' pressed -> blowing all shadow tables\n", c);
893 rcu_read_lock(&domlist_read_lock);
894 for_each_domain(d)
895 {
896 if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL )
897 {
898 shadow_lock(d);
899 shadow_blow_tables(d);
900 shadow_unlock(d);
901 }
902 }
903 rcu_read_unlock(&domlist_read_lock);
904 }
906 /* Register this function in the Xen console keypress table */
907 static __init int shadow_blow_tables_keyhandler_init(void)
908 {
909 register_keyhandler('S', shadow_blow_all_tables,"reset shadow pagetables");
910 return 0;
911 }
912 __initcall(shadow_blow_tables_keyhandler_init);
913 #endif /* !NDEBUG */
915 /* Allocate another shadow's worth of (contiguous, aligned) pages,
916 * and fill in the type and backpointer fields of their page_infos.
917 * Never fails to allocate. */
918 mfn_t shadow_alloc(struct domain *d,
919 u32 shadow_type,
920 unsigned long backpointer)
921 {
922 struct shadow_page_info *sp = NULL;
923 unsigned int order = shadow_order(shadow_type);
924 cpumask_t mask;
925 void *p;
926 int i;
928 ASSERT(shadow_locked_by_me(d));
929 ASSERT(order <= SHADOW_MAX_ORDER);
930 ASSERT(shadow_type != SH_type_none);
931 perfc_incrc(shadow_alloc);
933 /* Find smallest order which can satisfy the request. */
934 for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
935 if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
936 goto found;
938 /* If we get here, we failed to allocate. This should never happen.
939 * It means that we didn't call shadow_prealloc() correctly before
940 * we allocated. We can't recover by calling prealloc here, because
941 * we might free up higher-level pages that the caller is working on. */
942 SHADOW_PRINTK("Can't allocate %i shadow pages!\n", 1 << order);
943 BUG();
945 found:
946 sp = list_entry(d->arch.paging.shadow.freelists[i].next,
947 struct shadow_page_info, list);
948 list_del(&sp->list);
950 /* We may have to halve the chunk a number of times. */
951 while ( i != order )
952 {
953 i--;
954 sp->order = i;
955 list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]);
956 sp += 1 << i;
957 }
958 d->arch.paging.shadow.free_pages -= 1 << order;
960 /* Init page info fields and clear the pages */
961 for ( i = 0; i < 1<<order ; i++ )
962 {
963 /* Before we overwrite the old contents of this page,
964 * we need to be sure that no TLB holds a pointer to it. */
965 mask = d->domain_dirty_cpumask;
966 tlbflush_filter(mask, sp[i].tlbflush_timestamp);
967 if ( unlikely(!cpus_empty(mask)) )
968 {
969 perfc_incrc(shadow_alloc_tlbflush);
970 flush_tlb_mask(mask);
971 }
972 /* Now safe to clear the page for reuse */
973 p = sh_map_domain_page(shadow_page_to_mfn(sp+i));
974 ASSERT(p != NULL);
975 clear_page(p);
976 sh_unmap_domain_page(p);
977 INIT_LIST_HEAD(&sp[i].list);
978 sp[i].type = shadow_type;
979 sp[i].pinned = 0;
980 sp[i].count = 0;
981 sp[i].backpointer = backpointer;
982 sp[i].next_shadow = NULL;
983 perfc_incr(shadow_alloc_count);
984 }
985 return shadow_page_to_mfn(sp);
986 }
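/* [Editor's illustrative sketch of the halving loop above; standalone and
 * hypothetical.] When the smallest free chunk found is larger than needed,
 * each halving step leaves the low buddy on the freelist of the next order
 * down and keeps the high half, so an order-2 chunk serving an order-0
 * request leaves one order-1 and one order-0 buddy free. */
static unsigned int example_buddy_split(unsigned int found_order,
                                        unsigned int want_order)
{
    unsigned int buddies_freed = 0;
    while ( found_order != want_order )
    {
        found_order--;      /* split the chunk in half ...                 */
        buddies_freed++;    /* ... low half goes back on freelists[order]  */
    }
    return buddies_freed;   /* found=2, want=0 -> 2 buddies left behind    */
}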
989 /* Return some shadow pages to the pool. */
990 void shadow_free(struct domain *d, mfn_t smfn)
991 {
992 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
993 u32 shadow_type;
994 unsigned long order;
995 unsigned long mask;
996 int i;
998 ASSERT(shadow_locked_by_me(d));
999 perfc_incrc(shadow_free);
1001 shadow_type = sp->type;
1002 ASSERT(shadow_type != SH_type_none);
1003 ASSERT(shadow_type != SH_type_p2m_table);
1004 order = shadow_order(shadow_type);
1006 d->arch.paging.shadow.free_pages += 1 << order;
1008 for ( i = 0; i < 1<<order; i++ )
1009 {
1010 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
1011 struct vcpu *v;
1012 for_each_vcpu(d, v)
1013 {
1014 /* No longer safe to look for a writeable mapping in this shadow */
1015 if ( v->arch.paging.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i )
1016 v->arch.paging.shadow.last_writeable_pte_smfn = 0;
1017 }
1018 #endif
1019 /* Strip out the type: this is now a free shadow page */
1020 sp[i].type = 0;
1021 /* Remember the TLB timestamp so we will know whether to flush
1022 * TLBs when we reuse the page. Because the destructors leave the
1023 * contents of the pages in place, we can delay TLB flushes until
1024 * just before the allocator hands the page out again. */
1025 sp[i].tlbflush_timestamp = tlbflush_current_time();
1026 perfc_decr(shadow_alloc_count);
1027 }
1029 /* Merge chunks as far as possible. */
1030 while ( order < SHADOW_MAX_ORDER )
1031 {
1032 mask = 1 << order;
1033 if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
1034 /* Merge with predecessor block? */
1035 if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) )
1036 break;
1037 list_del(&(sp-mask)->list);
1038 sp -= mask;
1039 } else {
1040 /* Merge with successor block? */
1041 if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) )
1042 break;
1043 list_del(&(sp+mask)->list);
1044 }
1045 order++;
1046 }
1048 sp->order = order;
1049 list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
1050 }
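/* [Editor's illustrative sketch, hypothetical name, not from the original
 * file.] The merge loop above uses the address parity of the chunk to find
 * its buddy: at order k, the buddy of a chunk is the neighbouring aligned
 * chunk whose page frame number differs only in bit k. */
static inline unsigned long example_buddy_pfn(unsigned long pfn,
                                              unsigned int order)
{
    return pfn ^ (1UL << order);   /* flip bit 'order' to get the buddy */
}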
1052 /* Divert some memory from the pool to be used by the p2m mapping.
1053 * This action is irreversible: the p2m mapping only ever grows.
1054 * That's OK because the p2m table only exists for translated domains,
1055 * and those domains can't ever turn off shadow mode.
1056 * Also, we only ever allocate a max-order chunk, so as to preserve
1057 * the invariant that shadow_prealloc() always works.
1058 * Returns 0 iff it can't get a chunk (the caller should then
1059 * free up some pages in domheap and call sh_set_allocation);
1060 * returns non-zero on success.
1061 */
1062 static int
1063 sh_alloc_p2m_pages(struct domain *d)
1065 struct page_info *pg;
1066 u32 i;
1067 ASSERT(shadow_locked_by_me(d));
1069 if ( d->arch.paging.shadow.total_pages
1070 < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) )
1071 return 0; /* Not enough shadow memory: need to increase it first */
1073 pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0));
1074 d->arch.paging.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER);
1075 d->arch.paging.shadow.total_pages -= (1<<SHADOW_MAX_ORDER);
1076 for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++)
1078 /* Unlike shadow pages, mark p2m pages as owned by the domain.
1079 * Marking the domain as the owner would normally allow the guest to
1080 * create mappings of these pages, but these p2m pages will never be
1081 * in the domain's guest-physical address space, and so that is not
1082 * believed to be a concern.
1083 */
1084 page_set_owner(&pg[i], d);
1085 pg[i].count_info = 1;
1086 list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist);
1088 return 1;
1091 // Returns 0 if no memory is available...
1092 struct page_info *
1093 shadow_alloc_p2m_page(struct domain *d)
1095 struct list_head *entry;
1096 struct page_info *pg;
1097 mfn_t mfn;
1098 void *p;
1100 shadow_lock(d);
1102 if ( list_empty(&d->arch.paging.shadow.p2m_freelist) &&
1103 !sh_alloc_p2m_pages(d) )
1105 shadow_unlock(d);
1106 return NULL;
1108 entry = d->arch.paging.shadow.p2m_freelist.next;
1109 list_del(entry);
1111 shadow_unlock(d);
1113 pg = list_entry(entry, struct page_info, list);
1114 mfn = page_to_mfn(pg);
1115 p = sh_map_domain_page(mfn);
1116 clear_page(p);
1117 sh_unmap_domain_page(p);
1119 return pg;
1122 void
1123 shadow_free_p2m_page(struct domain *d, struct page_info *pg)
1125 ASSERT(page_get_owner(pg) == d);
1126 /* Should have just the one ref we gave it in alloc_p2m_page() */
1127 if ( (pg->count_info & PGC_count_mask) != 1 )
1129 SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
1130 pg->count_info, pg->u.inuse.type_info);
1132 /* Free should not decrement domain's total allocation, since
1133 * these pages were allocated without an owner. */
1134 page_set_owner(pg, NULL);
1135 free_domheap_pages(pg, 0);
1136 d->arch.paging.shadow.p2m_pages--;
1137 perfc_decr(shadow_alloc_count);
1140 #if CONFIG_PAGING_LEVELS == 3
1141 static void p2m_install_entry_in_monitors(struct domain *d,
1142 l3_pgentry_t *l3e)
1143 /* Special case, only used for external-mode domains on PAE hosts:
1144 * update the mapping of the p2m table. Once again, this is trivial in
1145 * other paging modes (one top-level entry points to the top-level p2m,
1146 * no maintenance needed), but PAE makes life difficult by needing a
1147 * copy of the eight l3es of the p2m table in eight l2h slots in the
1148 * monitor table. This function makes fresh copies when a p2m l3e
1149 * changes. */
1151 l2_pgentry_t *ml2e;
1152 struct vcpu *v;
1153 unsigned int index;
1155 index = ((unsigned long)l3e & ~PAGE_MASK) / sizeof(l3_pgentry_t);
1156 ASSERT(index < MACHPHYS_MBYTES>>1);
1158 for_each_vcpu(d, v)
1160 if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
1161 continue;
1162 ASSERT(shadow_mode_external(v->domain));
1164 SHADOW_DEBUG(P2M, "d=%u v=%u index=%u mfn=%#lx\n",
1165 d->domain_id, v->vcpu_id, index, l3e_get_pfn(*l3e));
1167 if ( v == current ) /* OK to use linear map of monitor_table */
1168 ml2e = __linear_l2_table + l2_linear_offset(RO_MPT_VIRT_START);
1169 else
1171 l3_pgentry_t *ml3e;
1172 ml3e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
1173 ASSERT(l3e_get_flags(ml3e[3]) & _PAGE_PRESENT);
1174 ml2e = sh_map_domain_page(_mfn(l3e_get_pfn(ml3e[3])));
1175 ml2e += l2_table_offset(RO_MPT_VIRT_START);
1176 sh_unmap_domain_page(ml3e);
1178 ml2e[index] = l2e_from_pfn(l3e_get_pfn(*l3e), __PAGE_HYPERVISOR);
1179 if ( v != current )
1180 sh_unmap_domain_page(ml2e);
1183 #endif
1185 /* Set the pool of shadow pages to the required number of pages.
1186 * Input will be rounded up to at least shadow_min_acceptable_pages(),
1187 * plus space for the p2m table.
1188 * Returns 0 for success, non-zero for failure. */
1189 static unsigned int sh_set_allocation(struct domain *d,
1190 unsigned int pages,
1191 int *preempted)
1193 struct shadow_page_info *sp;
1194 unsigned int lower_bound;
1195 int j;
1197 ASSERT(shadow_locked_by_me(d));
1199 /* Don't allocate less than the minimum acceptable, plus one page per
1200 * megabyte of RAM (for the p2m table) */
1201 lower_bound = shadow_min_acceptable_pages(d) + (d->tot_pages / 256);
1202 if ( pages > 0 && pages < lower_bound )
1203 pages = lower_bound;
1204 /* Round up to largest block size */
1205 pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1);
1207 SHADOW_PRINTK("current %i target %i\n",
1208 d->arch.paging.shadow.total_pages, pages);
1210 while ( d->arch.paging.shadow.total_pages != pages )
1212 if ( d->arch.paging.shadow.total_pages < pages )
1214 /* Need to allocate more memory from domheap */
1215 sp = (struct shadow_page_info *)
1216 alloc_domheap_pages(NULL, SHADOW_MAX_ORDER, 0);
1217 if ( sp == NULL )
1219 SHADOW_PRINTK("failed to allocate shadow pages.\n");
1220 return -ENOMEM;
1222 d->arch.paging.shadow.free_pages += 1<<SHADOW_MAX_ORDER;
1223 d->arch.paging.shadow.total_pages += 1<<SHADOW_MAX_ORDER;
1224 for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ )
1226 sp[j].type = 0;
1227 sp[j].pinned = 0;
1228 sp[j].count = 0;
1229 sp[j].mbz = 0;
1230 sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
1232 sp->order = SHADOW_MAX_ORDER;
1233 list_add_tail(&sp->list,
1234 &d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]);
1236 else if ( d->arch.paging.shadow.total_pages > pages )
1238 /* Need to return memory to domheap */
1239 shadow_prealloc(d, SHADOW_MAX_ORDER);
1240 ASSERT(!list_empty(&d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]));
1241 sp = list_entry(d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER].next,
1242 struct shadow_page_info, list);
1243 list_del(&sp->list);
1244 d->arch.paging.shadow.free_pages -= 1<<SHADOW_MAX_ORDER;
1245 d->arch.paging.shadow.total_pages -= 1<<SHADOW_MAX_ORDER;
1246 free_domheap_pages((struct page_info *)sp, SHADOW_MAX_ORDER);
1249 /* Check to see if we need to yield and try again */
1250 if ( preempted && hypercall_preempt_check() )
1252 *preempted = 1;
1253 return 0;
1257 return 0;
1260 /* Return the size of the shadow pool, rounded up to the nearest MB */
1261 static unsigned int shadow_get_allocation(struct domain *d)
1263 unsigned int pg = d->arch.paging.shadow.total_pages;
1264 return ((pg >> (20 - PAGE_SHIFT))
1265 + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
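/* [Editor's worked example, not part of the original file.] With 4KB pages,
 * 20 - PAGE_SHIFT == 8, so the function above returns total_pages / 256
 * rounded up: 256 pages make one megabyte. The same constant appears in
 * sh_set_allocation(), whose lower bound adds d->tot_pages / 256, i.e. one
 * shadow page per MB of guest RAM for the p2m table. */
static inline unsigned int example_pages_to_mb_rounded_up(unsigned int pages)
{
    return (pages + 255) / 256;    /* 256 x 4KB pages per megabyte */
}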
1268 /**************************************************************************/
1269 /* Hash table for storing the guest->shadow mappings.
1270 * The table itself is an array of pointers to shadows; the shadows are then
1271 * threaded on a singly-linked list of shadows with the same hash value */
1273 #define SHADOW_HASH_BUCKETS 251
1274 /* Other possibly useful primes are 509, 1021, 2039, 4093, 8191, 16381 */
1276 /* Hash function that takes a gfn or mfn, plus another byte of type info */
1277 typedef u32 key_t;
1278 static inline key_t sh_hash(unsigned long n, unsigned int t)
1280 unsigned char *p = (unsigned char *)&n;
1281 key_t k = t;
1282 int i;
1283 for ( i = 0; i < sizeof(n) ; i++ ) k = (u32)p[i] + (k<<6) + (k<<16) - k;
1284 return k % SHADOW_HASH_BUCKETS;
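/* [Editor's note: a standalone, hypothetical restatement of the hash above;
 * not part of the original file.] The per-byte step k = p[i] + (k<<6) +
 * (k<<16) - k is the classic sdbm string hash, i.e. k = k * 65599 + byte,
 * folded over the bytes of the MFN/GFN with the shadow type as the seed. */
static inline unsigned int example_sdbm_step(unsigned int k, unsigned char byte)
{
    return k * 65599u + byte;      /* 65599 == (1<<16) + (1<<6) - 1 */
}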
1287 #if SHADOW_AUDIT & (SHADOW_AUDIT_HASH|SHADOW_AUDIT_HASH_FULL)
1289 /* Before we get to the mechanism, define a pair of audit functions
1290 * that sanity-check the contents of the hash table. */
1291 static void sh_hash_audit_bucket(struct domain *d, int bucket)
1292 /* Audit one bucket of the hash table */
1294 struct shadow_page_info *sp, *x;
1296 if ( !(SHADOW_AUDIT_ENABLE) )
1297 return;
1299 sp = d->arch.paging.shadow.hash_table[bucket];
1300 while ( sp )
1302 /* Not a shadow? */
1303 BUG_ON( sp->mbz != 0 );
1304 /* Bogus type? */
1305 BUG_ON( sp->type == 0 );
1306 BUG_ON( sp->type > SH_type_max_shadow );
1307 /* Wrong bucket? */
1308 BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket );
1309 /* Duplicate entry? */
1310 for ( x = sp->next_shadow; x; x = x->next_shadow )
1311 BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type );
1312 /* Follow the backpointer to the guest pagetable */
1313 if ( sp->type != SH_type_fl1_32_shadow
1314 && sp->type != SH_type_fl1_pae_shadow
1315 && sp->type != SH_type_fl1_64_shadow )
1317 struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer));
1318 /* Bad shadow flags on guest page? */
1319 BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) );
1320 /* Bad type count on guest page? */
1321 if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
1322 && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
1324 SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
1325 " but has typecount %#lx\n",
1326 sp->backpointer, mfn_x(shadow_page_to_mfn(sp)),
1327 gpg->u.inuse.type_info);
1328 BUG();
1331 /* That entry was OK; on we go */
1332 sp = sp->next_shadow;
1336 #else
1337 #define sh_hash_audit_bucket(_d, _b) do {} while(0)
1338 #endif /* Hashtable bucket audit */
1341 #if SHADOW_AUDIT & SHADOW_AUDIT_HASH_FULL
1343 static void sh_hash_audit(struct domain *d)
1344 /* Full audit: audit every bucket in the table */
1346 int i;
1348 if ( !(SHADOW_AUDIT_ENABLE) )
1349 return;
1351 for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ )
1353 sh_hash_audit_bucket(d, i);
1357 #else
1358 #define sh_hash_audit(_d) do {} while(0)
1359 #endif /* Hashtable bucket audit */
1361 /* Allocate and initialise the table itself.
1362 * Returns 0 for success, 1 for error. */
1363 static int shadow_hash_alloc(struct domain *d)
1365 struct shadow_page_info **table;
1367 ASSERT(shadow_locked_by_me(d));
1368 ASSERT(!d->arch.paging.shadow.hash_table);
1370 table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS);
1371 if ( !table ) return 1;
1372 memset(table, 0,
1373 SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *));
1374 d->arch.paging.shadow.hash_table = table;
1375 return 0;
1378 /* Tear down the hash table and return all memory to Xen.
1379 * This function does not care whether the table is populated. */
1380 static void shadow_hash_teardown(struct domain *d)
1382 ASSERT(shadow_locked_by_me(d));
1383 ASSERT(d->arch.paging.shadow.hash_table);
1385 xfree(d->arch.paging.shadow.hash_table);
1386 d->arch.paging.shadow.hash_table = NULL;
1390 mfn_t shadow_hash_lookup(struct vcpu *v, unsigned long n, unsigned int t)
1391 /* Find an entry in the hash table. Returns the MFN of the shadow,
1392 * or INVALID_MFN if it doesn't exist */
1394 struct domain *d = v->domain;
1395 struct shadow_page_info *sp, *prev;
1396 key_t key;
1398 ASSERT(shadow_locked_by_me(d));
1399 ASSERT(d->arch.paging.shadow.hash_table);
1400 ASSERT(t);
1402 sh_hash_audit(d);
1404 perfc_incrc(shadow_hash_lookups);
1405 key = sh_hash(n, t);
1406 sh_hash_audit_bucket(d, key);
1408 sp = d->arch.paging.shadow.hash_table[key];
1409 prev = NULL;
1410 while(sp)
1412 if ( sp->backpointer == n && sp->type == t )
1414 /* Pull-to-front if 'sp' isn't already the head item */
1415 if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) )
1417 if ( unlikely(d->arch.paging.shadow.hash_walking != 0) )
1418 /* Can't reorder: someone is walking the hash chains */
1419 return shadow_page_to_mfn(sp);
1420 else
1422 ASSERT(prev);
1423 /* Delete sp from the list */
1424 prev->next_shadow = sp->next_shadow;
1425 /* Re-insert it at the head of the list */
1426 sp->next_shadow = d->arch.paging.shadow.hash_table[key];
1427 d->arch.paging.shadow.hash_table[key] = sp;
1430 else
1432 perfc_incrc(shadow_hash_lookup_head);
1434 return shadow_page_to_mfn(sp);
1436 prev = sp;
1437 sp = sp->next_shadow;
1440 perfc_incrc(shadow_hash_lookup_miss);
1441 return _mfn(INVALID_MFN);
1444 void shadow_hash_insert(struct vcpu *v, unsigned long n, unsigned int t,
1445 mfn_t smfn)
1446 /* Put a mapping (n,t)->smfn into the hash table */
1448 struct domain *d = v->domain;
1449 struct shadow_page_info *sp;
1450 key_t key;
1452 ASSERT(shadow_locked_by_me(d));
1453 ASSERT(d->arch.paging.shadow.hash_table);
1454 ASSERT(t);
1456 sh_hash_audit(d);
1458 perfc_incrc(shadow_hash_inserts);
1459 key = sh_hash(n, t);
1460 sh_hash_audit_bucket(d, key);
1462 /* Insert this shadow at the top of the bucket */
1463 sp = mfn_to_shadow_page(smfn);
1464 sp->next_shadow = d->arch.paging.shadow.hash_table[key];
1465 d->arch.paging.shadow.hash_table[key] = sp;
1467 sh_hash_audit_bucket(d, key);
1470 void shadow_hash_delete(struct vcpu *v, unsigned long n, unsigned int t,
1471 mfn_t smfn)
1472 /* Excise the mapping (n,t)->smfn from the hash table */
1474 struct domain *d = v->domain;
1475 struct shadow_page_info *sp, *x;
1476 key_t key;
1478 ASSERT(shadow_locked_by_me(d));
1479 ASSERT(d->arch.paging.shadow.hash_table);
1480 ASSERT(t);
1482 sh_hash_audit(d);
1484 perfc_incrc(shadow_hash_deletes);
1485 key = sh_hash(n, t);
1486 sh_hash_audit_bucket(d, key);
1488 sp = mfn_to_shadow_page(smfn);
1489 if ( d->arch.paging.shadow.hash_table[key] == sp )
1490 /* Easy case: we're deleting the head item. */
1491 d->arch.paging.shadow.hash_table[key] = sp->next_shadow;
1492 else
1494 /* Need to search for the one we want */
1495 x = d->arch.paging.shadow.hash_table[key];
1496 while ( 1 )
1498 ASSERT(x); /* We can't have hit the end, since our target is
1499 * still in the chain somewhere... */
1500 if ( x->next_shadow == sp )
1502 x->next_shadow = sp->next_shadow;
1503 break;
1505 x = x->next_shadow;
1508 sp->next_shadow = NULL;
1510 sh_hash_audit_bucket(d, key);
1513 typedef int (*hash_callback_t)(struct vcpu *v, mfn_t smfn, mfn_t other_mfn);
1515 static void hash_foreach(struct vcpu *v,
1516 unsigned int callback_mask,
1517 hash_callback_t callbacks[],
1518 mfn_t callback_mfn)
1519 /* Walk the hash table looking at the types of the entries and
1520 * calling the appropriate callback function for each entry.
1521 * The mask determines which shadow types we call back for, and the array
1522 * of callbacks tells us which function to call.
1523 * Any callback may return non-zero to let us skip the rest of the scan.
1525 * WARNING: Callbacks MUST NOT add or remove hash entries unless they
1526 * then return non-zero to terminate the scan. */
1528 int i, done = 0;
1529 struct domain *d = v->domain;
1530 struct shadow_page_info *x;
1532 /* Say we're here, to stop hash-lookups reordering the chains */
1533 ASSERT(shadow_locked_by_me(d));
1534 ASSERT(d->arch.paging.shadow.hash_walking == 0);
1535 d->arch.paging.shadow.hash_walking = 1;
1537 for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ )
1539 /* WARNING: This is not safe against changes to the hash table.
1540 * The callback *must* return non-zero if it has inserted or
1541 * deleted anything from the hash (lookups are OK, though). */
1542 for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow )
1544 if ( callback_mask & (1 << x->type) )
1546 ASSERT(x->type <= 15);
1547 ASSERT(callbacks[x->type] != NULL);
1548 done = callbacks[x->type](v, shadow_page_to_mfn(x),
1549 callback_mfn);
1550 if ( done ) break;
1553 if ( done ) break;
1555 d->arch.paging.shadow.hash_walking = 0;
1559 /**************************************************************************/
1560 /* Destroy a shadow page: simple dispatcher to call the per-type destructor
1561 * which will decrement refcounts appropriately and return memory to the
1562 * free pool. */
1564 void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
1566 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
1567 unsigned int t = sp->type;
1570 SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
1572 /* Double-check, if we can, that the shadowed page belongs to this
1573 * domain, (by following the back-pointer). */
1574 ASSERT(t == SH_type_fl1_32_shadow ||
1575 t == SH_type_fl1_pae_shadow ||
1576 t == SH_type_fl1_64_shadow ||
1577 t == SH_type_monitor_table ||
1578 #ifdef CONFIG_COMPAT
1579 (IS_COMPAT(v->domain) && t == SH_type_l4_64_shadow) ||
1580 #endif
1581 (page_get_owner(mfn_to_page(_mfn(sp->backpointer)))
1582 == v->domain));
1584 /* The down-shifts here are so that the switch statement is on nice
1585 * small numbers that the compiler will enjoy */
1586 switch ( t )
1588 #if CONFIG_PAGING_LEVELS == 2
1589 case SH_type_l1_32_shadow:
1590 case SH_type_fl1_32_shadow:
1591 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 2, 2)(v, smfn);
1592 break;
1593 case SH_type_l2_32_shadow:
1594 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 2, 2)(v, smfn);
1595 break;
1596 #else /* PAE or 64bit */
1597 case SH_type_l1_32_shadow:
1598 case SH_type_fl1_32_shadow:
1599 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 2)(v, smfn);
1600 break;
1601 case SH_type_l2_32_shadow:
1602 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 2)(v, smfn);
1603 break;
1604 #endif
1606 #if CONFIG_PAGING_LEVELS >= 3
1607 case SH_type_l1_pae_shadow:
1608 case SH_type_fl1_pae_shadow:
1609 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 3)(v, smfn);
1610 break;
1611 case SH_type_l2_pae_shadow:
1612 case SH_type_l2h_pae_shadow:
1613 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
1614 break;
1615 #endif
1617 #if CONFIG_PAGING_LEVELS >= 4
1618 case SH_type_l1_64_shadow:
1619 case SH_type_fl1_64_shadow:
1620 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 4, 4)(v, smfn);
1621 break;
1622 case SH_type_l2h_64_shadow:
1623 ASSERT( IS_COMPAT(v->domain) );
1624 /* Fall through... */
1625 case SH_type_l2_64_shadow:
1626 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 4, 4)(v, smfn);
1627 break;
1628 case SH_type_l3_64_shadow:
1629 SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 4, 4)(v, smfn);
1630 break;
1631 case SH_type_l4_64_shadow:
1632 SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, 4, 4)(v, smfn);
1633 break;
1634 #endif
1635 default:
1636 SHADOW_PRINTK("tried to destroy shadow of bad type %08lx\n",
1637 (unsigned long)t);
1638 BUG();
1642 /**************************************************************************/
1643 /* Remove all writeable mappings of a guest frame from the shadow tables
1644 * Returns non-zero if we need to flush TLBs.
1645 * level and fault_addr describe how we found this to be a pagetable;
1646 * level==0 means we have some other reason for revoking write access.*/
1648 int sh_remove_write_access(struct vcpu *v, mfn_t gmfn,
1649 unsigned int level,
1650 unsigned long fault_addr)
1652 /* Dispatch table for getting per-type functions */
1653 static hash_callback_t callbacks[SH_type_unused] = {
1654 NULL, /* none */
1655 #if CONFIG_PAGING_LEVELS == 2
1656 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32 */
1657 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32 */
1658 #else
1659 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32 */
1660 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32 */
1661 #endif
1662 NULL, /* l2_32 */
1663 #if CONFIG_PAGING_LEVELS >= 3
1664 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae */
1665 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */
1666 #else
1667 NULL, /* l1_pae */
1668 NULL, /* fl1_pae */
1669 #endif
1670 NULL, /* l2_pae */
1671 NULL, /* l2h_pae */
1672 #if CONFIG_PAGING_LEVELS >= 4
1673 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64 */
1674 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64 */
1675 #else
1676 NULL, /* l1_64 */
1677 NULL, /* fl1_64 */
1678 #endif
1679 NULL, /* l2_64 */
1680 NULL, /* l2h_64 */
1681 NULL, /* l3_64 */
1682 NULL, /* l4_64 */
1683 NULL, /* p2m */
1684 NULL /* unused */
1685 };
1687 static unsigned int callback_mask =
1688 1 << SH_type_l1_32_shadow
1689 | 1 << SH_type_fl1_32_shadow
1690 | 1 << SH_type_l1_pae_shadow
1691 | 1 << SH_type_fl1_pae_shadow
1692 | 1 << SH_type_l1_64_shadow
1693 | 1 << SH_type_fl1_64_shadow
1695 struct page_info *pg = mfn_to_page(gmfn);
1697 ASSERT(shadow_locked_by_me(v->domain));
1699 /* Only remove writable mappings if we are doing shadow refcounts.
1700 * In guest refcounting, we trust Xen to already be restricting
1701 * all the writes to the guest page tables, so we do not need to
1702 * do more. */
1703 if ( !shadow_mode_refcounts(v->domain) )
1704 return 0;
1706 /* Early exit if it's already a pagetable, or otherwise not writeable */
1707 if ( sh_mfn_is_a_page_table(gmfn)
1708 || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
1709 return 0;
1711 perfc_incrc(shadow_writeable);
1713 /* If this isn't a "normal" writeable page, the domain is trying to
1714 * put pagetables in special memory of some kind. We can't allow that. */
1715 if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_writable_page )
1717 SHADOW_ERROR("can't remove write access to mfn %lx, type_info is %"
1718 PRtype_info "\n",
1719 mfn_x(gmfn), mfn_to_page(gmfn)->u.inuse.type_info);
1720 domain_crash(v->domain);
1723 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
1724 if ( v == current && level != 0 )
1726 unsigned long gfn;
1727 /* Heuristic: there is likely to be only one writeable mapping,
1728 * and that mapping is likely to be in the current pagetable,
1729 * in the guest's linear map (on non-HIGHPTE linux and windows)*/
1731 #define GUESS(_a, _h) do { \
1732 if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \
1733 perfc_incrc(shadow_writeable_h_ ## _h); \
1734 if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \
1735 return 1; \
1736 } while (0)
1739 if ( v->arch.paging.mode->guest_levels == 2 )
1741 if ( level == 1 )
1742 /* 32bit non-PAE w2k3: linear map at 0xC0000000 */
1743 GUESS(0xC0000000UL + (fault_addr >> 10), 1);
1745 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */
1746 if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 )
1747 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4);
1750 #if CONFIG_PAGING_LEVELS >= 3
1751 else if ( v->arch.paging.mode->guest_levels == 3 )
1753 /* 32bit PAE w2k3: linear map at 0xC0000000 */
1754 switch ( level )
1756 case 1: GUESS(0xC0000000UL + (fault_addr >> 9), 2); break;
1757 case 2: GUESS(0xC0600000UL + (fault_addr >> 18), 2); break;
1760 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */
1761 if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 )
1762 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4);
1764 #if CONFIG_PAGING_LEVELS >= 4
1765 else if ( v->arch.paging.mode->guest_levels == 4 )
1767 /* 64bit w2k3: linear map at 0x0000070000000000 */
1768 switch ( level )
1770 case 1: GUESS(0x70000000000UL + (fault_addr >> 9), 3); break;
1771 case 2: GUESS(0x70380000000UL + (fault_addr >> 18), 3); break;
1772 case 3: GUESS(0x70381C00000UL + (fault_addr >> 27), 3); break;
1775 /* 64bit Linux direct map at 0xffff810000000000; older kernels
1776 * had it at 0x0000010000000000UL */
1777 gfn = mfn_to_gfn(v->domain, gmfn);
1778 GUESS(0xffff810000000000UL + (gfn << PAGE_SHIFT), 4);
1779 GUESS(0x0000010000000000UL + (gfn << PAGE_SHIFT), 4);
1781 #endif /* CONFIG_PAGING_LEVELS >= 4 */
1782 #endif /* CONFIG_PAGING_LEVELS >= 3 */
1784 #undef GUESS
1787 if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
1788 return 1;
1790 /* Second heuristic: on HIGHPTE linux, there are two particular PTEs
1791 * (entries in the fixmap) where linux maps its pagetables. Since
1792 * we expect to hit them most of the time, we start the search for
1793 * the writeable mapping by looking at the same MFN where the last
1794 * brute-force search succeeded. */
1796 if ( v->arch.paging.shadow.last_writeable_pte_smfn != 0 )
1798 unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
1799 mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn);
1800 int shtype = mfn_to_shadow_page(last_smfn)->type;
1802 if ( callbacks[shtype] )
1803 callbacks[shtype](v, last_smfn, gmfn);
1805 if ( (pg->u.inuse.type_info & PGT_count_mask) != old_count )
1806 perfc_incrc(shadow_writeable_h_5);
1809 if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
1810 return 1;
1812 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
1814 /* Brute-force search of all the shadows, by walking the hash */
1815 perfc_incrc(shadow_writeable_bf);
1816 hash_foreach(v, callback_mask, callbacks, gmfn);
1818 /* If that didn't catch the mapping, something is very wrong */
1819 if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) != 0 )
1821 SHADOW_ERROR("can't find all writeable mappings of mfn %lx: "
1822 "%lu left\n", mfn_x(gmfn),
1823 (mfn_to_page(gmfn)->u.inuse.type_info&PGT_count_mask));
1824 domain_crash(v->domain);
1827 /* We killed at least one writeable mapping, so must flush TLBs. */
1828 return 1;
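/* The level-1 GUESS() calls above rely on the guest keeping a linear
 * mapping of its own pagetables: with 4kB pages, the PTE for virtual
 * address va lives at linear_base + (va >> PAGE_SHIFT) * sizeof(pte).
 * The code folds the multiply into the shift (>> 10 for 4-byte PTEs,
 * >> 9 for 8-byte ones), which differs from the exact PTE address only
 * in its low bits and so still lands in the right pagetable page; the
 * level-2 and level-3 guesses apply the same idea one level up.  A
 * minimal standalone sketch of that arithmetic; the helper name is
 * illustrative and not part of the shadow code: */
static inline unsigned long
sh_linear_pte_guess(unsigned long linear_base, unsigned long va,
                    unsigned int pte_bytes)
{
    /* 4-byte PTEs: shift by 12 - 2 = 10; 8-byte PTEs: by 12 - 3 = 9 */
    unsigned int shift = (pte_bytes == 4) ? 10 : 9;
    return linear_base + (va >> shift);
}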
1833 /**************************************************************************/
1834 /* Remove all mappings of a guest frame from the shadow tables.
1835 * Returns non-zero if we need to flush TLBs. */
1837 int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
1839 struct page_info *page = mfn_to_page(gmfn);
1840 int expected_count, do_locking;
1842 /* Dispatch table for getting per-type functions */
1843 static hash_callback_t callbacks[SH_type_unused] = {
1844 NULL, /* none */
1845 #if CONFIG_PAGING_LEVELS == 2
1846 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32 */
1847 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32 */
1848 #else
1849 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32 */
1850 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32 */
1851 #endif
1852 NULL, /* l2_32 */
1853 #if CONFIG_PAGING_LEVELS >= 3
1854 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae */
1855 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */
1856 #else
1857 NULL, /* l1_pae */
1858 NULL, /* fl1_pae */
1859 #endif
1860 NULL, /* l2_pae */
1861 NULL, /* l2h_pae */
1862 #if CONFIG_PAGING_LEVELS >= 4
1863 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64 */
1864 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64 */
1865 #else
1866 NULL, /* l1_64 */
1867 NULL, /* fl1_64 */
1868 #endif
1869 NULL, /* l2_64 */
1870 NULL, /* l2h_64 */
1871 NULL, /* l3_64 */
1872 NULL, /* l4_64 */
1873 NULL, /* p2m */
1874 NULL /* unused */
1875 };
1877 static unsigned int callback_mask =
1878 1 << SH_type_l1_32_shadow
1879 | 1 << SH_type_fl1_32_shadow
1880 | 1 << SH_type_l1_pae_shadow
1881 | 1 << SH_type_fl1_pae_shadow
1882 | 1 << SH_type_l1_64_shadow
1883 | 1 << SH_type_fl1_64_shadow
1886 perfc_incrc(shadow_mappings);
1887 if ( (page->count_info & PGC_count_mask) == 0 )
1888 return 0;
1890 /* Although this is an externally visible function, we do not know
1891 * whether the shadow lock will be held when it is called (since it
1892 * can be called via put_page_type when we clear a shadow l1e).
1893 * If the lock isn't held, take it for the duration of the call. */
1894 do_locking = !shadow_locked_by_me(v->domain);
1895 if ( do_locking ) shadow_lock(v->domain);
1897 /* XXX TODO:
1898 * Heuristics for finding the (probably) single mapping of this gmfn */
1900 /* Brute-force search of all the shadows, by walking the hash */
1901 perfc_incrc(shadow_mappings_bf);
1902 hash_foreach(v, callback_mask, callbacks, gmfn);
1904 /* If that didn't catch the mapping, something is very wrong */
1905 expected_count = (page->count_info & PGC_allocated) ? 1 : 0;
1906 if ( (page->count_info & PGC_count_mask) != expected_count )
1908 /* Don't complain if we're in HVM and there are some extra mappings:
1909 * The qemu helper process has an untyped mapping of this dom's RAM
1910 * and the HVM restore program takes another. */
1911 if ( !(shadow_mode_external(v->domain)
1912 && (page->count_info & PGC_count_mask) <= 3
1913 && (page->u.inuse.type_info & PGT_count_mask) == 0) )
1915 SHADOW_ERROR("can't find all mappings of mfn %lx: "
1916 "c=%08x t=%08lx\n", mfn_x(gmfn),
1917 page->count_info, page->u.inuse.type_info);
1921 if ( do_locking ) shadow_unlock(v->domain);
1923 /* We killed at least one mapping, so must flush TLBs. */
1924 return 1;
1928 /**************************************************************************/
1929 /* Remove all shadows of a guest frame from the shadow tables */
1931 static int sh_remove_shadow_via_pointer(struct vcpu *v, mfn_t smfn)
1932 /* Follow this shadow's up-pointer, if it has one, and remove the reference
1933 * found there. Returns 1 if that was the only reference to this shadow */
1935 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
1936 mfn_t pmfn;
1937 void *vaddr;
1938 int rc;
1940 ASSERT(sp->type > 0);
1941 ASSERT(sp->type < SH_type_max_shadow);
1942 ASSERT(sp->type != SH_type_l2_32_shadow);
1943 ASSERT(sp->type != SH_type_l2_pae_shadow);
1944 ASSERT(sp->type != SH_type_l2h_pae_shadow);
1945 ASSERT(sp->type != SH_type_l4_64_shadow);
1947 if (sp->up == 0) return 0;
1948 pmfn = _mfn(sp->up >> PAGE_SHIFT);
1949 ASSERT(mfn_valid(pmfn));
1950 vaddr = sh_map_domain_page(pmfn);
1951 ASSERT(vaddr);
1952 vaddr += sp->up & (PAGE_SIZE-1);
1953 ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
1955 /* Is this the only reference to this shadow? */
1956 rc = (sp->count == 1) ? 1 : 0;
1958 /* Blank the offending entry */
1959 switch (sp->type)
1961 case SH_type_l1_32_shadow:
1962 case SH_type_l2_32_shadow:
1963 #if CONFIG_PAGING_LEVELS == 2
1964 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,2,2)(v, vaddr, pmfn);
1965 #else
1966 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,2)(v, vaddr, pmfn);
1967 #endif
1968 break;
1969 #if CONFIG_PAGING_LEVELS >=3
1970 case SH_type_l1_pae_shadow:
1971 case SH_type_l2_pae_shadow:
1972 case SH_type_l2h_pae_shadow:
1973 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
1974 break;
1975 #if CONFIG_PAGING_LEVELS >= 4
1976 case SH_type_l1_64_shadow:
1977 case SH_type_l2_64_shadow:
1978 case SH_type_l2h_64_shadow:
1979 case SH_type_l3_64_shadow:
1980 case SH_type_l4_64_shadow:
1981 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,4,4)(v, vaddr, pmfn);
1982 break;
1983 #endif
1984 #endif
1985     default: BUG(); /* Some weird unknown shadow type */
1988 sh_unmap_domain_page(vaddr);
1989 if ( rc )
1990 perfc_incrc(shadow_up_pointer);
1991 else
1992 perfc_incrc(shadow_unshadow_bf);
1994 return rc;
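/* sh_remove_shadow_via_pointer() above decodes sp->up as a packed
 * machine address: the frame number of the shadow page that holds the
 * single referencing entry in the high bits, and the byte offset of
 * that entry within the page in the low PAGE_SHIFT bits.  A minimal
 * sketch of that encoding; the helper names are illustrative, not the
 * shadow code's own: */
static inline unsigned long
sh_pack_up_pointer(unsigned long mfn, unsigned long byte_offset)
{
    return (mfn << PAGE_SHIFT) | (byte_offset & (PAGE_SIZE - 1));
}

static inline void
sh_unpack_up_pointer(unsigned long up, unsigned long *mfn,
                     unsigned long *byte_offset)
{
    *mfn = up >> PAGE_SHIFT;             /* which page holds the entry */
    *byte_offset = up & (PAGE_SIZE - 1); /* where in that page it sits */
}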
1997 void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
1998 /* Remove the shadows of this guest page.
1999 * If fast != 0, just try the quick heuristic, which will remove
2000 * at most one reference to each shadow of the page. Otherwise, walk
2001 * all the shadow tables looking for refs to shadows of this gmfn.
2002 * If all != 0, kill the domain if we can't find all the shadows.
2003 * (all != 0 implies fast == 0)
2004 */
2006 struct page_info *pg = mfn_to_page(gmfn);
2007 mfn_t smfn;
2008 u32 sh_flags;
2009 int do_locking;
2010 unsigned char t;
2012 /* Dispatch table for getting per-type functions: each level must
2013 * be called with the function to remove a lower-level shadow. */
2014 static hash_callback_t callbacks[SH_type_unused] = {
2015 NULL, /* none */
2016 NULL, /* l1_32 */
2017 NULL, /* fl1_32 */
2018 #if CONFIG_PAGING_LEVELS == 2
2019 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,2,2), /* l2_32 */
2020 #else
2021 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,2), /* l2_32 */
2022 #endif
2023 NULL, /* l1_pae */
2024 NULL, /* fl1_pae */
2025 #if CONFIG_PAGING_LEVELS >= 3
2026 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae */
2027 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
2028 #else
2029 NULL, /* l2_pae */
2030 NULL, /* l2h_pae */
2031 #endif
2032 NULL, /* l1_64 */
2033 NULL, /* fl1_64 */
2034 #if CONFIG_PAGING_LEVELS >= 4
2035 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,4,4), /* l2_64 */
2036 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,4,4), /* l2h_64 */
2037 SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,4,4), /* l3_64 */
2038 SHADOW_INTERNAL_NAME(sh_remove_l3_shadow,4,4), /* l4_64 */
2039 #else
2040 NULL, /* l2_64 */
2041 NULL, /* l2h_64 */
2042 NULL, /* l3_64 */
2043 NULL, /* l4_64 */
2044 #endif
2045 NULL, /* p2m */
2046 NULL /* unused */
2047 };
2049 /* Another lookup table, for choosing which mask to use */
2050 static unsigned int masks[SH_type_unused] = {
2051 0, /* none */
2052 1 << SH_type_l2_32_shadow, /* l1_32 */
2053 0, /* fl1_32 */
2054 0, /* l2_32 */
2055 ((1 << SH_type_l2h_pae_shadow)
2056 | (1 << SH_type_l2_pae_shadow)), /* l1_pae */
2057 0, /* fl1_pae */
2058 0, /* l2_pae */
2059 0, /* l2h_pae */
2060 ((1 << SH_type_l2h_64_shadow)
2061 | (1 << SH_type_l2_64_shadow)), /* l1_64 */
2062 0, /* fl1_64 */
2063 1 << SH_type_l3_64_shadow, /* l2_64 */
2064 1 << SH_type_l3_64_shadow, /* l2h_64 */
2065 1 << SH_type_l4_64_shadow, /* l3_64 */
2066 0, /* l4_64 */
2067 0, /* p2m */
2068 0 /* unused */
2069 };
2071 ASSERT(!(all && fast));
2073 /* Although this is an externally visible function, we do not know
2074 * whether the shadow lock will be held when it is called (since it
2075 * can be called via put_page_type when we clear a shadow l1e).
2076 * If the lock isn't held, take it for the duration of the call. */
2077 do_locking = !shadow_locked_by_me(v->domain);
2078 if ( do_locking ) shadow_lock(v->domain);
2080 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
2081 v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
2083 /* Bail out now if the page is not shadowed */
2084 if ( (pg->count_info & PGC_page_table) == 0 )
2086 if ( do_locking ) shadow_unlock(v->domain);
2087 return;
2090 /* Search for this shadow in all appropriate shadows */
2091 perfc_incrc(shadow_unshadow);
2092 sh_flags = pg->shadow_flags;
2094 /* Lower-level shadows need to be excised from upper-level shadows.
2095 * This call to hash_foreach() looks dangerous but is in fact OK: each
2096 * call will remove at most one shadow, and terminate immediately when
2097 * it does remove it, so we never walk the hash after doing a deletion. */
2098 #define DO_UNSHADOW(_type) do { \
2099 t = (_type); \
2100 smfn = shadow_hash_lookup(v, mfn_x(gmfn), t); \
2101 if ( unlikely(!mfn_valid(smfn)) ) \
2102 { \
2103 SHADOW_ERROR(": gmfn %#lx has flags 0x%"PRIx32 \
2104 " but no type-0x%"PRIx32" shadow\n", \
2105 mfn_x(gmfn), sh_flags, t); \
2106 break; \
2107 } \
2108 if ( sh_type_is_pinnable(v, t) ) \
2109 sh_unpin(v, smfn); \
2110 else \
2111 sh_remove_shadow_via_pointer(v, smfn); \
2112 if ( (pg->count_info & PGC_page_table) && !fast ) \
2113 hash_foreach(v, masks[t], callbacks, smfn); \
2114 } while (0)
2116 if ( sh_flags & SHF_L1_32 ) DO_UNSHADOW(SH_type_l1_32_shadow);
2117 if ( sh_flags & SHF_L2_32 ) DO_UNSHADOW(SH_type_l2_32_shadow);
2118 #if CONFIG_PAGING_LEVELS >= 3
2119 if ( sh_flags & SHF_L1_PAE ) DO_UNSHADOW(SH_type_l1_pae_shadow);
2120 if ( sh_flags & SHF_L2_PAE ) DO_UNSHADOW(SH_type_l2_pae_shadow);
2121 if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(SH_type_l2h_pae_shadow);
2122 #if CONFIG_PAGING_LEVELS >= 4
2123 if ( sh_flags & SHF_L1_64 ) DO_UNSHADOW(SH_type_l1_64_shadow);
2124 if ( sh_flags & SHF_L2_64 ) DO_UNSHADOW(SH_type_l2_64_shadow);
2125 if ( sh_flags & SHF_L2H_64 ) DO_UNSHADOW(SH_type_l2h_64_shadow);
2126 if ( sh_flags & SHF_L3_64 ) DO_UNSHADOW(SH_type_l3_64_shadow);
2127 if ( sh_flags & SHF_L4_64 ) DO_UNSHADOW(SH_type_l4_64_shadow);
2128 #endif
2129 #endif
2131 #undef DO_UNSHADOW
2133 /* If that didn't catch the shadows, something is wrong */
2134 if ( !fast && (pg->count_info & PGC_page_table) )
2136 SHADOW_ERROR("can't find all shadows of mfn %05lx "
2137 "(shadow_flags=%08lx)\n",
2138 mfn_x(gmfn), pg->shadow_flags);
2139 if ( all )
2140 domain_crash(v->domain);
2143 /* Need to flush TLBs now, so that linear maps are safe next time we
2144 * take a fault. */
2145 flush_tlb_mask(v->domain->domain_dirty_cpumask);
2147 if ( do_locking ) shadow_unlock(v->domain);
2150 static void
2151 sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
2152 /* Even harsher: this is an HVM page that we think is no longer a pagetable.
2153 * Unshadow it, and recursively unshadow pages that reference it. */
2155 sh_remove_shadows(v, gmfn, 0, 1);
2156 /* XXX TODO:
2157 * Rework this hashtable walker to return a linked-list of all
2158 * the shadows it modified, then do breadth-first recursion
2159 * to find the way up to higher-level tables and unshadow them too.
2161 * The current code (just tearing down each page's shadows as we
2162 * detect that it is not a pagetable) is correct, but very slow.
2163 * It means extra emulated writes and slows down removal of mappings. */
2166 /**************************************************************************/
2168 static void sh_update_paging_modes(struct vcpu *v)
2170 struct domain *d = v->domain;
2171 struct paging_mode *old_mode = v->arch.paging.mode;
2172 mfn_t old_guest_table;
2174 ASSERT(shadow_locked_by_me(d));
2176 // Valid transitions handled by this function:
2177 // - For PV guests:
2178 // - after a shadow mode has been changed
2179 // - For HVM guests:
2180 // - after a shadow mode has been changed
2181 // - changes in CR0.PG, CR4.PAE, CR4.PSE, or CR4.PGE
2182 //
2184 // First, tear down any old shadow tables held by this vcpu.
2185 //
2186 if ( v->arch.paging.mode )
2187 v->arch.paging.mode->shadow.detach_old_tables(v);
2189 if ( !is_hvm_domain(d) )
2191 ///
2192 /// PV guest
2193 ///
2194 #if CONFIG_PAGING_LEVELS == 4
2195 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
2196 #elif CONFIG_PAGING_LEVELS == 3
2197 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
2198 #elif CONFIG_PAGING_LEVELS == 2
2199 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
2200 #else
2201 #error unexpected paging mode
2202 #endif
2203 v->arch.paging.translate_enabled = !!shadow_mode_translate(d);
2205 else
2207 ///
2208 /// HVM guest
2209 ///
2210 ASSERT(shadow_mode_translate(d));
2211 ASSERT(shadow_mode_external(d));
2213 v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);
2214 if ( !v->arch.paging.translate_enabled )
2216 /* Set v->arch.guest_table to use the p2m map, and choose
2217 * the appropriate shadow mode */
2218 old_guest_table = pagetable_get_mfn(v->arch.guest_table);
2219 #if CONFIG_PAGING_LEVELS == 2
2220 v->arch.guest_table =
2221 pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
2222 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
2223 #elif CONFIG_PAGING_LEVELS == 3
2224 v->arch.guest_table =
2225 pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
2226 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
2227 #else /* CONFIG_PAGING_LEVELS == 4 */
2229 l4_pgentry_t *l4e;
2230 /* Use the start of the first l3 table as a PAE l3 */
2231 ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
2232 l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
2233 ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
2234 v->arch.guest_table =
2235 pagetable_from_pfn(l4e_get_pfn(l4e[0]));
2236 sh_unmap_domain_page(l4e);
2238 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
2239 #endif
2240 /* Fix up refcounts on guest_table */
2241 get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d);
2242 if ( mfn_x(old_guest_table) != 0 )
2243 put_page(mfn_to_page(old_guest_table));
2245 else
2247 #ifdef __x86_64__
2248 if ( hvm_long_mode_enabled(v) )
2250 // long mode guest...
2251 v->arch.paging.mode =
2252 &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4);
2254 else
2255 #endif
2256 if ( hvm_pae_enabled(v) )
2258 #if CONFIG_PAGING_LEVELS >= 3
2259 // 32-bit PAE mode guest...
2260 v->arch.paging.mode =
2261 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3);
2262 #else
2263 SHADOW_ERROR("PAE not supported in 32-bit Xen\n");
2264 domain_crash(d);
2265 return;
2266 #endif
2268 else
2270 // 32-bit 2 level guest...
2271 #if CONFIG_PAGING_LEVELS >= 3
2272 v->arch.paging.mode =
2273 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2);
2274 #else
2275 v->arch.paging.mode =
2276 &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2);
2277 #endif
2281 if ( pagetable_is_null(v->arch.monitor_table) )
2283 mfn_t mmfn = v->arch.paging.mode->shadow.make_monitor_table(v);
2284 v->arch.monitor_table = pagetable_from_mfn(mmfn);
2285 make_cr3(v, mfn_x(mmfn));
2286 hvm_update_host_cr3(v);
2289 if ( v->arch.paging.mode != old_mode )
2291 SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u "
2292 "(was g=%u s=%u)\n",
2293 d->domain_id, v->vcpu_id,
2294 is_hvm_domain(d) ? !!hvm_paging_enabled(v) : 1,
2295 v->arch.paging.mode->guest_levels,
2296 v->arch.paging.mode->shadow.shadow_levels,
2297 old_mode ? old_mode->guest_levels : 0,
2298 old_mode ? old_mode->shadow.shadow_levels : 0);
2299 if ( old_mode &&
2300 (v->arch.paging.mode->shadow.shadow_levels !=
2301 old_mode->shadow.shadow_levels) )
2303 /* Need to make a new monitor table for the new mode */
2304 mfn_t new_mfn, old_mfn;
2306 if ( v != current && vcpu_runnable(v) )
2308 SHADOW_ERROR("Some third party (d=%u v=%u) is changing "
2309 "this HVM vcpu's (d=%u v=%u) paging mode "
2310 "while it is running.\n",
2311 current->domain->domain_id, current->vcpu_id,
2312 v->domain->domain_id, v->vcpu_id);
2313 /* It's not safe to do that because we can't change
2314              * the host CR3 for a running domain */
2315 domain_crash(v->domain);
2316 return;
2319 old_mfn = pagetable_get_mfn(v->arch.monitor_table);
2320 v->arch.monitor_table = pagetable_null();
2321 new_mfn = v->arch.paging.mode->shadow.make_monitor_table(v);
2322 v->arch.monitor_table = pagetable_from_mfn(new_mfn);
2323 SHADOW_PRINTK("new monitor table %"PRI_mfn "\n",
2324 mfn_x(new_mfn));
2326 /* Don't be running on the old monitor table when we
2327 * pull it down! Switch CR3, and warn the HVM code that
2328 * its host cr3 has changed. */
2329 make_cr3(v, mfn_x(new_mfn));
2330 if ( v == current )
2331 write_ptbase(v);
2332 hvm_update_host_cr3(v);
2333 old_mode->shadow.destroy_monitor_table(v, old_mfn);
2337 // XXX -- Need to deal with changes in CR4.PSE and CR4.PGE.
2338 // These are HARD: think about the case where two CPU's have
2339 // different values for CR4.PSE and CR4.PGE at the same time.
2340 // This *does* happen, at least for CR4.PGE...
2343 v->arch.paging.mode->update_cr3(v, 0);
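/* Summary of the HVM mode choice above on a 64-bit build
 * (CONFIG_PAGING_LEVELS == 4): long-mode guests are shadowed with
 * 4-level shadows, PAE guests with 3-level shadows, 2-level guests run
 * in the 3-on-2 mode, and non-paged guests run 3-on-3 on top of the
 * p2m map.  A standalone sketch of the resulting shadow level per
 * guest level; illustrative only, the real decision is the chain of
 * hvm_*_enabled() tests in sh_update_paging_modes(): */
static inline unsigned int
sh_shadow_levels_for_guest(unsigned int guest_levels)
{
    /* On 4-level Xen, 2-level guests get 3-level (PAE-style) shadows */
    return (guest_levels == 2) ? 3 : guest_levels;
}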
2346 void shadow_update_paging_modes(struct vcpu *v)
2348 shadow_lock(v->domain);
2349 sh_update_paging_modes(v);
2350 shadow_unlock(v->domain);
2353 /**************************************************************************/
2354 /* Turning on and off shadow features */
2356 static void sh_new_mode(struct domain *d, u32 new_mode)
2357 /* Inform all the vcpus that the shadow mode has been changed */
2359 struct vcpu *v;
2361 ASSERT(shadow_locked_by_me(d));
2362 ASSERT(d != current->domain);
2363 d->arch.paging.mode = new_mode;
2364 for_each_vcpu(d, v)
2365 sh_update_paging_modes(v);
2368 int shadow_enable(struct domain *d, u32 mode)
2369 /* Turn on "permanent" shadow features: external, translate, refcount.
2370 * Can only be called once on a domain, and these features cannot be
2371 * disabled.
2372 * Returns 0 for success, -errno for failure. */
2374 unsigned int old_pages;
2375 int rv = 0;
2377 mode |= PG_SH_enable;
2379 domain_pause(d);
2381 /* Sanity check the arguments */
2382 if ( (d == current->domain) ||
2383 shadow_mode_enabled(d) ||
2384 ((mode & PG_translate) && !(mode & PG_refcounts)) ||
2385 ((mode & PG_external) && !(mode & PG_translate)) )
2387 rv = -EINVAL;
2388 goto out_unlocked;
2391 /* Init the shadow memory allocation if the user hasn't done so */
2392 old_pages = d->arch.paging.shadow.total_pages;
2393 if ( old_pages == 0 )
2395 unsigned int r;
2396 shadow_lock(d);
2397 r = sh_set_allocation(d, 256, NULL); /* Use at least 1MB */
2398 shadow_unlock(d);
2399 if ( r != 0 )
2401 sh_set_allocation(d, 0, NULL);
2402 rv = -ENOMEM;
2403 goto out_unlocked;
2407 /* Init the P2M table. Must be done before we take the shadow lock
2408 * to avoid possible deadlock. */
2409 if ( mode & PG_translate )
2411 rv = p2m_alloc_table(d, shadow_alloc_p2m_page, shadow_free_p2m_page);
2412 if (rv != 0)
2413 goto out_unlocked;
2416 shadow_lock(d);
2418 /* Sanity check again with the lock held */
2419 if ( shadow_mode_enabled(d) )
2421 rv = -EINVAL;
2422 goto out_locked;
2425 /* Init the hash table */
2426 if ( shadow_hash_alloc(d) != 0 )
2428 rv = -ENOMEM;
2429 goto out_locked;
2432 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
2433 /* We assume we're dealing with an older 64bit linux guest until we
2434 * see the guest use more than one l4 per vcpu. */
2435 d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
2436 #endif
2438 /* Update the bits */
2439 sh_new_mode(d, mode);
2441 out_locked:
2442 shadow_unlock(d);
2443 out_unlocked:
2444 if ( rv != 0 && !pagetable_is_null(d->arch.phys_table) )
2445 p2m_teardown(d);
2446 domain_unpause(d);
2447 return rv;
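/* The sanity check in shadow_enable() admits only "layered" feature
 * sets: PG_external requires PG_translate, which in turn requires
 * PG_refcounts, so an external (HVM-style) configuration necessarily
 * carries all three bits, and PG_refcounts|PG_translate is what the
 * ENABLE_TRANSLATE domctl below requests.  A standalone sketch of that
 * rule; the helper name is illustrative only: */
static inline int
sh_mode_bits_are_consistent(u32 mode)
{
    if ( (mode & PG_translate) && !(mode & PG_refcounts) )
        return 0;
    if ( (mode & PG_external) && !(mode & PG_translate) )
        return 0;
    return 1;
}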
2450 void shadow_teardown(struct domain *d)
2451 /* Destroy the shadow pagetables of this domain and free its shadow memory.
2452 * Should only be called for dying domains. */
2454 struct vcpu *v;
2455 mfn_t mfn;
2456 struct list_head *entry, *n;
2457 struct page_info *pg;
2459 ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
2460 ASSERT(d != current->domain);
2462 if ( !shadow_locked_by_me(d) )
2463 shadow_lock(d); /* Keep various asserts happy */
2465 if ( shadow_mode_enabled(d) )
2467 /* Release the shadow and monitor tables held by each vcpu */
2468 for_each_vcpu(d, v)
2470 if ( v->arch.paging.mode )
2472 v->arch.paging.mode->shadow.detach_old_tables(v);
2473 if ( shadow_mode_external(d) )
2475 mfn = pagetable_get_mfn(v->arch.monitor_table);
2476 if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
2477 v->arch.paging.mode->shadow.destroy_monitor_table(v, mfn);
2478 v->arch.monitor_table = pagetable_null();
2484 list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
2486 list_del(entry);
2487 pg = list_entry(entry, struct page_info, list);
2488 shadow_free_p2m_page(d, pg);
2491 if ( d->arch.paging.shadow.total_pages != 0 )
2493 SHADOW_PRINTK("teardown of domain %u starts."
2494 " Shadow pages total = %u, free = %u, p2m=%u\n",
2495 d->domain_id,
2496 d->arch.paging.shadow.total_pages,
2497 d->arch.paging.shadow.free_pages,
2498 d->arch.paging.shadow.p2m_pages);
2499 /* Destroy all the shadows and release memory to domheap */
2500 sh_set_allocation(d, 0, NULL);
2501 /* Release the hash table back to xenheap */
2502 if (d->arch.paging.shadow.hash_table)
2503 shadow_hash_teardown(d);
2504 /* Release the log-dirty bitmap of dirtied pages */
2505 sh_free_log_dirty_bitmap(d);
2506 /* Should not have any more memory held */
2507 SHADOW_PRINTK("teardown done."
2508 " Shadow pages total = %u, free = %u, p2m=%u\n",
2509 d->arch.paging.shadow.total_pages,
2510 d->arch.paging.shadow.free_pages,
2511 d->arch.paging.shadow.p2m_pages);
2512 ASSERT(d->arch.paging.shadow.total_pages == 0);
2515 /* We leave the "permanent" shadow modes enabled, but clear the
2516 * log-dirty mode bit. We don't want any more mark_dirty()
2517 * calls now that we've torn down the bitmap */
2518 d->arch.paging.mode &= ~PG_log_dirty;
2520 shadow_unlock(d);
2523 void shadow_final_teardown(struct domain *d)
2524 /* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */
2526 SHADOW_PRINTK("dom %u final teardown starts."
2527 " Shadow pages total = %u, free = %u, p2m=%u\n",
2528 d->domain_id,
2529 d->arch.paging.shadow.total_pages,
2530 d->arch.paging.shadow.free_pages,
2531 d->arch.paging.shadow.p2m_pages);
2533 /* Double-check that the domain didn't have any shadow memory.
2534 * It is possible for a domain that never got domain_kill()ed
2535 * to get here with its shadow allocation intact. */
2536 if ( d->arch.paging.shadow.total_pages != 0 )
2537 shadow_teardown(d);
2539 /* It is now safe to pull down the p2m map. */
2540 p2m_teardown(d);
2542 SHADOW_PRINTK("dom %u final teardown done."
2543 " Shadow pages total = %u, free = %u, p2m=%u\n",
2544 d->domain_id,
2545 d->arch.paging.shadow.total_pages,
2546 d->arch.paging.shadow.free_pages,
2547 d->arch.paging.shadow.p2m_pages);
2550 static int shadow_one_bit_enable(struct domain *d, u32 mode)
2551 /* Turn on a single shadow mode feature */
2553 ASSERT(shadow_locked_by_me(d));
2555 /* Sanity check the call */
2556 if ( d == current->domain || (d->arch.paging.mode & mode) == mode )
2558 return -EINVAL;
2561 mode |= PG_SH_enable;
2563 if ( d->arch.paging.mode == 0 )
2565 /* Init the shadow memory allocation and the hash table */
2566 if ( sh_set_allocation(d, 1, NULL) != 0
2567 || shadow_hash_alloc(d) != 0 )
2569 sh_set_allocation(d, 0, NULL);
2570 return -ENOMEM;
2574 /* Update the bits */
2575 sh_new_mode(d, d->arch.paging.mode | mode);
2577 return 0;
2580 static int shadow_one_bit_disable(struct domain *d, u32 mode)
2581 /* Turn off a single shadow mode feature */
2583 struct vcpu *v;
2584 ASSERT(shadow_locked_by_me(d));
2586 /* Sanity check the call */
2587 if ( d == current->domain || !((d->arch.paging.mode & mode) == mode) )
2589 return -EINVAL;
2592 /* Update the bits */
2593 sh_new_mode(d, d->arch.paging.mode & ~mode);
2594 if ( d->arch.paging.mode == 0 )
2596 /* Get this domain off shadows */
2597 SHADOW_PRINTK("un-shadowing of domain %u starts."
2598 " Shadow pages total = %u, free = %u, p2m=%u\n",
2599 d->domain_id,
2600 d->arch.paging.shadow.total_pages,
2601 d->arch.paging.shadow.free_pages,
2602 d->arch.paging.shadow.p2m_pages);
2603 for_each_vcpu(d, v)
2605 if ( v->arch.paging.mode )
2606 v->arch.paging.mode->shadow.detach_old_tables(v);
2607 #if CONFIG_PAGING_LEVELS == 4
2608 if ( !(v->arch.flags & TF_kernel_mode) )
2609 make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
2610 else
2611 #endif
2612 make_cr3(v, pagetable_get_pfn(v->arch.guest_table));
2616 /* Pull down the memory allocation */
2617 if ( sh_set_allocation(d, 0, NULL) != 0 )
2619 // XXX - How can this occur?
2620 // Seems like a bug to return an error now that we've
2621 // disabled the relevant shadow mode.
2622 //
2623 return -ENOMEM;
2625 shadow_hash_teardown(d);
2626 SHADOW_PRINTK("un-shadowing of domain %u done."
2627 " Shadow pages total = %u, free = %u, p2m=%u\n",
2628 d->domain_id,
2629 d->arch.paging.shadow.total_pages,
2630 d->arch.paging.shadow.free_pages,
2631 d->arch.paging.shadow.p2m_pages);
2634 return 0;
2637 /* Enable/disable ops for the "test" and "log-dirty" modes */
2638 static int shadow_test_enable(struct domain *d)
2640 int ret;
2642 domain_pause(d);
2643 shadow_lock(d);
2644 ret = shadow_one_bit_enable(d, PG_SH_enable);
2645 shadow_unlock(d);
2646 domain_unpause(d);
2648 return ret;
2651 static int shadow_test_disable(struct domain *d)
2653 int ret;
2655 domain_pause(d);
2656 shadow_lock(d);
2657 ret = shadow_one_bit_disable(d, PG_SH_enable);
2658 shadow_unlock(d);
2659 domain_unpause(d);
2661 return ret;
2664 static int
2665 sh_alloc_log_dirty_bitmap(struct domain *d)
2667 ASSERT(d->arch.paging.shadow.dirty_bitmap == NULL);
2668 d->arch.paging.shadow.dirty_bitmap_size =
2669 (arch_get_max_pfn(d) + (BITS_PER_LONG - 1)) &
2670 ~(BITS_PER_LONG - 1);
2671 d->arch.paging.shadow.dirty_bitmap =
2672 xmalloc_array(unsigned long,
2673 d->arch.paging.shadow.dirty_bitmap_size / BITS_PER_LONG);
2674 if ( d->arch.paging.shadow.dirty_bitmap == NULL )
2676 d->arch.paging.shadow.dirty_bitmap_size = 0;
2677 return -ENOMEM;
2679 memset(d->arch.paging.shadow.dirty_bitmap, 0, d->arch.paging.shadow.dirty_bitmap_size/8);
2681 return 0;
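/* Worked example of the sizing above, assuming BITS_PER_LONG == 64: a
 * guest whose highest PFN is 1000 gets dirty_bitmap_size rounded up to
 * 1024 bits, i.e. 1024/64 = 16 unsigned longs, and the memset clears
 * 1024/8 = 128 bytes.  A standalone sketch of the same rounding; the
 * helper name is illustrative only: */
static inline unsigned long
sh_dirty_bitmap_longs(unsigned long max_pfn)
{
    unsigned long bits = (max_pfn + (BITS_PER_LONG - 1)) &
                         ~(BITS_PER_LONG - 1);
    return bits / BITS_PER_LONG;
}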
2684 static void
2685 sh_free_log_dirty_bitmap(struct domain *d)
2687 d->arch.paging.shadow.dirty_bitmap_size = 0;
2688 if ( d->arch.paging.shadow.dirty_bitmap )
2690 xfree(d->arch.paging.shadow.dirty_bitmap);
2691 d->arch.paging.shadow.dirty_bitmap = NULL;
2695 static int shadow_log_dirty_enable(struct domain *d)
2697 int ret;
2699 domain_pause(d);
2700 shadow_lock(d);
2702 if ( shadow_mode_log_dirty(d) )
2704 ret = -EINVAL;
2705 goto out;
2708 if ( shadow_mode_enabled(d) )
2710 /* This domain already has some shadows: need to clear them out
2711 * of the way to make sure that all references to guest memory are
2712 * properly write-protected */
2713 shadow_blow_tables(d);
2716 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
2717 if ( IS_COMPAT(d) )
2718 d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
2719 #endif
2721 ret = sh_alloc_log_dirty_bitmap(d);
2722 if ( ret != 0 )
2724 sh_free_log_dirty_bitmap(d);
2725 goto out;
2728 ret = shadow_one_bit_enable(d, PG_log_dirty);
2729 if ( ret != 0 )
2730 sh_free_log_dirty_bitmap(d);
2732 out:
2733 shadow_unlock(d);
2734 domain_unpause(d);
2735 return ret;
2738 static int shadow_log_dirty_disable(struct domain *d)
2740 int ret;
2742 domain_pause(d);
2743 shadow_lock(d);
2744 ret = shadow_one_bit_disable(d, PG_log_dirty);
2745 if ( !shadow_mode_log_dirty(d) )
2746 sh_free_log_dirty_bitmap(d);
2747 shadow_unlock(d);
2748 domain_unpause(d);
2750 return ret;
2753 /**************************************************************************/
2754 /* P2M map manipulations */
2756 /* Shadow-specific code which should be called when a P2M table entry is
2757  * updated with new content. It is responsible for updating the entry, as
2758  * well as for other shadow processing jobs.
2759 */
2760 void
2761 shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
2762 l1_pgentry_t new, unsigned int level)
2764 struct domain *d = v->domain;
2765 mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
2766 mfn_t mfn;
2768 shadow_lock(d);
2770 /* handle physmap_add and physmap_remove */
2771 mfn = gfn_to_mfn(d, gfn);
2772 if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
2773 sh_remove_all_shadows_and_parents(v, mfn);
2774 if ( sh_remove_all_mappings(v, mfn) )
2775 flush_tlb_mask(d->domain_dirty_cpumask);
2778 /* update the entry with new content */
2779 safe_write_pte(p, new);
2781 /* The P2M can be shadowed: keep the shadows synced */
2782 if ( d->vcpu[0] != NULL )
2783 (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, p, sizeof(*p));
2785 /* install P2M in monitors for PAE Xen */
2786 #if CONFIG_PAGING_LEVELS == 3
2787 if ( level == 3 ) {
2788 struct vcpu *v;
2789 /* We have written to the p2m l3: need to sync the per-vcpu
2790 * copies of it in the monitor tables */
2791 p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p);
2792 /* Also, any vcpus running on shadows of the p2m need to
2793 * reload their CR3s so the change propagates to the shadow */
2794 for_each_vcpu(d, v) {
2795 if ( pagetable_get_pfn(v->arch.guest_table)
2796 == pagetable_get_pfn(d->arch.phys_table)
2797 && v->arch.paging.mode != NULL )
2798 v->arch.paging.mode->update_cr3(v, 0);
2801 #endif
2803 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
2804 /* If we're doing FAST_FAULT_PATH, then shadow mode may have
2805 cached the fact that this is an mmio region in the shadow
2806 page tables. Blow the tables away to remove the cache.
2807    This is pretty heavy-handed, but this is a rare operation
2808 (it might happen a dozen times during boot and then never
2809 again), so it doesn't matter too much. */
2810 shadow_blow_tables(d);
2811 #endif
2813 shadow_unlock(d);
2816 /**************************************************************************/
2817 /* Log-dirty mode support */
2819 /* Convert a shadow to log-dirty mode. */
2820 void shadow_convert_to_log_dirty(struct vcpu *v, mfn_t smfn)
2822 BUG();
2826 /* Read a domain's log-dirty bitmap and stats.
2827 * If the operation is a CLEAN, clear the bitmap and stats as well. */
2828 static int shadow_log_dirty_op(
2829 struct domain *d, struct xen_domctl_shadow_op *sc)
2831 int i, rv = 0, clean = 0, peek = 1;
2833 domain_pause(d);
2834 shadow_lock(d);
2836 clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
2838 SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
2839 (clean) ? "clean" : "peek",
2840 d->domain_id,
2841 d->arch.paging.shadow.fault_count,
2842 d->arch.paging.shadow.dirty_count);
2844 sc->stats.fault_count = d->arch.paging.shadow.fault_count;
2845 sc->stats.dirty_count = d->arch.paging.shadow.dirty_count;
2847 if ( clean )
2849 /* Need to revoke write access to the domain's pages again.
2850 * In future, we'll have a less heavy-handed approach to this,
2851 * but for now, we just unshadow everything except Xen. */
2852 shadow_blow_tables(d);
2854 d->arch.paging.shadow.fault_count = 0;
2855 d->arch.paging.shadow.dirty_count = 0;
2858 if ( guest_handle_is_null(sc->dirty_bitmap) )
2859 /* caller may have wanted just to clean the state or access stats. */
2860 peek = 0;
2862 if ( (peek || clean) && (d->arch.paging.shadow.dirty_bitmap == NULL) )
2864 rv = -EINVAL; /* perhaps should be ENOMEM? */
2865 goto out;
2868 if ( sc->pages > d->arch.paging.shadow.dirty_bitmap_size )
2869 sc->pages = d->arch.paging.shadow.dirty_bitmap_size;
2871 #define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
2872 for ( i = 0; i < sc->pages; i += CHUNK )
2874 int bytes = ((((sc->pages - i) > CHUNK)
2875 ? CHUNK
2876 : (sc->pages - i)) + 7) / 8;
2878 if ( likely(peek) )
2880 if ( copy_to_guest_offset(
2881 sc->dirty_bitmap, i/8,
2882 (uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), bytes) )
2884 rv = -EFAULT;
2885 goto out;
2889 if ( clean )
2890 memset((uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), 0, bytes);
2892 #undef CHUNK
2894 out:
2895 shadow_unlock(d);
2896 domain_unpause(d);
2897 return rv;
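/* Worked example of the CHUNK arithmetic above: CHUNK is 8*1024 pages
 * (i.e. bits), so a full iteration copies (8192 + 7) / 8 = 1024 bytes,
 * the "1kB chunks" promised in the comment, while a final partial
 * chunk of, say, 100 pages copies (100 + 7) / 8 = 13 bytes.  A
 * standalone sketch of the per-iteration byte count; illustrative
 * only: */
static inline int
sh_logdirty_chunk_bytes(int pages_left, int chunk)
{
    return ((pages_left > chunk ? chunk : pages_left) + 7) / 8;
}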
2901 /* Mark a page as dirty */
2902 void sh_mark_dirty(struct domain *d, mfn_t gmfn)
2904 unsigned long pfn;
2905 int do_locking;
2907 if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) )
2908 return;
2910 /* Although this is an externally visible function, we do not know
2911 * whether the shadow lock will be held when it is called (since it
2912 * can be called from __hvm_copy during emulation).
2913 * If the lock isn't held, take it for the duration of the call. */
2914 do_locking = !shadow_locked_by_me(d);
2915 if ( do_locking ) shadow_lock(d);
2917 ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL);
2919 /* We /really/ mean PFN here, even for non-translated guests. */
2920 pfn = get_gpfn_from_mfn(mfn_x(gmfn));
2922 /*
2923 * Values with the MSB set denote MFNs that aren't really part of the
2924 * domain's pseudo-physical memory map (e.g., the shared info frame).
2925 * Nothing to do here...
2926 */
2927 if ( unlikely(!VALID_M2P(pfn)) )
2928         { if ( do_locking ) shadow_unlock(d); return; }
2930 /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
2931 if ( likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) )
2933 if ( !__test_and_set_bit(pfn, d->arch.paging.shadow.dirty_bitmap) )
2935 SHADOW_DEBUG(LOGDIRTY,
2936 "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
2937 mfn_x(gmfn), pfn, d->domain_id);
2938 d->arch.paging.shadow.dirty_count++;
2941 else
2943 SHADOW_PRINTK("mark_dirty OOR! "
2944 "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
2945 "owner=%d c=%08x t=%" PRtype_info "\n",
2946 mfn_x(gmfn),
2947 pfn,
2948 d->arch.paging.shadow.dirty_bitmap_size,
2949 d->domain_id,
2950 (page_get_owner(mfn_to_page(gmfn))
2951 ? page_get_owner(mfn_to_page(gmfn))->domain_id
2952 : -1),
2953 mfn_to_page(gmfn)->count_info,
2954 mfn_to_page(gmfn)->u.inuse.type_info);
2957 if ( do_locking ) shadow_unlock(d);
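/* The VALID_M2P() test above implements the rule described in the
 * comment: an M2P entry with its most-significant bit set does not
 * name a real PFN in the guest's pseudo-physical map, so there is no
 * bit to set for it.  A standalone sketch of that test; this is an
 * illustration, not the real macro: */
static inline int
sh_m2p_entry_is_valid_pfn(unsigned long pfn)
{
    return !(pfn & (1UL << (BITS_PER_LONG - 1)));
}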
2960 /**************************************************************************/
2961 /* Shadow-control XEN_DOMCTL dispatcher */
2963 int shadow_domctl(struct domain *d,
2964 xen_domctl_shadow_op_t *sc,
2965 XEN_GUEST_HANDLE(void) u_domctl)
2967 int rc, preempted = 0;
2969 if ( unlikely(d == current->domain) )
2971 gdprintk(XENLOG_INFO, "Don't try to do a shadow op on yourself!\n");
2972 return -EINVAL;
2975 switch ( sc->op )
2977 case XEN_DOMCTL_SHADOW_OP_OFF:
2978 if ( shadow_mode_log_dirty(d) )
2979 if ( (rc = shadow_log_dirty_disable(d)) != 0 )
2980 return rc;
2981 if ( d->arch.paging.mode == PG_SH_enable )
2982 if ( (rc = shadow_test_disable(d)) != 0 )
2983 return rc;
2984 return 0;
2986 case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
2987 return shadow_test_enable(d);
2989 case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
2990 return shadow_log_dirty_enable(d);
2992 case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
2993 return shadow_enable(d, PG_refcounts|PG_translate);
2995 case XEN_DOMCTL_SHADOW_OP_CLEAN:
2996 case XEN_DOMCTL_SHADOW_OP_PEEK:
2997 return shadow_log_dirty_op(d, sc);
2999 case XEN_DOMCTL_SHADOW_OP_ENABLE:
3000 if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
3001 return shadow_log_dirty_enable(d);
3002 return shadow_enable(d, sc->mode << PG_mode_shift);
3004 case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
3005 sc->mb = shadow_get_allocation(d);
3006 return 0;
3008 case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
3009 shadow_lock(d);
3010 if ( sc->mb == 0 && shadow_mode_enabled(d) )
3012 /* Can't set the allocation to zero unless the domain stops using
3013 * shadow pagetables first */
3014 SHADOW_ERROR("Can't set shadow allocation to zero, domain %u"
3015 " is still using shadows.\n", d->domain_id);
3016 shadow_unlock(d);
3017 return -EINVAL;
3019 rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
3020 shadow_unlock(d);
3021 if ( preempted )
3022 /* Not finished. Set up to re-run the call. */
3023 rc = hypercall_create_continuation(
3024 __HYPERVISOR_domctl, "h", u_domctl);
3025 else
3026 /* Finished. Return the new allocation */
3027 sc->mb = shadow_get_allocation(d);
3028 return rc;
3030 default:
3031 SHADOW_ERROR("Bad shadow op %u\n", sc->op);
3032 return -EINVAL;
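/* Worked example of the allocation-size conversion in the
 * SET_ALLOCATION case above: with 4kB pages (PAGE_SHIFT == 12),
 * sc->mb << (20 - PAGE_SHIFT) turns megabytes into pages, so 1MB
 * becomes 256 pages, the same figure shadow_enable() passes to
 * sh_set_allocation() as its "at least 1MB" minimum.  A standalone
 * sketch; the helper name is illustrative only: */
static inline unsigned int
sh_shadow_mb_to_pages(unsigned int mb)
{
    return mb << (20 - PAGE_SHIFT);
}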
3037 /**************************************************************************/
3038 /* Auditing shadow tables */
3040 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL
3042 void shadow_audit_tables(struct vcpu *v)
3044 /* Dispatch table for getting per-type functions */
3045 static hash_callback_t callbacks[SH_type_unused] = {
3046 NULL, /* none */
3047 #if CONFIG_PAGING_LEVELS == 2
3048 SHADOW_INTERNAL_NAME(sh_audit_l1_table,2,2), /* l1_32 */
3049 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,2,2), /* fl1_32 */
3050 SHADOW_INTERNAL_NAME(sh_audit_l2_table,2,2), /* l2_32 */
3051 #else
3052 SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,2), /* l1_32 */
3053 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,2), /* fl1_32 */
3054 SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,2), /* l2_32 */
3055 SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,3), /* l1_pae */
3056 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
3057 SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2_pae */
3058 SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2h_pae */
3059 #if CONFIG_PAGING_LEVELS >= 4
3060 SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4), /* l1_64 */
3061 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64 */
3062 SHADOW_INTERNAL_NAME(sh_audit_l2_table,4,4), /* l2_64 */
3063 SHADOW_INTERNAL_NAME(sh_audit_l2_table,4,4), /* l2h_64 */
3064 SHADOW_INTERNAL_NAME(sh_audit_l3_table,4,4), /* l3_64 */
3065 SHADOW_INTERNAL_NAME(sh_audit_l4_table,4,4), /* l4_64 */
3066 #endif /* CONFIG_PAGING_LEVELS >= 4 */
3067 #endif /* CONFIG_PAGING_LEVELS > 2 */
3068 NULL /* All the rest */
3069 };
3070 unsigned int mask;
3072 if ( !(SHADOW_AUDIT_ENABLE) )
3073 return;
3075 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL )
3076 mask = ~1; /* Audit every table in the system */
3077 else
3079 /* Audit only the current mode's tables */
3080 switch ( v->arch.paging.mode->guest_levels )
3082 case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
3083 case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
3084 |SHF_L2H_PAE); break;
3085 case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64
3086 |SHF_L3_64|SHF_L4_64); break;
3087 default: BUG();
3091     hash_foreach(v, mask, callbacks, _mfn(INVALID_MFN));
3094 #endif /* Shadow audit */
3096 /*
3097 * Local variables:
3098 * mode: C
3099 * c-set-style: "BSD"
3100 * c-basic-offset: 4
3101 * indent-tabs-mode: nil
3102 * End:
3103 */