ia64/xen-unstable

view xen/arch/x86/mm/shadow/common.c @ 14058:97826d77bd4d

Use RCU for domain_list and domain_hash.

Signed-off-by: Jose Renato Santos <jsantos@hpl.hp.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Feb 21 16:13:49 2007 +0000 (2007-02-21)
parents 4990b2236f06
children b010e556fe2c
line source
1 /******************************************************************************
2 * arch/x86/mm/shadow/common.c
3 *
4 * Shadow code that does not need to be multiply compiled.
5 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
6 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
7 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
24 #include <xen/config.h>
25 #include <xen/types.h>
26 #include <xen/mm.h>
27 #include <xen/trace.h>
28 #include <xen/sched.h>
29 #include <xen/perfc.h>
30 #include <xen/irq.h>
31 #include <xen/domain_page.h>
32 #include <xen/guest_access.h>
33 #include <xen/keyhandler.h>
34 #include <asm/event.h>
35 #include <asm/page.h>
36 #include <asm/current.h>
37 #include <asm/flushtlb.h>
38 #include <asm/shadow.h>
39 #include <asm/shared.h>
40 #include "private.h"
43 /* Set up the shadow-specific parts of a domain struct at start of day.
44 * Called for every domain from arch_domain_create() */
45 void shadow_domain_init(struct domain *d)
46 {
47 int i;
48 shadow_lock_init(d);
49 for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
50 INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
51 INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
52 INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
53 }
55 /* Set up the shadow-specific parts of a vcpu struct. Note: the most important
56 * job is to initialize the update_paging_modes() function pointer, which is
57 * used to initialize the rest of the resources. Therefore, it really does not
58 * matter which mode v->arch.paging.mode points to initially, as long as it can
59 * be compiled.
60 */
61 void shadow_vcpu_init(struct vcpu *v)
62 {
63 #if CONFIG_PAGING_LEVELS == 4
64 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
65 #elif CONFIG_PAGING_LEVELS == 3
66 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
67 #elif CONFIG_PAGING_LEVELS == 2
68 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
69 #endif
70 }
72 #if SHADOW_AUDIT
73 int shadow_audit_enable = 0;
75 static void shadow_audit_key(unsigned char key)
76 {
77 shadow_audit_enable = !shadow_audit_enable;
78 printk("%s shadow_audit_enable=%d\n",
79 __func__, shadow_audit_enable);
80 }
82 static int __init shadow_audit_key_init(void)
83 {
84 register_keyhandler(
85 'O', shadow_audit_key, "toggle shadow audits");
86 return 0;
87 }
88 __initcall(shadow_audit_key_init);
89 #endif /* SHADOW_AUDIT */
91 static void sh_free_log_dirty_bitmap(struct domain *d);
93 int _shadow_mode_refcounts(struct domain *d)
94 {
95 return shadow_mode_refcounts(d);
96 }
99 /**************************************************************************/
100 /* x86 emulator support for the shadow code
101 */
103 struct segment_register *hvm_get_seg_reg(
104 enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt)
105 {
106 struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg];
107 if ( !__test_and_set_bit(seg, &sh_ctxt->valid_seg_regs) )
108 hvm_get_segment_register(current, seg, seg_reg);
109 return seg_reg;
110 }
112 enum hvm_access_type {
113 hvm_access_insn_fetch, hvm_access_read, hvm_access_write
114 };
116 static int hvm_translate_linear_addr(
117 enum x86_segment seg,
118 unsigned long offset,
119 unsigned int bytes,
120 enum hvm_access_type access_type,
121 struct sh_emulate_ctxt *sh_ctxt,
122 unsigned long *paddr)
123 {
124 struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt);
125 unsigned long limit, addr = offset;
126 uint32_t last_byte;
128 if ( sh_ctxt->ctxt.addr_size != 64 )
129 {
130 /*
131 * COMPATIBILITY MODE: Apply segment checks and add base.
132 */
134 switch ( access_type )
135 {
136 case hvm_access_read:
137 if ( (reg->attr.fields.type & 0xa) == 0x8 )
138 goto gpf; /* execute-only code segment */
139 break;
140 case hvm_access_write:
141 if ( (reg->attr.fields.type & 0xa) != 0x2 )
142 goto gpf; /* not a writable data segment */
143 break;
144 default:
145 break;
146 }
148 /* Calculate the segment limit, including granularity flag. */
149 limit = reg->limit;
150 if ( reg->attr.fields.g )
151 limit = (limit << 12) | 0xfff;
153 last_byte = offset + bytes - 1;
155 /* Is this a grows-down data segment? Special limit check if so. */
156 if ( (reg->attr.fields.type & 0xc) == 0x4 )
157 {
158 /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
159 if ( !reg->attr.fields.db )
160 last_byte = (uint16_t)last_byte;
162 /* Check first byte and last byte against respective bounds. */
163 if ( (offset <= limit) || (last_byte < offset) )
164 goto gpf;
165 }
166 else if ( (last_byte > limit) || (last_byte < offset) )
167 goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */
169 /*
170 * Hardware truncates to 32 bits in compatibility mode.
171 * It does not truncate to 16 bits in 16-bit address-size mode.
172 */
173 addr = (uint32_t)(addr + reg->base);
174 }
175 else
176 {
177 /*
178 * LONG MODE: FS and GS add segment base. Addresses must be canonical.
179 */
181 if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
182 addr += reg->base;
184 if ( !is_canonical_address(addr) )
185 goto gpf;
186 }
188 *paddr = addr;
189 return 0;
191 gpf:
192 /* Inject #GP(0). */
193 hvm_inject_exception(TRAP_gp_fault, 0, 0);
194 return X86EMUL_PROPAGATE_FAULT;
195 }
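/*
 * Illustrative sketch, not part of the original file: the limit
 * arithmetic above restated over plain integers, so the expand-up and
 * expand-down (grows-down data segment) cases are easy to compare.
 * All names here are hypothetical; the real checks are the ones in
 * hvm_translate_linear_addr() itself.
 */
static inline int example_seg_range_ok(unsigned long offset,
                                       unsigned int bytes,
                                       unsigned long raw_limit,
                                       int g_bit, int expand_down,
                                       int db_bit)
{
    unsigned long limit = raw_limit;
    unsigned long last_byte = offset + bytes - 1;

    if ( g_bit )                  /* limit is counted in 4K granules */
        limit = (limit << 12) | 0xfff;

    if ( expand_down )
    {
        /* Valid offsets lie *above* the limit, up to 64K or 4GB. */
        if ( !db_bit )
            last_byte = (uint16_t)last_byte;
        return (offset > limit) && (last_byte >= offset);
    }

    /* Expand-up: valid offsets are [0, limit], and must not wrap. */
    return (last_byte <= limit) && (last_byte >= offset);
}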
197 static int
198 hvm_read(enum x86_segment seg,
199 unsigned long offset,
200 unsigned long *val,
201 unsigned int bytes,
202 enum hvm_access_type access_type,
203 struct sh_emulate_ctxt *sh_ctxt)
204 {
205 unsigned long addr;
206 int rc, errcode;
208 rc = hvm_translate_linear_addr(
209 seg, offset, bytes, access_type, sh_ctxt, &addr);
210 if ( rc )
211 return rc;
213 *val = 0;
214 // XXX -- this is WRONG.
215 // It entirely ignores the permissions in the page tables.
216 // In this case, that is only a user vs supervisor access check.
217 //
218 if ( (rc = hvm_copy_from_guest_virt(val, addr, bytes)) == 0 )
219 return X86EMUL_CONTINUE;
221 /* If we got here, there was nothing mapped here, or a bad GFN
222 * was mapped here. This should never happen: we're here because
223 * of a write fault at the end of the instruction we're emulating. */
224 SHADOW_PRINTK("read failed to va %#lx\n", addr);
225 errcode = ring_3(sh_ctxt->ctxt.regs) ? PFEC_user_mode : 0;
226 if ( access_type == hvm_access_insn_fetch )
227 errcode |= PFEC_insn_fetch;
228 hvm_inject_exception(TRAP_page_fault, errcode, addr + bytes - rc);
229 return X86EMUL_PROPAGATE_FAULT;
230 }
232 static int
233 hvm_emulate_read(enum x86_segment seg,
234 unsigned long offset,
235 unsigned long *val,
236 unsigned int bytes,
237 struct x86_emulate_ctxt *ctxt)
238 {
239 return hvm_read(seg, offset, val, bytes, hvm_access_read,
240 container_of(ctxt, struct sh_emulate_ctxt, ctxt));
241 }
243 static int
244 hvm_emulate_insn_fetch(enum x86_segment seg,
245 unsigned long offset,
246 unsigned long *val,
247 unsigned int bytes,
248 struct x86_emulate_ctxt *ctxt)
249 {
250 struct sh_emulate_ctxt *sh_ctxt =
251 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
252 unsigned int insn_off = offset - ctxt->regs->eip;
254 /* Fall back if requested bytes are not in the prefetch cache. */
255 if ( unlikely((insn_off + bytes) > sh_ctxt->insn_buf_bytes) )
256 return hvm_read(seg, offset, val, bytes,
257 hvm_access_insn_fetch, sh_ctxt);
259 /* Hit the cache. Simple memcpy. */
260 *val = 0;
261 memcpy(val, &sh_ctxt->insn_buf[insn_off], bytes);
262 return X86EMUL_CONTINUE;
263 }
265 static int
266 hvm_emulate_write(enum x86_segment seg,
267 unsigned long offset,
268 unsigned long val,
269 unsigned int bytes,
270 struct x86_emulate_ctxt *ctxt)
271 {
272 struct sh_emulate_ctxt *sh_ctxt =
273 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
274 struct vcpu *v = current;
275 unsigned long addr;
276 int rc;
278 rc = hvm_translate_linear_addr(
279 seg, offset, bytes, hvm_access_write, sh_ctxt, &addr);
280 if ( rc )
281 return rc;
283 return v->arch.paging.mode->shadow.x86_emulate_write(
284 v, addr, &val, bytes, sh_ctxt);
285 }
287 static int
288 hvm_emulate_cmpxchg(enum x86_segment seg,
289 unsigned long offset,
290 unsigned long old,
291 unsigned long new,
292 unsigned int bytes,
293 struct x86_emulate_ctxt *ctxt)
294 {
295 struct sh_emulate_ctxt *sh_ctxt =
296 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
297 struct vcpu *v = current;
298 unsigned long addr;
299 int rc;
301 rc = hvm_translate_linear_addr(
302 seg, offset, bytes, hvm_access_write, sh_ctxt, &addr);
303 if ( rc )
304 return rc;
306 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
307 v, addr, old, new, bytes, sh_ctxt);
308 }
310 static int
311 hvm_emulate_cmpxchg8b(enum x86_segment seg,
312 unsigned long offset,
313 unsigned long old_lo,
314 unsigned long old_hi,
315 unsigned long new_lo,
316 unsigned long new_hi,
317 struct x86_emulate_ctxt *ctxt)
318 {
319 struct sh_emulate_ctxt *sh_ctxt =
320 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
321 struct vcpu *v = current;
322 unsigned long addr;
323 int rc;
325 rc = hvm_translate_linear_addr(
326 seg, offset, 8, hvm_access_write, sh_ctxt, &addr);
327 if ( rc )
328 return rc;
330 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
331 v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
332 }
334 static struct x86_emulate_ops hvm_shadow_emulator_ops = {
335 .read = hvm_emulate_read,
336 .insn_fetch = hvm_emulate_insn_fetch,
337 .write = hvm_emulate_write,
338 .cmpxchg = hvm_emulate_cmpxchg,
339 .cmpxchg8b = hvm_emulate_cmpxchg8b,
340 };
342 static int
343 pv_emulate_read(enum x86_segment seg,
344 unsigned long offset,
345 unsigned long *val,
346 unsigned int bytes,
347 struct x86_emulate_ctxt *ctxt)
348 {
349 unsigned int rc;
351 *val = 0;
352 if ( (rc = copy_from_user((void *)val, (void *)offset, bytes)) != 0 )
353 {
354 propagate_page_fault(offset + bytes - rc, 0); /* read fault */
355 return X86EMUL_PROPAGATE_FAULT;
356 }
358 return X86EMUL_CONTINUE;
359 }
361 static int
362 pv_emulate_write(enum x86_segment seg,
363 unsigned long offset,
364 unsigned long val,
365 unsigned int bytes,
366 struct x86_emulate_ctxt *ctxt)
367 {
368 struct sh_emulate_ctxt *sh_ctxt =
369 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
370 struct vcpu *v = current;
371 return v->arch.paging.mode->shadow.x86_emulate_write(
372 v, offset, &val, bytes, sh_ctxt);
373 }
375 static int
376 pv_emulate_cmpxchg(enum x86_segment seg,
377 unsigned long offset,
378 unsigned long old,
379 unsigned long new,
380 unsigned int bytes,
381 struct x86_emulate_ctxt *ctxt)
382 {
383 struct sh_emulate_ctxt *sh_ctxt =
384 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
385 struct vcpu *v = current;
386 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
387 v, offset, old, new, bytes, sh_ctxt);
388 }
390 static int
391 pv_emulate_cmpxchg8b(enum x86_segment seg,
392 unsigned long offset,
393 unsigned long old_lo,
394 unsigned long old_hi,
395 unsigned long new_lo,
396 unsigned long new_hi,
397 struct x86_emulate_ctxt *ctxt)
398 {
399 struct sh_emulate_ctxt *sh_ctxt =
400 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
401 struct vcpu *v = current;
402 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
403 v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
404 }
406 static struct x86_emulate_ops pv_shadow_emulator_ops = {
407 .read = pv_emulate_read,
408 .insn_fetch = pv_emulate_read,
409 .write = pv_emulate_write,
410 .cmpxchg = pv_emulate_cmpxchg,
411 .cmpxchg8b = pv_emulate_cmpxchg8b,
412 };
414 struct x86_emulate_ops *shadow_init_emulation(
415 struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs)
416 {
417 struct segment_register *creg, *sreg;
418 struct vcpu *v = current;
419 unsigned long addr;
421 sh_ctxt->ctxt.regs = regs;
423 if ( !is_hvm_vcpu(v) )
424 {
425 sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = BITS_PER_LONG;
426 return &pv_shadow_emulator_ops;
427 }
429 /* Segment cache initialisation. Primed with CS. */
430 sh_ctxt->valid_seg_regs = 0;
431 creg = hvm_get_seg_reg(x86_seg_cs, sh_ctxt);
433 /* Work out the emulation mode. */
434 if ( hvm_long_mode_enabled(v) && creg->attr.fields.l )
435 {
436 sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 64;
437 }
438 else if ( regs->eflags & X86_EFLAGS_VM )
439 {
440 sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 16;
441 }
442 else
443 {
444 sreg = hvm_get_seg_reg(x86_seg_ss, sh_ctxt);
445 sh_ctxt->ctxt.addr_size = creg->attr.fields.db ? 32 : 16;
446 sh_ctxt->ctxt.sp_size = sreg->attr.fields.db ? 32 : 16;
447 }
449 /* Attempt to prefetch whole instruction. */
450 sh_ctxt->insn_buf_bytes =
451 (!hvm_translate_linear_addr(
452 x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
453 hvm_access_insn_fetch, sh_ctxt, &addr) &&
454 !hvm_copy_from_guest_virt(
455 sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
456 ? sizeof(sh_ctxt->insn_buf) : 0;
458 return &hvm_shadow_emulator_ops;
459 }
461 /**************************************************************************/
462 /* Code for "promoting" a guest page to the point where the shadow code is
463 * willing to let it be treated as a guest page table. This generally
464 * involves making sure there are no writable mappings available to the guest
465 * for this page.
466 */
467 void shadow_promote(struct vcpu *v, mfn_t gmfn, unsigned int type)
468 {
469 struct page_info *page = mfn_to_page(gmfn);
471 ASSERT(mfn_valid(gmfn));
473 /* We should never try to promote a gmfn that has writeable mappings */
474 ASSERT(sh_remove_write_access(v, gmfn, 0, 0) == 0);
476 /* Is the page already shadowed? */
477 if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
478 page->shadow_flags = 0;
480 ASSERT(!test_bit(type, &page->shadow_flags));
481 set_bit(type, &page->shadow_flags);
482 }
484 void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
485 {
486 struct page_info *page = mfn_to_page(gmfn);
488 #ifdef CONFIG_COMPAT
489 if ( !IS_COMPAT(v->domain) || type != SH_type_l4_64_shadow )
490 #endif
491 ASSERT(test_bit(_PGC_page_table, &page->count_info));
493 ASSERT(test_bit(type, &page->shadow_flags));
495 clear_bit(type, &page->shadow_flags);
497 if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
498 {
499 /* tlbflush timestamp field is valid again */
500 page->tlbflush_timestamp = tlbflush_current_time();
501 clear_bit(_PGC_page_table, &page->count_info);
502 }
503 }
505 /**************************************************************************/
506 /* Validate a pagetable change from the guest and update the shadows.
507 * Returns a bitmask of SHADOW_SET_* flags. */
509 int
510 sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size)
511 {
512 int result = 0;
513 struct page_info *page = mfn_to_page(gmfn);
515 sh_mark_dirty(v->domain, gmfn);
517 // Determine which types of shadows are affected, and update each.
518 //
519 // Always validate L1s before L2s to prevent another cpu with a linear
520 // mapping of this gmfn from seeing a walk that results from
521 // using the new L2 value and the old L1 value. (It is OK for such a
522 // guest to see a walk that uses the old L2 value with the new L1 value,
523 // as hardware could behave this way if one level of the pagewalk occurs
524 // before the store, and the next level of the pagewalk occurs after the
525 // store.)
526 //
527 // Ditto for L2s before L3s, etc.
528 //
530 if ( !(page->count_info & PGC_page_table) )
531 return 0; /* Not shadowed at all */
533 #if CONFIG_PAGING_LEVELS == 2
534 if ( page->shadow_flags & SHF_L1_32 )
535 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 2, 2)
536 (v, gmfn, entry, size);
537 #else
538 if ( page->shadow_flags & SHF_L1_32 )
539 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 2)
540 (v, gmfn, entry, size);
541 #endif
543 #if CONFIG_PAGING_LEVELS == 2
544 if ( page->shadow_flags & SHF_L2_32 )
545 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 2, 2)
546 (v, gmfn, entry, size);
547 #else
548 if ( page->shadow_flags & SHF_L2_32 )
549 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 2)
550 (v, gmfn, entry, size);
551 #endif
553 #if CONFIG_PAGING_LEVELS >= 3
554 if ( page->shadow_flags & SHF_L1_PAE )
555 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 3)
556 (v, gmfn, entry, size);
557 if ( page->shadow_flags & SHF_L2_PAE )
558 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 3)
559 (v, gmfn, entry, size);
560 if ( page->shadow_flags & SHF_L2H_PAE )
561 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
562 (v, gmfn, entry, size);
563 #else /* 32-bit non-PAE hypervisor does not support PAE guests */
564 ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
565 #endif
567 #if CONFIG_PAGING_LEVELS >= 4
568 if ( page->shadow_flags & SHF_L1_64 )
569 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 4, 4)
570 (v, gmfn, entry, size);
571 if ( page->shadow_flags & SHF_L2_64 )
572 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 4, 4)
573 (v, gmfn, entry, size);
574 if ( page->shadow_flags & SHF_L2H_64 )
575 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 4, 4)
576 (v, gmfn, entry, size);
577 if ( page->shadow_flags & SHF_L3_64 )
578 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 4, 4)
579 (v, gmfn, entry, size);
580 if ( page->shadow_flags & SHF_L4_64 )
581 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl4e, 4, 4)
582 (v, gmfn, entry, size);
583 #else /* 32-bit/PAE hypervisor does not support 64-bit guests */
584 ASSERT((page->shadow_flags
585 & (SHF_L4_64|SHF_L3_64|SHF_L2H_64|SHF_L2_64|SHF_L1_64)) == 0);
586 #endif
588 return result;
589 }
592 void
593 sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
594 void *entry, u32 size)
595 /* This is the entry point for emulated writes to pagetables in HVM guests and
596 * PV translated guests.
597 */
598 {
599 struct domain *d = v->domain;
600 int rc;
602 ASSERT(shadow_locked_by_me(v->domain));
603 rc = sh_validate_guest_entry(v, gmfn, entry, size);
604 if ( rc & SHADOW_SET_FLUSH )
605 /* Need to flush TLBs to pick up shadow PT changes */
606 flush_tlb_mask(d->domain_dirty_cpumask);
607 if ( rc & SHADOW_SET_ERROR )
608 {
609 /* This page is probably not a pagetable any more: tear it out of the
610 * shadows, along with any tables that reference it.
611 * Since the validate call above will have made a "safe" (i.e. zero)
612 * shadow entry, we can let the domain live even if we can't fully
613 * unshadow the page. */
614 sh_remove_shadows(v, gmfn, 0, 0);
615 }
616 }
618 int shadow_write_guest_entry(struct vcpu *v, intpte_t *p,
619 intpte_t new, mfn_t gmfn)
620 /* Write a new value into the guest pagetable, and update the shadows
621 * appropriately. Returns 0 if we page-faulted, 1 for success. */
622 {
623 int failed;
624 shadow_lock(v->domain);
625 failed = __copy_to_user(p, &new, sizeof(new));
626 if ( failed != sizeof(new) )
627 sh_validate_guest_entry(v, gmfn, p, sizeof(new));
628 shadow_unlock(v->domain);
629 return (failed == 0);
630 }
632 int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
633 intpte_t *old, intpte_t new, mfn_t gmfn)
634 /* Cmpxchg a new value into the guest pagetable, and update the shadows
635 * appropriately. Returns 0 if we page-faulted, 1 if not.
636 * N.B. caller should check the value of "old" to see if the
637 * cmpxchg itself was successful. */
638 {
639 int failed;
640 intpte_t t = *old;
641 shadow_lock(v->domain);
642 failed = cmpxchg_user(p, t, new);
643 if ( t == *old )
644 sh_validate_guest_entry(v, gmfn, p, sizeof(new));
645 *old = t;
646 shadow_unlock(v->domain);
647 return (failed == 0);
648 }
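/*
 * Hypothetical usage sketch, added for illustration only: how a caller
 * is expected to drive shadow_cmpxchg_guest_entry().  The helper name
 * and its semantics are invented here; the real callers live in the
 * per-mode emulation paths.  Returns -1 on fault, 1 if the cmpxchg
 * hit, 0 if the guest entry had already changed (caller may retry).
 */
static inline int example_test_and_set_gpte_bit(struct vcpu *v, intpte_t *p,
                                                intpte_t curr, intpte_t bit,
                                                mfn_t gmfn)
{
    intpte_t old = curr;

    if ( !shadow_cmpxchg_guest_entry(v, p, &old, curr | bit, gmfn) )
        return -1;               /* faulted while touching the entry */

    /* On return, "old" holds what was really in the guest entry. */
    return (old == curr) ? 1 : 0;
}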
651 /**************************************************************************/
652 /* Memory management for shadow pages. */
654 /* Allocating shadow pages
655 * -----------------------
656 *
657 * Most shadow pages are allocated singly, but there is one case where
658 * we need to allocate multiple pages together: shadowing 32-bit guest
659 * tables on PAE or 64-bit shadows. A 32-bit guest l1 table covers 4MB
660 * of virtual address space, and needs to be shadowed by two PAE/64-bit
661 * l1 tables (covering 2MB of virtual address space each). Similarly, a
662 * 32-bit guest l2 table (4GB va) needs to be shadowed by four
663 * PAE/64-bit l2 tables (1GB va each). These multi-page shadows are
664 * contiguous and aligned; functions for handling offsets into them are
665 * defined in shadow.c (shadow_l1_index() etc.)
666 *
667 * This table shows the allocation behaviour of the different modes:
668 *
669 * Xen paging      32b  pae  pae  64b  64b  64b
670 * Guest paging    32b  32b  pae  32b  pae  64b
671 * PV or HVM        *   HVM   *   HVM  HVM   *
672 * Shadow paging   32b  pae  pae  pae  pae  64b
673 *
674 * sl1 size         4k   8k   4k   8k   4k   4k
675 * sl2 size         4k  16k   4k  16k   4k   4k
676 * sl3 size         -    -    -    -    -    4k
677 * sl4 size         -    -    -    -    -    4k
678 *
679 * We allocate memory from xen in four-page units and break them down
680 * with a simple buddy allocator. Can't use the xen allocator to handle
681 * this as it only works for contiguous zones, and a domain's shadow
682 * pool is made of fragments.
683 *
684 * In HVM guests, the p2m table is built out of shadow pages, and we provide
685 * a function for the p2m management to steal pages, in max-order chunks, from
686 * the free pool. We don't provide for giving them back, yet.
687 */
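/*
 * Illustrative sketch, not part of the original file: the "split" step
 * of the buddy allocator described above, over a plain array of
 * free-chunk counters.  shadow_alloc()/shadow_free() below do the real
 * work on lists of struct shadow_page_info; this only shows how one
 * free chunk of order i satisfies a request of order "order" by
 * shedding one buddy at each intermediate order.
 */
static inline int example_buddy_take(unsigned int *free_count,
                                     unsigned int max_order,
                                     unsigned int order)
{
    unsigned int i;

    /* Find the smallest free chunk that is big enough. */
    for ( i = order; i <= max_order; i++ )
        if ( free_count[i] != 0 )
            break;
    if ( i > max_order )
        return 0;              /* nothing available: caller must prealloc */

    free_count[i]--;           /* take the chunk off its free list */
    while ( i != order )
    {
        i--;
        free_count[i]++;       /* put the unused upper half back at order i */
    }
    return 1;                  /* 1 << order contiguous pages handed out */
}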
689 /* Figure out the least acceptable quantity of shadow memory.
690 * The minimum memory requirement for always being able to free up a
691 * chunk of memory is very small -- only three max-order chunks per
692 * vcpu to hold the top level shadows and pages with Xen mappings in them.
693 *
694 * But for a guest to be guaranteed to successfully execute a single
695 * instruction, we must be able to map a large number (about thirty) VAs
696 * at the same time, which means that to guarantee progress, we must
697 * allow for more than ninety allocated pages per vcpu. We round that
698 * up to 128 pages, or half a megabyte per vcpu. */
699 unsigned int shadow_min_acceptable_pages(struct domain *d)
700 {
701 u32 vcpu_count = 0;
702 struct vcpu *v;
704 for_each_vcpu(d, v)
705 vcpu_count++;
707 return (vcpu_count * 128);
708 }
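/*
 * Worked example, added for illustration only: with the 128-pages-per-vcpu
 * floor above, a 4-vcpu domain needs at least 4 * 128 = 512 shadow pages,
 * i.e. 512 * 4KB = 2MB of shadow memory, before the pool can guarantee
 * forward progress.
 */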
710 /* Figure out the order of allocation needed for a given shadow type */
711 static inline u32
712 shadow_order(unsigned int shadow_type)
713 {
714 #if CONFIG_PAGING_LEVELS > 2
715 static const u32 type_to_order[SH_type_unused] = {
716 0, /* SH_type_none */
717 1, /* SH_type_l1_32_shadow */
718 1, /* SH_type_fl1_32_shadow */
719 2, /* SH_type_l2_32_shadow */
720 0, /* SH_type_l1_pae_shadow */
721 0, /* SH_type_fl1_pae_shadow */
722 0, /* SH_type_l2_pae_shadow */
723 0, /* SH_type_l2h_pae_shadow */
724 0, /* SH_type_l1_64_shadow */
725 0, /* SH_type_fl1_64_shadow */
726 0, /* SH_type_l2_64_shadow */
727 0, /* SH_type_l2h_64_shadow */
728 0, /* SH_type_l3_64_shadow */
729 0, /* SH_type_l4_64_shadow */
730 2, /* SH_type_p2m_table */
731 0 /* SH_type_monitor_table */
732 };
733 ASSERT(shadow_type < SH_type_unused);
734 return type_to_order[shadow_type];
735 #else /* 32-bit Xen only ever shadows 32-bit guests on 32-bit shadows. */
736 return 0;
737 #endif
738 }
741 /* Do we have a free chunk of at least this order? */
742 static inline int chunk_is_available(struct domain *d, int order)
743 {
744 int i;
746 for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
747 if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
748 return 1;
749 return 0;
750 }
752 /* Dispatcher function: call the per-mode function that will unhook the
753 * non-Xen mappings in this top-level shadow mfn */
754 void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
755 {
756 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
757 switch ( sp->type )
758 {
759 case SH_type_l2_32_shadow:
760 #if CONFIG_PAGING_LEVELS == 2
761 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,2,2)(v,smfn);
762 #else
763 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,3,2)(v,smfn);
764 #endif
765 break;
766 #if CONFIG_PAGING_LEVELS >= 3
767 case SH_type_l2_pae_shadow:
768 case SH_type_l2h_pae_shadow:
769 SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
770 break;
771 #endif
772 #if CONFIG_PAGING_LEVELS >= 4
773 case SH_type_l4_64_shadow:
774 SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings,4,4)(v,smfn);
775 break;
776 #endif
777 default:
778 SHADOW_PRINTK("top-level shadow has bad type %08x\n", sp->type);
779 BUG();
780 }
781 }
784 /* Make sure there is at least one chunk of the required order available
785 * in the shadow page pool. This must be called before any calls to
786 * shadow_alloc(). Since this will free existing shadows to make room,
787 * it must be called early enough to avoid freeing shadows that the
788 * caller is currently working on. */
789 void shadow_prealloc(struct domain *d, unsigned int order)
790 {
791 /* Need a vcpu for calling unpins; for now, since we don't have
792 * per-vcpu shadows, any will do */
793 struct vcpu *v, *v2;
794 struct list_head *l, *t;
795 struct shadow_page_info *sp;
796 cpumask_t flushmask = CPU_MASK_NONE;
797 mfn_t smfn;
798 int i;
800 if ( chunk_is_available(d, order) ) return;
802 v = current;
803 if ( v->domain != d )
804 v = d->vcpu[0];
805 ASSERT(v != NULL);
807 /* Stage one: walk the list of pinned pages, unpinning them */
808 perfc_incrc(shadow_prealloc_1);
809 list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows)
810 {
811 sp = list_entry(l, struct shadow_page_info, list);
812 smfn = shadow_page_to_mfn(sp);
814 /* Unpin this top-level shadow */
815 sh_unpin(v, smfn);
817 /* See if that freed up a chunk of appropriate size */
818 if ( chunk_is_available(d, order) ) return;
819 }
821 /* Stage two: all shadow pages are in use in hierarchies that are
822 * loaded in cr3 on some vcpu. Walk them, unhooking the non-Xen
823 * mappings. */
824 perfc_incrc(shadow_prealloc_2);
826 for_each_vcpu(d, v2)
827 for ( i = 0 ; i < 4 ; i++ )
828 {
829 if ( !pagetable_is_null(v2->arch.shadow_table[i]) )
830 {
831 shadow_unhook_mappings(v,
832 pagetable_get_mfn(v2->arch.shadow_table[i]));
833 cpus_or(flushmask, v2->vcpu_dirty_cpumask, flushmask);
835 /* See if that freed up a chunk of appropriate size */
836 if ( chunk_is_available(d, order) )
837 {
838 flush_tlb_mask(flushmask);
839 return;
840 }
841 }
842 }
844 /* Nothing more we can do: all remaining shadows are of pages that
845 * hold Xen mappings for some vcpu. This can never happen. */
846 SHADOW_PRINTK("Can't pre-allocate %i shadow pages!\n"
847 " shadow pages total = %u, free = %u, p2m=%u\n",
848 1 << order,
849 d->arch.paging.shadow.total_pages,
850 d->arch.paging.shadow.free_pages,
851 d->arch.paging.shadow.p2m_pages);
852 BUG();
853 }
855 /* Deliberately free all the memory we can: this will tear down all of
856 * this domain's shadows */
857 static void shadow_blow_tables(struct domain *d)
858 {
859 struct list_head *l, *t;
860 struct shadow_page_info *sp;
861 struct vcpu *v = d->vcpu[0];
862 mfn_t smfn;
863 int i;
865 /* Pass one: unpin all pinned pages */
866 list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows)
867 {
868 sp = list_entry(l, struct shadow_page_info, list);
869 smfn = shadow_page_to_mfn(sp);
870 sh_unpin(v, smfn);
871 }
873 /* Second pass: unhook entries of in-use shadows */
874 for_each_vcpu(d, v)
875 for ( i = 0 ; i < 4 ; i++ )
876 if ( !pagetable_is_null(v->arch.shadow_table[i]) )
877 shadow_unhook_mappings(v,
878 pagetable_get_mfn(v->arch.shadow_table[i]));
880 /* Make sure everyone sees the unshadowings */
881 flush_tlb_mask(d->domain_dirty_cpumask);
882 }
885 #ifndef NDEBUG
886 /* Blow all shadows of all shadowed domains: this can be used to cause the
887 * guest's pagetables to be re-shadowed if we suspect that the shadows
888 * have somehow got out of sync */
889 static void shadow_blow_all_tables(unsigned char c)
890 {
891 struct domain *d;
892 printk("'%c' pressed -> blowing all shadow tables\n", c);
893 rcu_read_lock(&domlist_read_lock);
894 for_each_domain(d)
895 {
896 if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL )
897 {
898 shadow_lock(d);
899 shadow_blow_tables(d);
900 shadow_unlock(d);
901 }
902 }
903 rcu_read_unlock(&domlist_read_lock);
904 }
906 /* Register this function in the Xen console keypress table */
907 static __init int shadow_blow_tables_keyhandler_init(void)
908 {
909 register_keyhandler('S', shadow_blow_all_tables,"reset shadow pagetables");
910 return 0;
911 }
912 __initcall(shadow_blow_tables_keyhandler_init);
913 #endif /* !NDEBUG */
915 /* Allocate another shadow's worth of (contiguous, aligned) pages,
916 * and fill in the type and backpointer fields of their page_infos.
917 * Never fails to allocate. */
918 mfn_t shadow_alloc(struct domain *d,
919 u32 shadow_type,
920 unsigned long backpointer)
921 {
922 struct shadow_page_info *sp = NULL;
923 unsigned int order = shadow_order(shadow_type);
924 cpumask_t mask;
925 void *p;
926 int i;
928 ASSERT(shadow_locked_by_me(d));
929 ASSERT(order <= SHADOW_MAX_ORDER);
930 ASSERT(shadow_type != SH_type_none);
931 perfc_incrc(shadow_alloc);
933 /* Find smallest order which can satisfy the request. */
934 for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
935 if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
936 goto found;
938 /* If we get here, we failed to allocate. This should never happen.
939 * It means that we didn't call shadow_prealloc() correctly before
940 * we allocated. We can't recover by calling prealloc here, because
941 * we might free up higher-level pages that the caller is working on. */
942 SHADOW_PRINTK("Can't allocate %i shadow pages!\n", 1 << order);
943 BUG();
945 found:
946 sp = list_entry(d->arch.paging.shadow.freelists[i].next,
947 struct shadow_page_info, list);
948 list_del(&sp->list);
950 /* We may have to halve the chunk a number of times. */
951 while ( i != order )
952 {
953 i--;
954 sp->order = i;
955 list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]);
956 sp += 1 << i;
957 }
958 d->arch.paging.shadow.free_pages -= 1 << order;
960 /* Init page info fields and clear the pages */
961 for ( i = 0; i < 1<<order ; i++ )
962 {
963 /* Before we overwrite the old contents of this page,
964 * we need to be sure that no TLB holds a pointer to it. */
965 mask = d->domain_dirty_cpumask;
966 tlbflush_filter(mask, sp[i].tlbflush_timestamp);
967 if ( unlikely(!cpus_empty(mask)) )
968 {
969 perfc_incrc(shadow_alloc_tlbflush);
970 flush_tlb_mask(mask);
971 }
972 /* Now safe to clear the page for reuse */
973 p = sh_map_domain_page(shadow_page_to_mfn(sp+i));
974 ASSERT(p != NULL);
975 clear_page(p);
976 sh_unmap_domain_page(p);
977 INIT_LIST_HEAD(&sp[i].list);
978 sp[i].type = shadow_type;
979 sp[i].pinned = 0;
980 sp[i].logdirty = 0;
981 sp[i].count = 0;
982 sp[i].backpointer = backpointer;
983 sp[i].next_shadow = NULL;
984 perfc_incr(shadow_alloc_count);
985 }
986 return shadow_page_to_mfn(sp);
987 }
990 /* Return some shadow pages to the pool. */
991 void shadow_free(struct domain *d, mfn_t smfn)
992 {
993 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
994 u32 shadow_type;
995 unsigned long order;
996 unsigned long mask;
997 int i;
999 ASSERT(shadow_locked_by_me(d));
1000 perfc_incrc(shadow_free);
1002 shadow_type = sp->type;
1003 ASSERT(shadow_type != SH_type_none);
1004 ASSERT(shadow_type != SH_type_p2m_table);
1005 order = shadow_order(shadow_type);
1007 d->arch.paging.shadow.free_pages += 1 << order;
1009 for ( i = 0; i < 1<<order; i++ )
1011 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
1012 struct vcpu *v;
1013 for_each_vcpu(d, v)
1015 /* No longer safe to look for a writeable mapping in this shadow */
1016 if ( v->arch.paging.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i )
1017 v->arch.paging.shadow.last_writeable_pte_smfn = 0;
1019 #endif
1020 /* Strip out the type: this is now a free shadow page */
1021 sp[i].type = 0;
1022 /* Remember the TLB timestamp so we will know whether to flush
1023 * TLBs when we reuse the page. Because the destructors leave the
1024 * contents of the pages in place, we can delay TLB flushes until
1025 * just before the allocator hands the page out again. */
1026 sp[i].tlbflush_timestamp = tlbflush_current_time();
1027 perfc_decr(shadow_alloc_count);
1030 /* Merge chunks as far as possible. */
1031 while ( order < SHADOW_MAX_ORDER )
1033 mask = 1 << order;
1034 if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
1035 /* Merge with predecessor block? */
1036 if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) )
1037 break;
1038 list_del(&(sp-mask)->list);
1039 sp -= mask;
1040 } else {
1041 /* Merge with successor block? */
1042 if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) )
1043 break;
1044 list_del(&(sp+mask)->list);
1046 order++;
1049 sp->order = order;
1050 list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
1053 /* Divert some memory from the pool to be used by the p2m mapping.
1054 * This action is irreversible: the p2m mapping only ever grows.
1055 * That's OK because the p2m table only exists for translated domains,
1056 * and those domains can't ever turn off shadow mode.
1057 * Also, we only ever allocate a max-order chunk, so as to preserve
1058 * the invariant that shadow_prealloc() always works.
1059 * Returns 0 iff it can't get a chunk (the caller should then
1060 * free up some pages in domheap and call sh_set_allocation);
1061 * returns non-zero on success.
1062 */
1063 static int
1064 sh_alloc_p2m_pages(struct domain *d)
1066 struct page_info *pg;
1067 u32 i;
1068 ASSERT(shadow_locked_by_me(d));
1070 if ( d->arch.paging.shadow.total_pages
1071 < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) )
1072 return 0; /* Not enough shadow memory: need to increase it first */
1074 pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0));
1075 d->arch.paging.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER);
1076 d->arch.paging.shadow.total_pages -= (1<<SHADOW_MAX_ORDER);
1077 for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++)
1079 /* Unlike shadow pages, mark p2m pages as owned by the domain.
1080 * Marking the domain as the owner would normally allow the guest to
1081 * create mappings of these pages, but these p2m pages will never be
1082 * in the domain's guest-physical address space, and so that is not
1083 * believed to be a concern.
1084 */
1085 page_set_owner(&pg[i], d);
1086 pg[i].count_info = 1;
1087 list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist);
1089 return 1;
1092 // Returns NULL if no memory is available...
1093 struct page_info *
1094 shadow_alloc_p2m_page(struct domain *d)
1096 struct list_head *entry;
1097 struct page_info *pg;
1098 mfn_t mfn;
1099 void *p;
1101 shadow_lock(d);
1103 if ( list_empty(&d->arch.paging.shadow.p2m_freelist) &&
1104 !sh_alloc_p2m_pages(d) )
1106 shadow_unlock(d);
1107 return NULL;
1109 entry = d->arch.paging.shadow.p2m_freelist.next;
1110 list_del(entry);
1112 shadow_unlock(d);
1114 pg = list_entry(entry, struct page_info, list);
1115 mfn = page_to_mfn(pg);
1116 p = sh_map_domain_page(mfn);
1117 clear_page(p);
1118 sh_unmap_domain_page(p);
1120 return pg;
1123 void
1124 shadow_free_p2m_page(struct domain *d, struct page_info *pg)
1126 ASSERT(page_get_owner(pg) == d);
1127 /* Should have just the one ref we gave it in alloc_p2m_page() */
1128 if ( (pg->count_info & PGC_count_mask) != 1 )
1130 SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
1131 pg->count_info, pg->u.inuse.type_info);
1133 /* Free should not decrement domain's total allocation, since
1134 * these pages were allocated without an owner. */
1135 page_set_owner(pg, NULL);
1136 free_domheap_pages(pg, 0);
1137 d->arch.paging.shadow.p2m_pages--;
1138 perfc_decr(shadow_alloc_count);
1141 #if CONFIG_PAGING_LEVELS == 3
1142 static void p2m_install_entry_in_monitors(struct domain *d,
1143 l3_pgentry_t *l3e)
1144 /* Special case, only used for external-mode domains on PAE hosts:
1145 * update the mapping of the p2m table. Once again, this is trivial in
1146 * other paging modes (one top-level entry points to the top-level p2m,
1147 * no maintenance needed), but PAE makes life difficult by needing a
1148 * copy of the eight l3es of the p2m table in eight l2h slots in the
1149 * monitor table. This function makes fresh copies when a p2m l3e
1150 * changes. */
1152 l2_pgentry_t *ml2e;
1153 struct vcpu *v;
1154 unsigned int index;
1156 index = ((unsigned long)l3e & ~PAGE_MASK) / sizeof(l3_pgentry_t);
1157 ASSERT(index < MACHPHYS_MBYTES>>1);
1159 for_each_vcpu(d, v)
1161 if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
1162 continue;
1163 ASSERT(shadow_mode_external(v->domain));
1165 SHADOW_DEBUG(P2M, "d=%u v=%u index=%u mfn=%#lx\n",
1166 d->domain_id, v->vcpu_id, index, l3e_get_pfn(*l3e));
1168 if ( v == current ) /* OK to use linear map of monitor_table */
1169 ml2e = __linear_l2_table + l2_linear_offset(RO_MPT_VIRT_START);
1170 else
1172 l3_pgentry_t *ml3e;
1173 ml3e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
1174 ASSERT(l3e_get_flags(ml3e[3]) & _PAGE_PRESENT);
1175 ml2e = sh_map_domain_page(_mfn(l3e_get_pfn(ml3e[3])));
1176 ml2e += l2_table_offset(RO_MPT_VIRT_START);
1177 sh_unmap_domain_page(ml3e);
1179 ml2e[index] = l2e_from_pfn(l3e_get_pfn(*l3e), __PAGE_HYPERVISOR);
1180 if ( v != current )
1181 sh_unmap_domain_page(ml2e);
1184 #endif
1186 /* Set the pool of shadow pages to the required number of pages.
1187 * Input will be rounded up to at least shadow_min_acceptable_pages(),
1188 * plus space for the p2m table.
1189 * Returns 0 for success, non-zero for failure. */
1190 static unsigned int sh_set_allocation(struct domain *d,
1191 unsigned int pages,
1192 int *preempted)
1194 struct shadow_page_info *sp;
1195 unsigned int lower_bound;
1196 int j;
1198 ASSERT(shadow_locked_by_me(d));
1200 /* Don't allocate less than the minimum acceptable, plus one page per
1201 * megabyte of RAM (for the p2m table) */
1202 lower_bound = shadow_min_acceptable_pages(d) + (d->tot_pages / 256);
1203 if ( pages > 0 && pages < lower_bound )
1204 pages = lower_bound;
1205 /* Round up to largest block size */
1206 pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1);
1208 SHADOW_PRINTK("current %i target %i\n",
1209 d->arch.paging.shadow.total_pages, pages);
1211 while ( d->arch.paging.shadow.total_pages != pages )
1213 if ( d->arch.paging.shadow.total_pages < pages )
1215 /* Need to allocate more memory from domheap */
1216 sp = (struct shadow_page_info *)
1217 alloc_domheap_pages(NULL, SHADOW_MAX_ORDER, 0);
1218 if ( sp == NULL )
1220 SHADOW_PRINTK("failed to allocate shadow pages.\n");
1221 return -ENOMEM;
1223 d->arch.paging.shadow.free_pages += 1<<SHADOW_MAX_ORDER;
1224 d->arch.paging.shadow.total_pages += 1<<SHADOW_MAX_ORDER;
1225 for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ )
1227 sp[j].type = 0;
1228 sp[j].pinned = 0;
1229 sp[j].logdirty = 0;
1230 sp[j].count = 0;
1231 sp[j].mbz = 0;
1232 sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
1234 sp->order = SHADOW_MAX_ORDER;
1235 list_add_tail(&sp->list,
1236 &d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]);
1238 else if ( d->arch.paging.shadow.total_pages > pages )
1240 /* Need to return memory to domheap */
1241 shadow_prealloc(d, SHADOW_MAX_ORDER);
1242 ASSERT(!list_empty(&d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]));
1243 sp = list_entry(d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER].next,
1244 struct shadow_page_info, list);
1245 list_del(&sp->list);
1246 d->arch.paging.shadow.free_pages -= 1<<SHADOW_MAX_ORDER;
1247 d->arch.paging.shadow.total_pages -= 1<<SHADOW_MAX_ORDER;
1248 free_domheap_pages((struct page_info *)sp, SHADOW_MAX_ORDER);
1251 /* Check to see if we need to yield and try again */
1252 if ( preempted && hypercall_preempt_check() )
1254 *preempted = 1;
1255 return 0;
1259 return 0;
1262 /* Return the size of the shadow pool, rounded up to the nearest MB */
1263 static unsigned int shadow_get_allocation(struct domain *d)
1265 unsigned int pg = d->arch.paging.shadow.total_pages;
1266 return ((pg >> (20 - PAGE_SHIFT))
1267 + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
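/*
 * Worked example for the rounding above, added for illustration only:
 * with 4KB pages, 20 - PAGE_SHIFT == 8, so 300 pages gives
 * (300 >> 8) == 1 whole MB plus a nonzero remainder (300 & 255),
 * and the function reports 2MB.
 */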
1270 /**************************************************************************/
1271 /* Hash table for storing the guest->shadow mappings.
1272 * The table itself is an array of pointers to shadows; the shadows are then
1273 * threaded on a singly-linked list of shadows with the same hash value */
1275 #define SHADOW_HASH_BUCKETS 251
1276 /* Other possibly useful primes are 509, 1021, 2039, 4093, 8191, 16381 */
1278 /* Hash function that takes a gfn or mfn, plus another byte of type info */
1279 typedef u32 key_t;
1280 static inline key_t sh_hash(unsigned long n, unsigned int t)
1282 unsigned char *p = (unsigned char *)&n;
1283 key_t k = t;
1284 int i;
1285 for ( i = 0; i < sizeof(n) ; i++ ) k = (u32)p[i] + (k<<6) + (k<<16) - k;
1286 return k % SHADOW_HASH_BUCKETS;
1287 }
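/*
 * Illustrative sketch, not part of the original file: the per-bucket
 * chain walk that shadow_hash_lookup() below performs (minus its
 * move-to-front optimisation).  It only uses the backpointer, type and
 * next_shadow fields that the real code already relies on.
 */
static inline struct shadow_page_info *
example_bucket_search(struct shadow_page_info *bucket_head,
                      unsigned long n, unsigned int t)
{
    struct shadow_page_info *sp;

    /* Buckets are singly-linked chains of shadows with the same hash. */
    for ( sp = bucket_head; sp != NULL; sp = sp->next_shadow )
        if ( sp->backpointer == n && sp->type == t )
            return sp;
    return NULL;
}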
1289 #if SHADOW_AUDIT & (SHADOW_AUDIT_HASH|SHADOW_AUDIT_HASH_FULL)
1291 /* Before we get to the mechanism, define a pair of audit functions
1292 * that sanity-check the contents of the hash table. */
1293 static void sh_hash_audit_bucket(struct domain *d, int bucket)
1294 /* Audit one bucket of the hash table */
1296 struct shadow_page_info *sp, *x;
1298 if ( !(SHADOW_AUDIT_ENABLE) )
1299 return;
1301 sp = d->arch.paging.shadow.hash_table[bucket];
1302 while ( sp )
1304 /* Not a shadow? */
1305 BUG_ON( sp->mbz != 0 );
1306 /* Bogus type? */
1307 BUG_ON( sp->type == 0 );
1308 BUG_ON( sp->type > SH_type_max_shadow );
1309 /* Wrong bucket? */
1310 BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket );
1311 /* Duplicate entry? */
1312 for ( x = sp->next_shadow; x; x = x->next_shadow )
1313 BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type );
1314 /* Follow the backpointer to the guest pagetable */
1315 if ( sp->type != SH_type_fl1_32_shadow
1316 && sp->type != SH_type_fl1_pae_shadow
1317 && sp->type != SH_type_fl1_64_shadow )
1319 struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer));
1320 /* Bad shadow flags on guest page? */
1321 BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) );
1322 /* Bad type count on guest page? */
1323 if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
1324 && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
1326 SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
1327 " but has typecount %#lx\n",
1328 sp->backpointer, mfn_x(shadow_page_to_mfn(sp)),
1329 gpg->u.inuse.type_info);
1330 BUG();
1333 /* That entry was OK; on we go */
1334 sp = sp->next_shadow;
1338 #else
1339 #define sh_hash_audit_bucket(_d, _b) do {} while(0)
1340 #endif /* Hashtable bucket audit */
1343 #if SHADOW_AUDIT & SHADOW_AUDIT_HASH_FULL
1345 static void sh_hash_audit(struct domain *d)
1346 /* Full audit: audit every bucket in the table */
1348 int i;
1350 if ( !(SHADOW_AUDIT_ENABLE) )
1351 return;
1353 for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ )
1355 sh_hash_audit_bucket(d, i);
1359 #else
1360 #define sh_hash_audit(_d) do {} while(0)
1361 #endif /* Hashtable bucket audit */
1363 /* Allocate and initialise the table itself.
1364 * Returns 0 for success, 1 for error. */
1365 static int shadow_hash_alloc(struct domain *d)
1367 struct shadow_page_info **table;
1369 ASSERT(shadow_locked_by_me(d));
1370 ASSERT(!d->arch.paging.shadow.hash_table);
1372 table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS);
1373 if ( !table ) return 1;
1374 memset(table, 0,
1375 SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *));
1376 d->arch.paging.shadow.hash_table = table;
1377 return 0;
1380 /* Tear down the hash table and return all memory to Xen.
1381 * This function does not care whether the table is populated. */
1382 static void shadow_hash_teardown(struct domain *d)
1384 ASSERT(shadow_locked_by_me(d));
1385 ASSERT(d->arch.paging.shadow.hash_table);
1387 xfree(d->arch.paging.shadow.hash_table);
1388 d->arch.paging.shadow.hash_table = NULL;
1392 mfn_t shadow_hash_lookup(struct vcpu *v, unsigned long n, unsigned int t)
1393 /* Find an entry in the hash table. Returns the MFN of the shadow,
1394 * or INVALID_MFN if it doesn't exist */
1396 struct domain *d = v->domain;
1397 struct shadow_page_info *sp, *prev;
1398 key_t key;
1400 ASSERT(shadow_locked_by_me(d));
1401 ASSERT(d->arch.paging.shadow.hash_table);
1402 ASSERT(t);
1404 sh_hash_audit(d);
1406 perfc_incrc(shadow_hash_lookups);
1407 key = sh_hash(n, t);
1408 sh_hash_audit_bucket(d, key);
1410 sp = d->arch.paging.shadow.hash_table[key];
1411 prev = NULL;
1412 while(sp)
1414 if ( sp->backpointer == n && sp->type == t )
1416 /* Pull-to-front if 'sp' isn't already the head item */
1417 if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) )
1419 if ( unlikely(d->arch.paging.shadow.hash_walking != 0) )
1420 /* Can't reorder: someone is walking the hash chains */
1421 return shadow_page_to_mfn(sp);
1422 else
1424 ASSERT(prev);
1425 /* Delete sp from the list */
1426 prev->next_shadow = sp->next_shadow;
1427 /* Re-insert it at the head of the list */
1428 sp->next_shadow = d->arch.paging.shadow.hash_table[key];
1429 d->arch.paging.shadow.hash_table[key] = sp;
1432 else
1434 perfc_incrc(shadow_hash_lookup_head);
1436 return shadow_page_to_mfn(sp);
1438 prev = sp;
1439 sp = sp->next_shadow;
1442 perfc_incrc(shadow_hash_lookup_miss);
1443 return _mfn(INVALID_MFN);
1446 void shadow_hash_insert(struct vcpu *v, unsigned long n, unsigned int t,
1447 mfn_t smfn)
1448 /* Put a mapping (n,t)->smfn into the hash table */
1450 struct domain *d = v->domain;
1451 struct shadow_page_info *sp;
1452 key_t key;
1454 ASSERT(shadow_locked_by_me(d));
1455 ASSERT(d->arch.paging.shadow.hash_table);
1456 ASSERT(t);
1458 sh_hash_audit(d);
1460 perfc_incrc(shadow_hash_inserts);
1461 key = sh_hash(n, t);
1462 sh_hash_audit_bucket(d, key);
1464 /* Insert this shadow at the top of the bucket */
1465 sp = mfn_to_shadow_page(smfn);
1466 sp->next_shadow = d->arch.paging.shadow.hash_table[key];
1467 d->arch.paging.shadow.hash_table[key] = sp;
1469 sh_hash_audit_bucket(d, key);
1472 void shadow_hash_delete(struct vcpu *v, unsigned long n, unsigned int t,
1473 mfn_t smfn)
1474 /* Excise the mapping (n,t)->smfn from the hash table */
1476 struct domain *d = v->domain;
1477 struct shadow_page_info *sp, *x;
1478 key_t key;
1480 ASSERT(shadow_locked_by_me(d));
1481 ASSERT(d->arch.paging.shadow.hash_table);
1482 ASSERT(t);
1484 sh_hash_audit(d);
1486 perfc_incrc(shadow_hash_deletes);
1487 key = sh_hash(n, t);
1488 sh_hash_audit_bucket(d, key);
1490 sp = mfn_to_shadow_page(smfn);
1491 if ( d->arch.paging.shadow.hash_table[key] == sp )
1492 /* Easy case: we're deleting the head item. */
1493 d->arch.paging.shadow.hash_table[key] = sp->next_shadow;
1494 else
1496 /* Need to search for the one we want */
1497 x = d->arch.paging.shadow.hash_table[key];
1498 while ( 1 )
1500 ASSERT(x); /* We can't have hit the end, since our target is
1501 * still in the chain somewhere... */
1502 if ( x->next_shadow == sp )
1504 x->next_shadow = sp->next_shadow;
1505 break;
1507 x = x->next_shadow;
1510 sp->next_shadow = NULL;
1512 sh_hash_audit_bucket(d, key);
1515 typedef int (*hash_callback_t)(struct vcpu *v, mfn_t smfn, mfn_t other_mfn);
1517 static void hash_foreach(struct vcpu *v,
1518 unsigned int callback_mask,
1519 hash_callback_t callbacks[],
1520 mfn_t callback_mfn)
1521 /* Walk the hash table looking at the types of the entries and
1522 * calling the appropriate callback function for each entry.
1523 * The mask determines which shadow types we call back for, and the array
1524 * of callbacks tells us which function to call.
1525 * Any callback may return non-zero to let us skip the rest of the scan.
1527 * WARNING: Callbacks MUST NOT add or remove hash entries unless they
1528 * then return non-zero to terminate the scan. */
1530 int i, done = 0;
1531 struct domain *d = v->domain;
1532 struct shadow_page_info *x;
1534 /* Say we're here, to stop hash-lookups reordering the chains */
1535 ASSERT(shadow_locked_by_me(d));
1536 ASSERT(d->arch.paging.shadow.hash_walking == 0);
1537 d->arch.paging.shadow.hash_walking = 1;
1539 for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ )
1541 /* WARNING: This is not safe against changes to the hash table.
1542 * The callback *must* return non-zero if it has inserted or
1543 * deleted anything from the hash (lookups are OK, though). */
1544 for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow )
1546 if ( callback_mask & (1 << x->type) )
1548 ASSERT(x->type <= 15);
1549 ASSERT(callbacks[x->type] != NULL);
1550 done = callbacks[x->type](v, shadow_page_to_mfn(x),
1551 callback_mfn);
1552 if ( done ) break;
1555 if ( done ) break;
1557 d->arch.paging.shadow.hash_walking = 0;
1561 /**************************************************************************/
1562 /* Destroy a shadow page: simple dispatcher to call the per-type destructor
1563 * which will decrement refcounts appropriately and return memory to the
1564 * free pool. */
1566 void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
1568 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
1569 unsigned int t = sp->type;
1572 SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
1574 /* Double-check, if we can, that the shadowed page belongs to this
1575 * domain (by following the back-pointer). */
1576 ASSERT(t == SH_type_fl1_32_shadow ||
1577 t == SH_type_fl1_pae_shadow ||
1578 t == SH_type_fl1_64_shadow ||
1579 t == SH_type_monitor_table ||
1580 #ifdef CONFIG_COMPAT
1581 (IS_COMPAT(v->domain) && t == SH_type_l4_64_shadow) ||
1582 #endif
1583 (page_get_owner(mfn_to_page(_mfn(sp->backpointer)))
1584 == v->domain));
1586 /* The down-shifts here are so that the switch statement is on nice
1587 * small numbers that the compiler will enjoy */
1588 switch ( t )
1590 #if CONFIG_PAGING_LEVELS == 2
1591 case SH_type_l1_32_shadow:
1592 case SH_type_fl1_32_shadow:
1593 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 2, 2)(v, smfn);
1594 break;
1595 case SH_type_l2_32_shadow:
1596 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 2, 2)(v, smfn);
1597 break;
1598 #else /* PAE or 64bit */
1599 case SH_type_l1_32_shadow:
1600 case SH_type_fl1_32_shadow:
1601 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 2)(v, smfn);
1602 break;
1603 case SH_type_l2_32_shadow:
1604 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 2)(v, smfn);
1605 break;
1606 #endif
1608 #if CONFIG_PAGING_LEVELS >= 3
1609 case SH_type_l1_pae_shadow:
1610 case SH_type_fl1_pae_shadow:
1611 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 3)(v, smfn);
1612 break;
1613 case SH_type_l2_pae_shadow:
1614 case SH_type_l2h_pae_shadow:
1615 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
1616 break;
1617 #endif
1619 #if CONFIG_PAGING_LEVELS >= 4
1620 case SH_type_l1_64_shadow:
1621 case SH_type_fl1_64_shadow:
1622 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 4, 4)(v, smfn);
1623 break;
1624 case SH_type_l2h_64_shadow:
1625 ASSERT( IS_COMPAT(v->domain) );
1626 case SH_type_l2_64_shadow:
1627 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 4, 4)(v, smfn);
1628 break;
1629 case SH_type_l3_64_shadow:
1630 SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 4, 4)(v, smfn);
1631 break;
1632 case SH_type_l4_64_shadow:
1633 SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, 4, 4)(v, smfn);
1634 break;
1635 #endif
1636 default:
1637 SHADOW_PRINTK("tried to destroy shadow of bad type %08lx\n",
1638 (unsigned long)t);
1639 BUG();
1643 /**************************************************************************/
1644 /* Remove all writeable mappings of a guest frame from the shadow tables
1645 * Returns non-zero if we need to flush TLBs.
1646 * level and fault_addr describe how we found this to be a pagetable;
1647 * level==0 means we have some other reason for revoking write access.*/
1649 int sh_remove_write_access(struct vcpu *v, mfn_t gmfn,
1650 unsigned int level,
1651 unsigned long fault_addr)
1653 /* Dispatch table for getting per-type functions */
1654 static hash_callback_t callbacks[SH_type_unused] = {
1655 NULL, /* none */
1656 #if CONFIG_PAGING_LEVELS == 2
1657 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32 */
1658 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32 */
1659 #else
1660 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32 */
1661 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32 */
1662 #endif
1663 NULL, /* l2_32 */
1664 #if CONFIG_PAGING_LEVELS >= 3
1665 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae */
1666 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */
1667 #else
1668 NULL, /* l1_pae */
1669 NULL, /* fl1_pae */
1670 #endif
1671 NULL, /* l2_pae */
1672 NULL, /* l2h_pae */
1673 #if CONFIG_PAGING_LEVELS >= 4
1674 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64 */
1675 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64 */
1676 #else
1677 NULL, /* l1_64 */
1678 NULL, /* fl1_64 */
1679 #endif
1680 NULL, /* l2_64 */
1681 NULL, /* l2h_64 */
1682 NULL, /* l3_64 */
1683 NULL, /* l4_64 */
1684 NULL, /* p2m */
1685 NULL /* unused */
1686 };
1688 static unsigned int callback_mask =
1689 1 << SH_type_l1_32_shadow
1690 | 1 << SH_type_fl1_32_shadow
1691 | 1 << SH_type_l1_pae_shadow
1692 | 1 << SH_type_fl1_pae_shadow
1693 | 1 << SH_type_l1_64_shadow
1694 | 1 << SH_type_fl1_64_shadow
1696 struct page_info *pg = mfn_to_page(gmfn);
1698 ASSERT(shadow_locked_by_me(v->domain));
1700 /* Only remove writable mappings if we are doing shadow refcounts.
1701 * In guest refcounting, we trust Xen to already be restricting
1702 * all the writes to the guest page tables, so we do not need to
1703 * do more. */
1704 if ( !shadow_mode_refcounts(v->domain) )
1705 return 0;
1707 /* Early exit if it's already a pagetable, or otherwise not writeable */
1708 if ( sh_mfn_is_a_page_table(gmfn)
1709 || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
1710 return 0;
1712 perfc_incrc(shadow_writeable);
1714 /* If this isn't a "normal" writeable page, the domain is trying to
1715 * put pagetables in special memory of some kind. We can't allow that. */
1716 if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_writable_page )
1718 SHADOW_ERROR("can't remove write access to mfn %lx, type_info is %"
1719 PRtype_info "\n",
1720 mfn_x(gmfn), mfn_to_page(gmfn)->u.inuse.type_info);
1721 domain_crash(v->domain);
1724 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
1725 if ( v == current && level != 0 )
1727 unsigned long gfn;
1728 /* Heuristic: there is likely to be only one writeable mapping,
1729 * and that mapping is likely to be in the current pagetable,
1730 * in the guest's linear map (on non-HIGHPTE linux and windows)*/
1732 #define GUESS(_a, _h) do { \
1733 if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \
1734 perfc_incrc(shadow_writeable_h_ ## _h); \
1735 if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \
1736 return 1; \
1737 } while (0)
1740 if ( v->arch.paging.mode->guest_levels == 2 )
1742 if ( level == 1 )
1743 /* 32bit non-PAE w2k3: linear map at 0xC0000000 */
1744 GUESS(0xC0000000UL + (fault_addr >> 10), 1);
1746 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */
1747 if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 )
1748 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4);
1751 #if CONFIG_PAGING_LEVELS >= 3
1752 else if ( v->arch.paging.mode->guest_levels == 3 )
1754 /* 32bit PAE w2k3: linear map at 0xC0000000 */
1755 switch ( level )
1757 case 1: GUESS(0xC0000000UL + (fault_addr >> 9), 2); break;
1758 case 2: GUESS(0xC0600000UL + (fault_addr >> 18), 2); break;
1761 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */
1762 if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 )
1763 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4);
1765 #if CONFIG_PAGING_LEVELS >= 4
1766 else if ( v->arch.paging.mode->guest_levels == 4 )
1768 /* 64bit w2k3: linear map at 0x0000070000000000 */
1769 switch ( level )
1771 case 1: GUESS(0x70000000000UL + (fault_addr >> 9), 3); break;
1772 case 2: GUESS(0x70380000000UL + (fault_addr >> 18), 3); break;
1773 case 3: GUESS(0x70381C00000UL + (fault_addr >> 27), 3); break;
1776 /* 64bit Linux direct map at 0xffff810000000000; older kernels
1777 * had it at 0x0000010000000000UL */
1778 gfn = mfn_to_gfn(v->domain, gmfn);
1779 GUESS(0xffff810000000000UL + (gfn << PAGE_SHIFT), 4);
1780 GUESS(0x0000010000000000UL + (gfn << PAGE_SHIFT), 4);
1782 #endif /* CONFIG_PAGING_LEVELS >= 4 */
1783 #endif /* CONFIG_PAGING_LEVELS >= 3 */
1785 #undef GUESS
1788 if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
1789 return 1;
1791 /* Second heuristic: on HIGHPTE linux, there are two particular PTEs
1792 * (entries in the fixmap) where linux maps its pagetables. Since
1793 * we expect to hit them most of the time, we start the search for
1794 * the writeable mapping by looking at the same MFN where the last
1795 * brute-force search succeeded. */
1797 if ( v->arch.paging.shadow.last_writeable_pte_smfn != 0 )
1799 unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
1800 mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn);
1801 int shtype = mfn_to_shadow_page(last_smfn)->type;
1803 if ( callbacks[shtype] )
1804 callbacks[shtype](v, last_smfn, gmfn);
1806 if ( (pg->u.inuse.type_info & PGT_count_mask) != old_count )
1807 perfc_incrc(shadow_writeable_h_5);
1810 if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
1811 return 1;
1813 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
1815 /* Brute-force search of all the shadows, by walking the hash */
1816 perfc_incrc(shadow_writeable_bf);
1817 hash_foreach(v, callback_mask, callbacks, gmfn);
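/* [Editor's note: descriptive only. hash_foreach() visits every shadow
 * whose type bit is set in callback_mask -- here just the L1 and FL1
 * shadow types, since those are the shadows that can hold writeable
 * mappings of a guest frame -- and calls the matching callbacks[] entry
 * on each one to strip the offending L1 entries.] */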
1819 /* If that didn't catch the mapping, something is very wrong */
1820 if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) != 0 )
1822 SHADOW_ERROR("can't find all writeable mappings of mfn %lx: "
1823 "%lu left\n", mfn_x(gmfn),
1824 (mfn_to_page(gmfn)->u.inuse.type_info&PGT_count_mask));
1825 domain_crash(v->domain);
1828 /* We killed at least one writeable mapping, so must flush TLBs. */
1829 return 1;
1834 /**************************************************************************/
1835 /* Remove all mappings of a guest frame from the shadow tables.
1836 * Returns non-zero if we need to flush TLBs. */
1838 int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
1840 struct page_info *page = mfn_to_page(gmfn);
1841 int expected_count, do_locking;
1843 /* Dispatch table for getting per-type functions */
1844 static hash_callback_t callbacks[SH_type_unused] = {
1845 NULL, /* none */
1846 #if CONFIG_PAGING_LEVELS == 2
1847 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32 */
1848 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32 */
1849 #else
1850 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32 */
1851 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32 */
1852 #endif
1853 NULL, /* l2_32 */
1854 #if CONFIG_PAGING_LEVELS >= 3
1855 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae */
1856 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */
1857 #else
1858 NULL, /* l1_pae */
1859 NULL, /* fl1_pae */
1860 #endif
1861 NULL, /* l2_pae */
1862 NULL, /* l2h_pae */
1863 #if CONFIG_PAGING_LEVELS >= 4
1864 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64 */
1865 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64 */
1866 #else
1867 NULL, /* l1_64 */
1868 NULL, /* fl1_64 */
1869 #endif
1870 NULL, /* l2_64 */
1871 NULL, /* l2h_64 */
1872 NULL, /* l3_64 */
1873 NULL, /* l4_64 */
1874 NULL, /* p2m */
1875 NULL /* unused */
1876 };
1878 static unsigned int callback_mask =
1879 1 << SH_type_l1_32_shadow
1880 | 1 << SH_type_fl1_32_shadow
1881 | 1 << SH_type_l1_pae_shadow
1882 | 1 << SH_type_fl1_pae_shadow
1883 | 1 << SH_type_l1_64_shadow
1884 | 1 << SH_type_fl1_64_shadow
1887 perfc_incrc(shadow_mappings);
1888 if ( (page->count_info & PGC_count_mask) == 0 )
1889 return 0;
1891 /* Although this is an externally visible function, we do not know
1892 * whether the shadow lock will be held when it is called (since it
1893 * can be called via put_page_type when we clear a shadow l1e).
1894 * If the lock isn't held, take it for the duration of the call. */
1895 do_locking = !shadow_locked_by_me(v->domain);
1896 if ( do_locking ) shadow_lock(v->domain);
1898 /* XXX TODO:
1899 * Heuristics for finding the (probably) single mapping of this gmfn */
1901 /* Brute-force search of all the shadows, by walking the hash */
1902 perfc_incrc(shadow_mappings_bf);
1903 hash_foreach(v, callback_mask, callbacks, gmfn);
1905 /* If that didn't catch the mapping, something is very wrong */
1906 expected_count = (page->count_info & PGC_allocated) ? 1 : 0;
1907 if ( (page->count_info & PGC_count_mask) != expected_count )
1909 /* Don't complain if we're in HVM and there are some extra mappings:
1910 * The qemu helper process has an untyped mapping of this dom's RAM
1911 * and the HVM restore program takes another. */
1912 if ( !(shadow_mode_external(v->domain)
1913 && (page->count_info & PGC_count_mask) <= 3
1914 && (page->u.inuse.type_info & PGT_count_mask) == 0) )
1916 SHADOW_ERROR("can't find all mappings of mfn %lx: "
1917 "c=%08x t=%08lx\n", mfn_x(gmfn),
1918 page->count_info, page->u.inuse.type_info);
1922 if ( do_locking ) shadow_unlock(v->domain);
1924 /* We killed at least one mapping, so must flush TLBs. */
1925 return 1;
1929 /**************************************************************************/
1930 /* Remove all shadows of a guest frame from the shadow tables */
1932 static int sh_remove_shadow_via_pointer(struct vcpu *v, mfn_t smfn)
1933 /* Follow this shadow's up-pointer, if it has one, and remove the reference
1934 * found there. Returns 1 if that was the only reference to this shadow */
1936 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
1937 mfn_t pmfn;
1938 void *vaddr;
1939 int rc;
1941 ASSERT(sp->type > 0);
1942 ASSERT(sp->type < SH_type_max_shadow);
1943 ASSERT(sp->type != SH_type_l2_32_shadow);
1944 ASSERT(sp->type != SH_type_l2_pae_shadow);
1945 ASSERT(sp->type != SH_type_l2h_pae_shadow);
1946 ASSERT(sp->type != SH_type_l4_64_shadow);
1948 if (sp->up == 0) return 0;
1949 pmfn = _mfn(sp->up >> PAGE_SHIFT);
1950 ASSERT(mfn_valid(pmfn));
1951 vaddr = sh_map_domain_page(pmfn);
1952 ASSERT(vaddr);
1953 vaddr += sp->up & (PAGE_SIZE-1);
1954 ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
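/* [Editor's note: as the unpacking above shows, sp->up encodes the one
 * known back-reference to this shadow -- the page-aligned bits give the
 * MFN of the parent shadow and the low bits give the byte offset of the
 * referencing entry within it.] */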
1956 /* Is this the only reference to this shadow? */
1957 rc = (sp->count == 1) ? 1 : 0;
1959 /* Blank the offending entry */
1960 switch (sp->type)
1962 case SH_type_l1_32_shadow:
1963 case SH_type_l2_32_shadow:
1964 #if CONFIG_PAGING_LEVELS == 2
1965 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,2,2)(v, vaddr, pmfn);
1966 #else
1967 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,2)(v, vaddr, pmfn);
1968 #endif
1969 break;
1970 #if CONFIG_PAGING_LEVELS >=3
1971 case SH_type_l1_pae_shadow:
1972 case SH_type_l2_pae_shadow:
1973 case SH_type_l2h_pae_shadow:
1974 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
1975 break;
1976 #if CONFIG_PAGING_LEVELS >= 4
1977 case SH_type_l1_64_shadow:
1978 case SH_type_l2_64_shadow:
1979 case SH_type_l2h_64_shadow:
1980 case SH_type_l3_64_shadow:
1981 case SH_type_l4_64_shadow:
1982 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,4,4)(v, vaddr, pmfn);
1983 break;
1984 #endif
1985 #endif
1986 default: BUG(); /* Some weird unknown shadow type */
1989 sh_unmap_domain_page(vaddr);
1990 if ( rc )
1991 perfc_incrc(shadow_up_pointer);
1992 else
1993 perfc_incrc(shadow_unshadow_bf);
1995 return rc;
1998 void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
1999 /* Remove the shadows of this guest page.
2000 * If fast != 0, just try the quick heuristic, which will remove
2001 * at most one reference to each shadow of the page. Otherwise, walk
2002 * all the shadow tables looking for refs to shadows of this gmfn.
2003 * If all != 0, kill the domain if we can't find all the shadows.
2004 * (all != 0 implies fast == 0)
2005 */
2007 struct page_info *pg = mfn_to_page(gmfn);
2008 mfn_t smfn;
2009 u32 sh_flags;
2010 int do_locking;
2011 unsigned char t;
2013 /* Dispatch table for getting per-type functions: each level must
2014 * be called with the function to remove a lower-level shadow. */
2015 static hash_callback_t callbacks[SH_type_unused] = {
2016 NULL, /* none */
2017 NULL, /* l1_32 */
2018 NULL, /* fl1_32 */
2019 #if CONFIG_PAGING_LEVELS == 2
2020 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,2,2), /* l2_32 */
2021 #else
2022 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,2), /* l2_32 */
2023 #endif
2024 NULL, /* l1_pae */
2025 NULL, /* fl1_pae */
2026 #if CONFIG_PAGING_LEVELS >= 3
2027 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae */
2028 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
2029 #else
2030 NULL, /* l2_pae */
2031 NULL, /* l2h_pae */
2032 #endif
2033 NULL, /* l1_64 */
2034 NULL, /* fl1_64 */
2035 #if CONFIG_PAGING_LEVELS >= 4
2036 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,4,4), /* l2_64 */
2037 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,4,4), /* l2h_64 */
2038 SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,4,4), /* l3_64 */
2039 SHADOW_INTERNAL_NAME(sh_remove_l3_shadow,4,4), /* l4_64 */
2040 #else
2041 NULL, /* l2_64 */
2042 NULL, /* l2h_64 */
2043 NULL, /* l3_64 */
2044 NULL, /* l4_64 */
2045 #endif
2046 NULL, /* p2m */
2047 NULL /* unused */
2048 };
2050 /* Another lookup table, for choosing which mask to use */
2051 static unsigned int masks[SH_type_unused] = {
2052 0, /* none */
2053 1 << SH_type_l2_32_shadow, /* l1_32 */
2054 0, /* fl1_32 */
2055 0, /* l2_32 */
2056 ((1 << SH_type_l2h_pae_shadow)
2057 | (1 << SH_type_l2_pae_shadow)), /* l1_pae */
2058 0, /* fl1_pae */
2059 0, /* l2_pae */
2060 0, /* l2h_pae */
2061 ((1 << SH_type_l2h_64_shadow)
2062 | (1 << SH_type_l2_64_shadow)), /* l1_64 */
2063 0, /* fl1_64 */
2064 1 << SH_type_l3_64_shadow, /* l2_64 */
2065 1 << SH_type_l3_64_shadow, /* l2h_64 */
2066 1 << SH_type_l4_64_shadow, /* l3_64 */
2067 0, /* l4_64 */
2068 0, /* p2m */
2069 0 /* unused */
2070 };
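/* [Editor's note: masks[t] lists the higher-level shadow types that can
 * hold a reference to a shadow of type t (an l1_64 shadow, for example,
 * can only be referenced from an l2_64 or l2h_64 shadow); callbacks[]
 * above supplies the routine that removes such a reference.] */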
2072 ASSERT(!(all && fast));
2074 /* Although this is an externally visible function, we do not know
2075 * whether the shadow lock will be held when it is called (since it
2076 * can be called via put_page_type when we clear a shadow l1e).
2077 * If the lock isn't held, take it for the duration of the call. */
2078 do_locking = !shadow_locked_by_me(v->domain);
2079 if ( do_locking ) shadow_lock(v->domain);
2081 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
2082 v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
2084 /* Bail out now if the page is not shadowed */
2085 if ( (pg->count_info & PGC_page_table) == 0 )
2087 if ( do_locking ) shadow_unlock(v->domain);
2088 return;
2091 /* Search for this shadow in all appropriate shadows */
2092 perfc_incrc(shadow_unshadow);
2093 sh_flags = pg->shadow_flags;
2095 /* Lower-level shadows need to be excised from upper-level shadows.
2096 * This call to hash_foreach() looks dangerous but is in fact OK: each
2097 * call will remove at most one shadow, and terminate immediately when
2098 * it does remove it, so we never walk the hash after doing a deletion. */
2099 #define DO_UNSHADOW(_type) do { \
2100 t = (_type); \
2101 smfn = shadow_hash_lookup(v, mfn_x(gmfn), t); \
2102 if ( unlikely(!mfn_valid(smfn)) ) \
2103 { \
2104 SHADOW_ERROR(": gmfn %#lx has flags 0x%"PRIx32 \
2105 " but no type-0x%"PRIx32" shadow\n", \
2106 mfn_x(gmfn), sh_flags, t); \
2107 break; \
2108 } \
2109 if ( sh_type_is_pinnable(v, t) ) \
2110 sh_unpin(v, smfn); \
2111 else \
2112 sh_remove_shadow_via_pointer(v, smfn); \
2113 if ( (pg->count_info & PGC_page_table) && !fast ) \
2114 hash_foreach(v, masks[t], callbacks, smfn); \
2115 } while (0)
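/* [Editor's note, summarising the macro above: look the (gmfn, type)
 * pair up in the shadow hash; unpin the shadow if its type is pinnable,
 * otherwise drop the single reference found through its up-pointer; and
 * then, unless the caller asked for the fast path, sweep the possible
 * containing shadow types (masks[t]) for any references that remain.] */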
2117 if ( sh_flags & SHF_L1_32 ) DO_UNSHADOW(SH_type_l1_32_shadow);
2118 if ( sh_flags & SHF_L2_32 ) DO_UNSHADOW(SH_type_l2_32_shadow);
2119 #if CONFIG_PAGING_LEVELS >= 3
2120 if ( sh_flags & SHF_L1_PAE ) DO_UNSHADOW(SH_type_l1_pae_shadow);
2121 if ( sh_flags & SHF_L2_PAE ) DO_UNSHADOW(SH_type_l2_pae_shadow);
2122 if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(SH_type_l2h_pae_shadow);
2123 #if CONFIG_PAGING_LEVELS >= 4
2124 if ( sh_flags & SHF_L1_64 ) DO_UNSHADOW(SH_type_l1_64_shadow);
2125 if ( sh_flags & SHF_L2_64 ) DO_UNSHADOW(SH_type_l2_64_shadow);
2126 if ( sh_flags & SHF_L2H_64 ) DO_UNSHADOW(SH_type_l2h_64_shadow);
2127 if ( sh_flags & SHF_L3_64 ) DO_UNSHADOW(SH_type_l3_64_shadow);
2128 if ( sh_flags & SHF_L4_64 ) DO_UNSHADOW(SH_type_l4_64_shadow);
2129 #endif
2130 #endif
2132 #undef DO_UNSHADOW
2134 /* If that didn't catch the shadows, something is wrong */
2135 if ( !fast && (pg->count_info & PGC_page_table) )
2137 SHADOW_ERROR("can't find all shadows of mfn %05lx "
2138 "(shadow_flags=%08lx)\n",
2139 mfn_x(gmfn), pg->shadow_flags);
2140 if ( all )
2141 domain_crash(v->domain);
2144 /* Need to flush TLBs now, so that linear maps are safe next time we
2145 * take a fault. */
2146 flush_tlb_mask(v->domain->domain_dirty_cpumask);
2148 if ( do_locking ) shadow_unlock(v->domain);
2151 static void
2152 sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
2153 /* Even harsher: this is an HVM page that we think is no longer a pagetable.
2154 * Unshadow it, and recursively unshadow pages that reference it. */
2156 sh_remove_shadows(v, gmfn, 0, 1);
2157 /* XXX TODO:
2158 * Rework this hashtable walker to return a linked-list of all
2159 * the shadows it modified, then do breadth-first recursion
2160 * to find the way up to higher-level tables and unshadow them too.
2162 * The current code (just tearing down each page's shadows as we
2163 * detect that it is not a pagetable) is correct, but very slow.
2164 * It means extra emulated writes and slows down removal of mappings. */
2167 /**************************************************************************/
2169 static void sh_update_paging_modes(struct vcpu *v)
2171 struct domain *d = v->domain;
2172 struct paging_mode *old_mode = v->arch.paging.mode;
2173 mfn_t old_guest_table;
2175 ASSERT(shadow_locked_by_me(d));
2177 // Valid transitions handled by this function:
2178 // - For PV guests:
2179 // - after a shadow mode has been changed
2180 // - For HVM guests:
2181 // - after a shadow mode has been changed
2182 // - changes in CR0.PG, CR4.PAE, CR4.PSE, or CR4.PGE
2183 //
2185 // First, tear down any old shadow tables held by this vcpu.
2186 //
2187 if ( v->arch.paging.mode )
2188 v->arch.paging.mode->shadow.detach_old_tables(v);
2190 if ( !is_hvm_domain(d) )
2192 ///
2193 /// PV guest
2194 ///
2195 #if CONFIG_PAGING_LEVELS == 4
2196 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
2197 #elif CONFIG_PAGING_LEVELS == 3
2198 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
2199 #elif CONFIG_PAGING_LEVELS == 2
2200 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
2201 #else
2202 #error unexpected paging mode
2203 #endif
2204 v->arch.paging.translate_enabled = !!shadow_mode_translate(d);
2206 else
2208 ///
2209 /// HVM guest
2210 ///
2211 ASSERT(shadow_mode_translate(d));
2212 ASSERT(shadow_mode_external(d));
2214 v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);
2215 if ( !v->arch.paging.translate_enabled )
2217 /* Set v->arch.guest_table to use the p2m map, and choose
2218 * the appropriate shadow mode */
2219 old_guest_table = pagetable_get_mfn(v->arch.guest_table);
2220 #if CONFIG_PAGING_LEVELS == 2
2221 v->arch.guest_table =
2222 pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
2223 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
2224 #elif CONFIG_PAGING_LEVELS == 3
2225 v->arch.guest_table =
2226 pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
2227 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
2228 #else /* CONFIG_PAGING_LEVELS == 4 */
2230 l4_pgentry_t *l4e;
2231 /* Use the start of the first l3 table as a PAE l3 */
2232 ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
2233 l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
2234 ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
2235 v->arch.guest_table =
2236 pagetable_from_pfn(l4e_get_pfn(l4e[0]));
2237 sh_unmap_domain_page(l4e);
2239 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
2240 #endif
2241 /* Fix up refcounts on guest_table */
2242 get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d);
2243 if ( mfn_x(old_guest_table) != 0 )
2244 put_page(mfn_to_page(old_guest_table));
2246 else
2248 #ifdef __x86_64__
2249 if ( hvm_long_mode_enabled(v) )
2251 // long mode guest...
2252 v->arch.paging.mode =
2253 &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4);
2255 else
2256 #endif
2257 if ( hvm_pae_enabled(v) )
2259 #if CONFIG_PAGING_LEVELS >= 3
2260 // 32-bit PAE mode guest...
2261 v->arch.paging.mode =
2262 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3);
2263 #else
2264 SHADOW_ERROR("PAE not supported in 32-bit Xen\n");
2265 domain_crash(d);
2266 return;
2267 #endif
2269 else
2271 // 32-bit 2 level guest...
2272 #if CONFIG_PAGING_LEVELS >= 3
2273 v->arch.paging.mode =
2274 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2);
2275 #else
2276 v->arch.paging.mode =
2277 &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2);
2278 #endif
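/* [Editor's note: the selection above pairs guest paging levels with
 * shadow levels -- long-mode guests get the 4-level shadow mode, PAE
 * guests the 3-level one, and 2-level guests run on 3-level shadows
 * whenever Xen itself has 3 or more paging levels, falling back to
 * 2-level shadows only on a 2-level build.] */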
2282 if ( pagetable_is_null(v->arch.monitor_table) )
2284 mfn_t mmfn = v->arch.paging.mode->shadow.make_monitor_table(v);
2285 v->arch.monitor_table = pagetable_from_mfn(mmfn);
2286 make_cr3(v, mfn_x(mmfn));
2287 hvm_update_host_cr3(v);
2290 if ( v->arch.paging.mode != old_mode )
2292 SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u "
2293 "(was g=%u s=%u)\n",
2294 d->domain_id, v->vcpu_id,
2295 is_hvm_domain(d) ? !!hvm_paging_enabled(v) : 1,
2296 v->arch.paging.mode->guest_levels,
2297 v->arch.paging.mode->shadow.shadow_levels,
2298 old_mode ? old_mode->guest_levels : 0,
2299 old_mode ? old_mode->shadow.shadow_levels : 0);
2300 if ( old_mode &&
2301 (v->arch.paging.mode->shadow.shadow_levels !=
2302 old_mode->shadow.shadow_levels) )
2304 /* Need to make a new monitor table for the new mode */
2305 mfn_t new_mfn, old_mfn;
2307 if ( v != current && vcpu_runnable(v) )
2309 SHADOW_ERROR("Some third party (d=%u v=%u) is changing "
2310 "this HVM vcpu's (d=%u v=%u) paging mode "
2311 "while it is running.\n",
2312 current->domain->domain_id, current->vcpu_id,
2313 v->domain->domain_id, v->vcpu_id);
2314 /* It's not safe to do that because we can't change
2315 * the host CR3 for a running domain */
2316 domain_crash(v->domain);
2317 return;
2320 old_mfn = pagetable_get_mfn(v->arch.monitor_table);
2321 v->arch.monitor_table = pagetable_null();
2322 new_mfn = v->arch.paging.mode->shadow.make_monitor_table(v);
2323 v->arch.monitor_table = pagetable_from_mfn(new_mfn);
2324 SHADOW_PRINTK("new monitor table %"PRI_mfn "\n",
2325 mfn_x(new_mfn));
2327 /* Don't be running on the old monitor table when we
2328 * pull it down! Switch CR3, and warn the HVM code that
2329 * its host cr3 has changed. */
2330 make_cr3(v, mfn_x(new_mfn));
2331 if ( v == current )
2332 write_ptbase(v);
2333 hvm_update_host_cr3(v);
2334 old_mode->shadow.destroy_monitor_table(v, old_mfn);
2338 // XXX -- Need to deal with changes in CR4.PSE and CR4.PGE.
2339 // These are HARD: think about the case where two CPUs have
2340 // different values for CR4.PSE and CR4.PGE at the same time.
2341 // This *does* happen, at least for CR4.PGE...
2344 v->arch.paging.mode->update_cr3(v, 0);
2347 void shadow_update_paging_modes(struct vcpu *v)
2349 shadow_lock(v->domain);
2350 sh_update_paging_modes(v);
2351 shadow_unlock(v->domain);
2354 /**************************************************************************/
2355 /* Turning on and off shadow features */
2357 static void sh_new_mode(struct domain *d, u32 new_mode)
2358 /* Inform all the vcpus that the shadow mode has been changed */
2360 struct vcpu *v;
2362 ASSERT(shadow_locked_by_me(d));
2363 ASSERT(d != current->domain);
2364 d->arch.paging.mode = new_mode;
2365 for_each_vcpu(d, v)
2366 sh_update_paging_modes(v);
2369 int shadow_enable(struct domain *d, u32 mode)
2370 /* Turn on "permanent" shadow features: external, translate, refcount.
2371 * Can only be called once on a domain, and these features cannot be
2372 * disabled.
2373 * Returns 0 for success, -errno for failure. */
2375 unsigned int old_pages;
2376 int rv = 0;
2378 mode |= PG_SH_enable;
2380 domain_pause(d);
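/* [Editor's note on the sanity check below: the mode bits form a
 * dependency chain -- PG_external requires PG_translate, and
 * PG_translate requires PG_refcounts -- so only mode combinations that
 * respect that chain are accepted.] */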
2382 /* Sanity check the arguments */
2383 if ( (d == current->domain) ||
2384 shadow_mode_enabled(d) ||
2385 ((mode & PG_translate) && !(mode & PG_refcounts)) ||
2386 ((mode & PG_external) && !(mode & PG_translate)) )
2388 rv = -EINVAL;
2389 goto out_unlocked;
2392 /* Init the shadow memory allocation if the user hasn't done so */
2393 old_pages = d->arch.paging.shadow.total_pages;
2394 if ( old_pages == 0 )
2396 unsigned int r;
2397 shadow_lock(d);
2398 r = sh_set_allocation(d, 256, NULL); /* Use at least 1MB */
2399 shadow_unlock(d);
2400 if ( r != 0 )
2402 sh_set_allocation(d, 0, NULL);
2403 rv = -ENOMEM;
2404 goto out_unlocked;
2408 /* Init the P2M table. Must be done before we take the shadow lock
2409 * to avoid possible deadlock. */
2410 if ( mode & PG_translate )
2412 rv = p2m_alloc_table(d, shadow_alloc_p2m_page, shadow_free_p2m_page);
2413 if (rv != 0)
2414 goto out_unlocked;
2417 shadow_lock(d);
2419 /* Sanity check again with the lock held */
2420 if ( shadow_mode_enabled(d) )
2422 rv = -EINVAL;
2423 goto out_locked;
2426 /* Init the hash table */
2427 if ( shadow_hash_alloc(d) != 0 )
2429 rv = -ENOMEM;
2430 goto out_locked;
2433 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
2434 /* We assume we're dealing with an older 64bit linux guest until we
2435 * see the guest use more than one l4 per vcpu. */
2436 d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
2437 #endif
2439 /* Update the bits */
2440 sh_new_mode(d, mode);
2442 out_locked:
2443 shadow_unlock(d);
2444 out_unlocked:
2445 if ( rv != 0 && !pagetable_is_null(d->arch.phys_table) )
2446 p2m_teardown(d);
2447 domain_unpause(d);
2448 return rv;
2451 void shadow_teardown(struct domain *d)
2452 /* Destroy the shadow pagetables of this domain and free its shadow memory.
2453 * Should only be called for dying domains. */
2455 struct vcpu *v;
2456 mfn_t mfn;
2457 struct list_head *entry, *n;
2458 struct page_info *pg;
2460 ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
2461 ASSERT(d != current->domain);
2463 if ( !shadow_locked_by_me(d) )
2464 shadow_lock(d); /* Keep various asserts happy */
2466 if ( shadow_mode_enabled(d) )
2468 /* Release the shadow and monitor tables held by each vcpu */
2469 for_each_vcpu(d, v)
2471 if ( v->arch.paging.mode )
2473 v->arch.paging.mode->shadow.detach_old_tables(v);
2474 if ( shadow_mode_external(d) )
2476 mfn = pagetable_get_mfn(v->arch.monitor_table);
2477 if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
2478 v->arch.paging.mode->shadow.destroy_monitor_table(v, mfn);
2479 v->arch.monitor_table = pagetable_null();
2485 list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
2487 list_del(entry);
2488 pg = list_entry(entry, struct page_info, list);
2489 shadow_free_p2m_page(d, pg);
2492 if ( d->arch.paging.shadow.total_pages != 0 )
2494 SHADOW_PRINTK("teardown of domain %u starts."
2495 " Shadow pages total = %u, free = %u, p2m=%u\n",
2496 d->domain_id,
2497 d->arch.paging.shadow.total_pages,
2498 d->arch.paging.shadow.free_pages,
2499 d->arch.paging.shadow.p2m_pages);
2500 /* Destroy all the shadows and release memory to domheap */
2501 sh_set_allocation(d, 0, NULL);
2502 /* Release the hash table back to xenheap */
2503 if (d->arch.paging.shadow.hash_table)
2504 shadow_hash_teardown(d);
2505 /* Release the log-dirty bitmap of dirtied pages */
2506 sh_free_log_dirty_bitmap(d);
2507 /* Should not have any more memory held */
2508 SHADOW_PRINTK("teardown done."
2509 " Shadow pages total = %u, free = %u, p2m=%u\n",
2510 d->arch.paging.shadow.total_pages,
2511 d->arch.paging.shadow.free_pages,
2512 d->arch.paging.shadow.p2m_pages);
2513 ASSERT(d->arch.paging.shadow.total_pages == 0);
2516 /* We leave the "permanent" shadow modes enabled, but clear the
2517 * log-dirty mode bit. We don't want any more mark_dirty()
2518 * calls now that we've torn down the bitmap */
2519 d->arch.paging.mode &= ~PG_log_dirty;
2521 shadow_unlock(d);
2524 void shadow_final_teardown(struct domain *d)
2525 /* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */
2527 SHADOW_PRINTK("dom %u final teardown starts."
2528 " Shadow pages total = %u, free = %u, p2m=%u\n",
2529 d->domain_id,
2530 d->arch.paging.shadow.total_pages,
2531 d->arch.paging.shadow.free_pages,
2532 d->arch.paging.shadow.p2m_pages);
2534 /* Double-check that the domain didn't have any shadow memory.
2535 * It is possible for a domain that never got domain_kill()ed
2536 * to get here with its shadow allocation intact. */
2537 if ( d->arch.paging.shadow.total_pages != 0 )
2538 shadow_teardown(d);
2540 /* It is now safe to pull down the p2m map. */
2541 p2m_teardown(d);
2543 SHADOW_PRINTK("dom %u final teardown done."
2544 " Shadow pages total = %u, free = %u, p2m=%u\n",
2545 d->domain_id,
2546 d->arch.paging.shadow.total_pages,
2547 d->arch.paging.shadow.free_pages,
2548 d->arch.paging.shadow.p2m_pages);
2551 static int shadow_one_bit_enable(struct domain *d, u32 mode)
2552 /* Turn on a single shadow mode feature */
2554 ASSERT(shadow_locked_by_me(d));
2556 /* Sanity check the call */
2557 if ( d == current->domain || (d->arch.paging.mode & mode) )
2559 return -EINVAL;
2562 mode |= PG_SH_enable;
2564 if ( d->arch.paging.mode == 0 )
2566 /* Init the shadow memory allocation and the hash table */
2567 if ( sh_set_allocation(d, 1, NULL) != 0
2568 || shadow_hash_alloc(d) != 0 )
2570 sh_set_allocation(d, 0, NULL);
2571 return -ENOMEM;
2575 /* Update the bits */
2576 sh_new_mode(d, d->arch.paging.mode | mode);
2578 return 0;
2581 static int shadow_one_bit_disable(struct domain *d, u32 mode)
2582 /* Turn off a single shadow mode feature */
2584 struct vcpu *v;
2585 ASSERT(shadow_locked_by_me(d));
2587 /* Sanity check the call */
2588 if ( d == current->domain || !(d->arch.paging.mode & mode) )
2590 return -EINVAL;
2593 /* Update the bits */
2594 sh_new_mode(d, d->arch.paging.mode & ~mode);
2595 if ( d->arch.paging.mode == 0 )
2597 /* Get this domain off shadows */
2598 SHADOW_PRINTK("un-shadowing of domain %u starts."
2599 " Shadow pages total = %u, free = %u, p2m=%u\n",
2600 d->domain_id,
2601 d->arch.paging.shadow.total_pages,
2602 d->arch.paging.shadow.free_pages,
2603 d->arch.paging.shadow.p2m_pages);
2604 for_each_vcpu(d, v)
2606 if ( v->arch.paging.mode )
2607 v->arch.paging.mode->shadow.detach_old_tables(v);
2608 #if CONFIG_PAGING_LEVELS == 4
2609 if ( !(v->arch.flags & TF_kernel_mode) )
2610 make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
2611 else
2612 #endif
2613 make_cr3(v, pagetable_get_pfn(v->arch.guest_table));
2617 /* Pull down the memory allocation */
2618 if ( sh_set_allocation(d, 0, NULL) != 0 )
2620 // XXX - How can this occur?
2621 // Seems like a bug to return an error now that we've
2622 // disabled the relevant shadow mode.
2623 //
2624 return -ENOMEM;
2626 shadow_hash_teardown(d);
2627 SHADOW_PRINTK("un-shadowing of domain %u done."
2628 " Shadow pages total = %u, free = %u, p2m=%u\n",
2629 d->domain_id,
2630 d->arch.paging.shadow.total_pages,
2631 d->arch.paging.shadow.free_pages,
2632 d->arch.paging.shadow.p2m_pages);
2635 return 0;
2638 /* Enable/disable ops for the "test" and "log-dirty" modes */
2639 static int shadow_test_enable(struct domain *d)
2641 int ret;
2643 domain_pause(d);
2644 shadow_lock(d);
2646 if ( shadow_mode_enabled(d) )
2648 SHADOW_ERROR("Don't support enabling test mode"
2649 " on already shadowed doms\n");
2650 ret = -EINVAL;
2651 goto out;
2654 ret = shadow_one_bit_enable(d, PG_SH_enable);
2655 out:
2656 shadow_unlock(d);
2657 domain_unpause(d);
2659 return ret;
2662 static int shadow_test_disable(struct domain *d)
2664 int ret;
2666 domain_pause(d);
2667 shadow_lock(d);
2668 ret = shadow_one_bit_disable(d, PG_SH_enable);
2669 shadow_unlock(d);
2670 domain_unpause(d);
2672 return ret;
2675 static int
2676 sh_alloc_log_dirty_bitmap(struct domain *d)
2678 ASSERT(d->arch.paging.shadow.dirty_bitmap == NULL);
2679 d->arch.paging.shadow.dirty_bitmap_size =
2680 (arch_get_max_pfn(d) + (BITS_PER_LONG - 1)) &
2681 ~(BITS_PER_LONG - 1);
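/* [Editor's note, with hypothetical numbers: the expression above rounds
 * the bitmap size up to a whole number of longs so that the
 * xmalloc_array() and memset() below divide evenly; e.g. with
 * max_pfn = 0x12345 on a 64-bit build the size becomes 0x12380 bits,
 * i.e. 0x48e longs or 9328 bytes.] */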
2682 d->arch.paging.shadow.dirty_bitmap =
2683 xmalloc_array(unsigned long,
2684 d->arch.paging.shadow.dirty_bitmap_size / BITS_PER_LONG);
2685 if ( d->arch.paging.shadow.dirty_bitmap == NULL )
2687 d->arch.paging.shadow.dirty_bitmap_size = 0;
2688 return -ENOMEM;
2690 memset(d->arch.paging.shadow.dirty_bitmap, 0, d->arch.paging.shadow.dirty_bitmap_size/8);
2692 return 0;
2695 static void
2696 sh_free_log_dirty_bitmap(struct domain *d)
2698 d->arch.paging.shadow.dirty_bitmap_size = 0;
2699 if ( d->arch.paging.shadow.dirty_bitmap )
2701 xfree(d->arch.paging.shadow.dirty_bitmap);
2702 d->arch.paging.shadow.dirty_bitmap = NULL;
2706 static int shadow_log_dirty_enable(struct domain *d)
2708 int ret;
2710 domain_pause(d);
2711 shadow_lock(d);
2713 if ( shadow_mode_log_dirty(d) )
2715 ret = -EINVAL;
2716 goto out;
2719 if ( shadow_mode_enabled(d) )
2721 SHADOW_ERROR("Don't (yet) support enabling log-dirty"
2722 " on already shadowed doms\n");
2723 ret = -EINVAL;
2724 goto out;
2727 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
2728 if ( IS_COMPAT(d) )
2729 d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
2730 #endif
2732 ret = sh_alloc_log_dirty_bitmap(d);
2733 if ( ret != 0 )
2735 sh_free_log_dirty_bitmap(d);
2736 goto out;
2739 ret = shadow_one_bit_enable(d, PG_log_dirty);
2740 if ( ret != 0 )
2741 sh_free_log_dirty_bitmap(d);
2743 out:
2744 shadow_unlock(d);
2745 domain_unpause(d);
2746 return ret;
2749 static int shadow_log_dirty_disable(struct domain *d)
2751 int ret;
2753 domain_pause(d);
2754 shadow_lock(d);
2755 ret = shadow_one_bit_disable(d, PG_log_dirty);
2756 if ( !shadow_mode_log_dirty(d) )
2757 sh_free_log_dirty_bitmap(d);
2758 shadow_unlock(d);
2759 domain_unpause(d);
2761 return ret;
2764 /**************************************************************************/
2765 /* P2M map manipulations */
2767 /* Shadow-specific code that should be called when a P2M table entry is
2768 * updated with new content. It is responsible for updating the entry, as
2769 * well as for other shadow processing jobs.
2770 */
2771 void
2772 shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
2773 l1_pgentry_t new, unsigned int level)
2775 struct domain *d = v->domain;
2776 mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
2777 mfn_t mfn;
2779 shadow_lock(d);
2781 /* handle physmap_add and physmap_remove */
2782 mfn = gfn_to_mfn(d, gfn);
2783 if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
2784 sh_remove_all_shadows_and_parents(v, mfn);
2785 if ( sh_remove_all_mappings(v, mfn) )
2786 flush_tlb_mask(d->domain_dirty_cpumask);
2789 /* update the entry with new content */
2790 safe_write_pte(p, new);
2792 /* The P2M can be shadowed: keep the shadows synced */
2793 if ( d->vcpu[0] != NULL )
2794 (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, p, sizeof(*p));
2796 /* install P2M in monitors for PAE Xen */
2797 #if CONFIG_PAGING_LEVELS == 3
2798 if ( level == 3 ) {
2799 struct vcpu *v;
2800 /* We have written to the p2m l3: need to sync the per-vcpu
2801 * copies of it in the monitor tables */
2802 p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p);
2803 /* Also, any vcpus running on shadows of the p2m need to
2804 * reload their CR3s so the change propagates to the shadow */
2805 for_each_vcpu(d, v) {
2806 if ( pagetable_get_pfn(v->arch.guest_table)
2807 == pagetable_get_pfn(d->arch.phys_table)
2808 && v->arch.paging.mode != NULL )
2809 v->arch.paging.mode->update_cr3(v, 0);
2812 #endif
2814 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
2815 /* If we're doing FAST_FAULT_PATH, then shadow mode may have
2816 cached the fact that this is an mmio region in the shadow
2817 page tables. Blow the tables away to remove the cache.
2818 This is pretty heavy-handed, but it is a rare operation
2819 (it might happen a dozen times during boot and then never
2820 again), so it doesn't matter too much. */
2821 shadow_blow_tables(d);
2822 #endif
2824 shadow_unlock(d);
2827 /**************************************************************************/
2828 /* Log-dirty mode support */
2830 /* Convert a shadow to log-dirty mode. */
2831 void shadow_convert_to_log_dirty(struct vcpu *v, mfn_t smfn)
2833 BUG();
2837 /* Read a domain's log-dirty bitmap and stats.
2838 * If the operation is a CLEAN, clear the bitmap and stats as well. */
2839 static int shadow_log_dirty_op(
2840 struct domain *d, struct xen_domctl_shadow_op *sc)
2842 int i, rv = 0, clean = 0, peek = 1;
2844 domain_pause(d);
2845 shadow_lock(d);
2847 clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
2849 SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
2850 (clean) ? "clean" : "peek",
2851 d->domain_id,
2852 d->arch.paging.shadow.fault_count,
2853 d->arch.paging.shadow.dirty_count);
2855 sc->stats.fault_count = d->arch.paging.shadow.fault_count;
2856 sc->stats.dirty_count = d->arch.paging.shadow.dirty_count;
2858 if ( clean )
2860 /* Need to revoke write access to the domain's pages again.
2861 * In future, we'll have a less heavy-handed approach to this,
2862 * but for now, we just unshadow everything except Xen. */
2863 shadow_blow_tables(d);
2865 d->arch.paging.shadow.fault_count = 0;
2866 d->arch.paging.shadow.dirty_count = 0;
2869 if ( guest_handle_is_null(sc->dirty_bitmap) )
2870 /* caller may have wanted just to clean the state or access stats. */
2871 peek = 0;
2873 if ( (peek || clean) && (d->arch.paging.shadow.dirty_bitmap == NULL) )
2875 rv = -EINVAL; /* perhaps should be ENOMEM? */
2876 goto out;
2879 if ( sc->pages > d->arch.paging.shadow.dirty_bitmap_size )
2880 sc->pages = d->arch.paging.shadow.dirty_bitmap_size;
2882 #define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
2883 for ( i = 0; i < sc->pages; i += CHUNK )
2885 int bytes = ((((sc->pages - i) > CHUNK)
2886 ? CHUNK
2887 : (sc->pages - i)) + 7) / 8;
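/* [Editor's note: each pass handles min(CHUNK, sc->pages - i) page bits,
 * rounded up to whole bytes -- at most 1024 bytes per copy/clear with
 * CHUNK = 8*1024; a final partial run of, say, 20 pages becomes
 * (20 + 7) / 8 = 3 bytes.] */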
2889 if ( likely(peek) )
2891 if ( copy_to_guest_offset(
2892 sc->dirty_bitmap, i/8,
2893 (uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), bytes) )
2895 rv = -EFAULT;
2896 goto out;
2900 if ( clean )
2901 memset((uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), 0, bytes);
2903 #undef CHUNK
2905 out:
2906 shadow_unlock(d);
2907 domain_unpause(d);
2908 return rv;
2912 /* Mark a page as dirty */
2913 void sh_mark_dirty(struct domain *d, mfn_t gmfn)
2915 unsigned long pfn;
2917 ASSERT(shadow_locked_by_me(d));
2919 if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) )
2920 return;
2922 ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL);
2924 /* We /really/ mean PFN here, even for non-translated guests. */
2925 pfn = get_gpfn_from_mfn(mfn_x(gmfn));
2927 /*
2928 * Values with the MSB set denote MFNs that aren't really part of the
2929 * domain's pseudo-physical memory map (e.g., the shared info frame).
2930 * Nothing to do here...
2931 */
2932 if ( unlikely(!VALID_M2P(pfn)) )
2933 return;
2935 /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
2936 if ( likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) )
2938 if ( !__test_and_set_bit(pfn, d->arch.paging.shadow.dirty_bitmap) )
2940 SHADOW_DEBUG(LOGDIRTY,
2941 "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
2942 mfn_x(gmfn), pfn, d->domain_id);
2943 d->arch.paging.shadow.dirty_count++;
2946 else
2948 SHADOW_PRINTK("mark_dirty OOR! "
2949 "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
2950 "owner=%d c=%08x t=%" PRtype_info "\n",
2951 mfn_x(gmfn),
2952 pfn,
2953 d->arch.paging.shadow.dirty_bitmap_size,
2954 d->domain_id,
2955 (page_get_owner(mfn_to_page(gmfn))
2956 ? page_get_owner(mfn_to_page(gmfn))->domain_id
2957 : -1),
2958 mfn_to_page(gmfn)->count_info,
2959 mfn_to_page(gmfn)->u.inuse.type_info);
2963 void shadow_mark_dirty(struct domain *d, mfn_t gmfn)
2965 shadow_lock(d);
2966 sh_mark_dirty(d, gmfn);
2967 shadow_unlock(d);
2970 /**************************************************************************/
2971 /* Shadow-control XEN_DOMCTL dispatcher */
2973 int shadow_domctl(struct domain *d,
2974 xen_domctl_shadow_op_t *sc,
2975 XEN_GUEST_HANDLE(void) u_domctl)
2977 int rc, preempted = 0;
2979 if ( unlikely(d == current->domain) )
2981 gdprintk(XENLOG_INFO, "Don't try to do a shadow op on yourself!\n");
2982 return -EINVAL;
2985 switch ( sc->op )
2987 case XEN_DOMCTL_SHADOW_OP_OFF:
2988 if ( shadow_mode_log_dirty(d) )
2989 if ( (rc = shadow_log_dirty_disable(d)) != 0 )
2990 return rc;
2991 if ( is_hvm_domain(d) )
2992 return -EINVAL;
2993 if ( d->arch.paging.mode & PG_SH_enable )
2994 if ( (rc = shadow_test_disable(d)) != 0 )
2995 return rc;
2996 return 0;
2998 case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
2999 return shadow_test_enable(d);
3001 case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
3002 return shadow_log_dirty_enable(d);
3004 case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
3005 return shadow_enable(d, PG_refcounts|PG_translate);
3007 case XEN_DOMCTL_SHADOW_OP_CLEAN:
3008 case XEN_DOMCTL_SHADOW_OP_PEEK:
3009 return shadow_log_dirty_op(d, sc);
3011 case XEN_DOMCTL_SHADOW_OP_ENABLE:
3012 if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
3013 return shadow_log_dirty_enable(d);
3014 return shadow_enable(d, sc->mode << PG_mode_shift);
3016 case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
3017 sc->mb = shadow_get_allocation(d);
3018 return 0;
3020 case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
3021 shadow_lock(d);
3022 if ( sc->mb == 0 && shadow_mode_enabled(d) )
3024 /* Can't set the allocation to zero unless the domain stops using
3025 * shadow pagetables first */
3026 SHADOW_ERROR("Can't set shadow allocation to zero, domain %u"
3027 " is still using shadows.\n", d->domain_id);
3028 shadow_unlock(d);
3029 return -EINVAL;
3031 rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
3032 shadow_unlock(d);
3033 if ( preempted )
3034 /* Not finished. Set up to re-run the call. */
3035 rc = hypercall_create_continuation(
3036 __HYPERVISOR_domctl, "h", u_domctl);
3037 else
3038 /* Finished. Return the new allocation */
3039 sc->mb = shadow_get_allocation(d);
3040 return rc;
3042 default:
3043 SHADOW_ERROR("Bad shadow op %u\n", sc->op);
3044 return -EINVAL;
3049 /**************************************************************************/
3050 /* Auditing shadow tables */
3052 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL
3054 void shadow_audit_tables(struct vcpu *v)
3056 /* Dispatch table for getting per-type functions */
3057 static hash_callback_t callbacks[SH_type_unused] = {
3058 NULL, /* none */
3059 #if CONFIG_PAGING_LEVELS == 2
3060 SHADOW_INTERNAL_NAME(sh_audit_l1_table,2,2), /* l1_32 */
3061 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,2,2), /* fl1_32 */
3062 SHADOW_INTERNAL_NAME(sh_audit_l2_table,2,2), /* l2_32 */
3063 #else
3064 SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,2), /* l1_32 */
3065 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,2), /* fl1_32 */
3066 SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,2), /* l2_32 */
3067 SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,3), /* l1_pae */
3068 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
3069 SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2_pae */
3070 SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2h_pae */
3071 #if CONFIG_PAGING_LEVELS >= 4
3072 SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4), /* l1_64 */
3073 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64 */
3074 SHADOW_INTERNAL_NAME(sh_audit_l2_table,4,4), /* l2_64 */
3075 SHADOW_INTERNAL_NAME(sh_audit_l2_table,4,4), /* l2h_64 */
3076 SHADOW_INTERNAL_NAME(sh_audit_l3_table,4,4), /* l3_64 */
3077 SHADOW_INTERNAL_NAME(sh_audit_l4_table,4,4), /* l4_64 */
3078 #endif /* CONFIG_PAGING_LEVELS >= 4 */
3079 #endif /* CONFIG_PAGING_LEVELS > 2 */
3080 NULL /* All the rest */
3081 };
3082 unsigned int mask;
3084 if ( !(SHADOW_AUDIT_ENABLE) )
3085 return;
3087 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL )
3088 mask = ~1; /* Audit every table in the system */
3089 else
3091 /* Audit only the current mode's tables */
3092 switch ( v->arch.paging.mode->guest_levels )
3094 case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
3095 case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
3096 |SHF_L2H_PAE); break;
3097 case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64
3098 |SHF_L3_64|SHF_L4_64); break;
3099 default: BUG();
3103 hash_foreach(v, ~1, callbacks, _mfn(INVALID_MFN));
3106 #endif /* Shadow audit */
3108 /*
3109 * Local variables:
3110 * mode: C
3111 * c-set-style: "BSD"
3112 * c-basic-offset: 4
3113 * indent-tabs-mode: nil
3114 * End:
3115 */