ia64/xen-unstable: xen/arch/x86/mm/shadow/common.c @ 17571:b6aa55ca599e

shadow: track video RAM dirty bits

This adds a new HVM op that enables tracking of the dirty bits of a
range of video RAM. The idea is to optimize only for the most common
case (a single guest mapping, with occasional temporary extra
mappings), which keeps the overhead on the shadow code as low as
possible.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
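
As an illustration of the intended use, here is a hedged sketch of how a
device model might poll the new op. The libxc wrapper prototype below is an
assumption based on the tools-side half of this change, not something defined
in this file; check tools/libxc for the real declaration.

    /* Hypothetical device-model caller of the dirty-VRAM tracking op.
     * The wrapper prototype is declared here (as an assumption) so the
     * sketch is self-contained. */
    #include <stdint.h>
    #include <string.h>

    int xc_hvm_track_dirty_vram(int xc_handle, unsigned int domid,
                                uint64_t first_pfn, uint64_t nr,
                                unsigned long *dirty_bitmap); /* assumed prototype */

    #define VRAM_BYTES     (8u << 20)           /* example: 8MB of video RAM */
    #define VRAM_PAGES     (VRAM_BYTES >> 12)
    #define BITS_PER_LONG_ (8 * sizeof(unsigned long))

    static unsigned long dirty[VRAM_PAGES / BITS_PER_LONG_];

    /* Ask Xen which VRAM pages were written since the last call; the
     * hypervisor fills the bitmap and re-arms its write tracking. */
    int poll_vram_dirty(int xc_handle, unsigned int domid,
                        uint64_t vram_first_pfn)
    {
        memset(dirty, 0, sizeof(dirty));
        return xc_hvm_track_dirty_vram(xc_handle, domid, vram_first_pfn,
                                       VRAM_PAGES, dirty);
    }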
author Keir Fraser <keir.fraser@citrix.com>
date Fri May 02 15:08:27 2008 +0100 (2008-05-02)
parents 7689e311f3b5
children 730c253afc30
1 /******************************************************************************
2 * arch/x86/mm/shadow/common.c
3 *
4 * Shadow code that does not need to be multiply compiled.
5 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
6 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
7 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
24 #include <xen/config.h>
25 #include <xen/types.h>
26 #include <xen/mm.h>
27 #include <xen/trace.h>
28 #include <xen/sched.h>
29 #include <xen/perfc.h>
30 #include <xen/irq.h>
31 #include <xen/domain_page.h>
32 #include <xen/guest_access.h>
33 #include <xen/keyhandler.h>
34 #include <asm/event.h>
35 #include <asm/page.h>
36 #include <asm/current.h>
37 #include <asm/flushtlb.h>
38 #include <asm/shadow.h>
39 #include <xen/numa.h>
40 #include "private.h"
43 /* Set up the shadow-specific parts of a domain struct at start of day.
44 * Called for every domain from arch_domain_create() */
45 void shadow_domain_init(struct domain *d)
46 {
47 int i;
48 shadow_lock_init(d);
49 for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
50 INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
51 INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
52 INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
54 /* Use shadow pagetables for log-dirty support */
55 paging_log_dirty_init(d, shadow_enable_log_dirty,
56 shadow_disable_log_dirty, shadow_clean_dirty_bitmap);
57 }
59 /* Set up the shadow-specific parts of a vcpu struct. Note: The most important
60 * job is to initialize the update_paging_modes() function pointer, which is
61 * used to initialize the rest of the resources. Therefore, it really does not
62 * matter to have v->arch.paging.mode pointing to any mode, as long as it can
63 * be compiled.
64 */
65 void shadow_vcpu_init(struct vcpu *v)
66 {
67 #if CONFIG_PAGING_LEVELS == 4
68 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
69 #elif CONFIG_PAGING_LEVELS == 3
70 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
71 #elif CONFIG_PAGING_LEVELS == 2
72 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
73 #endif
74 }
76 #if SHADOW_AUDIT
77 int shadow_audit_enable = 0;
79 static void shadow_audit_key(unsigned char key)
80 {
81 shadow_audit_enable = !shadow_audit_enable;
82 printk("%s shadow_audit_enable=%d\n",
83 __func__, shadow_audit_enable);
84 }
86 static int __init shadow_audit_key_init(void)
87 {
88 register_keyhandler(
89 'O', shadow_audit_key, "toggle shadow audits");
90 return 0;
91 }
92 __initcall(shadow_audit_key_init);
93 #endif /* SHADOW_AUDIT */
95 int _shadow_mode_refcounts(struct domain *d)
96 {
97 return shadow_mode_refcounts(d);
98 }
101 /**************************************************************************/
102 /* x86 emulator support for the shadow code
103 */
105 struct segment_register *hvm_get_seg_reg(
106 enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt)
107 {
108 struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg];
109 if ( !__test_and_set_bit(seg, &sh_ctxt->valid_seg_regs) )
110 hvm_get_segment_register(current, seg, seg_reg);
111 return seg_reg;
112 }
114 static int hvm_translate_linear_addr(
115 enum x86_segment seg,
116 unsigned long offset,
117 unsigned int bytes,
118 enum hvm_access_type access_type,
119 struct sh_emulate_ctxt *sh_ctxt,
120 unsigned long *paddr)
121 {
122 struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt);
123 int okay;
125 okay = hvm_virtual_to_linear_addr(
126 seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr);
128 if ( !okay )
129 {
130 hvm_inject_exception(TRAP_gp_fault, 0, 0);
131 return X86EMUL_EXCEPTION;
132 }
134 return 0;
135 }
137 static int
138 hvm_read(enum x86_segment seg,
139 unsigned long offset,
140 unsigned long *val,
141 unsigned int bytes,
142 enum hvm_access_type access_type,
143 struct sh_emulate_ctxt *sh_ctxt)
144 {
145 unsigned long addr;
146 int rc;
148 rc = hvm_translate_linear_addr(
149 seg, offset, bytes, access_type, sh_ctxt, &addr);
150 if ( rc )
151 return rc;
153 *val = 0;
155 if ( access_type == hvm_access_insn_fetch )
156 rc = hvm_fetch_from_guest_virt(val, addr, bytes, 0);
157 else
158 rc = hvm_copy_from_guest_virt(val, addr, bytes, 0);
160 switch ( rc )
161 {
162 case HVMCOPY_okay:
163 return X86EMUL_OKAY;
164 case HVMCOPY_bad_gva_to_gfn:
165 return X86EMUL_EXCEPTION;
166 default:
167 break;
168 }
170 return X86EMUL_UNHANDLEABLE;
171 }
173 static int
174 hvm_emulate_read(enum x86_segment seg,
175 unsigned long offset,
176 unsigned long *val,
177 unsigned int bytes,
178 struct x86_emulate_ctxt *ctxt)
179 {
180 if ( !is_x86_user_segment(seg) )
181 return X86EMUL_UNHANDLEABLE;
182 return hvm_read(seg, offset, val, bytes, hvm_access_read,
183 container_of(ctxt, struct sh_emulate_ctxt, ctxt));
184 }
186 static int
187 hvm_emulate_insn_fetch(enum x86_segment seg,
188 unsigned long offset,
189 unsigned long *val,
190 unsigned int bytes,
191 struct x86_emulate_ctxt *ctxt)
192 {
193 struct sh_emulate_ctxt *sh_ctxt =
194 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
195 unsigned int insn_off = offset - sh_ctxt->insn_buf_eip;
197 ASSERT(seg == x86_seg_cs);
199 /* Fall back if requested bytes are not in the prefetch cache. */
200 if ( unlikely((insn_off + bytes) > sh_ctxt->insn_buf_bytes) )
201 return hvm_read(seg, offset, val, bytes,
202 hvm_access_insn_fetch, sh_ctxt);
204 /* Hit the cache. Simple memcpy. */
205 *val = 0;
206 memcpy(val, &sh_ctxt->insn_buf[insn_off], bytes);
207 return X86EMUL_OKAY;
208 }
210 static int
211 hvm_emulate_write(enum x86_segment seg,
212 unsigned long offset,
213 unsigned long val,
214 unsigned int bytes,
215 struct x86_emulate_ctxt *ctxt)
216 {
217 struct sh_emulate_ctxt *sh_ctxt =
218 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
219 struct vcpu *v = current;
220 unsigned long addr;
221 int rc;
223 if ( !is_x86_user_segment(seg) )
224 return X86EMUL_UNHANDLEABLE;
226 /* How many emulations could we save if we unshadowed on stack writes? */
227 if ( seg == x86_seg_ss )
228 perfc_incr(shadow_fault_emulate_stack);
230 rc = hvm_translate_linear_addr(
231 seg, offset, bytes, hvm_access_write, sh_ctxt, &addr);
232 if ( rc )
233 return rc;
235 return v->arch.paging.mode->shadow.x86_emulate_write(
236 v, addr, &val, bytes, sh_ctxt);
237 }
239 static int
240 hvm_emulate_cmpxchg(enum x86_segment seg,
241 unsigned long offset,
242 void *p_old,
243 void *p_new,
244 unsigned int bytes,
245 struct x86_emulate_ctxt *ctxt)
246 {
247 struct sh_emulate_ctxt *sh_ctxt =
248 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
249 struct vcpu *v = current;
250 unsigned long addr, old[2], new[2];
251 int rc;
253 if ( !is_x86_user_segment(seg) )
254 return X86EMUL_UNHANDLEABLE;
256 rc = hvm_translate_linear_addr(
257 seg, offset, bytes, hvm_access_write, sh_ctxt, &addr);
258 if ( rc )
259 return rc;
261 old[0] = new[0] = 0;
262 memcpy(old, p_old, bytes);
263 memcpy(new, p_new, bytes);
265 if ( bytes <= sizeof(long) )
266 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
267 v, addr, old[0], new[0], bytes, sh_ctxt);
269 #ifdef __i386__
270 if ( bytes == 8 )
271 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
272 v, addr, old[0], old[1], new[0], new[1], sh_ctxt);
273 #endif
275 return X86EMUL_UNHANDLEABLE;
276 }
278 static struct x86_emulate_ops hvm_shadow_emulator_ops = {
279 .read = hvm_emulate_read,
280 .insn_fetch = hvm_emulate_insn_fetch,
281 .write = hvm_emulate_write,
282 .cmpxchg = hvm_emulate_cmpxchg,
283 };
285 static int
286 pv_emulate_read(enum x86_segment seg,
287 unsigned long offset,
288 unsigned long *val,
289 unsigned int bytes,
290 struct x86_emulate_ctxt *ctxt)
291 {
292 unsigned int rc;
294 if ( !is_x86_user_segment(seg) )
295 return X86EMUL_UNHANDLEABLE;
297 *val = 0;
298 if ( (rc = copy_from_user((void *)val, (void *)offset, bytes)) != 0 )
299 {
300 propagate_page_fault(offset + bytes - rc, 0); /* read fault */
301 return X86EMUL_EXCEPTION;
302 }
304 return X86EMUL_OKAY;
305 }
307 static int
308 pv_emulate_write(enum x86_segment seg,
309 unsigned long offset,
310 unsigned long val,
311 unsigned int bytes,
312 struct x86_emulate_ctxt *ctxt)
313 {
314 struct sh_emulate_ctxt *sh_ctxt =
315 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
316 struct vcpu *v = current;
317 if ( !is_x86_user_segment(seg) )
318 return X86EMUL_UNHANDLEABLE;
319 return v->arch.paging.mode->shadow.x86_emulate_write(
320 v, offset, &val, bytes, sh_ctxt);
321 }
323 static int
324 pv_emulate_cmpxchg(enum x86_segment seg,
325 unsigned long offset,
326 void *p_old,
327 void *p_new,
328 unsigned int bytes,
329 struct x86_emulate_ctxt *ctxt)
330 {
331 struct sh_emulate_ctxt *sh_ctxt =
332 container_of(ctxt, struct sh_emulate_ctxt, ctxt);
333 unsigned long old[2], new[2];
334 struct vcpu *v = current;
336 if ( !is_x86_user_segment(seg) )
337 return X86EMUL_UNHANDLEABLE;
339 old[0] = new[0] = 0;
340 memcpy(old, p_old, bytes);
341 memcpy(new, p_new, bytes);
343 if ( bytes <= sizeof(long) )
344 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
345 v, offset, old[0], new[0], bytes, sh_ctxt);
347 #ifdef __i386__
348 if ( bytes == 8 )
349 return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
350 v, offset, old[0], old[1], new[0], new[1], sh_ctxt);
351 #endif
353 return X86EMUL_UNHANDLEABLE;
354 }
356 static struct x86_emulate_ops pv_shadow_emulator_ops = {
357 .read = pv_emulate_read,
358 .insn_fetch = pv_emulate_read,
359 .write = pv_emulate_write,
360 .cmpxchg = pv_emulate_cmpxchg,
361 };
363 struct x86_emulate_ops *shadow_init_emulation(
364 struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs)
365 {
366 struct segment_register *creg, *sreg;
367 struct vcpu *v = current;
368 unsigned long addr;
370 sh_ctxt->ctxt.regs = regs;
371 sh_ctxt->ctxt.force_writeback = 0;
373 if ( !is_hvm_vcpu(v) )
374 {
375 sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = BITS_PER_LONG;
376 return &pv_shadow_emulator_ops;
377 }
379 /* Segment cache initialisation. Primed with CS. */
380 sh_ctxt->valid_seg_regs = 0;
381 creg = hvm_get_seg_reg(x86_seg_cs, sh_ctxt);
383 /* Work out the emulation mode. */
384 if ( hvm_long_mode_enabled(v) && creg->attr.fields.l )
385 {
386 sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 64;
387 }
388 else
389 {
390 sreg = hvm_get_seg_reg(x86_seg_ss, sh_ctxt);
391 sh_ctxt->ctxt.addr_size = creg->attr.fields.db ? 32 : 16;
392 sh_ctxt->ctxt.sp_size = sreg->attr.fields.db ? 32 : 16;
393 }
395 /* Attempt to prefetch whole instruction. */
396 sh_ctxt->insn_buf_eip = regs->eip;
397 sh_ctxt->insn_buf_bytes =
398 (!hvm_translate_linear_addr(
399 x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
400 hvm_access_insn_fetch, sh_ctxt, &addr) &&
401 !hvm_fetch_from_guest_virt_nofault(
402 sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf), 0))
403 ? sizeof(sh_ctxt->insn_buf) : 0;
405 return &hvm_shadow_emulator_ops;
406 }
408 /* Update an initialized emulation context to prepare for the next
409 * instruction */
410 void shadow_continue_emulation(struct sh_emulate_ctxt *sh_ctxt,
411 struct cpu_user_regs *regs)
412 {
413 struct vcpu *v = current;
414 unsigned long addr, diff;
416 /* We don't refetch the segment bases, because we don't emulate
417 * writes to segment registers */
419 if ( is_hvm_vcpu(v) )
420 {
421 diff = regs->eip - sh_ctxt->insn_buf_eip;
422 if ( diff > sh_ctxt->insn_buf_bytes )
423 {
424 /* Prefetch more bytes. */
425 sh_ctxt->insn_buf_bytes =
426 (!hvm_translate_linear_addr(
427 x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
428 hvm_access_insn_fetch, sh_ctxt, &addr) &&
429 !hvm_fetch_from_guest_virt_nofault(
430 sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf), 0))
431 ? sizeof(sh_ctxt->insn_buf) : 0;
432 sh_ctxt->insn_buf_eip = regs->eip;
433 }
434 }
435 }
437 /**************************************************************************/
438 /* Code for "promoting" a guest page to the point where the shadow code is
439 * willing to let it be treated as a guest page table. This generally
440 * involves making sure there are no writable mappings available to the guest
441 * for this page.
442 */
443 void shadow_promote(struct vcpu *v, mfn_t gmfn, unsigned int type)
444 {
445 struct page_info *page = mfn_to_page(gmfn);
447 ASSERT(mfn_valid(gmfn));
449 /* We should never try to promote a gmfn that has writeable mappings */
450 ASSERT((page->u.inuse.type_info & PGT_type_mask) != PGT_writable_page
451 || (page->u.inuse.type_info & PGT_count_mask) == 0
452 || v->domain->is_shutting_down);
454 /* Is the page already shadowed? */
455 if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
456 page->shadow_flags = 0;
458 ASSERT(!test_bit(type, &page->shadow_flags));
459 set_bit(type, &page->shadow_flags);
460 }
462 void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
463 {
464 struct page_info *page = mfn_to_page(gmfn);
466 ASSERT(test_bit(_PGC_page_table, &page->count_info));
467 ASSERT(test_bit(type, &page->shadow_flags));
469 clear_bit(type, &page->shadow_flags);
471 if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
472 {
473 /* tlbflush timestamp field is valid again */
474 page->tlbflush_timestamp = tlbflush_current_time();
475 clear_bit(_PGC_page_table, &page->count_info);
476 }
477 }
479 /**************************************************************************/
480 /* Validate a pagetable change from the guest and update the shadows.
481 * Returns a bitmask of SHADOW_SET_* flags. */
483 int
484 sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size)
485 {
486 int result = 0;
487 struct page_info *page = mfn_to_page(gmfn);
489 paging_mark_dirty(v->domain, mfn_x(gmfn));
491 // Determine which types of shadows are affected, and update each.
492 //
493 // Always validate L1s before L2s to prevent another cpu with a linear
494 // mapping of this gmfn from seeing a walk that results from
495 // using the new L2 value and the old L1 value. (It is OK for such a
496 // guest to see a walk that uses the old L2 value with the new L1 value,
497 // as hardware could behave this way if one level of the pagewalk occurs
498 // before the store, and the next level of the pagewalk occurs after the
499 // store.)
500 //
501 // Ditto for L2s before L3s, etc.
502 //
504 if ( !(page->count_info & PGC_page_table) )
505 return 0; /* Not shadowed at all */
507 #if CONFIG_PAGING_LEVELS == 2
508 if ( page->shadow_flags & SHF_L1_32 )
509 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 2, 2)
510 (v, gmfn, entry, size);
511 #else
512 if ( page->shadow_flags & SHF_L1_32 )
513 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 2)
514 (v, gmfn, entry, size);
515 #endif
517 #if CONFIG_PAGING_LEVELS == 2
518 if ( page->shadow_flags & SHF_L2_32 )
519 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 2, 2)
520 (v, gmfn, entry, size);
521 #else
522 if ( page->shadow_flags & SHF_L2_32 )
523 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 2)
524 (v, gmfn, entry, size);
525 #endif
527 #if CONFIG_PAGING_LEVELS >= 3
528 if ( page->shadow_flags & SHF_L1_PAE )
529 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 3)
530 (v, gmfn, entry, size);
531 if ( page->shadow_flags & SHF_L2_PAE )
532 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 3)
533 (v, gmfn, entry, size);
534 if ( page->shadow_flags & SHF_L2H_PAE )
535 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
536 (v, gmfn, entry, size);
537 #else /* 32-bit non-PAE hypervisor does not support PAE guests */
538 ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
539 #endif
541 #if CONFIG_PAGING_LEVELS >= 4
542 if ( page->shadow_flags & SHF_L1_64 )
543 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 4, 4)
544 (v, gmfn, entry, size);
545 if ( page->shadow_flags & SHF_L2_64 )
546 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 4, 4)
547 (v, gmfn, entry, size);
548 if ( page->shadow_flags & SHF_L2H_64 )
549 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 4, 4)
550 (v, gmfn, entry, size);
551 if ( page->shadow_flags & SHF_L3_64 )
552 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 4, 4)
553 (v, gmfn, entry, size);
554 if ( page->shadow_flags & SHF_L4_64 )
555 result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl4e, 4, 4)
556 (v, gmfn, entry, size);
557 #else /* 32-bit/PAE hypervisor does not support 64-bit guests */
558 ASSERT((page->shadow_flags
559 & (SHF_L4_64|SHF_L3_64|SHF_L2H_64|SHF_L2_64|SHF_L1_64)) == 0);
560 #endif
562 return result;
563 }
566 void
567 sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
568 void *entry, u32 size)
569 /* This is the entry point for emulated writes to pagetables in HVM guests and
570 * PV translated guests.
571 */
572 {
573 struct domain *d = v->domain;
574 int rc;
576 ASSERT(shadow_locked_by_me(v->domain));
577 rc = sh_validate_guest_entry(v, gmfn, entry, size);
578 if ( rc & SHADOW_SET_FLUSH )
579 /* Need to flush TLBs to pick up shadow PT changes */
580 flush_tlb_mask(d->domain_dirty_cpumask);
581 if ( rc & SHADOW_SET_ERROR )
582 {
583 /* This page is probably not a pagetable any more: tear it out of the
584 * shadows, along with any tables that reference it.
585 * Since the validate call above will have made a "safe" (i.e. zero)
586 * shadow entry, we can let the domain live even if we can't fully
587 * unshadow the page. */
588 sh_remove_shadows(v, gmfn, 0, 0);
589 }
590 }
592 int shadow_write_guest_entry(struct vcpu *v, intpte_t *p,
593 intpte_t new, mfn_t gmfn)
594 /* Write a new value into the guest pagetable, and update the shadows
595 * appropriately. Returns 0 if we page-faulted, 1 for success. */
596 {
597 int failed;
598 shadow_lock(v->domain);
599 failed = __copy_to_user(p, &new, sizeof(new));
600 if ( failed != sizeof(new) )
601 sh_validate_guest_entry(v, gmfn, p, sizeof(new));
602 shadow_unlock(v->domain);
603 return (failed == 0);
604 }
606 int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
607 intpte_t *old, intpte_t new, mfn_t gmfn)
608 /* Cmpxchg a new value into the guest pagetable, and update the shadows
609 * appropriately. Returns 0 if we page-faulted, 1 if not.
610 * N.B. caller should check the value of "old" to see if the
611 * cmpxchg itself was successful. */
612 {
613 int failed;
614 intpte_t t = *old;
615 shadow_lock(v->domain);
616 failed = cmpxchg_user(p, t, new);
617 if ( t == *old )
618 sh_validate_guest_entry(v, gmfn, p, sizeof(new));
619 *old = t;
620 shadow_unlock(v->domain);
621 return (failed == 0);
622 }
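
For clarity, a hedged sketch of how a caller is expected to use that contract
(the helper below is hypothetical and not part of this file): the return value
says whether the access faulted, while *old says whether the swap actually
happened.

    /* Hypothetical caller: distinguishes "faulted", "lost the race" and
     * "updated", per the comment on shadow_cmpxchg_guest_entry() above. */
    static int try_update_guest_pte(struct vcpu *v, intpte_t *p,
                                    intpte_t expected, intpte_t desired,
                                    mfn_t gmfn)
    {
        intpte_t old = expected;

        if ( !shadow_cmpxchg_guest_entry(v, p, &old, desired, gmfn) )
            return -1;  /* page fault while touching the guest entry */

        if ( old != expected )
            return 0;   /* no fault, but another writer changed the entry first */

        return 1;       /* entry updated; shadows already revalidated */
    }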
625 /**************************************************************************/
626 /* Memory management for shadow pages. */
628 /* Allocating shadow pages
629 * -----------------------
630 *
631 * Most shadow pages are allocated singly, but there is one case where
632 * we need to allocate multiple pages together: shadowing 32-bit guest
633 * tables on PAE or 64-bit shadows. A 32-bit guest l1 table covers 4MB
634 * of virtual address space, and needs to be shadowed by two PAE/64-bit
635 * l1 tables (covering 2MB of virtual address space each). Similarly, a
636 * 32-bit guest l2 table (4GB va) needs to be shadowed by four
637 * PAE/64-bit l2 tables (1GB va each). These multi-page shadows are
638 * contiguous and aligned; functions for handling offsets into them are
639 * defined in shadow.c (shadow_l1_index() etc.)
640 *
641 * This table shows the allocation behaviour of the different modes:
642 *
643 * Xen paging 32b pae pae 64b 64b 64b
644 * Guest paging 32b 32b pae 32b pae 64b
645 * PV or HVM * HVM * HVM HVM *
646 * Shadow paging 32b pae pae pae pae 64b
647 *
648 * sl1 size 4k 8k 4k 8k 4k 4k
649 * sl2 size 4k 16k 4k 16k 4k 4k
650 * sl3 size - - - - - 4k
651 * sl4 size - - - - - 4k
652 *
653 * We allocate memory from xen in four-page units and break them down
654 * with a simple buddy allocator. Can't use the xen allocator to handle
655 * this as it only works for contiguous zones, and a domain's shadow
656 * pool is made of fragments.
657 *
658 * In HVM guests, the p2m table is built out of shadow pages, and we provide
659 * a function for the p2m management to steal pages, in max-order chunks, from
660 * the free pool. We don't provide for giving them back, yet.
661 */
663 /* Figure out the least acceptable quantity of shadow memory.
664 * The minimum memory requirement for always being able to free up a
665 * chunk of memory is very small -- only three max-order chunks per
666 * vcpu to hold the top level shadows and pages with Xen mappings in them.
667 *
668 * But for a guest to be guaranteed to successfully execute a single
669 * instruction, we must be able to map a large number (about thirty) VAs
670 * at the same time, which means that to guarantee progress, we must
671 * allow for more than ninety allocated pages per vcpu. We round that
672 * up to 128 pages, or half a megabyte per vcpu. */
673 static unsigned int shadow_min_acceptable_pages(struct domain *d)
674 {
675 u32 vcpu_count = 0;
676 struct vcpu *v;
678 for_each_vcpu(d, v)
679 vcpu_count++;
681 return (vcpu_count * 128);
682 }
684 /* Figure out the order of allocation needed for a given shadow type */
685 static inline u32
686 shadow_order(unsigned int shadow_type)
687 {
688 #if CONFIG_PAGING_LEVELS > 2
689 static const u32 type_to_order[SH_type_unused] = {
690 0, /* SH_type_none */
691 1, /* SH_type_l1_32_shadow */
692 1, /* SH_type_fl1_32_shadow */
693 2, /* SH_type_l2_32_shadow */
694 0, /* SH_type_l1_pae_shadow */
695 0, /* SH_type_fl1_pae_shadow */
696 0, /* SH_type_l2_pae_shadow */
697 0, /* SH_type_l2h_pae_shadow */
698 0, /* SH_type_l1_64_shadow */
699 0, /* SH_type_fl1_64_shadow */
700 0, /* SH_type_l2_64_shadow */
701 0, /* SH_type_l2h_64_shadow */
702 0, /* SH_type_l3_64_shadow */
703 0, /* SH_type_l4_64_shadow */
704 2, /* SH_type_p2m_table */
705 0 /* SH_type_monitor_table */
706 };
707 ASSERT(shadow_type < SH_type_unused);
708 return type_to_order[shadow_type];
709 #else /* 32-bit Xen only ever shadows 32-bit guests on 32-bit shadows. */
710 return 0;
711 #endif
712 }
714 static inline unsigned int
715 shadow_max_order(struct domain *d)
716 {
717 return is_hvm_domain(d) ? SHADOW_MAX_ORDER : 0;
718 }
720 /* Do we have a total of count pages of the requested order free? */
721 static inline int space_is_available(
722 struct domain *d,
723 unsigned int order,
724 unsigned int count)
725 {
726 for ( ; order <= shadow_max_order(d); ++order )
727 {
728 unsigned int n = count;
729 const struct list_head *p;
731 list_for_each ( p, &d->arch.paging.shadow.freelists[order] )
732 if ( --n == 0 )
733 return 1;
734 count = (count + 1) >> 1;
735 }
737 return 0;
738 }
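
The halving of count as the loop climbs an order works because each free block
of the next order up can be split into two blocks of the current order. A
standalone sketch of the same check over a toy free-list (illustrative only,
not part of this file):

    /* Standalone illustration: the same walk as space_is_available(), but
     * over an array where free_chunks[o] is the number of free order-o chunks. */
    #include <stdio.h>

    #define TOY_MAX_ORDER 2

    static int toy_space_is_available(const unsigned int free_chunks[TOY_MAX_ORDER + 1],
                                      unsigned int order, unsigned int count)
    {
        for ( ; order <= TOY_MAX_ORDER; ++order )
        {
            if ( free_chunks[order] >= count )
                return 1;
            /* Needing 'count' chunks here is satisfied by ceil(count/2)
             * chunks one order up, since each of those splits into two. */
            count = (count + 1) >> 1;
        }
        return 0;
    }

    int main(void)
    {
        unsigned int free_chunks[TOY_MAX_ORDER + 1] = { 0, 2, 0 };

        /* Three order-0 pages fit in ceil(3/2) = 2 free order-1 chunks. */
        printf("%d\n", toy_space_is_available(free_chunks, 0, 3)); /* prints 1 */
        return 0;
    }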
740 /* Dispatcher function: call the per-mode function that will unhook the
741 * non-Xen mappings in this top-level shadow mfn */
742 static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
743 {
744 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
745 switch ( sp->type )
746 {
747 case SH_type_l2_32_shadow:
748 #if CONFIG_PAGING_LEVELS == 2
749 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,2,2)(v,smfn);
750 #else
751 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,3,2)(v,smfn);
752 #endif
753 break;
754 #if CONFIG_PAGING_LEVELS >= 3
755 case SH_type_l2_pae_shadow:
756 case SH_type_l2h_pae_shadow:
757 SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
758 break;
759 #endif
760 #if CONFIG_PAGING_LEVELS >= 4
761 case SH_type_l4_64_shadow:
762 SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings,4,4)(v,smfn);
763 break;
764 #endif
765 default:
766 SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->type);
767 BUG();
768 }
769 }
772 /* Make sure there are at least count order-sized pages
773 * available in the shadow page pool. */
774 static void _shadow_prealloc(
775 struct domain *d,
776 unsigned int order,
777 unsigned int count)
778 {
779 /* Need a vcpu for calling unpins; for now, since we don't have
780 * per-vcpu shadows, any will do */
781 struct vcpu *v, *v2;
782 struct list_head *l, *t;
783 struct shadow_page_info *sp;
784 mfn_t smfn;
785 int i;
787 ASSERT(order <= shadow_max_order(d));
788 if ( space_is_available(d, order, count) ) return;
790 v = current;
791 if ( v->domain != d )
792 v = d->vcpu[0];
793 ASSERT(v != NULL); /* Shouldn't have enabled shadows if we've no vcpus */
795 /* Stage one: walk the list of pinned pages, unpinning them */
796 perfc_incr(shadow_prealloc_1);
797 list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows)
798 {
799 sp = list_entry(l, struct shadow_page_info, list);
800 smfn = shadow_page_to_mfn(sp);
802 /* Unpin this top-level shadow */
803 sh_unpin(v, smfn);
805 /* See if that freed up enough space */
806 if ( space_is_available(d, order, count) ) return;
807 }
809 /* Stage two: all shadow pages are in use in hierarchies that are
810 * loaded in cr3 on some vcpu. Walk them, unhooking the non-Xen
811 * mappings. */
812 perfc_incr(shadow_prealloc_2);
814 for_each_vcpu(d, v2)
815 for ( i = 0 ; i < 4 ; i++ )
816 {
817 if ( !pagetable_is_null(v2->arch.shadow_table[i]) )
818 {
819 shadow_unhook_mappings(v,
820 pagetable_get_mfn(v2->arch.shadow_table[i]));
822 /* See if that freed up enough space */
823 if ( space_is_available(d, order, count) )
824 {
825 flush_tlb_mask(d->domain_dirty_cpumask);
826 return;
827 }
828 }
829 }
831 /* Nothing more we can do: all remaining shadows are of pages that
832 * hold Xen mappings for some vcpu. This can never happen. */
833 SHADOW_ERROR("Can't pre-allocate %u order-%u shadow pages!\n"
834 " shadow pages total = %u, free = %u, p2m=%u\n",
835 count, order,
836 d->arch.paging.shadow.total_pages,
837 d->arch.paging.shadow.free_pages,
838 d->arch.paging.shadow.p2m_pages);
839 BUG();
840 }
842 /* Make sure there are at least count pages of the order according to
843 * type available in the shadow page pool.
844 * This must be called before any calls to shadow_alloc(). Since this
845 * will free existing shadows to make room, it must be called early enough
846 * to avoid freeing shadows that the caller is currently working on. */
847 void shadow_prealloc(struct domain *d, u32 type, unsigned int count)
848 {
849 ASSERT(type != SH_type_p2m_table);
850 return _shadow_prealloc(d, shadow_order(type), count);
851 }
853 /* Deliberately free all the memory we can: this will tear down all of
854 * this domain's shadows */
855 static void shadow_blow_tables(struct domain *d)
856 {
857 struct list_head *l, *t;
858 struct shadow_page_info *sp;
859 struct vcpu *v = d->vcpu[0];
860 mfn_t smfn;
861 int i;
863 ASSERT(v != NULL);
865 /* Pass one: unpin all pinned pages */
866 list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows)
867 {
868 sp = list_entry(l, struct shadow_page_info, list);
869 smfn = shadow_page_to_mfn(sp);
870 sh_unpin(v, smfn);
871 }
873 /* Second pass: unhook entries of in-use shadows */
874 for_each_vcpu(d, v)
875 for ( i = 0 ; i < 4 ; i++ )
876 if ( !pagetable_is_null(v->arch.shadow_table[i]) )
877 shadow_unhook_mappings(v,
878 pagetable_get_mfn(v->arch.shadow_table[i]));
880 /* Make sure everyone sees the unshadowings */
881 flush_tlb_mask(d->domain_dirty_cpumask);
882 }
884 void shadow_blow_tables_per_domain(struct domain *d)
885 {
886 if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL ) {
887 shadow_lock(d);
888 shadow_blow_tables(d);
889 shadow_unlock(d);
890 }
891 }
893 #ifndef NDEBUG
894 /* Blow all shadows of all shadowed domains: this can be used to cause the
895 * guest's pagetables to be re-shadowed if we suspect that the shadows
896 * have somehow got out of sync */
897 static void shadow_blow_all_tables(unsigned char c)
898 {
899 struct domain *d;
900 printk("'%c' pressed -> blowing all shadow tables\n", c);
901 rcu_read_lock(&domlist_read_lock);
902 for_each_domain(d)
903 {
904 if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL )
905 {
906 shadow_lock(d);
907 shadow_blow_tables(d);
908 shadow_unlock(d);
909 }
910 }
911 rcu_read_unlock(&domlist_read_lock);
912 }
914 /* Register this function in the Xen console keypress table */
915 static __init int shadow_blow_tables_keyhandler_init(void)
916 {
917 register_keyhandler('S', shadow_blow_all_tables,"reset shadow pagetables");
918 return 0;
919 }
920 __initcall(shadow_blow_tables_keyhandler_init);
921 #endif /* !NDEBUG */
923 /* Allocate another shadow's worth of (contiguous, aligned) pages,
924 * and fill in the type and backpointer fields of their page_infos.
925 * Never fails to allocate. */
926 mfn_t shadow_alloc(struct domain *d,
927 u32 shadow_type,
928 unsigned long backpointer)
929 {
930 struct shadow_page_info *sp = NULL;
931 unsigned int order = shadow_order(shadow_type);
932 cpumask_t mask;
933 void *p;
934 int i;
936 ASSERT(shadow_locked_by_me(d));
937 if (shadow_type == SH_type_p2m_table && order > shadow_max_order(d))
938 order = shadow_max_order(d);
939 ASSERT(order <= shadow_max_order(d));
940 ASSERT(shadow_type != SH_type_none);
941 perfc_incr(shadow_alloc);
943 /* Find smallest order which can satisfy the request. */
944 for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
945 if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
946 goto found;
948 /* If we get here, we failed to allocate. This should never happen.
949 * It means that we didn't call shadow_prealloc() correctly before
950 * we allocated. We can't recover by calling prealloc here, because
951 * we might free up higher-level pages that the caller is working on. */
952 SHADOW_ERROR("Can't allocate %i shadow pages!\n", 1 << order);
953 BUG();
955 found:
956 sp = list_entry(d->arch.paging.shadow.freelists[i].next,
957 struct shadow_page_info, list);
958 list_del(&sp->list);
960 /* We may have to halve the chunk a number of times. */
961 while ( i != order )
962 {
963 i--;
964 sp->order = i;
965 list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]);
966 sp += 1 << i;
967 }
968 d->arch.paging.shadow.free_pages -= 1 << order;
970 /* Init page info fields and clear the pages */
971 for ( i = 0; i < 1<<order ; i++ )
972 {
973 /* Before we overwrite the old contents of this page,
974 * we need to be sure that no TLB holds a pointer to it. */
975 mask = d->domain_dirty_cpumask;
976 tlbflush_filter(mask, sp[i].tlbflush_timestamp);
977 if ( unlikely(!cpus_empty(mask)) )
978 {
979 perfc_incr(shadow_alloc_tlbflush);
980 flush_tlb_mask(mask);
981 }
982 /* Now safe to clear the page for reuse */
983 p = sh_map_domain_page(shadow_page_to_mfn(sp+i));
984 ASSERT(p != NULL);
985 clear_page(p);
986 sh_unmap_domain_page(p);
987 INIT_LIST_HEAD(&sp[i].list);
988 sp[i].type = shadow_type;
989 sp[i].pinned = 0;
990 sp[i].count = 0;
991 sp[i].backpointer = backpointer;
992 sp[i].next_shadow = NULL;
993 perfc_incr(shadow_alloc_count);
994 }
995 return shadow_page_to_mfn(sp);
996 }
999 /* Return some shadow pages to the pool. */
1000 void shadow_free(struct domain *d, mfn_t smfn)
1002 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
1003 u32 shadow_type;
1004 unsigned long order;
1005 unsigned long mask;
1006 int i;
1008 ASSERT(shadow_locked_by_me(d));
1009 perfc_incr(shadow_free);
1011 shadow_type = sp->type;
1012 ASSERT(shadow_type != SH_type_none);
1013 ASSERT(shadow_type != SH_type_p2m_table);
1014 order = shadow_order(shadow_type);
1016 d->arch.paging.shadow.free_pages += 1 << order;
1018 for ( i = 0; i < 1<<order; i++ )
1020 #if SHADOW_OPTIMIZATIONS & (SHOPT_WRITABLE_HEURISTIC | SHOPT_FAST_EMULATION)
1021 struct vcpu *v;
1022 for_each_vcpu(d, v)
1024 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
1025 /* No longer safe to look for a writeable mapping in this shadow */
1026 if ( v->arch.paging.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i )
1027 v->arch.paging.shadow.last_writeable_pte_smfn = 0;
1028 #endif
1029 #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
1030 v->arch.paging.last_write_emul_ok = 0;
1031 #endif
1033 #endif
1034 /* Strip out the type: this is now a free shadow page */
1035 sp[i].type = 0;
1036 /* Remember the TLB timestamp so we will know whether to flush
1037 * TLBs when we reuse the page. Because the destructors leave the
1038 * contents of the pages in place, we can delay TLB flushes until
1039 * just before the allocator hands the page out again. */
1040 sp[i].tlbflush_timestamp = tlbflush_current_time();
1041 perfc_decr(shadow_alloc_count);
1044 /* Merge chunks as far as possible. */
1045 for ( ; order < shadow_max_order(d); ++order )
1047 mask = 1 << order;
1048 if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
1049 /* Merge with predecessor block? */
1050 if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) )
1051 break;
1052 list_del(&(sp-mask)->list);
1053 sp -= mask;
1054 } else {
1055 /* Merge with successor block? */
1056 if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) )
1057 break;
1058 list_del(&(sp+mask)->list);
1062 sp->order = order;
1063 list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
1066 /* Divert some memory from the pool to be used by the p2m mapping.
1067 * This action is irreversible: the p2m mapping only ever grows.
1068 * That's OK because the p2m table only exists for translated domains,
1069 * and those domains can't ever turn off shadow mode.
1070 * Also, we only ever allocate a max-order chunk, so as to preserve
1071 * the invariant that shadow_prealloc() always works.
1072 * Returns 0 iff it can't get a chunk (the caller should then
1073 * free up some pages in domheap and call sh_set_allocation);
1074 * returns non-zero on success.
1075 */
1076 static int
1077 sh_alloc_p2m_pages(struct domain *d)
1079 struct page_info *pg;
1080 u32 i;
1081 unsigned int order = shadow_max_order(d);
1083 ASSERT(shadow_locked_by_me(d));
1085 if ( d->arch.paging.shadow.total_pages
1086 < (shadow_min_acceptable_pages(d) + (1 << order)) )
1087 return 0; /* Not enough shadow memory: need to increase it first */
1089 pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0));
1090 d->arch.paging.shadow.p2m_pages += (1 << order);
1091 d->arch.paging.shadow.total_pages -= (1 << order);
1092 for (i = 0; i < (1U << order); i++)
1094 /* Unlike shadow pages, mark p2m pages as owned by the domain.
1095 * Marking the domain as the owner would normally allow the guest to
1096 * create mappings of these pages, but these p2m pages will never be
1097 * in the domain's guest-physical address space, and so that is not
1098 * believed to be a concern.
1099 */
1100 page_set_owner(&pg[i], d);
1101 pg[i].count_info = 1;
1102 list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist);
1104 return 1;
1107 // Returns 0 if no memory is available...
1108 static struct page_info *
1109 shadow_alloc_p2m_page(struct domain *d)
1111 struct list_head *entry;
1112 struct page_info *pg;
1113 mfn_t mfn;
1114 void *p;
1116 shadow_lock(d);
1118 if ( list_empty(&d->arch.paging.shadow.p2m_freelist) &&
1119 !sh_alloc_p2m_pages(d) )
1121 shadow_unlock(d);
1122 return NULL;
1124 entry = d->arch.paging.shadow.p2m_freelist.next;
1125 list_del(entry);
1127 shadow_unlock(d);
1129 pg = list_entry(entry, struct page_info, list);
1130 mfn = page_to_mfn(pg);
1131 p = sh_map_domain_page(mfn);
1132 clear_page(p);
1133 sh_unmap_domain_page(p);
1135 return pg;
1138 static void
1139 shadow_free_p2m_page(struct domain *d, struct page_info *pg)
1141 ASSERT(page_get_owner(pg) == d);
1142 /* Should have just the one ref we gave it in alloc_p2m_page() */
1143 if ( (pg->count_info & PGC_count_mask) != 1 )
1145 SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
1146 pg->count_info, pg->u.inuse.type_info);
1148 pg->count_info = 0;
1149 /* Free should not decrement domain's total allocation, since
1150 * these pages were allocated without an owner. */
1151 page_set_owner(pg, NULL);
1152 free_domheap_pages(pg, 0);
1153 d->arch.paging.shadow.p2m_pages--;
1154 perfc_decr(shadow_alloc_count);
1157 #if CONFIG_PAGING_LEVELS == 3
1158 static void p2m_install_entry_in_monitors(struct domain *d,
1159 l3_pgentry_t *l3e)
1160 /* Special case, only used for external-mode domains on PAE hosts:
1161 * update the mapping of the p2m table. Once again, this is trivial in
1162 * other paging modes (one top-level entry points to the top-level p2m,
1163 * no maintenance needed), but PAE makes life difficult by needing a
1164 * copy of the eight l3es of the p2m table in eight l2h slots in the
1165 * monitor table. This function makes fresh copies when a p2m l3e
1166 * changes. */
1168 l2_pgentry_t *ml2e;
1169 struct vcpu *v;
1170 unsigned int index;
1172 index = ((unsigned long)l3e & ~PAGE_MASK) / sizeof(l3_pgentry_t);
1173 ASSERT(index < MACHPHYS_MBYTES>>1);
1175 for_each_vcpu(d, v)
1177 if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
1178 continue;
1179 ASSERT(shadow_mode_external(v->domain));
1181 SHADOW_DEBUG(P2M, "d=%u v=%u index=%u mfn=%#lx\n",
1182 d->domain_id, v->vcpu_id, index, l3e_get_pfn(*l3e));
1184 if ( v == current ) /* OK to use linear map of monitor_table */
1185 ml2e = __linear_l2_table + l2_linear_offset(RO_MPT_VIRT_START);
1186 else
1188 l3_pgentry_t *ml3e;
1189 ml3e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
1190 ASSERT(l3e_get_flags(ml3e[3]) & _PAGE_PRESENT);
1191 ml2e = sh_map_domain_page(_mfn(l3e_get_pfn(ml3e[3])));
1192 ml2e += l2_table_offset(RO_MPT_VIRT_START);
1193 sh_unmap_domain_page(ml3e);
1195 ml2e[index] = l2e_from_pfn(l3e_get_pfn(*l3e), __PAGE_HYPERVISOR);
1196 if ( v != current )
1197 sh_unmap_domain_page(ml2e);
1200 #endif
1202 /* Set the pool of shadow pages to the required number of pages.
1203 * Input will be rounded up to at least shadow_min_acceptable_pages(),
1204 * plus space for the p2m table.
1205 * Returns 0 for success, non-zero for failure. */
1206 static unsigned int sh_set_allocation(struct domain *d,
1207 unsigned int pages,
1208 int *preempted)
1210 struct shadow_page_info *sp;
1211 unsigned int lower_bound;
1212 unsigned int j, order = shadow_max_order(d);
1214 ASSERT(shadow_locked_by_me(d));
1216 /* Don't allocate less than the minimum acceptable, plus one page per
1217 * megabyte of RAM (for the p2m table) */
1218 lower_bound = shadow_min_acceptable_pages(d) + (d->tot_pages / 256);
1219 if ( pages > 0 && pages < lower_bound )
1220 pages = lower_bound;
1221 /* Round up to largest block size */
1222 pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1);
1224 SHADOW_PRINTK("current %i target %i\n",
1225 d->arch.paging.shadow.total_pages, pages);
1227 while ( d->arch.paging.shadow.total_pages != pages )
1229 if ( d->arch.paging.shadow.total_pages < pages )
1231 /* Need to allocate more memory from domheap */
1232 sp = (struct shadow_page_info *)
1233 alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d)));
1234 if ( sp == NULL )
1236 SHADOW_PRINTK("failed to allocate shadow pages.\n");
1237 return -ENOMEM;
1239 d->arch.paging.shadow.free_pages += 1 << order;
1240 d->arch.paging.shadow.total_pages += 1 << order;
1241 for ( j = 0; j < 1U << order; j++ )
1243 sp[j].type = 0;
1244 sp[j].pinned = 0;
1245 sp[j].count = 0;
1246 sp[j].mbz = 0;
1247 sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
1249 sp->order = order;
1250 list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
1252 else if ( d->arch.paging.shadow.total_pages > pages )
1254 /* Need to return memory to domheap */
1255 _shadow_prealloc(d, order, 1);
1256 ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
1257 sp = list_entry(d->arch.paging.shadow.freelists[order].next,
1258 struct shadow_page_info, list);
1259 list_del(&sp->list);
1260 d->arch.paging.shadow.free_pages -= 1 << order;
1261 d->arch.paging.shadow.total_pages -= 1 << order;
1262 free_domheap_pages((struct page_info *)sp, order);
1265 /* Check to see if we need to yield and try again */
1266 if ( preempted && hypercall_preempt_check() )
1268 *preempted = 1;
1269 return 0;
1273 return 0;
1276 /* Return the size of the shadow pool, rounded up to the nearest MB */
1277 static unsigned int shadow_get_allocation(struct domain *d)
1279 unsigned int pg = d->arch.paging.shadow.total_pages;
1280 return ((pg >> (20 - PAGE_SHIFT))
1281 + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
1284 /**************************************************************************/
1285 /* Hash table for storing the guest->shadow mappings.
1286 * The table itself is an array of pointers to shadows; the shadows are then
1287 * threaded on a singly-linked list of shadows with the same hash value */
1289 #define SHADOW_HASH_BUCKETS 251
1290 /* Other possibly useful primes are 509, 1021, 2039, 4093, 8191, 16381 */
1292 /* Hash function that takes a gfn or mfn, plus another byte of type info */
1293 typedef u32 key_t;
1294 static inline key_t sh_hash(unsigned long n, unsigned int t)
1296 unsigned char *p = (unsigned char *)&n;
1297 key_t k = t;
1298 int i;
1299 for ( i = 0; i < sizeof(n) ; i++ ) k = (u32)p[i] + (k<<6) + (k<<16) - k;
1300 return k % SHADOW_HASH_BUCKETS;
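
A standalone sketch of the same hash on an arbitrary example input
(illustrative only, not part of this file); the mixing step is the familiar
sdbm-style k = byte + (k<<6) + (k<<16) - k, folded modulo the 251 buckets:

    /* Standalone illustration of the bucket computation above. */
    #include <stdio.h>
    #include <stdint.h>

    #define TOY_HASH_BUCKETS 251

    static uint32_t toy_sh_hash(unsigned long n, unsigned int t)
    {
        unsigned char *p = (unsigned char *)&n;
        uint32_t k = t;
        unsigned int i;

        for ( i = 0; i < sizeof(n); i++ )
            k = (uint32_t)p[i] + (k << 6) + (k << 16) - k;
        return k % TOY_HASH_BUCKETS;
    }

    int main(void)
    {
        /* Example backpointer (an mfn) and shadow type chosen arbitrarily. */
        printf("bucket = %u\n", toy_sh_hash(0x1a2b3UL, 8));
        return 0;
    }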
1303 #if SHADOW_AUDIT & (SHADOW_AUDIT_HASH|SHADOW_AUDIT_HASH_FULL)
1305 /* Before we get to the mechanism, define a pair of audit functions
1306 * that sanity-check the contents of the hash table. */
1307 static void sh_hash_audit_bucket(struct domain *d, int bucket)
1308 /* Audit one bucket of the hash table */
1310 struct shadow_page_info *sp, *x;
1312 if ( !(SHADOW_AUDIT_ENABLE) )
1313 return;
1315 sp = d->arch.paging.shadow.hash_table[bucket];
1316 while ( sp )
1318 /* Not a shadow? */
1319 BUG_ON( sp->mbz != 0 );
1320 /* Bogus type? */
1321 BUG_ON( sp->type == 0 );
1322 BUG_ON( sp->type > SH_type_max_shadow );
1323 /* Wrong bucket? */
1324 BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket );
1325 /* Duplicate entry? */
1326 for ( x = sp->next_shadow; x; x = x->next_shadow )
1327 BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type );
1328 /* Follow the backpointer to the guest pagetable */
1329 if ( sp->type != SH_type_fl1_32_shadow
1330 && sp->type != SH_type_fl1_pae_shadow
1331 && sp->type != SH_type_fl1_64_shadow )
1333 struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer));
1334 /* Bad shadow flags on guest page? */
1335 BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) );
1336 /* Bad type count on guest page? */
1337 if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
1338 && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
1340 SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
1341 " but has typecount %#lx\n",
1342 sp->backpointer, mfn_x(shadow_page_to_mfn(sp)),
1343 gpg->u.inuse.type_info);
1344 BUG();
1347 /* That entry was OK; on we go */
1348 sp = sp->next_shadow;
1352 #else
1353 #define sh_hash_audit_bucket(_d, _b) do {} while(0)
1354 #endif /* Hashtable bucket audit */
1357 #if SHADOW_AUDIT & SHADOW_AUDIT_HASH_FULL
1359 static void sh_hash_audit(struct domain *d)
1360 /* Full audit: audit every bucket in the table */
1362 int i;
1364 if ( !(SHADOW_AUDIT_ENABLE) )
1365 return;
1367 for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ )
1369 sh_hash_audit_bucket(d, i);
1373 #else
1374 #define sh_hash_audit(_d) do {} while(0)
1375 #endif /* Hashtable bucket audit */
1377 /* Allocate and initialise the table itself.
1378 * Returns 0 for success, 1 for error. */
1379 static int shadow_hash_alloc(struct domain *d)
1381 struct shadow_page_info **table;
1383 ASSERT(shadow_locked_by_me(d));
1384 ASSERT(!d->arch.paging.shadow.hash_table);
1386 table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS);
1387 if ( !table ) return 1;
1388 memset(table, 0,
1389 SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *));
1390 d->arch.paging.shadow.hash_table = table;
1391 return 0;
1394 /* Tear down the hash table and return all memory to Xen.
1395 * This function does not care whether the table is populated. */
1396 static void shadow_hash_teardown(struct domain *d)
1398 ASSERT(shadow_locked_by_me(d));
1399 ASSERT(d->arch.paging.shadow.hash_table);
1401 xfree(d->arch.paging.shadow.hash_table);
1402 d->arch.paging.shadow.hash_table = NULL;
1406 mfn_t shadow_hash_lookup(struct vcpu *v, unsigned long n, unsigned int t)
1407 /* Find an entry in the hash table. Returns the MFN of the shadow,
1408 * or INVALID_MFN if it doesn't exist */
1410 struct domain *d = v->domain;
1411 struct shadow_page_info *sp, *prev;
1412 key_t key;
1414 ASSERT(shadow_locked_by_me(d));
1415 ASSERT(d->arch.paging.shadow.hash_table);
1416 ASSERT(t);
1418 sh_hash_audit(d);
1420 perfc_incr(shadow_hash_lookups);
1421 key = sh_hash(n, t);
1422 sh_hash_audit_bucket(d, key);
1424 sp = d->arch.paging.shadow.hash_table[key];
1425 prev = NULL;
1426 while(sp)
1428 if ( sp->backpointer == n && sp->type == t )
1430 /* Pull-to-front if 'sp' isn't already the head item */
1431 if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) )
1433 if ( unlikely(d->arch.paging.shadow.hash_walking != 0) )
1434 /* Can't reorder: someone is walking the hash chains */
1435 return shadow_page_to_mfn(sp);
1436 else
1438 ASSERT(prev);
1439 /* Delete sp from the list */
1440 prev->next_shadow = sp->next_shadow;
1441 /* Re-insert it at the head of the list */
1442 sp->next_shadow = d->arch.paging.shadow.hash_table[key];
1443 d->arch.paging.shadow.hash_table[key] = sp;
1446 else
1448 perfc_incr(shadow_hash_lookup_head);
1450 return shadow_page_to_mfn(sp);
1452 prev = sp;
1453 sp = sp->next_shadow;
1456 perfc_incr(shadow_hash_lookup_miss);
1457 return _mfn(INVALID_MFN);
1460 void shadow_hash_insert(struct vcpu *v, unsigned long n, unsigned int t,
1461 mfn_t smfn)
1462 /* Put a mapping (n,t)->smfn into the hash table */
1464 struct domain *d = v->domain;
1465 struct shadow_page_info *sp;
1466 key_t key;
1468 ASSERT(shadow_locked_by_me(d));
1469 ASSERT(d->arch.paging.shadow.hash_table);
1470 ASSERT(t);
1472 sh_hash_audit(d);
1474 perfc_incr(shadow_hash_inserts);
1475 key = sh_hash(n, t);
1476 sh_hash_audit_bucket(d, key);
1478 /* Insert this shadow at the top of the bucket */
1479 sp = mfn_to_shadow_page(smfn);
1480 sp->next_shadow = d->arch.paging.shadow.hash_table[key];
1481 d->arch.paging.shadow.hash_table[key] = sp;
1483 sh_hash_audit_bucket(d, key);
1486 void shadow_hash_delete(struct vcpu *v, unsigned long n, unsigned int t,
1487 mfn_t smfn)
1488 /* Excise the mapping (n,t)->smfn from the hash table */
1490 struct domain *d = v->domain;
1491 struct shadow_page_info *sp, *x;
1492 key_t key;
1494 ASSERT(shadow_locked_by_me(d));
1495 ASSERT(d->arch.paging.shadow.hash_table);
1496 ASSERT(t);
1498 sh_hash_audit(d);
1500 perfc_incr(shadow_hash_deletes);
1501 key = sh_hash(n, t);
1502 sh_hash_audit_bucket(d, key);
1504 sp = mfn_to_shadow_page(smfn);
1505 if ( d->arch.paging.shadow.hash_table[key] == sp )
1506 /* Easy case: we're deleting the head item. */
1507 d->arch.paging.shadow.hash_table[key] = sp->next_shadow;
1508 else
1510 /* Need to search for the one we want */
1511 x = d->arch.paging.shadow.hash_table[key];
1512 while ( 1 )
1514 ASSERT(x); /* We can't have hit the end, since our target is
1515 * still in the chain somewhere... */
1516 if ( x->next_shadow == sp )
1518 x->next_shadow = sp->next_shadow;
1519 break;
1521 x = x->next_shadow;
1524 sp->next_shadow = NULL;
1526 sh_hash_audit_bucket(d, key);
1529 typedef int (*hash_callback_t)(struct vcpu *v, mfn_t smfn, mfn_t other_mfn);
1531 static void hash_foreach(struct vcpu *v,
1532 unsigned int callback_mask,
1533 hash_callback_t callbacks[],
1534 mfn_t callback_mfn)
1535 /* Walk the hash table looking at the types of the entries and
1536 * calling the appropriate callback function for each entry.
1537 * The mask determines which shadow types we call back for, and the array
1538 * of callbacks tells us which function to call.
1539 * Any callback may return non-zero to let us skip the rest of the scan.
1541 * WARNING: Callbacks MUST NOT add or remove hash entries unless they
1542 * then return non-zero to terminate the scan. */
1544 int i, done = 0;
1545 struct domain *d = v->domain;
1546 struct shadow_page_info *x;
1548 /* Say we're here, to stop hash-lookups reordering the chains */
1549 ASSERT(shadow_locked_by_me(d));
1550 ASSERT(d->arch.paging.shadow.hash_walking == 0);
1551 d->arch.paging.shadow.hash_walking = 1;
1553 for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ )
1555 /* WARNING: This is not safe against changes to the hash table.
1556 * The callback *must* return non-zero if it has inserted or
1557 * deleted anything from the hash (lookups are OK, though). */
1558 for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow )
1560 if ( callback_mask & (1 << x->type) )
1562 ASSERT(x->type <= 15);
1563 ASSERT(callbacks[x->type] != NULL);
1564 done = callbacks[x->type](v, shadow_page_to_mfn(x),
1565 callback_mfn);
1566 if ( done ) break;
1569 if ( done ) break;
1571 d->arch.paging.shadow.hash_walking = 0;
1575 /**************************************************************************/
1576 /* Destroy a shadow page: simple dispatcher to call the per-type destructor
1577 * which will decrement refcounts appropriately and return memory to the
1578 * free pool. */
1580 void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
1582 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
1583 unsigned int t = sp->type;
1586 SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
1588 /* Double-check, if we can, that the shadowed page belongs to this
1589 * domain, (by following the back-pointer). */
1590 ASSERT(t == SH_type_fl1_32_shadow ||
1591 t == SH_type_fl1_pae_shadow ||
1592 t == SH_type_fl1_64_shadow ||
1593 t == SH_type_monitor_table ||
1594 (is_pv_32on64_vcpu(v) && t == SH_type_l4_64_shadow) ||
1595 (page_get_owner(mfn_to_page(_mfn(sp->backpointer)))
1596 == v->domain));
1598 /* The down-shifts here are so that the switch statement is on nice
1599 * small numbers that the compiler will enjoy */
1600 switch ( t )
1602 #if CONFIG_PAGING_LEVELS == 2
1603 case SH_type_l1_32_shadow:
1604 case SH_type_fl1_32_shadow:
1605 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 2, 2)(v, smfn);
1606 break;
1607 case SH_type_l2_32_shadow:
1608 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 2, 2)(v, smfn);
1609 break;
1610 #else /* PAE or 64bit */
1611 case SH_type_l1_32_shadow:
1612 case SH_type_fl1_32_shadow:
1613 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 2)(v, smfn);
1614 break;
1615 case SH_type_l2_32_shadow:
1616 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 2)(v, smfn);
1617 break;
1618 #endif
1620 #if CONFIG_PAGING_LEVELS >= 3
1621 case SH_type_l1_pae_shadow:
1622 case SH_type_fl1_pae_shadow:
1623 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 3)(v, smfn);
1624 break;
1625 case SH_type_l2_pae_shadow:
1626 case SH_type_l2h_pae_shadow:
1627 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
1628 break;
1629 #endif
1631 #if CONFIG_PAGING_LEVELS >= 4
1632 case SH_type_l1_64_shadow:
1633 case SH_type_fl1_64_shadow:
1634 SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 4, 4)(v, smfn);
1635 break;
1636 case SH_type_l2h_64_shadow:
1637 ASSERT(is_pv_32on64_vcpu(v));
1638 /* Fall through... */
1639 case SH_type_l2_64_shadow:
1640 SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 4, 4)(v, smfn);
1641 break;
1642 case SH_type_l3_64_shadow:
1643 SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 4, 4)(v, smfn);
1644 break;
1645 case SH_type_l4_64_shadow:
1646 SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, 4, 4)(v, smfn);
1647 break;
1648 #endif
1649 default:
1650 SHADOW_ERROR("tried to destroy shadow of bad type %08lx\n",
1651 (unsigned long)t);
1652 BUG();
1656 /**************************************************************************/
1657 /* Remove all writeable mappings of a guest frame from the shadow tables
1658 * Returns non-zero if we need to flush TLBs.
1659 * level and fault_addr describe how we found this to be a pagetable;
1660 * level==0 means we have some other reason for revoking write access.*/
1662 int sh_remove_write_access(struct vcpu *v, mfn_t gmfn,
1663 unsigned int level,
1664 unsigned long fault_addr)
1666 /* Dispatch table for getting per-type functions */
1667 static hash_callback_t callbacks[SH_type_unused] = {
1668 NULL, /* none */
1669 #if CONFIG_PAGING_LEVELS == 2
1670 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* l1_32 */
1671 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,2,2), /* fl1_32 */
1672 #else
1673 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* l1_32 */
1674 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,2), /* fl1_32 */
1675 #endif
1676 NULL, /* l2_32 */
1677 #if CONFIG_PAGING_LEVELS >= 3
1678 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* l1_pae */
1679 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,3,3), /* fl1_pae */
1680 #else
1681 NULL, /* l1_pae */
1682 NULL, /* fl1_pae */
1683 #endif
1684 NULL, /* l2_pae */
1685 NULL, /* l2h_pae */
1686 #if CONFIG_PAGING_LEVELS >= 4
1687 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* l1_64 */
1688 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1,4,4), /* fl1_64 */
1689 #else
1690 NULL, /* l1_64 */
1691 NULL, /* fl1_64 */
1692 #endif
1693 NULL, /* l2_64 */
1694 NULL, /* l2h_64 */
1695 NULL, /* l3_64 */
1696 NULL, /* l4_64 */
1697 NULL, /* p2m */
1698 NULL /* unused */
1699 };
1701 static unsigned int callback_mask =
1702 1 << SH_type_l1_32_shadow
1703 | 1 << SH_type_fl1_32_shadow
1704 | 1 << SH_type_l1_pae_shadow
1705 | 1 << SH_type_fl1_pae_shadow
1706 | 1 << SH_type_l1_64_shadow
1707 | 1 << SH_type_fl1_64_shadow
1709 struct page_info *pg = mfn_to_page(gmfn);
1711 ASSERT(shadow_locked_by_me(v->domain));
1713 /* Only remove writable mappings if we are doing shadow refcounts.
1714 * In guest refcounting, we trust Xen to already be restricting
1715 * all the writes to the guest page tables, so we do not need to
1716 * do more. */
1717 if ( !shadow_mode_refcounts(v->domain) )
1718 return 0;
1720 /* Early exit if it's already a pagetable, or otherwise not writeable */
1721 if ( sh_mfn_is_a_page_table(gmfn)
1722 || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
1723 return 0;
1725 perfc_incr(shadow_writeable);
1727 /* If this isn't a "normal" writeable page, the domain is trying to
1728 * put pagetables in special memory of some kind. We can't allow that. */
1729 if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_writable_page )
1731 SHADOW_ERROR("can't remove write access to mfn %lx, type_info is %"
1732 PRtype_info "\n",
1733 mfn_x(gmfn), mfn_to_page(gmfn)->u.inuse.type_info);
1734 domain_crash(v->domain);
1737 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
1738 if ( v == current && level != 0 )
1740 unsigned long gfn;
1741 /* Heuristic: there is likely to be only one writeable mapping,
1742 * and that mapping is likely to be in the current pagetable,
1743 * in the guest's linear map (on non-HIGHPTE linux and windows)*/
1745 #define GUESS(_a, _h) do { \
1746 if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \
1747 perfc_incr(shadow_writeable_h_ ## _h); \
1748 if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \
1749 return 1; \
1750 } while (0)
1753 if ( v->arch.paging.mode->guest_levels == 2 )
1755 if ( level == 1 )
1756 /* 32bit non-PAE w2k3: linear map at 0xC0000000 */
1757 GUESS(0xC0000000UL + (fault_addr >> 10), 1);
1759 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */
1760 if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 )
1761 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4);
1764 #if CONFIG_PAGING_LEVELS >= 3
1765 else if ( v->arch.paging.mode->guest_levels == 3 )
1767 /* 32bit PAE w2k3: linear map at 0xC0000000 */
1768 switch ( level )
1770 case 1: GUESS(0xC0000000UL + (fault_addr >> 9), 2); break;
1771 case 2: GUESS(0xC0600000UL + (fault_addr >> 18), 2); break;
1774 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */
1775 if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 )
1776 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4);
1778 #if CONFIG_PAGING_LEVELS >= 4
1779 else if ( v->arch.paging.mode->guest_levels == 4 )
1781 /* 64bit w2k3: linear map at 0xfffff68000000000 */
1782 switch ( level )
1784 case 1: GUESS(0xfffff68000000000UL
1785 + ((fault_addr & VADDR_MASK) >> 9), 3); break;
1786 case 2: GUESS(0xfffff6fb40000000UL
1787 + ((fault_addr & VADDR_MASK) >> 18), 3); break;
1788 case 3: GUESS(0xfffff6fb7da00000UL
1789 + ((fault_addr & VADDR_MASK) >> 27), 3); break;
1792 /* 64bit Linux direct map at 0xffff810000000000; older kernels
1793 * had it at 0x0000010000000000UL */
1794 gfn = mfn_to_gfn(v->domain, gmfn);
1795 GUESS(0xffff810000000000UL + (gfn << PAGE_SHIFT), 4);
1796 GUESS(0x0000010000000000UL + (gfn << PAGE_SHIFT), 4);
1798 #endif /* CONFIG_PAGING_LEVELS >= 4 */
1799 #endif /* CONFIG_PAGING_LEVELS >= 3 */
1801 #undef GUESS
1804 if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
1805 return 1;
1807 /* Second heuristic: on HIGHPTE linux, there are two particular PTEs
1808 * (entries in the fixmap) where linux maps its pagetables. Since
1809 * we expect to hit them most of the time, we start the search for
1810 * the writeable mapping by looking at the same MFN where the last
1811 * brute-force search succeeded. */
1813 if ( v->arch.paging.shadow.last_writeable_pte_smfn != 0 )
1815 unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
1816 mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn);
1817 int shtype = mfn_to_shadow_page(last_smfn)->type;
1819 if ( callbacks[shtype] )
1820 callbacks[shtype](v, last_smfn, gmfn);
1822 if ( (pg->u.inuse.type_info & PGT_count_mask) != old_count )
1823 perfc_incr(shadow_writeable_h_5);
1826 if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )
1827 return 1;
1829 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
1831 /* Brute-force search of all the shadows, by walking the hash */
1832 perfc_incr(shadow_writeable_bf);
1833 hash_foreach(v, callback_mask, callbacks, gmfn);
1835 /* If that didn't catch the mapping, then there's some non-pagetable
1836 * mapping -- ioreq page, grant mapping, &c. */
1837 if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) != 0 )
1839 SHADOW_ERROR("can't remove write access to mfn %lx: guest has "
1840 "%lu special-use mappings of it\n", mfn_x(gmfn),
1841 (mfn_to_page(gmfn)->u.inuse.type_info&PGT_count_mask));
1842 domain_crash(v->domain);
1845 /* We killed at least one writeable mapping, so must flush TLBs. */
1846 return 1;
1851 /**************************************************************************/
1852 /* Remove all mappings of a guest frame from the shadow tables.
1853 * Returns non-zero if we need to flush TLBs. */
1855 int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
1857 struct page_info *page = mfn_to_page(gmfn);
1858 int expected_count, do_locking;
1860 /* Dispatch table for getting per-type functions */
1861 static hash_callback_t callbacks[SH_type_unused] = {
1862 NULL, /* none */
1863 #if CONFIG_PAGING_LEVELS == 2
1864 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* l1_32 */
1865 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,2,2), /* fl1_32 */
1866 #else
1867 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* l1_32 */
1868 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,2), /* fl1_32 */
1869 #endif
1870 NULL, /* l2_32 */
1871 #if CONFIG_PAGING_LEVELS >= 3
1872 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* l1_pae */
1873 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,3,3), /* fl1_pae */
1874 #else
1875 NULL, /* l1_pae */
1876 NULL, /* fl1_pae */
1877 #endif
1878 NULL, /* l2_pae */
1879 NULL, /* l2h_pae */
1880 #if CONFIG_PAGING_LEVELS >= 4
1881 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* l1_64 */
1882 SHADOW_INTERNAL_NAME(sh_rm_mappings_from_l1,4,4), /* fl1_64 */
1883 #else
1884 NULL, /* l1_64 */
1885 NULL, /* fl1_64 */
1886 #endif
1887 NULL, /* l2_64 */
1888 NULL, /* l2h_64 */
1889 NULL, /* l3_64 */
1890 NULL, /* l4_64 */
1891 NULL, /* p2m */
1892 NULL /* unused */
1893 };
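/* As with write-access removal above: guest frames are only mapped by
 * L1/FL1 shadow entries, so only those types need visiting here. */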
1895 static unsigned int callback_mask =
1896 1 << SH_type_l1_32_shadow
1897 | 1 << SH_type_fl1_32_shadow
1898 | 1 << SH_type_l1_pae_shadow
1899 | 1 << SH_type_fl1_pae_shadow
1900 | 1 << SH_type_l1_64_shadow
1901 | 1 << SH_type_fl1_64_shadow
1904 perfc_incr(shadow_mappings);
1905 if ( (page->count_info & PGC_count_mask) == 0 )
1906 return 0;
1908 /* Although this is an externally visible function, we do not know
1909 * whether the shadow lock will be held when it is called (since it
1910 * can be called via put_page_type when we clear a shadow l1e).
1911 * If the lock isn't held, take it for the duration of the call. */
1912 do_locking = !shadow_locked_by_me(v->domain);
1913 if ( do_locking ) shadow_lock(v->domain);
1915 /* XXX TODO:
1916 * Heuristics for finding the (probably) single mapping of this gmfn */
1918 /* Brute-force search of all the shadows, by walking the hash */
1919 perfc_incr(shadow_mappings_bf);
1920 hash_foreach(v, callback_mask, callbacks, gmfn);
1922 /* If that didn't catch the mapping, something is very wrong */
1923 expected_count = (page->count_info & PGC_allocated) ? 1 : 0;
1924 if ( (page->count_info & PGC_count_mask) != expected_count )
1926 /* Don't complain if we're in HVM and there are some extra mappings:
1927 * The qemu helper process has an untyped mapping of this dom's RAM
1928 * and the HVM restore program takes another. */
1929 if ( !(shadow_mode_external(v->domain)
1930 && (page->count_info & PGC_count_mask) <= 3
1931 && (page->u.inuse.type_info & PGT_count_mask) == 0) )
1933 SHADOW_ERROR("can't find all mappings of mfn %lx: "
1934 "c=%08x t=%08lx\n", mfn_x(gmfn),
1935 page->count_info, page->u.inuse.type_info);
1939 if ( do_locking ) shadow_unlock(v->domain);
1941 /* We killed at least one mapping, so must flush TLBs. */
1942 return 1;
1946 /**************************************************************************/
1947 /* Remove all shadows of a guest frame from the shadow tables */
1949 static int sh_remove_shadow_via_pointer(struct vcpu *v, mfn_t smfn)
1950 /* Follow this shadow's up-pointer, if it has one, and remove the reference
1951 * found there. Returns 1 if that was the only reference to this shadow */
1953 struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
1954 mfn_t pmfn;
1955 void *vaddr;
1956 int rc;
1958 ASSERT(sp->type > 0);
1959 ASSERT(sp->type < SH_type_max_shadow);
1960 ASSERT(sp->type != SH_type_l2_32_shadow);
1961 ASSERT(sp->type != SH_type_l2_pae_shadow);
1962 ASSERT(sp->type != SH_type_l2h_pae_shadow);
1963 ASSERT(sp->type != SH_type_l4_64_shadow);
1965 if (sp->up == 0) return 0;
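/* sp->up holds the machine address of the single parent entry that
 * references this shadow: page frame in the upper bits, byte offset
 * within that page in the low PAGE_SIZE bits. */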
1966 pmfn = _mfn(sp->up >> PAGE_SHIFT);
1967 ASSERT(mfn_valid(pmfn));
1968 vaddr = sh_map_domain_page(pmfn);
1969 ASSERT(vaddr);
1970 vaddr += sp->up & (PAGE_SIZE-1);
1971 ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
1973 /* Is this the only reference to this shadow? */
1974 rc = (sp->count == 1) ? 1 : 0;
1976 /* Blank the offending entry */
1977 switch (sp->type)
1979 case SH_type_l1_32_shadow:
1980 case SH_type_l2_32_shadow:
1981 #if CONFIG_PAGING_LEVELS == 2
1982 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,2,2)(v, vaddr, pmfn);
1983 #else
1984 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,2)(v, vaddr, pmfn);
1985 #endif
1986 break;
1987 #if CONFIG_PAGING_LEVELS >=3
1988 case SH_type_l1_pae_shadow:
1989 case SH_type_l2_pae_shadow:
1990 case SH_type_l2h_pae_shadow:
1991 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
1992 break;
1993 #if CONFIG_PAGING_LEVELS >= 4
1994 case SH_type_l1_64_shadow:
1995 case SH_type_l2_64_shadow:
1996 case SH_type_l2h_64_shadow:
1997 case SH_type_l3_64_shadow:
1998 case SH_type_l4_64_shadow:
1999 SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,4,4)(v, vaddr, pmfn);
2000 break;
2001 #endif
2002 #endif
2003 default: BUG(); /* Some weird unknown shadow type */
2006 sh_unmap_domain_page(vaddr);
2007 if ( rc )
2008 perfc_incr(shadow_up_pointer);
2009 else
2010 perfc_incr(shadow_unshadow_bf);
2012 return rc;
2015 void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
2016 /* Remove the shadows of this guest page.
2017 * If fast != 0, just try the quick heuristic, which will remove
2018 * at most one reference to each shadow of the page. Otherwise, walk
2019 * all the shadow tables looking for refs to shadows of this gmfn.
2020 * If all != 0, kill the domain if we can't find all the shadows.
2021 * (all != 0 implies fast == 0)
2022 */
2024 struct page_info *pg = mfn_to_page(gmfn);
2025 mfn_t smfn;
2026 int do_locking;
2027 unsigned char t;
2029 /* Dispatch table for getting per-type functions: each level must
2030 * be called with the function to remove a lower-level shadow. */
2031 static hash_callback_t callbacks[SH_type_unused] = {
2032 NULL, /* none */
2033 NULL, /* l1_32 */
2034 NULL, /* fl1_32 */
2035 #if CONFIG_PAGING_LEVELS == 2
2036 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,2,2), /* l2_32 */
2037 #else
2038 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,2), /* l2_32 */
2039 #endif
2040 NULL, /* l1_pae */
2041 NULL, /* fl1_pae */
2042 #if CONFIG_PAGING_LEVELS >= 3
2043 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae */
2044 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
2045 #else
2046 NULL, /* l2_pae */
2047 NULL, /* l2h_pae */
2048 #endif
2049 NULL, /* l1_64 */
2050 NULL, /* fl1_64 */
2051 #if CONFIG_PAGING_LEVELS >= 4
2052 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,4,4), /* l2_64 */
2053 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,4,4), /* l2h_64 */
2054 SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,4,4), /* l3_64 */
2055 SHADOW_INTERNAL_NAME(sh_remove_l3_shadow,4,4), /* l4_64 */
2056 #else
2057 NULL, /* l2_64 */
2058 NULL, /* l2h_64 */
2059 NULL, /* l3_64 */
2060 NULL, /* l4_64 */
2061 #endif
2062 NULL, /* p2m */
2063 NULL /* unused */
2064 };
2066 /* Another lookup table, for choosing which mask to use */
2067 static unsigned int masks[SH_type_unused] = {
2068 0, /* none */
2069 1 << SH_type_l2_32_shadow, /* l1_32 */
2070 0, /* fl1_32 */
2071 0, /* l2_32 */
2072 ((1 << SH_type_l2h_pae_shadow)
2073 | (1 << SH_type_l2_pae_shadow)), /* l1_pae */
2074 0, /* fl1_pae */
2075 0, /* l2_pae */
2076 0, /* l2h_pae */
2077 ((1 << SH_type_l2h_64_shadow)
2078 | (1 << SH_type_l2_64_shadow)), /* l1_64 */
2079 0, /* fl1_64 */
2080 1 << SH_type_l3_64_shadow, /* l2_64 */
2081 1 << SH_type_l3_64_shadow, /* l2h_64 */
2082 1 << SH_type_l4_64_shadow, /* l3_64 */
2083 0, /* l4_64 */
2084 0, /* p2m */
2085 0 /* unused */
2086 };
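/* masks[t] is the set of shadow types that can hold references to a
 * type-t shadow (its possible parents), so the hash walk in
 * DO_UNSHADOW() only has to visit those types. */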
2088 ASSERT(!(all && fast));
2090 /* Although this is an externally visible function, we do not know
2091 * whether the shadow lock will be held when it is called (since it
2092 * can be called via put_page_type when we clear a shadow l1e).
2093 * If the lock isn't held, take it for the duration of the call. */
2094 do_locking = !shadow_locked_by_me(v->domain);
2095 if ( do_locking ) shadow_lock(v->domain);
2097 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
2098 v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
2100 /* Bail out now if the page is not shadowed */
2101 if ( (pg->count_info & PGC_page_table) == 0 )
2103 if ( do_locking ) shadow_unlock(v->domain);
2104 return;
2107 /* Search for this shadow in all appropriate shadows */
2108 perfc_incr(shadow_unshadow);
2110 /* Lower-level shadows need to be excised from upper-level shadows.
2111 * This call to hash_foreach() looks dangerous but is in fact OK: each
2112 * call will remove at most one shadow, and terminate immediately when
2113 * it does remove it, so we never walk the hash after doing a deletion. */
2114 #define DO_UNSHADOW(_type) do { \
2115 t = (_type); \
2116 if( !(pg->count_info & PGC_page_table) \
2117 || !(pg->shadow_flags & (1 << t)) ) \
2118 break; \
2119 smfn = shadow_hash_lookup(v, mfn_x(gmfn), t); \
2120 if ( unlikely(!mfn_valid(smfn)) ) \
2121 { \
2122 SHADOW_ERROR(": gmfn %#lx has flags 0x%"PRIx32 \
2123 " but no type-0x%"PRIx32" shadow\n", \
2124 mfn_x(gmfn), (uint32_t)pg->shadow_flags, t); \
2125 break; \
2126 } \
2127 if ( sh_type_is_pinnable(v, t) ) \
2128 sh_unpin(v, smfn); \
2129 else \
2130 sh_remove_shadow_via_pointer(v, smfn); \
2131 if( !fast \
2132 && (pg->count_info & PGC_page_table) \
2133 && (pg->shadow_flags & (1 << t)) ) \
2134 hash_foreach(v, masks[t], callbacks, smfn); \
2135 } while (0)
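/* DO_UNSHADOW(t): look up gmfn's type-t shadow; unpin it if that type is
 * pinnable (a top-level shadow), otherwise drop the parent reference
 * recorded in its up-pointer; if the shadow still exists afterwards and
 * fast == 0, fall back to a walk over its possible parent shadows. */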
2137 DO_UNSHADOW(SH_type_l2_32_shadow);
2138 DO_UNSHADOW(SH_type_l1_32_shadow);
2139 #if CONFIG_PAGING_LEVELS >= 3
2140 DO_UNSHADOW(SH_type_l2h_pae_shadow);
2141 DO_UNSHADOW(SH_type_l2_pae_shadow);
2142 DO_UNSHADOW(SH_type_l1_pae_shadow);
2143 #if CONFIG_PAGING_LEVELS >= 4
2144 DO_UNSHADOW(SH_type_l4_64_shadow);
2145 DO_UNSHADOW(SH_type_l3_64_shadow);
2146 DO_UNSHADOW(SH_type_l2h_64_shadow);
2147 DO_UNSHADOW(SH_type_l2_64_shadow);
2148 DO_UNSHADOW(SH_type_l1_64_shadow);
2149 #endif
2150 #endif
2152 #undef DO_UNSHADOW
2154 /* If that didn't catch the shadows, something is wrong */
2155 if ( !fast && all && (pg->count_info & PGC_page_table) )
2157 SHADOW_ERROR("can't find all shadows of mfn %05lx "
2158 "(shadow_flags=%08lx)\n",
2159 mfn_x(gmfn), pg->shadow_flags);
2160 domain_crash(v->domain);
2163 /* Need to flush TLBs now, so that linear maps are safe next time we
2164 * take a fault. */
2165 flush_tlb_mask(v->domain->domain_dirty_cpumask);
2167 if ( do_locking ) shadow_unlock(v->domain);
2170 static void
2171 sh_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
2172 /* Even harsher: this is an HVM page that we think is no longer a pagetable.
2173 * Unshadow it, and recursively unshadow pages that reference it. */
2175 sh_remove_shadows(v, gmfn, 0, 1);
2176 /* XXX TODO:
2177 * Rework this hashtable walker to return a linked-list of all
2178 * the shadows it modified, then do breadth-first recursion
2179 * to find the way up to higher-level tables and unshadow them too.
2181 * The current code (just tearing down each page's shadows as we
2182 * detect that it is not a pagetable) is correct, but very slow.
2183 * It means extra emulated writes and slows down removal of mappings. */
2186 /**************************************************************************/
2188 static void sh_update_paging_modes(struct vcpu *v)
2190 struct domain *d = v->domain;
2191 struct paging_mode *old_mode = v->arch.paging.mode;
2193 ASSERT(shadow_locked_by_me(d));
2195 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
2196 /* Make sure this vcpu has a virtual TLB array allocated */
2197 if ( unlikely(!v->arch.paging.vtlb) )
2199 v->arch.paging.vtlb = xmalloc_array(struct shadow_vtlb, VTLB_ENTRIES);
2200 if ( unlikely(!v->arch.paging.vtlb) )
2202 SHADOW_ERROR("Could not allocate vTLB space for dom %u vcpu %u\n",
2203 d->domain_id, v->vcpu_id);
2204 domain_crash(v->domain);
2205 return;
2207 memset(v->arch.paging.vtlb, 0,
2208 VTLB_ENTRIES * sizeof (struct shadow_vtlb));
2209 spin_lock_init(&v->arch.paging.vtlb_lock);
2211 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
2213 // Valid transitions handled by this function:
2214 // - For PV guests:
2215 // - after a shadow mode has been changed
2216 // - For HVM guests:
2217 // - after a shadow mode has been changed
2218 // - changes in CR0.PG, CR4.PAE, CR4.PSE, or CR4.PGE
2219 //
2221 // First, tear down any old shadow tables held by this vcpu.
2222 //
2223 if ( v->arch.paging.mode )
2224 v->arch.paging.mode->shadow.detach_old_tables(v);
2226 if ( !is_hvm_domain(d) )
2228 ///
2229 /// PV guest
2230 ///
2231 #if CONFIG_PAGING_LEVELS == 4
2232 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
2233 #elif CONFIG_PAGING_LEVELS == 3
2234 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
2235 #elif CONFIG_PAGING_LEVELS == 2
2236 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
2237 #else
2238 #error unexpected paging mode
2239 #endif
2241 else
2243 ///
2244 /// HVM guest
2245 ///
2246 ASSERT(shadow_mode_translate(d));
2247 ASSERT(shadow_mode_external(d));
2249 if ( !hvm_paging_enabled(v) )
2251 /* When the guest has CR0.PG clear, we provide a 32-bit, non-PAE
2252 * pagetable for it, mapping 4 GB one-to-one using a single l2
2253 * page of 1024 superpage mappings */
2254 v->arch.guest_table = d->arch.paging.shadow.unpaged_pagetable;
2255 #if CONFIG_PAGING_LEVELS >= 3
2256 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2);
2257 #else
2258 v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2);
2259 #endif
2261 else
2263 #ifdef __x86_64__
2264 if ( hvm_long_mode_enabled(v) )
2266 // long mode guest...
2267 v->arch.paging.mode =
2268 &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4);
2270 else
2271 #endif
2272 if ( hvm_pae_enabled(v) )
2274 #if CONFIG_PAGING_LEVELS >= 3
2275 // 32-bit PAE mode guest...
2276 v->arch.paging.mode =
2277 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3);
2278 #else
2279 SHADOW_ERROR("PAE not supported in 32-bit Xen\n");
2280 domain_crash(d);
2281 return;
2282 #endif
2284 else
2286 // 32-bit 2 level guest...
2287 #if CONFIG_PAGING_LEVELS >= 3
2288 v->arch.paging.mode =
2289 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2);
2290 #else
2291 v->arch.paging.mode =
2292 &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2);
2293 #endif
2297 if ( pagetable_is_null(v->arch.monitor_table) )
2299 mfn_t mmfn = v->arch.paging.mode->shadow.make_monitor_table(v);
2300 v->arch.monitor_table = pagetable_from_mfn(mmfn);
2301 make_cr3(v, mfn_x(mmfn));
2302 hvm_update_host_cr3(v);
2305 if ( v->arch.paging.mode != old_mode )
2307 SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u "
2308 "(was g=%u s=%u)\n",
2309 d->domain_id, v->vcpu_id,
2310 is_hvm_domain(d) ? hvm_paging_enabled(v) : 1,
2311 v->arch.paging.mode->guest_levels,
2312 v->arch.paging.mode->shadow.shadow_levels,
2313 old_mode ? old_mode->guest_levels : 0,
2314 old_mode ? old_mode->shadow.shadow_levels : 0);
2315 if ( old_mode &&
2316 (v->arch.paging.mode->shadow.shadow_levels !=
2317 old_mode->shadow.shadow_levels) )
2319 /* Need to make a new monitor table for the new mode */
2320 mfn_t new_mfn, old_mfn;
2322 if ( v != current && vcpu_runnable(v) )
2324 SHADOW_ERROR("Some third party (d=%u v=%u) is changing "
2325 "this HVM vcpu's (d=%u v=%u) paging mode "
2326 "while it is running.\n",
2327 current->domain->domain_id, current->vcpu_id,
2328 v->domain->domain_id, v->vcpu_id);
2329 /* It's not safe to do that because we can't change
2330 * the host CR3 for a running domain */
2331 domain_crash(v->domain);
2332 return;
2335 old_mfn = pagetable_get_mfn(v->arch.monitor_table);
2336 v->arch.monitor_table = pagetable_null();
2337 new_mfn = v->arch.paging.mode->shadow.make_monitor_table(v);
2338 v->arch.monitor_table = pagetable_from_mfn(new_mfn);
2339 SHADOW_PRINTK("new monitor table %"PRI_mfn "\n",
2340 mfn_x(new_mfn));
2342 /* Don't be running on the old monitor table when we
2343 * pull it down! Switch CR3, and warn the HVM code that
2344 * its host cr3 has changed. */
2345 make_cr3(v, mfn_x(new_mfn));
2346 if ( v == current )
2347 write_ptbase(v);
2348 hvm_update_host_cr3(v);
2349 old_mode->shadow.destroy_monitor_table(v, old_mfn);
2353 // XXX -- Need to deal with changes in CR4.PSE and CR4.PGE.
2354 // These are HARD: think about the case where two CPU's have
2355 // different values for CR4.PSE and CR4.PGE at the same time.
2356 // This *does* happen, at least for CR4.PGE...
2359 v->arch.paging.mode->update_cr3(v, 0);
2362 void shadow_update_paging_modes(struct vcpu *v)
2364 shadow_lock(v->domain);
2365 sh_update_paging_modes(v);
2366 shadow_unlock(v->domain);
2369 /**************************************************************************/
2370 /* Turning on and off shadow features */
2372 static void sh_new_mode(struct domain *d, u32 new_mode)
2373 /* Inform all the vcpus that the shadow mode has been changed */
2375 struct vcpu *v;
2377 ASSERT(shadow_locked_by_me(d));
2378 ASSERT(d != current->domain);
2379 d->arch.paging.mode = new_mode;
2380 for_each_vcpu(d, v)
2381 sh_update_paging_modes(v);
2384 int shadow_enable(struct domain *d, u32 mode)
2385 /* Turn on "permanent" shadow features: external, translate, refcount.
2386 * Can only be called once on a domain, and these features cannot be
2387 * disabled.
2388 * Returns 0 for success, -errno for failure. */
2390 unsigned int old_pages;
2391 struct page_info *pg = NULL;
2392 uint32_t *e;
2393 int i, rv = 0;
2395 mode |= PG_SH_enable;
2397 domain_pause(d);
2399 /* Sanity check the arguments */
2400 if ( (d == current->domain) ||
2401 shadow_mode_enabled(d) ||
2402 ((mode & PG_translate) && !(mode & PG_refcounts)) ||
2403 ((mode & PG_external) && !(mode & PG_translate)) )
2405 rv = -EINVAL;
2406 goto out_unlocked;
2409 /* Init the shadow memory allocation if the user hasn't done so */
2410 old_pages = d->arch.paging.shadow.total_pages;
2411 if ( old_pages == 0 )
2413 unsigned int r;
2414 shadow_lock(d);
2415 r = sh_set_allocation(d, 256, NULL); /* Use at least 1MB */
2416 if ( r != 0 )
2418 sh_set_allocation(d, 0, NULL);
2419 rv = -ENOMEM;
2420 goto out_locked;
2422 shadow_unlock(d);
2425 /* Init the P2M table. Must be done before we take the shadow lock
2426 * to avoid possible deadlock. */
2427 if ( mode & PG_translate )
2429 rv = p2m_alloc_table(d, shadow_alloc_p2m_page, shadow_free_p2m_page);
2430 if (rv != 0)
2431 goto out_unlocked;
2434 /* HVM domains need an extra pagetable for vcpus that think they
2435 * have paging disabled */
2436 if ( is_hvm_domain(d) )
2438 /* Get a single page from the shadow pool. Take it via the
2439 * P2M interface to make freeing it simpler afterwards. */
2440 pg = shadow_alloc_p2m_page(d);
2441 if ( pg == NULL )
2443 rv = -ENOMEM;
2444 goto out_unlocked;
2446 /* Fill it with 32-bit, non-PAE superpage entries, each mapping 4MB
2447 * of virtual address space onto the same physical address range */
2448 e = sh_map_domain_page(page_to_mfn(pg));
2449 for ( i = 0; i < PAGE_SIZE / sizeof(*e); i++ )
2450 e[i] = ((0x400000U * i)
2451 | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER
2452 | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
2453 sh_unmap_domain_page(e);
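/* Mark the page as a validated l2 pagetable with a single type reference. */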
2454 pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated;
2457 shadow_lock(d);
2459 /* Sanity check again with the lock held */
2460 if ( shadow_mode_enabled(d) )
2462 rv = -EINVAL;
2463 goto out_locked;
2466 /* Init the hash table */
2467 if ( shadow_hash_alloc(d) != 0 )
2469 rv = -ENOMEM;
2470 goto out_locked;
2473 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
2474 /* We assume we're dealing with an older 64bit linux guest until we
2475 * see the guest use more than one l4 per vcpu. */
2476 d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
2477 #endif
2479 /* Record the 1-to-1 pagetable we just made */
2480 if ( is_hvm_domain(d) )
2481 d->arch.paging.shadow.unpaged_pagetable = pagetable_from_page(pg);
2483 /* Update the bits */
2484 sh_new_mode(d, mode);
2486 out_locked:
2487 shadow_unlock(d);
2488 out_unlocked:
2489 if ( rv != 0 && !pagetable_is_null(d->arch.phys_table) )
2490 p2m_teardown(d);
2491 if ( rv != 0 && pg != NULL )
2492 shadow_free_p2m_page(d, pg);
2493 domain_unpause(d);
2494 return rv;
2497 void shadow_teardown(struct domain *d)
2498 /* Destroy the shadow pagetables of this domain and free its shadow memory.
2499 * Should only be called for dying domains. */
2501 struct vcpu *v;
2502 mfn_t mfn;
2503 struct list_head *entry, *n;
2504 struct page_info *pg;
2506 ASSERT(d->is_dying);
2507 ASSERT(d != current->domain);
2509 if ( !shadow_locked_by_me(d) )
2510 shadow_lock(d); /* Keep various asserts happy */
2512 if ( shadow_mode_enabled(d) )
2514 /* Release the shadow and monitor tables held by each vcpu */
2515 for_each_vcpu(d, v)
2517 if ( v->arch.paging.mode )
2519 v->arch.paging.mode->shadow.detach_old_tables(v);
2520 if ( shadow_mode_external(d) )
2522 mfn = pagetable_get_mfn(v->arch.monitor_table);
2523 if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
2524 v->arch.paging.mode->shadow.destroy_monitor_table(v, mfn);
2525 v->arch.monitor_table = pagetable_null();
2531 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
2532 /* Free the virtual-TLB array attached to each vcpu */
2533 for_each_vcpu(d, v)
2535 if ( v->arch.paging.vtlb )
2537 xfree(v->arch.paging.vtlb);
2538 v->arch.paging.vtlb = NULL;
2541 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
2543 list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
2545 list_del(entry);
2546 pg = list_entry(entry, struct page_info, list);
2547 shadow_free_p2m_page(d, pg);
2550 if ( d->arch.paging.shadow.total_pages != 0 )
2552 SHADOW_PRINTK("teardown of domain %u starts."
2553 " Shadow pages total = %u, free = %u, p2m=%u\n",
2554 d->domain_id,
2555 d->arch.paging.shadow.total_pages,
2556 d->arch.paging.shadow.free_pages,
2557 d->arch.paging.shadow.p2m_pages);
2558 /* Destroy all the shadows and release memory to domheap */
2559 sh_set_allocation(d, 0, NULL);
2560 /* Release the hash table back to xenheap */
2561 if (d->arch.paging.shadow.hash_table)
2562 shadow_hash_teardown(d);
2563 /* Should not have any more memory held */
2564 SHADOW_PRINTK("teardown done."
2565 " Shadow pages total = %u, free = %u, p2m=%u\n",
2566 d->arch.paging.shadow.total_pages,
2567 d->arch.paging.shadow.free_pages,
2568 d->arch.paging.shadow.p2m_pages);
2569 ASSERT(d->arch.paging.shadow.total_pages == 0);
2572 /* Free the non-paged-vcpus pagetable; must happen after we've
2573 * destroyed any shadows of it or sh_destroy_shadow will get confused. */
2574 if ( !pagetable_is_null(d->arch.paging.shadow.unpaged_pagetable) )
2576 for_each_vcpu(d, v)
2578 ASSERT(is_hvm_vcpu(v));
2579 if ( !hvm_paging_enabled(v) )
2580 v->arch.guest_table = pagetable_null();
2582 shadow_free_p2m_page(d,
2583 pagetable_get_page(d->arch.paging.shadow.unpaged_pagetable));
2584 d->arch.paging.shadow.unpaged_pagetable = pagetable_null();
2587 /* We leave the "permanent" shadow modes enabled, but clear the
2588 * log-dirty mode bit. We don't want any more mark_dirty()
2589 * calls now that we've torn down the bitmap */
2590 d->arch.paging.mode &= ~PG_log_dirty;
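/* Free any VRAM dirty-tracking state still attached to the domain. */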
2592 if (d->dirty_vram) {
2593 xfree(d->dirty_vram->sl1ma);
2594 xfree(d->dirty_vram->dirty_bitmap);
2595 xfree(d->dirty_vram);
2596 d->dirty_vram = NULL;
2599 shadow_unlock(d);
2602 void shadow_final_teardown(struct domain *d)
2603 /* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */
2605 SHADOW_PRINTK("dom %u final teardown starts."
2606 " Shadow pages total = %u, free = %u, p2m=%u\n",
2607 d->domain_id,
2608 d->arch.paging.shadow.total_pages,
2609 d->arch.paging.shadow.free_pages,
2610 d->arch.paging.shadow.p2m_pages);
2612 /* Double-check that the domain didn't have any shadow memory.
2613 * It is possible for a domain that never got domain_kill()ed
2614 * to get here with its shadow allocation intact. */
2615 if ( d->arch.paging.shadow.total_pages != 0 )
2616 shadow_teardown(d);
2618 /* It is now safe to pull down the p2m map. */
2619 p2m_teardown(d);
2621 SHADOW_PRINTK("dom %u final teardown done."
2622 " Shadow pages total = %u, free = %u, p2m=%u\n",
2623 d->domain_id,
2624 d->arch.paging.shadow.total_pages,
2625 d->arch.paging.shadow.free_pages,
2626 d->arch.paging.shadow.p2m_pages);
2629 static int shadow_one_bit_enable(struct domain *d, u32 mode)
2630 /* Turn on a single shadow mode feature */
2632 ASSERT(shadow_locked_by_me(d));
2634 /* Sanity check the call */
2635 if ( d == current->domain || (d->arch.paging.mode & mode) == mode )
2637 return -EINVAL;
2640 mode |= PG_SH_enable;
2642 if ( d->arch.paging.mode == 0 )
2644 /* Init the shadow memory allocation and the hash table */
2645 if ( sh_set_allocation(d, 1, NULL) != 0
2646 || shadow_hash_alloc(d) != 0 )
2648 sh_set_allocation(d, 0, NULL);
2649 return -ENOMEM;
2653 /* Update the bits */
2654 sh_new_mode(d, d->arch.paging.mode | mode);
2656 return 0;
2659 static int shadow_one_bit_disable(struct domain *d, u32 mode)
2660 /* Turn off a single shadow mode feature */
2662 struct vcpu *v;
2663 ASSERT(shadow_locked_by_me(d));
2665 /* Sanity check the call */
2666 if ( d == current->domain || !((d->arch.paging.mode & mode) == mode) )
2668 return -EINVAL;
2671 /* Update the bits */
2672 sh_new_mode(d, d->arch.paging.mode & ~mode);
2673 if ( d->arch.paging.mode == 0 )
2675 /* Get this domain off shadows */
2676 SHADOW_PRINTK("un-shadowing of domain %u starts."
2677 " Shadow pages total = %u, free = %u, p2m=%u\n",
2678 d->domain_id,
2679 d->arch.paging.shadow.total_pages,
2680 d->arch.paging.shadow.free_pages,
2681 d->arch.paging.shadow.p2m_pages);
2682 for_each_vcpu(d, v)
2684 if ( v->arch.paging.mode )
2685 v->arch.paging.mode->shadow.detach_old_tables(v);
2686 #if CONFIG_PAGING_LEVELS == 4
2687 if ( !(v->arch.flags & TF_kernel_mode) )
2688 make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
2689 else
2690 #endif
2691 make_cr3(v, pagetable_get_pfn(v->arch.guest_table));
2695 /* Pull down the memory allocation */
2696 if ( sh_set_allocation(d, 0, NULL) != 0 )
2698 // XXX - How can this occur?
2699 // Seems like a bug to return an error now that we've
2700 // disabled the relevant shadow mode.
2701 //
2702 return -ENOMEM;
2704 shadow_hash_teardown(d);
2705 SHADOW_PRINTK("un-shadowing of domain %u done."
2706 " Shadow pages total = %u, free = %u, p2m=%u\n",
2707 d->domain_id,
2708 d->arch.paging.shadow.total_pages,
2709 d->arch.paging.shadow.free_pages,
2710 d->arch.paging.shadow.p2m_pages);
2713 return 0;
2716 /* Enable/disable ops for the "test" and "log-dirty" modes */
2717 static int shadow_test_enable(struct domain *d)
2719 int ret;
2721 domain_pause(d);
2722 shadow_lock(d);
2723 ret = shadow_one_bit_enable(d, PG_SH_enable);
2724 shadow_unlock(d);
2725 domain_unpause(d);
2727 return ret;
2730 static int shadow_test_disable(struct domain *d)
2732 int ret;
2734 domain_pause(d);
2735 shadow_lock(d);
2736 ret = shadow_one_bit_disable(d, PG_SH_enable);
2737 shadow_unlock(d);
2738 domain_unpause(d);
2740 return ret;
2743 /**************************************************************************/
2744 /* P2M map manipulations */
2746 /* Shadow-specific code which should be called when a P2M table entry is updated
2747 * with new content. It is responsible for updating the entry, as well as for
2748 * other shadow processing jobs.
2749 */
2750 void
2751 shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn,
2752 l1_pgentry_t *p, mfn_t table_mfn,
2753 l1_pgentry_t new, unsigned int level)
2755 struct domain *d = v->domain;
2757 shadow_lock(d);
2759 /* If we're removing an MFN from the p2m, remove it from the shadows too */
2760 if ( level == 1 )
2762 mfn_t mfn = _mfn(l1e_get_pfn(*p));
2763 p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
2764 if ( p2m_is_valid(p2mt) && mfn_valid(mfn) )
2766 sh_remove_all_shadows_and_parents(v, mfn);
2767 if ( sh_remove_all_mappings(v, mfn) )
2768 flush_tlb_mask(d->domain_dirty_cpumask);
2772 /* Update the entry with new content */
2773 safe_write_pte(p, new);
2775 /* install P2M in monitors for PAE Xen */
2776 #if CONFIG_PAGING_LEVELS == 3
2777 if ( level == 3 )
2778 /* We have written to the p2m l3: need to sync the per-vcpu
2779 * copies of it in the monitor tables */
2780 p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p);
2781 #endif
2783 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
2784 /* If we're doing FAST_FAULT_PATH, then shadow mode may have
2785 cached the fact that this is an mmio region in the shadow
2786 page tables. Blow the tables away to remove the cache.
2787 This is pretty heavy handed, but this is a rare operation
2788 (it might happen a dozen times during boot and then never
2789 again), so it doesn't matter too much. */
2790 if ( d->arch.paging.shadow.has_fast_mmio_entries )
2792 shadow_blow_tables(d);
2793 d->arch.paging.shadow.has_fast_mmio_entries = 0;
2795 #endif
2797 shadow_unlock(d);
2800 /**************************************************************************/
2801 /* Log-dirty mode support */
2803 /* Shadow specific code which is called in paging_log_dirty_enable().
2804 * Return 0 if no problem found.
2805 */
2806 int shadow_enable_log_dirty(struct domain *d)
2808 int ret;
2810 /* shadow lock is required here */
2811 shadow_lock(d);
2812 if ( shadow_mode_enabled(d) )
2814 /* This domain already has some shadows: need to clear them out
2815 * of the way to make sure that all references to guest memory are
2816 * properly write-protected */
2817 shadow_blow_tables(d);
2820 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
2821 /* 32bit PV guests on 64bit xen behave like older 64bit linux: they
2822 * change an l4e instead of cr3 to switch tables. Give them the
2823 * same optimization */
2824 if ( is_pv_32on64_domain(d) )
2825 d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
2826 #endif
2828 ret = shadow_one_bit_enable(d, PG_log_dirty);
2829 shadow_unlock(d);
2831 return ret;
2834 /* Shadow-specific code which is called in paging_log_dirty_disable() */
2835 int shadow_disable_log_dirty(struct domain *d)
2837 int ret;
2839 /* shadow lock is required here */
2840 shadow_lock(d);
2841 ret = shadow_one_bit_disable(d, PG_log_dirty);
2842 shadow_unlock(d);
2844 return ret;
2847 /* This function is called when we CLEAN the log-dirty bitmap. See
2848 * paging_log_dirty_op() for details.
2849 */
2850 void shadow_clean_dirty_bitmap(struct domain *d)
2852 shadow_lock(d);
2853 /* Need to revoke write access to the domain's pages again.
2854 * In future, we'll have a less heavy-handed approach to this,
2855 * but for now, we just unshadow everything except Xen. */
2856 shadow_blow_tables(d);
2857 shadow_unlock(d);
2861 /**************************************************************************/
2862 /* VRAM dirty tracking support */
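/* Track dirty bits for the frames in [begin_pfn, begin_pfn + nr): copy the
 * accumulated dirty bitmap to the caller and reset it.  Calling with nr == 0,
 * or with a different range, tears the current tracking down; the first call
 * on a new range returns -ENODATA since nothing has been collected yet. */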
2863 int shadow_track_dirty_vram(struct domain *d,
2864 unsigned long begin_pfn,
2865 unsigned long nr,
2866 XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
2868 int rc;
2869 unsigned long end_pfn = begin_pfn + nr;
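/* One dirty bit per pfn in the range, rounded up to whole bytes: */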
2870 unsigned long dirty_size = (nr + 7) / 8;
2871 int flush_tlb = 0;
2873 if (end_pfn < begin_pfn
2874 || begin_pfn > d->arch.p2m->max_mapped_pfn
2875 || end_pfn >= d->arch.p2m->max_mapped_pfn)
2876 return -EINVAL;
2878 shadow_lock(d);
2880 if ( d->dirty_vram && (!nr ||
2881 ( begin_pfn != d->dirty_vram->begin_pfn
2882 || end_pfn != d->dirty_vram->end_pfn )) ) {
2883 /* Different tracking, tear the previous down. */
2884 gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", d->dirty_vram->begin_pfn, d->dirty_vram->end_pfn);
2885 xfree(d->dirty_vram->sl1ma);
2886 xfree(d->dirty_vram->dirty_bitmap);
2887 xfree(d->dirty_vram);
2888 d->dirty_vram = NULL;
2891 if ( !nr ) {
2892 rc = 0;
2893 goto out;
2896 /* This should happen only rarely (on a video mode change), so there is
2897 * no need to be clever here. */
2898 if ( !d->dirty_vram ) {
2899 unsigned long i;
2900 p2m_type_t t;
2902 /* Just recount from start. */
2903 for ( i = begin_pfn; i < end_pfn; i++ )
2904 flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, i, &t));
2906 gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
2908 rc = -ENOMEM;
2909 if ( (d->dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
2910 goto out;
2911 d->dirty_vram->begin_pfn = begin_pfn;
2912 d->dirty_vram->end_pfn = end_pfn;
2914 if ( (d->dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
2915 goto out_dirty_vram;
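/* Filling with ~0 makes every entry INVALID_PADDR, i.e. "sl1e location
 * not known yet". */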
2916 memset(d->dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
2918 if ( (d->dirty_vram->dirty_bitmap = xmalloc_array(uint8_t, dirty_size)) == NULL )
2919 goto out_sl1ma;
2920 memset(d->dirty_vram->dirty_bitmap, 0, dirty_size);
2922 /* Tell the caller that this time we could not track dirty bits. */
2923 rc = -ENODATA;
2924 } else {
2925 int i;
2926 #ifdef __i386__
2927 unsigned long map_mfn = INVALID_MFN;
2928 void *map_sl1p = NULL;
2929 #endif
2931 /* Iterate over VRAM to track dirty bits. */
2932 for ( i = 0; i < nr; i++ ) {
2933 p2m_type_t t;
2934 mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
2935 struct page_info *page = mfn_to_page(mfn);
2936 u32 count_info = page->u.inuse.type_info & PGT_count_mask;
2937 int dirty = 0;
2938 paddr_t sl1ma = d->dirty_vram->sl1ma[i];
2940 switch (count_info) {
2941 case 0:
2942 /* No guest reference, nothing to track. */
2943 break;
2944 case 1:
2945 /* One guest reference. */
2946 if ( sl1ma == INVALID_PADDR ) {
2947 /* We don't know which sl1e points to this, too bad. */
2948 dirty = 1;
2949 /* TODO: Heuristics for finding the single mapping of
2950 * this gmfn */
2951 flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, begin_pfn + i, &t));
2952 } else {
2953 /* Hopefully the most common case: only one mapping,
2954 * whose dirty bit we can use. */
2955 l1_pgentry_t *sl1e;
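/* On 32-bit Xen the shadow l1 page must be mapped on demand (the mapping
 * is cached across loop iterations); 64-bit Xen can reach it directly
 * through maddr_to_virt(). */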
2956 #ifdef __i386__
2957 void *sl1p = map_sl1p;
2958 unsigned long sl1mfn = paddr_to_pfn(sl1ma);
2960 if ( sl1mfn != map_mfn ) {
2961 if ( map_sl1p )
2962 sh_unmap_domain_page(map_sl1p);
2963 map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
2964 map_mfn = sl1mfn;
2966 sl1e = sl1p + (sl1ma & ~PAGE_MASK);
2967 #else
2968 sl1e = maddr_to_virt(sl1ma);
2969 #endif
2971 if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY ) {
2972 dirty = 1;
2973 /* Note: this is not atomic, so we may clear a
2974 * _PAGE_ACCESSED set by another processor. */
2975 l1e_remove_flags(*sl1e, _PAGE_DIRTY);
2976 flush_tlb = 1;
2979 break;
2980 default:
2981 /* More than one guest reference;
2982 * we can't afford to track that. */
2983 dirty = 1;
2984 break;
2987 if ( dirty )
2988 d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
2991 #ifdef __i386__
2992 if ( map_sl1p )
2993 sh_unmap_domain_page(map_sl1p);
2994 #endif
2996 rc = -EFAULT;
2997 if ( copy_to_guest(dirty_bitmap, d->dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
2998 memset(d->dirty_vram->dirty_bitmap, 0, dirty_size);
2999 rc = 0;
3002 if ( flush_tlb )
3003 flush_tlb_mask(d->domain_dirty_cpumask);
3004 goto out;
3006 out_sl1ma:
3007 xfree(d->dirty_vram->sl1ma);
3008 out_dirty_vram:
3009 xfree(d->dirty_vram);
3010 d->dirty_vram = NULL;
3012 out:
3013 shadow_unlock(d);
3014 return rc;
3017 /**************************************************************************/
3018 /* Shadow-control XEN_DOMCTL dispatcher */
3020 int shadow_domctl(struct domain *d,
3021 xen_domctl_shadow_op_t *sc,
3022 XEN_GUEST_HANDLE(void) u_domctl)
3024 int rc, preempted = 0;
3026 switch ( sc->op )
3028 case XEN_DOMCTL_SHADOW_OP_OFF:
3029 if ( d->arch.paging.mode == PG_SH_enable )
3030 if ( (rc = shadow_test_disable(d)) != 0 )
3031 return rc;
3032 return 0;
3034 case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
3035 return shadow_test_enable(d);
3037 case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
3038 return shadow_enable(d, PG_refcounts|PG_translate);
3040 case XEN_DOMCTL_SHADOW_OP_ENABLE:
3041 return shadow_enable(d, sc->mode << PG_mode_shift);
3043 case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
3044 sc->mb = shadow_get_allocation(d);
3045 return 0;
3047 case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
3048 shadow_lock(d);
3049 if ( sc->mb == 0 && shadow_mode_enabled(d) )
3051 /* Can't set the allocation to zero unless the domain stops using
3052 * shadow pagetables first */
3053 SHADOW_ERROR("Can't set shadow allocation to zero, domain %u"
3054 " is still using shadows.\n", d->domain_id);
3055 shadow_unlock(d);
3056 return -EINVAL;
3058 rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
3059 shadow_unlock(d);
3060 if ( preempted )
3061 /* Not finished. Set up to re-run the call. */
3062 rc = hypercall_create_continuation(
3063 __HYPERVISOR_domctl, "h", u_domctl);
3064 else
3065 /* Finished. Return the new allocation */
3066 sc->mb = shadow_get_allocation(d);
3067 return rc;
3069 default:
3070 SHADOW_ERROR("Bad shadow op %u\n", sc->op);
3071 return -EINVAL;
3076 /**************************************************************************/
3077 /* Auditing shadow tables */
3079 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL
3081 void shadow_audit_tables(struct vcpu *v)
3083 /* Dispatch table for getting per-type functions */
3084 static hash_callback_t callbacks[SH_type_unused] = {
3085 NULL, /* none */
3086 #if CONFIG_PAGING_LEVELS == 2
3087 SHADOW_INTERNAL_NAME(sh_audit_l1_table,2,2), /* l1_32 */
3088 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,2,2), /* fl1_32 */
3089 SHADOW_INTERNAL_NAME(sh_audit_l2_table,2,2), /* l2_32 */
3090 #else
3091 SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,2), /* l1_32 */
3092 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,2), /* fl1_32 */
3093 SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,2), /* l2_32 */
3094 SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,3), /* l1_pae */
3095 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
3096 SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2_pae */
3097 SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2h_pae */
3098 #if CONFIG_PAGING_LEVELS >= 4
3099 SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4), /* l1_64 */
3100 SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64 */
3101 SHADOW_INTERNAL_NAME(sh_audit_l2_table,4,4), /* l2_64 */
3102 SHADOW_INTERNAL_NAME(sh_audit_l2_table,4,4), /* l2h_64 */
3103 SHADOW_INTERNAL_NAME(sh_audit_l3_table,4,4), /* l3_64 */
3104 SHADOW_INTERNAL_NAME(sh_audit_l4_table,4,4), /* l4_64 */
3105 #endif /* CONFIG_PAGING_LEVELS >= 4 */
3106 #endif /* CONFIG_PAGING_LEVELS > 2 */
3107 NULL /* All the rest */
3108 };
3109 unsigned int mask;
3111 if ( !(SHADOW_AUDIT_ENABLE) )
3112 return;
3114 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL )
3115 mask = ~1; /* Audit every table in the system */
3116 else
3118 /* Audit only the current mode's tables */
3119 switch ( v->arch.paging.mode->guest_levels )
3121 case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
3122 case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
3123 |SHF_L2H_PAE); break;
3124 case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64
3125 |SHF_L3_64|SHF_L4_64); break;
3126 default: BUG();
3130 hash_foreach(v, mask, callbacks, _mfn(INVALID_MFN));
3133 #endif /* Shadow audit */
3135 /*
3136 * Local variables:
3137 * mode: C
3138 * c-set-style: "BSD"
3139 * c-basic-offset: 4
3140 * indent-tabs-mode: nil
3141 * End:
3142 */