direct-io.hg: xen/include/asm-x86/shadow.h @ 8113:d963256dc3e0

writable_pl1e in shadow mode is a machine address, so its type is
physaddr_t, not unsigned long.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Nov 30 11:36:57 2005 +0100 (2005-11-30)
parents b3f8d3158a1c
children 43a1f899e302
1 /******************************************************************************
2 * include/asm-x86/shadow.h
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 #ifndef _XEN_SHADOW_H
23 #define _XEN_SHADOW_H
25 #include <xen/config.h>
26 #include <xen/types.h>
27 #include <xen/perfc.h>
28 #include <xen/sched.h>
29 #include <xen/mm.h>
30 #include <xen/domain_page.h>
31 #include <asm/current.h>
32 #include <asm/flushtlb.h>
33 #include <asm/processor.h>
34 #include <asm/vmx.h>
35 #include <public/dom0_ops.h>
36 #include <asm/shadow_public.h>
37 #include <asm/page-guest32.h>
38 #include <asm/shadow_ops.h>
40 /* Shadow PT operation mode : shadow-mode variable in arch_domain. */
42 #define SHM_enable (1<<0) /* we're in one of the shadow modes */
43 #define SHM_refcounts (1<<1) /* refcounts based on shadow tables instead of
44 guest tables */
45 #define SHM_write_all (1<<2) /* allow write access to all guest pt pages,
46 regardless of pte write permissions */
47 #define SHM_log_dirty (1<<3) /* enable log dirty mode */
48 #define SHM_translate (1<<4) /* Xen does p2m translation, not guest */
49 #define SHM_external (1<<5) /* Xen does not steal address space from the
50 domain for its own bookkeeping; requires VT or
51 similar mechanisms */
52 #define SHM_wr_pt_pte (1<<6) /* guest allowed to set PAGE_RW bit in PTEs which
53 point to page table pages. */
55 #define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode)
56 #define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts)
57 #define shadow_mode_write_l1(_d) (VM_ASSIST(_d, VMASST_TYPE_writable_pagetables))
58 #define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all)
59 #define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
60 #define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
61 #define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external)
62 #define shadow_mode_wr_pt_pte(_d) ((_d)->arch.shadow_mode & SHM_wr_pt_pte)
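
The mode bits above are combined by the enable path rather than set one at a time. A minimal sketch, not part of the original header (the exact flag sets chosen by the dom0 control path live in arch/x86/shadow*.c and may include more bits), showing how the bits compose for log-dirty tracking:

/* Hypothetical helper, for illustration only. */
static inline int example_enable_log_dirty(struct domain *d)
{
    return shadow_mode_enable(d, SHM_enable | SHM_log_dirty);
}
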
64 #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
65 #define __shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
66 (SH_LINEAR_PT_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
67 #define shadow_linear_l2_table(_v) ((_v)->arch.shadow_vtable)
69 // easy access to the hl2 table (for translated but not external modes only)
70 #define __linear_hl2_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START + \
71 (PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
73 /*
74 * For now we use the per-domain BIGLOCK rather than a shadow-specific lock.
75 * We usually have the BIGLOCK already acquired anyway, so this is unlikely
76 * to cause much unnecessary extra serialisation. Also it's a recursive
77 * lock, and there are some code paths containing nested shadow_lock().
78 * The #if 0'ed code below is therefore broken until such nesting is removed.
79 */
80 #if 0
81 #define shadow_lock_init(_d) \
82 spin_lock_init(&(_d)->arch.shadow_lock)
83 #define shadow_lock_is_acquired(_d) \
84 spin_is_locked(&(_d)->arch.shadow_lock)
85 #define shadow_lock(_d) \
86 do { \
87 ASSERT(!shadow_lock_is_acquired(_d)); \
88 spin_lock(&(_d)->arch.shadow_lock); \
89 } while (0)
90 #define shadow_unlock(_d) \
91 do { \
92 ASSERT(!shadow_lock_is_acquired(_d)); \
93 spin_unlock(&(_d)->arch.shadow_lock); \
94 } while (0)
95 #else
96 #define shadow_lock_init(_d) \
97 ((_d)->arch.shadow_nest = 0)
98 #define shadow_lock_is_acquired(_d) \
99 (spin_is_locked(&(_d)->big_lock) && ((_d)->arch.shadow_nest != 0))
100 #define shadow_lock(_d) \
101 do { \
102 LOCK_BIGLOCK(_d); \
103 (_d)->arch.shadow_nest++; \
104 } while (0)
105 #define shadow_unlock(_d) \
106 do { \
107 ASSERT(shadow_lock_is_acquired(_d)); \
108 (_d)->arch.shadow_nest--; \
109 UNLOCK_BIGLOCK(_d); \
110 } while (0)
111 #endif
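
A minimal sketch (not from the source) of the nesting the comment above refers to: because LOCK_BIGLOCK() is recursive and shadow_nest merely counts depth, a path that already holds the shadow lock can call a helper that takes it again, which is exactly what the disabled spinlock variant cannot tolerate.

static inline void example_nested_shadow_op(struct domain *d)
{
    shadow_lock(d);        /* outer caller: nesting depth 1 */
    shadow_lock(d);        /* nested helper: depth 2, no deadlock */
    /* ... manipulate shadow state ... */
    shadow_unlock(d);      /* back to depth 1 */
    shadow_unlock(d);      /* lock fully released */
}
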
113 #define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
114 #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
115 #define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))
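
A worked example of the encoding, assuming GUEST_L1_PAGETABLE_ENTRIES is 1024 (non-PAE 32-bit guests): the minimum index lives in the low 16 bits and the maximum is stored as its distance from the last entry, so both values unpack losslessly from the 32-bit field they share.

/* Illustrative only; not part of the original header. */
static inline void example_min_max_roundtrip(void)
{
    /* min=3, max=700 encodes as ((1023 - 700) << 16) | 3 = (323 << 16) | 3 */
    u32 enc = SHADOW_ENCODE_MIN_MAX(3, 700);
    ASSERT(SHADOW_MIN(enc) == 3);
    ASSERT(SHADOW_MAX(enc) == 700);
}
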
117 extern void shadow_mode_init(void);
118 extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc);
119 extern int shadow_fault(unsigned long va, struct cpu_user_regs *regs);
120 extern int shadow_mode_enable(struct domain *p, unsigned int mode);
121 extern void shadow_invlpg(struct vcpu *, unsigned long);
122 extern struct out_of_sync_entry *shadow_mark_mfn_out_of_sync(
123 struct vcpu *v, unsigned long gpfn, unsigned long mfn);
124 extern void free_monitor_pagetable(struct vcpu *v);
125 extern void __shadow_sync_all(struct domain *d);
126 extern int __shadow_out_of_sync(struct vcpu *v, unsigned long va);
127 extern int set_p2m_entry(
128 struct domain *d, unsigned long pfn, unsigned long mfn,
129 struct domain_mmap_cache *l2cache,
130 struct domain_mmap_cache *l1cache);
131 extern void remove_shadow(struct domain *d, unsigned long gpfn, u32 stype);
133 extern void shadow_l1_normal_pt_update(struct domain *d,
134 unsigned long pa, l1_pgentry_t l1e,
135 struct domain_mmap_cache *cache);
136 extern void shadow_l2_normal_pt_update(struct domain *d,
137 unsigned long pa, l2_pgentry_t l2e,
138 struct domain_mmap_cache *cache);
139 #if CONFIG_PAGING_LEVELS >= 3
140 #include <asm/page-guest32.h>
141 /*
142 * va_mask cannot be used because it's used by the shadow hash.
143 * Use the score area for now.
144 */
145 #define is_xen_l2_slot(t,s) \
146 ( ((((t) & PGT_score_mask) >> PGT_score_shift) == 3) && \
147 ((s) >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
149 extern unsigned long gva_to_gpa(unsigned long gva);
150 extern void shadow_l3_normal_pt_update(struct domain *d,
151 unsigned long pa, l3_pgentry_t l3e,
152 struct domain_mmap_cache *cache);
153 #endif
154 #if CONFIG_PAGING_LEVELS >= 4
155 extern void shadow_l4_normal_pt_update(struct domain *d,
156 unsigned long pa, l4_pgentry_t l4e,
157 struct domain_mmap_cache *cache);
158 #endif
159 extern int shadow_do_update_va_mapping(unsigned long va,
160 l1_pgentry_t val,
161 struct vcpu *v);
164 static inline unsigned long __shadow_status(
165 struct domain *d, unsigned long gpfn, unsigned long stype);
167 #if CONFIG_PAGING_LEVELS <= 2
168 static inline void update_hl2e(struct vcpu *v, unsigned long va);
169 #endif
171 extern void vmx_shadow_clear_state(struct domain *);
173 static inline int page_is_page_table(struct pfn_info *page)
174 {
175 struct domain *owner = page_get_owner(page);
176 u32 type_info;
178 if ( owner && shadow_mode_refcounts(owner) )
179 return page->count_info & PGC_page_table;
181 type_info = page->u.inuse.type_info & PGT_type_mask;
182 return type_info && (type_info <= PGT_l4_page_table);
183 }
185 static inline int mfn_is_page_table(unsigned long mfn)
186 {
187 if ( !pfn_valid(mfn) )
188 return 0;
190 return page_is_page_table(pfn_to_page(mfn));
191 }
193 static inline int page_out_of_sync(struct pfn_info *page)
194 {
195 return page->count_info & PGC_out_of_sync;
196 }
198 static inline int mfn_out_of_sync(unsigned long mfn)
199 {
200 if ( !pfn_valid(mfn) )
201 return 0;
203 return page_out_of_sync(pfn_to_page(mfn));
204 }
207 /************************************************************************/
209 static void inline
210 __shadow_sync_mfn(struct domain *d, unsigned long mfn)
211 {
212 if ( d->arch.out_of_sync )
213 {
214 // XXX - could be smarter
215 //
216 __shadow_sync_all(d);
217 }
218 }
220 static void inline
221 __shadow_sync_va(struct vcpu *v, unsigned long va)
222 {
223 struct domain *d = v->domain;
225 if ( d->arch.out_of_sync && __shadow_out_of_sync(v, va) )
226 {
227 perfc_incrc(shadow_sync_va);
229 // XXX - could be smarter
230 //
231 __shadow_sync_all(v->domain);
232 }
233 #if CONFIG_PAGING_LEVELS <= 2
234 // Also make sure the HL2 is up-to-date for this address.
235 //
236 if ( unlikely(shadow_mode_translate(v->domain)) )
237 update_hl2e(v, va);
238 #endif
239 }
241 static void inline
242 shadow_sync_all(struct domain *d)
243 {
244 if ( unlikely(shadow_mode_enabled(d)) )
245 {
246 shadow_lock(d);
248 if ( d->arch.out_of_sync )
249 __shadow_sync_all(d);
251 ASSERT(d->arch.out_of_sync == NULL);
253 shadow_unlock(d);
254 }
255 }
257 // SMP BUG: This routine can't ever be used properly in an SMP context.
258 // It should be something like get_shadow_and_sync_va().
259 // This probably shouldn't exist.
260 //
261 static void inline
262 shadow_sync_va(struct vcpu *v, unsigned long gva)
263 {
264 struct domain *d = v->domain;
265 if ( unlikely(shadow_mode_enabled(d)) )
266 {
267 shadow_lock(d);
268 __shadow_sync_va(v, gva);
269 shadow_unlock(d);
270 }
271 }
273 extern void __shadow_mode_disable(struct domain *d);
274 static inline void shadow_mode_disable(struct domain *d)
275 {
276 if ( unlikely(shadow_mode_enabled(d)) )
277 {
278 shadow_lock(d);
279 __shadow_mode_disable(d);
280 shadow_unlock(d);
281 }
282 }
284 /************************************************************************/
286 #define __mfn_to_gpfn(_d, mfn) \
287 ( (shadow_mode_translate(_d)) \
288 ? get_pfn_from_mfn(mfn) \
289 : (mfn) )
291 #define __gpfn_to_mfn(_d, gpfn) \
292 ({ \
293 unlikely(shadow_mode_translate(_d)) \
294 ? (likely(current->domain == (_d)) \
295 ? get_mfn_from_pfn(gpfn) \
296 : get_mfn_from_pfn_foreign(_d, gpfn)) \
297 : (gpfn); \
298 })
300 extern unsigned long get_mfn_from_pfn_foreign(
301 struct domain *d, unsigned long gpfn);
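
Usage sketch (the helper below is hypothetical, not from the original file): in translate mode a guest pfn has to go through the p2m (or, for a foreign domain, get_mfn_from_pfn_foreign()), while in non-translate mode both macros are identity mappings because pfn == mfn.

static inline unsigned long example_gpfn_lookup(struct domain *d,
                                                unsigned long gpfn)
{
    unsigned long mfn = __gpfn_to_mfn(d, gpfn);    /* p2m lookup or identity */

    if ( !VALID_MFN(mfn) )
        return INVALID_MFN;                        /* nothing backs this pfn */

    /* The reverse map should round-trip back to the same guest pfn. */
    ASSERT(__mfn_to_gpfn(d, mfn) == gpfn);
    return mfn;
}
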
303 /************************************************************************/
305 struct shadow_status {
306 struct shadow_status *next; /* Pull-to-front list per hash bucket. */
307 unsigned long gpfn_and_flags; /* Guest pfn plus flags. */
308 unsigned long smfn; /* Shadow mfn. */
309 };
311 #define shadow_ht_extra_size 128
312 #define shadow_ht_buckets 256
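
Illustrative key construction, mirroring hash_bucket() and ___shadow_status() further down (the example_* helper itself is hypothetical): entries are keyed by the guest pfn OR'ed with the shadow type, and hashed into one of shadow_ht_buckets chains by the pfn alone.

static inline unsigned long example_shadow_key(unsigned long gpfn,
                                               unsigned long stype)
{
    ASSERT(!(gpfn & ~PGT_mfn_mask));
    ASSERT(stype && !(stype & ~PGT_type_mask));
    /* the bucket index would be gpfn % shadow_ht_buckets */
    return gpfn | stype;
}
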
314 struct out_of_sync_entry {
315 struct out_of_sync_entry *next;
316 struct vcpu *v;
317 unsigned long gpfn; /* why is this here? */
318 unsigned long gmfn;
319 unsigned long snapshot_mfn;
320 physaddr_t writable_pl1e; /* NB: this is a machine address */
321 unsigned long va;
322 };
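
As the changeset description at the top notes, writable_pl1e really is a machine address (the shadow L1 frame plus the byte offset of the PTE slot), which is why it must be physaddr_t rather than unsigned long: with PAE a frame above 4GB would not fit in a 32-bit long. A hypothetical composition, for illustration only:

static inline physaddr_t example_writable_pl1e(unsigned long sl1mfn,
                                               unsigned int index)
{
    return ((physaddr_t)sl1mfn << PAGE_SHIFT) +
           (index * sizeof(l1_pgentry_t));
}
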
324 #define out_of_sync_extra_size 127
326 #define SHADOW_SNAPSHOT_ELSEWHERE (-1L)
328 /************************************************************************/
329 #define SHADOW_DEBUG 0
330 #define SHADOW_VERBOSE_DEBUG 0
331 #define SHADOW_VVERBOSE_DEBUG 0
332 #define SHADOW_VVVERBOSE_DEBUG 0
333 #define SHADOW_HASH_DEBUG 0
334 #define FULLSHADOW_DEBUG 0
336 #if SHADOW_DEBUG
337 extern int shadow_status_noswap;
338 #define SHADOW_REFLECTS_SNAPSHOT _PAGE_AVAIL0
339 #endif
341 #if SHADOW_VERBOSE_DEBUG
342 #define SH_LOG(_f, _a...) \
343 printk("DOM%uP%u: SH_LOG(%d): " _f "\n", \
344 current->domain->domain_id , current->processor, __LINE__ , ## _a )
345 #define SH_VLOG(_f, _a...) \
346 printk("DOM%uP%u: SH_VLOG(%d): " _f "\n", \
347 current->domain->domain_id, current->processor, __LINE__ , ## _a )
348 #else
349 #define SH_LOG(_f, _a...) ((void)0)
350 #define SH_VLOG(_f, _a...) ((void)0)
351 #endif
353 #if SHADOW_VVERBOSE_DEBUG
354 #define SH_VVLOG(_f, _a...) \
355 printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n", \
356 current->domain->domain_id, current->processor, __LINE__ , ## _a )
357 #else
358 #define SH_VVLOG(_f, _a...) ((void)0)
359 #endif
361 #if SHADOW_VVVERBOSE_DEBUG
362 #define SH_VVVLOG(_f, _a...) \
363 printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n", \
364 current->domain->domain_id, current->processor, __LINE__ , ## _a )
365 #else
366 #define SH_VVVLOG(_f, _a...) ((void)0)
367 #endif
369 #if FULLSHADOW_DEBUG
370 #define FSH_LOG(_f, _a...) \
371 printk("DOM%uP%u: FSH_LOG(%d): " _f "\n", \
372 current->domain->domain_id, current->processor, __LINE__ , ## _a )
373 #else
374 #define FSH_LOG(_f, _a...) ((void)0)
375 #endif
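
Usage sketch (the helper is illustrative, not from the source): all of the logging macros are printf-style and expand to ((void)0) when their debug level is 0, so call sites cost nothing in non-debug builds.

static inline void example_log_use(unsigned long gpfn)
{
    SH_VLOG("no shadow found for gpfn=%lx", gpfn);
}
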
378 /************************************************************************/
380 static inline int
381 shadow_get_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
382 {
383 l1_pgentry_t nl1e;
384 int res;
385 unsigned long mfn;
386 struct domain *owner;
388 ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
390 if ( !shadow_mode_refcounts(d) )
391 return 1;
393 nl1e = l1e;
394 l1e_remove_flags(nl1e, _PAGE_GLOBAL);
396 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
397 return 0;
399 res = get_page_from_l1e(nl1e, d);
401 if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
402 !(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) &&
403 (mfn = l1e_get_pfn(nl1e)) &&
404 pfn_valid(mfn) &&
405 (owner = page_get_owner(pfn_to_page(mfn))) &&
406 (d != owner) )
407 {
408 res = get_page_from_l1e(nl1e, owner);
409 printk("tried to map mfn %lx from domain %d into shadow page tables "
410 "of domain %d; %s\n",
411 mfn, owner->domain_id, d->domain_id,
412 res ? "success" : "failed");
413 }
415 if ( unlikely(!res) )
416 {
417 perfc_incrc(shadow_get_page_fail);
418 FSH_LOG("%s failed to get ref l1e=%" PRIpte "\n",
419 __func__, l1e_get_intpte(l1e));
420 }
422 return res;
423 }
425 static inline void
426 shadow_put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
427 {
428 if ( !shadow_mode_refcounts(d) )
429 return;
431 put_page_from_l1e(l1e, d);
432 }
434 static inline void
435 shadow_put_page_type(struct domain *d, struct pfn_info *page)
436 {
437 if ( !shadow_mode_refcounts(d) )
438 return;
440 put_page_type(page);
441 }
443 static inline int shadow_get_page(struct domain *d,
444 struct pfn_info *page,
445 struct domain *owner)
446 {
447 if ( !shadow_mode_refcounts(d) )
448 return 1;
449 return get_page(page, owner);
450 }
452 static inline void shadow_put_page(struct domain *d,
453 struct pfn_info *page)
454 {
455 if ( !shadow_mode_refcounts(d) )
456 return;
457 put_page(page);
458 }
460 /************************************************************************/
462 static inline void __mark_dirty(struct domain *d, unsigned long mfn)
463 {
464 unsigned long pfn;
466 ASSERT(shadow_lock_is_acquired(d));
468 if ( likely(!shadow_mode_log_dirty(d)) || !VALID_MFN(mfn) )
469 return;
471 ASSERT(d->arch.shadow_dirty_bitmap != NULL);
473 /* We /really/ mean PFN here, even for non-translated guests. */
474 pfn = get_pfn_from_mfn(mfn);
476 /*
477 * Values with the MSB set denote MFNs that aren't really part of the
478 * domain's pseudo-physical memory map (e.g., the shared info frame).
479 * Nothing to do here...
480 */
481 if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) )
482 return;
484 /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
485 if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) &&
486 !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) )
487 {
488 d->arch.shadow_dirty_count++;
489 }
490 #ifndef NDEBUG
491 else if ( mfn < max_page )
492 {
493 SH_VLOG("mark_dirty OOR! mfn=%lx pfn=%lx max=%x (dom %p)",
494 mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
495 SH_VLOG("dom=%p caf=%08x taf=%" PRtype_info,
496 page_get_owner(&frame_table[mfn]),
497 frame_table[mfn].count_info,
498 frame_table[mfn].u.inuse.type_info );
499 }
500 #endif
501 }
504 static inline void mark_dirty(struct domain *d, unsigned int mfn)
505 {
506 if ( unlikely(shadow_mode_log_dirty(d)) )
507 {
508 shadow_lock(d);
509 __mark_dirty(d, mfn);
510 shadow_unlock(d);
511 }
512 }
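
Caller sketch (illustrative, not from the source): any hypervisor-side store into guest memory has to be reflected in the log-dirty bitmap, so such writes are paired with mark_dirty() on the frame that was touched; the call collapses to nothing unless log-dirty mode is active.

static inline void example_write_and_mark(struct domain *d, unsigned long mfn,
                                          unsigned long *mapped_va,
                                          unsigned long val)
{
    *mapped_va = val;      /* hypothetical write through a mapping of mfn */
    mark_dirty(d, mfn);    /* no-op unless shadow_mode_log_dirty(d) */
}
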
515 /************************************************************************/
516 #if CONFIG_PAGING_LEVELS <= 2
517 static inline void
518 __shadow_get_l2e(
519 struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
520 {
521 ASSERT(shadow_mode_enabled(v->domain));
523 *psl2e = v->arch.shadow_vtable[l2_table_offset(va)];
524 }
526 static inline void
527 __shadow_set_l2e(
528 struct vcpu *v, unsigned long va, l2_pgentry_t value)
529 {
530 ASSERT(shadow_mode_enabled(v->domain));
532 v->arch.shadow_vtable[l2_table_offset(va)] = value;
533 }
535 static inline void
536 __guest_get_l2e(
537 struct vcpu *v, unsigned long va, l2_pgentry_t *pl2e)
538 {
539 *pl2e = v->arch.guest_vtable[l2_table_offset(va)];
540 }
542 static inline void
543 __guest_set_l2e(
544 struct vcpu *v, unsigned long va, l2_pgentry_t value)
545 {
546 struct domain *d = v->domain;
548 v->arch.guest_vtable[l2_table_offset(va)] = value;
550 if ( unlikely(shadow_mode_translate(d)) )
551 update_hl2e(v, va);
553 __mark_dirty(d, pagetable_get_pfn(v->arch.guest_table));
554 }
556 static inline void
557 update_hl2e(struct vcpu *v, unsigned long va)
558 {
559 int index = l2_table_offset(va);
560 unsigned long mfn;
561 l2_pgentry_t gl2e = v->arch.guest_vtable[index];
562 l1_pgentry_t old_hl2e, new_hl2e;
563 int need_flush = 0;
565 ASSERT(shadow_mode_translate(v->domain));
567 old_hl2e = v->arch.hl2_vtable[index];
569 if ( (l2e_get_flags(gl2e) & _PAGE_PRESENT) &&
570 VALID_MFN(mfn = get_mfn_from_pfn(l2e_get_pfn(gl2e))) )
571 new_hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
572 else
573 new_hl2e = l1e_empty();
575 // only do the ref counting if something has changed.
576 //
577 if ( (l1e_has_changed(old_hl2e, new_hl2e, PAGE_FLAG_MASK)) )
578 {
579 if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
580 !shadow_get_page(v->domain, pfn_to_page(l1e_get_pfn(new_hl2e)),
581 v->domain) )
582 new_hl2e = l1e_empty();
583 if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
584 {
585 shadow_put_page(v->domain, pfn_to_page(l1e_get_pfn(old_hl2e)));
586 need_flush = 1;
587 }
589 v->arch.hl2_vtable[l2_table_offset(va)] = new_hl2e;
591 if ( need_flush )
592 {
593 perfc_incrc(update_hl2e_invlpg);
594 flush_tlb_one_mask(v->domain->cpumask,
595 &linear_pg_table[l1_linear_offset(va)]);
596 }
597 }
598 }
600 static inline void shadow_drop_references(
601 struct domain *d, struct pfn_info *page)
602 {
603 if ( likely(!shadow_mode_refcounts(d)) ||
604 ((page->u.inuse.type_info & PGT_count_mask) == 0) )
605 return;
607 /* XXX This needs more thought... */
608 printk("%s: needing to call shadow_remove_all_access for mfn=%lx\n",
609 __func__, page_to_pfn(page));
610 printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
611 page->count_info, page->u.inuse.type_info);
613 shadow_lock(d);
614 shadow_remove_all_access(d, page_to_pfn(page));
615 shadow_unlock(d);
617 printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
618 page->count_info, page->u.inuse.type_info);
619 }
621 /* XXX Needs more thought. Neither pretty nor fast: a place holder. */
622 static inline void shadow_sync_and_drop_references(
623 struct domain *d, struct pfn_info *page)
624 {
625 if ( likely(!shadow_mode_refcounts(d)) )
626 return;
628 shadow_lock(d);
630 if ( page_out_of_sync(page) )
631 __shadow_sync_mfn(d, page_to_pfn(page));
633 shadow_remove_all_access(d, page_to_pfn(page));
635 shadow_unlock(d);
636 }
637 #endif
639 /************************************************************************/
641 /*
642 * Add another shadow reference to smfn.
643 */
644 static inline int
645 get_shadow_ref(unsigned long smfn)
646 {
647 u32 x, nx;
649 ASSERT(pfn_valid(smfn));
651 x = frame_table[smfn].count_info;
652 nx = x + 1;
654 if ( unlikely(nx == 0) )
655 {
656 printk("get_shadow_ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
657 frame_table[smfn].u.inuse.type_info & PGT_mfn_mask,
658 smfn);
659 BUG();
660 }
662 // Guarded by the shadow lock...
663 //
664 frame_table[smfn].count_info = nx;
666 return 1;
667 }
669 extern void free_shadow_page(unsigned long smfn);
671 /*
672 * Drop a shadow reference to smfn.
673 */
674 static inline void
675 put_shadow_ref(unsigned long smfn)
676 {
677 u32 x, nx;
679 ASSERT(pfn_valid(smfn));
681 x = frame_table[smfn].count_info;
682 nx = x - 1;
684 if ( unlikely(x == 0) )
685 {
686 printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%"
687 PRtype_info "\n",
688 smfn,
689 frame_table[smfn].count_info,
690 frame_table[smfn].u.inuse.type_info);
691 BUG();
692 }
694 // Guarded by the shadow lock...
695 //
696 frame_table[smfn].count_info = nx;
698 if ( unlikely(nx == 0) )
699 {
700 free_shadow_page(smfn);
701 }
702 }
704 static inline void
705 shadow_pin(unsigned long smfn)
706 {
707 ASSERT( !(frame_table[smfn].u.inuse.type_info & PGT_pinned) );
709 frame_table[smfn].u.inuse.type_info |= PGT_pinned;
710 if ( unlikely(!get_shadow_ref(smfn)) )
711 BUG();
712 }
714 static inline void
715 shadow_unpin(unsigned long smfn)
716 {
717 ASSERT( (frame_table[smfn].u.inuse.type_info & PGT_pinned) );
719 frame_table[smfn].u.inuse.type_info &= ~PGT_pinned;
720 put_shadow_ref(smfn);
721 }
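
Illustrative pairing (not from the source): pinning takes an extra shadow reference so the shadow survives even when no shadow L2 currently points at it; unpinning drops PGT_pinned and that reference, which may free the shadow page.

static inline void example_pin_cycle(unsigned long smfn)
{
    shadow_pin(smfn);      /* sets PGT_pinned and takes a shadow ref */
    /* ... smfn cannot be reclaimed here ... */
    shadow_unpin(smfn);    /* clears PGT_pinned; may free_shadow_page() */
}
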
723 /*
724 * SMP issue. The following code assumes the shadow lock is held. Re-visit
725 * when working on finer-grained locks for shadow.
726 */
727 static inline void set_guest_back_ptr(
728 struct domain *d, l1_pgentry_t spte, unsigned long smfn, unsigned int index)
729 {
730 if ( shadow_mode_external(d) ) {
731 unsigned long gmfn;
733 ASSERT(shadow_lock_is_acquired(d));
734 gmfn = l1e_get_pfn(spte);
735 frame_table[gmfn].tlbflush_timestamp = smfn;
736 frame_table[gmfn].u.inuse.type_info &= ~PGT_va_mask;
737 frame_table[gmfn].u.inuse.type_info |= (unsigned long) index << PGT_va_shift;
738 }
739 }
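
Illustrative inverse of set_guest_back_ptr(), assuming the same encoding (this helper is not in the original file): the shadow L1 mfn is parked in the guest frame's tlbflush_timestamp and the slot index in the PGT_va bits, so later teardown of write access can find the shadow PTE that maps the frame.

static inline void example_read_back_ptr(unsigned long gmfn,
                                         unsigned long *smfn,
                                         unsigned int *index)
{
    *smfn  = frame_table[gmfn].tlbflush_timestamp;
    *index = (frame_table[gmfn].u.inuse.type_info & PGT_va_mask)
             >> PGT_va_shift;
}
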
741 /************************************************************************/
742 #if CONFIG_PAGING_LEVELS <= 2
743 extern void shadow_mark_va_out_of_sync(
744 struct vcpu *v, unsigned long gpfn, unsigned long mfn,
745 unsigned long va);
747 static inline int l1pte_write_fault(
748 struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
749 unsigned long va)
750 {
751 struct domain *d = v->domain;
752 l1_pgentry_t gpte = *gpte_p;
753 l1_pgentry_t spte;
754 unsigned long gpfn = l1e_get_pfn(gpte);
755 unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
757 //printk("l1pte_write_fault gmfn=%lx\n", gmfn);
759 if ( unlikely(!VALID_MFN(gmfn)) )
760 {
761 SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
762 *spte_p = l1e_empty();
763 return 0;
764 }
766 ASSERT(l1e_get_flags(gpte) & _PAGE_RW);
767 l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
768 spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
770 SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
771 l1e_get_intpte(spte), l1e_get_intpte(gpte));
773 __mark_dirty(d, gmfn);
775 if ( mfn_is_page_table(gmfn) )
776 shadow_mark_va_out_of_sync(v, gpfn, gmfn, va);
778 *gpte_p = gpte;
779 *spte_p = spte;
781 return 1;
782 }
784 static inline int l1pte_read_fault(
785 struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
786 {
787 l1_pgentry_t gpte = *gpte_p;
788 l1_pgentry_t spte = *spte_p;
789 unsigned long pfn = l1e_get_pfn(gpte);
790 unsigned long mfn = __gpfn_to_mfn(d, pfn);
792 if ( unlikely(!VALID_MFN(mfn)) )
793 {
794 SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
795 *spte_p = l1e_empty();
796 return 0;
797 }
799 l1e_add_flags(gpte, _PAGE_ACCESSED);
800 spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
802 if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
803 mfn_is_page_table(mfn) )
804 {
805 l1e_remove_flags(spte, _PAGE_RW);
806 }
808 SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
809 l1e_get_intpte(spte), l1e_get_intpte(gpte));
810 *gpte_p = gpte;
811 *spte_p = spte;
813 return 1;
814 }
815 #endif
817 static inline void l1pte_propagate_from_guest(
818 struct domain *d, guest_l1_pgentry_t gpte, l1_pgentry_t *spte_p)
819 {
820 unsigned long mfn;
821 l1_pgentry_t spte;
823 spte = l1e_empty();
825 if ( ((guest_l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
826 (_PAGE_PRESENT|_PAGE_ACCESSED)) &&
827 VALID_MFN(mfn = __gpfn_to_mfn(d, l1e_get_pfn(gpte))) )
828 {
829 spte = l1e_from_pfn(
830 mfn, guest_l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL));
832 if ( shadow_mode_log_dirty(d) ||
833 !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) ||
834 mfn_is_page_table(mfn) )
835 {
836 l1e_remove_flags(spte, _PAGE_RW);
837 }
838 }
840 if ( l1e_get_intpte(spte) || l1e_get_intpte(gpte) )
841 SH_VVVLOG("%s: gpte=%" PRIpte ", new spte=%" PRIpte,
842 __func__, l1e_get_intpte(gpte), l1e_get_intpte(spte));
844 *spte_p = spte;
845 }
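
A small usage sketch (the helper is hypothetical): propagation keeps _PAGE_RW only when the guest entry is already dirty, the domain is not in log-dirty mode, and the target frame is not itself a page table; otherwise the shadow entry is made read-only so the first write faults into the shadow code.

static inline int example_spte_is_writable(struct domain *d,
                                           guest_l1_pgentry_t gpte)
{
    l1_pgentry_t spte;

    l1pte_propagate_from_guest(d, gpte, &spte);
    return (l1e_get_flags(spte) & _PAGE_RW) != 0;
}
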
847 static inline void hl2e_propagate_from_guest(
848 struct domain *d, l2_pgentry_t gpde, l1_pgentry_t *hl2e_p)
849 {
850 unsigned long pfn = l2e_get_pfn(gpde);
851 unsigned long mfn;
852 l1_pgentry_t hl2e;
854 hl2e = l1e_empty();
856 if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
857 {
858 mfn = __gpfn_to_mfn(d, pfn);
859 if ( VALID_MFN(mfn) && (mfn < max_page) )
860 hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
861 }
863 if ( l1e_get_intpte(hl2e) || l2e_get_intpte(gpde) )
864 SH_VVLOG("%s: gpde=%" PRIpte " hl2e=%" PRIpte, __func__,
865 l2e_get_intpte(gpde), l1e_get_intpte(hl2e));
867 *hl2e_p = hl2e;
868 }
870 static inline void l2pde_general(
871 struct domain *d,
872 guest_l2_pgentry_t *gpde_p,
873 l2_pgentry_t *spde_p,
874 unsigned long sl1mfn)
875 {
876 guest_l2_pgentry_t gpde = *gpde_p;
877 l2_pgentry_t spde;
879 spde = l2e_empty();
880 if ( (guest_l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
881 {
882 spde = l2e_from_pfn(
883 sl1mfn,
884 (guest_l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
886 /* N.B. PDEs do not have a dirty bit. */
887 guest_l2e_add_flags(gpde, _PAGE_ACCESSED);
889 *gpde_p = gpde;
890 }
892 if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) )
893 SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__,
894 l2e_get_intpte(gpde), l2e_get_intpte(spde));
896 *spde_p = spde;
897 }
899 static inline void l2pde_propagate_from_guest(
900 struct domain *d, guest_l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p)
901 {
902 guest_l2_pgentry_t gpde = *gpde_p;
903 unsigned long sl1mfn = 0;
905 if ( guest_l2e_get_flags(gpde) & _PAGE_PRESENT )
906 sl1mfn = __shadow_status(d, l2e_get_pfn(gpde), PGT_l1_shadow);
907 l2pde_general(d, gpde_p, spde_p, sl1mfn);
908 }
910 /************************************************************************/
912 // returns true if a tlb flush is needed
913 //
914 static int inline
915 validate_pte_change(
916 struct domain *d,
917 guest_l1_pgentry_t new_pte,
918 l1_pgentry_t *shadow_pte_p)
919 {
920 l1_pgentry_t old_spte, new_spte;
921 int need_flush = 0;
923 perfc_incrc(validate_pte_calls);
925 l1pte_propagate_from_guest(d, new_pte, &new_spte);
927 if ( shadow_mode_refcounts(d) )
928 {
929 old_spte = *shadow_pte_p;
931 if ( l1e_get_intpte(old_spte) == l1e_get_intpte(new_spte) )
932 {
933 // No accounting required...
934 //
935 perfc_incrc(validate_pte_changes1);
936 }
937 else if ( l1e_get_intpte(old_spte) == (l1e_get_intpte(new_spte)|_PAGE_RW) )
938 {
939 // Fast path for PTEs that have merely been write-protected
940 // (e.g., during a Unix fork()). A strict reduction in privilege.
941 //
942 perfc_incrc(validate_pte_changes2);
943 if ( likely(l1e_get_flags(new_spte) & _PAGE_PRESENT) )
944 shadow_put_page_type(d, &frame_table[l1e_get_pfn(new_spte)]);
945 }
946 else if ( ((l1e_get_flags(old_spte) | l1e_get_flags(new_spte)) &
947 _PAGE_PRESENT ) &&
948 l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
949 {
950 // only do the ref counting if something important changed.
951 //
952 perfc_incrc(validate_pte_changes3);
954 if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
955 {
956 shadow_put_page_from_l1e(old_spte, d);
957 need_flush = 1;
958 }
959 if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
960 !shadow_get_page_from_l1e(new_spte, d) ) {
961 new_spte = l1e_empty();
962 need_flush = -1; /* need to unshadow the page */
963 }
964 }
965 else
966 {
967 perfc_incrc(validate_pte_changes4);
968 }
969 }
971 *shadow_pte_p = new_spte;
973 return need_flush;
974 }
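
Caller sketch (illustrative): a non-zero return means the shadow entry changed in a way that can leave stale TLB entries behind, and -1 additionally signals that the new entry could not take its reference and the page should be unshadowed. Real callers batch flushes and handle the unshadow case specially; flush_tlb_mask() is used here only as a stand-in.

static inline void example_validate_and_flush(struct domain *d,
                                              guest_l1_pgentry_t new_gpte,
                                              l1_pgentry_t *spte_p)
{
    int rc = validate_pte_change(d, new_gpte, spte_p);

    if ( rc != 0 )
        flush_tlb_mask(d->cpumask);
}
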
976 // returns true if a tlb flush is needed
977 //
978 static int inline
979 validate_hl2e_change(
980 struct domain *d,
981 l2_pgentry_t new_gpde,
982 l1_pgentry_t *shadow_hl2e_p)
983 {
984 l1_pgentry_t old_hl2e, new_hl2e;
985 int need_flush = 0;
987 perfc_incrc(validate_hl2e_calls);
989 old_hl2e = *shadow_hl2e_p;
990 hl2e_propagate_from_guest(d, new_gpde, &new_hl2e);
992 // Only do the ref counting if something important changed.
993 //
994 if ( ((l1e_get_flags(old_hl2e) | l1e_get_flags(new_hl2e)) & _PAGE_PRESENT) &&
995 l1e_has_changed(old_hl2e, new_hl2e, _PAGE_PRESENT) )
996 {
997 perfc_incrc(validate_hl2e_changes);
999 if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
1000 !get_page(pfn_to_page(l1e_get_pfn(new_hl2e)), d) )
1001 new_hl2e = l1e_empty();
1002 if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
1003 {
1004 put_page(pfn_to_page(l1e_get_pfn(old_hl2e)));
1005 need_flush = 1;
1006 }
1007 }
1009 *shadow_hl2e_p = new_hl2e;
1011 return need_flush;
1012 }
1014 // returns true if a tlb flush is needed
1015 //
1016 static int inline
1017 validate_pde_change(
1018 struct domain *d,
1019 guest_l2_pgentry_t new_gpde,
1020 l2_pgentry_t *shadow_pde_p)
1021 {
1022 l2_pgentry_t old_spde, new_spde;
1023 int need_flush = 0;
1025 perfc_incrc(validate_pde_calls);
1027 old_spde = *shadow_pde_p;
1028 l2pde_propagate_from_guest(d, &new_gpde, &new_spde);
1030 // Only do the ref counting if something important changed.
1031 //
1032 if ( ((l2e_get_intpte(old_spde) | l2e_get_intpte(new_spde)) & _PAGE_PRESENT) &&
1033 l2e_has_changed(old_spde, new_spde, _PAGE_PRESENT) )
1034 {
1035 perfc_incrc(validate_pde_changes);
1037 if ( (l2e_get_flags(new_spde) & _PAGE_PRESENT) &&
1038 !get_shadow_ref(l2e_get_pfn(new_spde)) )
1039 BUG();
1040 if ( l2e_get_flags(old_spde) & _PAGE_PRESENT )
1041 {
1042 put_shadow_ref(l2e_get_pfn(old_spde));
1043 need_flush = 1;
1044 }
1045 }
1047 *shadow_pde_p = new_spde;
1049 return need_flush;
1050 }
1052 /*********************************************************************/
1054 #if SHADOW_HASH_DEBUG
1056 static void shadow_audit(struct domain *d, int print)
1058 int live = 0, free = 0, j = 0, abs;
1059 struct shadow_status *a;
1061 for ( j = 0; j < shadow_ht_buckets; j++ )
1063 a = &d->arch.shadow_ht[j];
1064 if ( a->gpfn_and_flags )
1066 live++;
1067 ASSERT(a->smfn);
1069 else
1070 ASSERT(!a->next);
1072 a = a->next;
1073 while ( a && (live < 9999) )
1075 live++;
1076 if ( (a->gpfn_and_flags == 0) || (a->smfn == 0) )
1078 printk("XXX live=%d gpfn+flags=%lx sp=%lx next=%p\n",
1079 live, a->gpfn_and_flags, a->smfn, a->next);
1080 BUG();
1082 ASSERT(a->smfn);
1083 a = a->next;
1085 ASSERT(live < 9999);
1088 for ( a = d->arch.shadow_ht_free; a != NULL; a = a->next )
1089 free++;
1091 if ( print )
1092 printk("Xlive=%d free=%d\n", live, free);
1094 // BUG: this only works if there's only a single domain which is
1095 // using shadow tables.
1096 //
1097 abs = (
1098 perfc_value(shadow_l1_pages) +
1099 perfc_value(shadow_l2_pages) +
1100 perfc_value(hl2_table_pages) +
1101 perfc_value(snapshot_pages) +
1102 perfc_value(writable_pte_predictions)
1103 ) - live;
1104 #ifdef PERF_COUNTERS
1105 if ( (abs < -1) || (abs > 1) )
1107 printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n",
1108 live, free,
1109 perfc_value(shadow_l1_pages),
1110 perfc_value(shadow_l2_pages),
1111 perfc_value(hl2_table_pages),
1112 perfc_value(snapshot_pages),
1113 perfc_value(writable_pte_predictions));
1114 BUG();
1116 #endif
1118 // XXX ought to add some code to audit the out-of-sync entries, too.
1119 //
1121 #else
1122 #define shadow_audit(p, print) ((void)0)
1123 #endif
1126 static inline struct shadow_status *hash_bucket(
1127 struct domain *d, unsigned int gpfn)
1128 {
1129 return &d->arch.shadow_ht[gpfn % shadow_ht_buckets];
1130 }
1133 /*
1134 * N.B. This takes a guest pfn (i.e. a pfn in the guest's namespace,
1135 * which, depending on full shadow mode, may or may not equal
1136 * its mfn).
1137 * It returns the shadow's mfn, or zero if it doesn't exist.
1138 */
1140 static inline unsigned long ___shadow_status(
1141 struct domain *d, unsigned long gpfn, unsigned long stype)
1143 struct shadow_status *p, *x, *head;
1144 unsigned long key = gpfn | stype;
1146 perfc_incrc(shadow_status_calls);
1148 x = head = hash_bucket(d, gpfn);
1149 p = NULL;
1151 //SH_VVLOG("lookup gpfn=%08x type=%08x bucket=%p", gpfn, stype, x);
1152 shadow_audit(d, 0);
1154 do
1156 ASSERT(x->gpfn_and_flags || ((x == head) && (x->next == NULL)));
1158 if ( x->gpfn_and_flags == key )
1160 #if SHADOW_DEBUG
1161 if ( unlikely(shadow_status_noswap) )
1162 return x->smfn;
1163 #endif
1164 /* Pull-to-front if 'x' isn't already the head item. */
1165 if ( unlikely(x != head) )
1167 /* Delete 'x' from list and reinsert immediately after head. */
1168 p->next = x->next;
1169 x->next = head->next;
1170 head->next = x;
1172 /* Swap 'x' contents with head contents. */
1173 SWAP(head->gpfn_and_flags, x->gpfn_and_flags);
1174 SWAP(head->smfn, x->smfn);
1176 else
1178 perfc_incrc(shadow_status_hit_head);
1181 //SH_VVLOG("lookup gpfn=%p => status=%p", key, head->smfn);
1182 return head->smfn;
1185 p = x;
1186 x = x->next;
1188 while ( x != NULL );
1190 //SH_VVLOG("lookup gpfn=%p => status=0", key);
1191 perfc_incrc(shadow_status_miss);
1192 return 0;
1195 static inline unsigned long __shadow_status(
1196 struct domain *d, unsigned long gpfn, unsigned long stype)
1198 unsigned long gmfn = ((current->domain == d)
1199 ? __gpfn_to_mfn(d, gpfn)
1200 : INVALID_MFN);
1202 ASSERT(shadow_lock_is_acquired(d));
1203 ASSERT(gpfn == (gpfn & PGT_mfn_mask));
1204 ASSERT(stype && !(stype & ~PGT_type_mask));
1206 if ( VALID_MFN(gmfn) && (gmfn < max_page) &&
1207 (stype != PGT_writable_pred) &&
1208 ((stype == PGT_snapshot)
1209 ? !mfn_out_of_sync(gmfn)
1210 : !mfn_is_page_table(gmfn)) )
1212 perfc_incrc(shadow_status_shortcut);
1213 #ifndef NDEBUG
1214 if ( ___shadow_status(d, gpfn, stype) != 0 )
1216 printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%" PRtype_info " "
1217 "mfn_out_of_sync(gmfn)=%d mfn_is_page_table(gmfn)=%d\n",
1218 d->domain_id, gpfn, gmfn, stype,
1219 frame_table[gmfn].count_info,
1220 frame_table[gmfn].u.inuse.type_info,
1221 mfn_out_of_sync(gmfn), mfn_is_page_table(gmfn));
1222 BUG();
1225 // Undo the effects of the above call to ___shadow_status()'s perf
1226 // counters, since that call is really just part of an assertion.
1227 //
1228 perfc_decrc(shadow_status_calls);
1229 perfc_decrc(shadow_status_miss);
1230 #endif
1231 return 0;
1234 return ___shadow_status(d, gpfn, stype);
1237 /*
1238 * Not clear if pull-to-front is worth while for this or not,
1239 * as it generally needs to scan the entire bucket anyway.
1240 * Much simpler without.
1242 * Either returns PGT_none, or PGT_l{1,2,3,4}_page_table.
1243 */
1244 static inline u32
1245 shadow_max_pgtable_type(struct domain *d, unsigned long gpfn,
1246 unsigned long *smfn)
1248 struct shadow_status *x;
1249 u32 pttype = PGT_none, type;
1251 ASSERT(shadow_lock_is_acquired(d));
1252 ASSERT(gpfn == (gpfn & PGT_mfn_mask));
1254 perfc_incrc(shadow_max_type);
1256 x = hash_bucket(d, gpfn);
1258 while ( x && x->gpfn_and_flags )
1260 if ( (x->gpfn_and_flags & PGT_mfn_mask) == gpfn )
1262 type = x->gpfn_and_flags & PGT_type_mask;
1264 switch ( type )
1266 case PGT_hl2_shadow:
1267 // Treat an HL2 as if it's an L1
1268 //
1269 type = PGT_l1_shadow;
1270 break;
1271 case PGT_snapshot:
1272 case PGT_writable_pred:
1273 // Ignore snapshots -- they don't in and of themselves constitute
1274 // treating a page as a page table
1275 //
1276 goto next;
1277 case PGT_base_page_table:
1278 // Early exit if we found the max possible value
1279 //
1280 return type;
1281 default:
1282 break;
1285 if ( type > pttype )
1287 pttype = type;
1288 if ( smfn )
1289 *smfn = x->smfn;
1292 next:
1293 x = x->next;
1296 return pttype;
1299 static inline void delete_shadow_status(
1300 struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
1302 struct shadow_status *p, *x, *n, *head;
1303 unsigned long key = gpfn | stype;
1305 ASSERT(shadow_lock_is_acquired(d));
1306 ASSERT(!(gpfn & ~PGT_mfn_mask));
1307 ASSERT(stype && !(stype & ~PGT_type_mask));
1309 head = hash_bucket(d, gpfn);
1311 SH_VLOG("delete gpfn=%lx t=%08x bucket=%p", gpfn, stype, head);
1312 shadow_audit(d, 0);
1314 /* Match on head item? */
1315 if ( head->gpfn_and_flags == key )
1317 if ( (n = head->next) != NULL )
1319 /* Overwrite head with contents of following node. */
1320 head->gpfn_and_flags = n->gpfn_and_flags;
1321 head->smfn = n->smfn;
1323 /* Delete following node. */
1324 head->next = n->next;
1326 /* Add deleted node to the free list. */
1327 n->gpfn_and_flags = 0;
1328 n->smfn = 0;
1329 n->next = d->arch.shadow_ht_free;
1330 d->arch.shadow_ht_free = n;
1332 else
1334 /* This bucket is now empty. Initialise the head node. */
1335 head->gpfn_and_flags = 0;
1336 head->smfn = 0;
1339 goto found;
1342 p = head;
1343 x = head->next;
1345 do
1347 if ( x->gpfn_and_flags == key )
1349 /* Delete matching node. */
1350 p->next = x->next;
1352 /* Add deleted node to the free list. */
1353 x->gpfn_and_flags = 0;
1354 x->smfn = 0;
1355 x->next = d->arch.shadow_ht_free;
1356 d->arch.shadow_ht_free = x;
1358 goto found;
1361 p = x;
1362 x = x->next;
1364 while ( x != NULL );
1366 /* If we got here, it wasn't in the list! */
1367 BUG();
1369 found:
1370 // release ref to page
1371 if ( stype != PGT_writable_pred )
1372 put_page(pfn_to_page(gmfn));
1374 shadow_audit(d, 0);
1377 static inline void set_shadow_status(
1378 struct domain *d, unsigned long gpfn, unsigned long gmfn,
1379 unsigned long smfn, unsigned long stype)
1381 struct shadow_status *x, *head, *extra;
1382 int i;
1383 unsigned long key = gpfn | stype;
1385 SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype);
1387 ASSERT(shadow_lock_is_acquired(d));
1389 ASSERT(shadow_mode_translate(d) || gpfn);
1390 ASSERT(!(gpfn & ~PGT_mfn_mask));
1392 // XXX - need to be more graceful.
1393 ASSERT(VALID_MFN(gmfn));
1395 ASSERT(stype && !(stype & ~PGT_type_mask));
1397 x = head = hash_bucket(d, gpfn);
1399 SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)",
1400 gpfn, smfn, stype, x, x->next);
1401 shadow_audit(d, 0);
1403 // grab a reference to the guest page to represent the entry in the shadow
1404 // hash table
1405 //
1406 // XXX - Should PGT_writable_pred grab a page ref?
1407 // - Who/how are these hash table entry refs flushed if/when a page
1408 // is given away by the domain?
1409 //
1410 if ( stype != PGT_writable_pred )
1411 get_page(pfn_to_page(gmfn), d);
1413 /*
1414 * STEP 1. If page is already in the table, update it in place.
1415 */
1416 do
1418 if ( unlikely(x->gpfn_and_flags == key) )
1420 if ( stype != PGT_writable_pred )
1421 BUG(); // we should never replace entries in the hash table
1422 x->smfn = smfn;
1423 if ( stype != PGT_writable_pred )
1424 put_page(pfn_to_page(gmfn)); // already had a ref...
1425 goto done;
1428 x = x->next;
1430 while ( x != NULL );
1432 /*
1433 * STEP 2. The page must be inserted into the table.
1434 */
1436 /* If the bucket is empty then insert the new page as the head item. */
1437 if ( head->gpfn_and_flags == 0 )
1439 head->gpfn_and_flags = key;
1440 head->smfn = smfn;
1441 ASSERT(head->next == NULL);
1442 goto done;
1445 /* We need to allocate a new node. Ensure the quicklist is non-empty. */
1446 if ( unlikely(d->arch.shadow_ht_free == NULL) )
1448 SH_VLOG("Allocate more shadow hashtable blocks.");
1450 extra = xmalloc_bytes(
1451 sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
1453 /* XXX Should be more graceful here. */
1454 if ( extra == NULL )
1455 BUG();
1457 memset(extra, 0, sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
1459 /* Record the allocation block so it can be correctly freed later. */
1460 d->arch.shadow_extras_count++;
1461 *((struct shadow_status **)&extra[shadow_ht_extra_size]) =
1462 d->arch.shadow_ht_extras;
1463 d->arch.shadow_ht_extras = &extra[0];
1465 /* Thread a free chain through the newly-allocated nodes. */
1466 for ( i = 0; i < (shadow_ht_extra_size - 1); i++ )
1467 extra[i].next = &extra[i+1];
1468 extra[i].next = NULL;
1470 /* Add the new nodes to the free list. */
1471 d->arch.shadow_ht_free = &extra[0];
1474 /* Allocate a new node from the quicklist. */
1475 x = d->arch.shadow_ht_free;
1476 d->arch.shadow_ht_free = x->next;
1478 /* Initialise the new node and insert directly after the head item. */
1479 x->gpfn_and_flags = key;
1480 x->smfn = smfn;
1481 x->next = head->next;
1482 head->next = x;
1484 done:
1485 shadow_audit(d, 0);
1487 if ( stype <= PGT_l4_shadow )
1489 // add to front of list of pages to check when removing write
1490 // permissions for a page...
1491 //
1495 /************************************************************************/
1497 void static inline
1498 shadow_update_min_max(unsigned long smfn, int index)
1499 {
1500 struct pfn_info *sl1page = pfn_to_page(smfn);
1501 u32 min_max = sl1page->tlbflush_timestamp;
1502 int min = SHADOW_MIN(min_max);
1503 int max = SHADOW_MAX(min_max);
1504 int update = 0;
1506 if ( index < min )
1507 {
1508 min = index;
1509 update = 1;
1510 }
1511 if ( index > max )
1512 {
1513 max = index;
1514 update = 1;
1515 }
1516 if ( update )
1517 sl1page->tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max);
1518 }
1520 #if CONFIG_PAGING_LEVELS <= 2
1521 extern void shadow_map_l1_into_current_l2(unsigned long va);
1523 void static inline
1524 shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
1525 {
1526 struct vcpu *v = current;
1527 struct domain *d = v->domain;
1528 l2_pgentry_t sl2e = {0};
1530 __shadow_get_l2e(v, va, &sl2e);
1531 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
1532 {
1533 /*
1534 * Either the L1 is not shadowed, or the shadow isn't linked into
1535 * the current shadow L2.
1536 */
1537 if ( create_l1_shadow )
1538 {
1539 perfc_incrc(shadow_set_l1e_force_map);
1540 shadow_map_l1_into_current_l2(va);
1541 }
1542 else /* check to see if it exists; if so, link it in */
1543 {
1544 l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
1545 unsigned long gl1pfn = l2e_get_pfn(gpde);
1546 unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
1548 ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
1550 if ( sl1mfn )
1551 {
1552 perfc_incrc(shadow_set_l1e_unlinked);
1553 if ( !get_shadow_ref(sl1mfn) )
1554 BUG();
1555 l2pde_general(d, &gpde, &sl2e, sl1mfn);
1556 __guest_set_l2e(v, va, gpde);
1557 __shadow_set_l2e(v, va, sl2e);
1558 }
1559 else
1560 {
1561 // no shadow exists, so there's nothing to do.
1562 perfc_incrc(shadow_set_l1e_fail);
1563 return;
1564 }
1565 }
1568 __shadow_get_l2e(v, va, &sl2e);
1569 }
1570 if ( shadow_mode_refcounts(d) )
1571 {
1572 l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
1574 // only do the ref counting if something important changed.
1575 //
1576 if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
1577 {
1578 if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
1579 !shadow_get_page_from_l1e(new_spte, d) )
1580 new_spte = l1e_empty();
1581 if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
1582 shadow_put_page_from_l1e(old_spte, d);
1583 }
1584 }
1587 set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
1588 shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
1589 shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
1590 }
1591 #endif
1592 /************************************************************************/
1594 static inline int
1595 shadow_mode_page_writable(unsigned long va, struct cpu_user_regs *regs, unsigned long gpfn)
1597 struct vcpu *v = current;
1598 struct domain *d = v->domain;
1599 unsigned long mfn = __gpfn_to_mfn(d, gpfn);
1600 u32 type = frame_table[mfn].u.inuse.type_info & PGT_type_mask;
1602 if ( shadow_mode_refcounts(d) &&
1603 (type == PGT_writable_page) )
1604 type = shadow_max_pgtable_type(d, gpfn, NULL);
1606 // Strange but true: writable page tables allow kernel-mode access
1607 // to L1 page table pages via write-protected PTEs... Similarly, write
1608 // access to all page table pages is granted for shadow_mode_write_all
1609 // clients.
1610 //
1611 if ( ((shadow_mode_write_l1(d) && (type == PGT_l1_page_table)) ||
1612 (shadow_mode_write_all(d) && type && (type <= PGT_l4_page_table))) &&
1613 ((va < HYPERVISOR_VIRT_START)
1614 #if defined(__x86_64__)
1615 || (va >= HYPERVISOR_VIRT_END)
1616 #endif
1617 ) &&
1618 KERNEL_MODE(v, regs) )
1619 return 1;
1621 return 0;
1624 #if CONFIG_PAGING_LEVELS <= 2
1625 static inline l1_pgentry_t gva_to_gpte(unsigned long gva)
1626 {
1627 l2_pgentry_t gpde;
1628 l1_pgentry_t gpte;
1629 struct vcpu *v = current;
1631 ASSERT( shadow_mode_translate(current->domain) );
1633 __guest_get_l2e(v, gva, &gpde);
1634 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
1635 return l1e_empty();
1637 // This is actually overkill - we only need to make sure the hl2
1638 // is in-sync.
1639 //
1640 shadow_sync_va(v, gva);
1642 if ( unlikely(__copy_from_user(&gpte,
1643 &linear_pg_table[gva >> PAGE_SHIFT],
1644 sizeof(gpte))) )
1645 {
1646 FSH_LOG("gva_to_gpte got a fault on gva=%lx", gva);
1647 return l1e_empty();
1648 }
1650 return gpte;
1651 }
1653 static inline unsigned long gva_to_gpa(unsigned long gva)
1654 {
1655 l1_pgentry_t gpte;
1657 gpte = gva_to_gpte(gva);
1658 if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )
1659 return 0;
1661 return l1e_get_paddr(gpte) + (gva & ~PAGE_MASK);
1662 }
1663 #endif
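
Usage sketch (illustrative, not from the source): turning a guest-virtual address into a machine address goes va to guest-physical via gva_to_gpa(), then guest frame to machine frame through the p2m; a zero return keeps the original convention of 0 meaning "not mapped".

static inline physaddr_t example_gva_to_maddr(unsigned long gva)
{
    unsigned long gpa = gva_to_gpa(gva);
    unsigned long mfn;

    if ( gpa == 0 )
        return 0;                                   /* not present */

    mfn = __gpfn_to_mfn(current->domain, gpa >> PAGE_SHIFT);
    if ( !VALID_MFN(mfn) )
        return 0;

    return ((physaddr_t)mfn << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
}
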
1664 /************************************************************************/
1666 extern void __update_pagetables(struct vcpu *v);
1667 static inline void update_pagetables(struct vcpu *v)
1668 {
1669 struct domain *d = v->domain;
1670 int paging_enabled;
1672 #ifdef CONFIG_VMX
1673 if ( VMX_DOMAIN(v) )
1674 paging_enabled = vmx_paging_enabled(v);
1676 else
1677 #endif
1678 // HACK ALERT: there's currently no easy way to figure out if a domU
1679 // has set its arch.guest_table to zero, vs not yet initialized it.
1680 //
1681 paging_enabled = !!pagetable_get_paddr(v->arch.guest_table);
1683 /*
1684 * We don't call __update_pagetables() when vmx guest paging is
1685 * disabled as we want the linear_pg_table to be inaccessible so that
1686 * we bail out early of shadow_fault() if the vmx guest tries illegal
1687 * accesses while it thinks paging is turned off.
1688 */
1689 if ( unlikely(shadow_mode_enabled(d)) && paging_enabled )
1690 {
1691 shadow_lock(d);
1692 __update_pagetables(v);
1693 shadow_unlock(d);
1694 }
1696 if ( likely(!shadow_mode_external(d)) )
1697 {
1698 if ( shadow_mode_enabled(d) )
1699 v->arch.monitor_table = v->arch.shadow_table;
1700 else
1701 #if CONFIG_PAGING_LEVELS == 4
1702 if ( !(v->arch.flags & TF_kernel_mode) )
1703 v->arch.monitor_table = v->arch.guest_table_user;
1704 else
1705 #endif
1706 v->arch.monitor_table = v->arch.guest_table;
1707 }
1708 }
1710 #if SHADOW_DEBUG
1711 extern int _check_pagetable(struct vcpu *v, char *s);
1712 extern int _check_all_pagetables(struct vcpu *v, char *s);
1714 #define check_pagetable(_v, _s) _check_pagetable(_v, _s)
1715 //#define check_pagetable(_v, _s) _check_all_pagetables(_v, _s)
1717 #else
1718 #define check_pagetable(_v, _s) ((void)0)
1719 #endif
1721 #endif /* XEN_SHADOW_H */
1723 /*
1724 * Local variables:
1725 * mode: C
1726 * c-set-style: "BSD"
1727 * c-basic-offset: 4
1728 * tab-width: 4
1729 * indent-tabs-mode: nil
1730 * End:
1731 */