direct-io.hg

view xen/include/asm-x86/shadow.h @ 11135:88e6bd5e2b54

Whitespace clean-ups.

Signed-off-by: Steven Hand <steven@xensource.com>
author shand@kneesaa.uk.xensource.com
date Wed Aug 16 11:36:13 2006 +0100 (2006-08-16)
parents bfe12b4d45d3
children 0f917d63e960
line source
1 /******************************************************************************
2 * include/asm-x86/shadow.h
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 #ifndef _XEN_SHADOW_H
23 #define _XEN_SHADOW_H
25 #include <xen/config.h>
26 #include <xen/types.h>
27 #include <xen/perfc.h>
28 #include <xen/sched.h>
29 #include <xen/mm.h>
30 #include <xen/domain_page.h>
31 #include <asm/current.h>
32 #include <asm/flushtlb.h>
33 #include <asm/processor.h>
34 #include <asm/hvm/hvm.h>
35 #include <asm/hvm/support.h>
36 #include <asm/regs.h>
37 #include <public/dom0_ops.h>
38 #include <asm/shadow_public.h>
39 #include <asm/page-guest32.h>
40 #include <asm/shadow_ops.h>
42 /* Shadow PT operation mode : shadow-mode variable in arch_domain. */
44 #define SHM_enable (1<<0) /* we're in one of the shadow modes */
45 #define SHM_refcounts (1<<1) /* refcounts based on shadow tables instead of
46 guest tables */
47 #define SHM_write_all (1<<2) /* allow write access to all guest pt pages,
48 regardless of pte write permissions */
49 #define SHM_log_dirty (1<<3) /* enable log dirty mode */
50 #define SHM_translate (1<<4) /* Xen does p2m translation, not guest */
51 #define SHM_external (1<<5) /* Xen does not steal address space from the
52 domain for its own bookkeeping; requires VT or
53 similar mechanisms */
54 #define SHM_wr_pt_pte (1<<6) /* guest allowed to set PAGE_RW bit in PTEs which
55 point to page table pages. */
57 #define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode)
58 #define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts)
59 #define shadow_mode_write_l1(_d) (VM_ASSIST(_d, VMASST_TYPE_writable_pagetables))
60 #define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all)
61 #define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
62 #define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
63 #define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external)
64 #define shadow_mode_wr_pt_pte(_d) ((_d)->arch.shadow_mode & SHM_wr_pt_pte)
66 #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
67 #define __shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
68 (SH_LINEAR_PT_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
69 #define shadow_linear_l2_table(_v) ((_v)->arch.shadow_vtable)
71 // easy access to the hl2 table (for translated but not external modes only)
72 #define __linear_hl2_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START + \
73 (PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
75 /*
76 * For now we use the per-domain BIGLOCK rather than a shadow-specific lock.
77 * We usually have the BIGLOCK already acquired anyway, so this is unlikely
78 * to cause much unnecessary extra serialisation. Also it's a recursive
79 * lock, and there are some code paths containing nested shadow_lock().
80 * The #if0'ed code below is therefore broken until such nesting is removed.
81 */
82 #if 0
83 #define shadow_lock_init(_d) \
84 spin_lock_init(&(_d)->arch.shadow_lock)
85 #define shadow_lock_is_acquired(_d) \
86 spin_is_locked(&(_d)->arch.shadow_lock)
87 #define shadow_lock(_d) \
88 do { \
89 ASSERT(!shadow_lock_is_acquired(_d)); \
90 spin_lock(&(_d)->arch.shadow_lock); \
91 } while (0)
92 #define shadow_unlock(_d) \
93 do { \
94 ASSERT(!shadow_lock_is_acquired(_d)); \
95 spin_unlock(&(_d)->arch.shadow_lock); \
96 } while (0)
97 #else
98 #define shadow_lock_init(_d) \
99 ((_d)->arch.shadow_nest = 0)
100 #define shadow_lock_is_acquired(_d) \
101 (spin_is_locked(&(_d)->big_lock) && ((_d)->arch.shadow_nest != 0))
102 #define shadow_lock(_d) \
103 do { \
104 LOCK_BIGLOCK(_d); \
105 (_d)->arch.shadow_nest++; \
106 } while (0)
107 #define shadow_unlock(_d) \
108 do { \
109 ASSERT(shadow_lock_is_acquired(_d)); \
110 (_d)->arch.shadow_nest--; \
111 UNLOCK_BIGLOCK(_d); \
112 } while (0)
113 #endif
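/*
 * Note: in the variant actually compiled above, shadow_lock() simply takes
 * the (recursive) per-domain BIGLOCK and bumps shadow_nest, so nested
 * shadow_lock() calls are safe; shadow_lock_is_acquired() checks both the
 * BIGLOCK and a non-zero nesting count.
 */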
115 #if CONFIG_PAGING_LEVELS >= 3
116 static inline u64 get_cr3_idxval(struct vcpu *v)
117 {
118 u64 pae_cr3;
120 if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 &&
121 !shadow_mode_log_dirty(v->domain) )
122 {
123 pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
124 return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
125 }
126 else
127 return 0;
128 }
130 #define shadow_key_t u64
131 #define index_to_key(x) ((x) << 32)
132 #else
133 #define get_cr3_idxval(v) (0)
134 #define shadow_key_t unsigned long
135 #define index_to_key(x) (0)
136 #endif
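/*
 * Shadow hash keys are formed as (gpfn | shadow type); on builds with
 * CONFIG_PAGING_LEVELS >= 3 the PAE CR3 index from get_cr3_idxval() is
 * additionally folded into the upper 32 bits via index_to_key(), so shadows
 * can be distinguished per CR3 slot. On 2-level builds shadow_key_t is a
 * plain unsigned long and the index is ignored.
 */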
139 #define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
140 #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
141 #define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))
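/*
 * SHADOW_ENCODE_MIN_MAX() packs a [min, max] range of L1 indices into a
 * single 32-bit value (min in the low 16 bits, the distance of max from the
 * last guest L1 index in the high 16 bits); SHADOW_MIN()/SHADOW_MAX() recover
 * the two ends. shadow_update_min_max() below keeps such a range in a shadow
 * L1 page's tlbflush_timestamp field to record which entries have been used.
 */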
142 extern void shadow_direct_map_clean(struct domain *d);
143 extern int shadow_direct_map_init(struct domain *d);
144 extern int shadow_direct_map_fault(
145 unsigned long vpa, struct cpu_user_regs *regs);
146 extern void shadow_mode_init(void);
147 extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc);
148 extern int shadow_fault(unsigned long va, struct cpu_user_regs *regs);
149 extern int shadow_mode_enable(struct domain *p, unsigned int mode);
150 extern void shadow_invlpg(struct vcpu *, unsigned long);
151 extern struct out_of_sync_entry *shadow_mark_mfn_out_of_sync(
152 struct vcpu *v, unsigned long gpfn, unsigned long mfn);
153 extern void free_monitor_pagetable(struct vcpu *v);
154 extern void __shadow_sync_all(struct domain *d);
155 extern int __shadow_out_of_sync(struct vcpu *v, unsigned long va);
156 extern int set_p2m_entry(
157 struct domain *d, unsigned long pfn, unsigned long mfn,
158 struct domain_mmap_cache *l2cache,
159 struct domain_mmap_cache *l1cache);
160 extern void remove_shadow(struct domain *d, unsigned long gpfn, u32 stype);
162 extern void free_shadow_page(unsigned long smfn);
164 extern void shadow_l1_normal_pt_update(struct domain *d,
165 paddr_t pa, l1_pgentry_t l1e,
166 struct domain_mmap_cache *cache);
167 extern void shadow_l2_normal_pt_update(struct domain *d,
168 paddr_t pa, l2_pgentry_t l2e,
169 struct domain_mmap_cache *cache);
170 #if CONFIG_PAGING_LEVELS >= 3
171 #include <asm/page-guest32.h>
172 /*
173 * va_mask cannot be used because it's used by the shadow hash.
174 * Use the score area for now.
175 */
176 #define is_xen_l2_slot(t,s) \
177 ( ((((t) & PGT_score_mask) >> PGT_score_shift) == 3) && \
178 ((s) >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
180 extern unsigned long gva_to_gpa(unsigned long gva);
181 extern void shadow_l3_normal_pt_update(struct domain *d,
182 paddr_t pa, l3_pgentry_t l3e,
183 struct domain_mmap_cache *cache);
184 #endif
185 #if CONFIG_PAGING_LEVELS >= 4
186 extern void shadow_l4_normal_pt_update(struct domain *d,
187 paddr_t pa, l4_pgentry_t l4e,
188 struct domain_mmap_cache *cache);
189 #endif
190 extern int shadow_do_update_va_mapping(unsigned long va,
191 l1_pgentry_t val,
192 struct vcpu *v);
195 static inline unsigned long __shadow_status(
196 struct domain *d, unsigned long gpfn, unsigned long stype);
198 #if CONFIG_PAGING_LEVELS <= 2
199 static inline void update_hl2e(struct vcpu *v, unsigned long va);
200 #endif
202 static inline int page_is_page_table(struct page_info *page)
203 {
204 struct domain *owner = page_get_owner(page);
205 u32 type_info;
207 if ( owner && shadow_mode_refcounts(owner) )
208 return page->count_info & PGC_page_table;
210 type_info = page->u.inuse.type_info & PGT_type_mask;
211 return type_info && (type_info <= PGT_l4_page_table);
212 }
214 static inline int mfn_is_page_table(unsigned long mfn)
215 {
216 if ( !mfn_valid(mfn) )
217 return 0;
219 return page_is_page_table(mfn_to_page(mfn));
220 }
222 static inline int page_out_of_sync(struct page_info *page)
223 {
224 return page->count_info & PGC_out_of_sync;
225 }
227 static inline int mfn_out_of_sync(unsigned long mfn)
228 {
229 if ( !mfn_valid(mfn) )
230 return 0;
232 return page_out_of_sync(mfn_to_page(mfn));
233 }
236 /************************************************************************/
238 static void inline
239 __shadow_sync_mfn(struct domain *d, unsigned long mfn)
240 {
241 if ( d->arch.out_of_sync )
242 {
243 // XXX - could be smarter
244 //
245 __shadow_sync_all(d);
246 }
247 }
249 static void inline
250 __shadow_sync_va(struct vcpu *v, unsigned long va)
251 {
252 struct domain *d = v->domain;
254 if ( d->arch.out_of_sync && __shadow_out_of_sync(v, va) )
255 {
256 perfc_incrc(shadow_sync_va);
258 // XXX - could be smarter
259 //
260 __shadow_sync_all(v->domain);
261 }
262 #if CONFIG_PAGING_LEVELS <= 2
263 // Also make sure the HL2 is up-to-date for this address.
264 //
265 if ( unlikely(shadow_mode_translate(v->domain)) )
266 update_hl2e(v, va);
267 #endif
268 }
270 static void inline
271 shadow_sync_all(struct domain *d)
272 {
273 if ( unlikely(shadow_mode_enabled(d)) )
274 {
275 shadow_lock(d);
277 if ( d->arch.out_of_sync )
278 __shadow_sync_all(d);
280 ASSERT(d->arch.out_of_sync == NULL);
282 shadow_unlock(d);
283 }
284 }
286 // SMP BUG: This routine can't ever be used properly in an SMP context.
287 // It should be something like get_shadow_and_sync_va().
288 // This probably shouldn't exist.
289 //
290 static void inline
291 shadow_sync_va(struct vcpu *v, unsigned long gva)
292 {
293 struct domain *d = v->domain;
294 if ( unlikely(shadow_mode_enabled(d)) )
295 {
296 shadow_lock(d);
297 __shadow_sync_va(v, gva);
298 shadow_unlock(d);
299 }
300 }
302 extern void __shadow_mode_disable(struct domain *d);
303 static inline void shadow_mode_disable(struct domain *d)
304 {
305 if ( unlikely(shadow_mode_enabled(d)) )
306 {
307 shadow_lock(d);
308 __shadow_mode_disable(d);
309 shadow_unlock(d);
310 }
311 }
313 /************************************************************************/
315 #define mfn_to_gmfn(_d, mfn) \
316 ( (shadow_mode_translate(_d)) \
317 ? get_gpfn_from_mfn(mfn) \
318 : (mfn) )
320 #define gmfn_to_mfn(_d, gpfn) \
321 ({ \
322 unlikely(shadow_mode_translate(_d)) \
323 ? (likely(current->domain == (_d)) \
324 ? get_mfn_from_gpfn(gpfn) \
325 : get_mfn_from_gpfn_foreign(_d, gpfn)) \
326 : (gpfn); \
327 })
329 extern unsigned long get_mfn_from_gpfn_foreign(
330 struct domain *d, unsigned long gpfn);
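/*
 * In translate mode the two macros above go through the p2m/m2p tables
 * (using the _foreign variant when the domain in question is not the
 * currently running one); in all other modes gpfn == mfn and both macros
 * are identity mappings.
 */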
332 /************************************************************************/
334 struct shadow_status {
335 struct shadow_status *next; /* Pull-to-front list per hash bucket. */
336 shadow_key_t gpfn_and_flags; /* Guest pfn plus flags. */
337 unsigned long smfn; /* Shadow mfn. */
338 };
340 #define shadow_ht_extra_size 128
341 #define shadow_ht_buckets 256
343 struct out_of_sync_entry {
344 struct out_of_sync_entry *next;
345 struct vcpu *v;
346 unsigned long gpfn; /* why is this here? */
347 unsigned long gmfn;
348 unsigned long snapshot_mfn;
349 paddr_t writable_pl1e; /* NB: this is a machine address */
350 unsigned long va;
351 };
353 #define out_of_sync_extra_size 127
355 #define SHADOW_SNAPSHOT_ELSEWHERE (-1L)
357 /************************************************************************/
358 #define SHADOW_DEBUG 0
359 #define SHADOW_VERBOSE_DEBUG 0
360 #define SHADOW_VVERBOSE_DEBUG 0
361 #define SHADOW_VVVERBOSE_DEBUG 0
362 #define SHADOW_HASH_DEBUG 0
363 #define FULLSHADOW_DEBUG 0
365 #if SHADOW_DEBUG
366 extern int shadow_status_noswap;
367 #define SHADOW_REFLECTS_SNAPSHOT _PAGE_AVAIL0
368 #endif
370 #if SHADOW_VERBOSE_DEBUG
371 #define SH_LOG(_f, _a...) \
372 printk("DOM%uP%u: SH_LOG(%d): " _f "\n", \
373 current->domain->domain_id , smp_processor_id(), __LINE__ , ## _a )
374 #define SH_VLOG(_f, _a...) \
375 printk("DOM%uP%u: SH_VLOG(%d): " _f "\n", \
376 current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
377 #else
378 #define SH_LOG(_f, _a...) ((void)0)
379 #define SH_VLOG(_f, _a...) ((void)0)
380 #endif
382 #if SHADOW_VVERBOSE_DEBUG
383 #define SH_VVLOG(_f, _a...) \
384 printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n", \
385 current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
386 #else
387 #define SH_VVLOG(_f, _a...) ((void)0)
388 #endif
390 #if SHADOW_VVVERBOSE_DEBUG
391 #define SH_VVVLOG(_f, _a...) \
392 printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n", \
393 current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
394 #else
395 #define SH_VVVLOG(_f, _a...) ((void)0)
396 #endif
398 #if FULLSHADOW_DEBUG
399 #define FSH_LOG(_f, _a...) \
400 printk("DOM%uP%u: FSH_LOG(%d): " _f "\n", \
401 current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
402 #else
403 #define FSH_LOG(_f, _a...) ((void)0)
404 #endif
407 /************************************************************************/
409 static inline int
410 shadow_get_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
411 {
412 l1_pgentry_t nl1e;
413 int res;
414 unsigned long mfn;
415 struct domain *owner;
417 ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
419 if ( !shadow_mode_refcounts(d) )
420 return 1;
422 nl1e = l1e;
423 l1e_remove_flags(nl1e, _PAGE_GLOBAL);
425 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
426 return 0;
428 res = get_page_from_l1e(nl1e, d);
430 if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
431 !(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) &&
432 (mfn = l1e_get_pfn(nl1e)) &&
433 mfn_valid(mfn) &&
434 (owner = page_get_owner(mfn_to_page(mfn))) &&
435 (d != owner) )
436 {
437 res = get_page_from_l1e(nl1e, owner);
438 printk("tried to map mfn %lx from domain %d into shadow page tables "
439 "of domain %d; %s\n",
440 mfn, owner->domain_id, d->domain_id,
441 res ? "success" : "failed");
442 }
444 if ( unlikely(!res) )
445 {
446 perfc_incrc(shadow_get_page_fail);
447 FSH_LOG("%s failed to get ref l1e=%" PRIpte "\n",
448 __func__, l1e_get_intpte(l1e));
449 }
451 return res;
452 }
454 static inline void
455 shadow_put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
456 {
457 if ( !shadow_mode_refcounts(d) )
458 return;
460 put_page_from_l1e(l1e, d);
461 }
463 static inline void
464 shadow_put_page_type(struct domain *d, struct page_info *page)
465 {
466 if ( !shadow_mode_refcounts(d) )
467 return;
469 put_page_type(page);
470 }
472 static inline int shadow_get_page(struct domain *d,
473 struct page_info *page,
474 struct domain *owner)
475 {
476 if ( !shadow_mode_refcounts(d) )
477 return 1;
478 return get_page(page, owner);
479 }
481 static inline void shadow_put_page(struct domain *d,
482 struct page_info *page)
483 {
484 if ( !shadow_mode_refcounts(d) )
485 return;
486 put_page(page);
487 }
489 /************************************************************************/
491 static inline void __mark_dirty(struct domain *d, unsigned long mfn)
492 {
493 unsigned long pfn;
495 ASSERT(shadow_lock_is_acquired(d));
497 if ( likely(!shadow_mode_log_dirty(d)) || !VALID_MFN(mfn) )
498 return;
500 ASSERT(d->arch.shadow_dirty_bitmap != NULL);
502 /* We /really/ mean PFN here, even for non-translated guests. */
503 pfn = get_gpfn_from_mfn(mfn);
505 /*
506 * Values with the MSB set denote MFNs that aren't really part of the
507 * domain's pseudo-physical memory map (e.g., the shared info frame).
508 * Nothing to do here...
509 */
510 if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) )
511 return;
513 /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
514 if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) &&
515 !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) )
516 {
517 d->arch.shadow_dirty_count++;
518 }
519 #ifndef NDEBUG
520 else if ( mfn_valid(mfn) )
521 {
522 SH_VLOG("mark_dirty OOR! mfn=%lx pfn=%lx max=%x (dom %p)",
523 mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
524 SH_VLOG("dom=%p caf=%08x taf=%" PRtype_info,
525 page_get_owner(mfn_to_page(mfn)),
526 mfn_to_page(mfn)->count_info,
527 mfn_to_page(mfn)->u.inuse.type_info );
528 }
529 #endif
530 }
533 static inline void mark_dirty(struct domain *d, unsigned int mfn)
534 {
535 if ( unlikely(shadow_mode_log_dirty(d)) )
536 {
537 shadow_lock(d);
538 __mark_dirty(d, mfn);
539 shadow_unlock(d);
540 }
541 }
544 /************************************************************************/
545 #if CONFIG_PAGING_LEVELS <= 2
546 static inline void
547 __shadow_get_l2e(
548 struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
549 {
550 ASSERT(shadow_mode_enabled(v->domain));
552 *psl2e = v->arch.shadow_vtable[l2_table_offset(va)];
553 }
555 static inline void
556 __shadow_set_l2e(
557 struct vcpu *v, unsigned long va, l2_pgentry_t value)
558 {
559 ASSERT(shadow_mode_enabled(v->domain));
561 v->arch.shadow_vtable[l2_table_offset(va)] = value;
562 }
564 static inline void
565 __guest_get_l2e(
566 struct vcpu *v, unsigned long va, l2_pgentry_t *pl2e)
567 {
568 *pl2e = v->arch.guest_vtable[l2_table_offset(va)];
569 }
571 static inline void
572 __guest_set_l2e(
573 struct vcpu *v, unsigned long va, l2_pgentry_t value)
574 {
575 struct domain *d = v->domain;
577 v->arch.guest_vtable[l2_table_offset(va)] = value;
579 if ( unlikely(shadow_mode_translate(d)) )
580 update_hl2e(v, va);
582 __mark_dirty(d, pagetable_get_pfn(v->arch.guest_table));
583 }
585 static inline void
586 __direct_get_l2e(
587 struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
588 {
589 l2_pgentry_t *phys_vtable;
591 ASSERT(shadow_mode_enabled(v->domain));
593 phys_vtable = map_domain_page(
594 pagetable_get_pfn(v->domain->arch.phys_table));
596 *psl2e = phys_vtable[l2_table_offset(va)];
598 unmap_domain_page(phys_vtable);
599 }
601 static inline void
602 __direct_set_l2e(
603 struct vcpu *v, unsigned long va, l2_pgentry_t value)
604 {
605 l2_pgentry_t *phys_vtable;
607 ASSERT(shadow_mode_enabled(v->domain));
609 phys_vtable = map_domain_page(
610 pagetable_get_pfn(v->domain->arch.phys_table));
612 phys_vtable[l2_table_offset(va)] = value;
614 unmap_domain_page(phys_vtable);
615 }
617 static inline void
618 update_hl2e(struct vcpu *v, unsigned long va)
619 {
620 int index = l2_table_offset(va);
621 unsigned long mfn;
622 l2_pgentry_t gl2e = v->arch.guest_vtable[index];
623 l1_pgentry_t old_hl2e, new_hl2e;
624 int need_flush = 0;
626 ASSERT(shadow_mode_translate(v->domain));
628 old_hl2e = v->arch.hl2_vtable[index];
630 if ( (l2e_get_flags(gl2e) & _PAGE_PRESENT) &&
631 VALID_MFN(mfn = get_mfn_from_gpfn(l2e_get_pfn(gl2e))) )
632 new_hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
633 else
634 new_hl2e = l1e_empty();
636 // only do the ref counting if something has changed.
637 //
638 if ( (l1e_has_changed(old_hl2e, new_hl2e, PAGE_FLAG_MASK)) )
639 {
640 if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
641 !shadow_get_page(v->domain, mfn_to_page(l1e_get_pfn(new_hl2e)),
642 v->domain) )
643 new_hl2e = l1e_empty();
644 if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
645 {
646 shadow_put_page(v->domain, mfn_to_page(l1e_get_pfn(old_hl2e)));
647 need_flush = 1;
648 }
650 v->arch.hl2_vtable[l2_table_offset(va)] = new_hl2e;
652 if ( need_flush )
653 {
654 perfc_incrc(update_hl2e_invlpg);
655 flush_tlb_one_mask(v->domain->domain_dirty_cpumask,
656 &linear_pg_table[l1_linear_offset(va)]);
657 }
658 }
659 }
661 static inline void shadow_drop_references(
662 struct domain *d, struct page_info *page)
663 {
664 if ( likely(!shadow_mode_refcounts(d)) ||
665 ((page->u.inuse.type_info & PGT_count_mask) == 0) )
666 return;
668 /* XXX This needs more thought... */
669 printk("%s: needing to call shadow_remove_all_access for mfn=%lx\n",
670 __func__, page_to_mfn(page));
671 printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page),
672 page->count_info, page->u.inuse.type_info);
674 shadow_lock(d);
675 shadow_remove_all_access(d, page_to_mfn(page));
676 shadow_unlock(d);
678 printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page),
679 page->count_info, page->u.inuse.type_info);
680 }
682 /* XXX Needs more thought. Neither pretty nor fast: a place holder. */
683 static inline void shadow_sync_and_drop_references(
684 struct domain *d, struct page_info *page)
685 {
686 if ( likely(!shadow_mode_refcounts(d)) )
687 return;
689 if ( page_out_of_sync(page) )
690 __shadow_sync_mfn(d, page_to_mfn(page));
692 shadow_remove_all_access(d, page_to_mfn(page));
693 }
694 #endif
696 /************************************************************************/
698 /*
699 * Add another shadow reference to smfn.
700 */
701 static inline int
702 get_shadow_ref(unsigned long smfn)
703 {
704 u32 x, nx;
706 ASSERT(mfn_valid(smfn));
708 x = mfn_to_page(smfn)->count_info;
709 nx = x + 1;
711 if ( unlikely(nx == 0) )
712 {
713 printk("get_shadow_ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
714 mfn_to_page(smfn)->u.inuse.type_info & PGT_mfn_mask,
715 smfn);
716 BUG();
717 }
719 // Guarded by the shadow lock...
720 //
721 mfn_to_page(smfn)->count_info = nx;
723 return 1;
724 }
726 /*
727 * Drop a shadow reference to smfn.
728 */
729 static inline void
730 put_shadow_ref(unsigned long smfn)
731 {
732 u32 x, nx;
734 ASSERT(mfn_valid(smfn));
736 x = mfn_to_page(smfn)->count_info;
737 nx = x - 1;
739 if ( unlikely(x == 0) )
740 {
741 printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%"
742 PRtype_info "\n",
743 smfn,
744 mfn_to_page(smfn)->count_info,
745 mfn_to_page(smfn)->u.inuse.type_info);
746 BUG();
747 }
749 // Guarded by the shadow lock...
750 //
751 mfn_to_page(smfn)->count_info = nx;
753 if ( unlikely(nx == 0) )
754 {
755 free_shadow_page(smfn);
756 }
757 }
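/*
 * Pinning sets PGT_pinned on the shadow page and takes an extra shadow
 * reference, so a pinned shadow is never freed by put_shadow_ref() alone;
 * shadow_unpin() clears the bit and drops that reference again.
 */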
759 static inline void
760 shadow_pin(unsigned long smfn)
761 {
762 ASSERT( !(mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) );
764 mfn_to_page(smfn)->u.inuse.type_info |= PGT_pinned;
765 if ( unlikely(!get_shadow_ref(smfn)) )
766 BUG();
767 }
769 static inline void
770 shadow_unpin(unsigned long smfn)
771 {
772 ASSERT( (mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) );
774 mfn_to_page(smfn)->u.inuse.type_info &= ~PGT_pinned;
775 put_shadow_ref(smfn);
776 }
778 /*
779 * SMP issue. The following code assumes the shadow lock is held. Re-visit
780 * when working on finer-grained locks for shadow.
781 */
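/*
 * For external-mode domains, set_guest_back_ptr() records where a guest page
 * is writably mapped in the shadows: the shadow L1 mfn is stashed in the
 * guest page's tlbflush_timestamp and the entry index in its PGT_va bits.
 * Entries that are not both present and writable are ignored.
 */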
782 static inline void set_guest_back_ptr(
783 struct domain *d, l1_pgentry_t spte,
784 unsigned long smfn, unsigned int index)
785 {
786 struct page_info *gpage;
788 ASSERT(shadow_lock_is_acquired(d));
790 if ( !shadow_mode_external(d) ||
791 ((l1e_get_flags(spte) & (_PAGE_PRESENT|_PAGE_RW)) !=
792 (_PAGE_PRESENT|_PAGE_RW)) )
793 return;
795 gpage = l1e_get_page(spte);
797 ASSERT(smfn != 0);
798 ASSERT(page_to_mfn(gpage) != 0);
800 gpage->tlbflush_timestamp = smfn;
801 gpage->u.inuse.type_info &= ~PGT_va_mask;
802 gpage->u.inuse.type_info |= (unsigned long)index << PGT_va_shift;
803 }
805 /************************************************************************/
806 #if CONFIG_PAGING_LEVELS <= 2
807 extern void shadow_mark_va_out_of_sync(
808 struct vcpu *v, unsigned long gpfn, unsigned long mfn,
809 unsigned long va);
811 static inline int l1pte_write_fault(
812 struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
813 unsigned long va)
814 {
815 struct domain *d = v->domain;
816 l1_pgentry_t gpte = *gpte_p;
817 l1_pgentry_t spte;
818 unsigned long gpfn = l1e_get_pfn(gpte);
819 unsigned long gmfn = gmfn_to_mfn(d, gpfn);
821 //printk("l1pte_write_fault gmfn=%lx\n", gmfn);
823 if ( unlikely(!VALID_MFN(gmfn)) )
824 {
825 SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
826 *spte_p = l1e_empty();
827 return 0;
828 }
830 ASSERT(l1e_get_flags(gpte) & _PAGE_RW);
831 l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
832 spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
834 SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
835 l1e_get_intpte(spte), l1e_get_intpte(gpte));
837 __mark_dirty(d, gmfn);
839 if ( mfn_is_page_table(gmfn) )
840 shadow_mark_va_out_of_sync(v, gpfn, gmfn, va);
842 *gpte_p = gpte;
843 *spte_p = spte;
845 return 1;
846 }
848 static inline int l1pte_read_fault(
849 struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
850 {
851 l1_pgentry_t gpte = *gpte_p;
852 l1_pgentry_t spte = *spte_p;
853 unsigned long pfn = l1e_get_pfn(gpte);
854 unsigned long mfn = gmfn_to_mfn(d, pfn);
856 if ( unlikely(!VALID_MFN(mfn)) )
857 {
858 SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
859 *spte_p = l1e_empty();
860 return 0;
861 }
863 l1e_add_flags(gpte, _PAGE_ACCESSED);
864 spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
866 if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
867 mfn_is_page_table(mfn) )
868 {
869 l1e_remove_flags(spte, _PAGE_RW);
870 }
872 SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
873 l1e_get_intpte(spte), l1e_get_intpte(gpte));
874 *gpte_p = gpte;
875 *spte_p = spte;
877 return 1;
878 }
879 #endif
881 static inline void l1pte_propagate_from_guest(
882 struct domain *d, guest_l1_pgentry_t gpte, l1_pgentry_t *spte_p)
883 {
884 unsigned long mfn;
885 l1_pgentry_t spte;
887 spte = l1e_empty();
889 if ( ((guest_l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
890 (_PAGE_PRESENT|_PAGE_ACCESSED)) &&
891 VALID_MFN(mfn = gmfn_to_mfn(d, l1e_get_pfn(gpte))) )
892 {
893 spte = l1e_from_pfn(
894 mfn, guest_l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL));
896 if ( shadow_mode_log_dirty(d) ||
897 !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) ||
898 mfn_is_page_table(mfn) )
899 {
900 l1e_remove_flags(spte, _PAGE_RW);
901 }
902 }
904 if ( l1e_get_intpte(spte) || l1e_get_intpte(gpte) )
905 SH_VVVLOG("%s: gpte=%" PRIpte ", new spte=%" PRIpte,
906 __func__, l1e_get_intpte(gpte), l1e_get_intpte(spte));
908 *spte_p = spte;
909 }
911 static inline void hl2e_propagate_from_guest(
912 struct domain *d, l2_pgentry_t gpde, l1_pgentry_t *hl2e_p)
913 {
914 unsigned long pfn = l2e_get_pfn(gpde);
915 unsigned long mfn;
916 l1_pgentry_t hl2e;
918 hl2e = l1e_empty();
920 if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
921 {
922 mfn = gmfn_to_mfn(d, pfn);
923 if ( VALID_MFN(mfn) && mfn_valid(mfn) )
924 hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
925 }
927 if ( l1e_get_intpte(hl2e) || l2e_get_intpte(gpde) )
928 SH_VVLOG("%s: gpde=%" PRIpte " hl2e=%" PRIpte, __func__,
929 l2e_get_intpte(gpde), l1e_get_intpte(hl2e));
931 *hl2e_p = hl2e;
932 }
934 static inline void l2pde_general(
935 struct domain *d,
936 guest_l2_pgentry_t *gpde_p,
937 l2_pgentry_t *spde_p,
938 unsigned long sl1mfn)
939 {
940 guest_l2_pgentry_t gpde = *gpde_p;
941 l2_pgentry_t spde;
943 spde = l2e_empty();
944 if ( (guest_l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
945 {
946 spde = l2e_from_pfn(
947 sl1mfn,
948 (guest_l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
950 /* N.B. PDEs do not have a dirty bit. */
951 guest_l2e_add_flags(gpde, _PAGE_ACCESSED);
953 *gpde_p = gpde;
954 }
956 if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) )
957 SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__,
958 l2e_get_intpte(gpde), l2e_get_intpte(spde));
960 *spde_p = spde;
961 }
963 static inline void l2pde_propagate_from_guest(
964 struct domain *d, guest_l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p)
965 {
966 guest_l2_pgentry_t gpde = *gpde_p;
967 unsigned long sl1mfn = 0;
969 if ( guest_l2e_get_flags(gpde) & _PAGE_PRESENT )
970 sl1mfn = __shadow_status(d, l2e_get_pfn(gpde), PGT_l1_shadow);
971 l2pde_general(d, gpde_p, spde_p, sl1mfn);
972 }
974 /************************************************************************/
976 // returns true if a tlb flush is needed
977 //
978 static int inline
979 validate_pte_change(
980 struct domain *d,
981 guest_l1_pgentry_t new_pte,
982 l1_pgentry_t *shadow_pte_p)
983 {
984 l1_pgentry_t old_spte, new_spte;
985 int need_flush = 0;
987 perfc_incrc(validate_pte_calls);
989 l1pte_propagate_from_guest(d, new_pte, &new_spte);
991 if ( shadow_mode_refcounts(d) )
992 {
993 old_spte = *shadow_pte_p;
995 if ( l1e_get_intpte(old_spte) == l1e_get_intpte(new_spte) )
996 {
997 // No accounting required...
998 //
999 perfc_incrc(validate_pte_changes1);
1000 }
1001 else if ( l1e_get_intpte(old_spte) == (l1e_get_intpte(new_spte)|_PAGE_RW) )
1002 {
1003 // Fast path for PTEs that have merely been write-protected
1004 // (e.g., during a Unix fork()). A strict reduction in privilege.
1005 //
1006 perfc_incrc(validate_pte_changes2);
1007 if ( likely(l1e_get_flags(new_spte) & _PAGE_PRESENT) )
1008 shadow_put_page_type(d, mfn_to_page(l1e_get_pfn(new_spte)));
1009 }
1010 else if ( ((l1e_get_flags(old_spte) | l1e_get_flags(new_spte)) &
1011 _PAGE_PRESENT ) &&
1012 l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
1013 {
1014 // only do the ref counting if something important changed.
1015 //
1016 perfc_incrc(validate_pte_changes3);
1018 if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
1019 {
1020 shadow_put_page_from_l1e(old_spte, d);
1021 need_flush = 1;
1022 }
1023 if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
1024 !shadow_get_page_from_l1e(new_spte, d) ) {
1025 new_spte = l1e_empty();
1026 need_flush = -1; /* need to unshadow the page */
1027 }
1028 }
1029 else
1030 {
1031 perfc_incrc(validate_pte_changes4);
1032 }
1033 }
1035 *shadow_pte_p = new_spte;
1037 return need_flush;
1038 }
1040 // returns true if a tlb flush is needed
1041 //
1042 static int inline
1043 validate_hl2e_change(
1044 struct domain *d,
1045 l2_pgentry_t new_gpde,
1046 l1_pgentry_t *shadow_hl2e_p)
1047 {
1048 l1_pgentry_t old_hl2e, new_hl2e;
1049 int need_flush = 0;
1051 perfc_incrc(validate_hl2e_calls);
1053 old_hl2e = *shadow_hl2e_p;
1054 hl2e_propagate_from_guest(d, new_gpde, &new_hl2e);
1056 // Only do the ref counting if something important changed.
1057 //
1058 if ( ((l1e_get_flags(old_hl2e) | l1e_get_flags(new_hl2e)) & _PAGE_PRESENT) &&
1059 l1e_has_changed(old_hl2e, new_hl2e, _PAGE_PRESENT) )
1060 {
1061 perfc_incrc(validate_hl2e_changes);
1063 if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
1064 !get_page(mfn_to_page(l1e_get_pfn(new_hl2e)), d) )
1065 new_hl2e = l1e_empty();
1066 if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
1067 {
1068 put_page(mfn_to_page(l1e_get_pfn(old_hl2e)));
1069 need_flush = 1;
1070 }
1071 }
1073 *shadow_hl2e_p = new_hl2e;
1075 return need_flush;
1076 }
1078 // returns true if a tlb flush is needed
1079 //
1080 static int inline
1081 validate_pde_change(
1082 struct domain *d,
1083 guest_l2_pgentry_t new_gpde,
1084 l2_pgentry_t *shadow_pde_p)
1085 {
1086 l2_pgentry_t old_spde, new_spde;
1087 int need_flush = 0;
1089 perfc_incrc(validate_pde_calls);
1091 old_spde = *shadow_pde_p;
1092 l2pde_propagate_from_guest(d, &new_gpde, &new_spde);
1094 // Only do the ref counting if something important changed.
1095 //
1096 if ( ((l2e_get_intpte(old_spde) | l2e_get_intpte(new_spde)) & _PAGE_PRESENT) &&
1097 l2e_has_changed(old_spde, new_spde, _PAGE_PRESENT) )
1098 {
1099 perfc_incrc(validate_pde_changes);
1101 if ( (l2e_get_flags(new_spde) & _PAGE_PRESENT) &&
1102 !get_shadow_ref(l2e_get_pfn(new_spde)) )
1103 BUG();
1104 if ( l2e_get_flags(old_spde) & _PAGE_PRESENT )
1105 {
1106 put_shadow_ref(l2e_get_pfn(old_spde));
1107 need_flush = 1;
1108 }
1109 }
1111 *shadow_pde_p = new_spde;
1113 return need_flush;
1114 }
1116 /*********************************************************************/
1118 #if SHADOW_HASH_DEBUG
1120 static void shadow_audit(struct domain *d, int print)
1121 {
1122 int live = 0, free = 0, j = 0, abs;
1123 struct shadow_status *a;
1125 for ( j = 0; j < shadow_ht_buckets; j++ )
1126 {
1127 a = &d->arch.shadow_ht[j];
1128 if ( a->gpfn_and_flags )
1129 {
1130 live++;
1131 ASSERT(a->smfn);
1132 }
1133 else
1134 ASSERT(!a->next);
1136 a = a->next;
1137 while ( a && (live < 9999) )
1138 {
1139 live++;
1140 if ( (a->gpfn_and_flags == 0) || (a->smfn == 0) )
1141 {
1142 printk("XXX live=%d gpfn+flags=%lx sp=%lx next=%p\n",
1143 live, a->gpfn_and_flags, a->smfn, a->next);
1144 BUG();
1145 }
1146 ASSERT(a->smfn);
1147 a = a->next;
1148 }
1149 ASSERT(live < 9999);
1150 }
1152 for ( a = d->arch.shadow_ht_free; a != NULL; a = a->next )
1153 free++;
1155 if ( print )
1156 printk("Xlive=%d free=%d\n", live, free);
1158 // BUG: this only works if there's only a single domain which is
1159 // using shadow tables.
1160 //
1161 abs = (
1162 perfc_value(shadow_l1_pages) +
1163 perfc_value(shadow_l2_pages) +
1164 perfc_value(hl2_table_pages) +
1165 perfc_value(snapshot_pages) +
1166 perfc_value(writable_pte_predictions)
1167 ) - live;
1168 #ifdef PERF_COUNTERS
1169 if ( (abs < -1) || (abs > 1) )
1170 {
1171 printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n",
1172 live, free,
1173 perfc_value(shadow_l1_pages),
1174 perfc_value(shadow_l2_pages),
1175 perfc_value(hl2_table_pages),
1176 perfc_value(snapshot_pages),
1177 perfc_value(writable_pte_predictions));
1178 BUG();
1179 }
1180 #endif
1182 // XXX ought to add some code to audit the out-of-sync entries, too.
1183 //
1184 }
1185 #else
1186 #define shadow_audit(p, print) ((void)0)
1187 #endif
1190 static inline struct shadow_status *hash_bucket(
1191 struct domain *d, unsigned int gpfn)
1192 {
1193 return &d->arch.shadow_ht[gpfn % shadow_ht_buckets];
1194 }
1197 /*
1198 * N.B. This takes a guest pfn (i.e. a pfn in the guest's namespace,
1199 * which, depending on full shadow mode, may or may not equal
1200 * its mfn).
1201 * It returns the shadow's mfn, or zero if it doesn't exist.
1202 */
1203 static inline unsigned long __shadow_status(
1204 struct domain *d, unsigned long gpfn, unsigned long stype)
1205 {
1206 struct shadow_status *p, *x, *head;
1207 shadow_key_t key;
1208 #if CONFIG_PAGING_LEVELS >= 3
1209 if ( d->arch.ops->guest_paging_levels == PAGING_L3 && stype == PGT_l4_shadow )
1210 key = gpfn | stype | index_to_key(get_cr3_idxval(current));
1211 else
1212 #endif
1213 key = gpfn | stype;
1215 ASSERT(shadow_lock_is_acquired(d));
1216 ASSERT(gpfn == (gpfn & PGT_mfn_mask));
1217 ASSERT(stype && !(stype & ~PGT_type_mask));
1219 perfc_incrc(shadow_status_calls);
1221 x = head = hash_bucket(d, gpfn);
1222 p = NULL;
1224 shadow_audit(d, 0);
1226 do
1227 {
1228 ASSERT(x->gpfn_and_flags || ((x == head) && (x->next == NULL)));
1230 if ( x->gpfn_and_flags == key )
1231 {
1232 #if SHADOW_DEBUG
1233 if ( unlikely(shadow_status_noswap) )
1234 return x->smfn;
1235 #endif
1236 /* Pull-to-front if 'x' isn't already the head item. */
1237 if ( unlikely(x != head) )
1238 {
1239 /* Delete 'x' from list and reinsert immediately after head. */
1240 p->next = x->next;
1241 x->next = head->next;
1242 head->next = x;
1244 /* Swap 'x' contents with head contents. */
1245 SWAP(head->gpfn_and_flags, x->gpfn_and_flags);
1246 SWAP(head->smfn, x->smfn);
1247 }
1248 else
1249 {
1250 perfc_incrc(shadow_status_hit_head);
1251 }
1253 return head->smfn;
1254 }
1256 p = x;
1257 x = x->next;
1258 }
1259 while ( x != NULL );
1261 perfc_incrc(shadow_status_miss);
1262 return 0;
1263 }
1265 /*
1266 * Not clear if pull-to-front is worth while for this or not,
1267 * as it generally needs to scan the entire bucket anyway.
1268 * Much simpler without.
1270 * Either returns PGT_none, or PGT_l{1,2,3,4}_page_table.
1271 */
1272 static inline u32
1273 shadow_max_pgtable_type(struct domain *d, unsigned long gpfn,
1274 unsigned long *smfn)
1275 {
1276 struct shadow_status *x;
1277 u32 pttype = PGT_none, type;
1279 ASSERT(shadow_lock_is_acquired(d));
1280 ASSERT(gpfn == (gpfn & PGT_mfn_mask));
1282 perfc_incrc(shadow_max_type);
1284 x = hash_bucket(d, gpfn);
1286 while ( x && x->gpfn_and_flags )
1287 {
1288 if ( (x->gpfn_and_flags & PGT_mfn_mask) == gpfn )
1289 {
1290 type = x->gpfn_and_flags & PGT_type_mask;
1292 switch ( type )
1293 {
1294 case PGT_hl2_shadow:
1295 // Treat an HL2 as if it's an L1
1296 //
1297 type = PGT_l1_shadow;
1298 break;
1299 case PGT_snapshot:
1300 case PGT_writable_pred:
1301 // Ignore snapshots -- they don't in and of themselves constitute
1302 // treating a page as a page table
1303 //
1304 goto next;
1305 case PGT_base_page_table:
1306 // Early exit if we found the max possible value
1307 //
1308 return type;
1309 default:
1310 break;
1311 }
1313 if ( type > pttype )
1314 {
1315 pttype = type;
1316 if ( smfn )
1317 *smfn = x->smfn;
1318 }
1319 }
1320 next:
1321 x = x->next;
1322 }
1324 return pttype;
1325 }
1327 static inline void delete_shadow_status(
1328 struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype, u64 index)
1329 {
1330 struct shadow_status *p, *x, *n, *head;
1332 shadow_key_t key = gpfn | stype | index_to_key(index);
1334 ASSERT(shadow_lock_is_acquired(d));
1335 ASSERT(!(gpfn & ~PGT_mfn_mask));
1336 ASSERT(stype && !(stype & ~PGT_type_mask));
1338 head = hash_bucket(d, gpfn);
1340 SH_VLOG("delete gpfn=%lx t=%08x bucket=%p", gpfn, stype, head);
1341 shadow_audit(d, 0);
1343 /* Match on head item? */
1344 if ( head->gpfn_and_flags == key )
1345 {
1346 if ( (n = head->next) != NULL )
1347 {
1348 /* Overwrite head with contents of following node. */
1349 head->gpfn_and_flags = n->gpfn_and_flags;
1350 head->smfn = n->smfn;
1352 /* Delete following node. */
1353 head->next = n->next;
1355 /* Add deleted node to the free list. */
1356 n->gpfn_and_flags = 0;
1357 n->smfn = 0;
1358 n->next = d->arch.shadow_ht_free;
1359 d->arch.shadow_ht_free = n;
1360 }
1361 else
1362 {
1363 /* This bucket is now empty. Initialise the head node. */
1364 head->gpfn_and_flags = 0;
1365 head->smfn = 0;
1366 }
1368 goto found;
1369 }
1371 p = head;
1372 x = head->next;
1374 do
1375 {
1376 if ( x->gpfn_and_flags == key )
1377 {
1378 /* Delete matching node. */
1379 p->next = x->next;
1381 /* Add deleted node to the free list. */
1382 x->gpfn_and_flags = 0;
1383 x->smfn = 0;
1384 x->next = d->arch.shadow_ht_free;
1385 d->arch.shadow_ht_free = x;
1387 goto found;
1388 }
1390 p = x;
1391 x = x->next;
1392 }
1393 while ( x != NULL );
1395 /* If we got here, it wasn't in the list! */
1396 BUG();
1398 found:
1399 // release ref to page
1400 if ( stype != PGT_writable_pred )
1401 put_page(mfn_to_page(gmfn));
1403 shadow_audit(d, 0);
1404 }
1406 static inline void set_shadow_status(
1407 struct domain *d, unsigned long gpfn, unsigned long gmfn,
1408 unsigned long smfn, unsigned long stype, u64 index)
1409 {
1410 struct shadow_status *x, *head, *extra;
1411 int i;
1413 shadow_key_t key = gpfn | stype | index_to_key(index);
1415 SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype);
1417 ASSERT(shadow_lock_is_acquired(d));
1419 ASSERT(shadow_mode_translate(d) || gpfn);
1420 ASSERT(!(gpfn & ~PGT_mfn_mask));
1422 // XXX - need to be more graceful.
1423 ASSERT(VALID_MFN(gmfn));
1425 ASSERT(stype && !(stype & ~PGT_type_mask));
1427 x = head = hash_bucket(d, gpfn);
1429 SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)",
1430 gpfn, smfn, stype, x, x->next);
1431 shadow_audit(d, 0);
1433 // grab a reference to the guest page to represent the entry in the shadow
1434 // hash table
1435 //
1436 // XXX - Should PGT_writable_pred grab a page ref?
1437 // - Who/how are these hash table entry refs flushed if/when a page
1438 // is given away by the domain?
1439 //
1440 if ( stype != PGT_writable_pred )
1441 get_page(mfn_to_page(gmfn), d);
1443 /*
1444 * STEP 1. If page is already in the table, update it in place.
1445 */
1446 do
1447 {
1448 if ( unlikely(x->gpfn_and_flags == key) )
1449 {
1450 if ( stype != PGT_writable_pred )
1451 BUG(); // we should never replace entries into the hash table
1452 x->smfn = smfn;
1453 if ( stype != PGT_writable_pred )
1454 put_page(mfn_to_page(gmfn)); // already had a ref...
1455 goto done;
1456 }
1458 x = x->next;
1459 }
1460 while ( x != NULL );
1462 /*
1463 * STEP 2. The page must be inserted into the table.
1464 */
1466 /* If the bucket is empty then insert the new page as the head item. */
1467 if ( head->gpfn_and_flags == 0 )
1468 {
1469 head->gpfn_and_flags = key;
1470 head->smfn = smfn;
1471 ASSERT(head->next == NULL);
1472 goto done;
1473 }
1475 /* We need to allocate a new node. Ensure the quicklist is non-empty. */
1476 if ( unlikely(d->arch.shadow_ht_free == NULL) )
1477 {
1478 SH_VLOG("Allocate more shadow hashtable blocks.");
1480 extra = xmalloc_bytes(
1481 sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
1483 /* XXX Should be more graceful here. */
1484 if ( extra == NULL )
1485 BUG();
1487 memset(extra, 0, sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
1489 /* Record the allocation block so it can be correctly freed later. */
1490 d->arch.shadow_extras_count++;
1491 *((struct shadow_status **)&extra[shadow_ht_extra_size]) =
1492 d->arch.shadow_ht_extras;
1493 d->arch.shadow_ht_extras = &extra[0];
1495 /* Thread a free chain through the newly-allocated nodes. */
1496 for ( i = 0; i < (shadow_ht_extra_size - 1); i++ )
1497 extra[i].next = &extra[i+1];
1498 extra[i].next = NULL;
1500 /* Add the new nodes to the free list. */
1501 d->arch.shadow_ht_free = &extra[0];
1502 }
1504 /* Allocate a new node from the quicklist. */
1505 x = d->arch.shadow_ht_free;
1506 d->arch.shadow_ht_free = x->next;
1508 /* Initialise the new node and insert directly after the head item. */
1509 x->gpfn_and_flags = key;
1510 x->smfn = smfn;
1511 x->next = head->next;
1512 head->next = x;
1514 done:
1515 shadow_audit(d, 0);
1517 if ( stype <= PGT_l4_shadow )
1518 {
1519 // add to front of list of pages to check when removing write
1520 // permissions for a page...
1521 //
1522 }
1523 }
1525 /************************************************************************/
1527 static inline void guest_physmap_add_page(
1528 struct domain *d, unsigned long gpfn, unsigned long mfn)
1529 {
1530 struct domain_mmap_cache c1, c2;
1532 if ( likely(!shadow_mode_translate(d)) )
1533 return;
1535 domain_mmap_cache_init(&c1);
1536 domain_mmap_cache_init(&c2);
1537 shadow_lock(d);
1538 shadow_sync_and_drop_references(d, mfn_to_page(mfn));
1539 set_p2m_entry(d, gpfn, mfn, &c1, &c2);
1540 set_gpfn_from_mfn(mfn, gpfn);
1541 shadow_unlock(d);
1542 domain_mmap_cache_destroy(&c1);
1543 domain_mmap_cache_destroy(&c2);
1544 }
1546 static inline void guest_physmap_remove_page(
1547 struct domain *d, unsigned long gpfn, unsigned long mfn)
1548 {
1549 struct domain_mmap_cache c1, c2;
1550 unsigned long type;
1552 if ( likely(!shadow_mode_translate(d)) )
1553 return;
1555 domain_mmap_cache_init(&c1);
1556 domain_mmap_cache_init(&c2);
1557 shadow_lock(d);
1558 shadow_sync_and_drop_references(d, mfn_to_page(mfn));
1559 while ( (type = shadow_max_pgtable_type(d, gpfn, NULL)) != PGT_none )
1560 free_shadow_page(__shadow_status(d, gpfn, type));
1561 set_p2m_entry(d, gpfn, -1, &c1, &c2);
1562 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
1563 shadow_unlock(d);
1564 domain_mmap_cache_destroy(&c1);
1565 domain_mmap_cache_destroy(&c2);
1566 }
1568 /************************************************************************/
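/*
 * shadow_update_min_max() widens the [min, max] index range stored (via
 * SHADOW_ENCODE_MIN_MAX) in a shadow L1 page's tlbflush_timestamp whenever
 * shadow_set_l1e() writes an entry outside the currently recorded range.
 */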
1570 void static inline
1571 shadow_update_min_max(unsigned long smfn, int index)
1572 {
1573 struct page_info *sl1page = mfn_to_page(smfn);
1574 u32 min_max = sl1page->tlbflush_timestamp;
1575 int min = SHADOW_MIN(min_max);
1576 int max = SHADOW_MAX(min_max);
1577 int update = 0;
1579 if ( index < min )
1580 {
1581 min = index;
1582 update = 1;
1583 }
1584 if ( index > max )
1585 {
1586 max = index;
1587 update = 1;
1588 }
1589 if ( update )
1590 sl1page->tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max);
1591 }
1593 #if CONFIG_PAGING_LEVELS <= 2
1594 extern void shadow_map_l1_into_current_l2(unsigned long va);
1596 void static inline
1597 shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
1598 {
1599 struct vcpu *v = current;
1600 struct domain *d = v->domain;
1601 l2_pgentry_t sl2e = {0};
1603 __shadow_get_l2e(v, va, &sl2e);
1604 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
1605 {
1606 /*
1607 * Either the L1 is not shadowed, or the shadow isn't linked into
1608 * the current shadow L2.
1609 */
1610 if ( create_l1_shadow )
1611 {
1612 perfc_incrc(shadow_set_l1e_force_map);
1613 shadow_map_l1_into_current_l2(va);
1614 }
1615 else /* check to see if it exists; if so, link it in */
1616 {
1617 l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
1618 unsigned long gl1pfn = l2e_get_pfn(gpde);
1619 unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
1621 ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
1623 if ( sl1mfn )
1624 {
1625 perfc_incrc(shadow_set_l1e_unlinked);
1626 if ( !get_shadow_ref(sl1mfn) )
1627 BUG();
1628 l2pde_general(d, &gpde, &sl2e, sl1mfn);
1629 __guest_set_l2e(v, va, gpde);
1630 __shadow_set_l2e(v, va, sl2e);
1631 }
1632 else
1633 {
1634 // no shadow exists, so there's nothing to do.
1635 perfc_incrc(shadow_set_l1e_fail);
1636 return;
1637 }
1638 }
1639 }
1641 __shadow_get_l2e(v, va, &sl2e);
1643 if ( shadow_mode_refcounts(d) )
1644 {
1645 l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
1647 // only do the ref counting if something important changed.
1648 //
1649 if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
1650 {
1651 if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
1652 !shadow_get_page_from_l1e(new_spte, d) )
1653 new_spte = l1e_empty();
1654 if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
1655 shadow_put_page_from_l1e(old_spte, d);
1656 }
1657 }
1660 set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
1661 shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
1662 shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
1663 }
1664 #endif
1665 /************************************************************************/
1667 static inline int
1668 shadow_mode_page_writable(unsigned long va, struct cpu_user_regs *regs, unsigned long gpfn)
1669 {
1670 struct vcpu *v = current;
1671 struct domain *d = v->domain;
1672 unsigned long mfn = gmfn_to_mfn(d, gpfn);
1673 u32 type = mfn_to_page(mfn)->u.inuse.type_info & PGT_type_mask;
1675 if ( shadow_mode_refcounts(d) &&
1676 (type == PGT_writable_page) )
1677 type = shadow_max_pgtable_type(d, gpfn, NULL);
1679 // Strange but true: writable page tables allow kernel-mode access
1680 // to L1 page table pages via write-protected PTEs... Similarly, write
1681 // access to all page table pages is granted for shadow_mode_write_all
1682 // clients.
1683 //
1684 if ( ((shadow_mode_write_l1(d) && (type == PGT_l1_page_table)) ||
1685 (shadow_mode_write_all(d) && type && (type <= PGT_l4_page_table))) &&
1686 ((va < HYPERVISOR_VIRT_START)
1687 #if defined(__x86_64__)
1688 || (va >= HYPERVISOR_VIRT_END)
1689 #endif
1690 ) &&
1691 guest_kernel_mode(v, regs) )
1692 return 1;
1694 return 0;
1695 }
1697 #if CONFIG_PAGING_LEVELS <= 2
1698 static inline l1_pgentry_t gva_to_gpte(unsigned long gva)
1699 {
1700 l2_pgentry_t gpde;
1701 l1_pgentry_t gpte;
1702 struct vcpu *v = current;
1704 ASSERT( shadow_mode_translate(current->domain) );
1706 __guest_get_l2e(v, gva, &gpde);
1707 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
1708 return l1e_empty();
1710 // This is actually overkill - we only need to make sure the hl2
1711 // is in-sync.
1712 //
1713 shadow_sync_va(v, gva);
1715 if ( unlikely(__copy_from_user(&gpte,
1716 &linear_pg_table[gva >> PAGE_SHIFT],
1717 sizeof(gpte))) )
1718 {
1719 FSH_LOG("gva_to_gpte got a fault on gva=%lx", gva);
1720 return l1e_empty();
1721 }
1723 return gpte;
1724 }
1726 static inline unsigned long gva_to_gpa(unsigned long gva)
1727 {
1728 l1_pgentry_t gpte;
1730 gpte = gva_to_gpte(gva);
1731 if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )
1732 return 0;
1734 return l1e_get_paddr(gpte) + (gva & ~PAGE_MASK);
1735 }
1736 #endif
1738 static inline unsigned long gva_to_mfn(unsigned long gva)
1739 {
1740 unsigned long gpa = gva_to_gpa(gva);
1741 return get_mfn_from_gpfn(gpa >> PAGE_SHIFT);
1742 }
1744 /************************************************************************/
1746 extern void __update_pagetables(struct vcpu *v);
1747 static inline void update_pagetables(struct vcpu *v)
1748 {
1749 struct domain *d = v->domain;
1750 int paging_enabled;
1752 if ( hvm_guest(v) )
1753 paging_enabled = hvm_paging_enabled(v);
1754 else
1755 // HACK ALERT: there's currently no easy way to figure out if a domU
1756 // has set its arch.guest_table to zero, vs not yet initialized it.
1757 //
1758 paging_enabled = !!pagetable_get_paddr(v->arch.guest_table);
1760 /*
1761 * We don't call __update_pagetables() when hvm guest paging is
1762 * disabled as we want the linear_pg_table to be inaccessible so that
1763 * we bail out early of shadow_fault() if the hvm guest tries illegal
1764 * accesses while it thinks paging is turned off.
1765 */
1766 if ( unlikely(shadow_mode_enabled(d)) && paging_enabled )
1767 {
1768 shadow_lock(d);
1769 __update_pagetables(v);
1770 shadow_unlock(d);
1771 }
1773 if ( likely(!shadow_mode_external(d)) )
1774 {
1775 if ( shadow_mode_enabled(d) )
1776 v->arch.monitor_table = v->arch.shadow_table;
1777 else
1778 #if CONFIG_PAGING_LEVELS == 4
1779 if ( !(v->arch.flags & TF_kernel_mode) )
1780 v->arch.monitor_table = v->arch.guest_table_user;
1781 else
1782 #endif
1783 v->arch.monitor_table = v->arch.guest_table;
1784 }
1785 }
1787 void clear_all_shadow_status(struct domain *d);
1789 #if SHADOW_DEBUG
1790 extern int _check_pagetable(struct vcpu *v, char *s);
1791 extern int _check_all_pagetables(struct vcpu *v, char *s);
1793 #define check_pagetable(_v, _s) _check_pagetable(_v, _s)
1794 //#define check_pagetable(_v, _s) _check_all_pagetables(_v, _s)
1796 #else
1797 #define check_pagetable(_v, _s) ((void)0)
1798 #endif
1800 #endif /* XEN_SHADOW_H */
1802 /*
1803 * Local variables:
1804 * mode: C
1805 * c-set-style: "BSD"
1806 * c-basic-offset: 4
1807 * tab-width: 4
1808 * indent-tabs-mode: nil
1809 * End:
1810 */