ia64/xen-unstable

view xen/include/asm-x86/shadow.h @ 9520:4c2d101a3228

Re-arrange code for followup patch and remove extra shadow_lock in function which is only called with the lock already held.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Wed Mar 29 16:47:46 2006 +0100 (2006-03-29)
parents 94b10faa7577
children d102a30417a7
line source
1 /******************************************************************************
2 * include/asm-x86/shadow.h
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 #ifndef _XEN_SHADOW_H
23 #define _XEN_SHADOW_H
25 #include <xen/config.h>
26 #include <xen/types.h>
27 #include <xen/perfc.h>
28 #include <xen/sched.h>
29 #include <xen/mm.h>
30 #include <xen/domain_page.h>
31 #include <asm/current.h>
32 #include <asm/flushtlb.h>
33 #include <asm/processor.h>
34 #include <asm/hvm/hvm.h>
35 #include <asm/hvm/support.h>
36 #include <asm/regs.h>
37 #include <public/dom0_ops.h>
38 #include <asm/shadow_public.h>
39 #include <asm/page-guest32.h>
40 #include <asm/shadow_ops.h>
42 /* Shadow PT operation mode : shadow-mode variable in arch_domain. */
44 #define SHM_enable (1<<0) /* we're in one of the shadow modes */
45 #define SHM_refcounts (1<<1) /* refcounts based on shadow tables instead of
46 guest tables */
47 #define SHM_write_all (1<<2) /* allow write access to all guest pt pages,
48 regardless of pte write permissions */
49 #define SHM_log_dirty (1<<3) /* enable log dirty mode */
50 #define SHM_translate (1<<4) /* Xen does p2m translation, not guest */
51 #define SHM_external (1<<5) /* Xen does not steal address space from the
52 domain for its own bookkeeping; requires VT or
53 similar mechanisms */
54 #define SHM_wr_pt_pte (1<<6) /* guest allowed to set PAGE_RW bit in PTEs which
55 point to page table pages. */
57 #define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode)
58 #define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts)
59 #define shadow_mode_write_l1(_d) (VM_ASSIST(_d, VMASST_TYPE_writable_pagetables))
60 #define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all)
61 #define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
62 #define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
63 #define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external)
64 #define shadow_mode_wr_pt_pte(_d) ((_d)->arch.shadow_mode & SHM_wr_pt_pte)
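For orientation, the SHM_* bits above simply OR together into arch.shadow_mode and the predicates reduce to bit tests. A minimal stand-alone sketch (not part of this header; the particular flag combination shown is only an assumed example, not necessarily one Xen uses verbatim):

#include <stdio.h>

#define SHM_enable     (1<<0)
#define SHM_refcounts  (1<<1)
#define SHM_write_all  (1<<2)
#define SHM_log_dirty  (1<<3)
#define SHM_translate  (1<<4)
#define SHM_external   (1<<5)

int main(void)
{
    /* An assumed translated, refcounted, log-dirty configuration. */
    unsigned int shadow_mode = SHM_enable | SHM_refcounts |
                               SHM_translate | SHM_log_dirty;

    /* The shadow_mode_*() macros above reduce to bit tests like these. */
    printf("enabled=%d log_dirty=%d external=%d\n",
           !!(shadow_mode & SHM_enable),
           !!(shadow_mode & SHM_log_dirty),
           !!(shadow_mode & SHM_external));
    return 0;
}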
66 #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
67 #define __shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
68 (SH_LINEAR_PT_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
69 #define shadow_linear_l2_table(_v) ((_v)->arch.shadow_vtable)
71 // easy access to the hl2 table (for translated but not external modes only)
72 #define __linear_hl2_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START + \
73 (PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
75 /*
76 * For now we use the per-domain BIGLOCK rather than a shadow-specific lock.
77 * We usually have the BIGLOCK already acquired anyway, so this is unlikely
78 * to cause much unnecessary extra serialisation. Also it's a recursive
79 * lock, and there are some code paths containing nested shadow_lock().
80 * The #if0'ed code below is therefore broken until such nesting is removed.
81 */
82 #if 0
83 #define shadow_lock_init(_d) \
84 spin_lock_init(&(_d)->arch.shadow_lock)
85 #define shadow_lock_is_acquired(_d) \
86 spin_is_locked(&(_d)->arch.shadow_lock)
87 #define shadow_lock(_d) \
88 do { \
89 ASSERT(!shadow_lock_is_acquired(_d)); \
90 spin_lock(&(_d)->arch.shadow_lock); \
91 } while (0)
92 #define shadow_unlock(_d) \
93 do { \
94 ASSERT(shadow_lock_is_acquired(_d)); \
95 spin_unlock(&(_d)->arch.shadow_lock); \
96 } while (0)
97 #else
98 #define shadow_lock_init(_d) \
99 ((_d)->arch.shadow_nest = 0)
100 #define shadow_lock_is_acquired(_d) \
101 (spin_is_locked(&(_d)->big_lock) && ((_d)->arch.shadow_nest != 0))
102 #define shadow_lock(_d) \
103 do { \
104 LOCK_BIGLOCK(_d); \
105 (_d)->arch.shadow_nest++; \
106 } while (0)
107 #define shadow_unlock(_d) \
108 do { \
109 ASSERT(shadow_lock_is_acquired(_d)); \
110 (_d)->arch.shadow_nest--; \
111 UNLOCK_BIGLOCK(_d); \
112 } while (0)
113 #endif
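A stand-alone sketch of the nesting behaviour the comment above relies on, with plain counters standing in for the recursive BIGLOCK and arch.shadow_nest (an illustration only, not the real LOCK_BIGLOCK/UNLOCK_BIGLOCK implementation):

#include <assert.h>
#include <stdio.h>

/* Toy stand-ins: big_lock_depth models a recursive BIGLOCK, shadow_nest
 * mirrors arch.shadow_nest in the macros above. */
struct toy_domain { int big_lock_depth; int shadow_nest; };

static void toy_shadow_lock(struct toy_domain *d)
{
    d->big_lock_depth++;          /* LOCK_BIGLOCK(): recursive, nesting is fine */
    d->shadow_nest++;
}

static void toy_shadow_unlock(struct toy_domain *d)
{
    assert(d->shadow_nest != 0);  /* shadow_lock_is_acquired() analogue */
    d->shadow_nest--;
    d->big_lock_depth--;          /* UNLOCK_BIGLOCK() */
}

int main(void)
{
    struct toy_domain d = { 0, 0 };
    toy_shadow_lock(&d);
    toy_shadow_lock(&d);                  /* nested shadow_lock() is allowed */
    printf("nest=%d\n", d.shadow_nest);   /* prints 2 */
    toy_shadow_unlock(&d);
    toy_shadow_unlock(&d);
    return 0;
}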
115 #define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
116 #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
117 #define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))
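A worked round trip of this encoding, which shadow_update_min_max() below uses to bound the range of interesting slots in a shadow L1 (stand-alone sketch; GUEST_L1_PAGETABLE_ENTRIES is assumed to be 512 here, the PAE value, while 2-level guests use 1024):

#include <stdio.h>

#define GUEST_L1_PAGETABLE_ENTRIES 512   /* assumed value for the example */
#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
#define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
#define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))

int main(void)
{
    /* Pack the slot range [7, 300] into one 32-bit word and unpack it. */
    unsigned int enc = SHADOW_ENCODE_MIN_MAX(7, 300);
    printf("min=%u max=%u\n", SHADOW_MIN(enc), SHADOW_MAX(enc)); /* min=7 max=300 */
    return 0;
}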
118 extern void shadow_direct_map_clean(struct domain *d);
119 extern int shadow_direct_map_init(struct domain *d);
120 extern int shadow_direct_map_fault(
121 unsigned long vpa, struct cpu_user_regs *regs);
122 extern void shadow_mode_init(void);
123 extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc);
124 extern int shadow_fault(unsigned long va, struct cpu_user_regs *regs);
125 extern int shadow_mode_enable(struct domain *p, unsigned int mode);
126 extern void shadow_invlpg(struct vcpu *, unsigned long);
127 extern struct out_of_sync_entry *shadow_mark_mfn_out_of_sync(
128 struct vcpu *v, unsigned long gpfn, unsigned long mfn);
129 extern void free_monitor_pagetable(struct vcpu *v);
130 extern void __shadow_sync_all(struct domain *d);
131 extern int __shadow_out_of_sync(struct vcpu *v, unsigned long va);
132 extern int set_p2m_entry(
133 struct domain *d, unsigned long pfn, unsigned long mfn,
134 struct domain_mmap_cache *l2cache,
135 struct domain_mmap_cache *l1cache);
136 extern void remove_shadow(struct domain *d, unsigned long gpfn, u32 stype);
138 extern void free_shadow_page(unsigned long smfn);
140 extern void shadow_l1_normal_pt_update(struct domain *d,
141 paddr_t pa, l1_pgentry_t l1e,
142 struct domain_mmap_cache *cache);
143 extern void shadow_l2_normal_pt_update(struct domain *d,
144 paddr_t pa, l2_pgentry_t l2e,
145 struct domain_mmap_cache *cache);
146 #if CONFIG_PAGING_LEVELS >= 3
147 #include <asm/page-guest32.h>
148 /*
149 * va_mask cannot be used because it's used by the shadow hash.
150 * Use the score area for now.
151 */
152 #define is_xen_l2_slot(t,s) \
153 ( ((((t) & PGT_score_mask) >> PGT_score_shift) == 3) && \
154 ((s) >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
156 extern unsigned long gva_to_gpa(unsigned long gva);
157 extern void shadow_l3_normal_pt_update(struct domain *d,
158 paddr_t pa, l3_pgentry_t l3e,
159 struct domain_mmap_cache *cache);
160 #endif
161 #if CONFIG_PAGING_LEVELS >= 4
162 extern void shadow_l4_normal_pt_update(struct domain *d,
163 paddr_t pa, l4_pgentry_t l4e,
164 struct domain_mmap_cache *cache);
165 #endif
166 extern int shadow_do_update_va_mapping(unsigned long va,
167 l1_pgentry_t val,
168 struct vcpu *v);
171 static inline unsigned long __shadow_status(
172 struct domain *d, unsigned long gpfn, unsigned long stype);
174 #if CONFIG_PAGING_LEVELS <= 2
175 static inline void update_hl2e(struct vcpu *v, unsigned long va);
176 #endif
178 static inline int page_is_page_table(struct page_info *page)
179 {
180 struct domain *owner = page_get_owner(page);
181 u32 type_info;
183 if ( owner && shadow_mode_refcounts(owner) )
184 return page->count_info & PGC_page_table;
186 type_info = page->u.inuse.type_info & PGT_type_mask;
187 return type_info && (type_info <= PGT_l4_page_table);
188 }
190 static inline int mfn_is_page_table(unsigned long mfn)
191 {
192 if ( !mfn_valid(mfn) )
193 return 0;
195 return page_is_page_table(mfn_to_page(mfn));
196 }
198 static inline int page_out_of_sync(struct page_info *page)
199 {
200 return page->count_info & PGC_out_of_sync;
201 }
203 static inline int mfn_out_of_sync(unsigned long mfn)
204 {
205 if ( !mfn_valid(mfn) )
206 return 0;
208 return page_out_of_sync(mfn_to_page(mfn));
209 }
212 /************************************************************************/
214 static void inline
215 __shadow_sync_mfn(struct domain *d, unsigned long mfn)
216 {
217 if ( d->arch.out_of_sync )
218 {
219 // XXX - could be smarter
220 //
221 __shadow_sync_all(d);
222 }
223 }
225 static void inline
226 __shadow_sync_va(struct vcpu *v, unsigned long va)
227 {
228 struct domain *d = v->domain;
230 if ( d->arch.out_of_sync && __shadow_out_of_sync(v, va) )
231 {
232 perfc_incrc(shadow_sync_va);
234 // XXX - could be smarter
235 //
236 __shadow_sync_all(v->domain);
237 }
238 #if CONFIG_PAGING_LEVELS <= 2
239 // Also make sure the HL2 is up-to-date for this address.
240 //
241 if ( unlikely(shadow_mode_translate(v->domain)) )
242 update_hl2e(v, va);
243 #endif
244 }
246 static void inline
247 shadow_sync_all(struct domain *d)
248 {
249 if ( unlikely(shadow_mode_enabled(d)) )
250 {
251 shadow_lock(d);
253 if ( d->arch.out_of_sync )
254 __shadow_sync_all(d);
256 ASSERT(d->arch.out_of_sync == NULL);
258 shadow_unlock(d);
259 }
260 }
262 // SMP BUG: This routine can't ever be used properly in an SMP context.
263 // It should be something like get_shadow_and_sync_va().
264 // This probably shouldn't exist.
265 //
266 static void inline
267 shadow_sync_va(struct vcpu *v, unsigned long gva)
268 {
269 struct domain *d = v->domain;
270 if ( unlikely(shadow_mode_enabled(d)) )
271 {
272 shadow_lock(d);
273 __shadow_sync_va(v, gva);
274 shadow_unlock(d);
275 }
276 }
278 extern void __shadow_mode_disable(struct domain *d);
279 static inline void shadow_mode_disable(struct domain *d)
280 {
281 if ( unlikely(shadow_mode_enabled(d)) )
282 {
283 shadow_lock(d);
284 __shadow_mode_disable(d);
285 shadow_unlock(d);
286 }
287 }
289 /************************************************************************/
291 #define mfn_to_gmfn(_d, mfn) \
292 ( (shadow_mode_translate(_d)) \
293 ? get_gpfn_from_mfn(mfn) \
294 : (mfn) )
296 #define gmfn_to_mfn(_d, gpfn) \
297 ({ \
298 unlikely(shadow_mode_translate(_d)) \
299 ? (likely(current->domain == (_d)) \
300 ? get_mfn_from_gpfn(gpfn) \
301 : get_mfn_from_gpfn_foreign(_d, gpfn)) \
302 : (gpfn); \
303 })
305 extern unsigned long get_mfn_from_gpfn_foreign(
306 struct domain *d, unsigned long gpfn);
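A stand-alone sketch of the translation these macros perform in translate mode, with two small arrays standing in for get_mfn_from_gpfn() and get_gpfn_from_mfn() (illustrative only, not the real p2m/m2p machinery):

#include <stdio.h>

#define NPAGES 4

static unsigned long p2m[NPAGES] = { 100, 101, 102, 103 };  /* gpfn -> mfn */
static unsigned long m2p[200];                              /* mfn  -> gpfn */

int main(void)
{
    unsigned long gpfn, mfn;

    for ( gpfn = 0; gpfn < NPAGES; gpfn++ )
        m2p[p2m[gpfn]] = gpfn;          /* set_gpfn_from_mfn() analogue */

    gpfn = 2;
    mfn  = p2m[gpfn];                   /* gmfn_to_mfn() in translate mode */
    printf("gpfn %lu -> mfn %lu -> gpfn %lu\n", gpfn, mfn, m2p[mfn]);
    /* In non-translate mode both macros are the identity: gpfn == mfn. */
    return 0;
}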
308 /************************************************************************/
310 struct shadow_status {
311 struct shadow_status *next; /* Pull-to-front list per hash bucket. */
312 unsigned long gpfn_and_flags; /* Guest pfn plus flags. */
313 unsigned long smfn; /* Shadow mfn. */
314 };
316 #define shadow_ht_extra_size 128
317 #define shadow_ht_buckets 256
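The hash table declared here (shadow_ht_buckets chains of struct shadow_status) is searched with a pull-to-front discipline by __shadow_status() further down; a reduced, stand-alone sketch of that discipline (illustrative only):

#include <stdio.h>

struct node { unsigned long key; struct node *next; };

/* Move the matching node's contents to the bucket head, as __shadow_status()
 * does by relinking and swapping fields, so hot entries stay cheap to find. */
static struct node *lookup(struct node *head, unsigned long key)
{
    struct node *p = NULL, *x = head;
    for ( ; x != NULL; p = x, x = x->next )
    {
        if ( x->key != key )
            continue;
        if ( x != head )
        {
            unsigned long tmp;
            p->next = x->next;            /* unlink x ...                 */
            x->next = head->next;         /* ... reinsert just after head */
            head->next = x;
            tmp = head->key; head->key = x->key; x->key = tmp; /* swap */
        }
        return head;
    }
    return NULL;
}

int main(void)
{
    struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
    lookup(&a, 3);
    printf("head key after lookup(3): %lu\n", a.key); /* prints 3 */
    return 0;
}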
319 struct out_of_sync_entry {
320 struct out_of_sync_entry *next;
321 struct vcpu *v;
322 unsigned long gpfn; /* why is this here? */
323 unsigned long gmfn;
324 unsigned long snapshot_mfn;
325 paddr_t writable_pl1e; /* NB: this is a machine address */
326 unsigned long va;
327 };
329 #define out_of_sync_extra_size 127
331 #define SHADOW_SNAPSHOT_ELSEWHERE (-1L)
333 /************************************************************************/
334 #define SHADOW_DEBUG 0
335 #define SHADOW_VERBOSE_DEBUG 0
336 #define SHADOW_VVERBOSE_DEBUG 0
337 #define SHADOW_VVVERBOSE_DEBUG 0
338 #define SHADOW_HASH_DEBUG 0
339 #define FULLSHADOW_DEBUG 0
341 #if SHADOW_DEBUG
342 extern int shadow_status_noswap;
343 #define SHADOW_REFLECTS_SNAPSHOT _PAGE_AVAIL0
344 #endif
346 #if SHADOW_VERBOSE_DEBUG
347 #define SH_LOG(_f, _a...) \
348 printk("DOM%uP%u: SH_LOG(%d): " _f "\n", \
349 current->domain->domain_id , smp_processor_id(), __LINE__ , ## _a )
350 #define SH_VLOG(_f, _a...) \
351 printk("DOM%uP%u: SH_VLOG(%d): " _f "\n", \
352 current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
353 #else
354 #define SH_LOG(_f, _a...) ((void)0)
355 #define SH_VLOG(_f, _a...) ((void)0)
356 #endif
358 #if SHADOW_VVERBOSE_DEBUG
359 #define SH_VVLOG(_f, _a...) \
360 printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n", \
361 current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
362 #else
363 #define SH_VVLOG(_f, _a...) ((void)0)
364 #endif
366 #if SHADOW_VVVERBOSE_DEBUG
367 #define SH_VVVLOG(_f, _a...) \
368 printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n", \
369 current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
370 #else
371 #define SH_VVVLOG(_f, _a...) ((void)0)
372 #endif
374 #if FULLSHADOW_DEBUG
375 #define FSH_LOG(_f, _a...) \
376 printk("DOM%uP%u: FSH_LOG(%d): " _f "\n", \
377 current->domain->domain_id, smp_processor_id(), __LINE__ , ## _a )
378 #else
379 #define FSH_LOG(_f, _a...) ((void)0)
380 #endif
383 /************************************************************************/
385 static inline int
386 shadow_get_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
387 {
388 l1_pgentry_t nl1e;
389 int res;
390 unsigned long mfn;
391 struct domain *owner;
393 ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
395 if ( !shadow_mode_refcounts(d) )
396 return 1;
398 nl1e = l1e;
399 l1e_remove_flags(nl1e, _PAGE_GLOBAL);
401 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
402 return 0;
404 res = get_page_from_l1e(nl1e, d);
406 if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
407 !(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) &&
408 (mfn = l1e_get_pfn(nl1e)) &&
409 mfn_valid(mfn) &&
410 (owner = page_get_owner(mfn_to_page(mfn))) &&
411 (d != owner) )
412 {
413 res = get_page_from_l1e(nl1e, owner);
414 printk("tried to map mfn %lx from domain %d into shadow page tables "
415 "of domain %d; %s\n",
416 mfn, owner->domain_id, d->domain_id,
417 res ? "success" : "failed");
418 }
420 if ( unlikely(!res) )
421 {
422 perfc_incrc(shadow_get_page_fail);
423 FSH_LOG("%s failed to get ref l1e=%" PRIpte "\n",
424 __func__, l1e_get_intpte(l1e));
425 }
427 return res;
428 }
430 static inline void
431 shadow_put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
432 {
433 if ( !shadow_mode_refcounts(d) )
434 return;
436 put_page_from_l1e(l1e, d);
437 }
439 static inline void
440 shadow_put_page_type(struct domain *d, struct page_info *page)
441 {
442 if ( !shadow_mode_refcounts(d) )
443 return;
445 put_page_type(page);
446 }
448 static inline int shadow_get_page(struct domain *d,
449 struct page_info *page,
450 struct domain *owner)
451 {
452 if ( !shadow_mode_refcounts(d) )
453 return 1;
454 return get_page(page, owner);
455 }
457 static inline void shadow_put_page(struct domain *d,
458 struct page_info *page)
459 {
460 if ( !shadow_mode_refcounts(d) )
461 return;
462 put_page(page);
463 }
465 /************************************************************************/
467 static inline void __mark_dirty(struct domain *d, unsigned long mfn)
468 {
469 unsigned long pfn;
471 ASSERT(shadow_lock_is_acquired(d));
473 if ( likely(!shadow_mode_log_dirty(d)) || !VALID_MFN(mfn) )
474 return;
476 ASSERT(d->arch.shadow_dirty_bitmap != NULL);
478 /* We /really/ mean PFN here, even for non-translated guests. */
479 pfn = get_gpfn_from_mfn(mfn);
481 /*
482 * Values with the MSB set denote MFNs that aren't really part of the
483 * domain's pseudo-physical memory map (e.g., the shared info frame).
484 * Nothing to do here...
485 */
486 if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) )
487 return;
489 /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
490 if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) &&
491 !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) )
492 {
493 d->arch.shadow_dirty_count++;
494 }
495 #ifndef NDEBUG
496 else if ( mfn_valid(mfn) )
497 {
498 SH_VLOG("mark_dirty OOR! mfn=%lx pfn=%lx max=%x (dom %p)",
499 mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
500 SH_VLOG("dom=%p caf=%08x taf=%" PRtype_info,
501 page_get_owner(mfn_to_page(mfn)),
502 mfn_to_page(mfn)->count_info,
503 mfn_to_page(mfn)->u.inuse.type_info );
504 }
505 #endif
506 }
509 static inline void mark_dirty(struct domain *d, unsigned int mfn)
510 {
511 if ( unlikely(shadow_mode_log_dirty(d)) )
512 {
513 shadow_lock(d);
514 __mark_dirty(d, mfn);
515 shadow_unlock(d);
516 }
517 }
520 /************************************************************************/
521 #if CONFIG_PAGING_LEVELS <= 2
522 static inline void
523 __shadow_get_l2e(
524 struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
525 {
526 ASSERT(shadow_mode_enabled(v->domain));
528 *psl2e = v->arch.shadow_vtable[l2_table_offset(va)];
529 }
531 static inline void
532 __shadow_set_l2e(
533 struct vcpu *v, unsigned long va, l2_pgentry_t value)
534 {
535 ASSERT(shadow_mode_enabled(v->domain));
537 v->arch.shadow_vtable[l2_table_offset(va)] = value;
538 }
540 static inline void
541 __guest_get_l2e(
542 struct vcpu *v, unsigned long va, l2_pgentry_t *pl2e)
543 {
544 *pl2e = v->arch.guest_vtable[l2_table_offset(va)];
545 }
547 static inline void
548 __guest_set_l2e(
549 struct vcpu *v, unsigned long va, l2_pgentry_t value)
550 {
551 struct domain *d = v->domain;
553 v->arch.guest_vtable[l2_table_offset(va)] = value;
555 if ( unlikely(shadow_mode_translate(d)) )
556 update_hl2e(v, va);
558 __mark_dirty(d, pagetable_get_pfn(v->arch.guest_table));
559 }
561 static inline void
562 __direct_get_l2e(
563 struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
564 {
565 l2_pgentry_t *phys_vtable;
567 ASSERT(shadow_mode_enabled(v->domain));
569 phys_vtable = map_domain_page(
570 pagetable_get_pfn(v->domain->arch.phys_table));
572 *psl2e = phys_vtable[l2_table_offset(va)];
574 unmap_domain_page(phys_vtable);
575 }
577 static inline void
578 __direct_set_l2e(
579 struct vcpu *v, unsigned long va, l2_pgentry_t value)
580 {
581 l2_pgentry_t *phys_vtable;
583 ASSERT(shadow_mode_enabled(v->domain));
585 phys_vtable = map_domain_page(
586 pagetable_get_pfn(v->domain->arch.phys_table));
588 phys_vtable[l2_table_offset(va)] = value;
590 unmap_domain_page(phys_vtable);
591 }
593 static inline void
594 update_hl2e(struct vcpu *v, unsigned long va)
595 {
596 int index = l2_table_offset(va);
597 unsigned long mfn;
598 l2_pgentry_t gl2e = v->arch.guest_vtable[index];
599 l1_pgentry_t old_hl2e, new_hl2e;
600 int need_flush = 0;
602 ASSERT(shadow_mode_translate(v->domain));
604 old_hl2e = v->arch.hl2_vtable[index];
606 if ( (l2e_get_flags(gl2e) & _PAGE_PRESENT) &&
607 VALID_MFN(mfn = get_mfn_from_gpfn(l2e_get_pfn(gl2e))) )
608 new_hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
609 else
610 new_hl2e = l1e_empty();
612 // only do the ref counting if something has changed.
613 //
614 if ( (l1e_has_changed(old_hl2e, new_hl2e, PAGE_FLAG_MASK)) )
615 {
616 if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
617 !shadow_get_page(v->domain, mfn_to_page(l1e_get_pfn(new_hl2e)),
618 v->domain) )
619 new_hl2e = l1e_empty();
620 if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
621 {
622 shadow_put_page(v->domain, mfn_to_page(l1e_get_pfn(old_hl2e)));
623 need_flush = 1;
624 }
626 v->arch.hl2_vtable[l2_table_offset(va)] = new_hl2e;
628 if ( need_flush )
629 {
630 perfc_incrc(update_hl2e_invlpg);
631 flush_tlb_one_mask(v->domain->domain_dirty_cpumask,
632 &linear_pg_table[l1_linear_offset(va)]);
633 }
634 }
635 }
637 static inline void shadow_drop_references(
638 struct domain *d, struct page_info *page)
639 {
640 if ( likely(!shadow_mode_refcounts(d)) ||
641 ((page->u.inuse.type_info & PGT_count_mask) == 0) )
642 return;
644 /* XXX This needs more thought... */
645 printk("%s: needing to call shadow_remove_all_access for mfn=%lx\n",
646 __func__, page_to_mfn(page));
647 printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page),
648 page->count_info, page->u.inuse.type_info);
650 shadow_lock(d);
651 shadow_remove_all_access(d, page_to_mfn(page));
652 shadow_unlock(d);
654 printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_mfn(page),
655 page->count_info, page->u.inuse.type_info);
656 }
658 /* XXX Needs more thought. Neither pretty nor fast: a place holder. */
659 static inline void shadow_sync_and_drop_references(
660 struct domain *d, struct page_info *page)
661 {
662 if ( likely(!shadow_mode_refcounts(d)) )
663 return;
665 if ( page_out_of_sync(page) )
666 __shadow_sync_mfn(d, page_to_mfn(page));
668 shadow_remove_all_access(d, page_to_mfn(page));
669 }
670 #endif
672 /************************************************************************/
674 /*
675 * Add another shadow reference to smfn.
676 */
677 static inline int
678 get_shadow_ref(unsigned long smfn)
679 {
680 u32 x, nx;
682 ASSERT(mfn_valid(smfn));
684 x = mfn_to_page(smfn)->count_info;
685 nx = x + 1;
687 if ( unlikely(nx == 0) )
688 {
689 printk("get_shadow_ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
690 mfn_to_page(smfn)->u.inuse.type_info & PGT_mfn_mask,
691 smfn);
692 BUG();
693 }
695 // Guarded by the shadow lock...
696 //
697 mfn_to_page(smfn)->count_info = nx;
699 return 1;
700 }
702 /*
703 * Drop a shadow reference to smfn.
704 */
705 static inline void
706 put_shadow_ref(unsigned long smfn)
707 {
708 u32 x, nx;
710 ASSERT(mfn_valid(smfn));
712 x = mfn_to_page(smfn)->count_info;
713 nx = x - 1;
715 if ( unlikely(x == 0) )
716 {
717 printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%"
718 PRtype_info "\n",
719 smfn,
720 mfn_to_page(smfn)->count_info,
721 mfn_to_page(smfn)->u.inuse.type_info);
722 BUG();
723 }
725 // Guarded by the shadow lock...
726 //
727 mfn_to_page(smfn)->count_info = nx;
729 if ( unlikely(nx == 0) )
730 {
731 free_shadow_page(smfn);
732 }
733 }
735 static inline void
736 shadow_pin(unsigned long smfn)
737 {
738 ASSERT( !(mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) );
740 mfn_to_page(smfn)->u.inuse.type_info |= PGT_pinned;
741 if ( unlikely(!get_shadow_ref(smfn)) )
742 BUG();
743 }
745 static inline void
746 shadow_unpin(unsigned long smfn)
747 {
748 ASSERT( (mfn_to_page(smfn)->u.inuse.type_info & PGT_pinned) );
750 mfn_to_page(smfn)->u.inuse.type_info &= ~PGT_pinned;
751 put_shadow_ref(smfn);
752 }
754 /*
755 * SMP issue. The following code assumes the shadow lock is held. Re-visit
756 * when working on finer-grained locks for shadow.
757 */
758 static inline void set_guest_back_ptr(
759 struct domain *d, l1_pgentry_t spte, unsigned long smfn, unsigned int index)
760 {
761 if ( shadow_mode_external(d) ) {
762 unsigned long gmfn;
764 ASSERT(shadow_lock_is_acquired(d));
765 gmfn = l1e_get_pfn(spte);
766 mfn_to_page(gmfn)->tlbflush_timestamp = smfn;
767 mfn_to_page(gmfn)->u.inuse.type_info &= ~PGT_va_mask;
768 mfn_to_page(gmfn)->u.inuse.type_info |= (unsigned long) index << PGT_va_shift;
769 }
770 }
772 /************************************************************************/
773 #if CONFIG_PAGING_LEVELS <= 2
774 extern void shadow_mark_va_out_of_sync(
775 struct vcpu *v, unsigned long gpfn, unsigned long mfn,
776 unsigned long va);
778 static inline int l1pte_write_fault(
779 struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
780 unsigned long va)
781 {
782 struct domain *d = v->domain;
783 l1_pgentry_t gpte = *gpte_p;
784 l1_pgentry_t spte;
785 unsigned long gpfn = l1e_get_pfn(gpte);
786 unsigned long gmfn = gmfn_to_mfn(d, gpfn);
788 //printk("l1pte_write_fault gmfn=%lx\n", gmfn);
790 if ( unlikely(!VALID_MFN(gmfn)) )
791 {
792 SH_VLOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
793 *spte_p = l1e_empty();
794 return 0;
795 }
797 ASSERT(l1e_get_flags(gpte) & _PAGE_RW);
798 l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
799 spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
801 SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
802 l1e_get_intpte(spte), l1e_get_intpte(gpte));
804 __mark_dirty(d, gmfn);
806 if ( mfn_is_page_table(gmfn) )
807 shadow_mark_va_out_of_sync(v, gpfn, gmfn, va);
809 *gpte_p = gpte;
810 *spte_p = spte;
812 return 1;
813 }
815 static inline int l1pte_read_fault(
816 struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
817 {
818 l1_pgentry_t gpte = *gpte_p;
819 l1_pgentry_t spte = *spte_p;
820 unsigned long pfn = l1e_get_pfn(gpte);
821 unsigned long mfn = gmfn_to_mfn(d, pfn);
823 if ( unlikely(!VALID_MFN(mfn)) )
824 {
825 SH_VLOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
826 *spte_p = l1e_empty();
827 return 0;
828 }
830 l1e_add_flags(gpte, _PAGE_ACCESSED);
831 spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
833 if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
834 mfn_is_page_table(mfn) )
835 {
836 l1e_remove_flags(spte, _PAGE_RW);
837 }
839 SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
840 l1e_get_intpte(spte), l1e_get_intpte(gpte));
841 *gpte_p = gpte;
842 *spte_p = spte;
844 return 1;
845 }
846 #endif
848 static inline void l1pte_propagate_from_guest(
849 struct domain *d, guest_l1_pgentry_t gpte, l1_pgentry_t *spte_p)
850 {
851 unsigned long mfn;
852 l1_pgentry_t spte;
854 spte = l1e_empty();
856 if ( ((guest_l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
857 (_PAGE_PRESENT|_PAGE_ACCESSED)) &&
858 VALID_MFN(mfn = gmfn_to_mfn(d, l1e_get_pfn(gpte))) )
859 {
860 spte = l1e_from_pfn(
861 mfn, guest_l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL));
863 if ( shadow_mode_log_dirty(d) ||
864 !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) ||
865 mfn_is_page_table(mfn) )
866 {
867 l1e_remove_flags(spte, _PAGE_RW);
868 }
869 }
871 if ( l1e_get_intpte(spte) || l1e_get_intpte(gpte) )
872 SH_VVVLOG("%s: gpte=%" PRIpte ", new spte=%" PRIpte,
873 __func__, l1e_get_intpte(gpte), l1e_get_intpte(spte));
875 *spte_p = spte;
876 }
878 static inline void hl2e_propagate_from_guest(
879 struct domain *d, l2_pgentry_t gpde, l1_pgentry_t *hl2e_p)
880 {
881 unsigned long pfn = l2e_get_pfn(gpde);
882 unsigned long mfn;
883 l1_pgentry_t hl2e;
885 hl2e = l1e_empty();
887 if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
888 {
889 mfn = gmfn_to_mfn(d, pfn);
890 if ( VALID_MFN(mfn) && mfn_valid(mfn) )
891 hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
892 }
894 if ( l1e_get_intpte(hl2e) || l2e_get_intpte(gpde) )
895 SH_VVLOG("%s: gpde=%" PRIpte " hl2e=%" PRIpte, __func__,
896 l2e_get_intpte(gpde), l1e_get_intpte(hl2e));
898 *hl2e_p = hl2e;
899 }
901 static inline void l2pde_general(
902 struct domain *d,
903 guest_l2_pgentry_t *gpde_p,
904 l2_pgentry_t *spde_p,
905 unsigned long sl1mfn)
906 {
907 guest_l2_pgentry_t gpde = *gpde_p;
908 l2_pgentry_t spde;
910 spde = l2e_empty();
911 if ( (guest_l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
912 {
913 spde = l2e_from_pfn(
914 sl1mfn,
915 (guest_l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
917 /* N.B. PDEs do not have a dirty bit. */
918 guest_l2e_add_flags(gpde, _PAGE_ACCESSED);
920 *gpde_p = gpde;
921 }
923 if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) )
924 SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__,
925 l2e_get_intpte(gpde), l2e_get_intpte(spde));
927 *spde_p = spde;
928 }
930 static inline void l2pde_propagate_from_guest(
931 struct domain *d, guest_l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p)
932 {
933 guest_l2_pgentry_t gpde = *gpde_p;
934 unsigned long sl1mfn = 0;
936 if ( guest_l2e_get_flags(gpde) & _PAGE_PRESENT )
937 sl1mfn = __shadow_status(d, l2e_get_pfn(gpde), PGT_l1_shadow);
938 l2pde_general(d, gpde_p, spde_p, sl1mfn);
939 }
941 /************************************************************************/
943 // returns true if a tlb flush is needed
944 //
945 static int inline
946 validate_pte_change(
947 struct domain *d,
948 guest_l1_pgentry_t new_pte,
949 l1_pgentry_t *shadow_pte_p)
950 {
951 l1_pgentry_t old_spte, new_spte;
952 int need_flush = 0;
954 perfc_incrc(validate_pte_calls);
956 l1pte_propagate_from_guest(d, new_pte, &new_spte);
958 if ( shadow_mode_refcounts(d) )
959 {
960 old_spte = *shadow_pte_p;
962 if ( l1e_get_intpte(old_spte) == l1e_get_intpte(new_spte) )
963 {
964 // No accounting required...
965 //
966 perfc_incrc(validate_pte_changes1);
967 }
968 else if ( l1e_get_intpte(old_spte) == (l1e_get_intpte(new_spte)|_PAGE_RW) )
969 {
970 // Fast path for PTEs that have merely been write-protected
971 // (e.g., during a Unix fork()). A strict reduction in privilege.
972 //
973 perfc_incrc(validate_pte_changes2);
974 if ( likely(l1e_get_flags(new_spte) & _PAGE_PRESENT) )
975 shadow_put_page_type(d, mfn_to_page(l1e_get_pfn(new_spte)));
976 }
977 else if ( ((l1e_get_flags(old_spte) | l1e_get_flags(new_spte)) &
978 _PAGE_PRESENT ) &&
979 l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
980 {
981 // only do the ref counting if something important changed.
982 //
983 perfc_incrc(validate_pte_changes3);
985 if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
986 {
987 shadow_put_page_from_l1e(old_spte, d);
988 need_flush = 1;
989 }
990 if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
991 !shadow_get_page_from_l1e(new_spte, d) ) {
992 new_spte = l1e_empty();
993 need_flush = -1; /* need to unshadow the page */
994 }
995 }
996 else
997 {
998 perfc_incrc(validate_pte_changes4);
999 }
1002 *shadow_pte_p = new_spte;
1004 return need_flush;
1005 }
1007 // returns true if a tlb flush is needed
1008 //
1009 static int inline
1010 validate_hl2e_change(
1011 struct domain *d,
1012 l2_pgentry_t new_gpde,
1013 l1_pgentry_t *shadow_hl2e_p)
1014 {
1015 l1_pgentry_t old_hl2e, new_hl2e;
1016 int need_flush = 0;
1018 perfc_incrc(validate_hl2e_calls);
1020 old_hl2e = *shadow_hl2e_p;
1021 hl2e_propagate_from_guest(d, new_gpde, &new_hl2e);
1023 // Only do the ref counting if something important changed.
1024 //
1025 if ( ((l1e_get_flags(old_hl2e) | l1e_get_flags(new_hl2e)) & _PAGE_PRESENT) &&
1026 l1e_has_changed(old_hl2e, new_hl2e, _PAGE_PRESENT) )
1027 {
1028 perfc_incrc(validate_hl2e_changes);
1030 if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
1031 !get_page(mfn_to_page(l1e_get_pfn(new_hl2e)), d) )
1032 new_hl2e = l1e_empty();
1033 if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
1034 {
1035 put_page(mfn_to_page(l1e_get_pfn(old_hl2e)));
1036 need_flush = 1;
1037 }
1038 }
1040 *shadow_hl2e_p = new_hl2e;
1042 return need_flush;
1043 }
1045 // returns true if a tlb flush is needed
1046 //
1047 static int inline
1048 validate_pde_change(
1049 struct domain *d,
1050 guest_l2_pgentry_t new_gpde,
1051 l2_pgentry_t *shadow_pde_p)
1052 {
1053 l2_pgentry_t old_spde, new_spde;
1054 int need_flush = 0;
1056 perfc_incrc(validate_pde_calls);
1058 old_spde = *shadow_pde_p;
1059 l2pde_propagate_from_guest(d, &new_gpde, &new_spde);
1061 // Only do the ref counting if something important changed.
1062 //
1063 if ( ((l2e_get_intpte(old_spde) | l2e_get_intpte(new_spde)) & _PAGE_PRESENT) &&
1064 l2e_has_changed(old_spde, new_spde, _PAGE_PRESENT) )
1065 {
1066 perfc_incrc(validate_pde_changes);
1068 if ( (l2e_get_flags(new_spde) & _PAGE_PRESENT) &&
1069 !get_shadow_ref(l2e_get_pfn(new_spde)) )
1070 BUG();
1071 if ( l2e_get_flags(old_spde) & _PAGE_PRESENT )
1072 {
1073 put_shadow_ref(l2e_get_pfn(old_spde));
1074 need_flush = 1;
1075 }
1076 }
1078 *shadow_pde_p = new_spde;
1080 return need_flush;
1081 }
1083 /*********************************************************************/
1085 #if SHADOW_HASH_DEBUG
1087 static void shadow_audit(struct domain *d, int print)
1088 {
1089 int live = 0, free = 0, j = 0, abs;
1090 struct shadow_status *a;
1092 for ( j = 0; j < shadow_ht_buckets; j++ )
1093 {
1094 a = &d->arch.shadow_ht[j];
1095 if ( a->gpfn_and_flags )
1096 {
1097 live++;
1098 ASSERT(a->smfn);
1099 }
1100 else
1101 ASSERT(!a->next);
1103 a = a->next;
1104 while ( a && (live < 9999) )
1105 {
1106 live++;
1107 if ( (a->gpfn_and_flags == 0) || (a->smfn == 0) )
1108 {
1109 printk("XXX live=%d gpfn+flags=%lx sp=%lx next=%p\n",
1110 live, a->gpfn_and_flags, a->smfn, a->next);
1111 BUG();
1112 }
1113 ASSERT(a->smfn);
1114 a = a->next;
1115 }
1116 ASSERT(live < 9999);
1117 }
1119 for ( a = d->arch.shadow_ht_free; a != NULL; a = a->next )
1120 free++;
1122 if ( print )
1123 printk("Xlive=%d free=%d\n", live, free);
1125 // BUG: this only works if there's only a single domain which is
1126 // using shadow tables.
1127 //
1128 abs = (
1129 perfc_value(shadow_l1_pages) +
1130 perfc_value(shadow_l2_pages) +
1131 perfc_value(hl2_table_pages) +
1132 perfc_value(snapshot_pages) +
1133 perfc_value(writable_pte_predictions)
1134 ) - live;
1135 #ifdef PERF_COUNTERS
1136 if ( (abs < -1) || (abs > 1) )
1137 {
1138 printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n",
1139 live, free,
1140 perfc_value(shadow_l1_pages),
1141 perfc_value(shadow_l2_pages),
1142 perfc_value(hl2_table_pages),
1143 perfc_value(snapshot_pages),
1144 perfc_value(writable_pte_predictions));
1145 BUG();
1146 }
1147 #endif
1149 // XXX ought to add some code to audit the out-of-sync entries, too.
1150 //
1151 }
1152 #else
1153 #define shadow_audit(p, print) ((void)0)
1154 #endif
1157 static inline struct shadow_status *hash_bucket(
1158 struct domain *d, unsigned int gpfn)
1159 {
1160 return &d->arch.shadow_ht[gpfn % shadow_ht_buckets];
1161 }
1164 /*
1165 * N.B. This takes a guest pfn (i.e. a pfn in the guest's namespace,
1166 * which, depending on full shadow mode, may or may not equal
1167 * its mfn).
1168 * It returns the shadow's mfn, or zero if it doesn't exist.
1169 */
1170 static inline unsigned long __shadow_status(
1171 struct domain *d, unsigned long gpfn, unsigned long stype)
1172 {
1173 struct shadow_status *p, *x, *head;
1174 unsigned long key = gpfn | stype;
1176 ASSERT(shadow_lock_is_acquired(d));
1177 ASSERT(gpfn == (gpfn & PGT_mfn_mask));
1178 ASSERT(stype && !(stype & ~PGT_type_mask));
1180 perfc_incrc(shadow_status_calls);
1182 x = head = hash_bucket(d, gpfn);
1183 p = NULL;
1185 shadow_audit(d, 0);
1187 do
1188 {
1189 ASSERT(x->gpfn_and_flags || ((x == head) && (x->next == NULL)));
1191 if ( x->gpfn_and_flags == key )
1192 {
1193 #if SHADOW_DEBUG
1194 if ( unlikely(shadow_status_noswap) )
1195 return x->smfn;
1196 #endif
1197 /* Pull-to-front if 'x' isn't already the head item. */
1198 if ( unlikely(x != head) )
1199 {
1200 /* Delete 'x' from list and reinsert immediately after head. */
1201 p->next = x->next;
1202 x->next = head->next;
1203 head->next = x;
1205 /* Swap 'x' contents with head contents. */
1206 SWAP(head->gpfn_and_flags, x->gpfn_and_flags);
1207 SWAP(head->smfn, x->smfn);
1208 }
1209 else
1210 {
1211 perfc_incrc(shadow_status_hit_head);
1212 }
1214 return head->smfn;
1215 }
1217 p = x;
1218 x = x->next;
1219 }
1220 while ( x != NULL );
1222 perfc_incrc(shadow_status_miss);
1223 return 0;
1224 }
1226 /*
1227 * Not clear if pull-to-front is worthwhile for this or not,
1228 * as it generally needs to scan the entire bucket anyway.
1229 * Much simpler without.
1231 * Either returns PGT_none, or PGT_l{1,2,3,4}_page_table.
1232 */
1233 static inline u32
1234 shadow_max_pgtable_type(struct domain *d, unsigned long gpfn,
1235 unsigned long *smfn)
1236 {
1237 struct shadow_status *x;
1238 u32 pttype = PGT_none, type;
1240 ASSERT(shadow_lock_is_acquired(d));
1241 ASSERT(gpfn == (gpfn & PGT_mfn_mask));
1243 perfc_incrc(shadow_max_type);
1245 x = hash_bucket(d, gpfn);
1247 while ( x && x->gpfn_and_flags )
1248 {
1249 if ( (x->gpfn_and_flags & PGT_mfn_mask) == gpfn )
1250 {
1251 type = x->gpfn_and_flags & PGT_type_mask;
1253 switch ( type )
1254 {
1255 case PGT_hl2_shadow:
1256 // Treat an HL2 as if it's an L1
1257 //
1258 type = PGT_l1_shadow;
1259 break;
1260 case PGT_snapshot:
1261 case PGT_writable_pred:
1262 // Ignore snapshots -- they don't in and of themselves constitute
1263 // treating a page as a page table
1264 //
1265 goto next;
1266 case PGT_base_page_table:
1267 // Early exit if we found the max possible value
1268 //
1269 return type;
1270 default:
1271 break;
1272 }
1274 if ( type > pttype )
1275 {
1276 pttype = type;
1277 if ( smfn )
1278 *smfn = x->smfn;
1279 }
1280 }
1281 next:
1282 x = x->next;
1283 }
1285 return pttype;
1286 }
1288 static inline void delete_shadow_status(
1289 struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
1290 {
1291 struct shadow_status *p, *x, *n, *head;
1292 unsigned long key = gpfn | stype;
1294 ASSERT(shadow_lock_is_acquired(d));
1295 ASSERT(!(gpfn & ~PGT_mfn_mask));
1296 ASSERT(stype && !(stype & ~PGT_type_mask));
1298 head = hash_bucket(d, gpfn);
1300 SH_VLOG("delete gpfn=%lx t=%08x bucket=%p", gpfn, stype, head);
1301 shadow_audit(d, 0);
1303 /* Match on head item? */
1304 if ( head->gpfn_and_flags == key )
1305 {
1306 if ( (n = head->next) != NULL )
1307 {
1308 /* Overwrite head with contents of following node. */
1309 head->gpfn_and_flags = n->gpfn_and_flags;
1310 head->smfn = n->smfn;
1312 /* Delete following node. */
1313 head->next = n->next;
1315 /* Add deleted node to the free list. */
1316 n->gpfn_and_flags = 0;
1317 n->smfn = 0;
1318 n->next = d->arch.shadow_ht_free;
1319 d->arch.shadow_ht_free = n;
1320 }
1321 else
1322 {
1323 /* This bucket is now empty. Initialise the head node. */
1324 head->gpfn_and_flags = 0;
1325 head->smfn = 0;
1326 }
1328 goto found;
1329 }
1331 p = head;
1332 x = head->next;
1334 do
1335 {
1336 if ( x->gpfn_and_flags == key )
1337 {
1338 /* Delete matching node. */
1339 p->next = x->next;
1341 /* Add deleted node to the free list. */
1342 x->gpfn_and_flags = 0;
1343 x->smfn = 0;
1344 x->next = d->arch.shadow_ht_free;
1345 d->arch.shadow_ht_free = x;
1347 goto found;
1348 }
1350 p = x;
1351 x = x->next;
1352 }
1353 while ( x != NULL );
1355 /* If we got here, it wasn't in the list! */
1356 BUG();
1358 found:
1359 // release ref to page
1360 if ( stype != PGT_writable_pred )
1361 put_page(mfn_to_page(gmfn));
1363 shadow_audit(d, 0);
1364 }
1366 static inline void set_shadow_status(
1367 struct domain *d, unsigned long gpfn, unsigned long gmfn,
1368 unsigned long smfn, unsigned long stype)
1369 {
1370 struct shadow_status *x, *head, *extra;
1371 int i;
1372 unsigned long key = gpfn | stype;
1374 SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype);
1376 ASSERT(shadow_lock_is_acquired(d));
1378 ASSERT(shadow_mode_translate(d) || gpfn);
1379 ASSERT(!(gpfn & ~PGT_mfn_mask));
1381 // XXX - need to be more graceful.
1382 ASSERT(VALID_MFN(gmfn));
1384 ASSERT(stype && !(stype & ~PGT_type_mask));
1386 x = head = hash_bucket(d, gpfn);
1388 SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)",
1389 gpfn, smfn, stype, x, x->next);
1390 shadow_audit(d, 0);
1392 // grab a reference to the guest page to represent the entry in the shadow
1393 // hash table
1394 //
1395 // XXX - Should PGT_writable_pred grab a page ref?
1396 // - Who/how are these hash table entry refs flushed if/when a page
1397 // is given away by the domain?
1398 //
1399 if ( stype != PGT_writable_pred )
1400 get_page(mfn_to_page(gmfn), d);
1402 /*
1403 * STEP 1. If page is already in the table, update it in place.
1404 */
1405 do
1406 {
1407 if ( unlikely(x->gpfn_and_flags == key) )
1408 {
1409 if ( stype != PGT_writable_pred )
1410 BUG(); // we should never replace entries into the hash table
1411 x->smfn = smfn;
1412 if ( stype != PGT_writable_pred )
1413 put_page(mfn_to_page(gmfn)); // already had a ref...
1414 goto done;
1415 }
1417 x = x->next;
1418 }
1419 while ( x != NULL );
1421 /*
1422 * STEP 2. The page must be inserted into the table.
1423 */
1425 /* If the bucket is empty then insert the new page as the head item. */
1426 if ( head->gpfn_and_flags == 0 )
1427 {
1428 head->gpfn_and_flags = key;
1429 head->smfn = smfn;
1430 ASSERT(head->next == NULL);
1431 goto done;
1432 }
1434 /* We need to allocate a new node. Ensure the quicklist is non-empty. */
1435 if ( unlikely(d->arch.shadow_ht_free == NULL) )
1436 {
1437 SH_VLOG("Allocate more shadow hashtable blocks.");
1439 extra = xmalloc_bytes(
1440 sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
1442 /* XXX Should be more graceful here. */
1443 if ( extra == NULL )
1444 BUG();
1446 memset(extra, 0, sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
1448 /* Record the allocation block so it can be correctly freed later. */
1449 d->arch.shadow_extras_count++;
1450 *((struct shadow_status **)&extra[shadow_ht_extra_size]) =
1451 d->arch.shadow_ht_extras;
1452 d->arch.shadow_ht_extras = &extra[0];
1454 /* Thread a free chain through the newly-allocated nodes. */
1455 for ( i = 0; i < (shadow_ht_extra_size - 1); i++ )
1456 extra[i].next = &extra[i+1];
1457 extra[i].next = NULL;
1459 /* Add the new nodes to the free list. */
1460 d->arch.shadow_ht_free = &extra[0];
1461 }
1463 /* Allocate a new node from the quicklist. */
1464 x = d->arch.shadow_ht_free;
1465 d->arch.shadow_ht_free = x->next;
1467 /* Initialise the new node and insert directly after the head item. */
1468 x->gpfn_and_flags = key;
1469 x->smfn = smfn;
1470 x->next = head->next;
1471 head->next = x;
1473 done:
1474 shadow_audit(d, 0);
1476 if ( stype <= PGT_l4_shadow )
1477 {
1478 // add to front of list of pages to check when removing write
1479 // permissions for a page...
1480 //
1481 }
1482 }
1484 /************************************************************************/
1486 static inline void guest_physmap_add_page(
1487 struct domain *d, unsigned long gpfn, unsigned long mfn)
1488 {
1489 struct domain_mmap_cache c1, c2;
1491 if ( likely(!shadow_mode_translate(d)) )
1492 return;
1494 domain_mmap_cache_init(&c1);
1495 domain_mmap_cache_init(&c2);
1496 shadow_lock(d);
1497 shadow_sync_and_drop_references(d, mfn_to_page(mfn));
1498 set_p2m_entry(d, gpfn, mfn, &c1, &c2);
1499 set_gpfn_from_mfn(mfn, gpfn);
1500 shadow_unlock(d);
1501 domain_mmap_cache_destroy(&c1);
1502 domain_mmap_cache_destroy(&c2);
1503 }
1505 static inline void guest_physmap_remove_page(
1506 struct domain *d, unsigned long gpfn, unsigned long mfn)
1507 {
1508 struct domain_mmap_cache c1, c2;
1510 if ( likely(!shadow_mode_translate(d)) )
1511 return;
1513 domain_mmap_cache_init(&c1);
1514 domain_mmap_cache_init(&c2);
1515 shadow_lock(d);
1516 shadow_sync_and_drop_references(d, mfn_to_page(mfn));
1517 set_p2m_entry(d, gpfn, -1, &c1, &c2);
1518 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
1519 shadow_unlock(d);
1520 domain_mmap_cache_destroy(&c1);
1521 domain_mmap_cache_destroy(&c2);
1522 }
1524 /************************************************************************/
1526 void static inline
1527 shadow_update_min_max(unsigned long smfn, int index)
1528 {
1529 struct page_info *sl1page = mfn_to_page(smfn);
1530 u32 min_max = sl1page->tlbflush_timestamp;
1531 int min = SHADOW_MIN(min_max);
1532 int max = SHADOW_MAX(min_max);
1533 int update = 0;
1535 if ( index < min )
1536 {
1537 min = index;
1538 update = 1;
1539 }
1540 if ( index > max )
1541 {
1542 max = index;
1543 update = 1;
1544 }
1545 if ( update )
1546 sl1page->tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max);
1547 }
1549 #if CONFIG_PAGING_LEVELS <= 2
1550 extern void shadow_map_l1_into_current_l2(unsigned long va);
1552 void static inline
1553 shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
1554 {
1555 struct vcpu *v = current;
1556 struct domain *d = v->domain;
1557 l2_pgentry_t sl2e = {0};
1559 __shadow_get_l2e(v, va, &sl2e);
1560 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
1561 {
1562 /*
1563 * Either the L1 is not shadowed, or the shadow isn't linked into
1564 * the current shadow L2.
1565 */
1566 if ( create_l1_shadow )
1567 {
1568 perfc_incrc(shadow_set_l1e_force_map);
1569 shadow_map_l1_into_current_l2(va);
1570 }
1571 else /* check to see if it exists; if so, link it in */
1572 {
1573 l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
1574 unsigned long gl1pfn = l2e_get_pfn(gpde);
1575 unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
1577 ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
1579 if ( sl1mfn )
1580 {
1581 perfc_incrc(shadow_set_l1e_unlinked);
1582 if ( !get_shadow_ref(sl1mfn) )
1583 BUG();
1584 l2pde_general(d, &gpde, &sl2e, sl1mfn);
1585 __guest_set_l2e(v, va, gpde);
1586 __shadow_set_l2e(v, va, sl2e);
1587 }
1588 else
1589 {
1590 // no shadow exists, so there's nothing to do.
1591 perfc_incrc(shadow_set_l1e_fail);
1592 return;
1593 }
1594 }
1595 }
1597 __shadow_get_l2e(v, va, &sl2e);
1599 if ( shadow_mode_refcounts(d) )
1600 {
1601 l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
1603 // only do the ref counting if something important changed.
1604 //
1605 if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
1606 {
1607 if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
1608 !shadow_get_page_from_l1e(new_spte, d) )
1609 new_spte = l1e_empty();
1610 if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
1611 shadow_put_page_from_l1e(old_spte, d);
1612 }
1613 }
1616 set_guest_back_ptr(d, new_spte, l2e_get_pfn(sl2e), l1_table_offset(va));
1617 shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
1618 shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
1619 }
1620 #endif
1621 /************************************************************************/
1623 static inline int
1624 shadow_mode_page_writable(unsigned long va, struct cpu_user_regs *regs, unsigned long gpfn)
1625 {
1626 struct vcpu *v = current;
1627 struct domain *d = v->domain;
1628 unsigned long mfn = gmfn_to_mfn(d, gpfn);
1629 u32 type = mfn_to_page(mfn)->u.inuse.type_info & PGT_type_mask;
1631 if ( shadow_mode_refcounts(d) &&
1632 (type == PGT_writable_page) )
1633 type = shadow_max_pgtable_type(d, gpfn, NULL);
1635 // Strange but true: writable page tables allow kernel-mode access
1636 // to L1 page table pages via write-protected PTEs... Similarly, write
1637 // access to all page table pages is granted for shadow_mode_write_all
1638 // clients.
1639 //
1640 if ( ((shadow_mode_write_l1(d) && (type == PGT_l1_page_table)) ||
1641 (shadow_mode_write_all(d) && type && (type <= PGT_l4_page_table))) &&
1642 ((va < HYPERVISOR_VIRT_START)
1643 #if defined(__x86_64__)
1644 || (va >= HYPERVISOR_VIRT_END)
1645 #endif
1646 ) &&
1647 guest_kernel_mode(v, regs) )
1648 return 1;
1650 return 0;
1651 }
1653 #if CONFIG_PAGING_LEVELS <= 2
1654 static inline l1_pgentry_t gva_to_gpte(unsigned long gva)
1655 {
1656 l2_pgentry_t gpde;
1657 l1_pgentry_t gpte;
1658 struct vcpu *v = current;
1660 ASSERT( shadow_mode_translate(current->domain) );
1662 __guest_get_l2e(v, gva, &gpde);
1663 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
1664 return l1e_empty();
1666 // This is actually overkill - we only need to make sure the hl2
1667 // is in-sync.
1668 //
1669 shadow_sync_va(v, gva);
1671 if ( unlikely(__copy_from_user(&gpte,
1672 &linear_pg_table[gva >> PAGE_SHIFT],
1673 sizeof(gpte))) )
1674 {
1675 FSH_LOG("gva_to_gpte got a fault on gva=%lx", gva);
1676 return l1e_empty();
1677 }
1679 return gpte;
1680 }
1682 static inline unsigned long gva_to_gpa(unsigned long gva)
1683 {
1684 l1_pgentry_t gpte;
1686 gpte = gva_to_gpte(gva);
1687 if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )
1688 return 0;
1690 return l1e_get_paddr(gpte) + (gva & ~PAGE_MASK);
1691 }
1692 #endif
1693 /************************************************************************/
1695 extern void __update_pagetables(struct vcpu *v);
1696 static inline void update_pagetables(struct vcpu *v)
1697 {
1698 struct domain *d = v->domain;
1699 int paging_enabled;
1701 if ( hvm_guest(v) )
1702 paging_enabled = hvm_paging_enabled(v);
1703 else
1704 // HACK ALERT: there's currently no easy way to figure out if a domU
1705 // has set its arch.guest_table to zero, vs not yet initialized it.
1706 //
1707 paging_enabled = !!pagetable_get_paddr(v->arch.guest_table);
1709 /*
1710 * We don't call __update_pagetables() when hvm guest paging is
1711 * disabled as we want the linear_pg_table to be inaccessible so that
1712 * we bail out early of shadow_fault() if the hvm guest tries illegal
1713 * accesses while it thinks paging is turned off.
1714 */
1715 if ( unlikely(shadow_mode_enabled(d)) && paging_enabled )
1716 {
1717 shadow_lock(d);
1718 __update_pagetables(v);
1719 shadow_unlock(d);
1720 }
1722 if ( likely(!shadow_mode_external(d)) )
1723 {
1724 if ( shadow_mode_enabled(d) )
1725 v->arch.monitor_table = v->arch.shadow_table;
1726 else
1727 #if CONFIG_PAGING_LEVELS == 4
1728 if ( !(v->arch.flags & TF_kernel_mode) )
1729 v->arch.monitor_table = v->arch.guest_table_user;
1730 else
1731 #endif
1732 v->arch.monitor_table = v->arch.guest_table;
1733 }
1734 }
1736 void clear_all_shadow_status(struct domain *d);
1738 #if SHADOW_DEBUG
1739 extern int _check_pagetable(struct vcpu *v, char *s);
1740 extern int _check_all_pagetables(struct vcpu *v, char *s);
1742 #define check_pagetable(_v, _s) _check_pagetable(_v, _s)
1743 //#define check_pagetable(_v, _s) _check_all_pagetables(_v, _s)
1745 #else
1746 #define check_pagetable(_v, _s) ((void)0)
1747 #endif
1749 #endif /* XEN_SHADOW_H */
1751 /*
1752 * Local variables:
1753 * mode: C
1754 * c-set-style: "BSD"
1755 * c-basic-offset: 4
1756 * tab-width: 4
1757 * indent-tabs-mode: nil
1758 * End:
1759 */