ia64/xen-unstable

view xen/arch/x86/shadow32.c @ 6536:44e70a3d6b4c

Unmap domain page on exit

Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Arun Sharma <arun.sharma@intel.com>
Author: adsharma@los-vmm.sc.intel.com
Date: Wed Aug 17 11:21:37 2005 -0800 (2005-08-17)
Parents: c1bcea912992
Children: f36aee6f8902
line source
1 /******************************************************************************
2 * arch/x86/shadow.c
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
23 #include <xen/config.h>
24 #include <xen/types.h>
25 #include <xen/mm.h>
26 #include <xen/domain_page.h>
27 #include <asm/shadow.h>
28 #include <asm/page.h>
29 #include <xen/event.h>
30 #include <xen/sched.h>
31 #include <xen/trace.h>
33 #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
35 static void shadow_free_snapshot(struct domain *d,
36 struct out_of_sync_entry *entry);
37 static void remove_out_of_sync_entries(struct domain *d, unsigned long smfn);
38 static void free_writable_pte_predictions(struct domain *d);
40 #if SHADOW_DEBUG
41 static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
42 #endif
44 /********
46 There's a per-domain shadow table spin lock which works fine for SMP
47 hosts. We don't have to worry about interrupts as no shadow operations
48 happen in an interrupt context. It's probably not quite ready for SMP
49 guest operation as we have to worry about synchronisation between gpte
50 and spte updates. It's possible that this might only happen in a
51 hypercall context, in which case we'll probably have a per-domain
52 hypercall lock anyhow (at least initially).
54 ********/
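/*
 * The entry points below are serialised by this lock: remove_shadow(),
 * shadow_mode_enable() and shadow_mode_control() each take shadow_lock(d)
 * around their work, while internal helpers such as
 * shadow_remove_all_write_access() and shadow_mark_mfn_out_of_sync()
 * ASSERT(shadow_lock_is_acquired(d)) and rely on the caller holding it.
 */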
56 static inline int
57 shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
58 unsigned long new_type)
59 {
60 struct pfn_info *page = pfn_to_page(gmfn);
61 int pinned = 0, okay = 1;
63 if ( page_out_of_sync(page) )
64 {
65 // Don't know how long ago this snapshot was taken.
66 // Can't trust it to be recent enough.
67 //
68 __shadow_sync_mfn(d, gmfn);
69 }
71 if ( !shadow_mode_refcounts(d) )
72 return 1;
74 if ( unlikely(page_is_page_table(page)) )
75 return 1;
77 FSH_LOG("%s: gpfn=%lx gmfn=%lx nt=%08lx", __func__, gpfn, gmfn, new_type);
79 if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
80 {
81 FSH_LOG("%s: couldn't find/remove all write accesses, gpfn=%lx gmfn=%lx",
82 __func__, gpfn, gmfn);
83 #if 1 || defined(LIVE_DANGEROUSLY)
84 set_bit(_PGC_page_table, &page->count_info);
85 return 1;
86 #endif
87 return 0;
89 }
91 // To convert this page for use as a page table, the writable count
92 // should now be zero. Test this by grabbing the page as a page table,
93 // and then immediately releasing. This will also deal with any
94 // necessary TLB flushing issues for us.
95 //
96 // The cruft here about pinning doesn't really work right. This
97 // needs rethinking/rewriting... Need to gracefully deal with the
98 // TLB flushes required when promoting a writable page, and also deal
99 // with any outstanding (external) writable refs to this page (by
100 // refusing to promote it). The pinning headache complicates this
101 // code -- it would all get much simpler if we stop using
102 // shadow_lock() and move the shadow code to BIGLOCK().
103 //
104 if ( unlikely(!get_page(page, d)) )
105 BUG(); // XXX -- needs more thought for a graceful failure
106 if ( unlikely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) )
107 {
108 pinned = 1;
109 put_page_and_type(page);
110 }
111 if ( get_page_type(page, PGT_base_page_table) )
112 {
113 set_bit(_PGC_page_table, &page->count_info);
114 put_page_type(page);
115 }
116 else
117 {
118 printk("shadow_promote: get_page_type failed "
119 "dom%d gpfn=%lx gmfn=%lx t=%08lx\n",
120 d->domain_id, gpfn, gmfn, new_type);
121 okay = 0;
122 }
124 // Now put the type back to writable...
125 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
126 BUG(); // XXX -- needs more thought for a graceful failure
127 if ( unlikely(pinned) )
128 {
129 if ( unlikely(test_and_set_bit(_PGT_pinned,
130 &page->u.inuse.type_info)) )
131 BUG(); // hmm... someone pinned this again?
132 }
133 else
134 put_page_and_type(page);
136 return okay;
137 }
139 static inline void
140 shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
141 {
142 if ( !shadow_mode_refcounts(d) )
143 return;
145 ASSERT(frame_table[gmfn].count_info & PGC_page_table);
147 if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
148 {
149 clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
151 if ( page_out_of_sync(pfn_to_page(gmfn)) )
152 {
153 remove_out_of_sync_entries(d, gmfn);
154 }
155 }
156 }
158 /*
159 * Things in shadow mode that collect get_page() refs to the domain's
160 * pages are:
161 * - PGC_allocated takes a gen count, just like normal.
162 * - A writable page can be pinned (paravirtualized guests may consider
163 * these pages to be L1s or L2s, and don't know the difference).
164 * Pinning a page takes a gen count (but, for domains in shadow mode,
165 * it *doesn't* take a type count)
166 * - CR3 grabs a ref to whatever it points at, just like normal.
167 * - Shadow mode grabs an initial gen count for itself, as a placeholder
168 * for whatever references will exist.
169 * - Shadow PTEs that point to a page take a gen count, just like regular
170 * PTEs. However, they don't get a type count, as get_page_type() is
171 * hardwired to keep writable pages' counts at 1 for domains in shadow
172 * mode.
173 * - Whenever we shadow a page, the entry in the shadow hash grabs a
174 * general ref to the page.
175 * - Whenever a page goes out of sync, the out of sync entry grabs a
176 * general ref to the page.
177 */
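/*
 * The matching ref drops appear below: release_out_of_sync_entry()
 * put_page()s the general ref taken for an out-of-sync entry, and
 * free_shadow_pages() releases the refs held via v->arch.shadow_table,
 * the monitor table entries and the pinned shadow roots.
 */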
178 /*
179 * pfn_info fields for pages allocated as shadow pages:
180 *
181 * All 32 bits of count_info are a simple count of refs to this shadow
182 * from a) other shadow pages, b) current CR3's (aka ed->arch.shadow_table),
183 * c) if it's a pinned shadow root pgtable, d) outstanding out-of-sync
184 * references.
185 *
186 * u.inuse._domain is left NULL, to prevent accidentally allowing some random
187 * domain from gaining permissions to map this page.
188 *
189 * u.inuse.type_info & PGT_type_mask remembers what kind of page is being
190 * shadowed.
191 * u.inuse.type_info & PGT_mfn_mask holds the mfn of the page being shadowed.
192 * u.inuse.type_info & PGT_pinned says that an extra reference to this shadow
193 * currently exists because this is a shadow of a root page, and we
194 * don't want to let those disappear just because no CR3 is currently pointing
195 * at it.
196 *
197 * tlbflush_timestamp holds a min & max index of valid page table entries
198 * within the shadow page.
199 */
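/*
 * Concretely, alloc_shadow_page() below sets
 *     page->u.inuse.type_info = psh_type | gmfn;
 * so, for example, an L1 shadow of guest frame 0x1234 carries
 * (PGT_l1_shadow | 0x1234), and free_shadow_page() recovers the two halves
 * with PGT_type_mask and PGT_mfn_mask. The min/max pair is packed into
 * tlbflush_timestamp by SHADOW_ENCODE_MIN_MAX(min, max) (see
 * shadow_map_l1_into_current_l2()) and unpacked with SHADOW_MIN()/
 * SHADOW_MAX() when the shadow L1 is freed or snapshotted.
 */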
201 static inline unsigned long
202 alloc_shadow_page(struct domain *d,
203 unsigned long gpfn, unsigned long gmfn,
204 u32 psh_type)
205 {
206 struct pfn_info *page;
207 unsigned long smfn;
208 int pin = 0;
210 // Currently, we only keep pre-zero'ed pages around for use as L1's...
211 // This will change. Soon.
212 //
213 if ( psh_type == PGT_l1_shadow )
214 {
215 if ( !list_empty(&d->arch.free_shadow_frames) )
216 {
217 struct list_head *entry = d->arch.free_shadow_frames.next;
218 page = list_entry(entry, struct pfn_info, list);
219 list_del(entry);
220 perfc_decr(free_l1_pages);
221 }
222 else
223 {
224 page = alloc_domheap_page(NULL);
225 void *l1 = map_domain_page(page_to_pfn(page));
226 memset(l1, 0, PAGE_SIZE);
227 unmap_domain_page(l1);
228 }
229 }
230 else
231 page = alloc_domheap_page(NULL);
233 if ( unlikely(page == NULL) )
234 {
235 printk("Couldn't alloc shadow page! dom%d count=%d\n",
236 d->domain_id, d->arch.shadow_page_count);
237 printk("Shadow table counts: l1=%d l2=%d hl2=%d snapshot=%d\n",
238 perfc_value(shadow_l1_pages),
239 perfc_value(shadow_l2_pages),
240 perfc_value(hl2_table_pages),
241 perfc_value(snapshot_pages));
242 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
243 }
245 smfn = page_to_pfn(page);
247 ASSERT( (gmfn & ~PGT_mfn_mask) == 0 );
248 page->u.inuse.type_info = psh_type | gmfn;
249 page->count_info = 0;
250 page->tlbflush_timestamp = 0;
252 switch ( psh_type )
253 {
254 case PGT_l1_shadow:
255 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
256 goto fail;
257 perfc_incr(shadow_l1_pages);
258 d->arch.shadow_page_count++;
259 break;
261 case PGT_l2_shadow:
262 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
263 goto fail;
264 perfc_incr(shadow_l2_pages);
265 d->arch.shadow_page_count++;
266 if ( PGT_l2_page_table == PGT_root_page_table )
267 pin = 1;
269 break;
271 case PGT_hl2_shadow:
272 // Treat an hl2 as an L1 for purposes of promotion.
273 // For external mode domains, treat them as an L2 for purposes of
274 // pinning.
275 //
276 if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
277 goto fail;
278 perfc_incr(hl2_table_pages);
279 d->arch.hl2_page_count++;
280 if ( shadow_mode_external(d) &&
281 (PGT_l2_page_table == PGT_root_page_table) )
282 pin = 1;
284 break;
286 case PGT_snapshot:
287 perfc_incr(snapshot_pages);
288 d->arch.snapshot_page_count++;
289 break;
291 default:
292 printk("Alloc shadow weird page type type=%08x\n", psh_type);
293 BUG();
294 break;
295 }
297 // Don't add a new shadow of something that already has a snapshot.
298 //
299 ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
301 set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
303 if ( pin )
304 shadow_pin(smfn);
306 return smfn;
308 fail:
309 FSH_LOG("promotion of pfn=%lx mfn=%lx failed! external gnttab refs?",
310 gpfn, gmfn);
311 free_domheap_page(page);
312 return 0;
313 }
315 static void inline
316 free_shadow_l1_table(struct domain *d, unsigned long smfn)
317 {
318 l1_pgentry_t *pl1e = map_domain_page(smfn);
319 int i;
320 struct pfn_info *spage = pfn_to_page(smfn);
321 u32 min_max = spage->tlbflush_timestamp;
322 int min = SHADOW_MIN(min_max);
323 int max = SHADOW_MAX(min_max);
325 for ( i = min; i <= max; i++ )
326 {
327 shadow_put_page_from_l1e(pl1e[i], d);
328 pl1e[i] = l1e_empty();
329 }
331 unmap_domain_page(pl1e);
332 }
334 static void inline
335 free_shadow_hl2_table(struct domain *d, unsigned long smfn)
336 {
337 l1_pgentry_t *hl2 = map_domain_page(smfn);
338 int i, limit;
340 SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
342 #ifdef __i386__
343 if ( shadow_mode_external(d) )
344 limit = L2_PAGETABLE_ENTRIES;
345 else
346 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
347 #else
348 limit = 0; /* XXX x86/64 XXX */
349 #endif
351 for ( i = 0; i < limit; i++ )
352 {
353 if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
354 put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
355 }
357 unmap_domain_page(hl2);
358 }
360 static void inline
361 free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
362 {
363 l2_pgentry_t *pl2e = map_domain_page(smfn);
364 int i, external = shadow_mode_external(d);
366 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
367 if ( external || is_guest_l2_slot(type, i) )
368 if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
369 put_shadow_ref(l2e_get_pfn(pl2e[i]));
371 if ( (PGT_base_page_table == PGT_l2_page_table) &&
372 shadow_mode_translate(d) && !external )
373 {
374 // free the ref to the hl2
375 //
376 put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
377 }
379 unmap_domain_page(pl2e);
380 }
382 void free_shadow_page(unsigned long smfn)
383 {
384 struct pfn_info *page = &frame_table[smfn];
385 unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
386 struct domain *d = page_get_owner(pfn_to_page(gmfn));
387 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
388 unsigned long type = page->u.inuse.type_info & PGT_type_mask;
390 SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
392 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
394 delete_shadow_status(d, gpfn, gmfn, type);
396 switch ( type )
397 {
398 case PGT_l1_shadow:
399 perfc_decr(shadow_l1_pages);
400 shadow_demote(d, gpfn, gmfn);
401 free_shadow_l1_table(d, smfn);
402 break;
404 case PGT_l2_shadow:
405 perfc_decr(shadow_l2_pages);
406 shadow_demote(d, gpfn, gmfn);
407 free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
408 break;
410 case PGT_hl2_shadow:
411 perfc_decr(hl2_table_pages);
412 shadow_demote(d, gpfn, gmfn);
413 free_shadow_hl2_table(d, smfn);
414 break;
416 case PGT_snapshot:
417 perfc_decr(snapshot_pages);
418 break;
420 default:
421 printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
422 page_to_pfn(page), page->u.inuse.type_info);
423 break;
424 }
426 d->arch.shadow_page_count--;
428 // No TLB flushes are needed the next time this page gets allocated.
429 //
430 page->tlbflush_timestamp = 0;
431 page->u.free.cpumask = CPU_MASK_NONE;
433 if ( type == PGT_l1_shadow )
434 {
435 list_add(&page->list, &d->arch.free_shadow_frames);
436 perfc_incr(free_l1_pages);
437 }
438 else
439 free_domheap_page(page);
440 }
442 void
443 remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
444 {
445 unsigned long smfn;
447 //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
449 shadow_lock(d);
451 while ( stype >= PGT_l1_shadow )
452 {
453 smfn = __shadow_status(d, gpfn, stype);
454 if ( smfn && MFN_PINNED(smfn) )
455 shadow_unpin(smfn);
456 stype -= PGT_l1_shadow;
457 }
459 shadow_unlock(d);
460 }
462 static void inline
463 release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
464 {
465 struct pfn_info *page;
467 page = &frame_table[entry->gmfn];
469 // Decrement ref count of guest & shadow pages
470 //
471 put_page(page);
473 // Only use entries that have low bits clear...
474 //
475 if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
476 {
477 put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
478 entry->writable_pl1e = -2;
479 }
480 else
481 ASSERT( entry->writable_pl1e == -1 );
483 // Free the snapshot
484 //
485 shadow_free_snapshot(d, entry);
486 }
488 static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
489 {
490 struct out_of_sync_entry *entry = d->arch.out_of_sync;
491 struct out_of_sync_entry **prev = &d->arch.out_of_sync;
492 struct out_of_sync_entry *found = NULL;
494 // NB: Be careful not to call something that manipulates this list
495 // while walking it. Collect the results into a separate list
496 // first, then walk that list.
497 //
498 while ( entry )
499 {
500 if ( entry->gmfn == gmfn )
501 {
502 // remove from out of sync list
503 *prev = entry->next;
505 // add to found list
506 entry->next = found;
507 found = entry;
509 entry = *prev;
510 continue;
511 }
512 prev = &entry->next;
513 entry = entry->next;
514 }
516 prev = NULL;
517 entry = found;
518 while ( entry )
519 {
520 release_out_of_sync_entry(d, entry);
522 prev = &entry->next;
523 entry = entry->next;
524 }
526 // Add found list to free list
527 if ( prev )
528 {
529 *prev = d->arch.out_of_sync_free;
530 d->arch.out_of_sync_free = found;
531 }
532 }
534 static void free_out_of_sync_state(struct domain *d)
535 {
536 struct out_of_sync_entry *entry;
538 // NB: Be careful not to call something that manipulates this list
539 // while walking it. Remove one item at a time, and always
540 // restart from start of list.
541 //
542 while ( (entry = d->arch.out_of_sync) )
543 {
544 d->arch.out_of_sync = entry->next;
545 release_out_of_sync_entry(d, entry);
547 entry->next = d->arch.out_of_sync_free;
548 d->arch.out_of_sync_free = entry;
549 }
550 }
552 static void free_shadow_pages(struct domain *d)
553 {
554 int i;
555 struct shadow_status *x;
556 struct vcpu *v;
558 /*
559 * WARNING! The shadow page table must not currently be in use!
560 * e.g., You are expected to have paused the domain and synchronized CR3.
561 */
563 if( !d->arch.shadow_ht ) return;
565 shadow_audit(d, 1);
567 // first, remove any outstanding refs from out_of_sync entries...
568 //
569 free_out_of_sync_state(d);
571 // second, remove any outstanding refs from v->arch.shadow_table
572 // and CR3.
573 //
574 for_each_vcpu(d, v)
575 {
576 if ( pagetable_get_paddr(v->arch.shadow_table) )
577 {
578 put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
579 v->arch.shadow_table = mk_pagetable(0);
580 }
582 if ( v->arch.monitor_shadow_ref )
583 {
584 put_shadow_ref(v->arch.monitor_shadow_ref);
585 v->arch.monitor_shadow_ref = 0;
586 }
587 }
589 // For external shadows, remove the monitor table's refs
590 //
591 if ( shadow_mode_external(d) )
592 {
593 for_each_vcpu(d, v)
594 {
595 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
597 if ( mpl2e )
598 {
599 l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
600 l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
602 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
603 {
604 put_shadow_ref(l2e_get_pfn(hl2e));
605 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
606 }
607 if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
608 {
609 put_shadow_ref(l2e_get_pfn(smfn));
610 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
611 }
612 }
613 }
614 }
616 // Now, the only refs to shadow pages that are left are from the shadow
617 // pages themselves. We just unpin the pinned pages, and the rest
618 // should automatically disappear.
619 //
620 // NB: Beware: each explicit or implicit call to free_shadow_page
621 // can/will result in the hash bucket getting rewritten out from
622 // under us... First, collect the list of pinned pages, then
623 // free them.
624 //
625 for ( i = 0; i < shadow_ht_buckets; i++ )
626 {
627 u32 count;
628 unsigned long *mfn_list;
630 /* Skip empty buckets. */
631 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
632 continue;
634 count = 0;
635 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
636 if ( MFN_PINNED(x->smfn) )
637 count++;
638 if ( !count )
639 continue;
641 mfn_list = xmalloc_array(unsigned long, count);
642 count = 0;
643 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
644 if ( MFN_PINNED(x->smfn) )
645 mfn_list[count++] = x->smfn;
647 while ( count )
648 {
649 shadow_unpin(mfn_list[--count]);
650 }
651 xfree(mfn_list);
652 }
654 // Now free the pre-zero'ed pages from the domain
655 //
656 struct list_head *list_ent, *tmp;
657 list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
658 {
659 list_del(list_ent);
660 perfc_decr(free_l1_pages);
662 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
663 free_domheap_page(page);
664 }
666 shadow_audit(d, 0);
668 SH_VLOG("Free shadow table.");
669 }
671 void shadow_mode_init(void)
672 {
673 }
675 int _shadow_mode_refcounts(struct domain *d)
676 {
677 return shadow_mode_refcounts(d);
678 }
680 static void alloc_monitor_pagetable(struct vcpu *v)
681 {
682 unsigned long mmfn;
683 l2_pgentry_t *mpl2e;
684 struct pfn_info *mmfn_info;
685 struct domain *d = v->domain;
687 ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
689 mmfn_info = alloc_domheap_page(NULL);
690 ASSERT(mmfn_info != NULL);
692 mmfn = page_to_pfn(mmfn_info);
693 mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
694 memset(mpl2e, 0, PAGE_SIZE);
696 #ifdef __i386__ /* XXX screws x86/64 build */
697 memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
698 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
699 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
700 #endif
702 mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
703 l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
704 __PAGE_HYPERVISOR);
706 // map the phys_to_machine map into the Read-Only MPT space for this domain
707 mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
708 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
709 __PAGE_HYPERVISOR);
711 // Don't (yet) have mappings for these...
712 // Don't want to accidentally see the idle_pg_table's linear mapping.
713 //
714 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
715 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
717 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
718 v->arch.monitor_vtable = mpl2e;
719 }
721 /*
722 * Free the pages for monitor_table and hl2_table
723 */
724 void free_monitor_pagetable(struct vcpu *v)
725 {
726 l2_pgentry_t *mpl2e, hl2e, sl2e;
727 unsigned long mfn;
729 ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
731 mpl2e = v->arch.monitor_vtable;
733 /*
734 * First get the mfn for hl2_table by looking at monitor_table
735 */
736 hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
737 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
738 {
739 mfn = l2e_get_pfn(hl2e);
740 ASSERT(mfn);
741 put_shadow_ref(mfn);
742 }
744 sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
745 if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
746 {
747 mfn = l2e_get_pfn(sl2e);
748 ASSERT(mfn);
749 put_shadow_ref(mfn);
750 }
752 unmap_domain_page(mpl2e);
754 /*
755 * Then free monitor_table.
756 */
757 mfn = pagetable_get_pfn(v->arch.monitor_table);
758 free_domheap_page(&frame_table[mfn]);
760 v->arch.monitor_table = mk_pagetable(0);
761 v->arch.monitor_vtable = 0;
762 }
764 int
765 set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
766 struct domain_mmap_cache *l2cache,
767 struct domain_mmap_cache *l1cache)
768 {
769 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
770 l2_pgentry_t *l2, l2e;
771 l1_pgentry_t *l1;
772 struct pfn_info *l1page;
773 unsigned long va = pfn << PAGE_SHIFT;
775 ASSERT(tabpfn != 0);
777 l2 = map_domain_page_with_cache(tabpfn, l2cache);
778 l2e = l2[l2_table_offset(va)];
779 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
780 {
781 l1page = alloc_domheap_page(NULL);
782 if ( !l1page )
783 {
784 unmap_domain_page_with_cache(l2, l2cache);
785 return 0;
786 }
788 l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
789 memset(l1, 0, PAGE_SIZE);
790 unmap_domain_page_with_cache(l1, l1cache);
792 l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
793 l2[l2_table_offset(va)] = l2e;
794 }
795 unmap_domain_page_with_cache(l2, l2cache);
797 l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
798 l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
799 unmap_domain_page_with_cache(l1, l1cache);
801 return 1;
802 }
804 static int
805 alloc_p2m_table(struct domain *d)
806 {
807 struct list_head *list_ent;
808 struct pfn_info *page, *l2page;
809 l2_pgentry_t *l2;
810 unsigned long mfn, pfn;
811 struct domain_mmap_cache l1cache, l2cache;
813 l2page = alloc_domheap_page(NULL);
814 if ( l2page == NULL )
815 return 0;
817 domain_mmap_cache_init(&l1cache);
818 domain_mmap_cache_init(&l2cache);
820 d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
821 l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
822 memset(l2, 0, PAGE_SIZE);
823 unmap_domain_page_with_cache(l2, &l2cache);
825 list_ent = d->page_list.next;
826 while ( list_ent != &d->page_list )
827 {
828 page = list_entry(list_ent, struct pfn_info, list);
829 mfn = page_to_pfn(page);
830 pfn = machine_to_phys_mapping[mfn];
831 ASSERT(pfn != INVALID_M2P_ENTRY);
832 ASSERT(pfn < (1u<<20));
834 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
836 list_ent = page->list.next;
837 }
839 list_ent = d->xenpage_list.next;
840 while ( list_ent != &d->xenpage_list )
841 {
842 page = list_entry(list_ent, struct pfn_info, list);
843 mfn = page_to_pfn(page);
844 pfn = machine_to_phys_mapping[mfn];
845 if ( (pfn != INVALID_M2P_ENTRY) &&
846 (pfn < (1u<<20)) )
847 {
848 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
849 }
851 list_ent = page->list.next;
852 }
854 domain_mmap_cache_destroy(&l2cache);
855 domain_mmap_cache_destroy(&l1cache);
857 return 1;
858 }
860 static void
861 free_p2m_table(struct domain *d)
862 {
863 // uh, this needs some work... :)
864 BUG();
865 }
867 int __shadow_mode_enable(struct domain *d, unsigned int mode)
868 {
869 struct vcpu *v;
870 int new_modes = (mode & ~d->arch.shadow_mode);
872 // Gotta be adding something to call this function.
873 ASSERT(new_modes);
875 // can't take anything away by calling this function.
876 ASSERT(!(d->arch.shadow_mode & ~mode));
878 for_each_vcpu(d, v)
879 {
880 invalidate_shadow_ldt(v);
882 // We need to set these up for __update_pagetables().
883 // See the comment there.
885 /*
886 * arch.guest_vtable
887 */
888 if ( v->arch.guest_vtable &&
889 (v->arch.guest_vtable != __linear_l2_table) )
890 {
891 unmap_domain_page(v->arch.guest_vtable);
892 }
893 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
894 v->arch.guest_vtable = __linear_l2_table;
895 else
896 v->arch.guest_vtable = NULL;
898 /*
899 * arch.shadow_vtable
900 */
901 if ( v->arch.shadow_vtable &&
902 (v->arch.shadow_vtable != __shadow_linear_l2_table) )
903 {
904 unmap_domain_page(v->arch.shadow_vtable);
905 }
906 if ( !(mode & SHM_external) )
907 v->arch.shadow_vtable = __shadow_linear_l2_table;
908 else
909 v->arch.shadow_vtable = NULL;
911 /*
912 * arch.hl2_vtable
913 */
914 if ( v->arch.hl2_vtable &&
915 (v->arch.hl2_vtable != __linear_hl2_table) )
916 {
917 unmap_domain_page(v->arch.hl2_vtable);
918 }
919 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
920 v->arch.hl2_vtable = __linear_hl2_table;
921 else
922 v->arch.hl2_vtable = NULL;
924 /*
925 * arch.monitor_table & arch.monitor_vtable
926 */
927 if ( v->arch.monitor_vtable )
928 {
929 free_monitor_pagetable(v);
930 }
931 if ( mode & SHM_external )
932 {
933 alloc_monitor_pagetable(v);
934 }
935 }
937 if ( new_modes & SHM_enable )
938 {
939 ASSERT( !d->arch.shadow_ht );
940 d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
941 if ( d->arch.shadow_ht == NULL )
942 goto nomem;
944 memset(d->arch.shadow_ht, 0,
945 shadow_ht_buckets * sizeof(struct shadow_status));
946 }
948 if ( new_modes & SHM_log_dirty )
949 {
950 ASSERT( !d->arch.shadow_dirty_bitmap );
951 d->arch.shadow_dirty_bitmap_size = (d->max_pages + 63) & ~63;
952 d->arch.shadow_dirty_bitmap =
953 xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
954 (8 * sizeof(unsigned long)));
955 if ( d->arch.shadow_dirty_bitmap == NULL )
956 {
957 d->arch.shadow_dirty_bitmap_size = 0;
958 goto nomem;
959 }
960 memset(d->arch.shadow_dirty_bitmap, 0,
961 d->arch.shadow_dirty_bitmap_size/8);
962 }
964 if ( new_modes & SHM_translate )
965 {
966 if ( !(new_modes & SHM_external) )
967 {
968 ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
969 if ( !alloc_p2m_table(d) )
970 {
971 printk("alloc_p2m_table failed (out-of-memory?)\n");
972 goto nomem;
973 }
974 }
975 else
976 {
977 // external guests provide their own memory for their P2M maps.
978 //
979 ASSERT( d == page_get_owner(
980 &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
981 }
982 }
984 printk("audit1\n");
985 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
986 printk("audit1 done\n");
988 // Get rid of any shadow pages from any previous shadow mode.
989 //
990 free_shadow_pages(d);
992 printk("audit2\n");
993 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
994 printk("audit2 done\n");
996 /*
997 * Tear down its counts by disassembling its page-table-based ref counts.
998 * Also remove CR3's gcount/tcount.
999 * That leaves things like GDTs and LDTs and external refs intact.
1001 * Most pages will be writable tcount=0.
1002 * Some will still be L1 tcount=0 or L2 tcount=0.
1003 * Maybe some pages will be type none tcount=0.
1004 * Pages granted external writable refs (via grant tables?) will
1005 * still have a non-zero tcount. That's OK.
1007 * gcounts will generally be 1 for PGC_allocated.
1008 * GDTs and LDTs will have additional gcounts.
1009 * Any grant-table based refs will still be in the gcount.
1011 * We attempt to grab writable refs to each page (thus setting its type).
1012 * Immediately put back those type refs.
1014 * Assert that no pages are left with L1/L2/L3/L4 type.
1015 */
1016 audit_adjust_pgtables(d, -1, 1);
1018 d->arch.shadow_mode = mode;
1020 if ( shadow_mode_refcounts(d) )
1022 struct list_head *list_ent = d->page_list.next;
1023 while ( list_ent != &d->page_list )
1025 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
1026 if ( !get_page_type(page, PGT_writable_page) )
1027 BUG();
1028 put_page_type(page);
1030 list_ent = page->list.next;
1034 audit_adjust_pgtables(d, 1, 1);
1036 printk("audit3\n");
1037 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
1038 printk("audit3 done\n");
1040 return 0;
1042 nomem:
1043 if ( (new_modes & SHM_enable) )
1045 xfree(d->arch.shadow_ht);
1046 d->arch.shadow_ht = NULL;
1048 if ( (new_modes & SHM_log_dirty) )
1050 xfree(d->arch.shadow_dirty_bitmap);
1051 d->arch.shadow_dirty_bitmap = NULL;
1053 if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
1054 pagetable_get_paddr(d->arch.phys_table) )
1056 free_p2m_table(d);
1058 return -ENOMEM;
1061 int shadow_mode_enable(struct domain *d, unsigned int mode)
1062 {
1063 int rc;
1064 shadow_lock(d);
1065 rc = __shadow_mode_enable(d, mode);
1066 shadow_unlock(d);
1067 return rc;
1068 }
1070 static void
1071 translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
1073 int i;
1074 l1_pgentry_t *l1;
1076 l1 = map_domain_page(l1mfn);
1077 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1079 if ( is_guest_l1_slot(i) &&
1080 (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
1082 unsigned long mfn = l1e_get_pfn(l1[i]);
1083 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1084 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1085 l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
1088 unmap_domain_page(l1);
1091 // This is not general enough to handle arbitrary pagetables
1092 // with shared L1 pages, etc., but it is sufficient for bringing
1093 // up dom0.
1094 //
1095 void
1096 translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
1097 unsigned int type)
1099 int i;
1100 l2_pgentry_t *l2;
1102 ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
1104 l2 = map_domain_page(l2mfn);
1105 for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
1107 if ( is_guest_l2_slot(type, i) &&
1108 (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
1110 unsigned long mfn = l2e_get_pfn(l2[i]);
1111 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1112 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1113 l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
1114 translate_l1pgtable(d, p2m, mfn);
1117 unmap_domain_page(l2);
1120 static void free_shadow_ht_entries(struct domain *d)
1122 struct shadow_status *x, *n;
1124 SH_VLOG("freed tables count=%d l1=%d l2=%d",
1125 d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
1126 perfc_value(shadow_l2_pages));
1128 n = d->arch.shadow_ht_extras;
1129 while ( (x = n) != NULL )
1131 d->arch.shadow_extras_count--;
1132 n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
1133 xfree(x);
1136 d->arch.shadow_ht_extras = NULL;
1137 d->arch.shadow_ht_free = NULL;
1139 ASSERT(d->arch.shadow_extras_count == 0);
1140 SH_VLOG("freed extras, now %d", d->arch.shadow_extras_count);
1142 if ( d->arch.shadow_dirty_bitmap != NULL )
1144 xfree(d->arch.shadow_dirty_bitmap);
1145 d->arch.shadow_dirty_bitmap = 0;
1146 d->arch.shadow_dirty_bitmap_size = 0;
1149 xfree(d->arch.shadow_ht);
1150 d->arch.shadow_ht = NULL;
1153 static void free_out_of_sync_entries(struct domain *d)
1155 struct out_of_sync_entry *x, *n;
1157 n = d->arch.out_of_sync_extras;
1158 while ( (x = n) != NULL )
1160 d->arch.out_of_sync_extras_count--;
1161 n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
1162 xfree(x);
1165 d->arch.out_of_sync_extras = NULL;
1166 d->arch.out_of_sync_free = NULL;
1167 d->arch.out_of_sync = NULL;
1169 ASSERT(d->arch.out_of_sync_extras_count == 0);
1170 FSH_LOG("freed extra out_of_sync entries, now %d",
1171 d->arch.out_of_sync_extras_count);
1174 void __shadow_mode_disable(struct domain *d)
1176 if ( unlikely(!shadow_mode_enabled(d)) )
1177 return;
1179 /*
1180 * Currently this does not fix up page ref counts, so it is valid to call
1181 * only when a domain is being destroyed.
1182 */
1183 BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags) &&
1184 shadow_mode_refcounts(d));
1185 d->arch.shadow_tainted_refcnts = shadow_mode_refcounts(d);
1187 free_shadow_pages(d);
1188 free_writable_pte_predictions(d);
1190 #ifndef NDEBUG
1191 int i;
1192 for ( i = 0; i < shadow_ht_buckets; i++ )
1194 if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
1196 printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
1197 __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
1198 BUG();
1201 #endif
1203 d->arch.shadow_mode = 0;
1205 free_shadow_ht_entries(d);
1206 free_out_of_sync_entries(d);
1208 struct vcpu *v;
1209 for_each_vcpu(d, v)
1211 update_pagetables(v);
1215 static int shadow_mode_table_op(
1216 struct domain *d, dom0_shadow_control_t *sc)
1218 unsigned int op = sc->op;
1219 int i, rc = 0;
1220 struct vcpu *v;
1222 ASSERT(shadow_lock_is_acquired(d));
1224 SH_VLOG("shadow mode table op %lx %lx count %d",
1225 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
1226 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
1227 d->arch.shadow_page_count);
1229 shadow_audit(d, 1);
1231 switch ( op )
1233 case DOM0_SHADOW_CONTROL_OP_FLUSH:
1234 free_shadow_pages(d);
1236 d->arch.shadow_fault_count = 0;
1237 d->arch.shadow_dirty_count = 0;
1238 d->arch.shadow_dirty_net_count = 0;
1239 d->arch.shadow_dirty_block_count = 0;
1241 break;
1243 case DOM0_SHADOW_CONTROL_OP_CLEAN:
1244 free_shadow_pages(d);
1246 sc->stats.fault_count = d->arch.shadow_fault_count;
1247 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1248 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1249 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1251 d->arch.shadow_fault_count = 0;
1252 d->arch.shadow_dirty_count = 0;
1253 d->arch.shadow_dirty_net_count = 0;
1254 d->arch.shadow_dirty_block_count = 0;
1256 if ( (d->max_pages > sc->pages) ||
1257 (sc->dirty_bitmap == NULL) ||
1258 (d->arch.shadow_dirty_bitmap == NULL) )
1260 rc = -EINVAL;
1261 break;
1264 sc->pages = d->max_pages;
1266 #define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
1267 for ( i = 0; i < d->max_pages; i += chunk )
1269 int bytes = ((((d->max_pages - i) > chunk) ?
1270 chunk : (d->max_pages - i)) + 7) / 8;
1272 if (copy_to_user(
1273 sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
1274 d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
1275 bytes))
1277 // copy_to_user can fail when copying to guest app memory.
1278 // app should zero buffer after mallocing, and pin it
1279 rc = -EINVAL;
1280 memset(
1281 d->arch.shadow_dirty_bitmap +
1282 (i/(8*sizeof(unsigned long))),
1283 0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
1284 break;
1287 memset(
1288 d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
1289 0, bytes);
1292 break;
1294 case DOM0_SHADOW_CONTROL_OP_PEEK:
1295 sc->stats.fault_count = d->arch.shadow_fault_count;
1296 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1297 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1298 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1300 if ( (d->max_pages > sc->pages) ||
1301 (sc->dirty_bitmap == NULL) ||
1302 (d->arch.shadow_dirty_bitmap == NULL) )
1304 rc = -EINVAL;
1305 break;
1308 sc->pages = d->max_pages;
1309 if (copy_to_user(
1310 sc->dirty_bitmap, d->arch.shadow_dirty_bitmap, (d->max_pages+7)/8))
1312 rc = -EINVAL;
1313 break;
1316 break;
1318 default:
1319 rc = -EINVAL;
1320 break;
1323 SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
1324 shadow_audit(d, 1);
1326 for_each_vcpu(d,v)
1327 __update_pagetables(v);
1329 return rc;
1332 int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
1334 unsigned int op = sc->op;
1335 int rc = 0;
1336 struct vcpu *v;
1338 if ( unlikely(d == current->domain) )
1340 DPRINTK("Don't try to do a shadow op on yourself!\n");
1341 return -EINVAL;
1344 domain_pause(d);
1346 shadow_lock(d);
1348 switch ( op )
1350 case DOM0_SHADOW_CONTROL_OP_OFF:
1351 __shadow_sync_all(d);
1352 __shadow_mode_disable(d);
1353 break;
1355 case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
1356 free_shadow_pages(d);
1357 rc = __shadow_mode_enable(d, SHM_enable);
1358 break;
1360 case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
1361 free_shadow_pages(d);
1362 rc = __shadow_mode_enable(
1363 d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
1364 break;
1366 case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
1367 free_shadow_pages(d);
1368 rc = __shadow_mode_enable(
1369 d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
1370 break;
1372 default:
1373 rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
1374 break;
1377 shadow_unlock(d);
1379 for_each_vcpu(d,v)
1380 update_pagetables(v);
1382 domain_unpause(d);
1384 return rc;
1387 /*
1388 * XXX KAF: Why is this VMX specific?
1389 */
1390 void vmx_shadow_clear_state(struct domain *d)
1392 SH_VVLOG("%s:", __func__);
1393 shadow_lock(d);
1394 free_shadow_pages(d);
1395 shadow_unlock(d);
1396 update_pagetables(d->vcpu[0]);
1399 unsigned long
1400 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
1402 ASSERT( shadow_mode_translate(d) );
1404 perfc_incrc(gpfn_to_mfn_foreign);
1406 unsigned long va = gpfn << PAGE_SHIFT;
1407 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1408 l2_pgentry_t *l2 = map_domain_page(tabpfn);
1409 l2_pgentry_t l2e = l2[l2_table_offset(va)];
1410 unmap_domain_page(l2);
1411 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1413 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
1414 d->domain_id, gpfn, l2e_get_intpte(l2e));
1415 return INVALID_MFN;
1417 l1_pgentry_t *l1 = map_domain_page(l2e_get_pfn(l2e));
1418 l1_pgentry_t l1e = l1[l1_table_offset(va)];
1419 unmap_domain_page(l1);
1421 #if 0
1422 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
1423 d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
1424 #endif
1426 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
1428 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
1429 d->domain_id, gpfn, l1e_get_intpte(l1e));
1430 return INVALID_MFN;
1433 return l1e_get_pfn(l1e);
1436 static unsigned long
1437 shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
1438 unsigned long smfn)
1440 unsigned long hl2mfn;
1441 l1_pgentry_t *hl2;
1442 int limit;
1444 ASSERT(PGT_base_page_table == PGT_l2_page_table);
1446 if ( unlikely(!(hl2mfn = alloc_shadow_page(d, gpfn, gmfn, PGT_hl2_shadow))) )
1448 printk("Couldn't alloc an HL2 shadow for pfn=%lx mfn=%lx\n",
1449 gpfn, gmfn);
1450 BUG(); /* XXX Deal gracefully with failure. */
1453 SH_VVLOG("shadow_hl2_table(gpfn=%lx, gmfn=%lx, smfn=%lx) => %lx",
1454 gpfn, gmfn, smfn, hl2mfn);
1455 perfc_incrc(shadow_hl2_table_count);
1457 hl2 = map_domain_page(hl2mfn);
1459 #ifdef __i386__
1460 if ( shadow_mode_external(d) )
1461 limit = L2_PAGETABLE_ENTRIES;
1462 else
1463 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
1464 #else
1465 limit = 0; /* XXX x86/64 XXX */
1466 #endif
1468 memset(hl2, 0, limit * sizeof(l1_pgentry_t));
1470 if ( !shadow_mode_external(d) )
1472 memset(&hl2[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 0,
1473 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1475 // Setup easy access to the GL2, SL2, and HL2 frames.
1476 //
1477 hl2[l2_table_offset(LINEAR_PT_VIRT_START)] =
1478 l1e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1479 hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1480 l1e_from_pfn(smfn, __PAGE_HYPERVISOR);
1481 hl2[l2_table_offset(PERDOMAIN_VIRT_START)] =
1482 l1e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1485 unmap_domain_page(hl2);
1487 return hl2mfn;
1490 /*
1491 * This could take and use a snapshot, and validate the entire page at
1492 * once, or it could continue to fault in entries one at a time...
1493 * Might be worth investigating...
1494 */
1495 static unsigned long shadow_l2_table(
1496 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1498 unsigned long smfn;
1499 l2_pgentry_t *spl2e;
1501 SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
1503 perfc_incrc(shadow_l2_table_count);
1505 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l2_shadow))) )
1507 printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
1508 gpfn, gmfn);
1509 BUG(); /* XXX Deal gracefully with failure. */
1512 spl2e = (l2_pgentry_t *)map_domain_page(smfn);
1514 /* Install hypervisor and 2x linear p.t. mappings. */
1515 if ( (PGT_base_page_table == PGT_l2_page_table) &&
1516 !shadow_mode_external(d) )
1518 /*
1519 * We could proactively fill in PDEs for pages that are already
1520 * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
1521 * (restriction required for coherence of the accessed bit). However,
1522 * we tried it and it didn't help performance. This is simpler.
1523 */
1524 memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
1526 /* Install hypervisor and 2x linear p.t. mappings. */
1527 memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1528 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1529 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1531 spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1532 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
1534 spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
1535 l2e_from_paddr(__pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt),
1536 __PAGE_HYPERVISOR);
1538 if ( shadow_mode_translate(d) ) // NB: not external
1540 unsigned long hl2mfn;
1542 spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
1543 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
1544 __PAGE_HYPERVISOR);
1546 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
1547 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
1549 // shadow_mode_translate (but not external) sl2 tables hold a
1550 // ref to their hl2.
1551 //
1552 if ( !get_shadow_ref(hl2mfn) )
1553 BUG();
1555 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1556 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1558 else
1559 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1560 l2e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1562 else
1564 memset(spl2e, 0, L2_PAGETABLE_ENTRIES*sizeof(l2_pgentry_t));
1567 unmap_domain_page(spl2e);
1569 SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
1570 return smfn;
1573 void shadow_map_l1_into_current_l2(unsigned long va)
1575 struct vcpu *v = current;
1576 struct domain *d = v->domain;
1577 l1_pgentry_t *gpl1e, *spl1e;
1578 l2_pgentry_t gl2e, sl2e;
1579 unsigned long gl1pfn, gl1mfn, sl1mfn;
1580 int i, init_table = 0;
1582 __guest_get_l2e(v, va, &gl2e);
1583 ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT);
1584 gl1pfn = l2e_get_pfn(gl2e);
1586 if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
1588 /* This L1 is NOT already shadowed so we need to shadow it. */
1589 SH_VVLOG("4a: l1 not shadowed");
1591 gl1mfn = __gpfn_to_mfn(d, gl1pfn);
1592 if ( unlikely(!VALID_MFN(gl1mfn)) )
1594 // Attempt to use an invalid pfn as an L1 page.
1595 // XXX this needs to be more graceful!
1596 BUG();
1599 if ( unlikely(!(sl1mfn =
1600 alloc_shadow_page(d, gl1pfn, gl1mfn, PGT_l1_shadow))) )
1602 printk("Couldn't alloc an L1 shadow for pfn=%lx mfn=%lx\n",
1603 gl1pfn, gl1mfn);
1604 BUG(); /* XXX Need to deal gracefully with failure. */
1607 perfc_incrc(shadow_l1_table_count);
1608 init_table = 1;
1610 else
1612 /* This L1 is shadowed already, but the L2 entry is missing. */
1613 SH_VVLOG("4b: was shadowed, l2 missing (%lx)", sl1mfn);
1616 #ifndef NDEBUG
1617 l2_pgentry_t old_sl2e;
1618 __shadow_get_l2e(v, va, &old_sl2e);
1619 ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
1620 #endif
1622 if ( !get_shadow_ref(sl1mfn) )
1623 BUG();
1624 l2pde_general(d, &gl2e, &sl2e, sl1mfn);
1625 __guest_set_l2e(v, va, gl2e);
1626 __shadow_set_l2e(v, va, sl2e);
1628 if ( init_table )
1630 l1_pgentry_t sl1e;
1631 int index = l1_table_offset(va);
1632 int min = 1, max = 0;
1634 gpl1e = &(linear_pg_table[l1_linear_offset(va) &
1635 ~(L1_PAGETABLE_ENTRIES-1)]);
1637 spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
1638 ~(L1_PAGETABLE_ENTRIES-1)]);
1640 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1642 l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
1643 if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
1644 unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
1645 sl1e = l1e_empty();
1646 if ( l1e_get_flags(sl1e) == 0 )
1648 // First copy entries from 0 until first invalid.
1649 // Then copy entries from index until first invalid.
1650 //
1651 if ( i < index ) {
1652 i = index - 1;
1653 continue;
1655 break;
1657 spl1e[i] = sl1e;
1658 if ( unlikely(i < min) )
1659 min = i;
1660 if ( likely(i > max) )
1661 max = i;
1664 frame_table[sl1mfn].tlbflush_timestamp =
1665 SHADOW_ENCODE_MIN_MAX(min, max);
1669 void shadow_invlpg(struct vcpu *v, unsigned long va)
1671 struct domain *d = v->domain;
1672 l1_pgentry_t gpte, spte;
1674 ASSERT(shadow_mode_enabled(d));
1676 shadow_lock(d);
1678 __shadow_sync_va(v, va);
1680 // XXX mafetter: will need to think about 4MB pages...
1682 // It's not strictly necessary to update the shadow here,
1683 // but it might save a fault later.
1684 //
1685 if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
1686 sizeof(gpte))) {
1687 perfc_incrc(shadow_invlpg_faults);
1688 return;
1690 l1pte_propagate_from_guest(d, gpte, &spte);
1691 shadow_set_l1e(va, spte, 1);
1693 shadow_unlock(d);
1696 struct out_of_sync_entry *
1697 shadow_alloc_oos_entry(struct domain *d)
1699 struct out_of_sync_entry *f, *extra;
1700 unsigned size, i;
1702 if ( unlikely(d->arch.out_of_sync_free == NULL) )
1704 FSH_LOG("Allocate more fullshadow tuple blocks.");
1706 size = sizeof(void *) + (out_of_sync_extra_size * sizeof(*f));
1707 extra = xmalloc_bytes(size);
1709 /* XXX Should be more graceful here. */
1710 if ( extra == NULL )
1711 BUG();
1713 memset(extra, 0, size);
1715 /* Record the allocation block so it can be correctly freed later. */
1716 d->arch.out_of_sync_extras_count++;
1717 *((struct out_of_sync_entry **)&extra[out_of_sync_extra_size]) =
1718 d->arch.out_of_sync_extras;
1719 d->arch.out_of_sync_extras = &extra[0];
1721 /* Thread a free chain through the newly-allocated nodes. */
1722 for ( i = 0; i < (out_of_sync_extra_size - 1); i++ )
1723 extra[i].next = &extra[i+1];
1724 extra[i].next = NULL;
1726 /* Add the new nodes to the free list. */
1727 d->arch.out_of_sync_free = &extra[0];
1730 /* Allocate a new node from the quicklist. */
1731 f = d->arch.out_of_sync_free;
1732 d->arch.out_of_sync_free = f->next;
1734 return f;
1737 static inline unsigned long
1738 shadow_make_snapshot(
1739 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1741 unsigned long smfn, sl1mfn = 0;
1742 void *original, *snapshot;
1743 u32 min_max = 0;
1744 int min, max, length;
1746 if ( test_and_set_bit(_PGC_out_of_sync, &frame_table[gmfn].count_info) )
1748 ASSERT(__shadow_status(d, gpfn, PGT_snapshot));
1749 return SHADOW_SNAPSHOT_ELSEWHERE;
1752 perfc_incrc(shadow_make_snapshot);
1754 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_snapshot))) )
1756 printk("Couldn't alloc fullshadow snapshot for pfn=%lx mfn=%lx!\n"
1757 "Dom%d snapshot_count_count=%d\n",
1758 gpfn, gmfn, d->domain_id, d->arch.snapshot_page_count);
1759 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
1762 if ( !get_shadow_ref(smfn) )
1763 BUG();
1765 if ( shadow_mode_refcounts(d) &&
1766 (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
1767 min_max = pfn_to_page(sl1mfn)->tlbflush_timestamp;
1768 pfn_to_page(smfn)->tlbflush_timestamp = min_max;
1770 min = SHADOW_MIN(min_max);
1771 max = SHADOW_MAX(min_max);
1772 length = max - min + 1;
1773 perfc_incr_histo(snapshot_copies, length, PT_UPDATES);
1775 min *= sizeof(l1_pgentry_t);
1776 length *= sizeof(l1_pgentry_t);
1778 original = map_domain_page(gmfn);
1779 snapshot = map_domain_page(smfn);
1780 memcpy(snapshot + min, original + min, length);
1781 unmap_domain_page(original);
1782 unmap_domain_page(snapshot);
1784 return smfn;
1787 static void
1788 shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
1790 void *snapshot;
1792 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
1793 return;
1795 // Clear the out_of_sync bit.
1796 //
1797 clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
1799 // XXX Need to think about how to protect the domain's
1800 // information less expensively.
1801 //
1802 snapshot = map_domain_page(entry->snapshot_mfn);
1803 memset(snapshot, 0, PAGE_SIZE);
1804 unmap_domain_page(snapshot);
1806 put_shadow_ref(entry->snapshot_mfn);
1809 struct out_of_sync_entry *
1810 shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
1811 unsigned long mfn)
1813 struct domain *d = v->domain;
1814 struct pfn_info *page = &frame_table[mfn];
1815 struct out_of_sync_entry *entry = shadow_alloc_oos_entry(d);
1817 ASSERT(shadow_lock_is_acquired(d));
1818 ASSERT(pfn_valid(mfn));
1820 #ifndef NDEBUG
1821 u32 type = page->u.inuse.type_info & PGT_type_mask;
1822 if ( shadow_mode_refcounts(d) )
1824 ASSERT(type == PGT_writable_page);
1826 else
1828 ASSERT(type && (type < PGT_l4_page_table));
1830 #endif
1832 FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08x", __func__,
1833 gpfn, mfn, page->count_info, page->u.inuse.type_info);
1835 // XXX this will require some more thought... Cross-domain sharing and
1836 // modification of page tables? Hmm...
1837 //
1838 if ( d != page_get_owner(page) )
1839 BUG();
1841 perfc_incrc(shadow_mark_mfn_out_of_sync_calls);
1843 entry->gpfn = gpfn;
1844 entry->gmfn = mfn;
1845 entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
1846 entry->writable_pl1e = -1;
1848 #if SHADOW_DEBUG
1849 mark_shadows_as_reflecting_snapshot(d, gpfn);
1850 #endif
1852 // increment guest's ref count to represent the entry in the
1853 // full shadow out-of-sync list.
1854 //
1855 get_page(page, d);
1857 // Add to the out-of-sync list
1858 //
1859 entry->next = d->arch.out_of_sync;
1860 d->arch.out_of_sync = entry;
1862 return entry;
1865 void shadow_mark_va_out_of_sync(
1866 struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long va)
1868 struct out_of_sync_entry *entry =
1869 shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
1870 l2_pgentry_t sl2e;
1872 // We need the address of the shadow PTE that maps @va.
1873 // It might not exist yet. Make sure it's there.
1874 //
1875 __shadow_get_l2e(v, va, &sl2e);
1876 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
1878 // either this L1 isn't shadowed yet, or the shadow isn't linked into
1879 // the current L2.
1880 shadow_map_l1_into_current_l2(va);
1881 __shadow_get_l2e(v, va, &sl2e);
1883 ASSERT(l2e_get_flags(sl2e) & _PAGE_PRESENT);
1885 // NB: this is stored as a machine address.
1886 entry->writable_pl1e =
1887 l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * l1_table_offset(va));
1888 ASSERT( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) );
1890 // Increment shadow's page count to represent the reference
1891 // inherent in entry->writable_pl1e
1892 //
1893 if ( !get_shadow_ref(l2e_get_pfn(sl2e)) )
1894 BUG();
1896 FSH_LOG("mark_out_of_sync(va=%lx -> writable_pl1e=%lx)",
1897 va, entry->writable_pl1e);
1900 /*
1901 * Returns 1 if the snapshot for @gmfn exists and its @index'th entry matches.
1902 * Returns 0 otherwise.
1903 */
1904 static int snapshot_entry_matches(
1905 struct domain *d, l1_pgentry_t *guest_pt,
1906 unsigned long gpfn, unsigned index)
1908 unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot);
1909 l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ...
1910 int entries_match;
1912 perfc_incrc(snapshot_entry_matches_calls);
1914 if ( !smfn )
1915 return 0;
1917 snapshot = map_domain_page(smfn);
1919 if (__copy_from_user(&gpte, &guest_pt[index],
1920 sizeof(gpte))) {
1921 unmap_domain_page(snapshot);
1922 return 0;
1925 // This could probably be smarter, but this is sufficient for
1926 // our current needs.
1927 //
1928 entries_match = !l1e_has_changed(gpte, snapshot[index],
1929 PAGE_FLAG_MASK);
1931 unmap_domain_page(snapshot);
1933 #ifdef PERF_COUNTERS
1934 if ( entries_match )
1935 perfc_incrc(snapshot_entry_matches_true);
1936 #endif
1938 return entries_match;
1941 /*
1942 * Returns 1 if va's shadow mapping is out-of-sync.
1943 * Returns 0 otherwise.
1944 */
1945 int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
1947 struct domain *d = v->domain;
1948 unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
1949 unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn);
1950 l2_pgentry_t l2e;
1951 unsigned long l1pfn, l1mfn;
1953 ASSERT(shadow_lock_is_acquired(d));
1954 ASSERT(VALID_M2P(l2pfn));
1956 perfc_incrc(shadow_out_of_sync_calls);
1958 if ( page_out_of_sync(&frame_table[l2mfn]) &&
1959 !snapshot_entry_matches(d, (l1_pgentry_t *)v->arch.guest_vtable,
1960 l2pfn, l2_table_offset(va)) )
1961 return 1;
1963 __guest_get_l2e(v, va, &l2e);
1964 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1965 return 0;
1967 l1pfn = l2e_get_pfn(l2e);
1968 l1mfn = __gpfn_to_mfn(d, l1pfn);
1970 // If the l1 pfn is invalid, it can't be out of sync...
1971 if ( !VALID_MFN(l1mfn) )
1972 return 0;
1974 if ( page_out_of_sync(&frame_table[l1mfn]) &&
1975 !snapshot_entry_matches(
1976 d, &linear_pg_table[l1_linear_offset(va) & ~(L1_PAGETABLE_ENTRIES-1)],
1977 l1pfn, l1_table_offset(va)) )
1978 return 1;
1980 return 0;
1983 #define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t)))
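/*
 * With 4KB pages and 4-byte l1_pgentry_t entries, PAGE_SIZE /
 * sizeof(l1_pgentry_t) == 1024, so GPFN_TO_GPTEPAGE() simply groups gpfns
 * into blocks of 1024 -- e.g. gpfn 0x12345 maps to 0x48. Writable-PTE
 * predictions are stored in the shadow hash under that key with type
 * PGT_writable_pred, with a saturating score kept in the PGT_score_mask
 * bits (see increase/decrease_writable_pte_prediction() below).
 */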
1984 static inline unsigned long
1985 predict_writable_pte_page(struct domain *d, unsigned long gpfn)
1987 return __shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), PGT_writable_pred);
1990 static inline void
1991 increase_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
1993 unsigned long score = prediction & PGT_score_mask;
1994 int create = (score == 0);
1996 // saturating addition
1997 score = (score + (1u << PGT_score_shift)) & PGT_score_mask;
1998 score = score ? score : PGT_score_mask;
2000 prediction = (prediction & PGT_mfn_mask) | score;
2002 //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
2003 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2005 if ( create )
2006 perfc_incr(writable_pte_predictions);
2009 static inline void
2010 decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
2012 unsigned long score = prediction & PGT_score_mask;
2013 ASSERT(score);
2015 // divide score by 2... We don't like bad predictions.
2016 //
2017 score = (score >> 1) & PGT_score_mask;
2019 prediction = (prediction & PGT_mfn_mask) | score;
2021 //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
2023 if ( score )
2024 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2025 else
2027 delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
2028 perfc_decr(writable_pte_predictions);
2032 static void
2033 free_writable_pte_predictions(struct domain *d)
2035 int i;
2036 struct shadow_status *x;
2038 for ( i = 0; i < shadow_ht_buckets; i++ )
2040 u32 count;
2041 unsigned long *gpfn_list;
2043 /* Skip empty buckets. */
2044 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
2045 continue;
2047 count = 0;
2048 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2049 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2050 count++;
2052 gpfn_list = xmalloc_array(unsigned long, count);
2053 count = 0;
2054 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2055 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2056 gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
2058 while ( count )
2060 count--;
2061 delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
2064 xfree(gpfn_list);
2068 static u32 remove_all_write_access_in_ptpage(
2069 struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
2070 unsigned long readonly_gpfn, unsigned long readonly_gmfn,
2071 u32 max_refs_to_find, unsigned long prediction)
2073 l1_pgentry_t *pt = map_domain_page(pt_mfn);
2074 l1_pgentry_t match;
2075 unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
2076 int i;
2077 u32 found = 0;
2078 int is_l1_shadow =
2079 ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
2080 PGT_l1_shadow);
2082 match = l1e_from_pfn(readonly_gmfn, flags);
2084 // returns true if all refs have been found and fixed.
2085 //
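// (fix_entry() is a GCC nested function: it captures pt, found, is_l1_shadow,
// d and max_refs_to_find from the enclosing scope.)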
2086 int fix_entry(int i)
2088 l1_pgentry_t old = pt[i];
2089 l1_pgentry_t new = old;
2091 l1e_remove_flags(new,_PAGE_RW);
2092 if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
2093 BUG();
2094 found++;
2095 pt[i] = new;
2096 if ( is_l1_shadow )
2097 shadow_put_page_from_l1e(old, d);
2099 #if 0
2100 printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
2101 "is_l1_shadow=%d\n",
2102 readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
2103 #endif
2105 return (found == max_refs_to_find);
2108 i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
2109 if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
2111 perfc_incrc(remove_write_fast_exit);
2112 increase_writable_pte_prediction(d, readonly_gpfn, prediction);
2113 unmap_domain_page(pt);
2114 return found;
2117 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2119 if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
2120 break;
2123 unmap_domain_page(pt);
2125 return found;
2126 #undef MATCH_ENTRY
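/*
 * The fast path above guesses that readonly_gpfn's writable mapping sits at
 * l1 index (readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1)) of the predicted
 * page table -- i.e. it assumes the guest maps that page at a virtual
 * address congruent to its gpfn, as a Linux guest's contiguous lowmem
 * mapping does. A miss simply falls through to scanning the whole table.
 */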
2129 int shadow_remove_all_write_access(
2130 struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
2132 int i;
2133 struct shadow_status *a;
2134 u32 found = 0, fixups, write_refs;
2135 unsigned long prediction, predicted_gpfn, predicted_smfn;
2137 ASSERT(shadow_lock_is_acquired(d));
2138 ASSERT(VALID_MFN(readonly_gmfn));
2140 perfc_incrc(remove_write_access);
2142 // If it's not a writable page, then no writable refs can be outstanding.
2143 //
2144 if ( (frame_table[readonly_gmfn].u.inuse.type_info & PGT_type_mask) !=
2145 PGT_writable_page )
2147 perfc_incrc(remove_write_not_writable);
2148 return 1;
2151 // How many outstanding writable PTEs for this page are there?
2152 //
2153 write_refs =
2154 (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
2155 if ( write_refs && MFN_PINNED(readonly_gmfn) )
2157 write_refs--;
2160 if ( write_refs == 0 )
2162 perfc_incrc(remove_write_no_work);
2163 return 1;
2166 // Before searching all the L1 page tables, check the typical culprit first
2167 //
2168 if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
2170 predicted_gpfn = prediction & PGT_mfn_mask;
2171 if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
2172 (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
2174 found += fixups;
2175 if ( found == write_refs )
2177 perfc_incrc(remove_write_predicted);
2178 return 1;
2181 else
2183 perfc_incrc(remove_write_bad_prediction);
2184 decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
2188 // Search all the shadow L1 page tables...
2189 //
2190 for (i = 0; i < shadow_ht_buckets; i++)
2192 a = &d->arch.shadow_ht[i];
2193 while ( a && a->gpfn_and_flags )
2195 if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow )
2197 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
2198 if ( found == write_refs )
2199 return 1;
2202 a = a->next;
2206 FSH_LOG("%s: looking for %d refs, found %d refs",
2207 __func__, write_refs, found);
2209 return 0;
2212 static u32 remove_all_access_in_page(
2213 struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
2215 l1_pgentry_t *pl1e = map_domain_page(l1mfn);
2216 l1_pgentry_t match;
2217 unsigned long flags = _PAGE_PRESENT;
2218 int i;
2219 u32 count = 0;
2220 int is_l1_shadow =
2221 ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
2222 PGT_l1_shadow);
2224 match = l1e_from_pfn(forbidden_gmfn, flags);
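// l1e_has_changed() compares only the frame number plus the given flag bits
// (_PAGE_PRESENT here), so the loop below zaps exactly those PTEs that are
// present and point at forbidden_gmfn, whatever their RW/accessed/dirty bits.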
2226 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2228 if ( unlikely(!l1e_has_changed(pl1e[i], match, flags)) )
2230 l1_pgentry_t ol2e = pl1e[i];
2231 pl1e[i] = l1e_empty();
2232 count++;
2234 if ( is_l1_shadow )
2235 shadow_put_page_from_l1e(ol2e, d);
2236 else /* must be an hl2 page */
2237 put_page(&frame_table[forbidden_gmfn]);
2241 unmap_domain_page(pl1e);
2243 return count;
2246 u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
2248 int i;
2249 struct shadow_status *a;
2250 u32 count = 0;
2252 if ( unlikely(!shadow_mode_enabled(d)) )
2253 return 0;
2255 ASSERT(shadow_lock_is_acquired(d));
2256 perfc_incrc(remove_all_access);
2258 for (i = 0; i < shadow_ht_buckets; i++)
2260 a = &d->arch.shadow_ht[i];
2261 while ( a && a->gpfn_and_flags )
2263 switch (a->gpfn_and_flags & PGT_type_mask)
2265 case PGT_l1_shadow:
2266 case PGT_l2_shadow:
2267 case PGT_l3_shadow:
2268 case PGT_l4_shadow:
2269 case PGT_hl2_shadow:
2270 count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
2271 break;
2272 case PGT_snapshot:
2273 case PGT_writable_pred:
2274 // these can't hold refs to the forbidden page
2275 break;
2276 default:
2277 BUG();
2280 a = a->next;
2284 return count;
2287 static int resync_all(struct domain *d, u32 stype)
2289 struct out_of_sync_entry *entry;
2290 unsigned i;
2291 unsigned long smfn;
2292 void *guest, *shadow, *snapshot;
2293 int need_flush = 0, external = shadow_mode_external(d);
2294 int unshadow;
2295 int changed;
2297 ASSERT(shadow_lock_is_acquired(d));
2299 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2301 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
2302 continue;
2304 smfn = __shadow_status(d, entry->gpfn, stype);
2306 if ( !smfn )
2308 if ( shadow_mode_refcounts(d) )
2309 continue;
2311 // For lightweight shadows, even when no shadow page exists,
2312 // we need to resync the refcounts to the new contents of the
2313 // guest page.
2314 // This only applies when we have writable page tables.
2315 //
2316 if ( !shadow_mode_write_all(d) &&
2317 !((stype == PGT_l1_shadow) &&
2318 VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
2319 // Page is not writable -- no resync necessary
2320 continue;
2323 FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
2324 stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
2326 // Compare guest's new contents to its snapshot, validating
2327 // and updating its shadow as appropriate.
2328 //
2329 guest = map_domain_page(entry->gmfn);
2330 snapshot = map_domain_page(entry->snapshot_mfn);
2332 if ( smfn )
2333 shadow = map_domain_page(smfn);
2334 else
2335 shadow = NULL;
2337 unshadow = 0;
2339 switch ( stype ) {
2340 case PGT_l1_shadow:
2342 l1_pgentry_t *guest1 = guest;
2343 l1_pgentry_t *shadow1 = shadow;
2344 l1_pgentry_t *snapshot1 = snapshot;
2346 ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
2347 shadow_mode_write_all(d));
2349 if ( !shadow_mode_refcounts(d) )
2350 revalidate_l1(d, guest1, snapshot1);
2352 if ( !smfn )
2353 break;
2355 u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
2356 int min_shadow = SHADOW_MIN(min_max_shadow);
2357 int max_shadow = SHADOW_MAX(min_max_shadow);
2359 u32 min_max_snapshot =
2360 pfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
2361 int min_snapshot = SHADOW_MIN(min_max_snapshot);
2362 int max_snapshot = SHADOW_MAX(min_max_snapshot);
2364 changed = 0;
2366 for ( i = min_shadow; i <= max_shadow; i++ )
2368 if ( (i < min_snapshot) || (i > max_snapshot) ||
2369 l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
2371 need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
2373 // can't update snapshots of linear page tables -- they
2374 // are used multiple times...
2375 //
2376 // snapshot[i] = new_pte;
2378 changed++;
2381 perfc_incrc(resync_l1);
2382 perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
2383 perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
2384 break;
2386 case PGT_l2_shadow:
2388 int max = -1;
2390 l2_pgentry_t *guest2 = guest;
2391 l2_pgentry_t *shadow2 = shadow;
2392 l2_pgentry_t *snapshot2 = snapshot;
2394 ASSERT(shadow_mode_write_all(d));
2395 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2397 changed = 0;
2398 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2400 #if CONFIG_X86_PAE
2401 BUG(); /* FIXME: need type_info */
2402 #endif
2403 if ( !is_guest_l2_slot(0,i) && !external )
2404 continue;
2406 l2_pgentry_t new_pde = guest2[i];
2407 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK))
2409 need_flush |= validate_pde_change(d, new_pde, &shadow2[i]);
2411 // can't update snapshots of linear page tables -- they
2412 // are used multiple times...
2413 //
2414 // snapshot[i] = new_pde;
2416 changed++;
2418 if ( l2e_get_intpte(new_pde) != 0 ) /* FIXME: check flags? */
2419 max = i;
2421 // XXX - This hack works for linux guests.
2422 // Need a better solution long term.
2423 if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
2424 unlikely(l2e_get_intpte(new_pde) != 0) &&
2425 !unshadow && MFN_PINNED(smfn) )
2426 unshadow = 1;
2428 if ( max == -1 )
2429 unshadow = 1;
2430 perfc_incrc(resync_l2);
2431 perfc_incr_histo(shm_l2_updates, changed, PT_UPDATES);
2432 break;
2434 case PGT_hl2_shadow:
2436 l2_pgentry_t *guest2 = guest;
2437 l2_pgentry_t *snapshot2 = snapshot;
2438 l1_pgentry_t *shadow2 = shadow;
2440 ASSERT(shadow_mode_write_all(d));
2441 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2443 changed = 0;
2444 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2446 #if CONFIG_X86_PAE
2447 BUG(); /* FIXME: need type_info */
2448 #endif
2449 if ( !is_guest_l2_slot(0, i) && !external )
2450 continue;
2452 l2_pgentry_t new_pde = guest2[i];
2453 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK) )
2455 need_flush |= validate_hl2e_change(d, new_pde, &shadow2[i]);
2457 // can't update snapshots of linear page tables -- they
2458 // are used multiple times...
2459 //
2460 // snapshot[i] = new_pde;
2462 changed++;
2465 perfc_incrc(resync_hl2);
2466 perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
2467 break;
2469 default:
2470 BUG();
2473 if ( smfn )
2474 unmap_domain_page(shadow);
2475 unmap_domain_page(snapshot);
2476 unmap_domain_page(guest);
2478 if ( unlikely(unshadow) )
2480 perfc_incrc(unshadow_l2_count);
2481 shadow_unpin(smfn);
2482 if ( unlikely(shadow_mode_external(d)) )
2484 unsigned long hl2mfn;
2486 if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
2487 MFN_PINNED(hl2mfn) )
2488 shadow_unpin(hl2mfn);
2493 return need_flush;
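#if 0
/*
 * Illustrative sketch (not built): the PGT_l1_shadow case of resync_all()
 * above only walks l1 indices in [min_shadow, max_shadow], decoded from the
 * min/max summary packed into page->tlbflush_timestamp when the shadow and
 * snapshot were created. A minimal model of such a packing, assuming a plain
 * 16/16-bit split -- the real layout is whatever SHADOW_MIN()/SHADOW_MAX()
 * in shadow.h implement:
 */
static inline u32 pack_min_max(u32 min, u32 max) { return (max << 16) | min; }
static inline u32 unpack_min(u32 mm) { return mm & 0xffff; }
static inline u32 unpack_max(u32 mm) { return mm >> 16; }
#endif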
2496 void __shadow_sync_all(struct domain *d)
2498 struct out_of_sync_entry *entry;
2499 int need_flush = 0;
2501 perfc_incrc(shadow_sync_all);
2503 ASSERT(shadow_lock_is_acquired(d));
2505 // First, remove all write permissions to the page tables
2506 //
2507 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2509 // Skip entries that have low bits set... Those aren't
2510 // real PTEs.
2511 //
2512 if ( entry->writable_pl1e & (sizeof(l1_pgentry_t)-1) )
2513 continue;
2515 l1_pgentry_t *ppte = (l1_pgentry_t *)(
2516 (char *)map_domain_page(entry->writable_pl1e >> PAGE_SHIFT) +
2517 (entry->writable_pl1e & ~PAGE_MASK));
2518 l1_pgentry_t opte = *ppte;
2519 l1_pgentry_t npte = opte;
2520 l1e_remove_flags(npte, _PAGE_RW);
2522 if ( (l1e_get_flags(npte) & _PAGE_PRESENT) &&
2523 !shadow_get_page_from_l1e(npte, d) )
2524 BUG();
2525 *ppte = npte;
2526 shadow_put_page_from_l1e(opte, d);
2528 unmap_domain_page(ppte);
2531 // XXX mafetter: SMP
2532 //
2533 // With the current algorithm, we've gotta flush all the TLBs
2534 // before we can safely continue. I don't think we want to
2535 // do it this way, so I think we should consider making
2536 // entirely private copies of the shadow for each vcpu, and/or
2537 // possibly having a mix of private and shared shadow state
2538 // (any path from a PTE that grants write access to an out-of-sync
2539 // page table page needs to be vcpu private).
2540 //
2541 #if 0 // this should be enabled for SMP guests...
2542 flush_tlb_mask(cpu_online_map);
2543 #endif
2544 need_flush = 1;
2546 // Second, resync all L1 pages, then L2 pages, etc...
2547 //
2548 need_flush |= resync_all(d, PGT_l1_shadow);
2549 if ( shadow_mode_translate(d) )
2550 need_flush |= resync_all(d, PGT_hl2_shadow);
2551 need_flush |= resync_all(d, PGT_l2_shadow);
2553 if ( need_flush && !unlikely(shadow_mode_external(d)) )
2554 local_flush_tlb();
2556 free_out_of_sync_state(d);
2559 int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
2561 l1_pgentry_t gpte, spte, orig_gpte;
2562 struct vcpu *v = current;
2563 struct domain *d = v->domain;
2564 l2_pgentry_t gpde;
2566 spte = l1e_empty();
2568 SH_VVLOG("shadow_fault( va=%lx, code=%lu )",
2569 va, (unsigned long)regs->error_code);
2570 perfc_incrc(shadow_fault_calls);
2572 check_pagetable(v, "pre-sf");
2574 /*
2575 * Don't let someone else take the guest's table pages out-of-sync.
2576 */
2577 shadow_lock(d);
2579 /*
2580 * STEP 1. This fault may have been caused by an out-of-sync page-table
2581 * page; bring the shadow and guest mappings for this va back in sync
2582 * before examining the guest PTE below.
2583 */
2584 __shadow_sync_va(v, va);
2586 /*
2587 * STEP 2. Check the guest PTE.
2588 */
2589 __guest_get_l2e(v, va, &gpde);
2590 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2592 SH_VVLOG("shadow_fault - EXIT: L1 not present");
2593 perfc_incrc(shadow_fault_bail_pde_not_present);
2594 goto fail;
2597 // This can't fault because we hold the shadow lock and we've ensured that
2598 // the mapping is in-sync, so the check of the PDE's present bit, above,
2599 // covers this access.
2600 //
2601 orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
2602 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
2604 SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
2605 l1e_get_intpte(gpte));
2606 perfc_incrc(shadow_fault_bail_pte_not_present);
2607 goto fail;
2610 /* Write fault? (bit 1 of the #PF error code is set for write accesses.) */
2611 if ( regs->error_code & 2 )
2613 int allow_writes = 0;
2615 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
2617 if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) )
2619 allow_writes = 1;
2620 l1e_add_flags(gpte, _PAGE_RW);
2622 else
2624 /* Write fault on a read-only mapping. */
2625 SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
2626 l1e_get_intpte(gpte));
2627 perfc_incrc(shadow_fault_bail_ro_mapping);
2628 goto fail;
2632 if ( !l1pte_write_fault(v, &gpte, &spte, va) )
2634 SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
2635 perfc_incrc(write_fault_bail);
2636 shadow_unlock(d);
2637 return 0;
2640 if ( allow_writes )
2641 l1e_remove_flags(gpte, _PAGE_RW);
2643 else
2645 if ( !l1pte_read_fault(d, &gpte, &spte) )
2647 SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
2648 perfc_incrc(read_fault_bail);
2649 shadow_unlock(d);
2650 return 0;
2654 /*
2655 * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
2656 */
2657 if ( l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK) )
2659 /* XXX Watch out for read-only L2 entries! (not used in Linux). */
2660 if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
2661 &gpte, sizeof(gpte))) )
2663 printk("%s() failed, crashing domain %d "
2664 "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
2665 __func__,d->domain_id, l2e_get_intpte(gpde), va);
2666 domain_crash_synchronous();
2669 // if necessary, record the page table page as dirty
2670 if ( unlikely(shadow_mode_log_dirty(d)) )
2671 __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
2674 shadow_set_l1e(va, spte, 1);
2676 perfc_incrc(shadow_fault_fixed);
2677 d->arch.shadow_fault_count++;
2679 shadow_unlock(d);
2681 check_pagetable(v, "post-sf");
2682 return EXCRET_fault_fixed;
2684 fail:
2685 shadow_unlock(d);
2686 return 0;
2689 void shadow_l1_normal_pt_update(
2690 struct domain *d,
2691 unsigned long pa, l1_pgentry_t gpte,
2692 struct domain_mmap_cache *cache)
2694 unsigned long sl1mfn;
2695 l1_pgentry_t *spl1e, spte;
2697 shadow_lock(d);
2699 sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
2700 if ( sl1mfn )
2702 SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
2703 (void *)pa, l1e_get_intpte(gpte));
2704 l1pte_propagate_from_guest(current->domain, gpte, &spte);
2706 spl1e = map_domain_page_with_cache(sl1mfn, cache);
2707 spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
2708 unmap_domain_page_with_cache(spl1e, cache);
2711 shadow_unlock(d);
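/*
 * Worked example for shadow_l1_normal_pt_update() above: a guest PTE write
 * at machine address pa = 0x123458 updates shadow l1 slot
 * (pa & ~PAGE_MASK) / sizeof(l1_pgentry_t) = 0x458 / 4 = 0x116 -- the same
 * slot within the page that the guest itself wrote (4-byte non-PAE entries).
 */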
2714 void shadow_l2_normal_pt_update(
2715 struct domain *d,
2716 unsigned long pa, l2_pgentry_t gpde,
2717 struct domain_mmap_cache *cache)
2719 unsigned long sl2mfn;
2720 l2_pgentry_t *spl2e;
2722 shadow_lock(d);
2724 sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
2725 if ( sl2mfn )
2727 SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
2728 (void *)pa, l2e_get_intpte(gpde));
2729 spl2e = map_domain_page_with_cache(sl2mfn, cache);
2730 validate_pde_change(d, gpde,
2731 &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
2732 unmap_domain_page_with_cache(spl2e, cache);
2735 shadow_unlock(d);
2738 #if CONFIG_PAGING_LEVELS >= 3
2739 void shadow_l3_normal_pt_update(
2740 struct domain *d,
2741 unsigned long pa, l3_pgentry_t gpde,
2742 struct domain_mmap_cache *cache)
2744 BUG(); // not yet implemented
2746 #endif
2748 #if CONFIG_PAGING_LEVELS >= 4
2749 void shadow_l4_normal_pt_update(
2750 struct domain *d,
2751 unsigned long pa, l4_pgentry_t gpde,
2752 struct domain_mmap_cache *cache)
2754 BUG(); // not yet implemented
2756 #endif
2758 int shadow_do_update_va_mapping(unsigned long va,
2759 l1_pgentry_t val,
2760 struct vcpu *v)
2762 struct domain *d = v->domain;
2763 l1_pgentry_t spte;
2764 int rc = 0;
2766 shadow_lock(d);
2768 //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val));
2770 // This is actually overkill - we don't need to sync the L1 itself,
2771 // just everything involved in getting to this L1 (i.e. we need
2772 // linear_pg_table[l1_linear_offset(va)] to be in sync)...
2773 //
2774 __shadow_sync_va(v, va);
2776 l1pte_propagate_from_guest(d, val, &spte);
2777 shadow_set_l1e(va, spte, 0);
2779 /*
2780 * If we're in log-dirty mode then we need to note that we've updated
2781 * the PTE in the PT-holding page. We need the machine frame number
2782 * for this.
2783 */
2784 if ( shadow_mode_log_dirty(d) )
2785 __mark_dirty(d, va_to_l1mfn(v, va));
2787 // out:
2788 shadow_unlock(d);
2790 return rc;
2794 /*
2795 * What lives where in the 32-bit address space in the various shadow modes,
2796 * and what it uses to get/maintain that mapping.
2798 * SHADOW MODE:     none             enable           translate        external
2800 * 4KB things:
2801 * guest_vtable     lin_l2           mapped per gl2   lin_l2 via hl2   mapped per gl2
2802 * shadow_vtable    n/a              sh_lin_l2        sh_lin_l2        mapped per gl2
2803 * hl2_vtable       n/a              n/a              lin_hl2 via hl2  mapped per gl2
2804 * monitor_vtable   n/a              n/a              n/a              mapped once
2806 * 4MB things:
2807 * guest_linear     lin via gl2      lin via gl2      lin via hl2      lin via hl2
2808 * shadow_linear    n/a              sh_lin via sl2   sh_lin via sl2   sh_lin via sl2
2809 * monitor_linear   n/a              n/a              n/a              ???
2810 * perdomain        perdomain        perdomain        perdomain        perdomain
2811 * R/O M2P          R/O M2P          R/O M2P          n/a              n/a
2812 * R/W M2P          R/W M2P          R/W M2P          R/W M2P          R/W M2P
2813 * P2M              n/a              n/a              R/O M2P          R/O M2P
2815 * NB:
2816 * update_pagetables(), __update_pagetables(), shadow_mode_enable(),
2817 * shadow_l2_table(), shadow_hl2_table(), and alloc_monitor_pagetable()
2818 * all play a part in maintaining these mappings.
2819 */
2820 void __update_pagetables(struct vcpu *v)
2822 struct domain *d = v->domain;
2823 unsigned long gmfn = pagetable_get_pfn(v->arch.guest_table);
2824 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
2825 unsigned long smfn, hl2mfn, old_smfn;
2827 int max_mode = ( shadow_mode_external(d) ? SHM_external
2828 : shadow_mode_translate(d) ? SHM_translate
2829 : shadow_mode_enabled(d) ? SHM_enable
2830 : 0 );
2832 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
2833 ASSERT( max_mode );
2835 /*
2836 * arch.guest_vtable
2837 */
2838 if ( max_mode & (SHM_enable | SHM_external) )
2840 if ( likely(v->arch.guest_vtable != NULL) )
2841 unmap_domain_page(v->arch.guest_vtable);
2842 v->arch.guest_vtable = map_domain_page(gmfn);
2845 /*
2846 * arch.shadow_table
2847 */
2848 if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
2849 smfn = shadow_l2_table(d, gpfn, gmfn);
2850 if ( !get_shadow_ref(smfn) )
2851 BUG();
2852 old_smfn = pagetable_get_pfn(v->arch.shadow_table);
2853 v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
2854 if ( old_smfn )
2855 put_shadow_ref(old_smfn);
2857 SH_VVLOG("__update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
2859 /*
2860 * arch.shadow_vtable
2861 */
2862 if ( max_mode == SHM_external )
2864 if ( v->arch.shadow_vtable )
2865 unmap_domain_page(v->arch.shadow_vtable);
2866 v->arch.shadow_vtable = map_domain_page(smfn);
2869 /*
2870 * arch.hl2_vtable
2871 */
2873 // if max_mode == SHM_translate, then the hl2 is already installed
2874 // correctly in its smfn, and there's nothing to do.
2875 //
2876 if ( max_mode == SHM_external )
2878 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
2879 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
2880 if ( v->arch.hl2_vtable )
2881 unmap_domain_page(v->arch.hl2_vtable);
2882 v->arch.hl2_vtable = map_domain_page(hl2mfn);
2885 /*
2886 * fixup pointers in monitor table, as necessary
2887 */
2888 if ( max_mode == SHM_external )
2890 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
2891 l2_pgentry_t old_hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
2892 l2_pgentry_t old_sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
2894 ASSERT( shadow_mode_translate(d) );
2896 if ( !get_shadow_ref(hl2mfn) )
2897 BUG();
2898 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
2899 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
2900 if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
2901 put_shadow_ref(l2e_get_pfn(old_hl2e));
2903 if ( !get_shadow_ref(smfn) )
2904 BUG();
2905 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
2906 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
2907 if ( l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
2908 put_shadow_ref(l2e_get_pfn(old_sl2e));
2910 // XXX - maybe this can be optimized somewhat??
2911 local_flush_tlb();
2916 /************************************************************************/
2917 /************************************************************************/
2918 /************************************************************************/
2920 #if SHADOW_DEBUG
2922 // The following is entirely for _check_pagetable()'s benefit.
2923 // _check_pagetable() wants to know whether a given entry in a
2924 // shadow page table is supposed to be the shadow of the guest's
2925 // current entry, or the shadow of the entry held in the snapshot
2926 // taken above.
2927 //
2928 // Here, we mark all currently existing entries as reflecting
2929 // the snapshot, above. All other places in xen that update
2930 // the shadow will keep the shadow in sync with the guest's
2931 // entries (via l1pte_propagate_from_guest and friends), which clear
2932 // the SHADOW_REFLECTS_SNAPSHOT bit.
2933 //
2934 static void
2935 mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
2937 unsigned long smfn;
2938 l1_pgentry_t *l1e;
2939 l2_pgentry_t *l2e;
2940 unsigned i;
2942 if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
2944 l1e = map_domain_page(smfn);
2945 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
2946 if ( is_guest_l1_slot(i) &&
2947 (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
2948 l1e_add_flags(l1e[i], SHADOW_REFLECTS_SNAPSHOT);
2949 unmap_domain_page(l1e);
2952 if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
2954 l2e = map_domain_page(smfn);
2955 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2956 if ( is_guest_l2_slot(0, i) &&
2957 (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
2958 l2e_add_flags(l2e[i], SHADOW_REFLECTS_SNAPSHOT);
2959 unmap_domain_page(l2e);
2963 // BUG: these are not SMP safe...
2964 static int sh_l2_present;
2965 static int sh_l1_present;
2966 char * sh_check_name;
2967 int shadow_status_noswap;
2969 #define v2m(_v, _adr) ({ \
2970 unsigned long _a = (unsigned long)(_adr); \
2971 l2_pgentry_t _pde = shadow_linear_l2_table(_v)[l2_table_offset(_a)]; \
2972 unsigned long _pa = -1; \
2973 if ( l2e_get_flags(_pde) & _PAGE_PRESENT ) \
2974 { \
2975 l1_pgentry_t _pte; \
2976 _pte = shadow_linear_pg_table[l1_linear_offset(_a)]; \
2977 if ( l1e_get_flags(_pte) & _PAGE_PRESENT ) \
2978 _pa = l1e_get_paddr(_pte); \
2979 } \
2980 _pa | (_a & ~PAGE_MASK); \
2981 })
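// v2m() is used only by the FAIL() dump below: it walks the shadow linear
// tables to turn the Xen virtual address of a PTE being checked into the
// machine address shown in the diagnostics (or an all-ones value if the
// mapping is not present).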
2983 #define FAIL(_f, _a...) \
2984 do { \
2985 printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n", \
2986 sh_check_name, level, l2_idx, l1_idx, ## _a, \
2987 __FILE__, __LINE__); \
2988 printk("guest_pte=%" PRIpte " eff_guest_pte=%" PRIpte \
2989 " shadow_pte=%" PRIpte " snapshot_pte=%" PRIpte \
2990 " &guest=%p &shadow=%p &snap=%p v2m(&guest)=%p" \
2991 " v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n", \
2992 l1e_get_intpte(guest_pte), l1e_get_intpte(eff_guest_pte), \
2993 l1e_get_intpte(shadow_pte), l1e_get_intpte(snapshot_pte), \
2994 p_guest_pte, p_shadow_pte, p_snapshot_pte, \
2995 (void *)v2m(v, p_guest_pte), (void *)v2m(v, p_shadow_pte), \
2996 (void *)v2m(v, p_snapshot_pte), \
2997 (l2_idx << L2_PAGETABLE_SHIFT) | \
2998 (l1_idx << L1_PAGETABLE_SHIFT)); \
2999 errors++; \
3000 } while ( 0 )
3002 static int check_pte(
3003 struct vcpu *v,
3004 l1_pgentry_t *p_guest_pte,
3005 l1_pgentry_t *p_shadow_pte,
3006 l1_pgentry_t *p_snapshot_pte,
3007 int level, int l2_idx, int l1_idx)
3009 struct domain *d = v->domain;
3010 l1_pgentry_t guest_pte = *p_guest_pte;
3011 l1_pgentry_t shadow_pte = *p_shadow_pte;
3012 l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
3013 l1_pgentry_t eff_guest_pte;
3014 unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
3015 int errors = 0, guest_writable;
3016 int page_table_page;
3018 if ( (l1e_get_intpte(shadow_pte) == 0) ||
3019 (l1e_get_intpte(shadow_pte) == 0xdeadface) ||
3020 (l1e_get_intpte(shadow_pte) == 0x00000E00) )
3021 return errors; /* always safe */
3023 if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
3024 FAIL("Non zero not present shadow_pte");
3026 if ( level == 2 ) sh_l2_present++;
3027 if ( level == 1 ) sh_l1_present++;
3029 if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
3030 eff_guest_pte = snapshot_pte;
3031 else
3032 eff_guest_pte = guest_pte;
3034 if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
3035 FAIL("Guest not present yet shadow is");
3037 mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
3039 if ( ((l1e_get_intpte(shadow_pte) & mask) != (l1e_get_intpte(eff_guest_pte) & mask)) )
3040 FAIL("Corrupt?");
3042 if ( (level == 1) &&
3043 (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
3044 !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
3045 FAIL("Dirty coherence");
3047 if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
3048 !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
3049 FAIL("Accessed coherence");
3051 if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
3052 FAIL("global bit set in shadow");
3054 eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
3055 eff_guest_mfn = __gpfn_to_mfn(d, eff_guest_pfn);
3056 shadow_mfn = l1e_get_pfn(shadow_pte);
3058 if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
3059 FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%" PRIpte "\n",
3060 __func__, eff_guest_pfn, l1e_get_intpte(eff_guest_pte));
3062 page_table_page = mfn_is_page_table(eff_guest_mfn);
3064 guest_writable =
3065 (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
3066 (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
3068 if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
3070 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
3071 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3072 frame_table[eff_guest_mfn].u.inuse.type_info,
3073 page_table_page);
3074 FAIL("RW coherence");
3077 if ( (level == 1) &&
3078 (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
3079 !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
3081 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
3082 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3083 frame_table[eff_guest_mfn].u.inuse.type_info,
3084 page_table_page);
3085 FAIL("RW2 coherence");
3088 if ( eff_guest_mfn == shadow_mfn )
3090 if ( level > 1 )
3091 FAIL("Linear map ???"); /* XXX this will fail on BSD */
3093 else
3095 if ( level < 2 )
3096 FAIL("Shadow in L1 entry?");
3098 if ( level == 2 )
3100 if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
3101 FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
3102 __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
3104 else
3105 BUG(); // XXX -- not handled yet.
3108 return errors;
3110 #undef FAIL
3111 #undef v2m
3113 static int check_l1_table(
3114 struct vcpu *v, unsigned long gpfn,
3115 unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
3117 struct domain *d = v->domain;
3118 int i;
3119 unsigned long snapshot_mfn;
3120 l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
3121 int errors = 0;
3123 if ( page_out_of_sync(pfn_to_page(gmfn)) )
3125 snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
3126 ASSERT(snapshot_mfn);
3127 p_snapshot = map_domain_page(snapshot_mfn);
3130 p_guest = map_domain_page(gmfn);
3131 p_shadow = map_domain_page(smfn);
3133 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3134 errors += check_pte(v, p_guest+i, p_shadow+i,
3135 p_snapshot ? p_snapshot+i : NULL,
3136 1, l2_idx, i);
3138 unmap_domain_page(p_shadow);
3139 unmap_domain_page(p_guest);
3140 if ( p_snapshot )
3141 unmap_domain_page(p_snapshot);
3143 return errors;
3146 #define FAILPT(_f, _a...) \
3147 do { \
3148 printk("XXX FAIL %s-PT " _f "\n", sh_check_name, ## _a ); \
3149 errors++; \
3150 } while ( 0 )
3152 int check_l2_table(
3153 struct vcpu *v, unsigned long gmfn, unsigned long smfn, int oos_pdes)
3155 struct domain *d = v->domain;
3156 l2_pgentry_t *gpl2e = (l2_pgentry_t *)map_domain_page(gmfn);
3157 l2_pgentry_t *spl2e = (l2_pgentry_t *)map_domain_page(smfn);
3158 l2_pgentry_t match;
3159 int i;
3160 int errors = 0;
3161 int limit;
3163 if ( !oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != d) )
3164 FAILPT("domain doesn't own page");
3165 if ( oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != NULL) )
3166 FAILPT("bogus owner for snapshot page");
3167 if ( page_get_owner(pfn_to_page(smfn)) != NULL )
3168 FAILPT("shadow page mfn=0x%lx is owned by someone, domid=%d",
3169 smfn, page_get_owner(pfn_to_page(smfn))->domain_id);
3171 #if 0
3172 if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3173 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3174 ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
3175 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
3177 for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3178 i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
3179 i++ )
3180 printk("+++ (%d) %lx %lx\n",i,
3181 l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
3182 FAILPT("hypervisor entries inconsistent");
3185 if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
3186 l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
3187 FAILPT("hypervisor linear map inconsistent");
3188 #endif
3190 match = l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
3191 if ( !shadow_mode_external(d) &&
3192 l2e_has_changed(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
3193 match, PAGE_FLAG_MASK))
3195 FAILPT("hypervisor shadow linear map inconsistent %" PRIpte " %" PRIpte,
3196 l2e_get_intpte(spl2e[SH_LINEAR_PT_VIRT_START >>
3197 L2_PAGETABLE_SHIFT]),
3198 l2e_get_intpte(match));
3201 match = l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
3202 if ( !shadow_mode_external(d) &&
3203 l2e_has_changed(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
3204 match, PAGE_FLAG_MASK))
3206 FAILPT("hypervisor per-domain map inconsistent saw %" PRIpte ", expected (va=%p) %" PRIpte,
3207 l2e_get_intpte(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
3208 d->arch.mm_perdomain_pt,
3209 l2e_get_intpte(match));
3212 #ifdef __i386__
3213 if ( shadow_mode_external(d) )
3214 limit = L2_PAGETABLE_ENTRIES;
3215 else
3216 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3217 #else
3218 limit = 0; /* XXX x86/64 XXX */
3219 #endif
3221 /* Check the whole L2. */
3222 for ( i = 0; i < limit; i++ )
3223 errors += check_pte(v,
3224 (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
3225 (l1_pgentry_t*)(&spl2e[i]),
3226 NULL,
3227 2, i, 0);
3229 unmap_domain_page(spl2e);
3230 unmap_domain_page(gpl2e);
3232 #if 1
3233 if ( errors )
3234 printk("check_l2_table returning %d errors\n", errors);
3235 #endif
3237 return errors;
3239 #undef FAILPT
3241 int _check_pagetable(struct vcpu *v, char *s)
3243 struct domain *d = v->domain;
3244 pagetable_t pt = v->arch.guest_table;
3245 unsigned long gptbase = pagetable_get_paddr(pt);
3246 unsigned long ptbase_pfn, smfn;
3247 unsigned long i;
3248 l2_pgentry_t *gpl2e, *spl2e;
3249 unsigned long ptbase_mfn = 0;
3250 int errors = 0, limit, oos_pdes = 0;
3252 //_audit_domain(d, AUDIT_QUIET);
3253 shadow_lock(d);
3255 sh_check_name = s;
3256 //SH_VVLOG("%s-PT Audit", s);
3257 sh_l2_present = sh_l1_present = 0;
3258 perfc_incrc(check_pagetable);
3260 ptbase_mfn = gptbase >> PAGE_SHIFT;
3261 ptbase_pfn = __mfn_to_gpfn(d, ptbase_mfn);
3263 if ( !(smfn = __shadow_status(d, ptbase_pfn, PGT_base_page_table)) )
3265 printk("%s-PT %lx not shadowed\n", s, gptbase);
3266 goto out;
3268 if ( page_out_of_sync(pfn_to_page(ptbase_mfn)) )
3270 ptbase_mfn = __shadow_status(d, ptbase_pfn, PGT_snapshot);
3271 oos_pdes = 1;
3272 ASSERT(ptbase_mfn);
3275 errors += check_l2_table(v, ptbase_mfn, smfn, oos_pdes);
3277 gpl2e = (l2_pgentry_t *) map_domain_page(ptbase_mfn);
3278 spl2e = (l2_pgentry_t *) map_domain_page(smfn);
3280 /* Go back and recurse. */
3281 #ifdef __i386__
3282 if ( shadow_mode_external(d) )
3283 limit = L2_PAGETABLE_ENTRIES;
3284 else
3285 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3286 #else
3287 limit = 0; /* XXX x86/64 XXX */
3288 #endif
3290 for ( i = 0; i < limit; i++ )
3292 unsigned long gl1pfn = l2e_get_pfn(gpl2e[i]);
3293 unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
3294 unsigned long sl1mfn = l2e_get_pfn(spl2e[i]);
3296 if ( l2e_get_intpte(spl2e[i]) != 0 ) /* FIXME: check flags? */
3298 errors += check_l1_table(v, gl1pfn, gl1mfn, sl1mfn, i);
3302 unmap_domain_page(spl2e);
3303 unmap_domain_page(gpl2e);
3305 #if 0
3306 SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
3307 sh_l2_present, sh_l1_present);
3308 #endif
3310 out:
3311 if ( errors )
3312 BUG();
3314 shadow_unlock(d);
3316 return errors;
3319 int _check_all_pagetables(struct vcpu *v, char *s)
3321 struct domain *d = v->domain;
3322 int i;
3323 struct shadow_status *a;
3324 unsigned long gmfn;
3325 int errors = 0;
3327 shadow_status_noswap = 1;
3329 sh_check_name = s;
3330 SH_VVLOG("%s-PT Audit domid=%d", s, d->domain_id);
3331 sh_l2_present = sh_l1_present = 0;
3332 perfc_incrc(check_all_pagetables);
3334 for (i = 0; i < shadow_ht_buckets; i++)
3336 a = &d->arch.shadow_ht[i];
3337 while ( a && a->gpfn_and_flags )
3339 gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
3341 switch ( a->gpfn_and_flags & PGT_type_mask )
3343 case PGT_l1_shadow:
3344 errors += check_l1_table(v, a->gpfn_and_flags & PGT_mfn_mask,
3345 gmfn, a->smfn, 0);
3346 break;
3347 case PGT_l2_shadow:
3348 errors += check_l2_table(v, gmfn, a->smfn,
3349 page_out_of_sync(pfn_to_page(gmfn)));
3350 break;
3351 case PGT_l3_shadow:
3352 case PGT_l4_shadow:
3353 case PGT_hl2_shadow:
3354 BUG(); // XXX - ought to fix this...
3355 break;
3356 case PGT_snapshot:
3357 case PGT_writable_pred:
3358 break;
3359 default:
3360 errors++;
3361 printk("unexpected shadow type %lx, gpfn=%lx, "
3362 "gmfn=%lx smfn=%lx\n",
3363 a->gpfn_and_flags & PGT_type_mask,
3364 a->gpfn_and_flags & PGT_mfn_mask,
3365 gmfn, a->smfn);
3366 BUG();
3368 a = a->next;
3372 shadow_status_noswap = 0;
3374 if ( errors )
3375 BUG();
3377 return errors;
3380 #endif // SHADOW_DEBUG
3382 /*
3383 * Local variables:
3384 * mode: C
3385 * c-set-style: "BSD"
3386 * c-basic-offset: 4
3387 * tab-width: 4
3388 * indent-tabs-mode: nil
3389 * End:
3390 */