ia64/xen-unstable

view xen/arch/x86/shadow32.c @ 6538:84ee014ebd41

Merge xen-vtx-unstable.hg
author adsharma@los-vmm.sc.intel.com
date Wed Aug 17 12:34:38 2005 -0800 (2005-08-17)
parents 23979fb12c49 f36aee6f8902
children 99914b54f7bf
line source
1 /******************************************************************************
2 * arch/x86/shadow32.c
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
23 #include <xen/config.h>
24 #include <xen/types.h>
25 #include <xen/mm.h>
26 #include <xen/domain_page.h>
27 #include <asm/shadow.h>
28 #include <asm/page.h>
29 #include <xen/event.h>
30 #include <xen/sched.h>
31 #include <xen/trace.h>
33 #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
35 static void shadow_free_snapshot(struct domain *d,
36 struct out_of_sync_entry *entry);
37 static void remove_out_of_sync_entries(struct domain *d, unsigned long smfn);
38 static void free_writable_pte_predictions(struct domain *d);
40 #if SHADOW_DEBUG
41 static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
42 #endif
44 /********
46 There's a per-domain shadow table spin lock which works fine for SMP
47 hosts. We don't have to worry about interrupts as no shadow operations
48 happen in an interrupt context. It's probably not quite ready for SMP
49 guest operation as we have to worry about synchronisation between gpte
50 and spte updates. It's possible that this might only happen in a
51 hypercall context, in which case we'll probably have a per-domain
52 hypercall lock anyhow (at least initially).
54 ********/
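/*
 * Illustrative sketch (not part of the original file): the locking pattern
 * described above, as used by the toplevel entry points later in this file
 * (shadow_mode_enable(), shadow_mode_control()).  The example_* helper is
 * hypothetical.
 */
#if 0
static int example_toplevel_shadow_op(struct domain *d, unsigned int mode)
{
    int rc;

    shadow_lock(d);    /* per-domain spin lock guarding all shadow state */
    rc = __shadow_mode_enable(d, mode);
    shadow_unlock(d);

    return rc;
}
#endif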
56 static inline int
57 shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
58 unsigned long new_type)
59 {
60 struct pfn_info *page = pfn_to_page(gmfn);
61 int pinned = 0, okay = 1;
63 if ( page_out_of_sync(page) )
64 {
65 // Don't know how long ago this snapshot was taken.
66 // Can't trust it to be recent enough.
67 //
68 __shadow_sync_mfn(d, gmfn);
69 }
71 if ( !shadow_mode_refcounts(d) )
72 return 1;
74 if ( unlikely(page_is_page_table(page)) )
75 return 1;
77 FSH_LOG("%s: gpfn=%lx gmfn=%lx nt=%08lx", __func__, gpfn, gmfn, new_type);
79 if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
80 {
81 FSH_LOG("%s: couldn't find/remove all write accesses, gpfn=%lx gmfn=%lx",
82 __func__, gpfn, gmfn);
83 #if 1 || defined(LIVE_DANGEROUSLY)
84 set_bit(_PGC_page_table, &page->count_info);
85 return 1;
86 #endif
87 return 0;
89 }
91 // To convert this page for use as a page table, the writable count
92 // should now be zero. Test this by grabbing the page as a page table,
93 // and then immediately releasing. This will also deal with any
94 // necessary TLB flushing issues for us.
95 //
96 // The cruft here about pinning doesn't really work right. This
97 // needs rethinking/rewriting... Need to gracefully deal with the
98 // TLB flushes required when promoting a writable page, and also deal
99 // with any outstanding (external) writable refs to this page (by
100 // refusing to promote it). The pinning headache complicates this
101 // code -- it would all get much simpler if we stop using
102 // shadow_lock() and move the shadow code to BIGLOCK().
103 //
104 if ( unlikely(!get_page(page, d)) )
105 BUG(); // XXX -- needs more thought for a graceful failure
106 if ( unlikely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) )
107 {
108 pinned = 1;
109 put_page_and_type(page);
110 }
111 if ( get_page_type(page, PGT_base_page_table) )
112 {
113 set_bit(_PGC_page_table, &page->count_info);
114 put_page_type(page);
115 }
116 else
117 {
118 printk("shadow_promote: get_page_type failed "
119 "dom%d gpfn=%lx gmfn=%lx t=%08lx\n",
120 d->domain_id, gpfn, gmfn, new_type);
121 okay = 0;
122 }
124 // Now put the type back to writable...
125 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
126 BUG(); // XXX -- needs more thought for a graceful failure
127 if ( unlikely(pinned) )
128 {
129 if ( unlikely(test_and_set_bit(_PGT_pinned,
130 &page->u.inuse.type_info)) )
131 BUG(); // hmm... someone pinned this again?
132 }
133 else
134 put_page_and_type(page);
136 return okay;
137 }
139 static inline void
140 shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
141 {
142 if ( !shadow_mode_refcounts(d) )
143 return;
145 ASSERT(frame_table[gmfn].count_info & PGC_page_table);
147 if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
148 {
149 clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
151 if ( page_out_of_sync(pfn_to_page(gmfn)) )
152 {
153 remove_out_of_sync_entries(d, gmfn);
154 }
155 }
156 }
158 /*
159 * Things in shadow mode that collect get_page() refs to the domain's
160 * pages are:
161 * - PGC_allocated takes a gen count, just like normal.
162 * - A writable page can be pinned (paravirtualized guests may consider
163 * these pages to be L1s or L2s, and don't know the difference).
164 * Pinning a page takes a gen count (but, for domains in shadow mode,
165 * it *doesn't* take a type count)
166 * - CR3 grabs a ref to whatever it points at, just like normal.
167 * - Shadow mode grabs an initial gen count for itself, as a placeholder
168 * for whatever references will exist.
169 * - Shadow PTEs that point to a page take a gen count, just like regular
170 * PTEs. However, they don't get a type count, as get_page_type() is
171 * hardwired to keep writable pages' counts at 1 for domains in shadow
172 * mode.
173 * - Whenever we shadow a page, the entry in the shadow hash grabs a
174 * general ref to the page.
175 * - Whenever a page goes out of sync, the out of sync entry grabs a
176 * general ref to the page.
177 */
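/*
 * Illustrative sketch (not part of the original file): in the list above, a
 * "gen count" is a general reference in page->count_info taken with
 * get_page(), and a "type count" is a reference in page->u.inuse.type_info
 * taken with get_page_type().  shadow_promote() below briefly takes a type
 * ref; shadow_mark_mfn_out_of_sync() takes a general ref for its list entry.
 * The example_* helper is hypothetical.
 */
#if 0
static void example_ref_counts(struct domain *d, struct pfn_info *page)
{
    if ( !get_page(page, d) )                       /* general (gen) count */
        BUG();
    if ( !get_page_type(page, PGT_writable_page) )  /* type count */
        BUG();
    put_page_type(page);
    put_page(page);
}
#endif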
178 /*
179 * pfn_info fields for pages allocated as shadow pages:
180 *
181 * All 32 bits of count_info are a simple count of refs to this shadow
182 * from a) other shadow pages, b) current CR3s (aka v->arch.shadow_table),
183 * c) a pin, if it's a pinned shadow root pgtable, and d) outstanding
184 * out-of-sync references.
185 *
186 * u.inuse._domain is left NULL, to prevent accidentally allowing some
187 * random domain to gain permission to map this page.
188 *
189 * u.inuse.type_info & PGT_type_mask remembers what kind of page is being
190 * shadowed.
191 * u.inuse.type_info & PGT_mfn_mask holds the mfn of the page being shadowed.
192 * u.inuse.type_info & PGT_pinned says that an extra reference to this shadow
193 * currently exists because this is a shadow of a root page, and we
194 * don't want to let those disappear just because no CR3 is currently pointing
195 * at it.
196 *
197 * tlbflush_timestamp holds the min & max indices of valid page table entries
198 * within the shadow page.
199 */
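/*
 * Illustrative sketch (not part of the original file): how the encoding
 * described above is consumed.  alloc_shadow_page() below stores the shadow
 * type and the shadowed mfn together in u.inuse.type_info, and
 * free_shadow_page() splits them back out with PGT_type_mask/PGT_mfn_mask.
 * The example_* helper is hypothetical.
 */
#if 0
static void example_decode_shadow_frame(unsigned long smfn)
{
    struct pfn_info *page = &frame_table[smfn];
    unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
    unsigned long type = page->u.inuse.type_info & PGT_type_mask;

    /* type is one of PGT_l1_shadow, PGT_l2_shadow, PGT_hl2_shadow or
     * PGT_snapshot; gmfn is the guest frame being shadowed. */
    printk("smfn=%lx shadows gmfn=%lx (type=%08lx)\n", smfn, gmfn, type);
}
#endif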
201 static inline unsigned long
202 alloc_shadow_page(struct domain *d,
203 unsigned long gpfn, unsigned long gmfn,
204 u32 psh_type)
205 {
206 struct pfn_info *page;
207 unsigned long smfn;
208 int pin = 0;
210 // Currently, we only keep pre-zero'ed pages around for use as L1's...
211 // This will change. Soon.
212 //
213 if ( psh_type == PGT_l1_shadow )
214 {
215 if ( !list_empty(&d->arch.free_shadow_frames) )
216 {
217 struct list_head *entry = d->arch.free_shadow_frames.next;
218 page = list_entry(entry, struct pfn_info, list);
219 list_del(entry);
220 perfc_decr(free_l1_pages);
221 }
222 else
223 {
224 page = alloc_domheap_page(NULL);
225 void *l1 = map_domain_page(page_to_pfn(page));
226 memset(l1, 0, PAGE_SIZE);
227 unmap_domain_page(l1);
228 }
229 }
230 else
231 page = alloc_domheap_page(NULL);
233 if ( unlikely(page == NULL) )
234 {
235 printk("Couldn't alloc shadow page! dom%d count=%d\n",
236 d->domain_id, d->arch.shadow_page_count);
237 printk("Shadow table counts: l1=%d l2=%d hl2=%d snapshot=%d\n",
238 perfc_value(shadow_l1_pages),
239 perfc_value(shadow_l2_pages),
240 perfc_value(hl2_table_pages),
241 perfc_value(snapshot_pages));
242 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
243 }
245 smfn = page_to_pfn(page);
247 ASSERT( (gmfn & ~PGT_mfn_mask) == 0 );
248 page->u.inuse.type_info = psh_type | gmfn;
249 page->count_info = 0;
250 page->tlbflush_timestamp = 0;
252 switch ( psh_type )
253 {
254 case PGT_l1_shadow:
255 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
256 goto fail;
257 perfc_incr(shadow_l1_pages);
258 d->arch.shadow_page_count++;
259 break;
261 case PGT_l2_shadow:
262 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
263 goto fail;
264 perfc_incr(shadow_l2_pages);
265 d->arch.shadow_page_count++;
266 if ( PGT_l2_page_table == PGT_root_page_table )
267 pin = 1;
269 break;
271 case PGT_hl2_shadow:
272 // Treat an hl2 as an L1 for purposes of promotion.
273 // For external mode domains, treat them as an L2 for purposes of
274 // pinning.
275 //
276 if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
277 goto fail;
278 perfc_incr(hl2_table_pages);
279 d->arch.hl2_page_count++;
280 if ( shadow_mode_external(d) &&
281 (PGT_l2_page_table == PGT_root_page_table) )
282 pin = 1;
284 break;
286 case PGT_snapshot:
287 perfc_incr(snapshot_pages);
288 d->arch.snapshot_page_count++;
289 break;
291 default:
292 printk("Alloc shadow weird page type type=%08x\n", psh_type);
293 BUG();
294 break;
295 }
297 // Don't add a new shadow of something that already has a snapshot.
298 //
299 ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
301 set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
303 if ( pin )
304 shadow_pin(smfn);
306 return smfn;
308 fail:
309 FSH_LOG("promotion of pfn=%lx mfn=%lx failed! external gnttab refs?",
310 gpfn, gmfn);
311 free_domheap_page(page);
312 return 0;
313 }
315 static inline void
316 free_shadow_l1_table(struct domain *d, unsigned long smfn)
317 {
318 l1_pgentry_t *pl1e = map_domain_page(smfn);
319 int i;
320 struct pfn_info *spage = pfn_to_page(smfn);
321 u32 min_max = spage->tlbflush_timestamp;
322 int min = SHADOW_MIN(min_max);
323 int max = SHADOW_MAX(min_max);
325 for ( i = min; i <= max; i++ )
326 {
327 shadow_put_page_from_l1e(pl1e[i], d);
328 pl1e[i] = l1e_empty();
329 }
331 unmap_domain_page(pl1e);
332 }
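/*
 * Illustrative sketch (not part of the original file): the min/max trick
 * used above.  An L1 shadow's tlbflush_timestamp caches the lowest and
 * highest slot indices that may be valid, packed by SHADOW_ENCODE_MIN_MAX()
 * in shadow_map_l1_into_current_l2() below (macros presumably defined in
 * asm/shadow.h), so the free path only walks slots min..max rather than the
 * whole page.  The example_* helper is hypothetical.
 */
#if 0
static void example_min_max_round_trip(unsigned long sl1mfn, int min, int max)
{
    u32 min_max;

    /* writer side (see shadow_map_l1_into_current_l2): */
    frame_table[sl1mfn].tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max);

    /* reader side (as in free_shadow_l1_table above): */
    min_max = frame_table[sl1mfn].tlbflush_timestamp;
    printk("valid slots: %d..%d\n", SHADOW_MIN(min_max), SHADOW_MAX(min_max));
}
#endif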
334 static inline void
335 free_shadow_hl2_table(struct domain *d, unsigned long smfn)
336 {
337 l1_pgentry_t *hl2 = map_domain_page(smfn);
338 int i, limit;
340 SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
342 #ifdef __i386__
343 if ( shadow_mode_external(d) )
344 limit = L2_PAGETABLE_ENTRIES;
345 else
346 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
347 #else
348 limit = 0; /* XXX x86/64 XXX */
349 #endif
351 for ( i = 0; i < limit; i++ )
352 {
353 if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
354 put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
355 }
357 unmap_domain_page(hl2);
358 }
360 static inline void
361 free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
362 {
363 l2_pgentry_t *pl2e = map_domain_page(smfn);
364 int i, external = shadow_mode_external(d);
366 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
367 if ( external || is_guest_l2_slot(type, i) )
368 if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
369 put_shadow_ref(l2e_get_pfn(pl2e[i]));
371 if ( (PGT_base_page_table == PGT_l2_page_table) &&
372 shadow_mode_translate(d) && !external )
373 {
374 // free the ref to the hl2
375 //
376 put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
377 }
379 unmap_domain_page(pl2e);
380 }
382 void free_shadow_page(unsigned long smfn)
383 {
384 struct pfn_info *page = &frame_table[smfn];
385 unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
386 struct domain *d = page_get_owner(pfn_to_page(gmfn));
387 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
388 unsigned long type = page->u.inuse.type_info & PGT_type_mask;
390 SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
392 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
394 delete_shadow_status(d, gpfn, gmfn, type);
396 switch ( type )
397 {
398 case PGT_l1_shadow:
399 perfc_decr(shadow_l1_pages);
400 shadow_demote(d, gpfn, gmfn);
401 free_shadow_l1_table(d, smfn);
402 break;
404 case PGT_l2_shadow:
405 perfc_decr(shadow_l2_pages);
406 shadow_demote(d, gpfn, gmfn);
407 free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
408 break;
410 case PGT_hl2_shadow:
411 perfc_decr(hl2_table_pages);
412 shadow_demote(d, gpfn, gmfn);
413 free_shadow_hl2_table(d, smfn);
414 break;
416 case PGT_snapshot:
417 perfc_decr(snapshot_pages);
418 break;
420 default:
421 printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
422 page_to_pfn(page), page->u.inuse.type_info);
423 break;
424 }
426 d->arch.shadow_page_count--;
428 // No TLB flushes are needed the next time this page gets allocated.
429 //
430 page->tlbflush_timestamp = 0;
431 page->u.free.cpumask = CPU_MASK_NONE;
433 if ( type == PGT_l1_shadow )
434 {
435 list_add(&page->list, &d->arch.free_shadow_frames);
436 perfc_incr(free_l1_pages);
437 }
438 else
439 free_domheap_page(page);
440 }
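/*
 * Illustrative sketch (not part of the original file): free_shadow_page() is
 * not normally called by hand.  Shadow frames are reference counted through
 * get_shadow_ref()/put_shadow_ref() (presumably declared in asm/shadow.h),
 * and dropping the last reference is what frees the shadow -- which is why
 * free_shadow_pages() below only needs to unpin pages and drop the CR3 and
 * monitor-table references.  The example_* helper is hypothetical.
 */
#if 0
static void example_shadow_ref(unsigned long smfn)
{
    if ( !get_shadow_ref(smfn) )   /* take a reference on shadow frame smfn */
        BUG();
    put_shadow_ref(smfn);          /* drop it; the last ref frees the frame */
}
#endif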
442 void
443 remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
444 {
445 unsigned long smfn;
447 //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
449 shadow_lock(d);
451 while ( stype >= PGT_l1_shadow )
452 {
453 smfn = __shadow_status(d, gpfn, stype);
454 if ( smfn && MFN_PINNED(smfn) )
455 shadow_unpin(smfn);
456 stype -= PGT_l1_shadow;
457 }
459 shadow_unlock(d);
460 }
462 static inline void
463 release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
464 {
465 struct pfn_info *page;
467 page = &frame_table[entry->gmfn];
469 // Decrement ref count of guest & shadow pages
470 //
471 put_page(page);
473 // Only use entries that have low bits clear...
474 //
475 if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
476 {
477 put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
478 entry->writable_pl1e = -2;
479 }
480 else
481 ASSERT( entry->writable_pl1e == -1 );
483 // Free the snapshot
484 //
485 shadow_free_snapshot(d, entry);
486 }
488 static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
489 {
490 struct out_of_sync_entry *entry = d->arch.out_of_sync;
491 struct out_of_sync_entry **prev = &d->arch.out_of_sync;
492 struct out_of_sync_entry *found = NULL;
494 // NB: Be careful not to call something that manipulates this list
495 // while walking it. Collect the results into a separate list
496 // first, then walk that list.
497 //
498 while ( entry )
499 {
500 if ( entry->gmfn == gmfn )
501 {
502 // remove from out of sync list
503 *prev = entry->next;
505 // add to found list
506 entry->next = found;
507 found = entry;
509 entry = *prev;
510 continue;
511 }
512 prev = &entry->next;
513 entry = entry->next;
514 }
516 prev = NULL;
517 entry = found;
518 while ( entry )
519 {
520 release_out_of_sync_entry(d, entry);
522 prev = &entry->next;
523 entry = entry->next;
524 }
526 // Add found list to free list
527 if ( prev )
528 {
529 *prev = d->arch.out_of_sync_free;
530 d->arch.out_of_sync_free = found;
531 }
532 }
534 static void free_out_of_sync_state(struct domain *d)
535 {
536 struct out_of_sync_entry *entry;
538 // NB: Be careful not to call something that manipulates this list
539 // while walking it. Remove one item at a time, and always
540 // restart from start of list.
541 //
542 while ( (entry = d->arch.out_of_sync) )
543 {
544 d->arch.out_of_sync = entry->next;
545 release_out_of_sync_entry(d, entry);
547 entry->next = d->arch.out_of_sync_free;
548 d->arch.out_of_sync_free = entry;
549 }
550 }
552 static void free_shadow_pages(struct domain *d)
553 {
554 int i;
555 struct shadow_status *x;
556 struct vcpu *v;
558 /*
559 * WARNING! The shadow page table must not currently be in use!
560 * e.g., You are expected to have paused the domain and synchronized CR3.
561 */
563 if ( !d->arch.shadow_ht ) return;
565 shadow_audit(d, 1);
567 // first, remove any outstanding refs from out_of_sync entries...
568 //
569 free_out_of_sync_state(d);
571 // second, remove any outstanding refs from v->arch.shadow_table
572 // and CR3.
573 //
574 for_each_vcpu(d, v)
575 {
576 if ( pagetable_get_paddr(v->arch.shadow_table) )
577 {
578 put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
579 v->arch.shadow_table = mk_pagetable(0);
580 }
582 if ( v->arch.monitor_shadow_ref )
583 {
584 put_shadow_ref(v->arch.monitor_shadow_ref);
585 v->arch.monitor_shadow_ref = 0;
586 }
587 }
589 // For external shadows, remove the monitor table's refs
590 //
591 if ( shadow_mode_external(d) )
592 {
593 for_each_vcpu(d, v)
594 {
595 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
597 if ( mpl2e )
598 {
599 l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
600 l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
602 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
603 {
604 put_shadow_ref(l2e_get_pfn(hl2e));
605 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
606 }
607 if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
608 {
609 put_shadow_ref(l2e_get_pfn(smfn));
610 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
611 }
612 }
613 }
614 }
616 // Now, the only refs to shadow pages that are left are from the shadow
617 // pages themselves. We just unpin the pinned pages, and the rest
618 // should automatically disappear.
619 //
620 // NB: Beware: each explicit or implicit call to free_shadow_page
621 // can/will result in the hash bucket getting rewritten out from
622 // under us... First, collect the list of pinned pages, then
623 // free them.
624 //
625 for ( i = 0; i < shadow_ht_buckets; i++ )
626 {
627 u32 count;
628 unsigned long *mfn_list;
630 /* Skip empty buckets. */
631 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
632 continue;
634 count = 0;
635 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
636 if ( MFN_PINNED(x->smfn) )
637 count++;
638 if ( !count )
639 continue;
641 mfn_list = xmalloc_array(unsigned long, count);
642 count = 0;
643 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
644 if ( MFN_PINNED(x->smfn) )
645 mfn_list[count++] = x->smfn;
647 while ( count )
648 {
649 shadow_unpin(mfn_list[--count]);
650 }
651 xfree(mfn_list);
652 }
654 // Now free the pre-zero'ed pages from the domain
655 //
656 struct list_head *list_ent, *tmp;
657 list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
658 {
659 list_del(list_ent);
660 perfc_decr(free_l1_pages);
662 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
663 free_domheap_page(page);
664 }
666 shadow_audit(d, 0);
668 SH_VLOG("Free shadow table.");
669 }
671 void shadow_mode_init(void)
672 {
673 }
675 int _shadow_mode_refcounts(struct domain *d)
676 {
677 return shadow_mode_refcounts(d);
678 }
680 static void alloc_monitor_pagetable(struct vcpu *v)
681 {
682 unsigned long mmfn;
683 l2_pgentry_t *mpl2e;
684 struct pfn_info *mmfn_info;
685 struct domain *d = v->domain;
687 ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
689 mmfn_info = alloc_domheap_page(NULL);
690 ASSERT(mmfn_info != NULL);
692 mmfn = page_to_pfn(mmfn_info);
693 mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
694 memset(mpl2e, 0, PAGE_SIZE);
696 #ifdef __i386__ /* XXX screws x86/64 build */
697 memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
698 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
699 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
700 #endif
702 mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
703 l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
704 __PAGE_HYPERVISOR);
706 // map the phys_to_machine map into the Read-Only MPT space for this domain
707 mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
708 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
709 __PAGE_HYPERVISOR);
711 // Don't (yet) have mappings for these...
712 // Don't want to accidentally see the idle_pg_table's linear mapping.
713 //
714 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
715 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
717 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
718 v->arch.monitor_vtable = mpl2e;
719 }
721 /*
722 * Free the pages for monitor_table and hl2_table
723 */
724 void free_monitor_pagetable(struct vcpu *v)
725 {
726 l2_pgentry_t *mpl2e, hl2e, sl2e;
727 unsigned long mfn;
729 ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
731 mpl2e = v->arch.monitor_vtable;
733 /*
734 * First get the mfn for hl2_table by looking at monitor_table
735 */
736 hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
737 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
738 {
739 mfn = l2e_get_pfn(hl2e);
740 ASSERT(mfn);
741 put_shadow_ref(mfn);
742 }
744 sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
745 if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
746 {
747 mfn = l2e_get_pfn(sl2e);
748 ASSERT(mfn);
749 put_shadow_ref(mfn);
750 }
752 unmap_domain_page(mpl2e);
754 /*
755 * Then free monitor_table.
756 */
757 mfn = pagetable_get_pfn(v->arch.monitor_table);
758 free_domheap_page(&frame_table[mfn]);
760 v->arch.monitor_table = mk_pagetable(0);
761 v->arch.monitor_vtable = NULL;
762 }
764 int
765 set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
766 struct domain_mmap_cache *l2cache,
767 struct domain_mmap_cache *l1cache)
768 {
769 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
770 l2_pgentry_t *l2, l2e;
771 l1_pgentry_t *l1;
772 struct pfn_info *l1page;
773 unsigned long va = pfn << PAGE_SHIFT;
775 ASSERT(tabpfn != 0);
777 l2 = map_domain_page_with_cache(tabpfn, l2cache);
778 l2e = l2[l2_table_offset(va)];
779 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
780 {
781 l1page = alloc_domheap_page(NULL);
782 if ( !l1page )
783 {
784 unmap_domain_page_with_cache(l2, l2cache);
785 return 0;
786 }
788 l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
789 memset(l1, 0, PAGE_SIZE);
790 unmap_domain_page_with_cache(l1, l1cache);
792 l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
793 l2[l2_table_offset(va)] = l2e;
794 }
795 unmap_domain_page_with_cache(l2, l2cache);
797 l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
798 l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
799 unmap_domain_page_with_cache(l1, l1cache);
801 return 1;
802 }
804 static int
805 alloc_p2m_table(struct domain *d)
806 {
807 struct list_head *list_ent;
808 struct pfn_info *page, *l2page;
809 l2_pgentry_t *l2;
810 unsigned long mfn, pfn;
811 struct domain_mmap_cache l1cache, l2cache;
813 l2page = alloc_domheap_page(NULL);
814 if ( l2page == NULL )
815 return 0;
817 domain_mmap_cache_init(&l1cache);
818 domain_mmap_cache_init(&l2cache);
820 d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
821 l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
822 memset(l2, 0, PAGE_SIZE);
823 unmap_domain_page_with_cache(l2, &l2cache);
825 list_ent = d->page_list.next;
826 while ( list_ent != &d->page_list )
827 {
828 page = list_entry(list_ent, struct pfn_info, list);
829 mfn = page_to_pfn(page);
830 pfn = machine_to_phys_mapping[mfn];
831 ASSERT(pfn != INVALID_M2P_ENTRY);
832 ASSERT(pfn < (1u<<20));
834 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
836 list_ent = page->list.next;
837 }
839 list_ent = d->xenpage_list.next;
840 while ( list_ent != &d->xenpage_list )
841 {
842 page = list_entry(list_ent, struct pfn_info, list);
843 mfn = page_to_pfn(page);
844 pfn = machine_to_phys_mapping[mfn];
845 if ( (pfn != INVALID_M2P_ENTRY) &&
846 (pfn < (1u<<20)) )
847 {
848 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
849 }
851 list_ent = page->list.next;
852 }
854 domain_mmap_cache_destroy(&l2cache);
855 domain_mmap_cache_destroy(&l1cache);
857 return 1;
858 }
860 static void
861 free_p2m_table(struct domain *d)
862 {
863 // uh, this needs some work... :)
864 BUG();
865 }
867 int __shadow_mode_enable(struct domain *d, unsigned int mode)
868 {
869 struct vcpu *v;
870 int new_modes = (mode & ~d->arch.shadow_mode);
872 // Gotta be adding something to call this function.
873 ASSERT(new_modes);
875 // can't take anything away by calling this function.
876 ASSERT(!(d->arch.shadow_mode & ~mode));
878 for_each_vcpu(d, v)
879 {
880 invalidate_shadow_ldt(v);
882 // We need to set these up for __update_pagetables().
883 // See the comment there.
885 /*
886 * arch.guest_vtable
887 */
888 if ( v->arch.guest_vtable &&
889 (v->arch.guest_vtable != __linear_l2_table) )
890 {
891 unmap_domain_page(v->arch.guest_vtable);
892 }
893 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
894 v->arch.guest_vtable = __linear_l2_table;
895 else
896 v->arch.guest_vtable = NULL;
898 /*
899 * arch.shadow_vtable
900 */
901 if ( v->arch.shadow_vtable &&
902 (v->arch.shadow_vtable != __shadow_linear_l2_table) )
903 {
904 unmap_domain_page(v->arch.shadow_vtable);
905 }
906 if ( !(mode & SHM_external) )
907 v->arch.shadow_vtable = __shadow_linear_l2_table;
908 else
909 v->arch.shadow_vtable = NULL;
911 /*
912 * arch.hl2_vtable
913 */
914 if ( v->arch.hl2_vtable &&
915 (v->arch.hl2_vtable != __linear_hl2_table) )
916 {
917 unmap_domain_page(v->arch.hl2_vtable);
918 }
919 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
920 v->arch.hl2_vtable = __linear_hl2_table;
921 else
922 v->arch.hl2_vtable = NULL;
924 /*
925 * arch.monitor_table & arch.monitor_vtable
926 */
927 if ( v->arch.monitor_vtable )
928 {
929 free_monitor_pagetable(v);
930 }
931 if ( mode & SHM_external )
932 {
933 alloc_monitor_pagetable(v);
934 }
935 }
937 if ( new_modes & SHM_enable )
938 {
939 ASSERT( !d->arch.shadow_ht );
940 d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
941 if ( d->arch.shadow_ht == NULL )
942 goto nomem;
944 memset(d->arch.shadow_ht, 0,
945 shadow_ht_buckets * sizeof(struct shadow_status));
946 }
948 if ( new_modes & SHM_log_dirty )
949 {
950 ASSERT( !d->arch.shadow_dirty_bitmap );
951 d->arch.shadow_dirty_bitmap_size = (d->max_pages + 63) & ~63;
952 d->arch.shadow_dirty_bitmap =
953 xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
954 (8 * sizeof(unsigned long)));
955 if ( d->arch.shadow_dirty_bitmap == NULL )
956 {
957 d->arch.shadow_dirty_bitmap_size = 0;
958 goto nomem;
959 }
960 memset(d->arch.shadow_dirty_bitmap, 0,
961 d->arch.shadow_dirty_bitmap_size/8);
962 }
964 if ( new_modes & SHM_translate )
965 {
966 if ( !(new_modes & SHM_external) )
967 {
968 ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
969 if ( !alloc_p2m_table(d) )
970 {
971 printk("alloc_p2m_table failed (out-of-memory?)\n");
972 goto nomem;
973 }
974 }
975 else
976 {
977 // external guests provide their own memory for their P2M maps.
978 //
979 ASSERT( d == page_get_owner(
980 &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
981 }
982 }
984 printk("audit1\n");
985 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
986 printk("audit1 done\n");
988 // Get rid of any shadow pages from any previous shadow mode.
989 //
990 free_shadow_pages(d);
992 printk("audit2\n");
993 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
994 printk("audit2 done\n");
996 /*
997 * Tear down its counts by disassembling its page-table-based ref counts.
998 * Also remove CR3's gcount/tcount.
999 * That leaves things like GDTs and LDTs and external refs intact.
1001 * Most pages will be writable tcount=0.
1002 * Some will still be L1 tcount=0 or L2 tcount=0.
1003 * Maybe some pages will be type none tcount=0.
1004 * Pages granted external writable refs (via grant tables?) will
1005 * still have a non-zero tcount. That's OK.
1007 * gcounts will generally be 1 for PGC_allocated.
1008 * GDTs and LDTs will have additional gcounts.
1009 * Any grant-table based refs will still be in the gcount.
1011 * We attempt to grab writable refs to each page (thus setting its type).
1012 * Immediately put back those type refs.
1014 * Assert that no pages are left with L1/L2/L3/L4 type.
1015 */
1016 audit_adjust_pgtables(d, -1, 1);
1018 d->arch.shadow_mode = mode;
1020 if ( shadow_mode_refcounts(d) )
1022 struct list_head *list_ent = d->page_list.next;
1023 while ( list_ent != &d->page_list )
1025 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
1026 if ( !get_page_type(page, PGT_writable_page) )
1027 BUG();
1028 put_page_type(page);
1030 list_ent = page->list.next;
1034 audit_adjust_pgtables(d, 1, 1);
1036 printk("audit3\n");
1037 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
1038 printk("audit3 done\n");
1040 return 0;
1042 nomem:
1043 if ( (new_modes & SHM_enable) )
1045 xfree(d->arch.shadow_ht);
1046 d->arch.shadow_ht = NULL;
1048 if ( (new_modes & SHM_log_dirty) )
1050 xfree(d->arch.shadow_dirty_bitmap);
1051 d->arch.shadow_dirty_bitmap = NULL;
1053 if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
1054 pagetable_get_paddr(d->arch.phys_table) )
1056 free_p2m_table(d);
1058 return -ENOMEM;
1061 int shadow_mode_enable(struct domain *d, unsigned int mode)
1063 int rc;
1064 shadow_lock(d);
1065 rc = __shadow_mode_enable(d, mode);
1066 shadow_unlock(d);
1067 return rc;
1070 static void
1071 translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
1073 int i;
1074 l1_pgentry_t *l1;
1076 l1 = map_domain_page(l1mfn);
1077 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1079 if ( is_guest_l1_slot(i) &&
1080 (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
1082 unsigned long mfn = l1e_get_pfn(l1[i]);
1083 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1084 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1085 l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
1088 unmap_domain_page(l1);
1091 // This is not general enough to handle arbitrary pagetables
1092 // with shared L1 pages, etc., but it is sufficient for bringing
1093 // up dom0.
1094 //
1095 void
1096 translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
1097 unsigned int type)
1099 int i;
1100 l2_pgentry_t *l2;
1102 ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
1104 l2 = map_domain_page(l2mfn);
1105 for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
1107 if ( is_guest_l2_slot(type, i) &&
1108 (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
1110 unsigned long mfn = l2e_get_pfn(l2[i]);
1111 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1112 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1113 l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
1114 translate_l1pgtable(d, p2m, mfn);
1117 unmap_domain_page(l2);
1120 static void free_shadow_ht_entries(struct domain *d)
1122 struct shadow_status *x, *n;
1124 SH_VLOG("freed tables count=%d l1=%d l2=%d",
1125 d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
1126 perfc_value(shadow_l2_pages));
1128 n = d->arch.shadow_ht_extras;
1129 while ( (x = n) != NULL )
1131 d->arch.shadow_extras_count--;
1132 n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
1133 xfree(x);
1136 d->arch.shadow_ht_extras = NULL;
1137 d->arch.shadow_ht_free = NULL;
1139 ASSERT(d->arch.shadow_extras_count == 0);
1140 SH_VLOG("freed extras, now %d", d->arch.shadow_extras_count);
1142 if ( d->arch.shadow_dirty_bitmap != NULL )
1144 xfree(d->arch.shadow_dirty_bitmap);
1145 d->arch.shadow_dirty_bitmap = 0;
1146 d->arch.shadow_dirty_bitmap_size = 0;
1149 xfree(d->arch.shadow_ht);
1150 d->arch.shadow_ht = NULL;
1153 static void free_out_of_sync_entries(struct domain *d)
1155 struct out_of_sync_entry *x, *n;
1157 n = d->arch.out_of_sync_extras;
1158 while ( (x = n) != NULL )
1160 d->arch.out_of_sync_extras_count--;
1161 n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
1162 xfree(x);
1165 d->arch.out_of_sync_extras = NULL;
1166 d->arch.out_of_sync_free = NULL;
1167 d->arch.out_of_sync = NULL;
1169 ASSERT(d->arch.out_of_sync_extras_count == 0);
1170 FSH_LOG("freed extra out_of_sync entries, now %d",
1171 d->arch.out_of_sync_extras_count);
1174 void __shadow_mode_disable(struct domain *d)
1176 if ( unlikely(!shadow_mode_enabled(d)) )
1177 return;
1179 /*
1180 * Currently this does not fix up page ref counts, so it is valid to call
1181 * only when a domain is being destroyed.
1182 */
1183 BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags) &&
1184 shadow_mode_refcounts(d));
1185 d->arch.shadow_tainted_refcnts = shadow_mode_refcounts(d);
1187 free_shadow_pages(d);
1188 free_writable_pte_predictions(d);
1190 #ifndef NDEBUG
1191 int i;
1192 for ( i = 0; i < shadow_ht_buckets; i++ )
1194 if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
1196 printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
1197 __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
1198 BUG();
1201 #endif
1203 d->arch.shadow_mode = 0;
1205 free_shadow_ht_entries(d);
1206 free_out_of_sync_entries(d);
1208 struct vcpu *v;
1209 for_each_vcpu(d, v)
1211 update_pagetables(v);
1215 static int shadow_mode_table_op(
1216 struct domain *d, dom0_shadow_control_t *sc)
1218 unsigned int op = sc->op;
1219 int i, rc = 0;
1220 struct vcpu *v;
1222 ASSERT(shadow_lock_is_acquired(d));
1224 SH_VLOG("shadow mode table op %lx %lx count %d",
1225 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
1226 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
1227 d->arch.shadow_page_count);
1229 shadow_audit(d, 1);
1231 switch ( op )
1233 case DOM0_SHADOW_CONTROL_OP_FLUSH:
1234 free_shadow_pages(d);
1236 d->arch.shadow_fault_count = 0;
1237 d->arch.shadow_dirty_count = 0;
1238 d->arch.shadow_dirty_net_count = 0;
1239 d->arch.shadow_dirty_block_count = 0;
1241 break;
1243 case DOM0_SHADOW_CONTROL_OP_CLEAN:
1244 free_shadow_pages(d);
1246 sc->stats.fault_count = d->arch.shadow_fault_count;
1247 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1248 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1249 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1251 d->arch.shadow_fault_count = 0;
1252 d->arch.shadow_dirty_count = 0;
1253 d->arch.shadow_dirty_net_count = 0;
1254 d->arch.shadow_dirty_block_count = 0;
1256 if ( (d->max_pages > sc->pages) ||
1257 (sc->dirty_bitmap == NULL) ||
1258 (d->arch.shadow_dirty_bitmap == NULL) )
1260 rc = -EINVAL;
1261 break;
1264 sc->pages = d->max_pages;
1266 #define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
1267 for ( i = 0; i < d->max_pages; i += chunk )
1269 int bytes = ((((d->max_pages - i) > chunk) ?
1270 chunk : (d->max_pages - i)) + 7) / 8;
1272 if (copy_to_user(
1273 sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
1274 d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
1275 bytes))
1277 // copy_to_user can fail when copying to guest app memory.
1278 // app should zero buffer after mallocing, and pin it
1279 rc = -EINVAL;
1280 memset(
1281 d->arch.shadow_dirty_bitmap +
1282 (i/(8*sizeof(unsigned long))),
1283 0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
1284 break;
1287 memset(
1288 d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
1289 0, bytes);
1292 break;
1294 case DOM0_SHADOW_CONTROL_OP_PEEK:
1295 sc->stats.fault_count = d->arch.shadow_fault_count;
1296 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1297 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1298 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1300 if ( (d->max_pages > sc->pages) ||
1301 (sc->dirty_bitmap == NULL) ||
1302 (d->arch.shadow_dirty_bitmap == NULL) )
1304 rc = -EINVAL;
1305 break;
1308 sc->pages = d->max_pages;
1309 if (copy_to_user(
1310 sc->dirty_bitmap, d->arch.shadow_dirty_bitmap, (d->max_pages+7)/8))
1312 rc = -EINVAL;
1313 break;
1316 break;
1318 default:
1319 rc = -EINVAL;
1320 break;
1323 SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
1324 shadow_audit(d, 1);
1326 for_each_vcpu(d,v)
1327 __update_pagetables(v);
1329 return rc;
1332 int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
1334 unsigned int op = sc->op;
1335 int rc = 0;
1336 struct vcpu *v;
1338 if ( unlikely(d == current->domain) )
1340 DPRINTK("Don't try to do a shadow op on yourself!\n");
1341 return -EINVAL;
1344 domain_pause(d);
1346 shadow_lock(d);
1348 switch ( op )
1350 case DOM0_SHADOW_CONTROL_OP_OFF:
1351 __shadow_sync_all(d);
1352 __shadow_mode_disable(d);
1353 break;
1355 case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
1356 free_shadow_pages(d);
1357 rc = __shadow_mode_enable(d, SHM_enable);
1358 break;
1360 case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
1361 free_shadow_pages(d);
1362 rc = __shadow_mode_enable(
1363 d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
1364 break;
1366 case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
1367 free_shadow_pages(d);
1368 rc = __shadow_mode_enable(
1369 d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
1370 break;
1372 default:
1373 rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
1374 break;
1377 shadow_unlock(d);
1379 for_each_vcpu(d,v)
1380 update_pagetables(v);
1382 domain_unpause(d);
1384 return rc;
1387 /*
1388 * XXX KAF: Why is this VMX specific?
1389 */
1390 void vmx_shadow_clear_state(struct domain *d)
1392 SH_VVLOG("%s:", __func__);
1393 shadow_lock(d);
1394 free_shadow_pages(d);
1395 shadow_unlock(d);
1396 update_pagetables(d->vcpu[0]);
1399 unsigned long
1400 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
1402 ASSERT( shadow_mode_translate(d) );
1404 perfc_incrc(gpfn_to_mfn_foreign);
1406 unsigned long va = gpfn << PAGE_SHIFT;
1407 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1408 l2_pgentry_t *l2 = map_domain_page(tabpfn);
1409 l2_pgentry_t l2e = l2[l2_table_offset(va)];
1410 unmap_domain_page(l2);
1411 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1413 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
1414 d->domain_id, gpfn, l2e_get_intpte(l2e));
1415 return INVALID_MFN;
1417 l1_pgentry_t *l1 = map_domain_page(l2e_get_pfn(l2e));
1418 l1_pgentry_t l1e = l1[l1_table_offset(va)];
1419 unmap_domain_page(l1);
1421 #if 0
1422 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
1423 d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
1424 #endif
1426 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
1428 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
1429 d->domain_id, gpfn, l1e_get_intpte(l1e));
1430 return INVALID_MFN;
1433 return l1e_get_pfn(l1e);
1436 static unsigned long
1437 shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
1438 unsigned long smfn)
1440 unsigned long hl2mfn;
1441 l1_pgentry_t *hl2;
1442 int limit;
1444 ASSERT(PGT_base_page_table == PGT_l2_page_table);
1446 if ( unlikely(!(hl2mfn = alloc_shadow_page(d, gpfn, gmfn, PGT_hl2_shadow))) )
1448 printk("Couldn't alloc an HL2 shadow for pfn=%lx mfn=%lx\n",
1449 gpfn, gmfn);
1450 BUG(); /* XXX Deal gracefully with failure. */
1453 SH_VVLOG("shadow_hl2_table(gpfn=%lx, gmfn=%lx, smfn=%lx) => %lx",
1454 gpfn, gmfn, smfn, hl2mfn);
1455 perfc_incrc(shadow_hl2_table_count);
1457 hl2 = map_domain_page(hl2mfn);
1459 #ifdef __i386__
1460 if ( shadow_mode_external(d) )
1461 limit = L2_PAGETABLE_ENTRIES;
1462 else
1463 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
1464 #else
1465 limit = 0; /* XXX x86/64 XXX */
1466 #endif
1468 memset(hl2, 0, limit * sizeof(l1_pgentry_t));
1470 if ( !shadow_mode_external(d) )
1472 memset(&hl2[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 0,
1473 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1475 // Set up easy access to the GL2, SL2, and HL2 frames.
1476 //
1477 hl2[l2_table_offset(LINEAR_PT_VIRT_START)] =
1478 l1e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1479 hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1480 l1e_from_pfn(smfn, __PAGE_HYPERVISOR);
1481 hl2[l2_table_offset(PERDOMAIN_VIRT_START)] =
1482 l1e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1485 unmap_domain_page(hl2);
1487 return hl2mfn;
1490 /*
1491 * This could take and use a snapshot, and validate the entire page at
1492 * once, or it could continue to fault in entries one at a time...
1493 * Might be worth investigating...
1494 */
1495 static unsigned long shadow_l2_table(
1496 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1498 unsigned long smfn;
1499 l2_pgentry_t *spl2e;
1501 SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
1503 perfc_incrc(shadow_l2_table_count);
1505 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l2_shadow))) )
1507 printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
1508 gpfn, gmfn);
1509 BUG(); /* XXX Deal gracefully with failure. */
1512 spl2e = (l2_pgentry_t *)map_domain_page(smfn);
1514 /* Install hypervisor and 2x linear p.t. mappings. */
1515 if ( (PGT_base_page_table == PGT_l2_page_table) &&
1516 !shadow_mode_external(d) )
1518 /*
1519 * We could proactively fill in PDEs for pages that are already
1520 * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
1521 * (restriction required for coherence of the accessed bit). However,
1522 * we tried it and it didn't help performance. This is simpler.
1523 */
1524 memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
1526 /* Install hypervisor and 2x linear p.t. mappings. */
1527 memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1528 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1529 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1531 spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1532 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
1534 spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
1535 l2e_from_paddr(__pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt),
1536 __PAGE_HYPERVISOR);
1538 if ( shadow_mode_translate(d) ) // NB: not external
1540 unsigned long hl2mfn;
1542 spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
1543 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
1544 __PAGE_HYPERVISOR);
1546 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
1547 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
1549 // shadow_mode_translate (but not external) sl2 tables hold a
1550 // ref to their hl2.
1551 //
1552 if ( !get_shadow_ref(hl2mfn) )
1553 BUG();
1555 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1556 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1558 else
1559 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1560 l2e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1562 else
1564 memset(spl2e, 0, L2_PAGETABLE_ENTRIES*sizeof(l2_pgentry_t));
1567 unmap_domain_page(spl2e);
1569 SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
1570 return smfn;
1573 void shadow_map_l1_into_current_l2(unsigned long va)
1575 struct vcpu *v = current;
1576 struct domain *d = v->domain;
1577 l1_pgentry_t *gpl1e, *spl1e;
1578 l2_pgentry_t gl2e, sl2e;
1579 unsigned long gl1pfn, gl1mfn, sl1mfn;
1580 int i, init_table = 0;
1582 __guest_get_l2e(v, va, &gl2e);
1583 ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT);
1584 gl1pfn = l2e_get_pfn(gl2e);
1586 if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
1588 /* This L1 is NOT already shadowed so we need to shadow it. */
1589 SH_VVLOG("4a: l1 not shadowed");
1591 gl1mfn = __gpfn_to_mfn(d, gl1pfn);
1592 if ( unlikely(!VALID_MFN(gl1mfn)) )
1594 // Attempt to use an invalid pfn as an L1 page.
1595 // XXX this needs to be more graceful!
1596 BUG();
1599 if ( unlikely(!(sl1mfn =
1600 alloc_shadow_page(d, gl1pfn, gl1mfn, PGT_l1_shadow))) )
1602 printk("Couldn't alloc an L1 shadow for pfn=%lx mfn=%lx\n",
1603 gl1pfn, gl1mfn);
1604 BUG(); /* XXX Need to deal gracefully with failure. */
1607 perfc_incrc(shadow_l1_table_count);
1608 init_table = 1;
1610 else
1612 /* This L1 is shadowed already, but the L2 entry is missing. */
1613 SH_VVLOG("4b: was shadowed, l2 missing (%lx)", sl1mfn);
1616 #ifndef NDEBUG
1617 l2_pgentry_t old_sl2e;
1618 __shadow_get_l2e(v, va, &old_sl2e);
1619 ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
1620 #endif
1622 if ( !get_shadow_ref(sl1mfn) )
1623 BUG();
1624 l2pde_general(d, &gl2e, &sl2e, sl1mfn);
1625 __guest_set_l2e(v, va, gl2e);
1626 __shadow_set_l2e(v, va, sl2e);
1628 if ( init_table )
1630 l1_pgentry_t sl1e;
1631 int index = l1_table_offset(va);
1632 int min = 1, max = 0;
1634 gpl1e = &(linear_pg_table[l1_linear_offset(va) &
1635 ~(L1_PAGETABLE_ENTRIES-1)]);
1637 spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
1638 ~(L1_PAGETABLE_ENTRIES-1)]);
1640 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1642 l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
1643 if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
1644 unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
1645 sl1e = l1e_empty();
1646 if ( l1e_get_flags(sl1e) == 0 )
1648 // First copy entries from 0 until first invalid.
1649 // Then copy entries from index until first invalid.
1650 //
1651 if ( i < index ) {
1652 i = index - 1;
1653 continue;
1655 break;
1657 spl1e[i] = sl1e;
1658 if ( unlikely(i < min) )
1659 min = i;
1660 if ( likely(i > max) )
1661 max = i;
1664 frame_table[sl1mfn].tlbflush_timestamp =
1665 SHADOW_ENCODE_MIN_MAX(min, max);
1669 void shadow_invlpg(struct vcpu *v, unsigned long va)
1671 struct domain *d = v->domain;
1672 l1_pgentry_t gpte, spte;
1674 ASSERT(shadow_mode_enabled(d));
1676 shadow_lock(d);
1678 __shadow_sync_va(v, va);
1680 // XXX mafetter: will need to think about 4MB pages...
1682 // It's not strictly necessary to update the shadow here,
1683 // but it might save a fault later.
1684 //
1685 if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
1686 sizeof(gpte))) {
1687 perfc_incrc(shadow_invlpg_faults);
1688 shadow_unlock(d);
1689 return;
1691 l1pte_propagate_from_guest(d, gpte, &spte);
1692 shadow_set_l1e(va, spte, 1);
1694 shadow_unlock(d);
1697 struct out_of_sync_entry *
1698 shadow_alloc_oos_entry(struct domain *d)
1700 struct out_of_sync_entry *f, *extra;
1701 unsigned size, i;
1703 if ( unlikely(d->arch.out_of_sync_free == NULL) )
1705 FSH_LOG("Allocate more fullshadow tuple blocks.");
1707 size = sizeof(void *) + (out_of_sync_extra_size * sizeof(*f));
1708 extra = xmalloc_bytes(size);
1710 /* XXX Should be more graceful here. */
1711 if ( extra == NULL )
1712 BUG();
1714 memset(extra, 0, size);
1716 /* Record the allocation block so it can be correctly freed later. */
1717 d->arch.out_of_sync_extras_count++;
1718 *((struct out_of_sync_entry **)&extra[out_of_sync_extra_size]) =
1719 d->arch.out_of_sync_extras;
1720 d->arch.out_of_sync_extras = &extra[0];
1722 /* Thread a free chain through the newly-allocated nodes. */
1723 for ( i = 0; i < (out_of_sync_extra_size - 1); i++ )
1724 extra[i].next = &extra[i+1];
1725 extra[i].next = NULL;
1727 /* Add the new nodes to the free list. */
1728 d->arch.out_of_sync_free = &extra[0];
1731 /* Allocate a new node from the quicklist. */
1732 f = d->arch.out_of_sync_free;
1733 d->arch.out_of_sync_free = f->next;
1735 return f;
1738 static inline unsigned long
1739 shadow_make_snapshot(
1740 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1742 unsigned long smfn, sl1mfn = 0;
1743 void *original, *snapshot;
1744 u32 min_max = 0;
1745 int min, max, length;
1747 if ( test_and_set_bit(_PGC_out_of_sync, &frame_table[gmfn].count_info) )
1749 ASSERT(__shadow_status(d, gpfn, PGT_snapshot));
1750 return SHADOW_SNAPSHOT_ELSEWHERE;
1753 perfc_incrc(shadow_make_snapshot);
1755 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_snapshot))) )
1757 printk("Couldn't alloc fullshadow snapshot for pfn=%lx mfn=%lx!\n"
1758 "Dom%d snapshot_count_count=%d\n",
1759 gpfn, gmfn, d->domain_id, d->arch.snapshot_page_count);
1760 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
1763 if ( !get_shadow_ref(smfn) )
1764 BUG();
1766 if ( shadow_mode_refcounts(d) &&
1767 (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
1768 min_max = pfn_to_page(sl1mfn)->tlbflush_timestamp;
1769 pfn_to_page(smfn)->tlbflush_timestamp = min_max;
1771 min = SHADOW_MIN(min_max);
1772 max = SHADOW_MAX(min_max);
1773 length = max - min + 1;
1774 perfc_incr_histo(snapshot_copies, length, PT_UPDATES);
1776 min *= sizeof(l1_pgentry_t);
1777 length *= sizeof(l1_pgentry_t);
1779 original = map_domain_page(gmfn);
1780 snapshot = map_domain_page(smfn);
1781 memcpy(snapshot + min, original + min, length);
1782 unmap_domain_page(original);
1783 unmap_domain_page(snapshot);
1785 return smfn;
1788 static void
1789 shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
1791 void *snapshot;
1793 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
1794 return;
1796 // Clear the out_of_sync bit.
1797 //
1798 clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
1800 // XXX Need to think about how to protect the domain's
1801 // information less expensively.
1802 //
1803 snapshot = map_domain_page(entry->snapshot_mfn);
1804 memset(snapshot, 0, PAGE_SIZE);
1805 unmap_domain_page(snapshot);
1807 put_shadow_ref(entry->snapshot_mfn);
1810 struct out_of_sync_entry *
1811 shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
1812 unsigned long mfn)
1814 struct domain *d = v->domain;
1815 struct pfn_info *page = &frame_table[mfn];
1816 struct out_of_sync_entry *entry = shadow_alloc_oos_entry(d);
1818 ASSERT(shadow_lock_is_acquired(d));
1819 ASSERT(pfn_valid(mfn));
1821 #ifndef NDEBUG
1822 u32 type = page->u.inuse.type_info & PGT_type_mask;
1823 if ( shadow_mode_refcounts(d) )
1825 ASSERT(type == PGT_writable_page);
1827 else
1829 ASSERT(type && (type < PGT_l4_page_table));
1831 #endif
1833 FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08x", __func__,
1834 gpfn, mfn, page->count_info, page->u.inuse.type_info);
1836 // XXX this will require some more thought... Cross-domain sharing and
1837 // modification of page tables? Hmm...
1838 //
1839 if ( d != page_get_owner(page) )
1840 BUG();
1842 perfc_incrc(shadow_mark_mfn_out_of_sync_calls);
1844 entry->gpfn = gpfn;
1845 entry->gmfn = mfn;
1846 entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
1847 entry->writable_pl1e = -1;
1849 #if SHADOW_DEBUG
1850 mark_shadows_as_reflecting_snapshot(d, gpfn);
1851 #endif
1853 // increment guest's ref count to represent the entry in the
1854 // full shadow out-of-sync list.
1855 //
1856 get_page(page, d);
1858 // Add to the out-of-sync list
1859 //
1860 entry->next = d->arch.out_of_sync;
1861 d->arch.out_of_sync = entry;
1863 return entry;
1866 void shadow_mark_va_out_of_sync(
1867 struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long va)
1869 struct out_of_sync_entry *entry =
1870 shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
1871 l2_pgentry_t sl2e;
1873 // We need the address of the shadow PTE that maps @va.
1874 // It might not exist yet. Make sure it's there.
1875 //
1876 __shadow_get_l2e(v, va, &sl2e);
1877 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
1879 // either this L1 isn't shadowed yet, or the shadow isn't linked into
1880 // the current L2.
1881 shadow_map_l1_into_current_l2(va);
1882 __shadow_get_l2e(v, va, &sl2e);
1884 ASSERT(l2e_get_flags(sl2e) & _PAGE_PRESENT);
1886 // NB: this is stored as a machine address.
1887 entry->writable_pl1e =
1888 l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * l1_table_offset(va));
1889 ASSERT( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) );
1891 // Increment shadow's page count to represent the reference
1892 // inherent in entry->writable_pl1e
1893 //
1894 if ( !get_shadow_ref(l2e_get_pfn(sl2e)) )
1895 BUG();
1897 FSH_LOG("mark_out_of_sync(va=%lx -> writable_pl1e=%lx)",
1898 va, entry->writable_pl1e);
1901 /*
1902 * Returns 1 if the snapshot for @gpfn exists and its @index'th entry matches.
1903 * Returns 0 otherwise.
1904 */
1905 static int snapshot_entry_matches(
1906 struct domain *d, l1_pgentry_t *guest_pt,
1907 unsigned long gpfn, unsigned index)
1909 unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot);
1910 l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ...
1911 int entries_match;
1913 perfc_incrc(snapshot_entry_matches_calls);
1915 if ( !smfn )
1916 return 0;
1918 snapshot = map_domain_page(smfn);
1920 if (__copy_from_user(&gpte, &guest_pt[index],
1921 sizeof(gpte))) {
1922 unmap_domain_page(snapshot);
1923 return 0;
1926 // This could probably be smarter, but this is sufficient for
1927 // our current needs.
1928 //
1929 entries_match = !l1e_has_changed(gpte, snapshot[index],
1930 PAGE_FLAG_MASK);
1932 unmap_domain_page(snapshot);
1934 #ifdef PERF_COUNTERS
1935 if ( entries_match )
1936 perfc_incrc(snapshot_entry_matches_true);
1937 #endif
1939 return entries_match;
1942 /*
1943 * Returns 1 if va's shadow mapping is out-of-sync.
1944 * Returns 0 otherwise.
1945 */
1946 int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
1948 struct domain *d = v->domain;
1949 unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
1950 unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn);
1951 l2_pgentry_t l2e;
1952 unsigned long l1pfn, l1mfn;
1954 ASSERT(shadow_lock_is_acquired(d));
1955 ASSERT(VALID_M2P(l2pfn));
1957 perfc_incrc(shadow_out_of_sync_calls);
1959 if ( page_out_of_sync(&frame_table[l2mfn]) &&
1960 !snapshot_entry_matches(d, (l1_pgentry_t *)v->arch.guest_vtable,
1961 l2pfn, l2_table_offset(va)) )
1962 return 1;
1964 __guest_get_l2e(v, va, &l2e);
1965 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1966 return 0;
1968 l1pfn = l2e_get_pfn(l2e);
1969 l1mfn = __gpfn_to_mfn(d, l1pfn);
1971 // If the l1 pfn is invalid, it can't be out of sync...
1972 if ( !VALID_MFN(l1mfn) )
1973 return 0;
1975 if ( page_out_of_sync(&frame_table[l1mfn]) &&
1976 !snapshot_entry_matches(
1977 d, &linear_pg_table[l1_linear_offset(va) & ~(L1_PAGETABLE_ENTRIES-1)],
1978 l1pfn, l1_table_offset(va)) )
1979 return 1;
1981 return 0;
1984 #define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t)))
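/*
 * Illustrative note (not part of the original file): with 4-byte non-PAE
 * entries, PAGE_SIZE/sizeof(l1_pgentry_t) == 4096/4 == 1024, so
 * GPFN_TO_GPTEPAGE(gpfn) == gpfn/1024.  This groups gpfns by the guest page
 * table page that would map them under a linear, direct-map style layout,
 * and is used only as the hash key for the writable-PTE predictions below.
 */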
1985 static inline unsigned long
1986 predict_writable_pte_page(struct domain *d, unsigned long gpfn)
1988 return __shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), PGT_writable_pred);
1991 static inline void
1992 increase_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
1994 unsigned long score = prediction & PGT_score_mask;
1995 int create = (score == 0);
1997 // saturating addition
1998 score = (score + (1u << PGT_score_shift)) & PGT_score_mask;
1999 score = score ? score : PGT_score_mask;
2001 prediction = (prediction & PGT_mfn_mask) | score;
2003 //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
2004 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2006 if ( create )
2007 perfc_incr(writable_pte_predictions);
2010 static inline void
2011 decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
2013 unsigned long score = prediction & PGT_score_mask;
2014 ASSERT(score);
2016 // divide score by 2... We don't like bad predictions.
2017 //
2018 score = (score >> 1) & PGT_score_mask;
2020 prediction = (prediction & PGT_mfn_mask) | score;
2022 //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
2024 if ( score )
2025 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2026 else
2028 delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
2029 perfc_decr(writable_pte_predictions);
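/*
 * Illustrative note (not part of the original file): the prediction "score"
 * lives in the PGT_score_mask bits of the stored value.  Each confirmed
 * prediction adds 1 << PGT_score_shift, saturating at PGT_score_mask (see
 * increase_writable_pte_prediction above); each miss halves the score, and
 * the entry is deleted from the shadow hash once the score reaches zero.
 */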
2033 static void
2034 free_writable_pte_predictions(struct domain *d)
2036 int i;
2037 struct shadow_status *x;
2039 for ( i = 0; i < shadow_ht_buckets; i++ )
2041 u32 count;
2042 unsigned long *gpfn_list;
2044 /* Skip empty buckets. */
2045 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
2046 continue;
2048 count = 0;
2049 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2050 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2051 count++;
2053 gpfn_list = xmalloc_array(unsigned long, count);
2054 count = 0;
2055 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2056 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2057 gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
2059 while ( count )
2061 count--;
2062 delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
2065 xfree(gpfn_list);
2069 static u32 remove_all_write_access_in_ptpage(
2070 struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
2071 unsigned long readonly_gpfn, unsigned long readonly_gmfn,
2072 u32 max_refs_to_find, unsigned long prediction)
2074 l1_pgentry_t *pt = map_domain_page(pt_mfn);
2075 l1_pgentry_t match;
2076 unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
2077 int i;
2078 u32 found = 0;
2079 int is_l1_shadow =
2080 ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
2081 PGT_l1_shadow);
2083 match = l1e_from_pfn(readonly_gmfn, flags);
2085 // returns true if all refs have been found and fixed.
2086 //
2087 int fix_entry(int i)
2089 l1_pgentry_t old = pt[i];
2090 l1_pgentry_t new = old;
2092 l1e_remove_flags(new,_PAGE_RW);
2093 if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
2094 BUG();
2095 found++;
2096 pt[i] = new;
2097 if ( is_l1_shadow )
2098 shadow_put_page_from_l1e(old, d);
2100 #if 0
2101 printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
2102 "is_l1_shadow=%d\n",
2103 readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
2104 #endif
2106 return (found == max_refs_to_find);
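// Fast path: guess that the writable mapping sits at the "natural" slot for
// this gpfn, i.e. the index it would occupy in a contiguous 4MB region (the
// low 10 bits of the gpfn).  This is only a heuristic -- if it misses, the
// full scan below still finds the entry.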
2109 i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
2110 if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
2112 perfc_incrc(remove_write_fast_exit);
2113 increase_writable_pte_prediction(d, readonly_gpfn, prediction);
2114 unmap_domain_page(pt);
2115 return found;
2118 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2120 if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
2121 break;
2124 unmap_domain_page(pt);
2126 return found;
2127 #undef MATCH_ENTRY
2130 int shadow_remove_all_write_access(
2131 struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
2133 int i;
2134 struct shadow_status *a;
2135 u32 found = 0, fixups, write_refs;
2136 unsigned long prediction, predicted_gpfn, predicted_smfn;
2138 ASSERT(shadow_lock_is_acquired(d));
2139 ASSERT(VALID_MFN(readonly_gmfn));
2141 perfc_incrc(remove_write_access);
2143 // If it's not a writable page, then no writable refs can be outstanding.
2144 //
2145 if ( (frame_table[readonly_gmfn].u.inuse.type_info & PGT_type_mask) !=
2146 PGT_writable_page )
2148 perfc_incrc(remove_write_not_writable);
2149 return 1;
2152 // How many outstanding writable PTEs for this page are there?
2153 //
2154 write_refs =
2155 (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
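// For a PGT_writable_page the type count is the number of writable (PTE)
// references to the frame.  The code below assumes that if the frame is
// also pinned, one of those references is held by the pin itself rather
// than by a PTE, so there is one fewer writable PTE to hunt down.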
2156 if ( write_refs && MFN_PINNED(readonly_gmfn) )
2158 write_refs--;
2161 if ( write_refs == 0 )
2163 perfc_incrc(remove_write_no_work);
2164 return 1;
2167 // Before searching all the L1 page tables, check the typical culprit first
2168 //
2169 if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
2171 predicted_gpfn = prediction & PGT_mfn_mask;
2172 if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
2173 (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
2175 found += fixups;
2176 if ( found == write_refs )
2178 perfc_incrc(remove_write_predicted);
2179 return 1;
2182 else
2184 perfc_incrc(remove_write_bad_prediction);
2185 decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
2189 // Search all the shadow L1 page tables...
2190 //
2191 for (i = 0; i < shadow_ht_buckets; i++)
2193 a = &d->arch.shadow_ht[i];
2194 while ( a && a->gpfn_and_flags )
2196 if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow )
2198 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
2199 if ( found == write_refs )
2200 return 1;
2203 a = a->next;
2207 FSH_LOG("%s: looking for %d refs, found %d refs",
2208 __func__, write_refs, found);
2210 return 0;
2213 static u32 remove_all_access_in_page(
2214 struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
2216 l1_pgentry_t *pl1e = map_domain_page(l1mfn);
2217 l1_pgentry_t match;
2218 unsigned long flags = _PAGE_PRESENT;
2219 int i;
2220 u32 count = 0;
2221 int is_l1_shadow =
2222 ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
2223 PGT_l1_shadow);
2225 match = l1e_from_pfn(forbidden_gmfn, flags);
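// l1e_has_changed(e, match, _PAGE_PRESENT) compares the frame number plus
// the given flag bits, so it is false exactly when e is a present mapping
// of forbidden_gmfn -- that is the "match" we zap below.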
2227 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2229 if ( unlikely(!l1e_has_changed(pl1e[i], match, flags)) )
2231 l1_pgentry_t ol1e = pl1e[i];
2232 pl1e[i] = l1e_empty();
2233 count++;
2235 if ( is_l1_shadow )
2236 shadow_put_page_from_l1e(ol1e, d);
2237 else /* must be an hl2 page */
2238 put_page(&frame_table[forbidden_gmfn]);
2242 unmap_domain_page(pl1e);
2244 return count;
2247 u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
2249 int i;
2250 struct shadow_status *a;
2251 u32 count = 0;
2253 if ( unlikely(!shadow_mode_enabled(d)) )
2254 return 0;
2256 ASSERT(shadow_lock_is_acquired(d));
2257 perfc_incrc(remove_all_access);
2259 for (i = 0; i < shadow_ht_buckets; i++)
2261 a = &d->arch.shadow_ht[i];
2262 while ( a && a->gpfn_and_flags )
2264 switch (a->gpfn_and_flags & PGT_type_mask)
2266 case PGT_l1_shadow:
2267 case PGT_l2_shadow:
2268 case PGT_l3_shadow:
2269 case PGT_l4_shadow:
2270 case PGT_hl2_shadow:
2271 count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
2272 break;
2273 case PGT_snapshot:
2274 case PGT_writable_pred:
2275 // these can't hold refs to the forbidden page
2276 break;
2277 default:
2278 BUG();
2281 a = a->next;
2285 return count;
2288 static int resync_all(struct domain *d, u32 stype)
2290 struct out_of_sync_entry *entry;
2291 unsigned i;
2292 unsigned long smfn;
2293 void *guest, *shadow, *snapshot;
2294 int need_flush = 0, external = shadow_mode_external(d);
2295 int unshadow;
2296 int changed;
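// resync_all() walks the domain's out-of-sync list for shadows of the given
// type: each guest page is diffed against the snapshot taken when it went
// out of sync, changed entries are revalidated into the shadow, and the
// return value says whether the TLB must be flushed before the guest runs
// again.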
2298 ASSERT(shadow_lock_is_acquired(d));
2300 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2302 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
2303 continue;
2305 smfn = __shadow_status(d, entry->gpfn, stype);
2307 if ( !smfn )
2309 if ( shadow_mode_refcounts(d) )
2310 continue;
2312 // For lightweight shadows, even when no shadow page exists,
2313 // we need to resync the refcounts to the new contents of the
2314 // guest page.
2315 // This only applies when we have writable page tables.
2316 //
2317 if ( !shadow_mode_write_all(d) &&
2318 !((stype == PGT_l1_shadow) &&
2319 VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
2320 // Page is not writable -- no resync necessary
2321 continue;
2324 FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
2325 stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
2327 // Compare guest's new contents to its snapshot, validating
2328 // and updating its shadow as appropriate.
2329 //
2330 guest = map_domain_page(entry->gmfn);
2331 snapshot = map_domain_page(entry->snapshot_mfn);
2333 if ( smfn )
2334 shadow = map_domain_page(smfn);
2335 else
2336 shadow = NULL;
2338 unshadow = 0;
2340 switch ( stype ) {
2341 case PGT_l1_shadow:
2343 l1_pgentry_t *guest1 = guest;
2344 l1_pgentry_t *shadow1 = shadow;
2345 l1_pgentry_t *snapshot1 = snapshot;
2347 ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
2348 shadow_mode_write_all(d));
2350 if ( !shadow_mode_refcounts(d) )
2351 revalidate_l1(d, guest1, snapshot1);
2353 if ( !smfn )
2354 break;
2356 u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
2357 int min_shadow = SHADOW_MIN(min_max_shadow);
2358 int max_shadow = SHADOW_MAX(min_max_shadow);
2360 u32 min_max_snapshot =
2361 pfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
2362 int min_snapshot = SHADOW_MIN(min_max_snapshot);
2363 int max_snapshot = SHADOW_MAX(min_max_snapshot);
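// For L1 shadows, the tlbflush_timestamp field of both the shadow page and
// the snapshot page is reused to hold a packed (min,max) pair of entry
// indices (unpacked by SHADOW_MIN/SHADOW_MAX).  Only the shadow's window is
// scanned below, and anything outside the snapshot's window is treated as
// changed, instead of comparing all L1_PAGETABLE_ENTRIES entries.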
2365 changed = 0;
2367 for ( i = min_shadow; i <= max_shadow; i++ )
2369 if ( (i < min_snapshot) || (i > max_snapshot) ||
2370 l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
2372 need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
2374 // can't update snapshots of linear page tables -- they
2375 // are used multiple times...
2376 //
2377 // snapshot[i] = new_pte;
2379 changed++;
2382 perfc_incrc(resync_l1);
2383 perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
2384 perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
2385 break;
2387 case PGT_l2_shadow:
2389 int max = -1;
2391 l2_pgentry_t *guest2 = guest;
2392 l2_pgentry_t *shadow2 = shadow;
2393 l2_pgentry_t *snapshot2 = snapshot;
2395 ASSERT(shadow_mode_write_all(d));
2396 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2398 changed = 0;
2399 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2401 #if CONFIG_X86_PAE
2402 BUG(); /* FIXME: need type_info */
2403 #endif
2404 if ( !is_guest_l2_slot(0,i) && !external )
2405 continue;
2407 l2_pgentry_t new_pde = guest2[i];
2408 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK))
2410 need_flush |= validate_pde_change(d, new_pde, &shadow2[i]);
2412 // can't update snapshots of linear page tables -- they
2413 // are used multiple times...
2414 //
2415 // snapshot[i] = new_pde;
2417 changed++;
2419 if ( l2e_get_intpte(new_pde) != 0 ) /* FIXME: check flags? */
2420 max = i;
2422 // XXX - This hack works for Linux guests.
2423 // Need a better solution long term.
2424 if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
2425 unlikely(l2e_get_intpte(new_pde) != 0) &&
2426 !unshadow && MFN_PINNED(smfn) )
2427 unshadow = 1;
2429 if ( max == -1 )
2430 unshadow = 1;
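// Unshadow heuristics: if the guest cleared the present bit but left a
// non-zero entry behind (which, for Linux guests, is a good hint that the
// page is no longer in use as a page table), or if the L2 turned out to be
// completely empty (max == -1), keeping this shadow pinned is pointless, so
// it is dropped once the resync of this entry finishes.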
2431 perfc_incrc(resync_l2);
2432 perfc_incr_histo(shm_l2_updates, changed, PT_UPDATES);
2433 break;
2435 case PGT_hl2_shadow:
2437 l2_pgentry_t *guest2 = guest;
2438 l2_pgentry_t *snapshot2 = snapshot;
2439 l1_pgentry_t *shadow2 = shadow;
2441 ASSERT(shadow_mode_write_all(d));
2442 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2444 changed = 0;
2445 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2447 #if CONFIG_X86_PAE
2448 BUG(); /* FIXME: need type_info */
2449 #endif
2450 if ( !is_guest_l2_slot(0, i) && !external )
2451 continue;
2453 l2_pgentry_t new_pde = guest2[i];
2454 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK) )
2456 need_flush |= validate_hl2e_change(d, new_pde, &shadow2[i]);
2458 // can't update snapshots of linear page tables -- they
2459 // are used multiple times...
2460 //
2461 // snapshot[i] = new_pde;
2463 changed++;
2466 perfc_incrc(resync_hl2);
2467 perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
2468 break;
2470 default:
2471 BUG();
2474 if ( smfn )
2475 unmap_domain_page(shadow);
2476 unmap_domain_page(snapshot);
2477 unmap_domain_page(guest);
2479 if ( unlikely(unshadow) )
2481 perfc_incrc(unshadow_l2_count);
2482 shadow_unpin(smfn);
2483 if ( unlikely(shadow_mode_external(d)) )
2485 unsigned long hl2mfn;
2487 if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
2488 MFN_PINNED(hl2mfn) )
2489 shadow_unpin(hl2mfn);
2494 return need_flush;
2497 void __shadow_sync_all(struct domain *d)
2499 struct out_of_sync_entry *entry;
2500 int need_flush = 0;
2502 perfc_incrc(shadow_sync_all);
2504 ASSERT(shadow_lock_is_acquired(d));
2506 // First, remove all write permissions to the page tables
2507 //
2508 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2510 // Skip entries that have low bits set... Those aren't
2511 // real PTEs.
2512 //
2513 if ( entry->writable_pl1e & (sizeof(l1_pgentry_t)-1) )
2514 continue;
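// writable_pl1e holds the machine address of the shadow L1 entry through
// which the guest currently has a writable mapping of the out-of-sync page;
// below it is mapped and its _PAGE_RW bit cleared so no further writes can
// sneak in while we resync.  Values with low bits set are markers rather
// than entry addresses, hence the skip above.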
2516 l1_pgentry_t *ppte = (l1_pgentry_t *)(
2517 (char *)map_domain_page(entry->writable_pl1e >> PAGE_SHIFT) +
2518 (entry->writable_pl1e & ~PAGE_MASK));
2519 l1_pgentry_t opte = *ppte;
2520 l1_pgentry_t npte = opte;
2521 l1e_remove_flags(npte, _PAGE_RW);
2523 if ( (l1e_get_flags(npte) & _PAGE_PRESENT) &&
2524 !shadow_get_page_from_l1e(npte, d) )
2525 BUG();
2526 *ppte = npte;
2527 shadow_put_page_from_l1e(opte, d);
2529 unmap_domain_page(ppte);
2532 // XXX mafetter: SMP
2533 //
2534 // With the current algorithm, we've gotta flush all the TLBs
2535 // before we can safely continue. I don't think we want to
2536 // do it this way, so I think we should consider making
2537 // entirely private copies of the shadow for each vcpu, and/or
2538 // possibly having a mix of private and shared shadow state
2539 // (any path from a PTE that grants write access to an out-of-sync
2540 // page table page needs to be vcpu private).
2541 //
2542 #if 0 // this should be enabled for SMP guests...
2543 flush_tlb_mask(cpu_online_map);
2544 #endif
2545 need_flush = 1;
2547 // Second, resync all L1 pages, then L2 pages, etc...
2548 //
2549 need_flush |= resync_all(d, PGT_l1_shadow);
2550 if ( shadow_mode_translate(d) )
2551 need_flush |= resync_all(d, PGT_hl2_shadow);
2552 need_flush |= resync_all(d, PGT_l2_shadow);
2554 if ( need_flush && likely(!shadow_mode_external(d)) )
2555 local_flush_tlb();
2557 free_out_of_sync_state(d);
2560 int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
2562 l1_pgentry_t gpte, spte, orig_gpte;
2563 struct vcpu *v = current;
2564 struct domain *d = v->domain;
2565 l2_pgentry_t gpde;
2567 spte = l1e_empty();
2569 SH_VVLOG("shadow_fault( va=%lx, code=%lu )",
2570 va, (unsigned long)regs->error_code);
2571 perfc_incrc(shadow_fault_calls);
2573 check_pagetable(v, "pre-sf");
2575 /*
2576 * Don't let someone else take the guest's table pages out-of-sync.
2577 */
2578 shadow_lock(d);
2580 /* XXX - FIX THIS COMMENT!!!
2581 * STEP 1. Check to see if this fault might have been caused by an
2582 * out-of-sync table page entry, or if we should pass this
2583 * fault onto the guest.
2584 */
2585 __shadow_sync_va(v, va);
2587 /*
2588 * STEP 2. Check the guest PTE.
2589 */
2590 __guest_get_l2e(v, va, &gpde);
2591 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2593 SH_VVLOG("shadow_fault - EXIT: L1 not present");
2594 perfc_incrc(shadow_fault_bail_pde_not_present);
2595 goto fail;
2598 // This can't fault because we hold the shadow lock and we've ensured that
2599 // the mapping is in-sync, so the check of the PDE's present bit, above,
2600 // covers this access.
2601 //
2602 orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
2603 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
2605 SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
2606 l1e_get_intpte(gpte));
2607 perfc_incrc(shadow_fault_bail_pte_not_present);
2608 goto fail;
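// Hardware page-fault error code bits: bit 0 = protection violation (vs.
// not-present), bit 1 = write access, bit 2 = user mode.  Only the write
// bit matters for the fixup below.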
2611 /* Write fault? */
2612 if ( regs->error_code & 2 )
2614 int allow_writes = 0;
2616 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
2618 if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) )
2620 allow_writes = 1;
2621 l1e_add_flags(gpte, _PAGE_RW);
2623 else
2625 /* Write fault on a read-only mapping. */
2626 SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
2627 l1e_get_intpte(gpte));
2628 perfc_incrc(shadow_fault_bail_ro_mapping);
2629 goto fail;
2633 if ( !l1pte_write_fault(v, &gpte, &spte, va) )
2635 SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
2636 perfc_incrc(write_fault_bail);
2637 shadow_unlock(d);
2638 return 0;
2641 if ( allow_writes )
2642 l1e_remove_flags(gpte, _PAGE_RW);
2644 else
2646 if ( !l1pte_read_fault(d, &gpte, &spte) )
2648 SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
2649 perfc_incrc(read_fault_bail);
2650 shadow_unlock(d);
2651 return 0;
2655 /*
2656 * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
2657 */
2658 if ( l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK) )
2660 /* XXX Watch out for read-only L2 entries! (not used in Linux). */
2661 if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
2662 &gpte, sizeof(gpte))) )
2664 printk("%s() failed, crashing domain %d "
2665 "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
2666 __func__,d->domain_id, l2e_get_intpte(gpde), va);
2667 domain_crash_synchronous();
2670 // if necessary, record the page table page as dirty
2671 if ( unlikely(shadow_mode_log_dirty(d)) )
2672 __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
2675 shadow_set_l1e(va, spte, 1);
2677 perfc_incrc(shadow_fault_fixed);
2678 d->arch.shadow_fault_count++;
2680 shadow_unlock(d);
2682 check_pagetable(v, "post-sf");
2683 return EXCRET_fault_fixed;
2685 fail:
2686 shadow_unlock(d);
2687 return 0;
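/*
 * shadow_l1_normal_pt_update() and shadow_l2_normal_pt_update() are called
 * when a guest page-table update arrives through the explicit interfaces
 * (e.g. the mmu_update hypercall path) rather than through a shadow fault:
 * pa is the address of the guest entry being written, so pa >> PAGE_SHIFT
 * names the page-table frame.  If that frame currently has a shadow, the
 * corresponding shadow entry is recomputed; otherwise there is nothing to
 * do until the frame is shadowed again.
 */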
2690 void shadow_l1_normal_pt_update(
2691 struct domain *d,
2692 unsigned long pa, l1_pgentry_t gpte,
2693 struct domain_mmap_cache *cache)
2695 unsigned long sl1mfn;
2696 l1_pgentry_t *spl1e, spte;
2698 shadow_lock(d);
2700 sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
2701 if ( sl1mfn )
2703 SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
2704 (void *)pa, l1e_get_intpte(gpte));
2705 l1pte_propagate_from_guest(current->domain, gpte, &spte);
2707 spl1e = map_domain_page_with_cache(sl1mfn, cache);
2708 spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
2709 unmap_domain_page_with_cache(spl1e, cache);
2712 shadow_unlock(d);
2715 void shadow_l2_normal_pt_update(
2716 struct domain *d,
2717 unsigned long pa, l2_pgentry_t gpde,
2718 struct domain_mmap_cache *cache)
2720 unsigned long sl2mfn;
2721 l2_pgentry_t *spl2e;
2723 shadow_lock(d);
2725 sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
2726 if ( sl2mfn )
2728 SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
2729 (void *)pa, l2e_get_intpte(gpde));
2730 spl2e = map_domain_page_with_cache(sl2mfn, cache);
2731 validate_pde_change(d, gpde,
2732 &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
2733 unmap_domain_page_with_cache(spl2e, cache);
2736 shadow_unlock(d);
2739 #if CONFIG_PAGING_LEVELS >= 3
2740 void shadow_l3_normal_pt_update(
2741 struct domain *d,
2742 unsigned long pa, l3_pgentry_t gpde,
2743 struct domain_mmap_cache *cache)
2745 BUG(); // not yet implemented
2747 #endif
2749 #if CONFIG_PAGING_LEVELS >= 4
2750 void shadow_l4_normal_pt_update(
2751 struct domain *d,
2752 unsigned long pa, l4_pgentry_t gpde,
2753 struct domain_mmap_cache *cache)
2755 BUG(); // not yet implemented
2757 #endif
2759 int shadow_do_update_va_mapping(unsigned long va,
2760 l1_pgentry_t val,
2761 struct vcpu *v)
2763 struct domain *d = v->domain;
2764 l1_pgentry_t spte;
2765 int rc = 0;
2767 shadow_lock(d);
2769 //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val));
2771 // This is actually overkill - we don't need to sync the L1 itself,
2772 // just everything involved in getting to this L1 (i.e. we need
2773 // linear_pg_table[l1_linear_offset(va)] to be in sync)...
2774 //
2775 __shadow_sync_va(v, va);
2777 l1pte_propagate_from_guest(d, val, &spte);
2778 shadow_set_l1e(va, spte, 0);
2780 /*
2781 * If we're in log-dirty mode then we need to note that we've updated
2782 * the PTE in the PT-holding page. We need the machine frame number
2783 * for this.
2784 */
2785 if ( shadow_mode_log_dirty(d) )
2786 __mark_dirty(d, va_to_l1mfn(v, va));
2788 // out:
2789 shadow_unlock(d);
2791 return rc;
2795 /*
2796 * What lives where in the 32-bit address space in the various shadow modes,
2797 * and what it uses to get/maintain that mapping.
2799 * SHADOW MODE:      none          enable          translate        external
2801 * 4KB things:
2802 * guest_vtable      lin_l2        mapped per gl2  lin_l2 via hl2   mapped per gl2
2803 * shadow_vtable     n/a           sh_lin_l2       sh_lin_l2        mapped per gl2
2804 * hl2_vtable        n/a           n/a             lin_hl2 via hl2  mapped per gl2
2805 * monitor_vtable    n/a           n/a             n/a              mapped once
2807 * 4MB things:
2808 * guest_linear      lin via gl2   lin via gl2     lin via hl2      lin via hl2
2809 * shadow_linear     n/a           sh_lin via sl2  sh_lin via sl2   sh_lin via sl2
2810 * monitor_linear    n/a           n/a             n/a              ???
2811 * perdomain         perdomain     perdomain       perdomain        perdomain
2812 * R/O M2P           R/O M2P       R/O M2P         n/a              n/a
2813 * R/W M2P           R/W M2P       R/W M2P         R/W M2P          R/W M2P
2814 * P2M               n/a           n/a             R/O M2P          R/O M2P
2816 * NB:
2817 * update_pagetables(), __update_pagetables(), shadow_mode_enable(),
2818 * shadow_l2_table(), shadow_hl2_table(), and alloc_monitor_pagetable()
2819 * all play a part in maintaining these mappings.
2820 */
2821 void __update_pagetables(struct vcpu *v)
2823 struct domain *d = v->domain;
2824 unsigned long gmfn = pagetable_get_pfn(v->arch.guest_table);
2825 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
2826 unsigned long smfn, hl2mfn, old_smfn;
2828 int max_mode = ( shadow_mode_external(d) ? SHM_external
2829 : shadow_mode_translate(d) ? SHM_translate
2830 : shadow_mode_enabled(d) ? SHM_enable
2831 : 0 );
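// max_mode holds exactly one SHM_* value -- the strongest shadow feature in
// force for this domain (external > translate > enable).  The blocks below
// test it either against a set of acceptable modes (via &) or for one
// specific mode (via ==).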
2833 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
2834 ASSERT( max_mode );
2836 /*
2837 * arch.guest_vtable
2838 */
2839 if ( max_mode & (SHM_enable | SHM_external) )
2841 if ( likely(v->arch.guest_vtable != NULL) )
2842 unmap_domain_page(v->arch.guest_vtable);
2843 v->arch.guest_vtable = map_domain_page(gmfn);
2846 /*
2847 * arch.shadow_table
2848 */
2849 if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
2850 smfn = shadow_l2_table(d, gpfn, gmfn);
2851 if ( !get_shadow_ref(smfn) )
2852 BUG();
2853 old_smfn = pagetable_get_pfn(v->arch.shadow_table);
2854 v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
2855 if ( old_smfn )
2856 put_shadow_ref(old_smfn);
2858 SH_VVLOG("__update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
2860 /*
2861 * arch.shadow_vtable
2862 */
2863 if ( max_mode == SHM_external )
2865 if ( v->arch.shadow_vtable )
2866 unmap_domain_page(v->arch.shadow_vtable);
2867 v->arch.shadow_vtable = map_domain_page(smfn);
2870 /*
2871 * arch.hl2_vtable
2872 */
2874 // if max_mode == SHM_translate, then the hl2 is already installed
2875 // correctly in its smfn, and there's nothing to do.
2876 //
2877 if ( max_mode == SHM_external )
2879 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
2880 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
2881 if ( v->arch.hl2_vtable )
2882 unmap_domain_page(v->arch.hl2_vtable);
2883 v->arch.hl2_vtable = map_domain_page(hl2mfn);
2886 /*
2887 * fixup pointers in monitor table, as necessary
2888 */
2889 if ( max_mode == SHM_external )
2891 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
2892 l2_pgentry_t old_hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
2893 l2_pgentry_t old_sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
2895 ASSERT( shadow_mode_translate(d) );
2897 if ( !get_shadow_ref(hl2mfn) )
2898 BUG();
2899 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
2900 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
2901 if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
2902 put_shadow_ref(l2e_get_pfn(old_hl2e));
2904 if ( !get_shadow_ref(smfn) )
2905 BUG();
2906 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
2907 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
2908 if ( l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
2909 put_shadow_ref(l2e_get_pfn(old_sl2e));
2911 // XXX - maybe this can be optimized somewhat??
2912 local_flush_tlb();
2917 /************************************************************************/
2918 /************************************************************************/
2919 /************************************************************************/
2921 #if SHADOW_DEBUG
2923 // The following is entirely for _check_pagetable()'s benefit.
2924 // _check_pagetable() wants to know whether a given entry in a
2925 // shadow page table is supposed to be the shadow of the guest's
2926 // current entry, or the shadow of the entry held in the snapshot
2927 // taken above.
2928 //
2929 // Here, we mark all currently existing entries as reflecting
2930 // the snapshot, above. All other places in xen that update
2931 // the shadow will keep the shadow in sync with the guest's
2932 // entries (via l1pte_propagate_from_guest and friends), which clear
2933 // the SHADOW_REFLECTS_SNAPSHOT bit.
2934 //
2935 static void
2936 mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
2938 unsigned long smfn;
2939 l1_pgentry_t *l1e;
2940 l2_pgentry_t *l2e;
2941 unsigned i;
2943 if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
2945 l1e = map_domain_page(smfn);
2946 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
2947 if ( is_guest_l1_slot(i) &&
2948 (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
2949 l1e_add_flags(l1e[i], SHADOW_REFLECTS_SNAPSHOT);
2950 unmap_domain_page(l1e);
2953 if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
2955 l2e = map_domain_page(smfn);
2956 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2957 if ( is_guest_l2_slot(0, i) &&
2958 (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
2959 l2e_add_flags(l2e[i], SHADOW_REFLECTS_SNAPSHOT);
2960 unmap_domain_page(l2e);
2964 // BUG: these are not SMP safe...
2965 static int sh_l2_present;
2966 static int sh_l1_present;
2967 char * sh_check_name;
2968 int shadow_status_noswap;
2970 #define v2m(_v, _adr) ({ \
2971 unsigned long _a = (unsigned long)(_adr); \
2972 l2_pgentry_t _pde = shadow_linear_l2_table(_v)[l2_table_offset(_a)]; \
2973 unsigned long _pa = -1; \
2974 if ( l2e_get_flags(_pde) & _PAGE_PRESENT ) \
2975 { \
2976 l1_pgentry_t _pte; \
2977 _pte = shadow_linear_pg_table[l1_linear_offset(_a)]; \
2978 if ( l1e_get_flags(_pte) & _PAGE_PRESENT ) \
2979 _pa = l1e_get_paddr(_pte); \
2980 } \
2981 _pa | (_a & ~PAGE_MASK); \
2982 })
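// v2m() walks the shadow linear page tables to turn a virtual address into
// the machine address it maps to, purely for the diagnostic output in
// FAIL(); unmapped addresses come out as all-ones.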
2984 #define FAIL(_f, _a...) \
2985 do { \
2986 printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n", \
2987 sh_check_name, level, l2_idx, l1_idx, ## _a, \
2988 __FILE__, __LINE__); \
2989 printk("guest_pte=%" PRIpte " eff_guest_pte=%" PRIpte \
2990 " shadow_pte=%" PRIpte " snapshot_pte=%" PRIpte \
2991 " &guest=%p &shadow=%p &snap=%p v2m(&guest)=%p" \
2992 " v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n", \
2993 l1e_get_intpte(guest_pte), l1e_get_intpte(eff_guest_pte), \
2994 l1e_get_intpte(shadow_pte), l1e_get_intpte(snapshot_pte), \
2995 p_guest_pte, p_shadow_pte, p_snapshot_pte, \
2996 (void *)v2m(v, p_guest_pte), (void *)v2m(v, p_shadow_pte), \
2997 (void *)v2m(v, p_snapshot_pte), \
2998 (l2_idx << L2_PAGETABLE_SHIFT) | \
2999 (l1_idx << L1_PAGETABLE_SHIFT)); \
3000 errors++; \
3001 } while ( 0 )
3003 static int check_pte(
3004 struct vcpu *v,
3005 l1_pgentry_t *p_guest_pte,
3006 l1_pgentry_t *p_shadow_pte,
3007 l1_pgentry_t *p_snapshot_pte,
3008 int level, int l2_idx, int l1_idx)
3010 struct domain *d = v->domain;
3011 l1_pgentry_t guest_pte = *p_guest_pte;
3012 l1_pgentry_t shadow_pte = *p_shadow_pte;
3013 l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
3014 l1_pgentry_t eff_guest_pte;
3015 unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
3016 int errors = 0, guest_writable;
3017 int page_table_page;
3019 if ( (l1e_get_intpte(shadow_pte) == 0) ||
3020 (l1e_get_intpte(shadow_pte) == 0xdeadface) ||
3021 (l1e_get_intpte(shadow_pte) == 0x00000E00) )
3022 return errors; /* always safe */
3024 if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
3025 FAIL("Non zero not present shadow_pte");
3027 if ( level == 2 ) sh_l2_present++;
3028 if ( level == 1 ) sh_l1_present++;
3030 if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
3031 eff_guest_pte = snapshot_pte;
3032 else
3033 eff_guest_pte = guest_pte;
3035 if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
3036 FAIL("Guest not present yet shadow is");
3038 mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
3040 if ( ((l1e_get_intpte(shadow_pte) & mask) != (l1e_get_intpte(eff_guest_pte) & mask)) )
3041 FAIL("Corrupt?");
3043 if ( (level == 1) &&
3044 (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
3045 !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
3046 FAIL("Dirty coherence");
3048 if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
3049 !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
3050 FAIL("Accessed coherence");
3052 if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
3053 FAIL("global bit set in shadow");
3055 eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
3056 eff_guest_mfn = __gpfn_to_mfn(d, eff_guest_pfn);
3057 shadow_mfn = l1e_get_pfn(shadow_pte);
3059 if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
3060 FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%" PRIpte "\n",
3061 __func__, eff_guest_pfn, l1e_get_intpte(eff_guest_pte));
3063 page_table_page = mfn_is_page_table(eff_guest_mfn);
3065 guest_writable =
3066 (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
3067 (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
3069 if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
3071 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
3072 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3073 frame_table[eff_guest_mfn].u.inuse.type_info,
3074 page_table_page);
3075 FAIL("RW coherence");
3078 if ( (level == 1) &&
3079 (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
3080 !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
3082 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
3083 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3084 frame_table[eff_guest_mfn].u.inuse.type_info,
3085 page_table_page);
3086 FAIL("RW2 coherence");
3089 if ( eff_guest_mfn == shadow_mfn )
3091 if ( level > 1 )
3092 FAIL("Linear map ???"); /* XXX this will fail on BSD */
3094 else
3096 if ( level < 2 )
3097 FAIL("Shadow in L1 entry?");
3099 if ( level == 2 )
3101 if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
3102 FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
3103 __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
3105 else
3106 BUG(); // XXX -- not handled yet.
3109 return errors;
3111 #undef FAIL
3112 #undef v2m
3114 static int check_l1_table(
3115 struct vcpu *v, unsigned long gpfn,
3116 unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
3118 struct domain *d = v->domain;
3119 int i;
3120 unsigned long snapshot_mfn;
3121 l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
3122 int errors = 0;
3124 if ( page_out_of_sync(pfn_to_page(gmfn)) )
3126 snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
3127 ASSERT(snapshot_mfn);
3128 p_snapshot = map_domain_page(snapshot_mfn);
3131 p_guest = map_domain_page(gmfn);
3132 p_shadow = map_domain_page(smfn);
3134 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3135 errors += check_pte(v, p_guest+i, p_shadow+i,
3136 p_snapshot ? p_snapshot+i : NULL,
3137 1, l2_idx, i);
3139 unmap_domain_page(p_shadow);
3140 unmap_domain_page(p_guest);
3141 if ( p_snapshot )
3142 unmap_domain_page(p_snapshot);
3144 return errors;
3147 #define FAILPT(_f, _a...) \
3148 do { \
3149 printk("XXX FAIL %s-PT " _f "\n", sh_check_name, ## _a ); \
3150 errors++; \
3151 } while ( 0 )
3153 int check_l2_table(
3154 struct vcpu *v, unsigned long gmfn, unsigned long smfn, int oos_pdes)
3156 struct domain *d = v->domain;
3157 l2_pgentry_t *gpl2e = (l2_pgentry_t *)map_domain_page(gmfn);
3158 l2_pgentry_t *spl2e = (l2_pgentry_t *)map_domain_page(smfn);
3159 l2_pgentry_t match;
3160 int i;
3161 int errors = 0;
3162 int limit;
3164 if ( !oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != d) )
3165 FAILPT("domain doesn't own page");
3166 if ( oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != NULL) )
3167 FAILPT("bogus owner for snapshot page");
3168 if ( page_get_owner(pfn_to_page(smfn)) != NULL )
3169 FAILPT("shadow page mfn=0x%lx is owned by someone, domid=%d",
3170 smfn, page_get_owner(pfn_to_page(smfn))->domain_id);
3172 #if 0
3173 if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3174 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3175 ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
3176 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
3178 for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3179 i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
3180 i++ )
3181 printk("+++ (%d) %lx %lx\n",i,
3182 l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
3183 FAILPT("hypervisor entries inconsistent");
3186 if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
3187 l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
3188 FAILPT("hypervisor linear map inconsistent");
3189 #endif
3191 match = l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
3192 if ( !shadow_mode_external(d) &&
3193 l2e_has_changed(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
3194 match, PAGE_FLAG_MASK))
3196 FAILPT("hypervisor shadow linear map inconsistent %" PRIpte " %" PRIpte,
3197 l2e_get_intpte(spl2e[SH_LINEAR_PT_VIRT_START >>
3198 L2_PAGETABLE_SHIFT]),
3199 l2e_get_intpte(match));
3202 match = l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
3203 if ( !shadow_mode_external(d) &&
3204 l2e_has_changed(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
3205 match, PAGE_FLAG_MASK))
3207 FAILPT("hypervisor per-domain map inconsistent saw %" PRIpte ", expected (va=%p) %" PRIpte,
3208 l2e_get_intpte(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
3209 d->arch.mm_perdomain_pt,
3210 l2e_get_intpte(match));
3213 #ifdef __i386__
3214 if ( shadow_mode_external(d) )
3215 limit = L2_PAGETABLE_ENTRIES;
3216 else
3217 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3218 #else
3219 limit = 0; /* XXX x86/64 XXX */
3220 #endif
3222 /* Check the whole L2. */
3223 for ( i = 0; i < limit; i++ )
3224 errors += check_pte(v,
3225 (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
3226 (l1_pgentry_t*)(&spl2e[i]),
3227 NULL,
3228 2, i, 0);
3230 unmap_domain_page(spl2e);
3231 unmap_domain_page(gpl2e);
3233 #if 1
3234 if ( errors )
3235 printk("check_l2_table returning %d errors\n", errors);
3236 #endif
3238 return errors;
3240 #undef FAILPT
3242 int _check_pagetable(struct vcpu *v, char *s)
3244 struct domain *d = v->domain;
3245 pagetable_t pt = v->arch.guest_table;
3246 unsigned long gptbase = pagetable_get_paddr(pt);
3247 unsigned long ptbase_pfn, smfn;
3248 unsigned long i;
3249 l2_pgentry_t *gpl2e, *spl2e;
3250 unsigned long ptbase_mfn = 0;
3251 int errors = 0, limit, oos_pdes = 0;
3253 //_audit_domain(d, AUDIT_QUIET);
3254 shadow_lock(d);
3256 sh_check_name = s;
3257 //SH_VVLOG("%s-PT Audit", s);
3258 sh_l2_present = sh_l1_present = 0;
3259 perfc_incrc(check_pagetable);
3261 ptbase_mfn = gptbase >> PAGE_SHIFT;
3262 ptbase_pfn = __mfn_to_gpfn(d, ptbase_mfn);
3264 if ( !(smfn = __shadow_status(d, ptbase_pfn, PGT_base_page_table)) )
3266 printk("%s-PT %lx not shadowed\n", s, gptbase);
3267 goto out;
3269 if ( page_out_of_sync(pfn_to_page(ptbase_mfn)) )
3271 ptbase_mfn = __shadow_status(d, ptbase_pfn, PGT_snapshot);
3272 oos_pdes = 1;
3273 ASSERT(ptbase_mfn);
3276 errors += check_l2_table(v, ptbase_mfn, smfn, oos_pdes);
3278 gpl2e = (l2_pgentry_t *) map_domain_page(ptbase_mfn);
3279 spl2e = (l2_pgentry_t *) map_domain_page(smfn);
3281 /* Go back and recurse. */
3282 #ifdef __i386__
3283 if ( shadow_mode_external(d) )
3284 limit = L2_PAGETABLE_ENTRIES;
3285 else
3286 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3287 #else
3288 limit = 0; /* XXX x86/64 XXX */
3289 #endif
3291 for ( i = 0; i < limit; i++ )
3293 unsigned long gl1pfn = l2e_get_pfn(gpl2e[i]);
3294 unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
3295 unsigned long sl1mfn = l2e_get_pfn(spl2e[i]);
3297 if ( l2e_get_intpte(spl2e[i]) != 0 ) /* FIXME: check flags? */
3299 errors += check_l1_table(v, gl1pfn, gl1mfn, sl1mfn, i);
3303 unmap_domain_page(spl2e);
3304 unmap_domain_page(gpl2e);
3306 #if 0
3307 SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
3308 sh_l2_present, sh_l1_present);
3309 #endif
3311 out:
3312 if ( errors )
3313 BUG();
3315 shadow_unlock(d);
3317 return errors;
3320 int _check_all_pagetables(struct vcpu *v, char *s)
3322 struct domain *d = v->domain;
3323 int i;
3324 struct shadow_status *a;
3325 unsigned long gmfn;
3326 int errors = 0;
3328 shadow_status_noswap = 1;
3330 sh_check_name = s;
3331 SH_VVLOG("%s-PT Audit domid=%d", s, d->domain_id);
3332 sh_l2_present = sh_l1_present = 0;
3333 perfc_incrc(check_all_pagetables);
3335 for (i = 0; i < shadow_ht_buckets; i++)
3337 a = &d->arch.shadow_ht[i];
3338 while ( a && a->gpfn_and_flags )
3340 gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
3342 switch ( a->gpfn_and_flags & PGT_type_mask )
3344 case PGT_l1_shadow:
3345 errors += check_l1_table(v, a->gpfn_and_flags & PGT_mfn_mask,
3346 gmfn, a->smfn, 0);
3347 break;
3348 case PGT_l2_shadow:
3349 errors += check_l2_table(v, gmfn, a->smfn,
3350 page_out_of_sync(pfn_to_page(gmfn)));
3351 break;
3352 case PGT_l3_shadow:
3353 case PGT_l4_shadow:
3354 case PGT_hl2_shadow:
3355 BUG(); // XXX - ought to fix this...
3356 break;
3357 case PGT_snapshot:
3358 case PGT_writable_pred:
3359 break;
3360 default:
3361 errors++;
3362 printk("unexpected shadow type %lx, gpfn=%lx, "
3363 "gmfn=%lx smfn=%lx\n",
3364 a->gpfn_and_flags & PGT_type_mask,
3365 a->gpfn_and_flags & PGT_mfn_mask,
3366 gmfn, a->smfn);
3367 BUG();
3369 a = a->next;
3373 shadow_status_noswap = 0;
3375 if ( errors )
3376 BUG();
3378 return errors;
3381 #endif // SHADOW_DEBUG
3383 /*
3384 * Local variables:
3385 * mode: C
3386 * c-set-style: "BSD"
3387 * c-basic-offset: 4
3388 * tab-width: 4
3389 * indent-tabs-mode: nil
3390 * End:
3391 */