direct-io.hg

view xen/arch/x86/shadow.c @ 5617:22d7dda0e38d

bitkeeper revision 1.1782 (42c3a8e76Lioy1FVEJFTaTkbYVBy7Q)

[PATCH] Use copy_from_user when accessing guest_pt[]

Use copy_from_user when accessing guest_pt[]

Signed-off-by: Arun Sharma <arun.sharma@intel.com>
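
The change amounts to fetching guest page-table entries with a fault-tolerant copy instead of dereferencing guest_pt[] in place. A minimal sketch of the pattern, as it appears in snapshot_entry_matches() further down (guest_pt, index and gpte as used there):

    l1_pgentry_t gpte;

    /* guest_pt[] may be reached through a linear mapping that can fault,
     * so copy the entry out rather than reading it directly. */
    if ( __copy_from_user(&gpte, &guest_pt[index], sizeof(gpte)) )
        return 0;   /* treat an unreadable entry as a mismatch */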
author arun.sharma@intel.com[kaf24]
date Thu Jun 30 08:10:15 2005 +0000 (2005-06-30)
parents 2d8e63df504a
children f261f14b9781 a83ac0806d6b
line source
1 /******************************************************************************
2 * arch/x86/shadow.c
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
23 #include <xen/config.h>
24 #include <xen/types.h>
25 #include <xen/mm.h>
26 #include <xen/domain_page.h>
27 #include <asm/shadow.h>
28 #include <asm/page.h>
29 #include <xen/event.h>
30 #include <xen/sched.h>
31 #include <xen/trace.h>
33 #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
35 static void shadow_free_snapshot(struct domain *d,
36 struct out_of_sync_entry *entry);
37 static void remove_out_of_sync_entries(struct domain *d, unsigned long smfn);
38 static void free_writable_pte_predictions(struct domain *d);
40 #if SHADOW_DEBUG
41 static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
42 #endif
44 /********
46 There's a per-domain shadow table spin lock which works fine for SMP
47 hosts. We don't have to worry about interrupts as no shadow operations
48 happen in an interrupt context. It's probably not quite ready for SMP
49 guest operation as we have to worry about synchronisation between gpte
50 and spte updates. It's possible that this might only happen in a
51 hypercall context, in which case we'll probably have a per-domain
52 hypercall lock anyhow (at least initially).
54 ********/
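
A minimal sketch of the locking pattern the functions below follow (compare remove_shadow() and shadow_mode_control() later in this file):

    shadow_lock(d);
    /* ... read or update the domain's shadow state ... */
    shadow_unlock(d);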
56 static inline int
57 shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
58 unsigned long new_type)
59 {
60 struct pfn_info *page = pfn_to_page(gmfn);
61 int pinned = 0, okay = 1;
63 if ( page_out_of_sync(page) )
64 {
65 // Don't know how long ago this snapshot was taken.
66 // Can't trust it to be recent enough.
67 //
68 __shadow_sync_mfn(d, gmfn);
69 }
71 if ( !shadow_mode_refcounts(d) )
72 return 1;
74 if ( unlikely(page_is_page_table(page)) )
75 return 1;
77 FSH_LOG("%s: gpfn=%lx gmfn=%lx nt=%08lx", __func__, gpfn, gmfn, new_type);
79 if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
80 {
81 FSH_LOG("%s: couldn't find/remove all write accesses, gpfn=%lx gmfn=%lx",
82 __func__, gpfn, gmfn);
83 #if 1 || defined(LIVE_DANGEROUSLY)
84 set_bit(_PGC_page_table, &page->count_info);
85 return 1;
86 #endif
87 return 0;
89 }
91 // To convert this page for use as a page table, the writable count
92 // should now be zero. Test this by grabbing the page as a page table,
93 // and then immediately releasing. This will also deal with any
94 // necessary TLB flushing issues for us.
95 //
96 // The cruft here about pinning doesn't really work right. This
97 // needs rethinking/rewriting... Need to gracefully deal with the
98 // TLB flushes required when promoting a writable page, and also deal
99 // with any outstanding (external) writable refs to this page (by
100 // refusing to promote it). The pinning headache complicates this
101 // code -- it would all get much simpler if we stopped using
102 // shadow_lock() and moved the shadow code to BIGLOCK().
103 //
104 if ( unlikely(!get_page(page, d)) )
105 BUG(); // XXX -- needs more thought for a graceful failure
106 if ( unlikely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) )
107 {
108 pinned = 1;
109 put_page_and_type(page);
110 }
111 if ( get_page_type(page, PGT_base_page_table) )
112 {
113 set_bit(_PGC_page_table, &page->count_info);
114 put_page_type(page);
115 }
116 else
117 {
118 printk("shadow_promote: get_page_type failed "
119 "dom%d gpfn=%lx gmfn=%lx t=%08lx\n",
120 d->domain_id, gpfn, gmfn, new_type);
121 okay = 0;
122 }
124 // Now put the type back to writable...
125 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
126 BUG(); // XXX -- needs more thought for a graceful failure
127 if ( unlikely(pinned) )
128 {
129 if ( unlikely(test_and_set_bit(_PGT_pinned,
130 &page->u.inuse.type_info)) )
131 BUG(); // hmm... someone pinned this again?
132 }
133 else
134 put_page_and_type(page);
136 return okay;
137 }
139 static inline void
140 shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
141 {
142 if ( !shadow_mode_refcounts(d) )
143 return;
145 ASSERT(frame_table[gmfn].count_info & PGC_page_table);
147 if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
148 {
149 clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
151 if ( page_out_of_sync(pfn_to_page(gmfn)) )
152 {
153 remove_out_of_sync_entries(d, gmfn);
154 }
155 }
156 }
158 /*
159 * Things in shadow mode that collect get_page() refs to the domain's
160 * pages are:
161 * - PGC_allocated takes a gen count, just like normal.
162 * - A writable page can be pinned (paravirtualized guests may consider
163 * these pages to be L1s or L2s, and don't know the difference).
164 * Pinning a page takes a gen count (but, for domains in shadow mode,
165 * it *doesn't* take a type count)
166 * - CR3 grabs a ref to whatever it points at, just like normal.
167 * - Shadow mode grabs an initial gen count for itself, as a placeholder
168 * for whatever references will exist.
169 * - Shadow PTEs that point to a page take a gen count, just like regular
170 * PTEs. However, they don't get a type count, as get_page_type() is
171 * hardwired to keep writable pages' counts at 1 for domains in shadow
172 * mode.
173 * - Whenever we shadow a page, the entry in the shadow hash grabs a
174 * general ref to the page.
175 * - Whenever a page goes out of sync, the out of sync entry grabs a
176 * general ref to the page.
177 */
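
As a concrete instance of the last two rules, a sketch of the general ref taken for an out-of-sync entry (drawn from shadow_mark_mfn_out_of_sync() and release_out_of_sync_entry() below):

    /* adding the page to the out-of-sync list takes a general ref... */
    get_page(page, d);

    /* ...which is dropped again when the entry is released */
    put_page(page);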
178 /*
179 * pfn_info fields for pages allocated as shadow pages:
180 *
181 * All 32 bits of count_info are a simple count of refs to this shadow
182 * from a) other shadow pages, b) current CR3's (aka ed->arch.shadow_table),
183 * c) if it's a pinned shadow root pgtable, d) outstanding out-of-sync
184 * references.
185 *
186 * u.inuse._domain is left NULL, to prevent accidentally allowing some random
187 * domain from gaining permissions to map this page.
188 *
189 * u.inuse.type_info & PGT_type_mask remembers what kind of page is being
190 * shadowed.
191 * u.inuse.type_info & PGT_mfn_mask holds the mfn of the page being shadowed.
192 * u.inuse.type_info & PGT_pinned says that an extra reference to this shadow
193 * currently exists because this is a shadow of a root page, and we
194 * don't want to let it disappear just because no CR3 is currently pointing
195 * at it.
196 *
197 * tlbflush_timestamp holds a min & max index of valid page table entries
198 * within the shadow page.
199 */
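
In code terms, a sketch of how these fields are used by alloc_shadow_page() and free_shadow_l1_table() below:

    /* remember the shadow's type and the mfn of the page being shadowed */
    page->u.inuse.type_info = psh_type | gmfn;

    /* tlbflush_timestamp doubles as the valid-entry range of the shadow */
    u32 min_max = page->tlbflush_timestamp;  /* set via SHADOW_ENCODE_MIN_MAX(min, max) */
    int min = SHADOW_MIN(min_max), max = SHADOW_MAX(min_max);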
201 static inline unsigned long
202 alloc_shadow_page(struct domain *d,
203 unsigned long gpfn, unsigned long gmfn,
204 u32 psh_type)
205 {
206 struct pfn_info *page;
207 unsigned long smfn;
208 int pin = 0;
210 // Currently, we only keep pre-zero'ed pages around for use as L1's...
211 // This will change. Soon.
212 //
213 if ( psh_type == PGT_l1_shadow )
214 {
215 if ( !list_empty(&d->arch.free_shadow_frames) )
216 {
217 struct list_head *entry = d->arch.free_shadow_frames.next;
218 page = list_entry(entry, struct pfn_info, list);
219 list_del(entry);
220 perfc_decr(free_l1_pages);
221 }
222 else
223 {
224 page = alloc_domheap_page(NULL);
225 void *l1 = map_domain_page(page_to_pfn(page));
226 memset(l1, 0, PAGE_SIZE);
227 unmap_domain_page(l1);
228 }
229 }
230 else
231 page = alloc_domheap_page(NULL);
233 if ( unlikely(page == NULL) )
234 {
235 printk("Couldn't alloc shadow page! dom%d count=%d\n",
236 d->domain_id, d->arch.shadow_page_count);
237 printk("Shadow table counts: l1=%d l2=%d hl2=%d snapshot=%d\n",
238 perfc_value(shadow_l1_pages),
239 perfc_value(shadow_l2_pages),
240 perfc_value(hl2_table_pages),
241 perfc_value(snapshot_pages));
242 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
243 }
245 smfn = page_to_pfn(page);
247 ASSERT( (gmfn & ~PGT_mfn_mask) == 0 );
248 page->u.inuse.type_info = psh_type | gmfn;
249 page->count_info = 0;
250 page->tlbflush_timestamp = 0;
252 switch ( psh_type )
253 {
254 case PGT_l1_shadow:
255 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
256 goto fail;
257 perfc_incr(shadow_l1_pages);
258 d->arch.shadow_page_count++;
259 break;
261 case PGT_l2_shadow:
262 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
263 goto fail;
264 perfc_incr(shadow_l2_pages);
265 d->arch.shadow_page_count++;
266 if ( PGT_l2_page_table == PGT_root_page_table )
267 pin = 1;
269 break;
271 case PGT_hl2_shadow:
272 // Treat an hl2 as an L1 for purposes of promotion.
273 // For external mode domains, treat them as an L2 for purposes of
274 // pinning.
275 //
276 if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
277 goto fail;
278 perfc_incr(hl2_table_pages);
279 d->arch.hl2_page_count++;
280 if ( shadow_mode_external(d) &&
281 (PGT_l2_page_table == PGT_root_page_table) )
282 pin = 1;
284 break;
286 case PGT_snapshot:
287 perfc_incr(snapshot_pages);
288 d->arch.snapshot_page_count++;
289 break;
291 default:
292 printk("Alloc shadow weird page type type=%08x\n", psh_type);
293 BUG();
294 break;
295 }
297 // Don't add a new shadow of something that already has a snapshot.
298 //
299 ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
301 set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
303 if ( pin )
304 shadow_pin(smfn);
306 return smfn;
308 fail:
309 FSH_LOG("promotion of pfn=%lx mfn=%lx failed! external gnttab refs?",
310 gpfn, gmfn);
311 free_domheap_page(page);
312 return 0;
313 }
315 static void inline
316 free_shadow_l1_table(struct domain *d, unsigned long smfn)
317 {
318 l1_pgentry_t *pl1e = map_domain_page(smfn);
319 int i;
320 struct pfn_info *spage = pfn_to_page(smfn);
321 u32 min_max = spage->tlbflush_timestamp;
322 int min = SHADOW_MIN(min_max);
323 int max = SHADOW_MAX(min_max);
325 for ( i = min; i <= max; i++ )
326 {
327 shadow_put_page_from_l1e(pl1e[i], d);
328 pl1e[i] = l1e_empty();
329 }
331 unmap_domain_page(pl1e);
332 }
334 static void inline
335 free_shadow_hl2_table(struct domain *d, unsigned long smfn)
336 {
337 l1_pgentry_t *hl2 = map_domain_page(smfn);
338 int i, limit;
340 SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
342 #ifdef __i386__
343 if ( shadow_mode_external(d) )
344 limit = L2_PAGETABLE_ENTRIES;
345 else
346 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
347 #else
348 limit = 0; /* XXX x86/64 XXX */
349 #endif
351 for ( i = 0; i < limit; i++ )
352 {
353 if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
354 put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
355 }
357 unmap_domain_page(hl2);
358 }
360 static void inline
361 free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
362 {
363 l2_pgentry_t *pl2e = map_domain_page(smfn);
364 int i, external = shadow_mode_external(d);
366 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
367 if ( external || is_guest_l2_slot(type, i) )
368 if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
369 put_shadow_ref(l2e_get_pfn(pl2e[i]));
371 if ( (PGT_base_page_table == PGT_l2_page_table) &&
372 shadow_mode_translate(d) && !external )
373 {
374 // free the ref to the hl2
375 //
376 put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
377 }
379 unmap_domain_page(pl2e);
380 }
382 void free_shadow_page(unsigned long smfn)
383 {
384 struct pfn_info *page = &frame_table[smfn];
385 unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
386 struct domain *d = page_get_owner(pfn_to_page(gmfn));
387 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
388 unsigned long type = page->u.inuse.type_info & PGT_type_mask;
390 SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
392 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
394 delete_shadow_status(d, gpfn, gmfn, type);
396 switch ( type )
397 {
398 case PGT_l1_shadow:
399 perfc_decr(shadow_l1_pages);
400 shadow_demote(d, gpfn, gmfn);
401 free_shadow_l1_table(d, smfn);
402 break;
404 case PGT_l2_shadow:
405 perfc_decr(shadow_l2_pages);
406 shadow_demote(d, gpfn, gmfn);
407 free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
408 break;
410 case PGT_hl2_shadow:
411 perfc_decr(hl2_table_pages);
412 shadow_demote(d, gpfn, gmfn);
413 free_shadow_hl2_table(d, smfn);
414 break;
416 case PGT_snapshot:
417 perfc_decr(snapshot_pages);
418 break;
420 default:
421 printk("Free shadow weird page type mfn=%lx type=%08x\n",
422 page_to_pfn(page), page->u.inuse.type_info);
423 break;
424 }
426 d->arch.shadow_page_count--;
428 // No TLB flushes are needed the next time this page gets allocated.
429 //
430 page->tlbflush_timestamp = 0;
431 page->u.free.cpumask = CPU_MASK_NONE;
433 if ( type == PGT_l1_shadow )
434 {
435 list_add(&page->list, &d->arch.free_shadow_frames);
436 perfc_incr(free_l1_pages);
437 }
438 else
439 free_domheap_page(page);
440 }
442 void
443 remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
444 {
445 unsigned long smfn;
447 //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
449 shadow_lock(d);
451 while ( stype >= PGT_l1_shadow )
452 {
453 smfn = __shadow_status(d, gpfn, stype);
454 if ( smfn && MFN_PINNED(smfn) )
455 shadow_unpin(smfn);
456 stype -= PGT_l1_shadow;
457 }
459 shadow_unlock(d);
460 }
462 static void inline
463 release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
464 {
465 struct pfn_info *page;
467 page = &frame_table[entry->gmfn];
469 // Decrement ref count of guest & shadow pages
470 //
471 put_page(page);
473 // Only use entries that have low bits clear...
474 //
475 if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
476 {
477 put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
478 entry->writable_pl1e = -2;
479 }
480 else
481 ASSERT( entry->writable_pl1e == -1 );
483 // Free the snapshot
484 //
485 shadow_free_snapshot(d, entry);
486 }
488 static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
489 {
490 struct out_of_sync_entry *entry = d->arch.out_of_sync;
491 struct out_of_sync_entry **prev = &d->arch.out_of_sync;
492 struct out_of_sync_entry *found = NULL;
494 // NB: Be careful not to call something that manipulates this list
495 // while walking it. Collect the results into a separate list
496 // first, then walk that list.
497 //
498 while ( entry )
499 {
500 if ( entry->gmfn == gmfn )
501 {
502 // remove from out of sync list
503 *prev = entry->next;
505 // add to found list
506 entry->next = found;
507 found = entry;
509 entry = *prev;
510 continue;
511 }
512 prev = &entry->next;
513 entry = entry->next;
514 }
516 prev = NULL;
517 entry = found;
518 while ( entry )
519 {
520 release_out_of_sync_entry(d, entry);
522 prev = &entry->next;
523 entry = entry->next;
524 }
526 // Add found list to free list
527 if ( prev )
528 {
529 *prev = d->arch.out_of_sync_free;
530 d->arch.out_of_sync_free = found;
531 }
532 }
534 static void free_out_of_sync_state(struct domain *d)
535 {
536 struct out_of_sync_entry *entry;
538 // NB: Be careful not to call something that manipulates this list
539 // while walking it. Remove one item at a time, and always
540 // restart from start of list.
541 //
542 while ( (entry = d->arch.out_of_sync) )
543 {
544 d->arch.out_of_sync = entry->next;
545 release_out_of_sync_entry(d, entry);
547 entry->next = d->arch.out_of_sync_free;
548 d->arch.out_of_sync_free = entry;
549 }
550 }
552 static void free_shadow_pages(struct domain *d)
553 {
554 int i;
555 struct shadow_status *x;
556 struct vcpu *v;
558 /*
559 * WARNING! The shadow page table must not currently be in use!
560 * e.g., You are expected to have paused the domain and synchronized CR3.
561 */
563 if( !d->arch.shadow_ht ) return;
565 shadow_audit(d, 1);
567 // first, remove any outstanding refs from out_of_sync entries...
568 //
569 free_out_of_sync_state(d);
571 // second, remove any outstanding refs from v->arch.shadow_table
572 // and CR3.
573 //
574 for_each_vcpu(d, v)
575 {
576 if ( pagetable_get_paddr(v->arch.shadow_table) )
577 {
578 put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
579 v->arch.shadow_table = mk_pagetable(0);
580 }
582 if ( v->arch.monitor_shadow_ref )
583 {
584 put_shadow_ref(v->arch.monitor_shadow_ref);
585 v->arch.monitor_shadow_ref = 0;
586 }
587 }
589 // For external shadows, remove the monitor table's refs
590 //
591 if ( shadow_mode_external(d) )
592 {
593 for_each_vcpu(d, v)
594 {
595 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
597 if ( mpl2e )
598 {
599 l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
600 l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
602 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
603 {
604 put_shadow_ref(l2e_get_pfn(hl2e));
605 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
606 }
607 if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
608 {
609 put_shadow_ref(l2e_get_pfn(smfn));
610 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
611 }
612 }
613 }
614 }
616 // Now, the only refs to shadow pages that are left are from the shadow
617 // pages themselves. We just unpin the pinned pages, and the rest
618 // should automatically disappear.
619 //
620 // NB: Beware: each explicitly or implicit call to free_shadow_page
621 // can/will result in the hash bucket getting rewritten out from
622 // under us... First, collect the list of pinned pages, then
623 // free them.
624 //
625 for ( i = 0; i < shadow_ht_buckets; i++ )
626 {
627 u32 count;
628 unsigned long *mfn_list;
630 /* Skip empty buckets. */
631 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
632 continue;
634 count = 0;
635 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
636 if ( MFN_PINNED(x->smfn) )
637 count++;
638 if ( !count )
639 continue;
641 mfn_list = xmalloc_array(unsigned long, count);
642 count = 0;
643 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
644 if ( MFN_PINNED(x->smfn) )
645 mfn_list[count++] = x->smfn;
647 while ( count )
648 {
649 shadow_unpin(mfn_list[--count]);
650 }
651 xfree(mfn_list);
652 }
654 // Now free the pre-zero'ed pages from the domain
655 //
656 struct list_head *list_ent, *tmp;
657 list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
658 {
659 list_del(list_ent);
660 perfc_decr(free_l1_pages);
662 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
663 free_domheap_page(page);
664 }
666 shadow_audit(d, 0);
668 SH_LOG("Free shadow table.");
669 }
671 void shadow_mode_init(void)
672 {
673 }
675 int _shadow_mode_refcounts(struct domain *d)
676 {
677 return shadow_mode_refcounts(d);
678 }
680 static void alloc_monitor_pagetable(struct vcpu *v)
681 {
682 unsigned long mmfn;
683 l2_pgentry_t *mpl2e;
684 struct pfn_info *mmfn_info;
685 struct domain *d = v->domain;
687 ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
689 mmfn_info = alloc_domheap_page(NULL);
690 ASSERT(mmfn_info != NULL);
692 mmfn = page_to_pfn(mmfn_info);
693 mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
694 memset(mpl2e, 0, PAGE_SIZE);
696 #ifdef __i386__ /* XXX screws x86/64 build */
697 memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
698 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
699 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
700 #endif
702 mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
703 l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
704 __PAGE_HYPERVISOR);
706 // map the phys_to_machine map into the Read-Only MPT space for this domain
707 mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
708 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
709 __PAGE_HYPERVISOR);
711 // Don't (yet) have mappings for these...
712 // Don't want to accidentally see the idle_pg_table's linear mapping.
713 //
714 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
715 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
717 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
718 v->arch.monitor_vtable = mpl2e;
719 }
721 /*
722 * Free the pages for monitor_table and hl2_table
723 */
724 void free_monitor_pagetable(struct vcpu *v)
725 {
726 l2_pgentry_t *mpl2e, hl2e, sl2e;
727 unsigned long mfn;
729 ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
731 mpl2e = v->arch.monitor_vtable;
733 /*
734 * First get the mfn for hl2_table by looking at monitor_table
735 */
736 hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
737 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
738 {
739 mfn = l2e_get_pfn(hl2e);
740 ASSERT(mfn);
741 put_shadow_ref(mfn);
742 }
744 sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
745 if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
746 {
747 mfn = l2e_get_pfn(sl2e);
748 ASSERT(mfn);
749 put_shadow_ref(mfn);
750 }
752 unmap_domain_page(mpl2e);
754 /*
755 * Then free monitor_table.
756 */
757 mfn = pagetable_get_pfn(v->arch.monitor_table);
758 free_domheap_page(&frame_table[mfn]);
760 v->arch.monitor_table = mk_pagetable(0);
761 v->arch.monitor_vtable = 0;
762 }
764 int
765 set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
766 struct domain_mmap_cache *l2cache,
767 struct domain_mmap_cache *l1cache)
768 {
769 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
770 l2_pgentry_t *l2, l2e;
771 l1_pgentry_t *l1;
772 struct pfn_info *l1page;
773 unsigned long va = pfn << PAGE_SHIFT;
775 ASSERT(tabpfn != 0);
777 l2 = map_domain_page_with_cache(tabpfn, l2cache);
778 l2e = l2[l2_table_offset(va)];
779 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
780 {
781 l1page = alloc_domheap_page(NULL);
782 if ( !l1page )
783 {
784 unmap_domain_page_with_cache(l2, l2cache);
785 return 0;
786 }
788 l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
789 memset(l1, 0, PAGE_SIZE);
790 unmap_domain_page_with_cache(l1, l1cache);
792 l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
793 l2[l2_table_offset(va)] = l2e;
794 }
795 unmap_domain_page_with_cache(l2, l2cache);
797 l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
798 l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
799 unmap_domain_page_with_cache(l1, l1cache);
801 return 1;
802 }
804 static int
805 alloc_p2m_table(struct domain *d)
806 {
807 struct list_head *list_ent;
808 struct pfn_info *page, *l2page;
809 l2_pgentry_t *l2;
810 unsigned long mfn, pfn;
811 struct domain_mmap_cache l1cache, l2cache;
813 l2page = alloc_domheap_page(NULL);
814 if ( l2page == NULL )
815 return 0;
817 domain_mmap_cache_init(&l1cache);
818 domain_mmap_cache_init(&l2cache);
820 d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
821 l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
822 memset(l2, 0, PAGE_SIZE);
823 unmap_domain_page_with_cache(l2, &l2cache);
825 list_ent = d->page_list.next;
826 while ( list_ent != &d->page_list )
827 {
828 page = list_entry(list_ent, struct pfn_info, list);
829 mfn = page_to_pfn(page);
830 pfn = machine_to_phys_mapping[mfn];
831 ASSERT(pfn != INVALID_M2P_ENTRY);
832 ASSERT(pfn < (1u<<20));
834 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
836 list_ent = page->list.next;
837 }
839 list_ent = d->xenpage_list.next;
840 while ( list_ent != &d->xenpage_list )
841 {
842 page = list_entry(list_ent, struct pfn_info, list);
843 mfn = page_to_pfn(page);
844 pfn = machine_to_phys_mapping[mfn];
845 if ( (pfn != INVALID_M2P_ENTRY) &&
846 (pfn < (1u<<20)) )
847 {
848 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
849 }
851 list_ent = page->list.next;
852 }
854 domain_mmap_cache_destroy(&l2cache);
855 domain_mmap_cache_destroy(&l1cache);
857 return 1;
858 }
860 static void
861 free_p2m_table(struct domain *d)
862 {
863 // uh, this needs some work... :)
864 BUG();
865 }
867 int __shadow_mode_enable(struct domain *d, unsigned int mode)
868 {
869 struct vcpu *v;
870 int new_modes = (mode & ~d->arch.shadow_mode);
872 // Gotta be adding something to call this function.
873 ASSERT(new_modes);
875 // can't take anything away by calling this function.
876 ASSERT(!(d->arch.shadow_mode & ~mode));
878 for_each_vcpu(d, v)
879 {
880 invalidate_shadow_ldt(v);
882 // We need to set these up for __update_pagetables().
883 // See the comment there.
885 /*
886 * arch.guest_vtable
887 */
888 if ( v->arch.guest_vtable &&
889 (v->arch.guest_vtable != __linear_l2_table) )
890 {
891 unmap_domain_page(v->arch.guest_vtable);
892 }
893 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
894 v->arch.guest_vtable = __linear_l2_table;
895 else
896 v->arch.guest_vtable = NULL;
898 /*
899 * arch.shadow_vtable
900 */
901 if ( v->arch.shadow_vtable &&
902 (v->arch.shadow_vtable != __shadow_linear_l2_table) )
903 {
904 unmap_domain_page(v->arch.shadow_vtable);
905 }
906 if ( !(mode & SHM_external) )
907 v->arch.shadow_vtable = __shadow_linear_l2_table;
908 else
909 v->arch.shadow_vtable = NULL;
911 /*
912 * arch.hl2_vtable
913 */
914 if ( v->arch.hl2_vtable &&
915 (v->arch.hl2_vtable != __linear_hl2_table) )
916 {
917 unmap_domain_page(v->arch.hl2_vtable);
918 }
919 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
920 v->arch.hl2_vtable = __linear_hl2_table;
921 else
922 v->arch.hl2_vtable = NULL;
924 /*
925 * arch.monitor_table & arch.monitor_vtable
926 */
927 if ( v->arch.monitor_vtable )
928 {
929 free_monitor_pagetable(v);
930 }
931 if ( mode & SHM_external )
932 {
933 alloc_monitor_pagetable(v);
934 }
935 }
937 if ( new_modes & SHM_enable )
938 {
939 ASSERT( !d->arch.shadow_ht );
940 d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
941 if ( d->arch.shadow_ht == NULL )
942 goto nomem;
944 memset(d->arch.shadow_ht, 0,
945 shadow_ht_buckets * sizeof(struct shadow_status));
946 }
948 if ( new_modes & SHM_log_dirty )
949 {
950 ASSERT( !d->arch.shadow_dirty_bitmap );
951 d->arch.shadow_dirty_bitmap_size = (d->max_pages + 63) & ~63;
952 d->arch.shadow_dirty_bitmap =
953 xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
954 (8 * sizeof(unsigned long)));
955 if ( d->arch.shadow_dirty_bitmap == NULL )
956 {
957 d->arch.shadow_dirty_bitmap_size = 0;
958 goto nomem;
959 }
960 memset(d->arch.shadow_dirty_bitmap, 0,
961 d->arch.shadow_dirty_bitmap_size/8);
962 }
964 if ( new_modes & SHM_translate )
965 {
966 if ( !(new_modes & SHM_external) )
967 {
968 ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
969 if ( !alloc_p2m_table(d) )
970 {
971 printk("alloc_p2m_table failed (out-of-memory?)\n");
972 goto nomem;
973 }
974 }
975 else
976 {
977 // external guests provide their own memory for their P2M maps.
978 //
979 ASSERT( d == page_get_owner(
980 &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
981 }
982 }
984 printk("audit1\n");
985 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
986 printk("audit1 done\n");
988 // Get rid of any shadow pages from any previous shadow mode.
989 //
990 free_shadow_pages(d);
992 printk("audit2\n");
993 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
994 printk("audit2 done\n");
996 /*
997 * Tear down its counts by disassembling its page-table-based ref counts.
998 * Also remove CR3's gcount/tcount.
999 * That leaves things like GDTs and LDTs and external refs intact.
1001 * Most pages will be writable tcount=0.
1002 * Some will still be L1 tcount=0 or L2 tcount=0.
1003 * Maybe some pages will be type none tcount=0.
1004 * Pages granted external writable refs (via grant tables?) will
1005 * still have a non-zero tcount. That's OK.
1007 * gcounts will generally be 1 for PGC_allocated.
1008 * GDTs and LDTs will have additional gcounts.
1009 * Any grant-table based refs will still be in the gcount.
1011 * We attempt to grab writable refs to each page (thus setting its type).
1012 * Immediately put back those type refs.
1014 * Assert that no pages are left with L1/L2/L3/L4 type.
1015 */
1016 audit_adjust_pgtables(d, -1, 1);
1018 d->arch.shadow_mode = mode;
1020 if ( shadow_mode_refcounts(d) )
1022 struct list_head *list_ent = d->page_list.next;
1023 while ( list_ent != &d->page_list )
1025 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
1026 if ( !get_page_type(page, PGT_writable_page) )
1027 BUG();
1028 put_page_type(page);
1030 list_ent = page->list.next;
1034 audit_adjust_pgtables(d, 1, 1);
1036 printk("audit3\n");
1037 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
1038 printk("audit3 done\n");
1040 return 0;
1042 nomem:
1043 if ( (new_modes & SHM_enable) )
1045 xfree(d->arch.shadow_ht);
1046 d->arch.shadow_ht = NULL;
1048 if ( (new_modes & SHM_log_dirty) )
1050 xfree(d->arch.shadow_dirty_bitmap);
1051 d->arch.shadow_dirty_bitmap = NULL;
1053 if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
1054 pagetable_get_paddr(d->arch.phys_table) )
1056 free_p2m_table(d);
1058 return -ENOMEM;
1061 int shadow_mode_enable(struct domain *d, unsigned int mode)
1063 int rc;
1064 shadow_lock(d);
1065 rc = __shadow_mode_enable(d, mode);
1066 shadow_unlock(d);
1067 return rc;
1070 static void
1071 translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
1073 int i;
1074 l1_pgentry_t *l1;
1076 l1 = map_domain_page(l1mfn);
1077 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1079 if ( is_guest_l1_slot(i) &&
1080 (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
1082 unsigned long mfn = l1e_get_pfn(l1[i]);
1083 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1084 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1085 l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
1088 unmap_domain_page(l1);
1091 // This is not general enough to handle arbitrary pagetables
1092 // with shared L1 pages, etc., but it is sufficient for bringing
1093 // up dom0.
1094 //
1095 void
1096 translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
1097 unsigned int type)
1099 int i;
1100 l2_pgentry_t *l2;
1102 ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
1104 l2 = map_domain_page(l2mfn);
1105 for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
1107 if ( is_guest_l2_slot(type, i) &&
1108 (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
1110 unsigned long mfn = l2e_get_pfn(l2[i]);
1111 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1112 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1113 l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
1114 translate_l1pgtable(d, p2m, mfn);
1117 unmap_domain_page(l2);
1120 static void free_shadow_ht_entries(struct domain *d)
1122 struct shadow_status *x, *n;
1124 SH_VLOG("freed tables count=%d l1=%d l2=%d",
1125 d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
1126 perfc_value(shadow_l2_pages));
1128 n = d->arch.shadow_ht_extras;
1129 while ( (x = n) != NULL )
1131 d->arch.shadow_extras_count--;
1132 n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
1133 xfree(x);
1136 d->arch.shadow_ht_extras = NULL;
1137 d->arch.shadow_ht_free = NULL;
1139 ASSERT(d->arch.shadow_extras_count == 0);
1140 SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
1142 if ( d->arch.shadow_dirty_bitmap != NULL )
1144 xfree(d->arch.shadow_dirty_bitmap);
1145 d->arch.shadow_dirty_bitmap = 0;
1146 d->arch.shadow_dirty_bitmap_size = 0;
1149 xfree(d->arch.shadow_ht);
1150 d->arch.shadow_ht = NULL;
1153 static void free_out_of_sync_entries(struct domain *d)
1155 struct out_of_sync_entry *x, *n;
1157 n = d->arch.out_of_sync_extras;
1158 while ( (x = n) != NULL )
1160 d->arch.out_of_sync_extras_count--;
1161 n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
1162 xfree(x);
1165 d->arch.out_of_sync_extras = NULL;
1166 d->arch.out_of_sync_free = NULL;
1167 d->arch.out_of_sync = NULL;
1169 ASSERT(d->arch.out_of_sync_extras_count == 0);
1170 FSH_LOG("freed extra out_of_sync entries, now %d",
1171 d->arch.out_of_sync_extras_count);
1174 void __shadow_mode_disable(struct domain *d)
1176 if ( unlikely(!shadow_mode_enabled(d)) )
1177 return;
1179 /*
1180 * Currently this does not fix up page ref counts, so it is valid to call
1181 * only when a domain is being destroyed.
1182 */
1183 BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags) &&
1184 shadow_mode_refcounts(d));
1185 d->arch.shadow_tainted_refcnts = shadow_mode_refcounts(d);
1187 free_shadow_pages(d);
1188 free_writable_pte_predictions(d);
1190 #ifndef NDEBUG
1191 int i;
1192 for ( i = 0; i < shadow_ht_buckets; i++ )
1194 if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
1196 printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
1197 __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
1198 BUG();
1201 #endif
1203 d->arch.shadow_mode = 0;
1205 free_shadow_ht_entries(d);
1206 free_out_of_sync_entries(d);
1208 struct vcpu *v;
1209 for_each_vcpu(d, v)
1211 update_pagetables(v);
1215 static int shadow_mode_table_op(
1216 struct domain *d, dom0_shadow_control_t *sc)
1218 unsigned int op = sc->op;
1219 int i, rc = 0;
1220 struct vcpu *v;
1222 ASSERT(shadow_lock_is_acquired(d));
1224 SH_VLOG("shadow mode table op %lx %lx count %d",
1225 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
1226 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
1227 d->arch.shadow_page_count);
1229 shadow_audit(d, 1);
1231 switch ( op )
1233 case DOM0_SHADOW_CONTROL_OP_FLUSH:
1234 free_shadow_pages(d);
1236 d->arch.shadow_fault_count = 0;
1237 d->arch.shadow_dirty_count = 0;
1238 d->arch.shadow_dirty_net_count = 0;
1239 d->arch.shadow_dirty_block_count = 0;
1241 break;
1243 case DOM0_SHADOW_CONTROL_OP_CLEAN:
1244 free_shadow_pages(d);
1246 sc->stats.fault_count = d->arch.shadow_fault_count;
1247 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1248 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1249 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1251 d->arch.shadow_fault_count = 0;
1252 d->arch.shadow_dirty_count = 0;
1253 d->arch.shadow_dirty_net_count = 0;
1254 d->arch.shadow_dirty_block_count = 0;
1256 if ( (d->max_pages > sc->pages) ||
1257 (sc->dirty_bitmap == NULL) ||
1258 (d->arch.shadow_dirty_bitmap == NULL) )
1260 rc = -EINVAL;
1261 break;
1264 sc->pages = d->max_pages;
1266 #define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
1267 for ( i = 0; i < d->max_pages; i += chunk )
1269 int bytes = ((((d->max_pages - i) > chunk) ?
1270 chunk : (d->max_pages - i)) + 7) / 8;
1272 if (copy_to_user(
1273 sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
1274 d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
1275 bytes))
1277 // copy_to_user can fail when copying to guest app memory.
1278 // The app should zero the buffer after mallocing, and pin it
1279 rc = -EINVAL;
1280 memset(
1281 d->arch.shadow_dirty_bitmap +
1282 (i/(8*sizeof(unsigned long))),
1283 0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
1284 break;
1287 memset(
1288 d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
1289 0, bytes);
1292 break;
1294 case DOM0_SHADOW_CONTROL_OP_PEEK:
1295 sc->stats.fault_count = d->arch.shadow_fault_count;
1296 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1297 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1298 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1300 if ( (d->max_pages > sc->pages) ||
1301 (sc->dirty_bitmap == NULL) ||
1302 (d->arch.shadow_dirty_bitmap == NULL) )
1304 rc = -EINVAL;
1305 break;
1308 sc->pages = d->max_pages;
1309 if (copy_to_user(
1310 sc->dirty_bitmap, d->arch.shadow_dirty_bitmap, (d->max_pages+7)/8))
1312 rc = -EINVAL;
1313 break;
1316 break;
1318 default:
1319 rc = -EINVAL;
1320 break;
1323 SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
1324 shadow_audit(d, 1);
1326 for_each_vcpu(d,v)
1327 __update_pagetables(v);
1329 return rc;
1332 int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
1334 unsigned int op = sc->op;
1335 int rc = 0;
1336 struct vcpu *v;
1338 if ( unlikely(d == current->domain) )
1340 DPRINTK("Don't try to do a shadow op on yourself!\n");
1341 return -EINVAL;
1344 domain_pause(d);
1346 shadow_lock(d);
1348 switch ( op )
1350 case DOM0_SHADOW_CONTROL_OP_OFF:
1351 __shadow_sync_all(d);
1352 __shadow_mode_disable(d);
1353 break;
1355 case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
1356 free_shadow_pages(d);
1357 rc = __shadow_mode_enable(d, SHM_enable);
1358 break;
1360 case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
1361 free_shadow_pages(d);
1362 rc = __shadow_mode_enable(
1363 d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
1364 break;
1366 case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
1367 free_shadow_pages(d);
1368 rc = __shadow_mode_enable(
1369 d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
1370 break;
1372 default:
1373 rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
1374 break;
1377 shadow_unlock(d);
1379 for_each_vcpu(d,v)
1380 update_pagetables(v);
1382 domain_unpause(d);
1384 return rc;
1387 /*
1388 * XXX KAF: Why is this VMX specific?
1389 */
1390 void vmx_shadow_clear_state(struct domain *d)
1392 SH_VVLOG("%s:", __func__);
1393 shadow_lock(d);
1394 free_shadow_pages(d);
1395 shadow_unlock(d);
1396 update_pagetables(d->vcpu[0]);
1399 unsigned long
1400 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
1402 ASSERT( shadow_mode_translate(d) );
1404 perfc_incrc(gpfn_to_mfn_foreign);
1406 unsigned long va = gpfn << PAGE_SHIFT;
1407 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1408 l2_pgentry_t *l2 = map_domain_page(tabpfn);
1409 l2_pgentry_t l2e = l2[l2_table_offset(va)];
1410 unmap_domain_page(l2);
1411 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1413 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
1414 d->domain_id, gpfn, l2e_get_intpte(l2e));
1415 return INVALID_MFN;
1417 l1_pgentry_t *l1 = map_domain_page(l2e_get_pfn(l2e));
1418 l1_pgentry_t l1e = l1[l1_table_offset(va)];
1419 unmap_domain_page(l1);
1421 #if 0
1422 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
1423 d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
1424 #endif
1426 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
1428 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
1429 d->domain_id, gpfn, l1e_get_intpte(l1e));
1430 return INVALID_MFN;
1433 return l1e_get_pfn(l1e);
1436 static unsigned long
1437 shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
1438 unsigned long smfn)
1440 unsigned long hl2mfn;
1441 l1_pgentry_t *hl2;
1442 int limit;
1444 ASSERT(PGT_base_page_table == PGT_l2_page_table);
1446 if ( unlikely(!(hl2mfn = alloc_shadow_page(d, gpfn, gmfn, PGT_hl2_shadow))) )
1448 printk("Couldn't alloc an HL2 shadow for pfn=%lx mfn=%lx\n",
1449 gpfn, gmfn);
1450 BUG(); /* XXX Deal gracefully with failure. */
1453 SH_VVLOG("shadow_hl2_table(gpfn=%lx, gmfn=%lx, smfn=%lx) => %lx",
1454 gpfn, gmfn, smfn, hl2mfn);
1455 perfc_incrc(shadow_hl2_table_count);
1457 hl2 = map_domain_page(hl2mfn);
1459 #ifdef __i386__
1460 if ( shadow_mode_external(d) )
1461 limit = L2_PAGETABLE_ENTRIES;
1462 else
1463 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
1464 #else
1465 limit = 0; /* XXX x86/64 XXX */
1466 #endif
1468 memset(hl2, 0, limit * sizeof(l1_pgentry_t));
1470 if ( !shadow_mode_external(d) )
1472 memset(&hl2[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 0,
1473 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1475 // Setup easy access to the GL2, SL2, and HL2 frames.
1476 //
1477 hl2[l2_table_offset(LINEAR_PT_VIRT_START)] =
1478 l1e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1479 hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1480 l1e_from_pfn(smfn, __PAGE_HYPERVISOR);
1481 hl2[l2_table_offset(PERDOMAIN_VIRT_START)] =
1482 l1e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1485 unmap_domain_page(hl2);
1487 return hl2mfn;
1490 /*
1491 * This could take and use a snapshot, and validate the entire page at
1492 * once, or it could continue to fault in entries one at a time...
1493 * Might be worth investigating...
1494 */
1495 static unsigned long shadow_l2_table(
1496 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1498 unsigned long smfn;
1499 l2_pgentry_t *spl2e;
1501 SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
1503 perfc_incrc(shadow_l2_table_count);
1505 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l2_shadow))) )
1507 printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
1508 gpfn, gmfn);
1509 BUG(); /* XXX Deal gracefully with failure. */
1512 spl2e = (l2_pgentry_t *)map_domain_page(smfn);
1514 /* Install hypervisor and 2x linear p.t. mappings. */
1515 if ( (PGT_base_page_table == PGT_l2_page_table) &&
1516 !shadow_mode_external(d) )
1518 /*
1519 * We could proactively fill in PDEs for pages that are already
1520 * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
1521 * (restriction required for coherence of the accessed bit). However,
1522 * we tried it and it didn't help performance. This is simpler.
1523 */
1524 memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
1526 /* Install hypervisor and 2x linear p.t. mappings. */
1527 memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1528 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1529 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1531 spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1532 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
1534 spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
1535 l2e_from_paddr(__pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt),
1536 __PAGE_HYPERVISOR);
1538 if ( shadow_mode_translate(d) ) // NB: not external
1540 unsigned long hl2mfn;
1542 spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
1543 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
1544 __PAGE_HYPERVISOR);
1546 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
1547 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
1549 // shadow_mode_translate (but not external) sl2 tables hold a
1550 // ref to their hl2.
1551 //
1552 if ( !get_shadow_ref(hl2mfn) )
1553 BUG();
1555 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1556 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1558 else
1559 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1560 l2e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1562 else
1564 memset(spl2e, 0, L2_PAGETABLE_ENTRIES*sizeof(l2_pgentry_t));
1567 unmap_domain_page(spl2e);
1569 SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
1570 return smfn;
1573 void shadow_map_l1_into_current_l2(unsigned long va)
1575 struct vcpu *v = current;
1576 struct domain *d = v->domain;
1577 l1_pgentry_t *gpl1e, *spl1e;
1578 l2_pgentry_t gl2e, sl2e;
1579 unsigned long gl1pfn, gl1mfn, sl1mfn;
1580 int i, init_table = 0;
1582 __guest_get_l2e(v, va, &gl2e);
1583 ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT);
1584 gl1pfn = l2e_get_pfn(gl2e);
1586 if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
1588 /* This L1 is NOT already shadowed so we need to shadow it. */
1589 SH_VVLOG("4a: l1 not shadowed");
1591 gl1mfn = __gpfn_to_mfn(d, gl1pfn);
1592 if ( unlikely(!VALID_MFN(gl1mfn)) )
1594 // Attempt to use an invalid pfn as an L1 page.
1595 // XXX this needs to be more graceful!
1596 BUG();
1599 if ( unlikely(!(sl1mfn =
1600 alloc_shadow_page(d, gl1pfn, gl1mfn, PGT_l1_shadow))) )
1602 printk("Couldn't alloc an L1 shadow for pfn=%lx mfn=%lx\n",
1603 gl1pfn, gl1mfn);
1604 BUG(); /* XXX Need to deal gracefully with failure. */
1607 perfc_incrc(shadow_l1_table_count);
1608 init_table = 1;
1610 else
1612 /* This L1 is shadowed already, but the L2 entry is missing. */
1613 SH_VVLOG("4b: was shadowed, l2 missing (%lx)", sl1mfn);
1616 #ifndef NDEBUG
1617 l2_pgentry_t old_sl2e;
1618 __shadow_get_l2e(v, va, &old_sl2e);
1619 ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
1620 #endif
1622 if ( !get_shadow_ref(sl1mfn) )
1623 BUG();
1624 l2pde_general(d, &gl2e, &sl2e, sl1mfn);
1625 __guest_set_l2e(v, va, gl2e);
1626 __shadow_set_l2e(v, va, sl2e);
1628 if ( init_table )
1630 l1_pgentry_t sl1e;
1631 int index = l1_table_offset(va);
1632 int min = 1, max = 0;
1634 gpl1e = &(linear_pg_table[l1_linear_offset(va) &
1635 ~(L1_PAGETABLE_ENTRIES-1)]);
1637 spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
1638 ~(L1_PAGETABLE_ENTRIES-1)]);
1640 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1642 l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
1643 if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
1644 unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
1645 sl1e = l1e_empty();
1646 if ( l1e_get_flags(sl1e) == 0 )
1648 // First copy entries from 0 until first invalid.
1649 // Then copy entries from index until first invalid.
1650 //
1651 if ( i < index ) {
1652 i = index - 1;
1653 continue;
1655 break;
1657 spl1e[i] = sl1e;
1658 if ( unlikely(i < min) )
1659 min = i;
1660 if ( likely(i > max) )
1661 max = i;
1664 frame_table[sl1mfn].tlbflush_timestamp =
1665 SHADOW_ENCODE_MIN_MAX(min, max);
1669 void shadow_invlpg(struct vcpu *v, unsigned long va)
1671 struct domain *d = v->domain;
1672 l1_pgentry_t gpte, spte;
1674 ASSERT(shadow_mode_enabled(d));
1676 shadow_lock(d);
1678 __shadow_sync_va(v, va);
1680 // XXX mafetter: will need to think about 4MB pages...
1682 // It's not strictly necessary to update the shadow here,
1683 // but it might save a fault later.
1684 //
1685 if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
1686 sizeof(gpte))) {
1687 perfc_incrc(shadow_invlpg_faults);
1688 return;
1690 l1pte_propagate_from_guest(d, gpte, &spte);
1691 shadow_set_l1e(va, spte, 1);
1693 shadow_unlock(d);
1696 struct out_of_sync_entry *
1697 shadow_alloc_oos_entry(struct domain *d)
1699 struct out_of_sync_entry *f, *extra;
1700 unsigned size, i;
1702 if ( unlikely(d->arch.out_of_sync_free == NULL) )
1704 FSH_LOG("Allocate more fullshadow tuple blocks.");
1706 size = sizeof(void *) + (out_of_sync_extra_size * sizeof(*f));
1707 extra = xmalloc_bytes(size);
1709 /* XXX Should be more graceful here. */
1710 if ( extra == NULL )
1711 BUG();
1713 memset(extra, 0, size);
1715 /* Record the allocation block so it can be correctly freed later. */
1716 d->arch.out_of_sync_extras_count++;
1717 *((struct out_of_sync_entry **)&extra[out_of_sync_extra_size]) =
1718 d->arch.out_of_sync_extras;
1719 d->arch.out_of_sync_extras = &extra[0];
1721 /* Thread a free chain through the newly-allocated nodes. */
1722 for ( i = 0; i < (out_of_sync_extra_size - 1); i++ )
1723 extra[i].next = &extra[i+1];
1724 extra[i].next = NULL;
1726 /* Add the new nodes to the free list. */
1727 d->arch.out_of_sync_free = &extra[0];
1730 /* Allocate a new node from the quicklist. */
1731 f = d->arch.out_of_sync_free;
1732 d->arch.out_of_sync_free = f->next;
1734 return f;
1737 static inline unsigned long
1738 shadow_make_snapshot(
1739 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1741 unsigned long smfn, sl1mfn = 0;
1742 void *original, *snapshot;
1743 u32 min_max = 0;
1744 int min, max, length;
1746 if ( test_and_set_bit(_PGC_out_of_sync, &frame_table[gmfn].count_info) )
1748 ASSERT(__shadow_status(d, gpfn, PGT_snapshot));
1749 return SHADOW_SNAPSHOT_ELSEWHERE;
1752 perfc_incrc(shadow_make_snapshot);
1754 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_snapshot))) )
1756 printk("Couldn't alloc fullshadow snapshot for pfn=%lx mfn=%lx!\n"
1757 "Dom%d snapshot_count_count=%d\n",
1758 gpfn, gmfn, d->domain_id, d->arch.snapshot_page_count);
1759 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
1762 if ( !get_shadow_ref(smfn) )
1763 BUG();
1765 if ( shadow_mode_refcounts(d) &&
1766 (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
1767 min_max = pfn_to_page(sl1mfn)->tlbflush_timestamp;
1768 pfn_to_page(smfn)->tlbflush_timestamp = min_max;
1770 min = SHADOW_MIN(min_max);
1771 max = SHADOW_MAX(min_max);
1772 length = max - min + 1;
1773 perfc_incr_histo(snapshot_copies, length, PT_UPDATES);
1775 min *= sizeof(l1_pgentry_t);
1776 length *= sizeof(l1_pgentry_t);
1778 original = map_domain_page(gmfn);
1779 snapshot = map_domain_page(smfn);
1780 memcpy(snapshot + min, original + min, length);
1781 unmap_domain_page(original);
1782 unmap_domain_page(snapshot);
1784 return smfn;
1787 static void
1788 shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
1790 void *snapshot;
1792 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
1793 return;
1795 // Clear the out_of_sync bit.
1796 //
1797 clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
1799 // XXX Need to think about how to protect the domain's
1800 // information less expensively.
1801 //
1802 snapshot = map_domain_page(entry->snapshot_mfn);
1803 memset(snapshot, 0, PAGE_SIZE);
1804 unmap_domain_page(snapshot);
1806 put_shadow_ref(entry->snapshot_mfn);
1809 struct out_of_sync_entry *
1810 shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
1811 unsigned long mfn)
1813 struct domain *d = v->domain;
1814 struct pfn_info *page = &frame_table[mfn];
1815 struct out_of_sync_entry *entry = shadow_alloc_oos_entry(d);
1817 ASSERT(shadow_lock_is_acquired(d));
1818 ASSERT(pfn_valid(mfn));
1820 #ifndef NDEBUG
1821 u32 type = page->u.inuse.type_info & PGT_type_mask;
1822 if ( shadow_mode_refcounts(d) )
1824 ASSERT(type == PGT_writable_page);
1826 else
1828 ASSERT(type && (type < PGT_l4_page_table));
1830 #endif
1832 FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08x", __func__,
1833 gpfn, mfn, page->count_info, page->u.inuse.type_info);
1835 // XXX this will require some more thought... Cross-domain sharing and
1836 // modification of page tables? Hmm...
1837 //
1838 if ( d != page_get_owner(page) )
1839 BUG();
1841 perfc_incrc(shadow_mark_mfn_out_of_sync_calls);
1843 entry->gpfn = gpfn;
1844 entry->gmfn = mfn;
1845 entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
1846 entry->writable_pl1e = -1;
1848 #if SHADOW_DEBUG
1849 mark_shadows_as_reflecting_snapshot(d, gpfn);
1850 #endif
1852 // increment guest's ref count to represent the entry in the
1853 // full shadow out-of-sync list.
1854 //
1855 get_page(page, d);
1857 // Add to the out-of-sync list
1858 //
1859 entry->next = d->arch.out_of_sync;
1860 d->arch.out_of_sync = entry;
1862 return entry;
1865 void shadow_mark_va_out_of_sync(
1866 struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long va)
1868 struct out_of_sync_entry *entry =
1869 shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
1870 l2_pgentry_t sl2e;
1872 // We need the address of shadow PTE that maps @va.
1873 // It might not exist yet. Make sure it's there.
1874 //
1875 __shadow_get_l2e(v, va, &sl2e);
1876 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
1878 // either this L1 isn't shadowed yet, or the shadow isn't linked into
1879 // the current L2.
1880 shadow_map_l1_into_current_l2(va);
1881 __shadow_get_l2e(v, va, &sl2e);
1883 ASSERT(l2e_get_flags(sl2e) & _PAGE_PRESENT);
1885 // NB: this is stored as a machine address.
1886 entry->writable_pl1e =
1887 l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * l1_table_offset(va));
1888 ASSERT( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) );
1890 // Increment shadow's page count to represent the reference
1891 // inherent in entry->writable_pl1e
1892 //
1893 if ( !get_shadow_ref(l2e_get_pfn(sl2e)) )
1894 BUG();
1896 FSH_LOG("mark_out_of_sync(va=%lx -> writable_pl1e=%lx)",
1897 va, entry->writable_pl1e);
1900 /*
1901 * Returns 1 if the snapshot for @gmfn exists and its @index'th entry matches.
1902 * Returns 0 otherwise.
1903 */
1904 static int snapshot_entry_matches(
1905 struct domain *d, l1_pgentry_t *guest_pt,
1906 unsigned long gpfn, unsigned index)
1908 unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot);
1909 l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ...
1910 int entries_match;
1912 perfc_incrc(snapshot_entry_matches_calls);
1914 if ( !smfn )
1915 return 0;
1917 snapshot = map_domain_page(smfn);
1919 if (__copy_from_user(&gpte, &guest_pt[index],
1920 sizeof(gpte)))
1921 return 0;
1923 // This could probably be smarter, but this is sufficient for
1924 // our current needs.
1925 //
1926 entries_match = !l1e_has_changed(gpte, snapshot[index],
1927 PAGE_FLAG_MASK);
1929 unmap_domain_page(snapshot);
1931 #ifdef PERF_COUNTERS
1932 if ( entries_match )
1933 perfc_incrc(snapshot_entry_matches_true);
1934 #endif
1936 return entries_match;
1939 /*
1940 * Returns 1 if va's shadow mapping is out-of-sync.
1941 * Returns 0 otherwise.
1942 */
1943 int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
1945 struct domain *d = v->domain;
1946 unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
1947 unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn);
1948 l2_pgentry_t l2e;
1949 unsigned long l1pfn, l1mfn;
1951 ASSERT(shadow_lock_is_acquired(d));
1952 ASSERT(VALID_M2P(l2pfn));
1954 perfc_incrc(shadow_out_of_sync_calls);
1956 if ( page_out_of_sync(&frame_table[l2mfn]) &&
1957 !snapshot_entry_matches(d, (l1_pgentry_t *)v->arch.guest_vtable,
1958 l2pfn, l2_table_offset(va)) )
1959 return 1;
1961 __guest_get_l2e(v, va, &l2e);
1962 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1963 return 0;
1965 l1pfn = l2e_get_pfn(l2e);
1966 l1mfn = __gpfn_to_mfn(d, l1pfn);
1968 // If the l1 pfn is invalid, it can't be out of sync...
1969 if ( !VALID_MFN(l1mfn) )
1970 return 0;
1972 if ( page_out_of_sync(&frame_table[l1mfn]) &&
1973 !snapshot_entry_matches(
1974 d, &linear_pg_table[l1_linear_offset(va) & ~(L1_PAGETABLE_ENTRIES-1)],
1975 l1pfn, l1_table_offset(va)) )
1976 return 1;
1978 return 0;
1981 #define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t)))
1982 static inline unsigned long
1983 predict_writable_pte_page(struct domain *d, unsigned long gpfn)
1985 return __shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), PGT_writable_pred);
1988 static inline void
1989 increase_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
1991 unsigned long score = prediction & PGT_score_mask;
1992 int create = (score == 0);
1994 // saturating addition
1995 score = (score + (1u << PGT_score_shift)) & PGT_score_mask;
1996 score = score ? score : PGT_score_mask;
1998 prediction = (prediction & PGT_mfn_mask) | score;
2000 //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
2001 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2003 if ( create )
2004 perfc_incr(writable_pte_predictions);
2007 static inline void
2008 decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
2010 unsigned long score = prediction & PGT_score_mask;
2011 ASSERT(score);
2013 // divide score by 2... We don't like bad predictions.
2014 //
2015 score = (score >> 1) & PGT_score_mask;
2017 prediction = (prediction & PGT_mfn_mask) | score;
2019 //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
2021 if ( score )
2022 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2023 else
2025 delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
2026 perfc_decr(writable_pte_predictions);
2030 static void
2031 free_writable_pte_predictions(struct domain *d)
2033 int i;
2034 struct shadow_status *x;
2036 for ( i = 0; i < shadow_ht_buckets; i++ )
2038 u32 count;
2039 unsigned long *gpfn_list;
2041 /* Skip empty buckets. */
2042 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
2043 continue;
2045 count = 0;
2046 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2047 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2048 count++;
2050 gpfn_list = xmalloc_array(unsigned long, count);
2051 count = 0;
2052 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2053 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2054 gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
2056 while ( count )
2058 count--;
2059 delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
2062 xfree(gpfn_list);
2066 static u32 remove_all_write_access_in_ptpage(
2067 struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
2068 unsigned long readonly_gpfn, unsigned long readonly_gmfn,
2069 u32 max_refs_to_find, unsigned long prediction)
2071 l1_pgentry_t *pt = map_domain_page(pt_mfn);
2072 l1_pgentry_t match;
2073 unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
2074 int i;
2075 u32 found = 0;
2076 int is_l1_shadow =
2077 ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
2078 PGT_l1_shadow);
2080 match = l1e_from_pfn(readonly_gmfn, flags);
2082 // returns true if all refs have been found and fixed.
2083 //
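/*
 * fix_entry() is a nested function (a GCC extension): it captures pt, found,
 * is_l1_shadow, d and max_refs_to_find from the enclosing scope, downgrades
 * entry i to read-only while fixing up the shadow reference counts, and
 * returns nonzero once max_refs_to_find entries have been fixed.
 */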
2084 int fix_entry(int i)
2086 l1_pgentry_t old = pt[i];
2087 l1_pgentry_t new = old;
2089 l1e_remove_flags(new,_PAGE_RW);
2090 if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
2091 BUG();
2092 found++;
2093 pt[i] = new;
2094 if ( is_l1_shadow )
2095 shadow_put_page_from_l1e(old, d);
2097 #if 0
2098 printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
2099 "is_l1_shadow=%d\n",
2100 readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
2101 #endif
2103 return (found == max_refs_to_find);
2106 i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
2107 if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
2109 perfc_incrc(remove_write_fast_exit);
2110 increase_writable_pte_prediction(d, readonly_gpfn, prediction);
2111 unmap_domain_page(pt);
2112 return found;
2115 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2117 if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
2118 break;
2121 unmap_domain_page(pt);
2123 return found;
2124 #undef MATCH_ENTRY
2127 int shadow_remove_all_write_access(
2128 struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
2130 int i;
2131 struct shadow_status *a;
2132 u32 found = 0, fixups, write_refs;
2133 unsigned long prediction, predicted_gpfn, predicted_smfn;
2135 ASSERT(shadow_lock_is_acquired(d));
2136 ASSERT(VALID_MFN(readonly_gmfn));
2138 perfc_incrc(remove_write_access);
2140 // If it's not a writable page, then no writable refs can be outstanding.
2141 //
2142 if ( (frame_table[readonly_gmfn].u.inuse.type_info & PGT_type_mask) !=
2143 PGT_writable_page )
2145 perfc_incrc(remove_write_not_writable);
2146 return 1;
2149 // How many outstanding writable PTEs for this page are there?
2150 //
2151 write_refs =
2152 (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
2153 if ( write_refs && MFN_PINNED(readonly_gmfn) )
2155 write_refs--;
2158 if ( write_refs == 0 )
2160 perfc_incrc(remove_write_no_work);
2161 return 1;
2164 // Before searching all the L1 page tables, check the typical culprit first
2165 //
2166 if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
2168 predicted_gpfn = prediction & PGT_mfn_mask;
2169 if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
2170 (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
2172 found += fixups;
2173 if ( found == write_refs )
2175 perfc_incrc(remove_write_predicted);
2176 return 1;
2179 else
2181 perfc_incrc(remove_write_bad_prediction);
2182 decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
2186 // Search all the shadow L1 page tables...
2187 //
2188 for (i = 0; i < shadow_ht_buckets; i++)
2190 a = &d->arch.shadow_ht[i];
2191 while ( a && a->gpfn_and_flags )
2193 if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow )
2195 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
2196 if ( found == write_refs )
2197 return 1;
2200 a = a->next;
2204 FSH_LOG("%s: looking for %d refs, found %d refs",
2205 __func__, write_refs, found);
2207 return 0;
2210 static u32 remove_all_access_in_page(
2211 struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
2213 l1_pgentry_t *pl1e = map_domain_page(l1mfn);
2214 l1_pgentry_t match;
2215 unsigned long flags = _PAGE_PRESENT;
2216 int i;
2217 u32 count = 0;
2218 int is_l1_shadow =
2219 ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
2220 PGT_l1_shadow);
2222 match = l1e_from_pfn(forbidden_gmfn, flags);
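/*
 * The template above carries only the frame number and _PAGE_PRESENT, and
 * l1e_has_changed() compares just the frame bits plus the flags passed in.
 * An entry is therefore cleared by the loop below exactly when it is a
 * present mapping of forbidden_gmfn, whatever its other attribute bits
 * (RW, ACCESSED, DIRTY, ...) happen to be.
 */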
2224 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2226 if ( unlikely(!l1e_has_changed(pl1e[i], match, flags)) )
2228 l1_pgentry_t ol2e = pl1e[i];
2229 pl1e[i] = l1e_empty();
2230 count++;
2232 if ( is_l1_shadow )
2233 shadow_put_page_from_l1e(ol2e, d);
2234 else /* must be an hl2 page */
2235 put_page(&frame_table[forbidden_gmfn]);
2239 unmap_domain_page(pl1e);
2241 return count;
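/*
 * Illustrative aside: both scanners above lean on l1e_has_changed() to compare
 * two PTEs while ignoring attribute bits the caller does not care about. A
 * self-contained sketch of that kind of predicate, assuming 32-bit non-PAE PTEs
 * with 4KB pages; the EXAMPLE_ and example_ names are illustrative, not the
 * real macro:
 */
#define EXAMPLE_PTE_ATTR_BITS 0xfffUL   /* low 12 bits hold the attribute flags */

/* Nonzero iff a and b differ in their frame number or in any of the
 * caller-selected flag bits; all other attribute bits are ignored. */
static inline int example_pte_has_changed(unsigned long a, unsigned long b,
                                          unsigned long flags)
{
    unsigned long mask = ~EXAMPLE_PTE_ATTR_BITS | (flags & EXAMPLE_PTE_ATTR_BITS);

    return ((a ^ b) & mask) != 0;
}
/* remove_all_access_in_page() passes just _PAGE_PRESENT, so a PTE matches its
 * template whenever it is a present mapping of the same frame. */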
2244 u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
2246 int i;
2247 struct shadow_status *a;
2248 u32 count = 0;
2250 if ( unlikely(!shadow_mode_enabled(d)) )
2251 return 0;
2253 ASSERT(shadow_lock_is_acquired(d));
2254 perfc_incrc(remove_all_access);
2256 for (i = 0; i < shadow_ht_buckets; i++)
2258 a = &d->arch.shadow_ht[i];
2259 while ( a && a->gpfn_and_flags )
2261 switch (a->gpfn_and_flags & PGT_type_mask)
2263 case PGT_l1_shadow:
2264 case PGT_l2_shadow:
2265 case PGT_l3_shadow:
2266 case PGT_l4_shadow:
2267 case PGT_hl2_shadow:
2268 count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
2269 break;
2270 case PGT_snapshot:
2271 case PGT_writable_pred:
2272 // these can't hold refs to the forbidden page
2273 break;
2274 default:
2275 BUG();
2278 a = a->next;
2282 return count;
2285 static int resync_all(struct domain *d, u32 stype)
2287 struct out_of_sync_entry *entry;
2288 unsigned i;
2289 unsigned long smfn;
2290 void *guest, *shadow, *snapshot;
2291 int need_flush = 0, external = shadow_mode_external(d);
2292 int unshadow;
2293 int changed;
2295 ASSERT(shadow_lock_is_acquired(d));
2297 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2299 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
2300 continue;
2302 smfn = __shadow_status(d, entry->gpfn, stype);
2304 if ( !smfn )
2306 if ( shadow_mode_refcounts(d) )
2307 continue;
2309 // For lightweight shadows, even when no shadow page exists,
2310 // we need to resync the refcounts to the new contents of the
2311 // guest page.
2312 // This only applies when we have writable page tables.
2313 //
2314 if ( !shadow_mode_write_all(d) &&
2315 !((stype == PGT_l1_shadow) &&
2316 VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
2317 // Page is not writable -- no resync necessary
2318 continue;
2321 FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
2322 stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
2324 // Compare guest's new contents to its snapshot, validating
2325 // and updating its shadow as appropriate.
2326 //
2327 guest = map_domain_page(entry->gmfn);
2328 snapshot = map_domain_page(entry->snapshot_mfn);
2330 if ( smfn )
2331 shadow = map_domain_page(smfn);
2332 else
2333 shadow = NULL;
2335 unshadow = 0;
2337 switch ( stype ) {
2338 case PGT_l1_shadow:
2340 l1_pgentry_t *guest1 = guest;
2341 l1_pgentry_t *shadow1 = shadow;
2342 l1_pgentry_t *snapshot1 = snapshot;
2344 ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
2345 shadow_mode_write_all(d));
2347 if ( !shadow_mode_refcounts(d) )
2348 revalidate_l1(d, guest1, snapshot1);
2350 if ( !smfn )
2351 break;
2353 u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
2354 int min_shadow = SHADOW_MIN(min_max_shadow);
2355 int max_shadow = SHADOW_MAX(min_max_shadow);
2357 u32 min_max_snapshot =
2358 pfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
2359 int min_snapshot = SHADOW_MIN(min_max_snapshot);
2360 int max_snapshot = SHADOW_MAX(min_max_snapshot);
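/*
 * The shadow code reuses the tlbflush_timestamp field of shadow and snapshot
 * pages as a packed (min, max) pair bounding the entries that may be in use;
 * SHADOW_MIN() and SHADOW_MAX() unpack it. The loop below therefore scans
 * only [min_shadow, max_shadow], and treats anything outside
 * [min_snapshot, max_snapshot] as changed, since the snapshot cannot be
 * trusted outside that window.
 */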
2362 changed = 0;
2364 for ( i = min_shadow; i <= max_shadow; i++ )
2366 if ( (i < min_snapshot) || (i > max_snapshot) ||
2367 l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
2369 need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
2371 // can't update snapshots of linear page tables -- they
2372 // are used multiple times...
2373 //
2374 // snapshot[i] = new_pte;
2376 changed++;
2379 perfc_incrc(resync_l1);
2380 perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
2381 perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
2382 break;
2384 case PGT_l2_shadow:
2386 int max = -1;
2388 l2_pgentry_t *guest2 = guest;
2389 l2_pgentry_t *shadow2 = shadow;
2390 l2_pgentry_t *snapshot2 = snapshot;
2392 ASSERT(shadow_mode_write_all(d));
2393 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2395 changed = 0;
2396 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2398 #if CONFIG_X86_PAE
2399 BUG(); /* FIXME: need type_info */
2400 #endif
2401 if ( !is_guest_l2_slot(0,i) && !external )
2402 continue;
2404 l2_pgentry_t new_pde = guest2[i];
2405 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK))
2407 need_flush |= validate_pde_change(d, new_pde, &shadow2[i]);
2409 // can't update snapshots of linear page tables -- they
2410 // are used multiple times...
2411 //
2412 // snapshot[i] = new_pde;
2414 changed++;
2416 if ( l2e_get_intpte(new_pde) != 0 ) /* FIXME: check flags? */
2417 max = i;
2419 // XXX - This hack works for linux guests.
2420 // Need a better solution long term.
2421 if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
2422 unlikely(l2e_get_intpte(new_pde) != 0) &&
2423 !unshadow && MFN_PINNED(smfn) )
2424 unshadow = 1;
2426 if ( max == -1 )
2427 unshadow = 1;
2428 perfc_incrc(resync_l2);
2429 perfc_incr_histo(shm_l2_updates, changed, PT_UPDATES);
2430 break;
2432 case PGT_hl2_shadow:
2434 l2_pgentry_t *guest2 = guest;
2435 l2_pgentry_t *snapshot2 = snapshot;
2436 l1_pgentry_t *shadow2 = shadow;
2438 ASSERT(shadow_mode_write_all(d));
2439 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2441 changed = 0;
2442 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2444 #if CONFIG_X86_PAE
2445 BUG(); /* FIXME: need type_info */
2446 #endif
2447 if ( !is_guest_l2_slot(0, i) && !external )
2448 continue;
2450 l2_pgentry_t new_pde = guest2[i];
2451 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK) )
2453 need_flush |= validate_hl2e_change(d, new_pde, &shadow2[i]);
2455 // can't update snapshots of linear page tables -- they
2456 // are used multiple times...
2457 //
2458 // snapshot[i] = new_pde;
2460 changed++;
2463 perfc_incrc(resync_hl2);
2464 perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
2465 break;
2467 default:
2468 BUG();
2471 if ( smfn )
2472 unmap_domain_page(shadow);
2473 unmap_domain_page(snapshot);
2474 unmap_domain_page(guest);
2476 if ( unlikely(unshadow) )
2478 perfc_incrc(unshadow_l2_count);
2479 shadow_unpin(smfn);
2480 if ( unlikely(shadow_mode_external(d)) )
2482 unsigned long hl2mfn;
2484 if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
2485 MFN_PINNED(hl2mfn) )
2486 shadow_unpin(hl2mfn);
2491 return need_flush;
2494 void __shadow_sync_all(struct domain *d)
2496 struct out_of_sync_entry *entry;
2497 int need_flush = 0;
2499 perfc_incrc(shadow_sync_all);
2501 ASSERT(shadow_lock_is_acquired(d));
2503 // First, remove all write permissions to the page tables
2504 //
2505 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2507 // Skip entries that have low bits set... Those aren't
2508 // real PTEs.
2509 //
2510 if ( entry->writable_pl1e & (sizeof(l1_pgentry_t)-1) )
2511 continue;
2513 l1_pgentry_t *ppte = (l1_pgentry_t *)(
2514 (char *)map_domain_page(entry->writable_pl1e >> PAGE_SHIFT) +
2515 (entry->writable_pl1e & ~PAGE_MASK));
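/*
 * writable_pl1e holds the machine address of the guest PTE through which the
 * out-of-sync page is writable; splitting it into a frame number (mapped via
 * map_domain_page()) and an in-page offset recovers a pointer to that PTE.
 * Entries with low bits set were skipped above because they are markers
 * rather than real PTE addresses.
 */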
2516 l1_pgentry_t opte = *ppte;
2517 l1_pgentry_t npte = opte;
2518 l1e_remove_flags(npte, _PAGE_RW);
2520 if ( (l1e_get_flags(npte) & _PAGE_PRESENT) &&
2521 !shadow_get_page_from_l1e(npte, d) )
2522 BUG();
2523 *ppte = npte;
2524 shadow_put_page_from_l1e(opte, d);
2526 unmap_domain_page(ppte);
2529 // XXX mafetter: SMP
2530 //
2531 // With the current algorithm, we've gotta flush all the TLBs
2532 // before we can safely continue. I don't think we want to
2533 // do it this way, so I think we should consider making
2534 // entirely private copies of the shadow for each vcpu, and/or
2535 // possibly having a mix of private and shared shadow state
2536 // (any path from a PTE that grants write access to an out-of-sync
2537 // page table page needs to be vcpu private).
2538 //
2539 #if 0 // this should be enabled for SMP guests...
2540 flush_tlb_mask(cpu_online_map);
2541 #endif
2542 need_flush = 1;
2544 // Second, resync all L1 pages, then L2 pages, etc...
2545 //
2546 need_flush |= resync_all(d, PGT_l1_shadow);
2547 if ( shadow_mode_translate(d) )
2548 need_flush |= resync_all(d, PGT_hl2_shadow);
2549 need_flush |= resync_all(d, PGT_l2_shadow);
2551 if ( need_flush && !unlikely(shadow_mode_external(d)) )
2552 local_flush_tlb();
2554 free_out_of_sync_state(d);
2557 int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
2559 l1_pgentry_t gpte, spte, orig_gpte;
2560 struct vcpu *v = current;
2561 struct domain *d = v->domain;
2562 l2_pgentry_t gpde;
2564 spte = l1e_empty();
2566 SH_VVLOG("shadow_fault( va=%lx, code=%lu )",
2567 va, (unsigned long)regs->error_code);
2568 perfc_incrc(shadow_fault_calls);
2570 check_pagetable(v, "pre-sf");
2572 /*
2573 * Don't let someone else take the guest's table pages out-of-sync.
2574 */
2575 shadow_lock(d);
2577 /* XXX - FIX THIS COMMENT!!!
2578 * STEP 1. Check to see if this fault might have been caused by an
2579 * out-of-sync table page entry, or if we should pass this
2580 * fault onto the guest.
2581 */
2582 __shadow_sync_va(v, va);
2584 /*
2585 * STEP 2. Check the guest PTE.
2586 */
2587 __guest_get_l2e(v, va, &gpde);
2588 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2590 SH_VVLOG("shadow_fault - EXIT: L1 not present");
2591 perfc_incrc(shadow_fault_bail_pde_not_present);
2592 goto fail;
2595 // This can't fault because we hold the shadow lock and we've ensured that
2596 // the mapping is in-sync, so the check of the PDE's present bit, above,
2597 // covers this access.
2598 //
2599 orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
2600 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
2602 SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
2603 l1e_get_intpte(gpte));
2604 perfc_incrc(shadow_fault_bail_pte_not_present);
2605 goto fail;
2608 /* Write fault? */
2609 if ( regs->error_code & 2 )
2611 int allow_writes = 0;
2613 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
2615 if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) )
2617 allow_writes = 1;
2618 l1e_add_flags(gpte, _PAGE_RW);
2620 else
2622 /* Write fault on a read-only mapping. */
2623 SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
2624 l1e_get_intpte(gpte));
2625 perfc_incrc(shadow_fault_bail_ro_mapping);
2626 goto fail;
2630 if ( !l1pte_write_fault(v, &gpte, &spte, va) )
2632 SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
2633 perfc_incrc(write_fault_bail);
2634 shadow_unlock(d);
2635 return 0;
2638 if ( allow_writes )
2639 l1e_remove_flags(gpte, _PAGE_RW);
2641 else
2643 if ( !l1pte_read_fault(d, &gpte, &spte) )
2645 SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
2646 perfc_incrc(read_fault_bail);
2647 shadow_unlock(d);
2648 return 0;
2652 /*
2653 * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
2654 */
2655 if ( l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK) )
2657 /* XXX Watch out for read-only L2 entries! (not used in Linux). */
2658 if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
2659 &gpte, sizeof(gpte))) )
2661 printk("%s() failed, crashing domain %d "
2662 "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
2663 __func__,d->domain_id, l2e_get_intpte(gpde), va);
2664 domain_crash_synchronous();
2667 // if necessary, record the page table page as dirty
2668 if ( unlikely(shadow_mode_log_dirty(d)) )
2669 __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
2672 shadow_set_l1e(va, spte, 1);
2674 perfc_incrc(shadow_fault_fixed);
2675 d->arch.shadow_fault_count++;
2677 shadow_unlock(d);
2679 check_pagetable(v, "post-sf");
2680 return EXCRET_fault_fixed;
2682 fail:
2683 shadow_unlock(d);
2684 return 0;
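/*
 * Illustrative aside: shadow_fault() keys off bit 1 of the hardware page-fault
 * error code to detect write faults. For reference, a minimal decoder of the
 * architectural bits it relies on; the struct and names below are illustrative
 * only, not part of this file.
 */
struct example_pf_error {
    int present;   /* 1: protection violation, 0: page not present */
    int write;     /* 1: write access, 0: read access */
    int user;      /* 1: fault raised while executing in user mode */
};

static inline struct example_pf_error example_decode_pf_error(unsigned long ec)
{
    struct example_pf_error e;

    e.present = !!(ec & 1);
    e.write   = !!(ec & 2);   /* the bit tested by shadow_fault() above */
    e.user    = !!(ec & 4);

    return e;
}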
2687 void shadow_l1_normal_pt_update(
2688 struct domain *d,
2689 unsigned long pa, l1_pgentry_t gpte,
2690 struct domain_mmap_cache *cache)
2692 unsigned long sl1mfn;
2693 l1_pgentry_t *spl1e, spte;
2695 shadow_lock(d);
2697 sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
2698 if ( sl1mfn )
2700 SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
2701 (void *)pa, l1e_get_intpte(gpte));
2702 l1pte_propagate_from_guest(current->domain, gpte, &spte);
2704 spl1e = map_domain_page_with_cache(sl1mfn, cache);
2705 spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
2706 unmap_domain_page_with_cache(spl1e, cache);
2709 shadow_unlock(d);
2712 void shadow_l2_normal_pt_update(
2713 struct domain *d,
2714 unsigned long pa, l2_pgentry_t gpde,
2715 struct domain_mmap_cache *cache)
2717 unsigned long sl2mfn;
2718 l2_pgentry_t *spl2e;
2720 shadow_lock(d);
2722 sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
2723 if ( sl2mfn )
2725 SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
2726 (void *)pa, l2e_get_intpte(gpde));
2727 spl2e = map_domain_page_with_cache(sl2mfn, cache);
2728 validate_pde_change(d, gpde,
2729 &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
2730 unmap_domain_page_with_cache(spl2e, cache);
2733 shadow_unlock(d);
2736 #if CONFIG_PAGING_LEVELS >= 3
2737 void shadow_l3_normal_pt_update(
2738 struct domain *d,
2739 unsigned long pa, l3_pgentry_t gpde,
2740 struct domain_mmap_cache *cache)
2742 BUG(); // not yet implemented
2744 #endif
2746 #if CONFIG_PAGING_LEVELS >= 4
2747 void shadow_l4_normal_pt_update(
2748 struct domain *d,
2749 unsigned long pa, l4_pgentry_t gpde,
2750 struct domain_mmap_cache *cache)
2752 BUG(); // not yet implemented
2754 #endif
2756 int shadow_do_update_va_mapping(unsigned long va,
2757 l1_pgentry_t val,
2758 struct vcpu *v)
2760 struct domain *d = v->domain;
2761 l1_pgentry_t spte;
2762 int rc = 0;
2764 shadow_lock(d);
2766 //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val));
2768 // This is actually overkill - we don't need to sync the L1 itself,
2769 // just everything involved in getting to this L1 (i.e. we need
2770 // linear_pg_table[l1_linear_offset(va)] to be in sync)...
2771 //
2772 __shadow_sync_va(v, va);
2774 l1pte_propagate_from_guest(d, val, &spte);
2775 shadow_set_l1e(va, spte, 0);
2777 /*
2778 * If we're in log-dirty mode then we need to note that we've updated
2779 * the PTE in the PT-holding page. We need the machine frame number
2780 * for this.
2781 */
2782 if ( shadow_mode_log_dirty(d) )
2783 __mark_dirty(d, va_to_l1mfn(v, va));
2785 // out:
2786 shadow_unlock(d);
2788 return rc;
2792 /*
2793 * What lives where in the 32-bit address space in the various shadow modes,
2794 * and what it uses to get/maintain that mapping.
2796 * SHADOW MODE: none enable translate external
2798 * 4KB things:
2799 * guest_vtable lin_l2 mapped per gl2 lin_l2 via hl2 mapped per gl2
2800 * shadow_vtable n/a sh_lin_l2 sh_lin_l2 mapped per gl2
2801 * hl2_vtable n/a n/a lin_hl2 via hl2 mapped per gl2
2802 * monitor_vtable n/a n/a n/a mapped once
2804 * 4MB things:
2805 * guest_linear lin via gl2 lin via gl2 lin via hl2 lin via hl2
2806 * shadow_linear n/a sh_lin via sl2 sh_lin via sl2 sh_lin via sl2
2807 * monitor_linear n/a n/a n/a ???
2808 * perdomain perdomain perdomain perdomain perdomain
2809 * R/O M2P R/O M2P R/O M2P n/a n/a
2810 * R/W M2P R/W M2P R/W M2P R/W M2P R/W M2P
2811 * P2M n/a n/a R/O M2P R/O M2P
2813 * NB:
2814 * update_pagetables(), __update_pagetables(), shadow_mode_enable(),
2815 * shadow_l2_table(), shadow_hl2_table(), and alloc_monitor_pagetable()
2816 * all play a part in maintaining these mappings.
2817 */
2818 void __update_pagetables(struct vcpu *v)
2820 struct domain *d = v->domain;
2821 unsigned long gmfn = pagetable_get_pfn(v->arch.guest_table);
2822 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
2823 unsigned long smfn, hl2mfn, old_smfn;
2825 int max_mode = ( shadow_mode_external(d) ? SHM_external
2826 : shadow_mode_translate(d) ? SHM_translate
2827 : shadow_mode_enabled(d) ? SHM_enable
2828 : 0 );
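/*
 * The tests above are ordered strongest-first, so max_mode records the most
 * fully featured shadow mode in force; the SHM_external case additionally
 * assumes translate mode (see the ASSERT in the monitor-table fixup below).
 */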
2830 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
2831 ASSERT( max_mode );
2833 /*
2834 * arch.guest_vtable
2835 */
2836 if ( max_mode & (SHM_enable | SHM_external) )
2838 if ( likely(v->arch.guest_vtable != NULL) )
2839 unmap_domain_page(v->arch.guest_vtable);
2840 v->arch.guest_vtable = map_domain_page(gmfn);
2843 /*
2844 * arch.shadow_table
2845 */
2846 if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
2847 smfn = shadow_l2_table(d, gpfn, gmfn);
2848 if ( !get_shadow_ref(smfn) )
2849 BUG();
2850 old_smfn = pagetable_get_pfn(v->arch.shadow_table);
2851 v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
2852 if ( old_smfn )
2853 put_shadow_ref(old_smfn);
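/*
 * The reference on the new shadow root is taken before the old root's
 * reference is dropped, so a shadow shared by the old and new guest tables
 * is never transiently freed during the switch.
 */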
2855 SH_VVLOG("__update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
2857 /*
2858 * arch.shadow_vtable
2859 */
2860 if ( max_mode == SHM_external )
2862 if ( v->arch.shadow_vtable )
2863 unmap_domain_page(v->arch.shadow_vtable);
2864 v->arch.shadow_vtable = map_domain_page(smfn);
2867 /*
2868 * arch.hl2_vtable
2869 */
2871 // if max_mode == SHM_translate, then the hl2 is already installed
2872 // correctly in its smfn, and there's nothing to do.
2873 //
2874 if ( max_mode == SHM_external )
2876 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
2877 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
2878 if ( v->arch.hl2_vtable )
2879 unmap_domain_page(v->arch.hl2_vtable);
2880 v->arch.hl2_vtable = map_domain_page(hl2mfn);
2883 /*
2884 * fixup pointers in monitor table, as necessary
2885 */
2886 if ( max_mode == SHM_external )
2888 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
2889 l2_pgentry_t old_hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
2890 l2_pgentry_t old_sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
2892 ASSERT( shadow_mode_translate(d) );
2894 if ( !get_shadow_ref(hl2mfn) )
2895 BUG();
2896 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
2897 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
2898 if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
2899 put_shadow_ref(l2e_get_pfn(old_hl2e));
2901 if ( !get_shadow_ref(smfn) )
2902 BUG();
2903 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
2904 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
2905 if ( l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
2906 put_shadow_ref(l2e_get_pfn(old_sl2e));
2908 // XXX - maybe this can be optimized somewhat??
2909 local_flush_tlb();
2914 /************************************************************************/
2915 /************************************************************************/
2916 /************************************************************************/
2918 #if SHADOW_DEBUG
2920 // The following is entirely for _check_pagetable()'s benefit.
2921 // _check_pagetable() wants to know whether a given entry in a
2922 // shadow page table is supposed to be the shadow of the guest's
2923 // current entry, or the shadow of the entry held in the snapshot
2924 // taken above.
2925 //
2926 // Here, we mark all currently existing entries as reflecting
2927 // the snapshot, above. All other places in xen that update
2928 // the shadow will keep the shadow in sync with the guest's
2929 // entries (via l1pte_propagate_from_guest and friends), which clear
2930 // the SHADOW_REFLECTS_SNAPSHOT bit.
2931 //
2932 static void
2933 mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
2935 unsigned long smfn;
2936 l1_pgentry_t *l1e;
2937 l2_pgentry_t *l2e;
2938 unsigned i;
2940 if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
2942 l1e = map_domain_page(smfn);
2943 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
2944 if ( is_guest_l1_slot(i) &&
2945 (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
2946 l1e_add_flags(l1e[i], SHADOW_REFLECTS_SNAPSHOT);
2947 unmap_domain_page(l1e);
2950 if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
2952 l2e = map_domain_page(smfn);
2953 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2954 if ( is_guest_l2_slot(0, i) &&
2955 (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
2956 l2e_add_flags(l2e[i], SHADOW_REFLECTS_SNAPSHOT);
2957 unmap_domain_page(l2e);
2961 // BUG: these are not SMP safe...
2962 static int sh_l2_present;
2963 static int sh_l1_present;
2964 char * sh_check_name;
2965 int shadow_status_noswap;
2967 #define v2m(_v, _adr) ({ \
2968 unsigned long _a = (unsigned long)(_adr); \
2969 l2_pgentry_t _pde = shadow_linear_l2_table(_v)[l2_table_offset(_a)]; \
2970 unsigned long _pa = -1; \
2971 if ( l2e_get_flags(_pde) & _PAGE_PRESENT ) \
2972 { \
2973 l1_pgentry_t _pte; \
2974 _pte = shadow_linear_pg_table[l1_linear_offset(_a)]; \
2975 if ( l1e_get_flags(_pte) & _PAGE_PRESENT ) \
2976 _pa = l1e_get_paddr(_pte); \
2977 } \
2978 _pa | (_a & ~PAGE_MASK); \
2979 })
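/*
 * v2m() is a debugging helper: it walks the shadow linear mappings (L2 entry,
 * then L1 entry) to turn a hypervisor virtual address into the machine address
 * printed by FAIL() below, yielding an all-ones value rather than faulting if
 * either level is not present.
 */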
2981 #define FAIL(_f, _a...) \
2982 do { \
2983 printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n", \
2984 sh_check_name, level, l2_idx, l1_idx, ## _a, \
2985 __FILE__, __LINE__); \
2986 printk("guest_pte=%" PRIpte " eff_guest_pte=%" PRIpte \
2987 " shadow_pte=%" PRIpte " snapshot_pte=%" PRIpte \
2988 " &guest=%p &shadow=%p &snap=%p v2m(&guest)=%p" \
2989 " v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n", \
2990 l1e_get_intpte(guest_pte), l1e_get_intpte(eff_guest_pte), \
2991 l1e_get_intpte(shadow_pte), l1e_get_intpte(snapshot_pte), \
2992 p_guest_pte, p_shadow_pte, p_snapshot_pte, \
2993 (void *)v2m(v, p_guest_pte), (void *)v2m(v, p_shadow_pte), \
2994 (void *)v2m(v, p_snapshot_pte), \
2995 (l2_idx << L2_PAGETABLE_SHIFT) | \
2996 (l1_idx << L1_PAGETABLE_SHIFT)); \
2997 errors++; \
2998 } while ( 0 )
3000 static int check_pte(
3001 struct vcpu *v,
3002 l1_pgentry_t *p_guest_pte,
3003 l1_pgentry_t *p_shadow_pte,
3004 l1_pgentry_t *p_snapshot_pte,
3005 int level, int l2_idx, int l1_idx)
3007 struct domain *d = v->domain;
3008 l1_pgentry_t guest_pte = *p_guest_pte;
3009 l1_pgentry_t shadow_pte = *p_shadow_pte;
3010 l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
3011 l1_pgentry_t eff_guest_pte;
3012 unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
3013 int errors = 0, guest_writable;
3014 int page_table_page;
3016 if ( (l1e_get_intpte(shadow_pte) == 0) ||
3017 (l1e_get_intpte(shadow_pte) == 0xdeadface) ||
3018 (l1e_get_intpte(shadow_pte) == 0x00000E00) )
3019 return errors; /* always safe */
3021 if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
3022 FAIL("Non zero not present shadow_pte");
3024 if ( level == 2 ) sh_l2_present++;
3025 if ( level == 1 ) sh_l1_present++;
3027 if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
3028 eff_guest_pte = snapshot_pte;
3029 else
3030 eff_guest_pte = guest_pte;
3032 if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
3033 FAIL("Guest not present yet shadow is");
3035 mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
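/*
 * mask keeps only the attribute bits that must agree between the guest and
 * shadow entries (PRESENT, USER and the caching bits). The frame number is
 * deliberately excluded, since the shadow holds an MFN while the guest entry
 * may not, as are RW/ACCESSED/DIRTY/GLOBAL/AVAIL, which may legitimately
 * differ.
 */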
3037 if ( ((l1e_get_intpte(shadow_pte) & mask) != (l1e_get_intpte(eff_guest_pte) & mask)) )
3038 FAIL("Corrupt?");
3040 if ( (level == 1) &&
3041 (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
3042 !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
3043 FAIL("Dirty coherence");
3045 if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
3046 !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
3047 FAIL("Accessed coherence");
3049 if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
3050 FAIL("global bit set in shadow");
3052 eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
3053 eff_guest_mfn = __gpfn_to_mfn(d, eff_guest_pfn);
3054 shadow_mfn = l1e_get_pfn(shadow_pte);
3056 if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
3057 FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%" PRIpte "\n",
3058 __func__, eff_guest_pfn, l1e_get_intpte(eff_guest_pte));
3060 page_table_page = mfn_is_page_table(eff_guest_mfn);
3062 guest_writable =
3063 (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
3064 (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
3066 if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
3068 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
3069 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3070 frame_table[eff_guest_mfn].u.inuse.type_info,
3071 page_table_page);
3072 FAIL("RW coherence");
3075 if ( (level == 1) &&
3076 (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
3077 !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
3079 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
3080 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3081 frame_table[eff_guest_mfn].u.inuse.type_info,
3082 page_table_page);
3083 FAIL("RW2 coherence");
3086 if ( eff_guest_mfn == shadow_mfn )
3088 if ( level > 1 )
3089 FAIL("Linear map ???"); /* XXX this will fail on BSD */
3091 else
3093 if ( level < 2 )
3094 FAIL("Shadow in L1 entry?");
3096 if ( level == 2 )
3098 if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
3099 FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
3100 __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
3102 else
3103 BUG(); // XXX -- not handled yet.
3106 return errors;
3108 #undef FAIL
3109 #undef v2m
3111 static int check_l1_table(
3112 struct vcpu *v, unsigned long gpfn,
3113 unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
3115 struct domain *d = v->domain;
3116 int i;
3117 unsigned long snapshot_mfn;
3118 l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
3119 int errors = 0;
3121 if ( page_out_of_sync(pfn_to_page(gmfn)) )
3123 snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
3124 ASSERT(snapshot_mfn);
3125 p_snapshot = map_domain_page(snapshot_mfn);
3128 p_guest = map_domain_page(gmfn);
3129 p_shadow = map_domain_page(smfn);
3131 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3132 errors += check_pte(v, p_guest+i, p_shadow+i,
3133 p_snapshot ? p_snapshot+i : NULL,
3134 1, l2_idx, i);
3136 unmap_domain_page(p_shadow);
3137 unmap_domain_page(p_guest);
3138 if ( p_snapshot )
3139 unmap_domain_page(p_snapshot);
3141 return errors;
3144 #define FAILPT(_f, _a...) \
3145 do { \
3146 printk("XXX FAIL %s-PT " _f "\n", sh_check_name, ## _a ); \
3147 errors++; \
3148 } while ( 0 )
3150 int check_l2_table(
3151 struct vcpu *v, unsigned long gmfn, unsigned long smfn, int oos_pdes)
3153 struct domain *d = v->domain;
3154 l2_pgentry_t *gpl2e = (l2_pgentry_t *)map_domain_page(gmfn);
3155 l2_pgentry_t *spl2e = (l2_pgentry_t *)map_domain_page(smfn);
3156 l2_pgentry_t match;
3157 int i;
3158 int errors = 0;
3159 int limit;
3161 if ( !oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != d) )
3162 FAILPT("domain doesn't own page");
3163 if ( oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != NULL) )
3164 FAILPT("bogus owner for snapshot page");
3165 if ( page_get_owner(pfn_to_page(smfn)) != NULL )
3166 FAILPT("shadow page mfn=0x%lx is owned by someone, domid=%d",
3167 smfn, page_get_owner(pfn_to_page(smfn))->domain_id);
3169 #if 0
3170 if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3171 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3172 ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
3173 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
3175 for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3176 i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
3177 i++ )
3178 printk("+++ (%d) %lx %lx\n",i,
3179 l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
3180 FAILPT("hypervisor entries inconsistent");
3183 if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
3184 l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
3185 FAILPT("hypervisor linear map inconsistent");
3186 #endif
3188 match = l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
3189 if ( !shadow_mode_external(d) &&
3190 l2e_has_changed(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
3191 match, PAGE_FLAG_MASK))
3193 FAILPT("hypervisor shadow linear map inconsistent %" PRIpte " %" PRIpte,
3194 l2e_get_intpte(spl2e[SH_LINEAR_PT_VIRT_START >>
3195 L2_PAGETABLE_SHIFT]),
3196 l2e_get_intpte(match));
3199 match = l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
3200 if ( !shadow_mode_external(d) &&
3201 l2e_has_changed(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
3202 match, PAGE_FLAG_MASK))
3204 FAILPT("hypervisor per-domain map inconsistent saw %" PRIpte ", expected (va=%p) %" PRIpte,
3205 l2e_get_intpte(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
3206 d->arch.mm_perdomain_pt,
3207 l2e_get_intpte(match));
3210 #ifdef __i386__
3211 if ( shadow_mode_external(d) )
3212 limit = L2_PAGETABLE_ENTRIES;
3213 else
3214 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3215 #else
3216 limit = 0; /* XXX x86/64 XXX */
3217 #endif
3219 /* Check the whole L2. */
3220 for ( i = 0; i < limit; i++ )
3221 errors += check_pte(v,
3222 (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
3223 (l1_pgentry_t*)(&spl2e[i]),
3224 NULL,
3225 2, i, 0);
3227 unmap_domain_page(spl2e);
3228 unmap_domain_page(gpl2e);
3230 #if 1
3231 if ( errors )
3232 printk("check_l2_table returning %d errors\n", errors);
3233 #endif
3235 return errors;
3237 #undef FAILPT
3239 int _check_pagetable(struct vcpu *v, char *s)
3241 struct domain *d = v->domain;
3242 pagetable_t pt = v->arch.guest_table;
3243 unsigned long gptbase = pagetable_get_paddr(pt);
3244 unsigned long ptbase_pfn, smfn;
3245 unsigned long i;
3246 l2_pgentry_t *gpl2e, *spl2e;
3247 unsigned long ptbase_mfn = 0;
3248 int errors = 0, limit, oos_pdes = 0;
3250 //_audit_domain(d, AUDIT_QUIET);
3251 shadow_lock(d);
3253 sh_check_name = s;
3254 //SH_VVLOG("%s-PT Audit", s);
3255 sh_l2_present = sh_l1_present = 0;
3256 perfc_incrc(check_pagetable);
3258 ptbase_mfn = gptbase >> PAGE_SHIFT;
3259 ptbase_pfn = __mfn_to_gpfn(d, ptbase_mfn);
3261 if ( !(smfn = __shadow_status(d, ptbase_pfn, PGT_base_page_table)) )
3263 printk("%s-PT %lx not shadowed\n", s, gptbase);
3264 goto out;
3266 if ( page_out_of_sync(pfn_to_page(ptbase_mfn)) )
3268 ptbase_mfn = __shadow_status(d, ptbase_pfn, PGT_snapshot);
3269 oos_pdes = 1;
3270 ASSERT(ptbase_mfn);
3273 errors += check_l2_table(v, ptbase_mfn, smfn, oos_pdes);
3275 gpl2e = (l2_pgentry_t *) map_domain_page(ptbase_mfn);
3276 spl2e = (l2_pgentry_t *) map_domain_page(smfn);
3278 /* Go back and recurse. */
3279 #ifdef __i386__
3280 if ( shadow_mode_external(d) )
3281 limit = L2_PAGETABLE_ENTRIES;
3282 else
3283 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3284 #else
3285 limit = 0; /* XXX x86/64 XXX */
3286 #endif
3288 for ( i = 0; i < limit; i++ )
3290 unsigned long gl1pfn = l2e_get_pfn(gpl2e[i]);
3291 unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
3292 unsigned long sl1mfn = l2e_get_pfn(spl2e[i]);
3294 if ( l2e_get_intpte(spl2e[i]) != 0 ) /* FIXME: check flags? */
3296 errors += check_l1_table(v, gl1pfn, gl1mfn, sl1mfn, i);
3300 unmap_domain_page(spl2e);
3301 unmap_domain_page(gpl2e);
3303 #if 0
3304 SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
3305 sh_l2_present, sh_l1_present);
3306 #endif
3308 out:
3309 if ( errors )
3310 BUG();
3312 shadow_unlock(d);
3314 return errors;
3317 int _check_all_pagetables(struct vcpu *v, char *s)
3319 struct domain *d = v->domain;
3320 int i;
3321 struct shadow_status *a;
3322 unsigned long gmfn;
3323 int errors = 0;
3325 shadow_status_noswap = 1;
3327 sh_check_name = s;
3328 SH_VVLOG("%s-PT Audit domid=%d", s, d->domain_id);
3329 sh_l2_present = sh_l1_present = 0;
3330 perfc_incrc(check_all_pagetables);
3332 for (i = 0; i < shadow_ht_buckets; i++)
3334 a = &d->arch.shadow_ht[i];
3335 while ( a && a->gpfn_and_flags )
3337 gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
3339 switch ( a->gpfn_and_flags & PGT_type_mask )
3341 case PGT_l1_shadow:
3342 errors += check_l1_table(v, a->gpfn_and_flags & PGT_mfn_mask,
3343 gmfn, a->smfn, 0);
3344 break;
3345 case PGT_l2_shadow:
3346 errors += check_l2_table(v, gmfn, a->smfn,
3347 page_out_of_sync(pfn_to_page(gmfn)));
3348 break;
3349 case PGT_l3_shadow:
3350 case PGT_l4_shadow:
3351 case PGT_hl2_shadow:
3352 BUG(); // XXX - ought to fix this...
3353 break;
3354 case PGT_snapshot:
3355 case PGT_writable_pred:
3356 break;
3357 default:
3358 errors++;
3359 printk("unexpected shadow type %lx, gpfn=%lx, "
3360 "gmfn=%lx smfn=%lx\n",
3361 a->gpfn_and_flags & PGT_type_mask,
3362 a->gpfn_and_flags & PGT_mfn_mask,
3363 gmfn, a->smfn);
3364 BUG();
3366 a = a->next;
3370 shadow_status_noswap = 0;
3372 if ( errors )
3373 BUG();
3375 return errors;
3378 #endif // SHADOW_DEBUG
3380 /*
3381 * Local variables:
3382 * mode: C
3383 * c-set-style: "BSD"
3384 * c-basic-offset: 4
3385 * tab-width: 4
3386 * indent-tabs-mode: nil
3387 * End:
3388 */