ia64/xen-unstable

xen/arch/x86/shadow32.c @ 8114:a1e99c0104cb

Small fix for shadow mode: fix a p.t. limit check.

Signed-off-by: Xiaofeng Ling <xiaofeng.ling@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Nov 29 11:42:21 2005 +0100 (2005-11-29)
parents 66e16c4685f5
children d0ca851445e2
line source
1 /******************************************************************************
2 * arch/x86/shadow.c
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
23 #include <xen/config.h>
24 #include <xen/types.h>
25 #include <xen/mm.h>
26 #include <xen/domain_page.h>
27 #include <asm/shadow.h>
28 #include <asm/page.h>
29 #include <xen/event.h>
30 #include <xen/sched.h>
31 #include <xen/trace.h>
33 #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
34 #define va_to_l1mfn(_ed, _va) \
35 (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
37 static void shadow_free_snapshot(struct domain *d,
38 struct out_of_sync_entry *entry);
39 static void remove_out_of_sync_entries(struct domain *d, unsigned long smfn);
40 static void free_writable_pte_predictions(struct domain *d);
42 #if SHADOW_DEBUG
43 static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
44 #endif
46 /********
48 There's a per-domain shadow table spin lock which works fine for SMP
49 hosts. We don't have to worry about interrupts as no shadow operations
50 happen in an interrupt context. It's probably not quite ready for SMP
51 guest operation as we have to worry about synchronisation between gpte
52 and spte updates. It's possible that this might only happen in a
53 hypercall context, in which case we'll probably have a per-domain
54 hypercall lock anyhow (at least initially).
56 ********/
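/*
 * Editor's sketch (hypothetical helper, not part of this changeset): the
 * locking discipline described above, as used by entry points further down
 * such as remove_shadow() and shadow_mode_enable().
 */
static inline void example_locked_shadow_op(struct domain *d)
{
    shadow_lock(d);      /* per-domain shadow spinlock; never taken in irq context */
    /* ... inspect or update d's shadow state ... */
    shadow_unlock(d);
}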
58 static inline int
59 shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
60 unsigned long new_type)
61 {
62 struct pfn_info *page = pfn_to_page(gmfn);
63 int pinned = 0, okay = 1;
65 if ( page_out_of_sync(page) )
66 {
67 // Don't know how long ago this snapshot was taken.
68 // Can't trust it to be recent enough.
69 //
70 __shadow_sync_mfn(d, gmfn);
71 }
73 if ( !shadow_mode_refcounts(d) )
74 return 1;
76 if ( unlikely(page_is_page_table(page)) )
77 return 1;
79 FSH_LOG("%s: gpfn=%lx gmfn=%lx nt=%08lx", __func__, gpfn, gmfn, new_type);
81 if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
82 {
83 FSH_LOG("%s: couldn't find/remove all write accesses, gpfn=%lx gmfn=%lx",
84 __func__, gpfn, gmfn);
85 #if 1 || defined(LIVE_DANGEROUSLY)
86 set_bit(_PGC_page_table, &page->count_info);
87 return 1;
88 #endif
89 return 0;
91 }
93 // To convert this page to use as a page table, the writable count
94 // should now be zero. Test this by grabbing the page as a page table,
95 // and then immediately releasing. This will also deal with any
96 // necessary TLB flushing issues for us.
97 //
98 // The cruft here about pinning doesn't really work right. This
99 // needs rethinking/rewriting... Need to gracefully deal with the
100 // TLB flushes required when promoting a writable page, and also deal
101 // with any outstanding (external) writable refs to this page (by
102 // refusing to promote it). The pinning headache complicates this
103 // code -- it would all get much simpler if we stop using
104 // shadow_lock() and move the shadow code to BIGLOCK().
105 //
106 if ( unlikely(!get_page(page, d)) )
107 BUG(); // XXX -- needs more thought for a graceful failure
108 if ( unlikely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) )
109 {
110 pinned = 1;
111 put_page_and_type(page);
112 }
113 if ( get_page_type(page, PGT_base_page_table) )
114 {
115 set_bit(_PGC_page_table, &page->count_info);
116 put_page_type(page);
117 }
118 else
119 {
120 printk("shadow_promote: get_page_type failed "
121 "dom%d gpfn=%lx gmfn=%lx t=%08lx\n",
122 d->domain_id, gpfn, gmfn, new_type);
123 okay = 0;
124 }
126 // Now put the type back to writable...
127 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
128 BUG(); // XXX -- needs more thought for a graceful failure
129 if ( unlikely(pinned) )
130 {
131 if ( unlikely(test_and_set_bit(_PGT_pinned,
132 &page->u.inuse.type_info)) )
133 BUG(); // hmm... someone pinned this again?
134 }
135 else
136 put_page_and_type(page);
138 return okay;
139 }
141 static inline void
142 shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
143 {
144 if ( !shadow_mode_refcounts(d) )
145 return;
147 ASSERT(frame_table[gmfn].count_info & PGC_page_table);
149 if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
150 {
151 clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
153 if ( page_out_of_sync(pfn_to_page(gmfn)) )
154 {
155 remove_out_of_sync_entries(d, gmfn);
156 }
157 }
158 }
160 /*
161 * Things in shadow mode that collect get_page() refs to the domain's
162 * pages are:
163 * - PGC_allocated takes a gen count, just like normal.
164 * - A writable page can be pinned (paravirtualized guests may consider
165 * these pages to be L1s or L2s, and don't know the difference).
166 * Pinning a page takes a gen count (but, for domains in shadow mode,
167 * it *doesn't* take a type count)
168 * - CR3 grabs a ref to whatever it points at, just like normal.
169 * - Shadow mode grabs an initial gen count for itself, as a placeholder
170 * for whatever references will exist.
171 * - Shadow PTEs that point to a page take a gen count, just like regular
172 * PTEs. However, they don't get a type count, as get_page_type() is
173 * hardwired to keep writable pages' counts at 1 for domains in shadow
174 * mode.
175 * - Whenever we shadow a page, the entry in the shadow hash grabs a
176 * general ref to the page.
177 * - Whenever a page goes out of sync, the out of sync entry grabs a
178 * general ref to the page.
179 */
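/*
 * Editor's sketch (hypothetical helper, not part of this changeset): the
 * "general ref" an out-of-sync entry holds, as taken by
 * __shadow_mark_mfn_out_of_sync() and dropped by release_out_of_sync_entry()
 * below.
 */
static inline void example_oos_general_ref(struct domain *d, unsigned long gmfn)
{
    struct pfn_info *page = &frame_table[gmfn];
    get_page(page, d);   /* entry on d->arch.out_of_sync holds a gen count */
    /* ... page stays referenced until the entry is released ... */
    put_page(page);      /* dropped when the out-of-sync entry goes away */
}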
180 /*
181 * pfn_info fields for pages allocated as shadow pages:
182 *
183 * All 32 bits of count_info are a simple count of refs to this shadow
184 * from a) other shadow pages, b) current CR3's (aka ed->arch.shadow_table),
185 * c) if it's a pinned shadow root pgtable, d) outstanding out-of-sync
186 * references.
187 *
188 * u.inuse._domain is left NULL, to prevent accidentally allowing some random
189 * domain from gaining permissions to map this page.
190 *
191 * u.inuse.type_info & PGT_type_mask remembers what kind of page is being
192 * shadowed.
193 * u.inuse.type_info & PGT_mfn_mask holds the mfn of the page being shadowed.
194 * u.inuse.type_info & PGT_pinned says that an extra reference to this shadow
195 * currently exists because this is a shadow of a root page, and we
196 * don't want to let those disappear just because no CR3 is currently pointing
197 * at it.
198 *
199 * tlbflush_timestamp holds a min & max index of valid page table entries
200 * within the shadow page.
201 */
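/*
 * Editor's sketch (hypothetical helper, not part of this changeset): decoding
 * the shadow-page metadata described above, mirroring what free_shadow_page()
 * and free_shadow_l1_table() do further down.
 */
static inline void example_decode_shadow_page(unsigned long smfn)
{
    struct pfn_info *page = &frame_table[smfn];
    unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;  /* shadowed mfn */
    unsigned long type = page->u.inuse.type_info & PGT_type_mask; /* e.g. PGT_l1_shadow */
    int min = SHADOW_MIN(page->tlbflush_timestamp); /* lowest valid pte index */
    int max = SHADOW_MAX(page->tlbflush_timestamp); /* highest valid pte index */
    (void)gmfn; (void)type; (void)min; (void)max;   /* illustration only */
}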
203 static inline unsigned long
204 alloc_shadow_page(struct domain *d,
205 unsigned long gpfn, unsigned long gmfn,
206 u32 psh_type)
207 {
208 struct pfn_info *page;
209 unsigned long smfn;
210 int pin = 0;
211 void *l1;
213 // Currently, we only keep pre-zero'ed pages around for use as L1's...
214 // This will change. Soon.
215 //
216 if ( psh_type == PGT_l1_shadow )
217 {
218 if ( !list_empty(&d->arch.free_shadow_frames) )
219 {
220 struct list_head *entry = d->arch.free_shadow_frames.next;
221 page = list_entry(entry, struct pfn_info, list);
222 list_del(entry);
223 perfc_decr(free_l1_pages);
224 }
225 else
226 {
227 page = alloc_domheap_page(NULL);
228 l1 = map_domain_page(page_to_pfn(page));
229 memset(l1, 0, PAGE_SIZE);
230 unmap_domain_page(l1);
231 }
232 }
233 else
234 page = alloc_domheap_page(NULL);
236 if ( unlikely(page == NULL) )
237 {
238 printk("Couldn't alloc shadow page! dom%d count=%d\n",
239 d->domain_id, d->arch.shadow_page_count);
240 printk("Shadow table counts: l1=%d l2=%d hl2=%d snapshot=%d\n",
241 perfc_value(shadow_l1_pages),
242 perfc_value(shadow_l2_pages),
243 perfc_value(hl2_table_pages),
244 perfc_value(snapshot_pages));
245 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
246 }
248 smfn = page_to_pfn(page);
250 ASSERT( (gmfn & ~PGT_mfn_mask) == 0 );
251 page->u.inuse.type_info = psh_type | gmfn;
252 page->count_info = 0;
253 page->tlbflush_timestamp = 0;
255 switch ( psh_type )
256 {
257 case PGT_l1_shadow:
258 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
259 goto fail;
260 perfc_incr(shadow_l1_pages);
261 d->arch.shadow_page_count++;
262 break;
264 case PGT_l2_shadow:
265 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
266 goto fail;
267 perfc_incr(shadow_l2_pages);
268 d->arch.shadow_page_count++;
269 if ( PGT_l2_page_table == PGT_root_page_table )
270 pin = 1;
272 break;
274 case PGT_hl2_shadow:
275 // Treat an hl2 as an L1 for purposes of promotion.
276 // For external mode domains, treat them as an L2 for purposes of
277 // pinning.
278 //
279 if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
280 goto fail;
281 perfc_incr(hl2_table_pages);
282 d->arch.hl2_page_count++;
283 if ( shadow_mode_external(d) &&
284 (PGT_l2_page_table == PGT_root_page_table) )
285 pin = 1;
287 break;
289 case PGT_snapshot:
290 perfc_incr(snapshot_pages);
291 d->arch.snapshot_page_count++;
292 break;
294 default:
295 printk("Alloc shadow weird page type type=%08x\n", psh_type);
296 BUG();
297 break;
298 }
300 // Don't add a new shadow of something that already has a snapshot.
301 //
302 ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
304 set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
306 if ( pin )
307 shadow_pin(smfn);
309 return smfn;
311 fail:
312 FSH_LOG("promotion of pfn=%lx mfn=%lx failed! external gnttab refs?",
313 gpfn, gmfn);
314 free_domheap_page(page);
315 return 0;
316 }
318 static void inline
319 free_shadow_l1_table(struct domain *d, unsigned long smfn)
320 {
321 l1_pgentry_t *pl1e = map_domain_page(smfn);
322 int i;
323 struct pfn_info *spage = pfn_to_page(smfn);
324 u32 min_max = spage->tlbflush_timestamp;
325 int min = SHADOW_MIN(min_max);
326 int max = SHADOW_MAX(min_max);
328 for ( i = min; i <= max; i++ )
329 {
330 shadow_put_page_from_l1e(pl1e[i], d);
331 pl1e[i] = l1e_empty();
332 }
334 unmap_domain_page(pl1e);
335 }
337 static void inline
338 free_shadow_hl2_table(struct domain *d, unsigned long smfn)
339 {
340 l1_pgentry_t *hl2 = map_domain_page(smfn);
341 int i, limit;
343 SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
345 #ifdef __i386__
346 if ( shadow_mode_external(d) )
347 limit = L2_PAGETABLE_ENTRIES;
348 else
349 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
350 #else
351 limit = 0; /* XXX x86/64 XXX */
352 #endif
354 for ( i = 0; i < limit; i++ )
355 {
356 if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
357 put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
358 }
360 unmap_domain_page(hl2);
361 }
363 static void inline
364 free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
365 {
366 l2_pgentry_t *pl2e = map_domain_page(smfn);
367 int i, external = shadow_mode_external(d);
369 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
370 if ( external || is_guest_l2_slot(type, i) )
371 if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
372 put_shadow_ref(l2e_get_pfn(pl2e[i]));
374 if ( (PGT_base_page_table == PGT_l2_page_table) &&
375 shadow_mode_translate(d) && !external )
376 {
377 // free the ref to the hl2
378 //
379 put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
380 }
382 unmap_domain_page(pl2e);
383 }
385 void free_shadow_page(unsigned long smfn)
386 {
387 struct pfn_info *page = &frame_table[smfn];
388 unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
389 struct domain *d = page_get_owner(pfn_to_page(gmfn));
390 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
391 unsigned long type = page->u.inuse.type_info & PGT_type_mask;
393 SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
395 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
397 delete_shadow_status(d, gpfn, gmfn, type);
399 switch ( type )
400 {
401 case PGT_l1_shadow:
402 perfc_decr(shadow_l1_pages);
403 shadow_demote(d, gpfn, gmfn);
404 free_shadow_l1_table(d, smfn);
405 d->arch.shadow_page_count--;
406 break;
408 case PGT_l2_shadow:
409 perfc_decr(shadow_l2_pages);
410 shadow_demote(d, gpfn, gmfn);
411 free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
412 d->arch.shadow_page_count--;
413 break;
415 case PGT_hl2_shadow:
416 perfc_decr(hl2_table_pages);
417 shadow_demote(d, gpfn, gmfn);
418 free_shadow_hl2_table(d, smfn);
419 d->arch.hl2_page_count--;
420 break;
422 case PGT_snapshot:
423 perfc_decr(snapshot_pages);
424 d->arch.snapshot_page_count--;
425 break;
427 default:
428 printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
429 page_to_pfn(page), page->u.inuse.type_info);
430 break;
431 }
433 // No TLB flushes are needed the next time this page gets allocated.
434 //
435 page->tlbflush_timestamp = 0;
436 page->u.free.cpumask = CPU_MASK_NONE;
438 if ( type == PGT_l1_shadow )
439 {
440 list_add(&page->list, &d->arch.free_shadow_frames);
441 perfc_incr(free_l1_pages);
442 }
443 else
444 free_domheap_page(page);
445 }
447 void
448 remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
449 {
450 unsigned long smfn;
452 //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
454 shadow_lock(d);
456 while ( stype >= PGT_l1_shadow )
457 {
458 smfn = __shadow_status(d, gpfn, stype);
459 if ( smfn && MFN_PINNED(smfn) )
460 shadow_unpin(smfn);
461 stype -= PGT_l1_shadow;
462 }
464 shadow_unlock(d);
465 }
467 static void inline
468 release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
469 {
470 struct pfn_info *page;
472 page = &frame_table[entry->gmfn];
474 // Decrement ref count of guest & shadow pages
475 //
476 put_page(page);
478 // Only use entries that have low bits clear...
479 //
480 if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
481 {
482 put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
483 entry->writable_pl1e = -2;
484 }
485 else
486 ASSERT( entry->writable_pl1e == -1 );
488 // Free the snapshot
489 //
490 shadow_free_snapshot(d, entry);
491 }
493 static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
494 {
495 struct out_of_sync_entry *entry = d->arch.out_of_sync;
496 struct out_of_sync_entry **prev = &d->arch.out_of_sync;
497 struct out_of_sync_entry *found = NULL;
499 // NB: Be careful not to call something that manipulates this list
500 // while walking it. Collect the results into a separate list
501 // first, then walk that list.
502 //
503 while ( entry )
504 {
505 if ( entry->gmfn == gmfn )
506 {
507 // remove from out of sync list
508 *prev = entry->next;
510 // add to found list
511 entry->next = found;
512 found = entry;
514 entry = *prev;
515 continue;
516 }
517 prev = &entry->next;
518 entry = entry->next;
519 }
521 prev = NULL;
522 entry = found;
523 while ( entry )
524 {
525 release_out_of_sync_entry(d, entry);
527 prev = &entry->next;
528 entry = entry->next;
529 }
531 // Add found list to free list
532 if ( prev )
533 {
534 *prev = d->arch.out_of_sync_free;
535 d->arch.out_of_sync_free = found;
536 }
537 }
539 static void free_out_of_sync_state(struct domain *d)
540 {
541 struct out_of_sync_entry *entry;
543 // NB: Be careful not to call something that manipulates this list
544 // while walking it. Remove one item at a time, and always
545 // restart from start of list.
546 //
547 while ( (entry = d->arch.out_of_sync) )
548 {
549 d->arch.out_of_sync = entry->next;
550 release_out_of_sync_entry(d, entry);
552 entry->next = d->arch.out_of_sync_free;
553 d->arch.out_of_sync_free = entry;
554 }
555 }
557 static void free_shadow_pages(struct domain *d)
558 {
559 int i;
560 struct shadow_status *x;
561 struct vcpu *v;
562 struct list_head *list_ent, *tmp;
564 /*
565 * WARNING! The shadow page table must not currently be in use!
566 * e.g., You are expected to have paused the domain and synchronized CR3.
567 */
569 if( !d->arch.shadow_ht ) return;
571 shadow_audit(d, 1);
573 // first, remove any outstanding refs from out_of_sync entries...
574 //
575 free_out_of_sync_state(d);
577 // second, remove any outstanding refs from v->arch.shadow_table
578 // and CR3.
579 //
580 for_each_vcpu(d, v)
581 {
582 if ( pagetable_get_paddr(v->arch.shadow_table) )
583 {
584 put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
585 v->arch.shadow_table = mk_pagetable(0);
586 }
588 if ( v->arch.monitor_shadow_ref )
589 {
590 put_shadow_ref(v->arch.monitor_shadow_ref);
591 v->arch.monitor_shadow_ref = 0;
592 }
593 }
595 // For external shadows, remove the monitor table's refs
596 //
597 if ( shadow_mode_external(d) )
598 {
599 for_each_vcpu(d, v)
600 {
601 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
603 if ( mpl2e )
604 {
605 l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
606 l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
608 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
609 {
610 put_shadow_ref(l2e_get_pfn(hl2e));
611 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
612 }
613 if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
614 {
615 put_shadow_ref(l2e_get_pfn(smfn));
616 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
617 }
618 }
619 }
620 }
622 // Now, the only refs to shadow pages that are left are from the shadow
623 // pages themselves. We just unpin the pinned pages, and the rest
624 // should automatically disappear.
625 //
626 // NB: Beware: each explicit or implicit call to free_shadow_page
627 // can/will result in the hash bucket getting rewritten out from
628 // under us... First, collect the list of pinned pages, then
629 // free them.
630 //
631 // FIXME: it would be good to just free all the pages referred to in
632 // the hash table without going through each of them to decrement their
633 // reference counts. In shadow_mode_refcounts(), we've gotta do the hard
634 // work, but only for L1 shadows. If we're not in refcount mode, then
635 // there's no real hard work to do at all. Need to be careful with the
636 // writable_pte_predictions and snapshot entries in the hash table, but
637 // that's about it.
638 //
639 for ( i = 0; i < shadow_ht_buckets; i++ )
640 {
641 u32 count;
642 unsigned long *mfn_list;
644 /* Skip empty buckets. */
645 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
646 continue;
648 count = 0;
650 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
651 /* Skip entries that are writable_pred. */
652 switch(x->gpfn_and_flags & PGT_type_mask){
653 case PGT_l1_shadow:
654 case PGT_l2_shadow:
655 case PGT_l3_shadow:
656 case PGT_l4_shadow:
657 case PGT_hl2_shadow:
658 if ( MFN_PINNED(x->smfn) )
659 count++;
660 break;
661 case PGT_snapshot:
662 case PGT_writable_pred:
663 break;
664 default:
665 BUG();
667 }
668 }
670 if ( !count )
671 continue;
673 mfn_list = xmalloc_array(unsigned long, count);
674 count = 0;
675 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
676 /* Skip entries that are writable_pred. */
677 switch(x->gpfn_and_flags & PGT_type_mask){
678 case PGT_l1_shadow:
679 case PGT_l2_shadow:
680 case PGT_l3_shadow:
681 case PGT_l4_shadow:
682 case PGT_hl2_shadow:
683 if ( MFN_PINNED(x->smfn) )
684 mfn_list[count++] = x->smfn;
685 break;
686 case PGT_snapshot:
687 case PGT_writable_pred:
688 break;
689 default:
690 BUG();
692 }
693 }
695 while ( count )
696 {
697 shadow_unpin(mfn_list[--count]);
698 }
699 xfree(mfn_list);
700 }
702 /* Now free the pre-zero'ed pages from the domain */
703 list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
704 {
705 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
707 list_del(list_ent);
708 perfc_decr(free_l1_pages);
710 free_domheap_page(page);
711 }
713 shadow_audit(d, 0);
715 SH_VLOG("Free shadow table.");
716 }
718 void shadow_mode_init(void)
719 {
720 }
722 int _shadow_mode_refcounts(struct domain *d)
723 {
724 return shadow_mode_refcounts(d);
725 }
727 static void alloc_monitor_pagetable(struct vcpu *v)
728 {
729 unsigned long mmfn;
730 l2_pgentry_t *mpl2e;
731 struct pfn_info *mmfn_info;
732 struct domain *d = v->domain;
734 ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
736 mmfn_info = alloc_domheap_page(NULL);
737 ASSERT(mmfn_info != NULL);
739 mmfn = page_to_pfn(mmfn_info);
740 mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
741 memset(mpl2e, 0, PAGE_SIZE);
743 #ifdef __i386__ /* XXX screws x86/64 build */
744 memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
745 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
746 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
747 #endif
749 mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
750 l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
751 __PAGE_HYPERVISOR);
753 // map the phys_to_machine map into the Read-Only MPT space for this domain
754 mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
755 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
756 __PAGE_HYPERVISOR);
758 // Don't (yet) have mappings for these...
759 // Don't want to accidentally see the idle_pg_table's linear mapping.
760 //
761 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
762 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
764 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
765 v->arch.monitor_vtable = mpl2e;
766 }
768 /*
769 * Free the pages for monitor_table and hl2_table
770 */
771 void free_monitor_pagetable(struct vcpu *v)
772 {
773 l2_pgentry_t *mpl2e, hl2e, sl2e;
774 unsigned long mfn;
776 ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
778 mpl2e = v->arch.monitor_vtable;
780 /*
781 * First get the mfn for hl2_table by looking at monitor_table
782 */
783 hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
784 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
785 {
786 mfn = l2e_get_pfn(hl2e);
787 ASSERT(mfn);
788 put_shadow_ref(mfn);
789 }
791 sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
792 if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
793 {
794 mfn = l2e_get_pfn(sl2e);
795 ASSERT(mfn);
796 put_shadow_ref(mfn);
797 }
799 /*
800 * Then free monitor_table.
801 */
802 mfn = pagetable_get_pfn(v->arch.monitor_table);
803 unmap_domain_page(v->arch.monitor_vtable);
804 free_domheap_page(&frame_table[mfn]);
806 v->arch.monitor_table = mk_pagetable(0);
807 v->arch.monitor_vtable = 0;
808 }
810 int
811 set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
812 struct domain_mmap_cache *l2cache,
813 struct domain_mmap_cache *l1cache)
814 {
815 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
816 l2_pgentry_t *l2, l2e;
817 l1_pgentry_t *l1;
818 struct pfn_info *l1page;
819 unsigned long va = pfn << PAGE_SHIFT;
821 ASSERT(tabpfn != 0);
822 ASSERT(shadow_lock_is_acquired(d));
824 l2 = map_domain_page_with_cache(tabpfn, l2cache);
825 l2e = l2[l2_table_offset(va)];
826 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
827 {
828 l1page = alloc_domheap_page(NULL);
829 if ( !l1page )
830 {
831 unmap_domain_page_with_cache(l2, l2cache);
832 return 0;
833 }
835 l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
836 memset(l1, 0, PAGE_SIZE);
837 unmap_domain_page_with_cache(l1, l1cache);
839 l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
840 l2[l2_table_offset(va)] = l2e;
841 }
842 unmap_domain_page_with_cache(l2, l2cache);
844 l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
845 l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
846 unmap_domain_page_with_cache(l1, l1cache);
848 return 1;
849 }
851 static int
852 alloc_p2m_table(struct domain *d)
853 {
854 struct list_head *list_ent;
855 struct pfn_info *page, *l2page;
856 l2_pgentry_t *l2;
857 unsigned long mfn, pfn;
858 struct domain_mmap_cache l1cache, l2cache;
860 l2page = alloc_domheap_page(NULL);
861 if ( l2page == NULL )
862 return 0;
864 domain_mmap_cache_init(&l1cache);
865 domain_mmap_cache_init(&l2cache);
867 d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
868 l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
869 memset(l2, 0, PAGE_SIZE);
870 unmap_domain_page_with_cache(l2, &l2cache);
872 list_ent = d->page_list.next;
873 while ( list_ent != &d->page_list )
874 {
875 page = list_entry(list_ent, struct pfn_info, list);
876 mfn = page_to_pfn(page);
877 pfn = get_pfn_from_mfn(mfn);
878 ASSERT(pfn != INVALID_M2P_ENTRY);
879 ASSERT(pfn < (1u<<20));
881 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
883 list_ent = page->list.next;
884 }
886 list_ent = d->xenpage_list.next;
887 while ( list_ent != &d->xenpage_list )
888 {
889 page = list_entry(list_ent, struct pfn_info, list);
890 mfn = page_to_pfn(page);
891 pfn = get_pfn_from_mfn(mfn);
892 if ( (pfn != INVALID_M2P_ENTRY) &&
893 (pfn < (1u<<20)) )
894 {
895 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
896 }
898 list_ent = page->list.next;
899 }
901 domain_mmap_cache_destroy(&l2cache);
902 domain_mmap_cache_destroy(&l1cache);
904 return 1;
905 }
907 static void
908 free_p2m_table(struct domain *d)
909 {
910 // uh, this needs some work... :)
911 BUG();
912 }
914 int __shadow_mode_enable(struct domain *d, unsigned int mode)
915 {
916 struct vcpu *v;
917 int new_modes = (mode & ~d->arch.shadow_mode);
919 if(!new_modes) /* Nothing to do - return success */
920 return 0;
922 // can't take anything away by calling this function.
923 ASSERT(!(d->arch.shadow_mode & ~mode));
925 for_each_vcpu(d, v)
926 {
927 invalidate_shadow_ldt(v);
929 // We need to set these up for __update_pagetables().
930 // See the comment there.
932 /*
933 * arch.guest_vtable
934 */
935 if ( v->arch.guest_vtable &&
936 (v->arch.guest_vtable != __linear_l2_table) )
937 {
938 unmap_domain_page(v->arch.guest_vtable);
939 }
940 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
941 v->arch.guest_vtable = __linear_l2_table;
942 else
943 v->arch.guest_vtable = NULL;
945 /*
946 * arch.shadow_vtable
947 */
948 if ( v->arch.shadow_vtable &&
949 (v->arch.shadow_vtable != __shadow_linear_l2_table) )
950 {
951 unmap_domain_page(v->arch.shadow_vtable);
952 }
953 if ( !(mode & SHM_external) )
954 v->arch.shadow_vtable = __shadow_linear_l2_table;
955 else
956 v->arch.shadow_vtable = NULL;
958 /*
959 * arch.hl2_vtable
960 */
961 if ( v->arch.hl2_vtable &&
962 (v->arch.hl2_vtable != __linear_hl2_table) )
963 {
964 unmap_domain_page(v->arch.hl2_vtable);
965 }
966 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
967 v->arch.hl2_vtable = __linear_hl2_table;
968 else
969 v->arch.hl2_vtable = NULL;
971 /*
972 * arch.monitor_table & arch.monitor_vtable
973 */
974 if ( v->arch.monitor_vtable )
975 {
976 free_monitor_pagetable(v);
977 }
978 if ( mode & SHM_external )
979 {
980 alloc_monitor_pagetable(v);
981 }
982 }
984 if ( new_modes & SHM_enable )
985 {
986 ASSERT( !d->arch.shadow_ht );
987 d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
988 if ( d->arch.shadow_ht == NULL )
989 goto nomem;
991 memset(d->arch.shadow_ht, 0,
992 shadow_ht_buckets * sizeof(struct shadow_status));
993 }
995 if ( new_modes & SHM_log_dirty )
996 {
997 ASSERT( !d->arch.shadow_dirty_bitmap );
998 d->arch.shadow_dirty_bitmap_size =
999 (d->shared_info->arch.max_pfn + 63) & ~63;
1000 d->arch.shadow_dirty_bitmap =
1001 xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
1002 (8 * sizeof(unsigned long)));
1003 if ( d->arch.shadow_dirty_bitmap == NULL )
1005 d->arch.shadow_dirty_bitmap_size = 0;
1006 goto nomem;
1008 memset(d->arch.shadow_dirty_bitmap, 0,
1009 d->arch.shadow_dirty_bitmap_size/8);
1012 if ( new_modes & SHM_translate )
1014 if ( !(new_modes & SHM_external) )
1016 ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
1017 if ( !alloc_p2m_table(d) )
1019 printk("alloc_p2m_table failed (out-of-memory?)\n");
1020 goto nomem;
1023 else
1025 // external guests provide their own memory for their P2M maps.
1026 //
1027 ASSERT( d == page_get_owner(
1028 &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
1032 // Get rid of any shadow pages from any previous shadow mode.
1033 //
1034 free_shadow_pages(d);
1036 /*
1037 * Tear down its counts by disassembling its page-table-based ref counts.
1038 * Also remove CR3's gcount/tcount.
1039 * That leaves things like GDTs and LDTs and external refs intact.
1041 * Most pages will be writable tcount=0.
1042 * Some will still be L1 tcount=0 or L2 tcount=0.
1043 * Maybe some pages will be type none tcount=0.
1044 * Pages granted external writable refs (via grant tables?) will
1045 * still have a non-zero tcount. That's OK.
1047 * gcounts will generally be 1 for PGC_allocated.
1048 * GDTs and LDTs will have additional gcounts.
1049 * Any grant-table based refs will still be in the gcount.
1051 * We attempt to grab writable refs to each page (thus setting its type).
1052 * Immediately put back those type refs.
1054 * Assert that no pages are left with L1/L2/L3/L4 type.
1055 */
1056 audit_adjust_pgtables(d, -1, 1);
1058 d->arch.shadow_mode = mode;
1060 if ( shadow_mode_refcounts(d) )
1062 struct list_head *list_ent = d->page_list.next;
1063 while ( list_ent != &d->page_list )
1065 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
1066 if ( !get_page_type(page, PGT_writable_page) )
1067 BUG();
1068 put_page_type(page);
1069 /*
1070 * We use tlbflush_timestamp as back pointer to smfn, and need to
1071 * clean it up.
1072 */
1073 if ( shadow_mode_external(d) )
1074 page->tlbflush_timestamp = 0;
1075 list_ent = page->list.next;
1079 audit_adjust_pgtables(d, 1, 1);
1081 return 0;
1083 nomem:
1084 if ( (new_modes & SHM_enable) )
1086 xfree(d->arch.shadow_ht);
1087 d->arch.shadow_ht = NULL;
1089 if ( (new_modes & SHM_log_dirty) )
1091 xfree(d->arch.shadow_dirty_bitmap);
1092 d->arch.shadow_dirty_bitmap = NULL;
1094 if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
1095 pagetable_get_paddr(d->arch.phys_table) )
1097 free_p2m_table(d);
1099 return -ENOMEM;
1102 int shadow_mode_enable(struct domain *d, unsigned int mode)
1104 int rc;
1105 shadow_lock(d);
1106 rc = __shadow_mode_enable(d, mode);
1107 shadow_unlock(d);
1108 return rc;
1111 static void
1112 translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
1114 int i;
1115 l1_pgentry_t *l1;
1117 l1 = map_domain_page(l1mfn);
1118 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1120 if ( is_guest_l1_slot(i) &&
1121 (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
1123 unsigned long mfn = l1e_get_pfn(l1[i]);
1124 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1125 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1126 l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
1129 unmap_domain_page(l1);
1132 // This is not general enough to handle arbitrary pagetables
1133 // with shared L1 pages, etc., but it is sufficient for bringing
1134 // up dom0.
1135 //
1136 void
1137 translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
1138 unsigned int type)
1140 int i;
1141 l2_pgentry_t *l2;
1143 ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
1145 l2 = map_domain_page(l2mfn);
1146 for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
1148 if ( is_guest_l2_slot(type, i) &&
1149 (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
1151 unsigned long mfn = l2e_get_pfn(l2[i]);
1152 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1153 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1154 l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
1155 translate_l1pgtable(d, p2m, mfn);
1158 unmap_domain_page(l2);
1161 static void free_shadow_ht_entries(struct domain *d)
1163 struct shadow_status *x, *n;
1165 SH_VLOG("freed tables count=%d l1=%d l2=%d",
1166 d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
1167 perfc_value(shadow_l2_pages));
1169 n = d->arch.shadow_ht_extras;
1170 while ( (x = n) != NULL )
1172 d->arch.shadow_extras_count--;
1173 n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
1174 xfree(x);
1177 d->arch.shadow_ht_extras = NULL;
1178 d->arch.shadow_ht_free = NULL;
1180 ASSERT(d->arch.shadow_extras_count == 0);
1181 SH_VLOG("freed extras, now %d", d->arch.shadow_extras_count);
1183 if ( d->arch.shadow_dirty_bitmap != NULL )
1185 xfree(d->arch.shadow_dirty_bitmap);
1186 d->arch.shadow_dirty_bitmap = 0;
1187 d->arch.shadow_dirty_bitmap_size = 0;
1190 xfree(d->arch.shadow_ht);
1191 d->arch.shadow_ht = NULL;
1194 static void free_out_of_sync_entries(struct domain *d)
1196 struct out_of_sync_entry *x, *n;
1198 n = d->arch.out_of_sync_extras;
1199 while ( (x = n) != NULL )
1201 d->arch.out_of_sync_extras_count--;
1202 n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
1203 xfree(x);
1206 d->arch.out_of_sync_extras = NULL;
1207 d->arch.out_of_sync_free = NULL;
1208 d->arch.out_of_sync = NULL;
1210 ASSERT(d->arch.out_of_sync_extras_count == 0);
1211 FSH_LOG("freed extra out_of_sync entries, now %d",
1212 d->arch.out_of_sync_extras_count);
1215 void __shadow_mode_disable(struct domain *d)
1217 struct vcpu *v;
1218 #ifndef NDEBUG
1219 int i;
1220 #endif
1222 if ( unlikely(!shadow_mode_enabled(d)) )
1223 return;
1225 free_shadow_pages(d);
1226 free_writable_pte_predictions(d);
1228 #ifndef NDEBUG
1229 for ( i = 0; i < shadow_ht_buckets; i++ )
1231 if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
1233 printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
1234 __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
1235 BUG();
1238 #endif
1240 d->arch.shadow_mode = 0;
1242 free_shadow_ht_entries(d);
1243 free_out_of_sync_entries(d);
1245 for_each_vcpu(d, v)
1246 update_pagetables(v);
1249 static int shadow_mode_table_op(
1250 struct domain *d, dom0_shadow_control_t *sc)
1252 unsigned int op = sc->op;
1253 int i, rc = 0;
1254 struct vcpu *v;
1256 ASSERT(shadow_lock_is_acquired(d));
1258 SH_VLOG("shadow mode table op %lx %lx count %d",
1259 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
1260 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
1261 d->arch.shadow_page_count);
1263 shadow_audit(d, 1);
1265 switch ( op )
1267 case DOM0_SHADOW_CONTROL_OP_FLUSH:
1268 free_shadow_pages(d);
1270 d->arch.shadow_fault_count = 0;
1271 d->arch.shadow_dirty_count = 0;
1273 break;
1275 case DOM0_SHADOW_CONTROL_OP_CLEAN:
1276 free_shadow_pages(d);
1278 sc->stats.fault_count = d->arch.shadow_fault_count;
1279 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1281 d->arch.shadow_fault_count = 0;
1282 d->arch.shadow_dirty_count = 0;
1284 if ( (sc->dirty_bitmap == NULL) ||
1285 (d->arch.shadow_dirty_bitmap == NULL) )
1287 rc = -EINVAL;
1288 break;
1291 if(sc->pages > d->arch.shadow_dirty_bitmap_size)
1292 sc->pages = d->arch.shadow_dirty_bitmap_size;
1294 #define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
1295 for ( i = 0; i < sc->pages; i += chunk )
1297 int bytes = ((((sc->pages - i) > chunk) ?
1298 chunk : (sc->pages - i)) + 7) / 8;
1300 if (copy_to_user(
1301 sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
1302 d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
1303 bytes))
1305 rc = -EINVAL;
1306 break;
1309 memset(
1310 d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
1311 0, bytes);
1314 break;
1316 case DOM0_SHADOW_CONTROL_OP_PEEK:
1317 sc->stats.fault_count = d->arch.shadow_fault_count;
1318 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1320 if ( (sc->dirty_bitmap == NULL) ||
1321 (d->arch.shadow_dirty_bitmap == NULL) )
1323 rc = -EINVAL;
1324 break;
1327 if(sc->pages > d->arch.shadow_dirty_bitmap_size)
1328 sc->pages = d->arch.shadow_dirty_bitmap_size;
1330 if (copy_to_user(sc->dirty_bitmap,
1331 d->arch.shadow_dirty_bitmap, (sc->pages+7)/8))
1333 rc = -EINVAL;
1334 break;
1337 break;
1339 default:
1340 rc = -EINVAL;
1341 break;
1344 SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
1345 shadow_audit(d, 1);
1347 for_each_vcpu(d,v)
1348 __update_pagetables(v);
1350 return rc;
1353 int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
1355 unsigned int op = sc->op;
1356 int rc = 0;
1357 struct vcpu *v;
1359 if ( unlikely(d == current->domain) )
1361 DPRINTK("Don't try to do a shadow op on yourself!\n");
1362 return -EINVAL;
1365 domain_pause(d);
1367 shadow_lock(d);
1369 switch ( op )
1371 case DOM0_SHADOW_CONTROL_OP_OFF:
1372 if ( shadow_mode_enabled(d) )
1374 __shadow_sync_all(d);
1375 __shadow_mode_disable(d);
1377 break;
1379 case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
1380 free_shadow_pages(d);
1381 rc = __shadow_mode_enable(d, SHM_enable);
1382 break;
1384 case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
1385 free_shadow_pages(d);
1386 rc = __shadow_mode_enable(
1387 d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
1388 break;
1390 case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
1391 free_shadow_pages(d);
1392 rc = __shadow_mode_enable(
1393 d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
1394 break;
1396 default:
1397 rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
1398 break;
1401 shadow_unlock(d);
1403 for_each_vcpu(d,v)
1404 update_pagetables(v);
1406 domain_unpause(d);
1408 return rc;
1411 unsigned long
1412 get_mfn_from_pfn_foreign(struct domain *d, unsigned long gpfn)
1414 unsigned long va, tabpfn;
1415 l1_pgentry_t *l1, l1e;
1416 l2_pgentry_t *l2, l2e;
1418 ASSERT(shadow_mode_translate(d));
1420 perfc_incrc(get_mfn_from_pfn_foreign);
1422 va = gpfn << PAGE_SHIFT;
1423 tabpfn = pagetable_get_pfn(d->arch.phys_table);
1424 l2 = map_domain_page(tabpfn);
1425 l2e = l2[l2_table_offset(va)];
1426 unmap_domain_page(l2);
1427 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1429 printk("%s(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
1430 __func__, d->domain_id, gpfn, l2e_get_intpte(l2e));
1431 return INVALID_MFN;
1433 l1 = map_domain_page(l2e_get_pfn(l2e));
1434 l1e = l1[l1_table_offset(va)];
1435 unmap_domain_page(l1);
1437 #if 0
1438 printk("%s(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
1439 __func__, d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
1440 #endif
1442 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
1444 printk("%s(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
1445 __func__, d->domain_id, gpfn, l1e_get_intpte(l1e));
1446 return INVALID_MFN;
1449 return l1e_get_pfn(l1e);
1452 static unsigned long
1453 shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
1454 unsigned long smfn)
1456 unsigned long hl2mfn;
1457 l1_pgentry_t *hl2;
1458 int limit;
1460 ASSERT(PGT_base_page_table == PGT_l2_page_table);
1462 if ( unlikely(!(hl2mfn = alloc_shadow_page(d, gpfn, gmfn, PGT_hl2_shadow))) )
1464 printk("Couldn't alloc an HL2 shadow for pfn=%lx mfn=%lx\n",
1465 gpfn, gmfn);
1466 BUG(); /* XXX Deal gracefully with failure. */
1469 SH_VVLOG("shadow_hl2_table(gpfn=%lx, gmfn=%lx, smfn=%lx) => %lx",
1470 gpfn, gmfn, smfn, hl2mfn);
1471 perfc_incrc(shadow_hl2_table_count);
1473 hl2 = map_domain_page(hl2mfn);
1475 if ( shadow_mode_external(d) )
1476 limit = L2_PAGETABLE_ENTRIES;
1477 else
1478 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
1480 memset(hl2, 0, limit * sizeof(l1_pgentry_t));
1482 if ( !shadow_mode_external(d) )
1484 memset(&hl2[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 0,
1485 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1487 // Setup easy access to the GL2, SL2, and HL2 frames.
1488 //
1489 hl2[l2_table_offset(LINEAR_PT_VIRT_START)] =
1490 l1e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1491 hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1492 l1e_from_pfn(smfn, __PAGE_HYPERVISOR);
1493 hl2[l2_table_offset(PERDOMAIN_VIRT_START)] =
1494 l1e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1497 unmap_domain_page(hl2);
1499 return hl2mfn;
1502 /*
1503 * This could take and use a snapshot, and validate the entire page at
1504 * once, or it could continue to fault in entries one at a time...
1505 * Might be worth investigating...
1506 */
1507 static unsigned long shadow_l2_table(
1508 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1510 unsigned long smfn;
1511 l2_pgentry_t *spl2e;
1513 SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
1515 perfc_incrc(shadow_l2_table_count);
1517 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l2_shadow))) )
1519 printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
1520 gpfn, gmfn);
1521 BUG(); /* XXX Deal gracefully with failure. */
1524 spl2e = (l2_pgentry_t *)map_domain_page(smfn);
1526 /* Install hypervisor and 2x linear p.t. mappings. */
1527 if ( (PGT_base_page_table == PGT_l2_page_table) &&
1528 !shadow_mode_external(d) )
1530 /*
1531 * We could proactively fill in PDEs for pages that are already
1532 * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
1533 * (restriction required for coherence of the accessed bit). However,
1534 * we tried it and it didn't help performance. This is simpler.
1535 */
1536 memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
1538 /* Install hypervisor and 2x linear p.t. mappings. */
1539 memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1540 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1541 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1543 spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1544 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
1546 spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
1547 l2e_from_paddr(__pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt),
1548 __PAGE_HYPERVISOR);
1550 if ( shadow_mode_translate(d) ) // NB: not external
1552 unsigned long hl2mfn;
1554 spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
1555 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
1556 __PAGE_HYPERVISOR);
1558 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
1559 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
1561 // shadow_mode_translate (but not external) sl2 tables hold a
1562 // ref to their hl2.
1563 //
1564 if ( !get_shadow_ref(hl2mfn) )
1565 BUG();
1567 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1568 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1570 else
1571 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1572 l2e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1574 else
1576 memset(spl2e, 0, L2_PAGETABLE_ENTRIES*sizeof(l2_pgentry_t));
1579 unmap_domain_page(spl2e);
1581 SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
1582 return smfn;
1585 void shadow_map_l1_into_current_l2(unsigned long va)
1587 struct vcpu *v = current;
1588 struct domain *d = v->domain;
1589 l1_pgentry_t *gpl1e, *spl1e;
1590 l2_pgentry_t gl2e, sl2e;
1591 unsigned long gl1pfn, gl1mfn, sl1mfn;
1592 int i, init_table = 0;
1594 __guest_get_l2e(v, va, &gl2e);
1595 ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT);
1596 gl1pfn = l2e_get_pfn(gl2e);
1598 if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
1600 /* This L1 is NOT already shadowed so we need to shadow it. */
1601 SH_VVLOG("4a: l1 not shadowed");
1603 gl1mfn = __gpfn_to_mfn(d, gl1pfn);
1604 if ( unlikely(!VALID_MFN(gl1mfn)) )
1606 // Attempt to use an invalid pfn as an L1 page.
1607 // XXX this needs to be more graceful!
1608 BUG();
1611 if ( unlikely(!(sl1mfn =
1612 alloc_shadow_page(d, gl1pfn, gl1mfn, PGT_l1_shadow))) )
1614 printk("Couldn't alloc an L1 shadow for pfn=%lx mfn=%lx\n",
1615 gl1pfn, gl1mfn);
1616 BUG(); /* XXX Need to deal gracefully with failure. */
1619 perfc_incrc(shadow_l1_table_count);
1620 init_table = 1;
1622 else
1624 /* This L1 is shadowed already, but the L2 entry is missing. */
1625 SH_VVLOG("4b: was shadowed, l2 missing (%lx)", sl1mfn);
1628 #ifndef NDEBUG
1630 l2_pgentry_t old_sl2e;
1631 __shadow_get_l2e(v, va, &old_sl2e);
1632 ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
1634 #endif
1636 if ( !get_shadow_ref(sl1mfn) )
1637 BUG();
1638 l2pde_general(d, &gl2e, &sl2e, sl1mfn);
1639 __guest_set_l2e(v, va, gl2e);
1640 __shadow_set_l2e(v, va, sl2e);
1642 if ( init_table )
1644 l1_pgentry_t sl1e;
1645 int index = l1_table_offset(va);
1646 int min = 1, max = 0;
1648 gpl1e = &(linear_pg_table[l1_linear_offset(va) &
1649 ~(L1_PAGETABLE_ENTRIES-1)]);
1651 spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
1652 ~(L1_PAGETABLE_ENTRIES-1)]);
1654 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1656 l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
1657 if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
1658 unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
1659 sl1e = l1e_empty();
1660 if ( l1e_get_flags(sl1e) == 0 )
1662 // First copy entries from 0 until first invalid.
1663 // Then copy entries from index until first invalid.
1664 //
1665 if ( i < index ) {
1666 i = index - 1;
1667 continue;
1669 break;
1671 spl1e[i] = sl1e;
1672 if ( unlikely(i < min) )
1673 min = i;
1674 if ( likely(i > max) )
1675 max = i;
1676 set_guest_back_ptr(d, sl1e, sl1mfn, i);
1679 frame_table[sl1mfn].tlbflush_timestamp =
1680 SHADOW_ENCODE_MIN_MAX(min, max);
1684 void shadow_invlpg(struct vcpu *v, unsigned long va)
1686 struct domain *d = v->domain;
1687 l1_pgentry_t gpte, spte;
1689 ASSERT(shadow_mode_enabled(d));
1691 shadow_lock(d);
1693 __shadow_sync_va(v, va);
1695 // XXX mafetter: will need to think about 4MB pages...
1697 // It's not strictly necessary to update the shadow here,
1698 // but it might save a fault later.
1699 //
1700 if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
1701 sizeof(gpte))) {
1702 perfc_incrc(shadow_invlpg_faults);
1703 shadow_unlock(d);
1704 return;
1706 l1pte_propagate_from_guest(d, gpte, &spte);
1707 shadow_set_l1e(va, spte, 1);
1709 shadow_unlock(d);
1712 struct out_of_sync_entry *
1713 shadow_alloc_oos_entry(struct domain *d)
1715 struct out_of_sync_entry *f, *extra;
1716 unsigned size, i;
1718 if ( unlikely(d->arch.out_of_sync_free == NULL) )
1720 FSH_LOG("Allocate more fullshadow tuple blocks.");
1722 size = sizeof(void *) + (out_of_sync_extra_size * sizeof(*f));
1723 extra = xmalloc_bytes(size);
1725 /* XXX Should be more graceful here. */
1726 if ( extra == NULL )
1727 BUG();
1729 memset(extra, 0, size);
1731 /* Record the allocation block so it can be correctly freed later. */
1732 d->arch.out_of_sync_extras_count++;
1733 *((struct out_of_sync_entry **)&extra[out_of_sync_extra_size]) =
1734 d->arch.out_of_sync_extras;
1735 d->arch.out_of_sync_extras = &extra[0];
1737 /* Thread a free chain through the newly-allocated nodes. */
1738 for ( i = 0; i < (out_of_sync_extra_size - 1); i++ )
1739 extra[i].next = &extra[i+1];
1740 extra[i].next = NULL;
1742 /* Add the new nodes to the free list. */
1743 d->arch.out_of_sync_free = &extra[0];
1746 /* Allocate a new node from the quicklist. */
1747 f = d->arch.out_of_sync_free;
1748 d->arch.out_of_sync_free = f->next;
1750 return f;
1753 static inline unsigned long
1754 shadow_make_snapshot(
1755 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1757 unsigned long smfn, sl1mfn = 0;
1758 void *original, *snapshot;
1759 u32 min_max = 0;
1760 int min, max, length;
1762 if ( test_and_set_bit(_PGC_out_of_sync, &frame_table[gmfn].count_info) )
1764 ASSERT(__shadow_status(d, gpfn, PGT_snapshot));
1765 return SHADOW_SNAPSHOT_ELSEWHERE;
1768 perfc_incrc(shadow_make_snapshot);
1770 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_snapshot))) )
1772 printk("Couldn't alloc fullshadow snapshot for pfn=%lx mfn=%lx!\n"
1773 "Dom%d snapshot_page_count=%d\n",
1774 gpfn, gmfn, d->domain_id, d->arch.snapshot_page_count);
1775 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
1778 if ( !get_shadow_ref(smfn) )
1779 BUG();
1781 if ( shadow_mode_refcounts(d) &&
1782 (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
1783 min_max = pfn_to_page(sl1mfn)->tlbflush_timestamp;
1784 pfn_to_page(smfn)->tlbflush_timestamp = min_max;
1786 min = SHADOW_MIN(min_max);
1787 max = SHADOW_MAX(min_max);
1788 length = max - min + 1;
1789 perfc_incr_histo(snapshot_copies, length, PT_UPDATES);
1791 min *= sizeof(l1_pgentry_t);
1792 length *= sizeof(l1_pgentry_t);
1794 original = map_domain_page(gmfn);
1795 snapshot = map_domain_page(smfn);
1796 memcpy(snapshot + min, original + min, length);
1797 unmap_domain_page(original);
1798 unmap_domain_page(snapshot);
1800 return smfn;
1803 static void
1804 shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
1806 void *snapshot;
1808 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
1809 return;
1811 // Clear the out_of_sync bit.
1812 //
1813 clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
1815 // XXX Need to think about how to protect the domain's
1816 // information less expensively.
1817 //
1818 snapshot = map_domain_page(entry->snapshot_mfn);
1819 memset(snapshot, 0, PAGE_SIZE);
1820 unmap_domain_page(snapshot);
1822 put_shadow_ref(entry->snapshot_mfn);
1825 struct out_of_sync_entry *
1826 __shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
1827 unsigned long mfn)
1829 struct domain *d = v->domain;
1830 struct pfn_info *page = &frame_table[mfn];
1831 struct out_of_sync_entry *entry = shadow_alloc_oos_entry(d);
1833 ASSERT(shadow_lock_is_acquired(d));
1834 ASSERT(pfn_valid(mfn));
1836 #ifndef NDEBUG
1838 u32 type = page->u.inuse.type_info & PGT_type_mask;
1839 if ( shadow_mode_refcounts(d) )
1841 ASSERT(type == PGT_writable_page);
1843 else
1845 ASSERT(type && (type < PGT_l4_page_table));
1848 #endif
1850 FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08lx", __func__,
1851 gpfn, mfn, page->count_info, page->u.inuse.type_info);
1853 // XXX this will require some more thought... Cross-domain sharing and
1854 // modification of page tables? Hmm...
1855 //
1856 if ( d != page_get_owner(page) )
1857 BUG();
1859 perfc_incrc(shadow_mark_mfn_out_of_sync_calls);
1861 entry->v = v;
1862 entry->gpfn = gpfn;
1863 entry->gmfn = mfn;
1864 entry->writable_pl1e = -1;
1866 #if SHADOW_DEBUG
1867 mark_shadows_as_reflecting_snapshot(d, gpfn);
1868 #endif
1870 // increment guest's ref count to represent the entry in the
1871 // full shadow out-of-sync list.
1872 //
1873 get_page(page, d);
1875 return entry;
1878 struct out_of_sync_entry *
1879 shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
1880 unsigned long mfn)
1882 struct out_of_sync_entry *entry =
1883 __shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
1884 struct domain *d = v->domain;
1886 entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
1887 // Add to the out-of-sync list
1888 //
1889 entry->next = d->arch.out_of_sync;
1890 d->arch.out_of_sync = entry;
1892 return entry;
1895 void shadow_mark_va_out_of_sync(
1896 struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long va)
1898 struct out_of_sync_entry *entry =
1899 __shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
1900 l2_pgentry_t sl2e;
1901 struct domain *d = v->domain;
1903 // We need the address of the shadow PTE that maps @va.
1904 // It might not exist yet. Make sure it's there.
1905 //
1906 __shadow_get_l2e(v, va, &sl2e);
1907 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
1909 // either this L1 isn't shadowed yet, or the shadow isn't linked into
1910 // the current L2.
1911 shadow_map_l1_into_current_l2(va);
1912 __shadow_get_l2e(v, va, &sl2e);
1914 ASSERT(l2e_get_flags(sl2e) & _PAGE_PRESENT);
1916 entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
1917 // NB: this is stored as a machine address.
1918 entry->writable_pl1e =
1919 l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * l1_table_offset(va));
1920 ASSERT( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) );
1921 entry->va = va;
1923 // Increment shadow's page count to represent the reference
1924 // inherent in entry->writable_pl1e
1925 //
1926 if ( !get_shadow_ref(l2e_get_pfn(sl2e)) )
1927 BUG();
1929 // Add to the out-of-sync list
1930 //
1931 entry->next = d->arch.out_of_sync;
1932 d->arch.out_of_sync = entry;
1934 FSH_LOG("%s(va=%lx -> writable_pl1e=%lx)",
1935 __func__, va, entry->writable_pl1e);
1938 /*
1939 * Returns 1 if the snapshot for @gpfn exists and its @index'th entry matches.
1940 * Returns 0 otherwise.
1941 */
1942 static int snapshot_entry_matches(
1943 struct domain *d, l1_pgentry_t *guest_pt,
1944 unsigned long gpfn, unsigned index)
1946 unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot);
1947 l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ...
1948 int entries_match;
1950 perfc_incrc(snapshot_entry_matches_calls);
1952 if ( !smfn )
1953 return 0;
1955 snapshot = map_domain_page(smfn);
1957 if (__copy_from_user(&gpte, &guest_pt[index],
1958 sizeof(gpte))) {
1959 unmap_domain_page(snapshot);
1960 return 0;
1963 // This could probably be smarter, but this is sufficient for
1964 // our current needs.
1965 //
1966 entries_match = !l1e_has_changed(gpte, snapshot[index],
1967 PAGE_FLAG_MASK);
1969 unmap_domain_page(snapshot);
1971 #ifdef PERF_COUNTERS
1972 if ( entries_match )
1973 perfc_incrc(snapshot_entry_matches_true);
1974 #endif
1976 return entries_match;
1979 /*
1980 * Returns 1 if va's shadow mapping is out-of-sync.
1981 * Returns 0 otherwise.
1982 */
1983 int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
1985 struct domain *d = v->domain;
1986 unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
1987 unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn);
1988 l2_pgentry_t l2e;
1989 unsigned long l1pfn, l1mfn;
1991 ASSERT(shadow_lock_is_acquired(d));
1992 ASSERT(VALID_M2P(l2pfn));
1994 perfc_incrc(shadow_out_of_sync_calls);
1996 if ( page_out_of_sync(&frame_table[l2mfn]) &&
1997 !snapshot_entry_matches(d, (l1_pgentry_t *)v->arch.guest_vtable,
1998 l2pfn, l2_table_offset(va)) )
1999 return 1;
2001 __guest_get_l2e(v, va, &l2e);
2002 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
2003 return 0;
2005 l1pfn = l2e_get_pfn(l2e);
2006 l1mfn = __gpfn_to_mfn(d, l1pfn);
2008 // If the l1 pfn is invalid, it can't be out of sync...
2009 if ( !VALID_MFN(l1mfn) )
2010 return 0;
2012 if ( page_out_of_sync(&frame_table[l1mfn]) &&
2013 !snapshot_entry_matches(
2014 d, &linear_pg_table[l1_linear_offset(va) & ~(L1_PAGETABLE_ENTRIES-1)],
2015 l1pfn, l1_table_offset(va)) )
2016 return 1;
2018 return 0;
2021 #define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t)))
2022 static inline unsigned long
2023 predict_writable_pte_page(struct domain *d, unsigned long gpfn)
2025 return __shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), PGT_writable_pred);
2028 static inline void
2029 increase_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
2031 unsigned long score = prediction & PGT_score_mask;
2032 int create = (score == 0);
2034 // saturating addition
2035 score = (score + (1u << PGT_score_shift)) & PGT_score_mask;
2036 score = score ? score : PGT_score_mask;
2038 prediction = (prediction & PGT_mfn_mask) | score;
2040 //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
2041 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2043 if ( create )
2044 perfc_incr(writable_pte_predictions);
2047 static inline void
2048 decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
2050 unsigned long score = prediction & PGT_score_mask;
2051 ASSERT(score);
2053 // divide score by 2... We don't like bad predictions.
2054 //
2055 score = (score >> 1) & PGT_score_mask;
2057 prediction = (prediction & PGT_mfn_mask) | score;
2059 //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
2061 if ( score )
2062 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2063 else
2065 delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
2066 perfc_decr(writable_pte_predictions);
2070 static void
2071 free_writable_pte_predictions(struct domain *d)
2073 int i;
2074 struct shadow_status *x;
2076 for ( i = 0; i < shadow_ht_buckets; i++ )
2078 u32 count;
2079 unsigned long *gpfn_list;
2081 /* Skip empty buckets. */
2082 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
2083 continue;
2085 count = 0;
2086 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2087 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2088 count++;
2090 gpfn_list = xmalloc_array(unsigned long, count);
2091 count = 0;
2092 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2093 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2094 gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
2096 while ( count )
2098 count--;
2099 /* delete_shadow_status() may do a shadow_audit(), so we need to
2100 * keep an accurate count of writable_pte_predictions to keep it
2101 * happy.
2102 */
2103 delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
2104 perfc_decr(writable_pte_predictions);
2107 xfree(gpfn_list);
2111 static int fix_entry(
2112 struct domain *d,
2113 l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
2115 l1_pgentry_t old = *pt;
2116 l1_pgentry_t new = old;
2118 l1e_remove_flags(new,_PAGE_RW);
2119 if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
2120 BUG();
2121 (*found)++;
2122 *pt = new;
2123 if ( is_l1_shadow )
2124 shadow_put_page_from_l1e(old, d);
2126 return (*found == max_refs_to_find);
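/*
 * fix_entry() revokes write access by rewriting the entry in place: a PTE
 * mapping readonly_gmfn with _PAGE_PRESENT|_PAGE_RW becomes the same PTE
 * with _PAGE_RW cleared.  When the table being fixed is a shadow L1, the
 * shadow_get/put_page_from_l1e() pair above also moves the page reference
 * from the old writable entry to the new read-only one.  The return value
 * lets callers stop scanning as soon as max_refs_to_find writable mappings
 * have been downgraded.
 */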
2129 static u32 remove_all_write_access_in_ptpage(
2130 struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
2131 unsigned long readonly_gpfn, unsigned long readonly_gmfn,
2132 u32 max_refs_to_find, unsigned long prediction)
2134 l1_pgentry_t *pt = map_domain_page(pt_mfn);
2135 l1_pgentry_t match;
2136 unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
2137 int i;
2138 u32 found = 0;
2139 int is_l1_shadow =
2140 ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
2141 PGT_l1_shadow);
2143 match = l1e_from_pfn(readonly_gmfn, flags);
2145 if ( shadow_mode_external(d) ) {
2146 i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask)
2147 >> PGT_va_shift;
2149 if ( (i >= 0 && i < L1_PAGETABLE_ENTRIES) &&
2150 !l1e_has_changed(pt[i], match, flags) &&
2151 fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
2152 !prediction )
2153 goto out;
2156 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2158 if ( unlikely(!l1e_has_changed(pt[i], match, flags)) &&
2159 fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
2160 break;
2163 out:
2164 unmap_domain_page(pt);
2166 return found;
2169 int shadow_remove_all_write_access(
2170 struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
2172 int i;
2173 struct shadow_status *a;
2174 u32 found = 0, write_refs;
2175 unsigned long predicted_smfn;
2177 ASSERT(shadow_lock_is_acquired(d));
2178 ASSERT(VALID_MFN(readonly_gmfn));
2180 perfc_incrc(remove_write_access);
2182 // If it's not a writable page, then no writable refs can be outstanding.
2183 //
2184 if ( (frame_table[readonly_gmfn].u.inuse.type_info & PGT_type_mask) !=
2185 PGT_writable_page )
2187 perfc_incrc(remove_write_not_writable);
2188 return 1;
2191 // How many outstanding writable PTEs for this page are there?
2192 //
2193 write_refs =
2194 (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
2195 if ( write_refs && MFN_PINNED(readonly_gmfn) )
2197 write_refs--;
2200 if ( write_refs == 0 )
2202 perfc_incrc(remove_write_no_work);
2203 return 1;
2206 if ( shadow_mode_external(d) ) {
2207 if (write_refs-- == 0)
2208 return 0;
2210 // Use the back pointer to locate the shadow page that can contain
2211 // the PTE of interest
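/*
 * (How the back pointer works, as far as this file is concerned: for an
 * external-mode guest, set_guest_back_ptr() -- called from the resync and
 * sync paths below -- stashes in page->tlbflush_timestamp the mfn of the
 * shadow L1 into which a writable mapping of this page was last installed.
 * Trying that one shadow page first usually finds all the writable PTEs
 * without falling back to the full hash-table walk further down.)
 */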
2212 if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
2213 found += remove_all_write_access_in_ptpage(
2214 d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
2215 if ( found == write_refs )
2216 return 0;
2220 // Search all the shadow L1 page tables...
2221 //
2222 for (i = 0; i < shadow_ht_buckets; i++)
2224 a = &d->arch.shadow_ht[i];
2225 while ( a && a->gpfn_and_flags )
2227 if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow )
2229 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
2230 if ( found == write_refs )
2231 return 0;
2234 a = a->next;
2238 FSH_LOG("%s: looking for %d refs, found %d refs",
2239 __func__, write_refs, found);
2241 return 0;
2244 static u32 remove_all_access_in_page(
2245 struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
2247 l1_pgentry_t *pl1e = map_domain_page(l1mfn);
2248 l1_pgentry_t match, ol1e;
2249 unsigned long flags = _PAGE_PRESENT;
2250 int i;
2251 u32 count = 0;
2252 int is_l1_shadow =
2253 ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
2254 PGT_l1_shadow);
2256 match = l1e_from_pfn(forbidden_gmfn, flags);
2258 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2260 if ( l1e_has_changed(pl1e[i], match, flags) )
2261 continue;
2263 ol1e = pl1e[i];
2264 pl1e[i] = l1e_empty();
2265 count++;
2267 if ( is_l1_shadow )
2268 shadow_put_page_from_l1e(ol1e, d);
2269 else /* must be an hl2 page */
2270 put_page(&frame_table[forbidden_gmfn]);
2273 unmap_domain_page(pl1e);
2275 return count;
2278 u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
2280 int i;
2281 struct shadow_status *a;
2282 u32 count = 0;
2284 if ( unlikely(!shadow_mode_enabled(d)) )
2285 return 0;
2287 ASSERT(shadow_lock_is_acquired(d));
2288 perfc_incrc(remove_all_access);
2290 for (i = 0; i < shadow_ht_buckets; i++)
2292 a = &d->arch.shadow_ht[i];
2293 while ( a && a->gpfn_and_flags )
2295 switch (a->gpfn_and_flags & PGT_type_mask)
2297 case PGT_l1_shadow:
2298 case PGT_l2_shadow:
2299 case PGT_l3_shadow:
2300 case PGT_l4_shadow:
2301 case PGT_hl2_shadow:
2302 count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
2303 break;
2304 case PGT_snapshot:
2305 case PGT_writable_pred:
2306 // these can't hold refs to the forbidden page
2307 break;
2308 default:
2309 BUG();
2312 a = a->next;
2316 return count;
2319 static int resync_all(struct domain *d, u32 stype)
2321 struct out_of_sync_entry *entry;
2322 unsigned i;
2323 unsigned long smfn;
2324 void *guest, *shadow, *snapshot;
2325 int need_flush = 0, external = shadow_mode_external(d);
2326 int unshadow;
2327 int changed;
2328 u32 min_max_shadow, min_max_snapshot;
2329 int min_shadow, max_shadow, min_snapshot, max_snapshot;
2331 ASSERT(shadow_lock_is_acquired(d));
2333 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2335 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
2336 continue;
2338 smfn = __shadow_status(d, entry->gpfn, stype);
2340 if ( !smfn )
2342 // For heavy weight shadows: no need to update refcounts if
2343 // there's no shadow page.
2344 //
2345 if ( shadow_mode_refcounts(d) )
2346 continue;
2348 // For light weight shadows: we only need to resync the refcounts to
2349 // the new contents of the guest page iff it has the right
2350 // page type.
2351 //
2352 if ( stype != ( pfn_to_page(entry->gmfn)->u.inuse.type_info & PGT_type_mask) )
2353 continue;
2356 FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
2357 stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
2359 // Compare guest's new contents to its snapshot, validating
2360 // and updating its shadow as appropriate.
2361 //
2362 guest = map_domain_page(entry->gmfn);
2363 snapshot = map_domain_page(entry->snapshot_mfn);
2365 if ( smfn )
2366 shadow = map_domain_page(smfn);
2367 else
2368 shadow = NULL;
2370 unshadow = 0;
2372 switch ( stype ) {
2373 case PGT_l1_shadow:
2375 l1_pgentry_t *guest1 = guest;
2376 l1_pgentry_t *shadow1 = shadow;
2377 l1_pgentry_t *snapshot1 = snapshot;
2378 int unshadow_l1 = 0;
2380 ASSERT(shadow_mode_write_l1(d) ||
2381 shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
2383 if ( !shadow_mode_refcounts(d) )
2384 revalidate_l1(d, guest1, snapshot1);
2386 if ( !smfn )
2387 break;
2389 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
2390 min_shadow = SHADOW_MIN(min_max_shadow);
2391 max_shadow = SHADOW_MAX(min_max_shadow);
2393 min_max_snapshot =
2394 pfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
2395 min_snapshot = SHADOW_MIN(min_max_snapshot);
2396 max_snapshot = SHADOW_MAX(min_max_snapshot);
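/*
 * For a shadow L1 page (and its snapshot) tlbflush_timestamp is reused to
 * hold a packed min/max pair of entry indices, which SHADOW_MIN() and
 * SHADOW_MAX() unpack; the exact encoding lives in shadow.h.  It bounds
 * the range of slots that have ever been populated, so if only slots
 * 0x300..0x37f of this 1024-entry table were ever touched, the loop below
 * compares 128 entries against the snapshot instead of 1024.
 */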
2398 changed = 0;
2400 for ( i = min_shadow; i <= max_shadow; i++ )
2402 if ( (i < min_snapshot) || (i > max_snapshot) ||
2403 l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
2405 int error;
2407 error = validate_pte_change(d, guest1[i], &shadow1[i]);
2408 if ( error == -1 )
2409 unshadow_l1 = 1;
2410 else {
2411 need_flush |= error;
2412 set_guest_back_ptr(d, shadow1[i], smfn, i);
2415 // can't update snapshots of linear page tables -- they
2416 // are used multiple times...
2417 //
2418 // snapshot[i] = new_pte;
2419 changed++;
2422 perfc_incrc(resync_l1);
2423 perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
2424 perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
2425 if (unshadow_l1) {
2426 l2_pgentry_t l2e;
2428 __shadow_get_l2e(entry->v, entry->va, &l2e);
2429 if (l2e_get_flags(l2e) & _PAGE_PRESENT) {
2430 put_shadow_ref(l2e_get_pfn(l2e));
2431 l2e = l2e_empty();
2432 __shadow_set_l2e(entry->v, entry->va, l2e);
2434 if (entry->v == current)
2435 need_flush = 1;
2439 break;
2441 case PGT_l2_shadow:
2443 int max = -1;
2445 l2_pgentry_t *guest2 = guest;
2446 l2_pgentry_t *shadow2 = shadow;
2447 l2_pgentry_t *snapshot2 = snapshot;
2449 ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
2450 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2452 changed = 0;
2453 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2455 l2_pgentry_t new_pde = guest2[i];
2457 if ( !is_guest_l2_slot(0,i) && !external )
2458 continue;
2460 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK))
2462 need_flush |= validate_pde_change(d, new_pde, &shadow2[i]);
2464 // can't update snapshots of linear page tables -- they
2465 // are used multiple times...
2466 //
2467 // snapshot[i] = new_pde;
2469 changed++;
2471 if ( l2e_get_intpte(new_pde) != 0 ) /* FIXME: check flags? */
2472 max = i;
2474 // XXX - This hack works for linux guests.
2475 // Need a better solution long term.
2476 if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
2477 unlikely(l2e_get_intpte(new_pde) != 0) &&
2478 !unshadow && MFN_PINNED(smfn) )
2479 unshadow = 1;
2481 if ( max == -1 )
2482 unshadow = 1;
2483 perfc_incrc(resync_l2);
2484 perfc_incr_histo(shm_l2_updates, changed, PT_UPDATES);
2485 break;
2487 case PGT_hl2_shadow:
2489 l2_pgentry_t *guest2 = guest;
2490 l2_pgentry_t *snapshot2 = snapshot;
2491 l1_pgentry_t *shadow2 = shadow;
2493 ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
2494 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2496 changed = 0;
2497 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2499 l2_pgentry_t new_pde = guest2[i];
2501 if ( !is_guest_l2_slot(0, i) && !external )
2502 continue;
2504 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK) )
2506 need_flush |= validate_hl2e_change(d, new_pde, &shadow2[i]);
2508 // can't update snapshots of linear page tables -- they
2509 // are used multiple times...
2510 //
2511 // snapshot[i] = new_pde;
2513 changed++;
2516 perfc_incrc(resync_hl2);
2517 perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
2518 break;
2520 default:
2521 BUG();
2524 if ( smfn )
2525 unmap_domain_page(shadow);
2526 unmap_domain_page(snapshot);
2527 unmap_domain_page(guest);
2529 if ( unlikely(unshadow) )
2531 perfc_incrc(unshadow_l2_count);
2532 shadow_unpin(smfn);
2533 if ( unlikely(shadow_mode_external(d)) )
2535 unsigned long hl2mfn;
2537 if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
2538 MFN_PINNED(hl2mfn) )
2539 shadow_unpin(hl2mfn);
2544 return need_flush;
2547 void __shadow_sync_all(struct domain *d)
2549 struct out_of_sync_entry *entry;
2550 int need_flush = 0;
2551 l1_pgentry_t *ppte, opte, npte;
2552 cpumask_t other_vcpus_mask;
2554 perfc_incrc(shadow_sync_all);
2556 ASSERT(shadow_lock_is_acquired(d));
2558 // First, remove all write permissions to the page tables
2559 //
2560 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2562 // Skip entries that have low bits set... Those aren't
2563 // real PTEs.
2564 //
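// (writable_pl1e is normally the machine address of the shadow L1 slot
// through which the frame is writable; real slots are always
// sizeof(l1_pgentry_t)-aligned, so set low bits can only mean the entry
// is a marker rather than an address, and the test below skips those.)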
2565 if ( entry->writable_pl1e & (sizeof(l1_pgentry_t)-1) )
2566 continue;
2568 ppte = (l1_pgentry_t *)(
2569 (char *)map_domain_page(entry->writable_pl1e >> PAGE_SHIFT) +
2570 (entry->writable_pl1e & ~PAGE_MASK));
2571 opte = npte = *ppte;
2572 l1e_remove_flags(npte, _PAGE_RW);
2574 if ( (l1e_get_flags(npte) & _PAGE_PRESENT) &&
2575 !shadow_get_page_from_l1e(npte, d) )
2576 BUG();
2577 *ppte = npte;
2578 set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT,
2579 (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
2580 shadow_put_page_from_l1e(opte, d);
2582 unmap_domain_page(ppte);
2585 /* Other VCPUs mustn't use the revoked writable mappings. */
2586 other_vcpus_mask = d->cpumask;
2587 cpu_clear(smp_processor_id(), other_vcpus_mask);
2588 flush_tlb_mask(other_vcpus_mask);
2591 /* Flush ourselves later. */
2591 need_flush = 1;
2593 /* Second, resync all L1 pages, then L2 pages, etc... */
2594 need_flush |= resync_all(d, PGT_l1_shadow);
2595 if ( shadow_mode_translate(d) )
2596 need_flush |= resync_all(d, PGT_hl2_shadow);
2597 need_flush |= resync_all(d, PGT_l2_shadow);
2599 if ( need_flush && !unlikely(shadow_mode_external(d)) )
2600 local_flush_tlb();
2602 free_out_of_sync_state(d);
2605 int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
2607 l1_pgentry_t gpte, spte, orig_gpte;
2608 struct vcpu *v = current;
2609 struct domain *d = v->domain;
2610 l2_pgentry_t gpde;
2612 spte = l1e_empty();
2614 SH_VVLOG("shadow_fault( va=%lx, code=%lu )",
2615 va, (unsigned long)regs->error_code);
2616 perfc_incrc(shadow_fault_calls);
2618 check_pagetable(v, "pre-sf");
2620 /*
2621 * Don't let someone else take the guest's table pages out-of-sync.
2622 */
2623 shadow_lock(d);
2625 /*
2626 * STEP 1. Resync any out-of-sync page-table entries that map this va,
2627 * so that the checks below see the guest's current page tables rather
2628 * than a stale shadow of them.
2629 */
2630 __shadow_sync_va(v, va);
2632 /*
2633 * STEP 2. Check the guest PTE.
2634 */
2635 __guest_get_l2e(v, va, &gpde);
2636 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2638 SH_VVLOG("shadow_fault - EXIT: L1 not present");
2639 perfc_incrc(shadow_fault_bail_pde_not_present);
2640 goto fail;
2643 // This can't fault because we hold the shadow lock and we've ensured that
2644 // the mapping is in-sync, so the check of the PDE's present bit, above,
2645 // covers this access.
2646 //
2647 orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
2648 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
2650 SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
2651 l1e_get_intpte(gpte));
2652 perfc_incrc(shadow_fault_bail_pte_not_present);
2653 goto fail;
2656 /* Write fault? */
2657 if ( regs->error_code & 2 )
2659 int allow_writes = 0;
2661 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
2663 if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) )
2665 allow_writes = 1;
2666 l1e_add_flags(gpte, _PAGE_RW);
2668 else
2670 /* Write fault on a read-only mapping. */
2671 SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
2672 l1e_get_intpte(gpte));
2673 perfc_incrc(shadow_fault_bail_ro_mapping);
2674 goto fail;
2677 else if ( unlikely(!shadow_mode_wr_pt_pte(d) && mfn_is_page_table(l1e_get_pfn(gpte))) )
2679 SH_LOG("l1pte_write_fault: no write access to page table page");
2680 domain_crash_synchronous();
2683 if ( unlikely(!l1pte_write_fault(v, &gpte, &spte, va)) )
2685 SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
2686 perfc_incrc(write_fault_bail);
2687 shadow_unlock(d);
2688 return 0;
2691 if ( allow_writes )
2692 l1e_remove_flags(gpte, _PAGE_RW);
2694 else
2696 if ( !l1pte_read_fault(d, &gpte, &spte) )
2698 SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
2699 perfc_incrc(read_fault_bail);
2700 shadow_unlock(d);
2701 return 0;
2705 /*
2706 * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
2707 */
2708 if ( l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK) )
2710 /* XXX Watch out for read-only L2 entries! (not used in Linux). */
2711 if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
2712 &gpte, sizeof(gpte))) )
2714 printk("%s() failed, crashing domain %d "
2715 "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
2716 __func__,d->domain_id, l2e_get_intpte(gpde), va);
2717 domain_crash_synchronous();
2720 __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
2723 shadow_set_l1e(va, spte, 1);
2725 perfc_incrc(shadow_fault_fixed);
2726 d->arch.shadow_fault_count++;
2728 shadow_unlock(d);
2730 check_pagetable(v, "post-sf");
2731 return EXCRET_fault_fixed;
2733 fail:
2734 shadow_unlock(d);
2735 return 0;
2738 void shadow_l1_normal_pt_update(
2739 struct domain *d,
2740 unsigned long pa, l1_pgentry_t gpte,
2741 struct domain_mmap_cache *cache)
2743 unsigned long sl1mfn;
2744 l1_pgentry_t *spl1e, spte;
2746 shadow_lock(d);
2748 sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
2749 if ( sl1mfn )
2751 SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
2752 (void *)pa, l1e_get_intpte(gpte));
2753 l1pte_propagate_from_guest(current->domain, gpte, &spte);
2755 spl1e = map_domain_page_with_cache(sl1mfn, cache);
2756 spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
2757 unmap_domain_page_with_cache(spl1e, cache);
2760 shadow_unlock(d);
2763 void shadow_l2_normal_pt_update(
2764 struct domain *d,
2765 unsigned long pa, l2_pgentry_t gpde,
2766 struct domain_mmap_cache *cache)
2768 unsigned long sl2mfn;
2769 l2_pgentry_t *spl2e;
2771 shadow_lock(d);
2773 sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
2774 if ( sl2mfn )
2776 SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
2777 (void *)pa, l2e_get_intpte(gpde));
2778 spl2e = map_domain_page_with_cache(sl2mfn, cache);
2779 validate_pde_change(d, gpde,
2780 &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
2781 unmap_domain_page_with_cache(spl2e, cache);
2784 shadow_unlock(d);
2787 #if CONFIG_PAGING_LEVELS >= 3
2788 void shadow_l3_normal_pt_update(
2789 struct domain *d,
2790 unsigned long pa, l3_pgentry_t gpde,
2791 struct domain_mmap_cache *cache)
2793 BUG(); // not yet implemented
2795 #endif
2797 #if CONFIG_PAGING_LEVELS >= 4
2798 void shadow_l4_normal_pt_update(
2799 struct domain *d,
2800 unsigned long pa, l4_pgentry_t gpde,
2801 struct domain_mmap_cache *cache)
2803 BUG(); // not yet implemented
2805 #endif
2807 int shadow_do_update_va_mapping(unsigned long va,
2808 l1_pgentry_t val,
2809 struct vcpu *v)
2811 struct domain *d = v->domain;
2812 l1_pgentry_t spte;
2813 int rc = 0;
2815 shadow_lock(d);
2817 // This is actually overkill - we don't need to sync the L1 itself,
2818 // just everything involved in getting to this L1 (i.e. we need
2819 // linear_pg_table[l1_linear_offset(va)] to be in sync)...
2820 //
2821 __shadow_sync_va(v, va);
2823 l1pte_propagate_from_guest(d, val, &spte);
2824 shadow_set_l1e(va, spte, 0);
2826 /*
2827 * If we're in log-dirty mode then we need to note that we've updated
2828 * the PTE in the PT-holding page. We need the machine frame number
2829 * for this.
2830 */
2831 __mark_dirty(d, va_to_l1mfn(v, va));
2833 shadow_unlock(d);
2835 return rc;
2839 /*
2840 * What lives where in the 32-bit address space in the various shadow modes,
2841 * and what it uses to get/maintain that mapping.
2843 * SHADOW MODE:      none          enable          translate        external
2845 * 4KB things:
2846 * guest_vtable      lin_l2        mapped per gl2  lin_l2 via hl2   mapped per gl2
2847 * shadow_vtable     n/a           sh_lin_l2       sh_lin_l2        mapped per gl2
2848 * hl2_vtable        n/a           n/a             lin_hl2 via hl2  mapped per gl2
2849 * monitor_vtable    n/a           n/a             n/a              mapped once
2851 * 4MB things:
2852 * guest_linear      lin via gl2   lin via gl2     lin via hl2      lin via hl2
2853 * shadow_linear     n/a           sh_lin via sl2  sh_lin via sl2   sh_lin via sl2
2854 * monitor_linear    n/a           n/a             n/a              ???
2855 * perdomain         perdomain     perdomain       perdomain        perdomain
2856 * R/O M2P           R/O M2P       R/O M2P         n/a              n/a
2857 * R/W M2P           R/W M2P       R/W M2P         R/W M2P          R/W M2P
2858 * P2M               n/a           n/a             R/O M2P          R/O M2P
2860 * NB:
2861 * update_pagetables(), __update_pagetables(), shadow_mode_enable(),
2862 * shadow_l2_table(), shadow_hl2_table(), and alloc_monitor_pagetable()
2863 * all play a part in maintaining these mappings.
2864 */
2865 void __update_pagetables(struct vcpu *v)
2867 struct domain *d = v->domain;
2868 unsigned long gmfn = pagetable_get_pfn(v->arch.guest_table);
2869 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
2870 unsigned long smfn, hl2mfn, old_smfn;
2872 int max_mode = ( shadow_mode_external(d) ? SHM_external
2873 : shadow_mode_translate(d) ? SHM_translate
2874 : shadow_mode_enabled(d) ? SHM_enable
2875 : 0 );
2877 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
2878 ASSERT( max_mode );
2880 /*
2881 * arch.guest_vtable
2882 */
2883 if ( max_mode & (SHM_enable | SHM_external) )
2885 if ( likely(v->arch.guest_vtable != NULL) )
2886 unmap_domain_page(v->arch.guest_vtable);
2887 v->arch.guest_vtable = map_domain_page(gmfn);
2890 /*
2891 * arch.shadow_table
2892 */
2893 if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
2894 smfn = shadow_l2_table(d, gpfn, gmfn);
2895 if ( !get_shadow_ref(smfn) )
2896 BUG();
2897 old_smfn = pagetable_get_pfn(v->arch.shadow_table);
2898 v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
2899 if ( old_smfn )
2900 put_shadow_ref(old_smfn);
2902 SH_VVLOG("__update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
2904 /*
2905 * arch.shadow_vtable
2906 */
2907 if ( max_mode == SHM_external )
2909 if ( v->arch.shadow_vtable )
2910 unmap_domain_page(v->arch.shadow_vtable);
2911 v->arch.shadow_vtable = map_domain_page(smfn);
2914 /*
2915 * arch.hl2_vtable
2916 */
2918 // if max_mode == SHM_translate, then the hl2 is already installed
2919 // correctly in its smfn, and there's nothing to do.
2920 //
2921 if ( max_mode == SHM_external )
2923 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
2924 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
2925 if ( v->arch.hl2_vtable )
2926 unmap_domain_page(v->arch.hl2_vtable);
2927 v->arch.hl2_vtable = map_domain_page(hl2mfn);
2930 /*
2931 * fixup pointers in monitor table, as necessary
2932 */
2933 if ( max_mode == SHM_external )
2935 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
2936 l2_pgentry_t old_hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
2937 l2_pgentry_t old_sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
2939 ASSERT( shadow_mode_translate(d) );
2941 if ( !get_shadow_ref(hl2mfn) )
2942 BUG();
2943 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
2944 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
2945 if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
2946 put_shadow_ref(l2e_get_pfn(old_hl2e));
2948 if ( !get_shadow_ref(smfn) )
2949 BUG();
2950 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
2951 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
2952 if ( l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
2953 put_shadow_ref(l2e_get_pfn(old_sl2e));
2955 // XXX - maybe this can be optimized somewhat??
2956 local_flush_tlb();
2961 /************************************************************************/
2962 /************************************************************************/
2963 /************************************************************************/
2965 #if SHADOW_DEBUG
2967 // The following is entirely for _check_pagetable()'s benefit.
2968 // _check_pagetable() wants to know whether a given entry in a
2969 // shadow page table is supposed to be the shadow of the guest's
2970 // current entry, or the shadow of the entry held in the snapshot
2971 // taken above.
2972 //
2973 // Here, we mark all currently existing entries as reflecting
2974 // the snapshot, above. All other places in xen that update
2975 // the shadow will keep the shadow in sync with the guest's
2976 // entries (via l1pte_propagate_from_guest and friends), which clear
2977 // the SHADOW_REFLECTS_SNAPSHOT bit.
2978 //
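// Concretely, check_pte() below keys off this bit: a shadow entry that
// still has SHADOW_REFLECTS_SNAPSHOT set is compared against the snapshot
// PTE, while one that has been revalidated since is compared against the
// live guest PTE.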
2979 static void
2980 mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
2982 unsigned long smfn;
2983 l1_pgentry_t *l1e;
2984 l2_pgentry_t *l2e;
2985 unsigned i;
2987 if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
2989 l1e = map_domain_page(smfn);
2990 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
2991 if ( is_guest_l1_slot(i) &&
2992 (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
2993 l1e_add_flags(l1e[i], SHADOW_REFLECTS_SNAPSHOT);
2994 unmap_domain_page(l1e);
2997 if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
2999 l2e = map_domain_page(smfn);
3000 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
3001 if ( is_guest_l2_slot(0, i) &&
3002 (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
3003 l2e_add_flags(l2e[i], SHADOW_REFLECTS_SNAPSHOT);
3004 unmap_domain_page(l2e);
3008 // BUG: these are not SMP safe...
3009 static int sh_l2_present;
3010 static int sh_l1_present;
3011 static char *sh_check_name;
3012 int shadow_status_noswap;
3014 #define v2m(_v, _adr) ({ \
3015 unsigned long _a = (unsigned long)(_adr); \
3016 l2_pgentry_t _pde = shadow_linear_l2_table(_v)[l2_table_offset(_a)]; \
3017 unsigned long _pa = -1; \
3018 if ( l2e_get_flags(_pde) & _PAGE_PRESENT ) \
3019 { \
3020 l1_pgentry_t _pte; \
3021 _pte = shadow_linear_pg_table[l1_linear_offset(_a)]; \
3022 if ( l1e_get_flags(_pte) & _PAGE_PRESENT ) \
3023 _pa = l1e_get_paddr(_pte); \
3024 } \
3025 _pa | (_a & ~PAGE_MASK); \
3026 })
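// v2m() translates a hypervisor virtual address into a machine address by
// walking the shadow linear mappings by hand: it checks the shadow L2
// entry first (so it never touches an unmapped linear-table page) and only
// then reads the shadow L1 entry.  On failure _pa stays -1 and the macro
// yields an all-ones value, which shows up as an obviously bogus pointer
// in the FAIL() dump below.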
3028 #define FAIL(_f, _a...) \
3029 do { \
3030 printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n", \
3031 sh_check_name, level, l2_idx, l1_idx, ## _a, \
3032 __FILE__, __LINE__); \
3033 printk("guest_pte=%" PRIpte " eff_guest_pte=%" PRIpte \
3034 " shadow_pte=%" PRIpte " snapshot_pte=%" PRIpte \
3035 " &guest=%p &shadow=%p &snap=%p v2m(&guest)=%p" \
3036 " v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n", \
3037 l1e_get_intpte(guest_pte), l1e_get_intpte(eff_guest_pte), \
3038 l1e_get_intpte(shadow_pte), l1e_get_intpte(snapshot_pte), \
3039 p_guest_pte, p_shadow_pte, p_snapshot_pte, \
3040 (void *)v2m(v, p_guest_pte), (void *)v2m(v, p_shadow_pte), \
3041 (void *)v2m(v, p_snapshot_pte), \
3042 (l2_idx << L2_PAGETABLE_SHIFT) | \
3043 (l1_idx << L1_PAGETABLE_SHIFT)); \
3044 errors++; \
3045 } while ( 0 )
3047 static int check_pte(
3048 struct vcpu *v,
3049 l1_pgentry_t *p_guest_pte,
3050 l1_pgentry_t *p_shadow_pte,
3051 l1_pgentry_t *p_snapshot_pte,
3052 int level, int l2_idx, int l1_idx)
3054 struct domain *d = v->domain;
3055 l1_pgentry_t guest_pte = *p_guest_pte;
3056 l1_pgentry_t shadow_pte = *p_shadow_pte;
3057 l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
3058 l1_pgentry_t eff_guest_pte = l1e_empty();
3059 unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
3060 int errors = 0, guest_writable;
3061 int page_table_page;
3063 if ( (l1e_get_intpte(shadow_pte) == 0) ||
3064 (l1e_get_intpte(shadow_pte) == 0xdeadface) ||
3065 (l1e_get_intpte(shadow_pte) == 0x00000E00) )
3066 return errors; /* always safe */
3068 if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
3069 FAIL("Non zero not present shadow_pte");
3071 if ( level == 2 ) sh_l2_present++;
3072 if ( level == 1 ) sh_l1_present++;
3074 if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
3075 eff_guest_pte = snapshot_pte;
3076 else
3077 eff_guest_pte = guest_pte;
3079 if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
3080 FAIL("Guest not present yet shadow is");
3082 mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
3084 if ( ((l1e_get_intpte(shadow_pte) & mask) != (l1e_get_intpte(eff_guest_pte) & mask)) )
3085 FAIL("Corrupt?");
3087 if ( (level == 1) &&
3088 (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
3089 !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
3090 FAIL("Dirty coherence");
3092 if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
3093 !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
3094 FAIL("Accessed coherence");
3096 if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
3097 FAIL("global bit set in shadow");
3099 eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
3100 eff_guest_mfn = __gpfn_to_mfn(d, eff_guest_pfn);
3101 shadow_mfn = l1e_get_pfn(shadow_pte);
3103 if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
3104 FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%" PRIpte "\n",
3105 __func__, eff_guest_pfn, l1e_get_intpte(eff_guest_pte));
3107 page_table_page = mfn_is_page_table(eff_guest_mfn);
3109 guest_writable =
3110 (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
3111 (shadow_mode_write_l1(d) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
3113 if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
3115 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=%lx page_table_page=%d\n",
3116 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3117 frame_table[eff_guest_mfn].u.inuse.type_info,
3118 page_table_page);
3119 FAIL("RW coherence");
3122 if ( (level == 1) &&
3123 (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
3124 !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
3126 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=%lx page_table_page=%d\n",
3127 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3128 frame_table[eff_guest_mfn].u.inuse.type_info,
3129 page_table_page);
3130 FAIL("RW2 coherence");
3133 if ( eff_guest_mfn == shadow_mfn )
3135 if ( level > 1 )
3136 FAIL("Linear map ???"); /* XXX this will fail on BSD */
3138 else
3140 if ( level < 2 )
3141 FAIL("Shadow in L1 entry?");
3143 if ( level == 2 )
3145 if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
3146 FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
3147 __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
3149 else
3150 BUG(); // XXX -- not handled yet.
3153 return errors;
3155 #undef FAIL
3156 #undef v2m
3158 static int check_l1_table(
3159 struct vcpu *v, unsigned long gpfn,
3160 unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
3162 struct domain *d = v->domain;
3163 int i;
3164 unsigned long snapshot_mfn;
3165 l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
3166 int errors = 0;
3168 if ( page_out_of_sync(pfn_to_page(gmfn)) )
3170 snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
3171 ASSERT(snapshot_mfn);
3172 p_snapshot = map_domain_page(snapshot_mfn);
3175 p_guest = map_domain_page(gmfn);
3176 p_shadow = map_domain_page(smfn);
3178 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3179 errors += check_pte(v, p_guest+i, p_shadow+i,
3180 p_snapshot ? p_snapshot+i : NULL,
3181 1, l2_idx, i);
3183 unmap_domain_page(p_shadow);
3184 unmap_domain_page(p_guest);
3185 if ( p_snapshot )
3186 unmap_domain_page(p_snapshot);
3188 return errors;
3191 #define FAILPT(_f, _a...) \
3192 do { \
3193 printk("XXX FAIL %s-PT " _f "\n", sh_check_name, ## _a ); \
3194 errors++; \
3195 } while ( 0 )
3197 int check_l2_table(
3198 struct vcpu *v, unsigned long gmfn, unsigned long smfn, int oos_pdes)
3200 struct domain *d = v->domain;
3201 l2_pgentry_t *gpl2e = (l2_pgentry_t *)map_domain_page(gmfn);
3202 l2_pgentry_t *spl2e = (l2_pgentry_t *)map_domain_page(smfn);
3203 l2_pgentry_t match;
3204 int i;
3205 int errors = 0;
3206 int limit;
3208 if ( !oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != d) )
3209 FAILPT("domain doesn't own page");
3210 if ( oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != NULL) )
3211 FAILPT("bogus owner for snapshot page");
3212 if ( page_get_owner(pfn_to_page(smfn)) != NULL )
3213 FAILPT("shadow page mfn=0x%lx is owned by someone, domid=%d",
3214 smfn, page_get_owner(pfn_to_page(smfn))->domain_id);
3216 #if 0
3217 if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3218 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3219 ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
3220 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
3222 for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3223 i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
3224 i++ )
3225 printk("+++ (%d) %lx %lx\n",i,
3226 l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
3227 FAILPT("hypervisor entries inconsistent");
3230 if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
3231 l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
3232 FAILPT("hypervisor linear map inconsistent");
3233 #endif
3235 match = l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
3236 if ( !shadow_mode_external(d) &&
3237 l2e_has_changed(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
3238 match, PAGE_FLAG_MASK))
3240 FAILPT("hypervisor shadow linear map inconsistent %" PRIpte " %" PRIpte,
3241 l2e_get_intpte(spl2e[SH_LINEAR_PT_VIRT_START >>
3242 L2_PAGETABLE_SHIFT]),
3243 l2e_get_intpte(match));
3246 match = l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
3247 if ( !shadow_mode_external(d) &&
3248 l2e_has_changed(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
3249 match, PAGE_FLAG_MASK))
3251 FAILPT("hypervisor per-domain map inconsistent saw %" PRIpte ", expected (va=%p) %" PRIpte,
3252 l2e_get_intpte(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
3253 d->arch.mm_perdomain_pt,
3254 l2e_get_intpte(match));
3257 #ifdef __i386__
3258 if ( shadow_mode_external(d) )
3259 limit = L2_PAGETABLE_ENTRIES;
3260 else
3261 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3262 #else
3263 limit = 0; /* XXX x86/64 XXX */
3264 #endif
3266 /* Check the whole L2. */
3267 for ( i = 0; i < limit; i++ )
3268 errors += check_pte(v,
3269 (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
3270 (l1_pgentry_t*)(&spl2e[i]),
3271 NULL,
3272 2, i, 0);
3274 unmap_domain_page(spl2e);
3275 unmap_domain_page(gpl2e);
3277 #if 1
3278 if ( errors )
3279 printk("check_l2_table returning %d errors\n", errors);
3280 #endif
3282 return errors;
3284 #undef FAILPT
3286 int _check_pagetable(struct vcpu *v, char *s)
3288 struct domain *d = v->domain;
3289 pagetable_t pt = v->arch.guest_table;
3290 unsigned long gptbase = pagetable_get_paddr(pt);
3291 unsigned long ptbase_pfn, smfn;
3292 unsigned long i;
3293 l2_pgentry_t *gpl2e, *spl2e;
3294 unsigned long ptbase_mfn = 0;
3295 int errors = 0, limit, oos_pdes = 0;
3297 //_audit_domain(d, AUDIT_QUIET);
3298 shadow_lock(d);
3300 sh_check_name = s;
3301 //SH_VVLOG("%s-PT Audit", s);
3302 sh_l2_present = sh_l1_present = 0;
3303 perfc_incrc(check_pagetable);
3305 ptbase_mfn = gptbase >> PAGE_SHIFT;
3306 ptbase_pfn = __mfn_to_gpfn(d, ptbase_mfn);
3308 if ( !(smfn = __shadow_status(d, ptbase_pfn, PGT_base_page_table)) )
3310 printk("%s-PT %lx not shadowed\n", s, gptbase);
3311 goto out;
3313 if ( page_out_of_sync(pfn_to_page(ptbase_mfn)) )
3315 ptbase_mfn = __shadow_status(d, ptbase_pfn, PGT_snapshot);
3316 oos_pdes = 1;
3317 ASSERT(ptbase_mfn);
3320 errors += check_l2_table(v, ptbase_mfn, smfn, oos_pdes);
3322 gpl2e = (l2_pgentry_t *) map_domain_page(ptbase_mfn);
3323 spl2e = (l2_pgentry_t *) map_domain_page(smfn);
3325 /* Go back and recurse. */
3326 #ifdef __i386__
3327 if ( shadow_mode_external(d) )
3328 limit = L2_PAGETABLE_ENTRIES;
3329 else
3330 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3331 #else
3332 limit = 0; /* XXX x86/64 XXX */
3333 #endif
3335 for ( i = 0; i < limit; i++ )
3337 unsigned long gl1pfn = l2e_get_pfn(gpl2e[i]);
3338 unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
3339 unsigned long sl1mfn = l2e_get_pfn(spl2e[i]);
3341 if ( l2e_get_intpte(spl2e[i]) != 0 ) /* FIXME: check flags? */
3343 errors += check_l1_table(v, gl1pfn, gl1mfn, sl1mfn, i);
3347 unmap_domain_page(spl2e);
3348 unmap_domain_page(gpl2e);
3350 #if 0
3351 SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
3352 sh_l2_present, sh_l1_present);
3353 #endif
3355 out:
3356 if ( errors )
3357 BUG();
3359 shadow_unlock(d);
3361 return errors;
3364 int _check_all_pagetables(struct vcpu *v, char *s)
3366 struct domain *d = v->domain;
3367 int i;
3368 struct shadow_status *a;
3369 unsigned long gmfn;
3370 int errors = 0;
3372 shadow_status_noswap = 1;
3374 sh_check_name = s;
3375 SH_VVLOG("%s-PT Audit domid=%d", s, d->domain_id);
3376 sh_l2_present = sh_l1_present = 0;
3377 perfc_incrc(check_all_pagetables);
3379 for (i = 0; i < shadow_ht_buckets; i++)
3381 a = &d->arch.shadow_ht[i];
3382 while ( a && a->gpfn_and_flags )
3384 gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
3386 switch ( a->gpfn_and_flags & PGT_type_mask )
3388 case PGT_l1_shadow:
3389 errors += check_l1_table(v, a->gpfn_and_flags & PGT_mfn_mask,
3390 gmfn, a->smfn, 0);
3391 break;
3392 case PGT_l2_shadow:
3393 errors += check_l2_table(v, gmfn, a->smfn,
3394 page_out_of_sync(pfn_to_page(gmfn)));
3395 break;
3396 case PGT_l3_shadow:
3397 case PGT_l4_shadow:
3398 case PGT_hl2_shadow:
3399 BUG(); // XXX - ought to fix this...
3400 break;
3401 case PGT_snapshot:
3402 case PGT_writable_pred:
3403 break;
3404 default:
3405 errors++;
3406 printk("unexpected shadow type %lx, gpfn=%lx, "
3407 "gmfn=%lx smfn=%lx\n",
3408 a->gpfn_and_flags & PGT_type_mask,
3409 a->gpfn_and_flags & PGT_mfn_mask,
3410 gmfn, a->smfn);
3411 BUG();
3413 a = a->next;
3417 shadow_status_noswap = 0;
3419 if ( errors )
3420 BUG();
3422 return errors;
3425 #endif // SHADOW_DEBUG
3427 /*
3428 * Local variables:
3429 * mode: C
3430 * c-set-style: "BSD"
3431 * c-basic-offset: 4
3432 * tab-width: 4
3433 * indent-tabs-mode: nil
3434 * End:
3435 */