direct-io.hg

view xen/arch/x86/shadow32.c @ 7966:060a6634d9ec

SHADOW_CONTROL_OP_OFF should be checking whether shadow mode
is actually currently enabled.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Nov 22 11:53:45 2005 +0100 (2005-11-22)
parents faf5c318f9b0
children c7508abc5b6b
line source
1 /******************************************************************************
2 * arch/x86/shadow.c
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
23 #include <xen/config.h>
24 #include <xen/types.h>
25 #include <xen/mm.h>
26 #include <xen/domain_page.h>
27 #include <asm/shadow.h>
28 #include <asm/page.h>
29 #include <xen/event.h>
30 #include <xen/sched.h>
31 #include <xen/trace.h>
33 #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
34 #define va_to_l1mfn(_ed, _va) \
35 (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
37 static void shadow_free_snapshot(struct domain *d,
38 struct out_of_sync_entry *entry);
39 static void remove_out_of_sync_entries(struct domain *d, unsigned long smfn);
40 static void free_writable_pte_predictions(struct domain *d);
42 #if SHADOW_DEBUG
43 static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
44 #endif
46 /********
48 There's a per-domain shadow table spin lock which works fine for SMP
49 hosts. We don't have to worry about interrupts as no shadow operations
50 happen in an interrupt context. It's probably not quite ready for SMP
51 guest operation as we have to worry about synchronisation between gpte
52 and spte updates. It's possible that this might only happen in a
53 hypercall context, in which case we'll probably have a per-domain
54 hypercall lock anyhow (at least initially).
56 ********/
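/*
 * Editorial sketch, not part of the original file: the expected use of the
 * per-domain shadow lock described above, following the pattern used by
 * shadow_mode_enable() and shadow_mode_control() later in this file.
 * do_something_shadowy() is a hypothetical caller shown only for
 * illustration.
 */
static int do_something_shadowy(struct domain *d)
{
    int rc = 0;

    shadow_lock(d);                      /* serialise all shadow-table updates */
    ASSERT(shadow_lock_is_acquired(d));
    /* ... perform gpte/spte updates under the lock ... */
    shadow_unlock(d);

    return rc;
}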
58 static inline int
59 shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
60 unsigned long new_type)
61 {
62 struct pfn_info *page = pfn_to_page(gmfn);
63 int pinned = 0, okay = 1;
65 if ( page_out_of_sync(page) )
66 {
67 // Don't know how long ago this snapshot was taken.
68 // Can't trust it to be recent enough.
69 //
70 __shadow_sync_mfn(d, gmfn);
71 }
73 if ( !shadow_mode_refcounts(d) )
74 return 1;
76 if ( unlikely(page_is_page_table(page)) )
77 return 1;
79 FSH_LOG("%s: gpfn=%lx gmfn=%lx nt=%08lx", __func__, gpfn, gmfn, new_type);
81 if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
82 {
83 FSH_LOG("%s: couldn't find/remove all write accesses, gpfn=%lx gmfn=%lx",
84 __func__, gpfn, gmfn);
85 #if 1 || defined(LIVE_DANGEROUSLY)
86 set_bit(_PGC_page_table, &page->count_info);
87 return 1;
88 #endif
89 return 0;
91 }
93 // To convert this page for use as a page table, the writable count
94 // should now be zero. Test this by grabbing the page as a page table,
95 // and then immediately releasing. This will also deal with any
96 // necessary TLB flushing issues for us.
97 //
98 // The cruft here about pinning doesn't really work right. This
99 // needs rethinking/rewriting... Need to gracefully deal with the
100 // TLB flushes required when promoting a writable page, and also deal
101 // with any outstanding (external) writable refs to this page (by
102 // refusing to promote it). The pinning headache complicates this
103 // code -- it would all get much simpler if we stop using
104 // shadow_lock() and move the shadow code to BIGLOCK().
105 //
106 if ( unlikely(!get_page(page, d)) )
107 BUG(); // XXX -- needs more thought for a graceful failure
108 if ( unlikely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) )
109 {
110 pinned = 1;
111 put_page_and_type(page);
112 }
113 if ( get_page_type(page, PGT_base_page_table) )
114 {
115 set_bit(_PGC_page_table, &page->count_info);
116 put_page_type(page);
117 }
118 else
119 {
120 printk("shadow_promote: get_page_type failed "
121 "dom%d gpfn=%lx gmfn=%lx t=%08lx\n",
122 d->domain_id, gpfn, gmfn, new_type);
123 okay = 0;
124 }
126 // Now put the type back to writable...
127 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
128 BUG(); // XXX -- needs more thought for a graceful failure
129 if ( unlikely(pinned) )
130 {
131 if ( unlikely(test_and_set_bit(_PGT_pinned,
132 &page->u.inuse.type_info)) )
133 BUG(); // hmm... someone pinned this again?
134 }
135 else
136 put_page_and_type(page);
138 return okay;
139 }
141 static inline void
142 shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
143 {
144 if ( !shadow_mode_refcounts(d) )
145 return;
147 ASSERT(frame_table[gmfn].count_info & PGC_page_table);
149 if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
150 {
151 clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
153 if ( page_out_of_sync(pfn_to_page(gmfn)) )
154 {
155 remove_out_of_sync_entries(d, gmfn);
156 }
157 }
158 }
160 /*
161 * Things in shadow mode that collect get_page() refs to the domain's
162 * pages are:
163 * - PGC_allocated takes a gen count, just like normal.
164 * - A writable page can be pinned (paravirtualized guests may consider
165 * these pages to be L1s or L2s, and don't know the difference).
166 * Pinning a page takes a gen count (but, for domains in shadow mode,
167 * it *doesn't* take a type count)
168 * - CR3 grabs a ref to whatever it points at, just like normal.
169 * - Shadow mode grabs an initial gen count for itself, as a placeholder
170 * for whatever references will exist.
171 * - Shadow PTEs that point to a page take a gen count, just like regular
172 * PTEs. However, they don't get a type count, as get_page_type() is
173 * hardwired to keep writable pages' counts at 1 for domains in shadow
174 * mode.
175 * - Whenever we shadow a page, the entry in the shadow hash grabs a
176 * general ref to the page.
177 * - Whenever a page goes out of sync, the out of sync entry grabs a
178 * general ref to the page.
179 */
180 /*
181 * pfn_info fields for pages allocated as shadow pages:
182 *
183 * All 32 bits of count_info are a simple count of refs to this shadow
184 * from a) other shadow pages, b) current CR3's (aka ed->arch.shadow_table),
185 * c) if it's a pinned shadow root pgtable, d) outstanding out-of-sync
186 * references.
187 *
188 * u.inuse._domain is left NULL, to prevent accidentally allowing some random
189 * domain from gaining permissions to map this page.
190 *
191 * u.inuse.type_info & PGT_type_mask remembers what kind of page is being
192 * shadowed.
193 * u.inuse.type_info & PGT_mfn_mask holds the mfn of the page being shadowed.
194 * u.inuse.type_info & PGT_pinned says that an extra reference to this shadow
195 * currently exists because this is a shadow of a root page, and we
196 * don't want to let those disappear just because no CR3 is currently pointing
197 * at it.
198 *
199 * tlbflush_timestamp holds a min & max index of valid page table entries
200 * within the shadow page.
201 */
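/*
 * Editorial sketch, not part of the original file: how the pfn_info fields
 * described above are decoded in practice. This mirrors what
 * free_shadow_page() and free_shadow_l1_table() do below; the function name
 * is hypothetical.
 */
static inline void decode_shadow_page_fields(unsigned long smfn)
{
    struct pfn_info *page = &frame_table[smfn];
    unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;  /* mfn being shadowed */
    unsigned long type = page->u.inuse.type_info & PGT_type_mask; /* kind of shadow */
    u32 min_max = page->tlbflush_timestamp;
    int min = SHADOW_MIN(min_max);   /* first valid entry index in this shadow */
    int max = SHADOW_MAX(min_max);   /* last valid entry index in this shadow */

    (void)gmfn; (void)type; (void)min; (void)max;
}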
203 static inline unsigned long
204 alloc_shadow_page(struct domain *d,
205 unsigned long gpfn, unsigned long gmfn,
206 u32 psh_type)
207 {
208 struct pfn_info *page;
209 unsigned long smfn;
210 int pin = 0;
212 // Currently, we only keep pre-zero'ed pages around for use as L1's...
213 // This will change. Soon.
214 //
215 if ( psh_type == PGT_l1_shadow )
216 {
217 if ( !list_empty(&d->arch.free_shadow_frames) )
218 {
219 struct list_head *entry = d->arch.free_shadow_frames.next;
220 page = list_entry(entry, struct pfn_info, list);
221 list_del(entry);
222 perfc_decr(free_l1_pages);
223 }
224 else
225 {
226 page = alloc_domheap_page(NULL);
227 void *l1 = map_domain_page(page_to_pfn(page));
228 memset(l1, 0, PAGE_SIZE);
229 unmap_domain_page(l1);
230 }
231 }
232 else
233 page = alloc_domheap_page(NULL);
235 if ( unlikely(page == NULL) )
236 {
237 printk("Couldn't alloc shadow page! dom%d count=%d\n",
238 d->domain_id, d->arch.shadow_page_count);
239 printk("Shadow table counts: l1=%d l2=%d hl2=%d snapshot=%d\n",
240 perfc_value(shadow_l1_pages),
241 perfc_value(shadow_l2_pages),
242 perfc_value(hl2_table_pages),
243 perfc_value(snapshot_pages));
244 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
245 }
247 smfn = page_to_pfn(page);
249 ASSERT( (gmfn & ~PGT_mfn_mask) == 0 );
250 page->u.inuse.type_info = psh_type | gmfn;
251 page->count_info = 0;
252 page->tlbflush_timestamp = 0;
254 switch ( psh_type )
255 {
256 case PGT_l1_shadow:
257 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
258 goto fail;
259 perfc_incr(shadow_l1_pages);
260 d->arch.shadow_page_count++;
261 break;
263 case PGT_l2_shadow:
264 if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
265 goto fail;
266 perfc_incr(shadow_l2_pages);
267 d->arch.shadow_page_count++;
268 if ( PGT_l2_page_table == PGT_root_page_table )
269 pin = 1;
271 break;
273 case PGT_hl2_shadow:
274 // Treat an hl2 as an L1 for purposes of promotion.
275 // For external mode domains, treat them as an L2 for purposes of
276 // pinning.
277 //
278 if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
279 goto fail;
280 perfc_incr(hl2_table_pages);
281 d->arch.hl2_page_count++;
282 if ( shadow_mode_external(d) &&
283 (PGT_l2_page_table == PGT_root_page_table) )
284 pin = 1;
286 break;
288 case PGT_snapshot:
289 perfc_incr(snapshot_pages);
290 d->arch.snapshot_page_count++;
291 break;
293 default:
294 printk("Alloc shadow weird page type type=%08x\n", psh_type);
295 BUG();
296 break;
297 }
299 // Don't add a new shadow of something that already has a snapshot.
300 //
301 ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
303 set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
305 if ( pin )
306 shadow_pin(smfn);
308 return smfn;
310 fail:
311 FSH_LOG("promotion of pfn=%lx mfn=%lx failed! external gnttab refs?",
312 gpfn, gmfn);
313 free_domheap_page(page);
314 return 0;
315 }
317 static void inline
318 free_shadow_l1_table(struct domain *d, unsigned long smfn)
319 {
320 l1_pgentry_t *pl1e = map_domain_page(smfn);
321 int i;
322 struct pfn_info *spage = pfn_to_page(smfn);
323 u32 min_max = spage->tlbflush_timestamp;
324 int min = SHADOW_MIN(min_max);
325 int max = SHADOW_MAX(min_max);
327 for ( i = min; i <= max; i++ )
328 {
329 shadow_put_page_from_l1e(pl1e[i], d);
330 pl1e[i] = l1e_empty();
331 }
333 unmap_domain_page(pl1e);
334 }
336 static void inline
337 free_shadow_hl2_table(struct domain *d, unsigned long smfn)
338 {
339 l1_pgentry_t *hl2 = map_domain_page(smfn);
340 int i, limit;
342 SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
344 #ifdef __i386__
345 if ( shadow_mode_external(d) )
346 limit = L2_PAGETABLE_ENTRIES;
347 else
348 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
349 #else
350 limit = 0; /* XXX x86/64 XXX */
351 #endif
353 for ( i = 0; i < limit; i++ )
354 {
355 if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
356 put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
357 }
359 unmap_domain_page(hl2);
360 }
362 static void inline
363 free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
364 {
365 l2_pgentry_t *pl2e = map_domain_page(smfn);
366 int i, external = shadow_mode_external(d);
368 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
369 if ( external || is_guest_l2_slot(type, i) )
370 if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
371 put_shadow_ref(l2e_get_pfn(pl2e[i]));
373 if ( (PGT_base_page_table == PGT_l2_page_table) &&
374 shadow_mode_translate(d) && !external )
375 {
376 // free the ref to the hl2
377 //
378 put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
379 }
381 unmap_domain_page(pl2e);
382 }
384 void free_shadow_page(unsigned long smfn)
385 {
386 struct pfn_info *page = &frame_table[smfn];
387 unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
388 struct domain *d = page_get_owner(pfn_to_page(gmfn));
389 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
390 unsigned long type = page->u.inuse.type_info & PGT_type_mask;
392 SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
394 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
396 delete_shadow_status(d, gpfn, gmfn, type);
398 switch ( type )
399 {
400 case PGT_l1_shadow:
401 perfc_decr(shadow_l1_pages);
402 shadow_demote(d, gpfn, gmfn);
403 free_shadow_l1_table(d, smfn);
404 d->arch.shadow_page_count--;
405 break;
407 case PGT_l2_shadow:
408 perfc_decr(shadow_l2_pages);
409 shadow_demote(d, gpfn, gmfn);
410 free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
411 d->arch.shadow_page_count--;
412 break;
414 case PGT_hl2_shadow:
415 perfc_decr(hl2_table_pages);
416 shadow_demote(d, gpfn, gmfn);
417 free_shadow_hl2_table(d, smfn);
418 d->arch.hl2_page_count--;
419 break;
421 case PGT_snapshot:
422 perfc_decr(snapshot_pages);
423 d->arch.snapshot_page_count--;
424 break;
426 default:
427 printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
428 page_to_pfn(page), page->u.inuse.type_info);
429 break;
430 }
432 // No TLB flushes are needed the next time this page gets allocated.
433 //
434 page->tlbflush_timestamp = 0;
435 page->u.free.cpumask = CPU_MASK_NONE;
437 if ( type == PGT_l1_shadow )
438 {
439 list_add(&page->list, &d->arch.free_shadow_frames);
440 perfc_incr(free_l1_pages);
441 }
442 else
443 free_domheap_page(page);
444 }
446 void
447 remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
448 {
449 unsigned long smfn;
451 //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
453 shadow_lock(d);
455 while ( stype >= PGT_l1_shadow )
456 {
457 smfn = __shadow_status(d, gpfn, stype);
458 if ( smfn && MFN_PINNED(smfn) )
459 shadow_unpin(smfn);
460 stype -= PGT_l1_shadow;
461 }
463 shadow_unlock(d);
464 }
466 static void inline
467 release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
468 {
469 struct pfn_info *page;
471 page = &frame_table[entry->gmfn];
473 // Decrement ref count of guest & shadow pages
474 //
475 put_page(page);
477 // Only use entries that have low bits clear...
478 //
479 if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
480 {
481 put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
482 entry->writable_pl1e = -2;
483 }
484 else
485 ASSERT( entry->writable_pl1e == -1 );
487 // Free the snapshot
488 //
489 shadow_free_snapshot(d, entry);
490 }
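/*
 * Editorial sketch, not part of the original file: how writable_pl1e is
 * encoded. shadow_mark_va_out_of_sync() below stores the machine address of
 * the shadow PTE that maps the out-of-sync va; -1 means no such PTE was
 * recorded, and release_out_of_sync_entry() above writes -2 once the
 * reference has been dropped. The helper and variable names are hypothetical.
 */
static inline void decode_writable_pl1e(unsigned long writable_pl1e)
{
    if ( !(writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
    {
        unsigned long sl1mfn = writable_pl1e >> PAGE_SHIFT;  /* shadow L1 frame */
        unsigned long offset = writable_pl1e & ~PAGE_MASK;   /* byte offset of the PTE */
        (void)sl1mfn; (void)offset;
    }
    /* else: writable_pl1e is -1 (never set) or -2 (already released) */
}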
492 static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
493 {
494 struct out_of_sync_entry *entry = d->arch.out_of_sync;
495 struct out_of_sync_entry **prev = &d->arch.out_of_sync;
496 struct out_of_sync_entry *found = NULL;
498 // NB: Be careful not to call something that manipulates this list
499 // while walking it. Collect the results into a separate list
500 // first, then walk that list.
501 //
502 while ( entry )
503 {
504 if ( entry->gmfn == gmfn )
505 {
506 // remove from out of sync list
507 *prev = entry->next;
509 // add to found list
510 entry->next = found;
511 found = entry;
513 entry = *prev;
514 continue;
515 }
516 prev = &entry->next;
517 entry = entry->next;
518 }
520 prev = NULL;
521 entry = found;
522 while ( entry )
523 {
524 release_out_of_sync_entry(d, entry);
526 prev = &entry->next;
527 entry = entry->next;
528 }
530 // Add found list to free list
531 if ( prev )
532 {
533 *prev = d->arch.out_of_sync_free;
534 d->arch.out_of_sync_free = found;
535 }
536 }
538 static void free_out_of_sync_state(struct domain *d)
539 {
540 struct out_of_sync_entry *entry;
542 // NB: Be careful not to call something that manipulates this list
543 // while walking it. Remove one item at a time, and always
544 // restart from start of list.
545 //
546 while ( (entry = d->arch.out_of_sync) )
547 {
548 d->arch.out_of_sync = entry->next;
549 release_out_of_sync_entry(d, entry);
551 entry->next = d->arch.out_of_sync_free;
552 d->arch.out_of_sync_free = entry;
553 }
554 }
556 static void free_shadow_pages(struct domain *d)
557 {
558 int i;
559 struct shadow_status *x;
560 struct vcpu *v;
562 /*
563 * WARNING! The shadow page table must not currently be in use!
564 * e.g., You are expected to have paused the domain and synchronized CR3.
565 */
567 if( !d->arch.shadow_ht ) return;
569 shadow_audit(d, 1);
571 // first, remove any outstanding refs from out_of_sync entries...
572 //
573 free_out_of_sync_state(d);
575 // second, remove any outstanding refs from v->arch.shadow_table
576 // and CR3.
577 //
578 for_each_vcpu(d, v)
579 {
580 if ( pagetable_get_paddr(v->arch.shadow_table) )
581 {
582 put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
583 v->arch.shadow_table = mk_pagetable(0);
584 }
586 if ( v->arch.monitor_shadow_ref )
587 {
588 put_shadow_ref(v->arch.monitor_shadow_ref);
589 v->arch.monitor_shadow_ref = 0;
590 }
591 }
593 // For external shadows, remove the monitor table's refs
594 //
595 if ( shadow_mode_external(d) )
596 {
597 for_each_vcpu(d, v)
598 {
599 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
601 if ( mpl2e )
602 {
603 l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
604 l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
606 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
607 {
608 put_shadow_ref(l2e_get_pfn(hl2e));
609 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
610 }
611 if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
612 {
613 put_shadow_ref(l2e_get_pfn(smfn));
614 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
615 }
616 }
617 }
618 }
620 // Now, the only refs to shadow pages that are left are from the shadow
621 // pages themselves. We just unpin the pinned pages, and the rest
622 // should automatically disappear.
623 //
624 // NB: Beware: each explicitly or implicit call to free_shadow_page
625 // can/will result in the hash bucket getting rewritten out from
626 // under us... First, collect the list of pinned pages, then
627 // free them.
628 //
629 // FIXME: it would be good to just free all the pages referred to in
630 // the hash table without going through each of them to decrement their
631 // reference counts. In shadow_mode_refcount(), we've gotta do the hard
632 // work, but only for L1 shadows. If we're not in refcount mode, then
633 // there's no real hard work to do at all. Need to be careful with the
634 // writable_pte_predictions and snapshot entries in the hash table, but
635 // that's about it.
636 //
637 for ( i = 0; i < shadow_ht_buckets; i++ )
638 {
639 u32 count;
640 unsigned long *mfn_list;
642 /* Skip empty buckets. */
643 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
644 continue;
646 count = 0;
648 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
649 /* Skip entries that are writable_pred. */
650 switch(x->gpfn_and_flags & PGT_type_mask){
651 case PGT_l1_shadow:
652 case PGT_l2_shadow:
653 case PGT_l3_shadow:
654 case PGT_l4_shadow:
655 case PGT_hl2_shadow:
656 if ( MFN_PINNED(x->smfn) )
657 count++;
658 break;
659 case PGT_snapshot:
660 case PGT_writable_pred:
661 break;
662 default:
663 BUG();
665 }
666 }
668 if ( !count )
669 continue;
671 mfn_list = xmalloc_array(unsigned long, count);
672 count = 0;
673 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next ) {
674 /* Skip entries that are writable_pred. */
675 switch(x->gpfn_and_flags & PGT_type_mask){
676 case PGT_l1_shadow:
677 case PGT_l2_shadow:
678 case PGT_l3_shadow:
679 case PGT_l4_shadow:
680 case PGT_hl2_shadow:
681 if ( MFN_PINNED(x->smfn) )
682 mfn_list[count++] = x->smfn;
683 break;
684 case PGT_snapshot:
685 case PGT_writable_pred:
686 break;
687 default:
688 BUG();
690 }
691 }
693 while ( count )
694 {
695 shadow_unpin(mfn_list[--count]);
696 }
697 xfree(mfn_list);
698 }
700 // Now free the pre-zero'ed pages from the domain
701 //
702 struct list_head *list_ent, *tmp;
703 list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
704 {
705 list_del(list_ent);
706 perfc_decr(free_l1_pages);
708 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
709 free_domheap_page(page);
710 }
712 shadow_audit(d, 0);
714 SH_VLOG("Free shadow table.");
715 }
717 void shadow_mode_init(void)
718 {
719 }
721 int _shadow_mode_refcounts(struct domain *d)
722 {
723 return shadow_mode_refcounts(d);
724 }
726 static void alloc_monitor_pagetable(struct vcpu *v)
727 {
728 unsigned long mmfn;
729 l2_pgentry_t *mpl2e;
730 struct pfn_info *mmfn_info;
731 struct domain *d = v->domain;
733 ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
735 mmfn_info = alloc_domheap_page(NULL);
736 ASSERT(mmfn_info != NULL);
738 mmfn = page_to_pfn(mmfn_info);
739 mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
740 memset(mpl2e, 0, PAGE_SIZE);
742 #ifdef __i386__ /* XXX screws x86/64 build */
743 memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
744 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
745 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
746 #endif
748 mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
749 l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
750 __PAGE_HYPERVISOR);
752 // map the phys_to_machine map into the Read-Only MPT space for this domain
753 mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
754 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
755 __PAGE_HYPERVISOR);
757 // Don't (yet) have mappings for these...
758 // Don't want to accidentally see the idle_pg_table's linear mapping.
759 //
760 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
761 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
763 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
764 v->arch.monitor_vtable = mpl2e;
765 }
767 /*
768 * Free the pages for monitor_table and hl2_table
769 */
770 void free_monitor_pagetable(struct vcpu *v)
771 {
772 l2_pgentry_t *mpl2e, hl2e, sl2e;
773 unsigned long mfn;
775 ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
777 mpl2e = v->arch.monitor_vtable;
779 /*
780 * First get the mfn for hl2_table by looking at monitor_table
781 */
782 hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
783 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
784 {
785 mfn = l2e_get_pfn(hl2e);
786 ASSERT(mfn);
787 put_shadow_ref(mfn);
788 }
790 sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
791 if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
792 {
793 mfn = l2e_get_pfn(sl2e);
794 ASSERT(mfn);
795 put_shadow_ref(mfn);
796 }
798 unmap_domain_page(mpl2e);
800 /*
801 * Then free monitor_table.
802 * Note: for a VMX guest, only the BSP needs to do this free.
803 */
804 if (!(VMX_DOMAIN(v) && v->vcpu_id)) {
805 mfn = pagetable_get_pfn(v->arch.monitor_table);
806 unmap_domain_page(v->arch.monitor_vtable);
807 free_domheap_page(&frame_table[mfn]);
808 }
810 v->arch.monitor_table = mk_pagetable(0);
811 v->arch.monitor_vtable = 0;
812 }
814 int
815 set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
816 struct domain_mmap_cache *l2cache,
817 struct domain_mmap_cache *l1cache)
818 {
819 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
820 l2_pgentry_t *l2, l2e;
821 l1_pgentry_t *l1;
822 struct pfn_info *l1page;
823 unsigned long va = pfn << PAGE_SHIFT;
825 ASSERT(tabpfn != 0);
826 ASSERT(shadow_lock_is_acquired(d));
828 l2 = map_domain_page_with_cache(tabpfn, l2cache);
829 l2e = l2[l2_table_offset(va)];
830 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
831 {
832 l1page = alloc_domheap_page(NULL);
833 if ( !l1page )
834 {
835 unmap_domain_page_with_cache(l2, l2cache);
836 return 0;
837 }
839 l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
840 memset(l1, 0, PAGE_SIZE);
841 unmap_domain_page_with_cache(l1, l1cache);
843 l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
844 l2[l2_table_offset(va)] = l2e;
845 }
846 unmap_domain_page_with_cache(l2, l2cache);
848 l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
849 l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
850 unmap_domain_page_with_cache(l1, l1cache);
852 return 1;
853 }
855 static int
856 alloc_p2m_table(struct domain *d)
857 {
858 struct list_head *list_ent;
859 struct pfn_info *page, *l2page;
860 l2_pgentry_t *l2;
861 unsigned long mfn, pfn;
862 struct domain_mmap_cache l1cache, l2cache;
864 l2page = alloc_domheap_page(NULL);
865 if ( l2page == NULL )
866 return 0;
868 domain_mmap_cache_init(&l1cache);
869 domain_mmap_cache_init(&l2cache);
871 d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
872 l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
873 memset(l2, 0, PAGE_SIZE);
874 unmap_domain_page_with_cache(l2, &l2cache);
876 list_ent = d->page_list.next;
877 while ( list_ent != &d->page_list )
878 {
879 page = list_entry(list_ent, struct pfn_info, list);
880 mfn = page_to_pfn(page);
881 pfn = get_pfn_from_mfn(mfn);
882 ASSERT(pfn != INVALID_M2P_ENTRY);
883 ASSERT(pfn < (1u<<20));
885 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
887 list_ent = page->list.next;
888 }
890 list_ent = d->xenpage_list.next;
891 while ( list_ent != &d->xenpage_list )
892 {
893 page = list_entry(list_ent, struct pfn_info, list);
894 mfn = page_to_pfn(page);
895 pfn = get_pfn_from_mfn(mfn);
896 if ( (pfn != INVALID_M2P_ENTRY) &&
897 (pfn < (1u<<20)) )
898 {
899 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
900 }
902 list_ent = page->list.next;
903 }
905 domain_mmap_cache_destroy(&l2cache);
906 domain_mmap_cache_destroy(&l1cache);
908 return 1;
909 }
911 static void
912 free_p2m_table(struct domain *d)
913 {
914 // uh, this needs some work... :)
915 BUG();
916 }
918 int __shadow_mode_enable(struct domain *d, unsigned int mode)
919 {
920 struct vcpu *v;
921 int new_modes = (mode & ~d->arch.shadow_mode);
923 if(!new_modes) /* Nothing to do - return success */
924 return 0;
926 // can't take anything away by calling this function.
927 ASSERT(!(d->arch.shadow_mode & ~mode));
929 for_each_vcpu(d, v)
930 {
931 invalidate_shadow_ldt(v);
933 // We need to set these up for __update_pagetables().
934 // See the comment there.
936 /*
937 * arch.guest_vtable
938 */
939 if ( v->arch.guest_vtable &&
940 (v->arch.guest_vtable != __linear_l2_table) )
941 {
942 unmap_domain_page(v->arch.guest_vtable);
943 }
944 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
945 v->arch.guest_vtable = __linear_l2_table;
946 else
947 v->arch.guest_vtable = NULL;
949 /*
950 * arch.shadow_vtable
951 */
952 if ( v->arch.shadow_vtable &&
953 (v->arch.shadow_vtable != __shadow_linear_l2_table) )
954 {
955 unmap_domain_page(v->arch.shadow_vtable);
956 }
957 if ( !(mode & SHM_external) )
958 v->arch.shadow_vtable = __shadow_linear_l2_table;
959 else
960 v->arch.shadow_vtable = NULL;
962 /*
963 * arch.hl2_vtable
964 */
965 if ( v->arch.hl2_vtable &&
966 (v->arch.hl2_vtable != __linear_hl2_table) )
967 {
968 unmap_domain_page(v->arch.hl2_vtable);
969 }
970 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
971 v->arch.hl2_vtable = __linear_hl2_table;
972 else
973 v->arch.hl2_vtable = NULL;
975 /*
976 * arch.monitor_table & arch.monitor_vtable
977 */
978 if ( v->arch.monitor_vtable )
979 {
980 free_monitor_pagetable(v);
981 }
982 if ( mode & SHM_external )
983 {
984 alloc_monitor_pagetable(v);
985 }
986 }
988 if ( new_modes & SHM_enable )
989 {
990 ASSERT( !d->arch.shadow_ht );
991 d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
992 if ( d->arch.shadow_ht == NULL )
993 goto nomem;
995 memset(d->arch.shadow_ht, 0,
996 shadow_ht_buckets * sizeof(struct shadow_status));
997 }
999 if ( new_modes & SHM_log_dirty )
1001 ASSERT( !d->arch.shadow_dirty_bitmap );
1002 d->arch.shadow_dirty_bitmap_size =
1003 (d->shared_info->arch.max_pfn + 63) & ~63;
1004 d->arch.shadow_dirty_bitmap =
1005 xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
1006 (8 * sizeof(unsigned long)));
1007 if ( d->arch.shadow_dirty_bitmap == NULL )
1009 d->arch.shadow_dirty_bitmap_size = 0;
1010 goto nomem;
1012 memset(d->arch.shadow_dirty_bitmap, 0,
1013 d->arch.shadow_dirty_bitmap_size/8);
1016 if ( new_modes & SHM_translate )
1018 if ( !(new_modes & SHM_external) )
1020 ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
1021 if ( !alloc_p2m_table(d) )
1023 printk("alloc_p2m_table failed (out-of-memory?)\n");
1024 goto nomem;
1027 else
1029 // external guests provide their own memory for their P2M maps.
1030 //
1031 ASSERT( d == page_get_owner(
1032 &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
1036 // Get rid of any shadow pages from any previous shadow mode.
1037 //
1038 free_shadow_pages(d);
1040 /*
1041 * Tear down its counts by disassembling its page-table-based ref counts.
1042 * Also remove CR3's gcount/tcount.
1043 * That leaves things like GDTs and LDTs and external refs intact.
1045 * Most pages will be writable tcount=0.
1046 * Some will still be L1 tcount=0 or L2 tcount=0.
1047 * Maybe some pages will be type none tcount=0.
1048 * Pages granted external writable refs (via grant tables?) will
1049 * still have a non-zero tcount. That's OK.
1051 * gcounts will generally be 1 for PGC_allocated.
1052 * GDTs and LDTs will have additional gcounts.
1053 * Any grant-table based refs will still be in the gcount.
1055 * We attempt to grab writable refs to each page (thus setting its type).
1056 * Immediately put back those type refs.
1058 * Assert that no pages are left with L1/L2/L3/L4 type.
1059 */
1060 audit_adjust_pgtables(d, -1, 1);
1062 d->arch.shadow_mode = mode;
1064 if ( shadow_mode_refcounts(d) )
1066 struct list_head *list_ent = d->page_list.next;
1067 while ( list_ent != &d->page_list )
1069 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
1070 if ( !get_page_type(page, PGT_writable_page) )
1071 BUG();
1072 put_page_type(page);
1073 /*
1074 * We use tlbflush_timestamp as a back pointer to the smfn, and need to
1075 * clean it up.
1076 */
1077 if ( shadow_mode_external(d) )
1078 page->tlbflush_timestamp = 0;
1079 list_ent = page->list.next;
1083 audit_adjust_pgtables(d, 1, 1);
1085 return 0;
1087 nomem:
1088 if ( (new_modes & SHM_enable) )
1090 xfree(d->arch.shadow_ht);
1091 d->arch.shadow_ht = NULL;
1093 if ( (new_modes & SHM_log_dirty) )
1095 xfree(d->arch.shadow_dirty_bitmap);
1096 d->arch.shadow_dirty_bitmap = NULL;
1098 if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
1099 pagetable_get_paddr(d->arch.phys_table) )
1101 free_p2m_table(d);
1103 return -ENOMEM;
1106 int shadow_mode_enable(struct domain *d, unsigned int mode)
1108 int rc;
1109 shadow_lock(d);
1110 rc = __shadow_mode_enable(d, mode);
1111 shadow_unlock(d);
1112 return rc;
1115 static void
1116 translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
1118 int i;
1119 l1_pgentry_t *l1;
1121 l1 = map_domain_page(l1mfn);
1122 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1124 if ( is_guest_l1_slot(i) &&
1125 (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
1127 unsigned long mfn = l1e_get_pfn(l1[i]);
1128 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1129 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1130 l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
1133 unmap_domain_page(l1);
1136 // This is not general enough to handle arbitrary pagetables
1137 // with shared L1 pages, etc., but it is sufficient for bringing
1138 // up dom0.
1139 //
1140 void
1141 translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
1142 unsigned int type)
1144 int i;
1145 l2_pgentry_t *l2;
1147 ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
1149 l2 = map_domain_page(l2mfn);
1150 for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
1152 if ( is_guest_l2_slot(type, i) &&
1153 (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
1155 unsigned long mfn = l2e_get_pfn(l2[i]);
1156 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1157 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1158 l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
1159 translate_l1pgtable(d, p2m, mfn);
1162 unmap_domain_page(l2);
1165 static void free_shadow_ht_entries(struct domain *d)
1167 struct shadow_status *x, *n;
1169 SH_VLOG("freed tables count=%d l1=%d l2=%d",
1170 d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
1171 perfc_value(shadow_l2_pages));
1173 n = d->arch.shadow_ht_extras;
1174 while ( (x = n) != NULL )
1176 d->arch.shadow_extras_count--;
1177 n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
1178 xfree(x);
1181 d->arch.shadow_ht_extras = NULL;
1182 d->arch.shadow_ht_free = NULL;
1184 ASSERT(d->arch.shadow_extras_count == 0);
1185 SH_VLOG("freed extras, now %d", d->arch.shadow_extras_count);
1187 if ( d->arch.shadow_dirty_bitmap != NULL )
1189 xfree(d->arch.shadow_dirty_bitmap);
1190 d->arch.shadow_dirty_bitmap = 0;
1191 d->arch.shadow_dirty_bitmap_size = 0;
1194 xfree(d->arch.shadow_ht);
1195 d->arch.shadow_ht = NULL;
1198 static void free_out_of_sync_entries(struct domain *d)
1200 struct out_of_sync_entry *x, *n;
1202 n = d->arch.out_of_sync_extras;
1203 while ( (x = n) != NULL )
1205 d->arch.out_of_sync_extras_count--;
1206 n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
1207 xfree(x);
1210 d->arch.out_of_sync_extras = NULL;
1211 d->arch.out_of_sync_free = NULL;
1212 d->arch.out_of_sync = NULL;
1214 ASSERT(d->arch.out_of_sync_extras_count == 0);
1215 FSH_LOG("freed extra out_of_sync entries, now %d",
1216 d->arch.out_of_sync_extras_count);
1219 void __shadow_mode_disable(struct domain *d)
1221 if ( unlikely(!shadow_mode_enabled(d)) )
1222 return;
1224 free_shadow_pages(d);
1225 free_writable_pte_predictions(d);
1227 #ifndef NDEBUG
1228 int i;
1229 for ( i = 0; i < shadow_ht_buckets; i++ )
1231 if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
1233 printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
1234 __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
1235 BUG();
1238 #endif
1240 d->arch.shadow_mode = 0;
1242 free_shadow_ht_entries(d);
1243 free_out_of_sync_entries(d);
1245 struct vcpu *v;
1246 for_each_vcpu(d, v)
1248 update_pagetables(v);
1252 static int shadow_mode_table_op(
1253 struct domain *d, dom0_shadow_control_t *sc)
1255 unsigned int op = sc->op;
1256 int i, rc = 0;
1257 struct vcpu *v;
1259 ASSERT(shadow_lock_is_acquired(d));
1261 SH_VLOG("shadow mode table op %lx %lx count %d",
1262 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
1263 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
1264 d->arch.shadow_page_count);
1266 shadow_audit(d, 1);
1268 switch ( op )
1270 case DOM0_SHADOW_CONTROL_OP_FLUSH:
1271 free_shadow_pages(d);
1273 d->arch.shadow_fault_count = 0;
1274 d->arch.shadow_dirty_count = 0;
1275 d->arch.shadow_dirty_net_count = 0;
1276 d->arch.shadow_dirty_block_count = 0;
1278 break;
1280 case DOM0_SHADOW_CONTROL_OP_CLEAN:
1281 free_shadow_pages(d);
1283 sc->stats.fault_count = d->arch.shadow_fault_count;
1284 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1285 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1286 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1288 d->arch.shadow_fault_count = 0;
1289 d->arch.shadow_dirty_count = 0;
1290 d->arch.shadow_dirty_net_count = 0;
1291 d->arch.shadow_dirty_block_count = 0;
1293 if ( (sc->dirty_bitmap == NULL) ||
1294 (d->arch.shadow_dirty_bitmap == NULL) )
1296 rc = -EINVAL;
1297 break;
1300 if(sc->pages > d->arch.shadow_dirty_bitmap_size)
1301 sc->pages = d->arch.shadow_dirty_bitmap_size;
1303 #define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
1304 for ( i = 0; i < sc->pages; i += chunk )
1306 int bytes = ((((sc->pages - i) > chunk) ?
1307 chunk : (sc->pages - i)) + 7) / 8;
1309 if (copy_to_user(
1310 sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
1311 d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
1312 bytes))
1314 rc = -EINVAL;
1315 break;
1318 memset(
1319 d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
1320 0, bytes);
1323 break;
1325 case DOM0_SHADOW_CONTROL_OP_PEEK:
1326 sc->stats.fault_count = d->arch.shadow_fault_count;
1327 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1328 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1329 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1332 if ( (sc->dirty_bitmap == NULL) ||
1333 (d->arch.shadow_dirty_bitmap == NULL) )
1335 rc = -EINVAL;
1336 break;
1339 if(sc->pages > d->arch.shadow_dirty_bitmap_size)
1340 sc->pages = d->arch.shadow_dirty_bitmap_size;
1342 if (copy_to_user(sc->dirty_bitmap,
1343 d->arch.shadow_dirty_bitmap, (sc->pages+7)/8))
1345 rc = -EINVAL;
1346 break;
1349 break;
1351 default:
1352 rc = -EINVAL;
1353 break;
1356 SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
1357 shadow_audit(d, 1);
1359 for_each_vcpu(d,v)
1360 __update_pagetables(v);
1362 return rc;
1365 int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
1367 unsigned int op = sc->op;
1368 int rc = 0;
1369 struct vcpu *v;
1371 if ( unlikely(d == current->domain) )
1373 DPRINTK("Don't try to do a shadow op on yourself!\n");
1374 return -EINVAL;
1377 domain_pause(d);
1379 shadow_lock(d);
1381 switch ( op )
1383 case DOM0_SHADOW_CONTROL_OP_OFF:
1384 if ( shadow_mode_enabled(d) )
1386 __shadow_sync_all(d);
1387 __shadow_mode_disable(d);
1389 break;
1391 case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
1392 free_shadow_pages(d);
1393 rc = __shadow_mode_enable(d, SHM_enable);
1394 break;
1396 case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
1397 free_shadow_pages(d);
1398 rc = __shadow_mode_enable(
1399 d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
1400 break;
1402 case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
1403 free_shadow_pages(d);
1404 rc = __shadow_mode_enable(
1405 d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
1406 break;
1408 default:
1409 rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
1410 break;
1413 shadow_unlock(d);
1415 for_each_vcpu(d,v)
1416 update_pagetables(v);
1418 domain_unpause(d);
1420 return rc;
1423 unsigned long
1424 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
1426 ASSERT( shadow_mode_translate(d) );
1428 perfc_incrc(gpfn_to_mfn_foreign);
1430 unsigned long va = gpfn << PAGE_SHIFT;
1431 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1432 l2_pgentry_t *l2 = map_domain_page(tabpfn);
1433 l2_pgentry_t l2e = l2[l2_table_offset(va)];
1434 unmap_domain_page(l2);
1435 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1437 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
1438 d->domain_id, gpfn, l2e_get_intpte(l2e));
1439 return INVALID_MFN;
1441 l1_pgentry_t *l1 = map_domain_page(l2e_get_pfn(l2e));
1442 l1_pgentry_t l1e = l1[l1_table_offset(va)];
1443 unmap_domain_page(l1);
1445 #if 0
1446 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
1447 d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
1448 #endif
1450 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
1452 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
1453 d->domain_id, gpfn, l1e_get_intpte(l1e));
1454 return INVALID_MFN;
1457 return l1e_get_pfn(l1e);
1460 static unsigned long
1461 shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
1462 unsigned long smfn)
1464 unsigned long hl2mfn;
1465 l1_pgentry_t *hl2;
1466 int limit;
1468 ASSERT(PGT_base_page_table == PGT_l2_page_table);
1470 if ( unlikely(!(hl2mfn = alloc_shadow_page(d, gpfn, gmfn, PGT_hl2_shadow))) )
1472 printk("Couldn't alloc an HL2 shadow for pfn=%lx mfn=%lx\n",
1473 gpfn, gmfn);
1474 BUG(); /* XXX Deal gracefully with failure. */
1477 SH_VVLOG("shadow_hl2_table(gpfn=%lx, gmfn=%lx, smfn=%lx) => %lx",
1478 gpfn, gmfn, smfn, hl2mfn);
1479 perfc_incrc(shadow_hl2_table_count);
1481 hl2 = map_domain_page(hl2mfn);
1483 if ( shadow_mode_external(d) )
1484 limit = L2_PAGETABLE_ENTRIES;
1485 else
1486 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
1488 memset(hl2, 0, limit * sizeof(l1_pgentry_t));
1490 if ( !shadow_mode_external(d) )
1492 memset(&hl2[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 0,
1493 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1495 // Set up easy access to the GL2, SL2, and HL2 frames.
1496 //
1497 hl2[l2_table_offset(LINEAR_PT_VIRT_START)] =
1498 l1e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1499 hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1500 l1e_from_pfn(smfn, __PAGE_HYPERVISOR);
1501 hl2[l2_table_offset(PERDOMAIN_VIRT_START)] =
1502 l1e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1505 unmap_domain_page(hl2);
1507 return hl2mfn;
1510 /*
1511 * This could take and use a snapshot, and validate the entire page at
1512 * once, or it could continue to fault in entries one at a time...
1513 * Might be worth investigating...
1514 */
1515 static unsigned long shadow_l2_table(
1516 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1518 unsigned long smfn;
1519 l2_pgentry_t *spl2e;
1521 SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
1523 perfc_incrc(shadow_l2_table_count);
1525 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l2_shadow))) )
1527 printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
1528 gpfn, gmfn);
1529 BUG(); /* XXX Deal gracefully with failure. */
1532 spl2e = (l2_pgentry_t *)map_domain_page(smfn);
1534 /* Install hypervisor and 2x linear p.t. mappings. */
1535 if ( (PGT_base_page_table == PGT_l2_page_table) &&
1536 !shadow_mode_external(d) )
1538 /*
1539 * We could proactively fill in PDEs for pages that are already
1540 * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
1541 * (restriction required for coherence of the accessed bit). However,
1542 * we tried it and it didn't help performance. This is simpler.
1543 */
1544 memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
1546 /* Install hypervisor and 2x linear p.t. mappings. */
1547 memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1548 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
1549 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
1551 spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1552 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
1554 spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
1555 l2e_from_paddr(__pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt),
1556 __PAGE_HYPERVISOR);
1558 if ( shadow_mode_translate(d) ) // NB: not external
1560 unsigned long hl2mfn;
1562 spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
1563 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
1564 __PAGE_HYPERVISOR);
1566 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
1567 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
1569 // shadow_mode_translate (but not external) sl2 tables hold a
1570 // ref to their hl2.
1571 //
1572 if ( !get_shadow_ref(hl2mfn) )
1573 BUG();
1575 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1576 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
1578 else
1579 spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1580 l2e_from_pfn(gmfn, __PAGE_HYPERVISOR);
1582 else
1584 memset(spl2e, 0, L2_PAGETABLE_ENTRIES*sizeof(l2_pgentry_t));
1587 unmap_domain_page(spl2e);
1589 SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
1590 return smfn;
1593 void shadow_map_l1_into_current_l2(unsigned long va)
1595 struct vcpu *v = current;
1596 struct domain *d = v->domain;
1597 l1_pgentry_t *gpl1e, *spl1e;
1598 l2_pgentry_t gl2e, sl2e;
1599 unsigned long gl1pfn, gl1mfn, sl1mfn;
1600 int i, init_table = 0;
1602 __guest_get_l2e(v, va, &gl2e);
1603 ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT);
1604 gl1pfn = l2e_get_pfn(gl2e);
1606 if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
1608 /* This L1 is NOT already shadowed so we need to shadow it. */
1609 SH_VVLOG("4a: l1 not shadowed");
1611 gl1mfn = __gpfn_to_mfn(d, gl1pfn);
1612 if ( unlikely(!VALID_MFN(gl1mfn)) )
1614 // Attempt to use an invalid pfn as an L1 page.
1615 // XXX this needs to be more graceful!
1616 BUG();
1619 if ( unlikely(!(sl1mfn =
1620 alloc_shadow_page(d, gl1pfn, gl1mfn, PGT_l1_shadow))) )
1622 printk("Couldn't alloc an L1 shadow for pfn=%lx mfn=%lx\n",
1623 gl1pfn, gl1mfn);
1624 BUG(); /* XXX Need to deal gracefully with failure. */
1627 perfc_incrc(shadow_l1_table_count);
1628 init_table = 1;
1630 else
1632 /* This L1 is shadowed already, but the L2 entry is missing. */
1633 SH_VVLOG("4b: was shadowed, l2 missing (%lx)", sl1mfn);
1636 #ifndef NDEBUG
1637 l2_pgentry_t old_sl2e;
1638 __shadow_get_l2e(v, va, &old_sl2e);
1639 ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
1640 #endif
1642 if ( !get_shadow_ref(sl1mfn) )
1643 BUG();
1644 l2pde_general(d, &gl2e, &sl2e, sl1mfn);
1645 __guest_set_l2e(v, va, gl2e);
1646 __shadow_set_l2e(v, va, sl2e);
1648 if ( init_table )
1650 l1_pgentry_t sl1e;
1651 int index = l1_table_offset(va);
1652 int min = 1, max = 0;
1654 gpl1e = &(linear_pg_table[l1_linear_offset(va) &
1655 ~(L1_PAGETABLE_ENTRIES-1)]);
1657 spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
1658 ~(L1_PAGETABLE_ENTRIES-1)]);
1660 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1662 l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
1663 if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
1664 unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
1665 sl1e = l1e_empty();
1666 if ( l1e_get_flags(sl1e) == 0 )
1668 // First copy entries from 0 until first invalid.
1669 // Then copy entries from index until first invalid.
1670 //
1671 if ( i < index ) {
1672 i = index - 1;
1673 continue;
1675 break;
1677 spl1e[i] = sl1e;
1678 if ( unlikely(i < min) )
1679 min = i;
1680 if ( likely(i > max) )
1681 max = i;
1682 set_guest_back_ptr(d, sl1e, sl1mfn, i);
1685 frame_table[sl1mfn].tlbflush_timestamp =
1686 SHADOW_ENCODE_MIN_MAX(min, max);
1690 void shadow_invlpg(struct vcpu *v, unsigned long va)
1692 struct domain *d = v->domain;
1693 l1_pgentry_t gpte, spte;
1695 ASSERT(shadow_mode_enabled(d));
1697 shadow_lock(d);
1699 __shadow_sync_va(v, va);
1701 // XXX mafetter: will need to think about 4MB pages...
1703 // It's not strictly necessary to update the shadow here,
1704 // but it might save a fault later.
1705 //
1706 if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
1707 sizeof(gpte))) {
1708 perfc_incrc(shadow_invlpg_faults);
1709 shadow_unlock(d);
1710 return;
1712 l1pte_propagate_from_guest(d, gpte, &spte);
1713 shadow_set_l1e(va, spte, 1);
1715 shadow_unlock(d);
1718 struct out_of_sync_entry *
1719 shadow_alloc_oos_entry(struct domain *d)
1721 struct out_of_sync_entry *f, *extra;
1722 unsigned size, i;
1724 if ( unlikely(d->arch.out_of_sync_free == NULL) )
1726 FSH_LOG("Allocate more fullshadow tuple blocks.");
1728 size = sizeof(void *) + (out_of_sync_extra_size * sizeof(*f));
1729 extra = xmalloc_bytes(size);
1731 /* XXX Should be more graceful here. */
1732 if ( extra == NULL )
1733 BUG();
1735 memset(extra, 0, size);
1737 /* Record the allocation block so it can be correctly freed later. */
1738 d->arch.out_of_sync_extras_count++;
1739 *((struct out_of_sync_entry **)&extra[out_of_sync_extra_size]) =
1740 d->arch.out_of_sync_extras;
1741 d->arch.out_of_sync_extras = &extra[0];
1743 /* Thread a free chain through the newly-allocated nodes. */
1744 for ( i = 0; i < (out_of_sync_extra_size - 1); i++ )
1745 extra[i].next = &extra[i+1];
1746 extra[i].next = NULL;
1748 /* Add the new nodes to the free list. */
1749 d->arch.out_of_sync_free = &extra[0];
1752 /* Allocate a new node from the quicklist. */
1753 f = d->arch.out_of_sync_free;
1754 d->arch.out_of_sync_free = f->next;
1756 return f;
1759 static inline unsigned long
1760 shadow_make_snapshot(
1761 struct domain *d, unsigned long gpfn, unsigned long gmfn)
1763 unsigned long smfn, sl1mfn = 0;
1764 void *original, *snapshot;
1765 u32 min_max = 0;
1766 int min, max, length;
1768 if ( test_and_set_bit(_PGC_out_of_sync, &frame_table[gmfn].count_info) )
1770 ASSERT(__shadow_status(d, gpfn, PGT_snapshot));
1771 return SHADOW_SNAPSHOT_ELSEWHERE;
1774 perfc_incrc(shadow_make_snapshot);
1776 if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_snapshot))) )
1778 printk("Couldn't alloc fullshadow snapshot for pfn=%lx mfn=%lx!\n"
1779 "Dom%d snapshot_count_count=%d\n",
1780 gpfn, gmfn, d->domain_id, d->arch.snapshot_page_count);
1781 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
1784 if ( !get_shadow_ref(smfn) )
1785 BUG();
1787 if ( shadow_mode_refcounts(d) &&
1788 (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
1789 min_max = pfn_to_page(sl1mfn)->tlbflush_timestamp;
1790 pfn_to_page(smfn)->tlbflush_timestamp = min_max;
1792 min = SHADOW_MIN(min_max);
1793 max = SHADOW_MAX(min_max);
1794 length = max - min + 1;
1795 perfc_incr_histo(snapshot_copies, length, PT_UPDATES);
1797 min *= sizeof(l1_pgentry_t);
1798 length *= sizeof(l1_pgentry_t);
1800 original = map_domain_page(gmfn);
1801 snapshot = map_domain_page(smfn);
1802 memcpy(snapshot + min, original + min, length);
1803 unmap_domain_page(original);
1804 unmap_domain_page(snapshot);
1806 return smfn;
1809 static void
1810 shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
1812 void *snapshot;
1814 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
1815 return;
1817 // Clear the out_of_sync bit.
1818 //
1819 clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
1821 // XXX Need to think about how to protect the domain's
1822 // information less expensively.
1823 //
1824 snapshot = map_domain_page(entry->snapshot_mfn);
1825 memset(snapshot, 0, PAGE_SIZE);
1826 unmap_domain_page(snapshot);
1828 put_shadow_ref(entry->snapshot_mfn);
1831 struct out_of_sync_entry *
1832 __shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
1833 unsigned long mfn)
1835 struct domain *d = v->domain;
1836 struct pfn_info *page = &frame_table[mfn];
1837 struct out_of_sync_entry *entry = shadow_alloc_oos_entry(d);
1839 ASSERT(shadow_lock_is_acquired(d));
1840 ASSERT(pfn_valid(mfn));
1842 #ifndef NDEBUG
1843 u32 type = page->u.inuse.type_info & PGT_type_mask;
1844 if ( shadow_mode_refcounts(d) )
1846 ASSERT(type == PGT_writable_page);
1848 else
1850 ASSERT(type && (type < PGT_l4_page_table));
1852 #endif
1854 FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08lx", __func__,
1855 gpfn, mfn, page->count_info, page->u.inuse.type_info);
1857 // XXX this will require some more thought... Cross-domain sharing and
1858 // modification of page tables? Hmm...
1859 //
1860 if ( d != page_get_owner(page) )
1861 BUG();
1863 perfc_incrc(shadow_mark_mfn_out_of_sync_calls);
1865 entry->v = v;
1866 entry->gpfn = gpfn;
1867 entry->gmfn = mfn;
1868 entry->writable_pl1e = -1;
1870 #if SHADOW_DEBUG
1871 mark_shadows_as_reflecting_snapshot(d, gpfn);
1872 #endif
1874 // increment guest's ref count to represent the entry in the
1875 // full shadow out-of-sync list.
1876 //
1877 get_page(page, d);
1879 return entry;
1882 struct out_of_sync_entry *
1883 shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
1884 unsigned long mfn)
1886 struct out_of_sync_entry *entry =
1887 __shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
1888 struct domain *d = v->domain;
1890 entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
1891 // Add to the out-of-sync list
1892 //
1893 entry->next = d->arch.out_of_sync;
1894 d->arch.out_of_sync = entry;
1896 return entry;
1899 void shadow_mark_va_out_of_sync(
1900 struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long va)
1902 struct out_of_sync_entry *entry =
1903 __shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
1904 l2_pgentry_t sl2e;
1905 struct domain *d = v->domain;
1907 // We need the address of shadow PTE that maps @va.
1908 // It might not exist yet. Make sure it's there.
1909 //
1910 __shadow_get_l2e(v, va, &sl2e);
1911 if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
1913 // either this L1 isn't shadowed yet, or the shadow isn't linked into
1914 // the current L2.
1915 shadow_map_l1_into_current_l2(va);
1916 __shadow_get_l2e(v, va, &sl2e);
1918 ASSERT(l2e_get_flags(sl2e) & _PAGE_PRESENT);
1920 entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
1921 // NB: this is stored as a machine address.
1922 entry->writable_pl1e =
1923 l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * l1_table_offset(va));
1924 ASSERT( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) );
1925 entry->va = va;
1927 // Increment shadow's page count to represent the reference
1928 // inherent in entry->writable_pl1e
1929 //
1930 if ( !get_shadow_ref(l2e_get_pfn(sl2e)) )
1931 BUG();
1933 // Add to the out-of-sync list
1934 //
1935 entry->next = d->arch.out_of_sync;
1936 d->arch.out_of_sync = entry;
1938 FSH_LOG("mark_out_of_sync(va=%lx -> writable_pl1e=%lx)",
1939 va, entry->writable_pl1e);
1942 /*
1943 * Returns 1 if the snapshot for @gmfn exists and its @index'th entry matches.
1944 * Returns 0 otherwise.
1945 */
1946 static int snapshot_entry_matches(
1947 struct domain *d, l1_pgentry_t *guest_pt,
1948 unsigned long gpfn, unsigned index)
1950 unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot);
1951 l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ...
1952 int entries_match;
1954 perfc_incrc(snapshot_entry_matches_calls);
1956 if ( !smfn )
1957 return 0;
1959 snapshot = map_domain_page(smfn);
1961 if (__copy_from_user(&gpte, &guest_pt[index],
1962 sizeof(gpte))) {
1963 unmap_domain_page(snapshot);
1964 return 0;
1967 // This could probably be smarter, but this is sufficient for
1968 // our current needs.
1969 //
1970 entries_match = !l1e_has_changed(gpte, snapshot[index],
1971 PAGE_FLAG_MASK);
1973 unmap_domain_page(snapshot);
1975 #ifdef PERF_COUNTERS
1976 if ( entries_match )
1977 perfc_incrc(snapshot_entry_matches_true);
1978 #endif
1980 return entries_match;
1983 /*
1984 * Returns 1 if va's shadow mapping is out-of-sync.
1985 * Returns 0 otherwise.
1986 */
1987 int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
1989 struct domain *d = v->domain;
1990 unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
1991 unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn);
1992 l2_pgentry_t l2e;
1993 unsigned long l1pfn, l1mfn;
1995 ASSERT(shadow_lock_is_acquired(d));
1996 ASSERT(VALID_M2P(l2pfn));
1998 perfc_incrc(shadow_out_of_sync_calls);
2000 if ( page_out_of_sync(&frame_table[l2mfn]) &&
2001 !snapshot_entry_matches(d, (l1_pgentry_t *)v->arch.guest_vtable,
2002 l2pfn, l2_table_offset(va)) )
2003 return 1;
2005 __guest_get_l2e(v, va, &l2e);
2006 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
2007 return 0;
2009 l1pfn = l2e_get_pfn(l2e);
2010 l1mfn = __gpfn_to_mfn(d, l1pfn);
2012 // If the l1 pfn is invalid, it can't be out of sync...
2013 if ( !VALID_MFN(l1mfn) )
2014 return 0;
2016 if ( page_out_of_sync(&frame_table[l1mfn]) &&
2017 !snapshot_entry_matches(
2018 d, &linear_pg_table[l1_linear_offset(va) & ~(L1_PAGETABLE_ENTRIES-1)],
2019 l1pfn, l1_table_offset(va)) )
2020 return 1;
2022 return 0;
2025 #define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t)))
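/*
 * Editorial example, not part of the original file: with 4kB pages and
 * 4-byte l1_pgentry_t (non-PAE i386), GPFN_TO_GPTEPAGE() divides by 1024,
 * i.e. it yields the index of the guest L1 page-table page whose PTEs would
 * map that gpfn. The writable-prediction hash entries below are keyed on
 * this value.
 *
 *   GPFN_TO_GPTEPAGE(0x12345) == 0x12345 / 1024 == 0x48
 */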
2026 static inline unsigned long
2027 predict_writable_pte_page(struct domain *d, unsigned long gpfn)
2029 return __shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), PGT_writable_pred);
2032 static inline void
2033 increase_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
2035 unsigned long score = prediction & PGT_score_mask;
2036 int create = (score == 0);
2038 // saturating addition
2039 score = (score + (1u << PGT_score_shift)) & PGT_score_mask;
2040 score = score ? score : PGT_score_mask;
2042 prediction = (prediction & PGT_mfn_mask) | score;
2044 //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
2045 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2047 if ( create )
2048 perfc_incr(writable_pte_predictions);
2051 static inline void
2052 decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
2054 unsigned long score = prediction & PGT_score_mask;
2055 ASSERT(score);
2057 // divide score by 2... We don't like bad predictions.
2058 //
2059 score = (score >> 1) & PGT_score_mask;
2061 prediction = (prediction & PGT_mfn_mask) | score;
2063 //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
2065 if ( score )
2066 set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
2067 else
2069 delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
2070 perfc_decr(writable_pte_predictions);
2074 static void
2075 free_writable_pte_predictions(struct domain *d)
2077 int i;
2078 struct shadow_status *x;
2080 for ( i = 0; i < shadow_ht_buckets; i++ )
2082 u32 count;
2083 unsigned long *gpfn_list;
2085 /* Skip empty buckets. */
2086 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
2087 continue;
2089 count = 0;
2090 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2091 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2092 count++;
2094 gpfn_list = xmalloc_array(unsigned long, count);
2095 count = 0;
2096 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
2097 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
2098 gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
2100 while ( count )
2102 count--;
2103 /* delete_shadow_status() may do a shadow_audit(), so we need to
2104 * keep an accurate count of writable_pte_predictions to keep it
2105 * happy.
2106 */
2107 delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
2108 perfc_decr(writable_pte_predictions);
2111 xfree(gpfn_list);
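// fix_entry(): downgrade a single l1 entry from RW to RO.  When the entry
// lives in a real l1 shadow, the shadow refcounts are adjusted for the new
// and old entries.  Increments *found and returns nonzero once
// max_refs_to_find entries have been fixed.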
2115 static int fix_entry(
2116 struct domain *d,
2117 l1_pgentry_t *pt, u32 *found, int is_l1_shadow, u32 max_refs_to_find)
2119 l1_pgentry_t old = *pt;
2120 l1_pgentry_t new = old;
2122 l1e_remove_flags(new,_PAGE_RW);
2123 if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
2124 BUG();
2125 (*found)++;
2126 *pt = new;
2127 if ( is_l1_shadow )
2128 shadow_put_page_from_l1e(old, d);
2130 return (*found == max_refs_to_find);
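// Scan the l1 page at pt_mfn for writable mappings of readonly_gmfn and make
// them read-only.  In external shadow mode, the entry indexed by the va
// back-pointer stashed in the target page's type_info is tried first and may
// short-circuit the scan; otherwise every entry in the page is examined.
// Returns the number of entries fixed.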
2133 static u32 remove_all_write_access_in_ptpage(
2134 struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
2135 unsigned long readonly_gpfn, unsigned long readonly_gmfn,
2136 u32 max_refs_to_find, unsigned long prediction)
2138 l1_pgentry_t *pt = map_domain_page(pt_mfn);
2139 l1_pgentry_t match;
2140 unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
2141 int i;
2142 u32 found = 0;
2143 int is_l1_shadow =
2144 ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
2145 PGT_l1_shadow);
2147 match = l1e_from_pfn(readonly_gmfn, flags);
2149 if ( shadow_mode_external(d) ) {
2150 i = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_va_mask)
2151 >> PGT_va_shift;
2153 if ( (i >= 0 && i < L1_PAGETABLE_ENTRIES) &&
2154 !l1e_has_changed(pt[i], match, flags) &&
2155 fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) &&
2156 !prediction )
2157 goto out;
2160 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2162 if ( unlikely(!l1e_has_changed(pt[i], match, flags)) &&
2163 fix_entry(d, &pt[i], &found, is_l1_shadow, max_refs_to_find) )
2164 break;
2167 out:
2168 unmap_domain_page(pt);
2170 return found;
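// shadow_remove_all_write_access(): revoke outstanding writable mappings of
// readonly_gmfn.  The number of writable refs is taken from the page's type
// count (discounting the pin reference, and one further reference in
// external mode).  In external mode the shadow page recorded as a back
// pointer in tlbflush_timestamp is tried first; any remaining references are
// hunted down by sweeping every l1 shadow in the hash table.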
2173 int shadow_remove_all_write_access(
2174 struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
2176 int i;
2177 struct shadow_status *a;
2178 u32 found = 0, write_refs;
2179 unsigned long predicted_smfn;
2181 ASSERT(shadow_lock_is_acquired(d));
2182 ASSERT(VALID_MFN(readonly_gmfn));
2184 perfc_incrc(remove_write_access);
2186 // If it's not a writable page, then no writable refs can be outstanding.
2187 //
2188 if ( (frame_table[readonly_gmfn].u.inuse.type_info & PGT_type_mask) !=
2189 PGT_writable_page )
2191 perfc_incrc(remove_write_not_writable);
2192 return 1;
2195 // How many outstanding writable PTEs for this page are there?
2196 //
2197 write_refs =
2198 (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
2199 if ( write_refs && MFN_PINNED(readonly_gmfn) )
2201 write_refs--;
2204 if ( write_refs == 0 )
2206 perfc_incrc(remove_write_no_work);
2207 return 1;
2210 if ( shadow_mode_external(d) ) {
2211 if (write_refs-- == 0)
2212 return 0;
2214 // Use the back pointer to locate the shadow page that can contain
2215 // the PTE of interest
2216 if ( (predicted_smfn = frame_table[readonly_gmfn].tlbflush_timestamp) ) {
2217 found += remove_all_write_access_in_ptpage(
2218 d, predicted_smfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, 0);
2219 if ( found == write_refs )
2220 return 0;
2224 // Search all the shadow L1 page tables...
2225 //
2226 for (i = 0; i < shadow_ht_buckets; i++)
2228 a = &d->arch.shadow_ht[i];
2229 while ( a && a->gpfn_and_flags )
2231 if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow )
2233 found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
2234 if ( found == write_refs )
2235 return 0;
2238 a = a->next;
2242 FSH_LOG("%s: looking for %d refs, found %d refs",
2243 __func__, write_refs, found);
2245 return 0;
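// remove_all_access_in_page(): clear every present mapping of forbidden_gmfn
// from the l1 page at l1mfn, dropping a shadow reference (for a real l1
// shadow) or a plain page reference (for an hl2 page) per entry removed.
// Returns the number of entries cleared.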
2248 static u32 remove_all_access_in_page(
2249 struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
2251 l1_pgentry_t *pl1e = map_domain_page(l1mfn);
2252 l1_pgentry_t match, ol1e;
2253 unsigned long flags = _PAGE_PRESENT;
2254 int i;
2255 u32 count = 0;
2256 int is_l1_shadow =
2257 ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
2258 PGT_l1_shadow);
2260 match = l1e_from_pfn(forbidden_gmfn, flags);
2262 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
2264 if ( l1e_has_changed(pl1e[i], match, flags) )
2265 continue;
2267 ol1e = pl1e[i];
2268 pl1e[i] = l1e_empty();
2269 count++;
2271 if ( is_l1_shadow )
2272 shadow_put_page_from_l1e(ol1e, d);
2273 else /* must be an hl2 page */
2274 put_page(&frame_table[forbidden_gmfn]);
2277 unmap_domain_page(pl1e);
2279 return count;
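// shadow_remove_all_access(): walk the whole shadow hash table and use
// remove_all_access_in_page() to purge every shadow or hl2 mapping of
// forbidden_gmfn.  A no-op when shadow mode is not enabled.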
2282 u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
2284 int i;
2285 struct shadow_status *a;
2286 u32 count = 0;
2288 if ( unlikely(!shadow_mode_enabled(d)) )
2289 return 0;
2291 ASSERT(shadow_lock_is_acquired(d));
2292 perfc_incrc(remove_all_access);
2294 for (i = 0; i < shadow_ht_buckets; i++)
2296 a = &d->arch.shadow_ht[i];
2297 while ( a && a->gpfn_and_flags )
2299 switch (a->gpfn_and_flags & PGT_type_mask)
2301 case PGT_l1_shadow:
2302 case PGT_l2_shadow:
2303 case PGT_l3_shadow:
2304 case PGT_l4_shadow:
2305 case PGT_hl2_shadow:
2306 count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
2307 break;
2308 case PGT_snapshot:
2309 case PGT_writable_pred:
2310 // these can't hold refs to the forbidden page
2311 break;
2312 default:
2313 BUG();
2316 a = a->next;
2320 return count;
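// resync_all(): for each out-of-sync page of the given shadow type, compare
// the guest page with its snapshot and propagate any changed entries into
// the corresponding shadow (l1, l2 or hl2), noting whether a TLB flush is
// required and whether a stale l2 shadow should be unpinned afterwards.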
2323 static int resync_all(struct domain *d, u32 stype)
2325 struct out_of_sync_entry *entry;
2326 unsigned i;
2327 unsigned long smfn;
2328 void *guest, *shadow, *snapshot;
2329 int need_flush = 0, external = shadow_mode_external(d);
2330 int unshadow;
2331 int changed;
2333 ASSERT(shadow_lock_is_acquired(d));
2335 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2337 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
2338 continue;
2340 smfn = __shadow_status(d, entry->gpfn, stype);
2342 if ( !smfn )
2344 // For heavy weight shadows: no need to update refcounts if
2345 // there's no shadow page.
2346 //
2347 if ( shadow_mode_refcounts(d) )
2348 continue;
2350 // For light weight shadows: only need to resync the refcounts to
2351 // the new contents of the guest page iff it has the right
2352 // page type.
2353 //
2354 if ( stype != ( pfn_to_page(entry->gmfn)->u.inuse.type_info & PGT_type_mask) )
2355 continue;
2358 FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
2359 stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
2361 // Compare guest's new contents to its snapshot, validating
2362 // and updating its shadow as appropriate.
2363 //
2364 guest = map_domain_page(entry->gmfn);
2365 snapshot = map_domain_page(entry->snapshot_mfn);
2367 if ( smfn )
2368 shadow = map_domain_page(smfn);
2369 else
2370 shadow = NULL;
2372 unshadow = 0;
2374 switch ( stype ) {
2375 case PGT_l1_shadow:
2377 l1_pgentry_t *guest1 = guest;
2378 l1_pgentry_t *shadow1 = shadow;
2379 l1_pgentry_t *snapshot1 = snapshot;
2380 int unshadow_l1 = 0;
2382 ASSERT(shadow_mode_write_l1(d) ||
2383 shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
2385 if ( !shadow_mode_refcounts(d) )
2386 revalidate_l1(d, guest1, snapshot1);
2388 if ( !smfn )
2389 break;
2391 u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
2392 int min_shadow = SHADOW_MIN(min_max_shadow);
2393 int max_shadow = SHADOW_MAX(min_max_shadow);
2395 u32 min_max_snapshot =
2396 pfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
2397 int min_snapshot = SHADOW_MIN(min_max_snapshot);
2398 int max_snapshot = SHADOW_MAX(min_max_snapshot);
2400 changed = 0;
2402 for ( i = min_shadow; i <= max_shadow; i++ )
2404 if ( (i < min_snapshot) || (i > max_snapshot) ||
2405 l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
2407 int error;
2409 error = validate_pte_change(d, guest1[i], &shadow1[i]);
2410 if ( error == -1 )
2411 unshadow_l1 = 1;
2412 else {
2413 need_flush |= error;
2414 set_guest_back_ptr(d, shadow1[i], smfn, i);
2417 // can't update snapshots of linear page tables -- they
2418 // are used multiple times...
2419 //
2420 // snapshot[i] = new_pte;
2421 changed++;
2424 perfc_incrc(resync_l1);
2425 perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
2426 perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
2427 if (unshadow_l1) {
2428 l2_pgentry_t l2e;
2430 __shadow_get_l2e(entry->v, entry->va, &l2e);
2431 if (l2e_get_flags(l2e) & _PAGE_PRESENT) {
2432 put_shadow_ref(l2e_get_pfn(l2e));
2433 l2e = l2e_empty();
2434 __shadow_set_l2e(entry->v, entry->va, l2e);
2436 if (entry->v == current)
2437 need_flush = 1;
2441 break;
2443 case PGT_l2_shadow:
2445 int max = -1;
2447 l2_pgentry_t *guest2 = guest;
2448 l2_pgentry_t *shadow2 = shadow;
2449 l2_pgentry_t *snapshot2 = snapshot;
2451 ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
2452 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2454 changed = 0;
2455 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2457 #if CONFIG_X86_PAE
2458 BUG(); /* FIXME: need type_info */
2459 #endif
2460 if ( !is_guest_l2_slot(0,i) && !external )
2461 continue;
2463 l2_pgentry_t new_pde = guest2[i];
2464 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK))
2466 need_flush |= validate_pde_change(d, new_pde, &shadow2[i]);
2468 // can't update snapshots of linear page tables -- they
2469 // are used multiple times...
2470 //
2471 // snapshot[i] = new_pde;
2473 changed++;
2475 if ( l2e_get_intpte(new_pde) != 0 ) /* FIXME: check flags? */
2476 max = i;
2478 // XXX - This hack works for linux guests.
2479 // Need a better solution long term.
2480 if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
2481 unlikely(l2e_get_intpte(new_pde) != 0) &&
2482 !unshadow && MFN_PINNED(smfn) )
2483 unshadow = 1;
2485 if ( max == -1 )
2486 unshadow = 1;
2487 perfc_incrc(resync_l2);
2488 perfc_incr_histo(shm_l2_updates, changed, PT_UPDATES);
2489 break;
2491 case PGT_hl2_shadow:
2493 l2_pgentry_t *guest2 = guest;
2494 l2_pgentry_t *snapshot2 = snapshot;
2495 l1_pgentry_t *shadow2 = shadow;
2497 ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
2498 BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
2500 changed = 0;
2501 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
2503 #if CONFIG_X86_PAE
2504 BUG(); /* FIXME: need type_info */
2505 #endif
2506 if ( !is_guest_l2_slot(0, i) && !external )
2507 continue;
2509 l2_pgentry_t new_pde = guest2[i];
2510 if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK) )
2512 need_flush |= validate_hl2e_change(d, new_pde, &shadow2[i]);
2514 // can't update snapshots of linear page tables -- they
2515 // are used multiple times...
2516 //
2517 // snapshot[i] = new_pde;
2519 changed++;
2522 perfc_incrc(resync_hl2);
2523 perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
2524 break;
2526 default:
2527 BUG();
2530 if ( smfn )
2531 unmap_domain_page(shadow);
2532 unmap_domain_page(snapshot);
2533 unmap_domain_page(guest);
2535 if ( unlikely(unshadow) )
2537 perfc_incrc(unshadow_l2_count);
2538 shadow_unpin(smfn);
2539 if ( unlikely(shadow_mode_external(d)) )
2541 unsigned long hl2mfn;
2543 if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
2544 MFN_PINNED(hl2mfn) )
2545 shadow_unpin(hl2mfn);
2550 return need_flush;
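// __shadow_sync_all(): bring all out-of-sync pages back into sync.  Pass 1
// revokes write access through each recorded writable_pl1e pointer; pass 2
// resyncs l1, hl2 and l2 shadows against their snapshots and then frees the
// out-of-sync state.  A local TLB flush is done unless the domain runs in
// external shadow mode.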
2553 void __shadow_sync_all(struct domain *d)
2555 struct out_of_sync_entry *entry;
2556 int need_flush = 0;
2558 perfc_incrc(shadow_sync_all);
2560 ASSERT(shadow_lock_is_acquired(d));
2562 // First, remove all write permissions to the page tables
2563 //
2564 for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
2566 // Skip entries that have low bits set... Those aren't
2567 // real PTEs.
2568 //
2569 if ( entry->writable_pl1e & (sizeof(l1_pgentry_t)-1) )
2570 continue;
2572 l1_pgentry_t *ppte = (l1_pgentry_t *)(
2573 (char *)map_domain_page(entry->writable_pl1e >> PAGE_SHIFT) +
2574 (entry->writable_pl1e & ~PAGE_MASK));
2575 l1_pgentry_t opte = *ppte;
2576 l1_pgentry_t npte = opte;
2577 l1e_remove_flags(npte, _PAGE_RW);
2579 if ( (l1e_get_flags(npte) & _PAGE_PRESENT) &&
2580 !shadow_get_page_from_l1e(npte, d) )
2581 BUG();
2582 *ppte = npte;
2583 set_guest_back_ptr(d, npte, (entry->writable_pl1e) >> PAGE_SHIFT,
2584 (entry->writable_pl1e & ~PAGE_MASK)/sizeof(l1_pgentry_t));
2585 shadow_put_page_from_l1e(opte, d);
2587 unmap_domain_page(ppte);
2590 // XXX mafetter: SMP
2591 //
2592 // With the current algorithm, we've gotta flush all the TLBs
2593 // before we can safely continue. I don't think we want to
2594 // do it this way, so I think we should consider making
2595 // entirely private copies of the shadow for each vcpu, and/or
2596 // possibly having a mix of private and shared shadow state
2597 // (any path from a PTE that grants write access to an out-of-sync
2598 // page table page needs to be vcpu private).
2599 //
2600 #if 0 // this should be enabled for SMP guests...
2601 flush_tlb_mask(cpu_online_map);
2602 #endif
2603 need_flush = 1;
2605 // Second, resync all L1 pages, then L2 pages, etc...
2606 //
2607 need_flush |= resync_all(d, PGT_l1_shadow);
2608 if ( shadow_mode_translate(d) )
2609 need_flush |= resync_all(d, PGT_hl2_shadow);
2610 need_flush |= resync_all(d, PGT_l2_shadow);
2612 if ( need_flush && !unlikely(shadow_mode_external(d)) )
2613 local_flush_tlb();
2615 free_out_of_sync_state(d);
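// shadow_fault(): top-level page-fault handler while shadowing.  Under the
// shadow lock it syncs the path to va, bails out to the guest if the guest
// PDE/PTE is not present, handles the write or read fault against the guest
// PTE via l1pte_write_fault()/l1pte_read_fault(), writes any modified guest
// PTE back, and installs the resulting shadow PTE.  Returns
// EXCRET_fault_fixed on success, or 0 to pass the fault on to the guest.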
2618 int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
2620 l1_pgentry_t gpte, spte, orig_gpte;
2621 struct vcpu *v = current;
2622 struct domain *d = v->domain;
2623 l2_pgentry_t gpde;
2625 spte = l1e_empty();
2627 SH_VVLOG("shadow_fault( va=%lx, code=%lu )",
2628 va, (unsigned long)regs->error_code);
2629 perfc_incrc(shadow_fault_calls);
2631 check_pagetable(v, "pre-sf");
2633 /*
2634 * Don't let someone else take the guest's table pages out-of-sync.
2635 */
2636 shadow_lock(d);
2638 /* XXX - FIX THIS COMMENT!!!
2639 * STEP 1. Check to see if this fault might have been caused by an
2640 * out-of-sync table page entry, or if we should pass this
2641 * fault onto the guest.
2642 */
2643 __shadow_sync_va(v, va);
2645 /*
2646 * STEP 2. Check the guest PTE.
2647 */
2648 __guest_get_l2e(v, va, &gpde);
2649 if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
2651 SH_VVLOG("shadow_fault - EXIT: L1 not present");
2652 perfc_incrc(shadow_fault_bail_pde_not_present);
2653 goto fail;
2656 // This can't fault because we hold the shadow lock and we've ensured that
2657 // the mapping is in-sync, so the check of the PDE's present bit, above,
2658 // covers this access.
2659 //
2660 orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
2661 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
2663 SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
2664 l1e_get_intpte(gpte));
2665 perfc_incrc(shadow_fault_bail_pte_not_present);
2666 goto fail;
2669 /* Write fault? */
2670 if ( regs->error_code & 2 )
2672 int allow_writes = 0;
2674 if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
2676 if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gpte)) )
2678 allow_writes = 1;
2679 l1e_add_flags(gpte, _PAGE_RW);
2681 else
2683 /* Write fault on a read-only mapping. */
2684 SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
2685 l1e_get_intpte(gpte));
2686 perfc_incrc(shadow_fault_bail_ro_mapping);
2687 goto fail;
2690 else if ( unlikely(!shadow_mode_wr_pt_pte(d) && mfn_is_page_table(l1e_get_pfn(gpte))) )
2692 SH_LOG("l1pte_write_fault: no write access to page table page");
2693 domain_crash_synchronous();
2696 if ( unlikely(!l1pte_write_fault(v, &gpte, &spte, va)) )
2698 SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
2699 perfc_incrc(write_fault_bail);
2700 shadow_unlock(d);
2701 return 0;
2704 if ( allow_writes )
2705 l1e_remove_flags(gpte, _PAGE_RW);
2707 else
2709 if ( !l1pte_read_fault(d, &gpte, &spte) )
2711 SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
2712 perfc_incrc(read_fault_bail);
2713 shadow_unlock(d);
2714 return 0;
2718 /*
2719 * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
2720 */
2721 if ( l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK) )
2723 /* XXX Watch out for read-only L2 entries! (not used in Linux). */
2724 if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
2725 &gpte, sizeof(gpte))) )
2727 printk("%s() failed, crashing domain %d "
2728 "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
2729 __func__,d->domain_id, l2e_get_intpte(gpde), va);
2730 domain_crash_synchronous();
2733 // if necessary, record the page table page as dirty
2734 if ( unlikely(shadow_mode_log_dirty(d)) )
2735 __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
2738 shadow_set_l1e(va, spte, 1);
2740 perfc_incrc(shadow_fault_fixed);
2741 d->arch.shadow_fault_count++;
2743 shadow_unlock(d);
2745 check_pagetable(v, "post-sf");
2746 return EXCRET_fault_fixed;
2748 fail:
2749 shadow_unlock(d);
2750 return 0;
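// shadow_l{1,2}_normal_pt_update(): called when a normal page-table update
// writes an l1/l2 entry at machine physical address pa.  If the page
// containing that entry currently has a shadow, the new guest entry is
// propagated/validated into the matching slot of the shadow.  The l3/l4
// variants below are not implemented in this 32-bit code.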
2753 void shadow_l1_normal_pt_update(
2754 struct domain *d,
2755 unsigned long pa, l1_pgentry_t gpte,
2756 struct domain_mmap_cache *cache)
2758 unsigned long sl1mfn;
2759 l1_pgentry_t *spl1e, spte;
2761 shadow_lock(d);
2763 sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
2764 if ( sl1mfn )
2766 SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
2767 (void *)pa, l1e_get_intpte(gpte));
2768 l1pte_propagate_from_guest(current->domain, gpte, &spte);
2770 spl1e = map_domain_page_with_cache(sl1mfn, cache);
2771 spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
2772 unmap_domain_page_with_cache(spl1e, cache);
2775 shadow_unlock(d);
2778 void shadow_l2_normal_pt_update(
2779 struct domain *d,
2780 unsigned long pa, l2_pgentry_t gpde,
2781 struct domain_mmap_cache *cache)
2783 unsigned long sl2mfn;
2784 l2_pgentry_t *spl2e;
2786 shadow_lock(d);
2788 sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
2789 if ( sl2mfn )
2791 SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
2792 (void *)pa, l2e_get_intpte(gpde));
2793 spl2e = map_domain_page_with_cache(sl2mfn, cache);
2794 validate_pde_change(d, gpde,
2795 &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
2796 unmap_domain_page_with_cache(spl2e, cache);
2799 shadow_unlock(d);
2802 #if CONFIG_PAGING_LEVELS >= 3
2803 void shadow_l3_normal_pt_update(
2804 struct domain *d,
2805 unsigned long pa, l3_pgentry_t gpde,
2806 struct domain_mmap_cache *cache)
2808 BUG(); // not yet implemented
2810 #endif
2812 #if CONFIG_PAGING_LEVELS >= 4
2813 void shadow_l4_normal_pt_update(
2814 struct domain *d,
2815 unsigned long pa, l4_pgentry_t gpde,
2816 struct domain_mmap_cache *cache)
2818 BUG(); // not yet implemented
2820 #endif
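// shadow_do_update_va_mapping(): handle update_va_mapping for a shadowed
// guest -- sync the page-table path for va, propagate the new guest PTE
// into the shadow, and mark the PT-holding page dirty when log-dirty mode
// is active.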
2822 int shadow_do_update_va_mapping(unsigned long va,
2823 l1_pgentry_t val,
2824 struct vcpu *v)
2826 struct domain *d = v->domain;
2827 l1_pgentry_t spte;
2828 int rc = 0;
2830 shadow_lock(d);
2832 //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val));
2834 // This is actually overkill - we don't need to sync the L1 itself,
2835 // just everything involved in getting to this L1 (i.e. we need
2836 // linear_pg_table[l1_linear_offset(va)] to be in sync)...
2837 //
2838 __shadow_sync_va(v, va);
2840 l1pte_propagate_from_guest(d, val, &spte);
2841 shadow_set_l1e(va, spte, 0);
2843 /*
2844 * If we're in log-dirty mode then we need to note that we've updated
2845 * the PTE in the PT-holding page. We need the machine frame number
2846 * for this.
2847 */
2848 if ( shadow_mode_log_dirty(d) )
2849 __mark_dirty(d, va_to_l1mfn(v, va));
2851 // out:
2852 shadow_unlock(d);
2854 return rc;
2858 /*
2859 * What lives where in the 32-bit address space in the various shadow modes,
2860 * and what it uses to get/maintain that mapping.
2862 * SHADOW MODE:     none            enable          translate        external
2864 * 4KB things:
2865 * guest_vtable     lin_l2          mapped per gl2  lin_l2 via hl2   mapped per gl2
2866 * shadow_vtable    n/a             sh_lin_l2       sh_lin_l2        mapped per gl2
2867 * hl2_vtable       n/a             n/a             lin_hl2 via hl2  mapped per gl2
2868 * monitor_vtable   n/a             n/a             n/a              mapped once
2870 * 4MB things:
2871 * guest_linear     lin via gl2     lin via gl2     lin via hl2      lin via hl2
2872 * shadow_linear    n/a             sh_lin via sl2  sh_lin via sl2   sh_lin via sl2
2873 * monitor_linear   n/a             n/a             n/a              ???
2874 * perdomain        perdomain       perdomain       perdomain        perdomain
2875 * R/O M2P          R/O M2P         R/O M2P         n/a              n/a
2876 * R/W M2P          R/W M2P         R/W M2P         R/W M2P          R/W M2P
2877 * P2M              n/a             n/a             R/O M2P          R/O M2P
2879 * NB:
2880 * update_pagetables(), __update_pagetables(), shadow_mode_enable(),
2881 * shadow_l2_table(), shadow_hl2_table(), and alloc_monitor_pagetable()
2882 * all play a part in maintaining these mappings.
2883 */
2884 void __update_pagetables(struct vcpu *v)
2886 struct domain *d = v->domain;
2887 unsigned long gmfn = pagetable_get_pfn(v->arch.guest_table);
2888 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
2889 unsigned long smfn, hl2mfn, old_smfn;
2891 int max_mode = ( shadow_mode_external(d) ? SHM_external
2892 : shadow_mode_translate(d) ? SHM_translate
2893 : shadow_mode_enabled(d) ? SHM_enable
2894 : 0 );
2896 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
2897 ASSERT( max_mode );
2899 /*
2900 * arch.guest_vtable
2901 */
2902 if ( max_mode & (SHM_enable | SHM_external) )
2904 if ( likely(v->arch.guest_vtable != NULL) )
2905 unmap_domain_page(v->arch.guest_vtable);
2906 v->arch.guest_vtable = map_domain_page(gmfn);
2909 /*
2910 * arch.shadow_table
2911 */
2912 if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
2913 smfn = shadow_l2_table(d, gpfn, gmfn);
2914 if ( !get_shadow_ref(smfn) )
2915 BUG();
2916 old_smfn = pagetable_get_pfn(v->arch.shadow_table);
2917 v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
2918 if ( old_smfn )
2919 put_shadow_ref(old_smfn);
2921 SH_VVLOG("__update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
2923 /*
2924 * arch.shadow_vtable
2925 */
2926 if ( max_mode == SHM_external )
2928 if ( v->arch.shadow_vtable )
2929 unmap_domain_page(v->arch.shadow_vtable);
2930 v->arch.shadow_vtable = map_domain_page(smfn);
2933 /*
2934 * arch.hl2_vtable
2935 */
2937 // if max_mode == SHM_translate, then the hl2 is already installed
2938 // correctly in its smfn, and there's nothing to do.
2939 //
2940 if ( max_mode == SHM_external )
2942 if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
2943 hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
2944 if ( v->arch.hl2_vtable )
2945 unmap_domain_page(v->arch.hl2_vtable);
2946 v->arch.hl2_vtable = map_domain_page(hl2mfn);
2949 /*
2950 * fixup pointers in monitor table, as necessary
2951 */
2952 if ( max_mode == SHM_external )
2954 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
2955 l2_pgentry_t old_hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
2956 l2_pgentry_t old_sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
2958 ASSERT( shadow_mode_translate(d) );
2960 if ( !get_shadow_ref(hl2mfn) )
2961 BUG();
2962 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
2963 l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
2964 if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
2965 put_shadow_ref(l2e_get_pfn(old_hl2e));
2967 if ( !get_shadow_ref(smfn) )
2968 BUG();
2969 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
2970 l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
2971 if ( l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
2972 put_shadow_ref(l2e_get_pfn(old_sl2e));
2974 // XXX - maybe this can be optimized somewhat??
2975 local_flush_tlb();
2980 /************************************************************************/
2981 /************************************************************************/
2982 /************************************************************************/
2984 #if SHADOW_DEBUG
2986 // The following is entirely for _check_pagetable()'s benefit.
2987 // _check_pagetable() wants to know whether a given entry in a
2988 // shadow page table is supposed to be the shadow of the guest's
2989 // current entry, or the shadow of the entry held in the snapshot
2990 // taken above.
2991 //
2992 // Here, we mark all currently existing entries as reflecting
2993 // the snapshot, above. All other places in xen that update
2994 // the shadow will keep the shadow in sync with the guest's
2995 // entries (via l1pte_propagate_from_guest and friends), which clear
2996 // the SHADOW_REFLECTS_SNAPSHOT bit.
2997 //
2998 static void
2999 mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
3001 unsigned long smfn;
3002 l1_pgentry_t *l1e;
3003 l2_pgentry_t *l2e;
3004 unsigned i;
3006 if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
3008 l1e = map_domain_page(smfn);
3009 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3010 if ( is_guest_l1_slot(i) &&
3011 (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
3012 l1e_add_flags(l1e[i], SHADOW_REFLECTS_SNAPSHOT);
3013 unmap_domain_page(l1e);
3016 if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
3018 l2e = map_domain_page(smfn);
3019 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
3020 if ( is_guest_l2_slot(0, i) &&
3021 (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
3022 l2e_add_flags(l2e[i], SHADOW_REFLECTS_SNAPSHOT);
3023 unmap_domain_page(l2e);
3027 // BUG: these are not SMP safe...
3028 static int sh_l2_present;
3029 static int sh_l1_present;
3030 static char *sh_check_name;
3031 int shadow_status_noswap;
3033 #define v2m(_v, _adr) ({ \
3034 unsigned long _a = (unsigned long)(_adr); \
3035 l2_pgentry_t _pde = shadow_linear_l2_table(_v)[l2_table_offset(_a)]; \
3036 unsigned long _pa = -1; \
3037 if ( l2e_get_flags(_pde) & _PAGE_PRESENT ) \
3038 { \
3039 l1_pgentry_t _pte; \
3040 _pte = shadow_linear_pg_table[l1_linear_offset(_a)]; \
3041 if ( l1e_get_flags(_pte) & _PAGE_PRESENT ) \
3042 _pa = l1e_get_paddr(_pte); \
3043 } \
3044 _pa | (_a & ~PAGE_MASK); \
3045 })
3047 #define FAIL(_f, _a...) \
3048 do { \
3049 printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n", \
3050 sh_check_name, level, l2_idx, l1_idx, ## _a, \
3051 __FILE__, __LINE__); \
3052 printk("guest_pte=%" PRIpte " eff_guest_pte=%" PRIpte \
3053 " shadow_pte=%" PRIpte " snapshot_pte=%" PRIpte \
3054 " &guest=%p &shadow=%p &snap=%p v2m(&guest)=%p" \
3055 " v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n", \
3056 l1e_get_intpte(guest_pte), l1e_get_intpte(eff_guest_pte), \
3057 l1e_get_intpte(shadow_pte), l1e_get_intpte(snapshot_pte), \
3058 p_guest_pte, p_shadow_pte, p_snapshot_pte, \
3059 (void *)v2m(v, p_guest_pte), (void *)v2m(v, p_shadow_pte), \
3060 (void *)v2m(v, p_snapshot_pte), \
3061 (l2_idx << L2_PAGETABLE_SHIFT) | \
3062 (l1_idx << L1_PAGETABLE_SHIFT)); \
3063 errors++; \
3064 } while ( 0 )
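// check_pte(): audit a single shadow PTE against the guest entry it should
// reflect (or against the snapshot entry when SHADOW_REFLECTS_SNAPSHOT is
// set), complaining about flag mismatches, dirty/accessed incoherence,
// unexpected write access and bogus shadow mfns.  Returns the number of
// inconsistencies found.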
3066 static int check_pte(
3067 struct vcpu *v,
3068 l1_pgentry_t *p_guest_pte,
3069 l1_pgentry_t *p_shadow_pte,
3070 l1_pgentry_t *p_snapshot_pte,
3071 int level, int l2_idx, int l1_idx)
3073 struct domain *d = v->domain;
3074 l1_pgentry_t guest_pte = *p_guest_pte;
3075 l1_pgentry_t shadow_pte = *p_shadow_pte;
3076 l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
3077 l1_pgentry_t eff_guest_pte = l1e_empty();
3078 unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
3079 int errors = 0, guest_writable;
3080 int page_table_page;
3082 if ( (l1e_get_intpte(shadow_pte) == 0) ||
3083 (l1e_get_intpte(shadow_pte) == 0xdeadface) ||
3084 (l1e_get_intpte(shadow_pte) == 0x00000E00) )
3085 return errors; /* always safe */
3087 if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
3088 FAIL("Non zero not present shadow_pte");
3090 if ( level == 2 ) sh_l2_present++;
3091 if ( level == 1 ) sh_l1_present++;
3093 if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
3094 eff_guest_pte = snapshot_pte;
3095 else
3096 eff_guest_pte = guest_pte;
3098 if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
3099 FAIL("Guest not present yet shadow is");
3101 mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
3103 if ( ((l1e_get_intpte(shadow_pte) & mask) != (l1e_get_intpte(eff_guest_pte) & mask)) )
3104 FAIL("Corrupt?");
3106 if ( (level == 1) &&
3107 (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
3108 !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
3109 FAIL("Dirty coherence");
3111 if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
3112 !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
3113 FAIL("Accessed coherence");
3115 if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
3116 FAIL("global bit set in shadow");
3118 eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
3119 eff_guest_mfn = __gpfn_to_mfn(d, eff_guest_pfn);
3120 shadow_mfn = l1e_get_pfn(shadow_pte);
3122 if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
3123 FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%" PRIpte "\n",
3124 __func__, eff_guest_pfn, l1e_get_intpte(eff_guest_pte));
3126 page_table_page = mfn_is_page_table(eff_guest_mfn);
3128 guest_writable =
3129 (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
3130 (shadow_mode_write_l1(d) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
3132 if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
3134 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=%lx page_table_page=%d\n",
3135 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3136 frame_table[eff_guest_mfn].u.inuse.type_info,
3137 page_table_page);
3138 FAIL("RW coherence");
3141 if ( (level == 1) &&
3142 (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
3143 !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
3145 printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=%lx page_table_page=%d\n",
3146 eff_guest_pfn, eff_guest_mfn, shadow_mfn,
3147 frame_table[eff_guest_mfn].u.inuse.type_info,
3148 page_table_page);
3149 FAIL("RW2 coherence");
3152 if ( eff_guest_mfn == shadow_mfn )
3154 if ( level > 1 )
3155 FAIL("Linear map ???"); /* XXX this will fail on BSD */
3157 else
3159 if ( level < 2 )
3160 FAIL("Shadow in L1 entry?");
3162 if ( level == 2 )
3164 if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
3165 FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
3166 __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
3168 else
3169 BUG(); // XXX -- not handled yet.
3172 return errors;
3174 #undef FAIL
3175 #undef v2m
3177 static int check_l1_table(
3178 struct vcpu *v, unsigned long gpfn,
3179 unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
3181 struct domain *d = v->domain;
3182 int i;
3183 unsigned long snapshot_mfn;
3184 l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
3185 int errors = 0;
3187 if ( page_out_of_sync(pfn_to_page(gmfn)) )
3189 snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
3190 ASSERT(snapshot_mfn);
3191 p_snapshot = map_domain_page(snapshot_mfn);
3194 p_guest = map_domain_page(gmfn);
3195 p_shadow = map_domain_page(smfn);
3197 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3198 errors += check_pte(v, p_guest+i, p_shadow+i,
3199 p_snapshot ? p_snapshot+i : NULL,
3200 1, l2_idx, i);
3202 unmap_domain_page(p_shadow);
3203 unmap_domain_page(p_guest);
3204 if ( p_snapshot )
3205 unmap_domain_page(p_snapshot);
3207 return errors;
3210 #define FAILPT(_f, _a...) \
3211 do { \
3212 printk("XXX FAIL %s-PT " _f "\n", sh_check_name, ## _a ); \
3213 errors++; \
3214 } while ( 0 )
3216 int check_l2_table(
3217 struct vcpu *v, unsigned long gmfn, unsigned long smfn, int oos_pdes)
3219 struct domain *d = v->domain;
3220 l2_pgentry_t *gpl2e = (l2_pgentry_t *)map_domain_page(gmfn);
3221 l2_pgentry_t *spl2e = (l2_pgentry_t *)map_domain_page(smfn);
3222 l2_pgentry_t match;
3223 int i;
3224 int errors = 0;
3225 int limit;
3227 if ( !oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != d) )
3228 FAILPT("domain doesn't own page");
3229 if ( oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != NULL) )
3230 FAILPT("bogus owner for snapshot page");
3231 if ( page_get_owner(pfn_to_page(smfn)) != NULL )
3232 FAILPT("shadow page mfn=0x%lx is owned by someone, domid=%d",
3233 smfn, page_get_owner(pfn_to_page(smfn))->domain_id);
3235 #if 0
3236 if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3237 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
3238 ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
3239 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
3241 for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3242 i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
3243 i++ )
3244 printk("+++ (%d) %lx %lx\n",i,
3245 l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
3246 FAILPT("hypervisor entries inconsistent");
3249 if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
3250 l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
3251 FAILPT("hypervisor linear map inconsistent");
3252 #endif
3254 match = l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
3255 if ( !shadow_mode_external(d) &&
3256 l2e_has_changed(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
3257 match, PAGE_FLAG_MASK))
3259 FAILPT("hypervisor shadow linear map inconsistent %" PRIpte " %" PRIpte,
3260 l2e_get_intpte(spl2e[SH_LINEAR_PT_VIRT_START >>
3261 L2_PAGETABLE_SHIFT]),
3262 l2e_get_intpte(match));
3265 match = l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
3266 if ( !shadow_mode_external(d) &&
3267 l2e_has_changed(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
3268 match, PAGE_FLAG_MASK))
3270 FAILPT("hypervisor per-domain map inconsistent saw %" PRIpte ", expected (va=%p) %" PRIpte,
3271 l2e_get_intpte(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
3272 d->arch.mm_perdomain_pt,
3273 l2e_get_intpte(match));
3276 #ifdef __i386__
3277 if ( shadow_mode_external(d) )
3278 limit = L2_PAGETABLE_ENTRIES;
3279 else
3280 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3281 #else
3282 limit = 0; /* XXX x86/64 XXX */
3283 #endif
3285 /* Check the whole L2. */
3286 for ( i = 0; i < limit; i++ )
3287 errors += check_pte(v,
3288 (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
3289 (l1_pgentry_t*)(&spl2e[i]),
3290 NULL,
3291 2, i, 0);
3293 unmap_domain_page(spl2e);
3294 unmap_domain_page(gpl2e);
3296 #if 1
3297 if ( errors )
3298 printk("check_l2_table returning %d errors\n", errors);
3299 #endif
3301 return errors;
3303 #undef FAILPT
3305 int _check_pagetable(struct vcpu *v, char *s)
3307 struct domain *d = v->domain;
3308 pagetable_t pt = v->arch.guest_table;
3309 unsigned long gptbase = pagetable_get_paddr(pt);
3310 unsigned long ptbase_pfn, smfn;
3311 unsigned long i;
3312 l2_pgentry_t *gpl2e, *spl2e;
3313 unsigned long ptbase_mfn = 0;
3314 int errors = 0, limit, oos_pdes = 0;
3316 //_audit_domain(d, AUDIT_QUIET);
3317 shadow_lock(d);
3319 sh_check_name = s;
3320 //SH_VVLOG("%s-PT Audit", s);
3321 sh_l2_present = sh_l1_present = 0;
3322 perfc_incrc(check_pagetable);
3324 ptbase_mfn = gptbase >> PAGE_SHIFT;
3325 ptbase_pfn = __mfn_to_gpfn(d, ptbase_mfn);
3327 if ( !(smfn = __shadow_status(d, ptbase_pfn, PGT_base_page_table)) )
3329 printk("%s-PT %lx not shadowed\n", s, gptbase);
3330 goto out;
3332 if ( page_out_of_sync(pfn_to_page(ptbase_mfn)) )
3334 ptbase_mfn = __shadow_status(d, ptbase_pfn, PGT_snapshot);
3335 oos_pdes = 1;
3336 ASSERT(ptbase_mfn);
3339 errors += check_l2_table(v, ptbase_mfn, smfn, oos_pdes);
3341 gpl2e = (l2_pgentry_t *) map_domain_page(ptbase_mfn);
3342 spl2e = (l2_pgentry_t *) map_domain_page(smfn);
3344 /* Go back and recurse. */
3345 #ifdef __i386__
3346 if ( shadow_mode_external(d) )
3347 limit = L2_PAGETABLE_ENTRIES;
3348 else
3349 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
3350 #else
3351 limit = 0; /* XXX x86/64 XXX */
3352 #endif
3354 for ( i = 0; i < limit; i++ )
3356 unsigned long gl1pfn = l2e_get_pfn(gpl2e[i]);
3357 unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
3358 unsigned long sl1mfn = l2e_get_pfn(spl2e[i]);
3360 if ( l2e_get_intpte(spl2e[i]) != 0 ) /* FIXME: check flags? */
3362 errors += check_l1_table(v, gl1pfn, gl1mfn, sl1mfn, i);
3366 unmap_domain_page(spl2e);
3367 unmap_domain_page(gpl2e);
3369 #if 0
3370 SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
3371 sh_l2_present, sh_l1_present);
3372 #endif
3374 out:
3375 if ( errors )
3376 BUG();
3378 shadow_unlock(d);
3380 return errors;
3383 int _check_all_pagetables(struct vcpu *v, char *s)
3385 struct domain *d = v->domain;
3386 int i;
3387 struct shadow_status *a;
3388 unsigned long gmfn;
3389 int errors = 0;
3391 shadow_status_noswap = 1;
3393 sh_check_name = s;
3394 SH_VVLOG("%s-PT Audit domid=%d", s, d->domain_id);
3395 sh_l2_present = sh_l1_present = 0;
3396 perfc_incrc(check_all_pagetables);
3398 for (i = 0; i < shadow_ht_buckets; i++)
3400 a = &d->arch.shadow_ht[i];
3401 while ( a && a->gpfn_and_flags )
3403 gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
3405 switch ( a->gpfn_and_flags & PGT_type_mask )
3407 case PGT_l1_shadow:
3408 errors += check_l1_table(v, a->gpfn_and_flags & PGT_mfn_mask,
3409 gmfn, a->smfn, 0);
3410 break;
3411 case PGT_l2_shadow:
3412 errors += check_l2_table(v, gmfn, a->smfn,
3413 page_out_of_sync(pfn_to_page(gmfn)));
3414 break;
3415 case PGT_l3_shadow:
3416 case PGT_l4_shadow:
3417 case PGT_hl2_shadow:
3418 BUG(); // XXX - ought to fix this...
3419 break;
3420 case PGT_snapshot:
3421 case PGT_writable_pred:
3422 break;
3423 default:
3424 errors++;
3425 printk("unexpected shadow type %lx, gpfn=%lx, "
3426 "gmfn=%lx smfn=%lx\n",
3427 a->gpfn_and_flags & PGT_type_mask,
3428 a->gpfn_and_flags & PGT_mfn_mask,
3429 gmfn, a->smfn);
3430 BUG();
3432 a = a->next;
3436 shadow_status_noswap = 0;
3438 if ( errors )
3439 BUG();
3441 return errors;
3444 #endif // SHADOW_DEBUG
3446 /*
3447 * Local variables:
3448 * mode: C
3449 * c-set-style: "BSD"
3450 * c-basic-offset: 4
3451 * tab-width: 4
3452 * indent-tabs-mode: nil
3453 * End:
3454 */