ia64/xen-unstable

view xen/arch/x86/shadow.c @ 3858:5b63436f25fe

bitkeeper revision 1.1205.1.2 (421527deX3t0INFwjrOweq0E7Le7pw)

Rename fields in arch_exec_domain to be more uniform.
Promote vmx_shadow_invlpg() to shadow_invlpg().
author maf46@burn.cl.cam.ac.uk
date Thu Feb 17 23:25:18 2005 +0000 (2005-02-17)
parents 0fe3bb5ed3aa
children aca72468d4fe
line source
1 /* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
3 #include <xen/config.h>
4 #include <xen/types.h>
5 #include <xen/mm.h>
6 #include <asm/shadow.h>
7 #include <asm/domain_page.h>
8 #include <asm/page.h>
9 #include <xen/event.h>
10 #include <xen/trace.h>
12 /********
14 There's a per-domain shadow table spin lock which works fine for SMP
15 hosts. We don't have to worry about interrupts as no shadow operations
16 happen in an interrupt context. It's probably not quite ready for SMP
17 guest operation as we have to worry about synchronisation between gpte
18 and spte updates. It's possible that this might only happen in a
19 hypercall context, in which case we'll probably have a per-domain
20 hypercall lock anyhow (at least initially).
22 ********/
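/*
 * free_shadow_page() below returns a single shadow page to the domain
 * heap, keeping the per-domain shadow_page_count and the l1/l2 perf
 * counters in step with the page's recorded type.
 */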
24 static inline void free_shadow_page(
25 struct domain *d, struct pfn_info *page)
26 {
27 d->arch.shadow_page_count--;
29 switch ( page->u.inuse.type_info & PGT_type_mask )
30 {
31 case PGT_l1_page_table:
32 perfc_decr(shadow_l1_pages);
33 break;
35 case PGT_l2_page_table:
36 perfc_decr(shadow_l2_pages);
37 break;
39 default:
40 printk("Free shadow weird page type pfn=%08x type=%08x\n",
41 page-frame_table, page->u.inuse.type_info);
42 break;
43 }
45 free_domheap_page(page);
46 }
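/*
 * free_shadow_state() tears down the whole shadow hash table: each
 * bucket's head node is cleared in place, while chained nodes are
 * returned to d->arch.shadow_ht_free for later reuse. As the warning
 * below says, the caller must ensure the shadow tables are not in use.
 */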
48 void free_shadow_state(struct domain *d)
49 {
50 int i, free = 0;
51 struct shadow_status *x, *n;
53 /*
54 * WARNING! The shadow page table must not currently be in use!
55 * e.g., You are expected to have paused the domain and synchronized CR3.
56 */
58 shadow_audit(d, 1);
60 if( !d->arch.shadow_ht ) return;
62 /* Free each hash chain in turn. */
63 for ( i = 0; i < shadow_ht_buckets; i++ )
64 {
65 /* Skip empty buckets. */
66 x = &d->arch.shadow_ht[i];
67 if ( x->pfn == 0 )
68 continue;
70 /* Free the head page. */
71 free_shadow_page(
72 d, &frame_table[x->smfn_and_flags & PSH_pfn_mask]);
74 /* Reinitialise the head node. */
75 x->pfn = 0;
76 x->smfn_and_flags = 0;
77 n = x->next;
78 x->next = NULL;
80 free++;
82 /* Iterate over non-head nodes. */
83 for ( x = n; x != NULL; x = n )
84 {
85 /* Free the shadow page. */
86 free_shadow_page(
87 d, &frame_table[x->smfn_and_flags & PSH_pfn_mask]);
89 /* Re-initialise the chain node. */
90 x->pfn = 0;
91 x->smfn_and_flags = 0;
93 /* Add to the free list. */
94 n = x->next;
95 x->next = d->arch.shadow_ht_free;
96 d->arch.shadow_ht_free = x;
98 free++;
99 }
101 shadow_audit(d, 0);
102 }
104 SH_LOG("Free shadow table. Freed=%d.", free);
105 }
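/*
 * clear_shadow_page()/clear_shadow_state() are used by the CLEAN
 * operation below (and by vmx_shadow_clear_state()): shadow L2s are
 * zeroed in place -- guest-visible slots only, or the full table for
 * external-mode shadows -- while shadow L1s are simply freed and left
 * to be re-shadowed on demand. Freeing an L1 removes a hash node,
 * hence the restart-from-bucket-head protocol.
 */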
107 static inline int clear_shadow_page(
108 struct domain *d, struct shadow_status *x)
109 {
110 unsigned long *p;
111 int restart = 0;
112 struct pfn_info *spage = &frame_table[x->smfn_and_flags & PSH_pfn_mask];
114 switch ( spage->u.inuse.type_info & PGT_type_mask )
115 {
116 /* We clear L2 pages by zeroing the guest entries. */
117 case PGT_l2_page_table:
118 p = map_domain_mem((spage - frame_table) << PAGE_SHIFT);
119 if ( shadow_mode_external(d) )
120 memset(p, 0, L2_PAGETABLE_ENTRIES * sizeof(*p));
121 else
122 memset(p, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
123 unmap_domain_mem(p);
124 break;
126 /* We clear L1 pages by freeing them: no benefit from zeroing them. */
127 case PGT_l1_page_table:
128 delete_shadow_status(d, x->pfn);
129 free_shadow_page(d, spage);
130 restart = 1; /* We need to go to start of list again. */
131 break;
132 }
134 return restart;
135 }
137 static void clear_shadow_state(struct domain *d)
138 {
139 int i;
140 struct shadow_status *x;
142 shadow_audit(d, 1);
144 for ( i = 0; i < shadow_ht_buckets; i++ )
145 {
146 retry:
147 /* Skip empty buckets. */
148 x = &d->arch.shadow_ht[i];
149 if ( x->pfn == 0 )
150 continue;
152 if ( clear_shadow_page(d, x) )
153 goto retry;
155 for ( x = x->next; x != NULL; x = x->next )
156 if ( clear_shadow_page(d, x) )
157 goto retry;
159 shadow_audit(d, 0);
160 }
162 SH_VLOG("Scan shadow table. l1=%d l2=%d",
163 perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
164 }
167 void shadow_mode_init(void)
168 {
169 }
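/*
 * __shadow_mode_enable() allocates the shadow hash table on first use
 * and, when log-dirty mode is requested, a dirty bitmap sized to
 * d->max_pages rounded up to a multiple of 64 bits. The shadow lock
 * must already be held; shadow_mode_enable() is the locked wrapper.
 */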
172 int __shadow_mode_enable(struct domain *d, unsigned int mode)
173 {
174 d->arch.shadow_mode = mode;
176 if (!d->arch.shadow_ht)
177 {
178 d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
179 if ( d->arch.shadow_ht == NULL )
180 goto nomem;
182 memset(d->arch.shadow_ht, 0,
183 shadow_ht_buckets * sizeof(struct shadow_status));
184 }
186 if ( shadow_mode_log_dirty(d) && !d->arch.shadow_dirty_bitmap)
187 {
188 d->arch.shadow_dirty_bitmap_size = (d->max_pages + 63) & ~63;
189 d->arch.shadow_dirty_bitmap =
190 xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
191 (8 * sizeof(unsigned long)));
192 if ( d->arch.shadow_dirty_bitmap == NULL )
193 {
194 d->arch.shadow_dirty_bitmap_size = 0;
195 goto nomem;
196 }
197 memset(d->arch.shadow_dirty_bitmap, 0,
198 d->arch.shadow_dirty_bitmap_size/8);
199 }
201 return 0;
203 nomem:
204 if ( d->arch.shadow_ht != NULL )
205 xfree(d->arch.shadow_ht);
206 d->arch.shadow_ht = NULL;
207 return -ENOMEM;
208 }
210 int shadow_mode_enable(struct domain *d, unsigned int mode)
211 {
212 int rc;
213 shadow_lock(d);
214 rc = __shadow_mode_enable(d, mode);
215 shadow_unlock(d);
216 return rc;
217 }
219 void __shadow_mode_disable(struct domain *d)
220 {
221 struct shadow_status *x, *n;
223 free_shadow_state(d);
224 d->arch.shadow_mode = 0;
226 SH_VLOG("freed tables count=%d l1=%d l2=%d",
227 d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
228 perfc_value(shadow_l2_pages));
230 n = d->arch.shadow_ht_extras;
231 while ( (x = n) != NULL )
232 {
233 d->arch.shadow_extras_count--;
234 n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
235 xfree(x);
236 }
238 d->arch.shadow_ht_extras = NULL;
239 ASSERT(d->arch.shadow_extras_count == 0);
240 SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
242 if ( d->arch.shadow_dirty_bitmap != NULL )
243 {
244 xfree(d->arch.shadow_dirty_bitmap);
245 d->arch.shadow_dirty_bitmap = 0;
246 d->arch.shadow_dirty_bitmap_size = 0;
247 }
249 xfree(d->arch.shadow_ht);
250 d->arch.shadow_ht = NULL;
251 }
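/*
 * shadow_mode_table_op() handles the dom0 shadow-control sub-ops that
 * act on an already-enabled domain:
 *   FLUSH - throw away all shadow state and reset the statistics.
 *   CLEAN - report and reset the statistics, then copy the dirty
 *           bitmap out to the caller in 1kB chunks, clearing it as
 *           we go.
 *   PEEK  - report the statistics and copy the dirty bitmap out
 *           without clearing it.
 * The shadow lock must be held (asserted below); shadow_mode_control()
 * additionally pauses the domain first.
 */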
253 static int shadow_mode_table_op(
254 struct domain *d, dom0_shadow_control_t *sc)
255 {
256 unsigned int op = sc->op;
257 int i, rc = 0;
258 struct exec_domain *ed;
260 ASSERT(spin_is_locked(&d->arch.shadow_lock));
262 SH_VLOG("shadow mode table op %p %p count %d",
263 pagetable_val(d->exec_domain[0]->arch.guest_table), /* XXX SMP */
264 pagetable_val(d->exec_domain[0]->arch.shadow_table), /* XXX SMP */
265 d->arch.shadow_page_count);
267 shadow_audit(d, 1);
269 switch ( op )
270 {
271 case DOM0_SHADOW_CONTROL_OP_FLUSH:
272 free_shadow_state(d);
274 d->arch.shadow_fault_count = 0;
275 d->arch.shadow_dirty_count = 0;
276 d->arch.shadow_dirty_net_count = 0;
277 d->arch.shadow_dirty_block_count = 0;
279 break;
281 case DOM0_SHADOW_CONTROL_OP_CLEAN:
282 clear_shadow_state(d);
284 sc->stats.fault_count = d->arch.shadow_fault_count;
285 sc->stats.dirty_count = d->arch.shadow_dirty_count;
286 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
287 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
289 d->arch.shadow_fault_count = 0;
290 d->arch.shadow_dirty_count = 0;
291 d->arch.shadow_dirty_net_count = 0;
292 d->arch.shadow_dirty_block_count = 0;
294 if ( (d->max_pages > sc->pages) ||
295 (sc->dirty_bitmap == NULL) ||
296 (d->arch.shadow_dirty_bitmap == NULL) )
297 {
298 rc = -EINVAL;
299 break;
300 }
302 sc->pages = d->max_pages;
304 #define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
305 for ( i = 0; i < d->max_pages; i += chunk )
306 {
307 int bytes = ((((d->max_pages - i) > chunk) ?
308 chunk : (d->max_pages - i)) + 7) / 8;
310 if (copy_to_user(
311 sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
312 d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
313 bytes))
314 {
315 // copy_to_user can fail when copying to guest app memory.
316 // The app should zero the buffer after mallocing it, and pin it.
317 rc = -EINVAL;
318 memset(
319 d->arch.shadow_dirty_bitmap +
320 (i/(8*sizeof(unsigned long))),
321 0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
322 break;
323 }
325 memset(
326 d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
327 0, bytes);
328 }
330 break;
332 case DOM0_SHADOW_CONTROL_OP_PEEK:
333 sc->stats.fault_count = d->arch.shadow_fault_count;
334 sc->stats.dirty_count = d->arch.shadow_dirty_count;
335 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
336 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
338 if ( (d->max_pages > sc->pages) ||
339 (sc->dirty_bitmap == NULL) ||
340 (d->arch.shadow_dirty_bitmap == NULL) )
341 {
342 rc = -EINVAL;
343 break;
344 }
346 sc->pages = d->max_pages;
347 if (copy_to_user(
348 sc->dirty_bitmap, d->arch.shadow_dirty_bitmap, (d->max_pages+7)/8))
349 {
350 rc = -EINVAL;
351 break;
352 }
354 break;
356 default:
357 rc = -EINVAL;
358 break;
359 }
361 SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
362 shadow_audit(d, 1);
364 for_each_exec_domain(d,ed)
365 __update_pagetables(ed);
367 return rc;
368 }
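/*
 * shadow_mode_control() is the dom0 entry point: it refuses to operate
 * on the caller's own domain, pauses the target and synchronises its
 * pagetables, takes the shadow lock, dispatches to enable/disable or
 * shadow_mode_table_op(), and finally refreshes every exec_domain's
 * pagetables before unpausing.
 */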
370 int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
371 {
372 unsigned int op = sc->op;
373 int rc = 0;
374 struct exec_domain *ed;
376 if ( unlikely(d == current->domain) )
377 {
378 DPRINTK("Don't try to do a shadow op on yourself!\n");
379 return -EINVAL;
380 }
382 domain_pause(d);
383 synchronise_pagetables(~0UL);
385 shadow_lock(d);
387 switch ( op )
388 {
389 case DOM0_SHADOW_CONTROL_OP_OFF:
390 shadow_mode_disable(d);
391 break;
393 case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
394 free_shadow_state(d);
395 rc = __shadow_mode_enable(d, SHM_enable);
396 break;
398 case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
399 free_shadow_state(d);
400 rc = __shadow_mode_enable(d, d->arch.shadow_mode|SHM_log_dirty);
401 break;
403 default:
404 rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
405 break;
406 }
408 shadow_unlock(d);
410 for_each_exec_domain(d,ed)
411 update_pagetables(ed);
413 domain_unpause(d);
415 return rc;
416 }
418 static inline struct pfn_info *alloc_shadow_page(struct domain *d)
419 {
420 struct pfn_info *page = alloc_domheap_page(NULL);
422 d->arch.shadow_page_count++;
424 if ( unlikely(page == NULL) )
425 {
426 printk("Couldn't alloc shadow page! count=%d\n",
427 d->arch.shadow_page_count);
428 SH_VLOG("Shadow tables l1=%d l2=%d",
429 perfc_value(shadow_l1_pages),
430 perfc_value(shadow_l2_pages));
431 BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
432 }
434 return page;
435 }
437 void unshadow_table(unsigned long gpfn, unsigned int type)
438 {
439 unsigned long smfn;
440 struct domain *d = page_get_owner(&frame_table[gpfn]);
442 SH_VLOG("unshadow_table type=%08x gpfn=%p", type, gpfn);
444 perfc_incrc(unshadow_table_count);
446 /*
447 * This function is the same for all p.t. pages. Even for multi-processor
448 * guests there won't be a race here as this CPU was the one that
449 * cmpxchg'ed the page to invalid.
450 */
451 smfn = __shadow_status(d, gpfn) & PSH_pfn_mask;
452 delete_shadow_status(d, gpfn);
453 free_shadow_page(d, &frame_table[smfn]);
454 }
456 #ifdef CONFIG_VMX
457 void vmx_shadow_clear_state(struct domain *d)
458 {
459 SH_VVLOG("vmx_clear_shadow_state:");
460 shadow_lock(d);
461 clear_shadow_state(d);
462 shadow_unlock(d);
463 }
464 #endif
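/*
 * shadow_l2_table() builds a fresh shadow for a guest L2 page:
 * allocate a shadow frame, record it in the hash as PSH_shadowed|spfn,
 * and (for the non-translated i386 case) copy in the hypervisor
 * entries and install the linear, shadow-linear and per-domain
 * mappings. Guest-visible slots start out zero and are filled on
 * demand, as the comment inside explains.
 */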
467 unsigned long shadow_l2_table(
468 struct domain *d, unsigned long gpfn)
469 {
470 struct pfn_info *spfn_info;
471 unsigned long spfn;
472 unsigned long guest_gpfn;
474 guest_gpfn = __mfn_to_gpfn(d, gpfn);
476 SH_VVLOG("shadow_l2_table( %p )", gpfn);
478 perfc_incrc(shadow_l2_table_count);
480 if ( (spfn_info = alloc_shadow_page(d)) == NULL )
481 BUG(); /* XXX Deal gracefully with failure. */
483 spfn_info->u.inuse.type_info = PGT_l2_page_table;
484 perfc_incr(shadow_l2_pages);
486 spfn = spfn_info - frame_table;
487 /* Mark pfn as being shadowed; update field to point at shadow. */
488 set_shadow_status(d, guest_gpfn, spfn | PSH_shadowed);
490 #ifdef __i386__
491 /* Install hypervisor and 2x linear p.t. mappings. */
492 if ( shadow_mode_translate(d) )
493 {
494 #ifdef CONFIG_VMX
495 vmx_update_shadow_state(d->exec_domain[0], gpfn, spfn);
496 #else
497 panic("Shadow Full 32 not yet implemented without VMX\n");
498 #endif
499 }
500 else
501 {
502 l2_pgentry_t *spl2e;
503 spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
504 /*
505 * We could proactively fill in PDEs for pages that are already
506 * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
507 * (restriction required for coherence of the accessed bit). However,
508 * we tried it and it didn't help performance. This is simpler.
509 */
510 memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
512 /* Install hypervisor and 2x linear p.t. mappings. */
513 memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
514 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
515 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
516 spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
517 mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
518 spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
519 mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
520 spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
521 mk_l2_pgentry(__pa(page_get_owner(
522 &frame_table[gpfn])->arch.mm_perdomain_pt) |
523 __PAGE_HYPERVISOR);
525 unmap_domain_mem(spl2e);
526 }
527 #endif
529 SH_VLOG("shadow_l2_table( %p -> %p)", gpfn, spfn);
530 return spfn;
531 }
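/*
 * shadow_map_l1_into_current_l2() handles a missing shadow L2 slot:
 * either the guest L1 has no shadow yet (case "4a": allocate one, hook
 * it into the L2 and propagate every guest PTE), or a shadow already
 * exists and only the L2 entry needs re-installing (case "4b").
 */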
533 static void shadow_map_l1_into_current_l2(unsigned long va)
534 {
535 struct exec_domain *ed = current;
536 struct domain *d = ed->domain;
537 unsigned long *gpl1e, *spl1e, gl2e, sl2e, gl1pfn, sl1mfn, sl1ss;
538 struct pfn_info *sl1mfn_info;
539 int i;
541 __guest_get_l2e(ed, va, &gl2e);
543 gl1pfn = gl2e >> PAGE_SHIFT;
545 sl1ss = __shadow_status(d, gl1pfn);
546 if ( !(sl1ss & PSH_shadowed) )
547 {
548 /* This L1 is NOT already shadowed so we need to shadow it. */
549 SH_VVLOG("4a: l1 not shadowed ( %p )", sl1ss);
551 sl1mfn_info = alloc_shadow_page(d);
552 sl1mfn_info->u.inuse.type_info = PGT_l1_page_table;
554 sl1mfn = sl1mfn_info - frame_table;
556 perfc_incrc(shadow_l1_table_count);
557 perfc_incr(shadow_l1_pages);
559 set_shadow_status(d, gl1pfn, PSH_shadowed | sl1mfn);
561 l2pde_general(d, &gl2e, &sl2e, sl1mfn);
563 __guest_set_l2e(ed, va, gl2e);
564 __shadow_set_l2e(ed, va, sl2e);
566 gpl1e = (unsigned long *) &(linear_pg_table[
567 (va>>L1_PAGETABLE_SHIFT) & ~(L1_PAGETABLE_ENTRIES-1)]);
569 spl1e = (unsigned long *) &(shadow_linear_pg_table[
570 (va>>L1_PAGETABLE_SHIFT) & ~(L1_PAGETABLE_ENTRIES-1)]);
572 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
573 l1pte_propagate_from_guest(d, &gpl1e[i], &spl1e[i]);
574 }
575 else
576 {
577 /* This L1 is shadowed already, but the L2 entry is missing. */
578 SH_VVLOG("4b: was shadowed, l2 missing ( %p )", sl1ss);
580 sl1mfn = sl1ss & PSH_pfn_mask;
581 l2pde_general(d, &gl2e, &sl2e, sl1mfn);
582 __guest_set_l2e(ed, va, gl2e);
583 __shadow_set_l2e(ed, va, sl2e);
584 }
585 }
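/*
 * shadow_invlpg(): on a guest INVLPG the shadow PTE is first knocked
 * out; if even that write faults, the relevant shadow L1 is not mapped
 * here and all shadow state is cleared instead. Otherwise the current
 * guest PTE is re-propagated so the shadow stays coherent.
 */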
587 void shadow_invlpg(struct exec_domain *ed, unsigned long va)
588 {
589 unsigned long gpte, spte;
591 ASSERT(shadow_mode_enabled(ed->domain));
593 if (__put_user(0L, (unsigned long *)
594 &shadow_linear_pg_table[va >> PAGE_SHIFT])) {
595 vmx_shadow_clear_state(ed->domain);
596 return;
597 }
599 if (__get_user(gpte, (unsigned long *)
600 &linear_pg_table[va >> PAGE_SHIFT])) {
601 return;
602 }
604 l1pte_propagate_from_guest(ed->domain, &gpte, &spte);
606 if (__put_user(spte, (unsigned long *)
607 &shadow_linear_pg_table[va >> PAGE_SHIFT])) {
608 return;
609 }
610 }
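/*
 * shadow_fault() is the shadow page-fault handler. It returns 0 when
 * the fault must be forwarded to the guest (PTE not present, a real
 * permission error, or an unreadable guest PTE) and EXCRET_fault_fixed
 * once the guest PTE has been written back (presumably with updated
 * accessed/dirty bits from l1pte_read/write_fault) and the shadow PTE
 * fixed up. The three STEP comments below mark that structure.
 */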
612 int shadow_fault(unsigned long va, long error_code)
613 {
614 unsigned long gpte, spte = 0;
615 struct exec_domain *ed = current;
616 struct domain *d = ed->domain;
618 SH_VVLOG("shadow_fault( va=%p, code=%lu )", va, error_code );
620 check_pagetable(d, ed->arch.guest_table, "pre-sf");
622 /*
623 * STEP 1. A fast-reject set of checks with no locking.
624 */
626 if ( unlikely(__get_user(gpte, (unsigned long *)
627 &linear_pg_table[va >> PAGE_SHIFT])) )
628 {
629 SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
630 return 0;
631 }
633 if ( !(gpte & _PAGE_PRESENT) )
634 {
635 SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
636 return 0;
637 }
639 if ( (error_code & 2) && !(gpte & _PAGE_RW) )
640 {
641 /* Write fault on a read-only mapping. */
642 return 0;
643 }
645 /*
646 * STEP 2. Take the shadow lock and re-check the guest PTE.
647 */
649 shadow_lock(d);
651 if ( unlikely(__get_user(gpte, (unsigned long *)
652 &linear_pg_table[va >> PAGE_SHIFT])) )
653 {
654 SH_VVLOG("shadow_fault - EXIT: read gpte faulted2" );
655 shadow_unlock(d);
656 return 0;
657 }
659 if ( unlikely(!(gpte & _PAGE_PRESENT)) )
660 {
661 SH_VVLOG("shadow_fault - EXIT: gpte not present2 (%lx)",gpte );
662 shadow_unlock(d);
663 return 0;
664 }
666 /* Write fault? */
667 if ( error_code & 2 )
668 {
669 if ( unlikely(!(gpte & _PAGE_RW)) )
670 {
671 /* Write fault on a read-only mapping. */
672 SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", gpte);
673 shadow_unlock(d);
674 return 0;
675 }
677 l1pte_write_fault(d, &gpte, &spte);
678 }
679 else
680 {
681 l1pte_read_fault(d, &gpte, &spte);
682 }
684 /*
685 * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
686 */
688 /* XXX Watch out for read-only L2 entries! (not used in Linux). */
689 if ( unlikely(__put_user(gpte, (unsigned long *)
690 &linear_pg_table[va >> PAGE_SHIFT])) )
691 domain_crash();
693 /*
694 * Update of shadow PTE can fail because the L1 p.t. is not shadowed,
695 * or because the shadow isn't linked into this shadow L2 p.t.
696 */
697 if ( unlikely(__put_user(spte, (unsigned long *)
698 &shadow_linear_pg_table[va >> PAGE_SHIFT])) )
699 {
700 SH_VVLOG("3: not shadowed/mapped gpte=%p spte=%p", gpte, spte);
701 shadow_map_l1_into_current_l2(va);
702 shadow_linear_pg_table[va >> PAGE_SHIFT] = mk_l1_pgentry(spte);
703 }
705 perfc_incrc(shadow_fixup_count);
706 d->arch.shadow_fault_count++;
708 shadow_unlock(d);
710 check_pagetable(d, ed->arch.guest_table, "post-sf");
711 return EXCRET_fault_fixed;
712 }
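/*
 * shadow_l1_normal_pt_update() is called when the guest writes a PTE
 * into an L1 page that is known to be shadowed. The prev_smfn /
 * prev_spl1e pair lets a batched caller keep the previously used
 * shadow L1 mapped across consecutive updates to the same page rather
 * than re-mapping it every time.
 */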
715 void shadow_l1_normal_pt_update(
716 unsigned long pa, unsigned long gpte,
717 unsigned long *prev_smfn_ptr,
718 l1_pgentry_t **prev_spl1e_ptr)
719 {
720 unsigned long smfn, spte, prev_smfn = *prev_smfn_ptr;
721 l1_pgentry_t *spl1e, *prev_spl1e = *prev_spl1e_ptr;
723 /* N.B. To get here, we know the l1 page *must* be shadowed. */
724 SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%p, "
725 "prev_smfn=%p, prev_spl1e=%p",
726 pa, gpte, prev_smfn, prev_spl1e);
728 smfn = __shadow_status(current->domain, pa >> PAGE_SHIFT) & PSH_pfn_mask;
730 if ( smfn == prev_smfn )
731 {
732 spl1e = prev_spl1e;
733 }
734 else
735 {
736 if ( prev_spl1e != NULL )
737 unmap_domain_mem( prev_spl1e );
738 spl1e = (l1_pgentry_t *)map_domain_mem(smfn << PAGE_SHIFT);
739 *prev_smfn_ptr = smfn;
740 *prev_spl1e_ptr = spl1e;
741 }
743 l1pte_propagate_from_guest(current->domain, &gpte, &spte);
744 spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = mk_l1_pgentry(spte);
745 }
747 void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpde)
748 {
749 unsigned long sl2mfn, spde = 0;
750 l2_pgentry_t *spl2e;
751 unsigned long sl1mfn;
753 /* N.B. To get here, we know the l2 page *must* be shadowed. */
754 SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%p",pa,gpde);
756 sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT) & PSH_pfn_mask;
758 /*
759 * Only propagate to shadow if _PAGE_ACCESSED is set in the guest.
760 * Otherwise, to ensure coherency, we blow away the existing shadow value.
761 */
762 if ( gpde & _PAGE_ACCESSED )
763 {
764 sl1mfn = (gpde & _PAGE_PRESENT) ?
765 __shadow_status(current->domain, gpde >> PAGE_SHIFT) : 0;
766 l2pde_general(current->domain, &gpde, &spde, sl1mfn);
767 }
769 spl2e = (l2_pgentry_t *)map_domain_mem(sl2mfn << PAGE_SHIFT);
770 spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)] = mk_l2_pgentry(spde);
771 unmap_domain_mem(spl2e);
772 }
777 /************************************************************************/
778 /************************************************************************/
779 /************************************************************************/
781 #if SHADOW_DEBUG
783 // BUG: these are not SMP safe...
784 static int sh_l2_present;
785 static int sh_l1_present;
786 char * sh_check_name;
787 int shadow_status_noswap;
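/*
 * v2m() translates a hypervisor virtual address to a machine address
 * by looking it up in the shadow linear page table; it is used by the
 * FAIL() reports and the ptwr diagnostics below.
 */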
789 #define v2m(adr) ({ \
790 unsigned long _a = (unsigned long)(adr); \
791 unsigned long _pte = l1_pgentry_val( \
792 shadow_linear_pg_table[_a >> PAGE_SHIFT]); \
793 unsigned long _pa = _pte & PAGE_MASK; \
794 _pa | (_a & ~PAGE_MASK); \
795 })
797 #define FAIL(_f, _a...) \
798 do { \
799 printk("XXX %s-FAIL (%d,%d)" _f "\n" \
800 "g=%08lx s=%08lx &g=%08lx &s=%08lx" \
801 " v2m(&g)=%08lx v2m(&s)=%08lx ea=%08lx\n", \
802 sh_check_name, level, l1_idx, ## _a , \
803 gpte, spte, pgpte, pspte, \
804 v2m(pgpte), v2m(pspte), \
805 (l2_idx << L2_PAGETABLE_SHIFT) | \
806 (l1_idx << L1_PAGETABLE_SHIFT)); \
807 errors++; \
808 } while ( 0 )
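/*
 * check_pte() audits one guest/shadow PTE pair: flag bits other than
 * RW, ACCESSED and DIRTY must match, the shadow may never grant more
 * than the guest does, and (above L1) the shadow frame must be the one
 * the shadow hash records for the corresponding guest frame.
 */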
810 static int check_pte(
811 struct domain *d, unsigned long *pgpte, unsigned long *pspte,
812 int level, int l2_idx, int l1_idx)
813 {
814 unsigned gpte = *pgpte;
815 unsigned spte = *pspte;
816 unsigned long mask, gpfn, smfn;
817 int errors = 0;
819 if ( (spte == 0) || (spte == 0xdeadface) || (spte == 0x00000E00) )
820 return errors; /* always safe */
822 if ( !(spte & _PAGE_PRESENT) )
823 FAIL("Non zero not present spte");
825 if ( level == 2 ) sh_l2_present++;
826 if ( level == 1 ) sh_l1_present++;
828 if ( !(gpte & _PAGE_PRESENT) )
829 FAIL("Guest not present yet shadow is");
831 mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|PAGE_MASK);
833 if ( (spte & mask) != (gpte & mask) )
834 FAIL("Corrupt?");
836 if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
837 FAIL("Dirty coherence");
839 if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
840 FAIL("Accessed coherence");
842 if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
843 FAIL("RW coherence");
845 if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY)) )
846 FAIL("RW2 coherence");
848 smfn = spte >> PAGE_SHIFT;
849 gpfn = gpte >> PAGE_SHIFT;
851 if ( gpfn == smfn )
852 {
853 if ( level > 1 )
854 FAIL("Linear map ???"); /* XXX this will fail on BSD */
855 }
856 else
857 {
858 if ( level < 2 )
859 FAIL("Shadow in L1 entry?");
861 if ( __shadow_status(d, gpfn) != (PSH_shadowed | smfn) )
862 FAIL("smfn problem g.sf=%p",
863 __shadow_status(d, gpfn) );
864 }
866 return errors;
867 }
870 static int check_l1_table(
871 struct domain *d,
872 unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
873 {
874 int i;
875 unsigned long *gpl1e, *spl1e;
876 int cpu = current->processor;
877 int errors = 0;
879 // First check to see if this guest page is currently the active
880 // PTWR page. If so, then we compare the (old) cached copy of the
881 // guest page to the shadow, and not the currently writable (and
882 // thus potentially out-of-sync) guest page.
883 //
884 if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
885 {
886 for ( i = 0; i < ARRAY_SIZE(ptwr_info->ptinfo); i++)
887 {
888 if ( ptwr_info[cpu].ptinfo[i].l1va &&
889 ((v2m(ptwr_info[cpu].ptinfo[i].pl1e) >> PAGE_SHIFT) == gmfn) )
890 {
891 unsigned long old = gmfn;
892 gmfn = (v2m(ptwr_info[cpu].ptinfo[i].page) >> PAGE_SHIFT);
893 printk("hit1 ptwr_info[%d].ptinfo[%d].l1va, mfn=0x%08x, snapshot=0x%08x\n",
894 cpu, i, old, gmfn);
895 }
896 }
897 }
899 gpl1e = map_domain_mem(gmfn << PAGE_SHIFT);
900 spl1e = map_domain_mem(smfn << PAGE_SHIFT);
902 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
903 errors += check_pte(d, &gpl1e[i], &spl1e[i], 1, l2_idx, i);
905 unmap_domain_mem(spl1e);
906 unmap_domain_mem(gpl1e);
908 return errors;
909 }
911 #define FAILPT(_f, _a...) \
912 do { \
913 printk("XXX FAIL %s-PT " _f "\n", sh_check_name, ## _a ); \
914 errors++; \
915 } while ( 0 )
917 int check_l2_table(
918 struct domain *d, unsigned long gpfn, unsigned long smfn)
919 {
920 unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
921 l2_pgentry_t *gpl2e = (l2_pgentry_t *) map_domain_mem( gmfn << PAGE_SHIFT );
922 l2_pgentry_t *spl2e = (l2_pgentry_t *) map_domain_mem( smfn << PAGE_SHIFT );
923 int i;
924 int errors = 0;
926 if ( page_get_owner(pfn_to_page(gmfn)) != d )
927 FAILPT("domain doesn't own page");
928 if ( page_get_owner(pfn_to_page(smfn)) != NULL )
929 FAILPT("shadow page mfn=0x%08x is owned by someone, domid=%d",
930 smfn, page_get_owner(pfn_to_page(smfn))->id);
932 if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
933 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
934 ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
935 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
936 {
937 for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
938 i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
939 i++ )
940 printk("+++ (%d) %p %p\n",i,
941 l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
942 FAILPT("hypervisor entries inconsistent");
943 }
945 if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
946 l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
947 FAILPT("hypervisor linear map inconsistent");
949 if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
950 L2_PAGETABLE_SHIFT]) !=
951 ((smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
952 FAILPT("hypervisor shadow linear map inconsistent %p %p",
953 l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
954 L2_PAGETABLE_SHIFT]),
955 (smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
957 if ( !shadow_mode_translate(d) ) {
958 if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
959 ((v2m(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt) |
960 __PAGE_HYPERVISOR))) )
961 FAILPT("hypervisor per-domain map inconsistent");
962 }
964 /* Check the whole L2. */
965 for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
966 errors += check_pte(d, &l2_pgentry_val(gpl2e[i]), &l2_pgentry_val(spl2e[i]), 2, i, 0);
968 unmap_domain_mem(spl2e);
969 unmap_domain_mem(gpl2e);
971 return errors;
972 }
974 int _check_pagetable(struct domain *d, pagetable_t pt, char *s)
975 {
976 unsigned long gptbase = pagetable_val(pt);
976 unsigned long gptbase = pagetable_val(pt);
977 unsigned long ptbase_pfn, smfn, ss;
978 unsigned long i;
979 l2_pgentry_t *gpl2e, *spl2e;
980 unsigned long ptbase_mfn = 0;
981 int errors = 0;
983 sh_check_name = s;
984 SH_VVLOG("%s-PT Audit", s);
985 sh_l2_present = sh_l1_present = 0;
986 perfc_incrc(check_pagetable);
988 ptbase_pfn = gptbase >> PAGE_SHIFT;
989 ptbase_mfn = __gpfn_to_mfn(d, ptbase_pfn);
991 ss = __shadow_status(d, ptbase_pfn);
993 if ( ! (ss & PSH_shadowed) )
994 {
995 printk("%s-PT %p not shadowed\n", s, gptbase);
996 errors++;
998 if ( ss != 0 )
999 BUG();
1000 return errors;
1001 }
1003 smfn = ss & PSH_pfn_mask;
1005 if ( ss != (PSH_shadowed | smfn) )
1006 FAILPT("ptbase shadow inconsistent1");
1008 errors += check_l2_table(d, ptbase_pfn, smfn);
1010 gpl2e = (l2_pgentry_t *) map_domain_mem( ptbase_mfn << PAGE_SHIFT );
1011 spl2e = (l2_pgentry_t *) map_domain_mem( smfn << PAGE_SHIFT );
1013 /* Go back and recurse. */
1014 for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
1015 {
1016 unsigned long gl1pfn = l2_pgentry_val(gpl2e[i]) >> PAGE_SHIFT;
1017 unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
1018 unsigned long sl1mfn = l2_pgentry_val(spl2e[i]) >> PAGE_SHIFT;
1020 if ( l2_pgentry_val(spl2e[i]) != 0 )
1021 {
1022 errors += check_l1_table(d, gl1mfn, sl1mfn, i);
1023 }
1024 }
1026 unmap_domain_mem(spl2e);
1027 unmap_domain_mem(gpl2e);
1029 SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
1030 sh_l2_present, sh_l1_present);
1032 #if 1
1033 if ( errors )
1034 BUG();
1035 #endif
1037 return errors;
1038 }
1040 int _check_all_pagetables(struct domain *d, char *s)
1041 {
1042 int i, j;
1043 struct shadow_status *a;
1044 unsigned long gmfn;
1045 int errors = 0;
1046 int cpu;
1048 shadow_status_noswap = 1;
1050 sh_check_name = s;
1051 SH_VVLOG("%s-PT Audit domid=%d", s, d->id);
1052 sh_l2_present = sh_l1_present = 0;
1053 perfc_incrc(check_all_pagetables);
1055 for (i = 0; i < shadow_ht_buckets; i++)
1056 {
1057 a = &d->arch.shadow_ht[i];
1058 while ( a && a->pfn )
1059 {
1060 gmfn = __gpfn_to_mfn(d, a->pfn);
1061 switch ( frame_table[a->pfn].u.inuse.type_info & PGT_type_mask )
1062 {
1063 case PGT_l1_page_table:
1064 errors += check_l1_table(d, gmfn, a->smfn_and_flags & PSH_pfn_mask, 0);
1065 break;
1066 case PGT_l2_page_table:
1067 errors += check_l2_table(d, gmfn, a->smfn_and_flags & PSH_pfn_mask);
1068 break;
1069 default:
1070 errors++;
1071 printk("unexpected page type 0x%08x, pfn=0x%08x, gmfn=0x%08x\n",
1072 frame_table[gmfn].u.inuse.type_info,
1073 a->pfn, gmfn);
1074 BUG();
1075 }
1076 a = a->next;
1077 }
1078 }
1080 shadow_status_noswap = 0;
1082 for (i = 0; i < 1024; i++)
1083 {
1084 if ( l2_pgentry_val(shadow_linear_l2_table[i]) & _PAGE_PRESENT )
1085 {
1086 unsigned base = i << 10;
1087 for (j = 0; j < 1024; j++)
1088 {
1089 if ( (l1_pgentry_val(shadow_linear_pg_table[base + j]) & PAGE_MASK) == 0x0143d000 )
1090 {
1091 printk("sh_ln_pg_tb[0x%08x] => 0x%08lx ",
1092 base + j,
1093 l1_pgentry_val(shadow_linear_pg_table[base + j]));
1094 if ( l1_pgentry_val(shadow_linear_pg_table[base + j]) & _PAGE_PRESENT )
1095 printk(" first entry => 0x%08lx\n",
1096 *(unsigned long *)((base + j) << PAGE_SHIFT));
1097 else
1098 printk(" page not present\n");
1099 }
1100 }
1101 }
1102 }
1104 if ( errors )
1105 {
1106 printk("VM_ASSIST(d, VMASST_TYPE_writable_pagetables) => %d\n",
1107 VM_ASSIST(d, VMASST_TYPE_writable_pagetables));
1108 for ( cpu = 0; cpu < smp_num_cpus; cpu++ )
1109 {
1110 for ( j = 0; j < ARRAY_SIZE(ptwr_info->ptinfo); j++)
1111 {
1112 printk("ptwr_info[%d].ptinfo[%d].l1va => 0x%08x\n",
1113 cpu, j, ptwr_info[cpu].ptinfo[j].l1va);
1114 printk("ptwr_info[%d].ptinfo[%d].pl1e => 0x%08x\n",
1115 cpu, j, ptwr_info[cpu].ptinfo[j].pl1e);
1116 if (cpu == smp_processor_id())
1117 printk("v2m(ptwr_info[%d].ptinfo[%d].pl1e) => 0x%08x\n",
1118 cpu, j, v2m(ptwr_info[cpu].ptinfo[j].pl1e));
1119 printk("ptwr_info[%d].ptinfo[%d].page => 0x%08x\n",
1120 cpu, j, ptwr_info[cpu].ptinfo[j].page);
1121 if (cpu == smp_processor_id())
1122 printk("v2m(ptwr_info[%d].ptinfo[%d].page) => 0x%08x\n",
1123 cpu, j, v2m(ptwr_info[cpu].ptinfo[j].page));
1124 }
1125 }
1126 BUG();
1127 }
1129 return errors;
1130 }
1132 #endif // SHADOW_DEBUG