xen/arch/x86/shadow.c @ 2422:2274a0386cc9 (ia64/xen-unstable)

bitkeeper revision 1.1159.69.5 (4138e882jA1YaR_OfTfNHe_uT4PDIg)

author    iap10@labyrinth.cl.cam.ac.uk
date      Fri Sep 03 21:56:18 2004 +0000 (2004-09-03)
summary   trivial
parents   11be1dfb262b
children  aed97013f9fe
/* -*- Mode:C++; c-file-style:BSD; c-basic-offset:4; tab-width:4 -*- */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/page.h>
#include <xen/event.h>
#include <xen/trace.h>
/********

To use these shadow page tables, guests must not rely on the ACCESSED
and DIRTY bits of L2 PTEs being accurate -- they will typically all be
set.

This is unlikely to break anything. (If guests want to use the
va_update mechanism they have signed up for this anyway...)

There is a per-domain shadow-table spinlock, which works fine for SMP
hosts. We don't have to worry about interrupts, as no shadow operations
happen in interrupt context. It is probably not quite ready for SMP
guests, as we would have to worry about synchronisation between gpte
and spte updates. It is possible that such updates only happen in
hypercall context, in which case we will probably have a per-domain
hypercall lock anyway (at least initially).

********/
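
/*
 * Illustrative sketch (not part of the original file): the locking
 * pattern the comment above describes.  Any path that reads a guest
 * pte and then updates the corresponding shadow pte is expected to do
 * so under the per-domain shadow lock, as shadow_fault() below does.
 * The helper name `example_update_spte' and its body are assumptions
 * for illustration only; l1pte_no_fault() is the propagation helper
 * from asm/shadow.h that this file already uses.
 */
#if 0
static void example_update_spte(struct mm_struct *m,
                                unsigned long *gpte, unsigned long *spte)
{
    shadow_lock(m);                 /* per-domain spinlock (see shadow.h) */
    l1pte_no_fault(m, gpte, spte);  /* propagate gpte -> spte             */
    shadow_unlock(m);
}
#endif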
static inline void free_shadow_page(struct mm_struct *m,
                                    struct pfn_info *page)
{
    unsigned long type = page->u.inuse.type_info & PGT_type_mask;

    m->shadow_page_count--;

    if ( type == PGT_l1_page_table )
        perfc_decr(shadow_l1_pages);
    else if ( type == PGT_l2_page_table )
        perfc_decr(shadow_l2_pages);
    else
        printk("Free shadow weird page type pfn=%08x type=%08x\n",
               page-frame_table, page->u.inuse.type_info);

    free_domheap_page(page);
}
static void __free_shadow_table( struct mm_struct *m )
{
    int j, free=0;
    struct shadow_status *a, *next;

    // the code assumes you're not using the page tables, i.e.
    // the domain is stopped and cr3 points at something else!!

    // walk the hash table and call free_shadow_page() on every entry

    shadow_audit(m, 1);

    for ( j = 0; j < shadow_ht_buckets; j++ )
    {
        // the head of each bucket is embedded in the hash table itself
        a = &m->shadow_ht[j];
        if ( a->pfn )
        {
            free_shadow_page( m,
                              &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
            a->pfn = 0;
            a->spfn_and_flags = 0;
            free++;
        }

        // chained entries go back on the free list once their shadow
        // page has been released
        next = a->next;
        a->next = NULL;
        a = next;
        while ( a )
        {
            struct shadow_status *next = a->next;

            free_shadow_page( m,
                              &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
            a->pfn = 0;
            a->spfn_and_flags = 0;
            free++;
            a->next = m->shadow_ht_free;
            m->shadow_ht_free = a;
            a = next;
        }

        shadow_audit(m, 0);
    }

    SH_LOG("Free shadow table. Freed= %d", free);
}
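
/*
 * Illustrative sketch (assumption, not the real lookup): the walkers in
 * this file assume an open-hashed table with the bucket head embedded
 * in shadow_ht[] and overflow entries chained via ->next, drawn from the
 * shadow_ht_extras blocks and recycled on shadow_ht_free.  The actual
 * lookup is __shadow_status() in asm/shadow.h; the hash function below
 * is a placeholder.
 */
#if 0
static unsigned long example_shadow_lookup(struct mm_struct *m,
                                           unsigned int gpfn)
{
    struct shadow_status *x = &m->shadow_ht[gpfn % shadow_ht_buckets];

    do {
        if ( x->pfn == gpfn )
            return x->spfn_and_flags;   /* shadow pfn | PSH_* flags */
        x = x->next;
    } while ( x != NULL );

    return 0;                           /* not shadowed */
}
#endif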
#define TABLE_OP_ZERO_L2 1
#define TABLE_OP_ZERO_L1 2
#define TABLE_OP_FREE_L1 3

static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
                                  unsigned int gpfn,
                                  struct pfn_info *spfn_info, int *work )
{
    unsigned int spfn = spfn_info-frame_table;
    int restart = 0;

    switch ( op )
    {
    case TABLE_OP_ZERO_L2:
    {
        if ( (spfn_info->u.inuse.type_info & PGT_type_mask) ==
             PGT_l2_page_table )
        {
            unsigned long *spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
#ifdef __i386__
            memset(spl1e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*spl1e));
#endif
            unmap_domain_mem( spl1e );
        }
    }
    break;

    case TABLE_OP_ZERO_L1:
    {
        if ( (spfn_info->u.inuse.type_info & PGT_type_mask) ==
             PGT_l1_page_table )
        {
            unsigned long *spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
            memset( spl1e, 0, ENTRIES_PER_L1_PAGETABLE * sizeof(*spl1e) );
            unmap_domain_mem( spl1e );
        }
    }
    break;

    case TABLE_OP_FREE_L1:
    {
        if ( (spfn_info->u.inuse.type_info & PGT_type_mask) ==
             PGT_l1_page_table )
        {
            // lock is already held
            delete_shadow_status( m, gpfn );
            restart = 1; // we need to go to the start of the list again
        }
    }
    break;

    default:
        BUG();
    }

    return restart;
}
static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
{
    int j, work=0;
    struct shadow_status *a, *next;

    // the code assumes you're not using the page tables, i.e.
    // the domain is stopped and cr3 points at something else!!

    // walk the hash table and apply shadow_page_op() to every entry

    shadow_audit(m, 1);

    for ( j = 0; j < shadow_ht_buckets; j++ )
    {
    retry:
        a = &m->shadow_ht[j];
        next = a->next;
        if ( a->pfn )
        {
            if ( shadow_page_op( m, op, a->pfn,
                                 &frame_table[a->spfn_and_flags & PSH_pfn_mask],
                                 &work ) )
                goto retry;
        }
        a = next;
        while ( a )
        {
            next = a->next;
            if ( shadow_page_op( m, op, a->pfn,
                                 &frame_table[a->spfn_and_flags & PSH_pfn_mask],
                                 &work ) )
                goto retry;
            a = next;
        }
        shadow_audit(m, 0);
    }

    SH_VLOG("Scan shadow table. Work=%d l1=%d l2=%d",
            work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
}
void shadow_mode_init(void)
{
}
int shadow_mode_enable( struct domain *p, unsigned int mode )
{
    struct mm_struct *m = &p->mm;
    struct shadow_status **fptr;
    int i;

    m->shadow_mode = mode;

    // allocate the hash table
    m->shadow_ht = xmalloc(shadow_ht_buckets *
                           sizeof(struct shadow_status));
    if ( m->shadow_ht == NULL )
        goto nomem;

    memset(m->shadow_ht, 0, shadow_ht_buckets * sizeof(struct shadow_status));

    // allocate space for the first block of extra nodes
    m->shadow_ht_extras = xmalloc(sizeof(void*) +
                                  (shadow_ht_extra_size *
                                   sizeof(struct shadow_status)));
    if ( m->shadow_ht_extras == NULL )
        goto nomem;

    memset( m->shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size *
                                                     sizeof(struct shadow_status)) );

    m->shadow_extras_count++;

    // thread the extras onto the free list
    fptr = &m->shadow_ht_free;
    for ( i = 0; i < shadow_ht_extra_size; i++ )
    {
        *fptr = &m->shadow_ht_extras[i];
        fptr = &(m->shadow_ht_extras[i].next);
    }
    *fptr = NULL;

    // the trailing pointer of each extras block links to the next block
    *((struct shadow_status **)
      &m->shadow_ht_extras[shadow_ht_extra_size]) = NULL;

    if ( mode == SHM_logdirty )
    {
        // round the bitmap up to a multiple of 64 bits
        m->shadow_dirty_bitmap_size = (p->max_pages + 63) & ~63;
        m->shadow_dirty_bitmap =
            xmalloc( m->shadow_dirty_bitmap_size/8 );
        if ( m->shadow_dirty_bitmap == NULL )
        {
            m->shadow_dirty_bitmap_size = 0;
            goto nomem;
        }
        memset(m->shadow_dirty_bitmap, 0, m->shadow_dirty_bitmap_size/8);
    }

    // build the initial shadow page table
    __shadow_mk_pagetable( m );

    return 0;

 nomem:
    return -ENOMEM;
}
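
/*
 * Illustrative sketch (assumption): the extras allocation above reserves
 * shadow_ht_extra_size entries plus one trailing pointer, and
 * __shadow_mode_disable() below walks that trailing pointer as a linked
 * list of blocks.  The helper below shows how a further block would
 * presumably be chained on when the free list runs dry; the real code
 * that grows the table lives in the hash-insert path in asm/shadow.h,
 * and `example_grow_shadow_ht' is an invented name.
 */
#if 0
static int example_grow_shadow_ht(struct mm_struct *m)
{
    int i;
    struct shadow_status *extra =
        xmalloc(sizeof(void*) +
                (shadow_ht_extra_size * sizeof(struct shadow_status)));

    if ( extra == NULL )
        return -ENOMEM;

    memset(extra, 0, sizeof(void*) +
           (shadow_ht_extra_size * sizeof(struct shadow_status)));

    /* chain the new block in front of the existing ones */
    *((struct shadow_status **)&extra[shadow_ht_extra_size]) =
        m->shadow_ht_extras;
    m->shadow_ht_extras = extra;
    m->shadow_extras_count++;

    /* thread the new entries onto the free list */
    for ( i = 0; i < shadow_ht_extra_size; i++ )
    {
        extra[i].next = m->shadow_ht_free;
        m->shadow_ht_free = &extra[i];
    }

    return 0;
}
#endif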
void __shadow_mode_disable(struct domain *d)
{
    struct mm_struct *m = &d->mm;
    struct shadow_status *next;

    __free_shadow_table(m);
    m->shadow_mode = 0;

    SH_VLOG("freed tables count=%d l1=%d l2=%d",
            m->shadow_page_count, perfc_value(shadow_l1_pages),
            perfc_value(shadow_l2_pages));

    // free the chain of extras blocks (linked via their trailing pointer)
    next = m->shadow_ht_extras;
    while ( next )
    {
        struct shadow_status *this = next;
        m->shadow_extras_count--;
        next = *((struct shadow_status **)(&next[shadow_ht_extra_size]));
        xfree(this);
    }

    SH_LOG("freed extras, now %d", m->shadow_extras_count);

    if ( m->shadow_dirty_bitmap )
    {
        xfree( m->shadow_dirty_bitmap );
        m->shadow_dirty_bitmap = 0;
        m->shadow_dirty_bitmap_size = 0;
    }

    // free the hash table itself
    xfree( &m->shadow_ht[0] );
}
static int shadow_mode_table_op(struct domain *d,
                                dom0_shadow_control_t *sc)
{
    unsigned int op = sc->op;
    struct mm_struct *m = &d->mm;
    int rc = 0;

    // Since Dom0 made the hypercall, we should be running on its page
    // tables right now, and calling flush on yourself would be really
    // stupid.

    ASSERT(spin_is_locked(&d->mm.shadow_lock));

    if ( m == &current->mm )
    {
        printk("Don't try and flush your own page tables!\n");
        return -EINVAL;
    }

    SH_VLOG("shadow mode table op %08lx %08lx count %d",
            pagetable_val(m->pagetable), pagetable_val(m->shadow_table),
            m->shadow_page_count);

    shadow_audit(m, 1);
    switch ( op )
    {
    case DOM0_SHADOW_CONTROL_OP_FLUSH:
        // XXX THIS IS VERY DANGEROUS : MUST ENSURE THE PTs ARE NOT IN USE ON
        // ANOTHER CPU -- fix when we get sched sync pause.
        __free_shadow_table( m );
        break;

    case DOM0_SHADOW_CONTROL_OP_CLEAN: // zero all non-hypervisor entries
    {
        __scan_shadow_table( m, TABLE_OP_ZERO_L2 );
        __scan_shadow_table( m, TABLE_OP_ZERO_L1 );

        goto send_bitmap;
    }

    case DOM0_SHADOW_CONTROL_OP_CLEAN2: // zero all L2s, free L1s
    {
        int i, j, zero;

        __scan_shadow_table( m, TABLE_OP_ZERO_L2 );
        __scan_shadow_table( m, TABLE_OP_FREE_L1 );

    send_bitmap:
        zero = 1; // initialised here as OP_CLEAN jumps over the declarations

        sc->stats.fault_count       = d->mm.shadow_fault_count;
        sc->stats.dirty_count       = d->mm.shadow_dirty_count;
        sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
        sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;

        d->mm.shadow_fault_count       = 0;
        d->mm.shadow_dirty_count       = 0;
        d->mm.shadow_dirty_net_count   = 0;
        d->mm.shadow_dirty_block_count = 0;

        if ( d->max_pages > sc->pages ||
             !sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
        {
            rc = -EINVAL;
            goto out;
        }

        sc->pages = d->max_pages;

#define chunk (8*1024) // do this in 1KB chunks to be friendly to the L1 cache

        for ( i = 0; i < d->max_pages; i += chunk )
        {
            int bytes = (( ((d->max_pages-i) > chunk) ?
                           chunk : (d->max_pages-i) ) + 7) / 8;

            copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
                          d->mm.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
                          bytes );

            for ( j = 0; zero && j < bytes/sizeof(unsigned long); j++ )
            {
                if ( d->mm.shadow_dirty_bitmap[ i/(8*sizeof(unsigned long)) + j ] != 0 )
                    zero = 0;
            }

            memset( d->mm.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
                    0, bytes );
        }

        /* Might as well stop the domain as an optimization. */
        if ( zero )
            domain_pause_by_systemcontroller(d);

        break;
    }
    case DOM0_SHADOW_CONTROL_OP_PEEK:
    {
        int i;

        sc->stats.fault_count       = d->mm.shadow_fault_count;
        sc->stats.dirty_count       = d->mm.shadow_dirty_count;
        sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
        sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;

        if ( d->max_pages > sc->pages ||
             !sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
        {
            rc = -EINVAL;
            goto out;
        }

        sc->pages = d->max_pages;

        for ( i = 0; i < d->max_pages; i += chunk )
        {
            int bytes = (( ((d->max_pages-i) > chunk) ?
                           chunk : (d->max_pages-i) ) + 7) / 8;

            copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
                          d->mm.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
                          bytes );
        }

        break;
    }

    default:
        BUG();
    }

 out:

    SH_VLOG("shadow mode table op : page count %d", m->shadow_page_count);

    shadow_audit(m, 1);

    // rebuild the shadow page table pointer
    __shadow_mk_pagetable( m );

    return rc;
}
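
/*
 * Worked example of the chunking arithmetic above (illustrative only,
 * assuming a 32-bit build where 8*sizeof(unsigned long) == 32).  With
 * chunk == 8*1024 pages, each iteration copies at most 1024 bytes of
 * the dirty bitmap:
 *
 *   a domain with max_pages == 10000 gives
 *     i = 0:     bytes = (8192 + 7)/8          = 1024
 *     i = 8192:  bytes = ((10000-8192) + 7)/8  = 226
 *
 *   and the second iteration starts at word 8192/32 == 256 of both
 *   bitmaps, since offsets are expressed in unsigned longs.
 */
#if 0
static int example_bitmap_bytes(unsigned int max_pages, unsigned int i)
{
    return (((max_pages - i) > (8*1024) ? (8*1024) : (max_pages - i)) + 7) / 8;
}
#endif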
int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
{
    unsigned int cmd = sc->op;
    int rc = 0;

    domain_pause(d);
    synchronise_pagetables(~0UL);

    shadow_lock(&d->mm);

    if ( cmd == DOM0_SHADOW_CONTROL_OP_OFF )
    {
        shadow_mode_disable(d);
    }
    else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
    {
        shadow_mode_disable(d);
        shadow_mode_enable(d, SHM_test);
    }
    else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY )
    {
        shadow_mode_disable(d);
        shadow_mode_enable(d, SHM_logdirty);
    }
    else if ( shadow_mode(d) &&
              (cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH) &&
              (cmd <= DOM0_SHADOW_CONTROL_OP_CLEAN2) )
    {
        rc = shadow_mode_table_op(d, sc);
    }
    else
    {
        rc = -EINVAL;
    }

    shadow_unlock(&d->mm);

    domain_unpause(d);

    return rc;
}
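
/*
 * Hypothetical caller, for illustration only: how a log-dirty round trip
 * might drive shadow_mode_control() using just the dom0_shadow_control_t
 * fields this file touches (op, pages, dirty_bitmap, stats).  The real
 * caller is the Dom0 control-interface hypercall path; the helper name,
 * the buffer handling and the field types assumed here are guesses.
 * dirty_bitmap must be a user/guest buffer, since the CLEAN path fills
 * it with copy_to_user().
 */
#if 0
static int example_logdirty_round(struct domain *d,
                                  unsigned long *user_bitmap,
                                  unsigned int nr_pages)
{
    dom0_shadow_control_t sc;
    int rc;

    sc.op = DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY;
    rc = shadow_mode_control(d, &sc);
    if ( rc != 0 )
        return rc;

    sc.op           = DOM0_SHADOW_CONTROL_OP_CLEAN;
    sc.pages        = nr_pages;        /* capacity of user_bitmap, in pages */
    sc.dirty_bitmap = user_bitmap;
    rc = shadow_mode_control(d, &sc);  /* sc.stats now holds the counters   */

    return rc;
}
#endif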
static inline struct pfn_info *alloc_shadow_page(struct mm_struct *m)
{
    m->shadow_page_count++;
    return alloc_domheap_page(NULL);
}
void unshadow_table( unsigned long gpfn, unsigned int type )
{
    unsigned long spfn;
    struct domain *d = frame_table[gpfn].u.inuse.domain;

    SH_VLOG("unshadow_table type=%08x gpfn=%08lx", type, gpfn);

    perfc_incrc(unshadow_table_count);

    // this function is the same for both l1 and l2 tables

    // even in the SMP guest case, there won't be a race here as
    // this CPU was the one that cmpxchg'ed the page to invalid

    spfn = __shadow_status(&d->mm, gpfn) & PSH_pfn_mask;

    delete_shadow_status(&d->mm, gpfn);

    free_shadow_page(&d->mm, &frame_table[spfn] );
}
unsigned long shadow_l2_table(
    struct mm_struct *m, unsigned long gpfn )
{
    struct pfn_info *spfn_info;
    unsigned long spfn;
    l2_pgentry_t *spl2e, *gpl2e;
    int i;

    SH_VVLOG("shadow_l2_table( %08lx )", gpfn);

    perfc_incrc(shadow_l2_table_count);

    // XXX in future, worry about racing in SMP guests
    //  -- use cmpxchg with PSH_pending flag to show progress (and spin)

    spfn_info = alloc_shadow_page(m);

    ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache

    spfn_info->u.inuse.type_info = PGT_l2_page_table;
    perfc_incr(shadow_l2_pages);

    spfn = (unsigned long) (spfn_info - frame_table);

    // mark the pfn as being shadowed; point its status at the shadow
    set_shadow_status(m, gpfn, spfn | PSH_shadowed);

    // we need to do this before the linear map is set up
    spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);

#ifdef __i386__
    // get the hypervisor and the two linear PT mappings installed
    memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
                      __PAGE_HYPERVISOR);
#endif

    // can't use the linear map as we may not be running on the right PT
    gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);

    // proactively create entries for pages that are already shadowed
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        unsigned long spte = 0;

#if 0 // Turns out this doesn't really help
        unsigned long gpte;

        gpte = l2_pgentry_val(gpl2e[i]);

        if ( gpte & _PAGE_PRESENT )
        {
            unsigned long s_sh =
                __shadow_status(m, gpte>>PAGE_SHIFT);

            l2pde_general( m, &gpte, &spte, s_sh );
        }
#endif

        spl2e[i] = mk_l2_pgentry( spte );
    }

    // it's arguable we should 'preemptively shadow' a few active L1 pages
    // to avoid taking a string of faults when 'jacking' a running domain

    unmap_domain_mem( gpl2e );
    unmap_domain_mem( spl2e );

    SH_VLOG("shadow_l2_table( %08lx -> %08lx)", gpfn, spfn);

    return spfn;
}
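
/*
 * Illustrative sketch: why the two linear-map L2 slots written above make
 * linear_pg_table[] and shadow_linear_pg_table[] usable in shadow_fault()
 * below.  With the guest L2 installed at the slot for LINEAR_PT_VIRT_START,
 * the guest pte covering virtual address `va' becomes readable at
 *
 *     LINEAR_PT_VIRT_START + (va >> PAGE_SHIFT) * sizeof(l1_pgentry_t)
 *
 * which is exactly &linear_pg_table[va >> PAGE_SHIFT]; the slot for
 * SH_LINEAR_PT_VIRT_START does the same for the shadow tables.  The
 * constants assumed are the usual 32-bit non-PAE values (PAGE_SHIFT 12,
 * L2_PAGETABLE_SHIFT 22); the helper name is invented.
 */
#if 0
static unsigned long example_linear_pte_address(unsigned long va)
{
    return LINEAR_PT_VIRT_START +
        ((va >> PAGE_SHIFT) * sizeof(l1_pgentry_t));
}
#endif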
int shadow_fault( unsigned long va, long error_code )
{
    unsigned long gpte, spte;
    struct mm_struct *m = &current->mm;

    SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );

    check_pagetable( m, current->mm.pagetable, "pre-sf" );

    if ( unlikely(__get_user(gpte,
                             (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
    {
        SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
        return 0; // propagate to guest
    }

    if ( !(gpte & _PAGE_PRESENT) )
    {
        SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)", gpte );
        return 0; // we're not going to be able to help
    }

    if ( (error_code & 2) && !(gpte & _PAGE_RW) )
    {
        // write fault on a read-only page
        return 0;
    }

    // take the lock and reread gpte

    shadow_lock(m);

    if ( unlikely(__get_user(gpte,
                             (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
    {
        SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
        shadow_unlock(m);
        return 0; // propagate to guest
    }

    if ( unlikely(!(gpte & _PAGE_PRESENT)) )
    {
        SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)", gpte );
        shadow_unlock(m);
        return 0; // we're not going to be able to help
    }

    if ( error_code & 2 )
    { // write fault
        if ( likely(gpte & _PAGE_RW) )
        {
            l1pte_write_fault( m, &gpte, &spte );
        }
        else
        { // write fault on a read-only page
            SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)", gpte );
            shadow_unlock(m);
            return 0; // propagate to guest
            // not clear whether we should set the accessed bit here...
        }
    }
    else
    {
        l1pte_read_fault( m, &gpte, &spte );
    }
    SH_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte );

    // write back the updated gpte
    // XXX watch out for read-only L2 entries! (not used in Linux)
    if ( unlikely( __put_user( gpte,
                               (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
        BUG(); // fixme!

    if ( unlikely( __put_user( spte,
                               (unsigned long*)&shadow_linear_pg_table[va>>PAGE_SHIFT])) )
    {
        // failed:
        //  the L1 may not be shadowed, or the L2 entry may be insufficient

        unsigned long gpde, spde, gl1pfn, sl1pfn;

        SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",
                 gpte, spte );

        gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);

        gl1pfn = gpde>>PAGE_SHIFT;

        if ( !(sl1pfn = __shadow_status(&current->mm, gl1pfn)) )
        {
            // this L1 is NOT already shadowed, so we need to shadow it
            struct pfn_info *sl1pfn_info;
            unsigned long *gpl1e, *spl1e;
            int i;

            sl1pfn_info = alloc_shadow_page( &current->mm );
            sl1pfn_info->u.inuse.type_info = PGT_l1_page_table;

            sl1pfn = sl1pfn_info - frame_table;

            SH_VVLOG("4a: l1 not shadowed ( %08lx )", sl1pfn);
            perfc_incrc(shadow_l1_table_count);
            perfc_incr(shadow_l1_pages);

            set_shadow_status(&current->mm, gl1pfn, PSH_shadowed | sl1pfn);

            l2pde_general( m, &gpde, &spde, sl1pfn );

            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);

            gpl1e = (unsigned long *) &(linear_pg_table[
                (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);

            spl1e = (unsigned long *) &shadow_linear_pg_table[
                (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];

            for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
            {
                l1pte_no_fault( m, &gpl1e[i], &spl1e[i] );
            }
        }
        else
        {
            // this L1 was shadowed (by another PT), but we didn't have an L2
            // entry for it

            SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )", sl1pfn);

            l2pde_general( m, &gpde, &spde, sl1pfn );

            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
        }

        shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
        // (we need to do the above even if we've just made the shadow L1)

    } // end of fixup path where writing the shadow L1 directly failed

    perfc_incrc(shadow_fixup_count);

    m->shadow_fault_count++;

    check_pagetable( m, current->mm.pagetable, "post-sf" );

    shadow_unlock(m);

    return 1; // let's retry the faulting instruction...
}
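
/*
 * Illustrative sketch, not the real helpers: l1pte_write_fault(),
 * l1pte_read_fault() and l1pte_no_fault() are defined in asm/shadow.h.
 * Reconstructed (as an assumption) from the invariants check_pte()
 * verifies below -- shadow DIRTY/ACCESSED/RW bits may only be set if the
 * guest pte has them, and shadow RW additionally requires guest DIRTY --
 * the SHM_test propagation presumably looks roughly like this:
 */
#if 0
static void example_l1pte_write_fault(unsigned long *gpte_p,
                                      unsigned long *spte_p)
{
    unsigned long gpte = *gpte_p;

    /* a write fault marks the guest pte accessed+dirty and
     * gives the shadow a writable mapping (guest RW was checked) */
    gpte |= _PAGE_ACCESSED | _PAGE_DIRTY;
    *gpte_p = gpte;
    *spte_p = gpte;
}

static void example_l1pte_read_fault(unsigned long *gpte_p,
                                     unsigned long *spte_p)
{
    unsigned long gpte = *gpte_p;

    /* a read fault marks the guest pte accessed; the shadow stays
     * read-only unless the guest pte is already dirty */
    gpte |= _PAGE_ACCESSED;
    *gpte_p = gpte;
    *spte_p = (gpte & _PAGE_DIRTY) ? gpte : (gpte & ~_PAGE_RW);
}
#endif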
void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
                                 unsigned long *prev_spfn_ptr,
                                 l1_pgentry_t **prev_spl1e_ptr )
{
    unsigned long gpfn, spfn, spte, prev_spfn = *prev_spfn_ptr;
    l1_pgentry_t *spl1e, *prev_spl1e = *prev_spl1e_ptr;

    SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%p\n",
             pa, gpte, prev_spfn, prev_spl1e);

    // to get here, we know the l1 page *must* be shadowed

    gpfn = pa >> PAGE_SHIFT;
    spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;

    // reuse the previous mapping if we're updating the same shadow L1 again
    if ( spfn == prev_spfn )
    {
        spl1e = prev_spl1e;
    }
    else
    {
        if ( prev_spl1e )
            unmap_domain_mem( prev_spl1e );
        spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
        *prev_spfn_ptr  = spfn;
        *prev_spl1e_ptr = spl1e;
    }

    // XXX we assume only page tables can be shadowed;
    // this will have to change to allow arbitrary CoW etc.

    l1pte_no_fault( &current->mm, &gpte, &spte );

    spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = mk_l1_pgentry( spte );
}
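
/*
 * Hypothetical batching caller, for illustration only: the
 * prev_spfn/prev_spl1e arguments above implement a one-entry
 * map_domain_mem() cache across a batch of pte updates, so the caller
 * keeps both across calls and unmaps at the end.  The real caller is the
 * guest page-table update path; `struct example_pt_update' and the
 * helper name are invented.
 */
#if 0
struct example_pt_update { unsigned long pa, gpte; };

static void example_apply_l1_updates(struct example_pt_update *updates,
                                     int nr)
{
    unsigned long prev_spfn = 0;
    l1_pgentry_t *prev_spl1e = NULL;
    int i;

    for ( i = 0; i < nr; i++ )
        shadow_l1_normal_pt_update(updates[i].pa, updates[i].gpte,
                                   &prev_spfn, &prev_spl1e);

    /* the cache still holds a live mapping after the last call */
    if ( prev_spl1e )
        unmap_domain_mem( prev_spl1e );
}
#endif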
void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte )
{
    unsigned long gpfn, spfn, spte;
    l2_pgentry_t *spl2e;
    unsigned long s_sh = 0;

    SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx", pa, gpte);

    // to get here, we know the l2 page has a shadow

    gpfn = pa >> PAGE_SHIFT;
    spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;

    spte = 0;

    if ( gpte & _PAGE_PRESENT )
        s_sh = __shadow_status(&current->mm, gpte >> PAGE_SHIFT);

    spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
    // no real need for a map cache here

    l2pde_general( &current->mm, &gpte, &spte, s_sh );

    // XXX should mark the guest pte as DIRTY and ACCESSED too!

    spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)] =
        mk_l2_pgentry( spte );

    unmap_domain_mem( (void *) spl2e );
}
#if SHADOW_DEBUG

static int sh_l2_present;
static int sh_l1_present;
char * sh_check_name;

#define FAIL(_f, _a...)                                                     \
    { printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",                 \
             sh_check_name, level, i, ## _a , gpte, spte ); BUG(); }
static int check_pte( struct mm_struct *m,
                      unsigned long gpte, unsigned long spte, int level, int i )
{
    unsigned long mask, gpfn, spfn;

    if ( spte == 0 || spte == 0xdeadface || spte == 0x00000E00 )
        return 1; // always safe

    if ( !(spte & _PAGE_PRESENT) )
        FAIL("Non zero not present spte");

    if ( level == 2 ) sh_l2_present++;
    if ( level == 1 ) sh_l1_present++;

    if ( !(gpte & _PAGE_PRESENT) )
        FAIL("Guest not present yet shadow is");

    mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);

    if ( (spte & mask) != (gpte & mask) )
        FAIL("Corrupt?");

    if ( (spte & _PAGE_DIRTY) && !(gpte & _PAGE_DIRTY) )
        FAIL("Dirty coherence");

    if ( (spte & _PAGE_ACCESSED) && !(gpte & _PAGE_ACCESSED) )
        FAIL("Accessed coherence");

    if ( (spte & _PAGE_RW) && !(gpte & _PAGE_RW) )
        FAIL("RW coherence");

    if ( (spte & _PAGE_RW) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY)) )
        FAIL("RW2 coherence");

    spfn = spte >> PAGE_SHIFT;
    gpfn = gpte >> PAGE_SHIFT;

    if ( gpfn == spfn )
    {
        if ( level > 1 )
            FAIL("Linear map ???"); // XXX this will fail on BSD

        return 1;
    }
    else
    {
        if ( level < 2 )
            FAIL("Shadow in L1 entry?");

        if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
            FAIL("spfn problem g.sf=%08lx",
                 __shadow_status(m, gpfn) );
    }

    return 1;
}
static int check_l1_table( struct mm_struct *m, unsigned long va,
                           unsigned long g2, unsigned long s2 )
{
    int j;
    unsigned long *gpl1e, *spl1e;

    //gpl1e = (unsigned long *) &(linear_pg_table[ va>>PAGE_SHIFT ]);
    //spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT ]);

    gpl1e = map_domain_mem( g2<<PAGE_SHIFT );
    spl1e = map_domain_mem( s2<<PAGE_SHIFT );

    for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
    {
        unsigned long gpte = gpl1e[j];
        unsigned long spte = spl1e[j];

        check_pte( m, gpte, spte, 1, j );
    }

    unmap_domain_mem( spl1e );
    unmap_domain_mem( gpl1e );

    return 1;
}
#define FAILPT(_f, _a...)                                                   \
    { printk("XXX FAIL %s-PT" _f "\n", s, ## _a ); BUG(); }

int check_pagetable( struct mm_struct *m, pagetable_t pt, char *s )
{
    unsigned long gptbase = pagetable_val(pt);
    unsigned long gpfn, spfn;
    int i;
    l2_pgentry_t *gpl2e, *spl2e;

    sh_check_name = s;

    SH_VVLOG("%s-PT Audit", s);

    sh_l2_present = sh_l1_present = 0;

    gpfn = gptbase >> PAGE_SHIFT;

    if ( !(__shadow_status(m, gpfn) & PSH_shadowed) )
    {
        printk("%s-PT %08lx not shadowed\n", s, gptbase);

        if ( __shadow_status(m, gpfn) != 0 )
            BUG();

        return 0;
    }

    spfn = __shadow_status(m, gpfn) & PSH_pfn_mask;

    if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
        FAILPT("ptbase shadow inconsistent1");

    gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
    spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );

    //ipl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );

    // the hypervisor entries below the SH linear map slot must match
    if ( memcmp( &spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
                 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
                 ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
                  DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
    {
        printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn);
        for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
              i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
              i++ )
            printk("+++ (%d) %08lx %08lx\n", i,
                   l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]) );
        FAILPT("hypervisor entries inconsistent");
    }

    if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
          l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
        FAILPT("hypervisor linear map inconsistent");

    if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
          ((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
        FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx",
               l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]),
               (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR );

    if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
          ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
            __PAGE_HYPERVISOR))) )
        FAILPT("hypervisor per-domain map inconsistent");

    // check the whole L2
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        unsigned long gpte = l2_pgentry_val(gpl2e[i]);
        unsigned long spte = l2_pgentry_val(spl2e[i]);

        check_pte( m, gpte, spte, 2, i );
    }

    // go back and recurse into the L1 tables
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        unsigned long gpte = l2_pgentry_val(gpl2e[i]);
        unsigned long spte = l2_pgentry_val(spl2e[i]);

        if ( spte )
            check_l1_table( m,
                            i << L2_PAGETABLE_SHIFT,
                            gpte >> PAGE_SHIFT, spte >> PAGE_SHIFT );
    }

    unmap_domain_mem( spl2e );
    unmap_domain_mem( gpl2e );

    SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
             sh_l2_present, sh_l1_present );

    return 1;
}

#endif /* SHADOW_DEBUG */