direct-io.hg
changeset 1199:c4b1105fbc14
bitkeeper revision 1.794.1.1 (405b85b44Vh_3MMuChrmhJ9H5nxbyw)
basic shadow support
author    iap10@labyrinth.cl.cam.ac.uk
date      Fri Mar 19 23:43:48 2004 +0000 (2004-03-19)
parents   39a4998c88fc
children  d0ff128be81d
files     .rootkeys BitKeeper/etc/ignore xen/arch/i386/process.c xen/arch/i386/traps.c xen/common/debug.c xen/common/domain.c xen/common/domain_page.c xen/common/kernel.c xen/common/memory.c xen/common/shadow.c xen/include/asm-i386/config.h xen/include/asm-i386/page.h xen/include/asm-i386/processor.h xen/include/xeno/mm.h xen/include/xeno/perfc_defn.h xen/include/xeno/shadow.h xen/net/dev.c
line diff
--- a/.rootkeys Fri Mar 12 18:12:23 2004 +0000
+++ b/.rootkeys Fri Mar 19 23:43:48 2004 +0000
@@ -158,6 +158,7 @@ 4051bcecFeq4DE70p4zGO5setf47CA xen/commo
 4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c
 3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
 3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
+405b8599xI_PoEr3zZoJ2on-jdn7iw xen/common/shadow.c
 3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen/common/slab.c
 3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen/common/softirq.c
 3e7f358awXBC3Vw-wFRwPw18qL1khg xen/common/string.c
@@ -552,6 +553,7 @@ 4006e65fWMwLqcocgik6wbF0Eeh0Og xen/inclu
 3e4540ccU1sgCx8seIMGlahmMfv7yQ xen/include/xeno/reboot.h
 3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen/include/xeno/sched.h
 403a06a7H0hpHcKpAiDe5BPnaXWTlA xen/include/xeno/serial.h
+405b8599BsDsDwKEJLS0XipaiQW3TA xen/include/xeno/shadow.h
 3ddb79c0VDeD-Oft5eNfMneTU3D1dQ xen/include/xeno/skbuff.h
 3ddb79c14dXIhP7C2ahnoD08K90G_w xen/include/xeno/slab.h
 3ddb79c09xbS-xxfKxuV3JETIhBzmg xen/include/xeno/smp.h
--- a/BitKeeper/etc/ignore Fri Mar 12 18:12:23 2004 +0000
+++ b/BitKeeper/etc/ignore Fri Mar 19 23:43:48 2004 +0000
@@ -548,3 +548,13 @@ tools/xentrace/xentrace
 tools/xc/lib/xc_evtchn.o
 tools/xc/py/XenoUtil.pyc
 tools/xend/xend
+tools/xc/lib/libxc.so.1.3
+tools/xc/lib/libxc.so.1.3.0
+tools/xc/lib/xc_physdev.o
+tools/xend/xend_utils.o
+xen/common/physdev.o
+xen/common/shadow.o
+xen/common/trace.o
+xen/drivers/char/console.o
+xen/drivers/char/keyboard.o
+xen/include/hypervisor-ifs/arch
--- a/xen/arch/i386/process.c Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/arch/i386/process.c Fri Mar 19 23:43:48 2004 +0000
@@ -32,6 +32,7 @@
 
 #include <xeno/irq.h>
 #include <xeno/event.h>
+#include <xeno/shadow.h>
 
 int hlt_counter;
 
@@ -281,7 +282,24 @@ void switch_to(struct task_struct *prev_
     }
 
     /* Switch page tables. */
-    write_cr3_counted(pagetable_val(next_p->mm.pagetable));
+#ifdef CONFIG_SHADOW
+
+    /* printk("switch_to %08lx, %08lx\n", next_p->mm.pagetable,
+       next_p->mm.shadowtable); */
+
+
+    if( next_p->mm.shadowmode )
+    {
+        write_cr3_counted(pagetable_val(next_p->mm.shadowtable));
+        check_pagetable( next_p->mm.pagetable, "switch" );
+    }
+    else
+#endif
+        write_cr3_counted(pagetable_val(next_p->mm.pagetable));
+
+
+
+
 
     set_current(next_p);
 
--- a/xen/arch/i386/traps.c Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/arch/i386/traps.c Fri Mar 19 23:43:48 2004 +0000
@@ -39,6 +39,7 @@
 #include <xeno/spinlock.h>
 #include <xeno/irq.h>
 #include <xeno/perfc.h>
+#include <xeno/shadow.h>
 #include <asm/domain_page.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -323,6 +324,8 @@ asmlinkage void do_page_fault(struct pt_
 
     __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
 
+    perfc_incrc(page_faults);
+
     if ( unlikely(addr >= LDT_VIRT_START) && 
          (addr < (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) )
     {
@@ -336,6 +339,18 @@ asmlinkage void do_page_fault(struct pt_
         return; /* successfully copied the mapping */
     }
 
+#ifdef CONFIG_SHADOW
+    //printk("1");
+    check_pagetable( current->mm.pagetable, "pre-sf" );
+    if ( p->mm.shadowmode && addr < PAGE_OFFSET &&
+         shadow_fault( addr, error_code ) )
+    {
+        check_pagetable( current->mm.pagetable, "post-sfa" );
+        return; // return true if fault was handled 
+    }
+    check_pagetable( current->mm.pagetable, "post-sfb" );
+#endif
+
     if ( unlikely(!(regs->xcs & 3)) )
         goto fault_in_hypervisor;
 
@@ -353,7 +368,8 @@ asmlinkage void do_page_fault(struct pt_
 
     if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
     {
-        DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup);
+        perfc_incrc(copy_user_faults);
+        //DPRINTK("copy_user fault: %08lx -> %08lx\n", regs->eip, fixup);
         regs->eip = fixup;
         regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
         return;
--- a/xen/common/debug.c Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/common/debug.c Fri Mar 19 23:43:48 2004 +0000
@@ -91,7 +91,13 @@ int pdb_change_values(domid_t domain, u_
 
     if ((addr >> PAGE_SHIFT) == ((addr + length - 1) >> PAGE_SHIFT))
     {
-        l2_table = map_domain_mem(pagetable_val(p->mm.pagetable));
+#ifdef CONFIG_SHADOW
+        if (p->mm.shadowmode )
+            l2_table = map_domain_mem(pagetable_val(p->mm.shadowtable));
+        else
+#endif
+            l2_table = map_domain_mem(pagetable_val(p->mm.pagetable));
+
         l2_table += l2_table_offset(addr);
         if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT))
         {
--- a/xen/common/domain.c Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/common/domain.c Fri Mar 19 23:43:48 2004 +0000
@@ -9,6 +9,7 @@
 #include <xeno/delay.h>
 #include <xeno/event.h>
 #include <xeno/time.h>
+#include <xeno/shadow.h>
 #include <hypervisor-ifs/dom0_ops.h>
 #include <asm/io.h>
 #include <asm/domain_page.h>
@@ -546,6 +547,10 @@ int final_setup_guestos(struct task_stru
     get_page_and_type(&frame_table[phys_l2tab>>PAGE_SHIFT], p, 
                       PGT_l2_page_table);
 
+#ifdef CONFIG_SHADOW
+    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode);
+#endif
+
     /* Set up the shared info structure. */
     update_dom_time(p->shared_info);
 
@@ -847,6 +852,15 @@ int setup_guestos(struct task_struct *p,
 
     set_bit(PF_CONSTRUCTED, &p->flags);
 
+#ifdef CONFIG_SHADOW
+
+    printk("Engage shadow mode for dom 0\n");
+    p->mm.shadowmode  = SHM_test; // XXXXX IAP
+    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode );
+#endif
+
+
+
     new_thread(p, 
                (unsigned long)virt_load_address, 
                (unsigned long)virt_stack_address,
--- a/xen/common/domain_page.c Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/common/domain_page.c Fri Mar 19 23:43:48 2004 +0000
@@ -45,6 +45,8 @@ void *map_domain_mem(unsigned long pa)
     unsigned long *cache = mapcache;
     unsigned long flags;
 
+    perfc_incrc(map_domain_mem_count);
+
     spin_lock_irqsave(&map_lock, flags);
 
     /* Has some other CPU caused a wrap? We must flush if so. */
--- a/xen/common/kernel.c Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/common/kernel.c Fri Mar 19 23:43:48 2004 +0000
@@ -104,6 +104,7 @@ void cmain(unsigned long magic, multiboo
     module_t *mod;
     void *heap_start;
     int i;
+    unsigned long frametable_pages, max_mem;
 
     /* Parse the command-line options. */
     cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL);
@@ -190,22 +191,36 @@ void cmain(unsigned long magic, multiboo
         for ( ; ; ) ;
     }
 
-    /* The array of pfn_info structures must fit into the reserved area. */
-    if ( sizeof(struct pfn_info) > 24 )
+    frametable_pages = ((FRAMETABLE_VIRT_END - RDWR_MPT_VIRT_START)/sizeof(struct pfn_info));
+
+    if ( frametable_pages < (1<<(32-PAGE_SHIFT)) )
     {
-        printk("'struct pfn_info' too large to fit in Xen address space!\n");
-        for ( ; ; ) ;
+        printk("Not enough space to initialise frame table for a 4GB machine (%luMB only)\n", frametable_pages >> (20-PAGE_SHIFT));
     }
 
     set_current(&idle0_task);
 
-    max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
+    max_mem = max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
+
+    if ( max_page > frametable_pages )
+        max_page = frametable_pages;
+
     init_frametable(max_page);
-    printk("Initialised all memory on a %luMB machine\n",
-           max_page >> (20-PAGE_SHIFT));
+    printk("Initialised %luMB memory on a %luMB machine\n",
+           max_page >> (20-PAGE_SHIFT),
+           max_mem  >> (20-PAGE_SHIFT) );
 
     heap_start = memguard_init(&_end);
 
+    printk("Xen heap size is %luKB\n", 
+           (MAX_MONITOR_ADDRESS-__pa(heap_start))/1024 );
+
+    if ( ((MAX_MONITOR_ADDRESS-__pa(heap_start))/1024) <= 4096 )
+    {
+        printk("Xen heap size is too small to safely continue!\n");
+        for ( ; ; ) ;
+    }
+
     init_page_allocator(__pa(heap_start), MAX_MONITOR_ADDRESS);
 
     /* Initialise the slab allocator. */
--- a/xen/common/memory.c Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/common/memory.c Fri Mar 19 23:43:48 2004 +0000
@@ -133,6 +133,7 @@
 #include <xeno/errno.h>
 #include <xeno/perfc.h>
 #include <xeno/interrupt.h>
+#include <xeno/shadow.h>
 #include <asm/page.h>
 #include <asm/flushtlb.h>
 #include <asm/io.h>
@@ -182,6 +183,7 @@ static struct {
     struct task_struct *subject_p;
 } percpu_info[NR_CPUS] __cacheline_aligned;
 
+
 /*
  * init_frametable:
  * Initialise per-frame memory information. This goes directly after
@@ -768,6 +770,13 @@ void free_page_type(struct pfn_info *pag
     default:
         BUG();
     }
+
+#ifdef CONFIG_SHADOW
+    // assume we're in shadow mode if PSH_shadowed set
+    if ( page->shadow_and_flags & PSH_shadowed )
+        unshadow_table( page-frame_table );
+#endif
+
 }
 
 
@@ -832,6 +841,10 @@ static int do_extended_command(unsigned 
             put_page_and_type(&frame_table[pagetable_val(current->mm.pagetable)
                                            >> PAGE_SHIFT]);
             current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
+#ifdef CONFIG_SHADOW
+            current->mm.shadowtable = 
+                shadow_mk_pagetable(pfn << PAGE_SHIFT, current->mm.shadowmode);
+#endif
             invalidate_shadow_ldt();
             percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
         }
@@ -917,6 +930,10 @@ int do_mmu_update(mmu_update_t *ureqs, i
     struct pfn_info *page;
     int rc = 0, okay = 1, i, cpu = smp_processor_id();
     unsigned int cmd;
+#ifdef CONFIG_SHADOW
+    unsigned long prev_spfn = 0;
+    l1_pgentry_t *prev_spl1e = 0;
+#endif
 
     perfc_incrc(calls_to_mmu_update);
     perfc_addc(num_page_updates, count);
@@ -967,6 +984,13 @@ int do_mmu_update(mmu_update_t *ureqs, i
                 {
                     okay = mod_l1_entry((l1_pgentry_t *)va, 
                                         mk_l1_pgentry(req.val));
+
+#ifdef CONFIG_SHADOW
+                    if ( okay && page->shadow_and_flags & PSH_shadowed )
+                        shadow_l1_normal_pt_update( req.ptr, req.val, 
+                                                    &prev_spfn, &prev_spl1e );
+#endif
+
                     put_page_type(page);
                 }
                 break;
@@ -976,6 +1000,11 @@ int do_mmu_update(mmu_update_t *ureqs, i
                     okay = mod_l2_entry((l2_pgentry_t *)va, 
                                         mk_l2_pgentry(req.val),
                                         pfn);
+#ifdef CONFIG_SHADOW
+                    if ( okay && page->shadow_and_flags & PSH_shadowed )
+                        shadow_l2_normal_pt_update( req.ptr, req.val );
+#endif
+
                     put_page_type(page);
                 }
                 break;
@@ -985,9 +1014,19 @@ int do_mmu_update(mmu_update_t *ureqs, i
                 *(unsigned long *)va = req.val;
                 okay = 1;
                 put_page_type(page);
+
+#ifdef CONFIG_SHADOW
+                if ( page->shadow_and_flags & PSH_shadowed )
+                    BUG();
+                // at present, we shouldn't be shadowing such pages
+#endif
+
             }
             break;
         }
+
+        check_pagetable( current->mm.pagetable, "mmu" ); // XXX XXX XXX XXX XXX
 
         put_page(page);
 
@@ -1031,11 +1070,23 @@ int do_mmu_update(mmu_update_t *ureqs, i
     if ( prev_pfn != 0 )
         unmap_domain_mem((void *)va);
 
+#ifdef CONFIG_SHADOW
+    if( prev_spl1e != 0 ) 
+        unmap_domain_mem((void *)prev_spl1e);
+#endif
+
     deferred_ops = percpu_info[cpu].deferred_ops;
     percpu_info[cpu].deferred_ops = 0;
 
     if ( deferred_ops & DOP_FLUSH_TLB )
-        write_cr3_counted(pagetable_val(current->mm.pagetable));
+    {
+#ifdef CONFIG_SHADOW
+        if ( unlikely(current->mm.shadowmode) )
+            write_cr3_counted(pagetable_val(current->mm.shadowtable));
+        else
+#endif
+            write_cr3_counted(pagetable_val(current->mm.pagetable));
+    }
 
     if ( deferred_ops & DOP_RELOAD_LDT )
         (void)map_ldt_shadow_page(0);
@@ -1059,19 +1110,62 @@ int do_update_va_mapping(unsigned long p
     unsigned int cpu = p->processor;
     unsigned long deferred_ops;
 
+    perfc_incrc(calls_to_update_va);
+
     if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
         return -EINVAL;
 
+    // XXX when we make this support 4MB pages we should also
+    // deal with the case of updating L2s
+
     if ( unlikely(!mod_l1_entry(&linear_pg_table[page_nr], 
                                 mk_l1_pgentry(val))) )
         err = -EINVAL;
 
+#ifdef CONFIG_SHADOW
+
+    if ( unlikely(p->mm.shadowmode) )
+    {
+        unsigned long sval = 0;
+
+        // XXX this only works for l1 entries, with no translation
+
+        if ( (val & _PAGE_PRESENT) && (val & _PAGE_ACCESSED) )
+        {
+            sval = val;
+            if ( !(val & _PAGE_DIRTY) )
+                sval &= ~_PAGE_RW;
+        }
+
+        /* printk("update_va_map: page_nr=%08lx val=%08lx sval=%08lx\n",
+           page_nr, val, sval); */
+
+        if ( __put_user( sval, ((unsigned long *) (&shadow_linear_pg_table[page_nr])) ) )
+        {
+            // Since L2's are guaranteed RW, failure indicates the page
+            // was not shadowed, so ignore.
+
+            //MEM_LOG("update_va_map: couldn't write update\n");
+        }
+    }
+
+    check_pagetable( p->mm.pagetable, "va" );
+
+#endif
+
     deferred_ops = percpu_info[cpu].deferred_ops;
     percpu_info[cpu].deferred_ops = 0;
 
     if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 
         unlikely(flags & UVMF_FLUSH_TLB) )
-        write_cr3_counted(pagetable_val(p->mm.pagetable));
+    {
+#ifdef CONFIG_SHADOW
+        if ( unlikely(p->mm.shadowmode) )
+            write_cr3_counted(pagetable_val(p->mm.shadowtable));
+        else
+#endif
+            write_cr3_counted(pagetable_val(p->mm.pagetable));
+    }
     else if ( unlikely(flags & UVMF_INVLPG) )
         __flush_tlb_one(page_nr << PAGE_SHIFT);
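The prev_spfn/prev_spl1e pair that do_mmu_update() now threads into shadow_l1_normal_pt_update() is a one-slot mapping cache: a batch of PTE updates landing in the same shadow L1 page reuses a single map_domain_mem() mapping, and the caller unmaps whatever is left when the batch finishes. A minimal sketch of the pattern, assuming the Xen helpers above; the name get_shadow_l1() is hypothetical and does not appear in the patch:

    /* Return a mapping of shadow L1 page 'spfn', reusing the previous
     * mapping when possible. The caller owns *prev_spl1e and must unmap
     * it after the last call of the batch. */
    static l1_pgentry_t *get_shadow_l1(unsigned long spfn,
                                       unsigned long *prev_spfn,
                                       l1_pgentry_t **prev_spl1e)
    {
        if ( spfn == *prev_spfn )
            return *prev_spl1e;               /* cache hit: mapping still valid */

        if ( *prev_spl1e )
            unmap_domain_mem( *prev_spl1e );  /* retire the stale mapping */

        *prev_spl1e = (l1_pgentry_t *)map_domain_mem( spfn << PAGE_SHIFT );
        *prev_spfn  = spfn;
        return *prev_spl1e;
    }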
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/shadow.c Fri Mar 19 23:43:48 2004 +0000
@@ -0,0 +1,618 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*- */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/mm.h>
+#include <xeno/shadow.h>
+#include <asm/domain_page.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW
+
+#if 1
+#define MEM_VLOG(_f, _a...)                            \
+  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+         current->domain , __LINE__ , ## _a )
+#else
+#define MEM_VLOG(_f, _a...)
+#endif
+
+#if 0
+#define MEM_VVLOG(_f, _a...)                            \
+  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n",  \
+         current->domain , __LINE__ , ## _a )
+#else
+#define MEM_VVLOG(_f, _a...)
+#endif
+
+/********
+
+To use these shadow page tables, guests must not rely on the ACCESSED
+and DIRTY bits on L2 pte's being accurate -- they will typically all be set.
+
+I doubt this will break anything. (If guests want to use the va_update
+mechanism they've signed up for this anyhow...)
+
+********/
+
+pagetable_t shadow_mk_pagetable( unsigned long gptbase, 
+                                 unsigned int shadowmode )
+{
+    unsigned long gpfn, spfn=0;
+
+    MEM_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
+              gptbase, shadowmode );
+
+    if ( unlikely(shadowmode) )
+    {
+        gpfn = gptbase >> PAGE_SHIFT;
+
+        if ( likely(frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
+        {
+            spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+        }
+        else
+        {
+            spfn = shadow_l2_table( gpfn );
+        }
+    }
+
+    return mk_pagetable(spfn << PAGE_SHIFT);
+}
+
+void unshadow_table( unsigned long gpfn )
+{
+    unsigned long spfn;
+
+    MEM_VLOG("unshadow_table %08lx\n", gpfn );
+
+    perfc_incrc(unshadow_table_count);
+
+    // this function is the same for both l1 and l2 tables
+
+    // even in the SMP guest case, there won't be a race here as
+    // this CPU was the one that cmpxchg'ed the page to invalid
+
+    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+    frame_table[gpfn].shadow_and_flags = 0;
+    frame_table[spfn].shadow_and_flags = 0;
+
+#ifdef DEBUG
+    {   // XXX delete me!
+        int i;
+        unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
+
+        for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+        {
+            spl1e[i] = 0xdead0000;
+        }
+        unmap_domain_mem( spl1e );
+    }
+#endif
+
+    free_domain_page( &frame_table[spfn] );
+}
+
+unsigned long shadow_l2_table( unsigned long gpfn )
+{
+    struct pfn_info *spfn_info;
+    unsigned long spfn;
+    l2_pgentry_t *spl2e, *gpl2e;
+    int i;
+
+    MEM_VVLOG("shadow_l2_table( %08lx )",gpfn);
+
+    perfc_incrc(shadow_l2_table_count);
+
+    // XXX in future, worry about racing in SMP guests 
+    //     -- use cmpxchg with PSH_pending flag to show progress (and spin)
+
+    spfn_info = alloc_domain_page( NULL ); // XXX account properly later
+
+    ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache
+
+    spfn = (unsigned long) (spfn_info - frame_table);
+
+    // mark pfn as being shadowed, update field to point at shadow
+    frame_table[gpfn].shadow_and_flags = spfn | PSH_shadowed;
+
+    // mark shadow pfn as being a shadow, update field to point at pfn
+    frame_table[spfn].shadow_and_flags = gpfn | PSH_shadow;
+
+    // we need to do this before the linear map is set up
+    spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
+
+    // get hypervisor and 2x linear PT mappings installed
+    memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
+           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
+           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+    spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+    spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+    spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry(__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | 
+                      __PAGE_HYPERVISOR);
+
+    // can't use the linear map as we may not be in the right PT
+    gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
+
+    // proactively create entries for pages that are already shadowed
+    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+    {
+        unsigned long spte = 0;
+
+#if 0  // Turns out this doesn't really help
+        unsigned long gpte;
+
+        gpte = l2_pgentry_val(gpl2e[i]);
+
+        if (gpte & _PAGE_PRESENT)
+        {
+            unsigned long s_sh = 
+                frame_table[ gpte>>PAGE_SHIFT ].shadow_and_flags;
+
+            if( s_sh & PSH_shadowed ) // PSH_shadowed
+            {
+                if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) )
+                {
+                    printk("Linear mapping detected\n");
+                    spte = gpte & ~_PAGE_RW;
+                }
+                else
+                {
+                    spte = ( gpte & ~PAGE_MASK ) | (s_sh<<PAGE_SHIFT) |
+                        _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED ;
+                }
+                // XXX should probably update guest to ACCESSED|DIRTY too...
+            }
+        }
+#endif
+
+        spl2e[i] = mk_l2_pgentry( spte );
+    }
+
+    // it's arguable we should 'preemptively shadow' a few active L1 pages
+    // to avoid taking a string of faults when 'jacking' a running domain
+
+    unmap_domain_mem( gpl2e );
+    unmap_domain_mem( spl2e );
+
+    MEM_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
+
+    return spfn;
+}
+
+int shadow_fault( unsigned long va, long error_code )
+{
+    unsigned long gpte, spte;
+
+    MEM_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
+
+    if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
+    {
+        MEM_VVLOG("shadow_fault - EXIT: read gpte faulted" );
+        return 0;  // propagate to guest
+    }
+
+    if ( ! (gpte & _PAGE_PRESENT) )
+    {
+        MEM_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+        return 0;  // we're not going to be able to help
+    }
+
+    spte = gpte;
+
+    if ( error_code & 2 )
+    {   // write fault
+        if ( gpte & _PAGE_RW )
+        {
+            gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
+            spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+            // (we're about to dirty it anyhow...)
+        }
+        else
+        {   // write fault on RO page
+            MEM_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
+            return 0;  // propagate to guest
+            // not clear whether we should set accessed bit here...
+        }
+    }
+    else
+    {
+        gpte |= _PAGE_ACCESSED;
+        spte |= _PAGE_ACCESSED;  // about to happen anyway
+        if ( ! (gpte & _PAGE_DIRTY) ) 
+            spte &= ~_PAGE_RW;   // force clear unless already dirty
+    }
+
+    MEM_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte );
+
+    // write back updated gpte
+    // XXX watch out for read-only L2 entries! (not used in Linux)
+    if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
+        BUG();  // fixme!
+
+    if ( unlikely( __put_user( spte, (unsigned long*)&shadow_linear_pg_table[va>>PAGE_SHIFT])) )
+    {
+        // failed:
+        //  the L1 may not be shadowed, or the L2 entry may be insufficient
+
+        unsigned long gpde, spde, gl1pfn, sl1pfn;
+
+        MEM_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte );
+
+        gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
+
+        gl1pfn = gpde>>PAGE_SHIFT;
+
+        if ( ! (frame_table[gl1pfn].shadow_and_flags & PSH_shadowed ) )
+        {
+            // this L1 is NOT already shadowed so we need to shadow it
+            struct pfn_info *sl1pfn_info;
+            unsigned long *gpl1e, *spl1e;
+            int i;
+            sl1pfn_info = alloc_domain_page( NULL ); // XXX account properly!
+            sl1pfn = sl1pfn_info - frame_table;
+
+            MEM_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
+            perfc_incrc(shadow_l1_table_count);
+
+            sl1pfn_info->shadow_and_flags = PSH_shadow | gl1pfn;
+            frame_table[gl1pfn].shadow_and_flags = PSH_shadowed | sl1pfn;
+
+            gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
+            spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<<PAGE_SHIFT);
+
+            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
+            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
+
+            gpl1e = (unsigned long *) &(linear_pg_table[
+                (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);
+
+            spl1e = (unsigned long *) &shadow_linear_pg_table[
+                (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];
+
+            // XXX we can only do this if the shadow/guest is writeable
+            //     disable write protection if ! gpde & _PAGE_RW ????
+
+            for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+            {
+#if SHADOW_OPTIMISE
+                if ( (gpl1e[i] & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
+                     (_PAGE_PRESENT|_PAGE_ACCESSED) )
+                {
+                    spl1e[i] = gpl1e[i];
+                    if ( !(gpl1e[i] & _PAGE_DIRTY) )
+                        spl1e[i] &= ~_PAGE_RW;
+                }
+                else
+#endif
+                    spl1e[i] = 0;
+            }
+        }
+        else
+        {
+            // this L1 was shadowed (by another PT) but we didn't have an L2
+            // entry for it
+
+            sl1pfn = frame_table[gl1pfn].shadow_and_flags & PSH_pfn_mask;
+
+            MEM_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
+
+            spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
+
+            gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
+
+            if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gl1pfn<<PAGE_SHIFT) ) )
+            {   // detect linear map, and keep pointing at guest
+                MEM_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
+                spde = (spde & ~PAGE_MASK) | (gl1pfn<<PAGE_SHIFT);
+            }
+
+            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
+            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
+        }
+
+        shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
+        // (we need to do the above even if we've just made the shadow L1)
+
+    } // end of fixup: writing the shadow L1 directly failed
+
+    perfc_incrc(shadow_fixup_count);
+
+    return 1; // let's try the faulting instruction again...
+}
+
+void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
+                                 unsigned long *prev_spfn_ptr,
+                                 l1_pgentry_t **prev_spl1e_ptr )
+{
+    unsigned long gpfn, spfn, spte, prev_spfn = *prev_spfn_ptr;
+    l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
+
+    MEM_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n",
+              pa, gpte, prev_spfn, prev_spl1e);
+
+    // to get here, we know the l1 page *must* be shadowed
+
+    gpfn = pa >> PAGE_SHIFT;
+    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+
+    if ( spfn == prev_spfn )
+    {
+        spl1e = prev_spl1e;
+    }
+    else
+    {
+        if( prev_spl1e ) unmap_domain_mem( prev_spl1e );
+        spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
+        *prev_spfn_ptr  = spfn;
+        *prev_spl1e_ptr = spl1e;
+    }
+
+    // XXX we assume only pagetables can be shadowed; this will have to change
+    //     to allow arbitrary CoW etc.
+
+    spte = 0;
+
+#if SHADOW_OPTIMISE
+    if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
+         (_PAGE_PRESENT|_PAGE_ACCESSED) )
+    {
+        spte = gpte;
+        if ( !(gpte & _PAGE_DIRTY ) )
+            gpte &= ~ _PAGE_RW;
+        // XXX shouldn't this clear _PAGE_RW in spte, as shadow_fault() does?
+    }
+#endif
+
+    spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t) ] = 
+        mk_l1_pgentry( spte );
+
+    unmap_domain_mem( (void *) spl1e );
+}
+
+void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte )
+{
+    unsigned long gpfn, spfn, spte;
+    l2_pgentry_t * sp2le;
+    unsigned long s_sh;
+
+    MEM_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
+
+    // to get here, we know the l2 page has a shadow
+
+    gpfn = pa >> PAGE_SHIFT;
+    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+
+    sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
+    // no real need for a cache here
+
+    spte = 0;
+
+    s_sh = frame_table[gpte >> PAGE_SHIFT].shadow_and_flags;
+
+    if ( s_sh ) // PSH_shadowed
+    {
+        if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) )
+        {
+            // linear page table case
+            spte = (gpte & ~_PAGE_RW) | _PAGE_DIRTY | _PAGE_ACCESSED;
+        }
+        else
+            spte = (gpte & ~PAGE_MASK) | (s_sh<<PAGE_SHIFT) | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+    }
+
+    // XXXX Should mark guest pte as DIRTY and ACCESSED too!!!!!
+
+    sp2le[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t) ] = 
+        mk_l2_pgentry( spte );
+
+    unmap_domain_mem( (void *) sp2le );
+}
+
+#if SHADOW_DEBUG
+
+static int sh_l2_present;
+static int sh_l1_present;
+char * sh_check_name;
+
+#define FAIL(_f, _a...)                                       \
+{printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",        \
+        sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
+
+int check_pte( unsigned long gpte, unsigned long spte, int level, int i )
+{
+    unsigned long mask, gpfn, spfn;
+
+    if ( spte == 0 || spte == 0xdeadface || spte == 0x00000E00)
+        return 1;  // always safe
+
+    if ( !(spte & _PAGE_PRESENT) )
+        FAIL("Non zero not present spte");
+
+    if( level == 2 ) sh_l2_present++;
+    if( level == 1 ) sh_l1_present++;
+
+    if ( !(gpte & _PAGE_PRESENT) )
+        FAIL("Guest not present yet shadow is");
+
+    mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);
+
+    if ( (spte & mask) != (gpte & mask ) )
+        FAIL("Corrupt?");
+
+    if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
+        FAIL("Dirty coherence");
+
+    if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
+        FAIL("Accessed coherence");
+
+    if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
+        FAIL("RW coherence");
+
+    if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY) ))
+        FAIL("RW2 coherence");
+
+    spfn = spte>>PAGE_SHIFT;
+    gpfn = gpte>>PAGE_SHIFT;
+
+    if ( gpfn == spfn )
+    {
+        if ( level > 1 )
+            FAIL("Linear map ???");  // XXX this will fail on BSD
+
+#if 0 // might be a RO mapping of a page table page
+        if ( frame_table[gpfn].shadow_and_flags != 0 )
+        {
+            FAIL("Should have been shadowed g.sf=%08lx s.sf=%08lx", 
+                 frame_table[gpfn].shadow_and_flags,
+                 frame_table[spfn].shadow_and_flags);
+        }
+        else
+#endif
+            return 1;
+    }
+    else
+    {
+        if ( level < 2 )
+            FAIL("Shadow in L1 entry?");
+
+        if ( frame_table[gpfn].shadow_and_flags != (PSH_shadowed | spfn) )
+            FAIL("spfn problem g.sf=%08lx s.sf=%08lx [g.sf]=%08lx [s.sf]=%08lx", 
+                 frame_table[gpfn].shadow_and_flags,
+                 frame_table[spfn].shadow_and_flags,
+                 frame_table[frame_table[gpfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags,
+                 frame_table[frame_table[spfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags
+                 );
+
+        if ( frame_table[spfn].shadow_and_flags != (PSH_shadow | gpfn) )
+            FAIL("gpfn problem g.sf=%08lx s.sf=%08lx", 
+                 frame_table[gpfn].shadow_and_flags,
+                 frame_table[spfn].shadow_and_flags);
+    }
+
+    return 1;
+}
+
+int check_l1_table( unsigned long va, unsigned long g2, unsigned long s2 )
+{
+    int j;
+    unsigned long *gpl1e, *spl1e;
+
+    gpl1e = (unsigned long *) &(linear_pg_table[ va>>PAGE_SHIFT]);
+    spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT]);
+
+    for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
+    {
+        unsigned long gpte = gpl1e[j];
+        unsigned long spte = spl1e[j];
+
+        check_pte( gpte, spte, 1, j );
+    }
+
+    return 1;
+}
+
+#define FAILPT(_f, _a...)                      \
+{printk("XXX FAILPT" _f "\n", ## _a ); BUG();}
+
+int check_pagetable( pagetable_t pt, char *s )
+{
+    unsigned long gptbase = pagetable_val(pt);
+    unsigned long gpfn, spfn;
+    int i;
+    l2_pgentry_t *gpl2e, *spl2e;
+
+    return 1;  // XXX early return: audit currently disabled
+
+    sh_check_name = s;
+
+    MEM_VVLOG("%s-PT Audit",s);
+
+    sh_l2_present = sh_l1_present = 0;
+
+    gpfn = gptbase >> PAGE_SHIFT;
+
+    if ( ! (frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
+    {
+        printk("%s-PT %08lx not shadowed\n", s, gptbase);
+
+        if( frame_table[gpfn].shadow_and_flags != 0 ) BUG();
+
+        return 0;
+    }
+
+    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+
+    if ( ! frame_table[gpfn].shadow_and_flags == (PSH_shadowed | spfn) )
+        FAILPT("ptbase shadow inconsistent1");
+
+    if ( ! frame_table[spfn].shadow_and_flags == (PSH_shadow | gpfn) )
+        FAILPT("ptbase shadow inconsistent2");
+
+    // use the linear map to get a pointer to the L2
+    gpl2e = (l2_pgentry_t *) &(linear_l2_table[0]);
+    spl2e = (l2_pgentry_t *) &(shadow_linear_l2_table[0]);
+
+    // check the whole L2
+    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+    {
+        unsigned long gpte = l2_pgentry_val(gpl2e[i]);
+        unsigned long spte = l2_pgentry_val(spl2e[i]);
+
+        check_pte( gpte, spte, 2, i );
+    }
+
+    // go back and recurse
+    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+    {
+        unsigned long gpte = l2_pgentry_val(gpl2e[i]);
+        unsigned long spte = l2_pgentry_val(spl2e[i]);
+
+        if ( spte )
+            check_l1_table( 
+                i<<L2_PAGETABLE_SHIFT,
+                gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
+    }
+
+    MEM_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
+              sh_l2_present, sh_l1_present );
+
+    return 1;
+}
+
+#endif
+
+#endif // CONFIG_SHADOW
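Stripped of the plumbing that materialises missing shadow L1 tables, shadow_fault() reduces to a small decision procedure on the guest PTE and the fault type. The restatement below is a sketch, not code from the changeset: classify_shadow_fault() is a hypothetical name, and the _PAGE_* constants are the standard i386 PTE bits redefined locally so the fragment stands alone.

    #define _PAGE_PRESENT  0x001
    #define _PAGE_RW       0x002
    #define _PAGE_ACCESSED 0x020
    #define _PAGE_DIRTY    0x040

    enum sf_verdict { SF_PROPAGATE, SF_FIXUP };

    static enum sf_verdict classify_shadow_fault(unsigned long *gpte,
                                                 unsigned long *spte,
                                                 int write_fault)
    {
        if ( !(*gpte & _PAGE_PRESENT) )
            return SF_PROPAGATE;        /* guest mapping absent: guest's problem */

        *spte = *gpte;

        if ( write_fault )
        {
            if ( !(*gpte & _PAGE_RW) )
                return SF_PROPAGATE;    /* genuine protection fault */
            *gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
            *spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
        }
        else
        {
            *gpte |= _PAGE_ACCESSED;
            *spte |= _PAGE_ACCESSED;
            if ( !(*gpte & _PAGE_DIRTY) )
                *spte &= ~_PAGE_RW;     /* stay RO so the first write traps */
        }
        return SF_FIXUP;
    }

SF_PROPAGATE corresponds to shadow_fault() returning 0 (the guest sees the fault itself); SF_FIXUP corresponds to writing both entries back and returning 1 so the faulting instruction is retried.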
--- a/xen/include/asm-i386/config.h Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/include/asm-i386/config.h Fri Mar 19 23:43:48 2004 +0000
@@ -40,6 +40,9 @@
 
 #define CONFIG_XEN_ATTENTION_KEY 1
 
+#define CONFIG_SHADOW 1
+
+
 #define HZ 100
 
 /*
@@ -68,7 +71,7 @@
  */
 #define MAX_MONITOR_ADDRESS   (16*1024*1024)
 #define MAX_DMA_ADDRESS       (16*1024*1024)
-#define MAX_DIRECTMAP_ADDRESS (44*1024*1024)
+#define MAX_DIRECTMAP_ADDRESS (40*1024*1024) // XXX was 44
 /* And the virtual addresses for the direct-map region... */
 #define DIRECTMAP_VIRT_START  (READONLY_MPT_VIRT_END)
 #define DIRECTMAP_VIRT_END    (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
@@ -81,8 +84,11 @@
 /* Next 4MB of virtual address space is used as a linear p.t. mapping. */
 #define LINEAR_PT_VIRT_START  (DIRECTMAP_VIRT_END)
 #define LINEAR_PT_VIRT_END    (LINEAR_PT_VIRT_START + (4*1024*1024))
+/* Next 4MB of virtual address space is used as a shadow linear p.t. map. */
+#define SH_LINEAR_PT_VIRT_START (LINEAR_PT_VIRT_END)
+#define SH_LINEAR_PT_VIRT_END   (SH_LINEAR_PT_VIRT_START + (4*1024*1024))
 /* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
-#define PERDOMAIN_VIRT_START  (LINEAR_PT_VIRT_END)
+#define PERDOMAIN_VIRT_START  (SH_LINEAR_PT_VIRT_END)
 #define PERDOMAIN_VIRT_END    (PERDOMAIN_VIRT_START + (4*1024*1024))
 #define GDT_VIRT_START        (PERDOMAIN_VIRT_START)
 #define GDT_VIRT_END          (GDT_VIRT_START + (64*1024))
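Taken together, these definitions shrink the direct-map region from 44MB to 40MB and splice a new 4MB shadow-linear slot between the existing linear page-table map and the per-domain area. Assuming nothing beyond the constants above, the regions now chain as:

    DIRECTMAP_VIRT_END    == LINEAR_PT_VIRT_START     /* 4MB: guest PT linear map  */
    LINEAR_PT_VIRT_END    == SH_LINEAR_PT_VIRT_START  /* 4MB: shadow PT linear map */
    SH_LINEAR_PT_VIRT_END == PERDOMAIN_VIRT_START     /* 4MB: per-domain mappings  */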
--- a/xen/include/asm-i386/page.h Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/include/asm-i386/page.h Fri Mar 19 23:43:48 2004 +0000
@@ -91,6 +91,7 @@ typedef struct { unsigned long pt_lo; } 
 #include <asm/flushtlb.h>
 
 #define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START)
+#define linear_l2_table ((l2_pgentry_t *)(LINEAR_PT_VIRT_START+(LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
 
 extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE];
 extern void paging_init(void);
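linear_l2_table is the usual self-map trick: with the page directory installed as one of its own L2 entries at LINEAR_PT_VIRT_START, the L1 entry for any va sits at base + (va >> PAGE_SHIFT) * sizeof(entry), and running the base itself through that same formula lands on the L2 entries. A sketch under illustrative constants; the base value here is hypothetical, the real one is derived in config.h:

    #define PAGE_SHIFT         12
    #define L2_PAGETABLE_SHIFT 22
    #define LINEAR_PT_BASE     0xFC400000UL  /* hypothetical 4MB-aligned slot */

    /* address of the L1 entry that maps va */
    #define LINEAR_PTE(va) (LINEAR_PT_BASE + ((va) >> PAGE_SHIFT) * 4)

    /* the L2 entry for va: LINEAR_PTE applied to the base itself, which is
     * exactly what the linear_l2_table macro above expands to */
    #define LINEAR_PDE(va) (LINEAR_PTE(LINEAR_PT_BASE) + \
                            ((va) >> L2_PAGETABLE_SHIFT) * 4)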
--- a/xen/include/asm-i386/processor.h Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/include/asm-i386/processor.h Fri Mar 19 23:43:48 2004 +0000
@@ -415,6 +415,12 @@ struct mm_struct {
      */
     l1_pgentry_t *perdomain_pt;
     pagetable_t  pagetable;
+
+#ifdef CONFIG_SHADOW
+    unsigned int shadowmode;  /* flags to control shadow table operation */
+    pagetable_t  shadowtable;
+#endif
+
     /* Current LDT details. */
     unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
     /* Next entry is passed to LGDT on domain switch. */
--- a/xen/include/xeno/mm.h Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/include/xeno/mm.h Fri Mar 19 23:43:48 2004 +0000
@@ -67,6 +67,10 @@ struct pfn_info
     unsigned long type_and_flags;
     /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
     unsigned long tlbflush_timestamp;
+#ifdef CONFIG_SHADOW
+    /* Shadow page status: top bits flags, bottom bits are a pfn */
+    unsigned long shadow_and_flags;
+#endif
 };
 
 /* The following page types are MUTUALLY EXCLUSIVE. */
@@ -100,6 +104,7 @@ struct pfn_info
 /* 28-bit count of references to this frame. */
 #define PGC_count_mask ((1<<28)-1)
 
+
 /* We trust the slab allocator in slab.c, and our use of it. */
 #define PageSlab(page)    (1)
 #define PageSetSlab(page) ((void)0)
--- a/xen/include/xeno/perfc_defn.h Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/include/xeno/perfc_defn.h Fri Mar 19 23:43:48 2004 +0000
@@ -19,6 +19,15 @@ PERFCOUNTER_CPU( need_flush_tlb_flush, "
 
 PERFCOUNTER_CPU( calls_to_mmu_update, "calls_to_mmu_update" )
 PERFCOUNTER_CPU( num_page_updates, "num_page_updates" )
+PERFCOUNTER_CPU( calls_to_update_va, "calls_to_update_va_map" )
+PERFCOUNTER_CPU( page_faults, "page faults" )
+PERFCOUNTER_CPU( copy_user_faults, "copy_user faults" )
+PERFCOUNTER_CPU( map_domain_mem_count, "map_domain_mem count" )
+
+PERFCOUNTER_CPU( shadow_l2_table_count, "shadow_l2_table count" )
+PERFCOUNTER_CPU( shadow_l1_table_count, "shadow_l1_table count" )
+PERFCOUNTER_CPU( unshadow_table_count, "unshadow_table count" )
+PERFCOUNTER_CPU( shadow_fixup_count, "shadow_fixup count" )
+PERFCOUNTER_CPU( shadow_update_va_fail, "shadow_update_va_fail" )
 
 
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/xeno/shadow.h Fri Mar 19 23:43:48 2004 +0000
@@ -0,0 +1,48 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*- */
+
+#ifndef _XENO_SHADOW_H
+#define _XENO_SHADOW_H
+
+#ifdef CONFIG_SHADOW
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/mm.h>
+
+/* Shadow PT flag bits in pfn_info */
+#define PSH_shadowed (1<<31) /* page has a shadow. PFN points to shadow */
+#define PSH_shadow   (1<<30) /* page is a shadow. PFN points to orig page */
+#define PSH_pending  (1<<29) /* page is in the process of being shadowed */
+#define PSH_pfn_mask ((1<<21)-1)
+
+/* Shadow PT operation mode : shadowmode variable in mm_struct */
+#define SHM_test      (1<<0) /* just run domain on shadow PTs */
+#define SHM_logdirty  (1<<1) /* log pages that are dirtied */
+#define SHM_cow       (1<<2) /* copy on write all dirtied pages */
+#define SHM_translate (1<<3) /* lookup machine pages in translation table */
+
+#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
+#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
+
+extern pagetable_t shadow_mk_pagetable( unsigned long gptbase, 
+                                        unsigned int shadowmode );
+extern void unshadow_table( unsigned long gpfn );
+extern unsigned long shadow_l2_table( unsigned long gpfn );
+extern int shadow_fault( unsigned long va, long error_code );
+extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
+                                        unsigned long *prev_spfn_ptr,
+                                        l1_pgentry_t **prev_spl1e_ptr );
+extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte );
+
+#define SHADOW_DEBUG    0
+#define SHADOW_OPTIMISE 1
+
+#if SHADOW_DEBUG
+extern int check_pagetable( pagetable_t pt, char *s );
+#else
+#define check_pagetable( pt, s )
+#endif
+
+#endif
+#endif
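Since shadow_and_flags packs both roles into a single word, a guest frame and its shadow cross-reference each other exactly as shadow_l2_table() links them. A standalone sketch of the encoding; the pfn values are made up, and 1UL shifts are used to keep the arithmetic well-defined on 32-bit longs:

    #include <stdio.h>

    #define PSH_shadowed (1UL<<31)  /* page has a shadow; low bits = shadow pfn */
    #define PSH_shadow   (1UL<<30)  /* page is a shadow; low bits = guest pfn   */
    #define PSH_pfn_mask ((1UL<<21)-1)

    int main(void)
    {
        unsigned long gpfn = 0x1234;  /* guest page-table frame (hypothetical)   */
        unsigned long spfn = 0x0abc;  /* frame holding its shadow (hypothetical) */

        unsigned long g = PSH_shadowed | spfn;  /* stored in frame_table[gpfn] */
        unsigned long s = PSH_shadow   | gpfn;  /* stored in frame_table[spfn] */

        printf("guest  %05lx -> shadow %05lx\n", gpfn, g & PSH_pfn_mask);
        printf("shadow %05lx -> guest  %05lx\n", spfn, s & PSH_pfn_mask);
        return 0;
    }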
--- a/xen/net/dev.c Fri Mar 12 18:12:23 2004 +0000
+++ b/xen/net/dev.c Fri Mar 19 23:43:48 2004 +0000
@@ -28,6 +28,7 @@
 #include <xeno/init.h>
 #include <xeno/module.h>
 #include <xeno/event.h>
+#include <xeno/shadow.h>
 #include <asm/domain_page.h>
 #include <asm/pgalloc.h>
 #include <asm/io.h>
@@ -488,7 +489,7 @@ struct netif_rx_stats netdev_rx_stat[NR_
 void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
 {
     rx_shadow_entry_t *rx;
-    unsigned long *ptep, pte;
+    unsigned long *ptep, pte, new_pte;
     struct pfn_info *old_page, *new_page, *pte_page;
     unsigned short size;
     unsigned char offset, status = RING_STATUS_OK;
@@ -530,10 +531,12 @@ void deliver_packet(struct sk_buff *skb,
     wmb(); /* Get type count and set flush bit before updating PTE. */
 
     pte = *ptep;
+
+    new_pte = (pte & ~PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT |
+        ((new_page - frame_table) << PAGE_SHIFT);
+
     if ( unlikely(pte & _PAGE_PRESENT) || 
-         unlikely(cmpxchg(ptep, pte, 
-                          (pte & ~PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT |
-                          ((new_page - frame_table) << PAGE_SHIFT))) != pte )
+         unlikely(cmpxchg(ptep, pte, new_pte)) != pte )
     {
         DPRINTK("PTE was modified or reused! %08lx %08lx\n", pte, *ptep);
         unmap_domain_mem(ptep);
@@ -543,6 +546,22 @@ void deliver_packet(struct sk_buff *skb,
         goto out;
     }
 
+
+#ifdef CONFIG_SHADOW
+    if ( pte_page->shadow_and_flags & PSH_shadowed )
+    {
+        unsigned long spte_pfn = pte_page->shadow_and_flags & PSH_pfn_mask;
+        unsigned long *sptr = map_domain_mem( (spte_pfn<<PAGE_SHIFT) |
+                                              (((unsigned long)ptep)&~PAGE_MASK) );
+
+        // write the update through to the shadow, saving a fault later
+        *sptr = new_pte;
+
+        unmap_domain_mem( sptr );
+    }
+#endif
+
+
     machine_to_phys_mapping[new_page - frame_table] 
         = machine_to_phys_mapping[old_page - frame_table];
 
@@ -2068,6 +2087,8 @@ static void get_rx_bufs(net_vif_t *vif)
 
         pte_pfn  = rx.addr >> PAGE_SHIFT;
         pte_page = &frame_table[pte_pfn];
+
+        //printk("MMM %08lx ", rx.addr);
 
         /* The address passed down must be to a valid PTE. */
         if ( unlikely(pte_pfn >= max_page) ||
@@ -2081,7 +2102,7 @@ static void get_rx_bufs(net_vif_t *vif)
 
         ptep = map_domain_mem(rx.addr);
         pte  = *ptep;
-
+        //printk("%08lx\n",pte);
         /* We must be passed a valid writeable mapping to swizzle. */
         if ( unlikely((pte & (_PAGE_PRESENT|_PAGE_RW)) != 
                       (_PAGE_PRESENT|_PAGE_RW)) ||
@@ -2092,6 +2113,22 @@ static void get_rx_bufs(net_vif_t *vif)
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
         }
+
+#ifdef CONFIG_SHADOW
+        {
+            if ( frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_shadowed )
+            {
+                unsigned long spfn =
+                    frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_pfn_mask;
+                unsigned long * sptr = map_domain_mem( (spfn<<PAGE_SHIFT) | (rx.addr&~PAGE_MASK) );
+
+                *sptr = 0;
+                unmap_domain_mem( sptr );
+            }
+        }
+#endif
 
         buf_pfn  = pte >> PAGE_SHIFT;
         buf_page = &frame_table[buf_pfn];
@@ -2112,6 +2149,8 @@ static void get_rx_bufs(net_vif_t *vif)
             put_page_and_type(pte_page);
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
+
+            // XXX IAP should SHADOW_CONFIG do something here?
         }
 
         /*