ia64/xen-unstable
changeset 1216:a9abf4f7b2f6
bitkeeper revision 1.794.1.4 (40623aebq_XP4MvV6YJsXGleofDYNg)
shadow mode improvements:
use a hash table to avoid increasing pfn_info size.
improved locking in preparation for SMP guests.
author   | iap10@tetris.cl.cam.ac.uk
date     | Thu Mar 25 01:50:35 2004 +0000 (2004-03-25)
parents  | 3ef84b0712aa
children | ee66ecf7b0f5
files    | xen/arch/i386/process.c xen/arch/i386/traps.c xen/common/debug.c xen/common/domain.c xen/common/memory.c xen/common/perfc.c xen/common/shadow.c xen/include/asm-i386/config.h xen/include/asm-i386/processor.h xen/include/xeno/mm.h xen/include/xeno/shadow.h xen/net/dev.c
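Before the per-file diffs, a note on the core new data structure: this changeset introduces a per-domain hash table mapping a guest pfn to its shadow pfn plus status flags, replacing the shadow_and_flags word that previously had to live in every pfn_info. A condensed sketch of the structure and bucket lookup as introduced in xen/include/xeno/shadow.h below (the sizes are the values chosen in this changeset):

    /* one entry per shadowed guest page; chains use pull-to-front ordering */
    struct shadow_status {
        unsigned long pfn;            /* guest pfn (key); 0 marks an empty slot */
        unsigned long spfn_and_flags; /* shadow pfn | PSH_* status flags */
        struct shadow_status *next;   /* hash chain */
    };

    #define shadow_ht_buckets    256  /* bucket heads, allocated at enable time */
    #define shadow_ht_extra_size 128  /* spare chain nodes per allocation block */

    /* bucket selection is a plain modulo hash on the guest pfn */
    static inline struct shadow_status *hash_bucket(struct task_struct *p,
                                                    unsigned int gpfn)
    {
        return &p->mm.shadow_ht[gpfn % shadow_ht_buckets];
    }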
line diff
1.1 --- a/xen/arch/i386/process.c Sun Mar 21 19:14:29 2004 +0000 1.2 +++ b/xen/arch/i386/process.c Thu Mar 25 01:50:35 2004 +0000 1.3 @@ -282,25 +282,14 @@ void switch_to(struct task_struct *prev_ 1.4 } 1.5 1.6 /* Switch page tables. */ 1.7 -#ifdef CONFIG_SHADOW 1.8 - 1.9 - /* printk("switch_to %08lx, %08lx\n", next_p->mm.pagetable, 1.10 - next_p->mm.shadowtable);*/ 1.11 - 1.12 - 1.13 - if( next_p->mm.shadowmode ) 1.14 + if( next_p->mm.shadow_mode ) 1.15 { 1.16 - check_pagetable( next_p->mm.pagetable, "switch" ); 1.17 - write_cr3_counted(pagetable_val(next_p->mm.shadowtable)); 1.18 + check_pagetable( next_p, next_p->mm.pagetable, "switch" ); 1.19 + write_cr3_counted(pagetable_val(next_p->mm.shadow_table)); 1.20 } 1.21 else 1.22 -#endif 1.23 write_cr3_counted(pagetable_val(next_p->mm.pagetable)); 1.24 1.25 - 1.26 - 1.27 - 1.28 - 1.29 set_current(next_p); 1.30 1.31 /* Switch GDT and LDT. */
2.1 --- a/xen/arch/i386/traps.c Sun Mar 21 19:14:29 2004 +0000 2.2 +++ b/xen/arch/i386/traps.c Thu Mar 25 01:50:35 2004 +0000 2.3 @@ -339,13 +339,11 @@ asmlinkage void do_page_fault(struct pt_ 2.4 return; /* successfully copied the mapping */ 2.5 } 2.6 2.7 -#ifdef CONFIG_SHADOW 2.8 - if ( p->mm.shadowmode && addr < PAGE_OFFSET && 2.9 + if ( unlikely( p->mm.shadow_mode ) && addr < PAGE_OFFSET && 2.10 shadow_fault( addr, error_code ) ) 2.11 { 2.12 return; // return true if fault was handled 2.13 } 2.14 -#endif 2.15 2.16 if ( unlikely(!(regs->xcs & 3)) ) 2.17 goto fault_in_hypervisor;
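Throughout the patch the compile-time CONFIG_SHADOW guards become a per-domain runtime test, so paths like the one above now check p->mm.shadow_mode (annotated unlikely, since most domains won't shadow) before involving the shadow code. The fault-path shape, condensed from the hunk above:

    /* condensed from do_page_fault(): shadow_fault() returns nonzero
     * if it repaired the shadow PTE, zero to propagate to the guest   */
    if (unlikely(p->mm.shadow_mode) && addr < PAGE_OFFSET &&
        shadow_fault(addr, error_code))
        return;   /* fault handled entirely by the shadow code */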
3.1 --- a/xen/common/debug.c Sun Mar 21 19:14:29 2004 +0000 3.2 +++ b/xen/common/debug.c Thu Mar 25 01:50:35 2004 +0000 3.3 @@ -91,11 +91,9 @@ int pdb_change_values(domid_t domain, u_ 3.4 3.5 if ((addr >> PAGE_SHIFT) == ((addr + length - 1) >> PAGE_SHIFT)) 3.6 { 3.7 -#ifdef CONFIG_SHADOW 3.8 - if (p->mm.shadowmode ) 3.9 - l2_table = map_domain_mem(pagetable_val(p->mm.shadowtable)); 3.10 + if (p->mm.shadow_mode ) 3.11 + l2_table = map_domain_mem(pagetable_val(p->mm.shadow_table)); 3.12 else 3.13 -#endif 3.14 l2_table = map_domain_mem(pagetable_val(p->mm.pagetable)); 3.15 3.16 l2_table += l2_table_offset(addr);
4.1 --- a/xen/common/domain.c Sun Mar 21 19:14:29 2004 +0000 4.2 +++ b/xen/common/domain.c Thu Mar 25 01:50:35 2004 +0000 4.3 @@ -341,12 +341,14 @@ void free_domain_page(struct pfn_info *p 4.4 if ( !(page->count_and_flags & PGC_zombie) ) 4.5 { 4.6 page->tlbflush_timestamp = tlbflush_clock; 4.7 - page->u.cpu_mask = 1 << p->processor; 4.8 - 4.9 - spin_lock(&p->page_list_lock); 4.10 - list_del(&page->list); 4.11 - p->tot_pages--; 4.12 - spin_unlock(&p->page_list_lock); 4.13 + if (p) 4.14 + { 4.15 + page->u.cpu_mask = 1 << p->processor; 4.16 + spin_lock(&p->page_list_lock); 4.17 + list_del(&page->list); 4.18 + p->tot_pages--; 4.19 + spin_unlock(&p->page_list_lock); 4.20 + } 4.21 } 4.22 4.23 page->count_and_flags = 0; 4.24 @@ -547,10 +549,6 @@ int final_setup_guestos(struct task_stru 4.25 get_page_and_type(&frame_table[phys_l2tab>>PAGE_SHIFT], p, 4.26 PGT_l2_page_table); 4.27 4.28 -#ifdef CONFIG_SHADOW 4.29 - p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode); 4.30 -#endif 4.31 - 4.32 /* Set up the shared info structure. */ 4.33 update_dom_time(p->shared_info); 4.34 4.35 @@ -852,15 +850,10 @@ int setup_guestos(struct task_struct *p, 4.36 4.37 set_bit(PF_CONSTRUCTED, &p->flags); 4.38 4.39 -#ifdef CONFIG_SHADOW 4.40 - 4.41 -printk("Engage shadow mode for dom 0\n"); 4.42 - p->mm.shadowmode = SHM_test; // XXXXX IAP 4.43 - p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode ); 4.44 +#if 1 // XXXXX IAP DO NOT CHECK IN ENABLED !!!!!!! 4.45 + shadow_mode_enable(p, SHM_test); 4.46 #endif 4.47 4.48 - 4.49 - 4.50 new_thread(p, 4.51 (unsigned long)virt_load_address, 4.52 (unsigned long)virt_stack_address,
5.1 --- a/xen/common/memory.c Sun Mar 21 19:14:29 2004 +0000 5.2 +++ b/xen/common/memory.c Thu Mar 25 01:50:35 2004 +0000 5.3 @@ -765,20 +765,22 @@ void free_page_type(struct pfn_info *pag 5.4 { 5.5 case PGT_l1_page_table: 5.6 free_l1_table(page); 5.7 -#ifdef CONFIG_SHADOW 5.8 - // assume we're in shadow mode if PSH_shadowed set 5.9 - if ( current->mm.shadowmode && page->shadow_and_flags & PSH_shadowed ) 5.10 + if ( unlikely(current->mm.shadow_mode) && 5.11 + (get_shadow_status(current, page-frame_table) & PSH_shadowed) ) 5.12 + { 5.13 unshadow_table( page-frame_table, type ); 5.14 -#endif 5.15 + put_shadow_status(current); 5.16 + } 5.17 return; 5.18 5.19 case PGT_l2_page_table: 5.20 free_l2_table(page); 5.21 -#ifdef CONFIG_SHADOW 5.22 - // assume we're in shadow mode if PSH_shadowed set 5.23 - if ( current->mm.shadowmode && page->shadow_and_flags & PSH_shadowed ) 5.24 + if ( unlikely(current->mm.shadow_mode) && 5.25 + (get_shadow_status(current, page-frame_table) & PSH_shadowed) ) 5.26 + { 5.27 unshadow_table( page-frame_table, type ); 5.28 -#endif 5.29 + put_shadow_status(current); 5.30 + } 5.31 return; 5.32 5.33 default: 5.34 @@ -848,21 +850,22 @@ static int do_extended_command(unsigned 5.35 put_page_and_type(&frame_table[pagetable_val(current->mm.pagetable) 5.36 >> PAGE_SHIFT]); 5.37 current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT); 5.38 -#ifdef CONFIG_SHADOW 5.39 - current->mm.shadowtable = 5.40 - shadow_mk_pagetable(pfn << PAGE_SHIFT, current->mm.shadowmode); 5.41 -#endif 5.42 - invalidate_shadow_ldt(); 5.43 + 5.44 + if( unlikely(current->mm.shadow_mode)) 5.45 + current->mm.shadow_table = 5.46 + shadow_mk_pagetable(current, pfn<<PAGE_SHIFT); 5.47 5.48 + invalidate_shadow_ldt(); 5.49 + 5.50 + // start using the new PT straight away 5.51 percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; 5.52 -#ifdef CONFIG_SHADOW 5.53 - if ( unlikely(current->mm.shadowmode) ) 5.54 + if ( unlikely(current->mm.shadow_mode) ) 5.55 { 5.56 - check_pagetable( current->mm.pagetable, "pre-stlb-flush" ); 5.57 - write_cr3_counted(pagetable_val(current->mm.shadowtable)); 5.58 + check_pagetable( current, 5.59 + current->mm.pagetable, "pre-stlb-flush" ); 5.60 + write_cr3_counted(pagetable_val(current->mm.shadow_table)); 5.61 } 5.62 else 5.63 -#endif 5.64 write_cr3_counted(pagetable_val(current->mm.pagetable)); 5.65 } 5.66 else 5.67 @@ -947,10 +950,8 @@ int do_mmu_update(mmu_update_t *ureqs, i 5.68 struct pfn_info *page; 5.69 int rc = 0, okay = 1, i, cpu = smp_processor_id(); 5.70 unsigned int cmd; 5.71 -#ifdef CONFIG_SHADOW 5.72 unsigned long prev_spfn = 0; 5.73 l1_pgentry_t *prev_spl1e = 0; 5.74 -#endif 5.75 5.76 perfc_incrc(calls_to_mmu_update); 5.77 perfc_addc(num_page_updates, count); 5.78 @@ -1002,11 +1003,14 @@ int do_mmu_update(mmu_update_t *ureqs, i 5.79 okay = mod_l1_entry((l1_pgentry_t *)va, 5.80 mk_l1_pgentry(req.val)); 5.81 5.82 -#ifdef CONFIG_SHADOW 5.83 - if ( okay && page->shadow_and_flags & PSH_shadowed ) 5.84 + if ( okay && unlikely(current->mm.shadow_mode) && 5.85 + (get_shadow_status(current, page-frame_table) & 5.86 + PSH_shadowed) ) 5.87 + { 5.88 shadow_l1_normal_pt_update( req.ptr, req.val, 5.89 &prev_spfn, &prev_spl1e ); 5.90 -#endif 5.91 + put_shadow_status(current); 5.92 + } 5.93 5.94 put_page_type(page); 5.95 } 5.96 @@ -1017,10 +1021,14 @@ int do_mmu_update(mmu_update_t *ureqs, i 5.97 okay = mod_l2_entry((l2_pgentry_t *)va, 5.98 mk_l2_pgentry(req.val), 5.99 pfn); 5.100 -#ifdef CONFIG_SHADOW 5.101 - if ( okay && page->shadow_and_flags & PSH_shadowed ) 5.102 + 5.103 + if ( okay && 
unlikely(current->mm.shadow_mode) && 5.104 + (get_shadow_status(current, page-frame_table) & 5.105 + PSH_shadowed) ) 5.106 + { 5.107 shadow_l2_normal_pt_update( req.ptr, req.val ); 5.108 -#endif 5.109 + put_shadow_status(current); 5.110 + } 5.111 5.112 put_page_type(page); 5.113 } 5.114 @@ -1032,19 +1040,11 @@ int do_mmu_update(mmu_update_t *ureqs, i 5.115 okay = 1; 5.116 put_page_type(page); 5.117 5.118 -#ifdef CONFIG_SHADOW 5.119 - if ( page->shadow_and_flags & PSH_shadowed ) 5.120 - BUG(); 5.121 - // at present, we shouldn't be shadowing such pages 5.122 -#endif 5.123 - 5.124 - 5.125 + // at present, we don't shadowing such pages 5.126 } 5.127 break; 5.128 } 5.129 5.130 -check_pagetable( current->mm.pagetable, "mmu" ); // XXX XXX XXX XXX XXX 5.131 - 5.132 put_page(page); 5.133 5.134 break; 5.135 @@ -1087,25 +1087,22 @@ check_pagetable( current->mm.pagetable, 5.136 if ( prev_pfn != 0 ) 5.137 unmap_domain_mem((void *)va); 5.138 5.139 -#ifdef CONFIG_SHADOW 5.140 if( prev_spl1e != 0 ) 5.141 unmap_domain_mem((void *)prev_spl1e); 5.142 -#endif 5.143 5.144 deferred_ops = percpu_info[cpu].deferred_ops; 5.145 percpu_info[cpu].deferred_ops = 0; 5.146 5.147 if ( deferred_ops & DOP_FLUSH_TLB ) 5.148 { 5.149 -#ifdef CONFIG_SHADOW 5.150 - if ( unlikely(current->mm.shadowmode) ) 5.151 + if ( unlikely(current->mm.shadow_mode) ) 5.152 { 5.153 - check_pagetable( current->mm.pagetable, "pre-stlb-flush" ); 5.154 - write_cr3_counted(pagetable_val(current->mm.shadowtable)); 5.155 + check_pagetable( current, 5.156 + current->mm.pagetable, "pre-stlb-flush" ); 5.157 + write_cr3_counted(pagetable_val(current->mm.shadow_table)); 5.158 } 5.159 else 5.160 -#endif 5.161 - write_cr3_counted(pagetable_val(current->mm.pagetable)); 5.162 + write_cr3_counted(pagetable_val(current->mm.pagetable)); 5.163 } 5.164 5.165 if ( deferred_ops & DOP_RELOAD_LDT ) 5.166 @@ -1142,9 +1139,7 @@ int do_update_va_mapping(unsigned long p 5.167 mk_l1_pgentry(val))) ) 5.168 err = -EINVAL; 5.169 5.170 -#ifdef CONFIG_SHADOW 5.171 - 5.172 - if ( unlikely(p->mm.shadowmode) ) 5.173 + if ( unlikely(p->mm.shadow_mode) ) 5.174 { 5.175 unsigned long sval = 0; 5.176 5.177 @@ -1164,14 +1159,14 @@ int do_update_va_mapping(unsigned long p 5.178 { 5.179 // Since L2's are guranteed RW, failure indicates the page 5.180 // was not shadowed, so ignore. 5.181 - 5.182 + perfc_incrc(shadow_update_va_fail); 5.183 //MEM_LOG("update_va_map: couldn't write update\n"); 5.184 } 5.185 + 5.186 + check_pagetable( p, p->mm.pagetable, "va" ); // debug 5.187 + 5.188 } 5.189 5.190 -check_pagetable( p->mm.pagetable, "va" ); 5.191 - 5.192 -#endif 5.193 5.194 deferred_ops = percpu_info[cpu].deferred_ops; 5.195 percpu_info[cpu].deferred_ops = 0; 5.196 @@ -1179,12 +1174,10 @@ check_pagetable( p->mm.pagetable, "va" ) 5.197 if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 5.198 unlikely(flags & UVMF_FLUSH_TLB) ) 5.199 { 5.200 -#ifdef CONFIG_SHADOW 5.201 - if ( unlikely(p->mm.shadowmode) ) 5.202 - write_cr3_counted(pagetable_val(p->mm.shadowtable)); 5.203 + if ( unlikely(p->mm.shadow_mode) ) 5.204 + write_cr3_counted(pagetable_val(p->mm.shadow_table)); 5.205 else 5.206 -#endif 5.207 - write_cr3_counted(pagetable_val(p->mm.pagetable)); 5.208 + write_cr3_counted(pagetable_val(p->mm.pagetable)); 5.209 } 5.210 else if ( unlikely(flags & UVMF_INVLPG) ) 5.211 __flush_tlb_one(page_nr << PAGE_SHIFT);
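A pattern worth calling out in the memory.c hunks: get_shadow_status() doubles as the lock acquisition. As defined in xen/include/xeno/shadow.h below, it takes the domain's shadow_lock, and on a miss it drops the lock itself before returning 0, so callers pair put_shadow_status() only with a successful lookup. Every update site in this file therefore follows the same shape:

    /* hedged sketch of the update idiom; 'page' and 'req' are the locals
     * of do_mmu_update() above                                           */
    if (okay && unlikely(current->mm.shadow_mode) &&
        (get_shadow_status(current, page - frame_table) & PSH_shadowed))
    {
        /* shadow_lock is held here, so the shadow PT can't change under us */
        shadow_l2_normal_pt_update(req.ptr, req.val);
        put_shadow_status(current);   /* releases mm.shadow_lock */
    }
    /* on a miss, get_shadow_status() has already released the lock */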
6.1 --- a/xen/common/perfc.c Sun Mar 21 19:14:29 2004 +0000 6.2 +++ b/xen/common/perfc.c Thu Mar 25 01:50:35 2004 +0000 6.3 @@ -103,7 +103,7 @@ void perfc_reset(u_char key, void *dev_i 6.4 for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ ) 6.5 atomic_set(&counters[j],0); 6.6 case TYPE_S_ARRAY: 6.7 - counters += j; 6.8 + counters += perfc_info[i].nr_elements; 6.9 break; 6.10 } 6.11 }
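The perfc.c change is a real bug fix, not a rename: TYPE_ARRAY falls through into TYPE_S_ARRAY, and the old counters += j only happened to be right on the fall-through path, where the preceding loop leaves j == nr_elements. Reached directly, TYPE_S_ARRAY advanced the pointer by whatever stale j was left over from an earlier counter. Roughly (case labels reconstructed from the fall-through, so treat this as an illustration):

    switch (perfc_info[i].type) {
    case TYPE_ARRAY:
        for (j = 0; j < perfc_info[i].nr_elements; j++)
            atomic_set(&counters[j], 0);   /* reset, leaving j == nr_elements */
        /* fall through */
    case TYPE_S_ARRAY:
        counters += perfc_info[i].nr_elements; /* was 'counters += j': stale
                                                  when we jump straight here */
        break;
    }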
7.1 --- a/xen/common/shadow.c Sun Mar 21 19:14:29 2004 +0000 7.2 +++ b/xen/common/shadow.c Thu Mar 25 01:50:35 2004 +0000 7.3 @@ -7,70 +7,161 @@ 7.4 #include <asm/domain_page.h> 7.5 #include <asm/page.h> 7.6 7.7 -#ifdef CONFIG_SHADOW 7.8 - 7.9 - 7.10 -#if SHADOW_DEBUG 7.11 -#define MEM_VLOG(_f, _a...) \ 7.12 - printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \ 7.13 - current->domain , __LINE__ , ## _a ) 7.14 -#else 7.15 -#define MEM_VLOG(_f, _a...) 7.16 -#endif 7.17 - 7.18 -#if 0 7.19 -#define MEM_VVLOG(_f, _a...) \ 7.20 - printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \ 7.21 - current->domain , __LINE__ , ## _a ) 7.22 -#else 7.23 -#define MEM_VVLOG(_f, _a...) 7.24 -#endif 7.25 - 7.26 7.27 /******** 7.28 7.29 To use these shadow page tables, guests must not rely on the ACCESSED 7.30 and DIRTY bits on L2 pte's being accurate -- they will typically all be set. 7.31 7.32 + 7.33 I doubt this will break anything. (If guests want to use the va_update 7.34 mechanism they've signed up for this anyhow...) 7.35 7.36 ********/ 7.37 7.38 7.39 -pagetable_t shadow_mk_pagetable( unsigned long gptbase, 7.40 - unsigned int shadowmode ) 7.41 +int shadow_mode_enable( struct task_struct *p, unsigned int mode ) 7.42 { 7.43 - unsigned long gpfn, spfn=0; 7.44 + struct shadow_status **fptr; 7.45 + int i; 7.46 + 7.47 + // sychronously stop domain 7.48 + // XXX for the moment, only use on already stopped domains!!! 7.49 + 7.50 + spin_lock_init(&p->mm.shadow_lock); 7.51 + spin_lock(&p->mm.shadow_lock); 7.52 7.53 - MEM_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )", 7.54 - gptbase, shadowmode ); 7.55 + p->mm.shadow_mode = mode; 7.56 + 7.57 + // allocate hashtable 7.58 + p->mm.shadow_ht = kmalloc( shadow_ht_buckets * 7.59 + sizeof(struct shadow_status), GFP_KERNEL ); 7.60 + if( ! p->mm.shadow_ht ) 7.61 + goto nomem; 7.62 + 7.63 + memset( p->mm.shadow_ht, 0, shadow_ht_buckets * 7.64 + sizeof(struct shadow_status) ); 7.65 + 7.66 7.67 - if ( unlikely(shadowmode) ) 7.68 + // allocate space for first lot of extra nodes 7.69 + p->mm.shadow_ht_extras = kmalloc( sizeof(void*) + (shadow_ht_extra_size * 7.70 + sizeof(struct shadow_status)), GFP_KERNEL ); 7.71 + 7.72 + if( ! 
p->mm.shadow_ht_extras ) 7.73 + goto nomem; 7.74 + 7.75 + memset( p->mm.shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size * 7.76 + sizeof(struct shadow_status)) ); 7.77 + 7.78 + // add extras to free list 7.79 + fptr = &p->mm.shadow_ht_free; 7.80 + for ( i=0; i<shadow_ht_extra_size; i++ ) 7.81 { 7.82 - gpfn = gptbase >> PAGE_SHIFT; 7.83 - 7.84 - if ( likely(frame_table[gpfn].shadow_and_flags & PSH_shadowed) ) 7.85 - { 7.86 - spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask; 7.87 + *fptr = &p->mm.shadow_ht_extras[i]; 7.88 + fptr = &(p->mm.shadow_ht_extras[i].next); 7.89 + } 7.90 + *fptr = NULL; 7.91 + *((struct shadow_status ** ) &p->mm.shadow_ht_extras[shadow_ht_extra_size]) = NULL; 7.92 + 7.93 + spin_unlock(&p->mm.shadow_lock); 7.94 + 7.95 + // call shadow_mk_pagetable 7.96 + p->mm.shadow_table = shadow_mk_pagetable( p, 7.97 + pagetable_val(p->mm.pagetable) ); 7.98 + 7.99 + return 0; 7.100 + 7.101 +nomem: 7.102 + spin_unlock(&p->mm.shadow_lock); 7.103 + return -ENOMEM; 7.104 +} 7.105 + 7.106 +void shadow_mode_disable( ) 7.107 +{ 7.108 + 7.109 + // free the hash buckets as you go 7.110 + 7.111 + // free the hashtable itself 7.112 +} 7.113 + 7.114 + 7.115 +static inline void free_shadow_page( struct task_struct *p, unsigned int pfn ) 7.116 +{ 7.117 + unsigned long flags; 7.118 + 7.119 + p->mm.shadow_page_count--; 7.120 + 7.121 + spin_lock_irqsave(&free_list_lock, flags); 7.122 + list_add(&frame_table[pfn].list, &free_list); 7.123 + free_pfns++; 7.124 + spin_unlock_irqrestore(&free_list_lock, flags); 7.125 +} 7.126 + 7.127 +static inline struct pfn_info *alloc_shadow_page( struct task_struct *p ) 7.128 +{ 7.129 + p->mm.shadow_page_count++; 7.130 + 7.131 + return alloc_domain_page( NULL ); 7.132 +} 7.133 + 7.134 + 7.135 +static void __free_shadow_table( struct task_struct *p ) 7.136 +{ 7.137 + int j; 7.138 + struct shadow_status *a; 7.139 + 7.140 + // the code assumes you're not using the page tables i.e. 7.141 + // the domain is stopped and cr3 is something else!! 
7.142 + 7.143 + // walk the hash table and call free_shadow_page on all pages 7.144 + 7.145 + for(j=0;j<shadow_ht_buckets;j++) 7.146 + { 7.147 + a = &p->mm.shadow_ht[j]; 7.148 + if (a->pfn) 7.149 + { 7.150 + free_shadow_page( p, a->spfn_and_flags & PSH_pfn_mask ); 7.151 + a->pfn = 0; 7.152 + a->spfn_and_flags = 0; 7.153 + } 7.154 + a=a->next; 7.155 + while(a) 7.156 + { 7.157 + struct shadow_status *next = a->next; 7.158 + free_shadow_page( p, a->spfn_and_flags & PSH_pfn_mask ); 7.159 + a->pfn = 0; 7.160 + a->spfn_and_flags = 0; 7.161 + a->next = p->mm.shadow_ht_free; 7.162 + p->mm.shadow_ht_free = a; 7.163 + a=next; 7.164 } 7.165 - else 7.166 - { 7.167 - spfn = shadow_l2_table( gpfn ); 7.168 - } 7.169 } 7.170 +} 7.171 7.172 - return mk_pagetable(spfn << PAGE_SHIFT); 7.173 +static void flush_shadow_table( struct task_struct *p ) 7.174 +{ 7.175 + 7.176 + // XXX synchronously stop domain (needed for SMP guests) 7.177 + 7.178 + // switch to idle task's page tables 7.179 + 7.180 + // walk the hash table and call free_shadow_page on all pages 7.181 + spin_lock(&p->mm.shadow_lock); 7.182 + __free_shadow_table( p ); 7.183 + spin_unlock(&p->mm.shadow_lock); 7.184 + 7.185 + // XXX unpause domain 7.186 } 7.187 7.188 + 7.189 + 7.190 void unshadow_table( unsigned long gpfn, unsigned int type ) 7.191 { 7.192 unsigned long spfn; 7.193 7.194 - MEM_VLOG("unshadow_table type=%08x gpfn=%08lx, spfn=%08lx", 7.195 + SH_VLOG("unshadow_table type=%08x gpfn=%08lx", 7.196 type, 7.197 - gpfn, 7.198 - frame_table[gpfn].shadow_and_flags & PSH_pfn_mask ); 7.199 + gpfn ); 7.200 7.201 perfc_incrc(unshadow_table_count); 7.202 7.203 @@ -79,9 +170,8 @@ void unshadow_table( unsigned long gpfn, 7.204 // even in the SMP guest case, there won't be a race here as 7.205 // this CPU was the one that cmpxchg'ed the page to invalid 7.206 7.207 - spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask; 7.208 - frame_table[gpfn].shadow_and_flags=0; 7.209 - frame_table[spfn].shadow_and_flags=0; 7.210 + spfn = __shadow_status(current, gpfn) & PSH_pfn_mask; 7.211 + delete_shadow_status(current, gpfn); 7.212 7.213 #if 0 // XXX leave as might be useful for later debugging 7.214 { 7.215 @@ -101,27 +191,21 @@ void unshadow_table( unsigned long gpfn, 7.216 else 7.217 perfc_decr(shadow_l2_pages); 7.218 7.219 - //free_domain_page( &frame_table[spfn] ); 7.220 - 7.221 - { 7.222 - unsigned long flags; 7.223 - spin_lock_irqsave(&free_list_lock, flags); 7.224 - list_add(&frame_table[spfn].list, &free_list); 7.225 - free_pfns++; 7.226 - spin_unlock_irqrestore(&free_list_lock, flags); 7.227 - } 7.228 + free_shadow_page( current, spfn ); 7.229 7.230 } 7.231 7.232 7.233 -unsigned long shadow_l2_table( unsigned long gpfn ) 7.234 +static unsigned long shadow_l2_table( 7.235 + struct task_struct *p, unsigned long gpfn ) 7.236 { 7.237 struct pfn_info *spfn_info; 7.238 unsigned long spfn; 7.239 l2_pgentry_t *spl2e, *gpl2e; 7.240 int i; 7.241 7.242 - MEM_VVLOG("shadow_l2_table( %08lx )",gpfn); 7.243 + SH_VVLOG("shadow_l2_table( %08lx )",gpfn); 7.244 + spin_lock(&p->mm.shadow_lock); 7.245 7.246 perfc_incrc(shadow_l2_table_count); 7.247 perfc_incr(shadow_l2_pages); 7.248 @@ -129,17 +213,14 @@ unsigned long shadow_l2_table( unsigned 7.249 // XXX in future, worry about racing in SMP guests 7.250 // -- use cmpxchg with PSH_pending flag to show progress (and spin) 7.251 7.252 - spfn_info = alloc_domain_page( NULL ); // XXX account properly later 7.253 + spfn_info = alloc_shadow_page(p); 7.254 7.255 ASSERT( spfn_info ); // XXX deal with failure later e.g. 
blow cache 7.256 7.257 spfn = (unsigned long) (spfn_info - frame_table); 7.258 7.259 // mark pfn as being shadowed, update field to point at shadow 7.260 - frame_table[gpfn].shadow_and_flags = spfn | PSH_shadowed; 7.261 - 7.262 - // mark shadow pfn as being a shadow, update field to point at pfn 7.263 - frame_table[spfn].shadow_and_flags = gpfn | PSH_shadow; 7.264 + set_shadow_status(p, gpfn, spfn | PSH_shadowed); 7.265 7.266 // we need to do this before the linear map is set up 7.267 spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT); 7.268 @@ -172,11 +253,11 @@ unsigned long shadow_l2_table( unsigned 7.269 if (gpte & _PAGE_PRESENT) 7.270 { 7.271 unsigned long s_sh = 7.272 - frame_table[ gpte>>PAGE_SHIFT ].shadow_and_flags; 7.273 + __shadow_status(p, gpte>>PAGE_SHIFT); 7.274 7.275 if( s_sh & PSH_shadowed ) // PSH_shadowed 7.276 { 7.277 - if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) ) 7.278 + if ( unlikely( (__shadow_status(p, gpte>>PAGE_SHIFT) & PGT_type_mask) == PGT_l2_page_table) ) 7.279 { 7.280 printk("Linear mapping detected\n"); 7.281 spte = gpte & ~_PAGE_RW; 7.282 @@ -203,33 +284,61 @@ unsigned long shadow_l2_table( unsigned 7.283 unmap_domain_mem( gpl2e ); 7.284 unmap_domain_mem( spl2e ); 7.285 7.286 - MEM_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn); 7.287 + SH_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn); 7.288 7.289 - 7.290 + spin_unlock(&p->mm.shadow_lock); 7.291 return spfn; 7.292 } 7.293 7.294 +pagetable_t shadow_mk_pagetable( struct task_struct *p, 7.295 + unsigned long gptbase) 7.296 +{ 7.297 + unsigned long gpfn, spfn=0; 7.298 + 7.299 + SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )", 7.300 + gptbase, p->mm.shadow_mode ); 7.301 + 7.302 + if ( likely(p->mm.shadow_mode) ) // should always be true if we're here 7.303 + { 7.304 + gpfn = gptbase >> PAGE_SHIFT; 7.305 + 7.306 + if ( unlikely((spfn=__shadow_status(p, gpfn)) == 0 ) ) 7.307 + { 7.308 + spfn = shadow_l2_table(p, gpfn ); 7.309 + } 7.310 + } 7.311 + 7.312 + SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d )", 7.313 + gptbase, p->mm.shadow_mode ); 7.314 + 7.315 + return mk_pagetable(spfn<<PAGE_SHIFT); 7.316 +} 7.317 7.318 int shadow_fault( unsigned long va, long error_code ) 7.319 { 7.320 unsigned long gpte, spte; 7.321 7.322 - MEM_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code ); 7.323 + SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code ); 7.324 7.325 - check_pagetable( current->mm.pagetable, "pre-sf" ); 7.326 + spin_lock(¤t->mm.shadow_lock); 7.327 + 7.328 + check_pagetable( current, current->mm.pagetable, "pre-sf" ); 7.329 7.330 if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) ) 7.331 { 7.332 - MEM_VVLOG("shadow_fault - EXIT: read gpte faulted" ); 7.333 + SH_VVLOG("shadow_fault - EXIT: read gpte faulted" ); 7.334 + spin_unlock(¤t->mm.shadow_lock); 7.335 return 0; // propagate to guest 7.336 } 7.337 7.338 if ( ! 
(gpte & _PAGE_PRESENT) ) 7.339 { 7.340 - MEM_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte ); 7.341 + SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte ); 7.342 + spin_unlock(¤t->mm.shadow_lock); 7.343 return 0; // we're not going to be able to help 7.344 } 7.345 7.346 + 7.347 spte = gpte; 7.348 7.349 if ( error_code & 2 ) 7.350 @@ -242,7 +351,8 @@ int shadow_fault( unsigned long va, long 7.351 } 7.352 else 7.353 { // write fault on RO page 7.354 - MEM_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte ); 7.355 + SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte ); 7.356 + spin_unlock(¤t->mm.shadow_lock); 7.357 return 0; // propagate to guest 7.358 // not clear whether we should set accessed bit here... 7.359 } 7.360 @@ -255,7 +365,7 @@ int shadow_fault( unsigned long va, long 7.361 spte &= ~_PAGE_RW; // force clear unless already dirty 7.362 } 7.363 7.364 - MEM_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte ); 7.365 + SH_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte ); 7.366 7.367 // write back updated gpte 7.368 // XXX watch out for read-only L2 entries! (not used in Linux) 7.369 @@ -269,13 +379,13 @@ int shadow_fault( unsigned long va, long 7.370 7.371 unsigned long gpde, spde, gl1pfn, sl1pfn; 7.372 7.373 - MEM_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte ); 7.374 + SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte ); 7.375 7.376 gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]); 7.377 7.378 gl1pfn = gpde>>PAGE_SHIFT; 7.379 7.380 - if ( ! (frame_table[gl1pfn].shadow_and_flags & PSH_shadowed ) ) 7.381 + if ( ! (sl1pfn=__shadow_status(current, gl1pfn) ) ) 7.382 { 7.383 // this L1 is NOT already shadowed so we need to shadow it 7.384 struct pfn_info *sl1pfn_info; 7.385 @@ -284,12 +394,11 @@ int shadow_fault( unsigned long va, long 7.386 sl1pfn_info = alloc_domain_page( NULL ); // XXX account properly! 
7.387 sl1pfn = sl1pfn_info - frame_table; 7.388 7.389 - MEM_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn); 7.390 + SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn); 7.391 perfc_incrc(shadow_l1_table_count); 7.392 perfc_incr(shadow_l1_pages); 7.393 7.394 - sl1pfn_info->shadow_and_flags = PSH_shadow | gl1pfn; 7.395 - frame_table[gl1pfn].shadow_and_flags = PSH_shadowed | sl1pfn; 7.396 + set_shadow_status(current, gl1pfn, PSH_shadowed | sl1pfn); 7.397 7.398 gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY; 7.399 spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<<PAGE_SHIFT); 7.400 @@ -330,9 +439,7 @@ int shadow_fault( unsigned long va, long 7.401 // this L1 was shadowed (by another PT) but we didn't have an L2 7.402 // entry for it 7.403 7.404 - sl1pfn = frame_table[gl1pfn].shadow_and_flags & PSH_pfn_mask; 7.405 - 7.406 - MEM_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn); 7.407 + SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn); 7.408 7.409 spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY; 7.410 7.411 @@ -341,7 +448,7 @@ int shadow_fault( unsigned long va, long 7.412 7.413 if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gl1pfn<<PAGE_SHIFT) ) ) 7.414 { // detect linear map, and keep pointing at guest 7.415 - MEM_VLOG("4c: linear mapping ( %08lx )",sl1pfn); 7.416 + SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn); 7.417 spde = (spde & ~PAGE_MASK) | (gl1pfn<<PAGE_SHIFT); 7.418 } 7.419 7.420 @@ -358,7 +465,9 @@ int shadow_fault( unsigned long va, long 7.421 7.422 perfc_incrc(shadow_fixup_count); 7.423 7.424 - check_pagetable( current->mm.pagetable, "post-sf" ); 7.425 + check_pagetable( current, current->mm.pagetable, "post-sf" ); 7.426 + 7.427 + spin_unlock(¤t->mm.shadow_lock); 7.428 7.429 return 1; // let's try the faulting instruction again... 7.430 7.431 @@ -373,13 +482,13 @@ void shadow_l1_normal_pt_update( unsigne 7.432 l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr; 7.433 7.434 7.435 -MEM_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n", 7.436 +SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n", 7.437 pa,gpte,prev_spfn, prev_spl1e); 7.438 7.439 // to get here, we know the l1 page *must* be shadowed 7.440 7.441 gpfn = pa >> PAGE_SHIFT; 7.442 - spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask; 7.443 + spfn = __shadow_status(current, gpfn) & PSH_pfn_mask; 7.444 7.445 if ( spfn == prev_spfn ) 7.446 { 7.447 @@ -417,21 +526,23 @@ void shadow_l2_normal_pt_update( unsigne 7.448 { 7.449 unsigned long gpfn, spfn, spte; 7.450 l2_pgentry_t * sp2le; 7.451 - unsigned long s_sh; 7.452 + unsigned long s_sh=0; 7.453 7.454 - MEM_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte); 7.455 + SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte); 7.456 7.457 // to get here, we know the l2 page has a shadow 7.458 7.459 gpfn = pa >> PAGE_SHIFT; 7.460 - spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask; 7.461 + spfn = __shadow_status(current, gpfn) & PSH_pfn_mask; 7.462 + 7.463 + 7.464 + spte = 0; 7.465 + 7.466 + if( gpte & _PAGE_PRESENT ) 7.467 + s_sh = __shadow_status(current, gpte >> PAGE_SHIFT); 7.468 7.469 sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT ); 7.470 // no real need for a cache here 7.471 - 7.472 - spte = 0; 7.473 - 7.474 - s_sh = frame_table[gpte >> PAGE_SHIFT].shadow_and_flags; 7.475 7.476 if ( s_sh ) // PSH_shadowed 7.477 { 7.478 @@ -463,7 +574,8 @@ char * sh_check_name; 7.479 #define FAIL(_f, _a...) 
\ 7.480 {printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n", sh_check_name, level, i, ## _a , gpte, spte ); BUG();} 7.481 7.482 -int check_pte( unsigned long gpte, unsigned long spte, int level, int i ) 7.483 +static int check_pte( struct task_struct *p, 7.484 + unsigned long gpte, unsigned long spte, int level, int i ) 7.485 { 7.486 unsigned long mask, gpfn, spfn; 7.487 7.488 @@ -504,42 +616,24 @@ int check_pte( unsigned long gpte, unsig 7.489 if ( level > 1 ) 7.490 FAIL("Linear map ???"); // XXX this will fail on BSD 7.491 7.492 -#if 0 // might be a RO mapping of a page table page 7.493 - if ( frame_table[gpfn].shadow_and_flags != 0 ) 7.494 - { 7.495 - FAIL("Should have been shadowed g.sf=%08lx s.sf=%08lx", 7.496 - frame_table[gpfn].shadow_and_flags, 7.497 - frame_table[spfn].shadow_and_flags); 7.498 - } 7.499 - else 7.500 -#endif 7.501 - return 1; 7.502 + return 1; 7.503 } 7.504 else 7.505 { 7.506 if ( level < 2 ) 7.507 FAIL("Shadow in L1 entry?"); 7.508 7.509 - if ( frame_table[gpfn].shadow_and_flags != (PSH_shadowed | spfn) ) 7.510 - FAIL("spfn problem g.sf=%08lx s.sf=%08lx [g.sf]=%08lx [s.sf]=%08lx", 7.511 - frame_table[gpfn].shadow_and_flags, 7.512 - frame_table[spfn].shadow_and_flags, 7.513 - frame_table[frame_table[gpfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags, 7.514 - frame_table[frame_table[spfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags 7.515 - ); 7.516 - 7.517 - if ( frame_table[spfn].shadow_and_flags != (PSH_shadow | gpfn) ) 7.518 - FAIL("gpfn problem g.sf=%08lx s.sf=%08lx", 7.519 - frame_table[gpfn].shadow_and_flags, 7.520 - frame_table[spfn].shadow_and_flags); 7.521 - 7.522 + if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) ) 7.523 + FAIL("spfn problem g.sf=%08lx", 7.524 + __shadow_status(p, gpfn) ); 7.525 } 7.526 7.527 return 1; 7.528 } 7.529 7.530 7.531 -int check_l1_table( unsigned long va, unsigned long g2, unsigned long s2 ) 7.532 +static int check_l1_table( struct task_struct *p, unsigned long va, 7.533 + unsigned long g2, unsigned long s2 ) 7.534 { 7.535 int j; 7.536 unsigned long *gpl1e, *spl1e; 7.537 @@ -555,7 +649,7 @@ int check_l1_table( unsigned long va, un 7.538 unsigned long gpte = gpl1e[j]; 7.539 unsigned long spte = spl1e[j]; 7.540 7.541 - check_pte( gpte, spte, 1, j ); 7.542 + check_pte( p, gpte, spte, 1, j ); 7.543 } 7.544 7.545 unmap_domain_mem( spl1e ); 7.546 @@ -567,7 +661,7 @@ int check_l1_table( unsigned long va, un 7.547 #define FAILPT(_f, _a...) \ 7.548 {printk("XXX FAIL %s-PT" _f "\n", s, ## _a ); BUG();} 7.549 7.550 -int check_pagetable( pagetable_t pt, char *s ) 7.551 +int check_pagetable( struct task_struct *p, pagetable_t pt, char *s ) 7.552 { 7.553 unsigned long gptbase = pagetable_val(pt); 7.554 unsigned long gpfn, spfn; 7.555 @@ -576,29 +670,26 @@ int check_pagetable( pagetable_t pt, cha 7.556 7.557 sh_check_name = s; 7.558 7.559 - MEM_VVLOG("%s-PT Audit",s); 7.560 + SH_VVLOG("%s-PT Audit",s); 7.561 7.562 sh_l2_present = sh_l1_present = 0; 7.563 7.564 gpfn = gptbase >> PAGE_SHIFT; 7.565 7.566 - if ( ! (frame_table[gpfn].shadow_and_flags & PSH_shadowed) ) 7.567 + if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) ) 7.568 { 7.569 printk("%s-PT %08lx not shadowed\n", s, gptbase); 7.570 7.571 - if( frame_table[gpfn].shadow_and_flags != 0 ) BUG(); 7.572 + if( __shadow_status(p, gpfn) != 0 ) BUG(); 7.573 7.574 return 0; 7.575 } 7.576 7.577 - spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask; 7.578 + spfn = __shadow_status(p, gpfn) & PSH_pfn_mask; 7.579 7.580 - if ( ! 
frame_table[gpfn].shadow_and_flags == (PSH_shadowed | spfn) ) 7.581 + if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) ) 7.582 FAILPT("ptbase shadow inconsistent1"); 7.583 7.584 - if ( ! frame_table[spfn].shadow_and_flags == (PSH_shadow | gpfn) ) 7.585 - FAILPT("ptbase shadow inconsistent2"); 7.586 - 7.587 gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT ); 7.588 spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT ); 7.589 7.590 @@ -641,7 +732,7 @@ int check_pagetable( pagetable_t pt, cha 7.591 unsigned long gpte = l2_pgentry_val(gpl2e[i]); 7.592 unsigned long spte = l2_pgentry_val(spl2e[i]); 7.593 7.594 - check_pte( gpte, spte, 2, i ); 7.595 + check_pte( p, gpte, spte, 2, i ); 7.596 } 7.597 7.598 7.599 @@ -652,7 +743,7 @@ int check_pagetable( pagetable_t pt, cha 7.600 unsigned long spte = l2_pgentry_val(spl2e[i]); 7.601 7.602 if ( spte ) 7.603 - check_l1_table( 7.604 + check_l1_table( p, 7.605 i<<L2_PAGETABLE_SHIFT, 7.606 gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT ); 7.607 7.608 @@ -661,7 +752,7 @@ int check_pagetable( pagetable_t pt, cha 7.609 unmap_domain_mem( spl2e ); 7.610 unmap_domain_mem( gpl2e ); 7.611 7.612 - MEM_VVLOG("PT verified : l2_present = %d, l1_present = %d\n", 7.613 + SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n", 7.614 sh_l2_present, sh_l1_present ); 7.615 7.616 return 1; 7.617 @@ -671,7 +762,6 @@ int check_pagetable( pagetable_t pt, cha 7.618 #endif 7.619 7.620 7.621 -#endif // CONFIG_SHADOW 7.622 7.623 7.624
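With the table in place, shadow_mk_pagetable() becomes a straight lookup-or-create on the guest's top-level page table; the context switch in process.c and the cr3 updates in memory.c both lean on this. Condensed from the function above:

    /* condensed from shadow_mk_pagetable() */
    gpfn = gptbase >> PAGE_SHIFT;
    if (unlikely((spfn = __shadow_status(p, gpfn)) == 0))
        spfn = shadow_l2_table(p, gpfn);   /* build and hash a new shadow L2 */
    return mk_pagetable(spfn << PAGE_SHIFT);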
8.1 --- a/xen/include/asm-i386/config.h Sun Mar 21 19:14:29 2004 +0000 8.2 +++ b/xen/include/asm-i386/config.h Thu Mar 25 01:50:35 2004 +0000 8.3 @@ -40,8 +40,6 @@ 8.4 8.5 #define CONFIG_XEN_ATTENTION_KEY 1 8.6 8.7 -#define CONFIG_SHADOW 1 8.8 - 8.9 8.10 #define HZ 100 8.11
9.1 --- a/xen/include/asm-i386/processor.h Sun Mar 21 19:14:29 2004 +0000 9.2 +++ b/xen/include/asm-i386/processor.h Thu Mar 25 01:50:35 2004 +0000 9.3 @@ -12,6 +12,7 @@ 9.4 #include <asm/cpufeature.h> 9.5 #include <asm/desc.h> 9.6 #include <xeno/config.h> 9.7 +#include <xeno/spinlock.h> 9.8 #include <hypervisor-ifs/hypervisor-if.h> 9.9 9.10 struct task_struct; 9.11 @@ -416,10 +417,14 @@ struct mm_struct { 9.12 l1_pgentry_t *perdomain_pt; 9.13 pagetable_t pagetable; 9.14 9.15 -#ifdef CONFIG_SHADOW 9.16 - unsigned int shadowmode; /* flags to control shadow table operation */ 9.17 - pagetable_t shadowtable; 9.18 -#endif 9.19 + unsigned int shadow_mode; /* flags to control shadow table operation */ 9.20 + pagetable_t shadow_table; 9.21 + spinlock_t shadow_lock; 9.22 + struct shadow_status *shadow_ht; 9.23 + struct shadow_status *shadow_ht_free; 9.24 + struct shadow_status *shadow_ht_extras; // extra allocation units 9.25 + unsigned int shadow_page_count; 9.26 + unsigned int shadow_max_page_count; 9.27 9.28 /* Current LDT details. */ 9.29 unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
10.1 --- a/xen/include/xeno/mm.h Sun Mar 21 19:14:29 2004 +0000 10.2 +++ b/xen/include/xeno/mm.h Thu Mar 25 01:50:35 2004 +0000 10.3 @@ -67,10 +67,6 @@ struct pfn_info 10.4 unsigned long type_and_flags; 10.5 /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */ 10.6 unsigned long tlbflush_timestamp; 10.7 -#ifdef CONFIG_SHADOW 10.8 - /* Shadow page status: top bits flags, bottom bits are a pfn */ 10.9 - unsigned long shadow_and_flags; 10.10 -#endif 10.11 }; 10.12 10.13 /* The following page types are MUTUALLY EXCLUSIVE. */
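This mm.h hunk is the "avoid increasing pfn_info size" half of the commit message: the per-page shadow_and_flags word disappears from frame_table, and the information moves into the hash table above. A back-of-envelope comparison, assuming a 32-bit build with 4 KB pages (illustrative numbers, not taken from the source):

    per-page field : 4 B x (1 GB / 4 KB) = 4 B x 262144 pages ~= 1 MB of
                     frame_table per GB of RAM, paid by every system
    hash table     : (256 buckets + 128 spare nodes) x 12 B ~= 4.5 KB,
                     paid only by domains that enable shadow mode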
11.1 --- a/xen/include/xeno/shadow.h Sun Mar 21 19:14:29 2004 +0000 11.2 +++ b/xen/include/xeno/shadow.h Thu Mar 25 01:50:35 2004 +0000 11.3 @@ -3,15 +3,13 @@ 11.4 #ifndef _XENO_SHADOW_H 11.5 #define _XENO_SHADOW_H 11.6 11.7 -#ifdef CONFIG_SHADOW 11.8 - 11.9 #include <xeno/config.h> 11.10 #include <xeno/types.h> 11.11 #include <xeno/mm.h> 11.12 +#include <xeno/perfc.h> 11.13 11.14 /* Shadow PT flag bits in pfn_info */ 11.15 #define PSH_shadowed (1<<31) /* page has a shadow. PFN points to shadow */ 11.16 -#define PSH_shadow (1<<30) /* page is a shadow. PFN points to orig page */ 11.17 #define PSH_pending (1<<29) /* page is in the process of being shadowed */ 11.18 #define PSH_pfn_mask ((1<<21)-1) 11.19 11.20 @@ -24,28 +22,323 @@ 11.21 #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START) 11.22 #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT)))) 11.23 11.24 -extern pagetable_t shadow_mk_pagetable( unsigned long gptbase, unsigned int shadowmode ); 11.25 -extern void unshadow_table( unsigned long gpfn, unsigned int type ); 11.26 -extern unsigned long shadow_l2_table( unsigned long gpfn ); 11.27 +extern pagetable_t shadow_mk_pagetable( struct task_struct *p, 11.28 + unsigned long gptbase); 11.29 extern int shadow_fault( unsigned long va, long error_code ); 11.30 extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte, 11.31 unsigned long *prev_spfn_ptr, 11.32 l1_pgentry_t **prev_spl1e_ptr ); 11.33 extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte ); 11.34 - 11.35 +extern void unshadow_table( unsigned long gpfn, unsigned int type ); 11.36 +extern int shadow_mode_enable( struct task_struct *p, unsigned int mode ); 11.37 11.38 #define SHADOW_DEBUG 0 11.39 +#define SHADOW_HASH_DEBUG 0 11.40 #define SHADOW_OPTIMISE 1 11.41 11.42 -#endif // end of CONFIG_SHADOW 11.43 +struct shadow_status { 11.44 + unsigned long pfn; // gpfn 11.45 + unsigned long spfn_and_flags; // spfn plus flags 11.46 + struct shadow_status *next; // use pull-to-front list. 11.47 +}; 11.48 + 11.49 +#define shadow_ht_extra_size 128 /*128*/ 11.50 +#define shadow_ht_buckets 256 /*256*/ 11.51 + 11.52 +#ifndef NDEBUG 11.53 +#define SH_LOG(_f, _a...) \ 11.54 + printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \ 11.55 + current->domain , __LINE__ , ## _a ) 11.56 +#else 11.57 +#define SH_LOG(_f, _a...) 11.58 +#endif 11.59 11.60 #if SHADOW_DEBUG 11.61 -extern int check_pagetable( pagetable_t pt, char *s ); 11.62 +#define SH_VLOG(_f, _a...) \ 11.63 + printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \ 11.64 + current->domain , __LINE__ , ## _a ) 11.65 #else 11.66 -#define check_pagetable( pt, s ) 11.67 +#define SH_VLOG(_f, _a...) 11.68 +#endif 11.69 + 11.70 +#if 0 11.71 +#define SH_VVLOG(_f, _a...) \ 11.72 + printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \ 11.73 + current->domain , __LINE__ , ## _a ) 11.74 +#else 11.75 +#define SH_VVLOG(_f, _a...) 
11.76 #endif 11.77 11.78 11.79 11.80 +#if SHADOW_HASH_DEBUG 11.81 +static void shadow_audit(struct task_struct *p, int print) 11.82 +{ 11.83 + int live=0, free=0, j=0, abs; 11.84 + struct shadow_status *a; 11.85 + 11.86 + for(j=0;j<shadow_ht_buckets;j++) 11.87 + { 11.88 + a = &p->mm.shadow_ht[j]; 11.89 + if(a->pfn) live++; 11.90 + while(a->next && live<9999) 11.91 + { 11.92 + live++; 11.93 + if(a->pfn == 0) 11.94 + { 11.95 + printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n", 11.96 + live, a->pfn, a->spfn_and_flags, a->next); 11.97 + BUG(); 11.98 + } 11.99 + a=a->next; 11.100 + } 11.101 + ASSERT(live<9999); 11.102 + } 11.103 + 11.104 + a = p->mm.shadow_ht_free; 11.105 + while(a) { free++; a=a->next; } 11.106 + 11.107 + if(print) printk("live=%d free=%d\n",live,free); 11.108 + 11.109 + abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live; 11.110 + if( abs < -1 || abs > 1 ) 11.111 + { 11.112 + printk("live=%d free=%d l1=%d l2=%d\n",live,free, 11.113 + perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) ); 11.114 + BUG(); 11.115 + } 11.116 + 11.117 +} 11.118 + 11.119 +#else 11.120 +#define shadow_audit(p, print) 11.121 +#endif 11.122 + 11.123 +static inline struct shadow_status* hash_bucket( struct task_struct *p, 11.124 + unsigned int gpfn ) 11.125 +{ 11.126 + return &(p->mm.shadow_ht[gpfn % shadow_ht_buckets]); 11.127 +} 11.128 + 11.129 + 11.130 +static inline unsigned long __shadow_status( struct task_struct *p, 11.131 + unsigned int gpfn ) 11.132 +{ 11.133 + struct shadow_status **ob, *b, *B = hash_bucket( p, gpfn ); 11.134 + 11.135 + b = B; 11.136 + ob = NULL; 11.137 + 11.138 + SH_VVLOG("lookup gpfn=%08lx bucket=%08lx", gpfn, b ); 11.139 + shadow_audit(p,0); // if in debug mode 11.140 + 11.141 + do 11.142 + { 11.143 + if ( b->pfn == gpfn ) 11.144 + { 11.145 + unsigned long t; 11.146 + struct shadow_status *x; 11.147 + 11.148 + // swap with head 11.149 + t=B->pfn; B->pfn=b->pfn; b->pfn=t; 11.150 + t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags; 11.151 + b->spfn_and_flags=t; 11.152 + 11.153 + if(ob) 11.154 + { // pull to front 11.155 + *ob=b->next; 11.156 + x=B->next; 11.157 + B->next=b; 11.158 + b->next=x; 11.159 + } 11.160 + return B->spfn_and_flags; 11.161 + } 11.162 +#if SHADOW_HASH_DEBUG 11.163 + else 11.164 + { 11.165 + if(b!=B)ASSERT(b->pfn); 11.166 + } 11.167 +#endif 11.168 + ob=&b->next; 11.169 + b=b->next; 11.170 + } 11.171 + while (b); 11.172 + 11.173 + return 0; 11.174 +} 11.175 + 11.176 +/* we can make this locking more fine grained e.g. per shadow page if it 11.177 +ever becomes a problem, but since we need a spin lock on the hash table 11.178 +anyway its probably not worth being too clever. 
*/ 11.179 + 11.180 +static inline unsigned long get_shadow_status( struct task_struct *p, 11.181 + unsigned int gpfn ) 11.182 +{ 11.183 + unsigned long res; 11.184 + 11.185 + spin_lock(&p->mm.shadow_lock); 11.186 + res = __shadow_status( p, gpfn ); 11.187 + if (!res) spin_unlock(&p->mm.shadow_lock); 11.188 + return res; 11.189 +} 11.190 + 11.191 + 11.192 +static inline void put_shadow_status( struct task_struct *p ) 11.193 +{ 11.194 + spin_unlock(&p->mm.shadow_lock); 11.195 +} 11.196 + 11.197 + 11.198 +static inline void delete_shadow_status( struct task_struct *p, 11.199 + unsigned int gpfn ) 11.200 +{ 11.201 + struct shadow_status *b, *B, **ob; 11.202 + 11.203 + B = b = hash_bucket( p, gpfn ); 11.204 + 11.205 + SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b ); 11.206 + shadow_audit(p,0); 11.207 + ASSERT(gpfn); 11.208 + 11.209 + if( b->pfn == gpfn ) 11.210 + { 11.211 + if (b->next) 11.212 + { 11.213 + struct shadow_status *D=b->next; 11.214 + b->spfn_and_flags = b->next->spfn_and_flags; 11.215 + b->pfn = b->next->pfn; 11.216 + 11.217 + b->next = b->next->next; 11.218 + D->next = p->mm.shadow_ht_free; 11.219 + p->mm.shadow_ht_free = D; 11.220 + } 11.221 + else 11.222 + { 11.223 + b->pfn = 0; 11.224 + b->spfn_and_flags = 0; 11.225 + } 11.226 + 11.227 +#if SHADOW_HASH_DEBUG 11.228 + if( __shadow_status(p,gpfn) ) BUG(); 11.229 +#endif 11.230 + return; 11.231 + } 11.232 + 11.233 + ob = &b->next; 11.234 + b=b->next; 11.235 + 11.236 + do 11.237 + { 11.238 + if ( b->pfn == gpfn ) 11.239 + { 11.240 + b->pfn = 0; 11.241 + b->spfn_and_flags = 0; 11.242 + 11.243 + // b is in the list 11.244 + *ob=b->next; 11.245 + b->next = p->mm.shadow_ht_free; 11.246 + p->mm.shadow_ht_free = b; 11.247 + 11.248 +#if SHADOW_HASH_DEBUG 11.249 + if( __shadow_status(p,gpfn) ) BUG(); 11.250 +#endif 11.251 + return; 11.252 + } 11.253 + 11.254 + ob = &b->next; 11.255 + b=b->next; 11.256 + } 11.257 + while (b); 11.258 + 11.259 + // if we got here, it wasn't in the list 11.260 + BUG(); 11.261 +} 11.262 + 11.263 + 11.264 +static inline void set_shadow_status( struct task_struct *p, 11.265 + unsigned int gpfn, unsigned long s ) 11.266 +{ 11.267 + struct shadow_status *b, *B, *extra, **fptr; 11.268 + int i; 11.269 + 11.270 + B = b = hash_bucket( p, gpfn ); 11.271 + 11.272 + ASSERT(gpfn); 11.273 + ASSERT(s); 11.274 + SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next ); 11.275 + shadow_audit(p,0); 11.276 + 11.277 + do 11.278 + { 11.279 + if ( b->pfn == gpfn ) 11.280 + { 11.281 + b->spfn_and_flags = s; 11.282 + return; 11.283 + } 11.284 + 11.285 + b=b->next; 11.286 + } 11.287 + while (b); 11.288 + 11.289 + // if we got here, this is an insert rather than update 11.290 + 11.291 + ASSERT( s ); // deletes must have succeeded by here 11.292 + 11.293 + if ( B->pfn == 0 ) 11.294 + { 11.295 + // we can use this head 11.296 + ASSERT( B->next == 0 ); 11.297 + B->pfn = gpfn; 11.298 + B->spfn_and_flags = s; 11.299 + return; 11.300 + } 11.301 + 11.302 + if( unlikely(p->mm.shadow_ht_free == NULL) ) 11.303 + { 11.304 + SH_LOG("allocate more shadow hashtable blocks"); 11.305 + 11.306 + // we need to allocate more space 11.307 + extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size * 11.308 + sizeof(struct shadow_status)), GFP_KERNEL ); 11.309 + 11.310 + if( ! extra ) BUG(); // should be more graceful here.... 
11.311 + 11.312 + memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size * 11.313 + sizeof(struct shadow_status)) ); 11.314 + 11.315 + // add extras to free list 11.316 + fptr = &p->mm.shadow_ht_free; 11.317 + for ( i=0; i<shadow_ht_extra_size; i++ ) 11.318 + { 11.319 + *fptr = &extra[i]; 11.320 + fptr = &(extra[i].next); 11.321 + } 11.322 + *fptr = NULL; 11.323 + 11.324 + *((struct shadow_status ** ) &p->mm.shadow_ht[shadow_ht_extra_size]) = 11.325 + p->mm.shadow_ht_extras; 11.326 + p->mm.shadow_ht_extras = extra; 11.327 + 11.328 + } 11.329 + 11.330 + // should really put this in B to go right to front 11.331 + b = p->mm.shadow_ht_free; 11.332 + p->mm.shadow_ht_free = b->next; 11.333 + b->spfn_and_flags = s; 11.334 + b->pfn = gpfn; 11.335 + b->next = B->next; 11.336 + B->next = b; 11.337 + 11.338 + return; 11.339 +} 11.340 + 11.341 + 11.342 + 11.343 +#if SHADOW_DEBUG 11.344 +extern int check_pagetable( struct task_struct *p, pagetable_t pt, char *s ); 11.345 +#else 11.346 +#define check_pagetable( p, pt, s ) 11.347 +#endif 11.348 + 11.349 11.350 #endif
12.1 --- a/xen/net/dev.c Sun Mar 21 19:14:29 2004 +0000 12.2 +++ b/xen/net/dev.c Thu Mar 25 01:50:35 2004 +0000 12.3 @@ -494,6 +494,7 @@ void deliver_packet(struct sk_buff *skb, 12.4 unsigned short size; 12.5 unsigned char offset, status = RING_STATUS_OK; 12.6 struct task_struct *p = vif->domain; 12.7 + unsigned long spte_pfn; 12.8 12.9 memcpy(skb->mac.ethernet->h_dest, vif->vmac, ETH_ALEN); 12.10 if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP ) 12.11 @@ -546,21 +547,18 @@ void deliver_packet(struct sk_buff *skb, 12.12 goto out; 12.13 } 12.14 12.15 - 12.16 -#ifdef CONFIG_SHADOW 12.17 - if ( pte_page->shadow_and_flags & PSH_shadowed ) 12.18 + if ( p->mm.shadow_mode && 12.19 + (spte_pfn=get_shadow_status(p, pte_page-frame_table)) ) 12.20 { 12.21 - unsigned long spte_pfn = pte_page->shadow_and_flags & PSH_pfn_mask; 12.22 unsigned long *sptr = map_domain_mem( (spte_pfn<<PAGE_SHIFT) | 12.23 (((unsigned long)ptep)&~PAGE_MASK) ); 12.24 12.25 - // save the fault later 12.26 + // avoid the fault later 12.27 *sptr = new_pte; 12.28 12.29 - unmap_domain_mem( sptr ); 12.30 + unmap_domain_mem(sptr); 12.31 + put_shadow_status(p); 12.32 } 12.33 -#endif 12.34 - 12.35 12.36 machine_to_phys_mapping[new_page - frame_table] 12.37 = machine_to_phys_mapping[old_page - frame_table]; 12.38 @@ -2068,7 +2066,7 @@ static void get_rx_bufs(net_vif_t *vif) 12.39 rx_shadow_entry_t *srx; 12.40 unsigned long pte_pfn, buf_pfn; 12.41 struct pfn_info *pte_page, *buf_page; 12.42 - unsigned long *ptep, pte; 12.43 + unsigned long *ptep, pte, spfn; 12.44 12.45 spin_lock(&vif->rx_lock); 12.46 12.47 @@ -2114,21 +2112,16 @@ static void get_rx_bufs(net_vif_t *vif) 12.48 goto rx_unmap_and_continue; 12.49 } 12.50 12.51 -#ifdef CONFIG_SHADOW 12.52 - { 12.53 - if ( frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_shadowed ) 12.54 - { 12.55 - unsigned long spfn = 12.56 - frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_pfn_mask; 12.57 - unsigned long * sptr = map_domain_mem( (spfn<<PAGE_SHIFT) | (rx.addr&~PAGE_MASK) ); 12.58 + if ( p->mm.shadow_mode && 12.59 + (spfn=get_shadow_status(p, rx.addr>>PAGE_SHIFT)) ) 12.60 + { 12.61 + unsigned long * sptr = 12.62 + map_domain_mem( (spfn<<PAGE_SHIFT) | (rx.addr&~PAGE_MASK) ); 12.63 12.64 - *sptr = 0; 12.65 - unmap_domain_mem( sptr ); 12.66 - 12.67 - } 12.68 - 12.69 - } 12.70 -#endif 12.71 + *sptr = 0; 12.72 + unmap_domain_mem( sptr ); 12.73 + put_shadow_status(p); 12.74 + } 12.75 12.76 buf_pfn = pte >> PAGE_SHIFT; 12.77 buf_page = &frame_table[buf_pfn];
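The two net/dev.c hunks close the loop on the locking contract: here too, get_shadow_status() is the lock acquisition, so the shadow PTE write, the unmap and put_shadow_status() all sit inside the branch taken on a hit. A condensed sketch of the receive-buffer case (locals as in get_rx_bufs() above):

    if (p->mm.shadow_mode &&
        (spfn = get_shadow_status(p, rx.addr >> PAGE_SHIFT)))
    {
        /* invalidate the shadow copy of the guest PTE just modified */
        unsigned long *sptr =
            map_domain_mem((spfn << PAGE_SHIFT) | (rx.addr & ~PAGE_MASK));
        *sptr = 0;
        unmap_domain_mem(sptr);
        put_shadow_status(p);   /* drop the lock taken on the hit */
    }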