ia64/xen-unstable

changeset 1199:c4b1105fbc14

bitkeeper revision 1.794.1.1 (405b85b44Vh_3MMuChrmhJ9H5nxbyw)

basic shadow support
author iap10@labyrinth.cl.cam.ac.uk
date Fri Mar 19 23:43:48 2004 +0000 (2004-03-19)
parents 39a4998c88fc
children d0ff128be81d
files .rootkeys BitKeeper/etc/ignore xen/arch/i386/process.c xen/arch/i386/traps.c xen/common/debug.c xen/common/domain.c xen/common/domain_page.c xen/common/kernel.c xen/common/memory.c xen/common/shadow.c xen/include/asm-i386/config.h xen/include/asm-i386/page.h xen/include/asm-i386/processor.h xen/include/xeno/mm.h xen/include/xeno/perfc_defn.h xen/include/xeno/shadow.h xen/net/dev.c
line diff
     1.1 --- a/.rootkeys	Fri Mar 12 18:12:23 2004 +0000
     1.2 +++ b/.rootkeys	Fri Mar 19 23:43:48 2004 +0000
     1.3 @@ -158,6 +158,7 @@ 4051bcecFeq4DE70p4zGO5setf47CA xen/commo
     1.4  4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c
     1.5  3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
     1.6  3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
     1.7 +405b8599xI_PoEr3zZoJ2on-jdn7iw xen/common/shadow.c
     1.8  3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen/common/slab.c
     1.9  3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen/common/softirq.c
    1.10  3e7f358awXBC3Vw-wFRwPw18qL1khg xen/common/string.c
    1.11 @@ -552,6 +553,7 @@ 4006e65fWMwLqcocgik6wbF0Eeh0Og xen/inclu
    1.12  3e4540ccU1sgCx8seIMGlahmMfv7yQ xen/include/xeno/reboot.h
    1.13  3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen/include/xeno/sched.h
    1.14  403a06a7H0hpHcKpAiDe5BPnaXWTlA xen/include/xeno/serial.h
    1.15 +405b8599BsDsDwKEJLS0XipaiQW3TA xen/include/xeno/shadow.h
    1.16  3ddb79c0VDeD-Oft5eNfMneTU3D1dQ xen/include/xeno/skbuff.h
    1.17  3ddb79c14dXIhP7C2ahnoD08K90G_w xen/include/xeno/slab.h
    1.18  3ddb79c09xbS-xxfKxuV3JETIhBzmg xen/include/xeno/smp.h
     2.1 --- a/BitKeeper/etc/ignore	Fri Mar 12 18:12:23 2004 +0000
     2.2 +++ b/BitKeeper/etc/ignore	Fri Mar 19 23:43:48 2004 +0000
     2.3 @@ -548,3 +548,13 @@ tools/xentrace/xentrace
     2.4  tools/xc/lib/xc_evtchn.o
     2.5  tools/xc/py/XenoUtil.pyc
     2.6  tools/xend/xend
     2.7 +tools/xc/lib/libxc.so.1.3
     2.8 +tools/xc/lib/libxc.so.1.3.0
     2.9 +tools/xc/lib/xc_physdev.o
    2.10 +tools/xend/xend_utils.o
    2.11 +xen/common/physdev.o
    2.12 +xen/common/shadow.o
    2.13 +xen/common/trace.o
    2.14 +xen/drivers/char/console.o
    2.15 +xen/drivers/char/keyboard.o
    2.16 +xen/include/hypervisor-ifs/arch
     3.1 --- a/xen/arch/i386/process.c	Fri Mar 12 18:12:23 2004 +0000
     3.2 +++ b/xen/arch/i386/process.c	Fri Mar 19 23:43:48 2004 +0000
     3.3 @@ -32,6 +32,7 @@
     3.4  
     3.5  #include <xeno/irq.h>
     3.6  #include <xeno/event.h>
     3.7 +#include <xeno/shadow.h>
     3.8  
     3.9  int hlt_counter;
    3.10  
    3.11 @@ -281,7 +282,24 @@ void switch_to(struct task_struct *prev_
    3.12      }
    3.13  
    3.14      /* Switch page tables.  */
    3.15 -    write_cr3_counted(pagetable_val(next_p->mm.pagetable));
    3.16 +#ifdef CONFIG_SHADOW
    3.17 +
    3.18 +    /*    printk("switch_to %08lx, %08lx\n", next_p->mm.pagetable,
    3.19 +	   next_p->mm.shadowtable);*/
    3.20 +
    3.21 +
    3.22 +    if( next_p->mm.shadowmode )
    3.23 +      {
    3.24 +	write_cr3_counted(pagetable_val(next_p->mm.shadowtable));
    3.25 +	check_pagetable( next_p->mm.pagetable, "switch" );
    3.26 +      }
    3.27 +    else
    3.28 +#endif
    3.29 +      write_cr3_counted(pagetable_val(next_p->mm.pagetable));
    3.30 +
    3.31 +
    3.32 +
    3.33 +
    3.34  
    3.35      set_current(next_p);
    3.36  
     4.1 --- a/xen/arch/i386/traps.c	Fri Mar 12 18:12:23 2004 +0000
     4.2 +++ b/xen/arch/i386/traps.c	Fri Mar 19 23:43:48 2004 +0000
     4.3 @@ -39,6 +39,7 @@
     4.4  #include <xeno/spinlock.h>
     4.5  #include <xeno/irq.h>
     4.6  #include <xeno/perfc.h>
     4.7 +#include <xeno/shadow.h>
     4.8  #include <asm/domain_page.h>
     4.9  #include <asm/system.h>
    4.10  #include <asm/io.h>
    4.11 @@ -323,6 +324,8 @@ asmlinkage void do_page_fault(struct pt_
    4.12  
    4.13      __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
    4.14  
    4.15 +    perfc_incrc(page_faults);
    4.16 +
    4.17      if ( unlikely(addr >= LDT_VIRT_START) && 
    4.18           (addr < (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) )
    4.19      {
    4.20 @@ -336,6 +339,18 @@ asmlinkage void do_page_fault(struct pt_
    4.21              return; /* successfully copied the mapping */
    4.22      }
    4.23  
    4.24 +#ifdef CONFIG_SHADOW
    4.25 +//printk("1");
    4.26 +check_pagetable( current->mm.pagetable, "pre-sf" );
    4.27 +    if ( p->mm.shadowmode && addr < PAGE_OFFSET &&
    4.28 +	 shadow_fault( addr, error_code ) )
    4.29 +      {
    4.30 +	check_pagetable( current->mm.pagetable, "post-sfa" );
     4.31 +	return; // shadow_fault handled it; retry the faulting instruction
    4.32 +      }
    4.33 +    check_pagetable( current->mm.pagetable, "post-sfb" );
    4.34 +#endif
    4.35 +
    4.36      if ( unlikely(!(regs->xcs & 3)) )
    4.37          goto fault_in_hypervisor;
    4.38  
    4.39 @@ -353,7 +368,8 @@ asmlinkage void do_page_fault(struct pt_
    4.40  
    4.41      if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    4.42      {
    4.43 -        DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup);
    4.44 +        perfc_incrc(copy_user_faults);
    4.45 +        //DPRINTK("copy_user fault: %08lx -> %08lx\n", regs->eip, fixup);
    4.46          regs->eip = fixup;
    4.47          regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
    4.48          return;
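
The fault-handling order that do_page_fault now follows, condensed as a sketch (not the literal code):

    /* 1. LDT shadow-page fixup, as before;
     * 2. if the domain runs on shadow tables and the fault is in the
     *    guest range (addr < PAGE_OFFSET), offer it to shadow_fault()
     *    first -- a nonzero return means the fault was absorbed and
     *    do_page_fault returns so the instruction retries;
     * 3. anything else propagates to the guest/hypervisor paths. */
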
     5.1 --- a/xen/common/debug.c	Fri Mar 12 18:12:23 2004 +0000
     5.2 +++ b/xen/common/debug.c	Fri Mar 19 23:43:48 2004 +0000
     5.3 @@ -91,7 +91,13 @@ int pdb_change_values(domid_t domain, u_
     5.4  
     5.5      if ((addr >> PAGE_SHIFT) == ((addr + length - 1) >> PAGE_SHIFT))
     5.6      {
     5.7 -        l2_table = map_domain_mem(pagetable_val(p->mm.pagetable));
     5.8 +#ifdef CONFIG_SHADOW
     5.9 +        if (p->mm.shadowmode )
    5.10 +          l2_table = map_domain_mem(pagetable_val(p->mm.shadowtable));
    5.11 +	else
    5.12 +#endif
    5.13 +          l2_table = map_domain_mem(pagetable_val(p->mm.pagetable));
    5.14 +
    5.15  	l2_table += l2_table_offset(addr);
    5.16  	if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT)) 
    5.17  	{
     6.1 --- a/xen/common/domain.c	Fri Mar 12 18:12:23 2004 +0000
     6.2 +++ b/xen/common/domain.c	Fri Mar 19 23:43:48 2004 +0000
     6.3 @@ -9,6 +9,7 @@
     6.4  #include <xeno/delay.h>
     6.5  #include <xeno/event.h>
     6.6  #include <xeno/time.h>
     6.7 +#include <xeno/shadow.h>
     6.8  #include <hypervisor-ifs/dom0_ops.h>
     6.9  #include <asm/io.h>
    6.10  #include <asm/domain_page.h>
    6.11 @@ -546,6 +547,10 @@ int final_setup_guestos(struct task_stru
    6.12      get_page_and_type(&frame_table[phys_l2tab>>PAGE_SHIFT], p, 
    6.13                        PGT_l2_page_table);
    6.14  
    6.15 +#ifdef CONFIG_SHADOW
    6.16 +    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode);
    6.17 +#endif
    6.18 +
    6.19      /* Set up the shared info structure. */
    6.20      update_dom_time(p->shared_info);
    6.21  
    6.22 @@ -847,6 +852,15 @@ int setup_guestos(struct task_struct *p,
    6.23  
    6.24      set_bit(PF_CONSTRUCTED, &p->flags);
    6.25  
    6.26 +#ifdef CONFIG_SHADOW
    6.27 +
    6.28 +printk("Engage shadow mode for dom 0\n");
    6.29 +    p->mm.shadowmode = SHM_test; // XXXXX IAP
    6.30 +    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode );
    6.31 +#endif
    6.32 +
    6.33 +
    6.34 +
    6.35      new_thread(p, 
    6.36                 (unsigned long)virt_load_address, 
    6.37                 (unsigned long)virt_stack_address, 
     7.1 --- a/xen/common/domain_page.c	Fri Mar 12 18:12:23 2004 +0000
     7.2 +++ b/xen/common/domain_page.c	Fri Mar 19 23:43:48 2004 +0000
     7.3 @@ -45,6 +45,8 @@ void *map_domain_mem(unsigned long pa)
     7.4      unsigned long *cache = mapcache;
     7.5      unsigned long flags;
     7.6  
     7.7 +    perfc_incrc(map_domain_mem_count);
     7.8 +
     7.9      spin_lock_irqsave(&map_lock, flags);
    7.10  
    7.11      /* Has some other CPU caused a wrap? We must flush if so. */
     8.1 --- a/xen/common/kernel.c	Fri Mar 12 18:12:23 2004 +0000
     8.2 +++ b/xen/common/kernel.c	Fri Mar 19 23:43:48 2004 +0000
     8.3 @@ -104,6 +104,7 @@ void cmain(unsigned long magic, multiboo
     8.4      module_t *mod;
     8.5      void *heap_start;
     8.6      int i;
     8.7 +    unsigned long frametable_pages, max_mem;
     8.8  
     8.9      /* Parse the command-line options. */
    8.10      cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL);
    8.11 @@ -190,22 +191,36 @@ void cmain(unsigned long magic, multiboo
    8.12          for ( ; ; ) ;
    8.13      }
    8.14  
    8.15 -    /* The array of pfn_info structures must fit into the reserved area. */
    8.16 -    if ( sizeof(struct pfn_info) > 24 )
    8.17 +    frametable_pages = ((FRAMETABLE_VIRT_END - RDWR_MPT_VIRT_START)/sizeof(struct pfn_info));
    8.18 +
    8.19 +    if ( frametable_pages < (1<<(32-PAGE_SHIFT)) ) 
    8.20      {
    8.21 -        printk("'struct pfn_info' too large to fit in Xen address space!\n");
    8.22 -        for ( ; ; ) ;
    8.23 +      printk("Not enough space to initialise frame table for a 4GB machine (%luMB only)\n", frametable_pages >> (20-PAGE_SHIFT));
    8.24      }
    8.25  
    8.26      set_current(&idle0_task);
    8.27  
    8.28 -    max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
    8.29 +    max_mem = max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
    8.30 +
    8.31 +    if ( max_page > frametable_pages )
    8.32 +      max_page = frametable_pages;
    8.33 +
    8.34      init_frametable(max_page);
    8.35 -    printk("Initialised all memory on a %luMB machine\n",
    8.36 -           max_page >> (20-PAGE_SHIFT));
    8.37 +    printk("Initialised %luMB memory on a %luMB machine\n",
    8.38 +           max_page >> (20-PAGE_SHIFT),
    8.39 +	   max_mem  >> (20-PAGE_SHIFT) );
    8.40  
    8.41      heap_start = memguard_init(&_end);
    8.42  
    8.43 +    printk("Xen heap size is %luKB\n", 
    8.44 +	   (MAX_MONITOR_ADDRESS-__pa(heap_start))/1024 );
    8.45 +
    8.46 +    if ( ((MAX_MONITOR_ADDRESS-__pa(heap_start))/1024) <= 4096 )
    8.47 +    {
    8.48 +        printk("Xen heap size is too small to safely continue!\n");
    8.49 +        for ( ; ; ) ;
    8.50 +    }
    8.51 +
    8.52      init_page_allocator(__pa(heap_start), MAX_MONITOR_ADDRESS);
    8.53   
    8.54      /* Initialise the slab allocator. */
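
A minimal sketch of the new sizing rule, assuming 4KB pages (PAGE_SHIFT == 12). Adding shadow_and_flags to struct pfn_info (see the mm.h hunk below) pushes it past the old 24-byte budget the removed check enforced, so the boot path now clamps instead of refusing to boot:

    /* A 4GB machine has 1 << (32 - 12) = 1048576 frames, each needing
     * one struct pfn_info in the reserved window; frames the window
     * cannot describe are simply dropped from max_page. */
    if ( max_page > frametable_pages )
        max_page = frametable_pages;   /* excess memory goes unused */
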
     9.1 --- a/xen/common/memory.c	Fri Mar 12 18:12:23 2004 +0000
     9.2 +++ b/xen/common/memory.c	Fri Mar 19 23:43:48 2004 +0000
     9.3 @@ -133,6 +133,7 @@
     9.4  #include <xeno/errno.h>
     9.5  #include <xeno/perfc.h>
     9.6  #include <xeno/interrupt.h>
     9.7 +#include <xeno/shadow.h>
     9.8  #include <asm/page.h>
     9.9  #include <asm/flushtlb.h>
    9.10  #include <asm/io.h>
    9.11 @@ -182,6 +183,7 @@ static struct {
    9.12      struct task_struct *subject_p;
    9.13  } percpu_info[NR_CPUS] __cacheline_aligned;
    9.14  
    9.15 +
    9.16  /*
    9.17   * init_frametable:
    9.18   * Initialise per-frame memory information. This goes directly after
    9.19 @@ -768,6 +770,13 @@ void free_page_type(struct pfn_info *pag
    9.20      default:
    9.21          BUG();
    9.22      }
    9.23 +
    9.24 +#ifdef CONFIG_SHADOW
    9.25 +    // assume we're in shadow mode if PSH_shadowed set
    9.26 +    if ( page->shadow_and_flags & PSH_shadowed )
    9.27 +      unshadow_table( page-frame_table );
    9.28 +#endif
    9.29 +
    9.30  }
    9.31  
    9.32  
    9.33 @@ -832,6 +841,10 @@ static int do_extended_command(unsigned 
    9.34              put_page_and_type(&frame_table[pagetable_val(current->mm.pagetable)
    9.35                                            >> PAGE_SHIFT]);
    9.36              current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
    9.37 +#ifdef CONFIG_SHADOW            
    9.38 +	    current->mm.shadowtable = 
    9.39 +	      shadow_mk_pagetable(pfn << PAGE_SHIFT, current->mm.shadowmode);
    9.40 +#endif
    9.41              invalidate_shadow_ldt();
    9.42              percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
    9.43          }
    9.44 @@ -917,6 +930,10 @@ int do_mmu_update(mmu_update_t *ureqs, i
    9.45      struct pfn_info *page;
    9.46      int rc = 0, okay = 1, i, cpu = smp_processor_id();
    9.47      unsigned int cmd;
    9.48 +#ifdef CONFIG_SHADOW
    9.49 +    unsigned long prev_spfn = 0;
    9.50 +    l1_pgentry_t *prev_spl1e = 0;
    9.51 +#endif
    9.52  
    9.53      perfc_incrc(calls_to_mmu_update); 
    9.54      perfc_addc(num_page_updates, count);
    9.55 @@ -967,6 +984,13 @@ int do_mmu_update(mmu_update_t *ureqs, i
    9.56                  {
    9.57                      okay = mod_l1_entry((l1_pgentry_t *)va, 
    9.58                                          mk_l1_pgentry(req.val)); 
    9.59 +
    9.60 +#ifdef CONFIG_SHADOW
    9.61 +		    if ( okay && page->shadow_and_flags & PSH_shadowed )
    9.62 +		        shadow_l1_normal_pt_update( req.ptr, req.val, 
    9.63 +						    &prev_spfn, &prev_spl1e );
    9.64 +#endif
    9.65 +
    9.66                      put_page_type(page);
    9.67                  }
    9.68                  break;
    9.69 @@ -976,6 +1000,11 @@ int do_mmu_update(mmu_update_t *ureqs, i
    9.70                      okay = mod_l2_entry((l2_pgentry_t *)va, 
    9.71                                          mk_l2_pgentry(req.val),
    9.72                                          pfn); 
    9.73 +#ifdef CONFIG_SHADOW
    9.74 +		    if ( okay && page->shadow_and_flags & PSH_shadowed )
    9.75 +		        shadow_l2_normal_pt_update( req.ptr, req.val );
    9.76 +#endif
    9.77 +
    9.78                      put_page_type(page);
    9.79                  }
    9.80                  break;
    9.81 @@ -985,9 +1014,19 @@ int do_mmu_update(mmu_update_t *ureqs, i
    9.82                      *(unsigned long *)va = req.val;
    9.83                      okay = 1;
    9.84                      put_page_type(page);
    9.85 +
    9.86 +#ifdef CONFIG_SHADOW
    9.87 +		    if ( page->shadow_and_flags & PSH_shadowed )
    9.88 +		        BUG(); 
    9.89 +		        // at present, we shouldn't be shadowing such pages
    9.90 +#endif
    9.91 +
    9.92 +
    9.93                  }
    9.94                  break;
    9.95              }
    9.96 +
    9.97 +check_pagetable( current->mm.pagetable, "mmu" ); // XXX XXX XXX XXX XXX
    9.98              
    9.99              put_page(page);
   9.100  
   9.101 @@ -1031,11 +1070,23 @@ int do_mmu_update(mmu_update_t *ureqs, i
   9.102      if ( prev_pfn != 0 )
   9.103          unmap_domain_mem((void *)va);
   9.104  
   9.105 +#ifdef CONFIG_SHADOW
   9.106 +    if( prev_spl1e != 0 ) 
   9.107 +        unmap_domain_mem((void *)prev_spl1e);
   9.108 +#endif
   9.109 +
   9.110      deferred_ops = percpu_info[cpu].deferred_ops;
   9.111      percpu_info[cpu].deferred_ops = 0;
   9.112  
   9.113      if ( deferred_ops & DOP_FLUSH_TLB )
   9.114 -        write_cr3_counted(pagetable_val(current->mm.pagetable));
   9.115 +    {
   9.116 +#ifdef CONFIG_SHADOW
   9.117 +        if ( unlikely(current->mm.shadowmode) )
   9.118 +	  write_cr3_counted(pagetable_val(current->mm.shadowtable));
   9.119 +        else
   9.120 +#endif	  
   9.121 +	  write_cr3_counted(pagetable_val(current->mm.pagetable));
   9.122 +    }
   9.123  
   9.124      if ( deferred_ops & DOP_RELOAD_LDT )
   9.125          (void)map_ldt_shadow_page(0);
   9.126 @@ -1059,19 +1110,62 @@ int do_update_va_mapping(unsigned long p
   9.127      unsigned int cpu = p->processor;
   9.128      unsigned long deferred_ops;
   9.129  
   9.130 +    perfc_incrc(calls_to_update_va);
   9.131 +
   9.132      if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
   9.133          return -EINVAL;
   9.134  
   9.135 +    // XXX when we make this support 4MB pages we should also
   9.136 +    // deal with the case of updating L2s
   9.137 +
   9.138      if ( unlikely(!mod_l1_entry(&linear_pg_table[page_nr], 
   9.139                                  mk_l1_pgentry(val))) )
   9.140          err = -EINVAL;
   9.141  
   9.142 +#ifdef CONFIG_SHADOW
   9.143 +
   9.144 +    if ( unlikely(p->mm.shadowmode) )
   9.145 +    {
   9.146 +        unsigned long sval = 0;
   9.147 +
   9.148 +	// XXX this only works for l1 entries, with no translation
   9.149 +
   9.150 +        if ( (val & _PAGE_PRESENT) && (val & _PAGE_ACCESSED) )
   9.151 +        {
   9.152 +	    sval = val;
   9.153 +            if ( !(val & _PAGE_DIRTY) ) 
   9.154 +	        sval &= ~_PAGE_RW;
   9.155 +	}
   9.156 +
   9.157 +	/*	printk("update_va_map: page_nr=%08lx val =%08lx sval =%08lx\n", 
   9.158 +	       page_nr, val, sval);*/
   9.159 +
   9.160 +	if ( __put_user( sval, ((unsigned long *) (&shadow_linear_pg_table[page_nr])) ) )
   9.161 +	{
    9.162 +	    // Since L2's are guaranteed RW, failure indicates the page
   9.163 +	    // was not shadowed, so ignore.
   9.164 +            
   9.165 +	    //MEM_LOG("update_va_map: couldn't write update\n");	
   9.166 +	}
   9.167 +    }
   9.168 +
   9.169 +check_pagetable( p->mm.pagetable, "va" );
   9.170 +
   9.171 +#endif
   9.172 +
   9.173      deferred_ops = percpu_info[cpu].deferred_ops;
   9.174      percpu_info[cpu].deferred_ops = 0;
   9.175  
   9.176      if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 
   9.177           unlikely(flags & UVMF_FLUSH_TLB) )
   9.178 -        write_cr3_counted(pagetable_val(p->mm.pagetable));
   9.179 +    {
   9.180 +#ifdef CONFIG_SHADOW
   9.181 +        if ( unlikely(p->mm.shadowmode) )
   9.182 +          write_cr3_counted(pagetable_val(p->mm.shadowtable));
   9.183 +        else
   9.184 +#endif
   9.185 +          write_cr3_counted(pagetable_val(p->mm.pagetable));
   9.186 +    }
   9.187      else if ( unlikely(flags & UVMF_INVLPG) )
   9.188          __flush_tlb_one(page_nr << PAGE_SHIFT);
   9.189  
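
The guest-PTE-to-shadow-PTE rule used in do_update_va_mapping above recurs throughout shadow.c below. A minimal consolidated sketch for SHM_test mode with no translation (the helper name mk_spte is illustrative, not from the changeset):

    /* Only a PRESENT+ACCESSED guest PTE earns a shadow entry, and RW is
     * granted only once the guest PTE is DIRTY -- so the first write
     * faults into shadow_fault(), which sets DIRTY and upgrades. */
    static inline unsigned long mk_spte(unsigned long gpte)
    {
        unsigned long spte = 0;
        if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED)) ==
             (_PAGE_PRESENT|_PAGE_ACCESSED) )
        {
            spte = gpte;
            if ( !(gpte & _PAGE_DIRTY) )
                spte &= ~_PAGE_RW;
        }
        return spte;
    }
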
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/xen/common/shadow.c	Fri Mar 19 23:43:48 2004 +0000
    10.3 @@ -0,0 +1,618 @@
    10.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*- */
    10.5 +
    10.6 +#include <xeno/config.h>
    10.7 +#include <xeno/types.h>
    10.8 +#include <xeno/mm.h>
    10.9 +#include <xeno/shadow.h>
   10.10 +#include <asm/domain_page.h>
   10.11 +#include <asm/page.h>
   10.12 +
   10.13 +#ifdef CONFIG_SHADOW
   10.14 +
   10.15 +
   10.16 +#if 1
   10.17 +#define MEM_VLOG(_f, _a...)                             \
   10.18 +  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
   10.19 +         current->domain , __LINE__ , ## _a )
   10.20 +#else
   10.21 +#define MEM_VLOG(_f, _a...) 
   10.22 +#endif
   10.23 +
   10.24 +#if 0
   10.25 +#define MEM_VVLOG(_f, _a...)                             \
   10.26 +  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
   10.27 +         current->domain , __LINE__ , ## _a )
   10.28 +#else
   10.29 +#define MEM_VVLOG(_f, _a...) 
   10.30 +#endif
   10.31 +
   10.32 +
   10.33 +/********
   10.34 +
   10.35 +To use these shadow page tables, guests must not rely on the ACCESSED
   10.36 +and DIRTY bits on L2 pte's being accurate -- they will typically all be set.
   10.37 +
   10.38 +I doubt this will break anything. (If guests want to use the va_update
   10.39 +mechanism they've signed up for this anyhow...)
   10.40 +
   10.41 +********/
   10.42 +
   10.43 +
   10.44 +pagetable_t shadow_mk_pagetable( unsigned long gptbase, 
   10.45 +					unsigned int shadowmode )
   10.46 +{
   10.47 +	unsigned long gpfn, spfn=0;
   10.48 +
   10.49 +	MEM_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
   10.50 +			 gptbase, shadowmode );
   10.51 +
   10.52 +	if ( unlikely(shadowmode) ) 
   10.53 +	{
   10.54 +		gpfn =  gptbase >> PAGE_SHIFT;
   10.55 +		
   10.56 +		if ( likely(frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
   10.57 +		{
   10.58 +			spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
   10.59 +		}
   10.60 +		else
   10.61 +		{
   10.62 +			spfn = shadow_l2_table( gpfn );
   10.63 +		}      
   10.64 +	}
   10.65 +
   10.66 +	return mk_pagetable(spfn << PAGE_SHIFT);
   10.67 +}
   10.68 +
   10.69 +void unshadow_table( unsigned long gpfn )
   10.70 +{
   10.71 +	unsigned long spfn;
   10.72 +
    10.73 +	MEM_VLOG("unshadow_table %08lx", gpfn );
   10.74 +
   10.75 +	perfc_incrc(unshadow_table_count);
   10.76 +
   10.77 +	// this function is the same for both l1 and l2 tables
   10.78 +
   10.79 +	// even in the SMP guest case, there won't be a race here as
   10.80 +    // this CPU was the one that cmpxchg'ed the page to invalid
   10.81 +
   10.82 +	spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
   10.83 +	frame_table[gpfn].shadow_and_flags=0;
   10.84 +	frame_table[spfn].shadow_and_flags=0;
   10.85 +
   10.86 +#ifdef DEBUG
   10.87 +	{ // XXX delete me!
   10.88 +		int i;
   10.89 +		unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
   10.90 +
   10.91 +		for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   10.92 +	        {
    10.93 +				spl1e[i] = 0xdeadface; // match check_pte's whitelist
   10.94 +			}
   10.95 +		unmap_domain_mem( spl1e );
   10.96 +	}
   10.97 +#endif
   10.98 +
   10.99 +	free_domain_page( &frame_table[spfn] );
  10.100 +}
  10.101 +
  10.102 +
  10.103 +unsigned long shadow_l2_table( unsigned long gpfn )
  10.104 +{
  10.105 +	struct pfn_info *spfn_info;
  10.106 +	unsigned long spfn;
  10.107 +	l2_pgentry_t *spl2e, *gpl2e;
  10.108 +	int i;
  10.109 +
  10.110 +	MEM_VVLOG("shadow_l2_table( %08lx )",gpfn);
  10.111 +
  10.112 +	perfc_incrc(shadow_l2_table_count);
  10.113 +
  10.114 +    // XXX in future, worry about racing in SMP guests 
  10.115 +    //      -- use cmpxchg with PSH_pending flag to show progress (and spin)
  10.116 +
  10.117 +	spfn_info = alloc_domain_page( NULL ); // XXX account properly later 
  10.118 +
  10.119 +    ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache
  10.120 +
  10.121 +	spfn = (unsigned long) (spfn_info - frame_table);
  10.122 +
  10.123 +	// mark pfn as being shadowed, update field to point at shadow
  10.124 +	frame_table[gpfn].shadow_and_flags = spfn | PSH_shadowed;
  10.125 +
  10.126 +	// mark shadow pfn as being a shadow, update field to point at  pfn	
  10.127 +	frame_table[spfn].shadow_and_flags = gpfn | PSH_shadow;
  10.128 +	
  10.129 +	// we need to do this before the linear map is set up
  10.130 +	spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
  10.131 +
   10.132 +	// get hypervisor and 2x linear PT mappings installed 
  10.133 +	memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
  10.134 +           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
  10.135 +           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
  10.136 +    spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
  10.137 +        mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
  10.138 +    spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
  10.139 +        mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
  10.140 +    spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
  10.141 +        mk_l2_pgentry(__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | 
  10.142 +                      __PAGE_HYPERVISOR);
  10.143 +
  10.144 +	// can't use the linear map as we may not be in the right PT
  10.145 +	gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
  10.146 +
  10.147 +	// proactively create entries for pages that are already shadowed
  10.148 +	for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  10.149 +	{
  10.150 +		unsigned long spte = 0;
  10.151 +
  10.152 +#if 0  // Turns out this doesn't really help
  10.153 +        unsigned long gpte;
  10.154 +
  10.155 +        gpte = l2_pgentry_val(gpl2e[i]);
  10.156 +
  10.157 +		if (gpte & _PAGE_PRESENT)
  10.158 +		{
  10.159 +			unsigned long s_sh = 
  10.160 +				frame_table[ gpte>>PAGE_SHIFT ].shadow_and_flags;
  10.161 +
  10.162 +			if( s_sh & PSH_shadowed ) // PSH_shadowed
  10.163 +			{
  10.164 +				if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) )
  10.165 +                {
  10.166 +					printk("Linear mapping detected\n");
  10.167 +  				    spte = gpte & ~_PAGE_RW;
  10.168 +                }
  10.169 +				else
  10.170 +                {
  10.171 +  				    spte = ( gpte & ~PAGE_MASK ) | (s_sh<<PAGE_SHIFT) |
  10.172 +						_PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED ;
  10.173 +				}
  10.174 +				// XXX should probably update guest to ACCESSED|DIRTY too...
  10.175 +
  10.176 +		    }
  10.177 +
  10.178 +		}
  10.179 +#endif
  10.180 +
  10.181 +		spl2e[i] = mk_l2_pgentry( spte );
  10.182 +
  10.183 +	}
  10.184 +
   10.185 +	// it's arguable we should 'preemptively shadow' a few active L1 pages
  10.186 +    // to avoid taking a string of faults when 'jacking' a running domain
  10.187 +
  10.188 +    unmap_domain_mem( gpl2e );
  10.189 +    unmap_domain_mem( spl2e );
  10.190 +
  10.191 +	MEM_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
  10.192 +
  10.193 +
  10.194 +	return spfn;
  10.195 +}
  10.196 +
  10.197 +
  10.198 +int shadow_fault( unsigned long va, long error_code )
  10.199 +{
  10.200 +	unsigned long gpte, spte;
  10.201 +
  10.202 +	MEM_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
  10.203 +
  10.204 +	if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
  10.205 +	{
  10.206 +		MEM_VVLOG("shadow_fault - EXIT: read gpte faulted" );
  10.207 +		return 0;  // propagate to guest
  10.208 +	}
  10.209 +
  10.210 +	if ( ! (gpte & _PAGE_PRESENT) )
  10.211 +	{
  10.212 +		MEM_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
  10.213 +		return 0;  // we're not going to be able to help
  10.214 +    }
  10.215 +
  10.216 +    spte = gpte;
  10.217 +
  10.218 +	if ( error_code & 2  )  
  10.219 +	{  // write fault
  10.220 +		if ( gpte & _PAGE_RW )
  10.221 +	    {
  10.222 +			gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
  10.223 +			spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; 
  10.224 +            // (we're about to dirty it anyhow...)
  10.225 +		}
  10.226 +		else
  10.227 +		{   // write fault on RO page
  10.228 +            MEM_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
  10.229 +			return 0; // propagate to guest
  10.230 +			// not clear whether we should set accessed bit here...
  10.231 +		}
  10.232 +	}
  10.233 +	else
  10.234 +	{
  10.235 +		gpte |= _PAGE_ACCESSED;
  10.236 +        spte |= _PAGE_ACCESSED; // about to happen anyway
  10.237 +		if ( ! (gpte & _PAGE_DIRTY) ) 
  10.238 +			spte &= ~_PAGE_RW;  // force clear unless already dirty
  10.239 +	}
  10.240 +
  10.241 + 	MEM_VVLOG("plan: gpte=%08lx  spte=%08lx", gpte, spte );
  10.242 +
  10.243 +	// write back updated gpte
  10.244 +    // XXX watch out for read-only L2 entries! (not used in Linux)
  10.245 +	if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
  10.246 +		BUG();  // fixme!
  10.247 +
  10.248 +    if ( unlikely( __put_user( spte, (unsigned long*)&shadow_linear_pg_table[va>>PAGE_SHIFT])) )
  10.249 +	{ 
  10.250 +		// failed:
  10.251 +        //  the L1 may not be shadowed, or the L2 entry may be insufficient
  10.252 +
  10.253 +		unsigned long gpde, spde, gl1pfn, sl1pfn;
  10.254 +
  10.255 +        MEM_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx  spte=%08lx",gpte,spte );
  10.256 + 
  10.257 +        gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
  10.258 +
  10.259 +        gl1pfn = gpde>>PAGE_SHIFT;
  10.260 +
  10.261 +        if ( ! (frame_table[gl1pfn].shadow_and_flags & PSH_shadowed ) )
  10.262 +        {
  10.263 +            // this L1 is NOT already shadowed so we need to shadow it
  10.264 +            struct pfn_info *sl1pfn_info;
  10.265 +            unsigned long *gpl1e, *spl1e;
  10.266 +            int i;
  10.267 +            sl1pfn_info = alloc_domain_page( NULL ); // XXX account properly! 
  10.268 +            sl1pfn = sl1pfn_info - frame_table;
  10.269 +
  10.270 +            MEM_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
  10.271 +	        perfc_incrc(shadow_l1_table_count);
  10.272 +
  10.273 +            sl1pfn_info->shadow_and_flags = PSH_shadow | gl1pfn;
  10.274 +            frame_table[gl1pfn].shadow_and_flags = PSH_shadowed | sl1pfn;
  10.275 +
  10.276 +            gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
  10.277 +            spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<<PAGE_SHIFT);
  10.278 +        
  10.279 +
  10.280 +            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
  10.281 +            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] =  mk_l2_pgentry(spde);
  10.282 +
  10.283 +            gpl1e = (unsigned long *) &(linear_pg_table[
  10.284 +                         (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);
  10.285 +
  10.286 +            spl1e = (unsigned long *) &shadow_linear_pg_table[
  10.287 +                         (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];
  10.288 +
  10.289 +
   10.290 +			// XXX can only do this if the shadow/guest is writeable
  10.291 +            // disable write protection if ! gpde & _PAGE_RW ????
  10.292 +
  10.293 +            for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
  10.294 +	        {
  10.295 +#if SHADOW_OPTIMISE
  10.296 +                if ( (gpl1e[i] & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
  10.297 +                                (_PAGE_PRESENT|_PAGE_ACCESSED) )
  10.298 +                {
  10.299 +                    spl1e[i] = gpl1e[i];
  10.300 +                    if ( !(gpl1e[i] & _PAGE_DIRTY) )
  10.301 +                        spl1e[i] &= ~_PAGE_RW;
  10.302 +                }
  10.303 +                else
  10.304 +#endif
  10.305 +                    spl1e[i] = 0;
  10.306 +            }
  10.307 +
  10.308 +
  10.309 +        }
  10.310 +        else
  10.311 +        {
  10.312 +            // this L1 was shadowed (by another PT) but we didn't have an L2
  10.313 +            // entry for it
  10.314 +
  10.315 +            sl1pfn = frame_table[gl1pfn].shadow_and_flags & PSH_pfn_mask;
  10.316 +
  10.317 +            MEM_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
  10.318 +
  10.319 +		    spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
  10.320 +
  10.321 +            gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
  10.322 +
  10.323 +
  10.324 +			if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gl1pfn<<PAGE_SHIFT)  ) )
  10.325 +			{   // detect linear map, and keep pointing at guest
  10.326 +                MEM_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
  10.327 +				spde = (spde & ~PAGE_MASK) | (gl1pfn<<PAGE_SHIFT);
  10.328 +			}
  10.329 +
  10.330 +            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
  10.331 +            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
  10.332 +			
  10.333 +
  10.334 +        }              
  10.335 +
  10.336 +    shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
  10.337 +    // (we need to do the above even if we've just made the shadow L1)
  10.338 +
   10.339 +    } // end of the fixup path: the direct write to the shadow L1 failed
  10.340 +    	
  10.341 +    perfc_incrc(shadow_fixup_count);
  10.342 +
  10.343 +    return 1; // let's try the faulting instruction again...
  10.344 +
  10.345 +}
  10.346 +
  10.347 +
  10.348 +void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
  10.349 +                                 unsigned long *prev_spfn_ptr,
  10.350 +				 l1_pgentry_t **prev_spl1e_ptr )
  10.351 +{
  10.352 +    unsigned long gpfn, spfn, spte, prev_spfn = *prev_spfn_ptr;    
  10.353 +    l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
  10.354 +
  10.355 +
   10.356 +    MEM_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx",
   10.357 +              pa, gpte, prev_spfn, prev_spl1e);
  10.358 +
  10.359 +    // to get here, we know the l1 page *must* be shadowed
  10.360 +
  10.361 +    gpfn = pa >> PAGE_SHIFT;
  10.362 +    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
  10.363 +
  10.364 +    if ( spfn == prev_spfn )
  10.365 +    {
  10.366 +        spl1e = prev_spl1e;
  10.367 +    }
  10.368 +    else
  10.369 +    {
  10.370 +        if( prev_spl1e ) unmap_domain_mem( prev_spl1e );
  10.371 +        spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  10.372 +	    *prev_spfn_ptr  = spfn;
  10.373 +	    *prev_spl1e_ptr = spl1e;
  10.374 +    }
  10.375 +	// XXX we assume only pagetables can be shadowed; this will have to change
  10.376 +	// to allow arbitrary CoW etc.
  10.377 +
  10.378 +    spte = 0;
  10.379 +
  10.380 +#if SHADOW_OPTIMISE
  10.381 +	if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
  10.382 +		 (_PAGE_PRESENT|_PAGE_ACCESSED) )
  10.383 +    {
  10.384 +        spte = gpte;
  10.385 +		if ( !(gpte & _PAGE_DIRTY ) )
   10.387 +			spte &= ~_PAGE_RW;
  10.387 +	}
  10.388 +#endif
  10.389 +
  10.390 +	spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t) ] = 
  10.391 +		mk_l1_pgentry( spte );
  10.392 +
   10.393 +	// NB: leave spl1e mapped -- it is cached in *prev_spl1e_ptr and unmapped by the caller
  10.394 +}
  10.395 +
  10.396 +void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte )
  10.397 +{
  10.398 +    unsigned long gpfn, spfn, spte;
   10.399 +    l2_pgentry_t * spl2e;
  10.400 +    unsigned long s_sh;
  10.401 +
  10.402 +    MEM_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
  10.403 +
  10.404 +    // to get here, we know the l2 page has a shadow
  10.405 +
  10.406 +    gpfn = pa >> PAGE_SHIFT;
  10.407 +    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
  10.408 +
   10.409 +    spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  10.410 +    // no real need for a cache here
  10.411 +
  10.412 +    spte = 0;
  10.413 +
  10.414 +    s_sh = frame_table[gpte >> PAGE_SHIFT].shadow_and_flags;
  10.415 +		
  10.416 +	if ( s_sh ) // PSH_shadowed
  10.417 +	{
  10.418 +		if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) )
  10.419 +		{ 
  10.420 +            // linear page table case
  10.421 +			spte = (gpte & ~_PAGE_RW) | _PAGE_DIRTY | _PAGE_ACCESSED; 
  10.422 +	    }
  10.423 +	    else
  10.424 +			spte = (gpte & ~PAGE_MASK) | (s_sh<<PAGE_SHIFT) | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
  10.425 +
  10.426 +	}
  10.427 +
  10.428 +	// XXXX Should mark guest pte as DIRTY and ACCESSED too!!!!!
  10.429 +
   10.430 +	spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t) ] = 
  10.431 +		mk_l2_pgentry( spte );
  10.432 +
   10.433 +	unmap_domain_mem( (void *) spl2e );
  10.434 +}
  10.435 +
  10.436 +
  10.437 +#if SHADOW_DEBUG
  10.438 +
  10.439 +static int sh_l2_present;
  10.440 +static int sh_l1_present;
  10.441 +char * sh_check_name;
  10.442 +
  10.443 +#define FAIL(_f, _a...)                             \
  10.444 +{printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",  sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
  10.445 +
  10.446 +int check_pte( unsigned long gpte, unsigned long spte, int level, int i )
  10.447 +{
  10.448 +	unsigned long mask, gpfn, spfn;
  10.449 +
  10.450 +    if ( spte == 0 || spte == 0xdeadface || spte == 0x00000E00)
  10.451 +        return 1;  // always safe
  10.452 +
  10.453 +    if ( !(spte & _PAGE_PRESENT) )
  10.454 +        FAIL("Non zero not present spte");
  10.455 +
  10.456 +	if( level == 2 ) sh_l2_present++;
  10.457 +	if( level == 1 ) sh_l1_present++;
  10.458 +
  10.459 +    if ( !(gpte & _PAGE_PRESENT) )
  10.460 +        FAIL("Guest not present yet shadow is");
  10.461 +
  10.462 +    mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);
  10.463 +
  10.464 +    if ( (spte & mask) != (gpte & mask ) )
  10.465 +		FAIL("Corrupt?");
  10.466 +
  10.467 +	if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
  10.468 +		FAIL("Dirty coherence");
  10.469 +
  10.470 +	if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
  10.471 +		FAIL("Accessed coherence");
  10.472 +
  10.473 +	if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
  10.474 +		FAIL("RW coherence");
  10.475 +
  10.476 +	if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY) ))
  10.477 +		FAIL("RW2 coherence");
  10.478 +	
  10.479 +	spfn = spte>>PAGE_SHIFT;
  10.480 +	gpfn = gpte>>PAGE_SHIFT;
  10.481 +
  10.482 +	if ( gpfn == spfn )
  10.483 +    {
  10.484 +		if ( level > 1 )
  10.485 +			FAIL("Linear map ???");			 // XXX this will fail on BSD
  10.486 +
  10.487 +#if 0 // might be a RO mapping of a page table page
  10.488 +		if ( frame_table[gpfn].shadow_and_flags != 0 )
  10.489 +        {
  10.490 +			FAIL("Should have been shadowed g.sf=%08lx s.sf=%08lx", 
  10.491 +				 frame_table[gpfn].shadow_and_flags,
  10.492 +				 frame_table[spfn].shadow_and_flags);
  10.493 +        }
  10.494 +		else
  10.495 +#endif
  10.496 +			return 1;
  10.497 +	}
  10.498 +	else
  10.499 +	{
  10.500 +		if ( level < 2 )
  10.501 +			FAIL("Shadow in L1 entry?");
  10.502 +
  10.503 +		if ( frame_table[gpfn].shadow_and_flags != (PSH_shadowed | spfn) )
  10.504 +			FAIL("spfn problem g.sf=%08lx s.sf=%08lx [g.sf]=%08lx [s.sf]=%08lx", 
  10.505 +				 frame_table[gpfn].shadow_and_flags,
  10.506 +				 frame_table[spfn].shadow_and_flags,
  10.507 +				 frame_table[frame_table[gpfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags,
  10.508 +				 frame_table[frame_table[spfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags
  10.509 +				 );
  10.510 +
  10.511 +		if ( frame_table[spfn].shadow_and_flags != (PSH_shadow | gpfn) )
  10.512 +			FAIL("gpfn problem g.sf=%08lx s.sf=%08lx", 
  10.513 +				 frame_table[gpfn].shadow_and_flags,
  10.514 +				 frame_table[spfn].shadow_and_flags);
  10.515 +
  10.516 +	}
  10.517 +
  10.518 +	return 1;
  10.519 +}
  10.520 +
  10.521 +
  10.522 +int check_l1_table( unsigned long va, unsigned long g2, unsigned long s2 )
  10.523 +{
  10.524 +	int j;
  10.525 +	unsigned long *gpl1e, *spl1e;
  10.526 +
  10.527 +	gpl1e = (unsigned long *) &(linear_pg_table[ va>>PAGE_SHIFT]);
  10.528 +	spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT]);
  10.529 +
  10.530 +
  10.531 +	for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
  10.532 +	{
  10.533 +		unsigned long gpte = gpl1e[j];
  10.534 +		unsigned long spte = spl1e[j];
  10.535 +		
  10.536 +		check_pte( gpte, spte, 1, j );
  10.537 +	}
  10.538 +
  10.539 +	return 1;
  10.540 +}
  10.541 +
  10.542 +#define FAILPT(_f, _a...)                             \
  10.543 +{printk("XXX FAILPT" _f "\n", ## _a ); BUG();}
  10.544 +
  10.545 +int check_pagetable( pagetable_t pt, char *s )
  10.546 +{
  10.547 +	unsigned long gptbase = pagetable_val(pt);
  10.548 +	unsigned long gpfn, spfn;
  10.549 +	int i;
  10.550 +	l2_pgentry_t *gpl2e, *spl2e;
  10.551 +
   10.552 +	return 1; // XXX audit disabled for now -- remove this early return to enable the checks below
  10.553 +
  10.554 +	sh_check_name = s;
  10.555 +
  10.556 +    MEM_VVLOG("%s-PT Audit",s);
  10.557 +
  10.558 +	sh_l2_present = sh_l1_present = 0;
  10.559 +
  10.560 +	gpfn =  gptbase >> PAGE_SHIFT;
  10.561 +
  10.562 +	if ( ! (frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
  10.563 +	{
  10.564 +		printk("%s-PT %08lx not shadowed\n", s, gptbase);
  10.565 +
  10.566 +		if( frame_table[gpfn].shadow_and_flags != 0 ) BUG();
  10.567 +
  10.568 +		return 0;
  10.569 +	}
  10.570 +	
  10.571 +    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
  10.572 +
   10.573 +	if ( frame_table[gpfn].shadow_and_flags != (PSH_shadowed | spfn) )
  10.574 +		FAILPT("ptbase shadow inconsistent1");
  10.575 +
   10.576 +	if ( frame_table[spfn].shadow_and_flags != (PSH_shadow | gpfn) )
  10.577 +		FAILPT("ptbase shadow inconsistent2");
  10.578 +
  10.579 +
  10.580 +	// use the linear map to get a pointer to the L2
  10.581 +	gpl2e = (l2_pgentry_t *) &(linear_l2_table[0]);
  10.582 +	spl2e = (l2_pgentry_t *) &(shadow_linear_l2_table[0]);
  10.583 +
  10.584 +	// check the whole L2
  10.585 +	for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  10.586 +	{
  10.587 +		unsigned long gpte = l2_pgentry_val(gpl2e[i]);
  10.588 +		unsigned long spte = l2_pgentry_val(spl2e[i]);
  10.589 +
  10.590 +		check_pte( gpte, spte, 2, i );
  10.591 +	}
  10.592 +
  10.593 +
  10.594 +	// go back and recurse
  10.595 +	for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  10.596 +	{
  10.597 +		unsigned long gpte = l2_pgentry_val(gpl2e[i]);
  10.598 +		unsigned long spte = l2_pgentry_val(spl2e[i]);
  10.599 +
  10.600 +		if ( spte )	   
  10.601 +			check_l1_table( 
  10.602 +				i<<L2_PAGETABLE_SHIFT,
  10.603 +				gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
  10.604 +
  10.605 +	}
  10.606 +
  10.607 +
  10.608 +	MEM_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
  10.609 +		   sh_l2_present, sh_l1_present );
  10.610 +	
  10.611 +	return 1;
  10.612 +}
  10.613 +
  10.614 +
  10.615 +#endif
  10.616 +
  10.617 +
  10.618 +#endif // CONFIG_SHADOW
  10.619 +
  10.620 +
  10.621 +
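
The frame_table linkage that shadow_l2_table() establishes, unshadow_table() tears down, and check_pte() audits, stated as illustrative ASSERTs (not part of the changeset):

    void assert_shadow_linked(unsigned long gpfn)
    {
        unsigned long spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
        ASSERT( frame_table[gpfn].shadow_and_flags == (PSH_shadowed | spfn) );
        ASSERT( frame_table[spfn].shadow_and_flags == (PSH_shadow   | gpfn) );
    }
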
    11.1 --- a/xen/include/asm-i386/config.h	Fri Mar 12 18:12:23 2004 +0000
    11.2 +++ b/xen/include/asm-i386/config.h	Fri Mar 19 23:43:48 2004 +0000
    11.3 @@ -40,6 +40,9 @@
    11.4  
    11.5  #define CONFIG_XEN_ATTENTION_KEY 1
    11.6  
    11.7 +#define CONFIG_SHADOW 1
    11.8 +
    11.9 +
   11.10  #define HZ 100
   11.11  
   11.12  /*
   11.13 @@ -68,7 +71,7 @@
   11.14   */
   11.15  #define MAX_MONITOR_ADDRESS   (16*1024*1024)
   11.16  #define MAX_DMA_ADDRESS       (16*1024*1024)
   11.17 -#define MAX_DIRECTMAP_ADDRESS (44*1024*1024)
    11.18 +#define MAX_DIRECTMAP_ADDRESS (40*1024*1024) // XXX was 44; 4MB ceded to the shadow linear p.t. map
   11.19  /* And the virtual addresses for the direct-map region... */
   11.20  #define DIRECTMAP_VIRT_START  (READONLY_MPT_VIRT_END)
   11.21  #define DIRECTMAP_VIRT_END    (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
   11.22 @@ -81,8 +84,11 @@
   11.23  /* Next 4MB of virtual address space is used as a linear p.t. mapping. */
   11.24  #define LINEAR_PT_VIRT_START  (DIRECTMAP_VIRT_END)
   11.25  #define LINEAR_PT_VIRT_END    (LINEAR_PT_VIRT_START + (4*1024*1024))
   11.26 +/* Next 4MB of virtual address space is used as a shadow linear p.t. map. */
   11.27 +#define SH_LINEAR_PT_VIRT_START  (LINEAR_PT_VIRT_END)
   11.28 +#define SH_LINEAR_PT_VIRT_END    (SH_LINEAR_PT_VIRT_START + (4*1024*1024))
   11.29  /* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
   11.30 -#define PERDOMAIN_VIRT_START  (LINEAR_PT_VIRT_END)
   11.31 +#define PERDOMAIN_VIRT_START  (SH_LINEAR_PT_VIRT_END)
   11.32  #define PERDOMAIN_VIRT_END    (PERDOMAIN_VIRT_START + (4*1024*1024))
   11.33  #define GDT_VIRT_START        (PERDOMAIN_VIRT_START)
   11.34  #define GDT_VIRT_END          (GDT_VIRT_START + (64*1024))
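
The resulting layout of this region, summarised from the constants above (sizes only):

    /*  DIRECTMAP      40MB  (was 44MB; 4MB ceded to the new region)
     *  LINEAR_PT       4MB  guest page tables, linear self-map
     *  SH_LINEAR_PT    4MB  shadow page tables, linear self-map
     *  PERDOMAIN       4MB  per-domain mappings (GDT, LDT, ...)  */
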
    12.1 --- a/xen/include/asm-i386/page.h	Fri Mar 12 18:12:23 2004 +0000
    12.2 +++ b/xen/include/asm-i386/page.h	Fri Mar 19 23:43:48 2004 +0000
    12.3 @@ -91,6 +91,7 @@ typedef struct { unsigned long pt_lo; } 
    12.4  #include <asm/flushtlb.h>
    12.5  
    12.6  #define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START)
    12.7 +#define linear_l2_table ((l2_pgentry_t *)(LINEAR_PT_VIRT_START+(LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
    12.8  
    12.9  extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE];
   12.10  extern void paging_init(void);
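
Why the linear_l2_table arithmetic works, spelled out (a sketch; the shift is L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT = 22 - 12 = 10):

    /* Inside the linear-map window, the PTE for virtual address va is at
     *     LINEAR_PT_VIRT_START + (va >> 10)
     * (one 4-byte entry per 4KB page, i.e. (va >> 12) * 4).  Feeding the
     * window's own base address back in as va lands on the page directory
     * itself, which is what linear_l2_table names.  shadow.h applies the
     * same identity to SH_LINEAR_PT_VIRT_START for the shadow tables. */
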
    13.1 --- a/xen/include/asm-i386/processor.h	Fri Mar 12 18:12:23 2004 +0000
    13.2 +++ b/xen/include/asm-i386/processor.h	Fri Mar 19 23:43:48 2004 +0000
    13.3 @@ -415,6 +415,12 @@ struct mm_struct {
    13.4       */
    13.5      l1_pgentry_t *perdomain_pt;
    13.6      pagetable_t  pagetable;
    13.7 +
    13.8 +#ifdef CONFIG_SHADOW
    13.9 +    unsigned int shadowmode;  /* flags to control shadow table operation */
   13.10 +    pagetable_t  shadowtable;
   13.11 +#endif
   13.12 +
   13.13      /* Current LDT details. */
   13.14      unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
   13.15      /* Next entry is passed to LGDT on domain switch. */
    14.1 --- a/xen/include/xeno/mm.h	Fri Mar 12 18:12:23 2004 +0000
    14.2 +++ b/xen/include/xeno/mm.h	Fri Mar 19 23:43:48 2004 +0000
    14.3 @@ -67,6 +67,10 @@ struct pfn_info
    14.4      unsigned long       type_and_flags;
    14.5      /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    14.6      unsigned long       tlbflush_timestamp;
    14.7 +#ifdef CONFIG_SHADOW
    14.8 +    /* Shadow page status: top bits flags, bottom bits are a pfn */
    14.9 +    unsigned long       shadow_and_flags;  
   14.10 +#endif
   14.11  };
   14.12  
   14.13   /* The following page types are MUTUALLY EXCLUSIVE. */
   14.14 @@ -100,6 +104,7 @@ struct pfn_info
   14.15   /* 28-bit count of references to this frame. */
   14.16  #define PGC_count_mask                ((1<<28)-1)
   14.17  
   14.18 +
   14.19  /* We trust the slab allocator in slab.c, and our use of it. */
   14.20  #define PageSlab(page)		(1)
   14.21  #define PageSetSlab(page)	((void)0)
    15.1 --- a/xen/include/xeno/perfc_defn.h	Fri Mar 12 18:12:23 2004 +0000
    15.2 +++ b/xen/include/xeno/perfc_defn.h	Fri Mar 19 23:43:48 2004 +0000
    15.3 @@ -19,6 +19,15 @@ PERFCOUNTER_CPU( need_flush_tlb_flush, "
    15.4  
    15.5  PERFCOUNTER_CPU( calls_to_mmu_update, "calls_to_mmu_update" )
    15.6  PERFCOUNTER_CPU( num_page_updates, "num_page_updates" )
    15.7 +PERFCOUNTER_CPU( calls_to_update_va, "calls_to_update_va_map" )
    15.8 +PERFCOUNTER_CPU( page_faults, "page faults" )
    15.9 +PERFCOUNTER_CPU( copy_user_faults, "copy_user faults" )
   15.10 +PERFCOUNTER_CPU( map_domain_mem_count, "map_domain_mem count" )
   15.11 +
   15.12 +PERFCOUNTER_CPU( shadow_l2_table_count, "shadow_l2_table count" )
   15.13 +PERFCOUNTER_CPU( shadow_l1_table_count, "shadow_l1_table count" )
   15.14 +PERFCOUNTER_CPU( unshadow_table_count, "unshadow_table count" )
   15.15 +PERFCOUNTER_CPU( shadow_fixup_count, "shadow_fixup count" )
   15.16 +PERFCOUNTER_CPU( shadow_update_va_fail, "shadow_update_va_fail" )
   15.17  
   15.18  
   15.19 -
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xen/include/xeno/shadow.h	Fri Mar 19 23:43:48 2004 +0000
    16.3 @@ -0,0 +1,48 @@
    16.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*- */
    16.5 +
    16.6 +#ifndef _XENO_SHADOW_H
    16.7 +#define _XENO_SHADOW_H
    16.8 +
    16.9 +#ifdef CONFIG_SHADOW
   16.10 +
   16.11 +#include <xeno/config.h>
   16.12 +#include <xeno/types.h>
   16.13 +#include <xeno/mm.h>
   16.14 +
   16.15 +/* Shadow PT flag bits in pfn_info */
   16.16 +#define PSH_shadowed	(1<<31) /* page has a shadow. PFN points to shadow */
   16.17 +#define PSH_shadow	    (1<<30) /* page is a shadow. PFN points to orig page */
   16.18 +#define PSH_pending	    (1<<29) /* page is in the process of being shadowed */
   16.19 +#define PSH_pfn_mask	((1<<21)-1)
   16.20 +
   16.21 +/* Shadow PT operation mode : shadowmode variable in mm_struct */
   16.22 +#define SHM_test        (1<<0) /* just run domain on shadow PTs */
   16.23 +#define SHM_logdirty    (1<<1) /* log pages that are dirtied */
   16.24 +#define SHM_cow         (1<<2) /* copy on write all dirtied pages */
   16.25 +#define SHM_translate   (1<<3) /* lookup machine pages in translation table */
   16.26 +
   16.27 +#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
   16.28 +#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
   16.29 +
   16.30 +extern pagetable_t shadow_mk_pagetable( unsigned long gptbase, unsigned int shadowmode );
   16.31 +extern void unshadow_table( unsigned long gpfn );
   16.32 +extern unsigned long shadow_l2_table( unsigned long gpfn );
   16.33 +extern int shadow_fault( unsigned long va, long error_code );
   16.34 +extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte, 
   16.35 +										unsigned long *prev_spfn_ptr,
   16.36 +										l1_pgentry_t **prev_spl1e_ptr  );
   16.37 +extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte );
   16.38 +
   16.39 +
   16.40 +#define SHADOW_DEBUG 0
   16.41 +#define SHADOW_OPTIMISE 1
   16.42 +
   16.43 +#if SHADOW_DEBUG
   16.44 +extern int check_pagetable( pagetable_t pt, char *s );
   16.45 +#else
   16.46 +#define check_pagetable( pt, s )
   16.47 +#endif
   16.48 +
   16.49 +
   16.50 +#endif
   16.51 +#endif
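
A minimal sketch of decoding a shadow_and_flags word with the macros above (local names are illustrative):

    unsigned long sf = frame_table[pfn].shadow_and_flags;
    int has_shadow   = !!(sf & PSH_shadowed);  /* pfn has a shadow page  */
    int is_shadow    = !!(sf & PSH_shadow);    /* pfn is itself a shadow */
    unsigned long partner = sf & PSH_pfn_mask; /* 21-bit partner pfn     */
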
    17.1 --- a/xen/net/dev.c	Fri Mar 12 18:12:23 2004 +0000
    17.2 +++ b/xen/net/dev.c	Fri Mar 19 23:43:48 2004 +0000
    17.3 @@ -28,6 +28,7 @@
    17.4  #include <xeno/init.h>
    17.5  #include <xeno/module.h>
    17.6  #include <xeno/event.h>
    17.7 +#include <xeno/shadow.h>
    17.8  #include <asm/domain_page.h>
    17.9  #include <asm/pgalloc.h>
   17.10  #include <asm/io.h>
   17.11 @@ -488,7 +489,7 @@ struct netif_rx_stats netdev_rx_stat[NR_
   17.12  void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
   17.13  {
   17.14      rx_shadow_entry_t *rx;
   17.15 -    unsigned long *ptep, pte; 
   17.16 +    unsigned long *ptep, pte, new_pte; 
   17.17      struct pfn_info *old_page, *new_page, *pte_page;
   17.18      unsigned short size;
   17.19      unsigned char  offset, status = RING_STATUS_OK;
   17.20 @@ -530,10 +531,12 @@ void deliver_packet(struct sk_buff *skb,
   17.21      wmb(); /* Get type count and set flush bit before updating PTE. */
   17.22  
   17.23      pte = *ptep;
   17.24 +
   17.25 +    new_pte = (pte & ~PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT |
   17.26 +                          ((new_page - frame_table) << PAGE_SHIFT);
   17.27 +
   17.28      if ( unlikely(pte & _PAGE_PRESENT) || 
   17.29 -         unlikely(cmpxchg(ptep, pte, 
   17.30 -                          (pte & ~PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT |
   17.31 -                          ((new_page - frame_table) << PAGE_SHIFT))) != pte )
   17.32 +         unlikely(cmpxchg(ptep, pte, new_pte)) != pte )
   17.33      {
   17.34          DPRINTK("PTE was modified or reused! %08lx %08lx\n", pte, *ptep);
   17.35          unmap_domain_mem(ptep);
   17.36 @@ -543,6 +546,22 @@ void deliver_packet(struct sk_buff *skb,
   17.37          goto out;
   17.38      }
   17.39  
   17.40 +
   17.41 +#ifdef CONFIG_SHADOW
   17.42 +    if ( pte_page->shadow_and_flags & PSH_shadowed )
   17.43 +    {
   17.44 +        unsigned long spte_pfn = pte_page->shadow_and_flags & PSH_pfn_mask;
   17.45 +	unsigned long *sptr = map_domain_mem( (spte_pfn<<PAGE_SHIFT) |
   17.46 +			(((unsigned long)ptep)&~PAGE_MASK) );
   17.47 +
    17.48 +        // mirror the update into the shadow now, saving a shadow fault later
   17.49 +	*sptr = new_pte;
   17.50 +
   17.51 +	unmap_domain_mem( sptr );
   17.52 +    }
   17.53 +#endif
   17.54 +
   17.55 +
   17.56      machine_to_phys_mapping[new_page - frame_table] 
   17.57          = machine_to_phys_mapping[old_page - frame_table];
   17.58      
   17.59 @@ -2068,6 +2087,8 @@ static void get_rx_bufs(net_vif_t *vif)
   17.60  
   17.61          pte_pfn  = rx.addr >> PAGE_SHIFT;
   17.62          pte_page = &frame_table[pte_pfn];
   17.63 +
   17.64 +	//printk("MMM %08lx ", rx.addr);
   17.65              
   17.66          /* The address passed down must be to a valid PTE. */
   17.67          if ( unlikely(pte_pfn >= max_page) ||
   17.68 @@ -2081,7 +2102,7 @@ static void get_rx_bufs(net_vif_t *vif)
   17.69          
   17.70          ptep = map_domain_mem(rx.addr);
   17.71          pte  = *ptep;
   17.72 -        
   17.73 +	//printk("%08lx\n",pte);        
   17.74          /* We must be passed a valid writeable mapping to swizzle. */
   17.75          if ( unlikely((pte & (_PAGE_PRESENT|_PAGE_RW)) != 
   17.76                        (_PAGE_PRESENT|_PAGE_RW)) ||
   17.77 @@ -2092,6 +2113,22 @@ static void get_rx_bufs(net_vif_t *vif)
   17.78              make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
   17.79              goto rx_unmap_and_continue;
   17.80          }
   17.81 +
   17.82 +#ifdef CONFIG_SHADOW
   17.83 +	{
   17.84 +	    if ( frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_shadowed )
   17.85 +	      {
   17.86 +		unsigned long spfn = 
   17.87 +		  frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_pfn_mask;
   17.88 +		unsigned long * sptr = map_domain_mem( (spfn<<PAGE_SHIFT) | (rx.addr&~PAGE_MASK) );
   17.89 +
   17.90 +		*sptr = 0;
   17.91 +		unmap_domain_mem( sptr );
   17.92 +
   17.93 +	      }
   17.94 +
   17.95 +	}
   17.96 +#endif
   17.97          
   17.98          buf_pfn  = pte >> PAGE_SHIFT;
   17.99          buf_page = &frame_table[buf_pfn];
  17.100 @@ -2112,6 +2149,8 @@ static void get_rx_bufs(net_vif_t *vif)
  17.101              put_page_and_type(pte_page);
  17.102              make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
  17.103              goto rx_unmap_and_continue;
  17.104 +
   17.105 +	    // XXX IAP should CONFIG_SHADOW do something here?
  17.106          }
  17.107  
  17.108          /*
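
Both dev.c hooks above implement one rule: when Xen itself rewrites a guest PTE while swizzling packet buffers, the write must be mirrored into (or zapped from) the shadow L1 if that page-table page is shadowed. A consolidated sketch under CONFIG_SHADOW (the helper name shadow_mirror_pte is hypothetical, not from the changeset):

    /* pa is the machine address of the guest PTE; new_pte is the value
     * just installed (deliver_packet) or 0 to invalidate (get_rx_bufs). */
    static void shadow_mirror_pte(unsigned long pa, unsigned long new_pte)
    {
        struct pfn_info *pte_page = &frame_table[pa >> PAGE_SHIFT];
        if ( pte_page->shadow_and_flags & PSH_shadowed )
        {
            unsigned long spfn = pte_page->shadow_and_flags & PSH_pfn_mask;
            unsigned long *sptr =
                map_domain_mem( (spfn << PAGE_SHIFT) | (pa & ~PAGE_MASK) );
            *sptr = new_pte;
            unmap_domain_mem( sptr );
        }
    }
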