direct-io.hg

changeset 4388:841818f62d35

bitkeeper revision 1.1236.1.164 (424aed36IaX4v3-NGAT_MnCdUsD1yQ)

Split mmu_update() hypercall into mmu_update() and mmuext_op().
All MMUEXT_* ops are now done via the latter hypercall, which allows
more arguments to be passed to mmuext operations in a cleaner way.
Linux 2.4, 2.6 and the control tools all use the new interface. The
BSDs will need some work, but it shouldn't be too hard (and they can
be moved to writable pagetables at the same time :-) ).
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Mar 30 18:17:26 2005 +0000 (2005-03-30)
parents 09ff1dbfa3f6
children 196020909646
files freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c linux-2.4.29-xen-sparse/arch/xen/kernel/traps.c linux-2.4.29-xen-sparse/arch/xen/mm/ioremap.c linux-2.4.29-xen-sparse/mm/memory.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c tools/libxc/xc_linux_build.c tools/libxc/xc_linux_restore.c tools/libxc/xc_plan9_build.c tools/libxc/xc_private.c tools/libxc/xc_private.h tools/libxc/xc_vmx_build.c xen/arch/x86/mm.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_64/entry.S xen/include/public/xen.h
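
To illustrate the change, here is a minimal before/after sketch of
installing the default LDT, distilled from the traps.c and hypervisor.c
hunks below (error handling elided):

    /* Before: extended commands were multiplexed through mmu_update(),
     * packing the command and its arguments into the ptr/val pair. */
    mmu_update_t u;
    u.ptr  = MMU_EXTENDED_COMMAND | (unsigned long)&default_ldt[0];
    u.val  = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
    (void)HYPERVISOR_mmu_update(&u, 1, NULL);

    /* After: a dedicated hypercall with a named field per argument,
     * plus an explicit subject-domain parameter. */
    struct mmuext_op op;
    op.cmd         = MMUEXT_SET_LDT;
    op.linear_addr = (unsigned long)&default_ldt[0];
    op.nr_ents     = 5;
    (void)HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
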
line diff
     1.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Wed Mar 30 18:13:34 2005 +0000
     1.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Wed Mar 30 18:17:26 2005 +0000
     1.3 @@ -540,7 +540,7 @@ mcl_queue_pt_update(vm_offset_t va, vm_p
     1.4      MCL_QUEUE[MCL_IDX].op = __HYPERVISOR_update_va_mapping;
     1.5      MCL_QUEUE[MCL_IDX].args[0] = (unsigned long)va;
     1.6      MCL_QUEUE[MCL_IDX].args[1] = (unsigned long)ma;
     1.7 -    MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG;
     1.8 +    MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG_LOCAL;
     1.9      mcl_increment_idx();
    1.10  }
    1.11  
     2.1 --- a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c	Wed Mar 30 18:13:34 2005 +0000
     2.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c	Wed Mar 30 18:17:26 2005 +0000
     2.3 @@ -440,7 +440,7 @@ xn_alloc_rx_buffers(struct xn_softc *sc)
     2.4      PT_UPDATES_FLUSH();
     2.5  
     2.6      /* After all PTEs have been zapped we blow away stale TLB entries. */
     2.7 -    xn_rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB;
     2.8 +    xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
     2.9  
    2.10      /* Give away a batch of pages. */
    2.11      xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
     3.1 --- a/linux-2.4.29-xen-sparse/arch/xen/kernel/traps.c	Wed Mar 30 18:13:34 2005 +0000
     3.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/traps.c	Wed Mar 30 18:17:26 2005 +0000
     3.3 @@ -316,15 +316,7 @@ asmlinkage void do_general_protection(st
     3.4  		__asm__ __volatile__ ( "sldt %0" : "=r" (ldt) );
     3.5  		if ( ldt == 0 )
     3.6  		{
     3.7 -		    mmu_update_t u;
     3.8 -		    u.ptr  = MMU_EXTENDED_COMMAND;
     3.9 -		    u.ptr |= (unsigned long)&default_ldt[0];
    3.10 -		    u.val  = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
    3.11 -		    if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0) )
    3.12 -		    {
    3.13 -			show_trace(NULL);
    3.14 -			panic("Failed to install default LDT");
    3.15 -		    }
    3.16 +                    xen_set_ldt((unsigned long)&default_ldt[0], 5);
    3.17  		    return;
    3.18  		}
    3.19  	}
     4.1 --- a/linux-2.4.29-xen-sparse/arch/xen/mm/ioremap.c	Wed Mar 30 18:13:34 2005 +0000
     4.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/mm/ioremap.c	Wed Mar 30 18:17:26 2005 +0000
     4.3 @@ -113,12 +113,7 @@ int direct_remap_area_pages(struct mm_st
     4.4      int i;
     4.5      unsigned long start_address;
     4.6  #define MAX_DIRECTMAP_MMU_QUEUE 130
     4.7 -    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
     4.8 -
     4.9 -    u[0].ptr  = MMU_EXTENDED_COMMAND;
    4.10 -    u[0].val  = MMUEXT_SET_FOREIGNDOM;
    4.11 -    u[0].val |= (unsigned long)domid << 16;
    4.12 -    v = w = &u[1];
    4.13 +    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
    4.14  
    4.15      start_address = address;
    4.16  
    4.17 @@ -130,11 +125,11 @@ int direct_remap_area_pages(struct mm_st
    4.18  	    __direct_remap_area_pages( mm,
    4.19  				       start_address, 
    4.20  				       address-start_address, 
    4.21 -				       w);
    4.22 +				       u);
    4.23  	    
    4.24 -	    if ( HYPERVISOR_mmu_update(u, v - u, NULL) < 0 )
    4.25 +	    if ( HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0 )
    4.26  		return -EFAULT;	    
    4.27 -	    v = w;
    4.28 +	    v = u;
    4.29  	    start_address = address;
    4.30  	}
    4.31  
    4.32 @@ -149,14 +144,14 @@ int direct_remap_area_pages(struct mm_st
    4.33          v++;
    4.34      }
    4.35  
    4.36 -    if ( v != w )
    4.37 +    if ( v != u )
    4.38      {
    4.39  	/* get the ptep's filled in */
    4.40  	__direct_remap_area_pages(mm,
    4.41                                    start_address, 
    4.42                                    address-start_address, 
    4.43 -                                  w);	 
    4.44 -	if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0) )
    4.45 +                                  u);	 
    4.46 +	if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) )
    4.47  	    return -EFAULT;	    
    4.48      }
    4.49      
     5.1 --- a/linux-2.4.29-xen-sparse/mm/memory.c	Wed Mar 30 18:13:34 2005 +0000
     5.2 +++ b/linux-2.4.29-xen-sparse/mm/memory.c	Wed Mar 30 18:17:26 2005 +0000
     5.3 @@ -911,7 +911,7 @@ static inline void establish_pte(struct 
     5.4  {
     5.5  #ifdef CONFIG_XEN
     5.6  	if ( likely(vma->vm_mm == current->mm) ) {
     5.7 -		HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG);
     5.8 +		HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG_LOCAL);
     5.9  	} else {
    5.10  		set_pte(page_table, entry);
    5.11  		flush_tlb_page(vma, address);
     6.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c	Wed Mar 30 18:13:34 2005 +0000
     6.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c	Wed Mar 30 18:17:26 2005 +0000
     6.3 @@ -465,14 +465,7 @@ fastcall void do_general_protection(stru
     6.4  		unsigned long ldt;
     6.5  		__asm__ __volatile__ ("sldt %0" : "=r" (ldt));
     6.6  		if (ldt == 0) {
     6.7 -			mmu_update_t u;
     6.8 -			u.ptr = MMU_EXTENDED_COMMAND;
     6.9 -			u.ptr |= (unsigned long)&default_ldt[0];
    6.10 -			u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
    6.11 -			if (unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0)) {
    6.12 -				show_trace(NULL, (unsigned long *)&u);
    6.13 -				panic("Failed to install default LDT");
    6.14 -			}
    6.15 +			xen_set_ldt((unsigned long)&default_ldt[0], 5);
    6.16  			return;
    6.17  		}
    6.18  	}
     7.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c	Wed Mar 30 18:13:34 2005 +0000
     7.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c	Wed Mar 30 18:17:26 2005 +0000
     7.3 @@ -52,7 +52,7 @@ void xen_l1_entry_update(pte_t *ptr, uns
     7.4      mmu_update_t u;
     7.5      u.ptr = virt_to_machine(ptr);
     7.6      u.val = val;
     7.7 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
     7.8 +    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
     7.9  }
    7.10  
    7.11  void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
    7.12 @@ -60,71 +60,7 @@ void xen_l2_entry_update(pmd_t *ptr, pmd
    7.13      mmu_update_t u;
    7.14      u.ptr = virt_to_machine(ptr);
    7.15      u.val = pmd_val_ma(val);
    7.16 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.17 -}
    7.18 -
    7.19 -void xen_pt_switch(unsigned long ptr)
    7.20 -{
    7.21 -    mmu_update_t u;
    7.22 -    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
    7.23 -    u.val = MMUEXT_NEW_BASEPTR;
    7.24 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.25 -}
    7.26 -
    7.27 -void xen_tlb_flush(void)
    7.28 -{
    7.29 -    mmu_update_t u;
    7.30 -    u.ptr = MMU_EXTENDED_COMMAND;
    7.31 -    u.val = MMUEXT_TLB_FLUSH;
    7.32 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.33 -}
    7.34 -
    7.35 -void xen_invlpg(unsigned long ptr)
    7.36 -{
    7.37 -    mmu_update_t u;
    7.38 -    u.ptr = (ptr & PAGE_MASK) | MMU_EXTENDED_COMMAND;
    7.39 -    u.val = MMUEXT_INVLPG;
    7.40 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.41 -}
    7.42 -
    7.43 -void xen_pgd_pin(unsigned long ptr)
    7.44 -{
    7.45 -    mmu_update_t u;
    7.46 -    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
    7.47 -    u.val = MMUEXT_PIN_L2_TABLE;
    7.48 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.49 -}
    7.50 -
    7.51 -void xen_pgd_unpin(unsigned long ptr)
    7.52 -{
    7.53 -    mmu_update_t u;
    7.54 -    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
    7.55 -    u.val = MMUEXT_UNPIN_TABLE;
    7.56 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.57 -}
    7.58 -
    7.59 -void xen_pte_pin(unsigned long ptr)
    7.60 -{
    7.61 -    mmu_update_t u;
    7.62 -    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
    7.63 -    u.val = MMUEXT_PIN_L1_TABLE;
    7.64 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.65 -}
    7.66 -
    7.67 -void xen_pte_unpin(unsigned long ptr)
    7.68 -{
    7.69 -    mmu_update_t u;
    7.70 -    u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND;
    7.71 -    u.val = MMUEXT_UNPIN_TABLE;
    7.72 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.73 -}
    7.74 -
    7.75 -void xen_set_ldt(unsigned long ptr, unsigned long len)
    7.76 -{
    7.77 -    mmu_update_t u;
    7.78 -    u.ptr = ptr | MMU_EXTENDED_COMMAND;
    7.79 -    u.val = (len << MMUEXT_CMD_SHIFT) | MMUEXT_SET_LDT;
    7.80 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.81 +    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
    7.82  }
    7.83  
    7.84  void xen_machphys_update(unsigned long mfn, unsigned long pfn)
    7.85 @@ -132,7 +68,71 @@ void xen_machphys_update(unsigned long m
    7.86      mmu_update_t u;
    7.87      u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
    7.88      u.val = pfn;
    7.89 -    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0);
    7.90 +    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
    7.91 +}
    7.92 +
    7.93 +void xen_pt_switch(unsigned long ptr)
    7.94 +{
    7.95 +    struct mmuext_op op;
    7.96 +    op.cmd = MMUEXT_NEW_BASEPTR;
    7.97 +    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
    7.98 +    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
    7.99 +}
   7.100 +
   7.101 +void xen_tlb_flush(void)
   7.102 +{
   7.103 +    struct mmuext_op op;
   7.104 +    op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
   7.105 +    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
   7.106 +}
   7.107 +
   7.108 +void xen_invlpg(unsigned long ptr)
   7.109 +{
   7.110 +    struct mmuext_op op;
   7.111 +    op.cmd = MMUEXT_INVLPG_LOCAL;
   7.112 +    op.linear_addr = ptr & PAGE_MASK;
   7.113 +    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
   7.114 +}
   7.115 +
   7.116 +void xen_pgd_pin(unsigned long ptr)
   7.117 +{
   7.118 +    struct mmuext_op op;
   7.119 +    op.cmd = MMUEXT_PIN_L2_TABLE;
   7.120 +    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
   7.121 +    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
   7.122 +}
   7.123 +
   7.124 +void xen_pgd_unpin(unsigned long ptr)
   7.125 +{
   7.126 +    struct mmuext_op op;
   7.127 +    op.cmd = MMUEXT_UNPIN_TABLE;
   7.128 +    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
   7.129 +    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
   7.130 +}
   7.131 +
   7.132 +void xen_pte_pin(unsigned long ptr)
   7.133 +{
   7.134 +    struct mmuext_op op;
   7.135 +    op.cmd = MMUEXT_PIN_L1_TABLE;
   7.136 +    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
   7.137 +    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
   7.138 +}
   7.139 +
   7.140 +void xen_pte_unpin(unsigned long ptr)
   7.141 +{
   7.142 +    struct mmuext_op op;
   7.143 +    op.cmd = MMUEXT_UNPIN_TABLE;
   7.144 +    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
   7.145 +    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
   7.146 +}
   7.147 +
   7.148 +void xen_set_ldt(unsigned long ptr, unsigned long len)
   7.149 +{
   7.150 +    struct mmuext_op op;
   7.151 +    op.cmd = MMUEXT_SET_LDT;
   7.152 +    op.linear_addr = ptr;
   7.153 +    op.nr_ents = len;
   7.154 +    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
   7.155  }
   7.156  
   7.157  #ifdef CONFIG_XEN_PHYSDEV_ACCESS
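
Each wrapper above now submits a single op, but the new interface also
allows batching several extended ops in one hypercall; a hypothetical
sketch (not part of this patch; 'pfn' is an assumed pseudo-physical
frame number):

    /* Pin a new L2 page directory and switch to it in one hypercall. */
    struct mmuext_op ops[2];
    ops[0].cmd = MMUEXT_PIN_L2_TABLE;
    ops[0].mfn = pfn_to_mfn(pfn);
    ops[1].cmd = MMUEXT_NEW_BASEPTR;
    ops[1].mfn = pfn_to_mfn(pfn);
    BUG_ON(HYPERVISOR_mmuext_op(ops, 2, NULL, DOMID_SELF) < 0);
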
     8.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c	Wed Mar 30 18:13:34 2005 +0000
     8.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c	Wed Mar 30 18:17:26 2005 +0000
     8.3 @@ -108,7 +108,7 @@ void __iomem * __ioremap(unsigned long p
     8.4  			if(!PageReserved(page))
     8.5  				return NULL;
     8.6  
     8.7 -		domid = DOMID_LOCAL;
     8.8 +		domid = DOMID_SELF;
     8.9  	}
    8.10  
    8.11  	/*
    8.12 @@ -393,15 +393,7 @@ int direct_remap_area_pages(struct mm_st
    8.13  	int i;
    8.14  	unsigned long start_address;
    8.15  #define MAX_DIRECTMAP_MMU_QUEUE 130
    8.16 -	mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
    8.17 -
    8.18 -	v = w = &u[0];
    8.19 -	if (domid != DOMID_LOCAL) {
    8.20 -		u[0].ptr  = MMU_EXTENDED_COMMAND;
    8.21 -		u[0].val  = MMUEXT_SET_FOREIGNDOM;
    8.22 -		u[0].val |= (unsigned long)domid << 16;
    8.23 -		v = w = &u[1];
    8.24 -	}
    8.25 +	mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
    8.26  
    8.27  	start_address = address;
    8.28  
    8.29 @@ -413,11 +405,11 @@ int direct_remap_area_pages(struct mm_st
    8.30  			__direct_remap_area_pages(mm,
    8.31  						  start_address, 
    8.32  						  address-start_address, 
    8.33 -						  w);
    8.34 +						  u);
    8.35   
    8.36 -			if (HYPERVISOR_mmu_update(u, v - u, NULL) < 0)
    8.37 +			if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
    8.38  				return -EFAULT;
    8.39 -			v = w;
    8.40 +			v = u;
    8.41  			start_address = address;
    8.42  		}
    8.43  
    8.44 @@ -432,13 +424,13 @@ int direct_remap_area_pages(struct mm_st
    8.45  		v++;
    8.46  	}
    8.47  
    8.48 -	if (v != w) {
    8.49 +	if (v != u) {
    8.50  		/* get the ptep's filled in */
    8.51  		__direct_remap_area_pages(mm,
    8.52  					  start_address, 
    8.53  					  address-start_address, 
    8.54 -					  w);
    8.55 -		if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0))
    8.56 +					  u);
    8.57 +		if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
    8.58  			return -EFAULT;
    8.59  	}
    8.60  
     9.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c	Wed Mar 30 18:13:34 2005 +0000
     9.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c	Wed Mar 30 18:17:26 2005 +0000
     9.3 @@ -100,7 +100,7 @@ static void fast_flush_area(int idx, int
     9.4          mcl[i].args[2] = 0;
     9.5      }
     9.6  
     9.7 -    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
     9.8 +    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL;
     9.9      if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
    9.10          BUG();
    9.11  }
    10.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	Wed Mar 30 18:13:34 2005 +0000
    10.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	Wed Mar 30 18:17:26 2005 +0000
    10.3 @@ -38,8 +38,9 @@ static DECLARE_TASKLET(net_rx_tasklet, n
    10.4  static struct timer_list net_timer;
    10.5  
    10.6  static struct sk_buff_head rx_queue;
    10.7 -static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2];
    10.8 -static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3];
    10.9 +static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
   10.10 +static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
   10.11 +static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
   10.12  static unsigned char rx_notify[NR_EVENT_CHANNELS];
   10.13  
   10.14  /* Don't currently gate addition of an interface to the tx scheduling list. */
   10.15 @@ -195,8 +196,9 @@ static void net_rx_action(unsigned long 
   10.16      netif_t *netif;
   10.17      s8 status;
   10.18      u16 size, id, evtchn;
   10.19 +    multicall_entry_t *mcl;
   10.20      mmu_update_t *mmu;
   10.21 -    multicall_entry_t *mcl;
   10.22 +    struct mmuext_op *mmuext;
   10.23      unsigned long vdata, mdata, new_mfn;
   10.24      struct sk_buff_head rxq;
   10.25      struct sk_buff *skb;
   10.26 @@ -207,6 +209,7 @@ static void net_rx_action(unsigned long 
   10.27  
   10.28      mcl = rx_mcl;
   10.29      mmu = rx_mmu;
   10.30 +    mmuext = rx_mmuext;
   10.31      while ( (skb = skb_dequeue(&rx_queue)) != NULL )
   10.32      {
   10.33          netif   = netdev_priv(skb->dev);
   10.34 @@ -229,25 +232,26 @@ static void net_rx_action(unsigned long 
   10.35           */
   10.36          phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
   10.37          
   10.38 -        mmu[0].ptr  = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
   10.39 -        mmu[0].val  = __pa(vdata) >> PAGE_SHIFT;  
   10.40 -        mmu[1].ptr  = MMU_EXTENDED_COMMAND;
   10.41 -        mmu[1].val  = MMUEXT_SET_FOREIGNDOM;      
   10.42 -        mmu[1].val |= (unsigned long)netif->domid << 16;
   10.43 -        mmu[2].ptr  = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND;
   10.44 -        mmu[2].val  = MMUEXT_REASSIGN_PAGE;
   10.45 +        mcl->op = __HYPERVISOR_update_va_mapping;
   10.46 +        mcl->args[0] = vdata;
   10.47 +        mcl->args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL;
   10.48 +        mcl->args[2] = 0;
   10.49 +        mcl++;
   10.50  
   10.51 -        mcl[0].op = __HYPERVISOR_update_va_mapping;
   10.52 -        mcl[0].args[0] = vdata;
   10.53 -        mcl[0].args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL;
   10.54 -        mcl[0].args[2] = 0;
   10.55 -        mcl[1].op = __HYPERVISOR_mmu_update;
   10.56 -        mcl[1].args[0] = (unsigned long)mmu;
   10.57 -        mcl[1].args[1] = 3;
   10.58 -        mcl[1].args[2] = 0;
   10.59 +        mcl->op = __HYPERVISOR_mmuext_op;
   10.60 +        mcl->args[0] = (unsigned long)mmuext;
   10.61 +        mcl->args[1] = 1;
   10.62 +        mcl->args[2] = 0;
   10.63 +        mcl->args[3] = netif->domid;
   10.64 +        mcl++;
   10.65  
   10.66 -        mcl += 2;
   10.67 -        mmu += 3;
   10.68 +        mmuext->cmd = MMUEXT_REASSIGN_PAGE;
   10.69 +        mmuext->mfn = mdata >> PAGE_SHIFT;
   10.70 +        mmuext++;
   10.71 +
   10.72 +        mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
   10.73 +        mmu->val = __pa(vdata) >> PAGE_SHIFT;  
   10.74 +        mmu++;
   10.75  
   10.76          __skb_queue_tail(&rxq, skb);
   10.77  
   10.78 @@ -259,12 +263,19 @@ static void net_rx_action(unsigned long 
   10.79      if ( mcl == rx_mcl )
   10.80          return;
   10.81  
   10.82 -    mcl[-2].args[2] = UVMF_FLUSH_TLB;
   10.83 +    mcl->op = __HYPERVISOR_mmu_update;
   10.84 +    mcl->args[0] = (unsigned long)rx_mmu;
   10.85 +    mcl->args[1] = mmu - rx_mmu;
   10.86 +    mcl->args[2] = 0;
   10.87 +    mcl->args[3] = DOMID_SELF;
   10.88 +    mcl++;
   10.89 +
   10.90 +    mcl[-3].args[2] = UVMF_TLB_FLUSH_ALL;
   10.91      if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
   10.92          BUG();
   10.93  
   10.94      mcl = rx_mcl;
   10.95 -    mmu = rx_mmu;
   10.96 +    mmuext = rx_mmuext;
   10.97      while ( (skb = __skb_dequeue(&rxq)) != NULL )
   10.98      {
   10.99          netif   = netdev_priv(skb->dev);
  10.100 @@ -272,7 +283,7 @@ static void net_rx_action(unsigned long 
  10.101  
  10.102          /* Rederive the machine addresses. */
  10.103          new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
  10.104 -        mdata   = ((mmu[2].ptr & PAGE_MASK) |
  10.105 +        mdata   = ((mmuext[0].mfn << PAGE_SHIFT) |
  10.106                     ((unsigned long)skb->data & ~PAGE_MASK));
  10.107          
  10.108          atomic_set(&(skb_shinfo(skb)->dataref), 1);
  10.109 @@ -308,7 +319,7 @@ static void net_rx_action(unsigned long 
  10.110          dev_kfree_skb(skb);
  10.111  
  10.112          mcl += 2;
  10.113 -        mmu += 3;
  10.114 +        mmuext += 1;
  10.115      }
  10.116  
  10.117      while ( notify_nr != 0 )
  10.118 @@ -418,7 +429,7 @@ static void net_tx_action(unsigned long 
  10.119          mcl++;     
  10.120      }
  10.121  
  10.122 -    mcl[-1].args[2] = UVMF_FLUSH_TLB;
  10.123 +    mcl[-1].args[2] = UVMF_TLB_FLUSH_ALL;
  10.124      if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
  10.125          BUG();
  10.126  
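
A note on the flush index changing from mcl[-2] to mcl[-3] in
net_rx_action() above: each packet now queues two multicalls
(update_va_mapping and mmuext_op), and a single mmu_update multicall is
appended after the loop, so the last update_va_mapping entry (whose
args[2] carries the TLB-flush flag) sits three slots from the end of
the batch.
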
    11.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Wed Mar 30 18:13:34 2005 +0000
    11.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Wed Mar 30 18:17:26 2005 +0000
    11.3 @@ -388,7 +388,7 @@ static void network_alloc_rx_buffers(str
    11.4      }
    11.5  
    11.6      /* After all PTEs have been zapped we blow away stale TLB entries. */
    11.7 -    rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB;
    11.8 +    rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_ALL;
    11.9  
   11.10      /* Give away a batch of pages. */
   11.11      rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
   11.12 @@ -588,6 +588,7 @@ static int netif_poll(struct net_device 
   11.13          mcl->args[0] = (unsigned long)rx_mmu;
   11.14          mcl->args[1] = mmu - rx_mmu;
   11.15          mcl->args[2] = 0;
   11.16 +        mcl->args[3] = DOMID_SELF;
   11.17          mcl++;
   11.18          (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
   11.19      }
    12.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c	Wed Mar 30 18:13:34 2005 +0000
    12.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c	Wed Mar 30 18:17:26 2005 +0000
    12.3 @@ -98,7 +98,6 @@ static int privcmd_ioctl(struct inode *i
    12.4                  struct vm_area_struct *vma = 
    12.5                      find_vma( current->mm, msg[j].va );
    12.6  
    12.7 -
    12.8                  if ( !vma )
    12.9                      return -EINVAL;
   12.10  
   12.11 @@ -123,8 +122,7 @@ static int privcmd_ioctl(struct inode *i
   12.12  
   12.13      case IOCTL_PRIVCMD_MMAPBATCH:
   12.14      {
   12.15 -#define MAX_DIRECTMAP_MMU_QUEUE 130
   12.16 -        mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
   12.17 +        mmu_update_t u;
   12.18          privcmd_mmapbatch_t m;
   12.19          struct vm_area_struct *vma = NULL;
   12.20          unsigned long *p, addr;
   12.21 @@ -145,39 +143,31 @@ static int privcmd_ioctl(struct inode *i
   12.22          if ( (m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end )
   12.23          { ret = -EFAULT; goto batch_err; }
   12.24  
   12.25 -        u[0].ptr  = MMU_EXTENDED_COMMAND;
   12.26 -        u[0].val  = MMUEXT_SET_FOREIGNDOM;
   12.27 -        u[0].val |= (unsigned long)m.dom << 16;
   12.28 -        v = w = &u[1];
   12.29 -
   12.30          p = m.arr;
   12.31          addr = m.addr;
   12.32          for ( i = 0; i < m.num; i++, addr += PAGE_SIZE, p++ )
   12.33          {
   12.34 -
   12.35              if ( get_user(mfn, p) )
   12.36                  return -EFAULT;
   12.37  
   12.38 -            v->val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
   12.39 +            u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
   12.40  
   12.41              __direct_remap_area_pages(vma->vm_mm,
   12.42                                        addr, 
   12.43                                        PAGE_SIZE, 
   12.44 -                                      v);
   12.45 +                                      &u);
   12.46  
   12.47 -            if ( unlikely(HYPERVISOR_mmu_update(u, v - u + 1, NULL) < 0) )
   12.48 -                put_user( 0xF0000000 | mfn, p );
   12.49 -
   12.50 -            v = w;
   12.51 +            if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
   12.52 +                put_user(0xF0000000 | mfn, p);
   12.53          }
   12.54  
   12.55          ret = 0;
   12.56          break;
   12.57  
   12.58      batch_err:
   12.59 -        printk(KERN_ALERT "XXX SMH: ERROR IN MMAPBATCH\n"); 
   12.60          printk("batch_err ret=%d vma=%p addr=%lx num=%d arr=%p %lx-%lx\n", 
   12.61 -               ret, vma, m.addr, m.num, m.arr, vma->vm_start, vma->vm_end);
   12.62 +               ret, vma, m.addr, m.num, m.arr,
   12.63 +               vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
   12.64          break;
   12.65      }
   12.66      break;
    13.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c	Wed Mar 30 18:13:34 2005 +0000
    13.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c	Wed Mar 30 18:17:26 2005 +0000
    13.3 @@ -195,7 +195,7 @@ static void fast_flush_area(int idx, int
    13.4          mcl[i].args[2] = 0;
    13.5      }
    13.6  
    13.7 -    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
    13.8 +    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL;
    13.9      if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
   13.10          BUG();
   13.11  }
    14.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Wed Mar 30 18:13:34 2005 +0000
    14.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Wed Mar 30 18:17:26 2005 +0000
    14.3 @@ -407,7 +407,7 @@ extern void noexec_setup(const char *str
    14.4  	do {								  \
    14.5  		if (__dirty) {						  \
    14.6  		        if ( likely((__vma)->vm_mm == current->mm) ) {    \
    14.7 -			    HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG); \
    14.8 +			    HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG_LOCAL); \
    14.9  			} else {                                          \
   14.10                              xen_l1_entry_update((__ptep), (__entry).pte_low); \
   14.11  			    flush_tlb_page((__vma), (__address));         \
   14.12 @@ -455,7 +455,6 @@ void make_pages_writable(void *va, unsig
   14.13  #define kern_addr_valid(addr)	(1)
   14.14  #endif /* !CONFIG_DISCONTIGMEM */
   14.15  
   14.16 -#define DOMID_LOCAL (0xFFFFU)
   14.17  int direct_remap_area_pages(struct mm_struct *mm,
   14.18                              unsigned long address, 
   14.19                              unsigned long machine_addr,
    15.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h	Wed Mar 30 18:13:34 2005 +0000
    15.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h	Wed Mar 30 18:17:26 2005 +0000
    15.3 @@ -126,16 +126,33 @@ HYPERVISOR_set_trap_table(
    15.4  
    15.5  static inline int
    15.6  HYPERVISOR_mmu_update(
    15.7 -    mmu_update_t *req, int count, int *success_count)
    15.8 +    mmu_update_t *req, int count, int *success_count, domid_t domid)
    15.9  {
   15.10      int ret;
   15.11 -    unsigned long ign1, ign2, ign3;
   15.12 +    unsigned long ign1, ign2, ign3, ign4;
   15.13  
   15.14      __asm__ __volatile__ (
   15.15          TRAP_INSTR
   15.16 -        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
   15.17 +        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
   15.18  	: "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
   15.19 -	  "3" (success_count)
   15.20 +        "3" (success_count), "4" (domid)
   15.21 +	: "memory" );
   15.22 +
   15.23 +    return ret;
   15.24 +}
   15.25 +
   15.26 +static inline int
   15.27 +HYPERVISOR_mmuext_op(
   15.28 +    struct mmuext_op *op, int count, int *success_count, domid_t domid)
   15.29 +{
   15.30 +    int ret;
   15.31 +    unsigned long ign1, ign2, ign3, ign4;
   15.32 +
   15.33 +    __asm__ __volatile__ (
   15.34 +        TRAP_INSTR
   15.35 +        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
   15.36 +	: "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
   15.37 +        "3" (success_count), "4" (domid)
   15.38  	: "memory" );
   15.39  
   15.40      return ret;
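
Because the subject domain is now a hypercall argument, callers no
longer queue a MMUEXT_SET_FOREIGNDOM command first. A hypothetical
sketch of a privileged domain updating another domain's PTE ('pte_ma',
'val' and 'dom' are assumed names):

    mmu_update_t u;
    u.ptr = pte_ma;    /* machine address of the target PTE */
    u.val = val;       /* new PTE contents */
    if ( HYPERVISOR_mmu_update(&u, 1, NULL, dom) < 0 )
        return -EFAULT;
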
    16.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c	Wed Mar 30 18:13:34 2005 +0000
    16.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c	Wed Mar 30 18:17:26 2005 +0000
    16.3 @@ -598,7 +598,7 @@ xennet_rx_push_buffer(struct xennet_soft
    16.4  	xpq_flush_queue();
    16.5  
    16.6  	/* After all PTEs have been zapped we blow away stale TLB entries. */
    16.7 -	rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB;
    16.8 +	rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
    16.9  
   16.10  	/* Give away a batch of pages. */
   16.11  	rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
   16.12 @@ -681,7 +681,7 @@ xen_network_handler(void *arg)
   16.13  		mcl->op = __HYPERVISOR_update_va_mapping;
   16.14  		mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va;
   16.15  		mcl->args[1] = (rx->addr & PG_FRAME) | PG_V|PG_KW;
   16.16 -		mcl->args[2] = UVMF_FLUSH_TLB; // 0;
   16.17 +		mcl->args[2] = UVMF_TLB_FLUSH_LOCAL; // 0;
   16.18  		mcl++;
   16.19  
   16.20  		xpmap_phys_to_machine_mapping
   16.21 @@ -898,7 +898,7 @@ network_alloc_rx_buffers(struct xennet_s
   16.22  	xpq_flush_queue();
   16.23  
   16.24  	/* After all PTEs have been zapped we blow away stale TLB entries. */
   16.25 -	rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB;
   16.26 +	rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
   16.27  
   16.28  	/* Give away a batch of pages. */
   16.29  	rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
    17.1 --- a/tools/libxc/xc_linux_build.c	Wed Mar 30 18:13:34 2005 +0000
    17.2 +++ b/tools/libxc/xc_linux_build.c	Wed Mar 30 18:17:26 2005 +0000
    17.3 @@ -254,8 +254,7 @@ static int setup_guest(int xc_handle,
    17.4       * Pin down l2tab addr as page dir page - causes hypervisor to provide
    17.5       * correct protection for the page
    17.6       */ 
    17.7 -    if ( add_mmu_update(xc_handle, mmu,
    17.8 -                        l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
    17.9 +    if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) )
   17.10          goto error_out;
   17.11  
   17.12      start_info = xc_map_foreign_range(
   17.13 @@ -447,10 +446,16 @@ int xc_linux_build(int xc_handle,
   17.14      memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
   17.15  
   17.16      /* No callback handlers. */
   17.17 +#if defined(__i386__)
   17.18      ctxt->event_callback_cs     = FLAT_KERNEL_CS;
   17.19      ctxt->event_callback_eip    = 0;
   17.20      ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
   17.21      ctxt->failsafe_callback_eip = 0;
   17.22 +#elif defined(__x86_64__)
   17.23 +    ctxt->event_callback_eip    = 0;
   17.24 +    ctxt->failsafe_callback_eip = 0;
   17.25 +    ctxt->syscall_callback_eip  = 0;
   17.26 +#endif
   17.27  
   17.28      memset( &launch_op, 0, sizeof(launch_op) );
   17.29  
    18.1 --- a/tools/libxc/xc_linux_restore.c	Wed Mar 30 18:13:34 2005 +0000
    18.2 +++ b/tools/libxc/xc_linux_restore.c	Wed Mar 30 18:17:26 2005 +0000
    18.3 @@ -422,10 +422,8 @@ int xc_linux_restore(int xc_handle, XcIO
    18.4      {
    18.5          if ( pfn_type[i] == (L1TAB|LPINTAB) )
    18.6          {
    18.7 -            if ( add_mmu_update(xc_handle, mmu,
    18.8 -                                (pfn_to_mfn_table[i]<<PAGE_SHIFT) | 
    18.9 -                                MMU_EXTENDED_COMMAND,
   18.10 -                                MMUEXT_PIN_L1_TABLE) ) {
   18.11 +            if ( pin_table(xc_handle, MMUEXT_PIN_L1_TABLE,
   18.12 +                           pfn_to_mfn_table[i], dom) ) {
   18.13                  printf("ERR pin L1 pfn=%lx mfn=%lx\n",
   18.14                         (unsigned long)i, pfn_to_mfn_table[i]);
   18.15                  goto out;
   18.16 @@ -438,11 +436,8 @@ int xc_linux_restore(int xc_handle, XcIO
   18.17      {
   18.18          if ( pfn_type[i] == (L2TAB|LPINTAB) )
   18.19          {
   18.20 -            if ( add_mmu_update(xc_handle, mmu,
   18.21 -                                (pfn_to_mfn_table[i]<<PAGE_SHIFT) | 
   18.22 -                                MMU_EXTENDED_COMMAND,
   18.23 -                                MMUEXT_PIN_L2_TABLE) )
   18.24 -            {
   18.25 +            if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
   18.26 +                           pfn_to_mfn_table[i], dom) ) {
   18.27                  printf("ERR pin L2 pfn=%lx mfn=%lx\n",
   18.28                         (unsigned long)i, pfn_to_mfn_table[i]);
   18.29                  goto out;
   18.30 @@ -623,10 +618,12 @@ int xc_linux_restore(int xc_handle, XcIO
   18.31      }
   18.32      if ( (ctxt.kernel_ss & 3) == 0 )
   18.33          ctxt.kernel_ss = FLAT_KERNEL_DS;
   18.34 +#if defined(__i386__)
   18.35      if ( (ctxt.event_callback_cs & 3) == 0 )
   18.36          ctxt.event_callback_cs = FLAT_KERNEL_CS;
   18.37      if ( (ctxt.failsafe_callback_cs & 3) == 0 )
   18.38          ctxt.failsafe_callback_cs = FLAT_KERNEL_CS;
   18.39 +#endif
   18.40      if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
   18.41           (ctxt.ldt_ents > 8192) ||
   18.42           (ctxt.ldt_base > HYPERVISOR_VIRT_START) ||
    19.1 --- a/tools/libxc/xc_plan9_build.c	Wed Mar 30 18:13:34 2005 +0000
    19.2 +++ b/tools/libxc/xc_plan9_build.c	Wed Mar 30 18:17:26 2005 +0000
    19.3 @@ -314,8 +314,7 @@ setup_guest(int xc_handle,
    19.4  	 * Pin down l2tab addr as page dir page - causes hypervisor to provide
    19.5  	 * correct protection for the page
    19.6  	 */
    19.7 -	if (add_mmu_update(xc_handle, mmu,
    19.8 -			   l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE))
    19.9 +	if (pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom))
   19.10  		goto error_out;
   19.11  
   19.12  	for (count = 0; count < tot_pages; count++) {
   19.13 @@ -526,10 +525,16 @@ xc_plan9_build(int xc_handle,
   19.14  	memset(ctxt->debugreg, 0, sizeof (ctxt->debugreg));
   19.15  
   19.16  	/* No callback handlers. */
   19.17 -	ctxt->event_callback_cs = FLAT_KERNEL_CS;
   19.18 -	ctxt->event_callback_eip = 0;
   19.19 -	ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
   19.20 +#if defined(__i386__)
   19.21 +	ctxt->event_callback_cs     = FLAT_KERNEL_CS;
   19.22 +	ctxt->event_callback_eip    = 0;
   19.23 +	ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
   19.24  	ctxt->failsafe_callback_eip = 0;
   19.25 +#elif defined(__x86_64__)
   19.26 +	ctxt->event_callback_eip    = 0;
   19.27 +	ctxt->failsafe_callback_eip = 0;
   19.28 +	ctxt->syscall_callback_eip  = 0;
   19.29 +#endif
   19.30  
   19.31  	memset(&launch_op, 0, sizeof (launch_op));
   19.32  
    20.1 --- a/tools/libxc/xc_private.c	Wed Mar 30 18:13:34 2005 +0000
    20.2 +++ b/tools/libxc/xc_private.c	Wed Mar 30 18:17:26 2005 +0000
    20.3 @@ -92,24 +92,54 @@ unsigned int get_pfn_type(int xc_handle,
    20.4  
    20.5  /*******************/
    20.6  
    20.7 -#define FIRST_MMU_UPDATE 1
    20.8 +int pin_table(
    20.9 +    int xc_handle, unsigned int type, unsigned long mfn, domid_t dom)
   20.10 +{
   20.11 +    int err = 0;
   20.12 +    struct mmuext_op op;
   20.13 +    privcmd_hypercall_t hypercall;
   20.14 +
   20.15 +    op.cmd = type;
   20.16 +    op.mfn = mfn;
   20.17 +
   20.18 +    hypercall.op     = __HYPERVISOR_mmuext_op;
   20.19 +    hypercall.arg[0] = (unsigned long)&op;
   20.20 +    hypercall.arg[1] = 1;
   20.21 +    hypercall.arg[2] = 0;
   20.22 +    hypercall.arg[3] = dom;
   20.23 +
   20.24 +    if ( mlock(&op, sizeof(op)) != 0 )
   20.25 +    {
   20.26 +        PERROR("Could not lock mmuext_op");
   20.27 +        err = 1;
   20.28 +        goto out;
   20.29 +    }
   20.30 +
   20.31 +    if ( do_xen_hypercall(xc_handle, &hypercall) < 0 )
   20.32 +    {
   20.33 +        ERROR("Failure when submitting mmu updates");
   20.34 +        err = 1;
   20.35 +    }
   20.36 +
   20.37 +    (void)munlock(&op, sizeof(op));
   20.38 +
   20.39 + out:
   20.40 +    return err;
   20.41 +}
   20.42  
   20.43  static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
   20.44  {
   20.45      int err = 0;
   20.46      privcmd_hypercall_t hypercall;
   20.47  
   20.48 -    if ( mmu->idx == FIRST_MMU_UPDATE )
   20.49 +    if ( mmu->idx == 0 )
   20.50          return 0;
   20.51  
   20.52 -    mmu->updates[0].ptr  = MMU_EXTENDED_COMMAND;
   20.53 -    mmu->updates[0].val  = MMUEXT_SET_FOREIGNDOM;
   20.54 -    mmu->updates[0].val |= (unsigned long)mmu->subject << 16;
   20.55 -
   20.56      hypercall.op     = __HYPERVISOR_mmu_update;
   20.57      hypercall.arg[0] = (unsigned long)mmu->updates;
   20.58      hypercall.arg[1] = (unsigned long)mmu->idx;
   20.59      hypercall.arg[2] = 0;
   20.60 +    hypercall.arg[3] = mmu->subject;
   20.61  
   20.62      if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 )
   20.63      {
   20.64 @@ -124,7 +154,7 @@ static int flush_mmu_updates(int xc_hand
   20.65          err = 1;
   20.66      }
   20.67  
   20.68 -    mmu->idx = FIRST_MMU_UPDATE;
   20.69 +    mmu->idx = 0;
   20.70      
   20.71      (void)munlock(mmu->updates, sizeof(mmu->updates));
   20.72  
   20.73 @@ -137,7 +167,7 @@ mmu_t *init_mmu_updates(int xc_handle, d
   20.74      mmu_t *mmu = malloc(sizeof(mmu_t));
   20.75      if ( mmu == NULL )
   20.76          return mmu;
   20.77 -    mmu->idx     = FIRST_MMU_UPDATE;
   20.78 +    mmu->idx     = 0;
   20.79      mmu->subject = dom;
   20.80      return mmu;
   20.81  }
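
With pin_table() in place, the old add_mmu_update(...,
MMU_EXTENDED_COMMAND, ...) pinning idiom in the build and restore tools
reduces to a single call, as in the xc_linux_build.c hunk above:

    if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) )
        goto error_out;
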
    21.1 --- a/tools/libxc/xc_private.h	Wed Mar 30 18:13:34 2005 +0000
    21.2 +++ b/tools/libxc/xc_private.h	Wed Mar 30 18:17:26 2005 +0000
    21.3 @@ -212,4 +212,7 @@ void xc_map_memcpy(unsigned long dst, ch
    21.4                     int xch, u32 dom, unsigned long *parray,
    21.5                     unsigned long vstart);
    21.6  
    21.7 +int pin_table(
    21.8 +    int xc_handle, unsigned int type, unsigned long mfn, domid_t dom);
    21.9 +
   21.10  #endif /* __XC_PRIVATE_H__ */
    22.1 --- a/tools/libxc/xc_vmx_build.c	Wed Mar 30 18:13:34 2005 +0000
    22.2 +++ b/tools/libxc/xc_vmx_build.c	Wed Mar 30 18:17:26 2005 +0000
    22.3 @@ -333,8 +333,7 @@ static int setup_guest(int xc_handle,
    22.4       * Pin down l2tab addr as page dir page - causes hypervisor to provide
    22.5       * correct protection for the page
    22.6       */ 
    22.7 -    if ( add_mmu_update(xc_handle, mmu,
    22.8 -                        l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
    22.9 +    if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) )
   22.10          goto error_out;
   22.11  
   22.12      if ((boot_paramsp = xc_map_foreign_range(
   22.13 @@ -612,10 +611,16 @@ int xc_vmx_build(int xc_handle,
   22.14      memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
   22.15  
   22.16      /* No callback handlers. */
   22.17 +#if defined(__i386__)
   22.18      ctxt->event_callback_cs     = FLAT_KERNEL_CS;
   22.19      ctxt->event_callback_eip    = 0;
   22.20      ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
   22.21      ctxt->failsafe_callback_eip = 0;
   22.22 +#elif defined(__x86_64__)
   22.23 +    ctxt->event_callback_eip    = 0;
   22.24 +    ctxt->failsafe_callback_eip = 0;
   22.25 +    ctxt->syscall_callback_eip  = 0;
   22.26 +#endif
   22.27  
   22.28      memset( &launch_op, 0, sizeof(launch_op) );
   22.29  
    23.1 --- a/xen/arch/x86/mm.c	Wed Mar 30 18:13:34 2005 +0000
    23.2 +++ b/xen/arch/x86/mm.c	Wed Mar 30 18:17:26 2005 +0000
    23.3 @@ -111,6 +111,13 @@
    23.4  #define MEM_LOG(_f, _a...) ((void)0)
    23.5  #endif
    23.6  
    23.7 +/*
    23.8 + * Both do_mmuext_op() and do_mmu_update():
    23.9 + * We steal the m.s.b. of the @count parameter to indicate whether this
   23.10 + * invocation of do_mmu_update() is resuming a previously preempted call.
   23.11 + */
   23.12 +#define MMU_UPDATE_PREEMPTED          (~(~0U>>1))
   23.13 +
   23.14  static int alloc_l2_table(struct pfn_info *page);
   23.15  static int alloc_l1_table(struct pfn_info *page);
   23.16  static int get_page_from_pagenr(unsigned long page_nr, struct domain *d);
   23.17 @@ -128,7 +135,7 @@ static int mod_l1_entry(l1_pgentry_t *, 
   23.18  static struct {
   23.19  #define DOP_FLUSH_TLB   (1<<0) /* Flush the TLB.                 */
   23.20  #define DOP_RELOAD_LDT  (1<<1) /* Reload the LDT shadow mapping. */
   23.21 -    unsigned long  deferred_ops;
   23.22 +    unsigned int   deferred_ops;
   23.23      /* If non-NULL, specifies a foreign subject domain for some operations. */
   23.24      struct domain *foreign;
   23.25  } __cacheline_aligned percpu_info[NR_CPUS];
   23.26 @@ -199,12 +206,16 @@ void write_ptbase(struct exec_domain *ed
   23.27      write_cr3(pagetable_val(ed->arch.monitor_table));
   23.28  }
   23.29  
   23.30 -static void __invalidate_shadow_ldt(struct exec_domain *d)
   23.31 +
   23.32 +static inline void invalidate_shadow_ldt(struct exec_domain *d)
   23.33  {
   23.34      int i;
   23.35      unsigned long pfn;
   23.36      struct pfn_info *page;
   23.37      
   23.38 +    if ( d->arch.shadow_ldt_mapcnt == 0 )
   23.39 +        return;
   23.40 +
   23.41      d->arch.shadow_ldt_mapcnt = 0;
   23.42  
   23.43      for ( i = 16; i < 32; i++ )
   23.44 @@ -223,13 +234,6 @@ static void __invalidate_shadow_ldt(stru
   23.45  }
   23.46  
   23.47  
   23.48 -static inline void invalidate_shadow_ldt(struct exec_domain *d)
   23.49 -{
   23.50 -    if ( d->arch.shadow_ldt_mapcnt != 0 )
   23.51 -        __invalidate_shadow_ldt(d);
   23.52 -}
   23.53 -
   23.54 -
   23.55  static int alloc_segdesc_page(struct pfn_info *page)
   23.56  {
   23.57      struct desc_struct *descs;
   23.58 @@ -1251,401 +1255,409 @@ int new_guest_cr3(unsigned long pfn)
   23.59      return okay;
   23.60  }
   23.61  
   23.62 -static int do_extended_command(unsigned long ptr, unsigned long val)
   23.63 +static void process_deferred_ops(unsigned int cpu)
   23.64  {
   23.65 -    int okay = 1, cpu = smp_processor_id();
   23.66 -    unsigned int cmd = val & MMUEXT_CMD_MASK, type;
   23.67 -    unsigned long pfn = ptr >> PAGE_SHIFT;
   23.68 -    struct pfn_info *page = &frame_table[pfn];
   23.69 -    struct exec_domain *ed = current;
   23.70 -    struct domain *d = ed->domain, *e;
   23.71 -    u32 x, y, _d, _nd;
   23.72 -    domid_t domid;
   23.73 -    grant_ref_t gntref;
   23.74 -
   23.75 -    switch ( cmd )
   23.76 +    unsigned int deferred_ops;
   23.77 +
   23.78 +    deferred_ops = percpu_info[cpu].deferred_ops;
   23.79 +    percpu_info[cpu].deferred_ops = 0;
   23.80 +
   23.81 +    if ( deferred_ops & DOP_FLUSH_TLB )
   23.82 +        local_flush_tlb();
   23.83 +        
   23.84 +    if ( deferred_ops & DOP_RELOAD_LDT )
   23.85 +        (void)map_ldt_shadow_page(0);
   23.86 +
   23.87 +    if ( unlikely(percpu_info[cpu].foreign != NULL) )
   23.88      {
   23.89 -    case MMUEXT_PIN_L1_TABLE:
   23.90 -        /*
   23.91 -         * We insist that, if you pin an L1 page, it's the first thing that
   23.92 -         * you do to it. This is because we require the backptr to still be
   23.93 -         * mutable. This assumption seems safe.
   23.94 -         */
   23.95 -        type = PGT_l1_page_table | PGT_va_mutable;
   23.96 -
   23.97 -    pin_page:
   23.98 -        okay = get_page_and_type_from_pagenr(pfn, type, FOREIGNDOM);
   23.99 -        if ( unlikely(!okay) )
  23.100 +        put_domain(percpu_info[cpu].foreign);
  23.101 +        percpu_info[cpu].foreign = NULL;
  23.102 +    }
  23.103 +}
  23.104 +
  23.105 +static int set_foreigndom(unsigned int cpu, domid_t domid)
  23.106 +{
  23.107 +    struct domain *e, *d = current->domain;
  23.108 +    int okay = 1;
  23.109 +
  23.110 +    if ( (e = percpu_info[cpu].foreign) != NULL )
  23.111 +        put_domain(e);
  23.112 +    percpu_info[cpu].foreign = NULL;
  23.113 +    
  23.114 +    if ( domid == DOMID_SELF )
  23.115 +        goto out;
  23.116 +
  23.117 +    if ( !IS_PRIV(d) )
  23.118 +    {
  23.119 +        switch ( domid )
  23.120          {
  23.121 -            MEM_LOG("Error while pinning pfn %p", pfn);
  23.122 +        case DOMID_IO:
  23.123 +            get_knownalive_domain(dom_io);
  23.124 +            percpu_info[cpu].foreign = dom_io;
  23.125              break;
  23.126 -        }
  23.127 -
  23.128 -        if ( unlikely(test_and_set_bit(_PGT_pinned,
  23.129 -                                       &page->u.inuse.type_info)) )
  23.130 -        {
  23.131 -            MEM_LOG("Pfn %p already pinned", pfn);
  23.132 -            put_page_and_type(page);
  23.133 +        default:
  23.134 +            MEM_LOG("Dom %u cannot set foreign dom\n", d->id);
  23.135              okay = 0;
  23.136              break;
  23.137          }
  23.138 -
  23.139 -        break;
  23.140 -
  23.141 -    case MMUEXT_PIN_L2_TABLE:
  23.142 -        type = PGT_l2_page_table;
  23.143 -        goto pin_page;
  23.144 -
  23.145 -#ifdef __x86_64__
  23.146 -    case MMUEXT_PIN_L3_TABLE:
  23.147 -        type = PGT_l3_page_table;
  23.148 -        goto pin_page;
  23.149 -
  23.150 -    case MMUEXT_PIN_L4_TABLE:
  23.151 -        type = PGT_l4_page_table;
  23.152 -        goto pin_page;
  23.153 -#endif /* __x86_64__ */
  23.154 -
  23.155 -    case MMUEXT_UNPIN_TABLE:
  23.156 -        if ( unlikely(!(okay = get_page_from_pagenr(pfn, FOREIGNDOM))) )
  23.157 -        {
  23.158 -            MEM_LOG("Page %p bad domain (dom=%p)",
  23.159 -                    ptr, page_get_owner(page));
  23.160 -        }
  23.161 -        else if ( likely(test_and_clear_bit(_PGT_pinned, 
  23.162 -                                            &page->u.inuse.type_info)) )
  23.163 -        {
  23.164 -            put_page_and_type(page);
  23.165 -            put_page(page);
  23.166 -        }
  23.167 -        else
  23.168 -        {
  23.169 -            okay = 0;
  23.170 -            put_page(page);
  23.171 -            MEM_LOG("Pfn %p not pinned", pfn);
  23.172 -        }
  23.173 -        break;
  23.174 -
  23.175 -    case MMUEXT_NEW_BASEPTR:
  23.176 -        okay = new_guest_cr3(pfn);
  23.177 -        break;
  23.178 -        
  23.179 -#ifdef __x86_64__
  23.180 -    case MMUEXT_NEW_USER_BASEPTR:
  23.181 -        okay = get_page_and_type_from_pagenr(pfn, PGT_root_page_table, d);
  23.182 -        if ( unlikely(!okay) )
  23.183 -        {
  23.184 -            MEM_LOG("Error while installing new baseptr %p", pfn);
  23.185 -        }
  23.186 -        else
  23.187 -        {
  23.188 -            unsigned long old_pfn =
  23.189 -                pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT;
  23.190 -            ed->arch.guest_table_user = mk_pagetable(pfn << PAGE_SHIFT);
  23.191 -            if ( old_pfn != 0 )
  23.192 -                put_page_and_type(&frame_table[old_pfn]);
  23.193 -        }
  23.194 -        break;
  23.195 -#endif
  23.196 -        
  23.197 -    case MMUEXT_TLB_FLUSH:
  23.198 -        percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
  23.199 -        break;
  23.200 -    
  23.201 -    case MMUEXT_INVLPG:
  23.202 -        __flush_tlb_one(ptr);
  23.203 -        break;
  23.204 -
  23.205 -    case MMUEXT_FLUSH_CACHE:
  23.206 -        if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
  23.207 -        {
  23.208 -            MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
  23.209 -            okay = 0;
  23.210 -        }
  23.211 -        else
  23.212 -        {
  23.213 -            wbinvd();
  23.214 -        }
  23.215 -        break;
  23.216 -
  23.217 -    case MMUEXT_SET_LDT:
  23.218 +    }
  23.219 +    else
  23.220      {
  23.221 -        unsigned long ents = val >> MMUEXT_CMD_SHIFT;
  23.222 -        if ( ((ptr & (PAGE_SIZE-1)) != 0) || 
  23.223 -             (ents > 8192) ||
  23.224 -             ((ptr+ents*LDT_ENTRY_SIZE) < ptr) ||
  23.225 -             ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) )
  23.226 -        {
  23.227 -            okay = 0;
  23.228 -            MEM_LOG("Bad args to SET_LDT: ptr=%p, ents=%p", ptr, ents);
  23.229 -        }
  23.230 -        else if ( (ed->arch.ldt_ents != ents) || 
  23.231 -                  (ed->arch.ldt_base != ptr) )
  23.232 -        {
  23.233 -            invalidate_shadow_ldt(ed);
  23.234 -            ed->arch.ldt_base = ptr;
  23.235 -            ed->arch.ldt_ents = ents;
  23.236 -            load_LDT(ed);
  23.237 -            percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
  23.238 -            if ( ents != 0 )
  23.239 -                percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
  23.240 -        }
  23.241 -        break;
  23.242 -    }
  23.243 -
  23.244 -    case MMUEXT_SET_FOREIGNDOM:
  23.245 -        domid = (domid_t)(val >> 16);
  23.246 -
  23.247 -        if ( (e = percpu_info[cpu].foreign) != NULL )
  23.248 -            put_domain(e);
  23.249 -        percpu_info[cpu].foreign = NULL;
  23.250 -
  23.251 -        if ( !IS_PRIV(d) )
  23.252 +        percpu_info[cpu].foreign = e = find_domain_by_id(domid);
  23.253 +        if ( e == NULL )
  23.254          {
  23.255              switch ( domid )
  23.256              {
  23.257 +            case DOMID_XEN:
  23.258 +                get_knownalive_domain(dom_xen);
  23.259 +                percpu_info[cpu].foreign = dom_xen;
  23.260 +                break;
  23.261              case DOMID_IO:
  23.262                  get_knownalive_domain(dom_io);
  23.263                  percpu_info[cpu].foreign = dom_io;
  23.264                  break;
  23.265              default:
  23.266 -                MEM_LOG("Dom %u cannot set foreign dom\n", d->id);
  23.267 +                MEM_LOG("Unknown domain '%u'", domid);
  23.268                  okay = 0;
  23.269                  break;
  23.270              }
  23.271          }
  23.272 -        else
  23.273 +    }
  23.274 +
  23.275 + out:
  23.276 +    return okay;
  23.277 +}
  23.278 +
  23.279 +int do_mmuext_op(
  23.280 +    struct mmuext_op *uops,
  23.281 +    unsigned int count,
  23.282 +    unsigned int *pdone,
  23.283 +    unsigned int foreigndom)
  23.284 +{
  23.285 +    struct mmuext_op op;
  23.286 +    int rc = 0, i = 0, okay, cpu = smp_processor_id();
  23.287 +    unsigned int type, done = 0;
  23.288 +    struct pfn_info *page;
  23.289 +    struct exec_domain *ed = current;
  23.290 +    struct domain *d = ed->domain, *e;
  23.291 +    u32 x, y, _d, _nd;
  23.292 +
  23.293 +    LOCK_BIGLOCK(d);
  23.294 +
  23.295 +    cleanup_writable_pagetable(d);
  23.296 +
  23.297 +    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
  23.298 +    {
  23.299 +        count &= ~MMU_UPDATE_PREEMPTED;
  23.300 +        if ( unlikely(pdone != NULL) )
  23.301 +            (void)get_user(done, pdone);
  23.302 +    }
  23.303 +
  23.304 +    if ( !set_foreigndom(cpu, foreigndom) )
  23.305 +    {
  23.306 +        rc = -EINVAL;
  23.307 +        goto out;
  23.308 +    }
  23.309 +
  23.310 +    if ( unlikely(!array_access_ok(VERIFY_READ, uops, count, sizeof(op))) )
  23.311 +    {
  23.312 +        rc = -EFAULT;
  23.313 +        goto out;
  23.314 +    }
  23.315 +
  23.316 +    for ( i = 0; i < count; i++ )
  23.317 +    {
  23.318 +        if ( hypercall_preempt_check() )
  23.319          {
  23.320 -            percpu_info[cpu].foreign = e = find_domain_by_id(domid);
  23.321 -            if ( e == NULL )
  23.322 -            {
  23.323 -                switch ( domid )
  23.324 -                {
  23.325 -                case DOMID_XEN:
  23.326 -                    get_knownalive_domain(dom_xen);
  23.327 -                    percpu_info[cpu].foreign = dom_xen;
  23.328 -                    break;
  23.329 -                case DOMID_IO:
  23.330 -                    get_knownalive_domain(dom_io);
  23.331 -                    percpu_info[cpu].foreign = dom_io;
  23.332 -                    break;
  23.333 -                default:
  23.334 -                    MEM_LOG("Unknown domain '%u'", domid);
  23.335 -                    okay = 0;
  23.336 -                    break;
  23.337 -                }
  23.338 -            }
  23.339 -        }
  23.340 -        break;
  23.341 -
  23.342 -    case MMUEXT_TRANSFER_PAGE:
  23.343 -        domid  = (domid_t)(val >> 16);
  23.344 -        gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF));
  23.345 -        
  23.346 -        if ( unlikely(IS_XEN_HEAP_FRAME(page)) ||
  23.347 -             unlikely(!pfn_is_ram(pfn)) ||
  23.348 -             unlikely((e = find_domain_by_id(domid)) == NULL) )
  23.349 -        {
  23.350 -            MEM_LOG("Bad frame (%p) or bad domid (%d).\n", pfn, domid);
  23.351 -            okay = 0;
  23.352 +            rc = hypercall4_create_continuation(
  23.353 +                __HYPERVISOR_mmuext_op, uops,
  23.354 +                (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
  23.355              break;
  23.356          }
  23.357  
  23.358 -        spin_lock(&d->page_alloc_lock);
  23.359 -
  23.360 -        /*
  23.361 -         * The tricky bit: atomically release ownership while there is just one
  23.362 -         * benign reference to the page (PGC_allocated). If that reference
  23.363 -         * disappears then the deallocation routine will safely spin.
  23.364 -         */
  23.365 -        _d  = pickle_domptr(d);
  23.366 -        _nd = page->u.inuse._domain;
  23.367 -        y   = page->count_info;
  23.368 -        do {
  23.369 -            x = y;
  23.370 -            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
  23.371 -                          (1|PGC_allocated)) ||
  23.372 -                 unlikely(_nd != _d) )
  23.373 +        if ( unlikely(__copy_from_user(&op, uops, sizeof(op)) != 0) )
  23.374 +        {
  23.375 +            MEM_LOG("Bad __copy_from_user");
  23.376 +            rc = -EFAULT;
  23.377 +            break;
  23.378 +        }
  23.379 +
  23.380 +        okay = 1;
  23.381 +        page = &frame_table[op.mfn];
  23.382 +
  23.383 +        switch ( op.cmd )
  23.384 +        {
  23.385 +        case MMUEXT_PIN_L1_TABLE:
  23.386 +            /*
  23.387 +             * We insist that, if you pin an L1 page, it's the first thing that
  23.388 +             * you do to it. This is because we require the backptr to still be
  23.389 +             * mutable. This assumption seems safe.
  23.390 +             */
  23.391 +            type = PGT_l1_page_table | PGT_va_mutable;
  23.392 +
  23.393 +        pin_page:
  23.394 +            okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM);
  23.395 +            if ( unlikely(!okay) )
  23.396              {
  23.397 -                MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
  23.398 -                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
  23.399 -                        d, d->id, unpickle_domptr(_nd), x, 
  23.400 -                        page->u.inuse.type_info);
  23.401 -                spin_unlock(&d->page_alloc_lock);
  23.402 -                put_domain(e);
  23.403 -                return 0;
  23.404 +                MEM_LOG("Error while pinning MFN %p", op.mfn);
  23.405 +                break;
  23.406              }
  23.407 -            __asm__ __volatile__(
  23.408 -                LOCK_PREFIX "cmpxchg8b %2"
  23.409 -                : "=d" (_nd), "=a" (y),
  23.410 -                "=m" (*(volatile u64 *)(&page->count_info))
  23.411 -                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
  23.412 -        } 
  23.413 -        while ( unlikely(_nd != _d) || unlikely(y != x) );
  23.414 -
  23.415 -        /*
  23.416 -         * Unlink from 'd'. At least one reference remains (now anonymous), so
  23.417 -         * noone else is spinning to try to delete this page from 'd'.
  23.418 -         */
  23.419 -        d->tot_pages--;
  23.420 -        list_del(&page->list);
  23.421 +            
  23.422 +            if ( unlikely(test_and_set_bit(_PGT_pinned,
  23.423 +                                           &page->u.inuse.type_info)) )
  23.424 +            {
  23.425 +                MEM_LOG("MFN %p already pinned", op.mfn);
  23.426 +                put_page_and_type(page);
  23.427 +                okay = 0;
  23.428 +                break;
  23.429 +            }
  23.430 +            
  23.431 +            break;
  23.432 +
  23.433 +        case MMUEXT_PIN_L2_TABLE:
  23.434 +            type = PGT_l2_page_table;
  23.435 +            goto pin_page;
  23.436 +
  23.437 +#ifdef __x86_64__
  23.438 +        case MMUEXT_PIN_L3_TABLE:
  23.439 +            type = PGT_l3_page_table;
  23.440 +            goto pin_page;
  23.441 +
  23.442 +        case MMUEXT_PIN_L4_TABLE:
  23.443 +            type = PGT_l4_page_table;
  23.444 +            goto pin_page;
  23.445 +#endif /* __x86_64__ */
  23.446 +
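        /*
         * Unpinning drops two references: the pin/type reference taken at
         * pin time (via put_page_and_type) and the general reference taken
         * just below by get_page_from_pagenr (via put_page).
         */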
  23.447 +        case MMUEXT_UNPIN_TABLE:
  23.448 +            if ( unlikely(!(okay = get_page_from_pagenr(op.mfn, FOREIGNDOM))) )
  23.449 +            {
  23.450 +                MEM_LOG("MFN %p bad domain (dom=%p)",
  23.451 +                        op.mfn, page_get_owner(page));
  23.452 +            }
  23.453 +            else if ( likely(test_and_clear_bit(_PGT_pinned, 
  23.454 +                                                &page->u.inuse.type_info)) )
  23.455 +            {
  23.456 +                put_page_and_type(page);
  23.457 +                put_page(page);
  23.458 +            }
  23.459 +            else
  23.460 +            {
  23.461 +                okay = 0;
  23.462 +                put_page(page);
  23.463 +                MEM_LOG("MFN %p not pinned", op.mfn);
  23.464 +            }
  23.465 +            break;
  23.466 +
  23.467 +        case MMUEXT_NEW_BASEPTR:
  23.468 +            okay = new_guest_cr3(op.mfn);
  23.469 +            break;
  23.470          
  23.471 -        spin_unlock(&d->page_alloc_lock);
  23.472 -
  23.473 -        spin_lock(&e->page_alloc_lock);
  23.474 -
  23.475 -        /*
  23.476 -         * Check that 'e' will accept the page and has reservation headroom.
  23.477 -         * Also, a domain mustn't have PGC_allocated pages when it is dying.
  23.478 -         */
  23.479 -        ASSERT(e->tot_pages <= e->max_pages);
  23.480 -        if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
  23.481 -             unlikely(e->tot_pages == e->max_pages) ||
  23.482 -             unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) )
  23.483 +#ifdef __x86_64__
  23.484 +        case MMUEXT_NEW_USER_BASEPTR:
  23.485 +            okay = get_page_and_type_from_pagenr(
  23.486 +                op.mfn, PGT_root_page_table, d);
  23.487 +            if ( unlikely(!okay) )
  23.488 +            {
  23.489 +                MEM_LOG("Error while installing new MFN %p", op.mfn);
  23.490 +            }
  23.491 +            else
  23.492 +            {
  23.493 +                unsigned long old_mfn =
  23.494 +                    pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT;
  23.495 +                ed->arch.guest_table_user = mk_pagetable(op.mfn << PAGE_SHIFT);
  23.496 +                if ( old_mfn != 0 )
  23.497 +                    put_page_and_type(&frame_table[old_mfn]);
  23.498 +            }
  23.499 +            break;
  23.500 +#endif
  23.501 +        
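        /*
         * A local TLB flush is deferred (DOP_FLUSH_TLB) until hypercall
         * exit, whereas a local INVLPG takes effect immediately. The _MULTI
         * and _ALL variants below currently all flush every VCPU in
         * d->cpuset; the XXX markers note where per-op cpusets should
         * eventually be honoured.
         */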
  23.502 +        case MMUEXT_TLB_FLUSH_LOCAL:
  23.503 +            percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
  23.504 +            break;
  23.505 +    
  23.506 +        case MMUEXT_INVLPG_LOCAL:
  23.507 +            __flush_tlb_one(op.linear_addr);
  23.508 +            break;
  23.509 +
  23.510 +        case MMUEXT_TLB_FLUSH_MULTI:
  23.511 +            flush_tlb_mask(d->cpuset); /* XXX KAF XXX */
  23.512 +            break;
  23.513 +    
  23.514 +        case MMUEXT_INVLPG_MULTI:
  23.515 +            flush_tlb_mask(d->cpuset); /* XXX KAF XXX */
  23.516 +            break;
  23.517 +
  23.518 +        case MMUEXT_TLB_FLUSH_ALL:
  23.519 +            flush_tlb_mask(d->cpuset);
  23.520 +            break;
  23.521 +    
  23.522 +        case MMUEXT_INVLPG_ALL:
  23.523 +            flush_tlb_mask(d->cpuset); /* XXX KAF XXX */
  23.524 +            break;
  23.525 +
  23.526 +        case MMUEXT_FLUSH_CACHE:
  23.527 +            if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
  23.528 +            {
  23.529 +                MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
  23.530 +                okay = 0;
  23.531 +            }
  23.532 +            else
  23.533 +            {
  23.534 +                wbinvd();
  23.535 +            }
  23.536 +            break;
  23.537 +
  23.538 +        case MMUEXT_SET_LDT:
  23.539          {
  23.540 -            MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
  23.541 -                    "provided a bad grant ref, or is dying (%p).\n",
  23.542 -                    e->tot_pages, e->max_pages, e->d_flags);
  23.543 +            unsigned long ptr  = op.linear_addr;
  23.544 +            unsigned long ents = op.nr_ents;
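            /*
             * Checks below: the base must be page-aligned; an x86 LDT holds
             * at most 8192 entries; (ptr + ents*LDT_ENTRY_SIZE) < ptr
             * rejects address wraparound; and the table must lie wholly
             * below PAGE_OFFSET, within guest-accessible address space.
             */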
  23.545 +            if ( ((ptr & (PAGE_SIZE-1)) != 0) || 
  23.546 +                 (ents > 8192) ||
  23.547 +                 ((ptr+ents*LDT_ENTRY_SIZE) < ptr) ||
  23.548 +                 ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) )
  23.549 +            {
  23.550 +                okay = 0;
  23.551 +                MEM_LOG("Bad args to SET_LDT: ptr=%p, ents=%p", ptr, ents);
  23.552 +            }
  23.553 +            else if ( (ed->arch.ldt_ents != ents) || 
  23.554 +                      (ed->arch.ldt_base != ptr) )
  23.555 +            {
  23.556 +                invalidate_shadow_ldt(ed);
  23.557 +                ed->arch.ldt_base = ptr;
  23.558 +                ed->arch.ldt_ents = ents;
  23.559 +                load_LDT(ed);
  23.560 +                percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
  23.561 +                if ( ents != 0 )
  23.562 +                    percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
  23.563 +            }
  23.564 +            break;
  23.565 +        }
  23.566 +
  23.567 +        case MMUEXT_REASSIGN_PAGE:
  23.568 +            if ( unlikely(!IS_PRIV(d)) )
  23.569 +            {
  23.570 +                MEM_LOG("Dom %u has no reassignment priv", d->id);
  23.571 +                okay = 0;
  23.572 +                break;
  23.573 +            }
  23.574 +            
  23.575 +            e = percpu_info[cpu].foreign;
  23.576 +            if ( unlikely(e == NULL) )
  23.577 +            {
  23.578 +                MEM_LOG("No FOREIGNDOM to reassign MFN %p to", op.mfn);
  23.579 +                okay = 0;
  23.580 +                break;
  23.581 +            }
  23.582 +            
  23.583 +            /*
  23.584 +             * Grab both page_list locks, in order. This prevents the page from
  23.585 +             * disappearing elsewhere while we modify the owner, and we'll need
  23.586 +             * both locks if we're successful so that we can change lists.
  23.587 +             */
  23.588 +            if ( d < e )
  23.589 +            {
  23.590 +                spin_lock(&d->page_alloc_lock);
  23.591 +                spin_lock(&e->page_alloc_lock);
  23.592 +            }
  23.593 +            else
  23.594 +            {
  23.595 +                spin_lock(&e->page_alloc_lock);
  23.596 +                spin_lock(&d->page_alloc_lock);
  23.597 +            }
  23.598 +            
  23.599 +            /* A domain shouldn't have PGC_allocated pages when it is dying. */
  23.600 +            if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
  23.601 +                 unlikely(IS_XEN_HEAP_FRAME(page)) )
  23.602 +            {
  23.603 +                MEM_LOG("Reassignment page is Xen heap, or dest dom is dying.");
  23.604 +                okay = 0;
  23.605 +                goto reassign_fail;
  23.606 +            }
  23.607 +
  23.608 +            /*
  23.609 +             * The tricky bit: atomically change owner while there is just one
  23.610 +             * benign reference to the page (PGC_allocated). If that reference
  23.611 +             * disappears then the deallocation routine will safely spin.
  23.612 +             */
  23.613 +            _d  = pickle_domptr(d);
  23.614 +            _nd = page->u.inuse._domain;
  23.615 +            y   = page->count_info;
  23.616 +            do {
  23.617 +                x = y;
  23.618 +                if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
  23.619 +                              (1|PGC_allocated)) ||
  23.620 +                     unlikely(_nd != _d) )
  23.621 +                {
  23.622 +                    MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
  23.623 +                            " caf=%08x, taf=%08x\n", page_to_pfn(page),
  23.624 +                            d, d->id, unpickle_domptr(_nd), x,
  23.625 +                            page->u.inuse.type_info);
  23.626 +                    okay = 0;
  23.627 +                    goto reassign_fail;
  23.628 +                }
  23.629 +                __asm__ __volatile__(
  23.630 +                    LOCK_PREFIX "cmpxchg8b %3"
  23.631 +                    : "=d" (_nd), "=a" (y), "=c" (e),
  23.632 +                    "=m" (*(volatile u64 *)(&page->count_info))
  23.633 +                    : "0" (_d), "1" (x), "c" (e), "b" (x) );
  23.634 +            } 
  23.635 +            while ( unlikely(_nd != _d) || unlikely(y != x) );
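            /*
             * The cmpxchg8b above compares EDX:EAX ("1"(x), "0"(_d): the
             * expected count_info and owner) against the 64-bit pair in
             * memory, and on a match stores ECX:EBX ("c"(e), "b"(x): the
             * new owner and the unchanged count). On failure the current
             * pair is reloaded into y/_nd and the loop retries.
             */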
  23.636 +            
  23.637 +            /*
  23.638 +             * Unlink from 'd'. We transferred at least one reference to 'e',
  23.639 +             * so no one else is spinning to try to delete this page from 'd'.
  23.640 +             */
  23.641 +            d->tot_pages--;
  23.642 +            list_del(&page->list);
  23.643 +            
  23.644 +            /*
  23.645 +             * Add the page to 'e'. Someone may already have removed the last
  23.646 +             * reference and want to remove the page from 'e'. However, we have
  23.647 +             * the lock so they'll spin waiting for us.
  23.648 +             */
  23.649 +            if ( unlikely(e->tot_pages++ == 0) )
  23.650 +                get_knownalive_domain(e);
  23.651 +            list_add_tail(&page->list, &e->page_list);
  23.652 +            
  23.653 +        reassign_fail:        
  23.654 +            spin_unlock(&d->page_alloc_lock);
  23.655              spin_unlock(&e->page_alloc_lock);
  23.656 -            put_domain(e);
  23.657 +            break;
  23.658 +            
  23.659 +        default:
  23.660 +            MEM_LOG("Invalid extended pt command 0x%p", op.cmd);
  23.661              okay = 0;
  23.662              break;
  23.663          }
  23.664  
  23.665 -        /* Okay, add the page to 'e'. */
  23.666 -        if ( unlikely(e->tot_pages++ == 0) )
  23.667 -            get_knownalive_domain(e);
  23.668 -        list_add_tail(&page->list, &e->page_list);
  23.669 -        page_set_owner(page, e);
  23.670 -
  23.671 -        spin_unlock(&e->page_alloc_lock);
  23.672 -
  23.673 -        /* Transfer is all done: tell the guest about its new page frame. */
  23.674 -        gnttab_notify_transfer(e, gntref, pfn);
  23.675 -        
  23.676 -        put_domain(e);
  23.677 -        break;
  23.678 -
  23.679 -    case MMUEXT_REASSIGN_PAGE:
  23.680 -        if ( unlikely(!IS_PRIV(d)) )
  23.681 +        if ( unlikely(!okay) )
  23.682          {
  23.683 -            MEM_LOG("Dom %u has no reassignment priv", d->id);
  23.684 -            okay = 0;
  23.685 -            break;
  23.686 -        }
  23.687 -
  23.688 -        e = percpu_info[cpu].foreign;
  23.689 -        if ( unlikely(e == NULL) )
  23.690 -        {
  23.691 -            MEM_LOG("No FOREIGNDOM to reassign pfn %p to", pfn);
  23.692 -            okay = 0;
  23.693 +            rc = -EINVAL;
  23.694              break;
  23.695          }
  23.696  
  23.697 -        /*
  23.698 -         * Grab both page_list locks, in order. This prevents the page from
  23.699 -         * disappearing elsewhere while we modify the owner, and we'll need
  23.700 -         * both locks if we're successful so that we can change lists.
  23.701 -         */
  23.702 -        if ( d < e )
  23.703 -        {
  23.704 -            spin_lock(&d->page_alloc_lock);
  23.705 -            spin_lock(&e->page_alloc_lock);
  23.706 -        }
  23.707 -        else
  23.708 -        {
  23.709 -            spin_lock(&e->page_alloc_lock);
  23.710 -            spin_lock(&d->page_alloc_lock);
  23.711 -        }
  23.712 -
  23.713 -        /* A domain shouldn't have PGC_allocated pages when it is dying. */
  23.714 -        if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
  23.715 -             unlikely(IS_XEN_HEAP_FRAME(page)) )
  23.716 -        {
  23.717 -            MEM_LOG("Reassignment page is Xen heap, or dest dom is dying.");
  23.718 -            okay = 0;
  23.719 -            goto reassign_fail;
  23.720 -        }
  23.721 -
  23.722 -        /*
  23.723 -         * The tricky bit: atomically change owner while there is just one
  23.724 -         * benign reference to the page (PGC_allocated). If that reference
  23.725 -         * disappears then the deallocation routine will safely spin.
  23.726 -         */
  23.727 -        _d  = pickle_domptr(d);
  23.728 -        _nd = page->u.inuse._domain;
  23.729 -        y   = page->count_info;
  23.730 -        do {
  23.731 -            x = y;
  23.732 -            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
  23.733 -                          (1|PGC_allocated)) ||
  23.734 -                 unlikely(_nd != _d) )
  23.735 -            {
  23.736 -                MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
  23.737 -                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
  23.738 -                        d, d->id, unpickle_domptr(_nd), x,
  23.739 -                        page->u.inuse.type_info);
  23.740 -                okay = 0;
  23.741 -                goto reassign_fail;
  23.742 -            }
  23.743 -            __asm__ __volatile__(
  23.744 -                LOCK_PREFIX "cmpxchg8b %3"
  23.745 -                : "=d" (_nd), "=a" (y), "=c" (e),
  23.746 -                "=m" (*(volatile u64 *)(&page->count_info))
  23.747 -                : "0" (_d), "1" (x), "c" (e), "b" (x) );
  23.748 -        } 
  23.749 -        while ( unlikely(_nd != _d) || unlikely(y != x) );
  23.750 -        
  23.751 -        /*
  23.752 -         * Unlink from 'd'. We transferred at least one reference to 'e', so
  23.753 -         * noone else is spinning to try to delete this page from 'd'.
  23.754 -         */
  23.755 -        d->tot_pages--;
  23.756 -        list_del(&page->list);
  23.757 -        
  23.758 -        /*
  23.759 -         * Add the page to 'e'. Someone may already have removed the last
  23.760 -         * reference and want to remove the page from 'e'. However, we have
  23.761 -         * the lock so they'll spin waiting for us.
  23.762 -         */
  23.763 -        if ( unlikely(e->tot_pages++ == 0) )
  23.764 -            get_knownalive_domain(e);
  23.765 -        list_add_tail(&page->list, &e->page_list);
  23.766 -
  23.767 -    reassign_fail:        
  23.768 -        spin_unlock(&d->page_alloc_lock);
  23.769 -        spin_unlock(&e->page_alloc_lock);
  23.770 -        break;
  23.771 -
  23.772 -    case MMUEXT_CLEAR_FOREIGNDOM:
  23.773 -        if ( (e = percpu_info[cpu].foreign) != NULL )
  23.774 -            put_domain(e);
  23.775 -        percpu_info[cpu].foreign = NULL;
  23.776 -        break;
  23.777 -
  23.778 -    default:
  23.779 -        MEM_LOG("Invalid extended pt command 0x%p", val & MMUEXT_CMD_MASK);
  23.780 -        okay = 0;
  23.781 -        break;
  23.782 +        uops++;
  23.783      }
  23.784  
  23.785 -    return okay;
  23.786 + out:
  23.787 +    process_deferred_ops(cpu);
  23.788 +
  23.789 +    /* Add incremental work we have done to the @done output parameter. */
  23.790 +    if ( unlikely(pdone != NULL) )
  23.791 +        __put_user(done + i, pdone);
  23.792 +
  23.793 +    UNLOCK_BIGLOCK(d);
  23.794 +    return rc;
  23.795  }
  23.796  
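For reference, a guest reaches do_mmuext_op() through the new hypercall
number 26. The sketch below is hypothetical guest-side glue -- a minimal
wrapper assuming a _hypercall4-style trap macro (the real helpers in the
Linux/BSD sparse trees may differ) -- batching a pin with a base-pointer
switch:

/* Hypothetical wrapper; _hypercall4 is an assumed trap macro. */
static inline int HYPERVISOR_mmuext_op(struct mmuext_op *ops,
                                       unsigned int count,
                                       unsigned int *pdone,
                                       unsigned int foreigndom)
{
    return _hypercall4(int, mmuext_op, ops, count, pdone, foreigndom);
}

/* Pin a fresh L2 table and install it as the page-table base. */
static int install_new_pagetable(unsigned long mfn)
{
    struct mmuext_op op[2];
    unsigned int done = 0;

    op[0].cmd = MMUEXT_PIN_L2_TABLE;
    op[0].mfn = mfn;
    op[1].cmd = MMUEXT_NEW_BASEPTR;
    op[1].mfn = mfn;

    /* DOMID_SELF: no foreign domain is involved. */
    return HYPERVISOR_mmuext_op(op, 2, &done, DOMID_SELF);
}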
  23.797  int do_mmu_update(
  23.798 -    mmu_update_t *ureqs, unsigned int count, unsigned int *pdone)
  23.799 +    mmu_update_t *ureqs,
  23.800 +    unsigned int count,
  23.801 +    unsigned int *pdone,
  23.802 +    unsigned int foreigndom)
  23.803  {
  23.804 -/*
  23.805 - * We steal the m.s.b. of the @count parameter to indicate whether this
  23.806 - * invocation of do_mmu_update() is resuming a previously preempted call.
  23.807 - * We steal the next 15 bits to remember the current FOREIGNDOM.
  23.808 - */
  23.809 -#define MMU_UPDATE_PREEMPTED          (~(~0U>>1))
  23.810 -#define MMU_UPDATE_PREEMPT_FDOM_SHIFT ((sizeof(int)*8)-16)
  23.811 -#define MMU_UPDATE_PREEMPT_FDOM_MASK  (0x7FFFU<<MMU_UPDATE_PREEMPT_FDOM_SHIFT)
  23.812 -
  23.813      mmu_update_t req;
  23.814 -    unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
  23.815 +    unsigned long va = 0, pfn, prev_pfn = 0;
  23.816      struct pfn_info *page;
  23.817      int rc = 0, okay = 1, i = 0, cpu = smp_processor_id();
  23.818      unsigned int cmd, done = 0;
  23.819 @@ -1654,7 +1666,6 @@ int do_mmu_update(
  23.820      struct exec_domain *ed = current;
  23.821      struct domain *d = ed->domain;
  23.822      u32 type_info;
  23.823 -    domid_t domid;
  23.824  
  23.825      LOCK_BIGLOCK(d);
  23.826  
  23.827 @@ -1666,31 +1677,17 @@ int do_mmu_update(
  23.828      if ( unlikely(shadow_mode_translate(d)) )
  23.829          domain_crash_synchronous();
  23.830  
  23.831 -    /*
  23.832 -     * If we are resuming after preemption, read how much work we have already
  23.833 -     * done. This allows us to set the @done output parameter correctly.
  23.834 -     * We also reset FOREIGNDOM here.
  23.835 -     */
  23.836 -    if ( unlikely(count&(MMU_UPDATE_PREEMPTED|MMU_UPDATE_PREEMPT_FDOM_MASK)) )
  23.837 +    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
  23.838      {
  23.839 -        if ( !(count & MMU_UPDATE_PREEMPTED) )
  23.840 -        {
  23.841 -            /* Count overflow into private FOREIGNDOM field. */
  23.842 -            MEM_LOG("do_mmu_update count is too large");
  23.843 -            rc = -EINVAL;
  23.844 -            goto out;
  23.845 -        }
  23.846          count &= ~MMU_UPDATE_PREEMPTED;
  23.847 -        domid = count >> MMU_UPDATE_PREEMPT_FDOM_SHIFT;
  23.848 -        count &= ~MMU_UPDATE_PREEMPT_FDOM_MASK;
  23.849          if ( unlikely(pdone != NULL) )
  23.850              (void)get_user(done, pdone);
  23.851 -        if ( (domid != current->domain->id) &&
  23.852 -             !do_extended_command(0, MMUEXT_SET_FOREIGNDOM | (domid << 16)) )
  23.853 -        {
  23.854 -            rc = -EINVAL;
  23.855 -            goto out;
  23.856 -        }
  23.857 +    }
  23.858 +
  23.859 +    if ( !set_foreigndom(cpu, foreigndom) )
  23.860 +    {
  23.861 +        rc = -EINVAL;
  23.862 +        goto out;
  23.863      }
  23.864  
  23.865      perfc_incrc(calls_to_mmu_update); 
  23.866 @@ -1707,11 +1704,9 @@ int do_mmu_update(
  23.867      {
  23.868          if ( hypercall_preempt_check() )
  23.869          {
  23.870 -            rc = hypercall3_create_continuation(
  23.871 +            rc = hypercall4_create_continuation(
  23.872                  __HYPERVISOR_mmu_update, ureqs, 
  23.873 -                (count - i) |
  23.874 -                (FOREIGNDOM->id << MMU_UPDATE_PREEMPT_FDOM_SHIFT) | 
  23.875 -                MMU_UPDATE_PREEMPTED, pdone);
  23.876 +                (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
  23.877              break;
  23.878          }
  23.879  
  23.880 @@ -1863,15 +1858,6 @@ int do_mmu_update(
  23.881              put_page(&frame_table[pfn]);
  23.882              break;
  23.883  
  23.884 -            /*
  23.885 -             * MMU_EXTENDED_COMMAND: Extended command is specified
  23.886 -             * in the least-siginificant bits of the 'value' field.
  23.887 -             */
  23.888 -        case MMU_EXTENDED_COMMAND:
  23.889 -            req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
  23.890 -            okay = do_extended_command(req.ptr, req.val);
  23.891 -            break;
  23.892 -
  23.893          default:
  23.894              MEM_LOG("Invalid page update command %p", req.ptr);
  23.895              break;
  23.896 @@ -1893,20 +1879,7 @@ int do_mmu_update(
  23.897      if ( unlikely(prev_spl1e != 0) ) 
  23.898          unmap_domain_mem((void *)prev_spl1e);
  23.899  
  23.900 -    deferred_ops = percpu_info[cpu].deferred_ops;
  23.901 -    percpu_info[cpu].deferred_ops = 0;
  23.902 -
  23.903 -    if ( deferred_ops & DOP_FLUSH_TLB )
  23.904 -        local_flush_tlb();
  23.905 -        
  23.906 -    if ( deferred_ops & DOP_RELOAD_LDT )
  23.907 -        (void)map_ldt_shadow_page(0);
  23.908 -
  23.909 -    if ( unlikely(percpu_info[cpu].foreign != NULL) )
  23.910 -    {
  23.911 -        put_domain(percpu_info[cpu].foreign);
  23.912 -        percpu_info[cpu].foreign = NULL;
  23.913 -    }
  23.914 +    process_deferred_ops(cpu);
  23.915  
  23.916      /* Add incremental work we have done to the @done output parameter. */
  23.917      if ( unlikely(pdone != NULL) )
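The open-coded flush/LDT/foreign-domain cleanup deleted above is now shared
with do_mmuext_op() via process_deferred_ops(). A sketch of that helper,
reconstructed from the deleted lines (the actual definition added elsewhere
in this changeset may differ in detail):

static void process_deferred_ops(unsigned int cpu)
{
    unsigned long deferred_ops = percpu_info[cpu].deferred_ops;

    percpu_info[cpu].deferred_ops = 0;

    if ( deferred_ops & DOP_FLUSH_TLB )
        local_flush_tlb();

    if ( deferred_ops & DOP_RELOAD_LDT )
        (void)map_ldt_shadow_page(0);

    if ( unlikely(percpu_info[cpu].foreign != NULL) )
    {
        put_domain(percpu_info[cpu].foreign);
        percpu_info[cpu].foreign = NULL;
    }
}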
  23.918 @@ -2016,11 +1989,10 @@ int do_update_va_mapping(unsigned long v
  23.919                           unsigned long val, 
  23.920                           unsigned long flags)
  23.921  {
  23.922 -    struct exec_domain      *ed  = current;
  23.923 -    struct domain           *d   = ed->domain;
  23.924 -    unsigned int             cpu = ed->processor;
  23.925 -    unsigned long            deferred_ops;
  23.926 -    int                      rc = 0;
  23.927 +    struct exec_domain *ed  = current;
  23.928 +    struct domain      *d   = ed->domain;
  23.929 +    unsigned int        cpu = ed->processor;
  23.930 +    int                 rc = 0;
  23.931  
  23.932      perfc_incrc(calls_to_update_va);
  23.933  
  23.934 @@ -2046,17 +2018,25 @@ int do_update_va_mapping(unsigned long v
  23.935      if ( unlikely(shadow_mode_enabled(d)) )
  23.936          update_shadow_va_mapping(va, val, ed, d);
  23.937  
  23.938 -    deferred_ops = percpu_info[cpu].deferred_ops;
  23.939 -    percpu_info[cpu].deferred_ops = 0;
  23.940 -
  23.941 -    if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 
  23.942 -         unlikely(flags & UVMF_FLUSH_TLB) )
  23.943 +    switch ( flags & UVMF_FLUSH_MASK )
  23.944 +    {
  23.945 +    case UVMF_TLB_FLUSH_LOCAL:
  23.946          local_flush_tlb();
  23.947 -    else if ( unlikely(flags & UVMF_INVLPG) )
  23.948 +        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
  23.949 +        break;
  23.950 +    case UVMF_TLB_FLUSH_ALL:
  23.951 +        flush_tlb_mask(d->cpuset);
  23.952 +        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
  23.953 +        break;
  23.954 +    case UVMF_INVLPG_LOCAL:
  23.955          __flush_tlb_one(va);
  23.956 -
  23.957 -    if ( unlikely(deferred_ops & DOP_RELOAD_LDT) )
  23.958 -        (void)map_ldt_shadow_page(0);
  23.959 +        break;
  23.960 +    case UVMF_INVLPG_ALL:
  23.961 +        flush_tlb_mask(d->cpuset); /* XXX KAF XXX */
  23.962 +        break;
  23.963 +    }
  23.964 +
  23.965 +    process_deferred_ops(cpu);
  23.966      
  23.967      UNLOCK_BIGLOCK(d);
  23.968  
  23.969 @@ -2084,9 +2064,6 @@ int do_update_va_mapping_otherdomain(uns
  23.970  
  23.971      rc = do_update_va_mapping(va, val, flags);
  23.972  
  23.973 -    put_domain(d);
  23.974 -    percpu_info[cpu].foreign = NULL;
  23.975 -
  23.976      return rc;
  23.977  }
  23.978  
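The explicit put_domain()/foreign reset removed above is likewise subsumed
by process_deferred_ops(). For illustration, a hedged sketch of how a
privileged domain might use the otherdomain variant, assuming the usual
guest-side wrapper (real backend code differs):

/* Map a foreign frame at va, flushing the stale local TLB entry. */
static int map_foreign_frame(unsigned long va, unsigned long mfn,
                             domid_t domid)
{
    return HYPERVISOR_update_va_mapping_otherdomain(
        va, (mfn << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_RW,
        UVMF_INVLPG_LOCAL, domid);
}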
  23.979 @@ -3176,6 +3153,97 @@ void audit_domains_key(unsigned char key
  23.980  
  23.981  #endif /* NDEBUG */
  23.982  
  23.983 +/* Graveyard: stuff below may be useful in future. */
  23.984 +#if 0
  23.985 +    case MMUEXT_TRANSFER_PAGE:
  23.986 +        domid  = (domid_t)(val >> 16);
  23.987 +        gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF));
  23.988 +        
  23.989 +        if ( unlikely(IS_XEN_HEAP_FRAME(page)) ||
  23.990 +             unlikely(!pfn_is_ram(pfn)) ||
  23.991 +             unlikely((e = find_domain_by_id(domid)) == NULL) )
  23.992 +        {
  23.993 +            MEM_LOG("Bad frame (%p) or bad domid (%d).\n", pfn, domid);
  23.994 +            okay = 0;
  23.995 +            break;
  23.996 +        }
  23.997 +
  23.998 +        spin_lock(&d->page_alloc_lock);
  23.999 +
 23.1000 +        /*
 23.1001 +         * The tricky bit: atomically release ownership while there is just one
 23.1002 +         * benign reference to the page (PGC_allocated). If that reference
 23.1003 +         * disappears then the deallocation routine will safely spin.
 23.1004 +         */
 23.1005 +        _d  = pickle_domptr(d);
 23.1006 +        _nd = page->u.inuse._domain;
 23.1007 +        y   = page->count_info;
 23.1008 +        do {
 23.1009 +            x = y;
 23.1010 +            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
 23.1011 +                          (1|PGC_allocated)) ||
 23.1012 +                 unlikely(_nd != _d) )
 23.1013 +            {
 23.1014 +                MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
 23.1015 +                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
 23.1016 +                        d, d->id, unpickle_domptr(_nd), x, 
 23.1017 +                        page->u.inuse.type_info);
 23.1018 +                spin_unlock(&d->page_alloc_lock);
 23.1019 +                put_domain(e);
 23.1020 +                return 0;
 23.1021 +            }
 23.1022 +            __asm__ __volatile__(
 23.1023 +                LOCK_PREFIX "cmpxchg8b %2"
 23.1024 +                : "=d" (_nd), "=a" (y),
 23.1025 +                "=m" (*(volatile u64 *)(&page->count_info))
 23.1026 +                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
 23.1027 +        } 
 23.1028 +        while ( unlikely(_nd != _d) || unlikely(y != x) );
 23.1029 +
 23.1030 +        /*
 23.1031 +         * Unlink from 'd'. At least one reference remains (now anonymous), so
 23.1032 +         * no one else is spinning to try to delete this page from 'd'.
 23.1033 +         */
 23.1034 +        d->tot_pages--;
 23.1035 +        list_del(&page->list);
 23.1036 +        
 23.1037 +        spin_unlock(&d->page_alloc_lock);
 23.1038 +
 23.1039 +        spin_lock(&e->page_alloc_lock);
 23.1040 +
 23.1041 +        /*
 23.1042 +         * Check that 'e' will accept the page and has reservation headroom.
 23.1043 +         * Also, a domain mustn't have PGC_allocated pages when it is dying.
 23.1044 +         */
 23.1045 +        ASSERT(e->tot_pages <= e->max_pages);
 23.1046 +        if ( unlikely(test_bit(DF_DYING, &e->d_flags)) ||
 23.1047 +             unlikely(e->tot_pages == e->max_pages) ||
 23.1048 +             unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) )
 23.1049 +        {
 23.1050 +            MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
 23.1051 +                    "provided a bad grant ref, or is dying (%p).\n",
 23.1052 +                    e->tot_pages, e->max_pages, e->d_flags);
 23.1053 +            spin_unlock(&e->page_alloc_lock);
 23.1054 +            put_domain(e);
 23.1055 +            okay = 0;
 23.1056 +            break;
 23.1057 +        }
 23.1058 +
 23.1059 +        /* Okay, add the page to 'e'. */
 23.1060 +        if ( unlikely(e->tot_pages++ == 0) )
 23.1061 +            get_knownalive_domain(e);
 23.1062 +        list_add_tail(&page->list, &e->page_list);
 23.1063 +        page_set_owner(page, e);
 23.1064 +
 23.1065 +        spin_unlock(&e->page_alloc_lock);
 23.1066 +
 23.1067 +        /* Transfer is all done: tell the guest about its new page frame. */
 23.1068 +        gnttab_notify_transfer(e, gntref, pfn);
 23.1069 +        
 23.1070 +        put_domain(e);
 23.1071 +        break;
 23.1072 +#endif
 23.1073 +
 23.1074  /*
 23.1075   * Local variables:
 23.1076   * mode: C
    24.1 --- a/xen/arch/x86/x86_32/entry.S	Wed Mar 30 18:13:34 2005 +0000
    24.2 +++ b/xen/arch/x86/x86_32/entry.S	Wed Mar 30 18:17:26 2005 +0000
    24.3 @@ -742,6 +742,8 @@ ENTRY(hypercall_table)
    24.4          .long SYMBOL_NAME(do_update_va_mapping_otherdomain)
    24.5          .long SYMBOL_NAME(do_switch_vm86)
    24.6          .long SYMBOL_NAME(do_boot_vcpu)
    24.7 +        .long SYMBOL_NAME(do_ni_hypercall)       /* 25 */
    24.8 +        .long SYMBOL_NAME(do_mmuext_op)
    24.9          .rept NR_hypercalls-((.-hypercall_table)/4)
   24.10          .long SYMBOL_NAME(do_ni_hypercall)
   24.11          .endr
    25.1 --- a/xen/arch/x86/x86_64/entry.S	Wed Mar 30 18:13:34 2005 +0000
    25.2 +++ b/xen/arch/x86/x86_64/entry.S	Wed Mar 30 18:17:26 2005 +0000
    25.3 @@ -449,6 +449,7 @@ ENTRY(hypercall_table)
    25.4          .quad SYMBOL_NAME(do_switch_to_user)
    25.5          .quad SYMBOL_NAME(do_boot_vcpu)
    25.6          .quad SYMBOL_NAME(do_set_segment_base)   /* 25 */
    25.7 +        .quad SYMBOL_NAME(do_mmuext_op)
    25.8          .rept NR_hypercalls-((.-hypercall_table)/8)
    25.9          .quad SYMBOL_NAME(do_ni_hypercall)
   25.10          .endr
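Both tables append do_mmuext_op at slot 26, so the header constant that
follows must stay in sync. Illustrative only (no such guard exists in this
changeset): a C compile-time check could enforce the pairing.

/* Fails to compile if the hypercall number and table slot disagree. */
typedef char mmuext_op_slot_check[(__HYPERVISOR_mmuext_op == 26) ? 1 : -1];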
    26.1 --- a/xen/include/public/xen.h	Wed Mar 30 18:13:34 2005 +0000
    26.2 +++ b/xen/include/public/xen.h	Wed Mar 30 18:17:26 2005 +0000
    26.3 @@ -58,6 +58,7 @@
    26.4  #define __HYPERVISOR_switch_to_user       23 /* x86/64 only */
    26.5  #define __HYPERVISOR_boot_vcpu            24
    26.6  #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
    26.7 +#define __HYPERVISOR_mmuext_op            26
    26.8  
    26.9  /*
   26.10   * MULTICALLS
   26.11 @@ -86,14 +87,10 @@
   26.12   * MMU-UPDATE REQUESTS
   26.13   * 
   26.14   * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
   26.15 + * A foreigndom (FD) can be specified (or DOMID_SELF for none).
   26.16 + * Where the FD has some effect, it is described below.
   26.17   * ptr[1:0] specifies the appropriate MMU_* command.
   26.18   * 
   26.19 - * FOREIGN DOMAIN (FD)
   26.20 - * -------------------
   26.21 - *  Some commands recognise an explicitly-declared foreign domain,
   26.22 - *  in which case they will operate with respect to the foreigner rather than
   26.23 - *  the calling domain. Where the FD has some effect, it is described below.
   26.24 - * 
   26.25   * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
   26.26   * Updates an entry in a page table. If updating an L1 table, and the new
   26.27   * table entry is valid/present, the mapped frame must belong to the FD, if
   26.28 @@ -109,61 +106,58 @@
   26.29   * ptr[:2]  -- Machine address within the frame whose mapping to modify.
   26.30   *             The frame must belong to the FD, if one is specified.
   26.31   * val      -- Value to write into the mapping entry.
   26.32 - *  
   26.33 - * ptr[1:0] == MMU_EXTENDED_COMMAND:
   26.34 - * val[7:0] -- MMUEXT_* command.
   26.35 - * 
   26.36 - *   val[7:0] == MMUEXT_(UN)PIN_*_TABLE:
   26.37 - *   ptr[:2]  -- Machine address of frame to be (un)pinned as a p.t. page.
   26.38 - *               The frame must belong to the FD, if one is specified.
   26.39 - * 
   26.40 - *   val[7:0] == MMUEXT_NEW_BASEPTR:
   26.41 - *   ptr[:2]  -- Machine address of new page-table base to install in MMU.
   26.42 - * 
   26.43 - *   val[7:0] == MMUEXT_NEW_USER_BASEPTR: [x86/64 only]
   26.44 - *   ptr[:2]  -- Machine address of new page-table base to install in MMU
   26.45 - *               when in user space.
   26.46 - * 
   26.47 - *   val[7:0] == MMUEXT_TLB_FLUSH_LOCAL:
   26.48 - *   No additional arguments. Flushes local TLB.
   26.49 - * 
   26.50 - *   val[7:0] == MMUEXT_INVLPG_LOCAL:
   26.51 - *   ptr[:2]  -- Linear address to be flushed from the local TLB.
   26.52 - * 
   26.53 - *   val[7:0] == MMUEXT_FLUSH_CACHE:
   26.54 - *   No additional arguments. Writes back and flushes cache contents.
   26.55 - * 
   26.56 - *   val[7:0] == MMUEXT_SET_LDT:
   26.57 - *   ptr[:2]  -- Linear address of LDT base (NB. must be page-aligned).
   26.58 - *   val[:8]  -- Number of entries in LDT.
   26.59 - * 
   26.60 - *   val[7:0] == MMUEXT_TRANSFER_PAGE:
   26.61 - *   val[31:16] -- Domain to whom page is to be transferred.
   26.62 - *   (val[15:8],ptr[9:2]) -- 16-bit reference into transferee's grant table.
   26.63 - *   ptr[:12]  -- Page frame to be reassigned to the FD.
   26.64 - *                (NB. The frame must currently belong to the calling domain).
   26.65 - * 
   26.66 - *   val[7:0] == MMUEXT_SET_FOREIGNDOM:
   26.67 - *   val[31:16] -- Domain to set as the Foreign Domain (FD).
   26.68 - *                 (NB. DOMID_SELF is not recognised)
   26.69 - *                 If FD != DOMID_IO then the caller must be privileged.
   26.70 - * 
   26.71 - *   val[7:0] == MMUEXT_CLEAR_FOREIGNDOM:
   26.72 - *   Clears the FD.
   26.73 - * 
   26.74 - *   val[7:0] == MMUEXT_REASSIGN_PAGE:
   26.75 - *   ptr[:2]  -- A machine address within the page to be reassigned to the FD.
   26.76 - *               (NB. page must currently belong to the calling domain).
   26.77 - * 
   26.78 - *   val[7:0] == MMUEXT_TLB_FLUSH_MULTI:
   26.79 - *   Flush TLBs of VCPUs specified in @mask.
   26.80 - * 
   26.81 - *   val[7:0] == MMUEXT_INVLPG_MULTI:
   26.82 - *   ptr[:2]  -- Linear address to be flushed from TLB of VCPUs in @mask.
   26.83   */
   26.84  #define MMU_NORMAL_PT_UPDATE     0 /* checked '*ptr = val'. ptr is MA.       */
   26.85 -#define MMU_MACHPHYS_UPDATE      2 /* ptr = MA of frame to modify entry for  */
   26.86 -#define MMU_EXTENDED_COMMAND     3 /* least 8 bits of val demux further      */
   26.87 +#define MMU_MACHPHYS_UPDATE      1 /* ptr = MA of frame to modify entry for  */
   26.88 +
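To make the encoding concrete: the MMU_* command lives in the two low bits
of ptr. A minimal sketch, assuming the usual guest-side HYPERVISOR_mmu_update
wrapper (which after this changeset takes the foreigndom as a fourth
argument):

/* Update one PTE and its machine->physical entry in a single batch. */
static int update_pte_and_m2p(unsigned long pte_ma, unsigned long new_pte,
                              unsigned long mfn, unsigned long pfn)
{
    mmu_update_t u[2];

    u[0].ptr = pte_ma | MMU_NORMAL_PT_UPDATE;   /* checked '*ptr = val' */
    u[0].val = new_pte;
    u[1].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; /* M2P entry */
    u[1].val = pfn;

    return HYPERVISOR_mmu_update(u, 2, NULL, DOMID_SELF);
}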
   26.89 +/*
   26.90 + * MMU EXTENDED OPERATIONS
   26.91 + * 
   26.92 + * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
   26.93 + * A foreigndom (FD) can be specified (or DOMID_SELF for none).
   26.94 + * Where the FD has some effect, it is described below.
   26.95 + * 
   26.96 + * cmd: MMUEXT_(UN)PIN_*_TABLE
   26.97 + * mfn: Machine frame number to be (un)pinned as a p.t. page.
   26.98 + *      The frame must belong to the FD, if one is specified.
   26.99 + * 
  26.100 + * cmd: MMUEXT_NEW_BASEPTR
  26.101 + * mfn: Machine frame number of new page-table base to install in MMU.
  26.102 + * 
  26.103 + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
  26.104 + * mfn: Machine frame number of new page-table base to install in MMU
  26.105 + *      when in user space.
  26.106 + * 
  26.107 + * cmd: MMUEXT_TLB_FLUSH_LOCAL
  26.108 + * No additional arguments. Flushes local TLB.
  26.109 + * 
  26.110 + * cmd: MMUEXT_INVLPG_LOCAL
  26.111 + * linear_addr: Linear address to be flushed from the local TLB.
  26.112 + * 
  26.113 + * cmd: MMUEXT_TLB_FLUSH_MULTI
  26.114 + * cpuset: Set of VCPUs to be flushed.
  26.115 + * 
  26.116 + * cmd: MMUEXT_INVLPG_MULTI
  26.117 + * linear_addr: Linear address to be flushed.
  26.118 + * cpuset: Set of VCPUs to be flushed.
  26.119 + * 
  26.120 + * cmd: MMUEXT_TLB_FLUSH_ALL
  26.121 + * No additional arguments. Flushes all VCPUs' TLBs.
  26.122 + * 
  26.123 + * cmd: MMUEXT_INVLPG_ALL
  26.124 + * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
  26.125 + * 
  26.126 + * cmd: MMUEXT_FLUSH_CACHE
  26.127 + * No additional arguments. Writes back and flushes cache contents.
  26.128 + * 
  26.129 + * cmd: MMUEXT_SET_LDT
  26.130 + * linear_addr: Linear address of LDT base (NB. must be page-aligned).
  26.131 + * nr_ents: Number of entries in LDT.
  26.132 + * 
  26.133 + * cmd: MMUEXT_REASSIGN_PAGE
  26.134 + * mfn: Machine frame number to be reassigned to the FD.
  26.135 + *      (NB. page must currently belong to the calling domain).
  26.136 + */
  26.137  #define MMUEXT_PIN_L1_TABLE      0 /* ptr = MA of frame to pin               */
  26.138  #define MMUEXT_PIN_L2_TABLE      1 /* ptr = MA of frame to pin               */
  26.139  #define MMUEXT_PIN_L3_TABLE      2 /* ptr = MA of frame to pin               */
  26.140 @@ -172,25 +166,39 @@
  26.141  #define MMUEXT_NEW_BASEPTR       5 /* ptr = MA of new pagetable base         */
  26.142  #define MMUEXT_TLB_FLUSH_LOCAL   6 /* ptr = NULL                             */
  26.143  #define MMUEXT_INVLPG_LOCAL      7 /* ptr = VA to invalidate                 */
  26.144 -#define MMUEXT_FLUSH_CACHE       8
  26.145 -#define MMUEXT_SET_LDT           9 /* ptr = VA of table; val = # entries     */
  26.146 -#define MMUEXT_SET_FOREIGNDOM   10 /* val[31:16] = dom                       */
  26.147 -#define MMUEXT_CLEAR_FOREIGNDOM 11
  26.148 -#define MMUEXT_TRANSFER_PAGE    12 /* ptr = MA of frame; val[31:16] = dom    */
  26.149 -#define MMUEXT_REASSIGN_PAGE    13
  26.150 -#define MMUEXT_NEW_USER_BASEPTR 14
  26.151 -#define MMUEXT_TLB_FLUSH_MULTI  15 /* ptr = NULL; mask = VCPUs to flush      */
  26.152 -#define MMUEXT_INVLPG_MULTI     16 /* ptr = VA to inval.; mask = VCPUs       */
  26.153 -#define MMUEXT_CMD_MASK        255
  26.154 -#define MMUEXT_CMD_SHIFT         8
  26.155 +#define MMUEXT_TLB_FLUSH_MULTI   8 /* cpuset = VCPUs to flush                */
  26.156 +#define MMUEXT_INVLPG_MULTI      9 /* linear_addr = VA; cpuset = VCPUs       */
  26.157 +#define MMUEXT_TLB_FLUSH_ALL    10
  26.158 +#define MMUEXT_INVLPG_ALL       11
  26.159 +#define MMUEXT_FLUSH_CACHE      12
  26.160 +#define MMUEXT_SET_LDT          13 /* linear_addr = VA; nr_ents = # entries  */
  26.161 +#define MMUEXT_REASSIGN_PAGE    14
  26.162 +#define MMUEXT_NEW_USER_BASEPTR 15
  26.163 +
  26.164 +#ifndef __ASSEMBLY__
  26.165 +struct mmuext_op {
  26.166 +    unsigned int cmd;
  26.167 +    union {
  26.168 +        /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR, REASSIGN_PAGE */
  26.169 +        memory_t mfn;
  26.170 +        /* INVLPG_LOCAL, INVLPG_MULTI, INVLPG_ALL, SET_LDT */
  26.171 +        memory_t linear_addr;
  26.172 +    };
  26.173 +    union {
  26.174 +        /* SET_LDT */
  26.175 +        unsigned int nr_ents;
  26.176 +        /* TLB_FLUSH_MULTI, INVLPG_MULTI */
  26.177 +        unsigned long cpuset;
  26.178 +    };
  26.179 +};
  26.180 +#endif
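To make the union aliasing concrete, a brief sketch (illustrative only; the
function name is hypothetical) of filling one op per field pairing described
above:

/* Example population of struct mmuext_op. */
void fill_example_ops(struct mmuext_op op[2], unsigned long ldt_va,
                      unsigned int nr, unsigned long va, unsigned long cpus)
{
    op[0].cmd         = MMUEXT_SET_LDT;
    op[0].linear_addr = ldt_va;       /* page-aligned LDT base */
    op[0].nr_ents     = nr;           /* at most 8192 entries  */

    op[1].cmd         = MMUEXT_INVLPG_MULTI;
    op[1].linear_addr = va;           /* VA to flush           */
    op[1].cpuset      = cpus;         /* VCPUs to flush        */
}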
  26.181  
  26.182  /* These are passed as 'flags' to update_va_mapping. The low bits select one flush type (UVMF_FLUSH_MASK); flush types cannot be ORed together. */
  26.183 -#define UVMF_FLUSH_TLB          1 /* Flush entire TLB. */
  26.184 -#define UVMF_INVLPG             2 /* Flush the VA mapping being updated. */
  26.185 -
  26.186 -/* Backwards source compatibility. */
  26.187 -#define MMUEXT_TLB_FLUSH        MMUEXT_TLB_FLUSH_LOCAL
  26.188 -#define MMUEXT_INVLPG           MMUEXT_INVLPG_LOCAL
  26.189 +#define UVMF_TLB_FLUSH_LOCAL    1 /* Flush local CPU's TLB.          */
  26.190 +#define UVMF_INVLPG_LOCAL       2 /* Flush VA from local CPU's TLB.  */
  26.191 +#define UVMF_TLB_FLUSH_ALL      3 /* Flush all TLBs.                 */
  26.192 +#define UVMF_INVLPG_ALL         4 /* Flush VA from all TLBs.         */
  26.193 +#define UVMF_FLUSH_MASK         7
  26.194  
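Typical guest usage of these flags, assuming the usual three-argument
HYPERVISOR_update_va_mapping wrapper (a sketch, not code from this
changeset):

/* Zap a PTE and invalidate the stale VA on the local CPU in one call. */
static int zap_and_flush_local(unsigned long va)
{
    return HYPERVISOR_update_va_mapping(va, 0, UVMF_INVLPG_LOCAL);
}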
  26.195  /*
  26.196   * Commands to HYPERVISOR_sched_op().
  26.197 @@ -270,7 +278,6 @@ typedef struct
  26.198  {
  26.199      memory_t ptr;       /* Machine address of PTE. */
  26.200      memory_t val;       /* New contents of PTE.    */
  26.201 -    /*unsigned long mask;*/ /* VCPU mask (certain extended commands). */
  26.202  } PACKED mmu_update_t;
  26.203  
  26.204  /*