direct-io.hg

changeset 5671:ec3b7c87b577

Here are two patches which update the hypercall interfaces to
use 64-bit values for both page table entries and physical
addresses. These changes are needed to use more than 4GB with
PAE paging enabled.

The first patch is a pretty straightforward update for Xen: it
simply makes the values 64 bits wide everywhere.

The second patch adapts the linux kernel to the hypercall
interface changes. It also introduces two MULTI_* functions
(for the update_va_mapping hypercalls) which behave like
their HYPERVISOR_* counterparts but fill in a
multicall_entry_t instead of making the call directly.

The tools don't need source code changes, but must be rebuilt
due to the change in the xen public header file.

Domain0 boots fine, unprivileged domain boots fine with fully
functional networking. Note this is non-PAE mode, tools don't
have support for PAE domU boots yet.

Signed-off-by: Gerd Knorr <kraxel@suse.de>
author kaf24@firebug.cl.cam.ac.uk
date Tue Jul 05 16:08:03 2005 +0000 (2005-07-05)
parents 47295d6ca1d7
children ffb406dfb170
files linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h xen/arch/x86/mm.c xen/include/public/dom0_ops.h xen/include/public/xen.h
line diff
     1.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c	Tue Jul 05 09:01:37 2005 +0000
     1.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c	Tue Jul 05 16:08:03 2005 +0000
     1.3 @@ -141,10 +141,8 @@ static void fast_flush_area(int idx, int
     1.4  
     1.5      for ( i = 0; i < nr_pages; i++ )
     1.6      {
     1.7 -        mcl[i].op = __HYPERVISOR_update_va_mapping;
     1.8 -        mcl[i].args[0] = MMAP_VADDR(idx, i);
     1.9 -        mcl[i].args[1] = 0;
    1.10 -        mcl[i].args[2] = 0;
    1.11 +	MULTI_update_va_mapping(mcl+i, MMAP_VADDR(idx, i),
    1.12 +				__pte(0), 0);
    1.13      }
    1.14  
    1.15      mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
    1.16 @@ -545,11 +543,10 @@ static void dispatch_rw_block_io(blkif_t
    1.17  
    1.18      for ( i = 0; i < nseg; i++ )
    1.19      {
    1.20 -        mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
    1.21 -        mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
    1.22 -        mcl[i].args[1] = (seg[i].buf & PAGE_MASK) | remap_prot;
    1.23 -        mcl[i].args[2] = 0;
    1.24 -        mcl[i].args[3] = blkif->domid;
    1.25 +	MULTI_update_va_mapping_otherdomain(
    1.26 +	    mcl+i, MMAP_VADDR(pending_idx, i),
    1.27 +	    pfn_pte_ma(seg[i].buf >> PAGE_SHIFT, remap_prot),
    1.28 +	    0, blkif->domid);
    1.29  #ifdef CONFIG_XEN_BLKDEV_TAP_BE
    1.30          if ( blkif->is_blktap )
    1.31              mcl[i].args[3] = ID_TO_DOM(req->id);
     2.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	Tue Jul 05 09:01:37 2005 +0000
     2.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	Tue Jul 05 16:08:03 2005 +0000
     2.3 @@ -234,11 +234,9 @@ static void net_rx_action(unsigned long 
     2.4           * Heed the comment in pgtable-2level.h:pte_page(). :-)
     2.5           */
     2.6          phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
     2.7 -        
     2.8 -        mcl->op = __HYPERVISOR_update_va_mapping;
     2.9 -        mcl->args[0] = vdata;
    2.10 -        mcl->args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL;
    2.11 -        mcl->args[2] = 0;
    2.12 +
    2.13 +        MULTI_update_va_mapping(mcl, vdata,
    2.14 +				pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
    2.15          mcl++;
    2.16  
    2.17          mcl->op = __HYPERVISOR_mmuext_op;
    2.18 @@ -425,10 +423,8 @@ static void net_tx_action(unsigned long 
    2.19      while ( dc != dp )
    2.20      {
    2.21          pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
    2.22 -        mcl[0].op = __HYPERVISOR_update_va_mapping;
    2.23 -        mcl[0].args[0] = MMAP_VADDR(pending_idx);
    2.24 -        mcl[0].args[1] = 0;
    2.25 -        mcl[0].args[2] = 0;
    2.26 +	MULTI_update_va_mapping(mcl, MMAP_VADDR(pending_idx),
    2.27 +				__pte(0), 0);
    2.28          mcl++;     
    2.29      }
    2.30  
    2.31 @@ -571,11 +567,10 @@ static void net_tx_action(unsigned long 
    2.32          /* Packets passed to netif_rx() must have some headroom. */
    2.33          skb_reserve(skb, 16);
    2.34  
    2.35 -        mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
    2.36 -        mcl[0].args[0] = MMAP_VADDR(pending_idx);
    2.37 -        mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
    2.38 -        mcl[0].args[2] = 0;
    2.39 -        mcl[0].args[3] = netif->domid;
    2.40 +	MULTI_update_va_mapping_otherdomain(
    2.41 +	    mcl, MMAP_VADDR(pending_idx),
    2.42 +	    pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL),
    2.43 +	    0, netif->domid);
    2.44          mcl++;
    2.45  
    2.46          memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
     3.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Tue Jul 05 09:01:37 2005 +0000
     3.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Tue Jul 05 16:08:03 2005 +0000
     3.3 @@ -395,10 +395,8 @@ static void network_alloc_rx_buffers(str
     3.4  	phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] 
     3.5  	    = INVALID_P2M_ENTRY;
     3.6  
     3.7 -        rx_mcl[i].op = __HYPERVISOR_update_va_mapping;
     3.8 -        rx_mcl[i].args[0] = (unsigned long)skb->head;
     3.9 -        rx_mcl[i].args[1] = 0;
    3.10 -        rx_mcl[i].args[2] = 0;
    3.11 +	MULTI_update_va_mapping(rx_mcl+i, (unsigned long)skb->head,
    3.12 +				__pte(0), 0);
    3.13      }
    3.14  
    3.15      /* After all PTEs have been zapped we blow away stale TLB entries. */
    3.16 @@ -585,10 +583,8 @@ static int netif_poll(struct net_device 
    3.17          mmu->ptr  = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
    3.18          mmu->val  = __pa(skb->head) >> PAGE_SHIFT;
    3.19          mmu++;
    3.20 -        mcl->op = __HYPERVISOR_update_va_mapping;
    3.21 -        mcl->args[0] = (unsigned long)skb->head;
    3.22 -        mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
    3.23 -        mcl->args[2] = 0;
    3.24 +	MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
    3.25 +				pfn_pte_ma(rx->addr >> PAGE_SHIFT, PAGE_KERNEL), 0);
    3.26          mcl++;
    3.27  
    3.28          phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = 
     4.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c	Tue Jul 05 09:01:37 2005 +0000
     4.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c	Tue Jul 05 16:08:03 2005 +0000
     4.3 @@ -189,10 +189,8 @@ static void fast_flush_area(int idx, int
     4.4  
     4.5      for ( i = 0; i < nr_pages; i++ )
     4.6      {
     4.7 -        mcl[i].op = __HYPERVISOR_update_va_mapping;
     4.8 -        mcl[i].args[0] = MMAP_VADDR(idx, i);
     4.9 -        mcl[i].args[1] = 0;
    4.10 -        mcl[i].args[2] = 0;
    4.11 +	MULTI_update_va_mapping(mcl+i, MMAP_VADDR(idx, i),
    4.12 +				__pte(0), 0);
    4.13      }
    4.14  
    4.15      mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
    4.16 @@ -651,11 +649,10 @@ static void dispatch_usb_io(usbif_priv_t
    4.17      for ( i = 0, offset = 0; offset < req->length;
    4.18            i++, offset += PAGE_SIZE )
    4.19      {
    4.20 -	mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
    4.21 -	mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
    4.22 -        mcl[i].args[1] = ((buffer_mach & PAGE_MASK) + offset) | remap_prot;
    4.23 -        mcl[i].args[2] = 0;
    4.24 -        mcl[i].args[3] = up->domid;
    4.25 +	MULTI_update_va_mapping_otherdomain(
    4.26 +	    mcl+i, MMAP_VADDR(pending_idx, i),
    4.27 +	    pfn_pte_ma(buffer_mach >> PAGE_SHIFT, remap_prot),
    4.28 +	    0, up->domid);
    4.29          
    4.30          phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
    4.31              FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
    4.32 @@ -667,11 +664,10 @@ static void dispatch_usb_io(usbif_priv_t
    4.33      if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
    4.34      {
    4.35          /* Map in ISO schedule, if necessary. */
    4.36 -        mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
    4.37 -        mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
    4.38 -        mcl[i].args[1] = (req->iso_schedule & PAGE_MASK) | remap_prot;
    4.39 -        mcl[i].args[2] = 0;
    4.40 -        mcl[i].args[3] = up->domid;
    4.41 +	MULTI_update_va_mapping_otherdomain(
    4.42 +	    mcl+i, MMAP_VADDR(pending_idx, i),
    4.43 +	    pfn_pte_ma(req->iso_schedule >> PAGE_SHIFT, remap_prot),
    4.44 +	    0, up->domid);
    4.45  
    4.46          phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
    4.47              FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT);
     5.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Tue Jul 05 09:01:37 2005 +0000
     5.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Tue Jul 05 16:08:03 2005 +0000
     5.3 @@ -371,13 +371,19 @@ HYPERVISOR_update_va_mapping(
     5.4      unsigned long va, pte_t new_val, unsigned long flags)
     5.5  {
     5.6      int ret;
     5.7 -    unsigned long ign1, ign2, ign3;
     5.8 +    unsigned long ign1, ign2, ign3, ign4;
     5.9  
    5.10      __asm__ __volatile__ (
    5.11          TRAP_INSTR
    5.12 -        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
    5.13 +        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
    5.14  	: "0" (__HYPERVISOR_update_va_mapping), 
    5.15 -          "1" (va), "2" ((new_val).pte_low), "3" (flags)
    5.16 +          "1" (va), "2" ((new_val).pte_low),
    5.17 +#ifdef CONFIG_X86_PAE
    5.18 +	  "3" ((new_val).pte_high),
    5.19 +#else
    5.20 +	  "3" (0),
    5.21 +#endif
    5.22 +	  "4" (flags)
    5.23  	: "memory" );
    5.24  
    5.25      if ( unlikely(ret < 0) )
    5.26 @@ -473,13 +479,20 @@ HYPERVISOR_update_va_mapping_otherdomain
    5.27      unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
    5.28  {
    5.29      int ret;
    5.30 -    unsigned long ign1, ign2, ign3, ign4;
    5.31 +    unsigned long ign1, ign2, ign3, ign4, ign5;
    5.32  
    5.33      __asm__ __volatile__ (
    5.34          TRAP_INSTR
    5.35 -        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
    5.36 +        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3),
    5.37 +	  "=S" (ign4), "=D" (ign5)
    5.38  	: "0" (__HYPERVISOR_update_va_mapping_otherdomain),
    5.39 -          "1" (va), "2" ((new_val).pte_low), "3" (flags), "4" (domid) :
    5.40 +          "1" (va), "2" ((new_val).pte_low),
    5.41 +#ifdef CONFIG_X86_PAE
    5.42 +	  "3" ((new_val).pte_high),
    5.43 +#else
    5.44 +	  "3" (0),
    5.45 +#endif
    5.46 +	  "4" (flags), "5" (domid) :
    5.47          "memory" );
    5.48      
    5.49      return ret;
     6.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h	Tue Jul 05 09:01:37 2005 +0000
     6.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h	Tue Jul 05 16:08:03 2005 +0000
     6.3 @@ -139,4 +139,49 @@ unsigned long allocate_empty_lowmem_regi
     6.4  
     6.5  #include <asm/hypercall.h>
     6.6  
     6.7 +static inline void
     6.8 +MULTI_update_va_mapping(
     6.9 +    multicall_entry_t *mcl, unsigned long va,
    6.10 +    pte_t new_val, unsigned long flags)
    6.11 +{
    6.12 +    mcl->op = __HYPERVISOR_update_va_mapping;
    6.13 +    mcl->args[0] = va;
    6.14 +#if defined(CONFIG_X86_64)
    6.15 +    mcl->args[1] = new_val.pte;
    6.16 +    mcl->args[2] = flags;
    6.17 +#elif defined(CONFIG_X86_PAE)
    6.18 +    mcl->args[1] = new_val.pte_low;
    6.19 +    mcl->args[2] = new_val.pte_high;
    6.20 +    mcl->args[3] = flags;
    6.21 +#else
    6.22 +    mcl->args[1] = new_val.pte_low;
    6.23 +    mcl->args[2] = 0;
    6.24 +    mcl->args[3] = flags;
    6.25 +#endif
    6.26 +}
    6.27 +
    6.28 +static inline void
    6.29 +MULTI_update_va_mapping_otherdomain(
    6.30 +    multicall_entry_t *mcl, unsigned long va,
    6.31 +    pte_t new_val, unsigned long flags, domid_t domid)
    6.32 +{
    6.33 +    mcl->op = __HYPERVISOR_update_va_mapping_otherdomain;
    6.34 +    mcl->args[0] = va;
    6.35 +#if defined(CONFIG_X86_64)
    6.36 +    mcl->args[1] = new_val.pte;
    6.37 +    mcl->args[2] = flags;
    6.38 +    mcl->args[3] = domid;
    6.39 +#elif defined(CONFIG_X86_PAE)
    6.40 +    mcl->args[1] = new_val.pte_low;
    6.41 +    mcl->args[2] = new_val.pte_high;
    6.42 +    mcl->args[3] = flags;
    6.43 +    mcl->args[4] = domid;
    6.44 +#else
    6.45 +    mcl->args[1] = new_val.pte_low;
    6.46 +    mcl->args[2] = 0;
    6.47 +    mcl->args[3] = flags;
    6.48 +    mcl->args[4] = domid;
    6.49 +#endif
    6.50 +}
    6.51 +
    6.52  #endif /* __HYPERVISOR_H__ */
     7.1 --- a/xen/arch/x86/mm.c	Tue Jul 05 09:01:37 2005 +0000
     7.2 +++ b/xen/arch/x86/mm.c	Tue Jul 05 16:08:03 2005 +0000
     7.3 @@ -2020,7 +2020,8 @@ int do_mmu_update(
     7.4              }
     7.5  
     7.6              va = map_domain_page_with_cache(mfn, &mapcache);
     7.7 -            va = (void *)((unsigned long)va + (req.ptr & ~PAGE_MASK));
     7.8 +            va = (void *)((unsigned long)va +
     7.9 +                          (unsigned long)(req.ptr & ~PAGE_MASK));
    7.10              page = &frame_table[mfn];
    7.11  
    7.12              switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
    7.13 @@ -2164,7 +2165,7 @@ int do_mmu_update(
    7.14              break;
    7.15  
    7.16          default:
    7.17 -            MEM_LOG("Invalid page update command %lx", req.ptr);
    7.18 +            MEM_LOG("Invalid page update command %x", cmd);
    7.19              break;
    7.20          }
    7.21  
    7.22 @@ -2251,11 +2252,10 @@ int update_grant_va_mapping(unsigned lon
    7.23  }
    7.24  
    7.25  
    7.26 -int do_update_va_mapping(unsigned long va,
    7.27 -                         unsigned long val32,
    7.28 +int do_update_va_mapping(unsigned long va, u64 val64,
    7.29                           unsigned long flags)
    7.30  {
    7.31 -    l1_pgentry_t   val = l1e_from_intpte(val32);
    7.32 +    l1_pgentry_t   val = l1e_from_intpte(val64);
    7.33      struct vcpu   *v   = current;
    7.34      struct domain *d   = v->domain;
    7.35      unsigned int   cpu = v->processor;
    7.36 @@ -2349,8 +2349,7 @@ int do_update_va_mapping(unsigned long v
    7.37      return rc;
    7.38  }
    7.39  
    7.40 -int do_update_va_mapping_otherdomain(unsigned long va,
    7.41 -                                     unsigned long val32,
    7.42 +int do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
    7.43                                       unsigned long flags,
    7.44                                       domid_t domid)
    7.45  {
    7.46 @@ -2368,7 +2367,7 @@ int do_update_va_mapping_otherdomain(uns
    7.47          return -ESRCH;
    7.48      }
    7.49  
    7.50 -    rc = do_update_va_mapping(va, val32, flags);
    7.51 +    rc = do_update_va_mapping(va, val64, flags);
    7.52  
    7.53      return rc;
    7.54  }
     8.1 --- a/xen/include/public/dom0_ops.h	Tue Jul 05 09:01:37 2005 +0000
     8.2 +++ b/xen/include/public/dom0_ops.h	Tue Jul 05 16:08:03 2005 +0000
     8.3 @@ -19,7 +19,7 @@
     8.4   * This makes sure that old versions of dom0 tools will stop working in a
     8.5   * well-defined way (rather than crashing the machine, for instance).
     8.6   */
     8.7 -#define DOM0_INTERFACE_VERSION   0xAAAA100A
     8.8 +#define DOM0_INTERFACE_VERSION   0xAAAA100B
     8.9  
    8.10  /************************************************************************/
    8.11  
     9.1 --- a/xen/include/public/xen.h	Tue Jul 05 09:01:37 2005 +0000
     9.2 +++ b/xen/include/public/xen.h	Tue Jul 05 16:08:03 2005 +0000
     9.3 @@ -274,8 +274,8 @@ typedef u16 domid_t;
     9.4   */
     9.5  typedef struct
     9.6  {
     9.7 -    memory_t ptr;       /* Machine address of PTE. */
     9.8 -    memory_t val;       /* New contents of PTE.    */
     9.9 +    u64 ptr;       /* Machine address of PTE. */
    9.10 +    u64 val;       /* New contents of PTE.    */
    9.11  } mmu_update_t;
    9.12  
    9.13  /*