ia64/xen-unstable

changeset 4421:09485b0bafad

bitkeeper revision 1.1236.1.184 (424d4a00y8MNt89B4nCZ8LKcrTcZUw)

Fix the multi-VCPU TLB shootdown interface -- specify a pointer to the
VCPU bitmap, so that it is read at the time of the flush rather than
before (which could be too early, before all updates have been flushed,
leading to races). Also add a selective multi-VCPU shootdown capability
to update_va_mapping() and use it to make ptep_set_access_flags() a
single hypercall.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Apr 01 13:17:52 2005 +0000 (2005-04-01)
parents d1b9f9da04c2
children a6d955deec8e bc90a74f8571
files freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c linux-2.4.29-xen-sparse/mm/memory.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c xen/arch/x86/mm.c xen/include/public/xen.h
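
For context before the diff: the revised update_va_mapping() flags word packs a flush type (UVMF_TLB_FLUSH or UVMF_INVLPG) into its two low bits and a target selector into the rest -- UVMF_LOCAL, UVMF_ALL, or UVMF_MULTI ORed with a pointer to a guest VCPU bitmap. A word-aligned pointer has its two low bits clear, so it coexists with the UVMF_FLUSHTYPE_MASK bits. A minimal sketch of a selective shootdown under the new interface ('vcpu_mask', 'va', and 'new_pte' are hypothetical caller-owned names, not from the patch):

    /* Sketch only: flush 'va' from the TLBs of VCPUs 0 and 2.        */
    /* The bitmap must stay valid until the hypercall returns: Xen    */
    /* reads it with get_user() at flush time rather than before,     */
    /* which is what closes the race described in the summary.        */
    static unsigned long vcpu_mask = (1UL << 0) | (1UL << 2);

    HYPERVISOR_update_va_mapping(
        va, new_pte,
        UVMF_INVLPG | UVMF_MULTI | (unsigned long)&vcpu_mask);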
line diff
     1.1 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Fri Apr 01 10:04:58 2005 +0000
     1.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c	Fri Apr 01 13:17:52 2005 +0000
     1.3 @@ -540,7 +540,7 @@ mcl_queue_pt_update(vm_offset_t va, vm_p
     1.4      MCL_QUEUE[MCL_IDX].op = __HYPERVISOR_update_va_mapping;
     1.5      MCL_QUEUE[MCL_IDX].args[0] = (unsigned long)va;
     1.6      MCL_QUEUE[MCL_IDX].args[1] = (unsigned long)ma;
     1.7 -    MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG_LOCAL;
     1.8 +    MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG|UVMF_LOCAL;
     1.9      mcl_increment_idx();
    1.10  }
    1.11  
     2.1 --- a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c	Fri Apr 01 10:04:58 2005 +0000
     2.2 +++ b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c	Fri Apr 01 13:17:52 2005 +0000
     2.3 @@ -440,7 +440,7 @@ xn_alloc_rx_buffers(struct xn_softc *sc)
     2.4      PT_UPDATES_FLUSH();
     2.5  
     2.6      /* After all PTEs have been zapped we blow away stale TLB entries. */
     2.7 -    xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
     2.8 +    xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
     2.9  
    2.10      /* Give away a batch of pages. */
    2.11      xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
     3.1 --- a/linux-2.4.29-xen-sparse/mm/memory.c	Fri Apr 01 10:04:58 2005 +0000
     3.2 +++ b/linux-2.4.29-xen-sparse/mm/memory.c	Fri Apr 01 13:17:52 2005 +0000
     3.3 @@ -911,7 +911,7 @@ static inline void establish_pte(struct 
     3.4  {
     3.5  #ifdef CONFIG_XEN
     3.6  	if ( likely(vma->vm_mm == current->mm) ) {
     3.7 -		HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG_LOCAL);
     3.8 +		HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG|UVMF_LOCAL);
     3.9  	} else {
    3.10  		set_pte(page_table, entry);
    3.11  		flush_tlb_page(vma, address);
     4.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c	Fri Apr 01 10:04:58 2005 +0000
     4.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c	Fri Apr 01 13:17:52 2005 +0000
     4.3 @@ -108,7 +108,7 @@ void xen_tlb_flush_mask(cpumask_t mask)
     4.4  {
     4.5      struct mmuext_op op;
     4.6      op.cmd = MMUEXT_TLB_FLUSH_MULTI;
     4.7 -    op.cpuset = mask.bits[0];
     4.8 +    op.cpuset = (unsigned long)mask.bits;
     4.9      BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
    4.10  }
    4.11  
    4.12 @@ -124,7 +124,7 @@ void xen_invlpg_mask(cpumask_t mask, uns
    4.13  {
    4.14      struct mmuext_op op;
    4.15      op.cmd = MMUEXT_INVLPG_MULTI;
    4.16 -    op.cpuset = mask.bits[0];
    4.17 +    op.cpuset = (unsigned long)mask.bits;
    4.18      op.linear_addr = ptr & PAGE_MASK;
    4.19      BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
    4.20  }
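
In the two hunks above, the guest now hands Xen the address of the cpumask's bit array rather than a snapshot of its first word; Xen copies the live bitmap with get_user() when it actually performs the flush. Note that 'mask' is a by-value parameter, so the pointer refers to the callee's stack copy, which is safe because the mmuext_op hypercall is synchronous. A minimal sketch of a caller (assuming the standard Linux 2.6 cpumask fields; 'vaddr' is a hypothetical variable):

    /* Sketch: flush one page on every CPU that currently has this   */
    /* mm loaded. The bitmap itself is read by Xen at flush time.    */
    cpumask_t mask = current->mm->cpu_vm_mask;
    xen_invlpg_mask(mask, vaddr);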
     5.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c	Fri Apr 01 10:04:58 2005 +0000
     5.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c	Fri Apr 01 13:17:52 2005 +0000
     5.3 @@ -111,7 +111,7 @@ static void fast_flush_area(int idx, int
     5.4          mcl[i].args[2] = 0;
     5.5      }
     5.6  
     5.7 -    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL;
     5.8 +    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
     5.9      if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
    5.10          BUG();
    5.11  }
     6.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	Fri Apr 01 10:04:58 2005 +0000
     6.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	Fri Apr 01 13:17:52 2005 +0000
     6.3 @@ -270,7 +270,7 @@ static void net_rx_action(unsigned long 
     6.4      mcl->args[3] = DOMID_SELF;
     6.5      mcl++;
     6.6  
     6.7 -    mcl[-3].args[2] = UVMF_TLB_FLUSH_ALL;
     6.8 +    mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
     6.9      if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
    6.10          BUG();
    6.11  
    6.12 @@ -429,7 +429,7 @@ static void net_tx_action(unsigned long 
    6.13          mcl++;     
    6.14      }
    6.15  
    6.16 -    mcl[-1].args[2] = UVMF_TLB_FLUSH_ALL;
    6.17 +    mcl[-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
    6.18      if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
    6.19          BUG();
    6.20  
     7.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Fri Apr 01 10:04:58 2005 +0000
     7.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	Fri Apr 01 13:17:52 2005 +0000
     7.3 @@ -388,7 +388,7 @@ static void network_alloc_rx_buffers(str
     7.4      }
     7.5  
     7.6      /* After all PTEs have been zapped we blow away stale TLB entries. */
     7.7 -    rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_ALL;
     7.8 +    rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
     7.9  
    7.10      /* Give away a batch of pages. */
    7.11      rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
     8.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c	Fri Apr 01 10:04:58 2005 +0000
     8.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c	Fri Apr 01 13:17:52 2005 +0000
     8.3 @@ -195,7 +195,7 @@ static void fast_flush_area(int idx, int
     8.4          mcl[i].args[2] = 0;
     8.5      }
     8.6  
     8.7 -    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL;
     8.8 +    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
     8.9      if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
    8.10          BUG();
    8.11  }
     9.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Fri Apr 01 10:04:58 2005 +0000
     9.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Fri Apr 01 13:17:52 2005 +0000
     9.3 @@ -407,8 +407,7 @@ extern void noexec_setup(const char *str
     9.4  	do {								  \
     9.5  		if (__dirty) {						  \
     9.6  		        if ( likely((__vma)->vm_mm == current->mm) ) {    \
     9.7 -			    HYPERVISOR_update_va_mapping((__address), (__entry), 0); \
     9.8 -			    flush_tlb_page((__vma), (__address));         \
     9.9 +			    HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
    9.10  			} else {                                          \
    9.11                              xen_l1_entry_update((__ptep), (__entry).pte_low); \
    9.12  			    flush_tlb_page((__vma), (__address));         \
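
This pgtable.h hunk is the ptep_set_access_flags() change mentioned in the summary: where the dirty path previously needed HYPERVISOR_update_va_mapping() plus a separate flush_tlb_page(), it now issues a single hypercall that both writes the PTE and shoots it down on exactly the VCPUs in the mm's CPU mask. ORing a pointer into the flags word works because cpu_vm_mask.bits is word-aligned (its two UVMF_FLUSHTYPE_MASK bits are clear) and no real bitmap lives at address 4, the UVMF_ALL encoding. De-macroed, the new path amounts to this sketch:

    /* Sketch (not verbatim): one hypercall updates the PTE and      */
    /* invalidates it on every VCPU in the mm's cpumask.             */
    HYPERVISOR_update_va_mapping(address, entry,
        UVMF_INVLPG | UVMF_MULTI |
        (unsigned long)vma->vm_mm->cpu_vm_mask.bits);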
    10.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c	Fri Apr 01 10:04:58 2005 +0000
    10.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c	Fri Apr 01 13:17:52 2005 +0000
    10.3 @@ -598,7 +598,7 @@ xennet_rx_push_buffer(struct xennet_soft
    10.4  	xpq_flush_queue();
    10.5  
    10.6  	/* After all PTEs have been zapped we blow away stale TLB entries. */
    10.7 -	rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
    10.8 +	rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
    10.9  
   10.10  	/* Give away a batch of pages. */
   10.11  	rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
   10.12 @@ -681,7 +681,7 @@ xen_network_handler(void *arg)
   10.13  		mcl->op = __HYPERVISOR_update_va_mapping;
   10.14  		mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va;
   10.15  		mcl->args[1] = (rx->addr & PG_FRAME) | PG_V|PG_KW;
   10.16 -		mcl->args[2] = UVMF_TLB_FLUSH_LOCAL; // 0;
   10.17 +		mcl->args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; // 0;
   10.18  		mcl++;
   10.19  
   10.20  		xpmap_phys_to_machine_mapping
   10.21 @@ -898,7 +898,7 @@ network_alloc_rx_buffers(struct xennet_s
   10.22  	xpq_flush_queue();
   10.23  
   10.24  	/* After all PTEs have been zapped we blow away stale TLB entries. */
   10.25 -	rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
   10.26 +	rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
   10.27  
   10.28  	/* Give away a batch of pages. */
   10.29  	rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
    11.1 --- a/xen/arch/x86/mm.c	Fri Apr 01 10:04:58 2005 +0000
    11.2 +++ b/xen/arch/x86/mm.c	Fri Apr 01 13:17:52 2005 +0000
    11.3 @@ -1329,6 +1329,25 @@ static int set_foreigndom(unsigned int c
    11.4      return okay;
    11.5  }
    11.6  
    11.7 +static inline unsigned long vcpuset_to_pcpuset(
    11.8 +    struct domain *d, unsigned long vset)
    11.9 +{
   11.10 +    unsigned int  vcpu;
   11.11 +    unsigned long pset = 0;
   11.12 +    struct exec_domain *ed;
   11.13 +
   11.14 +    while ( vset != 0 )
   11.15 +    {
   11.16 +        vcpu = find_first_set_bit(vset);
   11.17 +        vset &= ~(1UL << vcpu);
   11.18 +        if ( (vcpu < MAX_VIRT_CPUS) &&
   11.19 +             ((ed = d->exec_domain[vcpu]) != NULL) )
   11.20 +            pset |= 1UL << ed->processor;
   11.21 +    }
   11.22 +
   11.23 +    return pset;
   11.24 +}
   11.25 +
   11.26  int do_mmuext_op(
   11.27      struct mmuext_op *uops,
   11.28      unsigned int count,
   11.29 @@ -1478,19 +1497,17 @@ int do_mmuext_op(
   11.30          case MMUEXT_TLB_FLUSH_MULTI:
   11.31          case MMUEXT_INVLPG_MULTI:
   11.32          {
   11.33 -            unsigned long inset = op.cpuset, outset = 0;
   11.34 -            while ( inset != 0 )
   11.35 +            unsigned long vset, pset;
   11.36 +            if ( unlikely(get_user(vset, (unsigned long *)op.cpuset)) )
   11.37              {
   11.38 -                unsigned int vcpu = find_first_set_bit(inset);
   11.39 -                inset &= ~(1UL<<vcpu);
   11.40 -                if ( (vcpu < MAX_VIRT_CPUS) &&
   11.41 -                     ((ed = d->exec_domain[vcpu]) != NULL) )
   11.42 -                    outset |= 1UL << ed->processor;
   11.43 +                okay = 0;
   11.44 +                break;
   11.45              }
   11.46 +            pset = vcpuset_to_pcpuset(d, vset);
   11.47              if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
   11.48 -                flush_tlb_mask(outset & d->cpuset);
   11.49 +                flush_tlb_mask(pset & d->cpuset);
   11.50              else
   11.51 -                flush_tlb_one_mask(outset & d->cpuset, op.linear_addr);
   11.52 +                flush_tlb_one_mask(pset & d->cpuset, op.linear_addr);
   11.53              break;
   11.54          }
   11.55  
   11.56 @@ -1999,6 +2016,7 @@ int do_update_va_mapping(unsigned long v
   11.57      struct exec_domain *ed  = current;
   11.58      struct domain      *d   = ed->domain;
   11.59      unsigned int        cpu = ed->processor;
   11.60 +    unsigned long       vset, pset, bmap_ptr;
   11.61      int                 rc = 0;
   11.62  
   11.63      perfc_incrc(calls_to_update_va);
   11.64 @@ -2013,11 +2031,6 @@ int do_update_va_mapping(unsigned long v
   11.65  
   11.66      cleanup_writable_pagetable(d);
   11.67  
   11.68 -    /*
   11.69 -     * XXX When we make this support 4MB superpages we should also deal with 
   11.70 -     * the case of updating L2 entries.
   11.71 -     */
   11.72 -
   11.73      if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
   11.74                                  mk_l1_pgentry(val))) )
   11.75          rc = -EINVAL;
   11.76 @@ -2025,21 +2038,42 @@ int do_update_va_mapping(unsigned long v
   11.77      if ( unlikely(shadow_mode_enabled(d)) )
   11.78          update_shadow_va_mapping(va, val, ed, d);
   11.79  
   11.80 -    switch ( flags & UVMF_FLUSH_MASK )
   11.81 +    switch ( flags & UVMF_FLUSHTYPE_MASK )
   11.82      {
   11.83 -    case UVMF_TLB_FLUSH_LOCAL:
   11.84 -        local_flush_tlb();
   11.85 -        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
   11.86 +    case UVMF_TLB_FLUSH:
   11.87 +        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
   11.88 +        {
   11.89 +        case UVMF_LOCAL:
   11.90 +            local_flush_tlb();
   11.91 +            break;
   11.92 +        case UVMF_ALL:
   11.93 +            flush_tlb_mask(d->cpuset);
   11.94 +            break;
   11.95 +        default:
   11.96 +            if ( unlikely(get_user(vset, (unsigned long *)bmap_ptr)) )
   11.97 +                rc = -EFAULT;
   11.98 +            pset = vcpuset_to_pcpuset(d, vset);
   11.99 +            flush_tlb_mask(pset & d->cpuset);
  11.100 +            break;
  11.101 +        }
  11.102          break;
  11.103 -    case UVMF_TLB_FLUSH_ALL:
  11.104 -        flush_tlb_mask(d->cpuset);
  11.105 -        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
  11.106 -        break;
  11.107 -    case UVMF_INVLPG_LOCAL:
  11.108 -        local_flush_tlb_one(va);
  11.109 -        break;
  11.110 -    case UVMF_INVLPG_ALL:
  11.111 -        flush_tlb_one_mask(d->cpuset, va);
  11.112 +
  11.113 +    case UVMF_INVLPG:
  11.114 +        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
  11.115 +        {
  11.116 +        case UVMF_LOCAL:
  11.117 +            local_flush_tlb_one(va);
  11.118 +            break;
  11.119 +        case UVMF_ALL:
  11.120 +            flush_tlb_one_mask(d->cpuset, va);
  11.121 +            break;
  11.122 +        default:
  11.123 +            if ( unlikely(get_user(vset, (unsigned long *)bmap_ptr)) )
  11.124 +                rc = -EFAULT;
  11.125 +            pset = vcpuset_to_pcpuset(d, vset);
  11.126 +            flush_tlb_one_mask(pset & d->cpuset, va);
  11.127 +            break;
  11.128 +        }
  11.129          break;
  11.130      }
  11.131  
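
vcpuset_to_pcpuset() above is the hypervisor-side translation from the guest's virtual-CPU bitmap to a physical-CPU mask; both do_mmuext_op() and do_update_va_mapping() then AND the result with d->cpuset so that only CPUs the domain actually runs on are targeted. A worked example, under a hypothetical scheduling assignment:

    /* Hypothetical placement: VCPU0 on physical CPU 3, VCPU1 on CPU 5. */
    /*   vset = 0x3                        -> VCPUs 0 and 1             */
    /*   pset = vcpuset_to_pcpuset(d, vset)                             */
    /*        = (1UL << 3) | (1UL << 5)    =  0x28                      */
    /* flush_tlb_mask(pset & d->cpuset) then IPIs CPUs 3 and 5 only.    */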
    12.1 --- a/xen/include/public/xen.h	Fri Apr 01 10:04:58 2005 +0000
    12.2 +++ b/xen/include/public/xen.h	Fri Apr 01 13:17:52 2005 +0000
    12.3 @@ -135,11 +135,11 @@
    12.4   * linear_addr: Linear address to be flushed from the local TLB.
    12.5   * 
    12.6   * cmd: MMUEXT_TLB_FLUSH_MULTI
    12.7 - * cpuset: Set of VCPUs to be flushed.
    12.8 + * cpuset: Pointer to bitmap of VCPUs to be flushed.
    12.9   * 
   12.10   * cmd: MMUEXT_INVLPG_MULTI
   12.11   * linear_addr: Linear address to be flushed.
   12.12 - * cpuset: Set of VCPUs to be flushed.
   12.13 + * cpuset: Pointer to bitmap of VCPUs to be flushed.
   12.14   * 
   12.15   * cmd: MMUEXT_TLB_FLUSH_ALL
   12.16   * No additional arguments. Flushes all VCPUs' TLBs.
   12.17 @@ -188,17 +188,21 @@ struct mmuext_op {
   12.18          /* SET_LDT */
   12.19          unsigned int nr_ents;
   12.20          /* TLB_FLUSH_MULTI, INVLPG_MULTI */
   12.21 -        unsigned long cpuset;
   12.22 +        void *cpuset;
   12.23      };
   12.24  };
   12.25  #endif
   12.26  
   12.27  /* These are passed as 'flags' to update_va_mapping. They can be ORed. */
   12.28 -#define UVMF_TLB_FLUSH_LOCAL    1 /* Flush local CPU's TLB.          */
   12.29 -#define UVMF_INVLPG_LOCAL       2 /* Flush VA from local CPU's TLB.  */
   12.30 -#define UVMF_TLB_FLUSH_ALL      3 /* Flush all TLBs.                 */
   12.31 -#define UVMF_INVLPG_ALL         4 /* Flush VA from all TLBs.         */
   12.32 -#define UVMF_FLUSH_MASK         7
   12.33 +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap.   */
   12.34 +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer.         */
   12.35 +#define UVMF_NONE               (0UL)    /* No flushing at all.   */
   12.36 +#define UVMF_TLB_FLUSH          (1UL<<0) /* Flush entire TLB(s).  */
   12.37 +#define UVMF_INVLPG             (2UL<<0) /* Flush only one entry. */
   12.38 +#define UVMF_FLUSHTYPE_MASK     (3UL<<0)
   12.39 +#define UVMF_MULTI              (0UL<<1) /* Flush subset of TLBs. */
   12.40 +#define UVMF_LOCAL              (0UL<<2) /* Flush local TLB.      */
   12.41 +#define UVMF_ALL                (1UL<<2) /* Flush all TLBs.       */
   12.42  
   12.43  /*
   12.44   * Commands to HYPERVISOR_sched_op().
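
Under the revised mmuext interface the cpuset field is likewise a guest pointer rather than an inline bitmap. A hedged sketch of a raw MMUEXT_INVLPG_MULTI request, using the field names declared above ('my_vcpus' and 'va' are hypothetical):

    /* Sketch: flush one linear address on a chosen set of VCPUs.   */
    /* Xen copies the bitmap with get_user() when it performs the   */
    /* flush, so updates to my_vcpus up to that point are honoured. */
    static unsigned long my_vcpus = 1UL << 1;   /* VCPU 1 only */
    struct mmuext_op op;

    op.cmd         = MMUEXT_INVLPG_MULTI;
    op.linear_addr = va & PAGE_MASK;
    op.cpuset      = &my_vcpus;
    if ( HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0 )
        BUG();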