direct-io.hg

changeset 2340:34e52c54c854

bitkeeper revision 1.1159.1.101 (412b5ac2PQ9FDoJKc14Km1yEm114Rw)

Grant-table pin/unpin operation.
author kaf24@labyrinth.cl.cam.ac.uk
date Tue Aug 24 15:12:02 2004 +0000 (2004-08-24)
parents 7ffdab765137
children d21069f49572 9e3b9c9938cc
files xen/common/grant_table.c xen/include/asm-x86/system.h xen/include/hypervisor-ifs/grant_table.h
line diff
     1.1 --- a/xen/common/grant_table.c	Tue Aug 24 10:29:53 2004 +0000
     1.2 +++ b/xen/common/grant_table.c	Tue Aug 24 15:12:02 2004 +0000
     1.3 @@ -24,25 +24,63 @@
     1.4  #include <xen/config.h>
     1.5  #include <xen/sched.h>
     1.6  
     1.7 -#define update_shared_flags(x,y,z) (0)
     1.8 +static inline void
     1.9 +check_tlb_flush(
    1.10 +    active_grant_entry_t *a)
    1.11 +{
    1.12 +    if ( unlikely(NEED_FLUSH(tlbflush_time[smp_processor_id()],
    1.13 +                             a->tlbflush_timestamp)) )
    1.14 +    {
    1.15 +        perfc_incr(need_flush_tlb_flush);
    1.16 +        local_flush_tlb();
    1.17 +    }
    1.18 +}
    1.19  
    1.20 -static long gnttab_update_pin_status(gnttab_update_pin_status_t *uop)
    1.21 +static void
    1.22 +make_entry_mappable(
    1.23 +    grant_table_t *t, active_grant_entry_t *a)
    1.24 +{
    1.25 +    u16 *ph = &t->maphash[GNT_MAPHASH(a->frame)];
    1.26 +    a->next = *ph;
    1.27 +    *ph = a - t->active;
    1.28 +}
    1.29 +
    1.30 +static void
    1.31 +make_entry_unmappable(
    1.32 +    grant_table_t *t, active_grant_entry_t *a)
    1.33 +{
    1.34 +    active_grant_entry_t *p;
    1.35 +    u16 *ph = &t->maphash[GNT_MAPHASH(a->frame)];
    1.36 +    while ( (p = &t->active[*ph]) != a )
    1.37 +        ph = &p->next;
    1.38 +    *ph = a->next;
    1.39 +    a->next = GNT_MAPHASH_INVALID;
    1.40 +    check_tlb_flush(a);
    1.41 +}
    1.42 +
    1.43 +static long
    1.44 +gnttab_update_pin_status(
    1.45 +    gnttab_update_pin_status_t *uop)
    1.46  {
    1.47      domid_t        dom, sdom;
    1.48      grant_ref_t    ref;
    1.49      u16            pin_flags;
    1.50      struct domain *ld, *rd;
    1.51 -    u32            sflags;
    1.52 +    u16            sflags, prev_sflags;
    1.53      active_grant_entry_t *act;
    1.54      grant_entry_t *sha;
    1.55      long           rc = 0;
    1.56  
    1.57      ld = current;
    1.58  
    1.59 -    if ( unlikely(__get_user(dom, &uop->dom)) || 
    1.60 -         unlikely(__get_user(ref, &uop->ref)) ||
    1.61 -         unlikely(__get_user(pin_flags, &uop->pin_flags)) )
    1.62 +    /* Bitwise-OR avoids short-circuit evaluation, which complicates control flow. */
    1.63 +    if ( unlikely(__get_user(dom, &uop->dom) |
    1.64 +                  __get_user(ref, &uop->ref) |
    1.65 +                  __get_user(pin_flags, &uop->pin_flags)) )
    1.66 +    {
    1.67 +        DPRINTK("Fault while reading gnttab_update_pin_status_t.\n");
    1.68          return -EFAULT;
    1.69 +    }
    1.70  
    1.71      pin_flags &= (GNTPIN_dev_accessible | 
    1.72                    GNTPIN_host_accessible |
    1.73 @@ -50,10 +88,16 @@ static long gnttab_update_pin_status(gnt
    1.74  
    1.75      if ( unlikely(ref >= NR_GRANT_ENTRIES) || 
    1.76           unlikely(pin_flags == GNTPIN_readonly) )
    1.77 +    {
    1.78 +        DPRINTK("Bad ref (%d) or flags (%x).\n", ref, pin_flags);
    1.79          return -EINVAL;
    1.80 +    }
    1.81  
    1.82      if ( unlikely((rd = find_domain_by_id(dom)) == NULL) )
    1.83 +    {
    1.84 +        DPRINTK("Could not find domain %d\n", dom);
    1.85          return -ESRCH;
    1.86 +    }
    1.87  
    1.88      act = &rd->grant_table->active[ref];
    1.89      sha = &rd->grant_table->shared[ref];
    1.90 @@ -63,79 +107,167 @@ static long gnttab_update_pin_status(gnt
    1.91          if ( unlikely(pin_flags == 0) )
    1.92              goto out;
    1.93  
    1.94 +        /* CASE 1: Activating a previously inactive entry. */
    1.95 +
    1.96          sflags = sha->flags;
    1.97          sdom   = sha->domid;
    1.98  
    1.99 -        do {
   1.100 +        for ( ; ; )
   1.101 +        {
   1.102 +            u32 scombo, prev_scombo;
   1.103 +
   1.104              if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
   1.105                   unlikely(sdom != ld->domain) )
   1.106              {
   1.107 +                DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
   1.108 +                        sflags, sdom, ld->domain);
   1.109 +                rc = -EINVAL;
   1.110 +                goto out;
   1.111              }
   1.112 -        
   1.113 +
   1.114              sflags |= GTF_reading;
   1.115              if ( !(pin_flags & GNTPIN_readonly) )
   1.116              {
   1.117                  sflags |= GTF_writing;
   1.118                  if ( unlikely(sflags & GTF_readonly) )
   1.119                  {
   1.120 +                    DPRINTK("Attempt to write-pin a read-only grant entry.\n");
   1.121 +                    rc = -EINVAL;
   1.122 +                    goto out;
   1.123                  }
   1.124              }
   1.125 +
   1.126 +            /* Merge two 16-bit values into a 32-bit combined update. */
   1.127 +            /* NB. Endianness! */
   1.128 +            prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
   1.129 +
   1.130 +            /* NB. prev_scombo is updated in place to seen value. */
   1.131 +            if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, 
   1.132 +                                       prev_scombo | GTF_writing)) )
   1.133 +            {
   1.134 +                DPRINTK("Fault while modifying shared flags and domid.\n");
   1.135 +                rc = -EINVAL;
   1.136 +                goto out;
   1.137 +            }
   1.138 +
   1.139 +            /* Did the combined update work (did we see what we expected?). */
   1.140 +            if ( prev_scombo == scombo )
   1.141 +                break;
   1.142 +
   1.143 +            /* Didn't see what we expected. Split out the seen flags & dom. */
   1.144 +            /* NB. Endianness! */
   1.145 +            sflags = (u16)prev_scombo;
   1.146 +            sdom   = (u16)(prev_scombo >> 16);
   1.147          }
   1.148 -        while ( !update_shared_flags(sha, sflags, sdom) );
   1.149 +
   1.150 +        /* rmb(); */ /* not on x86 */
   1.151  
   1.152          act->status = pin_flags;
   1.153          act->domid  = sdom;
   1.154 +        act->frame  = sha->frame;
   1.155  
   1.156 -        /* XXX MAP XXX */
   1.157 +        make_entry_mappable(rd->grant_table, act);
   1.158      }
   1.159      else if ( pin_flags == 0 )
   1.160      {
   1.161 +        /* CASE 2: Deactivating a previously active entry. */
   1.162 +
   1.163          if ( unlikely((act->status & 
   1.164                         (GNTPIN_wmap_mask|GNTPIN_rmap_mask)) != 0) )
   1.165          {
   1.166 +            DPRINTK("Attempt to deactivate a mapped g.e. (%x)\n", act->status);
   1.167 +            rc = -EINVAL;
   1.168 +            goto out;
   1.169          }
   1.170  
   1.171 +        act->status = 0;
   1.172 +        make_entry_unmappable(rd->grant_table, act);
   1.173 +
   1.174          clear_bit(_GTF_writing, &sha->flags);
   1.175          clear_bit(_GTF_reading, &sha->flags);
   1.176 -
   1.177 -        act->status = 0;
   1.178 -
   1.179 -        /* XXX UNMAP XXX */
   1.180      }
   1.181      else 
   1.182      {
   1.183 +        /* CASE 3: Active modifications to an already active entry. */
   1.184 +
   1.185 +        /*
   1.186 +         * Check mapping counts up front, as necessary.
   1.187 +         * After this compound check, the operation cannot fail.
   1.188 +         */
   1.189 +        if ( ((pin_flags & (GNTPIN_readonly|GNTPIN_host_accessible)) !=
   1.190 +              GNTPIN_host_accessible) &&
   1.191 +             (unlikely((act->status & GNTPIN_wmap_mask) != 0) ||
   1.192 +              (((pin_flags & GNTPIN_host_accessible) == 0) &&
   1.193 +               unlikely((act->status & GNTPIN_rmap_mask) != 0))) )
   1.194 +        {
   1.195 +            DPRINTK("Attempt to reduce pinning of a mapped g.e. (%x,%x)\n",
   1.196 +                    pin_flags, act->status);
   1.197 +            rc = -EINVAL;
   1.198 +            goto out;
   1.199 +        }
   1.200 +
   1.201 +        /* Check for changes to host accessibility. */
   1.202 +        if ( pin_flags & GNTPIN_host_accessible )
   1.203 +        {
   1.204 +            if ( !(act->status & GNTPIN_host_accessible) )
   1.205 +                make_entry_mappable(rd->grant_table, act);
   1.206 +        }
   1.207 +        else if ( act->status & GNTPIN_host_accessible )
   1.208 +            make_entry_unmappable(rd->grant_table, act);
   1.209 +
   1.210 +        /* Check for changes to write accessibility. */
   1.211          if ( pin_flags & GNTPIN_readonly )
   1.212          {
   1.213              if ( !(act->status & GNTPIN_readonly) )
   1.214              {
   1.215 +                check_tlb_flush(act);
   1.216 +                clear_bit(_GTF_writing, &sha->flags);
   1.217              }
   1.218          }
   1.219          else if ( act->status & GNTPIN_readonly )
   1.220          {
   1.221 +            sflags = sha->flags;
   1.222 +            do {
   1.223 +                prev_sflags = sflags;
   1.224 +
   1.225 +                if ( unlikely(prev_sflags & GTF_readonly) )
   1.226 +                {
   1.227 +                    DPRINTK("Attempt to write-pin a read-only grant entry.\n");
   1.228 +                    rc = -EINVAL;
   1.229 +                    goto out;
   1.230 +                }
   1.231 +                
   1.232 +                /* NB. prev_sflags is updated in place to seen value. */
   1.233 +                if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, 
   1.234 +                                           prev_sflags | GTF_writing)) )
   1.235 +                {
   1.236 +                    DPRINTK("Fault while modifying shared flags.\n");
   1.237 +                    rc = -EINVAL;
   1.238 +                    goto out;
   1.239 +                }
   1.240 +            }
   1.241 +            while ( prev_sflags != sflags );
   1.242          }
   1.243  
   1.244 -        if ( pin_flags & GNTPIN_host_accessible )
   1.245 -        {
   1.246 -            if ( !(act->status & GNTPIN_host_accessible) )
   1.247 -            {
   1.248 -                /* XXX MAP XXX */
   1.249 -            }
   1.250 -        }
   1.251 -        else if ( act->status & GNTPIN_host_accessible )
   1.252 -        {
   1.253 -            /* XXX UNMAP XXX */
   1.254 -        }
   1.255 +        /* Update status word -- this includes device accessibility. */
   1.256 +        act->status &= ~(GNTPIN_dev_accessible |
   1.257 +                         GNTPIN_host_accessible |
   1.258 +                         GNTPIN_readonly);
   1.259 +        act->status |= pin_flags;
   1.260 +    }
   1.261  
   1.262 -        act->status &= ~GNTPIN_dev_accessible;
   1.263 -        act->status |= pin_flags & GNTPIN_dev_accessible; 
   1.264 -    }
   1.265 +    /* Unchecked and unconditional. */
   1.266 +    (void)__put_user(act->frame, &uop->dev_bus_addr);
   1.267 +    (void)__put_user(act->frame, &uop->host_phys_addr);
   1.268  
   1.269   out:
   1.270      put_domain(rd);
   1.271      return rc;
   1.272  }
   1.273  
   1.274 -long do_grant_table_op(gnttab_op_t *uop)
   1.275 +long 
   1.276 +do_grant_table_op(
   1.277 +    gnttab_op_t *uop)
   1.278  {
   1.279      long rc;
   1.280      u32  cmd;
   1.281 @@ -157,7 +289,9 @@ long do_grant_table_op(gnttab_op_t *uop)
   1.282      return rc;
   1.283  }
   1.284  
   1.285 -int grant_table_create(struct domain *d)
   1.286 +int 
   1.287 +grant_table_create(
   1.288 +    struct domain *d)
   1.289  {
   1.290      grant_table_t *t;
   1.291      int            i;
   1.292 @@ -197,7 +331,9 @@ int grant_table_create(struct domain *d)
   1.293      return -ENOMEM;
   1.294  }
   1.295  
   1.296 -void grant_table_destroy(struct domain *d)
   1.297 +void
   1.298 +grant_table_destroy(
   1.299 +    struct domain *d)
   1.300  {
   1.301      grant_table_t *t;
   1.302  
   1.303 @@ -211,7 +347,9 @@ void grant_table_destroy(struct domain *
   1.304      }
   1.305  }
   1.306  
   1.307 -void grant_table_init(void)
   1.308 +void
   1.309 +grant_table_init(
   1.310 +    void)
   1.311  {
   1.312      /* Nothing. */
   1.313  }
     2.1 --- a/xen/include/asm-x86/system.h	Tue Aug 24 10:29:53 2004 +0000
     2.2 +++ b/xen/include/asm-x86/system.h	Tue Aug 24 15:12:02 2004 +0000
     2.3 @@ -11,14 +11,6 @@
     2.4  #define wbinvd() \
     2.5  	__asm__ __volatile__ ("wbinvd": : :"memory");
     2.6  
     2.7 -static inline unsigned long get_limit(unsigned long segment)
     2.8 -{
     2.9 -	unsigned long __limit;
    2.10 -	__asm__("lsll %1,%0"
    2.11 -		:"=r" (__limit):"r" (segment));
    2.12 -	return __limit+1;
    2.13 -}
    2.14 -
    2.15  #define nop() __asm__ __volatile__ ("nop")
    2.16  
    2.17  #define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
    2.18 @@ -32,7 +24,7 @@ struct __xchg_dummy { unsigned long a[10
    2.19   * Note 2: xchg has side effect, so that attribute volatile is necessary,
    2.20   *   but generally the primitive is invalid, *ptr is output argument. --ANK
    2.21   */
    2.22 -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
    2.23 +static always_inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
    2.24  {
    2.25  	switch (size) {
    2.26  		case 1:
    2.27 @@ -78,7 +70,7 @@ static inline unsigned long __xchg(unsig
    2.28   * indicated by comparing RETURN with OLD.
    2.29   */
    2.30  
    2.31 -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
    2.32 +static always_inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
    2.33  				      unsigned long new, int size)
    2.34  {
    2.35  	unsigned long prev;
    2.36 @@ -126,17 +118,14 @@ static inline unsigned long __cmpxchg(vo
    2.37  
    2.38  
    2.39  /*
    2.40 - * This function causes longword _o to be changed to _n at location _p.
    2.41 + * This function causes value _o to be changed to _n at location _p.
    2.42   * If this access causes a fault then we return 1, otherwise we return 0.
    2.43 - * If no fault occurs then _o is updated to teh value we saw at _p. If this
    2.44 + * If no fault occurs then _o is updated to the value we saw at _p. If this
    2.45   * is the same as the initial value of _o then _n is written to location _p.
    2.46   */
    2.47 -#ifdef __i386__
    2.48 -#define cmpxchg_user(_p,_o,_n)                                          \
    2.49 -({                                                                      \
    2.50 -    int _rc;                                                            \
    2.51 +#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype)                 \
    2.52      __asm__ __volatile__ (                                              \
    2.53 -        "1: " LOCK_PREFIX "cmpxchg"__OS" %2,%3\n"                       \
    2.54 +        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n"             \
    2.55          "2:\n"                                                          \
    2.56          ".section .fixup,\"ax\"\n"                                      \
    2.57          "3:     movl $1,%1\n"                                           \
    2.58 @@ -147,12 +136,45 @@ static inline unsigned long __cmpxchg(vo
    2.59          "       .long 1b,3b\n"                                          \
    2.60          ".previous"                                                     \
    2.61          : "=a" (_o), "=r" (_rc)                                         \
    2.62 -        : "q" (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
    2.63 -        : "memory");                                                    \
    2.64 +        : _regtype (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
    2.65 +        : "memory");
    2.66 +#ifdef __i386__
    2.67 +#define cmpxchg_user(_p,_o,_n)                                          \
    2.68 +({                                                                      \
    2.69 +    int _rc;                                                            \
    2.70 +    switch ( sizeof(*(_p)) ) {                                          \
    2.71 +    case 1:                                                             \
    2.72 +        __cmpxchg_user(_p,_o,_n,"b","b","q");                           \
    2.73 +        break;                                                          \
    2.74 +    case 2:                                                             \
    2.75 +        __cmpxchg_user(_p,_o,_n,"w","w","r");                           \
    2.76 +        break;                                                          \
    2.77 +    case 4:                                                             \
    2.78 +        __cmpxchg_user(_p,_o,_n,"l","","r");                            \
    2.79 +        break;                                                          \
    2.80 +    }                                                                   \
    2.81      _rc;                                                                \
    2.82  })
    2.83  #else
    2.84 -#define cmpxchg_user(_p,_o,_n) ({ __asm__ __volatile__ ( "" : : "r" (_p), "r" (_o), "r" (_n) ); BUG(); 0; })
    2.85 +#define cmpxchg_user(_p,_o,_n)                                          \
    2.86 +({                                                                      \
    2.87 +    int _rc;                                                            \
    2.88 +    switch ( sizeof(*(_p)) ) {                                          \
    2.89 +    case 1:                                                             \
    2.90 +        __cmpxchg_user(_p,_o,_n,"b","b","q");                           \
    2.91 +        break;                                                          \
    2.92 +    case 2:                                                             \
    2.93 +        __cmpxchg_user(_p,_o,_n,"w","w","r");                           \
    2.94 +        break;                                                          \
    2.95 +    case 4:                                                             \
    2.96 +        __cmpxchg_user(_p,_o,_n,"l","k","r");                           \
    2.97 +        break;                                                          \
    2.98 +    case 8:                                                             \
    2.99 +        __cmpxchg_user(_p,_o,_n,"q","","r");                            \
   2.100 +        break;                                                          \
   2.101 +    }                                                                   \
   2.102 +    _rc;                                                                \
   2.103 +})
   2.104  #endif
   2.105  
   2.106  /*
     3.1 --- a/xen/include/hypervisor-ifs/grant_table.h	Tue Aug 24 10:29:53 2004 +0000
     3.2 +++ b/xen/include/hypervisor-ifs/grant_table.h	Tue Aug 24 15:12:02 2004 +0000
     3.3 @@ -16,10 +16,11 @@
     3.4   */
     3.5  
     3.6  /* Some rough guidelines on accessing and updating grant-table entries
     3.7 - * in a concurreny-safe manner. For more information, Linux contains a
     3.8 + * in a concurrency-safe manner. For more information, Linux contains a
     3.9   * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
    3.10   * 
    3.11 - * NB. WMB is a no-op on current-generation x86 processors.
    3.12 + * NB. WMB is a no-op on current-generation x86 processors. However, a
    3.13 + *     compiler barrier will still be required.
    3.14   * 
    3.15   * Introducing a valid entry into the grant table:
    3.16   *  1. Write ent->domid.
    3.17 @@ -31,11 +32,13 @@
    3.18   *  1. flags = ent->flags.
    3.19   *  2. Observe that !(flags & (GTF_reading|GTF_writing)).
    3.20   *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
    3.21 - *  4. WMB.
    3.22 + *  NB. No need for WMB as reuse of entry is control-dependent on success of
    3.23 + *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
    3.24   * 
    3.25   * Removing an unused GTF_accept_transfer entry:
    3.26 - *  1. Clear ent->flags.
    3.27 - *  2. WMB.
    3.28 + *  1. Check result of SMP-safe CMPXCHG(&ent->frame, 0, <any non-zero value>).
    3.29 + *  2. Clear ent->flags.
    3.30 + *  3. WMB (ordering of step 2 vs. steps 1,2 of introducing a new entry).
    3.31   * 
    3.32   * Changing a GTF_permit_access from writable to read-only:
    3.33   *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.