ia64/xen-unstable

changeset 2347:950fc272bee9

bitkeeper revision 1.1159.1.106 (412dec2dJdF7UJz-ddgHVVOekKtAHQ)

Completed first cut of Xen support for grant tables.
The device drivers now need modifying to use them.
author kaf24@labyrinth.cl.cam.ac.uk
date Thu Aug 26 13:57:01 2004 +0000 (2004-08-26)
parents f2b75edc9ce1
children 4cbe16ee99d3 ce1a57f8737a
files xen/arch/x86/memory.c xen/common/grant_table.c xen/include/hypervisor-ifs/grant_table.h xen/include/hypervisor-ifs/hypervisor-if.h xen/include/xen/grant_table.h
line diff
     1.1 --- a/xen/arch/x86/memory.c	Wed Aug 25 16:26:15 2004 +0000
     1.2 +++ b/xen/arch/x86/memory.c	Thu Aug 26 13:57:01 2004 +0000
     1.3 @@ -443,7 +443,8 @@ get_page_from_l1e(
     1.4          if ( unlikely((count_info & PGC_count_mask) == 0) ||
     1.5               unlikely(e == NULL) || unlikely(!get_domain(e)) )
     1.6               return 0;
     1.7 -        rc = gnttab_try_map(e, d, page, l1v & _PAGE_RW);
     1.8 +        rc = gnttab_try_map(
     1.9 +            e, d, pfn, (l1v & _PAGE_RW) ? GNTTAB_MAP_RW : GNTTAB_MAP_RO);
    1.10          put_domain(e);
    1.11          return rc;
    1.12      }
    1.13 @@ -484,11 +485,12 @@ get_page_from_l2e(
    1.14  
    1.15  static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
    1.16  {
    1.17 -    struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
    1.18      unsigned long    l1v  = l1_pgentry_val(l1e);
    1.19 +    unsigned long    pfn  = l1_pgentry_to_pagenr(l1e);
    1.20 +    struct pfn_info *page = &frame_table[pfn];
    1.21      struct domain   *e = page->u.inuse.domain;
    1.22  
    1.23 -    if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(l1v >> PAGE_SHIFT) )
    1.24 +    if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(pfn) )
    1.25          return;
    1.26  
    1.27      if ( unlikely(e != d) )
    1.28 @@ -504,7 +506,8 @@ static void put_page_from_l1e(l1_pgentry
    1.29           * mappings and which unmappings are counted via the grant entry, but
    1.30           * really it doesn't matter as privileged domains have carte blanche.
    1.31           */
    1.32 -        if ( likely(gnttab_try_unmap(e, d, page, l1v & _PAGE_RW)) )
    1.33 +        if ( likely(gnttab_try_map(e, d, pfn, (l1v & _PAGE_RW) ? 
    1.34 +                                   GNTTAB_UNMAP_RW : GNTTAB_UNMAP_RO)) )
    1.35              return;
    1.36          /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */
    1.37      }
    1.38 @@ -824,6 +827,7 @@ static int do_extended_command(unsigned 
    1.39      struct domain *d = current, *nd, *e;
    1.40      u32 x, y;
    1.41      domid_t domid;
    1.42 +    grant_ref_t gntref;
    1.43  
    1.44      switch ( cmd )
    1.45      {
    1.46 @@ -978,6 +982,88 @@ static int do_extended_command(unsigned 
    1.47          }
    1.48          break;
    1.49  
    1.50 +    case MMUEXT_TRANSFER_PAGE:
    1.51 +        domid  = (domid_t)(val >> 16);
    1.52 +        gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF));
    1.53 +        
    1.54 +        if ( unlikely(IS_XEN_HEAP_FRAME(page)) ||
    1.55 +             unlikely(!pfn_is_ram(pfn)) ||
    1.56 +             unlikely((e = find_domain_by_id(domid)) == NULL) )
    1.57 +        {
    1.58 +            MEM_LOG("Bad frame (%08lx) or bad domid (%d).\n", pfn, domid);
    1.59 +            okay = 0;
    1.60 +            break;
    1.61 +        }
    1.62 +
    1.63 +        spin_lock(&d->page_alloc_lock);
    1.64 +
    1.65 +        /*
    1.66 +         * The tricky bit: atomically release ownership while there is just one
    1.67 +         * benign reference to the page (PGC_allocated). If that reference
    1.68 +         * disappears then the deallocation routine will safely spin.
    1.69 +         */
    1.70 +        nd = page->u.inuse.domain;
    1.71 +        y  = page->count_info;
    1.72 +        do {
    1.73 +            x = y;
    1.74 +            if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != 
    1.75 +                          (1|PGC_allocated)) ||
    1.76 +                 unlikely(nd != d) )
    1.77 +            {
    1.78 +                MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p,"
    1.79 +                        " caf=%08x, taf=%08x\n", page_to_pfn(page),
    1.80 +                        d, d->domain, nd, x, page->u.inuse.type_info);
    1.81 +                spin_unlock(&d->page_alloc_lock);
    1.82 +                put_domain(e);
    1.83 +                okay = 0;
    1.84 +                break;
    1.85 +            }
    1.86 +            __asm__ __volatile__(
    1.87 +                LOCK_PREFIX "cmpxchg8b %2"
    1.88 +                : "=d" (nd), "=a" (y),
    1.89 +                "=m" (*(volatile u64 *)(&page->count_info))
    1.90 +                : "0" (d), "1" (x), "c" (NULL), "b" (x) );
    1.91 +        } 
    1.92 +        while ( unlikely(nd != d) || unlikely(y != x) );
    1.93 +
    1.94 +        /*
    1.95 +         * Unlink from 'd'. At least one reference remains (now anonymous), so
    1.96 +         * no one else is spinning to try to delete this page from 'd'.
    1.97 +         */
    1.98 +        d->tot_pages--;
    1.99 +        list_del(&page->list);
   1.100 +        
   1.101 +        spin_unlock(&d->page_alloc_lock);
   1.102 +
   1.103 +        spin_lock(&e->page_alloc_lock);
   1.104 +
   1.105 +        /* Check that 'e' will accept the page and has reservation headroom. */
   1.106 +        ASSERT(e->tot_pages <= e->max_pages);
   1.107 +        if ( unlikely(e->tot_pages == e->max_pages) ||
   1.108 +             unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) )
   1.109 +        {
   1.110 +            MEM_LOG("Transferee has no reservation headroom (%ld,%ld), or "
   1.111 +                    "provided a bad grant ref.\n", e->tot_pages, e->max_pages);
   1.112 +            spin_unlock(&e->page_alloc_lock);
   1.113 +            put_domain(e);
   1.114 +            okay = 0;
   1.115 +            break;
   1.116 +        }
   1.117 +
   1.118 +        /* Okay, add the page to 'e'. */
   1.119 +        if ( unlikely(e->tot_pages++ == 0) )
   1.120 +            get_knownalive_domain(e);
   1.121 +        list_add_tail(&page->list, &e->page_list);
   1.122 +        page->u.inuse.domain = e;
   1.123 +
   1.124 +        spin_unlock(&e->page_alloc_lock);
   1.125 +
   1.126 +        /* Transfer is all done: tell the guest about its new page frame. */
   1.127 +        gnttab_notify_transfer(e, gntref, pfn);
   1.128 +        
   1.129 +        put_domain(e);
   1.130 +        break;
   1.131 +
   1.132      case MMUEXT_REASSIGN_PAGE:
   1.133          if ( unlikely(!IS_PRIV(d)) )
   1.134          {
     2.1 --- a/xen/common/grant_table.c	Wed Aug 25 16:26:15 2004 +0000
     2.2 +++ b/xen/common/grant_table.c	Thu Aug 26 13:57:01 2004 +0000
     2.3 @@ -73,12 +73,22 @@ gnttab_update_pin_status(
     2.4      grant_ref_t    ref;
     2.5      u16            pin_flags;
     2.6      struct domain *ld, *rd;
     2.7 -    u16            sflags, prev_sflags;
     2.8 +    u16            sflags;
     2.9      active_grant_entry_t *act;
    2.10      grant_entry_t *sha;
    2.11      long           rc = 0;
    2.12      unsigned long  frame;
    2.13  
    2.14 +    /*
    2.15 +     * We bound the number of times we retry CMPXCHG on memory locations
    2.16 +     * that we share with a guest OS. The reason is that the guest can modify
    2.17 +     * that location at a higher rate than we can read-modify-CMPXCHG, so
    2.18 +     * the guest could cause us to livelock. There are a few cases
    2.19 +     * where it is valid for the guest to race our updates (e.g., to change
    2.20 +     * the GTF_readonly flag), so we allow a few retries before failing.
    2.21 +     */
    2.22 +    int            retries = 0;
    2.23 +
    2.24      ld = current;
    2.25  
    2.26      /* Bitwise-OR avoids short-circuiting which screws control flow. */
    2.27 @@ -127,7 +137,7 @@ gnttab_update_pin_status(
    2.28  
    2.29          for ( ; ; )
    2.30          {
    2.31 -            u32 scombo, prev_scombo;
    2.32 +            u32 scombo, prev_scombo, new_scombo;
    2.33  
    2.34              if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
    2.35                   unlikely(sdom != ld->domain) )
    2.36 @@ -135,29 +145,34 @@ gnttab_update_pin_status(
    2.37                           "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
    2.38                          sflags, sdom, ld->domain);
    2.39  
    2.40 -            sflags |= GTF_reading;
    2.41 +            /* Merge two 16-bit values into a 32-bit combined update. */
    2.42 +            /* NB. Endianness! */
    2.43 +            prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
    2.44 +
    2.45 +            new_scombo = scombo | GTF_reading;
    2.46              if ( !(pin_flags & GNTPIN_readonly) )
    2.47              {
    2.48 -                sflags |= GTF_writing;
    2.49 +                new_scombo |= GTF_writing;
    2.50                  if ( unlikely(sflags & GTF_readonly) )
    2.51                      PIN_FAIL(EINVAL,
    2.52                               "Attempt to write-pin a r/o grant entry.\n");
    2.53              }
    2.54  
    2.55 -            /* Merge two 16-bit values into a 32-bit combined update. */
    2.56 -            /* NB. Endianness! */
    2.57 -            prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
    2.58 -
    2.59 -            /* NB. prev_sflags is updated in place to seen value. */
    2.60 -            if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, 
    2.61 -                                       prev_scombo | GTF_writing)) )
    2.62 +            /* NB. prev_scombo is updated in place to seen value. */
    2.63 +            if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
    2.64 +                                       prev_scombo, 
    2.65 +                                       new_scombo)) )
    2.66                  PIN_FAIL(EINVAL,
    2.67                           "Fault while modifying shared flags and domid.\n");
    2.68  
    2.69              /* Did the combined update work (did we see what we expected?). */
    2.70 -            if ( prev_scombo == scombo )
    2.71 +            if ( likely(prev_scombo == scombo) )
    2.72                  break;
    2.73  
    2.74 +            if ( retries++ == 4 )
    2.75 +                PIN_FAIL(EINVAL,
    2.76 +                         "Shared grant entry is unstable.\n");
    2.77 +
    2.78              /* Didn't see what we expected. Split out the seen flags & dom. */
    2.79              /* NB. Endianness! */
    2.80              sflags = (u16)prev_scombo;
    2.81 @@ -243,10 +258,12 @@ gnttab_update_pin_status(
    2.82          else if ( act->status & GNTPIN_readonly )
    2.83          {
    2.84              sflags = sha->flags;
    2.85 -            do {
    2.86 -                prev_sflags = sflags;
    2.87  
    2.88 -                if ( unlikely(prev_sflags & GTF_readonly) )
    2.89 +            for ( ; ; )
    2.90 +            {
    2.91 +                u16 prev_sflags;
    2.92 +                
    2.93 +                if ( unlikely(sflags & GTF_readonly) )
    2.94                      PIN_FAIL(EINVAL,
    2.95                               "Attempt to write-pin a r/o grant entry.\n");
    2.96  
    2.97 @@ -255,13 +272,23 @@ gnttab_update_pin_status(
    2.98                      PIN_FAIL(EINVAL,
    2.99                               "Attempt to write-pin a unwritable page.\n");
   2.100  
   2.101 +                prev_sflags = sflags;
   2.102 +
   2.103                  /* NB. prev_sflags is updated in place to seen value. */
   2.104                  if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, 
   2.105                                             prev_sflags | GTF_writing)) )
   2.106                      PIN_FAIL(EINVAL,
   2.107                               "Fault while modifying shared flags.\n");
   2.108 +
   2.109 +                if ( likely(prev_sflags == sflags) )
   2.110 +                    break;
   2.111 +
   2.112 +                if ( retries++ == 4 )
   2.113 +                    PIN_FAIL(EINVAL,
   2.114 +                             "Shared grant entry is unstable.\n");
   2.115 +
   2.116 +                sflags = prev_sflags;
   2.117              }
   2.118 -            while ( prev_sflags != sflags );
   2.119          }
   2.120  
   2.121          /* Update status word -- this includes device accessibility. */
   2.122 @@ -281,6 +308,51 @@ gnttab_update_pin_status(
   2.123      return rc;
   2.124  }
   2.125  
   2.126 +static long 
   2.127 +gnttab_setup_table(
   2.128 +    gnttab_setup_table_t *uop)
   2.129 +{
   2.130 +    gnttab_setup_table_t  op;
   2.131 +    struct domain        *d;
   2.132 +    /* GNTTABOP_setup_table: write <dom>'s grant-table frame to frame_list. */
   2.133 +    if ( unlikely(__copy_from_user(&op, uop, sizeof(op)) != 0) )
   2.134 +    {
   2.135 +        DPRINTK("Fault while reading gnttab_setup_table_t.\n");
   2.136 +        return -EFAULT;
   2.137 +    }
   2.138 +
   2.139 +    if ( unlikely(op.nr_frames > 1) )
   2.140 +    {
   2.141 +        DPRINTK("Xen only supports one grant-table frame per domain.\n");
   2.142 +        return -EINVAL;
   2.143 +    }
   2.144 +
   2.145 +    if ( op.dom == DOMID_SELF )
   2.146 +        op.dom = current->domain;
   2.147 +    /* NOTE(review): no privilege check here when op.dom != DOMID_SELF. */
   2.148 +    if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
   2.149 +    {
   2.150 +        DPRINTK("Bad domid %d.\n", op.dom);
   2.151 +        return -ESRCH;
   2.152 +    }
   2.153 +
   2.154 +    if ( op.nr_frames == 1 )
   2.155 +    {
   2.156 +        ASSERT(d->grant_table != NULL);
   2.157 +        /* NOTE(review): frame of d->grant_table itself, not ->shared; confirm. */
   2.158 +        if ( unlikely(put_user(virt_to_phys(d->grant_table) >> PAGE_SHIFT,
   2.159 +                               &op.frame_list[0])) )
   2.160 +        {
   2.161 +            DPRINTK("Fault while writing frame list.\n");
   2.162 +            put_domain(d);
   2.163 +            return -EFAULT;
   2.164 +        }
   2.165 +    }
   2.166 +
   2.167 +    put_domain(d);
   2.168 +    return 0;
   2.169 +}
   2.170 +
   2.171  long 
   2.172  do_grant_table_op(
   2.173      gnttab_op_t *uop)
   2.174 @@ -297,6 +369,9 @@ do_grant_table_op(
   2.175      case GNTTABOP_update_pin_status:
   2.176          rc = gnttab_update_pin_status(&uop->u.update_pin_status);
   2.177          break;
   2.178 +    case GNTTABOP_setup_table:
   2.179 +        rc = gnttab_setup_table(&uop->u.setup_table);
   2.180 +        break;
   2.181      default:
   2.182          rc = -ENOSYS;
   2.183          break;
   2.184 @@ -307,16 +382,144 @@ do_grant_table_op(
   2.185  
   2.186  int
   2.187  gnttab_try_map(
   2.188 -    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
   2.189 +    struct domain *rd, struct domain *ld, unsigned long frame, int op)
   2.190 +{
   2.191 +    grant_table_t        *t;
   2.192 +    active_grant_entry_t *a;
   2.193 +    u16                  *ph, h;
   2.194 +    /* Apply 'op' for 'frame' to rd's active grant entry; 1 = ok, 0 = fail. */
   2.195 +    if ( unlikely((t = rd->grant_table) == NULL) )
   2.196 +        return 0;
   2.197 +
   2.198 +    spin_lock(&t->lock);
   2.199 +    /* Search this frame's hash chain for the matching active entry. */
   2.200 +    ph = &t->maphash[GNT_MAPHASH(frame)];
   2.201 +    while ( (h = *ph) != GNT_MAPHASH_INVALID )
   2.202 +    {
   2.203 +        if ( (a = &t->active[*ph])->frame == frame )
   2.204 +            goto found;
   2.205 +        ph = &a->next;
   2.206 +    }
   2.207 +    
   2.208 + fail:
   2.209 +    spin_unlock(&t->lock);
   2.210 +    return 0;
   2.211 +
   2.212 + found:
   2.213 +    if ( !(a->status & GNTPIN_host_accessible) )
   2.214 +        goto fail;
   2.215 +    /* Bump or drop the r/o or r/w map count, refusing to wrap either way. */
   2.216 +    switch ( op )
   2.217 +    {
   2.218 +    case GNTTAB_MAP_RO:
   2.219 +        if ( (a->status & GNTPIN_rmap_mask) == GNTPIN_rmap_mask )
   2.220 +            goto fail;
   2.221 +        a->status += 1 << GNTPIN_rmap_shift;
   2.222 +        break;
   2.223 +
   2.224 +    case GNTTAB_MAP_RW:
   2.225 +        if ( (a->status & GNTPIN_wmap_mask) == GNTPIN_wmap_mask )
   2.226 +            goto fail;
   2.227 +        a->status += 1 << GNTPIN_wmap_shift;
   2.228 +        break;
   2.229 +
   2.230 +    case GNTTAB_UNMAP_RO:
   2.231 +        if ( (a->status & GNTPIN_rmap_mask) == 0 )
   2.232 +            goto fail;
   2.233 +        a->status -= 1 << GNTPIN_rmap_shift;
   2.234 +        break;
   2.235 +
   2.236 +    case GNTTAB_UNMAP_RW:
   2.237 +        if ( (a->status & GNTPIN_wmap_mask) == 0 )
   2.238 +            goto fail;
   2.239 +        a->status -= 1 << GNTPIN_wmap_shift;
   2.240 +        break;
   2.241 +
   2.242 +    default:
   2.243 +        BUG();
   2.244 +    }
   2.245 +
   2.246 +    spin_unlock(&t->lock);
   2.247 +    return 1;
   2.248 +}
   2.249 +
   2.250 +int 
   2.251 +gnttab_prepare_for_transfer(
   2.252 +    struct domain *rd, struct domain *ld, grant_ref_t ref)
   2.253  {
   2.254 +    grant_table_t *t;
   2.255 +    grant_entry_t *e;
   2.256 +    domid_t        sdom;
   2.257 +    u16            sflags;
   2.258 +    u32            scombo, prev_scombo;
   2.259 +    int            retries = 0;
   2.260 +    /* Commit rd's entry 'ref' to a transfer from ld. Returns 1 on success. */
   2.261 +    if ( unlikely((t = rd->grant_table) == NULL) ||
   2.262 +         unlikely(ref >= NR_GRANT_ENTRIES) )
   2.263 +    {
   2.264 +        DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", rd->domain, ref);
   2.265 +        return 0;
   2.266 +    }
   2.267 +
   2.268 +    spin_lock(&t->lock);
   2.269 +
   2.270 +    e = &t->shared[ref];
   2.271 +    /* Initial snapshot; later iterations reuse the value CMPXCHG observed. */
   2.272 +    sflags = e->flags;
   2.273 +    sdom   = e->domid;
   2.274 +
   2.275 +    for ( ; ; )
   2.276 +    {
   2.277 +        if ( unlikely(sflags != GTF_accept_transfer) ||
   2.278 +             unlikely(sdom != ld->domain) )
   2.279 +        {
   2.280 +            DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
   2.281 +                    sflags, sdom, ld->domain);
   2.282 +            goto fail;
   2.283 +        }
   2.284 +
   2.285 +        /* Merge two 16-bit values into a 32-bit combined update. */
   2.286 +        /* NB. Endianness! */
   2.287 +        prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
   2.288 +
   2.289 +        /* NB. prev_scombo is updated in place to seen value. */
   2.290 +        if ( unlikely(cmpxchg_user((u32 *)&e->flags, prev_scombo, 
   2.291 +                                   prev_scombo | GTF_transfer_committed)) )
   2.292 +        {
   2.293 +            DPRINTK("Fault while modifying shared flags and domid.\n");
   2.294 +            goto fail;
   2.295 +        }
   2.296 +
   2.297 +        /* Did the combined update work (did we see what we expected?). */
   2.298 +        if ( likely(prev_scombo == scombo) )
   2.299 +            break;
   2.300 +        /* Entry changed under us: retry, but only a bounded number of times. */
   2.301 +        if ( retries++ == 4 )
   2.302 +        {
   2.303 +            DPRINTK("Shared grant entry is unstable.\n");
   2.304 +            goto fail;
   2.305 +        }
   2.306 +
   2.307 +        /* Didn't see what we expected. Split out the seen flags & dom. */
   2.308 +        /* NB. Endianness! */
   2.309 +        sflags = (u16)prev_scombo;
   2.310 +        sdom   = (u16)(prev_scombo >> 16);
   2.311 +    }
   2.312 +
   2.313 +    spin_unlock(&t->lock);
   2.314 +    return 1;
   2.315 +
   2.316 + fail:
   2.317 +    spin_unlock(&t->lock);
   2.318      return 0;
   2.319  }
   2.320  
   2.321 -int
   2.322 -gnttab_try_unmap(
   2.323 -    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
   2.324 +void 
   2.325 +gnttab_notify_transfer(
   2.326 +    struct domain *rd, grant_ref_t ref, unsigned long frame)
   2.327  {
   2.328 -    return 0;
   2.329 +    wmb(); /* Reassignment must be visible before the frame is written. */
   2.330 +    rd->grant_table->shared[ref].frame = frame;
   2.331  }
   2.332  
   2.333  int 
     3.1 --- a/xen/include/hypervisor-ifs/grant_table.h	Wed Aug 25 16:26:15 2004 +0000
     3.2 +++ b/xen/include/hypervisor-ifs/grant_table.h	Thu Aug 26 13:57:01 2004 +0000
     3.3 @@ -28,18 +28,32 @@
     3.4   *  3. Write memory barrier (WMB).
     3.5   *  4. Write ent->flags, inc. valid type.
     3.6   * 
     3.7 - * Removing an unused GTF_permit_access entry:
     3.8 + * Invalidating an unused GTF_permit_access entry:
     3.9   *  1. flags = ent->flags.
    3.10   *  2. Observe that !(flags & (GTF_reading|GTF_writing)).
    3.11   *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
    3.12   *  NB. No need for WMB as reuse of entry is control-dependent on success of
    3.13   *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
    3.14 + *
    3.15 + * Invalidating an in-use GTF_permit_access entry:
    3.16 + *  This cannot be done directly. Request assistance from the domain controller
    3.17 + *  which can set a timeout on the use of a grant entry and take necessary
    3.18 + *  action. (NB. This is not yet implemented!).
    3.19   * 
    3.20 - * Removing an unused GTF_accept_transfer entry:
    3.21 - *  1. Check result of SMP-safe CMPXCHG(&ent->frame, 0, <any non-zero value>).
    3.22 - *  2. Clear ent->flags.
    3.23 - *  3. WMB (ordering of step 2 vs. steps 1,2 of introducing a new entry).
    3.24 - * 
    3.25 + * Invalidating an unused GTF_accept_transfer entry:
    3.26 + *  1. flags = ent->flags.
    3.27 + *  2. Observe that !(flags & GTF_transfer_committed). [*]
    3.28 + *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
    3.29 + *  NB. No need for WMB as reuse of entry is control-dependent on success of
    3.30 + *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
    3.31 + *  [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
    3.32 + *      The guest must /not/ modify the grant entry until the address of the
    3.33 + *      transferred frame is written. It is safe for the guest to spin waiting
    3.34 + *      for this to occur (detect by observing non-zero value in ent->frame).
    3.35 + *
    3.36 + * Invalidating a committed GTF_accept_transfer entry:
    3.37 + *  1. Wait for ent->frame != 0.
    3.38 + *
    3.39   * Changing a GTF_permit_access from writable to read-only:
    3.40   *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
    3.41   * 
    3.42 @@ -90,6 +104,18 @@ typedef struct {
    3.43  #define _GTF_writing        (4)
    3.44  #define GTF_writing         (1<<_GTF_writing)
    3.45  
    3.46 +/*
    3.47 + * Subflags for GTF_accept_transfer:
    3.48 + *  GTF_transfer_committed: Xen sets this flag to indicate that it is committed
    3.49 + *      to transferring ownership of a page frame. When a guest sees this flag
    3.50 + *      it must /not/ modify the grant entry until the address of the
    3.51 + *      transferred frame is written into the entry.
    3.52 + *      NB. It is safe for the guest to spin-wait on the frame address:
    3.53 + *          Xen will always write the frame address in a timely manner.
    3.54 + */
    3.55 +#define _GTF_transfer_committed (2)
    3.56 +#define GTF_transfer_committed  (1<<_GTF_transfer_committed)
    3.57 +
    3.58  
    3.59  /***********************************
    3.60   * GRANT TABLE QUERIES AND USES
    3.61 @@ -124,11 +150,32 @@ typedef struct {
    3.62      MEMORY_PADDING;
    3.63  } PACKED gnttab_update_pin_status_t; /* 16 bytes */
    3.64  
    3.65 +/*
    3.66 + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
    3.67 + * <nr_frames> pages. The frame addresses are written to the <frame_list>.
    3.68 + * Only <nr_frames> addresses are written, even if the table is larger.
    3.69 + * NOTES:
    3.70 + *  1. <dom> may be specified as DOMID_SELF.
    3.71 + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
    3.72 + *  3. Xen may not support more than a single grant-table page per domain.
    3.73 + */
    3.74 +#define GNTTABOP_setup_table          1
    3.75 +typedef struct {
    3.76 +    /* IN parameters. */
    3.77 +    domid_t     dom;                  /*  0 */
    3.78 +    u16         nr_frames;            /*  2 */
    3.79 +    u32         __pad;                /*  4 */
    3.80 +    /* OUT parameters. */
    3.81 +    unsigned long *frame_list;        /*  8 */
    3.82 +    MEMORY_PADDING;
    3.83 +} PACKED gnttab_setup_table_t; /* 16 bytes */
    3.84 +
    3.85  typedef struct {
    3.86      u32 cmd; /* GNTTABOP_* */         /*  0 */
    3.87      u32 __reserved;                   /*  4 */
    3.88      union {                           /*  8 */
    3.89          gnttab_update_pin_status_t update_pin_status;
    3.90 +        gnttab_setup_table_t       setup_table;
    3.91          u8                         __dummy[16];
    3.92      } PACKED u;
    3.93  } PACKED gnttab_op_t; /* 24 bytes */
     4.1 --- a/xen/include/hypervisor-ifs/hypervisor-if.h	Wed Aug 25 16:26:15 2004 +0000
     4.2 +++ b/xen/include/hypervisor-ifs/hypervisor-if.h	Thu Aug 26 13:57:01 2004 +0000
     4.3 @@ -121,17 +121,23 @@
     4.4   *   ptr[:2]  -- Linear address of LDT base (NB. must be page-aligned).
     4.5   *   val[:8]  -- Number of entries in LDT.
     4.6   * 
     4.7 + *   val[7:0] == MMUEXT_TRANSFER_PAGE:
     4.8 + *   val[31:16] -- Domain to whom page is to be transferred.
     4.9 + *   (val[15:8],ptr[9:2]) -- 16-bit reference into transferee's grant table.
    4.10 + *   ptr[:12]  -- Page frame to be transferred to the domain in val[31:16].
    4.11 + *                (NB. The frame must currently belong to the calling domain).
    4.12 + * 
    4.13   *   val[7:0] == MMUEXT_SET_FOREIGNDOM:
    4.14 - *   val[31:15] -- Domain to set as the Foreign Domain (FD).
    4.15 + *   val[31:16] -- Domain to set as the Foreign Domain (FD).
    4.16   *                 (NB. DOMID_SELF is not recognised)
    4.17   *                 If FD != DOMID_IO then the caller must be privileged.
    4.18   * 
    4.19 + *   val[7:0] == MMUEXT_CLEAR_FOREIGNDOM:
    4.20 + *   Clears the FD.
    4.21 + * 
    4.22   *   val[7:0] == MMUEXT_REASSIGN_PAGE:
    4.23   *   ptr[:2]  -- A machine address within the page to be reassigned to the FD.
    4.24   *               (NB. page must currently belong to the calling domain).
    4.25 - * 
    4.26 - *   val[7:0] == MMUEXT_CLEAR_FOREIGNDOM:
    4.27 - *   Clears the FD.
    4.28   */
    4.29  #define MMU_NORMAL_PT_UPDATE     0 /* checked '*ptr = val'. ptr is MA.       */
    4.30  #define MMU_MACHPHYS_UPDATE      2 /* ptr = MA of frame to modify entry for  */
    4.31 @@ -145,9 +151,10 @@
    4.32  #define MMUEXT_TLB_FLUSH         6 /* ptr = NULL                             */
    4.33  #define MMUEXT_INVLPG            7 /* ptr = VA to invalidate                 */
    4.34  #define MMUEXT_SET_LDT           8 /* ptr = VA of table; val = # entries     */
    4.35 -#define MMUEXT_SET_FOREIGNDOM    9 /* val[31:15] = dom                       */
    4.36 -#define MMUEXT_REASSIGN_PAGE    10
    4.37 +#define MMUEXT_TRANSFER_PAGE     9 /* ptr = MA of frame; val[31:16] = dom    */
    4.38 +#define MMUEXT_SET_FOREIGNDOM   10 /* val[31:16] = dom                       */
    4.39  #define MMUEXT_CLEAR_FOREIGNDOM 11
    4.40 +#define MMUEXT_REASSIGN_PAGE    12
    4.41  #define MMUEXT_CMD_MASK        255
    4.42  #define MMUEXT_CMD_SHIFT         8
    4.43  
     5.1 --- a/xen/include/xen/grant_table.h	Wed Aug 25 16:26:15 2004 +0000
     5.2 +++ b/xen/include/xen/grant_table.h	Thu Aug 26 13:57:01 2004 +0000
     5.3 @@ -77,9 +77,24 @@ void grant_table_destroy(
     5.4      struct domain *d);
     5.5  
     5.6  /* Create/destroy host-CPU mappings via a grant-table entry. */
     5.7 +#define GNTTAB_MAP_RO   0
     5.8 +#define GNTTAB_MAP_RW   1
     5.9 +#define GNTTAB_UNMAP_RO 2
    5.10 +#define GNTTAB_UNMAP_RW 3
    5.11  int gnttab_try_map(
    5.12 -    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
    5.13 -int gnttab_try_unmap(
    5.14 -    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
    5.15 +    struct domain *rd, struct domain *ld, unsigned long frame, int op);
    5.16 +
    5.17 +/*
    5.18 + * Check that the given grant reference (rd,ref) allows 'ld' to transfer
    5.19 + * ownership of a page frame. If so, lock down the grant entry.
    5.20 + */
    5.21 +int 
    5.22 +gnttab_prepare_for_transfer(
    5.23 +    struct domain *rd, struct domain *ld, grant_ref_t ref);
    5.24 +
    5.25 +/* Notify 'rd' of a completed transfer via an already-locked grant entry. */
    5.26 +void 
    5.27 +gnttab_notify_transfer(
    5.28 +    struct domain *rd, grant_ref_t ref, unsigned long frame);
    5.29  
    5.30  #endif /* __XEN_GRANT_H__ */