direct-io.hg

changeset 4461:09e69c727ad2

bitkeeper revision 1.1236.56.1 (4251a1f9OIyZY2I2LqBlxl0mi64FkA)

Grant tables: substantially more robust.
Block front and back drivers: support for using grant tables for interdomain communication.
author cwc22@centipede.cl.cam.ac.uk
date Mon Apr 04 20:22:17 2005 +0000 (2005-04-04)
parents 7d50508a59d1
children eb31fbbaa355
files .rootkeys docs/misc/grant-tables.txt linux-2.4.29-xen-sparse/arch/xen/config.in linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0 linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h linux-2.6.11-xen-sparse/arch/xen/Kconfig linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h xen/arch/x86/mm.c xen/common/grant_table.c xen/include/public/grant_table.h xen/include/public/io/blkif.h xen/include/xen/grant_table.h
line diff
     1.1 --- a/.rootkeys	Sun Apr 03 13:17:25 2005 +0000
     1.2 +++ b/.rootkeys	Mon Apr 04 20:22:17 2005 +0000
     1.3 @@ -20,6 +20,7 @@ 41c0c4116itF389v0CEWcmzue6zJkA docs/misc
     1.4  4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc/XenDebugger-HOWTO
     1.5  412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt
     1.6  420b949cy9ZGzED74Fz_DaWlK7tT4g docs/misc/crashdb.txt
     1.7 +4251a1f82AexscYEiF4Iku8Gc_kWfQ docs/misc/grant-tables.txt
     1.8  40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html
     1.9  410a4c2bAO_m_l4RsiiPHnZ4ixHWbQ docs/misc/xend.tex
    1.10  3f9e7d564bWFB-Czjv1qdmE6o0GqNg docs/src/interface.tex
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/docs/misc/grant-tables.txt	Mon Apr 04 20:22:17 2005 +0000
     2.3 @@ -0,0 +1,325 @@
     2.4 +********************************************************************************
     2.5 + A Rough Introduction to Using Grant Tables
     2.6 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     2.7 +                                              Christopher Clark, March, 2005.
     2.8 +
     2.9 +Grant tables are a mechanism for sharing and transferring frames between
    2.10 +domains, without requiring the participating domains to be privileged.
    2.11 +
    2.12 +The first mode of use allows domA to grant domB access to a specific frame,
    2.13 +whilst retaining ownership. The block front driver uses this to grant memory
    2.14 +access to the block back driver, so that it may read or write as requested.
    2.15 +
    2.16 + 1. domA creates a grant access reference, and transmits the ref id to domB.
    2.17 + 2. domB uses the reference to map the granted frame.
    2.18 + 3. domB performs the memory access.
    2.19 + 4. domB unmaps the granted frame.
    2.20 + 5. domA removes its grant.
    2.21 +
    2.22 +
    2.23 +The second mode allows domA to accept a transfer of ownership of a frame from
    2.24 +domB. The net front and back drivers will use this for packet tx/rx. This
    2.25 +mechanism is still being implemented, though the xen<->guest interface design
    2.26 +is complete.
    2.27 +
    2.28 + 1. domA creates an accept transfer grant reference, and transmits it to domB.
    2.29 + 2. domB uses the ref to hand over a frame it owns.
    2.30 + 3. domA accepts the transfer.
    2.31 + 4. domA clears the used reference.
    2.32 +
    2.33 +
    2.34 +********************************************************************************
    2.35 + Data structures
    2.36 + ~~~~~~~~~~~~~~~
    2.37 +
    2.38 + The following data structures are used by Xen and the guests to implement
    2.39 + grant tables:
    2.40 +
    2.41 + 1. Shared grant entries
    2.42 + 2. Active grant entries
    2.43 + 3. Map tracking
    2.44 +
    2.45 + These are not the user's primary interface to grant tables, but are discussed
    2.46 + because an understanding of how they work may be useful. Each of these is a
    2.47 + finite resource.
    2.48 +
    2.49 + Shared grant entries
    2.50 + ~~~~~~~~~~~~~~~~~~~~
    2.51 +
    2.52 + A set of pages are shared between Xen and a guest, holding the shared grant
    2.53 + entries. The guest writes into these entries to create grant references. The
    2.54 + index of the entry is transmitted to the remote domain: this is the
    2.55 + reference used to activate an entry. Xen will write into a shared entry to
    2.56 + indicate to a guest that its grant is in use.
    2.57 +  sha->domid : remote domain being granted rights
    2.58 +  sha->frame : machine frame being granted
    2.59 +  sha->flags : allow access, allow transfer, remote is reading/writing, etc.
    2.60 +
    2.61 + Active grant entries
    2.62 + ~~~~~~~~~~~~~~~~~~~~
    2.63 +
    2.64 + Xen maintains a set of private frames per domain, holding the active grant
    2.65 + entries for safety, and to reference count mappings.
    2.66 +  act->domid : remote domain being granted rights
    2.67 +  act->frame : machine frame being granted
    2.68 +  act->pin   : used to hold reference counts
    2.69 +
    2.70 + Map tracking
    2.71 + ~~~~~~~~~~~~
    2.72 +
    2.73 + Every time a frame is mapped, a map track entry is stored in the metadata of
    2.74 + the mapping domain. The index of this entry is returned from the map call,
    2.75 + and is used to unmap the frame. Map track entries are also searched whenever a
    2.76 + page table entry containing a foreign frame number is overwritten: the first
    2.77 + matching map track entry is then removed, as if unmap had been invoked.
    2.78 + These are not used by the transfer mechanism.
    2.79 +  map->domid         : owner of the mapped frame
    2.80 +  map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
    2.81 +
    2.82 +********************************************************************************
    2.83 +
    2.84 + Granting a foreign domain access to frames
    2.85 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    2.86 +
    2.87 + domA [frame]--> domB
    2.88 +
    2.89 +
    2.90 + domA:  #include <asm-xen/gnttab.h>
    2.91 +        grant_ref_t gref[BATCH_SIZE];
    2.92 +
    2.93 +        for ( i = 0; i < BATCH_SIZE; i++ )
    2.94 +            gref[i] = gnttab_grant_foreign_access( domBid, mfn, (readonly ? 1 : 0) );
    2.95 +
    2.96 +
    2.97 + .. gref is then somehow transmitted to domB for use.
    2.98 +
    2.99 +
   2.100 + Mapping foreign frames
   2.101 + ~~~~~~~~~~~~~~~~~~~~~~
   2.102 +
   2.103 + domB:  #include <asm-xen/hypervisor.h>
   2.104 +        unsigned long       mmap_vstart;
   2.105 +        gnttab_op_t         aop[BATCH_SIZE];
   2.106 +        grant_ref_t         mapped_handle[BATCH_SIZE];
   2.107 +
   2.108 +        if ( (mmap_vstart = allocate_empty_lowmem_region(BATCH_SIZE)) == 0 )
   2.109 +            BUG();
   2.110 +
   2.111 +        for ( i = 0; i < BATCH_SIZE; i++ )
   2.112 +        {
   2.113 +            aop[i].u.map_grant_ref.host_virt_addr =
   2.114 +                                              mmap_vstart + (i * PAGE_SIZE);
   2.115 +            aop[i].u.map_grant_ref.dom      = domAid;
   2.116 +            aop[i].u.map_grant_ref.ref      = gref[i];
   2.117 +            aop[i].u.map_grant_ref.flags    = ( GNTMAP_host_map | GNTMAP_readonly );
   2.118 +        }
   2.119 +
   2.120 +        if ( unlikely(HYPERVISOR_grant_table_op(
   2.121 +                        GNTTABOP_map_grant_ref, aop, BATCH_SIZE)))
   2.122 +            BUG();
   2.123 +
   2.124 +        for ( i = 0; i < BATCH_SIZE; i++ )
   2.125 +        {
   2.126 +            if ( unlikely(aop[i].u.map_grant_ref.dev_bus_addr == 0) )
   2.127 +            {
   2.128 +                tidyup_all(aop, i);
   2.129 +                goto panic;
   2.130 +            }
   2.131 +
   2.132 +            phys_to_machine_mapping[__pa(mmap_vstart + (i * PAGE_SIZE))>>PAGE_SHIFT] =
   2.133 +                FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr);
   2.134 +
   2.135 +            mapped_handle[i] = aop[i].u.map_grant_ref.handle;
   2.136 +        }
   2.137 +
   2.138 +
   2.139 +
   2.140 + Unmapping foreign frames
   2.141 + ~~~~~~~~~~~~~~~~~~~~~~~~
   2.142 +
   2.143 + domB:
   2.144 +        for ( i = 0; i < BATCH_SIZE; i++ )
   2.145 +        {
   2.146 +            aop[i].u.unmap_grant_ref.host_virt_addr = mmap_vstart + (i * PAGE_SIZE);
   2.147 +            aop[i].u.unmap_grant_ref.dev_bus_addr   = 0;
   2.148 +            aop[i].u.unmap_grant_ref.handle         = mapped_handle[i];
   2.149 +        }
   2.150 +        if ( unlikely(HYPERVISOR_grant_table_op(
   2.151 +                        GNTTABOP_unmap_grant_ref, aop, BATCH_SIZE)))
   2.152 +            BUG();
   2.153 +
   2.154 +
   2.155 + Ending foreign access
   2.156 + ~~~~~~~~~~~~~~~~~~~~~
   2.157 +
   2.158 +    Note that this only prevents further mappings; it does _not_ revoke access.
   2.159 +    Should _only_ be used when the remote domain has unmapped the frame.
   2.160 +    gnttab_query_foreign_access( gref ) will indicate the state of any mapping.
   2.161 +
   2.162 + domA:
   2.163 +        if ( gnttab_query_foreign_access( gref[i] ) == 0 )
   2.164 +            gnttab_end_foreign_access( gref[i], readonly );
   2.165 +
   2.166 +        TODO: readonly yet to be implemented.
   2.167 +
   2.168 +
   2.169 +********************************************************************************
   2.170 +
   2.171 + Transferring ownership of a frame to another domain
   2.172 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2.173 +
   2.174 + [ XXX: Transfer mechanism is alpha-calibre code, untested, use at own risk XXX ]
   2.175 + [ XXX: show use of batch operations below, rather than single frame XXX ]
   2.176 + [ XXX: linux internal interface could/should be wrapped to be tidier XXX ]
   2.177 +
   2.178 +
   2.179 + Prepare to accept a frame from a foreign domain
   2.180 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2.181 +
   2.182 +  domA:
   2.183 +        if ( (p = alloc_page(GFP_HIGHUSER)) == NULL )
   2.184 +        {
   2.185 +            printk("Cannot alloc a frame to surrender\n");
   2.186 +            break;
   2.187 +        }
   2.188 +        pfn = p - mem_map;
   2.189 +        mfn = phys_to_machine_mapping[pfn];
   2.190 +                                                                                       
   2.191 +        if ( !PageHighMem(p) )
   2.192 +        {
   2.193 +            v = phys_to_virt(pfn << PAGE_SHIFT);
   2.194 +            scrub_pages(v, 1);
   2.195 +            queue_l1_entry_update(get_ptep((unsigned long)v), 0);
   2.196 +        }
   2.197 +                                                                                       
   2.198 +        /* Ensure that ballooned highmem pages don't have cached mappings. */
   2.199 +        kmap_flush_unused();
   2.200 +
   2.201 +        /* Flush updates through and flush the TLB. */
   2.202 +        xen_tlb_flush();
   2.203 +                                                                                       
   2.204 +        phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
   2.205 +                                                                                       
   2.206 +        if ( HYPERVISOR_dom_mem_op(
   2.207 +            MEMOP_decrease_reservation, &mfn, 1, 0) != 1 )
   2.208 +        {
   2.209 +            printk("MEMOP_decrease_reservation failed\n");
   2.210 +            /* er... ok. free the page then */
   2.211 +            __free_page(p);
   2.212 +            break;
   2.213 +        }
   2.214 +                                                                                       
   2.215 +        accepting_pfn = pfn;
   2.216 +        ref = gnttab_grant_foreign_transfer( (domid_t) args.arg[0], pfn );
   2.217 +        printk("Accepting dom %lu frame at ref (%d)\n", args.arg[0], ref);
   2.218 +                                                                                       
   2.219 +
   2.220 + Transfer a frame to a foreign domain
   2.221 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2.222 +
   2.223 +  domB:
   2.224 +        mmu_update_t            update;
   2.225 +        domid_t                 domid;
   2.226 +        grant_ref_t             gref;
   2.227 +        unsigned long           pfn, mfn, *v;
   2.228 +        struct page            *transfer_page = 0;
   2.229 +                                                                                       
   2.230 +        /* alloc a page and grant access.
   2.231 +         * alloc page returns a page struct. */
   2.232 +        if ( (transfer_page = alloc_page(GFP_HIGHUSER)) == NULL )
   2.233 +            return -ENOMEM;
   2.234 +
   2.235 +        pfn = transfer_page - mem_map;
   2.236 +        mfn = phys_to_machine_mapping[pfn];
   2.237 +
   2.238 +        /* need to remove all references to this page */
   2.239 +        if ( !PageHighMem(transfer_page) )
   2.240 +        {
   2.241 +            v = phys_to_virt(pfn << PAGE_SHIFT);
   2.242 +            scrub_pages(v, 1);
   2.243 +            sprintf((char *)v, "This page (%lx) was transferred.\n", mfn);
   2.244 +            queue_l1_entry_update(get_ptep((unsigned long)v), 0);
   2.245 +        }
   2.246 +#ifdef CONFIG_XEN_SCRUB_PAGES
   2.247 +        else
   2.248 +        {
   2.249 +            v = kmap(transfer_page);
   2.250 +            scrub_pages(v, 1);
   2.251 +            sprintf((char *)v, "This page (%lx) was transferred.\n", mfn);
   2.252 +            kunmap(transfer_page);
   2.253 +        }
   2.254 +#endif
   2.255 +        /* Delete any cached kmappings */
   2.256 +        kmap_flush_unused();
   2.257 +
   2.258 +        /* Flush updates through and flush the TLB */
   2.259 +        xen_tlb_flush();
   2.260 +
   2.261 +        /* invalidate in P2M */
   2.262 +        phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
   2.263 +
   2.264 +        domid = (domid_t)args.arg[0];
   2.265 +        gref  = (grant_ref_t)args.arg[1];
   2.266 +
   2.267 +        update.ptr  = MMU_EXTENDED_COMMAND;
   2.268 +        update.ptr |= ((gref & 0x00FF) << 2);
   2.269 +        update.ptr |= mfn << PAGE_SHIFT;
   2.270 +                                                                                       
   2.271 +        update.val  = MMUEXT_TRANSFER_PAGE;
   2.272 +        update.val |= (domid << 16);
   2.273 +        update.val |= (gref & 0xFF00);
   2.274 +                                                                                       
   2.275 +        ret = HYPERVISOR_mmu_update(&update, 1, NULL);
   2.276 +                                                                                       
   2.277 +
   2.278 + Map a transferred frame
   2.279 + ~~~~~~~~~~~~~~~~~~~~~~~
   2.280 +
   2.281 + TODO:
   2.282 +
   2.283 +
   2.284 + Clear the used transfer reference
   2.285 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2.286 +
   2.287 + TODO:
   2.288 +
   2.289 +
   2.290 +********************************************************************************
   2.291 +
   2.292 + Using a private reserve of grant references
   2.293 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2.294 +
   2.295 +Where it is known in advance how many grant references are required, and
   2.296 +failure to allocate them on demand would cause difficulty, a batch can be
   2.297 +allocated and held in a private reserve.
   2.298 +
   2.299 +To reserve a private batch:
   2.300 +
   2.301 +    /* housekeeping data - treat as opaque: */
   2.302 +    grant_ref_t gref_head, gref_terminal;
   2.303 +
   2.304 +    if ( 0 > gnttab_alloc_grant_references( number_to_reserve,
   2.305 +                                            &gref_head, &gref_terminal ))
   2.306 +        return -ENOSPC;
   2.307 +
   2.308 +
   2.309 +To release a batch back to the shared pool:
   2.310 +
   2.311 +    gnttab_free_grant_references( number_reserved, gref_head );
   2.312 +
   2.313 +
   2.314 +To claim a reserved reference:
   2.315 +
   2.316 +    ref = gnttab_claim_grant_reference( &gref_head, gref_terminal );
   2.317 +
   2.318 +
   2.319 +To release a claimed reference back to the reserve pool:
   2.320 +
   2.321 +    gnttab_release_grant_reference( &gref_head, gref );
   2.322 +
   2.323 +
   2.324 +To use a claimed reference to grant access, use these alternative functions
   2.325 +that take an additional parameter of the grant reference to use:
   2.326 +
   2.327 +    gnttab_grant_foreign_access_ref
   2.328 +    gnttab_grant_foreign_transfer_ref
     3.1 --- a/linux-2.4.29-xen-sparse/arch/xen/config.in	Sun Apr 03 13:17:25 2005 +0000
     3.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/config.in	Mon Apr 04 20:22:17 2005 +0000
     3.3 @@ -22,6 +22,7 @@ fi
     3.4  bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES
     3.5  bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND
     3.6  bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND
     3.7 +bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT
     3.8  bool 'USB-device frontend driver' CONFIG_XEN_USB_FRONTEND
     3.9  endmenu
    3.10  # The IBM S/390 patch needs this.
     4.1 --- a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0	Sun Apr 03 13:17:25 2005 +0000
     4.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0	Mon Apr 04 20:22:17 2005 +0000
     4.3 @@ -16,6 +16,7 @@ CONFIG_XEN_PHYSDEV_ACCESS=y
     4.4  CONFIG_XEN_SCRUB_PAGES=y
     4.5  CONFIG_XEN_NETDEV_FRONTEND=y
     4.6  CONFIG_XEN_BLKDEV_FRONTEND=y
     4.7 +# CONFIG_XEN_BLKDEV_GRANT is not set
     4.8  # CONFIG_XEN_USB_FRONTEND is not set
     4.9  CONFIG_NO_IDLE_HZ=y
    4.10  CONFIG_FOREIGN_PAGES=y
     5.1 --- a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU	Sun Apr 03 13:17:25 2005 +0000
     5.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU	Mon Apr 04 20:22:17 2005 +0000
     5.3 @@ -15,6 +15,7 @@ CONFIG_UID16=y
     5.4  CONFIG_XEN_SCRUB_PAGES=y
     5.5  CONFIG_XEN_NETDEV_FRONTEND=y
     5.6  CONFIG_XEN_BLKDEV_FRONTEND=y
     5.7 +# CONFIG_XEN_BLKDEV_GRANT is not set
     5.8  # CONFIG_XEN_USB_FRONTEND is not set
     5.9  CONFIG_NO_IDLE_HZ=y
    5.10  # CONFIG_FOREIGN_PAGES is not set
     6.1 --- a/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c	Sun Apr 03 13:17:25 2005 +0000
     6.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c	Mon Apr 04 20:22:17 2005 +0000
     6.3 @@ -67,9 +67,14 @@ static int xlvbd_get_vbd_info(vdisk_t *d
     6.4      memset(&req, 0, sizeof(req));
     6.5      req.operation   = BLKIF_OP_PROBE;
     6.6      req.nr_segments = 1;
     6.7 +#ifdef CONFIG_XEN_BLKDEV_GRANT
     6.8 +    blkif_control_probe_send(&req, &rsp,
     6.9 +                             (unsigned long)(virt_to_machine(buf)));
    6.10 +#else
    6.11      req.frame_and_sects[0] = virt_to_machine(buf) | 7;
    6.12  
    6.13      blkif_control_send(&req, &rsp);
    6.14 +#endif
    6.15  
    6.16      if ( rsp.status <= 0 )
    6.17      {
     7.1 --- a/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h	Sun Apr 03 13:17:25 2005 +0000
     7.2 +++ b/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h	Mon Apr 04 20:22:17 2005 +0000
     7.3 @@ -17,6 +17,7 @@
     7.4  #include <linux/kernel.h>
     7.5  #include <asm/apicdef.h>
     7.6  #include <asm/page.h>
     7.7 +#include <asm-xen/gnttab.h>
     7.8  #ifdef CONFIG_HIGHMEM
     7.9  #include <linux/threads.h>
    7.10  #include <asm/kmap_types.h>
    7.11 @@ -52,7 +53,8 @@ enum fixed_addresses {
    7.12  	FIX_NETRING2_BASE,
    7.13  	FIX_NETRING3_BASE,
    7.14          FIX_SHARED_INFO,
    7.15 -	FIX_GNTTAB,
    7.16 +	FIX_GNTTAB_BEGIN,
    7.17 +    FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1,
    7.18  #ifdef CONFIG_VGA_CONSOLE
    7.19  #define NR_FIX_BTMAPS   32  /* 128KB For the Dom0 VGA Console A0000-C0000 */
    7.20  #else
     8.1 --- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig	Sun Apr 03 13:17:25 2005 +0000
     8.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig	Mon Apr 04 20:22:17 2005 +0000
     8.3 @@ -61,6 +61,16 @@ config XEN_BLKDEV_TAP_BE
     8.4            with the blktap.  This option will be removed as the block drivers are
     8.5            modified to use grant tables.
     8.6  
     8.7 +config XEN_BLKDEV_GRANT
     8.8 +        bool "Grant table substrate for block drivers (DANGEROUS)"
     8.9 +        depends on !XEN_BLKDEV_TAP_BE
    8.10 +        default n
    8.11 +        help
    8.12 +          This introduces the use of grant tables as a data exchange mechanism
    8.13 +          between the frontend and backend block drivers. This currently
    8.14 +          conflicts with the block tap, and should be considered untested
    8.15 +          and likely to render your system unstable.
    8.16 +
    8.17  config XEN_NETDEV_BACKEND
    8.18  	bool "Network-device backend driver"
    8.19  	depends on XEN_PHYSDEV_ACCESS
     9.1 --- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	Sun Apr 03 13:17:25 2005 +0000
     9.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	Mon Apr 04 20:22:17 2005 +0000
     9.3 @@ -41,9 +41,14 @@ EXPORT_SYMBOL(gnttab_end_foreign_access)
     9.4  EXPORT_SYMBOL(gnttab_query_foreign_access);
     9.5  EXPORT_SYMBOL(gnttab_grant_foreign_transfer);
     9.6  EXPORT_SYMBOL(gnttab_end_foreign_transfer);
     9.7 +EXPORT_SYMBOL(gnttab_alloc_grant_references);
     9.8 +EXPORT_SYMBOL(gnttab_free_grant_references);
     9.9 +EXPORT_SYMBOL(gnttab_claim_grant_reference);
    9.10 +EXPORT_SYMBOL(gnttab_release_grant_reference);
    9.11 +EXPORT_SYMBOL(gnttab_grant_foreign_access_ref);
    9.12 +EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref);
    9.13  
    9.14 -#define NR_GRANT_REFS 512
    9.15 -static grant_ref_t gnttab_free_list[NR_GRANT_REFS];
    9.16 +static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
    9.17  static grant_ref_t gnttab_free_head;
    9.18  
    9.19  static grant_entry_t *shared;
    9.20 @@ -61,7 +66,7 @@ get_free_entry(
    9.21      void)
    9.22  {
    9.23      grant_ref_t fh, nfh = gnttab_free_head;
    9.24 -    do { if ( unlikely((fh = nfh) == NR_GRANT_REFS) ) return -1; }
    9.25 +    do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
    9.26      while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
    9.27                                      gnttab_free_list[fh])) != fh) );
    9.28      return fh;
    9.29 @@ -97,6 +102,17 @@ gnttab_grant_foreign_access(
    9.30      return ref;
    9.31  }
    9.32  
    9.33 +void
    9.34 +gnttab_grant_foreign_access_ref(
    9.35 +    grant_ref_t ref, domid_t domid, unsigned long frame, int readonly)
    9.36 +{
    9.37 +    shared[ref].frame = frame;
    9.38 +    shared[ref].domid = domid;
    9.39 +    wmb();
    9.40 +    shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
    9.41 +}
    9.42 +
    9.43 +
    9.44  int
    9.45  gnttab_query_foreign_access( grant_ref_t ref )
    9.46  {
    9.47 @@ -124,14 +140,14 @@ gnttab_end_foreign_access( grant_ref_t r
    9.48  
    9.49  int
    9.50  gnttab_grant_foreign_transfer(
    9.51 -    domid_t domid)
    9.52 +    domid_t domid, unsigned long pfn )
    9.53  {
    9.54      int ref;
    9.55  
    9.56      if ( unlikely((ref = get_free_entry()) == -1) )
    9.57          return -ENOSPC;
    9.58  
    9.59 -    shared[ref].frame = 0;
    9.60 +    shared[ref].frame = pfn;
    9.61      shared[ref].domid = domid;
    9.62      wmb();
    9.63      shared[ref].flags = GTF_accept_transfer;
    9.64 @@ -139,6 +155,16 @@ gnttab_grant_foreign_transfer(
    9.65      return ref;
    9.66  }
    9.67  
    9.68 +void
    9.69 +gnttab_grant_foreign_transfer_ref(
    9.70 +    grant_ref_t ref, domid_t domid, unsigned long pfn )
    9.71 +{
    9.72 +    shared[ref].frame = pfn;
    9.73 +    shared[ref].domid = domid;
    9.74 +    wmb();
    9.75 +    shared[ref].flags = GTF_accept_transfer;
    9.76 +}
    9.77 +
    9.78  unsigned long
    9.79  gnttab_end_foreign_transfer(
    9.80      grant_ref_t ref)
    9.81 @@ -163,6 +189,60 @@ gnttab_end_foreign_transfer(
    9.82      return frame;
    9.83  }
    9.84  
    9.85 +void
    9.86 +gnttab_free_grant_references( u16 count, grant_ref_t head )
    9.87 +{
    9.88 +    /* TODO: O(N)...? */
    9.89 +    grant_ref_t to_die = 0, next = head;
    9.90 +    int i;
    9.91 +
    9.92 +    for ( i = 0; i < count; i++ )
    9.93 +        to_die = next;
    9.94 +        next = gnttab_free_list[next];
    9.95 +        put_free_entry( to_die );
    9.96 +}
    9.97 +
    9.98 +int
    9.99 +gnttab_alloc_grant_references( u16 count,
   9.100 +                               grant_ref_t *head,
   9.101 +                               grant_ref_t *terminal )
   9.102 +{
   9.103 +    int i;
   9.104 +    grant_ref_t h = gnttab_free_head;
   9.105 +
   9.106 +    for ( i = 0; i < count; i++ )
   9.107 +        if ( unlikely(get_free_entry() == -1) )
   9.108 +            goto not_enough_refs;
   9.109 +
   9.110 +    *head = h;
   9.111 +    *terminal = gnttab_free_head;
   9.112 +
   9.113 +    return 0;
   9.114 +
   9.115 +not_enough_refs:
   9.116 +    gnttab_free_head = h;
   9.117 +    return -ENOSPC;
   9.118 +}
   9.119 +
   9.120 +int
   9.121 +gnttab_claim_grant_reference( grant_ref_t *private_head,
   9.122 +                              grant_ref_t  terminal )
   9.123 +{
   9.124 +    grant_ref_t g;
   9.125 +    if ( unlikely((g = *private_head) == terminal) )
   9.126 +        return -ENOSPC;
   9.127 +    *private_head = gnttab_free_list[g];
   9.128 +    return g;
   9.129 +}
   9.130 +
   9.131 +void
   9.132 +gnttab_release_grant_reference( grant_ref_t *private_head,
   9.133 +                                grant_ref_t  release )
   9.134 +{
   9.135 +    gnttab_free_list[release] = *private_head;
   9.136 +    *private_head = release;
   9.137 +}
   9.138 +
   9.139  static int grant_ioctl(struct inode *inode, struct file *file,
   9.140                         unsigned int cmd, unsigned long data)
   9.141  {
   9.142 @@ -194,7 +274,7 @@ static int grant_ioctl(struct inode *ino
   9.143          TRAP_INSTR "; "
   9.144          "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
   9.145          : "=a" (ret) : "0" (&hypercall) : "memory" );
   9.146 -                                                                                    
   9.147 +
   9.148      return ret;
   9.149  }
   9.150  
   9.151 @@ -212,7 +292,14 @@ static int grant_read(char *page, char *
   9.152      gt = (grant_entry_t *)shared;
   9.153      len = 0;
   9.154  
   9.155 -    for ( i = 0; i < NR_GRANT_REFS; i++ )
   9.156 +    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
   9.157 +        /* TODO: safety catch here until this can handle >PAGE_SIZE output */
   9.158 +        if (len > (PAGE_SIZE - 200))
   9.159 +        {
   9.160 +            len += sprintf( page + len, "Truncated.\n");
   9.161 +            break;
   9.162 +        }
   9.163 +
   9.164          if ( gt[i].flags )
   9.165              len += sprintf( page + len,
   9.166                      "Grant: ref (0x%x) flags (0x%hx) dom (0x%hx) frame (0x%x)\n", 
   9.167 @@ -235,22 +322,25 @@ static int grant_write(struct file *file
   9.168  static int __init gnttab_init(void)
   9.169  {
   9.170      gnttab_setup_table_t setup;
   9.171 -    unsigned long        frame;
   9.172 +    unsigned long        frames[NR_GRANT_FRAMES];
   9.173      int                  i;
   9.174  
   9.175 -    for ( i = 0; i < NR_GRANT_REFS; i++ )
   9.176 -        gnttab_free_list[i] = i + 1;
   9.177 +    setup.dom        = DOMID_SELF;
   9.178 +    setup.nr_frames  = NR_GRANT_FRAMES;
   9.179 +    setup.frame_list = frames;
   9.180  
   9.181 -    setup.dom        = DOMID_SELF;
   9.182 -    setup.nr_frames  = 1;
   9.183 -    setup.frame_list = &frame;
   9.184      if ( HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 )
   9.185          BUG();
   9.186      if ( setup.status != 0 )
   9.187          BUG();
   9.188  
   9.189 -    set_fixmap_ma(FIX_GNTTAB, frame << PAGE_SHIFT);
   9.190 -    shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB);
   9.191 +    for ( i = 0; i < NR_GRANT_FRAMES; i++ )
   9.192 +        set_fixmap_ma(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
   9.193 +
   9.194 +    shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
   9.195 +
   9.196 +    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
   9.197 +        gnttab_free_list[i] = i + 1;
   9.198  
   9.199      /*
   9.200       *  /proc/xen/grant : used by libxc to access grant tables
   9.201 @@ -269,6 +359,7 @@ static int __init gnttab_init(void)
   9.202      grant_pde->read_proc  = &grant_read;
   9.203      grant_pde->write_proc = &grant_write;
   9.204  
   9.205 +    printk("Grant table initialized\n");
   9.206      return 0;
   9.207  }
   9.208  
    10.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c	Sun Apr 03 13:17:25 2005 +0000
    10.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c	Mon Apr 04 20:22:17 2005 +0000
    10.3 @@ -8,10 +8,14 @@
    10.4   *  arch/xen/drivers/blkif/frontend
    10.5   * 
    10.6   * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
    10.7 + * Copyright (c) 2005, Christopher Clark
    10.8   */
    10.9  
   10.10  #include "common.h"
   10.11  #include <asm-xen/evtchn.h>
   10.12 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   10.13 +#include <asm-xen/xen-public/grant_table.h>
   10.14 +#endif
   10.15  
   10.16  /*
   10.17   * These are rather arbitrary. They are fairly large because adjacent requests
   10.18 @@ -80,6 +84,17 @@ static inline void flush_plugged_queue(v
   10.19  }
   10.20  #endif
   10.21  
   10.22 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   10.23 +/* When using grant tables to map a frame for device access then the
   10.24 + * handle returned must be used to unmap the frame. This is needed to
   10.25 + * drop the ref count on the frame.
   10.26 + */
   10.27 +static u16 pending_grant_handles[MMAP_PAGES];
   10.28 +#define pending_handle(_idx, _i) \
   10.29 +    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
   10.30 +#define BLKBACK_INVALID_HANDLE (0xFFFF)
   10.31 +#endif
   10.32 +
   10.33  #ifdef CONFIG_XEN_BLKDEV_TAP_BE
   10.34  /*
   10.35   * If the tap driver is used, we may get pages belonging to either the tap
   10.36 @@ -100,6 +115,27 @@ static void make_response(blkif_t *blkif
   10.37  
   10.38  static void fast_flush_area(int idx, int nr_pages)
   10.39  {
   10.40 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   10.41 +    gnttab_op_t       aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   10.42 +    unsigned int      i, invcount = 0;
   10.43 +    u16               handle;
   10.44 +
   10.45 +    for ( i = 0; i < nr_pages; i++ )
   10.46 +    {
   10.47 +        if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
   10.48 +        {
   10.49 +            aop[i].u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(idx, i);
   10.50 +            aop[i].u.unmap_grant_ref.dev_bus_addr   = 0;
   10.51 +            aop[i].u.unmap_grant_ref.handle         = handle;
   10.52 +            pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
   10.53 +            invcount++;
   10.54 +        }
   10.55 +    }
   10.56 +    if ( unlikely(HYPERVISOR_grant_table_op(
   10.57 +                    GNTTABOP_unmap_grant_ref, aop, invcount)))
   10.58 +        BUG();
   10.59 +#else
   10.60 +
   10.61      multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   10.62      int               i;
   10.63  
   10.64 @@ -114,6 +150,7 @@ static void fast_flush_area(int idx, int
   10.65      mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
   10.66      if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
   10.67          BUG();
   10.68 +#endif
   10.69  }
   10.70  
   10.71  
   10.72 @@ -347,6 +384,26 @@ static void dispatch_probe(blkif_t *blki
   10.73           (blkif_last_sect(req->frame_and_sects[0]) != 7) )
   10.74          goto out;
   10.75  
   10.76 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   10.77 +    {
   10.78 +        gnttab_op_t     op;
   10.79 +
   10.80 +        op.u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, 0);
   10.81 +        op.u.map_grant_ref.flags = GNTMAP_host_map;
   10.82 +        op.u.map_grant_ref.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
   10.83 +        op.u.map_grant_ref.dom = blkif->domid;
   10.84 +
   10.85 +        if ( unlikely(HYPERVISOR_grant_table_op(
   10.86 +                        GNTTABOP_map_grant_ref, &op, 1)))
   10.87 +            BUG();
   10.88 +
   10.89 +        if ( op.u.map_grant_ref.dev_bus_addr == 0 )
   10.90 +            goto out;
   10.91 +
   10.92 +        pending_handle(pending_idx, 0) = op.u.map_grant_ref.handle;
   10.93 +    }
   10.94 +#else /* else CONFIG_XEN_BLKDEV_GRANT */
   10.95 +
   10.96  #ifdef CONFIG_XEN_BLKDEV_TAP_BE
   10.97      /* Grab the real frontend out of the probe message. */
   10.98      if (req->frame_and_sects[1] == BLKTAP_COOKIE) 
   10.99 @@ -369,7 +426,8 @@ static void dispatch_probe(blkif_t *blki
  10.100          
  10.101          goto out;
  10.102  #endif
  10.103 -    
  10.104 +#endif /* endif CONFIG_XEN_BLKDEV_GRANT */
  10.105 +   
  10.106      rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
  10.107                      PAGE_SIZE / sizeof(vdisk_t));
  10.108  
  10.109 @@ -382,10 +440,15 @@ static void dispatch_rw_block_io(blkif_t
  10.110  {
  10.111      extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
  10.112      int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
  10.113 -    unsigned long fas, remap_prot;
  10.114 +    unsigned long fas = 0;
  10.115      int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
  10.116      pending_req_t *pending_req;
  10.117 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  10.118 +    gnttab_op_t       aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
  10.119 +#else
  10.120 +    unsigned long remap_prot;
  10.121      multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
  10.122 +#endif
  10.123      struct phys_req preq;
  10.124      struct { 
  10.125          unsigned long buf; unsigned int nsec;
  10.126 @@ -412,14 +475,58 @@ static void dispatch_rw_block_io(blkif_t
  10.127      preq.sector_number = req->sector_number;
  10.128      preq.nr_sects      = 0;
  10.129  
  10.130 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  10.131      for ( i = 0; i < nseg; i++ )
  10.132      {
  10.133 +        fas         = req->frame_and_sects[i];
  10.134 +        seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
  10.135 +
  10.136 +        if ( seg[i].nsec <= 0 )
  10.137 +            goto bad_descriptor;
  10.138 +        preq.nr_sects += seg[i].nsec;
  10.139 +
  10.140 +        aop[i].u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, i);
  10.141 +
  10.142 +        aop[i].u.map_grant_ref.dom = blkif->domid;
  10.143 +        aop[i].u.map_grant_ref.ref = blkif_gref_from_fas(fas);
  10.144 +        aop[i].u.map_grant_ref.flags = ( GNTMAP_host_map   |
  10.145 +                                       ( ( operation == READ ) ?
  10.146 +                                             0 : GNTMAP_readonly ) );
  10.147 +    }
  10.148 +
  10.149 +    if ( unlikely(HYPERVISOR_grant_table_op(
  10.150 +                    GNTTABOP_map_grant_ref, aop, nseg)))
  10.151 +        BUG();
  10.152 +
  10.153 +    for ( i = 0; i < nseg; i++ )
  10.154 +    {
  10.155 +        if ( unlikely(aop[i].u.map_grant_ref.dev_bus_addr == 0) )
  10.156 +        {
  10.157 +            DPRINTK("invalid buffer -- could not remap it\n");
  10.158 +            fast_flush_area(pending_idx, nseg);
  10.159 +            goto bad_descriptor;
  10.160 +        }
  10.161 +
  10.162 +        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
  10.163 +            FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr);
  10.164 +
  10.165 +        pending_handle(pending_idx, i) = aop[i].u.map_grant_ref.handle;
  10.166 +    }
  10.167 +#endif
  10.168 +
  10.169 +    for ( i = 0; i < nseg; i++ )
  10.170 +    {
  10.171 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  10.172 +        seg[i].buf  = (aop[i].u.map_grant_ref.dev_bus_addr << PAGE_SHIFT) |
  10.173 +                      (blkif_first_sect(req->frame_and_sects[i]) << 9); /* descriptor of segment i, not the stale 'fas' */
  10.174 +#else
  10.175          fas          = req->frame_and_sects[i];
  10.176          seg[i].buf  = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
  10.177          seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
  10.178          if ( seg[i].nsec <= 0 )
  10.179              goto bad_descriptor;
  10.180          preq.nr_sects += seg[i].nsec;
  10.181 +#endif
  10.182      }
  10.183  
  10.184      if ( vbd_translate(&preq, blkif, operation) != 0 )
  10.185 @@ -430,6 +537,7 @@ static void dispatch_rw_block_io(blkif_t
  10.186          goto bad_descriptor;
  10.187      }
  10.188  
  10.189 +#ifndef CONFIG_XEN_BLKDEV_GRANT
  10.190      if ( operation == READ )
  10.191          remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
  10.192      else
  10.193 @@ -461,6 +569,7 @@ static void dispatch_rw_block_io(blkif_t
  10.194              goto bad_descriptor;
  10.195          }
  10.196      }
  10.197 +#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */
  10.198  
  10.199      pending_req = &pending_reqs[pending_idx];
  10.200      pending_req->blkif     = blkif;
  10.201 @@ -628,9 +737,15 @@ static int __init blkif_init(void)
  10.202  
  10.203      blkif_ctrlif_init();
  10.204      
  10.205 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  10.206 +    memset( pending_grant_handles, 0xFF, sizeof(pending_grant_handles) ); /* every u16 entry becomes BLKBACK_INVALID_HANDLE */
  10.207 +    printk(KERN_ALERT "Blkif backend is using grant tables.\n");
  10.208 +#endif
  10.209 +
  10.210  #ifdef CONFIG_XEN_BLKDEV_TAP_BE
  10.211      printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
  10.212  #endif
  10.213 +
  10.214      return 0;
  10.215  }
  10.216  
    11.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c	Sun Apr 03 13:17:25 2005 +0000
    11.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c	Mon Apr 04 20:22:17 2005 +0000
    11.3 @@ -7,6 +7,7 @@
    11.4   * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
    11.5   * Copyright (c) 2004, Christian Limpach
    11.6   * Copyright (c) 2004, Andrew Warfield
    11.7 + * Copyright (c) 2005, Christopher Clark
    11.8   * 
    11.9   * This file may be distributed separately from the Linux kernel, or
   11.10   * incorporated into other software packages, subject to the following license:
   11.11 @@ -30,6 +31,14 @@
   11.12   * IN THE SOFTWARE.
   11.13   */
   11.14  
   11.15 +#if 1 /* 1: ASSERT() is live; do/while(0) makes it a single statement */
   11.16 +#define ASSERT(_p) \
   11.17 +    do { if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", \
   11.18 +    #_p , __LINE__, __FILE__); *(int*)0=0; } } while ( 0 )
   11.19 +#else
   11.20 +#define ASSERT(_p)
   11.21 +#endif
   11.22 +
   11.23  #include <linux/version.h>
   11.24  
   11.25  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   11.26 @@ -46,6 +55,10 @@
   11.27  #include <scsi/scsi.h>
   11.28  #include <asm-xen/ctrl_if.h>
   11.29  #include <asm-xen/evtchn.h>
   11.30 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   11.31 +#include <asm-xen/xen-public/grant_table.h>
   11.32 +#include <asm-xen/gnttab.h>
   11.33 +#endif
   11.34  
   11.35  typedef unsigned char byte; /* from linux/ide.h */
   11.36  
   11.37 @@ -76,6 +89,13 @@ static blkif_front_ring_t blk_ring;
   11.38  
   11.39  #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
   11.40  
   11.41 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   11.42 +static domid_t rdomid = 0;
   11.43 +static grant_ref_t gref_head, gref_terminal;
   11.44 +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
   11.45 +    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
   11.46 +#endif
   11.47 +
   11.48  unsigned long rec_ring_free;
   11.49  blkif_request_t rec_ring[BLK_RING_SIZE];
   11.50  
   11.51 @@ -130,7 +150,11 @@ static inline void translate_req_to_pfn(
   11.52      xreq->sector_number = req->sector_number;
   11.53  
   11.54      for ( i = 0; i < req->nr_segments; i++ )
   11.55 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   11.56 +        xreq->frame_and_sects[i] = req->frame_and_sects[i];
   11.57 +#else
   11.58          xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
   11.59 +#endif
   11.60  }
   11.61  
   11.62  static inline void translate_req_to_mfn(blkif_request_t *xreq,
   11.63 @@ -145,7 +169,11 @@ static inline void translate_req_to_mfn(
   11.64      xreq->sector_number = req->sector_number;
   11.65  
   11.66      for ( i = 0; i < req->nr_segments; i++ )
   11.67 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   11.68 +        xreq->frame_and_sects[i] = req->frame_and_sects[i];
   11.69 +#else
   11.70          xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
   11.71 +#endif
   11.72  }
   11.73  
   11.74  
   11.75 @@ -274,6 +302,9 @@ static int blkif_queue_request(struct re
   11.76      int idx;
   11.77      unsigned long id;
   11.78      unsigned int fsect, lsect;
   11.79 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   11.80 +    int ref;
   11.81 +#endif
   11.82  
   11.83      if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
   11.84          return 1;
   11.85 @@ -299,8 +330,23 @@ static int blkif_queue_request(struct re
   11.86              buffer_ma = page_to_phys(bvec->bv_page);
   11.87              fsect = bvec->bv_offset >> 9;
   11.88              lsect = fsect + (bvec->bv_len >> 9) - 1;
   11.89 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   11.90 +            /* install a grant reference. */
   11.91 +            ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
   11.92 +            ASSERT( ref != -ENOSPC );
   11.93 +
   11.94 +            gnttab_grant_foreign_access_ref(
   11.95 +                        ref,
   11.96 +                        rdomid,
   11.97 +                        buffer_ma >> PAGE_SHIFT,
   11.98 +                        rq_data_dir(req) );
   11.99 +
  11.100 +            ring_req->frame_and_sects[ring_req->nr_segments++] =
  11.101 +                (((u32) ref) << 16) | (fsect << 3) | lsect;
  11.102 +#else
  11.103              ring_req->frame_and_sects[ring_req->nr_segments++] =
  11.104                  buffer_ma | (fsect << 3) | lsect;
  11.105 +#endif
  11.106          }
  11.107      }
  11.108  
  11.109 @@ -719,6 +765,9 @@ static int blkif_queue_request(unsigned 
  11.110      blkif_request_t    *req;
  11.111      struct buffer_head *bh;
  11.112      unsigned int        fsect, lsect;
  11.113 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  11.114 +    int ref;
  11.115 +#endif
  11.116  
  11.117      fsect = (buffer_ma & ~PAGE_MASK) >> 9;
  11.118      lsect = fsect + nr_sectors - 1;
  11.119 @@ -766,11 +815,25 @@ static int blkif_queue_request(unsigned 
  11.120       
  11.121              bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
  11.122       
  11.123 -
  11.124              rec_ring[req->id].id = id;
  11.125 +                                                                                                
  11.126 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  11.127 +            /* install a grant reference. */
  11.128 +            ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
  11.129 +            ASSERT( ref != -ENOSPC );
  11.130  
  11.131 -            req->frame_and_sects[req->nr_segments] = 
  11.132 -                buffer_ma | (fsect<<3) | lsect;
  11.133 +            gnttab_grant_foreign_access_ref(
  11.134 +                        ref,
  11.135 +                        rdomid,
  11.136 +                        buffer_ma >> PAGE_SHIFT,
  11.137 +                        ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
  11.138 +
  11.139 +            req->frame_and_sects[req->nr_segments] =
  11.140 +                (((u32) ref ) << 16) | (fsect << 3) | lsect;
  11.141 +#else
  11.142 +            req->frame_and_sects[req->nr_segments] =
  11.143 +                buffer_ma | (fsect << 3) | lsect;
  11.144 +#endif
  11.145              if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
  11.146                  sg_next_sect += nr_sectors;
  11.147              else
  11.148 @@ -808,7 +871,21 @@ static int blkif_queue_request(unsigned 
  11.149      req->sector_number = (blkif_sector_t)sector_number;
  11.150      req->device        = device; 
  11.151      req->nr_segments   = 1;
  11.152 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  11.153 +    /* install a grant reference. */
  11.154 +    ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
  11.155 +    ASSERT( ref != -ENOSPC );
  11.156 +
  11.157 +    gnttab_grant_foreign_access_ref(
  11.158 +                ref,
  11.159 +                rdomid,
  11.160 +                buffer_ma >> PAGE_SHIFT,
  11.161 +                ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
  11.162 +
  11.163 +    req->frame_and_sects[0] = (((u32) ref)<<16)  | (fsect<<3) | lsect;
  11.164 +#else
  11.165      req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
  11.166 +#endif
  11.167  
  11.168      /* Keep a private copy so we can reissue requests when recovering. */    
  11.169      translate_req_to_pfn(&rec_ring[xid], req );
  11.170 @@ -966,6 +1043,20 @@ static void blkif_int(int irq, void *dev
  11.171  
  11.172  /*****************************  COMMON CODE  *******************************/
  11.173  
  11.174 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  11.175 +void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
  11.176 +                              unsigned long address)
  11.177 +{
  11.178 +    int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
  11.179 +    ASSERT( ref != -ENOSPC );
  11.180 +    /* Grant rdomid access (readonly argument = 0) to the frame holding 'address'. */
  11.181 +    gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
  11.182 +    /* Grant ref goes in bits 16 and up; low bits are (fsect << 3) | lsect = 0..7. */
  11.183 +    req->frame_and_sects[0] = (((u32) ref) << 16) | 7;
  11.184 +    /* Hand the prepared request to the ordinary control-message path. */
  11.185 +    blkif_control_send(req, rsp);
  11.186 +}
  11.187 +#endif
  11.188  
  11.189  void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
  11.190  {
  11.191 @@ -1146,6 +1237,9 @@ static void blkif_connect(blkif_fe_inter
  11.192  
  11.193      blkif_evtchn = status->evtchn;
  11.194      blkif_irq    = bind_evtchn_to_irq(blkif_evtchn);
  11.195 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  11.196 +    rdomid       = status->domid;
  11.197 +#endif
  11.198  
  11.199      err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
  11.200      if ( err )
  11.201 @@ -1301,7 +1395,14 @@ int wait_for_blkif(void)
  11.202  int __init xlblk_init(void)
  11.203  {
  11.204      int i;
  11.205 -    
  11.206 +
  11.207 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  11.208 +    if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS,
  11.209 +                                            &gref_head, &gref_terminal ))
  11.210 +        return 1;
  11.211 +    printk(KERN_ALERT "Blkif frontend is using grant tables.\n");
  11.212 +#endif
  11.213 +
  11.214      if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
  11.215           (xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
  11.216          return 0;
  11.217 @@ -1330,12 +1431,19 @@ void blkdev_resume(void)
  11.218      send_driver_status(1);
  11.219  }
  11.220  
  11.221 -/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
  11.222 -
  11.223  void blkif_completion(blkif_request_t *req)
  11.224  {
  11.225      int i;
  11.226 +#ifdef CONFIG_XEN_BLKDEV_GRANT
  11.227 +    grant_ref_t gref;
  11.228  
  11.229 +    for ( i = 0; i < req->nr_segments; i++ )
  11.230 +    {
  11.231 +        gref = blkif_gref_from_fas(req->frame_and_sects[i]);
  11.232 +        gnttab_release_grant_reference(&gref_head, gref);
  11.233 +    }
  11.234 +#else
  11.235 +    /* This is a hack to get the dirty logging bits set */
  11.236      switch ( req->operation )
  11.237      {
  11.238      case BLKIF_OP_READ:
  11.239 @@ -1347,5 +1455,5 @@ void blkif_completion(blkif_request_t *r
  11.240          }
  11.241          break;
  11.242      }
  11.243 -    
  11.244 +#endif
  11.245  }
    12.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h	Sun Apr 03 13:17:25 2005 +0000
    12.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h	Mon Apr 04 20:22:17 2005 +0000
    12.3 @@ -102,6 +102,10 @@ extern int blkif_ioctl(struct inode *ino
    12.4  extern int blkif_check(dev_t dev);
    12.5  extern int blkif_revalidate(dev_t dev);
    12.6  extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
    12.7 +#ifdef CONFIG_XEN_BLKDEV_GRANT
    12.8 +extern void blkif_control_probe_send(
    12.9 +    blkif_request_t *req, blkif_response_t *rsp, unsigned long address);
   12.10 +#endif
   12.11  extern void do_blkif_request (request_queue_t *rq); 
   12.12  
   12.13  extern void xlvbd_update_vbds(void);
    13.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c	Sun Apr 03 13:17:25 2005 +0000
    13.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c	Mon Apr 04 20:22:17 2005 +0000
    13.3 @@ -135,9 +135,14 @@ static vdisk_t * xlvbd_probe(int *ret)
    13.4      memset(&req, 0, sizeof(req));
    13.5      req.operation = BLKIF_OP_PROBE;
    13.6      req.nr_segments = 1;
    13.7 +#ifdef CONFIG_XEN_BLKDEV_GRANT
    13.8 +    blkif_control_probe_send(&req, &rsp,
    13.9 +                             (unsigned long)(virt_to_machine(buf)));
   13.10 +#else
   13.11      req.frame_and_sects[0] = virt_to_machine(buf) | 7;
   13.12  
   13.13      blkif_control_send(&req, &rsp);
   13.14 +#endif
   13.15      if ( rsp.status <= 0 ) {
   13.16          printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
   13.17          goto out;
    14.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Sun Apr 03 13:17:25 2005 +0000
    14.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h	Mon Apr 04 20:22:17 2005 +0000
    14.3 @@ -27,6 +27,7 @@
    14.4  #include <asm/acpi.h>
    14.5  #include <asm/apicdef.h>
    14.6  #include <asm/page.h>
    14.7 +#include <asm-xen/gnttab.h>
    14.8  #ifdef CONFIG_HIGHMEM
    14.9  #include <linux/threads.h>
   14.10  #include <asm/kmap_types.h>
   14.11 @@ -84,7 +85,8 @@ enum fixed_addresses {
   14.12  	FIX_PCIE_MCFG,
   14.13  #endif
   14.14  	FIX_SHARED_INFO,
   14.15 -	FIX_GNTTAB,
   14.16 +	FIX_GNTTAB_BEGIN,
   14.17 +	FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1,
   14.18  #ifdef CONFIG_XEN_PHYSDEV_ACCESS
   14.19  #define NR_FIX_ISAMAPS	256
   14.20  	FIX_ISAMAP_END,
    15.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h	Sun Apr 03 13:17:25 2005 +0000
    15.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h	Mon Apr 04 20:22:17 2005 +0000
    15.3 @@ -7,6 +7,7 @@
    15.4   * (i.e., mechanisms for both sender and recipient of grant references)
    15.5   * 
    15.6   * Copyright (c) 2004, K A Fraser
    15.7 + * Copyright (c) 2005, Christopher Clark
    15.8   */
    15.9  
   15.10  #ifndef __ASM_GNTTAB_H__
   15.11 @@ -16,6 +17,10 @@
   15.12  #include <asm-xen/hypervisor.h>
   15.13  #include <asm-xen/xen-public/grant_table.h>
   15.14  
   15.15 +/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
   15.16 +#define NR_GRANT_FRAMES 4
   15.17 +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
   15.18 +
   15.19  int
   15.20  gnttab_grant_foreign_access(
   15.21      domid_t domid, unsigned long frame, int readonly);
   15.22 @@ -26,7 +31,7 @@ gnttab_end_foreign_access(
   15.23  
   15.24  int
   15.25  gnttab_grant_foreign_transfer(
   15.26 -    domid_t domid);
   15.27 +    domid_t domid, unsigned long pfn);
   15.28  
   15.29  unsigned long
   15.30  gnttab_end_foreign_transfer(
   15.31 @@ -36,4 +41,32 @@ int
   15.32  gnttab_query_foreign_access( 
   15.33      grant_ref_t ref );
   15.34  
   15.35 +/*
   15.36 + * operations on reserved batches of grant references
   15.37 + */
   15.38 +int
   15.39 +gnttab_alloc_grant_references(
   15.40 +    u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal );
   15.41 +
   15.42 +void
   15.43 +gnttab_free_grant_references(
   15.44 +    u16 count, grant_ref_t private_head );
   15.45 +
   15.46 +int
   15.47 +gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal
   15.48 +);
   15.49 +
   15.50 +void
   15.51 +gnttab_release_grant_reference(
   15.52 +    grant_ref_t *private_head, grant_ref_t release );
   15.53 +
   15.54 +void
   15.55 +gnttab_grant_foreign_access_ref(
   15.56 +    grant_ref_t ref, domid_t domid, unsigned long frame, int readonly);
   15.57 +
   15.58 +void
   15.59 +gnttab_grant_foreign_transfer_ref(
   15.60 +    grant_ref_t, domid_t domid, unsigned long pfn);
   15.61 +
   15.62 +
   15.63  #endif /* __ASM_GNTTAB_H__ */
    16.1 --- a/xen/arch/x86/mm.c	Sun Apr 03 13:17:25 2005 +0000
    16.2 +++ b/xen/arch/x86/mm.c	Mon Apr 04 20:22:17 2005 +0000
    16.3 @@ -1997,7 +1997,13 @@ int update_grant_va_mapping(unsigned lon
    16.4          l1_pgentry_t ol1e = mk_l1_pgentry(_ol1e);
    16.5  
    16.6          if ( update_l1e(pl1e, ol1e, mk_l1_pgentry(_nl1e)) )
    16.7 +        {
    16.8              put_page_from_l1e(ol1e, d);
    16.9 +            if ( _ol1e & _PAGE_PRESENT )
   16.10 +                rc = 0; /* Caller needs to invalidate TLB entry */
   16.11 +            else
   16.12 +                rc = 1; /* Caller need not invalidate TLB entry */
   16.13 +        }
   16.14          else
   16.15              rc = -EINVAL;
   16.16      }
   16.17 @@ -3278,7 +3284,7 @@ void audit_domains_key(unsigned char key
   16.18          spin_unlock(&e->page_alloc_lock);
   16.19  
   16.20          /* Transfer is all done: tell the guest about its new page frame. */
   16.21 -        gnttab_notify_transfer(e, gntref, pfn);
   16.22 +        gnttab_notify_transfer(e, d, gntref, pfn);
   16.23          
   16.24          put_domain(e);
   16.25          break;
    17.1 --- a/xen/common/grant_table.c	Sun Apr 03 13:17:25 2005 +0000
    17.2 +++ b/xen/common/grant_table.c	Mon Apr 04 20:22:17 2005 +0000
    17.3 @@ -22,18 +22,19 @@
    17.4   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    17.5   */
    17.6  
    17.7 -#define GRANT_DEBUG 1
    17.8 +#define GRANT_DEBUG 0
    17.9 +#define GRANT_DEBUG_VERBOSE 0
   17.10  
   17.11  #include <xen/config.h>
   17.12  #include <xen/sched.h>
   17.13 -#include <asm-x86/mm.h>
   17.14 -#include <asm-x86/shadow.h>
   17.15 +#include <asm/mm.h>
   17.16 +#include <asm/shadow.h>
   17.17  
   17.18 -#define PIN_FAIL(_rc, _f, _a...)   \
   17.19 +#define PIN_FAIL(_lbl, _rc, _f, _a...)   \
   17.20      do {                           \
   17.21          DPRINTK( _f, ## _a );      \
   17.22          rc = (_rc);                \
   17.23 -        goto fail;                 \
   17.24 +        goto _lbl;                 \
   17.25      } while ( 0 )
   17.26  
   17.27  static inline int
   17.28 @@ -58,23 +59,38 @@ put_maptrack_handle(
   17.29  }
   17.30  
   17.31  static int
   17.32 -__gnttab_map_grant_ref(
   17.33 -    gnttab_map_grant_ref_t *uop,
   17.34 -    unsigned long *va)
   17.35 +__gnttab_activate_grant_ref(
   17.36 +    struct domain          *mapping_d,          /* IN */
   17.37 +    struct exec_domain     *mapping_ed,
   17.38 +    struct domain          *granting_d,
   17.39 +    grant_ref_t             ref,
   17.40 +    u16                     dev_hst_ro_flags,
   17.41 +    unsigned long           host_virt_addr,
   17.42 +    unsigned long          *pframe )            /* OUT */
   17.43  {
   17.44 -    domid_t               dom, sdom;
   17.45 -    grant_ref_t           ref;
   17.46 -    struct domain        *ld, *rd;
   17.47 -    struct exec_domain   *led;
   17.48 -    u16                   flags, sflags;
   17.49 -    int                   handle;
   17.50 +    domid_t               sdom;
   17.51 +    u16                   sflags;
   17.52      active_grant_entry_t *act;
   17.53      grant_entry_t        *sha;
   17.54 -    s16                   rc = 0;
   17.55 -    unsigned long         frame = 0, host_virt_addr;
   17.56 +    s16                   rc = 1;
   17.57 +    unsigned long         frame = 0;
   17.58 +    int                   retries = 0;
   17.59  
   17.60 -    /* Returns 0 if TLB flush / invalidate required by caller.
   17.61 -     * va will indicate the address to be invalidated. */
   17.62 +    /*
   17.63 +     * Objectives of this function:
   17.64 +     * . Make the record ( granting_d, ref ) active, if not already.
   17.65 +     * . Update shared grant entry of owner, indicating frame is mapped.
   17.66 +     * . Increment the owner act->pin reference counts.
   17.67 +     * . get_page on shared frame if new mapping.
   17.68 +     * . get_page_type if this is first RW mapping of frame.
   17.69 +     * . Add PTE to virtual address space of mapping_d, if necessary.
   17.70 +     * Returns:
   17.71 +     * .  -ve: error
   17.72 +     * .    1: ok
   17.73 +     * .    0: ok and TLB invalidate of host_virt_addr needed.
   17.74 +     *
   17.75 +     * On success, *pframe contains mfn.
   17.76 +     */
   17.77  
   17.78      /*
   17.79       * We bound the number of times we retry CMPXCHG on memory locations that
   17.80 @@ -84,7 +100,222 @@ static int
   17.81       * the guest to race our updates (e.g., to change the GTF_readonly flag),
   17.82       * so we allow a few retries before failing.
   17.83       */
   17.84 -    int            retries = 0;
   17.85 +
   17.86 +    act = &granting_d->grant_table->active[ref];
   17.87 +    sha = &granting_d->grant_table->shared[ref];
   17.88 +
   17.89 +    spin_lock(&granting_d->grant_table->lock);
   17.90 +
   17.91 +    if ( act->pin == 0 )
   17.92 +    {
   17.93 +        /* CASE 1: Activating a previously inactive entry. */
   17.94 +
   17.95 +        sflags = sha->flags;
   17.96 +        sdom   = sha->domid;
   17.97 +
   17.98 +        for ( ; ; )
   17.99 +        {
  17.100 +            u32 scombo, prev_scombo, new_scombo;
  17.101 +
  17.102 +            if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
  17.103 +                 unlikely(sdom != mapping_d->id) )
  17.104 +                PIN_FAIL(unlock_out, GNTST_general_error,
  17.105 +                         "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
  17.106 +                        sflags, sdom, mapping_d->id);
  17.107 +
  17.108 +            /* Merge two 16-bit values into a 32-bit combined update. */
  17.109 +            /* NB. Endianness! */
  17.110 +            prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
  17.111 +
  17.112 +            new_scombo = scombo | GTF_reading;
  17.113 +            if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
  17.114 +            {
  17.115 +                new_scombo |= GTF_writing;
  17.116 +                if ( unlikely(sflags & GTF_readonly) )
  17.117 +                    PIN_FAIL(unlock_out, GNTST_general_error,
  17.118 +                             "Attempt to write-pin a r/o grant entry.\n");
  17.119 +            }
  17.120 +
  17.121 +            /* NB. prev_scombo is updated in place to seen value. */
  17.122 +            if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
  17.123 +                                       prev_scombo,
  17.124 +                                       new_scombo)) )
  17.125 +                PIN_FAIL(unlock_out, GNTST_general_error,
  17.126 +                         "Fault while modifying shared flags and domid.\n");
  17.127 +
  17.128 +            /* Did the combined update work (did we see what we expected?). */
  17.129 +            if ( likely(prev_scombo == scombo) )
  17.130 +                break;
  17.131 +
  17.132 +            if ( retries++ == 4 )
  17.133 +                PIN_FAIL(unlock_out, GNTST_general_error,
  17.134 +                         "Shared grant entry is unstable.\n");
  17.135 +
  17.136 +            /* Didn't see what we expected. Split out the seen flags & dom. */
  17.137 +            /* NB. Endianness! */
  17.138 +            sflags = (u16)prev_scombo;
  17.139 +            sdom   = (u16)(prev_scombo >> 16);
  17.140 +        }
  17.141 +
  17.142 +        /* rmb(); */ /* not on x86 */
  17.143 +
  17.144 +        frame = __translate_gpfn_to_mfn(granting_d, sha->frame);
  17.145 +
  17.146 +        if ( unlikely(!pfn_is_ram(frame)) ||
  17.147 +             unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ?
  17.148 +                        get_page(&frame_table[frame], granting_d) :
  17.149 +                        get_page_and_type(&frame_table[frame], granting_d,
  17.150 +                                          PGT_writable_page))) )
  17.151 +        {
  17.152 +            clear_bit(_GTF_writing, &sha->flags);
  17.153 +            clear_bit(_GTF_reading, &sha->flags);
  17.154 +            PIN_FAIL(unlock_out, GNTST_general_error,
  17.155 +                     "Could not pin the granted frame (%lx)!\n", frame);
  17.156 +        }
  17.157 +
  17.158 +        if ( dev_hst_ro_flags & GNTMAP_device_map )
  17.159 +            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
  17.160 +                GNTPIN_devr_inc : GNTPIN_devw_inc;
  17.161 +        if ( dev_hst_ro_flags & GNTMAP_host_map )
  17.162 +            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
  17.163 +                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
  17.164 +        act->domid = sdom;
  17.165 +        act->frame = frame;
  17.166 +    }
  17.167 +    else 
  17.168 +    {
  17.169 +        /* CASE 2: Active modifications to an already active entry. */
  17.170 +
  17.171 +        /*
  17.172 +         * A cheesy check for possible pin-count overflow.
  17.173 +         * A more accurate check cannot be done with a single comparison.
  17.174 +         */
  17.175 +        if ( (act->pin & 0x80808080U) != 0 )
  17.176 +            PIN_FAIL(unlock_out, ENOSPC, "Risk of counter overflow %08x\n", act->pin);
  17.177 +
  17.178 +        frame = act->frame;
  17.179 +
  17.180 +        if ( !(dev_hst_ro_flags & GNTMAP_readonly) && 
  17.181 +             !((sflags = sha->flags) & GTF_writing) )
  17.182 +        {
  17.183 +            for ( ; ; )
  17.184 +            {
  17.185 +                u16 prev_sflags;
  17.186 +                
  17.187 +                if ( unlikely(sflags & GTF_readonly) )
  17.188 +                    PIN_FAIL(unlock_out, GNTST_general_error,
  17.189 +                             "Attempt to write-pin a r/o grant entry.\n");
  17.190 +
  17.191 +                prev_sflags = sflags;
  17.192 +
  17.193 +                /* NB. prev_sflags is updated in place to seen value. */
  17.194 +                if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, 
  17.195 +                                           prev_sflags | GTF_writing)) )
  17.196 +                    PIN_FAIL(unlock_out, GNTST_general_error,
  17.197 +                         "Fault while modifying shared flags.\n");
  17.198 +
  17.199 +                if ( likely(prev_sflags == sflags) )
  17.200 +                    break;
  17.201 +
  17.202 +                if ( retries++ == 4 )
  17.203 +                    PIN_FAIL(unlock_out, GNTST_general_error,
  17.204 +                             "Shared grant entry is unstable.\n");
  17.205 +
  17.206 +                sflags = prev_sflags;
  17.207 +            }
  17.208 +
  17.209 +            if ( unlikely(!get_page_type(&frame_table[frame],
  17.210 +                                         PGT_writable_page)) )
  17.211 +            {
  17.212 +                clear_bit(_GTF_writing, &sha->flags);
  17.213 +                PIN_FAIL(unlock_out, GNTST_general_error,
  17.214 +                         "Attempt to write-pin a unwritable page.\n");
  17.215 +            }
  17.216 +        }
  17.217 +
  17.218 +        if ( dev_hst_ro_flags & GNTMAP_device_map )
  17.219 +            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? 
  17.220 +                GNTPIN_devr_inc : GNTPIN_devw_inc;
  17.221 +        if ( dev_hst_ro_flags & GNTMAP_host_map )
  17.222 +            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
  17.223 +                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
  17.224 +    }
  17.225 +
  17.226 +    /* At this point:
  17.227 +     * act->pin updated to reflect mapping.
  17.228 +     * sha->flags updated to indicate to granting domain mapping done.
  17.229 +     * frame contains the mfn.
  17.230 +     */
  17.231 +
  17.232 +    spin_unlock(&granting_d->grant_table->lock);
  17.233 +
  17.234 +    if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
  17.235 +    {
  17.236 +        /* Write update into the pagetable
  17.237 +         */
  17.238 +
  17.239 +        rc = update_grant_va_mapping( host_virt_addr,
  17.240 +                                (frame << PAGE_SHIFT) | _PAGE_PRESENT  |
  17.241 +                                                        _PAGE_ACCESSED |
  17.242 +                                                        _PAGE_DIRTY    |
  17.243 +                       ((dev_hst_ro_flags & GNTMAP_readonly) ? 0 : _PAGE_RW),
  17.244 +                       mapping_d, mapping_ed );
  17.245 +
  17.246 +        /* IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB.
  17.247 +         * This is done in the outer gnttab_map_grant_ref.
  17.248 +         */
  17.249 +
  17.250 +        if ( 0 > rc )
  17.251 +        {
  17.252 +            /* Abort. */
  17.253 +
  17.254 +            spin_lock(&granting_d->grant_table->lock);
  17.255 +
  17.256 +            if ( dev_hst_ro_flags & GNTMAP_readonly )
  17.257 +                act->pin -= GNTPIN_hstr_inc;
  17.258 +            else
  17.259 +            {
  17.260 +                act->pin -= GNTPIN_hstw_inc;
  17.261 +                if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
  17.262 +                {
  17.263 +                    clear_bit(_GTF_writing, &sha->flags);
  17.264 +                    put_page_type(&frame_table[frame]);
  17.265 +                }
  17.266 +            }
  17.267 +            if ( act->pin == 0 )
  17.268 +            {
  17.269 +                clear_bit(_GTF_reading, &sha->flags);
  17.270 +                put_page(&frame_table[frame]);
  17.271 +            }
  17.272 +
  17.273 +            spin_unlock(&granting_d->grant_table->lock);
  17.274 +        }
  17.275 +
  17.276 +    }
  17.277 +    *pframe = frame;
  17.278 +    return rc;
  17.279 +
  17.280 + unlock_out:
  17.281 +    spin_unlock(&granting_d->grant_table->lock);
  17.282 +    return rc;
  17.283 +}
  17.284 +
  17.285 +static int
  17.286 +__gnttab_map_grant_ref(
  17.287 +    gnttab_map_grant_ref_t *uop,
  17.288 +    unsigned long *va)
  17.289 +{
  17.290 +    domid_t               dom;
  17.291 +    grant_ref_t           ref;
  17.292 +    struct domain        *ld, *rd;
  17.293 +    struct exec_domain   *led;
  17.294 +    u16                   dev_hst_ro_flags;
  17.295 +    int                   handle;
  17.296 +    unsigned long         frame, host_virt_addr;
  17.297 +    int                   rc;
  17.298 +
  17.299 +    /* Returns 0 if TLB flush / invalidate required by caller.
  17.300 +     * va will indicate the address to be invalidated. */
  17.301  
  17.302      led = current;
  17.303      ld = led->domain;
  17.304 @@ -93,25 +324,27 @@ static int
  17.305      if ( unlikely(__get_user(dom, &uop->dom) |
  17.306                    __get_user(ref, &uop->ref) |
  17.307                    __get_user(host_virt_addr, &uop->host_virt_addr) |
  17.308 -                  __get_user(flags, &uop->flags)) )
  17.309 +                  __get_user(dev_hst_ro_flags, &uop->flags)) )
  17.310      {
  17.311          DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
  17.312          return -EFAULT; /* don't set status */
  17.313      }
  17.314  
  17.315 -    if ( ((host_virt_addr != 0) || (flags & GNTMAP_host_map) ) &&
  17.316 +
  17.317 +    if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map) ) &&
  17.318           unlikely(!__addr_ok(host_virt_addr)))
  17.319      {
  17.320          DPRINTK("Bad virtual address (%x) or flags (%x).\n",
  17.321 -                host_virt_addr, flags);
  17.322 +                host_virt_addr, dev_hst_ro_flags);
  17.323          (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
  17.324          return GNTST_bad_gntref;
  17.325      }
  17.326  
  17.327      if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
  17.328 -         unlikely((flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
  17.329 +         unlikely((dev_hst_ro_flags & (GNTMAP_device_map|GNTMAP_host_map)) ==
  17.330 +0) )
  17.331      {
  17.332 -        DPRINTK("Bad ref (%d) or flags (%x).\n", ref, flags);
  17.333 +        DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags);
  17.334          (void)__put_user(GNTST_bad_gntref, &uop->handle);
  17.335          return GNTST_bad_gntref;
  17.336      }
  17.337 @@ -126,6 +359,7 @@ static int
  17.338          return GNTST_bad_domain;
  17.339      }
  17.340  
  17.341 +    /* get a maptrack handle */
  17.342      if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
  17.343      {
  17.344          put_domain(rd);
  17.345 @@ -133,229 +367,40 @@ static int
  17.346          (void)__put_user(GNTST_no_device_space, &uop->handle);
  17.347          return GNTST_no_device_space;
  17.348      }
  17.349 +
  17.350 +#ifdef GRANT_DEBUG_VERBOSE
  17.351      DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
  17.352 -            ref, dom, flags);
  17.353 -
  17.354 -    act = &rd->grant_table->active[ref];
  17.355 -    sha = &rd->grant_table->shared[ref];
  17.356 -
  17.357 -    spin_lock(&rd->grant_table->lock);
  17.358 -
  17.359 -    if ( act->pin == 0 )
  17.360 -    {
  17.361 -        /* CASE 1: Activating a previously inactive entry. */
  17.362 -
  17.363 -        sflags = sha->flags;
  17.364 -        sdom   = sha->domid;
  17.365 -
  17.366 -        for ( ; ; )
  17.367 -        {
  17.368 -            u32 scombo, prev_scombo, new_scombo;
  17.369 -
  17.370 -            if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
  17.371 -                 unlikely(sdom != ld->id) )
  17.372 -                PIN_FAIL(GNTST_general_error,
  17.373 -                         "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
  17.374 -                        sflags, sdom, ld->id);
  17.375 -
  17.376 -            /* Merge two 16-bit values into a 32-bit combined update. */
  17.377 -            /* NB. Endianness! */
  17.378 -            prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
  17.379 +            ref, dom, dev_hst_ro_flags);
  17.380 +#endif
  17.381  
  17.382 -            new_scombo = scombo | GTF_reading;
  17.383 -            if ( !(flags & GNTMAP_readonly) )
  17.384 -            {
  17.385 -                new_scombo |= GTF_writing;
  17.386 -                if ( unlikely(sflags & GTF_readonly) )
  17.387 -                    PIN_FAIL(GNTST_general_error,
  17.388 -                             "Attempt to write-pin a r/o grant entry.\n");
  17.389 -            }
  17.390 -
  17.391 -            /* NB. prev_scombo is updated in place to seen value. */
  17.392 -            if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
  17.393 -                                       prev_scombo,
  17.394 -                                       new_scombo)) )
  17.395 -                PIN_FAIL(GNTST_general_error,
  17.396 -                         "Fault while modifying shared flags and domid.\n");
  17.397 -
  17.398 -            /* Did the combined update work (did we see what we expected?). */
  17.399 -            if ( likely(prev_scombo == scombo) )
  17.400 -                break;
  17.401 -
  17.402 -            if ( retries++ == 4 )
  17.403 -                PIN_FAIL(GNTST_general_error,
  17.404 -                         "Shared grant entry is unstable.\n");
  17.405 -
  17.406 -            /* Didn't see what we expected. Split out the seen flags & dom. */
  17.407 -            /* NB. Endianness! */
  17.408 -            sflags = (u16)prev_scombo;
  17.409 -            sdom   = (u16)(prev_scombo >> 16);
  17.410 -        }
  17.411 -
  17.412 -        /* rmb(); */ /* not on x86 */
  17.413 -
  17.414 -        frame = __translate_gpfn_to_mfn(rd, sha->frame);
  17.415 +    if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
  17.416 +                                                  dev_hst_ro_flags,
  17.417 +                                                  host_virt_addr, &frame)))
  17.418 +    {
  17.419 +        /* Only make the maptrack live _after_ writing the pte,
  17.420 +         * in case we overwrite the same frame number, causing a
  17.421 +         *  maptrack walk to find it
  17.422 +         */
  17.423 +        ld->grant_table->maptrack[handle].domid = dom;
  17.424  
  17.425 -        if ( unlikely(!pfn_is_ram(frame)) ||
  17.426 -             unlikely(!((flags & GNTMAP_readonly) ?
  17.427 -                        get_page(&frame_table[frame], rd) :
  17.428 -                        get_page_and_type(&frame_table[frame], rd,
  17.429 -                                          PGT_writable_page))) )
  17.430 -        {
  17.431 -            clear_bit(_GTF_writing, &sha->flags);
  17.432 -            clear_bit(_GTF_reading, &sha->flags);
  17.433 -            PIN_FAIL(GNTST_general_error,
  17.434 -                     "Could not pin the granted frame!\n");
  17.435 -        }
  17.436 +        ld->grant_table->maptrack[handle].ref_and_flags
  17.437 +            = (ref << MAPTRACK_REF_SHIFT) |
  17.438 +              (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);
  17.439  
  17.440 -        if ( flags & GNTMAP_device_map )
  17.441 -            act->pin += (flags & GNTMAP_readonly) ? 
  17.442 -                GNTPIN_devr_inc : GNTPIN_devw_inc;
  17.443 -        if ( flags & GNTMAP_host_map )
  17.444 -            act->pin += (flags & GNTMAP_readonly) ?
  17.445 -                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
  17.446 -        act->domid = sdom;
  17.447 -        act->frame = frame;
  17.448 -    }
  17.449 -    else 
  17.450 -    {
  17.451 -        /* CASE 2: Active modications to an already active entry. */
  17.452 -
  17.453 -        /*
  17.454 -         * A cheesy check for possible pin-count overflow.
  17.455 -         * A more accurate check cannot be done with a single comparison.
  17.456 -         */
  17.457 -        if ( (act->pin & 0x80808080U) != 0 )
  17.458 -            PIN_FAIL(ENOSPC, "Risk of counter overflow %08x\n", act->pin);
  17.459 -
  17.460 -        frame = act->frame;
  17.461 +        (void)__put_user(frame, &uop->dev_bus_addr);
  17.462  
  17.463 -        if ( !(flags & GNTMAP_readonly) && 
  17.464 -             !((sflags = sha->flags) & GTF_writing) )
  17.465 -        {
  17.466 -            for ( ; ; )
  17.467 -            {
  17.468 -                u16 prev_sflags;
  17.469 -                
  17.470 -                if ( unlikely(sflags & GTF_readonly) )
  17.471 -                    PIN_FAIL(GNTST_general_error,
  17.472 -                             "Attempt to write-pin a r/o grant entry.\n");
  17.473 -
  17.474 -                prev_sflags = sflags;
  17.475 -
  17.476 -                /* NB. prev_sflags is updated in place to seen value. */
  17.477 -                if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, 
  17.478 -                                           prev_sflags | GTF_writing)) )
  17.479 -                    PIN_FAIL(GNTST_general_error,
  17.480 -                         "Fault while modifying shared flags.\n");
  17.481 -
  17.482 -                if ( likely(prev_sflags == sflags) )
  17.483 -                    break;
  17.484 +        if ( dev_hst_ro_flags & GNTMAP_host_map )
  17.485 +            *va = host_virt_addr;
  17.486  
  17.487 -                if ( retries++ == 4 )
  17.488 -                    PIN_FAIL(GNTST_general_error,
  17.489 -                             "Shared grant entry is unstable.\n");
  17.490 -
  17.491 -                sflags = prev_sflags;
  17.492 -            }
  17.493 -
  17.494 -            if ( unlikely(!get_page_type(&frame_table[frame],
  17.495 -                                         PGT_writable_page)) )
  17.496 -            {
  17.497 -                clear_bit(_GTF_writing, &sha->flags);
  17.498 -                PIN_FAIL(GNTST_general_error,
  17.499 -                         "Attempt to write-pin a unwritable page.\n");
  17.500 -            }
  17.501 -        }
  17.502 -
  17.503 -        if ( flags & GNTMAP_device_map )
  17.504 -            act->pin += (flags & GNTMAP_readonly) ? 
  17.505 -                GNTPIN_devr_inc : GNTPIN_devw_inc;
  17.506 -        if ( flags & GNTMAP_host_map )
  17.507 -            act->pin += (flags & GNTMAP_readonly) ?
  17.508 -                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
  17.509 +        (void)__put_user(handle, &uop->handle);
  17.510 +    }
  17.511 +    else
  17.512 +    {
  17.513 +        (void)__put_user(rc, &uop->handle);
  17.514 +        put_maptrack_handle(ld->grant_table, handle);
  17.515      }
  17.516  
  17.517 -    /* At this point:
  17.518 -     * act->pin updated to reflect mapping
  17.519 -     * sha->flags updated to indicate to granting domain mapping done
  17.520 -     * frame contains the mfn
  17.521 -     */
  17.522 -
  17.523 -    if ( (host_virt_addr != 0) && (flags & GNTMAP_host_map) )
  17.524 -    {
  17.525 -        /* Write update into the pagetable
  17.526 -         */
  17.527 -
  17.528 -        /* cwc22: TODO: check locking... */
  17.529 -
  17.530 -        spin_unlock(&rd->grant_table->lock);
  17.531 -
  17.532 -        rc = update_grant_va_mapping( host_virt_addr,
  17.533 -                                (frame << PAGE_SHIFT) | _PAGE_PRESENT  |
  17.534 -                                                        _PAGE_ACCESSED |
  17.535 -                                                        _PAGE_DIRTY    |
  17.536 -                       ((flags & GNTMAP_readonly) ? 0 : _PAGE_RW),
  17.537 -                       ld, led );
  17.538 -
  17.539 -        spin_lock(&rd->grant_table->lock);
  17.540 -
  17.541 -        if ( 0 > rc )
  17.542 -        {
  17.543 -            /* Abort. */
  17.544 -            act->pin -= (flags & GNTMAP_readonly) ?
  17.545 -                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
  17.546 -
  17.547 -            if ( flags & GNTMAP_readonly )
  17.548 -                act->pin -= GNTPIN_hstr_inc;
  17.549 -            else
  17.550 -            {
  17.551 -                act->pin -= GNTPIN_hstw_inc;
  17.552 -                if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
  17.553 -                {
  17.554 -                    put_page_type(&frame_table[frame]);
  17.555 -                    clear_bit(_GTF_writing, &sha->flags);
  17.556 -                }
  17.557 -            }
  17.558 -            if ( act->pin == 0 )
  17.559 -            {
  17.560 -                put_page(&frame_table[frame]);
  17.561 -                clear_bit(_GTF_reading, &sha->flags);
  17.562 -            }
  17.563 -            goto fail;
  17.564 -        }
  17.565 -
  17.566 -        rc = 0;
  17.567 -        *va = host_virt_addr;
  17.568 -
  17.569 -        /* IMPORTANT: must flush / invalidate entry in TLB.
  17.570 -         * This is done in the outer gnttab_map_grant_ref when return 0.
  17.571 -         */
  17.572 -    }
  17.573 -
  17.574 -    /*
  17.575 -     * Only make the maptrack live _after_ writing the pte, in case we 
  17.576 -     * overwrite the same frame number, causing a maptrack walk to find it.
  17.577 -     */
  17.578 -    ld->grant_table->maptrack[handle].domid         = dom;
  17.579 -    ld->grant_table->maptrack[handle].ref_and_flags =
  17.580 -        (ref << MAPTRACK_REF_SHIFT) | (flags & MAPTRACK_GNTMAP_MASK);
  17.581 -
  17.582 -    /* Unchecked and unconditional writes to user uop. */
  17.583 -    if ( flags & GNTMAP_device_map )
  17.584 -        (void)__put_user(frame,  &uop->dev_bus_addr);
  17.585 -
  17.586 -    (void)__put_user(handle, &uop->handle);
  17.587 -
  17.588 -    spin_unlock(&rd->grant_table->lock);
  17.589      put_domain(rd);
  17.590 -    return 0;
  17.591 -
  17.592 - fail:
  17.593 -    (void)__put_user(rc, &uop->handle);
  17.594 -    spin_unlock(&rd->grant_table->lock);
  17.595 -    put_domain(rd);
  17.596 -    put_maptrack_handle(ld->grant_table, handle);
  17.597      return rc;
  17.598  }
  17.599  
  17.600 @@ -364,17 +409,21 @@ gnttab_map_grant_ref(
  17.601      gnttab_map_grant_ref_t *uop, unsigned int count)
  17.602  {
  17.603      int i, flush = 0;
  17.604 -    unsigned long va = 0;
  17.605 +    unsigned long va[8];
  17.606  
  17.607      for ( i = 0; i < count; i++ )
  17.608 -        if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 )
  17.609 +        if ( __gnttab_map_grant_ref(&uop[i],
  17.610 +             &va[ (flush < 8 ? flush : 0) ]   ) == 0)
  17.611              flush++;
  17.612  
  17.613 -    /* XXX KAF: I think we are probably flushing too much here. */
  17.614 -    if ( flush == 1 )
  17.615 -        flush_tlb_one_mask(current->domain->cpuset, va);
  17.616 -    else if ( flush != 0 )
  17.617 -        flush_tlb_mask(current->domain->cpuset);
  17.618 +    if ( flush != 0 )
  17.619 +    {
  17.620 +        if ( flush <= 8 )
  17.621 +            for ( i = 0; i < flush; i++ )
  17.622 +                flush_tlb_one_mask(current->domain->cpuset, va[i]);
  17.623 +        else 
  17.624 +            local_flush_tlb();
  17.625 +    }
  17.626  
  17.627      return 0;
  17.628  }
  17.629 @@ -392,6 +441,7 @@ static int
  17.630      active_grant_entry_t *act;
  17.631      grant_entry_t *sha;
  17.632      grant_mapping_t *map;
  17.633 +    u16            flags;
  17.634      s16            rc = 1;
  17.635      unsigned long  frame, virt;
  17.636  
  17.637 @@ -416,8 +466,9 @@ static int
  17.638          return GNTST_bad_handle;
  17.639      }
  17.640  
  17.641 -    dom = map->domid;
  17.642 -    ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
  17.643 +    dom   = map->domid;
  17.644 +    ref   = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
  17.645 +    flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK;
  17.646  
  17.647      if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
  17.648           unlikely(ld == rd) )
  17.649 @@ -428,45 +479,56 @@ static int
  17.650          (void)__put_user(GNTST_bad_domain, &uop->status);
  17.651          return GNTST_bad_domain;
  17.652      }
  17.653 +#ifdef GRANT_DEBUG_VERBOSE
  17.654      DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n",
  17.655              ref, dom, handle);
  17.656 +#endif
  17.657  
  17.658      act = &rd->grant_table->active[ref];
  17.659      sha = &rd->grant_table->shared[ref];
  17.660  
  17.661      spin_lock(&rd->grant_table->lock);
  17.662  
  17.663 -    if ( frame != 0 )
  17.664 +    if ( frame == 0 )
  17.665 +        frame = act->frame;
  17.666 +    else if ( frame == GNTUNMAP_DEV_FROM_VIRT )
  17.667 +    {
  17.668 +        if ( !( flags & GNTMAP_device_map ) )
  17.669 +            PIN_FAIL(unmap_out, GNTST_bad_dev_addr,
  17.670 +                     "Bad frame number: frame not mapped for device access.\n");
  17.671 +        frame = act->frame;
  17.672 +
  17.673 +        /* frame will be unmapped for device access below if virt addr ok */
  17.674 +    }
  17.675 +    else
  17.676      {
  17.677          if ( unlikely(frame != act->frame) )
  17.678 -            PIN_FAIL(GNTST_general_error,
  17.679 +            PIN_FAIL(unmap_out, GNTST_general_error,
  17.680                       "Bad frame number doesn't match gntref.\n");
  17.681 -        if ( map->ref_and_flags & GNTMAP_device_map )
  17.682 -            act->pin -= (map->ref_and_flags & GNTMAP_readonly) ? 
  17.683 -                GNTPIN_devr_inc : GNTPIN_devw_inc;
  17.684 +        if ( flags & GNTMAP_device_map )
  17.685 +            act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
  17.686 +                                                  : GNTPIN_devw_inc;
  17.687  
  17.688          map->ref_and_flags &= ~GNTMAP_device_map;
  17.689          (void)__put_user(0, &uop->dev_bus_addr);
  17.690 +
  17.691 +        /* frame is now unmapped for device access */
  17.692      }
  17.693 -    else
  17.694 -        frame = act->frame;
  17.695 -
  17.696 -    /* frame is now unmapped for device access */
  17.697  
  17.698      if ( (virt != 0) &&
  17.699 -         (map->ref_and_flags & GNTMAP_host_map) &&
  17.700 +         (flags & GNTMAP_host_map) &&
  17.701           ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
  17.702      {
  17.703          l1_pgentry_t   *pl1e;
  17.704          unsigned long   _ol1e;
  17.705  
  17.706          pl1e = &linear_pg_table[l1_linear_offset(virt)];
  17.707 -
  17.708 +                                                                                            
  17.709          if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
  17.710          {
  17.711              DPRINTK("Could not find PTE entry for address %x\n", virt);
  17.712              rc = -EINVAL;
  17.713 -            goto fail;
  17.714 +            goto unmap_out;
  17.715          }
  17.716  
  17.717          /* check that the virtual address supplied is actually
  17.718 @@ -477,7 +539,7 @@ static int
  17.719              DPRINTK("PTE entry %x for address %x doesn't match frame %x\n",
  17.720                      _ol1e, virt, frame);
  17.721              rc = -EINVAL;
  17.722 -            goto fail;
  17.723 +            goto unmap_out;
  17.724          }
  17.725  
  17.726          /* Delete pagetable entry
  17.727 @@ -487,35 +549,53 @@ static int
  17.728              DPRINTK("Cannot delete PTE entry at %x for virtual address %x\n",
  17.729                      pl1e, virt);
  17.730              rc = -EINVAL;
  17.731 -            goto fail;
  17.732 +            goto unmap_out;
  17.733          }
  17.734  
  17.735          map->ref_and_flags &= ~GNTMAP_host_map;
  17.736  
  17.737 -        act->pin -= (map->ref_and_flags & GNTMAP_readonly) ?
  17.738 -                        GNTPIN_hstr_inc : GNTPIN_hstw_inc;
  17.739 +        act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
  17.740 +                                              : GNTPIN_hstw_inc;
  17.741 +
  17.742 +        if ( frame == GNTUNMAP_DEV_FROM_VIRT )
  17.743 +        {
  17.744 +            act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
  17.745 +                                                  : GNTPIN_devw_inc;
  17.746 +
  17.747 +            map->ref_and_flags &= ~GNTMAP_device_map;
  17.748 +            (void)__put_user(0, &uop->dev_bus_addr);
  17.749 +        }
  17.750 +
  17.751          rc = 0;
  17.752          *va = virt;
  17.753      }
  17.754  
  17.755      if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
  17.756 +    {
  17.757 +        map->ref_and_flags = 0;
  17.758          put_maptrack_handle(ld->grant_table, handle);
  17.759 +    }
  17.760 +
  17.761 +    /* If we just unmapped a writable mapping, mark the frame as dirty. */
  17.762 +    if ( unlikely(shadow_mode_log_dirty(rd)) &&
  17.763 +        !( flags & GNTMAP_readonly ) )
  17.764 +         mark_dirty(rd, frame);
  17.765  
  17.766      /* If the last writable mapping has been removed, put_page_type */
  17.767 -    if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) &&
  17.768 -              !(map->ref_and_flags & GNTMAP_readonly) )
  17.769 +    if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) &&
  17.770 +         ( !( flags & GNTMAP_readonly ) ) )
  17.771      {
  17.772 +        clear_bit(_GTF_writing, &sha->flags);
  17.773          put_page_type(&frame_table[frame]);
  17.774 -        clear_bit(_GTF_writing, &sha->flags);
  17.775      }
  17.776  
  17.777      if ( act->pin == 0 )
  17.778      {
  17.779 +        clear_bit(_GTF_reading, &sha->flags);
  17.780          put_page(&frame_table[frame]);
  17.781 -        clear_bit(_GTF_reading, &sha->flags);
  17.782      }
  17.783  
  17.784 - fail:
  17.785 + unmap_out:
  17.786      (void)__put_user(rc, &uop->status);
  17.787      spin_unlock(&rd->grant_table->lock);
  17.788      put_domain(rd);
  17.789 @@ -527,16 +607,21 @@ gnttab_unmap_grant_ref(
  17.790      gnttab_unmap_grant_ref_t *uop, unsigned int count)
  17.791  {
  17.792      int i, flush = 0;
  17.793 -    unsigned long va = 0;
  17.794 +    unsigned long va[8];
  17.795  
  17.796      for ( i = 0; i < count; i++ )
  17.797 -        if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 )
  17.798 +        if ( __gnttab_unmap_grant_ref(&uop[i],
  17.799 +             &va[ (flush < 8 ? flush : 0) ]   ) == 0)
  17.800              flush++;
  17.801  
  17.802 -    if ( flush == 1 )
  17.803 -        flush_tlb_one_mask(current->domain->cpuset, va);
  17.804 -    else if ( flush != 0 )
  17.805 -        flush_tlb_mask(current->domain->cpuset);
  17.806 +    if ( flush != 0 )
  17.807 +    {
  17.808 +        if ( flush <= 8 )
  17.809 +            for ( i = 0; i < flush; i++ )
  17.810 +                flush_tlb_one_mask(current->domain->cpuset, va[i]);
  17.811 +        else 
  17.812 +            local_flush_tlb();
  17.813 +    }
  17.814  
  17.815      return 0;
  17.816  }
  17.817 @@ -547,6 +632,7 @@ gnttab_setup_table(
  17.818  {
  17.819      gnttab_setup_table_t  op;
  17.820      struct domain        *d;
  17.821 +    int                   i;
  17.822  
  17.823      if ( count != 1 )
  17.824          return -EINVAL;
  17.825 @@ -557,9 +643,10 @@ gnttab_setup_table(
  17.826          return -EFAULT;
  17.827      }
  17.828  
  17.829 -    if ( unlikely(op.nr_frames > 1) )
  17.830 +    if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) )
  17.831      {
  17.832 -        DPRINTK("Xen only supports one grant-table frame per domain.\n");
  17.833 +        DPRINTK("Xen only supports at most %d grant-table frames per domain.\n",
  17.834 +                NR_GRANT_FRAMES);
  17.835          (void)put_user(GNTST_general_error, &uop->status);
  17.836          return 0;
  17.837      }
  17.838 @@ -581,12 +668,15 @@ gnttab_setup_table(
  17.839          return 0;
  17.840      }
  17.841  
  17.842 -    if ( op.nr_frames == 1 )
  17.843 +    if ( op.nr_frames <= NR_GRANT_FRAMES )
  17.844      {
  17.845          ASSERT(d->grant_table != NULL);
  17.846          (void)put_user(GNTST_okay, &uop->status);
  17.847 -        (void)put_user(virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT,
  17.848 -                       &uop->frame_list[0]);
  17.849 +
  17.850 +        for ( i = 0; i < op.nr_frames; i++ )
  17.851 +            (void)put_user( (
  17.852 +                virt_to_phys( (char*)(d->grant_table->shared)+(i*PAGE_SIZE) )
  17.853 +                              >> PAGE_SHIFT ), &uop->frame_list[i]);
  17.854      }
  17.855  
  17.856      put_domain(d);
  17.857 @@ -634,30 +724,34 @@ gnttab_dump_table(gnttab_dump_table_t *u
  17.858      DPRINTK("Grant table for dom (%hu) MFN (%x)\n",
  17.859              op.dom, shared_mfn);
  17.860  
  17.861 -    spin_lock(&gt->lock);
  17.862 -
  17.863      ASSERT(d->grant_table->active != NULL);
  17.864      ASSERT(d->grant_table->shared != NULL);
  17.865 +    ASSERT(d->grant_table->maptrack != NULL);
  17.866  
  17.867      for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
  17.868      {
  17.869 -        act      = &gt->active[i];
  17.870          sha_copy =  gt->shared[i];
  17.871  
  17.872 -        if ( act->pin || act->domid || act->frame ||
  17.873 -             sha_copy.flags || sha_copy.domid || sha_copy.frame )
  17.874 +        if ( sha_copy.flags )
  17.875 +        {
  17.876 +            DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) frame:(%lx)\n",
  17.877 +                    op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
  17.878 +        }
  17.879 +    }
  17.880 +
  17.881 +    spin_lock(&gt->lock);
  17.882 +
  17.883 +    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
  17.884 +    {
  17.885 +        act = &gt->active[i];
  17.886 +
  17.887 +        if ( act->pin )
  17.888          {
  17.889              DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) dom:(%hu) frame:(%lx)\n",
  17.890                      op.dom, i, act->pin, act->domid, act->frame);
  17.891 -            DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) frame:(%lx)\n",
  17.892 -                    op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
  17.893 -
  17.894          }
  17.895 -
  17.896      }
  17.897  
  17.898 -    ASSERT(d->grant_table->maptrack != NULL);
  17.899 -
  17.900      for ( i = 0; i < NR_MAPTRACK_ENTRIES; i++ )
  17.901      {
  17.902          maptrack = &gt->maptrack[i];
  17.903 @@ -746,17 +840,18 @@ gnttab_check_unmap(
  17.904  
  17.905      lgt = ld->grant_table;
  17.906  
  17.907 -    /* Fast exit if we're not mapping anything using grant tables */
  17.908 -    if ( lgt->map_count == 0 )
  17.909 -        return 0;
  17.910 -
  17.911 -#ifdef GRANT_DEBUG
  17.912 -    if ( ld->id != 0 ) {
  17.913 +#ifdef GRANT_DEBUG_VERBOSE
  17.914 +    if ( ld->id != 0 )
  17.915 +    {
  17.916          DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n",
  17.917                  rd->id, ld->id, frame, readonly);
  17.918      }
  17.919  #endif
  17.920  
  17.921 +    /* Fast exit if we're not mapping anything using grant tables */
  17.922 +    if ( lgt->map_count == 0 )
  17.923 +        return 0;
  17.924 +
  17.925      if ( get_domain(rd) == 0 )
  17.926      {
  17.927          DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n", rd->id);
  17.928 @@ -809,15 +904,15 @@ gnttab_check_unmap(
  17.929                  /* any more granted writable mappings? */
  17.930                  if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
  17.931                  {
  17.932 +                    clear_bit(_GTF_writing, &rgt->shared[ref].flags);
  17.933                      put_page_type(&frame_table[frame]);
  17.934 -                    clear_bit(_GTF_writing, &rgt->shared[ref].flags);
  17.935                  }
  17.936              }
  17.937  
  17.938              if ( act->pin == 0 )
  17.939              {
  17.940 +                clear_bit(_GTF_reading, &rgt->shared[ref].flags);
  17.941                  put_page(&frame_table[frame]);
  17.942 -                clear_bit(_GTF_reading, &rgt->shared[ref].flags);
  17.943              }
  17.944              spin_unlock(&rgt->lock);
  17.945  
  17.946 @@ -839,29 +934,41 @@ int
  17.947  gnttab_prepare_for_transfer(
  17.948      struct domain *rd, struct domain *ld, grant_ref_t ref)
  17.949  {
  17.950 -    grant_table_t *t;
  17.951 -    grant_entry_t *e;
  17.952 +    grant_table_t *rgt;
  17.953 +    grant_entry_t *sha;
  17.954      domid_t        sdom;
  17.955      u16            sflags;
  17.956      u32            scombo, prev_scombo;
  17.957      int            retries = 0;
  17.958 +    unsigned long  target_pfn;
  17.959  
  17.960 -    if ( unlikely((t = rd->grant_table) == NULL) ||
  17.961 +    DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
  17.962 +            rd->id, ld->id, ref);
  17.963 +
  17.964 +    if ( unlikely((rgt = rd->grant_table) == NULL) ||
  17.965           unlikely(ref >= NR_GRANT_ENTRIES) )
  17.966      {
  17.967          DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", rd->id, ref);
  17.968          return 0;
  17.969      }
  17.970  
  17.971 -    spin_lock(&t->lock);
  17.972 +    spin_lock(&rgt->lock);
  17.973  
  17.974 -    e = &t->shared[ref];
  17.975 +    sha = &rgt->shared[ref];
  17.976      
  17.977 -    sflags = e->flags;
  17.978 -    sdom   = e->domid;
  17.979 +    sflags = sha->flags;
  17.980 +    sdom   = sha->domid;
  17.981  
  17.982      for ( ; ; )
  17.983      {
  17.984 +        target_pfn = sha->frame;
  17.985 +
  17.986 +        if ( unlikely(target_pfn >= max_page ) )
  17.987 +        {
  17.988 +            DPRINTK("Bad pfn (%x)\n", target_pfn);
  17.989 +            goto fail;
  17.990 +        }
  17.991 +
  17.992          if ( unlikely(sflags != GTF_accept_transfer) ||
  17.993               unlikely(sdom != ld->id) )
  17.994          {
  17.995 @@ -875,7 +982,7 @@ gnttab_prepare_for_transfer(
  17.996          prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
  17.997  
  17.998          /* NB. prev_scombo is updated in place to seen value. */
  17.999 -        if ( unlikely(cmpxchg_user((u32 *)&e->flags, prev_scombo, 
 17.1000 +        if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, 
 17.1001                                     prev_scombo | GTF_transfer_committed)) )
 17.1002          {
 17.1003              DPRINTK("Fault while modifying shared flags and domid.\n");
 17.1004 @@ -898,29 +1005,50 @@ gnttab_prepare_for_transfer(
 17.1005          sdom   = (u16)(prev_scombo >> 16);
 17.1006      }
 17.1007  
 17.1008 -    spin_unlock(&t->lock);
 17.1009 +    spin_unlock(&rgt->lock);
 17.1010      return 1;
 17.1011  
 17.1012   fail:
 17.1013 -    spin_unlock(&t->lock);
 17.1014 +    spin_unlock(&rgt->lock);
 17.1015      return 0;
 17.1016  }
 17.1017  
 17.1018  void 
 17.1019  gnttab_notify_transfer(
 17.1020 -    struct domain *rd, grant_ref_t ref, unsigned long sframe)
 17.1021 +    struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
 17.1022  {
 17.1023 -    unsigned long frame;
 17.1024 +    grant_entry_t  *sha;
 17.1025 +    unsigned long   pfn;
 17.1026 +
 17.1027 +    DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
 17.1028 +            rd->id, ld->id, ref);
 17.1029 +
 17.1030 +    sha = &rd->grant_table->shared[ref];
 17.1031 +
 17.1032 +    spin_lock(&rd->grant_table->lock);
 17.1033 +
 17.1034 +    pfn = sha->frame;
 17.1035  
 17.1036 -    /* cwc22
 17.1037 -     * TODO: this requires that the machine_to_phys_mapping
 17.1038 -     *       has already been updated, so the accept_transfer hypercall
 17.1039 -     *       must do this.
 17.1040 -     */
 17.1041 -    frame = __mfn_to_gpfn(rd, sframe);
 17.1042 +    if ( unlikely(pfn >= max_page ) )
 17.1043 +        DPRINTK("Bad pfn (%x)\n", pfn);
 17.1044 +    else
 17.1045 +    {
 17.1046 +        machine_to_phys_mapping[frame] = pfn;
 17.1047 +
 17.1048 +        if ( unlikely(shadow_mode_log_dirty(ld)))
 17.1049 +             mark_dirty(ld, frame);
 17.1050  
 17.1051 -    wmb(); /* Ensure that the reassignment is globally visible. */
 17.1052 -    rd->grant_table->shared[ref].frame = frame;
 17.1053 +        if (shadow_mode_translate(ld))
 17.1054 +            __phys_to_machine_mapping[pfn] = frame;
 17.1055 +    }
 17.1056 +    sha->frame = __mfn_to_gpfn(rd, frame);
 17.1057 +    sha->domid = rd->id;
 17.1058 +    wmb();
 17.1059 +    sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
 17.1060 +
 17.1061 +    spin_unlock(&rd->grant_table->lock);
 17.1062 +
 17.1063 +    return;
 17.1064  }
 17.1065  
 17.1066  int 
 17.1067 @@ -943,6 +1071,7 @@ grant_table_create(
 17.1068          goto no_mem;
 17.1069      memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES);
 17.1070  
 17.1071 +    /* Table tracking mapped foreign frames. */
 17.1072      if ( (t->maptrack = (void *)alloc_xenheap_page()) == NULL )
 17.1073          goto no_mem;
 17.1074      memset(t->maptrack, 0, PAGE_SIZE);
 17.1075 @@ -950,10 +1079,16 @@ grant_table_create(
 17.1076          t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
 17.1077  
 17.1078      /* Shared grant table. */
 17.1079 -    if ( (t->shared = (void *)alloc_xenheap_page()) == NULL )
 17.1080 +    if ( (t->shared = (void *)alloc_xenheap_pages(ORDER_GRANT_FRAMES)) == NULL )
 17.1081          goto no_mem;
 17.1082 -    memset(t->shared, 0, PAGE_SIZE);
 17.1083 -    SHARE_PFN_WITH_DOMAIN(virt_to_page(t->shared), d);
 17.1084 +    memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE);
 17.1085 +
 17.1086 +    for ( i = 0; i < NR_GRANT_FRAMES; i++ )
 17.1087 +    {
 17.1088 +        SHARE_PFN_WITH_DOMAIN(virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d);
 17.1089 +        machine_to_phys_mapping[ (virt_to_phys((char*)(t->shared)+(i*PAGE_SIZE))
 17.1090 +                                 >> PAGE_SHIFT) ] = INVALID_M2P_ENTRY;
 17.1091 +    }
 17.1092  
 17.1093      /* Okay, install the structure. */
 17.1094      wmb(); /* avoid races with lock-free access to d->grant_table */
 17.1095 @@ -1055,7 +1190,7 @@ grant_table_destroy(
 17.1096      {
 17.1097          /* Free memory relating to this grant table. */
 17.1098          d->grant_table = NULL;
 17.1099 -        free_xenheap_page((unsigned long)t->shared);
 17.1100 +        free_xenheap_pages((unsigned long)t->shared, ORDER_GRANT_FRAMES);
 17.1101          free_xenheap_page((unsigned long)t->maptrack);
 17.1102          xfree(t->active);
 17.1103          xfree(t);
    18.1 --- a/xen/include/public/grant_table.h	Sun Apr 03 13:17:25 2005 +0000
    18.2 +++ b/xen/include/public/grant_table.h	Mon Apr 04 20:22:17 2005 +0000
    18.3 @@ -185,6 +185,8 @@ typedef struct {
    18.4      u32         __pad;
    18.5  } PACKED gnttab_unmap_grant_ref_t; /* 24 bytes */
    18.6  
    18.7 +#define GNTUNMAP_DEV_FROM_VIRT (~0U)
    18.8 +
    18.9  /*
   18.10   * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
   18.11   * <nr_frames> pages. The frame addresses are written to the <frame_list>.
   18.12 @@ -248,8 +250,9 @@ typedef struct {
   18.13  #define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
   18.14  #define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle. */
   18.15  #define GNTST_bad_virt_addr    (-5) /* Inappropriate virtual address to map. */
   18.16 -#define GNTST_no_device_space  (-6) /* Out of space in I/O MMU.              */
   18.17 -#define GNTST_permission_denied (-7) /* Not enough privilege for operation.  */
   18.18 +#define GNTST_bad_dev_addr     (-6) /* Inappropriate device address to unmap.*/
   18.19 +#define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
   18.20 +#define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
   18.21  
   18.22  #define GNTTABOP_error_msgs {                   \
   18.23      "okay",                                     \
   18.24 @@ -258,6 +261,7 @@ typedef struct {
   18.25      "invalid grant reference",                  \
   18.26      "invalid mapping handle",                   \
   18.27      "invalid virtual address",                  \
   18.28 +    "invalid device address",                   \
   18.29      "no spare translation slot in the I/O MMU", \
   18.30      "permission denied"                         \
   18.31  }
    19.1 --- a/xen/include/public/io/blkif.h	Sun Apr 03 13:17:25 2005 +0000
    19.2 +++ b/xen/include/public/io/blkif.h	Mon Apr 04 20:22:17 2005 +0000
    19.3 @@ -34,16 +34,24 @@ typedef struct {
    19.4      blkif_vdev_t   device;       /*  2: only for read/write requests         */
    19.5      unsigned long  id;           /*  4: private guest value, echoed in resp  */
    19.6      blkif_sector_t sector_number;    /* start sector idx on disk (r/w only)  */
    19.7 -    /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect ; @f_a_s[:12]=frame.   */
    19.8 +    /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect                        */
    19.9 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   19.10 +    /* @f_a_s[:16]= grant reference (16 bits)                                */
   19.11 +#else
   19.12 +    /* @f_a_s[:12]=@frame: machine page frame number.                        */
   19.13 +#endif
   19.14      /* @first_sect: first sector in frame to transfer (inclusive).           */
   19.15      /* @last_sect: last sector in frame to transfer (inclusive).             */
   19.16 -    /* @frame: machine page frame number.                                    */
   19.17      unsigned long  frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   19.18  } PACKED blkif_request_t;
   19.19  
   19.20  #define blkif_first_sect(_fas) (((_fas)>>3)&7)
   19.21  #define blkif_last_sect(_fas)  ((_fas)&7)
   19.22  
   19.23 +#ifdef CONFIG_XEN_BLKDEV_GRANT
   19.24 +#define blkif_gref_from_fas(_fas) ((_fas)>>16)
   19.25 +#endif
   19.26 +
   19.27  typedef struct {
   19.28      unsigned long   id;              /* copied from request */
   19.29      u8              operation;       /* copied from request */
    20.1 --- a/xen/include/xen/grant_table.h	Sun Apr 03 13:17:25 2005 +0000
    20.2 +++ b/xen/include/xen/grant_table.h	Mon Apr 04 20:22:17 2005 +0000
    20.3 @@ -51,7 +51,10 @@ typedef struct {
    20.4  #define GNTPIN_devr_inc      (1 << GNTPIN_devr_shift)
    20.5  #define GNTPIN_devr_mask     (0xFFU << GNTPIN_devr_shift)
    20.6  
    20.7 -#define NR_GRANT_ENTRIES     (PAGE_SIZE / sizeof(grant_entry_t))
    20.8 +#define ORDER_GRANT_FRAMES   2
    20.9 +#define NR_GRANT_FRAMES      (1U << ORDER_GRANT_FRAMES)
   20.10 +#define NR_GRANT_ENTRIES     (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
   20.11 +
   20.12  
   20.13  /*
   20.14   * Tracks a mapping of another domain's grant reference. Each domain has a
   20.15 @@ -104,7 +107,7 @@ gnttab_prepare_for_transfer(
   20.16  /* Notify 'rd' of a completed transfer via an already-locked grant entry. */
   20.17  void 
   20.18  gnttab_notify_transfer(
   20.19 -    struct domain *rd, grant_ref_t ref, unsigned long frame);
   20.20 +    struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame);
   20.21  
   20.22  /* Pre-domain destruction release of granted device mappings of other domains.*/
   20.23  void