]> xenbits.xensource.com Git - people/ssmith/nc2-2.6.27.git/commitdiff
V2 grant table support.
authorSteven Smith <ssmith@weybridge.uk.xensource.com>
Thu, 28 May 2009 14:02:36 +0000 (15:02 +0100)
committerSteven Smith <ssmith@weybridge.uk.xensource.com>
Tue, 30 Jun 2009 12:00:52 +0000 (13:00 +0100)
drivers/xen/core/gnttab.c
include/xen/gnttab.h
include/xen/interface/grant_table.h

index cea08c0cef905e28b2fc169a5732ca166d18c304..4cb3ad4fb4d1f92739dd95da7a66eb5b013d384b 100644 (file)
 /* External tools reserve first few grant table entries. */
 #define NR_RESERVED_ENTRIES 8
 #define GNTTAB_LIST_END 0xffffffff
-#define ENTRIES_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
+#define ENTRIES_PER_GRANT_FRAME (grant_table_version == 1 ?                \
+                                (PAGE_SIZE / sizeof(grant_entry_v1_t)) :   \
+                                (PAGE_SIZE / sizeof(grant_entry_v2_t)))
+
+static void pending_free_timer(unsigned long ignore);
 
 static grant_ref_t **gnttab_list;
 static unsigned int nr_grant_frames;
 static unsigned int boot_max_nr_grant_frames;
 static int gnttab_free_count;
+static int initialised_grant_table;
 static grant_ref_t gnttab_free_head;
+static grant_ref_t gnttab_pending_free_gref_head = GNTTAB_LIST_END;
+static LIST_HEAD(gnttab_pending_free_pages);
+static DEFINE_TIMER(gnttab_delayed_free_timer, pending_free_timer, 0, 0);
+static DEFINE_SPINLOCK(gnttab_pending_free_lock);
 static DEFINE_SPINLOCK(gnttab_list_lock);
 
-static struct grant_entry *shared;
+static union {
+    grant_entry_v1_t *v1;
+    grant_entry_v2_t *v2;
+    void *raw;
+} shared;
+
+static grant_status_t *grstatus;
 
 static struct gnttab_free_callback *gnttab_free_callback_list;
 
+static int grant_table_version;
+
 static int gnttab_expand(unsigned int req_entries);
 
 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
@@ -74,12 +91,19 @@ static int gnttab_expand(unsigned int req_entries);
 #define nr_freelist_frames(grant_frames)                               \
        (((grant_frames) * ENTRIES_PER_GRANT_FRAME + RPP - 1) / RPP)
 
+#define SPP (PAGE_SIZE / sizeof(grant_status_t))
+#define nr_status_frames(grant_frames)                         \
+       (((grant_frames) * ENTRIES_PER_GRANT_FRAME + SPP - 1) / SPP)
+
+
 static int get_free_entries(int count)
 {
        unsigned long flags;
        int ref, rc;
        grant_ref_t head;
 
+       BUG_ON(!initialised_grant_table);
+
        spin_lock_irqsave(&gnttab_list_lock, flags);
 
        if ((gnttab_free_count < count) &&
@@ -152,66 +176,245 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
        if (unlikely((ref = get_free_entry()) < 0))
                return -ENOSPC;
 
-       shared[ref].frame = frame;
-       shared[ref].domid = domid;
-       wmb();
-       BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
-       shared[ref].flags = GTF_permit_access | flags;
+       gnttab_grant_foreign_access_ref(ref, domid, frame, flags);
 
        return ref;
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
 
+/*
+ * Take a free grant reference and use it to grant @domid access to
+ * @length bytes starting at offset @page_off within @frame.
+ *
+ * Returns the reference on success, or -ENOSPC if get_free_entry()
+ * fails.  The entry itself is written by
+ * gnttab_grant_foreign_access_ref_subpage(), which BUG()s when the
+ * version 1 table format is in use.
+ */
+int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
+                                       int flags, unsigned page_off,
+                                       unsigned length)
+{
+       int ref;
+
+       if (unlikely((ref = get_free_entry()) < 0))
+               return -ENOSPC;
+
+       gnttab_grant_foreign_access_ref_subpage(ref, domid, frame, flags,
+                                               page_off, length);
+
+       return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
+
+/*
+ * Fill in version 2 grant entry @ref as a sub-page grant: @domid may
+ * access @length bytes at offset @page_off of @frame.
+ *
+ * @flags must not contain GTF_accept_transfer, GTF_reading,
+ * GTF_writing, GTF_sub_page or GTF_permit_access (BUG otherwise); the
+ * last two are added here.  BUG()s if the table is version 1.
+ *
+ * NOTE(review): page_off and length are stored in 16-bit fields of
+ * struct grant_entry_v2, so larger values are silently truncated --
+ * confirm callers never pass them.
+ */
+void gnttab_grant_foreign_access_ref_subpage(grant_ref_t ref, domid_t domid,
+                                            unsigned long frame, int flags,
+                                            unsigned page_off,
+                                            unsigned length)
+{
+       BUG_ON(flags & (GTF_accept_transfer | GTF_reading |
+                       GTF_writing | GTF_sub_page | GTF_permit_access));
+       BUG_ON(grant_table_version == 1);
+       shared.v2[ref].sub_page.frame = frame;
+       shared.v2[ref].sub_page.page_off = page_off;
+       shared.v2[ref].sub_page.length = length;
+       shared.v2[ref].hdr.domid = domid;
+       /* The body of the entry is written before the flags word. */
+       wmb();
+       shared.v2[ref].hdr.flags = GTF_permit_access | GTF_sub_page | flags;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref_subpage);
+
+/*
+ * Take a free grant reference and use it to create a transitive grant:
+ * @domid is given access to grant @trans_gref of domain @trans_domid.
+ *
+ * Returns the reference on success, or -ENOSPC if get_free_entry()
+ * fails.  The entry itself is written by
+ * gnttab_grant_foreign_access_ref_trans(), which BUG()s when the
+ * version 1 table format is in use.
+ */
+int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
+                                     domid_t trans_domid,
+                                     grant_ref_t trans_gref)
+{
+       int ref;
+
+       if (unlikely((ref = get_free_entry()) < 0))
+               return -ENOSPC;
+
+       gnttab_grant_foreign_access_ref_trans(ref, domid, flags,
+                                             trans_domid, trans_gref);
+
+       return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
+
+/*
+ * Fill in version 2 grant entry @ref as a transitive grant: @domid may
+ * use grant @trans_gref of domain @trans_domid.
+ *
+ * @flags must not contain GTF_accept_transfer, GTF_reading,
+ * GTF_writing, GTF_sub_page or GTF_permit_access (BUG otherwise).
+ * BUG()s if the table is version 1.
+ */
+void gnttab_grant_foreign_access_ref_trans(grant_ref_t ref, domid_t domid,
+                                          int flags,
+                                          domid_t trans_domid,
+                                          grant_ref_t trans_gref)
+{
+       BUG_ON(flags & (GTF_accept_transfer | GTF_reading |
+                       GTF_writing | GTF_sub_page | GTF_permit_access));
+       BUG_ON(grant_table_version == 1);
+       shared.v2[ref].transitive.trans_domid = trans_domid;
+       shared.v2[ref].transitive.gref = trans_gref;
+       shared.v2[ref].hdr.domid = domid;
+       /* The body of the entry is written before the flags word. */
+       wmb();
+       shared.v2[ref].hdr.flags = GTF_permit_access | GTF_transitive | flags;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref_trans);
+
 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
                                     unsigned long frame, int flags)
 {
-       shared[ref].frame = frame;
-       shared[ref].domid = domid;
-       wmb();
-       BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
-       shared[ref].flags = GTF_permit_access | flags;
+       BUG_ON(flags & (GTF_accept_transfer | GTF_reading |
+                       GTF_writing | GTF_sub_page));
+       if (grant_table_version == 1) {
+               shared.v1[ref].frame = frame;
+               shared.v1[ref].domid = domid;
+               wmb();
+               shared.v1[ref].flags = GTF_permit_access | flags;
+       } else {
+               shared.v2[ref].frame = frame;
+               shared.v2[ref].hdr.domid = domid;
+               wmb();
+               shared.v2[ref].hdr.flags = GTF_permit_access | flags;
+       }
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
 
+/*
+ * Report whether sub-page grants can be used: returns 1 when the
+ * version 2 grant table format is in use and 0 otherwise.
+ */
+int gnttab_subpage_grants_available(void)
+{
+       return grant_table_version == 2;
+}
+EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
 
 int gnttab_query_foreign_access(grant_ref_t ref)
 {
        u16 nflags;
 
-       nflags = shared[ref].flags;
+       if (grant_table_version == 1)
+               nflags = shared.v1[ref].flags;
+       else
+               nflags = grstatus[ref];
 
        return (nflags & (GTF_reading|GTF_writing));
 }
 EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
 
-int gnttab_end_foreign_access_ref(grant_ref_t ref)
+static int _gnttab_end_foreign_access_ref(grant_ref_t ref)
 {
        u16 flags, nflags;
-
-       nflags = shared[ref].flags;
-       do {
-               if ((flags = nflags) & (GTF_reading|GTF_writing)) {
-                       printk(KERN_DEBUG "WARNING: g.e. still in use!\n");
+       u16 *pflags;
+
+       if (grant_table_version == 1) {
+               pflags = &shared.v1[ref].flags;
+               nflags = *pflags;
+               do {
+                       if ((flags = nflags) & (GTF_reading|GTF_writing)) {
+                               return 0;
+                       }
+               } while ((nflags = synch_cmpxchg_subword(pflags, flags, 0)) !=
+                        flags);
+               return 1;
+       } else {
+               shared.v2[ref].hdr.flags = 0;
+               mb();
+               if (grstatus[ref] & (GTF_reading|GTF_writing)) {
                        return 0;
+               } else {
+                       /* The read of grstatus needs to have acquire
+                          semantics.  On x86, reads already have
+                          that, and we just need to protect against
+                          compiler reorderings.  On other
+                          architectures we may need a full
+                          barrier. */
+#ifdef CONFIG_X86
+                       barrier();
+#else
+                       mb();
+#endif
+                       return 1;
                }
-       } while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) !=
-                flags);
+       }
+}
+
+int gnttab_end_foreign_access_ref(grant_ref_t gref)
+{
+       int res;
 
-       return 1;
+       res = _gnttab_end_foreign_access_ref(gref);
+       if (res == 0)
+               printk(KERN_DEBUG "WARNING: g.e. still in use!\n");
+       return res;
 }
 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
 
+/*
+ * Timer callback: retry ending the grants that were still in use when
+ * gnttab_end_foreign_access() was called.
+ *
+ * Two lists are walked under gnttab_pending_free_lock: the list of
+ * bare grant references (threaded through gnttab_entry()) and the list
+ * of pages whose grant reference is kept in page->index.  Every grant
+ * that can now be ended is handed to put_free_entry(); for the page
+ * list the page reference is dropped as well.  The timer is re-armed
+ * for HZ jiffies later while either list is still non-empty.
+ *
+ * @ignore: unused timer argument.
+ */
+static void pending_free_timer(unsigned long ignore)
+{
+       grant_ref_t gref, next_gref;
+       grant_ref_t prev; /* The last gref which we failed to release,
+                            or GNTTAB_LIST_END if there is no such
+                            gref. */
+       int need_mod_timer;
+       struct page *page, *next_page;
+
+       spin_lock(&gnttab_pending_free_lock);
+       prev = GNTTAB_LIST_END;
+       for (gref = gnttab_pending_free_gref_head;
+            gref != GNTTAB_LIST_END;
+            gref = next_gref) {
+               /* Read the link first: it is not preserved once the
+                  entry has been handed to put_free_entry(). */
+               next_gref = gnttab_entry(gref);
+               if (_gnttab_end_foreign_access_ref(gref)) {
+                       put_free_entry(gref);
+                       if (prev != GNTTAB_LIST_END)
+                               gnttab_entry(prev) = next_gref;
+                       else
+                               gnttab_pending_free_gref_head = next_gref;
+               } else {
+                       prev = gref;
+               }
+       }
+       list_for_each_entry_safe(page, next_page,
+                                &gnttab_pending_free_pages, lru) {
+               gref = page->index;
+               if (_gnttab_end_foreign_access_ref(gref)) {
+                       list_del(&page->lru);
+                       put_free_entry(gref);
+#ifdef MODULE
+                       /* __free_page() drops the page reference
+                          itself, so it must not be preceded by
+                          put_page_testzero(): that would drop the
+                          count twice and leak the page. */
+                       __free_page(page);
+#else
+                       /* The page hasn't been used in this domain
+                          for more than a second, so it's probably
+                          cold. */
+                       if (put_page_testzero(page))
+                               free_cold_page(page);
+#endif
+               }
+       }
+
+       need_mod_timer =
+               (gnttab_pending_free_gref_head != GNTTAB_LIST_END) ||
+               !list_empty(&gnttab_pending_free_pages);
+       spin_unlock(&gnttab_pending_free_lock);
+
+       if (need_mod_timer)
+               mod_timer(&gnttab_delayed_free_timer, jiffies + HZ);
+}
+
 void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page)
 {
-       if (gnttab_end_foreign_access_ref(ref)) {
+       int need_mod_timer;
+       struct page *page_struct;
+
+       if (_gnttab_end_foreign_access_ref(ref)) {
                put_free_entry(ref);
                if (page != 0)
                        free_page(page);
        } else {
-               /* XXX This needs to be fixed so that the ref and page are
-                  placed on a list to be freed up later. */
-               printk(KERN_DEBUG
-                      "WARNING: leaking g.e. and page still in use!\n");
+               spin_lock_bh(&gnttab_pending_free_lock);
+               if (page == 0) {
+                       if (gnttab_pending_free_gref_head == GNTTAB_LIST_END)
+                               need_mod_timer = 1;
+                       else
+                               need_mod_timer = 0;
+                       gnttab_entry(ref) = gnttab_pending_free_gref_head;
+                       gnttab_pending_free_gref_head = ref;
+               } else {
+                       need_mod_timer =
+                               list_empty(&gnttab_pending_free_pages);
+                       page_struct = virt_to_page((void *)page);
+                       page_struct->index = ref;
+                       list_add_tail(&page_struct->lru,
+                                     &gnttab_pending_free_pages);
+               }
+               spin_unlock_bh(&gnttab_pending_free_lock);
+               if (need_mod_timer)
+                       mod_timer(&gnttab_delayed_free_timer, jiffies + HZ);
        }
 }
 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
@@ -231,37 +434,53 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
 void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
                                       unsigned long pfn)
 {
-       shared[ref].frame = pfn;
-       shared[ref].domid = domid;
-       wmb();
-       shared[ref].flags = GTF_accept_transfer;
+       if (grant_table_version == 1) {
+               shared.v1[ref].frame = pfn;
+               shared.v1[ref].domid = domid;
+               wmb();
+               shared.v1[ref].flags = GTF_accept_transfer;
+       } else {
+               shared.v2[ref].frame = pfn;
+               shared.v2[ref].hdr.domid = domid;
+               wmb();
+               shared.v2[ref].hdr.flags = GTF_accept_transfer;
+       }
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
 
 unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
 {
        unsigned long frame;
-       u16           flags;
+       u16           flags;
+       u16          *pflags;
+
+       if (grant_table_version == 1)
+               pflags = &shared.v1[ref].flags;
+       else
+               pflags = &shared.v2[ref].hdr.flags;
 
        /*
         * If a transfer is not even yet started, try to reclaim the grant
         * reference and return failure (== 0).
         */
-       while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
-               if (synch_cmpxchg_subword(&shared[ref].flags, flags, 0) == flags)
+       while (!((flags = *pflags) & GTF_transfer_committed)) {
+               if (synch_cmpxchg_subword(pflags, flags, 0) == flags)
                        return 0;
                cpu_relax();
        }
 
        /* If a transfer is in progress then wait until it is completed. */
        while (!(flags & GTF_transfer_completed)) {
-               flags = shared[ref].flags;
+               flags = *pflags;
                cpu_relax();
        }
 
        /* Read the frame number /after/ reading completion status. */
        rmb();
-       frame = shared[ref].frame;
+       if (grant_table_version == 1)
+               frame = shared.v1[ref].frame;
+       else
+               frame = shared.v2[ref].frame;
        BUG_ON(frame == 0);
 
        return frame;
@@ -435,6 +654,30 @@ static inline unsigned int max_nr_grant_frames(void)
        return xen_max;
 }
 
+/*
+ * Ask Xen for version 2 grant tables and record the outcome in
+ * grant_table_version: 2 if the GNTTABOP_set_version hypercall
+ * succeeds, 1 otherwise.  Panics if version 2 was already in use and
+ * the request now fails.
+ */
+static void gnttab_request_version(void)
+{
+       int rc;
+       struct gnttab_set_version gsv;
+
+       gsv.version = 2;
+       rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
+       if (rc == 0) {
+               grant_table_version = 2;
+               /* Informational only: do not log at emergency level. */
+               printk(KERN_INFO "Grant table version 2.\n");
+       } else {
+               if (grant_table_version == 2) {
+                       /* If we've already used version 2 features,
+                          but then suddenly discover that they're not
+                          available (e.g. migrating to an older
+                          version of Xen), almost unbounded badness
+                          can happen. */
+                       panic("we need grant tables version 2, but only version 1 is available");
+               }
+               grant_table_version = 1;
+               printk(KERN_INFO "Grant table version 1.\n");
+       }
+}
+
 #ifdef CONFIG_XEN
 
 static DEFINE_SEQLOCK(gnttab_dma_lock);
@@ -450,6 +693,16 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page,
        return 0;
 }
 
+/*
+ * apply_to_page_range() callback used by gnttab_map() for the grant
+ * status area.  @data points at a cursor into an array of 64-bit frame
+ * numbers: the PTE for @addr is set from the current element (via
+ * pfn_pte_ma()) and the cursor is advanced by one.  Always returns 0.
+ *
+ * NOTE(review): gnttab_map() allocates and rewinds that array as
+ * unsigned long, which is narrower than uint64_t on 32-bit builds --
+ * confirm the element types agree.
+ */
+static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
+                            unsigned long addr, void *data)
+{
+       uint64_t **frames = (uint64_t **)data;
+
+       set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
+       (*frames)++;
+       return 0;
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
                        unsigned long addr, void *data)
@@ -467,43 +720,94 @@ void *arch_gnttab_alloc_shared(unsigned long *frames)
        BUG_ON(area == NULL);
        return area->addr;
 }
+
+/*
+ * Allocate a VM area sized for the status frames that go with
+ * boot_max_nr_grant_frames grant frames and return its address.
+ * BUG()s if alloc_vm_area() fails.  @frames is not used here.
+ */
+void *arch_gnttab_alloc_status(unsigned long *frames)
+{
+       struct vm_struct *area;
+       area = alloc_vm_area(PAGE_SIZE * 
+                            nr_status_frames(boot_max_nr_grant_frames));
+       BUG_ON(area == NULL);
+       return area->addr;
+}
 #endif /* CONFIG_X86 */
 
 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 {
        struct gnttab_setup_table setup;
-       unsigned long *frames;
+       unsigned long *gframes, *sframes;
        unsigned int nr_gframes = end_idx + 1;
+       unsigned int nr_sframes;
        int rc;
 
-       frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
-       if (!frames)
+       BUG_ON(grant_table_version == 0);
+
+       gframes = kmalloc(nr_gframes  * sizeof(unsigned long), GFP_ATOMIC);
+       if (!gframes)
                return -ENOMEM;
 
-       setup.dom        = DOMID_SELF;
-       setup.nr_frames  = nr_gframes;
-       set_xen_guest_handle(setup.frame_list, frames);
+       setup.dom        = DOMID_SELF;
+       setup.nr_frames  = nr_gframes;
+       set_xen_guest_handle(setup.frame_list, gframes);
 
        rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
        if (rc == -ENOSYS) {
-               kfree(frames);
+               kfree(gframes);
                return -ENOSYS;
        }
 
        BUG_ON(rc || setup.status);
 
-       if (shared == NULL)
-               shared = arch_gnttab_alloc_shared(frames);
+       if (shared.raw == NULL)
+               shared.raw = arch_gnttab_alloc_shared(gframes);
+
+       if (grant_table_version > 1) {
+               struct gnttab_get_status_frames getframes;
+
+               nr_sframes= nr_status_frames(nr_gframes);
+
+               sframes = kmalloc(nr_sframes  * sizeof(unsigned long), 
+                                 GFP_ATOMIC);
+               if (!sframes) {
+                       kfree(gframes);
+                       return -ENOMEM;
+               }
+               getframes.dom        = DOMID_SELF;
+               getframes.nr_frames  = nr_sframes;
+               getframes.frame_list = (unsigned long)sframes;
+
+               rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames, 
+                                              &getframes, 1);
+               if (rc == -ENOSYS) {
+                       kfree(gframes);
+                       kfree(sframes);
+                       return -ENOSYS;
+               }
+
+               BUG_ON(rc || getframes.status);
+
+               if (grstatus == NULL)
+                       grstatus = arch_gnttab_alloc_status(sframes);
+       }
 
 #ifdef CONFIG_X86
-       rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+       rc = apply_to_page_range(&init_mm, (unsigned long)shared.raw,
                                 PAGE_SIZE * nr_gframes,
-                                map_pte_fn, &frames);
+                                map_pte_fn, &gframes);
        BUG_ON(rc);
-       frames -= nr_gframes; /* adjust after map_pte_fn() */
+       gframes -= nr_gframes; /* adjust after map_pte_fn() */
+
+       if (grant_table_version > 1) {
+               rc = apply_to_page_range(&init_mm, (unsigned long)grstatus,
+                                        PAGE_SIZE * nr_sframes,
+                                        map_pte_fn_status, &sframes);
+               BUG_ON(rc);
+               sframes -= nr_sframes; /* adjust after map_pte_fn() */
+       }
 #endif /* CONFIG_X86 */
 
-       kfree(frames);
+       kfree(gframes);
+       if (grant_table_version > 1)
+               kfree(sframes);
 
        return 0;
 }
@@ -589,6 +893,7 @@ int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
 
        new_page->mapping = page->mapping;
        new_page->index = page->index;
+       new_page->private = page->private;
        set_bit(PG_foreign, &new_page->flags);
        *pagep = new_page;
 
@@ -709,6 +1014,7 @@ EXPORT_SYMBOL(gnttab_post_map_adjust);
 
 static int gnttab_resume(struct sys_device *dev)
 {
+       gnttab_request_version();
        if (max_nr_grant_frames() < nr_grant_frames)
                return -ENOSYS;
        return gnttab_map(0, nr_grant_frames - 1);
@@ -719,9 +1025,12 @@ static int gnttab_resume(struct sys_device *dev)
 #ifdef CONFIG_X86
 static int gnttab_suspend(struct sys_device *dev, pm_message_t state)
 {
-       apply_to_page_range(&init_mm, (unsigned long)shared,
+       apply_to_page_range(&init_mm, (unsigned long)shared.raw,
                            PAGE_SIZE * nr_grant_frames,
                            unmap_pte_fn, NULL);
+       apply_to_page_range(&init_mm, (unsigned long)grstatus,
+                           PAGE_SIZE * nr_status_frames(nr_grant_frames),
+                           unmap_pte_fn, NULL);
        return 0;
 }
 #else
@@ -744,7 +1053,8 @@ static struct sys_device device_gnttab = {
 
 #include <platform-pci.h>
 
-static unsigned long resume_frames;
+static unsigned long resume_frames_gnttab;
+static unsigned long resume_frames_status;
 
 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 {
@@ -758,7 +1068,24 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
                xatp.domid = DOMID_SELF;
                xatp.idx = i;
                xatp.space = XENMAPSPACE_grant_table;
-               xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
+               xatp.gpfn = (resume_frames_gnttab >> PAGE_SHIFT) + i;
+               if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+                       BUG();
+       } while (i-- > start_idx);
+
+       return 0;
+}
+
+static int gnttab_map_status(unsigned int start_idx, unsigned int end_idx)
+{
+       struct xen_add_to_physmap xatp;
+       unsigned int i = end_idx;
+
+       do {
+               xatp.domid = DOMID_SELF;
+               xatp.idx = i | XENMAPIDX_grant_table_status;
+               xatp.space = XENMAPSPACE_grant_table;
+               xatp.gpfn = (resume_frames_status >> PAGE_SHIFT) + i;
                if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
                        BUG();
        } while (i-- > start_idx);
@@ -769,16 +1096,21 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 int gnttab_resume(void)
 {
        unsigned int max_nr_gframes, nr_gframes;
+       unsigned int max_nr_sframes, nr_sframes;
+
+       gnttab_request_version();
 
        nr_gframes = nr_grant_frames;
        max_nr_gframes = max_nr_grant_frames();
        if (max_nr_gframes < nr_gframes)
                return -ENOSYS;
 
-       if (!resume_frames) {
-               resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
-               shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
-               if (shared == NULL) {
+       if (!resume_frames_gnttab) {
+               resume_frames_gnttab =
+                       alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
+               shared.raw = ioremap(resume_frames_gnttab,
+                                    PAGE_SIZE * max_nr_gframes);
+               if (shared.raw == NULL) {
                        printk("error to ioremap gnttab share frames\n");
                        return -1;
                }
@@ -786,6 +1118,22 @@ int gnttab_resume(void)
 
        gnttab_map(0, nr_gframes - 1);
 
+       if (grant_table_version > 1) {
+               nr_sframes = nr_status_frames(nr_gframes);
+               max_nr_sframes = nr_status_frames(max_nr_gframes);
+               if (!resume_frames_status) {
+                       resume_frames_status =
+                               alloc_xen_mmio(PAGE_SIZE * max_nr_sframes);
+                       grstatus = ioremap(resume_frames_status,
+                                          PAGE_SIZE * max_nr_sframes);
+                       if (grstatus == NULL) {
+                               printk("error ioremap()ing gnttab status frames\n");
+                               return -1;
+                       }
+               }
+
+               gnttab_map_status(0, nr_sframes - 1);
+       }
        return 0;
 }
 
@@ -872,6 +1220,8 @@ int __devinit gnttab_init(void)
        }
 #endif
 
+       initialised_grant_table = 1;
+
        return 0;
 
  ini_nomem:
index bde65fda0dc5146c3d290e5021d1ce5eb80952e5..a5277357e78f944524ed5d590c7332dfea882094 100644 (file)
@@ -53,6 +53,19 @@ struct gnttab_free_callback {
 
 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
                                int flags);
+int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
+                                        int flags, unsigned page_off,
+                                        unsigned length);
+/*
+ * Grant @domid transitive access to grant @trans_gref of domain
+ * @trans_domid.  Returns the new grant reference, or -ENOSPC.
+ */
+int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
+                                      domid_t trans_domid,
+                                      grant_ref_t trans_gref);
+
+/*
+ * Are sub-page grants available on this version of Xen?  Returns 1 if
+ * they are, and 0 if they're not.
+ */
+int gnttab_subpage_grants_available(void);
 
 /*
  * End access through the given grant reference, iff the grant entry is no
@@ -98,6 +111,14 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
 
 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
                                     unsigned long frame, int flags);
+void gnttab_grant_foreign_access_ref_subpage(grant_ref_t ref, domid_t domid,
+                                             unsigned long frame, int flags,
+                                             unsigned page_off,
+                                             unsigned length);
+void gnttab_grant_foreign_access_ref_trans(grant_ref_t ref, domid_t domid,
+                                           int flags,
+                                           domid_t trans_domid,
+                                           grant_ref_t trans_gref);
 
 void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
                                       unsigned long pfn);
index c5c2044da447fcd2731ad26f3cf6b05087acdbf3..1c3a7c79d036340d1549d34f78c14db61fdd0e35 100644 (file)
  *  Use SMP-safe bit-setting instruction.
  */
 
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
 /*
  * A grant table comprises a packed array of grant entries in one or more
  * page frames shared between Xen and a guest.
  * [XEN]: This field is written by Xen and read by the sharing guest.
  * [GST]: This field is written by the guest and read by Xen.
  */
-struct grant_entry {
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility.  New guests should use version 2.
+ */
+struct grant_entry_v1 {
     /* GTF_xxx: various type and flag information.  [XEN,GST] */
     uint16_t flags;
     /* The domain being granted foreign privileges. [GST] */
@@ -100,7 +110,7 @@ struct grant_entry {
      */
     uint32_t frame;
 };
-typedef struct grant_entry grant_entry_t;
+typedef struct grant_entry_v1 grant_entry_v1_t;
 
 /*
  * Type of grant entry.
@@ -108,10 +118,13 @@ typedef struct grant_entry grant_entry_t;
  *  GTF_permit_access: Allow @domid to map/access @frame.
  *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
  *                       to this guest. Xen writes the page number to @frame.
+ *  GTF_transitive: Allow @domid to transitively access a subrange of
+ *                  @trans_grant in @trans_domid.  No mappings are allowed.
  */
 #define GTF_invalid         (0U<<0)
 #define GTF_permit_access   (1U<<0)
 #define GTF_accept_transfer (2U<<0)
+#define GTF_transitive      (3U<<0)
 #define GTF_type_mask       (3U<<0)
 
 /*
@@ -120,6 +133,9 @@ typedef struct grant_entry grant_entry_t;
  *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
  *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
  *  GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST]
+ *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
+ *                will only be allowed to copy from the grant, and not
+ *                map it. [GST]
  */
 #define _GTF_readonly       (2)
 #define GTF_readonly        (1U<<_GTF_readonly)
@@ -133,6 +149,8 @@ typedef struct grant_entry grant_entry_t;
 #define GTF_PCD             (1U<<_GTF_PCD)
 #define _GTF_PAT            (7)
 #define GTF_PAT             (1U<<_GTF_PAT)
+#define _GTF_sub_page       (8)
+#define GTF_sub_page        (1U<<_GTF_sub_page)
 
 /*
  * Subflags for GTF_accept_transfer:
@@ -149,15 +167,76 @@ typedef struct grant_entry grant_entry_t;
 #define _GTF_transfer_completed (3)
 #define GTF_transfer_completed  (1U<<_GTF_transfer_completed)
 
+/*
+ * Version 2 grant table entries.  These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
 
-/***********************************
- * GRANT TABLE QUERIES AND USES
+/*
+ * Version 1 and version 2 grant entries share a common prefix.  The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
  */
+struct grant_entry_header {
+    uint16_t flags;   /* GTF_xxx type and flag information */
+    domid_t  domid;   /* The domain being granted foreign privileges */
+};
+typedef struct grant_entry_header grant_entry_header_t;
 
 /*
- * Reference to a grant entry in a specified domain's grant table.
+ * Version 2 of the grant entry structure.
+ */
+struct grant_entry_v2 {
+    grant_entry_header_t hdr;
+    union {
+        /*
+         * The frame to which we are granting access.  This field has
+         * the same meaning as the grant_entry_v1 field of the same
+         * name.
+         */
+        uint32_t frame;
+
+        /*
+         * If the grant type is GTF_permit_access and GTF_sub_page is
+         * set, @domid is allowed to access bytes
+         * [@page_off,@page_off+@length) in frame @frame.
+         */
+        struct {
+            uint32_t frame;
+            uint16_t page_off;
+            uint16_t length;
+        } sub_page;
+
+        /*
+         * If the grant is GTF_transitive, @domid is allowed to use
+         * the grant @gref in domain @trans_domid, as if it was the
+         * local domain.  Obviously, the transitive access must be
+         * compatible with the original grant.
+         *
+         * The current version of Xen does not allow transitive grants
+         * to be mapped.
+         */
+        struct {
+            domid_t trans_domid;
+            uint16_t pad0;
+            grant_ref_t gref;
+        } transitive;
+
+        uint32_t __spacer[3]; /* Pad to a power of two */
+    };
+};
+typedef struct grant_entry_v2 grant_entry_v2_t;
+
+typedef uint16_t grant_status_t;
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
  */
-typedef uint32_t grant_ref_t;
 
 /*
  * Handle to track a mapping created via a grant reference.
@@ -365,6 +444,46 @@ struct gnttab_unmap_and_replace {
 typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
 DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
 
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure.  This operation can only be performed
+ * once in any given domain.  It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version          8
+struct gnttab_set_version {
+    /* IN parameters */
+    uint32_t version;   /* Requested grant table version: 1 or 2. */
+};
+DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_set_version);
+typedef struct gnttab_set_version gnttab_set_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpxchg operations.
+ * <nr_frames> specifies the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+  */
+#define GNTTABOP_get_status_frames     9
+struct gnttab_get_status_frames {
+    /* IN parameters. */
+    uint32_t nr_frames;           /* Number of entries in <frame_list>. */
+    domid_t  dom;                 /* Target domain; may be DOMID_SELF. */
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+    /* NOTE(review): declared as a plain 64-bit integer; gnttab_map()
+       stores the address of its frame buffer here -- confirm the
+       intended handle type. */
+    uint64_t frame_list;
+};
+DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
+typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
 
 /*
  * Bitfield values for gnttab_map_grant_ref.flags.