From: Steven Smith Date: Thu, 28 May 2009 14:02:36 +0000 (+0100) Subject: V2 grant table support. X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=ab766802ed26ab1ee9c7a2308deb3410606ca23d;p=people%2Fssmith%2Fnc2-2.6.27.git V2 grant table support. --- diff --git a/drivers/xen/core/gnttab.c b/drivers/xen/core/gnttab.c index cea08c0c..4cb3ad4f 100644 --- a/drivers/xen/core/gnttab.c +++ b/drivers/xen/core/gnttab.c @@ -53,19 +53,36 @@ /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GNTTAB_LIST_END 0xffffffff -#define ENTRIES_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) +#define ENTRIES_PER_GRANT_FRAME (grant_table_version == 1 ? \ + (PAGE_SIZE / sizeof(grant_entry_v1_t)) : \ + (PAGE_SIZE / sizeof(grant_entry_v2_t))) + +static void pending_free_timer(unsigned long ignore); static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; static unsigned int boot_max_nr_grant_frames; static int gnttab_free_count; +static int initialised_grant_table; static grant_ref_t gnttab_free_head; +static grant_ref_t gnttab_pending_free_gref_head = GNTTAB_LIST_END; +static LIST_HEAD(gnttab_pending_free_pages); +static DEFINE_TIMER(gnttab_delayed_free_timer, pending_free_timer, 0, 0); +static DEFINE_SPINLOCK(gnttab_pending_free_lock); static DEFINE_SPINLOCK(gnttab_list_lock); -static struct grant_entry *shared; +static union { + grant_entry_v1_t *v1; + grant_entry_v2_t *v2; + void *raw; +} shared; + +static grant_status_t *grstatus; static struct gnttab_free_callback *gnttab_free_callback_list; +static int grant_table_version; + static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) @@ -74,12 +91,19 @@ static int gnttab_expand(unsigned int req_entries); #define nr_freelist_frames(grant_frames) \ (((grant_frames) * ENTRIES_PER_GRANT_FRAME + RPP - 1) / RPP) +#define SPP (PAGE_SIZE / sizeof(grant_status_t)) +#define nr_status_frames(grant_frames) \ + 
(((grant_frames) * ENTRIES_PER_GRANT_FRAME + SPP - 1) / SPP) + + static int get_free_entries(int count) { unsigned long flags; int ref, rc; grant_ref_t head; + BUG_ON(!initialised_grant_table); + spin_lock_irqsave(&gnttab_list_lock, flags); if ((gnttab_free_count < count) && @@ -152,66 +176,245 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, if (unlikely((ref = get_free_entry()) < 0)) return -ENOSPC; - shared[ref].frame = frame; - shared[ref].domid = domid; - wmb(); - BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing)); - shared[ref].flags = GTF_permit_access | flags; + gnttab_grant_foreign_access_ref(ref, domid, frame, flags); return ref; } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); +int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame, + int flags, unsigned page_off, + unsigned length) +{ + int ref; + + if (unlikely((ref = get_free_entry()) < 0)) + return -ENOSPC; + + gnttab_grant_foreign_access_ref_subpage(ref, domid, frame, flags, + page_off, length); + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage); + +void gnttab_grant_foreign_access_ref_subpage(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, + unsigned length) +{ + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | + GTF_writing | GTF_sub_page | GTF_permit_access)); + BUG_ON(grant_table_version == 1); + shared.v2[ref].sub_page.frame = frame; + shared.v2[ref].sub_page.page_off = page_off; + shared.v2[ref].sub_page.length = length; + shared.v2[ref].hdr.domid = domid; + wmb(); + shared.v2[ref].hdr.flags = GTF_permit_access | GTF_sub_page | flags; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref_subpage); + +int gnttab_grant_foreign_access_trans(domid_t domid, int flags, + domid_t trans_domid, + grant_ref_t trans_gref) +{ + int ref; + + if (unlikely((ref = get_free_entry()) < 0)) + return -ENOSPC; + + gnttab_grant_foreign_access_ref_trans(ref, domid, flags, + trans_domid, 
trans_gref); + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans); + +void gnttab_grant_foreign_access_ref_trans(grant_ref_t ref, domid_t domid, + int flags, + domid_t trans_domid, + grant_ref_t trans_gref) +{ + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | + GTF_writing | GTF_sub_page | GTF_permit_access)); + BUG_ON(grant_table_version == 1); + shared.v2[ref].transitive.trans_domid = trans_domid; + shared.v2[ref].transitive.gref = trans_gref; + shared.v2[ref].hdr.domid = domid; + wmb(); + shared.v2[ref].hdr.flags = GTF_permit_access | GTF_transitive | flags; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref_trans); + void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int flags) { - shared[ref].frame = frame; - shared[ref].domid = domid; - wmb(); - BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing)); - shared[ref].flags = GTF_permit_access | flags; + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | + GTF_writing | GTF_sub_page)); + if (grant_table_version == 1) { + shared.v1[ref].frame = frame; + shared.v1[ref].domid = domid; + wmb(); + shared.v1[ref].flags = GTF_permit_access | flags; + } else { + shared.v2[ref].frame = frame; + shared.v2[ref].hdr.domid = domid; + wmb(); + shared.v2[ref].hdr.flags = GTF_permit_access | flags; + } } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); +int gnttab_subpage_grants_available(void) +{ + return grant_table_version == 2; +} +EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); int gnttab_query_foreign_access(grant_ref_t ref) { u16 nflags; - nflags = shared[ref].flags; + if (grant_table_version == 1) + nflags = shared.v1[ref].flags; + else + nflags = grstatus[ref]; return (nflags & (GTF_reading|GTF_writing)); } EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); -int gnttab_end_foreign_access_ref(grant_ref_t ref) +static int _gnttab_end_foreign_access_ref(grant_ref_t ref) { u16 flags, nflags; - - nflags = shared[ref].flags; - do { - if ((flags 
= nflags) & (GTF_reading|GTF_writing)) { - printk(KERN_DEBUG "WARNING: g.e. still in use!\n"); + u16 *pflags; + + if (grant_table_version == 1) { + pflags = &shared.v1[ref].flags; + nflags = *pflags; + do { + if ((flags = nflags) & (GTF_reading|GTF_writing)) { + return 0; + } + } while ((nflags = synch_cmpxchg_subword(pflags, flags, 0)) != + flags); + return 1; + } else { + shared.v2[ref].hdr.flags = 0; + mb(); + if (grstatus[ref] & (GTF_reading|GTF_writing)) { return 0; + } else { + /* The read of grstatus needs to have acquire + semantics. On x86, reads already have + that, and we just need to protect against + compiler reorderings. On other + architectures we may need a full + barrier. */ +#ifdef CONFIG_X86 + barrier(); +#else + mb(); +#endif + return 1; } - } while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) != - flags); + } +} + +int gnttab_end_foreign_access_ref(grant_ref_t gref) +{ + int res; - return 1; + res = _gnttab_end_foreign_access_ref(gref); + if (res == 0) + printk(KERN_DEBUG "WARNING: g.e. still in use!\n"); + return res; } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); +static void pending_free_timer(unsigned long ignore) +{ + grant_ref_t gref, next_gref; + grant_ref_t prev; /* The last gref which we failed to release, + or GNTTAB_LIST_END if there is no such + gref. 
*/ + int need_mod_timer; + struct page *page, *next_page; + + spin_lock(&gnttab_pending_free_lock); + prev = GNTTAB_LIST_END; + for (gref = gnttab_pending_free_gref_head; + gref != GNTTAB_LIST_END; + gref = next_gref) { + next_gref = gnttab_entry(gref); + if (_gnttab_end_foreign_access_ref(gref)) { + put_free_entry(gref); + if (prev != GNTTAB_LIST_END) + gnttab_entry(prev) = next_gref; + else + gnttab_pending_free_gref_head = next_gref; + } else { + prev = gref; + } + } + list_for_each_entry_safe(page, next_page, + &gnttab_pending_free_pages, lru) { + gref = page->index; + if (_gnttab_end_foreign_access_ref(gref)) { + list_del(&page->lru); + put_free_entry(gref); + /* The page hasn't been used in this domain + for more than a second, so it's probably + cold. */ + if (put_page_testzero(page)) { +#ifdef MODULE + __free_page(page); +#else + free_cold_page(page); +#endif + } + } + } + + need_mod_timer = + (gnttab_pending_free_gref_head != GNTTAB_LIST_END) || + !list_empty(&gnttab_pending_free_pages); + spin_unlock(&gnttab_pending_free_lock); + + if (need_mod_timer) + mod_timer(&gnttab_delayed_free_timer, jiffies + HZ); +} + void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page) { - if (gnttab_end_foreign_access_ref(ref)) { + int need_mod_timer; + struct page *page_struct; + + if (_gnttab_end_foreign_access_ref(ref)) { put_free_entry(ref); if (page != 0) free_page(page); } else { - /* XXX This needs to be fixed so that the ref and page are - placed on a list to be freed up later. */ - printk(KERN_DEBUG - "WARNING: leaking g.e. 
and page still in use!\n"); + spin_lock_bh(&gnttab_pending_free_lock); + if (page == 0) { + if (gnttab_pending_free_gref_head == GNTTAB_LIST_END) + need_mod_timer = 1; + else + need_mod_timer = 0; + gnttab_entry(ref) = gnttab_pending_free_gref_head; + gnttab_pending_free_gref_head = ref; + } else { + need_mod_timer = + list_empty(&gnttab_pending_free_pages); + page_struct = virt_to_page((void *)page); + page_struct->index = ref; + list_add_tail(&page_struct->lru, + &gnttab_pending_free_pages); + } + spin_unlock_bh(&gnttab_pending_free_lock); + if (need_mod_timer) + mod_timer(&gnttab_delayed_free_timer, jiffies + HZ); } } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); @@ -231,37 +434,53 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, unsigned long pfn) { - shared[ref].frame = pfn; - shared[ref].domid = domid; - wmb(); - shared[ref].flags = GTF_accept_transfer; + if (grant_table_version == 1) { + shared.v1[ref].frame = pfn; + shared.v1[ref].domid = domid; + wmb(); + shared.v1[ref].flags = GTF_accept_transfer; + } else { + shared.v2[ref].frame = pfn; + shared.v2[ref].hdr.domid = domid; + wmb(); + shared.v2[ref].hdr.flags = GTF_accept_transfer; + } } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { unsigned long frame; - u16 flags; + u16 flags; + u16 *pflags; + + if (grant_table_version == 1) + pflags = &shared.v1[ref].flags; + else + pflags = &shared.v2[ref].hdr.flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ - while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { - if (synch_cmpxchg_subword(&shared[ref].flags, flags, 0) == flags) + while (!((flags = *pflags) & GTF_transfer_committed)) { + if (synch_cmpxchg_subword(pflags, flags, 0) == flags) return 0; cpu_relax(); } /* If a transfer is in progress then wait until it is completed. 
*/ while (!(flags & GTF_transfer_completed)) { - flags = shared[ref].flags; + flags = *pflags; cpu_relax(); } /* Read the frame number /after/ reading completion status. */ rmb(); - frame = shared[ref].frame; + if (grant_table_version == 1) + frame = shared.v1[ref].frame; + else + frame = shared.v2[ref].frame; BUG_ON(frame == 0); return frame; @@ -435,6 +654,30 @@ static inline unsigned int max_nr_grant_frames(void) return xen_max; } +static void gnttab_request_version(void) +{ + int rc; + struct gnttab_set_version gsv; + + gsv.version = 2; + rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1); + if (rc == 0) { + grant_table_version = 2; + printk("<0>Grant table version 2.\n"); + } else { + if (grant_table_version == 2) { + /* If we've already used version 2 features, + but then suddenly discover that they're not + available (e.g. migrating to an older + version of Xen), almost unbounded badness + can happen. */ + panic("we need grant tables version 2, but only version 1 is available"); + } + grant_table_version = 1; + printk("<0>Grant table version 1.\n"); + } +} + #ifdef CONFIG_XEN static DEFINE_SEQLOCK(gnttab_dma_lock); @@ -450,6 +693,16 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page, return 0; } +static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + uint64_t **frames = (uint64_t **)data; + + set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); + (*frames)++; + return 0; +} + #ifdef CONFIG_PM_SLEEP static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) @@ -467,43 +720,94 @@ void *arch_gnttab_alloc_shared(unsigned long *frames) BUG_ON(area == NULL); return area->addr; } + +void *arch_gnttab_alloc_status(unsigned long *frames) +{ + struct vm_struct *area; + area = alloc_vm_area(PAGE_SIZE * + nr_status_frames(boot_max_nr_grant_frames)); + BUG_ON(area == NULL); + return area->addr; +} #endif /* CONFIG_X86 */ static int gnttab_map(unsigned int 
start_idx, unsigned int end_idx) { struct gnttab_setup_table setup; - unsigned long *frames; + unsigned long *gframes, *sframes; unsigned int nr_gframes = end_idx + 1; + unsigned int nr_sframes; int rc; - frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); - if (!frames) + BUG_ON(grant_table_version == 0); + + gframes = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); + if (!gframes) return -ENOMEM; - setup.dom = DOMID_SELF; - setup.nr_frames = nr_gframes; - set_xen_guest_handle(setup.frame_list, frames); + setup.dom = DOMID_SELF; + setup.nr_frames = nr_gframes; + set_xen_guest_handle(setup.frame_list, gframes); rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); if (rc == -ENOSYS) { - kfree(frames); + kfree(gframes); return -ENOSYS; } BUG_ON(rc || setup.status); - if (shared == NULL) - shared = arch_gnttab_alloc_shared(frames); + if (shared.raw == NULL) + shared.raw = arch_gnttab_alloc_shared(gframes); + + if (grant_table_version > 1) { + struct gnttab_get_status_frames getframes; + + nr_sframes= nr_status_frames(nr_gframes); + + sframes = kmalloc(nr_sframes * sizeof(unsigned long), + GFP_ATOMIC); + if (!sframes) { + kfree(gframes); + return -ENOMEM; + } + getframes.dom = DOMID_SELF; + getframes.nr_frames = nr_sframes; + getframes.frame_list = (unsigned long)sframes; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames, + &getframes, 1); + if (rc == -ENOSYS) { + kfree(gframes); + kfree(sframes); + return -ENOSYS; + } + + BUG_ON(rc || getframes.status); + + if (grstatus == NULL) + grstatus = arch_gnttab_alloc_status(sframes); + } #ifdef CONFIG_X86 - rc = apply_to_page_range(&init_mm, (unsigned long)shared, + rc = apply_to_page_range(&init_mm, (unsigned long)shared.raw, PAGE_SIZE * nr_gframes, - map_pte_fn, &frames); + map_pte_fn, &gframes); BUG_ON(rc); - frames -= nr_gframes; /* adjust after map_pte_fn() */ + gframes -= nr_gframes; /* adjust after map_pte_fn() */ + + if (grant_table_version > 1) { + rc = 
apply_to_page_range(&init_mm, (unsigned long)grstatus, + PAGE_SIZE * nr_sframes, + map_pte_fn_status, &sframes); + BUG_ON(rc); + sframes -= nr_sframes; /* adjust after map_pte_fn() */ + } #endif /* CONFIG_X86 */ - kfree(frames); + kfree(gframes); + if (grant_table_version > 1) + kfree(sframes); return 0; } @@ -589,6 +893,7 @@ int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep) new_page->mapping = page->mapping; new_page->index = page->index; + new_page->private = page->private; set_bit(PG_foreign, &new_page->flags); *pagep = new_page; @@ -709,6 +1014,7 @@ EXPORT_SYMBOL(gnttab_post_map_adjust); static int gnttab_resume(struct sys_device *dev) { + gnttab_request_version(); if (max_nr_grant_frames() < nr_grant_frames) return -ENOSYS; return gnttab_map(0, nr_grant_frames - 1); @@ -719,9 +1025,12 @@ static int gnttab_resume(struct sys_device *dev) #ifdef CONFIG_X86 static int gnttab_suspend(struct sys_device *dev, pm_message_t state) { - apply_to_page_range(&init_mm, (unsigned long)shared, + apply_to_page_range(&init_mm, (unsigned long)shared.raw, PAGE_SIZE * nr_grant_frames, unmap_pte_fn, NULL); + apply_to_page_range(&init_mm, (unsigned long)grstatus, + PAGE_SIZE * nr_status_frames(nr_grant_frames), + unmap_pte_fn, NULL); return 0; } #else @@ -744,7 +1053,8 @@ static struct sys_device device_gnttab = { #include -static unsigned long resume_frames; +static unsigned long resume_frames_gnttab; +static unsigned long resume_frames_status; static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { @@ -758,7 +1068,24 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) xatp.domid = DOMID_SELF; xatp.idx = i; xatp.space = XENMAPSPACE_grant_table; - xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; + xatp.gpfn = (resume_frames_gnttab >> PAGE_SHIFT) + i; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + BUG(); + } while (i-- > start_idx); + + return 0; +} + +static int gnttab_map_status(unsigned int start_idx, unsigned int end_idx) 
+{ + struct xen_add_to_physmap xatp; + unsigned int i = end_idx; + + do { + xatp.domid = DOMID_SELF; + xatp.idx = i | XENMAPIDX_grant_table_status; + xatp.space = XENMAPSPACE_grant_table; + xatp.gpfn = (resume_frames_status >> PAGE_SHIFT) + i; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); } while (i-- > start_idx); @@ -769,16 +1096,21 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) int gnttab_resume(void) { unsigned int max_nr_gframes, nr_gframes; + unsigned int max_nr_sframes, nr_sframes; + + gnttab_request_version(); nr_gframes = nr_grant_frames; max_nr_gframes = max_nr_grant_frames(); if (max_nr_gframes < nr_gframes) return -ENOSYS; - if (!resume_frames) { - resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); - shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes); - if (shared == NULL) { + if (!resume_frames_gnttab) { + resume_frames_gnttab = + alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); + shared.raw = ioremap(resume_frames_gnttab, + PAGE_SIZE * max_nr_gframes); + if (shared.raw == NULL) { printk("error to ioremap gnttab share frames\n"); return -1; } @@ -786,6 +1118,22 @@ int gnttab_resume(void) gnttab_map(0, nr_gframes - 1); + if (grant_table_version > 1) { + nr_sframes = nr_status_frames(nr_gframes); + max_nr_sframes = nr_status_frames(max_nr_gframes); + if (!resume_frames_status) { + resume_frames_status = + alloc_xen_mmio(PAGE_SIZE * max_nr_sframes); + grstatus = ioremap(resume_frames_status, + PAGE_SIZE * max_nr_sframes); + if (grstatus == NULL) { + printk("error ioremap()ing gnttab status frames\n"); + return -1; + } + } + + gnttab_map_status(0, nr_sframes - 1); + } return 0; } @@ -872,6 +1220,8 @@ int __devinit gnttab_init(void) } #endif + initialised_grant_table = 1; + return 0; ini_nomem: diff --git a/include/xen/gnttab.h b/include/xen/gnttab.h index bde65fda..a5277357 100644 --- a/include/xen/gnttab.h +++ b/include/xen/gnttab.h @@ -53,6 +53,19 @@ struct gnttab_free_callback { int 
gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int flags); +int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame, + int flags, unsigned page_off, + unsigned length); +void gnttab_grant_foreign_access_ref_trans(grant_ref_t ref, domid_t domid, + int flags, + domid_t trans_domid, + grant_ref_t trans_gref); + +/* + * Are sub-page grants available on this version of Xen? Returns 1 if + * they are, and 0 if they're not. + */ +int gnttab_subpage_grants_available(void); /* * End access through the given grant reference, iff the grant entry is no @@ -98,6 +111,14 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback); void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int flags); +void gnttab_grant_foreign_access_ref_subpage(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, + unsigned length); +void gnttab_grant_foreign_access_ref_trans(grant_ref_t ref, domid_t domid, + int flags, + domid_t trans_domid, + grant_ref_t trans_gref); void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, unsigned long pfn); diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index c5c2044d..1c3a7c79 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -83,13 +83,23 @@ * Use SMP-safe bit-setting instruction. */ +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + /* * A grant table comprises a packed array of grant entries in one or more * page frames shared between Xen and a guest. * [XEN]: This field is written by Xen and read by the sharing guest. * [GST]: This field is written by the guest and read by Xen. */ -struct grant_entry { + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. 
+ */ +struct grant_entry_v1 { /* GTF_xxx: various type and flag information. [XEN,GST] */ uint16_t flags; /* The domain being granted foreign privileges. [GST] */ @@ -100,7 +110,7 @@ struct grant_entry { */ uint32_t frame; }; -typedef struct grant_entry grant_entry_t; +typedef struct grant_entry_v1 grant_entry_v1_t; /* * Type of grant entry. @@ -108,10 +118,13 @@ typedef struct grant_entry grant_entry_t; * GTF_permit_access: Allow @domid to map/access @frame. * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. */ #define GTF_invalid (0U<<0) #define GTF_permit_access (1U<<0) #define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) #define GTF_type_mask (3U<<0) /* @@ -120,6 +133,9 @@ typedef struct grant_entry grant_entry_t; * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] */ #define _GTF_readonly (2) #define GTF_readonly (1U<<_GTF_readonly) @@ -133,6 +149,8 @@ typedef struct grant_entry grant_entry_t; #define GTF_PCD (1U<<_GTF_PCD) #define _GTF_PAT (7) #define GTF_PAT (1U<<_GTF_PAT) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) /* * Subflags for GTF_accept_transfer: @@ -149,15 +167,76 @@ typedef struct grant_entry grant_entry_t; #define _GTF_transfer_completed (3) #define GTF_transfer_completed (1U<<_GTF_transfer_completed) +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. 
+ * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ -/*********************************** - * GRANT TABLE QUERIES AND USES +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; /* - * Reference to a grant entry in a specified domain's grant table. + * Version 2 of the grant entry structure. + */ +struct grant_entry_v2 { + grant_entry_header_t hdr; + union { + /* + * The frame to which we are granting access. This field has + * the same meaning as the grant_entry_v1 field of the same + * name. + */ + uint32_t frame; + + /* + * If the grant type is GTF_permit_access and GTF_sub_page is + * set, @domid is allowed to access bytes + * [@page_off,@page_off+@length) in frame @frame. + */ + struct { + uint32_t frame; + uint16_t page_off; + uint16_t length; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use + * the grant @gref in domain @trans_domid, as if it was the + * local domain. Obviously, the transitive access must be + * compatible with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped. + */ + struct { + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[3]; /* Pad to a power of two */ + }; +}; +typedef struct grant_entry_v2 grant_entry_v2_t; + +typedef uint16_t grant_status_t; + +/*********************************** + * GRANT TABLE QUERIES AND USES */ -typedef uint32_t grant_ref_t; /* * Handle to track a mapping created via a grant reference.
@@ -365,6 +444,46 @@ struct gnttab_unmap_and_replace { typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +#define GNTTABOP_set_version 8 +struct gnttab_set_version { + /* IN parameters */ + uint32_t version; +}; +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_set_version); +typedef struct gnttab_set_version gnttab_set_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for @dom. In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpxchg operations. + * @nr_frames specifies the size of vector @frame_list. + * The frame addresses are returned in @frame_list. + * Only @nr_frames addresses are returned, even if the table is larger. + * NOTES: + * 1. @dom may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify @dom != DOMID_SELF. + */ +#define GNTTABOP_get_status_frames 9 +struct gnttab_get_status_frames { + /* IN parameters. */ + uint32_t nr_frames; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + uint64_t frame_list; +}; +DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_get_status_frames); +typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); /* * Bitfield values for gnttab_map_grant_ref.flags.