From 254c1e78a6b14b2a9d376a1eb3212fa57b699749 Mon Sep 17 00:00:00 2001 From: Steven Smith Date: Wed, 15 Apr 2009 14:02:12 +0100 Subject: [PATCH] Add support for v2 grant tables. --- arch/x86/xen/grant-table.c | 16 +- drivers/xen/grant-table.c | 244 +++++++++++++++++++++++----- include/xen/grant_table.h | 23 ++- include/xen/interface/grant_table.h | 120 +++++++++++++- 4 files changed, 343 insertions(+), 60 deletions(-) diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 49ba9b5224d..db42957a5da 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -62,30 +62,28 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, return 0; } -int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - struct grant_entry **__shared) +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_frames, + unsigned long max_nr_frames, void **__shared) { int rc; - struct grant_entry *shared = *__shared; + void *shared = *__shared; if (shared == NULL) { struct vm_struct *area = - xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes); + xen_alloc_vm_area(PAGE_SIZE * max_nr_frames); BUG_ON(area == NULL); shared = area->addr; *__shared = shared; } rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, + PAGE_SIZE * nr_frames, map_pte_fn, &frames); return rc; } -void arch_gnttab_unmap_shared(struct grant_entry *shared, - unsigned long nr_gframes) +void arch_gnttab_unmap_shared(void *shared, unsigned long nr_frames) { apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); + PAGE_SIZE * nr_frames, unmap_pte_fn, NULL); } diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 385187ed7b8..08edcaeb487 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -49,7 +49,8 @@ /* External tools reserve first few grant table entries. 
*/ #define NR_RESERVED_ENTRIES 8 #define GNTTAB_LIST_END 0xffffffff -#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry)) +#define GREFS_PER_GRANT_FRAME_V1 (PAGE_SIZE / sizeof(struct grant_entry_v1)) +#define GREFS_PER_GRANT_FRAME_V2 (PAGE_SIZE / sizeof(struct grant_entry_v2)) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; @@ -58,13 +59,34 @@ static int gnttab_free_count; static grant_ref_t gnttab_free_head; static DEFINE_SPINLOCK(gnttab_list_lock); -static struct grant_entry *shared; +static union { + struct grant_entry_v1 *v1; + struct grant_entry_v2 *v2; +} shared; + +static grant_status_t *grstatus; static struct gnttab_free_callback *gnttab_free_callback_list; +static int grant_table_version; + static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) +#define SPP (PAGE_SIZE / sizeof(grant_status_t)) + +static inline unsigned grefs_per_grant_frame(void) +{ + if (grant_table_version == 1) + return GREFS_PER_GRANT_FRAME_V1; + else + return GREFS_PER_GRANT_FRAME_V2; +} + +static inline unsigned nr_status_frames(unsigned grant_frames) +{ + return (grant_frames * grefs_per_grant_frame() + SPP - 1) / SPP; +} static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) { @@ -149,10 +171,18 @@ static void update_grant_entry(grant_ref_t ref, domid_t domid, * 3. Write memory barrier (WMB). * 4. Write ent->flags, inc. valid type. 
*/ - shared[ref].frame = frame; - shared[ref].domid = domid; - wmb(); - shared[ref].flags = flags; + BUG_ON(flags & (GTF_reading | GTF_writing | GTF_sub_page)); + if (grant_table_version == 1) { + shared.v1[ref].frame = frame; + shared.v1[ref].domid = domid; + wmb(); + shared.v1[ref].flags = flags; + } else { + shared.v2[ref].frame = frame; + shared.v2[ref].hdr.domid = domid; + wmb(); + shared.v2[ref].hdr.flags = flags; + } } /* @@ -180,30 +210,100 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); +void gnttab_grant_foreign_access_ref_subpage(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, + unsigned length) +{ + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | + GTF_writing | GTF_sub_page | GTF_permit_access)); + BUG_ON(grant_table_version == 1); + shared.v2[ref].sub_page.frame = frame; + shared.v2[ref].sub_page.page_off = page_off; + shared.v2[ref].sub_page.length = length; + shared.v2[ref].hdr.domid = domid; + wmb(); + shared.v2[ref].hdr.flags = GTF_permit_access | GTF_sub_page | flags; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref_subpage); + +void gnttab_grant_foreign_access_ref_trans(grant_ref_t ref, domid_t domid, + int flags, + domid_t trans_domid, + grant_ref_t trans_gref) +{ + BUG_ON(flags & (GTF_accept_transfer | GTF_reading | + GTF_writing | GTF_sub_page | GTF_permit_access)); + BUG_ON(grant_table_version == 1); + shared.v2[ref].transitive.trans_domid = trans_domid; + shared.v2[ref].transitive.gref = trans_gref; + shared.v2[ref].hdr.domid = domid; + wmb(); + shared.v2[ref].hdr.flags = GTF_permit_access | GTF_transitive | flags; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref_trans); + +int gnttab_subpage_grants_available(void) +{ + return grant_table_version == 2; +} +EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); + int gnttab_query_foreign_access(grant_ref_t ref) { u16 nflags; - nflags = shared[ref].flags; + if 
(grant_table_version == 1) + nflags = shared.v1[ref].flags; + else + nflags = grstatus[ref]; return (nflags & (GTF_reading|GTF_writing)); } EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); +/* The semantics of this function are subtly different between version + 1 and version 2 grant tables. On version 1, it's a no-op if the + reference is still in use, i.e. the grant remains completely valid. + On version 2, you go to a kind of half-revoked state, in which + extant references to the grant are still valid, but no further + references can be made. This shouldn't be a problem; users of this + interface almost always guarantee that, provided the remote domain + is behaving itself, you'll never even try to end a busy grant. */ int gnttab_end_foreign_access_ref(grant_ref_t ref) { u16 flags, nflags; - nflags = shared[ref].flags; - do { - flags = nflags; - if (flags & (GTF_reading|GTF_writing)) { - printk(KERN_ALERT "WARNING: g.e. still in use!\n"); - return 0; - } - } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags); - + if (grant_table_version == 1) { + nflags = shared.v1[ref].flags; + do { + flags = nflags; + if (flags & (GTF_reading|GTF_writing)) + goto err; + nflags = sync_cmpxchg(&shared.v1[ref].flags, flags, 0); + } while (nflags != flags); + } else { + shared.v2[ref].hdr.flags = 0; + mb(); + if (grstatus[ref] & (GTF_reading|GTF_writing)) + goto err; + + /* The read of grstatus needs to have acquire + semantics. On x86, reads already have that, and we + just need to protect against compiler reorderings. + On other architectures we may need a full + barrier. */ +#ifdef CONFIG_X86 + barrier(); +#else + mb(); +#endif + } return 1; + +err: + printk(KERN_ALERT "WARNING: g.e. 
still in use!\n"); + return 0; } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); @@ -246,25 +346,35 @@ unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { unsigned long frame; u16 flags; + u16 *pflags; + + if (grant_table_version == 1) + pflags = &shared.v1[ref].flags; + else + pflags = &shared.v2[ref].hdr.flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ - while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { - if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags) + while (!((flags = *pflags) & GTF_transfer_committed)) { + if (sync_cmpxchg(pflags, flags, 0) == flags) return 0; cpu_relax(); } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { - flags = shared[ref].flags; + flags = *pflags; cpu_relax(); } - rmb(); /* Read the frame number /after/ reading completion status. */ - frame = shared[ref].frame; + /* Read the frame number /after/ reading completion status. 
*/ + rmb(); + if (grant_table_version == 1) + frame = shared.v1[ref].frame; + else + frame = shared.v2[ref].frame; BUG_ON(frame == 0); return frame; @@ -383,11 +493,11 @@ static int grow_gnttab_list(unsigned int more_frames) unsigned int nr_glist_frames, new_nr_glist_frames; new_nr_grant_frames = nr_grant_frames + more_frames; - extra_entries = more_frames * GREFS_PER_GRANT_FRAME; + extra_entries = more_frames * grefs_per_grant_frame(); - nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; + nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame() + RPP - 1) / RPP; new_nr_glist_frames = - (new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; + (new_nr_grant_frames * grefs_per_grant_frame() + RPP - 1) / RPP; for (i = nr_glist_frames; i < new_nr_glist_frames; i++) { gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); if (!gnttab_list[i]) @@ -395,12 +505,12 @@ static int grow_gnttab_list(unsigned int more_frames) } - for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; - i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) + for (i = grefs_per_grant_frame() * nr_grant_frames; + i < grefs_per_grant_frame() * new_nr_grant_frames - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(i) = gnttab_free_head; - gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; + gnttab_free_head = grefs_per_grant_frame() * nr_grant_frames; gnttab_free_count += extra_entries; nr_grant_frames = new_nr_grant_frames; @@ -438,36 +548,83 @@ static inline unsigned int max_nr_grant_frames(void) return xen_max; } +static inline unsigned max_nr_grant_status_frames(void) +{ + return nr_status_frames(max_nr_grant_frames()); +} + +static void gnttab_request_version(void) +{ + int rc; + struct gnttab_set_version gsv; + + gsv.version = 2; + rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1); + if (rc == 0) { + grant_table_version = 2; + } else { + if (grant_table_version == 2) { + /* If we've already used version 2 features, + but then 
suddenly discover that they're not + available (e.g. migrating to an older + version of Xen), almost unbounded badness + can happen. */ + panic("we need grant tables version 2, but only version 1 is available"); + } + grant_table_version = 1; + } +} + static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct gnttab_setup_table setup; + struct gnttab_get_status_frames getframes; unsigned long *frames; + unsigned long *sframes; unsigned int nr_gframes = end_idx + 1; + unsigned int nr_sframes = nr_status_frames(nr_gframes); int rc; + rc = -ENOMEM; frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); - if (!frames) - return -ENOMEM; + sframes = kmalloc(nr_sframes * sizeof(unsigned long), GFP_ATOMIC); + if (!frames || !sframes) + goto out; setup.dom = DOMID_SELF; setup.nr_frames = nr_gframes; set_xen_guest_handle(setup.frame_list, frames); rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); - if (rc == -ENOSYS) { - kfree(frames); - return -ENOSYS; - } + if (rc == -ENOSYS) + goto out; BUG_ON(rc || setup.status); rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(), - &shared); + (void **)&shared); BUG_ON(rc); + if (grant_table_version > 1) { + getframes.dom = DOMID_SELF; + getframes.nr_frames = nr_sframes; + set_xen_guest_handle(getframes.frame_list, sframes); + + rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames, + &getframes, 1); + BUG_ON(rc || getframes.status); + + rc = arch_gnttab_map_shared(sframes, nr_sframes, + max_nr_grant_status_frames(), + (void **)&grstatus); + BUG_ON(rc); + } + +out: kfree(frames); + kfree(sframes); + return rc; - return 0; } static void gnttab_page_free(struct page *page, unsigned int order) @@ -575,6 +732,7 @@ EXPORT_SYMBOL_GPL(gnttab_reset_grant_page); int gnttab_resume(void) { + gnttab_request_version(); if (max_nr_grant_frames() < nr_grant_frames) return -ENOSYS; return gnttab_map(0, nr_grant_frames - 1); @@ -582,7 +740,11 @@ int gnttab_resume(void) int 
gnttab_suspend(void) { - arch_gnttab_unmap_shared(shared, nr_grant_frames); + if (grant_table_version == 1) + arch_gnttab_unmap_shared(shared.v1, nr_grant_frames); + else + arch_gnttab_unmap_shared(shared.v2, nr_grant_frames); + arch_gnttab_unmap_shared(grstatus, nr_status_frames(nr_grant_frames)); return 0; } @@ -592,8 +754,8 @@ static int gnttab_expand(unsigned int req_entries) unsigned int cur, extra; cur = nr_grant_frames; - extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / - GREFS_PER_GRANT_FRAME); + extra = ((req_entries + (grefs_per_grant_frame()-1)) / + grefs_per_grant_frame()); if (cur + extra > max_nr_grant_frames()) return -ENOSPC; @@ -620,14 +782,14 @@ static int __devinit gnttab_init(void) * grant reference free list on the current hypervisor. */ max_nr_glist_frames = (boot_max_nr_grant_frames * - GREFS_PER_GRANT_FRAME / RPP); + grefs_per_grant_frame() / RPP); gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), GFP_KERNEL); if (gnttab_list == NULL) return -ENOMEM; - nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; + nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame() + RPP - 1) / RPP; for (i = 0; i < nr_glist_frames; i++) { gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); if (gnttab_list[i] == NULL) @@ -637,7 +799,7 @@ static int __devinit gnttab_init(void) if (gnttab_resume() < 0) return -ENODEV; - nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; + nr_init_grefs = nr_grant_frames * grefs_per_grant_frame(); for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) gnttab_entry(i) = i + 1; diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 8493a834cf4..95871f019c0 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -65,6 +65,21 @@ int gnttab_resume(void); int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int flags); +void gnttab_grant_foreign_access_ref_subpage(grant_ref_t ref, domid_t domid, + unsigned long frame, int 
flags, + unsigned page_off, + unsigned length); +void gnttab_grant_foreign_access_ref_trans(grant_ref_t ref, domid_t domid, + int flags, + domid_t trans_domid, + grant_ref_t trans_gref); + +/* + * Are sub-page grants available on this version of Xen? Returns 1 if + * they are, and 0 if they're not. + */ +int gnttab_subpage_grants_available(void); + /* * End access through the given grant reference, iff the grant entry is no * longer in use. Return 1 if the grant entry was freed, 0 if it is still in @@ -146,11 +161,9 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, unsigned long addr, unmap->dev_bus_addr = 0; } -int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - struct grant_entry **__shared); -void arch_gnttab_unmap_shared(struct grant_entry *shared, - unsigned long nr_gframes); +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_frames, + unsigned long max_nr_frames, void **__shared); +void arch_gnttab_unmap_shared(void *shared, unsigned long nr_frames); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index 8211af80c73..c0f3e42706e 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -83,13 +83,18 @@ * Use SMP-safe bit-setting instruction. */ +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + /* * A grant table comprises a packed array of grant entries in one or more * page frames shared between Xen and a guest. * [XEN]: This field is written by Xen and read by the sharing guest. * [GST]: This field is written by the guest and read by Xen. */ -struct grant_entry { +struct grant_entry_v1 { /* GTF_xxx: various type and flag information. [XEN,GST] */ uint16_t flags; /* The domain being granted foreign privileges. 
[GST] */ @@ -107,10 +112,13 @@ struct grant_entry { * GTF_permit_access: Allow @domid to map/access @frame. * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. */ #define GTF_invalid (0U<<0) #define GTF_permit_access (1U<<0) #define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) #define GTF_type_mask (3U<<0) /* @@ -118,6 +126,9 @@ struct grant_entry { * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] */ #define _GTF_readonly (2) #define GTF_readonly (1U<<_GTF_readonly) @@ -125,6 +136,8 @@ struct grant_entry { #define GTF_reading (1U<<_GTF_reading) #define _GTF_writing (4) #define GTF_writing (1U<<_GTF_writing) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) /* * Subflags for GTF_accept_transfer: @@ -141,15 +154,75 @@ struct grant_entry { #define _GTF_transfer_completed (3) #define GTF_transfer_completed (1U<<_GTF_transfer_completed) +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ -/*********************************** - * GRANT TABLE QUERIES AND USES +/* + * Version 1 and version 2 grant entries share a common prefix. 
The + * fields of the prefix are documented as part of struct + * grant_entry_v1. */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; /* - * Reference to a grant entry in a specified domain's grant table. + * Version 2 of the grant entry structure. + */ +struct grant_entry_v2 { + grant_entry_header_t hdr; + union { + /* + * The frame to which we are granting access. This field has + * the same meaning as the grant_entry_v1 field of the same + * name. + */ + uint32_t frame; + + /* + * If the grant type is GTF_permit_access and GTF_sub_page is + * set, @domid is allowed to access bytes + * [@page_off,@page_off+@length) in frame @frame. + */ + struct { + uint32_t frame; + uint16_t page_off; + uint16_t length; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use + * the grant @gref in domain @trans_domid, as if it was the + * local domain. Obviously, the transitive access must be + * compatible with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped. + */ + struct { + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[3]; /* Pad to a power of two */ + }; +}; + +typedef uint16_t grant_status_t; + +/*********************************** + * GRANT TABLE QUERIES AND USES */ -typedef uint32_t grant_ref_t; /* * Handle to track a mapping created via a grant reference. @@ -342,6 +415,43 @@ struct gnttab_unmap_and_replace { }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace); +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. 
+ */ +#define GNTTABOP_set_version 8 +struct gnttab_set_version { + /* IN parameters */ + uint32_t version; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version); + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for <dom>. In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpxchg operations. + * <nr_frames> specifies the size of vector <frame_list>. + * The frame addresses are returned in the <frame_list>. + * Only <nr_frames> addresses are returned, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +#define GNTTABOP_get_status_frames 9 +struct gnttab_get_status_frames { + /* IN parameters. */ + domid_t dom; + uint32_t nr_frames; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + GUEST_HANDLE(ulong) frame_list; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames); + /* * Bitfield values for update_pin_status.flags. */ -- 2.39.5