/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
#define GNTTAB_LIST_END 0xffffffff
-#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
+#define GREFS_PER_GRANT_FRAME_V1 (PAGE_SIZE / sizeof(struct grant_entry_v1))
+#define GREFS_PER_GRANT_FRAME_V2 (PAGE_SIZE / sizeof(struct grant_entry_v2))
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
static grant_ref_t gnttab_free_head;
static DEFINE_SPINLOCK(gnttab_list_lock);
-static struct grant_entry *shared;
+static union { /* Shared grant table mapping; grant_table_version
+		  selects which member is used. */
+ struct grant_entry_v1 *v1;
+ struct grant_entry_v2 *v2;
+} shared;
+
+static grant_status_t *grstatus; /* Status array; gnttab_map() only maps
+				     it when grant_table_version > 1. */
static struct gnttab_free_callback *gnttab_free_callback_list;
+static int grant_table_version; /* Set to 1 or 2 by gnttab_request_version(). */
+
static int gnttab_expand(unsigned int req_entries);
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define SPP (PAGE_SIZE / sizeof(grant_status_t))
+
+/*
+ * Number of grant entries that fit in one grant-table page for the
+ * table layout currently in use.  Any version other than 1 is treated
+ * as version 2.
+ */
+static inline unsigned grefs_per_grant_frame(void)
+{
+	return (grant_table_version == 1) ? GREFS_PER_GRANT_FRAME_V1
+					  : GREFS_PER_GRANT_FRAME_V2;
+}
+
+/*
+ * Number of pages of grant_status_t needed to hold one status word for
+ * every entry in @grant_frames pages of grant table, rounded up.
+ */
+static inline unsigned nr_status_frames(unsigned grant_frames)
+{
+	unsigned nr_grefs = grant_frames * grefs_per_grant_frame();
+
+	return (nr_grefs + SPP - 1) / SPP;
+}
static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
* 3. Write memory barrier (WMB).
* 4. Write ent->flags, inc. valid type.
*/
- shared[ref].frame = frame;
- shared[ref].domid = domid;
- wmb();
- shared[ref].flags = flags;
+ BUG_ON(flags & (GTF_reading | GTF_writing | GTF_sub_page));
+ if (grant_table_version == 1) {
+ shared.v1[ref].frame = frame;
+ shared.v1[ref].domid = domid;
+ wmb();
+ shared.v1[ref].flags = flags;
+ } else {
+ shared.v2[ref].frame = frame;
+ shared.v2[ref].hdr.domid = domid;
+ wmb();
+ shared.v2[ref].hdr.flags = flags;
+ }
}
/*
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
+/*
+ * Publish grant @ref so that @domid may access only bytes
+ * [@page_off, @page_off + @length) of @frame.  Must not be called on a
+ * version 1 grant table.
+ *
+ * @flags may carry additional GTF_* modifiers; the grant type bits,
+ * GTF_sub_page, GTF_reading and GTF_writing must not be set by the
+ * caller.
+ */
+void gnttab_grant_foreign_access_ref_subpage(grant_ref_t ref, domid_t domid,
+					     unsigned long frame, int flags,
+					     unsigned page_off,
+					     unsigned length)
+{
+	const unsigned int forbidden = GTF_accept_transfer | GTF_reading |
+				       GTF_writing | GTF_sub_page |
+				       GTF_permit_access;
+	struct grant_entry_v2 *ent;
+
+	BUG_ON(flags & forbidden);
+	BUG_ON(grant_table_version == 1);
+
+	ent = &shared.v2[ref];
+	ent->sub_page.frame = frame;
+	ent->sub_page.page_off = page_off;
+	ent->sub_page.length = length;
+	ent->hdr.domid = domid;
+	/* The entry body must be written before the flags make it valid. */
+	wmb();
+	ent->hdr.flags = GTF_permit_access | GTF_sub_page | flags;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref_subpage);
+
+/*
+ * Publish grant @ref as a transitive grant: @domid is allowed to use
+ * grant @trans_gref of domain @trans_domid.  Must not be called on a
+ * version 1 grant table.
+ *
+ * @flags may carry additional GTF_* modifiers; the grant type bits,
+ * GTF_sub_page, GTF_reading and GTF_writing must not be set by the
+ * caller.
+ */
+void gnttab_grant_foreign_access_ref_trans(grant_ref_t ref, domid_t domid,
+					   int flags,
+					   domid_t trans_domid,
+					   grant_ref_t trans_gref)
+{
+	const unsigned int forbidden = GTF_accept_transfer | GTF_reading |
+				       GTF_writing | GTF_sub_page |
+				       GTF_permit_access;
+	struct grant_entry_v2 *ent;
+
+	BUG_ON(flags & forbidden);
+	BUG_ON(grant_table_version == 1);
+
+	ent = &shared.v2[ref];
+	ent->transitive.trans_domid = trans_domid;
+	ent->transitive.gref = trans_gref;
+	ent->hdr.domid = domid;
+	/* The entry body must be written before the flags make it valid. */
+	wmb();
+	ent->hdr.flags = GTF_permit_access | GTF_transitive | flags;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref_trans);
+
+/*
+ * Returns 1 when version 2 grant tables are in use and 0 otherwise.
+ * The sub-page and transitive grant helpers BUG() on a version 1 table.
+ */
+int gnttab_subpage_grants_available(void)
+{
+	if (grant_table_version != 2)
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
+
int gnttab_query_foreign_access(grant_ref_t ref)
{
u16 nflags;
- nflags = shared[ref].flags;
+	/*
+	 * Version 1 keeps the in-use bits in the entry's own flags word;
+	 * otherwise they are read from the separate status array.
+	 */
+	nflags = (grant_table_version == 1) ? shared.v1[ref].flags
+					    : grstatus[ref];
return (nflags & (GTF_reading|GTF_writing));
}
EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
+/* The semantics of this function are subtly different between version
+ 1 and version 2 grant tables. On version 1, it's a no-op if the
+ reference is still in use, i.e. the grant remains completely valid.
+ On version 2, you go to a kind of half-revoked state, in which
+ extant references to the grant are still valid, but no further
+ references can be made. This shouldn't be a problem; users of this
+ interface almost always guarantee that, provided the remote domain
+ is behaving itself, you'll never even try to end a busy grant.
+
+ Returns 1 if the grant was ended. Returns 0, after logging a
+ warning, if the entry is still marked GTF_reading or GTF_writing. */
int gnttab_end_foreign_access_ref(grant_ref_t ref)
{
u16 flags, nflags;
- nflags = shared[ref].flags;
- do {
- flags = nflags;
- if (flags & (GTF_reading|GTF_writing)) {
- printk(KERN_ALERT "WARNING: g.e. still in use!\n");
- return 0;
- }
- } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
-
+ if (grant_table_version == 1) {
+ nflags = shared.v1[ref].flags;
+ do {
+ flags = nflags;
+ if (flags & (GTF_reading|GTF_writing))
+ goto err;
+ nflags = sync_cmpxchg(&shared.v1[ref].flags, flags, 0); /* retry until
+				the flags word is cleared from an unchanged value */
+ } while (nflags != flags);
+ } else {
+ shared.v2[ref].hdr.flags = 0; /* clear the entry's flags first */
+ mb(); /* order the flags write before the status read below */
+ if (grstatus[ref] & (GTF_reading|GTF_writing))
+ goto err;
+
+ /* The read of grstatus needs to have acquire
+ semantics. On x86, reads already have that, and we
+ just need to protect against compiler reorderings.
+ On other architectures we may need a full
+ barrier. */
+#ifdef CONFIG_X86
+ barrier();
+#else
+ mb();
+#endif
+ }
return 1;
+
+err:
+ printk(KERN_ALERT "WARNING: g.e. still in use!\n");
+ return 0;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
{
unsigned long frame;
u16 flags;
+ u16 *pflags;
+
+ if (grant_table_version == 1)
+ pflags = &shared.v1[ref].flags;
+ else
+ pflags = &shared.v2[ref].hdr.flags;
/*
* If a transfer is not even yet started, try to reclaim the grant
* reference and return failure (== 0).
*/
- while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
- if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
+ while (!((flags = *pflags) & GTF_transfer_committed)) {
+ if (sync_cmpxchg(pflags, flags, 0) == flags)
return 0;
cpu_relax();
}
/* If a transfer is in progress then wait until it is completed. */
while (!(flags & GTF_transfer_completed)) {
- flags = shared[ref].flags;
+ flags = *pflags;
cpu_relax();
}
- rmb(); /* Read the frame number /after/ reading completion status. */
- frame = shared[ref].frame;
+ /* Read the frame number /after/ reading completion status. */
+ rmb();
+ if (grant_table_version == 1)
+ frame = shared.v1[ref].frame;
+ else
+ frame = shared.v2[ref].frame;
BUG_ON(frame == 0);
return frame;
unsigned int nr_glist_frames, new_nr_glist_frames;
new_nr_grant_frames = nr_grant_frames + more_frames;
- extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
+ extra_entries = more_frames * grefs_per_grant_frame();
- nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
+ nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame() + RPP - 1) / RPP;
new_nr_glist_frames =
- (new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
+ (new_nr_grant_frames * grefs_per_grant_frame() + RPP - 1) / RPP;
for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
if (!gnttab_list[i])
}
- for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
- i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
+ for (i = grefs_per_grant_frame() * nr_grant_frames;
+ i < grefs_per_grant_frame() * new_nr_grant_frames - 1; i++)
gnttab_entry(i) = i + 1;
gnttab_entry(i) = gnttab_free_head;
- gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+ gnttab_free_head = grefs_per_grant_frame() * nr_grant_frames;
gnttab_free_count += extra_entries;
nr_grant_frames = new_nr_grant_frames;
return xen_max;
}
+/* Status pages needed when the grant table is at its maximum size. */
+static inline unsigned max_nr_grant_status_frames(void)
+{
+	unsigned max_gframes = max_nr_grant_frames();
+
+	return nr_status_frames(max_gframes);
+}
+
+/*
+ * Ask Xen to switch this domain to version 2 grant tables and record
+ * the outcome in grant_table_version, falling back to version 1 when
+ * the hypercall fails.
+ */
+static void gnttab_request_version(void)
+{
+	struct gnttab_set_version gsv;
+	int rc;
+
+	gsv.version = 2;
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
+	if (rc == 0) {
+		grant_table_version = 2;
+		return;
+	}
+
+	/*
+	 * Version 2 features may already be in use (e.g. we migrated to
+	 * an older version of Xen); losing them now would cause almost
+	 * unbounded badness, so give up immediately.
+	 */
+	if (grant_table_version == 2)
+		panic("we need grant tables version 2, but only version 1 is available");
+
+	grant_table_version = 1;
+}
+
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
+ struct gnttab_get_status_frames getframes; /* arguments for
+				GNTTABOP_get_status_frames; version > 1 only */
unsigned long *frames;
+ unsigned long *sframes; /* frame list for the status pages */
unsigned int nr_gframes = end_idx + 1;
+ unsigned int nr_sframes = nr_status_frames(nr_gframes);
int rc;
+ rc = -ENOMEM; /* returned if either allocation below fails */
frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
- if (!frames)
- return -ENOMEM;
+ sframes = kmalloc(nr_sframes * sizeof(unsigned long), GFP_ATOMIC); /*
+	NOTE(review): allocated for version 1 as well, although it is only
+	used inside the grant_table_version > 1 branch below. */
+ if (!frames || !sframes)
+ goto out; /* kfree() at out: releases whichever buffer was obtained */
setup.dom = DOMID_SELF;
setup.nr_frames = nr_gframes;
set_xen_guest_handle(setup.frame_list, frames);
rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
- if (rc == -ENOSYS) {
- kfree(frames);
- return -ENOSYS;
- }
+ if (rc == -ENOSYS)
+ goto out; /* -ENOSYS is passed back to the caller; any other
+	      failure trips the BUG_ON() below */
BUG_ON(rc || setup.status);
rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
- &shared);
+ (void **)&shared);
BUG_ON(rc);
+ if (grant_table_version > 1) { /* also fetch and map the status pages */
+ getframes.dom = DOMID_SELF;
+ getframes.nr_frames = nr_sframes;
+ set_xen_guest_handle(getframes.frame_list, sframes);
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
+ &getframes, 1);
+ BUG_ON(rc || getframes.status);
+
+ rc = arch_gnttab_map_shared(sframes, nr_sframes,
+ max_nr_grant_status_frames(),
+ (void **)&grstatus);
+ BUG_ON(rc);
+ }
+
+out:
kfree(frames);
+ kfree(sframes);
+ return rc; /* 0 on the success path (every BUG_ON() above passed),
+	       otherwise -ENOMEM or -ENOSYS */
- return 0;
}
static void gnttab_page_free(struct page *page, unsigned int order)
int gnttab_resume(void)
{
+ gnttab_request_version();
if (max_nr_grant_frames() < nr_grant_frames)
return -ENOSYS;
return gnttab_map(0, nr_grant_frames - 1);
int gnttab_suspend(void)
{
- arch_gnttab_unmap_shared(shared, nr_grant_frames);
+	/*
+	 * Undo the mappings set up by gnttab_map().  The status array is
+	 * only mapped there when grant_table_version > 1, so it must only
+	 * be unmapped in that case; hence the braces around the else arm.
+	 */
+	if (grant_table_version == 1) {
+		arch_gnttab_unmap_shared(shared.v1, nr_grant_frames);
+	} else {
+		arch_gnttab_unmap_shared(shared.v2, nr_grant_frames);
+		arch_gnttab_unmap_shared(grstatus,
+					 nr_status_frames(nr_grant_frames));
+	}
return 0;
}
unsigned int cur, extra;
cur = nr_grant_frames;
- extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
- GREFS_PER_GRANT_FRAME);
+ extra = ((req_entries + (grefs_per_grant_frame()-1)) /
+ grefs_per_grant_frame());
if (cur + extra > max_nr_grant_frames())
return -ENOSPC;
* grant reference free list on the current hypervisor.
*/
max_nr_glist_frames = (boot_max_nr_grant_frames *
- GREFS_PER_GRANT_FRAME / RPP);
+ grefs_per_grant_frame() / RPP);
gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
GFP_KERNEL);
if (gnttab_list == NULL)
return -ENOMEM;
- nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
+ nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame() + RPP - 1) / RPP;
for (i = 0; i < nr_glist_frames; i++) {
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
if (gnttab_list[i] == NULL)
if (gnttab_resume() < 0)
return -ENODEV;
- nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
+ nr_init_grefs = nr_grant_frames * grefs_per_grant_frame();
for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
gnttab_entry(i) = i + 1;
* Use SMP-safe bit-setting instruction.
*/
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
/*
* A grant table comprises a packed array of grant entries in one or more
* page frames shared between Xen and a guest.
* [XEN]: This field is written by Xen and read by the sharing guest.
* [GST]: This field is written by the guest and read by Xen.
*/
-struct grant_entry {
+struct grant_entry_v1 {
/* GTF_xxx: various type and flag information. [XEN,GST] */
uint16_t flags;
/* The domain being granted foreign privileges. [GST] */
* GTF_permit_access: Allow @domid to map/access @frame.
* GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
* to this guest. Xen writes the page number to @frame.
+ * GTF_transitive: Allow @domid to transitively access a subrange of
+ * @trans_grant in @trans_domid. No mappings are allowed.
*/
#define GTF_invalid (0U<<0)
#define GTF_permit_access (1U<<0)
#define GTF_accept_transfer (2U<<0)
+#define GTF_transitive (3U<<0)
#define GTF_type_mask (3U<<0)
/*
* GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
* GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
* GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ * GTF_sub_page: Grant access to only a subrange of the page. @domid
+ * will only be allowed to copy from the grant, and not
+ * map it. [GST]
*/
#define _GTF_readonly (2)
#define GTF_readonly (1U<<_GTF_readonly)
#define GTF_reading (1U<<_GTF_reading)
#define _GTF_writing (4)
#define GTF_writing (1U<<_GTF_writing)
+#define _GTF_sub_page (8)
+#define GTF_sub_page (1U<<_GTF_sub_page)
/*
* Subflags for GTF_accept_transfer:
#define _GTF_transfer_completed (3)
#define GTF_transfer_completed (1U<<_GTF_transfer_completed)
+/*
+ * Version 2 grant table entries. These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
-/***********************************
- * GRANT TABLE QUERIES AND USES
+/*
+ * Version 1 and version 2 grant entries share a common prefix. The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
*/
+struct grant_entry_header {
+ uint16_t flags; /* GTF_xxx: various type and flag information. [XEN,GST] */
+ domid_t domid; /* The domain being granted foreign privileges. [GST] */
+};
+typedef struct grant_entry_header grant_entry_header_t;
/*
- * Reference to a grant entry in a specified domain's grant table.
+ * Version 2 of the grant entry structure.
+ */
+struct grant_entry_v2 {
+ grant_entry_header_t hdr;
+ union {
+ /*
+ * The frame to which we are granting access. This field has
+ * the same meaning as the grant_entry_v1 field of the same
+ * name.
+ */
+ uint32_t frame;
+
+ /*
+ * If the grant type is GTF_permit_access and GTF_sub_page is
+ * set, @domid is allowed to access bytes
+ * [@page_off,@page_off+@length) in frame @frame.
+ */
+ struct {
+ uint32_t frame;
+ uint16_t page_off;
+ uint16_t length;
+ } sub_page;
+
+ /*
+ * If the grant is GTF_transitive, @domid is allowed to use
+ * the grant @gref in domain @trans_domid, as if it was the
+ * local domain. Obviously, the transitive access must be
+ * compatible with the original grant.
+ *
+ * The current version of Xen does not allow transitive grants
+ * to be mapped.
+ */
+ struct {
+ domid_t trans_domid;
+ uint16_t pad0;
+ grant_ref_t gref;
+ } transitive;
+
+ uint32_t __spacer[3]; /* Pad to a power of two */
+ };
+};
+
+typedef uint16_t grant_status_t;
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
*/
-typedef uint32_t grant_ref_t;
/*
* Handle to track a mapping created via a grant reference.
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure. This operation can only be performed
+ * once in any given domain. It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version 8
+struct gnttab_set_version {
+ /* IN parameters */
+ uint32_t version; /* requested grant table version: 1 or 2 */
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpxchg operations.
+ * <nr_frames> specifies the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_get_status_frames 9
+struct gnttab_get_status_frames {
+ /* IN parameters. */
+ domid_t dom;
+ uint32_t nr_frames;
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+ GUEST_HANDLE(ulong) frame_list;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
+
/*
* Bitfield values for update_pin_status.flags.
*/