return 0;
}
+static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
+ unsigned long addr, void *data)
+{
+ uint64_t **frames = (uint64_t **)data;
+
+ set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+ (*frames)++;
+ return 0;
+}
+
static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
unsigned long addr, void *data)
{
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
unsigned long max_nr_gframes,
- struct grant_entry **__shared)
+ void **__shared)
{
int rc;
- struct grant_entry *shared = *__shared;
+ void *shared = *__shared;
if (shared == NULL) {
struct vm_struct *area =
return rc;
}
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
- unsigned long nr_gframes)
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+ unsigned long max_nr_gframes,
+ grant_status_t **__shared)
+{
+ int rc;
+ grant_status_t *shared = *__shared;
+
+ if (shared == NULL) {
+ struct vm_struct *area =
+ xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
+ BUG_ON(area == NULL);
+ shared = area->addr;
+ *__shared = shared;
+ }
+
+ rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+ PAGE_SIZE * nr_gframes,
+ map_pte_fn_status, &frames);
+ return rc;
+}
+
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
{
apply_to_page_range(&init_mm, (unsigned long)shared,
PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
#define GNTTAB_LIST_END 0xffffffff
-#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
+#define GREFS_PER_GRANT_FRAME \
+(grant_table_version == 1 ? \
+ (PAGE_SIZE / sizeof(struct grant_entry_v1)) : \
+ (PAGE_SIZE / sizeof(union grant_entry_v2)))
static void pending_free_timer(unsigned long ignore);
static DEFINE_SPINLOCK(gnttab_pending_free_lock);
static DEFINE_SPINLOCK(gnttab_list_lock);
-static struct grant_entry *shared;
+static union {
+ struct grant_entry_v1 *v1;
+ union grant_entry_v2 *v2;
+ void *raw;
+} shared;
+
+static grant_status_t *grstatus;
static struct gnttab_free_callback *gnttab_free_callback_list;
+static int grant_table_version;
+
static int gnttab_expand(unsigned int req_entries);
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define SPP (PAGE_SIZE / sizeof(grant_status_t))
static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
* 1. Write ent->domid.
* 2. Write ent->frame:
* GTF_permit_access: Frame to which access is permitted.
- * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
- * frame, or zero if none.
+ * GTF_accept_transfer: Pseudo-phys frame slot being filled by
+ * new frame, or zero if none.
* 3. Write memory barrier (WMB).
* 4. Write ent->flags, inc. valid type.
*/
- shared[ref].frame = frame;
- shared[ref].domid = domid;
- wmb();
- shared[ref].flags = flags;
+ if (grant_table_version == 1) {
+ shared.v1[ref].frame = frame;
+ shared.v1[ref].domid = domid;
+ wmb();
+ shared.v1[ref].flags = flags;
+ } else {
+ shared.v2[ref].full_page.frame = frame;
+ shared.v2[ref].hdr.domid = domid;
+ wmb();
+ shared.v2[ref].hdr.flags = GTF_permit_access | flags;
+ }
}
/*
{
u16 nflags;
- nflags = shared[ref].flags;
+ if (grant_table_version == 1)
+ nflags = shared.v1[ref].flags;
+ else
+ nflags = grstatus[ref];
return (nflags & (GTF_reading|GTF_writing));
}
static int _gnttab_end_foreign_access_ref(grant_ref_t ref)
{
u16 flags, nflags;
-
- nflags = shared[ref].flags;
- do {
- flags = nflags;
- if (flags & (GTF_reading|GTF_writing))
+ u16 *pflags;
+
+ if (grant_table_version == 1) {
+ pflags = &shared.v1[ref].flags;
+ nflags = *pflags;
+ do {
+ flags = nflags;
+ if (flags & (GTF_reading|GTF_writing))
+ return 0;
+ nflags = sync_cmpxchg(&shared.v1[ref].flags, flags,
+ 0);
+ } while (nflags != flags);
+ } else {
+ shared.v2[ref].hdr.flags = 0;
+ mb();
+ if (grstatus[ref] & (GTF_reading|GTF_writing)) {
return 0;
- } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
+ } else {
+ /* The read of grstatus needs to have acquire
+ semantics. On x86, reads already have
+ that, and we just need to protect against
+ compiler reorderings. On other
+ architectures we may need a full
+ barrier. */
+#ifdef CONFIG_X86
+ barrier();
+#else
+ mb();
+#endif
+ }
+ }
return 1;
}
{
unsigned long frame;
u16 flags;
+ u16 *pflags;
+
+ if (grant_table_version == 1)
+ pflags = &shared.v1[ref].flags;
+ else
+ pflags = &shared.v2[ref].hdr.flags;
/*
* If a transfer is not even yet started, try to reclaim the grant
* reference and return failure (== 0).
*/
- while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
- if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
+ while (!((flags = *pflags) & GTF_transfer_committed)) {
+ if (sync_cmpxchg(pflags, flags, 0) == flags)
return 0;
cpu_relax();
}
/* If a transfer is in progress then wait until it is completed. */
while (!(flags & GTF_transfer_completed)) {
- flags = shared[ref].flags;
+ flags = *pflags;
cpu_relax();
}
rmb(); /* Read the frame number /after/ reading completion status. */
- frame = shared[ref].frame;
+ if (grant_table_version == 1)
+ frame = shared.v1[ref].frame;
+ else
+ frame = shared.v2[ref].full_page.frame;
BUG_ON(frame == 0);
return frame;
return xen_max;
}
+static unsigned nr_status_frames(unsigned nr_grant_frames)
+{
+ return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP;
+}
+
+static void gnttab_request_version(void)
+{
+ int rc;
+ struct gnttab_set_version gsv;
+
+ gsv.version = 2;
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
+ if (rc == 0) {
+ grant_table_version = 2;
+ printk(KERN_NOTICE "Using V2 grant tables.\n");
+ } else {
+ if (grant_table_version == 2) {
+ /* If we've already used version 2 features,
+ but then suddenly discover that they're not
+ available (e.g. migrating to an older
+ version of Xen), almost unbounded badness
+ can happen. */
+ panic("we need grant tables version 2, but only version 1 is available");
+ }
+ grant_table_version = 1;
+ printk(KERN_WARNING "Using legacy V1 grant tables; upgrade to a newer version of Xen.\n");
+ }
+}
+
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
- unsigned long *frames;
+ unsigned long *gframes;
+ uint64_t *sframes;
unsigned int nr_gframes = end_idx + 1;
+ unsigned int nr_sframes;
int rc;
- frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
- if (!frames)
+ gframes = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
+ if (!gframes)
return -ENOMEM;
setup.dom = DOMID_SELF;
setup.nr_frames = nr_gframes;
- set_xen_guest_handle(setup.frame_list, frames);
+ set_xen_guest_handle(setup.frame_list, gframes);
rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
if (rc == -ENOSYS) {
- kfree(frames);
+ kfree(gframes);
return -ENOSYS;
}
BUG_ON(rc || setup.status);
- rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
- &shared);
+ if (grant_table_version > 1) {
+ struct gnttab_get_status_frames getframes;
+
+ nr_sframes = nr_status_frames(nr_gframes);
+
+ sframes = kmalloc(nr_sframes * sizeof(uint64_t),
+ GFP_ATOMIC);
+ if (!sframes) {
+ kfree(gframes);
+ return -ENOMEM;
+ }
+ getframes.dom = DOMID_SELF;
+ getframes.nr_frames = nr_sframes;
+ set_xen_guest_handle(getframes.frame_list, sframes);
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
+ &getframes, 1);
+ if (rc == -ENOSYS) {
+ kfree(gframes);
+ kfree(sframes);
+ return -ENOSYS;
+ }
+
+ BUG_ON(rc || getframes.status);
+
+ rc = arch_gnttab_map_status(
+ sframes, nr_sframes,
+ nr_status_frames(max_nr_grant_frames()),
+ &grstatus);
+ BUG_ON(rc);
+ kfree(sframes);
+ }
+
+ rc = arch_gnttab_map_shared(gframes, nr_gframes, max_nr_grant_frames(),
+ &shared.raw);
BUG_ON(rc);
- kfree(frames);
+ kfree(gframes);
return 0;
}
int gnttab_resume(void)
{
+ gnttab_request_version();
if (max_nr_grant_frames() < nr_grant_frames)
return -ENOSYS;
return gnttab_map(0, nr_grant_frames - 1);
int gnttab_suspend(void)
{
- arch_gnttab_unmap_shared(shared, nr_grant_frames);
+ arch_gnttab_unmap(shared.raw, nr_grant_frames);
+ arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
return 0;
}
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
unsigned long max_nr_gframes,
- struct grant_entry **__shared);
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
- unsigned long nr_gframes);
+ void **__shared);
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+ unsigned long max_nr_gframes,
+ grant_status_t **__shared);
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes);
#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
* Use SMP-safe bit-setting instruction.
*/
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
/*
* A grant table comprises a packed array of grant entries in one or more
* page frames shared between Xen and a guest.
* [XEN]: This field is written by Xen and read by the sharing guest.
* [GST]: This field is written by the guest and read by Xen.
*/
-struct grant_entry {
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility. New guests should use version 2.
+ */
+struct grant_entry_v1 {
/* GTF_xxx: various type and flag information. [XEN,GST] */
uint16_t flags;
/* The domain being granted foreign privileges. [GST] */
* GTF_permit_access: Allow @domid to map/access @frame.
* GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
* to this guest. Xen writes the page number to @frame.
+ * GTF_transitive: Allow @domid to transitively access a subrange of
+ * @trans_grant in @trans_domid. No mappings are allowed.
*/
#define GTF_invalid (0U<<0)
#define GTF_permit_access (1U<<0)
#define GTF_accept_transfer (2U<<0)
+#define GTF_transitive (3U<<0)
#define GTF_type_mask (3U<<0)
/*
* GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
* GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
* GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ * GTF_sub_page: Grant access to only a subrange of the page. @domid
+ * will only be allowed to copy from the grant, and not
+ * map it. [GST]
*/
#define _GTF_readonly (2)
#define GTF_readonly (1U<<_GTF_readonly)
#define GTF_reading (1U<<_GTF_reading)
#define _GTF_writing (4)
#define GTF_writing (1U<<_GTF_writing)
+#define _GTF_sub_page (8)
+#define GTF_sub_page (1U<<_GTF_sub_page)
/*
* Subflags for GTF_accept_transfer:
#define _GTF_transfer_completed (3)
#define GTF_transfer_completed (1U<<_GTF_transfer_completed)
+/*
+ * Version 2 grant table entries. These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
-/***********************************
- * GRANT TABLE QUERIES AND USES
+/*
+ * Version 1 and version 2 grant entries share a common prefix. The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
*/
+struct grant_entry_header {
+ uint16_t flags;
+ domid_t domid;
+};
/*
- * Reference to a grant entry in a specified domain's grant table.
+ * Version 2 of the grant entry structure.
+ */
+union grant_entry_v2 {
+ struct grant_entry_header hdr;
+
+ /*
+ * This member is used for V1-style full page grants, where either:
+ *
+ * -- hdr.type is GTF_accept_transfer, or
+ * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+ *
+ * In that case, the frame field has the same semantics as the
+ * field of the same name in the V1 entry structure.
+ */
+ struct {
+ struct grant_entry_header hdr;
+ uint32_t pad0;
+ uint64_t frame;
+ } full_page;
+
+ /*
+ * If the grant type is GTF_grant_access and GTF_sub_page is set,
+ * @domid is allowed to access bytes [@page_off,@page_off+@length)
+ * in frame @frame.
+ */
+ struct {
+ struct grant_entry_header hdr;
+ uint16_t page_off;
+ uint16_t length;
+ uint64_t frame;
+ } sub_page;
+
+ /*
+ * If the grant is GTF_transitive, @domid is allowed to use the
+ * grant @gref in domain @trans_domid, as if it was the local
+ * domain. Obviously, the transitive access must be compatible
+ * with the original grant.
+ *
+ * The current version of Xen does not allow transitive grants
+ * to be mapped.
+ */
+ struct {
+ struct grant_entry_header hdr;
+ domid_t trans_domid;
+ uint16_t pad0;
+ grant_ref_t gref;
+ } transitive;
+
+ uint32_t __spacer[4]; /* Pad to a power of two */
+};
+
+typedef uint16_t grant_status_t;
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
*/
-typedef uint32_t grant_ref_t;
/*
* Handle to track a mapping created via a grant reference.
#define GNTTABOP_copy 5
struct gnttab_copy {
- /* IN parameters. */
- struct {
- union {
- grant_ref_t ref;
- unsigned long gmfn;
- } u;
- domid_t domid;
- uint16_t offset;
- } source, dest;
- uint16_t len;
- uint16_t flags; /* GNTCOPY_* */
- /* OUT parameters. */
- int16_t status;
+ /* IN parameters. */
+ struct {
+ union {
+ grant_ref_t ref;
+ unsigned long gmfn;
+ } u;
+ domid_t domid;
+ uint16_t offset;
+ } source, dest;
+ uint16_t len;
+ uint16_t flags; /* GNTCOPY_* */
+ /* OUT parameters. */
+ int16_t status;
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
};
DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure. This operation can only be performed
+ * once in any given domain. It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version 8
+struct gnttab_set_version {
+ /* IN parameters */
+ uint32_t version;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpexch operations.
+ * <nr_frames> specify the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_get_status_frames 9
+struct gnttab_get_status_frames {
+ /* IN parameters. */
+ uint32_t nr_frames;
+ domid_t dom;
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+ GUEST_HANDLE(uint64_t) frame_list;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
+
+/*
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+#define GNTTABOP_get_version 10
+struct gnttab_get_version {
+ /* IN parameters */
+ domid_t dom;
+ uint16_t pad;
+ /* OUT parameters */
+ uint32_t version;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
+
/*
* Bitfield values for update_pin_status.flags.
*/