unsigned short operation;
int status;
struct list_head free_list;
+ unsigned int flag_persistent:1;
};
#define BLKBACK_INVALID_HANDLE (~0)
static void make_response(struct xen_blkif *blkif, u64 id,
unsigned short op, int st);
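+/*
+ * Track a newly mapped grant in the per-device array of persistent
+ * grants; the array holds at most BLKIF_MAX_PERSISTENT_REQUESTS_PER_DEV *
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST entries, so overflowing it is a bug.
+ */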
+static void add_persistent_gnt(struct persistent_gnt *persistent_gnt,
+ struct xen_blkif *blkif)
+{
+ BUG_ON(blkif->persistent_gnt_c >=
+ BLKIF_MAX_PERSISTENT_REQUESTS_PER_DEV *
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ blkif->persistent_gnts[blkif->persistent_gnt_c++] = persistent_gnt;
+}
+
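+/*
+ * Look up a persistently mapped grant by its grant reference; returns
+ * NULL if this gref has not been mapped persistently yet.
+ */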
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
+ grant_ref_t gref)
+{
+ int i;
+
+ for (i = 0; i < blkif->persistent_gnt_c; i++)
+ if (gref == blkif->persistent_gnts[i]->gnt)
+ return blkif->persistent_gnts[i];
+ return NULL;
+}
+
/*
* Retrieve from the 'pending_reqs' a free pending_req structure to be used.
*/
{
struct xen_blkif *blkif = arg;
struct xen_vbd *vbd = &blkif->vbd;
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct persistent_gnt *persistent_gnt;
+ int i;
+ int ret = 0;
+ int segs_to_unmap;
xen_blkif_get(blkif);
print_stats(blkif);
}
+ /* Free all persistent grant pages */
+
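+ /*
+ * gnttab_unmap_refs() is given at most BLKIF_MAX_SEGMENTS_PER_REQUEST
+ * entries per call (the size of the local unmap[] and pages[] arrays),
+ * so drain the persistent grant array from the end in batches.
+ */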
+ while ((segs_to_unmap = min(BLKIF_MAX_SEGMENTS_PER_REQUEST,
+ blkif->persistent_gnt_c))) {
+
+ for (i = 0; i < segs_to_unmap; i++) {
+ persistent_gnt = blkif->persistent_gnts
+ [blkif->persistent_gnt_c - i - 1];
+
+ gnttab_set_unmap_op(&unmap[i],
+ pfn_to_kaddr(page_to_pfn(
+ persistent_gnt->page)),
+ GNTMAP_host_map,
+ persistent_gnt->handle);
+
+ pages[i] = persistent_gnt->page;
+ }
+
+ ret = gnttab_unmap_refs(unmap, pages, segs_to_unmap, false);
+ BUG_ON(ret);
+
+ blkif->persistent_gnt_c -= segs_to_unmap;
+
+ }
+
if (log_stats)
print_stats(blkif);
static int xen_blkbk_map(struct blkif_request *req,
struct pending_req *pending_req,
- struct seg_buf seg[])
+ struct seg_buf seg[],
+ struct page *pages[])
{
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct persistent_gnt
+ *new_persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct persistent_gnt *persistent_gnt;
+ phys_addr_t addr;
int i;
+ int new_map;
int nseg = req->u.rw.nr_segments;
+ int segs_to_map = 0;
int ret = 0;
+ int use_persistent_gnts;
+
+ use_persistent_gnts = (pending_req->blkif->vbd.feature_gnt_persistent);
+
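+ /* Fail the request once the per-device persistent grant array is full. */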
+ if (pending_req->blkif->persistent_gnt_c >=
+ BLKIF_MAX_SEGMENTS_PER_REQUEST *
+ BLKIF_MAX_PERSISTENT_REQUESTS_PER_DEV)
+ return -EIO;
+ pending_req->flag_persistent = use_persistent_gnts;
/*
* Fill out preq.nr_sects with proper amount of sectors, and setup
* assign map[..] with the PFN of the page in our domain with the
for (i = 0; i < nseg; i++) {
uint32_t flags;
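+ /*
+ * With persistent grants, reuse an existing mapping for this
+ * grant reference if we already have one; otherwise allocate a
+ * new persistent_gnt and a page to back it.
+ */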
+ if (use_persistent_gnts) {
+ persistent_gnt = get_persistent_gnt(
+ pending_req->blkif,
+ req->u.rw.seg[i].gref);
+ if (!persistent_gnt) {
+ new_map = 1;
+ persistent_gnt = kmalloc(
+ sizeof(struct persistent_gnt),
+ GFP_KERNEL);
+ if (!persistent_gnt)
+ return -ENOMEM;
+ persistent_gnt->page = alloc_page(GFP_KERNEL);
+ if (!persistent_gnt->page) {
+ kfree(persistent_gnt);
+ return -ENOMEM;
+ }
+ persistent_gnt->gnt = req->u.rw.seg[i].gref;
+
+ pages_to_gnt[segs_to_map] =
+ persistent_gnt->page;
+ new_persistent_gnts[segs_to_map] =
+ persistent_gnt;
+
+ add_persistent_gnt(persistent_gnt,
+ pending_req->blkif);
+
+ } else {
+ new_map = 0;
+ }
+ pages[i] = persistent_gnt->page;
+ addr = (unsigned long) pfn_to_kaddr(
+ page_to_pfn(persistent_gnt->page));
+ persistent_gnts[i] = persistent_gnt;
+ } else {
+ new_map = 1;
+ pages[i] = blkbk->pending_page(pending_req, i);
+ addr = vaddr(pending_req, i);
+ pages_to_gnt[i] = blkbk->pending_page(pending_req, i);
+ }
+
flags = GNTMAP_host_map;
- if (pending_req->operation != BLKIF_OP_READ)
+ if (!use_persistent_gnts &&
+ (pending_req->operation != BLKIF_OP_READ))
flags |= GNTMAP_readonly;
- gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
- req->u.rw.seg[i].gref,
- pending_req->blkif->domid);
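+ /*
+ * Only grants that are not already persistently mapped need a
+ * grant-table map operation.
+ */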
+ if (new_map) {
+ gnttab_set_map_op(&map[segs_to_map++], addr,
+ flags, req->u.rw.seg[i].gref,
+ pending_req->blkif->domid);
+ }
}
- ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
- BUG_ON(ret);
+ if (segs_to_map) {
+ ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+ BUG_ON(ret);
+ }
/*
* Now swizzle the MFN in our domain with the MFN from the other domain
* so that when we access vaddr(pending_req,i) it has the contents of
* the page from the other domain.
*/
- for (i = 0; i < nseg; i++) {
+ for (i = 0; i < segs_to_map; i++) {
if (unlikely(map[i].status != 0)) {
pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
map[i].handle = BLKBACK_INVALID_HANDLE;
ret |= 1;
}
- pending_handle(pending_req, i) = map[i].handle;
+ if (use_persistent_gnts) {
+ /* store the `out' values from map */
+ pending_req->blkif->persistent_gnts
+ [pending_req->blkif->persistent_gnt_c - segs_to_map +
+ i]->handle = map[i].handle;
+ new_persistent_gnts[i]->dev_bus_addr =
+ map[i].dev_bus_addr;
+ }
if (ret)
continue;
-
- seg[i].buf = map[i].dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
+ }
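+ /*
+ * Fill in the pending handle and segment buffer address for every
+ * segment, taken from the persistent grant when one is used and
+ * from the freshly performed map otherwise.
+ */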
+ for (i = 0; i < nseg; i++) {
+ if (use_persistent_gnts) {
+ pending_handle(pending_req, i) =
+ persistent_gnts[i]->handle;
+ seg[i].buf = persistent_gnts[i]->dev_bus_addr |
+ (req->u.rw.seg[i].first_sect << 9);
+ } else {
+ pending_handle(pending_req, i) = map[i].handle;
+ seg[i].buf = map[i].dev_bus_addr |
+ (req->u.rw.seg[i].first_sect << 9);
+ }
}
return ret;
}
* the proper response on the ring.
*/
if (atomic_dec_and_test(&pending_req->pendcnt)) {
- xen_blkbk_unmap(pending_req);
+ if (!pending_req->flag_persistent)
+ xen_blkbk_unmap(pending_req);
make_response(pending_req->blkif, pending_req->id,
pending_req->operation, pending_req->status);
xen_blkif_put(pending_req->blkif);
int operation;
struct blk_plug plug;
bool drain = false;
+ struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
switch (req->operation) {
case BLKIF_OP_READ:
* the hypercall to unmap the grants - that is all done in
* xen_blkbk_unmap.
*/
- if (xen_blkbk_map(req, pending_req, seg))
+ if (xen_blkbk_map(req, pending_req, seg, pages))
goto fail_flush;
/*
for (i = 0; i < nseg; i++) {
while ((bio == NULL) ||
(bio_add_page(bio,
- blkbk->pending_page(pending_req, i),
+ pages[i],
seg[i].nsec << 9,
seg[i].buf & ~PAGE_MASK) == 0)) {
return 0;
fail_flush:
- xen_blkbk_unmap(pending_req);
+ if (!blkif->vbd.feature_gnt_persistent)
+ xen_blkbk_unmap(pending_req);
fail_response:
/* Haven't submitted any bio's yet. */
make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
BLKIF_STATE_SUSPENDED,
};
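+/*
+ * Entry in the frontend's single-linked list of persistent grants:
+ * the grant reference and the pfn of the page backing it.
+ */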
+struct gnt_list {
+ grant_ref_t gref;
+ unsigned long pfn;
+ struct gnt_list *tail;
+};
+
struct blk_shadow {
struct blkif_request req;
struct request *request;
unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct gnt_list *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
static DEFINE_MUTEX(blkfront_mutex);
struct work_struct work;
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
+ struct gnt_list *persistent_gnts;
+ unsigned int persistent_gnts_c;
unsigned long shadow_free;
unsigned int feature_flush;
unsigned int flush_op;
unsigned int feature_discard:1;
unsigned int feature_secdiscard:1;
+ unsigned int feature_persistent:1;
unsigned int discard_granularity;
unsigned int discard_alignment;
int is_ready;
unsigned long id;
unsigned int fsect, lsect;
int i, ref;
+
+ /*
+ * Stores whether we have negotiated with the backend to use
+ * persistent grants.
+ */
+ int use_persistent_gnts;
+
+ /*
+ * Set to 1 when new grant references have to be claimed because
+ * there are not enough free persistent grants cached; 0 when the
+ * request can be queued using existing persistent grants only.
+ */
+ int new_persistent_gnts;
grant_ref_t gref_head;
+ struct page *granted_page;
+ struct gnt_list *gnt_list_entry;
struct scatterlist *sg;
if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return 1;
- if (gnttab_alloc_grant_references(
- BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
- gnttab_request_free_callback(
- &info->callback,
- blkif_restart_queue_callback,
- info,
- BLKIF_MAX_SEGMENTS_PER_REQUEST);
- return 1;
- }
+ use_persistent_gnts = info->feature_persistent;
+
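+ /*
+ * If fewer persistent grants are cached than a single request can
+ * consume, claim fresh grant references as well (retrying later via
+ * the free callback if none are available); otherwise the request is
+ * built from cached persistent grants alone.
+ */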
+ if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+ new_persistent_gnts = 1;
+ if (gnttab_alloc_grant_references(
+ BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+ gnttab_request_free_callback(
+ &info->callback,
+ blkif_restart_queue_callback,
+ info,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ return 1;
+ }
+ } else
+ new_persistent_gnts = 0;
/* Fill out a communications ring structure. */
ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
BLKIF_MAX_SEGMENTS_PER_REQUEST);
for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
- buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
fsect = sg->offset >> 9;
lsect = fsect + (sg->length >> 9) - 1;
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head);
- BUG_ON(ref == -ENOSPC);
- gnttab_grant_foreign_access_ref(
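+ /*
+ * Take a grant from the list of cached persistent grants when
+ * one is available; otherwise claim a new grant reference and,
+ * when persistent grants are in use, allocate a dedicated page
+ * to back it.
+ */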
+ if (use_persistent_gnts && info->persistent_gnts_c) {
+ gnt_list_entry = info->persistent_gnts;
+
+ info->persistent_gnts = gnt_list_entry->tail;
+ ref = gnt_list_entry->gref;
+ buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+ info->persistent_gnts_c--;
+ } else {
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
+
+ if (use_persistent_gnts) {
+ gnt_list_entry =
+ kmalloc(sizeof(struct gnt_list),
+ GFP_ATOMIC);
+ if (!gnt_list_entry)
+ return -ENOMEM;
+
+ granted_page = alloc_page(GFP_ATOMIC);
+ if (!granted_page) {
+ kfree(gnt_list_entry);
+ return -ENOMEM;
+ }
+
+ gnt_list_entry->pfn =
+ page_to_pfn(granted_page);
+ gnt_list_entry->gref = ref;
+ } else
+ granted_page = sg_page(sg);
+
+ buffer_mfn = pfn_to_mfn(page_to_pfn(
+ granted_page));
+ gnttab_grant_foreign_access_ref(
ref,
info->xbdev->otherend_id,
buffer_mfn,
+ !use_persistent_gnts &&
rq_data_dir(req));
+ }
+
+ if (use_persistent_gnts)
+ info->shadow[id].grants_used[i] =
+ gnt_list_entry;
+
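+ /*
+ * With persistent grants the backend maps a dedicated page
+ * rather than the bio page itself, so for writes copy the
+ * data into the shared page before issuing the request.
+ */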
+ if (use_persistent_gnts && rq_data_dir(req)) {
+ char *bvec_data;
+ void *shared_data;
+
+ BUG_ON(sg->offset + sg->length > PAGE_SIZE);
+
+ shared_data = kmap_atomic(
+ pfn_to_page(gnt_list_entry->pfn));
+ bvec_data = kmap_atomic(sg_page(sg));
+
+ /*
+ * This does not wipe data stored outside the
+ * range sg->offset..sg->offset+sg->length.
+ * Therefore, blkback *could* see data from
+ * previous requests. This is OK as long as
+ * persistent grants are shared with just one
+ * domain. It may need refactoring if this
+ * changes.
+ */
+ memcpy(shared_data + sg->offset,
+ bvec_data + sg->offset,
+ sg->length);
+
+ kunmap_atomic(bvec_data);
+ kunmap_atomic(shared_data);
+ }
info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
+
ring_req->u.rw.seg[i] =
(struct blkif_request_segment) {
.gref = ref,
/* Keep a private copy so we can reissue requests when recovering. */
info->shadow[id].req = *ring_req;
- gnttab_free_grant_references(gref_head);
+ if (new_persistent_gnts)
+ gnttab_free_grant_references(gref_head);
return 0;
}
static void xlvbd_flush(struct blkfront_info *info)
{
blk_queue_flush(info->rq, info->feature_flush);
- printk(KERN_INFO "blkfront: %s: %s: %s\n",
+ printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
info->gd->disk_name,
info->flush_op == BLKIF_OP_WRITE_BARRIER ?
"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
"flush diskcache" : "barrier or flush"),
- info->feature_flush ? "enabled" : "disabled");
+ info->feature_flush ? "enabled" : "disabled",
+ info->feature_persistent ? "persistent" : "non-persistent");
}
static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
static void blkif_free(struct blkfront_info *info, int suspend)
{
+ struct gnt_list *persistent_gnt;
+
/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&info->io_lock);
info->connected = suspend ?
/* No more blkif_request(). */
if (info->rq)
blk_stop_queue(info->rq);
+
+ /* Remove all persistent grants */
+ while (info->persistent_gnts) {
+ persistent_gnt = info->persistent_gnts;
+ info->persistent_gnts = persistent_gnt->tail;
+ gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+ kfree(persistent_gnt);
+ }
+
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
spin_unlock_irq(&info->io_lock);
}
-static void blkif_completion(struct blk_shadow *s)
+static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+ struct blkif_response *bret)
{
int i;
- /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
- * flag. */
- for (i = 0; i < s->req.u.rw.nr_segments; i++)
- gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+ struct gnt_list *new_gnt_list_entry;
+ struct bio_vec *bvec;
+ struct req_iterator iter;
+ unsigned long flags;
+ char *bvec_data;
+ void *shared_data;
+
+ if (info->feature_persistent == 0) {
+ /* Do not use this for BLKIF_OP_DISCARD: for discard requests
+ * the nr_segments field holds a flag rather than a segment count. */
+ for (i = 0; i < s->req.u.rw.nr_segments; i++)
+ gnttab_end_foreign_access(s->req.u.rw.seg[i].gref,
+ 0, 0UL);
+ return;
+ }
+
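+ /*
+ * For reads the backend has written the data into the persistently
+ * granted pages, so copy it back into the request's bvecs before
+ * recycling the grants.
+ */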
+ i = 0;
+ if (bret->operation == BLKIF_OP_READ)
+ rq_for_each_segment(bvec, s->request, iter) {
+ BUG_ON(bvec->bv_offset + bvec->bv_len > PAGE_SIZE);
+
+ shared_data = kmap_atomic
+ (pfn_to_page(s->grants_used[i++]->pfn));
+ bvec_data = bvec_kmap_irq(bvec, &flags);
+ memcpy(bvec_data, shared_data + bvec->bv_offset,
+ bvec->bv_len);
+ bvec_kunmap_irq(bvec_data, &flags);
+ kunmap_atomic(shared_data);
+ }
+ /* Add the persistent grants back to the list of free grants */
+ for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+ new_gnt_list_entry = s->grants_used[i];
+ new_gnt_list_entry->tail = info->persistent_gnts;
+ info->persistent_gnts = new_gnt_list_entry;
+ info->persistent_gnts_c++;
+ }
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
req = info->shadow[id].request;
if (bret->operation != BLKIF_OP_DISCARD)
- blkif_completion(&info->shadow[id]);
+ blkif_completion(&info->shadow[id], info, bret);
if (add_id_to_freelist(info, id)) {
WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
message = "writing protocol";
goto abort_transaction;
}
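+ /* Advertise persistent grant support in the frontend's xenstore area. */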
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-persistent-grants", "%d", 1);
+ if (err) {
+ dev_warn(&dev->dev,
+ "writing persistent grants feature to xenbus");
+ info->feature_persistent = 0;
+ }
err = xenbus_transaction_end(xbt, 0);
if (err) {
spin_lock_init(&info->io_lock);
info->xbdev = dev;
info->vdevice = vdevice;
+ info->persistent_gnts_c = 0;
info->connected = BLKIF_STATE_DISCONNECTED;
INIT_WORK(&info->work, blkif_restart_queue);
req->u.rw.seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
+ !info->feature_persistent &&
rq_data_dir(info->shadow[req->u.rw.id].request));
}
info->shadow[req->u.rw.id].req = *req;
unsigned long sector_size;
unsigned int binfo;
int err;
- int barrier, flush, discard;
+ int barrier, flush, discard, persistent;
switch (info->connected) {
case BLKIF_STATE_CONNECTED:
info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
}
+ /*
+ * Are we dealing with an old blkback that will unmap
+ * all grefs?
+ */
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-persistent-grants", "%d", &persistent,
+ NULL);
+
+ if (err)
+ info->feature_persistent = 0;
+ else
+ info->feature_persistent = persistent;
+
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"feature-discard", "%d", &discard,
NULL);