xenbits.xensource.com Git - legacy/linux-2.6.18-xen.git/commitdiff
author    Keir Fraser <keir.fraser@citrix.com>    Mon, 11 Feb 2008 10:08:57 +0000 (10:08 +0000)
committer Keir Fraser <keir.fraser@citrix.com>    Mon, 11 Feb 2008 10:08:57 +0000 (10:08 +0000)

block: backport Jens Axboe's commit from
Tue, 16 Oct 2007 09:03:56 +0000 (11:03 +0200)
bf2de6f5a4faf0197268f18d08969b003b87b6e8
Initial support for data-less (or empty) barrier support

blkback: permit and implement empty barrier.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
block/elevator.c
block/ll_rw_blk.c
drivers/xen/blkback/blkback.c
fs/bio.c
include/linux/bio.h
include/linux/blkdev.h
mm/highmem.c
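
As context for the hunks below: after this backport, a caller can submit a barrier bio that carries no data at all. What follows is a minimal, hypothetical sketch of such a caller under 2.6.18-era APIs; it mirrors the bio_alloc(GFP_KERNEL, 0) pattern used in the blkback hunk further down, and issue_empty_barrier() and its parameters are illustrative names, not part of this commit.

/* Hedged sketch, not from this commit: issue a data-less barrier. */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/fs.h>

static int issue_empty_barrier(struct block_device *bdev,
			       bio_end_io_t *endio, void *priv)
{
	struct bio *bio = bio_alloc(GFP_KERNEL, 0);	/* zero segments */

	if (unlikely(!bio))
		return -ENOMEM;

	bio->bi_bdev    = bdev;
	bio->bi_end_io  = endio;
	bio->bi_private = priv;

	/* bio_empty_barrier(bio) holds once submit_bio() sets the
	 * barrier bit in bi_rw, since bi_size stays 0. */
	submit_bio(WRITE_BARRIER, bio);
	return 0;
}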

index 8ed284691c0bc69aa3be19d1db52b6b9f3c768da..09153aca0f1819d02e672ea0c866d79df13874cc 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -493,6 +493,16 @@ struct request *elv_next_request(request_queue_t *q)
        int ret;
 
        while ((rq = __elv_next_request(q)) != NULL) {
+               /*
+                * Kill the empty barrier place holder, the driver must
+                * not ever see it.
+                */
+               if (blk_empty_barrier(rq)) {
+                       blkdev_dequeue_request(rq);
+                       end_that_request_chunk(rq, 1, 0);
+                       end_that_request_last(rq, 1);
+                       continue;
+               }
                if (!(rq->flags & REQ_STARTED)) {
                        elevator_t *e = q->elevator;
 
index 3ecdb3476517b297c0c897edc49b58cd7a7e0895..59b3f2631fd0a8ee0d997b9982980e322a78657b 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -483,9 +483,12 @@ static inline struct request *start_ordered(request_queue_t *q,
         * Queue ordered sequence.  As we stack them at the head, we
         * need to queue in reverse order.  Note that we rely on that
         * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-        * request gets inbetween ordered sequence.
+        * request gets inbetween ordered sequence. If this request is
+        * an empty barrier, we don't need to do a postflush ever since
+        * there will be no data written between the pre and post flush.
+        * Hence a single flush will suffice.
         */
-       if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+       if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
                queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
        else
                q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
@@ -2967,7 +2970,7 @@ static inline void blk_partition_remap(struct bio *bio)
 {
        struct block_device *bdev = bio->bi_bdev;
 
-       if (bdev != bdev->bd_contains) {
+       if (bio_sectors(bio) && bdev != bdev->bd_contains) {
                struct hd_struct *p = bdev->bd_part;
                const int rw = bio_data_dir(bio);
 
@@ -3028,7 +3031,7 @@ void generic_make_request(struct bio *bio)
        might_sleep();
        /* Test device or partition size, when known. */
        maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-       if (maxsector) {
+       if (maxsector && nr_sectors) {
                sector_t sector = bio->bi_sector;
 
                if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
@@ -3094,7 +3097,7 @@ end_io:
                old_dev = bio->bi_bdev->bd_dev;
 
                maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-               if (maxsector) {
+               if (maxsector && nr_sectors) {
                        sector_t sector = bio->bi_sector;
 
                        if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
@@ -3128,21 +3131,25 @@ void submit_bio(int rw, struct bio *bio)
 {
        int count = bio_sectors(bio);
 
-       BIO_BUG_ON(!bio->bi_size);
-       BIO_BUG_ON(!bio->bi_io_vec);
        bio->bi_rw |= rw;
-       if (rw & WRITE)
-               count_vm_events(PGPGOUT, count);
-       else
-               count_vm_events(PGPGIN, count);
 
-       if (unlikely(block_dump)) {
-               char b[BDEVNAME_SIZE];
-               printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
-                       current->comm, current->pid,
-                       (rw & WRITE) ? "WRITE" : "READ",
-                       (unsigned long long)bio->bi_sector,
-                       bdevname(bio->bi_bdev,b));
+       if (!bio_empty_barrier(bio)) {
+               BIO_BUG_ON(!bio->bi_size);
+               BIO_BUG_ON(!bio->bi_io_vec);
+
+               if (rw & WRITE)
+                       count_vm_events(PGPGOUT, count);
+               else
+                       count_vm_events(PGPGIN, count);
+
+               if (unlikely(block_dump)) {
+                       char b[BDEVNAME_SIZE];
+                       printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
+                               current->comm, current->pid,
+                               (rw & WRITE) ? "WRITE" : "READ",
+                               (unsigned long long)bio->bi_sector,
+                               bdevname(bio->bi_bdev,b));
+               }
        }
 
        generic_make_request(bio);
@@ -3260,6 +3267,13 @@ static int __end_that_request_first(struct request *req, int uptodate,
        while ((bio = req->bio) != NULL) {
                int nbytes;
 
+               /* For an empty barrier request, the low level driver must
+                * store a potential error location in ->sector. We pass
+                * that back up in ->bi_sector
+                */
+               if (blk_empty_barrier(req))
+                       bio->bi_sector = req->sector;
+
                if (nr_bytes >= bio->bi_size) {
                        req->bio = bio->bi_next;
                        nbytes = bio->bi_size;
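
The new comment above establishes the convention that, for an empty barrier, the low-level driver stores a potential error location in ->sector and the block layer hands it back up in ->bi_sector. A hypothetical completion callback for the earlier sketch could therefore report it as follows (2.6.18 three-argument bi_end_io signature; the names are illustrative only).

/* Hedged sketch, not from this commit: empty-barrier completion. */
static int empty_barrier_end_io(struct bio *bio, unsigned int done, int err)
{
	if (bio->bi_size)	/* not fully completed yet */
		return 1;

	if (err)		/* ->bi_sector carries the error location */
		printk(KERN_ERR "empty barrier failed near sector %llu\n",
		       (unsigned long long)bio->bi_sector);

	bio_put(bio);
	return 0;
}
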
index c1e9e815ee489566b38d797142f3d75745dbd5ed..f9f6452c5dadfddb83179e96338c6948a075fdcf 100644
--- a/drivers/xen/blkback/blkback.c
+++ b/drivers/xen/blkback/blkback.c
@@ -407,7 +407,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
 
        /* Check that number of segments is sane. */
        nseg = req->nr_segments;
-       if (unlikely(nseg == 0) || 
+       if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || 
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                DPRINTK("Bad number of segments in request (%d)\n", nseg);
                goto fail_response;
@@ -500,6 +500,18 @@ static void dispatch_rw_block_io(blkif_t *blkif,
                preq.sector_number += seg[i].nsec;
        }
 
+       if (!bio) {
+               BUG_ON(operation != WRITE_BARRIER);
+               bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0);
+               if (unlikely(bio == NULL))
+                       goto fail_put_bio;
+
+               bio->bi_bdev    = preq.bdev;
+               bio->bi_private = pending_req;
+               bio->bi_end_io  = end_block_io_op;
+               bio->bi_sector  = -1;
+       }
+
        plug_queue(blkif, bio);
        atomic_set(&pending_req->pendcnt, nbio);
        blkif_get(blkif);
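
On the frontend side of the split driver, the relaxed nseg check above is what lets a guest push a zero-segment barrier request through the shared ring. A hypothetical fragment of what such a frontend might do follows; the blkfront field names (info->ring, info->handle, info->irq) are assumptions based on the stock blkfront driver, not part of this commit.

/* Hedged sketch, not from this commit: frontend sends an empty barrier. */
static void send_empty_barrier(struct blkfront_info *info, unsigned long id)
{
	struct blkif_request *ring_req;
	int notify;

	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
	ring_req->operation     = BLKIF_OP_WRITE_BARRIER;
	ring_req->nr_segments   = 0;	/* now accepted by blkback */
	ring_req->handle        = info->handle;
	ring_req->id            = id;
	ring_req->sector_number = 0;
	info->ring.req_prod_pvt++;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
	if (notify)
		notify_remote_via_irq(info->irq);
}
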
index 6a0b9ad8f8c9d031d4c32fb32e1c53e0677d1ef2..bd439331fbc6881828c4cfc6ffca84e6bc519f8e 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -112,7 +112,8 @@ void bio_free(struct bio *bio, struct bio_set *bio_set)
 
        BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
 
-       mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+       if (bio->bi_io_vec)
+               mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
        mempool_free(bio, bio_set->bio_pool);
 }
 
index 76bdaeab6f622240fe8f898a9caf0815770f7c38..d07589057f5f9359ef0331c2b09898a01e1c28ad 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -172,12 +172,27 @@ struct bio {
 #define bio_offset(bio)                bio_iovec((bio))->bv_offset
 #define bio_segments(bio)      ((bio)->bi_vcnt - (bio)->bi_idx)
 #define bio_sectors(bio)       ((bio)->bi_size >> 9)
-#define bio_cur_sectors(bio)   (bio_iovec(bio)->bv_len >> 9)
-#define bio_data(bio)          (page_address(bio_page((bio))) + bio_offset((bio)))
 #define bio_barrier(bio)       ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
 #define bio_sync(bio)          ((bio)->bi_rw & (1 << BIO_RW_SYNC))
 #define bio_failfast(bio)      ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)      ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
+#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size)
+
+static inline unsigned int bio_cur_sectors(struct bio *bio)
+{
+       if (bio->bi_vcnt)
+               return bio_iovec(bio)->bv_len >> 9;
+
+       return 0;
+}
+
+static inline void *bio_data(struct bio *bio)
+{
+       if (bio->bi_vcnt)
+               return page_address(bio_page(bio)) + bio_offset(bio);
+
+       return NULL;
+}
 
 /*
  * will die
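
The new bio_empty_barrier() predicate is also the natural test for a bio-based ->make_request_fn driver that wants to honor data-less barriers itself. A hypothetical sketch (the drain/flush helper is an assumed name, not part of this commit):

/* Hedged sketch, not from this commit: handling an empty barrier bio. */
static int my_make_request(request_queue_t *q, struct bio *bio)
{
	if (unlikely(bio_empty_barrier(bio))) {
		my_drain_and_flush();		/* assumed driver helper */
		bio_endio(bio, 0, 0);		/* no bytes: bi_size is 0 */
		return 0;
	}

	/* ... normal data path ... */
	return 0;
}
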
index aafe82788b4efcaebf6d7513d98a3fc038124c4c..08ad294bf03c6612cabe8785f6be95d6c59963fa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -506,6 +506,8 @@ enum {
 #define blk_barrier_rq(rq)     ((rq)->flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)         ((rq)->flags & REQ_FUA)
 
+#define blk_empty_barrier(rq)   (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
+
 #define list_entry_rq(ptr)     list_entry((ptr), struct request, queuelist)
 
 #define rq_data_dir(rq)                ((rq)->flags & 1)
index 821d4cd1f6a5f45501653dc856cf446fdb36e9a5..db5c048e6af2b68bb32495c4b7b726747b21afce 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -467,6 +467,12 @@ void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
 {
        mempool_t *pool;
 
+       /*
+        * Data-less bio, nothing to bounce
+        */
+       if (bio_empty_barrier(*bio_orig))
+               return;
+
        /*
         * for non-isa bounce case, just check if the bounce pfn is equal
         * to or bigger than the highest pfn in the system -- in that case,