ia64/linux-2.6.18-xen.hg

changeset 417:7128fe32720e

block: backport Jens Axboe's commit
bf2de6f5a4faf0197268f18d08969b003b87b6e8
(Tue, 16 Oct 2007 09:03:56 +0000 (11:03 +0200)):
"Initial support for data-less (or empty) barrier support"

blkback: permit and implement empty barrier.
Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Feb 11 10:08:57 2008 +0000 (2008-02-11)
parents bda2d96f9e28
children 90fbf541d772
files block/elevator.c block/ll_rw_blk.c drivers/xen/blkback/blkback.c fs/bio.c include/linux/bio.h include/linux/blkdev.h mm/highmem.c
line diff
     1.1 --- a/block/elevator.c	Mon Feb 11 10:05:29 2008 +0000
     1.2 +++ b/block/elevator.c	Mon Feb 11 10:08:57 2008 +0000
     1.3 @@ -493,6 +493,16 @@ struct request *elv_next_request(request
     1.4  	int ret;
     1.5  
     1.6  	while ((rq = __elv_next_request(q)) != NULL) {
     1.7 +		/*
     1.8 +		 * Kill the empty barrier place holder, the driver must
     1.9 +		 * not ever see it.
    1.10 +		 */
    1.11 +		if (blk_empty_barrier(rq)) {
    1.12 +			blkdev_dequeue_request(rq);
    1.13 +			end_that_request_chunk(rq, 1, 0);
    1.14 +			end_that_request_last(rq, 1);
    1.15 +			continue;
    1.16 +		}
    1.17  		if (!(rq->flags & REQ_STARTED)) {
    1.18  			elevator_t *e = q->elevator;
    1.19  
     2.1 --- a/block/ll_rw_blk.c	Mon Feb 11 10:05:29 2008 +0000
     2.2 +++ b/block/ll_rw_blk.c	Mon Feb 11 10:08:57 2008 +0000
     2.3 @@ -483,9 +483,12 @@ static inline struct request *start_orde
     2.4  	 * Queue ordered sequence.  As we stack them at the head, we
     2.5  	 * need to queue in reverse order.  Note that we rely on that
     2.6  	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
     2.7 -	 * request gets inbetween ordered sequence.
     2.8 +	 * request gets in between the ordered sequence. If this request
     2.9 +	 * is an empty barrier, we never need to do a postflush, since
    2.10 +	 * there will be no data written between the pre and post flush.
    2.11 +	 * Hence a single flush will suffice.
    2.12  	 */
    2.13 -	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
    2.14 +	if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
    2.15  		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
    2.16  	else
    2.17  		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
    2.18 @@ -2967,7 +2970,7 @@ static inline void blk_partition_remap(s
    2.19  {
    2.20  	struct block_device *bdev = bio->bi_bdev;
    2.21  
    2.22 -	if (bdev != bdev->bd_contains) {
    2.23 +	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
    2.24  		struct hd_struct *p = bdev->bd_part;
    2.25  		const int rw = bio_data_dir(bio);
    2.26  
    2.27 @@ -3028,7 +3031,7 @@ void generic_make_request(struct bio *bi
    2.28  	might_sleep();
    2.29  	/* Test device or partition size, when known. */
    2.30  	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
    2.31 -	if (maxsector) {
    2.32 +	if (maxsector && nr_sectors) {
    2.33  		sector_t sector = bio->bi_sector;
    2.34  
    2.35  		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
    2.36 @@ -3094,7 +3097,7 @@ end_io:
    2.37  		old_dev = bio->bi_bdev->bd_dev;
    2.38  
    2.39  		maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
    2.40 -		if (maxsector) {
    2.41 +		if (maxsector && nr_sectors) {
    2.42  			sector_t sector = bio->bi_sector;
    2.43  
    2.44  			if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
    2.45 @@ -3128,21 +3131,25 @@ void submit_bio(int rw, struct bio *bio)
    2.46  {
    2.47  	int count = bio_sectors(bio);
    2.48  
    2.49 -	BIO_BUG_ON(!bio->bi_size);
    2.50 -	BIO_BUG_ON(!bio->bi_io_vec);
    2.51  	bio->bi_rw |= rw;
    2.52 -	if (rw & WRITE)
    2.53 -		count_vm_events(PGPGOUT, count);
    2.54 -	else
    2.55 -		count_vm_events(PGPGIN, count);
    2.56 -
    2.57 -	if (unlikely(block_dump)) {
    2.58 -		char b[BDEVNAME_SIZE];
    2.59 -		printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
    2.60 -			current->comm, current->pid,
    2.61 -			(rw & WRITE) ? "WRITE" : "READ",
    2.62 -			(unsigned long long)bio->bi_sector,
    2.63 -			bdevname(bio->bi_bdev,b));
    2.64 +
    2.65 +	if (!bio_empty_barrier(bio)) {
    2.66 +		BIO_BUG_ON(!bio->bi_size);
    2.67 +		BIO_BUG_ON(!bio->bi_io_vec);
    2.68 +
    2.69 +		if (rw & WRITE)
    2.70 +			count_vm_events(PGPGOUT, count);
    2.71 +		else
    2.72 +			count_vm_events(PGPGIN, count);
    2.73 +
    2.74 +		if (unlikely(block_dump)) {
    2.75 +			char b[BDEVNAME_SIZE];
    2.76 +			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
    2.77 +				current->comm, current->pid,
    2.78 +				(rw & WRITE) ? "WRITE" : "READ",
    2.79 +				(unsigned long long)bio->bi_sector,
    2.80 +				bdevname(bio->bi_bdev,b));
    2.81 +		}
    2.82  	}
    2.83  
    2.84  	generic_make_request(bio);
    2.85 @@ -3260,6 +3267,13 @@ static int __end_that_request_first(stru
    2.86  	while ((bio = req->bio) != NULL) {
    2.87  		int nbytes;
    2.88  
    2.89 +		/* For an empty barrier request, the low level driver must
    2.90 +		 * store a potential error location in ->sector. We pass
    2.91 +		 * that back up in ->bi_sector
    2.92 +		 */
    2.93 +		if (blk_empty_barrier(req))
    2.94 +			bio->bi_sector = req->sector;
    2.95 +
    2.96  		if (nr_bytes >= bio->bi_size) {
    2.97  			req->bio = bio->bi_next;
    2.98  			nbytes = bio->bi_size;
     3.1 --- a/drivers/xen/blkback/blkback.c	Mon Feb 11 10:05:29 2008 +0000
     3.2 +++ b/drivers/xen/blkback/blkback.c	Mon Feb 11 10:08:57 2008 +0000
     3.3 @@ -407,7 +407,7 @@ static void dispatch_rw_block_io(blkif_t
     3.4  
     3.5  	/* Check that number of segments is sane. */
     3.6  	nseg = req->nr_segments;
     3.7 -	if (unlikely(nseg == 0) || 
     3.8 +	if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || 
     3.9  	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
    3.10  		DPRINTK("Bad number of segments in request (%d)\n", nseg);
    3.11  		goto fail_response;
    3.12 @@ -500,6 +500,18 @@ static void dispatch_rw_block_io(blkif_t
    3.13  		preq.sector_number += seg[i].nsec;
    3.14  	}
    3.15  
    3.16 +	if (!bio) {
    3.17 +		BUG_ON(operation != WRITE_BARRIER);
    3.18 +		bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0);
    3.19 +		if (unlikely(bio == NULL))
    3.20 +			goto fail_put_bio;
    3.21 +
    3.22 +		bio->bi_bdev    = preq.bdev;
    3.23 +		bio->bi_private = pending_req;
    3.24 +		bio->bi_end_io  = end_block_io_op;
    3.25 +		bio->bi_sector  = -1;
    3.26 +	}
    3.27 +
    3.28  	plug_queue(blkif, bio);
    3.29  	atomic_set(&pending_req->pendcnt, nbio);
    3.30  	blkif_get(blkif);
     4.1 --- a/fs/bio.c	Mon Feb 11 10:05:29 2008 +0000
     4.2 +++ b/fs/bio.c	Mon Feb 11 10:08:57 2008 +0000
     4.3 @@ -112,7 +112,8 @@ void bio_free(struct bio *bio, struct bi
     4.4  
     4.5  	BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
     4.6  
     4.7 -	mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
     4.8 +	if (bio->bi_io_vec)
     4.9 +		mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
    4.10  	mempool_free(bio, bio_set->bio_pool);
    4.11  }
    4.12  
     5.1 --- a/include/linux/bio.h	Mon Feb 11 10:05:29 2008 +0000
     5.2 +++ b/include/linux/bio.h	Mon Feb 11 10:08:57 2008 +0000
     5.3 @@ -172,12 +172,27 @@ struct bio {
     5.4  #define bio_offset(bio)		bio_iovec((bio))->bv_offset
     5.5  #define bio_segments(bio)	((bio)->bi_vcnt - (bio)->bi_idx)
     5.6  #define bio_sectors(bio)	((bio)->bi_size >> 9)
     5.7 -#define bio_cur_sectors(bio)	(bio_iovec(bio)->bv_len >> 9)
     5.8 -#define bio_data(bio)		(page_address(bio_page((bio))) + bio_offset((bio)))
     5.9  #define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
    5.10  #define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
    5.11  #define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
    5.12  #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
    5.13 +#define bio_empty_barrier(bio)	(bio_barrier(bio) && !(bio)->bi_size)
    5.14 +
    5.15 +static inline unsigned int bio_cur_sectors(struct bio *bio)
    5.16 +{
    5.17 +	if (bio->bi_vcnt)
    5.18 +		return bio_iovec(bio)->bv_len >> 9;
    5.19 +
    5.20 +	return 0;
    5.21 +}
    5.22 +
    5.23 +static inline void *bio_data(struct bio *bio)
    5.24 +{
    5.25 +	if (bio->bi_vcnt)
    5.26 +		return page_address(bio_page(bio)) + bio_offset(bio);
    5.27 +
    5.28 +	return NULL;
    5.29 +}
    5.30  
    5.31  /*
    5.32   * will die
     6.1 --- a/include/linux/blkdev.h	Mon Feb 11 10:05:29 2008 +0000
     6.2 +++ b/include/linux/blkdev.h	Mon Feb 11 10:08:57 2008 +0000
     6.3 @@ -506,6 +506,8 @@ enum {
     6.4  #define blk_barrier_rq(rq)	((rq)->flags & REQ_HARDBARRIER)
     6.5  #define blk_fua_rq(rq)		((rq)->flags & REQ_FUA)
     6.6  
     6.7 +#define blk_empty_barrier(rq)   (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
     6.8 +
     6.9  #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
    6.10  
    6.11  #define rq_data_dir(rq)		((rq)->flags & 1)
     7.1 --- a/mm/highmem.c	Mon Feb 11 10:05:29 2008 +0000
     7.2 +++ b/mm/highmem.c	Mon Feb 11 10:08:57 2008 +0000
     7.3 @@ -468,6 +468,12 @@ void blk_queue_bounce(request_queue_t *q
     7.4  	mempool_t *pool;
     7.5  
     7.6  	/*
     7.7 +	 * Data-less bio, nothing to bounce
     7.8 +	 */
     7.9 +	if (bio_empty_barrier(*bio_orig))
    7.10 +		return;
    7.11 +
    7.12 +	/*
    7.13  	 * for non-isa bounce case, just check if the bounce pfn is equal
    7.14  	 * to or bigger than the highest pfn in the system -- in that case,
    7.15  	 * don't waste time iterating over bio segments