xenbits.xensource.com Git - people/dstodden/blktap.git/commitdiff
CA-27617: improve VHD sequential write performance by not skipping bitmap regions...
author    Andrei Lifchits <andrei.lifchits@citrix.com>
          Wed, 11 Nov 2009 19:06:42 +0000 (11:06 -0800)
committer Andrei Lifchits <andrei.lifchits@citrix.com>
          Wed, 11 Nov 2009 19:06:42 +0000 (11:06 -0800)
(i.e., introduce redundant writes to make the pattern on the underlying block device sequential)
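 
The patch below keeps a page-aligned scratch buffer whose tail is an all-ones block bitmap and whose head is zero padding, so one write of the whole buffer ends exactly where a block's data sectors begin. A minimal standalone sketch of that setup, assuming 512-byte sectors and the standard 2 MB dynamic-VHD block (the variable names and the main() wrapper are illustrative, not the driver's API):

#define _GNU_SOURCE             /* getpagesize(), posix_memalign() */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define VHD_SECTOR_SHIFT 9      /* 512-byte VHD sectors */

int main(void)
{
	uint32_t spb     = (2 * 1024 * 1024) >> VHD_SECTOR_SHIFT; /* 4096 sectors per block */
	uint32_t bm_secs = 1;       /* 4096-bit bitmap = 512 bytes, rounded up to 1 sector  */

	/* Pad the bitmap write up to a whole page, as the patch does (the patch
	 * rounds bm_secs, a sector count; for the standard block size this still
	 * yields a single 4 KB page). */
	size_t padbm_size = (bm_secs / getpagesize()) * getpagesize();
	if (bm_secs % getpagesize())
		padbm_size += getpagesize();                       /* 4096 bytes */

	size_t bm_size = (size_t)bm_secs << VHD_SECTOR_SHIFT;      /* 512 bytes  */

	/* Sector-aligned buffer: zero padding followed by an all-ones bitmap.
	 * All-ones is valid only because the block is known to be fully
	 * allocated (its batmap entry is set). */
	char *padbm_buf;
	if (posix_memalign((void **)&padbm_buf, 512, padbm_size))
		return 1;
	memset(padbm_buf, 0, padbm_size - bm_size);
	memset(padbm_buf + (padbm_size - bm_size), ~0, bm_size);

	free(padbm_buf);
	return 0;
}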

drivers/block-vhd.c

diff --git a/drivers/block-vhd.c b/drivers/block-vhd.c
index e8004df3a4fe5da24ea1b7125ab4b25faed61257..86a21efaf5db4ff22dcee9ffad56cf275b3e1000 100644
--- a/drivers/block-vhd.c
+++ b/drivers/block-vhd.c
@@ -104,6 +104,7 @@ unsigned int SPB;
 #define VHD_OP_BITMAP_READ           3
 #define VHD_OP_BITMAP_WRITE          4
 #define VHD_OP_ZERO_BM_WRITE         5
+#define VHD_OP_REDUNDANT_BM_WRITE    6
 
 #define VHD_BM_BAT_LOCKED            0
 #define VHD_BM_BAT_CLEAR             1
@@ -222,6 +223,12 @@ struct vhd_state {
        struct vhd_request       *vreq_free[VHD_REQS_DATA];
        struct vhd_request        vreq_list[VHD_REQS_DATA];
 
+       /* for redundant bitmap writes */
+       int                       padbm_size;
+       char                     *padbm_buf;
+       long int                  debug_skipped_redundant_writes;
+       long int                  debug_done_redundant_writes;
+
        td_driver_t              *driver;
 
        uint64_t                  queued;
@@ -491,6 +498,7 @@ fail:
 static int
 vhd_initialize_dynamic_disk(struct vhd_state *s)
 {
+       u32 bm_size;
        int err;
 
        err = vhd_get_header(&s->vhd);
@@ -510,6 +518,19 @@ vhd_initialize_dynamic_disk(struct vhd_state *s)
        s->spb     = s->vhd.header.block_size >> VHD_SECTOR_SHIFT;
        s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
 
+       s->padbm_size = (s->bm_secs / getpagesize()) * getpagesize();
+       if (s->bm_secs % getpagesize())
+               s->padbm_size += getpagesize();
+
+       err = posix_memalign((void **)&s->padbm_buf, 512, s->padbm_size);
+       if (err)
+               return -err;
+       bm_size = s->bm_secs << VHD_SECTOR_SHIFT;
+       memset(s->padbm_buf, 0, s->padbm_size - bm_size);
+       memset(s->padbm_buf + (s->padbm_size - bm_size), ~0, bm_size);
+       s->debug_skipped_redundant_writes = 0;
+       s->debug_done_redundant_writes = 0;
+
        if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
                return 0;
 
@@ -716,6 +737,10 @@ _vhd_close(td_driver_t *driver)
        DBG(TLOG_WARN, "vhd_close\n");
        s = (struct vhd_state *)driver->data;
 
+       DPRINTF("gaps written/skipped: %ld/%ld\n", 
+                       s->debug_done_redundant_writes,
+                       s->debug_skipped_redundant_writes);
+
        /* don't write footer if tapdisk is read-only */
        if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
                goto free;
@@ -1327,6 +1352,56 @@ schedule_zero_bm_write(struct vhd_state *s,
        aio_write(s, req, offset);
 }
 
+/* This is a performance optimization. When writing sequentially into full 
+ * blocks, skipping (up-to-date) bitmaps causes an approx. 25% reduction in 
+ * throughput. To prevent skipping, we issue redundant writes into the (padded) 
+ * bitmap area just to make all writes sequential. This will help VHDs on raw 
+ * block devices, while the FS-based VHDs shouldn't suffer much.
+ *
+ * Note that it only makes sense to perform this redundant bitmap write if the
+ * block is completely full (i.e. the batmap entry is set). If the block is not
+ * completely full then one of the following two things will be true:
+ *  1. we'll be allocating new sectors in this block and writing its
+ *     bitmap transactionally, which will be slow anyway; or
+ *  2. the IO will be skipping over the unallocated sectors again, so the
+ *     pattern will not be sequential anyway
+ * In either case a redundant bitmap write becomes pointless. This fact
+ * simplifies the implementation of redundant writes: since we know the bitmap
+ * cannot be updated by anyone else, we don't have to worry about transactions
+ * or potential write conflicts.
+ */
+static void
+schedule_redundant_bm_write(struct vhd_state *s, u32 blk)
+{
+       uint64_t offset;
+       struct vhd_bitmap *bm;
+       struct vhd_request *req;
+
+       ASSERT(s->vhd.footer.type != HD_TYPE_FIXED);
+       ASSERT(test_batmap(s, blk));
+
+       req = alloc_vhd_request(s);
+       if (!req) 
+               return;
+
+       req->treq.buf = s->padbm_buf;
+
+       offset = bat_entry(s, blk);
+       ASSERT(offset != DD_BLK_UNUSED);
+       offset <<= VHD_SECTOR_SHIFT;
+       offset -= s->padbm_size - (s->bm_secs << VHD_SECTOR_SHIFT);
+
+       req->op        = VHD_OP_REDUNDANT_BM_WRITE;
+       req->treq.sec  = blk * s->spb;
+       req->treq.secs = s->padbm_size >> VHD_SECTOR_SHIFT;
+       req->next      = NULL;
+
+       DBG(TLOG_DBG, "blk: %u, writing redundant bitmap at %" PRIu64 "\n",
+           blk, offset);
+
+       aio_write(s, req, offset);
+}
+
 static int
 update_bat(struct vhd_state *s, uint32_t blk)
 {
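
As a worked example of the offset arithmetic in schedule_redundant_bm_write() above (the BAT value is made up; only the shifts mirror the patch): with bm_secs = 1 and padbm_size = 4096, the padded write starts 3584 bytes before the block's bitmap and ends exactly at the first data sector, so the data write that follows is physically contiguous with it.

/* Hypothetical numbers; only the arithmetic mirrors the patch. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define VHD_SECTOR_SHIFT 9

int main(void)
{
	uint64_t bat_entry  = 0x5000;   /* sector of the block's bitmap (example) */
	uint64_t bm_secs    = 1;        /* bitmap size in sectors                 */
	uint64_t padbm_size = 4096;     /* padded write size in bytes             */

	uint64_t bm_bytes = bm_secs << VHD_SECTOR_SHIFT;           /* 512 */
	uint64_t offset   = (bat_entry << VHD_SECTOR_SHIFT)
	                  - (padbm_size - bm_bytes);               /* start 3584 bytes early */

	printf("redundant write: %" PRIu64 " bytes at %" PRIu64 "\n",
	       padbm_size, offset);
	printf("write ends at %" PRIu64 " = bitmap start + %" PRIu64
	       " = first data sector\n", offset + padbm_size, bm_bytes);
	return 0;
}
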
@@ -1523,7 +1598,11 @@ schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
                        set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
                } else
                        add_to_transaction(&bm->tx, req);
-       }
+       } else if (sec == 0 &&  /* first sector inside data block */
+                  s->vhd.footer.type != HD_TYPE_FIXED && 
+                  treq.sec > 0 && /* not the first block */
+                  test_batmap(s, blk))
+               schedule_redundant_bm_write(s, blk);
 
        aio_write(s, req, offset);
 
@@ -2042,6 +2121,26 @@ finish_zero_bm_write(struct vhd_request *req)
                finish_data_transaction(s, bm);
 }
 
+static int
+finish_redundant_bm_write(struct vhd_request *req)
+{
+       /* u32 blk; */
+       struct vhd_state *s = (struct vhd_state *) req->state;
+
+       s->returned++;
+       TRACE(s);       
+       /* blk = req->treq.sec / s->spb;
+          DBG(TLOG_DBG, "blk: %u\n", blk); */
+
+       if (req->error) {
+               DPRINTF("******* finish redundant W: error: %d\n", req->error);
+       }
+       free_vhd_request(s, req);
+       s->debug_done_redundant_writes++;
+       return 0;
+}
+
+
 static void
 finish_bitmap_read(struct vhd_request *req)
 {
@@ -2210,6 +2309,10 @@ vhd_complete(void *arg, struct tiocb *tiocb, int err)
                finish_zero_bm_write(req);
                break;
 
+       case VHD_OP_REDUNDANT_BM_WRITE:
+               finish_redundant_bm_write(req);
+               break;
+
        case VHD_OP_BAT_WRITE:
                finish_bat_write(req);
                break;