#define VHD_OP_BITMAP_READ 3
#define VHD_OP_BITMAP_WRITE 4
#define VHD_OP_ZERO_BM_WRITE 5
+#define VHD_OP_REDUNDANT_BM_WRITE 6
#define VHD_BM_BAT_LOCKED 0
#define VHD_BM_BAT_CLEAR 1
struct vhd_request *vreq_free[VHD_REQS_DATA];
struct vhd_request vreq_list[VHD_REQS_DATA];
+ /* for redundant bitmap writes */
+ int padbm_size;
+ char *padbm_buf;
+ long int debug_skipped_redundant_writes;
+ long int debug_done_redundant_writes;
+
td_driver_t *driver;
uint64_t queued;
static int
vhd_initialize_dynamic_disk(struct vhd_state *s)
{
+ u32 bm_size;
int err;
err = vhd_get_header(&s->vhd);
s->spb = s->vhd.header.block_size >> VHD_SECTOR_SHIFT;
s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
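+ /* size the buffer used for redundant bitmap writes: the block bitmap
+  * rounded up to a whole number of pages */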
+ bm_size = s->bm_secs << VHD_SECTOR_SHIFT;
+ s->padbm_size = (bm_size / getpagesize()) * getpagesize();
+ if (bm_size % getpagesize())
+ s->padbm_size += getpagesize();
+
+ err = posix_memalign((void **)&s->padbm_buf, 512, s->padbm_size);
+ if (err)
+ return -err;
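+ /* the leading pad is zeroed; the tail holds the all-ones bitmap of a
+  * fully allocated block, which is what the redundant write re-asserts */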
+ memset(s->padbm_buf, 0, s->padbm_size - bm_size);
+ memset(s->padbm_buf + (s->padbm_size - bm_size), ~0, bm_size);
+ s->debug_skipped_redundant_writes = 0;
+ s->debug_done_redundant_writes = 0;
+
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
return 0;
DBG(TLOG_WARN, "vhd_close\n");
s = (struct vhd_state *)driver->data;
+ DPRINTF("gaps written/skipped: %ld/%ld\n",
+ s->debug_done_redundant_writes,
+ s->debug_skipped_redundant_writes);
+
/* don't write footer if tapdisk is read-only */
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
goto free;
aio_write(s, req, offset);
}
+/* This is a performance optimization. When writing sequentially into full
+ * blocks, skipping over the (up-to-date) bitmaps breaks the sequential I/O
+ * pattern and causes an approx. 25% reduction in throughput. To avoid the
+ * skipping, we issue redundant writes into the (padded) bitmap area just to
+ * keep all writes sequential. This helps VHDs on raw block devices, while
+ * FS-based VHDs shouldn't suffer much.
+ *
+ * Note that it only makes sense to perform this redundant bitmap write if the
+ * block is completely full (i.e. its batmap entry is set). If the block is
+ * not completely full, then one of the following two things will be true:
+ *   1. we will be allocating new sectors in this block and writing its
+ *      bitmap transactionally, which will be slow anyway; or
+ *   2. the IO will be skipping over the unallocated sectors again, so the
+ *      pattern will not be sequential anyway.
+ * In either case a redundant bitmap write is pointless. This fact simplifies
+ * the implementation: since we know the bitmap cannot be updated by anyone
+ * else, we don't have to worry about transactions or potential write
+ * conflicts.
+ */
+static void
+schedule_redundant_bm_write(struct vhd_state *s, u32 blk)
+{
+ uint64_t offset;
+ struct vhd_bitmap *bm;
+ struct vhd_request *req;
+
+ ASSERT(s->vhd.footer.type != HD_TYPE_FIXED);
+ ASSERT(test_batmap(s, blk));
+
+ req = alloc_vhd_request(s);
+ if (!req) {
+ s->debug_skipped_redundant_writes++;
+ return;
+ }
+
+ req->treq.buf = s->padbm_buf;
+
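+ /* the BAT entry points at the block's bitmap; start the padded write far
+  * enough back that it ends exactly where the block's data begins, so the
+  * data writes that follow stay sequential */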
+ offset = bat_entry(s, blk);
+ ASSERT(offset != DD_BLK_UNUSED);
+ offset <<= VHD_SECTOR_SHIFT;
+ offset -= s->padbm_size - (s->bm_secs << VHD_SECTOR_SHIFT);
+
+ req->op = VHD_OP_REDUNDANT_BM_WRITE;
+ req->treq.sec = blk * s->spb;
+ req->treq.secs = s->padbm_size >> VHD_SECTOR_SHIFT;
+ req->next = NULL;
+
+ DBG(TLOG_DBG, "blk: %u, writing redundant bitmap at %" PRIu64 "\n",
+ blk, offset);
+
+ aio_write(s, req, offset);
+}
+
static int
update_bat(struct vhd_state *s, uint32_t blk)
{
set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
} else
add_to_transaction(&bm->tx, req);
- }
+ } else if (sec == 0 && /* first sector inside data block */
+ s->vhd.footer.type != HD_TYPE_FIXED &&
+ treq.sec > 0 && /* not the first block */
+ test_batmap(s, blk))
+ schedule_redundant_bm_write(s, blk);
aio_write(s, req, offset);
finish_data_transaction(s, bm);
}
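+/* Completion handler for VHD_OP_REDUNDANT_BM_WRITE. The write is not part of
+ * any bitmap transaction, so simply account for it and release the request. */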
+static int
+finish_redundant_bm_write(struct vhd_request *req)
+{
+ struct vhd_state *s = (struct vhd_state *) req->state;
+
+ s->returned++;
+ TRACE(s);
+
+ if (req->error) {
+ DPRINTF("******* finish redundant W: error: %d\n", req->error);
+ }
+ free_vhd_request(s, req);
+ s->debug_done_redundant_writes++;
+ return 0;
+}
+
static void
finish_bitmap_read(struct vhd_request *req)
{
finish_zero_bm_write(req);
break;
+ case VHD_OP_REDUNDANT_BM_WRITE:
+ finish_redundant_bm_write(req);
+ break;
case VHD_OP_BAT_WRITE:
finish_bat_write(req);
break;