]> xenbits.xensource.com Git - qemu-xen-4.6-testing.git/commitdiff
Perform emulated IDE flushes asynchronously.
authorIan Jackson <iwj@mariner.uk.xensource.com>
Fri, 28 Mar 2008 16:54:09 +0000 (16:54 +0000)
committerIan Jackson <Ian.Jackson@eu.citrix.com>
Mon, 12 May 2008 11:16:24 +0000 (12:16 +0100)
We arrange for the WIN_FLUSH_CACHE and WIN_FLUSH_CACHE_EXT
commands to use a new bdrv_aio_flush facility.

If there is an error, the ATA-7 spec says that we are supposed to know
which is the first block whose flush failed and leave that in the
block offset registers.  However since we are using f(data)sync that's
not possible for us.  There is sadly no way for us to report the error
which won't encourage the guest to try to understand what went wrong
and then do the flush again expecting the remaining blocks to be
written (as specified by ATA-7).

So if the asynchronous flush fails, we kill the disk by detaching
->bs.  This makes it vanish: we don't generate any more interrupts,
leave status set to busy, and ignore future commands (and discard any
in-flight IO).  Alan Cox reports that this will probably induce the
best available behaviour in guests (retry for a while and then give
up).  Fine-grained error reporting is available if the guest turns off
the write cache.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Modified-by: Ian Jackson <ian.jackson@eu.citrix.com>
Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>
block-qcow.c
block-qcow2.c
block-raw-posix.c
block.c
block.h
block_int.h
hw/ide.c

index 59f1c66c56e9aee4b918a37bea135b8b8ededaa0..4798ade04711586da6352b4f6d7112408638b580 100644 (file)
@@ -725,6 +725,13 @@ static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
     qemu_aio_release(acb);
 }
 
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVQcowState *s = bs->opaque;
+    return bdrv_aio_flush(s->hd, cb, opaque);
+}
+
 static void qcow_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
@@ -913,6 +920,7 @@ BlockDriver bdrv_qcow = {
     .bdrv_aio_read = qcow_aio_read,
     .bdrv_aio_write = qcow_aio_write,
     .bdrv_aio_cancel = qcow_aio_cancel,
+    .bdrv_aio_flush = qcow_aio_flush,
     .aiocb_size = sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,
     .bdrv_get_info = qcow_get_info,
index 9bad090c4555de95b0cc35a29e399d83ad8db5b6..9c63a632faf49871d0d02e99abc661aa2aa5e819 100644 (file)
@@ -1007,6 +1007,13 @@ static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
     qemu_aio_release(acb);
 }
 
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVQcowState *s = bs->opaque;
+    return bdrv_aio_flush(s->hd, cb, opaque);
+}
+
 static void qcow_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
@@ -2241,6 +2248,7 @@ BlockDriver bdrv_qcow2 = {
     .bdrv_aio_read = qcow_aio_read,
     .bdrv_aio_write = qcow_aio_write,
     .bdrv_aio_cancel = qcow_aio_cancel,
+    .bdrv_aio_flush = qcow_aio_flush,
     .aiocb_size = sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,
 
index 45d0a932c813e7988b7559e90d8e5823b62f05e3..768d52c57793629adcbcad0ef7c35ce65d0c8faf 100644 (file)
@@ -762,6 +762,21 @@ static int fd_open(BlockDriverState *bs)
 {
     return 0;
 }
+
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    RawAIOCB *acb;
+
+    acb = raw_aio_setup(bs, 0, NULL, 0, cb, opaque);
+    if (!acb)
+        return NULL;
+    if (aio_fsync(O_SYNC, &acb->aiocb) < 0) {
+        qemu_aio_release(acb);
+        return NULL;
+    }
+    return &acb->common;
+}
 #endif
 
 #if defined(__linux__)
@@ -913,6 +928,7 @@ BlockDriver bdrv_host_device = {
     .bdrv_aio_read = raw_aio_read,
     .bdrv_aio_write = raw_aio_write,
     .bdrv_aio_cancel = raw_aio_cancel,
+    .bdrv_aio_flush = raw_aio_flush,
     .aiocb_size = sizeof(RawAIOCB),
     .bdrv_pread = raw_pread,
     .bdrv_pwrite = raw_pwrite,
diff --git a/block.c b/block.c
index fc32a60c6e2ee41688a94a336360cc7451d8269a..348a215623c20a72ac978a321eec98a08dbc4ad4 100644 (file)
--- a/block.c
+++ b/block.c
@@ -50,6 +50,8 @@ static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
         int64_t sector_num, const uint8_t *buf, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque);
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                         uint8_t *buf, int nb_sectors);
@@ -137,6 +139,8 @@ static void bdrv_register(BlockDriver *bdrv)
         bdrv->bdrv_read = bdrv_read_em;
         bdrv->bdrv_write = bdrv_write_em;
     }
+    if (!bdrv->bdrv_aio_flush)
+        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
     bdrv->next = first_drv;
     first_drv = bdrv;
 }
@@ -1156,6 +1160,17 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
     drv->bdrv_aio_cancel(acb);
 }
 
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, 
+                                 BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv)
+        return NULL;
+
+    return drv->bdrv_aio_flush(bs, cb, opaque);
+}
+
 
 /**************************************************************/
 /* async block device emulation */
@@ -1181,6 +1196,15 @@ static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
     return NULL;
 }
 
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    int ret;
+    ret = bdrv_flush(bs);
+    cb(opaque, ret);
+    return NULL;
+}
+
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb)
 {
 }
@@ -1224,6 +1248,21 @@ static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
     return &acb->common;
 }
 
+static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverAIOCBSync *acb;
+    int ret;
+
+    acb = qemu_aio_get(bs, cb, opaque);
+    if (!acb->bh)
+        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+    ret = bdrv_flush(bs);
+    acb->ret = ret;
+    qemu_bh_schedule(acb->bh);
+    return &acb->common;
+}
+
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
 {
     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
diff --git a/block.h b/block.h
index 861a65b18036d51330ef77b4e4cb355fc16f91af..0ecb0f8e4bc9e32c12aeddbd9967247648274e89 100644 (file)
--- a/block.h
+++ b/block.h
@@ -86,6 +86,8 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
                                  const uint8_t *buf, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, 
+                                 BlockDriverCompletionFunc *cb, void *opaque);
 void bdrv_aio_cancel(BlockDriverAIOCB *acb);
 
 void qemu_aio_init(void);
index 796ce294ff9732eeef0d8dfb55096901ec5875c8..bda0eab250cc7f94ea3ab97a52449156a976779c 100644 (file)
@@ -55,6 +55,8 @@ struct BlockDriver {
         int64_t sector_num, const uint8_t *buf, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
     void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
+    BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque);
     int aiocb_size;
 
     const char *protocol_name;
index 0fb58e79946789c4a468f15448c74b4b4d2ffeee..06eacdb015ae3399c8d8edd8e268ef9eb6f27410 100644 (file)
--- a/hw/ide.c
+++ b/hw/ide.c
@@ -724,6 +724,7 @@ static inline void ide_abort_command(IDEState *s)
 static inline void ide_set_irq(IDEState *s)
 {
     BMDMAState *bm = s->bmdma;
+    if (!s->bs) return; /* yikes */
     if (!(s->cmd & IDE_CMD_DISABLE_IRQ)) {
         if (bm) {
             bm->status |= BM_STATUS_INT;
@@ -903,6 +904,8 @@ static void ide_read_dma_cb(void *opaque, int ret)
        return;
     }
 
+    if (!s->bs) return; /* yikes */
+
     n = s->io_buffer_size >> 9;
     sector_num = ide_get_sector(s);
     if (n > 0) {
@@ -1016,6 +1019,8 @@ static void ide_write_dma_cb(void *opaque, int ret)
        return;
     }
 
+    if (!s->bs) return; /* yikes */
+
     n = s->io_buffer_size >> 9;
     sector_num = ide_get_sector(s);
     if (n > 0) {
@@ -1068,6 +1073,39 @@ static void ide_sector_write_dma(IDEState *s)
     ide_dma_start(s, ide_write_dma_cb);
 }
 
+static void ide_device_utterly_broken(IDEState *s) {
+    s->status |= BUSY_STAT;
+    s->bs = NULL;
+    /* This prevents all future commands from working.  All of the
+     * asynchronous callbacks (and ide_set_irq, as a safety measure)
+     * check to see whether this has happened and bail if so.
+     */
+}
+
+static void ide_flush_cb(void *opaque, int ret)
+{
+    IDEState *s = opaque;
+
+    if (!s->bs) return; /* yikes */
+
+    if (ret) {
+        /* We are completely doomed.  The IDE spec does not permit us
+        * to return an error from a flush except via a protocol which
+        * requires us to say where the error is and which
+        * contemplates the guest repeating the flush attempt to
+        * attempt flush the remaining data.  We can't support that
+        * because f(data)sync (which is what the block drivers use
+        * eventually) doesn't report the necessary information or
+        * give us the necessary control.  So we make the disk vanish.
+        */
+       ide_device_utterly_broken(s);
+       return;
+    }
+    else
+        s->status = READY_STAT;
+    ide_set_irq(s);
+}
+
 static void ide_atapi_cmd_ok(IDEState *s)
 {
     s->error = 0;
@@ -1294,6 +1332,8 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
     IDEState *s = bm->ide_if;
     int data_offset, n;
 
+    if (!s->bs) return; /* yikes */
+
     if (ret < 0) {
         ide_atapi_io_error(s, ret);
         goto eot;
@@ -1872,6 +1912,8 @@ static void cdrom_change_cb(void *opaque)
     IDEState *s = opaque;
     uint64_t nb_sectors;
 
+    if (!s->bs) return; /* yikes */
+
     /* XXX: send interrupt too */
     bdrv_get_geometry(s->bs, &nb_sectors);
     s->nb_sectors = nb_sectors;
@@ -1975,8 +2017,8 @@ static void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         printf("ide: CMD=%02x\n", val);
 #endif
         s = ide_if->cur_drive;
-        /* ignore commands to non existant slave */
-        if (s != ide_if && !s->bs)
+       /* ignore commands to non existant device */
+        if (!s->bs)
             break;
 
         switch(val) {
@@ -2169,12 +2211,8 @@ static void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val)
             break;
         case WIN_FLUSH_CACHE:
         case WIN_FLUSH_CACHE_EXT:
-            if (s->bs) {
-                ret = bdrv_flush(s->bs);
-               if (ret) goto abort_cmd;
-           }
-           s->status = READY_STAT;
-            ide_set_irq(s);
+           s->status = BUSY_STAT;
+           bdrv_aio_flush(s->bs, ide_flush_cb, s);
             break;
         case WIN_STANDBY:
         case WIN_STANDBY2: