From: Ian Jackson Date: Fri, 28 Mar 2008 16:54:09 +0000 (+0000) Subject: Perform emulated IDE flushes asynchronously. X-Git-Tag: xen-3.3.0-rc1~241 X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=00d0f1e4a56051d4257b55c23e667eb35e611c6b;p=qemu-xen-3.3-testing.git Perform emulated IDE flushes asynchronously. We arrange for the WIN_FLUSH_CACHE and WIN_FLUSH_CACHE_EXT commands to use a new bdrv_aio_flush facility. If there is an error, the ATA-7 spec says that we are supposed to know which is the first block whose flush failed and leave that in the block offset registers. However since we are using f(data)sync that's not possible for us. There is sadly no way for us to report the error which won't encourage the guest to try to understand what went wrong and then do the flush again expecting the remaining blocks to be written (as specified by ATA-7). So if the asynchronous flush fails, we kill the disk by detaching ->bs. This makes it vanish: we don't generate any more interrupts, leave status set to busy, and ignore future commands (and discard any in-flight IO). Alan Cox reports that this will probably induce the best available behaviour in guests (retry for a while and then give up). Fine-grained error reporting is available if the guest turns off the write cache. Signed-off-by: Ian Jackson Modified-by: Ian Jackson Signed-off-by: Kouya Shimura --- diff --git a/block-qcow.c b/block-qcow.c index 59f1c66c..4798ade0 100644 --- a/block-qcow.c +++ b/block-qcow.c @@ -725,6 +725,13 @@ static void qcow_aio_cancel(BlockDriverAIOCB *blockacb) qemu_aio_release(acb); } +static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVQcowState *s = bs->opaque; + return bdrv_aio_flush(s->hd, cb, opaque); +} + static void qcow_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; @@ -913,6 +920,7 @@ BlockDriver bdrv_qcow = { .bdrv_aio_read = qcow_aio_read, .bdrv_aio_write = qcow_aio_write, .bdrv_aio_cancel = qcow_aio_cancel, + .bdrv_aio_flush = qcow_aio_flush, .aiocb_size = sizeof(QCowAIOCB), .bdrv_write_compressed = qcow_write_compressed, .bdrv_get_info = qcow_get_info, diff --git a/block-qcow2.c b/block-qcow2.c index 9bad090c..9c63a632 100644 --- a/block-qcow2.c +++ b/block-qcow2.c @@ -1007,6 +1007,13 @@ static void qcow_aio_cancel(BlockDriverAIOCB *blockacb) qemu_aio_release(acb); } +static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVQcowState *s = bs->opaque; + return bdrv_aio_flush(s->hd, cb, opaque); +} + static void qcow_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; @@ -2241,6 +2248,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_aio_read = qcow_aio_read, .bdrv_aio_write = qcow_aio_write, .bdrv_aio_cancel = qcow_aio_cancel, + .bdrv_aio_flush = qcow_aio_flush, .aiocb_size = sizeof(QCowAIOCB), .bdrv_write_compressed = qcow_write_compressed, diff --git a/block-raw-posix.c b/block-raw-posix.c index 45d0a932..768d52c5 100644 --- a/block-raw-posix.c +++ b/block-raw-posix.c @@ -762,6 +762,21 @@ static int fd_open(BlockDriverState *bs) { return 0; } + +static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + RawAIOCB *acb; + + acb = raw_aio_setup(bs, 0, NULL, 0, cb, opaque); + if (!acb) + return NULL; + if (aio_fsync(O_SYNC, &acb->aiocb) < 0) { + qemu_aio_release(acb); + return NULL; + } + return &acb->common; +} #endif #if defined(__linux__) @@ -913,6 +928,7 @@ BlockDriver bdrv_host_device = { .bdrv_aio_read = raw_aio_read, .bdrv_aio_write = raw_aio_write, .bdrv_aio_cancel = raw_aio_cancel, + .bdrv_aio_flush = raw_aio_flush, .aiocb_size = sizeof(RawAIOCB), .bdrv_pread = raw_pread, .bdrv_pwrite = raw_pwrite, diff --git a/block.c b/block.c index fc32a60c..348a2156 100644 --- a/block.c +++ b/block.c @@ -50,6 +50,8 @@ static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs, static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); +static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb); static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); @@ -137,6 +139,8 @@ static void bdrv_register(BlockDriver *bdrv) bdrv->bdrv_read = bdrv_read_em; bdrv->bdrv_write = bdrv_write_em; } + if (!bdrv->bdrv_aio_flush) + bdrv->bdrv_aio_flush = bdrv_aio_flush_em; bdrv->next = first_drv; first_drv = bdrv; } @@ -1156,6 +1160,17 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb) drv->bdrv_aio_cancel(acb); } +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BlockDriver *drv = bs->drv; + + if (!drv) + return NULL; + + return drv->bdrv_aio_flush(bs, cb, opaque); +} + /**************************************************************/ /* async block device emulation */ @@ -1181,6 +1196,15 @@ static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, return NULL; } +static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + int ret; + ret = bdrv_flush(bs); + cb(opaque, ret); + return NULL; +} + static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb) { } @@ -1224,6 +1248,21 @@ static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs, return &acb->common; } +static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BlockDriverAIOCBSync *acb; + int ret; + + acb = qemu_aio_get(bs, cb, opaque); + if (!acb->bh) + acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); + ret = bdrv_flush(bs); + acb->ret = ret; + qemu_bh_schedule(acb->bh); + return &acb->common; +} + static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb) { BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb; diff --git a/block.h b/block.h index 861a65b1..0ecb0f8e 100644 --- a/block.h +++ b/block.h @@ -86,6 +86,8 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num, BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockDriverAIOCB *acb); void qemu_aio_init(void); diff --git a/block_int.h b/block_int.h index 796ce294..bda0eab2 100644 --- a/block_int.h +++ b/block_int.h @@ -55,6 +55,8 @@ struct BlockDriver { int64_t sector_num, const uint8_t *buf, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb); + BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); int aiocb_size; const char *protocol_name; diff --git a/hw/ide.c b/hw/ide.c index 0fb58e79..06eacdb0 100644 --- a/hw/ide.c +++ b/hw/ide.c @@ -724,6 +724,7 @@ static inline void ide_abort_command(IDEState *s) static inline void ide_set_irq(IDEState *s) { BMDMAState *bm = s->bmdma; + if (!s->bs) return; /* yikes */ if (!(s->cmd & IDE_CMD_DISABLE_IRQ)) { if (bm) { bm->status |= BM_STATUS_INT; @@ -903,6 +904,8 @@ static void ide_read_dma_cb(void *opaque, int ret) return; } + if (!s->bs) return; /* yikes */ + n = s->io_buffer_size >> 9; sector_num = ide_get_sector(s); if (n > 0) { @@ -1016,6 +1019,8 @@ static void ide_write_dma_cb(void *opaque, int ret) return; } + if (!s->bs) return; /* yikes */ + n = s->io_buffer_size >> 9; sector_num = ide_get_sector(s); if (n > 0) { @@ -1068,6 +1073,39 @@ static void ide_sector_write_dma(IDEState *s) ide_dma_start(s, ide_write_dma_cb); } +static void ide_device_utterly_broken(IDEState *s) { + s->status |= BUSY_STAT; + s->bs = NULL; + /* This prevents all future commands from working. All of the + * asynchronous callbacks (and ide_set_irq, as a safety measure) + * check to see whether this has happened and bail if so. + */ +} + +static void ide_flush_cb(void *opaque, int ret) +{ + IDEState *s = opaque; + + if (!s->bs) return; /* yikes */ + + if (ret) { + /* We are completely doomed. The IDE spec does not permit us + * to return an error from a flush except via a protocol which + * requires us to say where the error is and which + * contemplates the guest repeating the flush attempt to + * attempt flush the remaining data. We can't support that + * because f(data)sync (which is what the block drivers use + * eventually) doesn't report the necessary information or + * give us the necessary control. So we make the disk vanish. + */ + ide_device_utterly_broken(s); + return; + } + else + s->status = READY_STAT; + ide_set_irq(s); +} + static void ide_atapi_cmd_ok(IDEState *s) { s->error = 0; @@ -1294,6 +1332,8 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) IDEState *s = bm->ide_if; int data_offset, n; + if (!s->bs) return; /* yikes */ + if (ret < 0) { ide_atapi_io_error(s, ret); goto eot; @@ -1872,6 +1912,8 @@ static void cdrom_change_cb(void *opaque) IDEState *s = opaque; uint64_t nb_sectors; + if (!s->bs) return; /* yikes */ + /* XXX: send interrupt too */ bdrv_get_geometry(s->bs, &nb_sectors); s->nb_sectors = nb_sectors; @@ -1975,8 +2017,8 @@ static void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val) printf("ide: CMD=%02x\n", val); #endif s = ide_if->cur_drive; - /* ignore commands to non existant slave */ - if (s != ide_if && !s->bs) + /* ignore commands to non existant device */ + if (!s->bs) break; switch(val) { @@ -2169,12 +2211,8 @@ static void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val) break; case WIN_FLUSH_CACHE: case WIN_FLUSH_CACHE_EXT: - if (s->bs) { - ret = bdrv_flush(s->bs); - if (ret) goto abort_cmd; - } - s->status = READY_STAT; - ide_set_irq(s); + s->status = BUSY_STAT; + bdrv_aio_flush(s->bs, ide_flush_cb, s); break; case WIN_STANDBY: case WIN_STANDBY2: