From: t_jeang Date: Tue, 6 Jan 2009 12:06:01 +0000 (+0000) Subject: imported patch CA-7672-blk-shutdown.patch X-Git-Tag: blktap-missed-requests-CA-7665 X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=c87707a2a1e6f2851669973fc37273f010b07648;p=xenclient%2Fkernel.git imported patch CA-7672-blk-shutdown.patch --- diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 1652edf9..f94ff73b 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -208,16 +208,16 @@ int blkif_schedule(void *arg) if (debug_lvl) printk(KERN_DEBUG "%s: started\n", current->comm); - while (!kthread_should_stop()) { + while (!blkif->remove_requested) { if (try_to_freeze()) continue; wait_event_interruptible( blkif->wq, - blkif->waiting_reqs || kthread_should_stop()); + blkif->waiting_reqs || blkif->remove_requested); wait_event_interruptible( pending_free_wq, - !list_empty(&pending_free) || kthread_should_stop()); + !list_empty(&pending_free) || blkif->remove_requested); blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ @@ -235,8 +235,8 @@ int blkif_schedule(void *arg) if (debug_lvl) printk(KERN_DEBUG "%s: exiting\n", current->comm); - blkif->xenblkd = NULL; blkif_put(blkif); + blkback_close(blkif); return 0; } @@ -324,7 +324,7 @@ static int do_block_io_op(blkif_t *blkif) break; } - if (kthread_should_stop()) { + if (blkif->remove_requested) { more_to_do = 1; break; } diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 425d8cd6..53911bff 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -48,6 +48,8 @@ pr_debug("(file=%s, line=%d) " _f, \ __FILE__ , __LINE__ , ## _a ) +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) + struct vbd { blkif_vdev_t handle; /* what the domain refers to this vbd as */ unsigned char readonly; /* Non-zero -> read-only */ @@ -73,6 +75,7 @@ typedef struct blkif_st { /* Back pointer to the backend_info. */ struct backend_info *be; /* Private fields. */ + int remove_requested; spinlock_t blk_ring_lock; atomic_t refcnt; @@ -112,6 +115,7 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, unsigned minor, int readonly, int cdrom); void vbd_free(struct vbd *vbd); +void vbd_sync(struct vbd *vbd); unsigned long long vbd_size(struct vbd *vbd); unsigned int vbd_info(struct vbd *vbd); @@ -138,4 +142,6 @@ void blkif_notify_work(blkif_t *blkif); int blkback_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state); +void blkback_close(blkif_t *blkif); + #endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index 6df7b734..02fe4c8d 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -146,11 +146,6 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) void blkif_disconnect(blkif_t *blkif) { - if (blkif->xenblkd) { - kthread_stop(blkif->xenblkd); - blkif->xenblkd = NULL; - } - atomic_dec(&blkif->refcnt); wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); atomic_inc(&blkif->refcnt); diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 2594419f..8729176f 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -98,6 +98,12 @@ void vbd_free(struct vbd *vbd) vbd->bdev = NULL; } +void vbd_sync(struct vbd *vbd) +{ + if (vbd->bdev) + fsync_bdev(vbd->bdev); +} + int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) { struct vbd *vbd = &blkif->vbd; diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 9d95137f..08ff8fc1 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -32,11 +32,19 @@ struct backend_info struct xenbus_device *dev; blkif_t *blkif; struct xenbus_watch backend_watch; + struct xenbus_watch shutdown_watch; unsigned major; unsigned minor; char *mode; + int group_added; + char *nodename; + atomic_t refcnt; + pid_t kthread_pid; + int shutdown_signalled; }; +DECLARE_MUTEX(blkback_dev_sem); + static void connect(struct backend_info *); static int connect_ring(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, @@ -93,6 +101,9 @@ static void update_blkif_status(blkif_t *blkif) xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); return; } + + blkif->be->kthread_pid = blkif->xenblkd->pid; + atomic_inc(&blkif->be->refcnt); err = xenbus_printf(XBT_NIL, blkif->be->dev->nodename, "kthread-pid", "%d", blkif->xenblkd->pid); @@ -147,6 +158,7 @@ VBD_SHOW(mode, "%s\n", be->mode); int xenvbd_sysfs_addif(struct xenbus_device *dev) { int error; + struct backend_info *be = dev->dev.driver_data; error = device_create_file(&dev->dev, &dev_attr_physical_device); if (error) @@ -160,6 +172,8 @@ int xenvbd_sysfs_addif(struct xenbus_device *dev) if (error) goto fail3; + be->group_added = 1; + return 0; fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); @@ -170,19 +184,81 @@ fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); void xenvbd_sysfs_delif(struct xenbus_device *dev) { + struct backend_info *be = dev->dev.driver_data; + if (be->group_added == 0) + return; sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); device_remove_file(&dev->dev, &dev_attr_mode); device_remove_file(&dev->dev, &dev_attr_physical_device); + be->group_added = 0; +} + +static int kthread_remove(struct backend_info *be) +{ + blkif_t *blkif = be->blkif; + + if (!blkif || !blkif->xenblkd) + return 0; + + blkif->remove_requested = 1; + wake_up_process(blkif->xenblkd); + + return -EBUSY; +} + +static void signal_shutdown(struct backend_info *be) +{ + int err; + + down(&blkback_dev_sem); + + if (be->shutdown_signalled) + goto out; + + err = xenbus_write(XBT_NIL, be->nodename, "shutdown-done", ""); + if (err) + WPRINTK("Error writing shutdown-done for %s: %d\n", + be->nodename, err); + + if (be->dev) + xenbus_switch_state(be->dev, XenbusStateClosed); + + be->shutdown_signalled = 1; + + out: + up(&blkback_dev_sem); +} + +static void backend_release(struct backend_info *be) +{ + blkif_t *blkif = be->blkif; + + if (current->pid == be->kthread_pid) + signal_shutdown(be); + + if (!atomic_dec_and_test(&be->refcnt)) + return; + + signal_shutdown(be); + + if (blkif) { + blkif_disconnect(blkif); + vbd_free(&blkif->vbd); + blkif_free(blkif); + be->blkif = NULL; + } + + kfree(be->nodename); + kfree(be); } static int blkback_remove(struct xenbus_device *dev) { struct backend_info *be = dev->dev.driver_data; - DPRINTK(""); - - if (be->major || be->minor) - xenvbd_sysfs_delif(dev); + down(&blkback_dev_sem); + be->dev = NULL; + up(&blkback_dev_sem); if (be->backend_watch.node) { unregister_xenbus_watch(&be->backend_watch); @@ -190,18 +266,74 @@ static int blkback_remove(struct xenbus_device *dev) be->backend_watch.node = NULL; } - if (be->blkif) { - blkif_disconnect(be->blkif); - vbd_free(&be->blkif->vbd); - blkif_free(be->blkif); - be->blkif = NULL; + if (be->shutdown_watch.node) { + unregister_xenbus_watch(&be->shutdown_watch); + kfree(be->shutdown_watch.node); + be->shutdown_watch.node = NULL; } - kfree(be); + if (kthread_remove(be)) + WPRINTK("BAD REMOVE REQUEST for %s\n", be->nodename); + + xenvbd_sysfs_delif(dev); + backend_release(be); + dev->dev.driver_data = NULL; + return 0; } +/* + * called by kthread when closing + */ +void blkback_close(blkif_t *blkif) +{ + blkif_disconnect(blkif); + vbd_sync(&blkif->vbd); + blkif->remove_requested = 0; + + down(&blkback_dev_sem); + if (blkif->be->dev) + xenvbd_sysfs_delif(blkif->be->dev); + up(&blkback_dev_sem); + + backend_release(blkif->be); + blkif->xenblkd = NULL; +} + +static void start_shutdown(struct xenbus_watch *watch, + const char **vec, unsigned int length) +{ + int err; + char *type; + unsigned int len; + struct backend_info *be + = container_of(watch, struct backend_info, shutdown_watch); + struct xenbus_device *dev = be->dev; + + if (be->shutdown_signalled) + return; + + type = xenbus_read(XBT_NIL, dev->nodename, "shutdown-request", &len); + err = (IS_ERR(type) ? PTR_ERR(type) : 0); + + if (XENBUS_EXIST_ERR(err)) + return; + + if (err) { + xenbus_dev_fatal(dev, err, "reading shutdown-request"); + return; + } + + xenbus_switch_state(dev, XenbusStateClosing); + + if (len == sizeof("force") - 1 && !memcmp(type, "force", len)) + if (!kthread_remove(be)) + signal_shutdown(be); /* shutdown immediately */ + + kfree(type); +} + int blkback_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state) { @@ -234,6 +366,15 @@ static int blkback_probe(struct xenbus_device *dev, } be->dev = dev; dev->dev.driver_data = be; + atomic_set(&be->refcnt, 1); + + be->nodename = kasprintf(GFP_KERNEL, "%s", dev->nodename); + if (!be->nodename) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + kfree(be); + return -ENOMEM; + } be->blkif = blkif_alloc(dev->otherend_id); if (IS_ERR(be->blkif)) { @@ -251,6 +392,11 @@ static int blkback_probe(struct xenbus_device *dev, if (err) goto fail; + err = xenbus_watch_path2(dev, dev->nodename, "shutdown-request", + &be->shutdown_watch, start_shutdown); + if (err) + goto fail; + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -359,13 +505,17 @@ static void frontend_changed(struct xenbus_device *dev, struct backend_info *be = dev->dev.driver_data; int err; - DPRINTK("%s", xenbus_strstate(frontend_state)); + DPRINTK("%s: %s", dev->nodename, xenbus_strstate(frontend_state)); switch (frontend_state) { case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { printk(KERN_INFO "%s: %s: prepare for reconnect\n", __FUNCTION__, dev->nodename); + + xenbus_rm(XBT_NIL, dev->nodename, "shutdown-done"); + be->shutdown_signalled = 0; + xenbus_switch_state(dev, XenbusStateInitWait); } break; @@ -388,15 +538,14 @@ static void frontend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - blkif_disconnect(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; case XenbusStateClosed: - xenbus_switch_state(dev, XenbusStateClosed); - if (xenbus_dev_is_online(dev)) - break; - /* fall through if not online */ + if (!kthread_remove(be)) + signal_shutdown(be); + break; + case XenbusStateUnknown: device_unregister(&dev->dev); break; @@ -406,6 +555,8 @@ static void frontend_changed(struct xenbus_device *dev, frontend_state); break; } + + DPRINTK("%s: %s", dev->nodename, xenbus_strstate(dev->state)); } diff --git a/drivers/xen/blkfront/blkfront.c b/drivers/xen/blkfront/blkfront.c index c4b1ecd6..83e5c6a6 100644 --- a/drivers/xen/blkfront/blkfront.c +++ b/drivers/xen/blkfront/blkfront.c @@ -265,7 +265,8 @@ static void backend_changed(struct xenbus_device *dev, struct blkfront_info *info = dev->dev.driver_data; struct block_device *bd; - DPRINTK("blkfront:backend_changed.\n"); + DPRINTK("%s: %s\n", dev->nodename, + xenbus_strstate(backend_state)); switch (backend_state) { case XenbusStateInitialising: diff --git a/drivers/xen/blktap/backdev.c b/drivers/xen/blktap/backdev.c index 9281dcf6..3ebed59f 100644 --- a/drivers/xen/blktap/backdev.c +++ b/drivers/xen/blktap/backdev.c @@ -50,6 +50,10 @@ backdev_end_requests(struct tap_blkif *info) int ret, i; spin_lock_irq(&backdev_io_lock); + + if (!info->idx_map) + goto start_queue; + for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { if (info->idx_map[usr_idx] == INVALID_REQ) continue; @@ -57,21 +61,26 @@ backdev_end_requests(struct tap_blkif *info) mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); pending_req = &pending_reqs[mmap_idx][pending_idx]; blkif_put(info->blkif); + WPRINTK("failing %s of %d pages pending on close\n", + (pending_req->operation == BLKIF_OP_READ ? + "read" : "write"), pending_req->nr_pages); if (pending_req->inuse != 2) continue; for (i = 0; i < pending_req->nr_pages; i++) umap_uaddr(&init_mm, idx_to_kaddr(mmap_idx, pending_idx, i)); req = (struct request *)(unsigned long)pending_req->id; - /* See NOTE in do_backdev_request. */ - ret = end_that_request_first(req, 1, req->hard_nr_sectors); + ret = end_that_request_first(req, 0, req->hard_nr_sectors); BUG_ON(ret); - end_that_request_last(req, 1); + end_that_request_last(req, 0); } + + start_queue: if (info->backdev) { info->backdev->gd->queue->queuedata = NULL; blk_start_queue(info->backdev->gd->queue); } + spin_unlock_irq(&backdev_io_lock); } @@ -95,7 +104,7 @@ destroy_backdev(struct tap_blkif *uinfo) blk_cleanup_queue(info->gd->queue); if (uinfo->blkif->xenblkd) - wake_up_process(uinfo->blkif->xenblkd); + wake_up_process(uinfo->blkif->xenblkd); blkif_put(uinfo->blkif); @@ -240,7 +249,7 @@ static int umap_uaddr_fn( pte_t *ptep, struct page *pmd_page, unsigned long addr, void *data) { struct mm_struct *mm = (struct mm_struct *)data; - + DPRINTK("unmap_uaddr ptep %p\n", ptep); pte_clear(mm, addr, ptep); xen_invlpg(addr); @@ -291,11 +300,14 @@ process_backdev_request(struct tap_blkif *uinfo, struct backdev_info *info) end_request(req, 0); continue; } + +#if 0 if (uinfo->remove_requested) { DPRINTK("device no longer in use %d\n", info->uinfo->minor); end_request(req, 0); continue; } +#endif if (RING_FULL(&uinfo->ufe_ring)) { wait: @@ -419,12 +431,11 @@ do_backdev_request(request_queue_t *rq) info = rq->queuedata; if (info == NULL || info->uinfo == NULL || info->uinfo->remove_requested) { - /* NOTE: we pretend that the request succeeded because - * this seems better than returning block device - * errors to dom0 given that the linux filesystem code - * doesn't seem to handle these too gracefully. */ - while ((req = elv_next_request(rq))) - end_request(req, 1); + while ((req = elv_next_request(rq))) { + WPRINTK("backdev closed: failing secs %llu - %llu\n", + req->sector, req->sector + req->nr_sectors); + end_request(req, 0); + } return; } info->uinfo->blkif->waiting_reqs = 1; @@ -510,6 +521,40 @@ backdev_restart_queue(struct tap_blkif *uinfo) return; } +int +backdev_setup(struct tap_blkif *uinfo) +{ + int ret; + unsigned long domid, busid; + + if (!uinfo->trans.domid && !uinfo->trans.busid) + return -EINVAL; + + domid = uinfo->trans.domid; + busid = uinfo->trans.busid; + + while (backend_device_linked(domid, busid) && + uinfo->backdev == NULL && uinfo->dev_inuse && + uinfo->trans.domid == domid && uinfo->trans.busid == busid) { + + ret = wait_event_interruptible(backdev_setup_wq, + uinfo->backdev || + uinfo->dev_inuse == 0 || + uinfo->trans.domid != domid || + uinfo->trans.busid != busid || + !backend_device_linked(domid, + busid)); + if (ret) + return ret; + } + + if (!backend_device_linked(domid, busid) || uinfo->dev_inuse == 0 || + uinfo->trans.domid != domid || uinfo->trans.busid != busid) + return -EINVAL; + + return 0; +} + int create_backdev(struct tap_blkif *uinfo) { diff --git a/drivers/xen/blktap/backdev.h b/drivers/xen/blktap/backdev.h index c25086cf..db54666e 100644 --- a/drivers/xen/blktap/backdev.h +++ b/drivers/xen/blktap/backdev.h @@ -11,6 +11,7 @@ struct backdev_info { extern int register_backdev(void); extern int create_backdev(struct tap_blkif *); extern int destroy_backdev(struct tap_blkif *); +extern int backdev_setup(struct tap_blkif *); extern void backdev_finish_req(struct tap_blkif *, int, blkif_response_t *, struct pending_req *); extern void backdev_restart_queue(struct tap_blkif *); diff --git a/drivers/xen/blktap/blktap.c b/drivers/xen/blktap/blktap.c index 6d014083..581f551e 100644 --- a/drivers/xen/blktap/blktap.c +++ b/drivers/xen/blktap/blktap.c @@ -54,7 +54,6 @@ #include #include #include -#include #include "blktap.h" #include "backdev.h" @@ -253,7 +252,7 @@ static void blktap_vm_close(struct vm_area_struct *vma) info->vma = NULL; clear_bit(2, &info->dev_inuse); - if (info->blkif->xenblkd) + if (info->blkif && info->blkif->xenblkd) wake_up_process(info->blkif->xenblkd); up_write(&info->vm_update_sem); @@ -349,6 +348,8 @@ found: } out: + if (info) + info->blkif = NULL; return info; } @@ -369,6 +370,24 @@ associate_blkif(domid_t domid, int xenbus_id, blkif_t *blkif) return NULL; } +void disassociate_blkif(domid_t domid, int xenbus_id) +{ + int i; + tap_blkif_t *info; + + for (i = 1; i < MAX_TAP_DEV; i++) { + info = tapfds[i]; + if (info && info->trans.domid == domid && + info->trans.busid == xenbus_id) { + info->trans.busid = 0; + info->trans.domid = 0; + clear_bit(0, &info->dev_inuse); + wake_up(&backdev_setup_wq); + return; + } + } +} + static int blktap_open(struct inode *inode, struct file *filp) { int idx = iminor(inode) - BLKTAP_MINOR; @@ -389,7 +408,8 @@ static int blktap_open(struct inode *inode, struct file *filp) DPRINTK("Opening device /dev/xen/blktap%d\n", idx); /* Only one process can access device at a time */ - if (test_and_set_bit(1, &info->dev_inuse)) + if (!test_bit(0, &info->dev_inuse) || + test_and_set_bit(1, &info->dev_inuse)) return -EBUSY; clear_bit(0, &info->dev_inuse); @@ -411,6 +431,7 @@ static int blktap_release(struct inode *inode, struct file *filp) DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor); clear_bit(1, &info->dev_inuse); + wake_up(&backdev_setup_wq); filp->private_data = NULL; @@ -591,6 +612,7 @@ static int blktap_ioctl(struct inode *inode, struct file *filp, return -EINVAL; clear_bit(0, &tapfds[dev]->dev_inuse); + wake_up(&backdev_setup_wq); return 0; } @@ -616,19 +638,12 @@ static int blktap_ioctl(struct inode *inode, struct file *filp, case BLKTAP_IOCTL_BACKDEV_SETUP: { unsigned long dev = arg; - int ret; DPRINTK("BLKTAP_IOCTL_BACKDEV_SETUP ioctl: %ld\n", dev); if (dev > MAX_TAP_DEV || tapfds[dev] == NULL) return -EINVAL; - - while (tapfds[dev]->backdev == NULL) { - ret = wait_event_interruptible(backdev_setup_wq, - tapfds[dev]->backdev); - if (ret) - return ret; - } - return 0; + + return backdev_setup(tapfds[dev]); } } @@ -653,7 +668,6 @@ static unsigned int blktap_poll(struct file *filp, poll_table *wait) void blktap_kick_user(struct tap_blkif *info) { - wake_up_interruptible(&info->wait); } @@ -825,7 +839,8 @@ static void fast_flush_area(pending_req_t *req, int pending_idx, int usr_idx, } offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; - ClearPageReserved(map[offset]); + if (map[offset]) + ClearPageReserved(map[offset]); map[offset] = NULL; BLKTAP_INVALIDATE_HANDLE(khandle); @@ -900,7 +915,7 @@ int tap_blkif_schedule(void *arg) backdev_restart_queue(info); if (info->remove_requested && backdev_users(info) == 0) { - sys_kill(info->pid, SIGTERM); + signal_tapdisk(blkif->be); info->remove_requested = 0; } @@ -922,9 +937,9 @@ int tap_blkif_schedule(void *arg) kfree(info->idx_map); info->idx_map = NULL; - tap_blkif_free(blkif); - blkif->xenblkd = NULL; - + blkif_put(blkif); + blktap_close(blkif); + clear_bit(3, &info->dev_inuse); info->remove_requested = 0; @@ -1067,7 +1082,7 @@ static int do_block_io_op(tap_blkif_t *info) break; } - if (kthread_should_stop()) { + if (info->remove_requested) { more_to_do = 1; break; } diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h index a7411fd2..1cf4e72c 100644 --- a/drivers/xen/blktap/blktap.h +++ b/drivers/xen/blktap/blktap.h @@ -139,4 +139,6 @@ void free_req(pending_req_t *req); void blktap_kick_user(struct tap_blkif *info); +int backend_device_linked(unsigned long domid, unsigned long busid); + extern wait_queue_head_t backdev_setup_wq; diff --git a/drivers/xen/blktap/common.h b/drivers/xen/blktap/common.h index e1c304ba..6168d512 100644 --- a/drivers/xen/blktap/common.h +++ b/drivers/xen/blktap/common.h @@ -91,7 +91,7 @@ blkif_t *tap_alloc_blkif(domid_t domid); void tap_blkif_free(blkif_t *blkif); int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); -void tap_blkif_unmap(blkif_t *blkif); +void tap_blkif_disconnect(blkif_t *blkif); int tap_blkif_connected(blkif_t *blkif); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) @@ -118,7 +118,11 @@ int tap_blkif_schedule(void *arg); struct tap_blkif *associate_blkif(domid_t domid, int xenbus_id, blkif_t *blkif); -void signal_tapdisk(int idx); +void disassociate_blkif(domid_t domid, int xenbus_id); + +int signal_tapdisk(struct backend_info *be); + +void blktap_close(blkif_t *blkif); void tap_blkif_notify_work(blkif_t *blkif); diff --git a/drivers/xen/blktap/interface.c b/drivers/xen/blktap/interface.c index 11f80019..7ea696f2 100644 --- a/drivers/xen/blktap/interface.c +++ b/drivers/xen/blktap/interface.c @@ -145,8 +145,12 @@ int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, return 0; } -void tap_blkif_unmap(blkif_t *blkif) +void tap_blkif_disconnect(blkif_t *blkif) { + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + if (blkif->irq) { unbind_from_irqhandler(blkif->irq, blkif); blkif->irq = 0; @@ -160,10 +164,8 @@ void tap_blkif_unmap(blkif_t *blkif) void tap_blkif_free(blkif_t *blkif) { - atomic_dec(&blkif->refcnt); - wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); - - tap_blkif_unmap(blkif); + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); kmem_cache_free(blkif_cachep, blkif); } diff --git a/drivers/xen/blktap/xenbus.c b/drivers/xen/blktap/xenbus.c index 0ab00f80..d9456411 100644 --- a/drivers/xen/blktap/xenbus.c +++ b/drivers/xen/blktap/xenbus.c @@ -37,20 +37,29 @@ #include #include #include +#include #include #include "common.h" #include "blktap.h" - struct backend_info { struct xenbus_device *dev; blkif_t *blkif; struct xenbus_watch backend_watch; + struct xenbus_watch shutdown_watch; int xenbus_id; + int dom_id; int group_added; + char *nodename; + atomic_t refcnt; + pid_t kthread_pid; + int shutdown_signalled; + struct list_head list; }; +DECLARE_MUTEX(blktap_dev_sem); +LIST_HEAD(backend_devices); static void connect(struct backend_info *); static int connect_ring(struct backend_info *); @@ -62,6 +71,42 @@ static void tap_backend_changed(struct xenbus_watch *, const char **, static void tap_frontend_changed(struct xenbus_device *dev, enum xenbus_state frontend_state); +static void +link_backend_device(struct backend_info *be) +{ + down(&blktap_dev_sem); + list_add(&be->list, &backend_devices); + up(&blktap_dev_sem); + wake_up(&backdev_setup_wq); +} + +static void +unlink_backend_device(struct backend_info *be) +{ + down(&blktap_dev_sem); + list_del(&be->list); + up(&blktap_dev_sem); + wake_up(&backdev_setup_wq); +} + +int +backend_device_linked(unsigned long domid, unsigned long busid) +{ + int linked = 0; + struct backend_info *b; + + down(&blktap_dev_sem); + list_for_each_entry(b, &backend_devices, list) { + if (b->dom_id == domid && b->xenbus_id == busid) { + linked = 1; + break; + } + } + up(&blktap_dev_sem); + + return linked; +} + static int strsep_len(const char *str, char c, unsigned int len) { unsigned int i; @@ -92,6 +137,25 @@ static long get_id(const char *str) DPRINTK("Get_id called for %s (%s)\n",str,num); return simple_strtol(num, NULL, 10); +} + +static long get_dom_id(const char *str) +{ + int start, end; + const char *ptr; + char *tptr, num[25]; + + start = strsep_len(str, '/', 1); + end = strsep_len(str, '/', 2); + if (start < 0 || end < 0) + return -1; + + ptr = str + start + 1; + strncpy(num, ptr, end - start); + tptr = num + (end - (start + 1)); + *tptr = '\0'; + + return simple_strtol(num, NULL, 10); } static int blktap_name(blkif_t *blkif, char *buf) @@ -150,7 +214,7 @@ static struct attribute_group tapstat_group = { .attrs = tapstat_attrs, }; -int xentap_sysfs_addif(struct xenbus_device *dev) +static int xentap_sysfs_addif(struct xenbus_device *dev) { int err; struct backend_info *be = dev->dev.driver_data; @@ -160,49 +224,199 @@ int xentap_sysfs_addif(struct xenbus_device *dev) return err; } -void xentap_sysfs_delif(struct xenbus_device *dev) +static void xentap_sysfs_delif(struct xenbus_device *dev) { struct backend_info *be = dev->dev.driver_data; + if (be->group_added == 0) + return; sysfs_remove_group(&dev->dev.kobj, &tapstat_group); be->group_added = 0; } +static int kthread_remove(struct backend_info *be) +{ + blkif_t *blkif = be->blkif; + + if (!blkif || !blkif->xenblkd) + return 0; + + blkif->tapif->remove_requested = 1; + wake_up_process(blkif->xenblkd); + + return -EBUSY; +} + +static void signal_shutdown(struct backend_info *be) +{ + int err; + + down(&blktap_dev_sem); + + if (be->shutdown_signalled) + goto out; + + if (be->blkif) + disassociate_blkif(be->blkif->domid, be->xenbus_id); + + err = xenbus_write(XBT_NIL, be->nodename, "shutdown-done", ""); + if (err) + WPRINTK("Error writing shutdown-done for %s: %d\n", + be->nodename, err); + + if (be->dev) + xenbus_switch_state(be->dev, XenbusStateClosed); + + be->shutdown_signalled = 1; + + out: + up(&blktap_dev_sem); +} + +static void backend_release(struct backend_info *be) +{ + blkif_t *blkif = be->blkif; + + if (current->pid == be->kthread_pid) + signal_shutdown(be); + + if (!atomic_dec_and_test(&be->refcnt)) + return; + + signal_shutdown(be); + + if (blkif) { + tap_blkif_disconnect(blkif); + tap_blkif_free(blkif); + be->blkif = NULL; + } + + kfree(be->nodename); + kfree(be); +} + static int blktap_remove(struct xenbus_device *dev) { struct backend_info *be = dev->dev.driver_data; - if (be->group_added) - xentap_sysfs_delif(be->dev); + down(&blktap_dev_sem); + be->dev = NULL; + up(&blktap_dev_sem); + + unlink_backend_device(be); + if (be->backend_watch.node) { unregister_xenbus_watch(&be->backend_watch); kfree(be->backend_watch.node); + be->backend_watch.node = NULL; } - if (be->blkif) { - if (be->blkif->xenblkd == NULL) { - if (atomic_read(&be->blkif->refcnt) != 1) - WPRINTK("refcnt is %d, expected 1", - atomic_read(&be->blkif->refcnt)); - /* If the kthread was never started, free tap_blkif. */ - tap_blkif_free(be->blkif); - } else { - struct tap_blkif *info = be->blkif->tapif; - - down_write(&info->vm_update_sem); - tap_blkif_unmap(be->blkif); - be->blkif->be = NULL; - up_write(&info->vm_update_sem); - - be->blkif->tapif->remove_requested = 1; - wake_up_process(be->blkif->xenblkd); - blkif_put(be->blkif); - } + + if (be->shutdown_watch.node) { + unregister_xenbus_watch(&be->shutdown_watch); + kfree(be->shutdown_watch.node); + be->shutdown_watch.node = NULL; } - be->blkif->be = NULL; - kfree(be); + + if (kthread_remove(be)) + WPRINTK("BAD REMOVE REQUEST for %s\n", be->nodename); + + xentap_sysfs_delif(dev); + backend_release(be); + dev->dev.driver_data = NULL; + return 0; } +/* + * called by kthread when closing + */ +void blktap_close(blkif_t *blkif) +{ + tap_blkif_disconnect(blkif); + blkif->tapif->remove_requested = 0; + + down(&blktap_dev_sem); + if (blkif->be->dev) + xentap_sysfs_delif(blkif->be->dev); + up(&blktap_dev_sem); + + backend_release(blkif->be); + blkif->xenblkd = NULL; +} + +static void start_shutdown(struct xenbus_watch *watch, + const char **vec, unsigned int length) +{ + int err; + char *type; + unsigned int len; + struct backend_info *be + = container_of(watch, struct backend_info, shutdown_watch); + struct xenbus_device *dev = be->dev; + + if (be->shutdown_signalled) + return; + + type = xenbus_read(XBT_NIL, dev->nodename, "shutdown-request", &len); + err = (IS_ERR(type) ? PTR_ERR(type) : 0); + + if (XENBUS_EXIST_ERR(err)) + return; + + if (err) { + xenbus_dev_fatal(dev, err, "reading shutdown-request"); + return; + } + + xenbus_switch_state(dev, XenbusStateClosing); + + if (len == sizeof("force") - 1 && !memcmp(type, "force", len)) + if (!kthread_remove(be)) + signal_shutdown(be); /* shutdown immediately */ + + kfree(type); +} + +int signal_tapdisk(struct backend_info *be) +{ + int err; + + err = xenbus_write(XBT_NIL, be->nodename, "shutdown-tapdisk", ""); + if (err) + WPRINTK("ERROR writing shutdown-tapdisk\n"); + + return err; +} + +static void blktap_reconnect(struct backend_info *be) +{ + int err; + struct xenbus_device *dev = be->dev; + + err = xenbus_rm(XBT_NIL, dev->nodename, "shutdown-done"); + if (err) { + xenbus_dev_fatal(dev, err, "removing shutdown-done"); + return; + } + + err = xenbus_rm(XBT_NIL, dev->nodename, "shutdown-tapdisk"); + if (err) { + xenbus_dev_fatal(dev, err, "removing shutdown-tapdisk"); + return; + } + + be->blkif->sectors = 0; + err = xenbus_rm(XBT_NIL, be->nodename, "info"); + if (err) { + xenbus_dev_fatal(be->dev, err, "refreshing info"); + return; + } + + be->shutdown_signalled = 0; + + xenbus_switch_state(dev, XenbusStateInitWait); +} + static void tap_update_blkif_status(blkif_t *blkif) { int err; @@ -233,7 +447,7 @@ static void tap_update_blkif_status(blkif_t *blkif) int tap_blkif_connected(blkif_t *blkif) { - return (blkif->be && blkif->be->dev->state == XenbusStateConnected); + return (blkif->irq && blkif->blk_rings.common.sring); } /** @@ -256,7 +470,26 @@ static int blktap_probe(struct xenbus_device *dev, be->dev = dev; dev->dev.driver_data = be; + atomic_set(&be->refcnt, 1); + + be->dom_id = get_dom_id(dev->nodename); be->xenbus_id = get_id(dev->nodename); + if (be->xenbus_id == -1 || be->dom_id == -1) { + xenbus_dev_fatal(dev, -EINVAL, + "getting dom/bus id"); + kfree(be); + return -EINVAL; + } + + be->nodename = kasprintf(GFP_KERNEL, "%s", dev->nodename); + if (!be->nodename) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + kfree(be); + return -ENOMEM; + } + + link_backend_device(be); be->blkif = tap_alloc_blkif(dev->otherend_id); if (IS_ERR(be->blkif)) { @@ -275,7 +508,12 @@ static int blktap_probe(struct xenbus_device *dev, &be->backend_watch, tap_backend_changed); if (err) goto fail; - + + err = xenbus_watch_path2(dev, dev->nodename, "shutdown-request", + &be->shutdown_watch, start_shutdown); + if (err) + goto fail; + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -305,8 +543,7 @@ static void tap_backend_changed(struct xenbus_watch *watch, /** * Check to see whether userspace code has opened the image - * and written sector - * and disk info to xenstore + * and written sector and disk info to xenstore */ err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info, NULL); @@ -319,6 +556,10 @@ static void tap_backend_changed(struct xenbus_watch *watch, err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu", &be->blkif->sectors, NULL); + if (err) { + xenbus_dev_error(dev, err, "getting sectors"); + return; + } if (be->blkif->xenblkd == NULL) { char name[TASK_COMM_LEN]; @@ -349,6 +590,9 @@ static void tap_backend_changed(struct xenbus_watch *watch, return; } + atomic_inc(&be->refcnt); + be->kthread_pid = be->blkif->xenblkd->pid; + err = xenbus_printf(XBT_NIL, dev->nodename, "kthread-pid", "%d", be->blkif->xenblkd->pid); if (err) { @@ -370,14 +614,16 @@ static void tap_frontend_changed(struct xenbus_device *dev, struct backend_info *be = dev->dev.driver_data; int err; - DPRINTK("\n"); + DPRINTK("%s: front end: %s\n", + dev->nodename, xenbus_strstate(frontend_state)); switch (frontend_state) { case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { printk(KERN_INFO "%s: %s: prepare for reconnect\n", __FUNCTION__, dev->nodename); - xenbus_switch_state(dev, XenbusStateInitWait); + + blktap_reconnect(be); } break; @@ -399,15 +645,14 @@ static void tap_frontend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - tap_blkif_unmap(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; case XenbusStateClosed: - xenbus_switch_state(dev, XenbusStateClosed); - if (xenbus_dev_is_online(dev)) - break; - /* fall through if not online */ + if (!kthread_remove(be)) + signal_shutdown(be); + break; + case XenbusStateUnknown: device_unregister(&dev->dev); break; @@ -417,6 +662,9 @@ static void tap_frontend_changed(struct xenbus_device *dev, frontend_state); break; } + + DPRINTK("%s: back end: %s\n", + dev->nodename, xenbus_strstate(dev->state)); }