xenbits.xensource.com Git - xenclient/kernel.git/commitdiff
imported patch CA-7672-blk-shutdown.patch blktap-missed-requests-CA-7665
author t_jeang <devnull@localhost>
Tue, 6 Jan 2009 12:06:01 +0000 (12:06 +0000)
committer t_jeang <devnull@localhost>
Tue, 6 Jan 2009 12:06:01 +0000 (12:06 +0000)
13 files changed:
drivers/xen/blkback/blkback.c
drivers/xen/blkback/common.h
drivers/xen/blkback/interface.c
drivers/xen/blkback/vbd.c
drivers/xen/blkback/xenbus.c
drivers/xen/blkfront/blkfront.c
drivers/xen/blktap/backdev.c
drivers/xen/blktap/backdev.h
drivers/xen/blktap/blktap.c
drivers/xen/blktap/blktap.h
drivers/xen/blktap/common.h
drivers/xen/blktap/interface.c
drivers/xen/blktap/xenbus.c

index 1652edf9d41cd519e7bda9e0f4378d8c573968eb..f94ff73bff125f717fdf0542f81e3424092368e1 100644 (file)
@@ -208,16 +208,16 @@ int blkif_schedule(void *arg)
        if (debug_lvl)
                printk(KERN_DEBUG "%s: started\n", current->comm);
 
-       while (!kthread_should_stop()) {
+       while (!blkif->remove_requested) {
                if (try_to_freeze())
                        continue;
 
                wait_event_interruptible(
                        blkif->wq,
-                       blkif->waiting_reqs || kthread_should_stop());
+                       blkif->waiting_reqs || blkif->remove_requested);
                wait_event_interruptible(
                        pending_free_wq,
-                       !list_empty(&pending_free) || kthread_should_stop());
+                       !list_empty(&pending_free) || blkif->remove_requested);
 
                blkif->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */
@@ -235,8 +235,8 @@ int blkif_schedule(void *arg)
        if (debug_lvl)
                printk(KERN_DEBUG "%s: exiting\n", current->comm);
 
-       blkif->xenblkd = NULL;
        blkif_put(blkif);
+       blkback_close(blkif);
 
        return 0;
 }
@@ -324,7 +324,7 @@ static int do_block_io_op(blkif_t *blkif)
                        break;
                }
 
-               if (kthread_should_stop()) {
+               if (blkif->remove_requested) {
                        more_to_do = 1;
                        break;
                }
index 425d8cd6ee25e6ab0e72b85e74682c8f8243b540..53911bffb1f99b518237ab06b952d952d9e71c2b 100644 (file)
@@ -48,6 +48,8 @@
        pr_debug("(file=%s, line=%d) " _f,      \
                 __FILE__ , __LINE__ , ## _a )
 
+/*
+ * Warning-level log helper for the blkback driver.  The prefix names this
+ * driver so its warnings are not mistaken for blktap's in the kernel log.
+ */
+#define WPRINTK(fmt, args...) printk(KERN_WARNING "blkback: " fmt, ##args)
+
 struct vbd {
        blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
        unsigned char  readonly;    /* Non-zero -> read-only */
@@ -73,6 +75,7 @@ typedef struct blkif_st {
        /* Back pointer to the backend_info. */
        struct backend_info *be;
        /* Private fields. */
+       int remove_requested;
        spinlock_t       blk_ring_lock;
        atomic_t         refcnt;
 
@@ -112,6 +115,7 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
 int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
               unsigned minor, int readonly, int cdrom);
 void vbd_free(struct vbd *vbd);
+void vbd_sync(struct vbd *vbd);
 
 unsigned long long vbd_size(struct vbd *vbd);
 unsigned int vbd_info(struct vbd *vbd);
@@ -138,4 +142,6 @@ void blkif_notify_work(blkif_t *blkif);
 int blkback_barrier(struct xenbus_transaction xbt,
                    struct backend_info *be, int state);
 
+void blkback_close(blkif_t *blkif);
+
 #endif /* __BLKIF__BACKEND__COMMON_H__ */
index 6df7b734d694d591de1f212249df1faeab0c3a5f..02fe4c8d2bfe21a844ce3b955271d02a71216e57 100644 (file)
@@ -146,11 +146,6 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
 
 void blkif_disconnect(blkif_t *blkif)
 {
-       if (blkif->xenblkd) {
-               kthread_stop(blkif->xenblkd);
-               blkif->xenblkd = NULL;
-       }
-
        atomic_dec(&blkif->refcnt);
        wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
        atomic_inc(&blkif->refcnt);
index 2594419f4e311677e6465d77ada8c89ff312b0ca..8729176f3e1519e4695a7fb3024f32362af1389f 100644 (file)
@@ -98,6 +98,12 @@ void vbd_free(struct vbd *vbd)
        vbd->bdev = NULL;
 }
 
+/*
+ * Flush the block device behind this vbd with fsync_bdev().
+ * A vbd with no block device attached (vbd->bdev == NULL) is left alone.
+ */
+void vbd_sync(struct vbd *vbd)
+{
+       if (!vbd->bdev)
+               return;
+
+       fsync_bdev(vbd->bdev);
+}
+
 int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
 {
        struct vbd *vbd = &blkif->vbd;
index 9d95137f11c347ddb1385274ff05dedcaa581324..08ff8fc12df027c78a8a2b7fe3d53eb8a22b0f08 100644 (file)
@@ -32,11 +32,19 @@ struct backend_info
        struct xenbus_device *dev;
        blkif_t *blkif;
        struct xenbus_watch backend_watch;
+       struct xenbus_watch shutdown_watch;
        unsigned major;
        unsigned minor;
        char *mode;
+       int group_added;
+       char *nodename;
+       atomic_t refcnt;
+       pid_t kthread_pid;
+       int shutdown_signalled;
 };
 
+/*
+ * Taken with down()/up() around the shutdown_signalled check-and-set in
+ * signal_shutdown(), and around accesses to be->dev in blkback_remove()
+ * and blkback_close().
+ */
+DECLARE_MUTEX(blkback_dev_sem);
+
 static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
@@ -93,6 +101,9 @@ static void update_blkif_status(blkif_t *blkif)
                xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
                return;
        }
+       
+       blkif->be->kthread_pid = blkif->xenblkd->pid;
+       atomic_inc(&blkif->be->refcnt);
 
        err = xenbus_printf(XBT_NIL, blkif->be->dev->nodename, "kthread-pid",
                            "%d", blkif->xenblkd->pid);
@@ -147,6 +158,7 @@ VBD_SHOW(mode, "%s\n", be->mode);
 int xenvbd_sysfs_addif(struct xenbus_device *dev)
 {
        int error;
+       struct backend_info *be = dev->dev.driver_data;
        
        error = device_create_file(&dev->dev, &dev_attr_physical_device);
        if (error)
@@ -160,6 +172,8 @@ int xenvbd_sysfs_addif(struct xenbus_device *dev)
        if (error)
                goto fail3;
 
+       be->group_added = 1;
+
        return 0;
 
 fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
@@ -170,19 +184,81 @@ fail1:    device_remove_file(&dev->dev, &dev_attr_physical_device);
 
 void xenvbd_sysfs_delif(struct xenbus_device *dev)
 {
+       struct backend_info *be = dev->dev.driver_data;
+       if (be->group_added == 0)
+               return;
        sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
        device_remove_file(&dev->dev, &dev_attr_mode);
        device_remove_file(&dev->dev, &dev_attr_physical_device);
+       be->group_added = 0;
+}
+
+/*
+ * Ask the backend's xenblkd thread to exit.
+ *
+ * Returns 0 when there is no blkif or no thread to stop.  Otherwise it
+ * raises blkif->remove_requested, wakes the thread and returns -EBUSY.
+ */
+static int kthread_remove(struct backend_info *be)
+{
+       blkif_t *blkif = be->blkif;
+
+       if (blkif && blkif->xenblkd) {
+               blkif->remove_requested = 1;
+               wake_up_process(blkif->xenblkd);
+               return -EBUSY;
+       }
+
+       return 0;
+}
+
+/*
+ * Announce, at most once, that this backend has shut down: write the
+ * "shutdown-done" node under be->nodename and, if the xenbus device is
+ * still attached, switch it to XenbusStateClosed.  The whole sequence
+ * runs under blkback_dev_sem; later calls see shutdown_signalled set
+ * and do nothing.
+ */
+static void signal_shutdown(struct backend_info *be)
+{
+       int err;
+
+       down(&blkback_dev_sem);
+
+       if (!be->shutdown_signalled) {
+               err = xenbus_write(XBT_NIL, be->nodename,
+                                  "shutdown-done", "");
+               if (err)
+                       WPRINTK("Error writing shutdown-done for %s: %d\n",
+                               be->nodename, err);
+
+               if (be->dev)
+                       xenbus_switch_state(be->dev, XenbusStateClosed);
+
+               be->shutdown_signalled = 1;
+       }
+
+       up(&blkback_dev_sem);
+}
+
+/*
+ * Drop one reference on the backend_info.  When the last reference goes,
+ * signal shutdown, tear down the blkif and free the backend_info itself;
+ * callers must not touch be (or be->blkif) after that.
+ */
+static void backend_release(struct backend_info *be)
+{
+       blkif_t *blkif = be->blkif;
+
+       /*
+        * When called from the xenblkd thread (its pid is recorded in
+        * kthread_pid by update_blkif_status()), signal shutdown even if
+        * other references remain.
+        */
+       if (current->pid == be->kthread_pid)
+               signal_shutdown(be);
+
+       if (!atomic_dec_and_test(&be->refcnt))
+               return;
+
+       /* Last reference: signal_shutdown() is a no-op if already done. */
+       signal_shutdown(be);
+
+       if (blkif) {
+               blkif_disconnect(blkif);
+               vbd_free(&blkif->vbd);
+               blkif_free(blkif);
+               be->blkif = NULL;
+       }
+
+       kfree(be->nodename);
+       kfree(be);
 }
 
 static int blkback_remove(struct xenbus_device *dev)
 {
        struct backend_info *be = dev->dev.driver_data;
 
-       DPRINTK("");
-
-       if (be->major || be->minor)
-               xenvbd_sysfs_delif(dev);
+       down(&blkback_dev_sem);
+       be->dev = NULL;
+       up(&blkback_dev_sem);
 
        if (be->backend_watch.node) {
                unregister_xenbus_watch(&be->backend_watch);
@@ -190,18 +266,74 @@ static int blkback_remove(struct xenbus_device *dev)
                be->backend_watch.node = NULL;
        }
 
-       if (be->blkif) {
-               blkif_disconnect(be->blkif);
-               vbd_free(&be->blkif->vbd);
-               blkif_free(be->blkif);
-               be->blkif = NULL;
+       if (be->shutdown_watch.node) {
+               unregister_xenbus_watch(&be->shutdown_watch);
+               kfree(be->shutdown_watch.node);
+               be->shutdown_watch.node = NULL;
        }
 
-       kfree(be);
+       if (kthread_remove(be))
+               WPRINTK("BAD REMOVE REQUEST for %s\n", be->nodename);
+
+       xenvbd_sysfs_delif(dev);
+       backend_release(be);
+
        dev->dev.driver_data = NULL;
+
        return 0;
 }
 
+/*
+ * Called by the xenblkd kthread on its way out (see blkif_schedule()).
+ *
+ * Disconnects the interface, syncs the backing device, removes the sysfs
+ * entries if the xenbus device is still attached, and finally drops the
+ * thread's reference on the backend_info.
+ */
+void blkback_close(blkif_t *blkif)
+{
+       struct backend_info *be = blkif->be;
+
+       blkif_disconnect(blkif);
+       vbd_sync(&blkif->vbd);
+       blkif->remove_requested = 0;
+
+       down(&blkback_dev_sem);
+       if (be->dev)
+               xenvbd_sysfs_delif(be->dev);
+       up(&blkback_dev_sem);
+
+       /*
+        * Clear xenblkd before dropping the reference.  If this is the
+        * last one, backend_release() hands blkif to blkif_free() and
+        * kfree()s be, so neither may be touched after it returns.
+        */
+       blkif->xenblkd = NULL;
+       backend_release(be);
+}
+
+/*
+ * Xenbus watch callback for the backend's "shutdown-request" node.
+ *
+ * Reads the request, moves the device to XenbusStateClosing and, for a
+ * "force" request, asks the kthread to exit; if there is no kthread the
+ * shutdown is signalled immediately.  A missing node is ignored; any
+ * other read error is reported with xenbus_dev_fatal().
+ */
+static void start_shutdown(struct xenbus_watch *watch,
+                          const char **vec, unsigned int length)
+{
+       int err;
+       char *type;
+       unsigned int len;
+       struct backend_info *be
+               = container_of(watch, struct backend_info, shutdown_watch);
+       struct xenbus_device *dev = be->dev;
+
+       /*
+        * blkback_remove() clears be->dev before it unregisters this
+        * watch, so a late event may find no device: nothing to do then.
+        */
+       if (!dev || be->shutdown_signalled)
+               return;
+
+       type = xenbus_read(XBT_NIL, dev->nodename, "shutdown-request", &len);
+       err  = (IS_ERR(type) ? PTR_ERR(type) : 0);
+
+       /* Node does not exist: no shutdown has been requested. */
+       if (XENBUS_EXIST_ERR(err))
+               return;
+
+       if (err) {
+               xenbus_dev_fatal(dev, err, "reading shutdown-request");
+               return;
+       }
+
+       xenbus_switch_state(dev, XenbusStateClosing);
+
+       if (len == sizeof("force") - 1 && !memcmp(type, "force", len))
+               if (!kthread_remove(be))
+                       signal_shutdown(be); /* shutdown immediately */
+
+       kfree(type);
+}
+
 int blkback_barrier(struct xenbus_transaction xbt,
                    struct backend_info *be, int state)
 {
@@ -234,6 +366,15 @@ static int blkback_probe(struct xenbus_device *dev,
        }
        be->dev = dev;
        dev->dev.driver_data = be;
+       atomic_set(&be->refcnt, 1);
+
+       be->nodename = kasprintf(GFP_KERNEL, "%s", dev->nodename);
+       if (!be->nodename) {
+               xenbus_dev_fatal(dev, -ENOMEM,
+                                "allocating backend structure");
+               kfree(be);
+               return -ENOMEM;
+       }
 
        be->blkif = blkif_alloc(dev->otherend_id);
        if (IS_ERR(be->blkif)) {
@@ -251,6 +392,11 @@ static int blkback_probe(struct xenbus_device *dev,
        if (err)
                goto fail;
 
+       err = xenbus_watch_path2(dev, dev->nodename, "shutdown-request",
+                                &be->shutdown_watch, start_shutdown);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -359,13 +505,17 @@ static void frontend_changed(struct xenbus_device *dev,
        struct backend_info *be = dev->dev.driver_data;
        int err;
 
-       DPRINTK("%s", xenbus_strstate(frontend_state));
+       DPRINTK("%s: %s", dev->nodename, xenbus_strstate(frontend_state));
 
        switch (frontend_state) {
        case XenbusStateInitialising:
                if (dev->state == XenbusStateClosed) {
                        printk(KERN_INFO "%s: %s: prepare for reconnect\n",
                               __FUNCTION__, dev->nodename);
+
+                       xenbus_rm(XBT_NIL, dev->nodename, "shutdown-done");
+                       be->shutdown_signalled = 0;
+
                        xenbus_switch_state(dev, XenbusStateInitWait);
                }
                break;
@@ -388,15 +538,14 @@ static void frontend_changed(struct xenbus_device *dev,
                break;
 
        case XenbusStateClosing:
-               blkif_disconnect(be->blkif);
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
        case XenbusStateClosed:
-               xenbus_switch_state(dev, XenbusStateClosed);
-               if (xenbus_dev_is_online(dev))
-                       break;
-               /* fall through if not online */
+               if (!kthread_remove(be))
+                       signal_shutdown(be);
+               break;
+
        case XenbusStateUnknown:
                device_unregister(&dev->dev);
                break;
@@ -406,6 +555,8 @@ static void frontend_changed(struct xenbus_device *dev,
                                 frontend_state);
                break;
        }
+
+       DPRINTK("%s: %s", dev->nodename, xenbus_strstate(dev->state));
 }
 
 
index c4b1ecd65242608a1552d9a9769c015368d3a69a..83e5c6a605fb6e7dad1fea56483c05af6e5bbe0f 100644 (file)
@@ -265,7 +265,8 @@ static void backend_changed(struct xenbus_device *dev,
        struct blkfront_info *info = dev->dev.driver_data;
        struct block_device *bd;
 
-       DPRINTK("blkfront:backend_changed.\n");
+       DPRINTK("%s: %s\n", dev->nodename,
+               xenbus_strstate(backend_state));
 
        switch (backend_state) {
        case XenbusStateInitialising:
index 9281dcf61ed0e17c92204b47b8cbb56d96b09842..3ebed59f8d591f4b1b8cf87accf9ba03c81b387b 100644 (file)
@@ -50,6 +50,10 @@ backdev_end_requests(struct tap_blkif *info)
        int ret, i;
 
        spin_lock_irq(&backdev_io_lock);
+
+       if (!info->idx_map)
+               goto start_queue;
+
        for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
                if (info->idx_map[usr_idx] == INVALID_REQ)
                        continue;
@@ -57,21 +61,26 @@ backdev_end_requests(struct tap_blkif *info)
                mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
                pending_req = &pending_reqs[mmap_idx][pending_idx];
                blkif_put(info->blkif);
+               WPRINTK("failing %s of %d pages pending on close\n",
+                       (pending_req->operation == BLKIF_OP_READ ?
+                        "read" : "write"), pending_req->nr_pages);
                if (pending_req->inuse != 2)
                        continue;
                for (i = 0; i < pending_req->nr_pages; i++)
                        umap_uaddr(&init_mm, idx_to_kaddr(mmap_idx,
                                                          pending_idx, i));
                req = (struct request *)(unsigned long)pending_req->id;
-               /* See NOTE in do_backdev_request. */
-               ret = end_that_request_first(req, 1, req->hard_nr_sectors);
+               ret = end_that_request_first(req, 0, req->hard_nr_sectors);
                BUG_ON(ret);
-               end_that_request_last(req, 1);
+               end_that_request_last(req, 0);
        }
+
+ start_queue:
        if (info->backdev) {
                info->backdev->gd->queue->queuedata = NULL;
                blk_start_queue(info->backdev->gd->queue);
        }
+
        spin_unlock_irq(&backdev_io_lock);
 }
 
@@ -95,7 +104,7 @@ destroy_backdev(struct tap_blkif *uinfo)
        blk_cleanup_queue(info->gd->queue);
 
        if (uinfo->blkif->xenblkd)
-           wake_up_process(uinfo->blkif->xenblkd);
+               wake_up_process(uinfo->blkif->xenblkd);
 
        blkif_put(uinfo->blkif);
 
@@ -240,7 +249,7 @@ static int umap_uaddr_fn(
        pte_t *ptep, struct page *pmd_page, unsigned long addr, void *data)
 {
        struct mm_struct *mm = (struct mm_struct *)data;
-
+       
        DPRINTK("unmap_uaddr ptep %p\n", ptep);
        pte_clear(mm, addr, ptep);
        xen_invlpg(addr);
@@ -291,11 +300,14 @@ process_backdev_request(struct tap_blkif *uinfo, struct backdev_info *info)
            end_request(req, 0);
            continue;
        }
+
+#if 0
        if (uinfo->remove_requested) {
            DPRINTK("device no longer in use %d\n", info->uinfo->minor);
            end_request(req, 0);
            continue;
        }
+#endif
 
        if (RING_FULL(&uinfo->ufe_ring)) {
          wait:
@@ -419,12 +431,11 @@ do_backdev_request(request_queue_t *rq)
        info = rq->queuedata;
        if (info == NULL || info->uinfo == NULL ||
            info->uinfo->remove_requested) {
-               /* NOTE: we pretend that the request succeeded because
-                * this seems better than returning block device
-                * errors to dom0 given that the linux filesystem code
-                * doesn't seem to handle these too gracefully. */
-               while ((req = elv_next_request(rq)))
-                       end_request(req, 1);
+               while ((req = elv_next_request(rq))) {
+                       WPRINTK("backdev closed: failing secs %llu - %llu\n",
+                               req->sector, req->sector + req->nr_sectors);
+                       end_request(req, 0);
+               }
                return;
        }
        info->uinfo->blkif->waiting_reqs = 1;
@@ -510,6 +521,40 @@ backdev_restart_queue(struct tap_blkif *uinfo)
        return;
 }
 
+/*
+ * Block until the backdev for this tap device has been created.
+ *
+ * Returns 0 once uinfo->backdev is set while the tap device is still in
+ * use, still bound to the same domid/busid, and the matching backend is
+ * still linked.  Returns -EINVAL if the tap device has no domid/busid
+ * translation or if any of those conditions stops holding, and the
+ * non-zero result of wait_event_interruptible() if the wait is
+ * interrupted.
+ */
+int
+backdev_setup(struct tap_blkif *uinfo)
+{
+       int ret;
+       unsigned long domid, busid;
+
+       if (!uinfo->trans.domid && !uinfo->trans.busid)
+               return -EINVAL;
+
+       /* Snapshot the binding so a later change can be detected. */
+       domid = uinfo->trans.domid;
+       busid = uinfo->trans.busid;
+
+       while (backend_device_linked(domid, busid) &&
+              uinfo->backdev == NULL && uinfo->dev_inuse &&
+              uinfo->trans.domid == domid && uinfo->trans.busid == busid) {
+
+               /* Wake on backdev creation or on any give-up condition. */
+               ret = wait_event_interruptible(backdev_setup_wq,
+                                              uinfo->backdev ||
+                                              uinfo->dev_inuse == 0 ||
+                                              uinfo->trans.domid != domid ||
+                                              uinfo->trans.busid != busid ||
+                                              !backend_device_linked(domid, 
+                                                                     busid));
+               if (ret)
+                       return ret;
+       }
+
+       /* The loop also ends on the give-up conditions; reject those. */
+       if (!backend_device_linked(domid, busid) || uinfo->dev_inuse == 0 ||
+           uinfo->trans.domid != domid || uinfo->trans.busid != busid)
+               return -EINVAL;
+
+       return 0;
+}
+
 int
 create_backdev(struct tap_blkif *uinfo)
 {
index c25086cf8ea486bf9a59328890c88187e6765f07..db54666ed296da950e3c2da4c6cb8e8100fb59f9 100644 (file)
@@ -11,6 +11,7 @@ struct backdev_info {
 extern int register_backdev(void);
 extern int create_backdev(struct tap_blkif *);
 extern int destroy_backdev(struct tap_blkif *);
+extern int backdev_setup(struct tap_blkif *);
 extern void backdev_finish_req(struct tap_blkif *, int, blkif_response_t *,
                               struct pending_req *);
 extern void backdev_restart_queue(struct tap_blkif *);
index 6d014083803e2a07b6512d54a73d9abfd6537182..581f551eb3967c35bdabacaf304da5ffb824c9cc 100644 (file)
@@ -54,7 +54,6 @@
 #include <linux/poll.h>
 #include <linux/delay.h>
 #include <asm/tlbflush.h>
-#include <linux/syscalls.h>
 
 #include "blktap.h"
 #include "backdev.h"
@@ -253,7 +252,7 @@ static void blktap_vm_close(struct vm_area_struct *vma)
 
        info->vma = NULL;
        clear_bit(2, &info->dev_inuse);
-       if (info->blkif->xenblkd)
+       if (info->blkif && info->blkif->xenblkd)
                wake_up_process(info->blkif->xenblkd);
 
        up_write(&info->vm_update_sem);
@@ -349,6 +348,8 @@ found:
        }
 
 out:
+       if (info)
+               info->blkif = NULL;
        return info;
 }
 
@@ -369,6 +370,24 @@ associate_blkif(domid_t domid, int xenbus_id, blkif_t *blkif)
        return NULL;
 }
 
+/*
+ * Undo the domid/busid binding of the first tap device (slots 1 and up)
+ * that matches the given pair: zero its translation, clear bit 0 of
+ * dev_inuse and wake anyone sleeping on backdev_setup_wq.  Does nothing
+ * if no tap device matches.
+ */
+void disassociate_blkif(domid_t domid, int xenbus_id)
+{
+       int idx;
+
+       for (idx = 1; idx < MAX_TAP_DEV; idx++) {
+               tap_blkif_t *tap = tapfds[idx];
+
+               if (!tap)
+                       continue;
+               if (tap->trans.domid != domid ||
+                   tap->trans.busid != xenbus_id)
+                       continue;
+
+               tap->trans.busid = 0;
+               tap->trans.domid = 0;
+               clear_bit(0, &tap->dev_inuse);
+               wake_up(&backdev_setup_wq);
+               return;
+       }
+}
+
 static int blktap_open(struct inode *inode, struct file *filp)
 {
        int idx = iminor(inode) - BLKTAP_MINOR;
@@ -389,7 +408,8 @@ static int blktap_open(struct inode *inode, struct file *filp)
        DPRINTK("Opening device /dev/xen/blktap%d\n", idx);
 
        /* Only one process can access device at a time */
-       if (test_and_set_bit(1, &info->dev_inuse))
+       if (!test_bit(0, &info->dev_inuse) || 
+           test_and_set_bit(1, &info->dev_inuse))
                return -EBUSY;
 
        clear_bit(0, &info->dev_inuse);
@@ -411,6 +431,7 @@ static int blktap_release(struct inode *inode, struct file *filp)
 
        DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
        clear_bit(1, &info->dev_inuse);
+       wake_up(&backdev_setup_wq);
 
        filp->private_data = NULL;      
 
@@ -591,6 +612,7 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
                        return -EINVAL;
 
                clear_bit(0, &tapfds[dev]->dev_inuse);
+               wake_up(&backdev_setup_wq);
 
                return 0;
        }
@@ -616,19 +638,12 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
        case BLKTAP_IOCTL_BACKDEV_SETUP:
        {
                unsigned long dev = arg;
-               int ret;
 
                DPRINTK("BLKTAP_IOCTL_BACKDEV_SETUP ioctl: %ld\n", dev);
                if (dev > MAX_TAP_DEV || tapfds[dev] == NULL)
                        return -EINVAL;
-
-               while (tapfds[dev]->backdev == NULL) {
-                       ret = wait_event_interruptible(backdev_setup_wq,
-                                                      tapfds[dev]->backdev);
-                       if (ret)
-                               return ret;
-               }
-               return 0;
+       
+               return backdev_setup(tapfds[dev]);
        }
        }
 
@@ -653,7 +668,6 @@ static unsigned int blktap_poll(struct file *filp, poll_table *wait)
 
 void blktap_kick_user(struct tap_blkif *info)
 {
-
        wake_up_interruptible(&info->wait);
 }
 
@@ -825,7 +839,8 @@ static void fast_flush_area(pending_req_t *req, int pending_idx, int usr_idx,
                }
 
                offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
-               ClearPageReserved(map[offset]);
+               if (map[offset])
+                       ClearPageReserved(map[offset]);
                map[offset] = NULL;
 
                BLKTAP_INVALIDATE_HANDLE(khandle);
@@ -900,7 +915,7 @@ int tap_blkif_schedule(void *arg)
                        backdev_restart_queue(info);
 
                if (info->remove_requested && backdev_users(info) == 0) {
-                       sys_kill(info->pid, SIGTERM);
+                       signal_tapdisk(blkif->be);
                        info->remove_requested = 0;
                }
 
@@ -922,9 +937,9 @@ int tap_blkif_schedule(void *arg)
        kfree(info->idx_map);
        info->idx_map = NULL;
 
-       tap_blkif_free(blkif);
-       blkif->xenblkd = NULL;
-
+       blkif_put(blkif);
+       blktap_close(blkif);
+       
        clear_bit(3, &info->dev_inuse);
        info->remove_requested = 0;
 
@@ -1067,7 +1082,7 @@ static int do_block_io_op(tap_blkif_t *info)
                        break;
                }
 
-               if (kthread_should_stop()) {
+               if (info->remove_requested) {
                        more_to_do = 1;
                        break;
                }
index a7411fd2e9a64b5acec0d9792dbf59761c810bde..1cf4e72caee648cd197fce7f603046fb9c3b7947 100644 (file)
@@ -139,4 +139,6 @@ void free_req(pending_req_t *req);
 
 void blktap_kick_user(struct tap_blkif *info);
 
+int backend_device_linked(unsigned long domid, unsigned long busid);
+
 extern wait_queue_head_t backdev_setup_wq;
index e1c304ba98ad1c0c104da5e9ad6ae11aa427cadb..6168d5122f96d1af0a9d96c89d11eb12626bff85 100644 (file)
@@ -91,7 +91,7 @@ blkif_t *tap_alloc_blkif(domid_t domid);
 void tap_blkif_free(blkif_t *blkif);
 int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, 
                  unsigned int evtchn);
-void tap_blkif_unmap(blkif_t *blkif);
+void tap_blkif_disconnect(blkif_t *blkif);
 int tap_blkif_connected(blkif_t *blkif);
 
 #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
@@ -118,7 +118,11 @@ int tap_blkif_schedule(void *arg);
 
 struct tap_blkif *associate_blkif(domid_t domid, int xenbus_id,
                                  blkif_t *blkif);
-void signal_tapdisk(int idx);
+void disassociate_blkif(domid_t domid, int xenbus_id);
+
+int signal_tapdisk(struct backend_info *be);
+
+void blktap_close(blkif_t *blkif);
 
 void tap_blkif_notify_work(blkif_t *blkif);
 
index 11f800197a1e7da64b223929759c3e7bbabcb604..7ea696f25306b8184d8ac79890758ad388f1fab6 100644 (file)
@@ -145,8 +145,12 @@ int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
        return 0;
 }
 
-void tap_blkif_unmap(blkif_t *blkif)
+void tap_blkif_disconnect(blkif_t *blkif)
 {
+       atomic_dec(&blkif->refcnt);
+       wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
+       atomic_inc(&blkif->refcnt);
+
        if (blkif->irq) {
                unbind_from_irqhandler(blkif->irq, blkif);
                blkif->irq = 0;
@@ -160,10 +164,8 @@ void tap_blkif_unmap(blkif_t *blkif)
 
 void tap_blkif_free(blkif_t *blkif)
 {
-       atomic_dec(&blkif->refcnt);
-       wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
-
-       tap_blkif_unmap(blkif);
+       if (!atomic_dec_and_test(&blkif->refcnt))
+               BUG();
        kmem_cache_free(blkif_cachep, blkif);
 }
 
index 0ab00f80e1b66121dab06f332d5e180b906222bf..d945641177c3de901c6fde4e925a1c89c0e18558 100644 (file)
 #include <stdarg.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
+#include <linux/list.h>
 #include <xen/xenbus.h>
 #include "common.h"
 #include "blktap.h"
 
-
 struct backend_info
 {
        struct xenbus_device *dev;
        blkif_t *blkif;
        struct xenbus_watch backend_watch;
+       struct xenbus_watch shutdown_watch;
        int xenbus_id;
+       int dom_id;
        int group_added;
+       char *nodename;
+       atomic_t refcnt;
+       pid_t kthread_pid;
+       int shutdown_signalled;
+       struct list_head list;
 };
 
+/*
+ * blktap_dev_sem is taken with down()/up() around every walk or update of
+ * backend_devices and around the body of signal_shutdown().
+ * backend_devices chains backend_info structures through their 'list'
+ * member.
+ */
+DECLARE_MUTEX(blktap_dev_sem);
+LIST_HEAD(backend_devices);
 
 static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
@@ -62,6 +71,42 @@ static void tap_backend_changed(struct xenbus_watch *, const char **,
 static void tap_frontend_changed(struct xenbus_device *dev,
                             enum xenbus_state frontend_state);
 
+/*
+ * Add this backend to the backend_devices list (under blktap_dev_sem),
+ * then wake sleepers on backdev_setup_wq so they re-evaluate their
+ * wait conditions.
+ */
+static void
+link_backend_device(struct backend_info *be)
+{
+       down(&blktap_dev_sem);
+       list_add(&be->list, &backend_devices);
+       up(&blktap_dev_sem);
+       wake_up(&backdev_setup_wq);
+}
+
+/*
+ * Remove this backend from the backend_devices list (under
+ * blktap_dev_sem), then wake sleepers on backdev_setup_wq; backdev_setup()
+ * waits on that queue and gives up once its backend is no longer linked.
+ */
+static void
+unlink_backend_device(struct backend_info *be)
+{
+       down(&blktap_dev_sem);
+       list_del(&be->list);
+       up(&blktap_dev_sem);
+       wake_up(&backdev_setup_wq);
+}
+
+/*
+ * Report whether a backend with the given domain id and xenbus id is
+ * currently on the backend_devices list.  Returns 1 if so, 0 otherwise.
+ * The list is walked with blktap_dev_sem held.
+ */
+int
+backend_device_linked(unsigned long domid, unsigned long busid)
+{
+       struct backend_info *entry;
+       int found = 0;
+
+       down(&blktap_dev_sem);
+       list_for_each_entry(entry, &backend_devices, list) {
+               if (entry->dom_id != domid || entry->xenbus_id != busid)
+                       continue;
+
+               found = 1;
+               break;
+       }
+       up(&blktap_dev_sem);
+
+       return found;
+}
+
 static int strsep_len(const char *str, char c, unsigned int len)
 {
         unsigned int i;
@@ -92,6 +137,25 @@ static long get_id(const char *str)
        DPRINTK("Get_id called for %s (%s)\n",str,num);
        
         return simple_strtol(num, NULL, 10);
+}
+
+/*
+ * Parse the decimal number found between the two '/' separators located
+ * by strsep_len(str, '/', 1) and strsep_len(str, '/', 2).
+ *
+ * Returns the parsed value, or -1 if either separator is missing, the
+ * separators are out of order, or the text between them does not fit
+ * in the local buffer.
+ */
+static long get_dom_id(const char *str)
+{
+       int start, end, len;
+       char num[25];
+
+       start = strsep_len(str, '/', 1);
+       end   = strsep_len(str, '/', 2);
+       if (start < 0 || end < 0)
+               return -1;
+
+       /*
+        * Number of characters strictly between the separators.  Reject
+        * anything that would underflow or overrun num[] instead of
+        * copying an unchecked length.
+        */
+       len = end - start - 1;
+       if (len < 0 || len >= (int)sizeof(num))
+               return -1;
+
+       strncpy(num, str + start + 1, len);
+       num[len] = '\0';
+
+       return simple_strtol(num, NULL, 10);
 }                              
 
 static int blktap_name(blkif_t *blkif, char *buf)
@@ -150,7 +214,7 @@ static struct attribute_group tapstat_group = {
        .attrs = tapstat_attrs,
 };
 
-int xentap_sysfs_addif(struct xenbus_device *dev)
+static int xentap_sysfs_addif(struct xenbus_device *dev)
 {
        int err;
        struct backend_info *be = dev->dev.driver_data;
@@ -160,49 +224,199 @@ int xentap_sysfs_addif(struct xenbus_device *dev)
        return err;
 }
 
-void xentap_sysfs_delif(struct xenbus_device *dev)
+/*
+ * Remove the tapstat attribute group from the device's kobject and
+ * clear be->group_added.  Does nothing when be->group_added is 0.
+ */
+static void xentap_sysfs_delif(struct xenbus_device *dev)
 {
        struct backend_info *be = dev->dev.driver_data;
+       /* group is not currently marked as added: nothing to remove */
+       if (be->group_added == 0)
+               return;
        sysfs_remove_group(&dev->dev.kobj, &tapstat_group);
        be->group_added = 0;
 }
 
+/*
+ * Ask the backend's kthread, if there is one, to stop.
+ *
+ * Returns 0 when there is no blkif or no kthread attached to it.
+ * Otherwise sets tapif->remove_requested, wakes the kthread and
+ * returns -EBUSY.
+ */
+static int kthread_remove(struct backend_info *be)
+{
+       blkif_t *tap = be->blkif;
+
+       if (tap && tap->xenblkd) {
+               tap->tapif->remove_requested = 1;
+               wake_up_process(tap->xenblkd);
+               return -EBUSY;
+       }
+
+       return 0;
+}
+
+/*
+ * Announce that the backend has shut down: disassociate the blkif (if
+ * any), write the "shutdown-done" node and, if the xenbus device is
+ * still attached, switch it to XenbusStateClosed.
+ *
+ * Runs entirely under blktap_dev_sem and does its work at most once
+ * per shutdown: be->shutdown_signalled is checked on entry and set
+ * once the steps above have been carried out.
+ */
+static void signal_shutdown(struct backend_info *be)
+{
+       down(&blktap_dev_sem);
+
+       if (!be->shutdown_signalled) {
+               int rc;
+
+               if (be->blkif)
+                       disassociate_blkif(be->blkif->domid, be->xenbus_id);
+
+               /* a failed write is only logged; shutdown continues */
+               rc = xenbus_write(XBT_NIL, be->nodename, "shutdown-done", "");
+               if (rc)
+                       WPRINTK("Error writing shutdown-done for %s: %d\n",
+                               be->nodename, rc);
+
+               if (be->dev)
+                       xenbus_switch_state(be->dev, XenbusStateClosed);
+
+               be->shutdown_signalled = 1;
+       }
+
+       up(&blktap_dev_sem);
+}
+
+/*
+ * Drop one reference on @be.
+ *
+ * If the caller is the task recorded in be->kthread_pid, shutdown is
+ * signalled before the reference is dropped.  When the last reference
+ * goes away, shutdown is signalled, the blkif (if any) is disconnected
+ * and freed, and @be together with be->nodename is released.
+ */
+static void backend_release(struct backend_info *be)
+{
+       /* sampled up front, before anything below runs */
+       blkif_t *tap = be->blkif;
+
+       if (be->kthread_pid == current->pid)
+               signal_shutdown(be);
+
+       if (atomic_dec_and_test(&be->refcnt)) {
+               /* last reference: tear everything down */
+               signal_shutdown(be);
+
+               if (tap != NULL) {
+                       tap_blkif_disconnect(tap);
+                       tap_blkif_free(tap);
+                       be->blkif = NULL;
+               }
+
+               kfree(be->nodename);
+               kfree(be);
+       }
+}
+
+/*
+ * Tear down the backend attached to @dev: detach the xenbus device
+ * from the backend_info, unlink it from the backend list, unregister
+ * both xenstore watches, ask the kthread to stop, remove the sysfs
+ * group and drop this path's reference on the backend.
+ *
+ * Always returns 0.
+ */
 static int blktap_remove(struct xenbus_device *dev)
 {
        struct backend_info *be = dev->dev.driver_data;
 
-       if (be->group_added)
-               xentap_sysfs_delif(be->dev);
+       /*
+        * Clear the device pointer under blktap_dev_sem; signal_shutdown()
+        * and blktap_close() test be->dev while holding the same lock.
+        */
+       down(&blktap_dev_sem);
+       be->dev = NULL;
+       up(&blktap_dev_sem);
+
+       unlink_backend_device(be);
+
        if (be->backend_watch.node) {
                unregister_xenbus_watch(&be->backend_watch);
                kfree(be->backend_watch.node);
+               be->backend_watch.node = NULL;
        }
-       if (be->blkif) {
-               if (be->blkif->xenblkd == NULL) {
-                       if (atomic_read(&be->blkif->refcnt) != 1)
-                               WPRINTK("refcnt is %d, expected 1",
-                                       atomic_read(&be->blkif->refcnt));
-                       /* If the kthread was never started, free tap_blkif. */
-                       tap_blkif_free(be->blkif);
-               } else {
-                       struct tap_blkif *info = be->blkif->tapif;
-
-                       down_write(&info->vm_update_sem);
-                       tap_blkif_unmap(be->blkif);
-                       be->blkif->be = NULL;
-                       up_write(&info->vm_update_sem);
-
-                       be->blkif->tapif->remove_requested = 1;
-                       wake_up_process(be->blkif->xenblkd);
-                       blkif_put(be->blkif);
-               }
+
+       if (be->shutdown_watch.node) {
+               unregister_xenbus_watch(&be->shutdown_watch);
+               kfree(be->shutdown_watch.node);
+               be->shutdown_watch.node = NULL;
        }
-       be->blkif->be = NULL;
-       kfree(be);
+
+       /*
+        * kthread_remove() returns non-zero when a kthread is still
+        * attached; in that case it has been flagged for removal and
+        * woken, and a warning is logged here.
+        */
+       if (kthread_remove(be))
+               WPRINTK("BAD REMOVE REQUEST for %s\n", be->nodename);
+
+       xentap_sysfs_delif(dev);
+       /* may free be (and be->nodename) if this was the last reference */
+       backend_release(be);
+
        dev->dev.driver_data = NULL;
+
        return 0;
 }
 
+/*
+ * called by kthread when closing
+ *
+ * Disconnects the blkif, clears the pending remove request, removes
+ * the sysfs group if the xenbus device is still attached, drops a
+ * reference on the backend and finally clears blkif->xenblkd.
+ */
+void blktap_close(blkif_t *blkif)
+{
+       tap_blkif_disconnect(blkif);
+       blkif->tapif->remove_requested = 0;
+
+       /* be->dev is set to NULL by blktap_remove() under this lock */
+       down(&blktap_dev_sem);
+       if (blkif->be->dev)
+               xentap_sysfs_delif(blkif->be->dev);
+       up(&blktap_dev_sem);
+
+       /*
+        * NOTE(review): backend_release() calls tap_blkif_free() on
+        * be->blkif and kfree()s the backend when it drops the last
+        * reference.  If that can happen on this path, the store to
+        * blkif->xenblkd below touches freed memory -- confirm that
+        * another reference is still held here.
+        */
+       backend_release(blkif->be);
+       blkif->xenblkd = NULL;
+}
+
+/*
+ * Xenstore watch callback for the "shutdown-request" node.
+ *
+ * If the node exists, the device is switched to XenbusStateClosing.
+ * When the request value is exactly "force", the kthread is asked to
+ * stop; if there is no kthread to stop, shutdown is signalled
+ * immediately.
+ *
+ * @watch:  the shutdown_watch embedded in the owning backend_info
+ * @vec:    unused
+ * @length: unused
+ */
+static void start_shutdown(struct xenbus_watch *watch,
+                          const char **vec, unsigned int length)
+{
+       int err;
+       char *type;
+       unsigned int len;
+       struct backend_info *be
+               = container_of(watch, struct backend_info, shutdown_watch);
+       struct xenbus_device *dev = be->dev;
+
+       /*
+        * blktap_remove() clears be->dev before it unregisters this
+        * watch, so the callback can still fire with no device attached.
+        */
+       if (!dev)
+               return;
+
+       if (be->shutdown_signalled)
+               return;
+
+       type = xenbus_read(XBT_NIL, dev->nodename, "shutdown-request", &len);
+       err  = (IS_ERR(type) ? PTR_ERR(type) : 0);
+
+       /* node does not exist: nothing has been requested */
+       if (XENBUS_EXIST_ERR(err))
+               return;
+
+       if (err) {
+               xenbus_dev_fatal(dev, err, "reading shutdown-request");
+               return;
+       }
+
+       xenbus_switch_state(dev, XenbusStateClosing);
+
+       if (len == sizeof("force") - 1 && !memcmp(type, "force", len)) {
+               if (!kthread_remove(be))
+                       signal_shutdown(be); /* shutdown immediately */
+       }
+
+       kfree(type);
+}
+
+/*
+ * Write an empty "shutdown-tapdisk" node under the backend's xenstore
+ * path.  Returns the xenbus_write() result; a failure is also logged.
+ */
+int signal_tapdisk(struct backend_info *be)
+{
+       int rc = xenbus_write(XBT_NIL, be->nodename, "shutdown-tapdisk", "");
+
+       if (rc != 0)
+               WPRINTK("ERROR writing shutdown-tapdisk\n");
+
+       return rc;
+}
+
+/*
+ * Prepare a backend for a new frontend connection: remove the
+ * "shutdown-done", "shutdown-tapdisk" and "info" xenstore nodes, zero
+ * the cached sector count, clear shutdown_signalled and move the
+ * device to XenbusStateInitWait.
+ *
+ * Any failed removal is reported with xenbus_dev_fatal() and aborts
+ * the remaining steps.
+ */
+static void blktap_reconnect(struct backend_info *be)
+{
+       struct xenbus_device *xdev = be->dev;
+       int rc;
+
+       rc = xenbus_rm(XBT_NIL, xdev->nodename, "shutdown-done");
+       if (rc != 0) {
+               xenbus_dev_fatal(xdev, rc, "removing shutdown-done");
+               return;
+       }
+
+       rc = xenbus_rm(XBT_NIL, xdev->nodename, "shutdown-tapdisk");
+       if (rc != 0) {
+               xenbus_dev_fatal(xdev, rc, "removing shutdown-tapdisk");
+               return;
+       }
+
+       /* sector count is reset before the "info" node is removed */
+       be->blkif->sectors = 0;
+
+       rc = xenbus_rm(XBT_NIL, be->nodename, "info");
+       if (rc != 0) {
+               xenbus_dev_fatal(be->dev, rc, "refreshing info");
+               return;
+       }
+
+       be->shutdown_signalled = 0;
+
+       xenbus_switch_state(xdev, XenbusStateInitWait);
+}
+
 static void tap_update_blkif_status(blkif_t *blkif)
 { 
        int err;
@@ -233,7 +447,7 @@ static void tap_update_blkif_status(blkif_t *blkif)
 
+/*
+ * Returns non-zero when the blkif has both a non-zero irq and a
+ * non-NULL shared ring pointer (blk_rings.common.sring).
+ */
 int tap_blkif_connected(blkif_t *blkif)
 {
-       return (blkif->be && blkif->be->dev->state == XenbusStateConnected);
+       return (blkif->irq && blkif->blk_rings.common.sring);
 }
 
 /**
@@ -256,7 +470,26 @@ static int blktap_probe(struct xenbus_device *dev,
 
        be->dev = dev;
        dev->dev.driver_data = be;
+       atomic_set(&be->refcnt, 1);
+
+       be->dom_id    = get_dom_id(dev->nodename);
        be->xenbus_id = get_id(dev->nodename);
+       if (be->xenbus_id == -1 || be->dom_id == -1) {
+               xenbus_dev_fatal(dev, -EINVAL,
+                                "getting dom/bus id");
+               kfree(be);
+               return -EINVAL;
+       }
+
+       be->nodename = kasprintf(GFP_KERNEL, "%s", dev->nodename);
+       if (!be->nodename) {
+               xenbus_dev_fatal(dev, -ENOMEM,
+                                "allocating backend structure");
+               kfree(be);
+               return -ENOMEM;
+       }
+
+       link_backend_device(be);
 
        be->blkif = tap_alloc_blkif(dev->otherend_id);
        if (IS_ERR(be->blkif)) {
@@ -275,7 +508,12 @@ static int blktap_probe(struct xenbus_device *dev,
                                 &be->backend_watch, tap_backend_changed);
        if (err)
                goto fail;
-       
+
+       err = xenbus_watch_path2(dev, dev->nodename, "shutdown-request",
+                                &be->shutdown_watch, start_shutdown);
+       if (err)
+               goto fail;
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err)
                goto fail;
@@ -305,8 +543,7 @@ static void tap_backend_changed(struct xenbus_watch *watch,
 
        /** 
         * Check to see whether userspace code has opened the image 
-        * and written sector
-        * and disk info to xenstore
+        * and written sector and disk info to xenstore
         */
        err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info, 
                            NULL);
@@ -319,6 +556,10 @@ static void tap_backend_changed(struct xenbus_watch *watch,
 
        err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu", 
                            &be->blkif->sectors, NULL);
+       if (err) {
+               xenbus_dev_error(dev, err, "getting sectors");
+               return;
+       }
 
        if (be->blkif->xenblkd == NULL) {
                char name[TASK_COMM_LEN];
@@ -349,6 +590,9 @@ static void tap_backend_changed(struct xenbus_watch *watch,
                        return;
                }
 
+               atomic_inc(&be->refcnt);
+               be->kthread_pid = be->blkif->xenblkd->pid;
+
                err = xenbus_printf(XBT_NIL, dev->nodename, "kthread-pid",
                                    "%d", be->blkif->xenblkd->pid);
                if (err) {
@@ -370,14 +614,16 @@ static void tap_frontend_changed(struct xenbus_device *dev,
        struct backend_info *be = dev->dev.driver_data;
        int err;
 
-       DPRINTK("\n");
+       DPRINTK("%s: front end: %s\n",
+               dev->nodename, xenbus_strstate(frontend_state));
 
        switch (frontend_state) {
        case XenbusStateInitialising:
                if (dev->state == XenbusStateClosed) {
                        printk(KERN_INFO "%s: %s: prepare for reconnect\n",
                               __FUNCTION__, dev->nodename);
-                       xenbus_switch_state(dev, XenbusStateInitWait);
+
+                       blktap_reconnect(be);
                }
                break;
 
@@ -399,15 +645,14 @@ static void tap_frontend_changed(struct xenbus_device *dev,
                break;
 
        case XenbusStateClosing:
-               tap_blkif_unmap(be->blkif);
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
        case XenbusStateClosed:
-               xenbus_switch_state(dev, XenbusStateClosed);
-               if (xenbus_dev_is_online(dev))
-                       break;
-               /* fall through if not online */
+               if (!kthread_remove(be))
+                       signal_shutdown(be);
+               break;
+
        case XenbusStateUnknown:
                device_unregister(&dev->dev);
                break;
@@ -417,6 +662,9 @@ static void tap_frontend_changed(struct xenbus_device *dev,
                                 frontend_state);
                break;
        }
+
+       DPRINTK("%s: back end: %s\n",
+               dev->nodename, xenbus_strstate(dev->state));
 }