xenbits.xensource.com Git - xenclient/kernel.git/commitdiff
imported patch blktap-shutdown-cleanup sles-bimodal-blkproto-compatibility
author t_jeang <devnull@localhost>
Tue, 6 Jan 2009 12:06:01 +0000 (12:06 +0000)
committer t_jeang <devnull@localhost>
Tue, 6 Jan 2009 12:06:01 +0000 (12:06 +0000)
drivers/xen/blktap/backdev.c
drivers/xen/blktap/backdev.h
drivers/xen/blktap/blktap.c
drivers/xen/blktap/blktap.h
drivers/xen/blktap/common.h
drivers/xen/blktap/interface.c
drivers/xen/blktap/xenbus.c
include/linux/mm.h
mm/memory.c

index b98cbedc761b9742b2506d54c2fd527cc053d554..1b9bd1beb0246ab149333c0ad7fe3770e537fd87 100644 (file)
@@ -45,6 +45,8 @@ destroy_backdev(struct tap_blkif *uinfo)
 {
        struct backdev_info *info = uinfo->backdev;
 
+       info->destroy = 1;
+
        DPRINTK("destroy backdev %d users %d\n", uinfo->minor, info->users);
        if (info->users)
                return -EBUSY;
@@ -59,9 +61,13 @@ destroy_backdev(struct tap_blkif *uinfo)
 
        blk_cleanup_queue(info->gd->queue);
 
+       blkif_put(uinfo->blkif);
+
        uinfo->backdev = NULL;
        kfree(info);
 
+       clear_bit(4, &uinfo->dev_inuse);
+
        return 0;
 }
 
@@ -78,7 +84,7 @@ backdev_release(struct inode *inode, struct file *filep)
 {
        struct backdev_info *info = inode->i_bdev->bd_disk->private_data;
        info->users--;
-       if (info->uinfo->dev_inuse == 0)
+       if (info->destroy)
                destroy_backdev(info->uinfo);
        return 0;
 }
@@ -191,6 +197,7 @@ static int umap_uaddr_fn(
 {
        struct mm_struct *mm = (struct mm_struct *)data;
 
+       DPRINTK("unmap_uaddr ptep %p\n", ptep);
        pte_clear(mm, addr, ptep);
        xen_invlpg(addr);
        return 0;
@@ -202,10 +209,9 @@ static int umap_uaddr(struct mm_struct *mm, unsigned long address)
 }
 
 static void
-process_backdev_request(struct backdev_info *info)
+process_backdev_request(struct tap_blkif *uinfo, struct backdev_info *info)
 {
     request_queue_t *rq;
-    struct tap_blkif *uinfo;
     struct request *req;
     blkif_request_t blkif_req;
     blkif_request_t *target;
@@ -225,7 +231,6 @@ process_backdev_request(struct backdev_info *info)
     struct page *pg;
     int nr_sects = 0;
 
-    uinfo = info->uinfo;
     rq = info->gd->queue;
     blkif = uinfo->blkif;
 
@@ -242,11 +247,7 @@ process_backdev_request(struct backdev_info *info)
            end_request(req, 0);
            continue;
        }
-       if (info != req->rq_disk->private_data) {
-           end_request(req, 0);
-           continue;
-       }
-       if (uinfo->dev_inuse == 0) {
+       if (info->destroy) {
            DPRINTK("device no longer in use %d\n", info->uinfo->minor);
            end_request(req, 0);
            continue;
@@ -259,7 +260,6 @@ process_backdev_request(struct backdev_info *info)
            break;
        }
 
-       /* Check we have space on user ring - should never fail. */
        usr_idx = GET_NEXT_REQ(uinfo->idx_map);
        if (usr_idx == INVALID_REQ)
            goto wait;
@@ -320,10 +320,8 @@ process_backdev_request(struct backdev_info *info)
                offset = (uvaddr - uinfo->vma->vm_start) >> PAGE_SHIFT;
                pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
                DPRINTK("mapped uaddr %08lx to page %p\n", uvaddr, pg);
-               ((struct page **)uinfo->vma->vm_private_data)[offset] = pg;
+               uinfo->foreign_map.map[offset] = pg;
                SetPageReserved(pg);
-               if (0) if (vm_insert_page(uinfo->vma, uvaddr, pg))
-                   DPRINTK("boohoo\n");
                pending_handle(mmap_idx, pending_idx,
                               blkif_req.nr_segments).kernel =
                    INVALID_GRANT_HANDLE;
@@ -335,7 +333,6 @@ process_backdev_request(struct backdev_info *info)
            }
        }
 
-       pending_req->blkif     = blkif;
        pending_req->id        = (unsigned long)req;
        pending_req->operation = blkif_req.operation;
        pending_req->status    = BLKIF_RSP_OKAY;
@@ -365,7 +362,7 @@ process_backdev_request(struct backdev_info *info)
     }
 
     if (queued != 0)
-       blktap_kick_user(blkif->dev_num);
+       blktap_kick_user(uinfo);
     return;
 }
 
@@ -375,12 +372,12 @@ do_backdev_request(request_queue_t *rq)
        struct backdev_info *info;
 
        info = rq->queuedata;
-       if (info->uinfo->blkif) {
+       if (info->uinfo) {
                info->uinfo->blkif->waiting_reqs = 1;
                wake_up(&info->uinfo->blkif->wq);
+               DPRINTK("got requests for dev %d wake %p\n",
+                       info->uinfo->minor, info->uinfo->blkif);
        }
-       DPRINTK("got requests for dev %d wake %p/%p\n", info->uinfo->minor,
-               info->uinfo->blkif, &info->uinfo->blkif->wq);
 }
 
 void
@@ -438,9 +435,6 @@ backdev_finish_req(struct tap_blkif *info, int usr_idx, blkif_response_t *res,
                BUG();
        }
 
-       if (info->blkif)
-               info->blkif->waiting_reqs = 1;
-
        spin_unlock_irq(&backdev_io_lock);
 }
 
@@ -449,8 +443,6 @@ backdev_restart_queue(struct tap_blkif *uinfo)
 {
        struct backdev_info *info;
 
-       if (uinfo == NULL)
-               return;
        info = uinfo->backdev;
        if (info == NULL || info->gd == NULL || info->gd->queue == NULL)
                return;
@@ -461,7 +453,7 @@ backdev_restart_queue(struct tap_blkif *uinfo)
                if (blk_queue_stopped(info->gd->queue))
                        blk_start_queue(info->gd->queue);
                /* Kick things off immediately. */
-               process_backdev_request(info);
+               process_backdev_request(uinfo, info);
                spin_unlock_irq(&backdev_io_lock);
        }
        return;
@@ -562,7 +554,12 @@ create_backdev(struct tap_blkif *uinfo)
        if (err)
                goto error;
 
+       set_bit(4, &uinfo->dev_inuse);
+
+       blkif_get(uinfo->blkif);
+
        uinfo->backdev = info;
+       wake_up(&backdev_setup_wq);
 
        goto out;
 
index a885b9ed8b8971b2177bddc4d4e4a5890773c91a..78f273302cc301d32c0f2b7e20e4e9eb77f787c1 100644 (file)
@@ -4,6 +4,7 @@ struct pending_req;
 
 struct backdev_info {
        int users;
+       int destroy;
        struct gendisk *gd;
        struct tap_blkif *uinfo;
 };
index 1acf512213dd8935db9181c2c06a1003f99a5e18..4205ecb77e7f4241989905ce0b9595a06474c0f1 100644 (file)
@@ -70,7 +70,6 @@ static int mmap_pages = MMAP_PAGES;
                      */
 
 static struct tap_blkif *tapfds[MAX_TAP_DEV];
-static int blktap_next_minor;
 
 module_param(blkif_reqs, int, 0);
 /* Run-time switchable: /sys/module/blktap/parameters/ */
@@ -82,7 +81,8 @@ module_param(debug_lvl, int, 0644);
 pending_req_t *pending_reqs[MAX_PENDING_REQS];
 static struct list_head pending_free;
 static DEFINE_SPINLOCK(pending_free_lock);
-static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+DECLARE_WAIT_QUEUE_HEAD(backdev_setup_wq);
 static int alloc_pending_reqs;
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -97,23 +97,8 @@ static unsigned short mmap_inuse = 0;
  * GRANT HANDLES
  */
 
-/* When using grant tables to map a frame for device access then the
- * handle returned must be used to unmap the frame. This is needed to
- * drop the ref count on the frame.
- */
-struct grant_handle_pair
-{
-        grant_handle_t kernel;
-        grant_handle_t user;
-};
-#define INVALID_GRANT_HANDLE   0xFFFF
-
-static struct grant_handle_pair 
+struct grant_handle_pair 
     pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
-#define pending_handle(_id, _idx, _i) \
-    (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
-    + (_i)])
-
 
 static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/
 
@@ -135,22 +120,6 @@ static int blktap_major;
 #define BLKTAP_IOCTL_PRINT_IDXS      100  
 #define BLKTAP_IOCTL_BACKDEV_SETUP   200
 
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
-#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
-#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE     0x00000002  /* unimp.             */
-
-#define BLKTAP_MODE_INTERPOSE \
-           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
-       return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
-               (arg == BLKTAP_MODE_INTERCEPT_FE) ||
-                (arg == BLKTAP_MODE_INTERPOSE   ));
-}
-
 static inline int OFFSET_TO_USR_IDX(int offset)
 {
        return offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
@@ -194,42 +163,43 @@ static pte_t blktap_clear_pte(struct vm_area_struct *vma,
        pte_t copy;
        tap_blkif_t *info;
        int offset, seg, usr_idx, pending_idx, mmap_idx;
-       unsigned long uvstart = vma->vm_start + (RING_PAGES << PAGE_SHIFT);
        unsigned long kvaddr;
+       struct vm_foreign_map *foreign_map;
        struct page **map;
-       struct page *pg;
        struct grant_handle_pair *khandle;
        struct gnttab_unmap_grant_ref unmap[2];
        int count = 0;
 
+       info = vma->vm_private_data;
+
        /*
-        * If the address is before the start of the grant mapped region or
-        * if vm_file is NULL (meaning mmap failed and we have nothing to do)
+        * Zap entry if the address is before the start of the grant
+        * mapped region.
         */
-       if (uvaddr < uvstart || vma->vm_file == NULL)
-               return ptep_get_and_clear_full(vma->vm_mm, uvaddr, 
+       if (uvaddr < info->user_vstart)
+               return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
                                               ptep, is_fullmm);
 
-       info = vma->vm_file->private_data;
-       map = vma->vm_private_data;
+       foreign_map = vma->vm_private_data;
+       map = foreign_map->map;
 
        /* TODO Should these be changed to if statements? */
        BUG_ON(!info);
        BUG_ON(!info->idx_map);
        BUG_ON(!map);
 
-       offset = (int) ((uvaddr - uvstart) >> PAGE_SHIFT);
+       offset = (int)((uvaddr - info->user_vstart) >> PAGE_SHIFT);
        usr_idx = OFFSET_TO_USR_IDX(offset);
        seg = OFFSET_TO_SEG(offset);
 
        pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
        mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
 
-       kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg);
-       pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
-       ClearPageReserved(pg);
-       map[offset + RING_PAGES] = NULL;
+       offset = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT);
+       ClearPageReserved(map[offset]);
+       map[offset] = NULL;
 
+       kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg);
        khandle = &pending_handle(mmap_idx, pending_idx, seg);
 
        if (khandle->kernel != INVALID_GRANT_HANDLE) {
@@ -252,9 +222,6 @@ static pte_t blktap_clear_pte(struct vm_area_struct *vma,
                                    khandle->user);
                count++;
        } else {
-               BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap));
-
-               /* USING SHADOW PAGE TABLES. */
                copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
                                               is_fullmm);
        }
@@ -269,7 +236,32 @@ static pte_t blktap_clear_pte(struct vm_area_struct *vma,
        return copy;
 }
 
+static void blktap_vm_close(struct vm_area_struct *vma)
+{
+       struct tap_blkif *info = vma->vm_private_data;
+
+       down_write(&info->vm_update_sem);
+
+       zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
+
+       kfree(info->foreign_map.map);
+
+       /* Free the ring page. */
+       ClearPageReserved(virt_to_page(info->ufe_ring.sring));
+       free_page((unsigned long)info->ufe_ring.sring);
+
+       kfree(info->idx_map);
+       info->idx_map = NULL;
+
+       info->vma = NULL;
+       clear_bit(2, &info->dev_inuse);
+
+       up_write(&info->vm_update_sem);
+}
+
+static 
 struct vm_operations_struct blktap_vm_ops = {
+       close:    blktap_vm_close,
        nopage:   blktap_nopage,
        zap_pte:  blktap_clear_pte,
 };
@@ -313,56 +305,43 @@ static tap_blkif_t *get_next_free_dev(void)
 
        /* tapfds[0] is always NULL */
 
-       for (minor = 1; minor < blktap_next_minor; minor++) {
+       for (minor = 1; minor < MAX_TAP_DEV; minor++) {
                info = tapfds[minor];
                /* we could have failed a previous attempt. */
-               if (!info ||
-                   ((info->dev_inuse == 0) &&
-                    (info->backdev == NULL) &&
-                    (info->dev_pending == 0)) ) {
-                       info->dev_pending = 1;
+               if (info == NULL ||
+                   (info->dev_inuse == 0 &&
+                    !test_and_set_bit(0, &info->dev_inuse)))
                        goto found;
-               }
        }
        info = NULL;
-       minor = -1;
 
-       /*
-        * We didn't find free device. If we can still allocate
-        * more, then we grab the next device minor that is
-        * available.  This is done while we are still under
-        * the protection of the pending_free_lock.
-        */
-       if (blktap_next_minor < MAX_TAP_DEV)
-               minor = blktap_next_minor++;
 found:
        spin_unlock_irq(&pending_free_lock);
 
-       if (!info && minor > 0) {
+       if (info == NULL) {
                info = kzalloc(sizeof(*info), GFP_KERNEL);
-               if (unlikely(!info)) {
-                       /*
-                        * If we failed here, try to put back
-                        * the next minor number. But if one
-                        * was just taken, then we just lose this
-                        * minor.  We can try to allocate this
-                        * minor again later.
-                        */
-                       spin_lock_irq(&pending_free_lock);
-                       if (blktap_next_minor == minor+1)
-                               blktap_next_minor--;
+               if (unlikely(!info))
+                       goto out;
+
+               init_rwsem(&info->vm_update_sem);
+               set_bit(0, &info->dev_inuse);
+
+               spin_lock_irq(&pending_free_lock);
+               for (; minor < MAX_TAP_DEV; minor++)
+                       if (tapfds[minor] == NULL)
+                               break;
+               if (minor == MAX_TAP_DEV) {
+                       kfree(info);
+                       info = NULL;
                        spin_unlock_irq(&pending_free_lock);
                        goto out;
                }
 
                info->minor = minor;
-               /*
-                * Make sure that we have a minor before others can
-                * see us.
-                */
-               wmb();
                tapfds[minor] = info;
 
+               spin_unlock_irq(&pending_free_lock);
+
                if ((class = get_xen_class()) != NULL)
                        class_device_create(class, NULL,
                                            MKDEV(blktap_major, minor), NULL,
@@ -373,152 +352,72 @@ out:
        return info;
 }
 
-int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) 
+struct tap_blkif *
+associate_blkif(domid_t domid, int xenbus_id, blkif_t *blkif)
 {
        tap_blkif_t *info;
        int i;
 
-       for (i = 1; i < blktap_next_minor; i++) {
+       for (i = 1; i < MAX_TAP_DEV; i++) {
                info = tapfds[i];
-               if ( info &&
-                    (info->trans.domid == domid) &&
-                    (info->trans.busid == xenbus_id) ) {
+               if (info && info->trans.domid == domid &&
+                   info->trans.busid == xenbus_id) {
                        info->blkif = blkif;
                        info->status = RUNNING;
-                       return i;
+                       return info;
                }
        }
-       return -1;
-}
-
-void signal_tapdisk(int idx) 
-{
-       tap_blkif_t *info;
-       struct task_struct *ptask;
-
-       info = tapfds[idx];
-       if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
-               return;
-
-       if (info->pid > 0) {
-               ptask = find_task_by_pid(info->pid);
-               if (ptask)
-                       info->status = CLEANSHUTDOWN;
-       }
-       info->blkif = NULL;
-
-       return;
+       return NULL;
 }
 
 static int blktap_open(struct inode *inode, struct file *filp)
 {
-       blkif_sring_t *sring;
        int idx = iminor(inode) - BLKTAP_MINOR;
        tap_blkif_t *info;
-       int i;
        
        /* ctrl device, treat differently */
-       if (!idx)
+       if (idx == 0)
                return 0;
 
-       info = tapfds[idx];
-
-       if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) {
+       if (idx < 1 || idx > MAX_TAP_DEV || tapfds[idx] == NULL) {
                WPRINTK("Unable to open device /dev/xen/blktap%d\n",
                        idx);
                return -ENODEV;
        }
 
-       DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
-       
-       /*Only one process can access device at a time*/
-       if (test_and_set_bit(0, &info->dev_inuse))
+       info = tapfds[idx];
+
+       DPRINTK("Opening device /dev/xen/blktap%d\n", idx);
+
+       /* Only one process can access device at a time */
+       if (test_and_set_bit(1, &info->dev_inuse))
                return -EBUSY;
 
-       info->dev_pending = 0;
-           
-       /* Allocate the fe ring. */
-       sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
-       if (sring == NULL)
-               goto fail_nomem;
+       clear_bit(0, &info->dev_inuse);
 
-       SetPageReserved(virt_to_page(sring));
-    
-       SHARED_RING_INIT(sring);
-       FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
-       
        filp->private_data = info;
-       info->vma = NULL;
 
-       info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, 
-                               GFP_KERNEL);
-       
-       if (info->idx_map == NULL)
-               goto fail_nomem;
+       DPRINTK("Tap open: device /dev/xen/blktap%d\n", idx);
 
-       if (idx > 0) {
-               init_waitqueue_head(&info->wait);
-               for (i = 0; i < MAX_PENDING_REQS; i++) 
-                       info->idx_map[i] = INVALID_REQ;
-       }
-
-       DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
        return 0;
-
- fail_nomem:
-       return -ENOMEM;
 }
 
 static int blktap_release(struct inode *inode, struct file *filp)
 {
        tap_blkif_t *info = filp->private_data;
-       int ret;
        
        /* check for control device */
-       if (!info)
+       if (info == NULL)
                return 0;
 
-       info->dev_inuse = 0;
        DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
-
-       /* Free the ring page. */
-       ClearPageReserved(virt_to_page(info->ufe_ring.sring));
-       free_page((unsigned long) info->ufe_ring.sring);
-
-       /* Clear any active mappings and free foreign map table */
-       if (info->vma) {
-               zap_page_range(
-                       info->vma, info->vma->vm_start, 
-                       info->vma->vm_end - info->vma->vm_start, NULL);
-
-               kfree(info->vma->vm_private_data);
-
-               info->vma = NULL;
-       }
-
-       if (info->idx_map) {
-               kfree(info->idx_map);
-               info->idx_map = NULL;
-       }
-
-       if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
-               if (info->blkif->xenblkd != NULL) {
-                       kthread_stop(info->blkif->xenblkd);
-                       info->blkif->xenblkd = NULL;
-               }
-               info->status = CLEANSHUTDOWN;
-       }
-
-       ret = destroy_backdev(info);
-       if (ret && ret != -EBUSY)
-               WPRINTK("destroy_backdev failed %d\n", ret);
+       clear_bit(1, &info->dev_inuse);
 
        filp->private_data = NULL;      
 
        return 0;
 }
 
-
 /* Note on mmap:
  * We need to map pages to user space in a way that will allow the block
  * subsystem set up direct IO to them.  This couldn't be done before, because
@@ -539,28 +438,52 @@ static int blktap_release(struct inode *inode, struct file *filp)
  */
 static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
 {
+       tap_blkif_t *info = filp->private_data;
+       blkif_sring_t *sring = NULL;
+       struct page **map = NULL;
        int size;
-       struct page **map;
        int i;
-       tap_blkif_t *info = filp->private_data;
        int ret;
 
-       if (info == NULL) {
-               WPRINTK("blktap: mmap, retrieving idx failed\n");
+       if (info == NULL || test_and_set_bit(2, &info->dev_inuse))
                return -ENOMEM;
-       }
-       
-       vma->vm_flags |= VM_RESERVED;
-       vma->vm_ops = &blktap_vm_ops;
 
-       size = vma->vm_end - vma->vm_start;
-       if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
+       size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+       if (size != (mmap_pages + RING_PAGES)) {
                WPRINTK("you _must_ map exactly %d pages!\n",
-                      mmap_pages + RING_PAGES);
+                       mmap_pages + RING_PAGES);
                return -EAGAIN;
        }
 
-       size >>= PAGE_SHIFT;
+       /* Allocate the fe ring. */
+       sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+       if (sring == NULL) {
+               WPRINTK("Couldn't alloc sring.\n");
+               goto fail_mem;
+       }
+       info->idx_map = kmalloc(sizeof(unsigned long) *
+                               MAX_PENDING_REQS, GFP_KERNEL);
+       if (info->idx_map == NULL) {
+               WPRINTK("Couldn't alloc idx_map.\n");
+               goto fail_mem;
+       }
+       map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
+                     * sizeof(struct page *),
+                     GFP_KERNEL);
+       if (map == NULL) {
+               WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
+               goto fail_mem;
+       }
+
+       SetPageReserved(virt_to_page(sring));
+
+       SHARED_RING_INIT(sring);
+       FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
+
+       init_waitqueue_head(&info->wait);
+       for (i = 0; i < MAX_PENDING_REQS; i++) 
+               info->idx_map[i] = INVALID_REQ;
+
        info->rings_vstart = vma->vm_start;
        info->user_vstart  = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
     
@@ -573,42 +496,39 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
                                      __pa(info->ufe_ring.sring) >> PAGE_SHIFT,
                                      PAGE_SIZE, vma->vm_page_prot);
        if (ret) {
-               WPRINTK("Mapping user ring failed!\n");
+               WPRINTK("Mapping user ring failed.\n");
                goto fail;
        }
 
        /* Mark this VM as containing foreign pages, and set up mappings. */
-       map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
-                     * sizeof(struct page *),
-                     GFP_KERNEL);
-       if (map == NULL) {
-               WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
-               goto fail;
-       }
-
-       for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
-               map[i] = NULL;
-    
-       vma->vm_private_data = map;
+       info->foreign_map.map = map;
+       vma->vm_private_data = &info->foreign_map;
        vma->vm_flags |= VM_FOREIGN;
        vma->vm_flags |= VM_DONTCOPY;
+       vma->vm_flags |= VM_RESERVED;
+       vma->vm_ops = &blktap_vm_ops;
 
 #ifdef CONFIG_X86
        vma->vm_mm->context.has_foreign_mappings = 1;
 #endif
 
        info->vma = vma;
-       info->ring_ok = 1;
        return 0;
+
  fail:
        /* Clear any active mappings. */
        zap_page_range(vma, vma->vm_start, 
                       vma->vm_end - vma->vm_start, NULL);
+       ClearPageReserved(virt_to_page(sring));
+ fail_mem:
+       free_page((unsigned long)sring);
+       kfree(info->idx_map);
+       info->idx_map = NULL;
+       kfree(map);
 
        return -ENOMEM;
 }
 
-
 static int blktap_ioctl(struct inode *inode, struct file *filp,
                         unsigned int cmd, unsigned long arg)
 {
@@ -616,25 +536,17 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
 
        switch(cmd) {
        case BLKTAP_IOCTL_KICK_FE: 
-       {
+               if (info == NULL)
+                       return -ENOENT;
+
                /* There are fe messages to process. */
                return blktap_read_ufe_ring(info);
-       }
+
        case BLKTAP_IOCTL_SETMODE:
-       {
-               if (info) {
-                       if (BLKTAP_MODE_VALID(arg)) {
-                               info->mode = arg;
-                               /* XXX: may need to flush rings here. */
-                               DPRINTK("blktap: set mode to %lx\n", 
-                                      arg);
-                               return 0;
-                       }
-               }
+               /* deprecated */
                return 0;
-       }
+
        case BLKTAP_IOCTL_PRINT_IDXS:
-        {
                if (info) {
                        printk("User Rings: \n-----------\n");
                        printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
@@ -645,16 +557,15 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
                                info->ufe_ring.sring->rsp_prod);
                }
                return 0;
-        }
+
        case BLKTAP_IOCTL_SENDPID:
-       {
                if (info) {
                        info->pid = (pid_t)arg;
                        DPRINTK("blktap: pid received %d\n", 
                               info->pid);
                }
                return 0;
-       }
+
        case BLKTAP_IOCTL_NEWINTF:
        {               
                uint64_t val = (uint64_t)arg;
@@ -672,55 +583,57 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
                info->trans.busid = tr->busid;
                return info->minor;
        }
+
        case BLKTAP_IOCTL_FREEINTF:
        {
                unsigned long dev = arg;
-               unsigned long flags;
 
                DPRINTK("FREEINTF Req for dev %ld\n", dev);
-               info = tapfds[dev];
-
-               if ((dev > MAX_TAP_DEV) || !info)
-                       return 0; /* should this be an error? */
+               if (dev > MAX_TAP_DEV || tapfds[dev] == NULL)
+                       return -EINVAL;
 
-               spin_lock_irqsave(&pending_free_lock, flags);
-               if (info->dev_pending)
-                       info->dev_pending = 0;
-               spin_unlock_irqrestore(&pending_free_lock, flags);
+               clear_bit(0, &tapfds[dev]->dev_inuse);
 
                return 0;
        }
+
        case BLKTAP_IOCTL_MINOR:
        {
                unsigned long dev = arg;
 
-               info = tapfds[dev];
-
-               if ((dev > MAX_TAP_DEV) || !info)
+               if (dev > MAX_TAP_DEV || tapfds[dev] == NULL)
                        return -EINVAL;
 
-               return info->minor;
+               return tapfds[dev]->minor;
        }
+
        case BLKTAP_IOCTL_MAJOR:
                return blktap_major;
 
        case BLKTAP_QUERY_ALLOC_REQS:
-       {
                WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
                       alloc_pending_reqs, blkif_reqs);
                return (alloc_pending_reqs/blkif_reqs) * 100;
-       }
 
        case BLKTAP_IOCTL_BACKDEV_SETUP:
        {
                unsigned long dev = arg;
+               int ret;
 
                DPRINTK("BLKTAP_IOCTL_BACKDEV_SETUP ioctl: %ld\n", dev);
-               info = tapfds[dev];
+               if (dev > MAX_TAP_DEV || tapfds[dev] == NULL)
+                       return -EINVAL;
 
-               return create_backdev(info);
+               while (tapfds[dev]->backdev == NULL) {
+                       ret = wait_event_interruptible(backdev_setup_wq,
+                                                      tapfds[dev]->backdev);
+                       if (ret)
+                               return ret;
+               }
+               return 0;
        }
        }
+
        return -ENOIOCTLCMD;
 }
 
@@ -729,7 +642,7 @@ static unsigned int blktap_poll(struct file *filp, poll_table *wait)
        tap_blkif_t *info = filp->private_data;
        
        /* do not work on the control device */
-       if (!info)
+       if (info == NULL)
                return 0;
 
        poll_wait(filp, &info->wait, wait);
@@ -740,24 +653,17 @@ static unsigned int blktap_poll(struct file *filp, poll_table *wait)
        return 0;
 }
 
-void blktap_kick_user(int idx)
+void blktap_kick_user(struct tap_blkif *info)
 {
-       tap_blkif_t *info;
-
-       info = tapfds[idx];
-
-       if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
-               return;
 
        wake_up_interruptible(&info->wait);
-
-       return;
 }
 
-static int do_block_io_op(blkif_t *blkif);
+static int do_block_io_op(tap_blkif_t *info);
 static void dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
-                                pending_req_t *pending_req);
+                                pending_req_t *pending_req,
+                                int usr_idx);
 static void make_response(blkif_t *blkif, u64 id,
                           unsigned short op, int st);
 
@@ -864,60 +770,50 @@ void free_req(pending_req_t *req)
                wake_up(&pending_free_wq);
 }
 
-static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
-                           int tapidx)
+#define DO_FAST_FLUSH
+static void fast_flush_area(pending_req_t *req, int pending_idx, int usr_idx,
+                           tap_blkif_t *info)
 {
+#ifdef DO_FAST_FLUSH
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
        unsigned int i, invcount = 0;
        struct grant_handle_pair *khandle;
        uint64_t ptep;
        int ret, mmap_idx;
        unsigned long kvaddr, uvaddr;
-       tap_blkif_t *info;
-       
-
-       info = tapfds[tapidx];
-
-       if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) {
-               WPRINTK("fast_flush: Couldn't get info!\n");
-               return;
-       }
+       struct page **map;
+       int offset;
+#endif
 
-       if (info->vma != NULL &&
-           xen_feature(XENFEAT_auto_translated_physmap)) {
-               down_write(&info->vma->vm_mm->mmap_sem);
+       if (xen_feature(XENFEAT_auto_translated_physmap))
                zap_page_range(info->vma, 
-                              MMAP_VADDR(info->user_vstart, u_idx, 0), 
+                              MMAP_VADDR(info->user_vstart, usr_idx, 0), 
                               req->nr_pages << PAGE_SHIFT, NULL);
-               up_write(&info->vma->vm_mm->mmap_sem);
-               return;
-       }
 
+#ifdef DO_FAST_FLUSH
        mmap_idx = req->mem_idx;
 
+       map = info->foreign_map.map;
+
        for (i = 0; i < req->nr_pages; i++) {
-               kvaddr = idx_to_kaddr(mmap_idx, k_idx, i);
-               uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
+               kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
+               uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
 
-               khandle = &pending_handle(mmap_idx, k_idx, i);
+               khandle = &pending_handle(mmap_idx, pending_idx, i);
 
                if (khandle->kernel != INVALID_GRANT_HANDLE) {
-                       gnttab_set_unmap_op(&unmap[invcount],
-                                           idx_to_kaddr(mmap_idx, k_idx, i),
+                       gnttab_set_unmap_op(&unmap[invcount], kvaddr,
                                            GNTMAP_host_map, khandle->kernel);
                        invcount++;
 
-                       set_phys_to_machine(
-                               __pa(idx_to_kaddr(mmap_idx, k_idx, i))
-                               >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+                       set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
+                                           INVALID_P2M_ENTRY);
                }
 
                if (khandle->user != INVALID_GRANT_HANDLE) {
                        BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-                       if (create_lookup_pte_addr(
-                               info->vma->vm_mm,
-                               MMAP_VADDR(info->user_vstart, u_idx, i),
-                               &ptep) !=0) {
+                       if (create_lookup_pte_addr(info->vma->vm_mm,
+                                                  uvaddr, &ptep) != 0) {
                                WPRINTK("Couldn't get a pte addr!\n");
                                return;
                        }
@@ -930,15 +826,20 @@ static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
                        invcount++;
                }
 
+               offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
+               ClearPageReserved(map[offset]);
+               map[offset] = NULL;
+
                BLKTAP_INVALIDATE_HANDLE(khandle);
        }
-       ret = HYPERVISOR_grant_table_op(
-               GNTTABOP_unmap_grant_ref, unmap, invcount);
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap,
+                                       invcount);
        BUG_ON(ret);
-       
-       if (info->vma != NULL && !xen_feature(XENFEAT_auto_translated_physmap))
+#endif
+
+       if (!xen_feature(XENFEAT_auto_translated_physmap))
                zap_page_range(info->vma, 
-                              MMAP_VADDR(info->user_vstart, u_idx, 0), 
+                              MMAP_VADDR(info->user_vstart, usr_idx, 0), 
                               req->nr_pages << PAGE_SHIFT, NULL);
 }
 
@@ -960,13 +861,20 @@ static void print_stats(blkif_t *blkif)
 int tap_blkif_schedule(void *arg)
 {
        blkif_t *blkif = arg;
+       struct tap_blkif *info;
 
        blkif_get(blkif);
 
+       info = blkif->tapif;
+
+       set_bit(3, &info->dev_inuse);
+
+       create_backdev(info);
+
        if (debug_lvl)
                printk(KERN_DEBUG "%s: started\n", current->comm);
 
-       while (!kthread_should_stop()) {
+       while (1) {
                if (try_to_freeze())
                        continue;
 
@@ -977,13 +885,24 @@ int tap_blkif_schedule(void *arg)
                        pending_free_wq,
                        !list_empty(&pending_free) || kthread_should_stop());
 
+               if (kthread_should_stop())
+                       break;
+
                blkif->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */
 
-               if (do_block_io_op(blkif))
+               down_read(&info->vm_update_sem);
+               if (info->vma == NULL) {
+                       up_read(&info->vm_update_sem);
+                       break;
+               }
+
+               if (do_block_io_op(info))
                        blkif->waiting_reqs = 1;
                else
-                       backdev_restart_queue(tapfds[blkif->dev_num]);
+                       backdev_restart_queue(info);
+
+               up_read(&info->vm_update_sem);
 
                if (log_stats && time_after(jiffies, blkif->st_print))
                        print_stats(blkif);
@@ -994,9 +913,13 @@ int tap_blkif_schedule(void *arg)
        if (debug_lvl)
                printk(KERN_DEBUG "%s: exiting\n", current->comm);
 
+       destroy_backdev(info);
+
        blkif->xenblkd = NULL;
        blkif_put(blkif);
 
+       clear_bit(3, &info->dev_inuse);
+
        return 0;
 }
 
@@ -1007,33 +930,32 @@ int tap_blkif_schedule(void *arg)
 static int blktap_read_ufe_ring(tap_blkif_t *info)
 {
        /* This is called to read responses from the UFE ring. */
-       RING_IDX i, j, rp;
-       blkif_response_t *resp;
-       blkif_t *blkif=NULL;
+       RING_IDX rc, rp;
        int pending_idx, usr_idx, mmap_idx;
+       blkif_response_t res;
        pending_req_t *pending_req;
-       struct page **map;
+       blkif_t *blkif = info->blkif;
        
-       if (!info)
-               return 0;
-
-       /* We currently only forward packets in INTERCEPT_FE mode. */
-       if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
+       down_read(&info->vm_update_sem);
+       if (info->vma == NULL) {
+               up_read(&info->vm_update_sem);
                return 0;
+       }
 
        /* for each outstanding message on the UFEring  */
        rp = info->ufe_ring.sring->rsp_prod;
        rmb();
-        
-       for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
-               blkif_response_t res;
-               resp = RING_GET_RESPONSE(&info->ufe_ring, i);
-               memcpy(&res, resp, sizeof(res));
+
+       for (rc = info->ufe_ring.rsp_cons; rc != rp; rc++) {
+               memcpy(&res, RING_GET_RESPONSE(&info->ufe_ring, rc),
+                      sizeof(res));
                mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
                ++info->ufe_ring.rsp_cons;
 
                /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
                usr_idx = (int)res.id;
+               DPRINTK("response %u id %d idx_map %p\n", rc, usr_idx,
+                       info->idx_map);
                pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
                mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
 
@@ -1047,35 +969,27 @@ static int blktap_read_ufe_ring(tap_blkif_t *info)
                                       ID_TO_IDX(info->idx_map[usr_idx])));
 
                pending_req = &pending_reqs[mmap_idx][pending_idx];
-               blkif = pending_req->blkif;
-               map = info->vma->vm_private_data;
-
-               for (j = 0; j < pending_req->nr_pages; j++) {
-                       unsigned long uvaddr;
-                       int offset;
 
-                       uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
-
-                       offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
-                       ClearPageReserved(map[offset]);
-                       map[offset] = NULL;
-               }
                if (pending_req->inuse == 2)
                        backdev_finish_req(info, usr_idx, &res, pending_req);
                else {
                        fast_flush_area(pending_req, pending_idx, usr_idx,
-                                       info->minor);
+                                       info);
 
                        info->idx_map[usr_idx] = INVALID_REQ;
-                       make_response(blkif, pending_req->id, res.operation,
-                                     res.status);
+                       make_response(blkif, pending_req->id,
+                                     res.operation, res.status);
                }
-               blkif_put(pending_req->blkif);
+               blkif->waiting_reqs = 1;
+
+               blkif_put(blkif);
                free_req(pending_req);
        }
-               
-       if (info->blkif && info->blkif->waiting_reqs)
-               wake_up(&info->blkif->wq);
+
+       up_read(&info->vm_update_sem);
+
+       if (blkif->waiting_reqs)
+               wake_up(&blkif->wq);
 
        return 0;
 }
@@ -1102,15 +1016,15 @@ irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 /******************************************************************
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
-static int print_dbug = 1;
-static int do_block_io_op(blkif_t *blkif)
+static int do_block_io_op(tap_blkif_t *info)
 {
+       blkif_t *blkif = info->blkif;
        blkif_back_rings_t *blk_rings = &blkif->blk_rings;
        blkif_request_t req;
        pending_req_t *pending_req;
        RING_IDX rc, rp;
        int more_to_do = 0;
-       tap_blkif_t *info;
+       int usr_idx;
 
        if (!tap_blkif_connected(blkif))
                return 0;
@@ -1119,41 +1033,18 @@ static int do_block_io_op(blkif_t *blkif)
        rp = blk_rings->common.sring->req_prod;
        rmb(); /* Ensure we see queued requests up to 'rp'. */
 
-       /*Check blkif has corresponding UE ring*/
-       if (blkif->dev_num < 0) {
-               /*oops*/
-               if (print_dbug) {
-                       WPRINTK("Corresponding UE " 
-                              "ring does not exist!\n");
-                       print_dbug = 0; /*We only print this message once*/
-               }
-               return 0;
-       }
-
-       info = tapfds[blkif->dev_num];
-
-       if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) {
-               if (print_dbug) {
-                       WPRINTK("Can't get UE info!\n");
-                       print_dbug = 0;
-               }
-               return 0;
-       }
-
        while (rc != rp) {
-               
+               if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
+                       break;
+
                if (RING_FULL(&info->ufe_ring)) {
                        WPRINTK("RING_FULL! More to do\n");
-                       more_to_do = 1;
                        break;
                }
 
-               if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) {
-                       WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
-                              " More to do\n");
-                       more_to_do = 1;
-                       break;          
-               }
+               usr_idx = GET_NEXT_REQ(info->idx_map);
+               if (usr_idx == INVALID_REQ)
+                       break;
 
                pending_req = alloc_req();
                if (NULL == pending_req) {
@@ -1189,12 +1080,14 @@ static int do_block_io_op(blkif_t *blkif)
                switch (req.operation) {
                case BLKIF_OP_READ:
                        blkif->st_rd_req++;
-                       dispatch_rw_block_io(blkif, &req, pending_req);
+                       dispatch_rw_block_io(blkif, &req, pending_req,
+                                            usr_idx);
                        break;
 
                case BLKIF_OP_WRITE:
                        blkif->st_wr_req++;
-                       dispatch_rw_block_io(blkif, &req, pending_req);
+                       dispatch_rw_block_io(blkif, &req, pending_req,
+                                            usr_idx);
                        break;
 
                default:
@@ -1212,15 +1105,16 @@ static int do_block_io_op(blkif_t *blkif)
                /* Yield point for this unbounded loop. */
                cond_resched();
        }
-               
-       blktap_kick_user(blkif->dev_num);
+
+       blktap_kick_user(info);
 
        return more_to_do;
 }
 
 static void dispatch_rw_block_io(blkif_t *blkif,
                                 blkif_request_t *req,
-                                pending_req_t *pending_req)
+                                pending_req_t *pending_req,
+                                int usr_idx)
 {
        extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
        int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
@@ -1230,46 +1124,18 @@ static void dispatch_rw_block_io(blkif_t *blkif,
        tap_blkif_t *info;
        blkif_request_t *target;
        int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
-       int usr_idx;
        uint16_t mmap_idx = pending_req->mem_idx;
 
-       if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV)
-               goto fail_response;
-
-       info = tapfds[blkif->dev_num];
-       if (info == NULL)
-               goto fail_response;
-
-       /* Check we have space on user ring - should never fail. */
-       usr_idx = GET_NEXT_REQ(info->idx_map);
-       if (usr_idx == INVALID_REQ) {
-               BUG();
-               goto fail_response;
-       }
+       info = blkif->tapif;
 
        /* Check that number of segments is sane. */
        nseg = req->nr_segments;
-       if ( unlikely(nseg == 0) || 
-           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
+       if (unlikely(nseg == 0) || 
+           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                WPRINTK("Bad number of segments in request (%d)\n", nseg);
                goto fail_response;
        }
        
-       /* Make sure userspace is ready. */
-       if (!info->ring_ok) {
-               WPRINTK("blktap: ring not ready for requests!\n");
-               goto fail_response;
-       }
-
-       if (RING_FULL(&info->ufe_ring)) {
-               WPRINTK("blktap: fe_ring is full, can't add "
-                       "IO Request will be dropped. %d %d\n",
-                       RING_SIZE(&info->ufe_ring),
-                       RING_SIZE(&blkif->blk_rings.common));
-               goto fail_response;
-       }
-
-       pending_req->blkif     = blkif;
        pending_req->id        = req->id;
        pending_req->operation = operation;
        pending_req->status    = BLKIF_RSP_OKAY;
@@ -1340,10 +1206,10 @@ static void dispatch_rw_block_io(blkif_t *blkif,
                                map[i+1].handle = INVALID_GRANT_HANDLE;
                        }
 
-                       pending_handle(mmap_idx, pending_idx, i/2).kernel 
-                               map[i].handle;
-                       pending_handle(mmap_idx, pending_idx, i/2).user   
-                               map[i+1].handle;
+                       pending_handle(mmap_idx, pending_idx, i/2).kernel =
+                               map[i].handle;
+                       pending_handle(mmap_idx, pending_idx, i/2).user =
+                               map[i+1].handle;
 
                        if (ret)
                                continue;
@@ -1353,8 +1219,8 @@ static void dispatch_rw_block_io(blkif_t *blkif,
                                                          >> PAGE_SHIFT));
                        offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
                        pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
-                       ((struct page **)info->vma->vm_private_data)[offset] =
-                               pg;
+                       SetPageReserved(pg);
+                       info->foreign_map.map[offset] = pg;
                }
        } else {
                for (i = 0; i < nseg; i++) {
@@ -1373,45 +1239,25 @@ static void dispatch_rw_block_io(blkif_t *blkif,
                                map[i].handle = INVALID_GRANT_HANDLE;
                        }
 
-                       pending_handle(mmap_idx, pending_idx, i).kernel 
-                               map[i].handle;
+                       pending_handle(mmap_idx, pending_idx, i).kernel =
+                               map[i].handle;
 
                        if (ret)
                                continue;
 
                        offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
                        pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
-                       ((struct page **)info->vma->vm_private_data)[offset] =
-                               pg;
+                       info->foreign_map.map[offset] = pg;
+                       SetPageReserved(pg);
+
+                       if (vm_insert_page(info->vma, uvaddr, pg))
+                               ret |= 1;
                }
        }
 
        if (ret)
                goto fail_flush;
 
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               down_write(&info->vma->vm_mm->mmap_sem);
-       /* Mark mapped pages as reserved: */
-       for (i = 0; i < req->nr_segments; i++) {
-               unsigned long kvaddr;
-               struct page *pg;
-
-               kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
-               pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
-               SetPageReserved(pg);
-               if (xen_feature(XENFEAT_auto_translated_physmap)) {
-                       ret = vm_insert_page(info->vma,
-                                            MMAP_VADDR(info->user_vstart,
-                                                       usr_idx, i), pg);
-                       if (ret) {
-                               up_write(&info->vma->vm_mm->mmap_sem);
-                               goto fail_flush;
-                       }
-               }
-       }
-       if (xen_feature(XENFEAT_auto_translated_physmap))
-               up_write(&info->vma->vm_mm->mmap_sem);
-       
        /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
        info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
 
@@ -1431,15 +1277,15 @@ static void dispatch_rw_block_io(blkif_t *blkif,
 
        return;
 
+
  fail_flush:
        WPRINTK("Reached Fail_flush\n");
-       fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
+       fast_flush_area(pending_req, pending_idx, usr_idx, info);
  fail_response:
        make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
        free_req(pending_req);
        msleep(1); /* back off a bit */
-}
-
+}
 
 
 /******************************************************************
@@ -1511,13 +1357,11 @@ static int __init blkif_init(void)
                return -ENODEV;
 
        INIT_LIST_HEAD(&pending_free);
-        for(i = 0; i < 2; i++) {
+        for (i = 0; i < 2; i++) {
                ret = req_increase();
                if (ret)
-                       break;
+                       return ret;
        }
-       if (i == 0)
-               return ret;
 
        tap_blkif_interface_init();
 
@@ -1533,7 +1377,6 @@ static int __init blkif_init(void)
 
        /* Dynamically allocate a major for this device */
        ret = register_chrdev(0, "blktap", &blktap_fops);
-
        if (ret < 0) {
                WPRINTK("Couldn't register /dev/xen/blktap\n");
                return -ENOMEM;
@@ -1541,10 +1384,7 @@ static int __init blkif_init(void)
        
        blktap_major = ret;
 
-       /* tapfds[0] is always NULL */
-       blktap_next_minor++;
-
-       DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
+       DPRINTK("Created misc_dev [/dev/xen/blktap0]\n");
 
        /* Make sure the xen class exists */
        if ((class = get_xen_class()) != NULL) {
index 7ecb457077a784534e9609a63e3c999b1f6318b6..e5805ffeb5f829de0d15f0d0ab3473f41a82ad7d 100644 (file)
          ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +        \
          ((_seg) * PAGE_SIZE))
 
+/* When using grant tables to map a frame for device access then the
+ * handle returned must be used to unmap the frame. This is needed to
+ * drop the ref count on the frame.
+ */
+struct grant_handle_pair
+{
+       grant_handle_t kernel;
+       grant_handle_t user;
+};
+#define INVALID_GRANT_HANDLE   0xFFFF
+
+extern struct grant_handle_pair pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
+#define pending_handle(_id, _idx, _i) \
+    (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
+    + (_i)])
+
 /*Data struct handed back to userspace for tapdisk device to VBD mapping*/
 typedef struct domid_translate {
        unsigned short domid;
@@ -27,6 +43,8 @@ typedef struct domid_translate {
 
 /*Data struct associated with each of the tapdisk devices*/
 typedef struct tap_blkif {
+       struct vm_foreign_map foreign_map;
+       struct rw_semaphore vm_update_sem;
        struct vm_area_struct *vma;   /*Shared memory area                   */
        unsigned long rings_vstart;   /*Kernel memory mapping                */
        unsigned long user_vstart;    /*User memory mapping                  */
@@ -35,7 +53,6 @@ typedef struct tap_blkif {
        unsigned long ring_ok;        /*make this ring->state                */
        blkif_front_ring_t ufe_ring;  /*Rings up to user space.              */
        wait_queue_head_t wait;       /*for poll                             */
-       unsigned long mode;           /*current switching mode               */
        int minor;                    /*Minor number for tapdisk device      */
        pid_t pid;                    /*tapdisk process id                   */
        enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
@@ -54,7 +71,6 @@ typedef struct tap_blkif {
  * response queued for it, with the saved 'id' passed back.
  */
 typedef struct pending_req {
-       blkif_t       *blkif;
        u64            id;
        unsigned short mem_idx;
        int            nr_pages;
@@ -124,4 +140,6 @@ static inline int GET_NEXT_REQ(unsigned long *idx_map)
 pending_req_t* alloc_req(void);
 void free_req(pending_req_t *req);
 
-void blktap_kick_user(int idx);
+void blktap_kick_user(struct tap_blkif *info);
+
+extern wait_queue_head_t backdev_setup_wq;
index af81f099b3b411a38f6536aba1306c44cbe34745..87c6ea2ebd55bf1c5e7a2c48ff58df5a7a451935 100644 (file)
@@ -83,13 +83,12 @@ typedef struct blkif_st {
        grant_handle_t shmem_handle;
        grant_ref_t    shmem_ref;
        
-       int             dev_num;
+       struct tap_blkif *tapif;
        uint64_t        sectors;
 } blkif_t;
 
 blkif_t *tap_alloc_blkif(domid_t domid);
 void tap_blkif_free(blkif_t *blkif);
-void tap_blkif_kmem_cache_free(blkif_t *blkif);
 int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, 
                  unsigned int evtchn);
 void tap_blkif_unmap(blkif_t *blkif);
@@ -117,7 +116,8 @@ void tap_blkif_xenbus_init(void);
 irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
 int tap_blkif_schedule(void *arg);
 
-int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
+struct tap_blkif *associate_blkif(domid_t domid, int xenbus_id,
+                                 blkif_t *blkif);
 void signal_tapdisk(int idx);
 
 #endif /* __BLKIF__BACKEND__COMMON_H__ */
index 9009ba62f2bad302b24e640b40f84a626097ab6c..11f800197a1e7da64b223929759c3e7bbabcb604 100644 (file)
@@ -162,15 +162,8 @@ void tap_blkif_free(blkif_t *blkif)
 {
        atomic_dec(&blkif->refcnt);
        wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
-       atomic_inc(&blkif->refcnt);
 
        tap_blkif_unmap(blkif);
-}
-
-void tap_blkif_kmem_cache_free(blkif_t *blkif)
-{
-       if (!atomic_dec_and_test(&blkif->refcnt))
-               BUG();
        kmem_cache_free(blkif_cachep, blkif);
 }
 
index 840a11c21c1402e670d0f8bd18ef6d7ec76e0acd..3f93a645dfb58ea213a096d3393ac6ede9f6fe80 100644 (file)
@@ -180,9 +180,7 @@ static int blktap_remove(struct xenbus_device *dev)
        if (be->blkif) {
                if (be->blkif->xenblkd)
                        kthread_stop(be->blkif->xenblkd);
-               signal_tapdisk(be->blkif->dev_num);
                tap_blkif_free(be->blkif);
-               tap_blkif_kmem_cache_free(be->blkif);
                be->blkif = NULL;
        }
        kfree(be);
@@ -233,7 +231,6 @@ static int blktap_probe(struct xenbus_device *dev,
                         const struct xenbus_device_id *id)
 {
        int err;
-       char name[TASK_COMM_LEN];
        struct backend_info *be = kzalloc(sizeof(struct backend_info),
                                          GFP_KERNEL);
        if (!be) {
@@ -258,29 +255,6 @@ static int blktap_probe(struct xenbus_device *dev,
        be->blkif->be = be;
        be->blkif->sectors = 0;
 
-       err = blktap_name(be->blkif, name);
-       if (err) {
-               xenbus_dev_error(be->dev, err, "get blktap dev name");
-               goto fail;
-       }
-       DPRINTK("blktap_probe %d dev %s\n", dev->otherend_id, name);
-
-       be->blkif->xenblkd = kthread_run(tap_blkif_schedule, be->blkif, name);
-       if (IS_ERR(be->blkif->xenblkd)) {
-               err = PTR_ERR(be->blkif->xenblkd);
-               be->blkif->xenblkd = NULL;
-               xenbus_dev_fatal(be->dev, err, "start xenblkd");
-               WPRINTK("Error starting thread\n");
-               goto fail;
-       }
-
-       err = xenbus_printf(XBT_NIL, dev->nodename, "kthread-pid", "%d",
-                           be->blkif->xenblkd->pid);
-       if (err) {
-               xenbus_dev_error(be->dev, err, "write kthread-pid");
-               return;
-       }
-               
        /* set a watch on disk info, waiting for userspace to update details*/
        err = xenbus_watch_path2(dev, dev->nodename, "info",
                                 &be->backend_watch, tap_backend_changed);
@@ -304,7 +278,7 @@ fail:
  * information in xenstore. 
  */
 static void tap_backend_changed(struct xenbus_watch *watch,
-                           const char **vec, unsigned int len)
+                               const char **vec, unsigned int len)
 {
        int err;
        unsigned long info;
@@ -312,6 +286,8 @@ static void tap_backend_changed(struct xenbus_watch *watch,
                = container_of(watch, struct backend_info, backend_watch);
        struct xenbus_device *dev = be->dev;
        
+       DPRINTK("tap_backend_changed %s\n", vec[XS_WATCH_PATH]);
+
        /** 
         * Check to see whether userspace code has opened the image 
         * and written sector
@@ -326,17 +302,47 @@ static void tap_backend_changed(struct xenbus_watch *watch,
                return;
        }
 
-       DPRINTK("Userspace update on disk info, %lu\n",info);
-
        err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu", 
                            &be->blkif->sectors, NULL);
 
-       /* Associate tap dev with domid*/
-       be->blkif->dev_num = dom_to_devid(be->blkif->domid, be->xenbus_id, 
-                                         be->blkif);
-       DPRINTK("Thread started for domid [%d], connecting disk\n", 
-               be->blkif->dev_num);
+       if (be->blkif->xenblkd == NULL) {
+               char name[TASK_COMM_LEN];
+
+               /* Associate blkif with tap_blkif */
+               be->blkif->tapif = associate_blkif(be->blkif->domid,
+                                                  be->xenbus_id, be->blkif);
+               if (be->blkif->tapif == NULL) {
+                       xenbus_dev_error(be->dev, -ENOENT, "associate blkif");
+                       return;
+               }
 
+               /* Create name */
+               err = blktap_name(be->blkif, name);
+               if (err) {
+                       xenbus_dev_error(be->dev, err, "get blktap dev name");
+                       return;
+               }
+
+               /* Create kernel thread */
+               be->blkif->xenblkd = kthread_run(tap_blkif_schedule, be->blkif,
+                                                name);
+               if (IS_ERR(be->blkif->xenblkd)) {
+                       err = PTR_ERR(be->blkif->xenblkd);
+                       be->blkif->xenblkd = NULL;
+                       xenbus_dev_fatal(be->dev, err, "start xenblkd");
+                       WPRINTK("Error starting thread\n");
+                       return;
+               }
+
+               err = xenbus_printf(XBT_NIL, dev->nodename, "kthread-pid",
+                                   "%d", be->blkif->xenblkd->pid);
+               if (err) {
+                       xenbus_dev_error(be->dev, err, "write kthread-pid");
+                       return;
+               }
+
+               DPRINTK("tap_backend_changed created thread %s\n", name);
+       }
        tap_update_blkif_status(be->blkif);
 }
 
@@ -375,11 +381,7 @@ static void tap_frontend_changed(struct xenbus_device *dev,
                break;
 
        case XenbusStateClosing:
-               if (be->blkif->xenblkd) {
-                       kthread_stop(be->blkif->xenblkd);
-                       be->blkif->xenblkd = NULL;
-               }
-               tap_blkif_free(be->blkif);
+               tap_blkif_unmap(be->blkif);
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
index 61c5d6db594396013b2008847500ca68edb2119e..b5c8fc76fdeb6eaa85e3dbcd6f1a688f3ce488b8 100644 (file)
@@ -171,6 +171,9 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_INSERTPAGE  0x02000000      /* The vma has had "vm_insert_page()" done on it */
 #ifdef CONFIG_XEN
 #define VM_FOREIGN     0x04000000      /* Has pages belonging to another VM */
+struct vm_foreign_map {
+       struct page **map;
+};
 #endif
 #define VM_ALWAYSDUMP  0x08000000      /* Always include in core dumps */
 
index 70a7d6cb41ec2e3c338d4d28fbee866bec9a4a39..ff11d46dd98049007c76d4040ff736c39c26714c 100644 (file)
@@ -1082,7 +1082,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
 #ifdef CONFIG_XEN
                if (vma && (vma->vm_flags & VM_FOREIGN)) {
-                       struct page **map = vma->vm_private_data;
+                       struct vm_foreign_map *foreign_map =
+                               vma->vm_private_data;
+                       struct page **map = foreign_map->map;
                        int offset = (start - vma->vm_start) >> PAGE_SHIFT;
                        if (map[offset] != NULL) {
                                if (pages) {