From: t_jeang Date: Tue, 6 Jan 2009 12:06:01 +0000 (+0000) Subject: imported patch blktap-shutdown-cleanup X-Git-Tag: sles-bimodal-blkproto-compatibility X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=14f0a6a0845c742ad3959eb55536922d37199890;p=xenclient%2Fkernel.git imported patch blktap-shutdown-cleanup --- diff --git a/drivers/xen/blktap/backdev.c b/drivers/xen/blktap/backdev.c index b98cbedc..1b9bd1be 100644 --- a/drivers/xen/blktap/backdev.c +++ b/drivers/xen/blktap/backdev.c @@ -45,6 +45,8 @@ destroy_backdev(struct tap_blkif *uinfo) { struct backdev_info *info = uinfo->backdev; + info->destroy = 1; + DPRINTK("destroy backdev %d users %d\n", uinfo->minor, info->users); if (info->users) return -EBUSY; @@ -59,9 +61,13 @@ destroy_backdev(struct tap_blkif *uinfo) blk_cleanup_queue(info->gd->queue); + blkif_put(uinfo->blkif); + uinfo->backdev = NULL; kfree(info); + clear_bit(4, &uinfo->dev_inuse); + return 0; } @@ -78,7 +84,7 @@ backdev_release(struct inode *inode, struct file *filep) { struct backdev_info *info = inode->i_bdev->bd_disk->private_data; info->users--; - if (info->uinfo->dev_inuse == 0) + if (info->destroy) destroy_backdev(info->uinfo); return 0; } @@ -191,6 +197,7 @@ static int umap_uaddr_fn( { struct mm_struct *mm = (struct mm_struct *)data; + DPRINTK("unmap_uaddr ptep %p\n", ptep); pte_clear(mm, addr, ptep); xen_invlpg(addr); return 0; @@ -202,10 +209,9 @@ static int umap_uaddr(struct mm_struct *mm, unsigned long address) } static void -process_backdev_request(struct backdev_info *info) +process_backdev_request(struct tap_blkif *uinfo, struct backdev_info *info) { request_queue_t *rq; - struct tap_blkif *uinfo; struct request *req; blkif_request_t blkif_req; blkif_request_t *target; @@ -225,7 +231,6 @@ process_backdev_request(struct backdev_info *info) struct page *pg; int nr_sects = 0; - uinfo = info->uinfo; rq = info->gd->queue; blkif = uinfo->blkif; @@ -242,11 +247,7 @@ process_backdev_request(struct backdev_info *info) end_request(req, 0); continue; } - if (info != req->rq_disk->private_data) { - end_request(req, 0); - continue; - } - if (uinfo->dev_inuse == 0) { + if (info->destroy) { DPRINTK("device no longer in use %d\n", info->uinfo->minor); end_request(req, 0); continue; @@ -259,7 +260,6 @@ process_backdev_request(struct backdev_info *info) break; } - /* Check we have space on user ring - should never fail. */ usr_idx = GET_NEXT_REQ(uinfo->idx_map); if (usr_idx == INVALID_REQ) goto wait; @@ -320,10 +320,8 @@ process_backdev_request(struct backdev_info *info) offset = (uvaddr - uinfo->vma->vm_start) >> PAGE_SHIFT; pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); DPRINTK("mapped uaddr %08lx to page %p\n", uvaddr, pg); - ((struct page **)uinfo->vma->vm_private_data)[offset] = pg; + uinfo->foreign_map.map[offset] = pg; SetPageReserved(pg); - if (0) if (vm_insert_page(uinfo->vma, uvaddr, pg)) - DPRINTK("boohoo\n"); pending_handle(mmap_idx, pending_idx, blkif_req.nr_segments).kernel = INVALID_GRANT_HANDLE; @@ -335,7 +333,6 @@ process_backdev_request(struct backdev_info *info) } } - pending_req->blkif = blkif; pending_req->id = (unsigned long)req; pending_req->operation = blkif_req.operation; pending_req->status = BLKIF_RSP_OKAY; @@ -365,7 +362,7 @@ process_backdev_request(struct backdev_info *info) } if (queued != 0) - blktap_kick_user(blkif->dev_num); + blktap_kick_user(uinfo); return; } @@ -375,12 +372,12 @@ do_backdev_request(request_queue_t *rq) struct backdev_info *info; info = rq->queuedata; - if (info->uinfo->blkif) { + if (info->uinfo) { info->uinfo->blkif->waiting_reqs = 1; wake_up(&info->uinfo->blkif->wq); + DPRINTK("got requests for dev %d wake %p\n", + info->uinfo->minor, info->uinfo->blkif); } - DPRINTK("got requests for dev %d wake %p/%p\n", info->uinfo->minor, - info->uinfo->blkif, &info->uinfo->blkif->wq); } void @@ -438,9 +435,6 @@ backdev_finish_req(struct tap_blkif *info, int usr_idx, blkif_response_t *res, BUG(); } - if (info->blkif) - info->blkif->waiting_reqs = 1; - spin_unlock_irq(&backdev_io_lock); } @@ -449,8 +443,6 @@ backdev_restart_queue(struct tap_blkif *uinfo) { struct backdev_info *info; - if (uinfo == NULL) - return; info = uinfo->backdev; if (info == NULL || info->gd == NULL || info->gd->queue == NULL) return; @@ -461,7 +453,7 @@ backdev_restart_queue(struct tap_blkif *uinfo) if (blk_queue_stopped(info->gd->queue)) blk_start_queue(info->gd->queue); /* Kick things off immediately. */ - process_backdev_request(info); + process_backdev_request(uinfo, info); spin_unlock_irq(&backdev_io_lock); } return; @@ -562,7 +554,12 @@ create_backdev(struct tap_blkif *uinfo) if (err) goto error; + set_bit(4, &uinfo->dev_inuse); + + blkif_get(uinfo->blkif); + uinfo->backdev = info; + wake_up(&backdev_setup_wq); goto out; diff --git a/drivers/xen/blktap/backdev.h b/drivers/xen/blktap/backdev.h index a885b9ed..78f27330 100644 --- a/drivers/xen/blktap/backdev.h +++ b/drivers/xen/blktap/backdev.h @@ -4,6 +4,7 @@ struct pending_req; struct backdev_info { int users; + int destroy; struct gendisk *gd; struct tap_blkif *uinfo; }; diff --git a/drivers/xen/blktap/blktap.c b/drivers/xen/blktap/blktap.c index 1acf5122..4205ecb7 100644 --- a/drivers/xen/blktap/blktap.c +++ b/drivers/xen/blktap/blktap.c @@ -70,7 +70,6 @@ static int mmap_pages = MMAP_PAGES; */ static struct tap_blkif *tapfds[MAX_TAP_DEV]; -static int blktap_next_minor; module_param(blkif_reqs, int, 0); /* Run-time switchable: /sys/module/blktap/parameters/ */ @@ -82,7 +81,8 @@ module_param(debug_lvl, int, 0644); pending_req_t *pending_reqs[MAX_PENDING_REQS]; static struct list_head pending_free; static DEFINE_SPINLOCK(pending_free_lock); -static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq); +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); +DECLARE_WAIT_QUEUE_HEAD(backdev_setup_wq); static int alloc_pending_reqs; #define BLKBACK_INVALID_HANDLE (~0) @@ -97,23 +97,8 @@ static unsigned short mmap_inuse = 0; * GRANT HANDLES */ -/* When using grant tables to map a frame for device access then the - * handle returned must be used to unmap the frame. This is needed to - * drop the ref count on the frame. - */ -struct grant_handle_pair -{ - grant_handle_t kernel; - grant_handle_t user; -}; -#define INVALID_GRANT_HANDLE 0xFFFF - -static struct grant_handle_pair +struct grant_handle_pair pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES]; -#define pending_handle(_id, _idx, _i) \ - (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \ - + (_i)]) - static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/ @@ -135,22 +120,6 @@ static int blktap_major; #define BLKTAP_IOCTL_PRINT_IDXS 100 #define BLKTAP_IOCTL_BACKDEV_SETUP 200 -/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ -#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ -#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 -#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */ - -#define BLKTAP_MODE_INTERPOSE \ - (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) - - -static inline int BLKTAP_MODE_VALID(unsigned long arg) -{ - return ((arg == BLKTAP_MODE_PASSTHROUGH ) || - (arg == BLKTAP_MODE_INTERCEPT_FE) || - (arg == BLKTAP_MODE_INTERPOSE )); -} - static inline int OFFSET_TO_USR_IDX(int offset) { return offset / BLKIF_MAX_SEGMENTS_PER_REQUEST; @@ -194,42 +163,43 @@ static pte_t blktap_clear_pte(struct vm_area_struct *vma, pte_t copy; tap_blkif_t *info; int offset, seg, usr_idx, pending_idx, mmap_idx; - unsigned long uvstart = vma->vm_start + (RING_PAGES << PAGE_SHIFT); unsigned long kvaddr; + struct vm_foreign_map *foreign_map; struct page **map; - struct page *pg; struct grant_handle_pair *khandle; struct gnttab_unmap_grant_ref unmap[2]; int count = 0; + info = vma->vm_private_data; + /* - * If the address is before the start of the grant mapped region or - * if vm_file is NULL (meaning mmap failed and we have nothing to do) + * Zap entry if the address is before the start of the grant + * mapped region. */ - if (uvaddr < uvstart || vma->vm_file == NULL) - return ptep_get_and_clear_full(vma->vm_mm, uvaddr, + if (uvaddr < info->user_vstart) + return ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep, is_fullmm); - info = vma->vm_file->private_data; - map = vma->vm_private_data; + foreign_map = vma->vm_private_data; + map = foreign_map->map; /* TODO Should these be changed to if statements? */ BUG_ON(!info); BUG_ON(!info->idx_map); BUG_ON(!map); - offset = (int) ((uvaddr - uvstart) >> PAGE_SHIFT); + offset = (int)((uvaddr - info->user_vstart) >> PAGE_SHIFT); usr_idx = OFFSET_TO_USR_IDX(offset); seg = OFFSET_TO_SEG(offset); pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); - kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg); - pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); - ClearPageReserved(pg); - map[offset + RING_PAGES] = NULL; + offset = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT); + ClearPageReserved(map[offset]); + map[offset] = NULL; + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg); khandle = &pending_handle(mmap_idx, pending_idx, seg); if (khandle->kernel != INVALID_GRANT_HANDLE) { @@ -252,9 +222,6 @@ static pte_t blktap_clear_pte(struct vm_area_struct *vma, khandle->user); count++; } else { - BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap)); - - /* USING SHADOW PAGE TABLES. */ copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep, is_fullmm); } @@ -269,7 +236,32 @@ static pte_t blktap_clear_pte(struct vm_area_struct *vma, return copy; } +static void blktap_vm_close(struct vm_area_struct *vma) +{ + struct tap_blkif *info = vma->vm_private_data; + + down_write(&info->vm_update_sem); + + zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL); + + kfree(info->foreign_map.map); + + /* Free the ring page. */ + ClearPageReserved(virt_to_page(info->ufe_ring.sring)); + free_page((unsigned long)info->ufe_ring.sring); + + kfree(info->idx_map); + info->idx_map = NULL; + + info->vma = NULL; + clear_bit(2, &info->dev_inuse); + + up_write(&info->vm_update_sem); +} + +static struct vm_operations_struct blktap_vm_ops = { + close: blktap_vm_close, nopage: blktap_nopage, zap_pte: blktap_clear_pte, }; @@ -313,56 +305,43 @@ static tap_blkif_t *get_next_free_dev(void) /* tapfds[0] is always NULL */ - for (minor = 1; minor < blktap_next_minor; minor++) { + for (minor = 1; minor < MAX_TAP_DEV; minor++) { info = tapfds[minor]; /* we could have failed a previous attempt. */ - if (!info || - ((info->dev_inuse == 0) && - (info->backdev == NULL) && - (info->dev_pending == 0)) ) { - info->dev_pending = 1; + if (info == NULL || + (info->dev_inuse == 0 && + !test_and_set_bit(0, &info->dev_inuse))) goto found; - } } info = NULL; - minor = -1; - /* - * We didn't find free device. If we can still allocate - * more, then we grab the next device minor that is - * available. This is done while we are still under - * the protection of the pending_free_lock. - */ - if (blktap_next_minor < MAX_TAP_DEV) - minor = blktap_next_minor++; found: spin_unlock_irq(&pending_free_lock); - if (!info && minor > 0) { + if (info == NULL) { info = kzalloc(sizeof(*info), GFP_KERNEL); - if (unlikely(!info)) { - /* - * If we failed here, try to put back - * the next minor number. But if one - * was just taken, then we just lose this - * minor. We can try to allocate this - * minor again later. - */ - spin_lock_irq(&pending_free_lock); - if (blktap_next_minor == minor+1) - blktap_next_minor--; + if (unlikely(!info)) + goto out; + + init_rwsem(&info->vm_update_sem); + set_bit(0, &info->dev_inuse); + + spin_lock_irq(&pending_free_lock); + for (; minor < MAX_TAP_DEV; minor++) + if (tapfds[minor] == NULL) + break; + if (minor == MAX_TAP_DEV) { + kfree(info); + info = NULL; spin_unlock_irq(&pending_free_lock); goto out; } info->minor = minor; - /* - * Make sure that we have a minor before others can - * see us. - */ - wmb(); tapfds[minor] = info; + spin_unlock_irq(&pending_free_lock); + if ((class = get_xen_class()) != NULL) class_device_create(class, NULL, MKDEV(blktap_major, minor), NULL, @@ -373,152 +352,72 @@ out: return info; } -int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) +struct tap_blkif * +associate_blkif(domid_t domid, int xenbus_id, blkif_t *blkif) { tap_blkif_t *info; int i; - for (i = 1; i < blktap_next_minor; i++) { + for (i = 1; i < MAX_TAP_DEV; i++) { info = tapfds[i]; - if ( info && - (info->trans.domid == domid) && - (info->trans.busid == xenbus_id) ) { + if (info && info->trans.domid == domid && + info->trans.busid == xenbus_id) { info->blkif = blkif; info->status = RUNNING; - return i; + return info; } } - return -1; -} - -void signal_tapdisk(int idx) -{ - tap_blkif_t *info; - struct task_struct *ptask; - - info = tapfds[idx]; - if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) - return; - - if (info->pid > 0) { - ptask = find_task_by_pid(info->pid); - if (ptask) - info->status = CLEANSHUTDOWN; - } - info->blkif = NULL; - - return; + return NULL; } static int blktap_open(struct inode *inode, struct file *filp) { - blkif_sring_t *sring; int idx = iminor(inode) - BLKTAP_MINOR; tap_blkif_t *info; - int i; /* ctrl device, treat differently */ - if (!idx) + if (idx == 0) return 0; - info = tapfds[idx]; - - if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) { + if (idx < 1 || idx > MAX_TAP_DEV || tapfds[idx] == NULL) { WPRINTK("Unable to open device /dev/xen/blktap%d\n", idx); return -ENODEV; } - DPRINTK("Opening device /dev/xen/blktap%d\n",idx); - - /*Only one process can access device at a time*/ - if (test_and_set_bit(0, &info->dev_inuse)) + info = tapfds[idx]; + + DPRINTK("Opening device /dev/xen/blktap%d\n", idx); + + /* Only one process can access device at a time */ + if (test_and_set_bit(1, &info->dev_inuse)) return -EBUSY; - info->dev_pending = 0; - - /* Allocate the fe ring. */ - sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); - if (sring == NULL) - goto fail_nomem; + clear_bit(0, &info->dev_inuse); - SetPageReserved(virt_to_page(sring)); - - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE); - filp->private_data = info; - info->vma = NULL; - info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, - GFP_KERNEL); - - if (info->idx_map == NULL) - goto fail_nomem; + DPRINTK("Tap open: device /dev/xen/blktap%d\n", idx); - if (idx > 0) { - init_waitqueue_head(&info->wait); - for (i = 0; i < MAX_PENDING_REQS; i++) - info->idx_map[i] = INVALID_REQ; - } - - DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx); return 0; - - fail_nomem: - return -ENOMEM; } static int blktap_release(struct inode *inode, struct file *filp) { tap_blkif_t *info = filp->private_data; - int ret; /* check for control device */ - if (!info) + if (info == NULL) return 0; - info->dev_inuse = 0; DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor); - - /* Free the ring page. */ - ClearPageReserved(virt_to_page(info->ufe_ring.sring)); - free_page((unsigned long) info->ufe_ring.sring); - - /* Clear any active mappings and free foreign map table */ - if (info->vma) { - zap_page_range( - info->vma, info->vma->vm_start, - info->vma->vm_end - info->vma->vm_start, NULL); - - kfree(info->vma->vm_private_data); - - info->vma = NULL; - } - - if (info->idx_map) { - kfree(info->idx_map); - info->idx_map = NULL; - } - - if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) { - if (info->blkif->xenblkd != NULL) { - kthread_stop(info->blkif->xenblkd); - info->blkif->xenblkd = NULL; - } - info->status = CLEANSHUTDOWN; - } - - ret = destroy_backdev(info); - if (ret && ret != -EBUSY) - WPRINTK("destroy_backdev failed %d\n", ret); + clear_bit(1, &info->dev_inuse); filp->private_data = NULL; return 0; } - /* Note on mmap: * We need to map pages to user space in a way that will allow the block * subsystem set up direct IO to them. This couldn't be done before, because @@ -539,28 +438,52 @@ static int blktap_release(struct inode *inode, struct file *filp) */ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) { + tap_blkif_t *info = filp->private_data; + blkif_sring_t *sring = NULL; + struct page **map = NULL; int size; - struct page **map; int i; - tap_blkif_t *info = filp->private_data; int ret; - if (info == NULL) { - WPRINTK("blktap: mmap, retrieving idx failed\n"); + if (info == NULL || test_and_set_bit(2, &info->dev_inuse)) return -ENOMEM; - } - - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &blktap_vm_ops; - size = vma->vm_end - vma->vm_start; - if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) { + size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + if (size != (mmap_pages + RING_PAGES)) { WPRINTK("you _must_ map exactly %d pages!\n", - mmap_pages + RING_PAGES); + mmap_pages + RING_PAGES); return -EAGAIN; } - size >>= PAGE_SHIFT; + /* Allocate the fe ring. */ + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); + if (sring == NULL) { + WPRINTK("Couldn't alloc sring.\n"); + goto fail_mem; + } + info->idx_map = kmalloc(sizeof(unsigned long) * + MAX_PENDING_REQS, GFP_KERNEL); + if (info->idx_map == NULL) { + WPRINTK("Couldn't alloc idx_map.\n"); + goto fail_mem; + } + map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + * sizeof(struct page *), + GFP_KERNEL); + if (map == NULL) { + WPRINTK("Couldn't alloc VM_FOREIGN map.\n"); + goto fail_mem; + } + + SetPageReserved(virt_to_page(sring)); + + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE); + + init_waitqueue_head(&info->wait); + for (i = 0; i < MAX_PENDING_REQS; i++) + info->idx_map[i] = INVALID_REQ; + info->rings_vstart = vma->vm_start; info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); @@ -573,42 +496,39 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) __pa(info->ufe_ring.sring) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot); if (ret) { - WPRINTK("Mapping user ring failed!\n"); + WPRINTK("Mapping user ring failed.\n"); goto fail; } /* Mark this VM as containing foreign pages, and set up mappings. */ - map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - * sizeof(struct page *), - GFP_KERNEL); - if (map == NULL) { - WPRINTK("Couldn't alloc VM_FOREIGN map.\n"); - goto fail; - } - - for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++) - map[i] = NULL; - - vma->vm_private_data = map; + info->foreign_map.map = map; + vma->vm_private_data = &info->foreign_map; vma->vm_flags |= VM_FOREIGN; vma->vm_flags |= VM_DONTCOPY; + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &blktap_vm_ops; #ifdef CONFIG_X86 vma->vm_mm->context.has_foreign_mappings = 1; #endif info->vma = vma; - info->ring_ok = 1; return 0; + fail: /* Clear any active mappings. */ zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL); + ClearPageReserved(virt_to_page(sring)); + fail_mem: + free_page((unsigned long)sring); + kfree(info->idx_map); + info->idx_map = NULL; + kfree(map); return -ENOMEM; } - static int blktap_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -616,25 +536,17 @@ static int blktap_ioctl(struct inode *inode, struct file *filp, switch(cmd) { case BLKTAP_IOCTL_KICK_FE: - { + if (info == NULL) + return -ENOENT; + /* There are fe messages to process. */ return blktap_read_ufe_ring(info); - } + case BLKTAP_IOCTL_SETMODE: - { - if (info) { - if (BLKTAP_MODE_VALID(arg)) { - info->mode = arg; - /* XXX: may need to flush rings here. */ - DPRINTK("blktap: set mode to %lx\n", - arg); - return 0; - } - } + /* deprecated */ return 0; - } + case BLKTAP_IOCTL_PRINT_IDXS: - { if (info) { printk("User Rings: \n-----------\n"); printk("UF: rsp_cons: %2d, req_prod_prv: %2d " @@ -645,16 +557,15 @@ static int blktap_ioctl(struct inode *inode, struct file *filp, info->ufe_ring.sring->rsp_prod); } return 0; - } + case BLKTAP_IOCTL_SENDPID: - { if (info) { info->pid = (pid_t)arg; DPRINTK("blktap: pid received %d\n", info->pid); } return 0; - } + case BLKTAP_IOCTL_NEWINTF: { uint64_t val = (uint64_t)arg; @@ -672,55 +583,57 @@ static int blktap_ioctl(struct inode *inode, struct file *filp, info->trans.busid = tr->busid; return info->minor; } + case BLKTAP_IOCTL_FREEINTF: { unsigned long dev = arg; - unsigned long flags; DPRINTK("FREEINTF Req for dev %ld\n", dev); - info = tapfds[dev]; - - if ((dev > MAX_TAP_DEV) || !info) - return 0; /* should this be an error? */ + if (dev > MAX_TAP_DEV || tapfds[dev] == NULL) + return -EINVAL; - spin_lock_irqsave(&pending_free_lock, flags); - if (info->dev_pending) - info->dev_pending = 0; - spin_unlock_irqrestore(&pending_free_lock, flags); + clear_bit(0, &tapfds[dev]->dev_inuse); return 0; } + case BLKTAP_IOCTL_MINOR: { unsigned long dev = arg; - info = tapfds[dev]; - - if ((dev > MAX_TAP_DEV) || !info) + if (dev > MAX_TAP_DEV || tapfds[dev] == NULL) return -EINVAL; - return info->minor; + return tapfds[dev]->minor; } + case BLKTAP_IOCTL_MAJOR: return blktap_major; case BLKTAP_QUERY_ALLOC_REQS: - { WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n", alloc_pending_reqs, blkif_reqs); return (alloc_pending_reqs/blkif_reqs) * 100; - } case BLKTAP_IOCTL_BACKDEV_SETUP: { unsigned long dev = arg; + int ret; DPRINTK("BLKTAP_IOCTL_BACKDEV_SETUP ioctl: %ld\n", dev); - info = tapfds[dev]; + if (dev > MAX_TAP_DEV || tapfds[dev] == NULL) + return -EINVAL; - return create_backdev(info); + while (tapfds[dev]->backdev == NULL) { + ret = wait_event_interruptible(backdev_setup_wq, + tapfds[dev]->backdev); + if (ret) + return ret; + } + return 0; } } + return -ENOIOCTLCMD; } @@ -729,7 +642,7 @@ static unsigned int blktap_poll(struct file *filp, poll_table *wait) tap_blkif_t *info = filp->private_data; /* do not work on the control device */ - if (!info) + if (info == NULL) return 0; poll_wait(filp, &info->wait, wait); @@ -740,24 +653,17 @@ static unsigned int blktap_poll(struct file *filp, poll_table *wait) return 0; } -void blktap_kick_user(int idx) +void blktap_kick_user(struct tap_blkif *info) { - tap_blkif_t *info; - - info = tapfds[idx]; - - if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) - return; wake_up_interruptible(&info->wait); - - return; } -static int do_block_io_op(blkif_t *blkif); +static int do_block_io_op(tap_blkif_t *info); static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, - pending_req_t *pending_req); + pending_req_t *pending_req, + int usr_idx); static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st); @@ -864,60 +770,50 @@ void free_req(pending_req_t *req) wake_up(&pending_free_wq); } -static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, - int tapidx) +#define DO_FAST_FLUSH +static void fast_flush_area(pending_req_t *req, int pending_idx, int usr_idx, + tap_blkif_t *info) { +#ifdef DO_FAST_FLUSH struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; unsigned int i, invcount = 0; struct grant_handle_pair *khandle; uint64_t ptep; int ret, mmap_idx; unsigned long kvaddr, uvaddr; - tap_blkif_t *info; - - - info = tapfds[tapidx]; - - if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) { - WPRINTK("fast_flush: Couldn't get info!\n"); - return; - } + struct page **map; + int offset; +#endif - if (info->vma != NULL && - xen_feature(XENFEAT_auto_translated_physmap)) { - down_write(&info->vma->vm_mm->mmap_sem); + if (xen_feature(XENFEAT_auto_translated_physmap)) zap_page_range(info->vma, - MMAP_VADDR(info->user_vstart, u_idx, 0), + MMAP_VADDR(info->user_vstart, usr_idx, 0), req->nr_pages << PAGE_SHIFT, NULL); - up_write(&info->vma->vm_mm->mmap_sem); - return; - } +#ifdef DO_FAST_FLUSH mmap_idx = req->mem_idx; + map = info->foreign_map.map; + for (i = 0; i < req->nr_pages; i++) { - kvaddr = idx_to_kaddr(mmap_idx, k_idx, i); - uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i); + kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); + uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i); - khandle = &pending_handle(mmap_idx, k_idx, i); + khandle = &pending_handle(mmap_idx, pending_idx, i); if (khandle->kernel != INVALID_GRANT_HANDLE) { - gnttab_set_unmap_op(&unmap[invcount], - idx_to_kaddr(mmap_idx, k_idx, i), + gnttab_set_unmap_op(&unmap[invcount], kvaddr, GNTMAP_host_map, khandle->kernel); invcount++; - set_phys_to_machine( - __pa(idx_to_kaddr(mmap_idx, k_idx, i)) - >> PAGE_SHIFT, INVALID_P2M_ENTRY); + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, + INVALID_P2M_ENTRY); } if (khandle->user != INVALID_GRANT_HANDLE) { BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); - if (create_lookup_pte_addr( - info->vma->vm_mm, - MMAP_VADDR(info->user_vstart, u_idx, i), - &ptep) !=0) { + if (create_lookup_pte_addr(info->vma->vm_mm, + uvaddr, &ptep) != 0) { WPRINTK("Couldn't get a pte addr!\n"); return; } @@ -930,15 +826,20 @@ static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, invcount++; } + offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; + ClearPageReserved(map[offset]); + map[offset] = NULL; + BLKTAP_INVALIDATE_HANDLE(khandle); } - ret = HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, invcount); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, + invcount); BUG_ON(ret); - - if (info->vma != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) +#endif + + if (!xen_feature(XENFEAT_auto_translated_physmap)) zap_page_range(info->vma, - MMAP_VADDR(info->user_vstart, u_idx, 0), + MMAP_VADDR(info->user_vstart, usr_idx, 0), req->nr_pages << PAGE_SHIFT, NULL); } @@ -960,13 +861,20 @@ static void print_stats(blkif_t *blkif) int tap_blkif_schedule(void *arg) { blkif_t *blkif = arg; + struct tap_blkif *info; blkif_get(blkif); + info = blkif->tapif; + + set_bit(3, &info->dev_inuse); + + create_backdev(info); + if (debug_lvl) printk(KERN_DEBUG "%s: started\n", current->comm); - while (!kthread_should_stop()) { + while (1) { if (try_to_freeze()) continue; @@ -977,13 +885,24 @@ int tap_blkif_schedule(void *arg) pending_free_wq, !list_empty(&pending_free) || kthread_should_stop()); + if (kthread_should_stop()) + break; + blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - if (do_block_io_op(blkif)) + down_read(&info->vm_update_sem); + if (info->vma == NULL) { + up_read(&info->vm_update_sem); + break; + } + + if (do_block_io_op(info)) blkif->waiting_reqs = 1; else - backdev_restart_queue(tapfds[blkif->dev_num]); + backdev_restart_queue(info); + + up_read(&info->vm_update_sem); if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); @@ -994,9 +913,13 @@ int tap_blkif_schedule(void *arg) if (debug_lvl) printk(KERN_DEBUG "%s: exiting\n", current->comm); + destroy_backdev(info); + blkif->xenblkd = NULL; blkif_put(blkif); + clear_bit(3, &info->dev_inuse); + return 0; } @@ -1007,33 +930,32 @@ int tap_blkif_schedule(void *arg) static int blktap_read_ufe_ring(tap_blkif_t *info) { /* This is called to read responses from the UFE ring. */ - RING_IDX i, j, rp; - blkif_response_t *resp; - blkif_t *blkif=NULL; + RING_IDX rc, rp; int pending_idx, usr_idx, mmap_idx; + blkif_response_t res; pending_req_t *pending_req; - struct page **map; + blkif_t *blkif = info->blkif; - if (!info) - return 0; - - /* We currently only forward packets in INTERCEPT_FE mode. */ - if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE)) + down_read(&info->vm_update_sem); + if (info->vma == NULL) { + up_read(&info->vm_update_sem); return 0; + } /* for each outstanding message on the UFEring */ rp = info->ufe_ring.sring->rsp_prod; rmb(); - - for (i = info->ufe_ring.rsp_cons; i != rp; i++) { - blkif_response_t res; - resp = RING_GET_RESPONSE(&info->ufe_ring, i); - memcpy(&res, resp, sizeof(res)); + + for (rc = info->ufe_ring.rsp_cons; rc != rp; rc++) { + memcpy(&res, RING_GET_RESPONSE(&info->ufe_ring, rc), + sizeof(res)); mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */ ++info->ufe_ring.rsp_cons; /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/ usr_idx = (int)res.id; + DPRINTK("response %d id %x idx_map %p\n", rc, usr_idx, + info->idx_map); pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx])); mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]); @@ -1047,35 +969,27 @@ static int blktap_read_ufe_ring(tap_blkif_t *info) ID_TO_IDX(info->idx_map[usr_idx]))); pending_req = &pending_reqs[mmap_idx][pending_idx]; - blkif = pending_req->blkif; - map = info->vma->vm_private_data; - - for (j = 0; j < pending_req->nr_pages; j++) { - unsigned long uvaddr; - int offset; - uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j); - - offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; - ClearPageReserved(map[offset]); - map[offset] = NULL; - } if (pending_req->inuse == 2) backdev_finish_req(info, usr_idx, &res, pending_req); else { fast_flush_area(pending_req, pending_idx, usr_idx, - info->minor); + info); info->idx_map[usr_idx] = INVALID_REQ; - make_response(blkif, pending_req->id, res.operation, - res.status); + make_response(blkif, pending_req->id, + res.operation, res.status); } - blkif_put(pending_req->blkif); + blkif->waiting_reqs = 1; + + blkif_put(blkif); free_req(pending_req); } - - if (info->blkif && info->blkif->waiting_reqs) - wake_up(&info->blkif->wq); + + up_read(&info->vm_update_sem); + + if (blkif->waiting_reqs) + wake_up(&blkif->wq); return 0; } @@ -1102,15 +1016,15 @@ irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) /****************************************************************** * DOWNWARD CALLS -- These interface with the block-device layer proper. */ -static int print_dbug = 1; -static int do_block_io_op(blkif_t *blkif) +static int do_block_io_op(tap_blkif_t *info) { + blkif_t *blkif = info->blkif; blkif_back_rings_t *blk_rings = &blkif->blk_rings; blkif_request_t req; pending_req_t *pending_req; RING_IDX rc, rp; int more_to_do = 0; - tap_blkif_t *info; + int usr_idx; if (!tap_blkif_connected(blkif)) return 0; @@ -1119,41 +1033,18 @@ static int do_block_io_op(blkif_t *blkif) rp = blk_rings->common.sring->req_prod; rmb(); /* Ensure we see queued requests up to 'rp'. */ - /*Check blkif has corresponding UE ring*/ - if (blkif->dev_num < 0) { - /*oops*/ - if (print_dbug) { - WPRINTK("Corresponding UE " - "ring does not exist!\n"); - print_dbug = 0; /*We only print this message once*/ - } - return 0; - } - - info = tapfds[blkif->dev_num]; - - if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) { - if (print_dbug) { - WPRINTK("Can't get UE info!\n"); - print_dbug = 0; - } - return 0; - } - while (rc != rp) { - + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) + break; + if (RING_FULL(&info->ufe_ring)) { WPRINTK("RING_FULL! More to do\n"); - more_to_do = 1; break; } - if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) { - WPRINTK("RING_REQUEST_CONS_OVERFLOW!" - " More to do\n"); - more_to_do = 1; - break; - } + usr_idx = GET_NEXT_REQ(info->idx_map); + if (usr_idx == INVALID_REQ) + break; pending_req = alloc_req(); if (NULL == pending_req) { @@ -1189,12 +1080,14 @@ static int do_block_io_op(blkif_t *blkif) switch (req.operation) { case BLKIF_OP_READ: blkif->st_rd_req++; - dispatch_rw_block_io(blkif, &req, pending_req); + dispatch_rw_block_io(blkif, &req, pending_req, + usr_idx); break; case BLKIF_OP_WRITE: blkif->st_wr_req++; - dispatch_rw_block_io(blkif, &req, pending_req); + dispatch_rw_block_io(blkif, &req, pending_req, + usr_idx); break; default: @@ -1212,15 +1105,16 @@ static int do_block_io_op(blkif_t *blkif) /* Yield point for this unbounded loop. */ cond_resched(); } - - blktap_kick_user(blkif->dev_num); + + blktap_kick_user(info); return more_to_do; } static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, - pending_req_t *pending_req) + pending_req_t *pending_req, + int usr_idx) { extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ; @@ -1230,46 +1124,18 @@ static void dispatch_rw_block_io(blkif_t *blkif, tap_blkif_t *info; blkif_request_t *target; int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx); - int usr_idx; uint16_t mmap_idx = pending_req->mem_idx; - if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV) - goto fail_response; - - info = tapfds[blkif->dev_num]; - if (info == NULL) - goto fail_response; - - /* Check we have space on user ring - should never fail. */ - usr_idx = GET_NEXT_REQ(info->idx_map); - if (usr_idx == INVALID_REQ) { - BUG(); - goto fail_response; - } + info = blkif->tapif; /* Check that number of segments is sane. */ nseg = req->nr_segments; - if ( unlikely(nseg == 0) || - unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) { + if (unlikely(nseg == 0) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { WPRINTK("Bad number of segments in request (%d)\n", nseg); goto fail_response; } - /* Make sure userspace is ready. */ - if (!info->ring_ok) { - WPRINTK("blktap: ring not ready for requests!\n"); - goto fail_response; - } - - if (RING_FULL(&info->ufe_ring)) { - WPRINTK("blktap: fe_ring is full, can't add " - "IO Request will be dropped. %d %d\n", - RING_SIZE(&info->ufe_ring), - RING_SIZE(&blkif->blk_rings.common)); - goto fail_response; - } - - pending_req->blkif = blkif; pending_req->id = req->id; pending_req->operation = operation; pending_req->status = BLKIF_RSP_OKAY; @@ -1340,10 +1206,10 @@ static void dispatch_rw_block_io(blkif_t *blkif, map[i+1].handle = INVALID_GRANT_HANDLE; } - pending_handle(mmap_idx, pending_idx, i/2).kernel - = map[i].handle; - pending_handle(mmap_idx, pending_idx, i/2).user - = map[i+1].handle; + pending_handle(mmap_idx, pending_idx, i/2).kernel = + map[i].handle; + pending_handle(mmap_idx, pending_idx, i/2).user = + map[i+1].handle; if (ret) continue; @@ -1353,8 +1219,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, >> PAGE_SHIFT)); offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); - ((struct page **)info->vma->vm_private_data)[offset] = - pg; + SetPageReserved(pg); + info->foreign_map.map[offset] = pg; } } else { for (i = 0; i < nseg; i++) { @@ -1373,45 +1239,25 @@ static void dispatch_rw_block_io(blkif_t *blkif, map[i].handle = INVALID_GRANT_HANDLE; } - pending_handle(mmap_idx, pending_idx, i).kernel - = map[i].handle; + pending_handle(mmap_idx, pending_idx, i).kernel = + map[i].handle; if (ret) continue; offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); - ((struct page **)info->vma->vm_private_data)[offset] = - pg; + info->foreign_map.map[offset] = pg; + SetPageReserved(pg); + + if (vm_insert_page(info->vma, uvaddr, pg)) + ret |= 1; } } if (ret) goto fail_flush; - if (xen_feature(XENFEAT_auto_translated_physmap)) - down_write(&info->vma->vm_mm->mmap_sem); - /* Mark mapped pages as reserved: */ - for (i = 0; i < req->nr_segments; i++) { - unsigned long kvaddr; - struct page *pg; - - kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i); - pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); - SetPageReserved(pg); - if (xen_feature(XENFEAT_auto_translated_physmap)) { - ret = vm_insert_page(info->vma, - MMAP_VADDR(info->user_vstart, - usr_idx, i), pg); - if (ret) { - up_write(&info->vma->vm_mm->mmap_sem); - goto fail_flush; - } - } - } - if (xen_feature(XENFEAT_auto_translated_physmap)) - up_write(&info->vma->vm_mm->mmap_sem); - /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/ info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx); @@ -1431,15 +1277,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, return; + fail_flush: WPRINTK("Reached Fail_flush\n"); - fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num); + fast_flush_area(pending_req, pending_idx, usr_idx, info); fail_response: make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); msleep(1); /* back off a bit */ -} - +} /****************************************************************** @@ -1511,13 +1357,11 @@ static int __init blkif_init(void) return -ENODEV; INIT_LIST_HEAD(&pending_free); - for(i = 0; i < 2; i++) { + for (i = 0; i < 2; i++) { ret = req_increase(); if (ret) - break; + return ret; } - if (i == 0) - return ret; tap_blkif_interface_init(); @@ -1533,7 +1377,6 @@ static int __init blkif_init(void) /* Dynamically allocate a major for this device */ ret = register_chrdev(0, "blktap", &blktap_fops); - if (ret < 0) { WPRINTK("Couldn't register /dev/xen/blktap\n"); return -ENOMEM; @@ -1541,10 +1384,7 @@ static int __init blkif_init(void) blktap_major = ret; - /* tapfds[0] is always NULL */ - blktap_next_minor++; - - DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); + DPRINTK("Created misc_dev [/dev/xen/blktap0]\n"); /* Make sure the xen class exists */ if ((class = get_xen_class()) != NULL) { diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h index 7ecb4570..e5805ffe 100644 --- a/drivers/xen/blktap/blktap.h +++ b/drivers/xen/blktap/blktap.h @@ -19,6 +19,22 @@ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ ((_seg) * PAGE_SIZE)) +/* When using grant tables to map a frame for device access then the + * handle returned must be used to unmap the frame. This is needed to + * drop the ref count on the frame. + */ +struct grant_handle_pair +{ + grant_handle_t kernel; + grant_handle_t user; +}; +#define INVALID_GRANT_HANDLE 0xFFFF + +extern struct grant_handle_pair pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES]; +#define pending_handle(_id, _idx, _i) \ + (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \ + + (_i)]) + /*Data struct handed back to userspace for tapdisk device to VBD mapping*/ typedef struct domid_translate { unsigned short domid; @@ -27,6 +43,8 @@ typedef struct domid_translate { /*Data struct associated with each of the tapdisk devices*/ typedef struct tap_blkif { + struct vm_foreign_map foreign_map; + struct rw_semaphore vm_update_sem; struct vm_area_struct *vma; /*Shared memory area */ unsigned long rings_vstart; /*Kernel memory mapping */ unsigned long user_vstart; /*User memory mapping */ @@ -35,7 +53,6 @@ typedef struct tap_blkif { unsigned long ring_ok; /*make this ring->state */ blkif_front_ring_t ufe_ring; /*Rings up to user space. */ wait_queue_head_t wait; /*for poll */ - unsigned long mode; /*current switching mode */ int minor; /*Minor number for tapdisk device */ pid_t pid; /*tapdisk process id */ enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace @@ -54,7 +71,6 @@ typedef struct tap_blkif { * response queued for it, with the saved 'id' passed back. */ typedef struct pending_req { - blkif_t *blkif; u64 id; unsigned short mem_idx; int nr_pages; @@ -124,4 +140,6 @@ static inline int GET_NEXT_REQ(unsigned long *idx_map) pending_req_t* alloc_req(void); void free_req(pending_req_t *req); -void blktap_kick_user(int idx); +void blktap_kick_user(struct tap_blkif *info); + +extern wait_queue_head_t backdev_setup_wq; diff --git a/drivers/xen/blktap/common.h b/drivers/xen/blktap/common.h index af81f099..87c6ea2e 100644 --- a/drivers/xen/blktap/common.h +++ b/drivers/xen/blktap/common.h @@ -83,13 +83,12 @@ typedef struct blkif_st { grant_handle_t shmem_handle; grant_ref_t shmem_ref; - int dev_num; + struct tap_blkif *tapif; uint64_t sectors; } blkif_t; blkif_t *tap_alloc_blkif(domid_t domid); void tap_blkif_free(blkif_t *blkif); -void tap_blkif_kmem_cache_free(blkif_t *blkif); int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); void tap_blkif_unmap(blkif_t *blkif); @@ -117,7 +116,8 @@ void tap_blkif_xenbus_init(void); irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); int tap_blkif_schedule(void *arg); -int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif); +struct tap_blkif *associate_blkif(domid_t domid, int xenbus_id, + blkif_t *blkif); void signal_tapdisk(int idx); #endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/xen/blktap/interface.c b/drivers/xen/blktap/interface.c index 9009ba62..11f80019 100644 --- a/drivers/xen/blktap/interface.c +++ b/drivers/xen/blktap/interface.c @@ -162,15 +162,8 @@ void tap_blkif_free(blkif_t *blkif) { atomic_dec(&blkif->refcnt); wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); - atomic_inc(&blkif->refcnt); tap_blkif_unmap(blkif); -} - -void tap_blkif_kmem_cache_free(blkif_t *blkif) -{ - if (!atomic_dec_and_test(&blkif->refcnt)) - BUG(); kmem_cache_free(blkif_cachep, blkif); } diff --git a/drivers/xen/blktap/xenbus.c b/drivers/xen/blktap/xenbus.c index 840a11c2..3f93a645 100644 --- a/drivers/xen/blktap/xenbus.c +++ b/drivers/xen/blktap/xenbus.c @@ -180,9 +180,7 @@ static int blktap_remove(struct xenbus_device *dev) if (be->blkif) { if (be->blkif->xenblkd) kthread_stop(be->blkif->xenblkd); - signal_tapdisk(be->blkif->dev_num); tap_blkif_free(be->blkif); - tap_blkif_kmem_cache_free(be->blkif); be->blkif = NULL; } kfree(be); @@ -233,7 +231,6 @@ static int blktap_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { int err; - char name[TASK_COMM_LEN]; struct backend_info *be = kzalloc(sizeof(struct backend_info), GFP_KERNEL); if (!be) { @@ -258,29 +255,6 @@ static int blktap_probe(struct xenbus_device *dev, be->blkif->be = be; be->blkif->sectors = 0; - err = blktap_name(be->blkif, name); - if (err) { - xenbus_dev_error(be->dev, err, "get blktap dev name"); - goto fail; - } - DPRINTK("blktap_probe %d dev %s\n", dev->otherend_id, name); - - be->blkif->xenblkd = kthread_run(tap_blkif_schedule, be->blkif, name); - if (IS_ERR(be->blkif->xenblkd)) { - err = PTR_ERR(be->blkif->xenblkd); - be->blkif->xenblkd = NULL; - xenbus_dev_fatal(be->dev, err, "start xenblkd"); - WPRINTK("Error starting thread\n"); - goto fail; - } - - err = xenbus_printf(XBT_NIL, dev->nodename, "kthread-pid", "%d", - be->blkif->xenblkd->pid); - if (err) { - xenbus_dev_error(be->dev, err, "write kthread-pid"); - return; - } - /* set a watch on disk info, waiting for userspace to update details*/ err = xenbus_watch_path2(dev, dev->nodename, "info", &be->backend_watch, tap_backend_changed); @@ -304,7 +278,7 @@ fail: * information in xenstore. */ static void tap_backend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) + const char **vec, unsigned int len) { int err; unsigned long info; @@ -312,6 +286,8 @@ static void tap_backend_changed(struct xenbus_watch *watch, = container_of(watch, struct backend_info, backend_watch); struct xenbus_device *dev = be->dev; + DPRINTK("tap_backend_changed %s\n", vec[XS_WATCH_PATH]); + /** * Check to see whether userspace code has opened the image * and written sector @@ -326,17 +302,47 @@ static void tap_backend_changed(struct xenbus_watch *watch, return; } - DPRINTK("Userspace update on disk info, %lu\n",info); - err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu", &be->blkif->sectors, NULL); - /* Associate tap dev with domid*/ - be->blkif->dev_num = dom_to_devid(be->blkif->domid, be->xenbus_id, - be->blkif); - DPRINTK("Thread started for domid [%d], connecting disk\n", - be->blkif->dev_num); + if (be->blkif->xenblkd == NULL) { + char name[TASK_COMM_LEN]; + + /* Associate blkif with tap_blkif */ + be->blkif->tapif = associate_blkif(be->blkif->domid, + be->xenbus_id, be->blkif); + if (be->blkif->tapif == NULL) { + xenbus_dev_error(be->dev, err, "associate blkif"); + return; + } + /* Create name */ + err = blktap_name(be->blkif, name); + if (err) { + xenbus_dev_error(be->dev, err, "get blktap dev name"); + return; + } + + /* Create kernel thread */ + be->blkif->xenblkd = kthread_run(tap_blkif_schedule, be->blkif, + name); + if (IS_ERR(be->blkif->xenblkd)) { + err = PTR_ERR(be->blkif->xenblkd); + be->blkif->xenblkd = NULL; + xenbus_dev_fatal(be->dev, err, "start xenblkd"); + WPRINTK("Error starting thread\n"); + return; + } + + err = xenbus_printf(XBT_NIL, dev->nodename, "kthread-pid", + "%d", be->blkif->xenblkd->pid); + if (err) { + xenbus_dev_error(be->dev, err, "write kthread-pid"); + return; + } + + DPRINTK("tap_backend_changed created thread %s\n", name); + } tap_update_blkif_status(be->blkif); } @@ -375,11 +381,7 @@ static void tap_frontend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - if (be->blkif->xenblkd) { - kthread_stop(be->blkif->xenblkd); - be->blkif->xenblkd = NULL; - } - tap_blkif_free(be->blkif); + tap_blkif_unmap(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; diff --git a/include/linux/mm.h b/include/linux/mm.h index 61c5d6db..b5c8fc76 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -171,6 +171,9 @@ extern unsigned int kobjsize(const void *objp); #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #ifdef CONFIG_XEN #define VM_FOREIGN 0x04000000 /* Has pages belonging to another VM */ +struct vm_foreign_map { + struct page **map; +}; #endif #define VM_ALWAYSDUMP 0x08000000 /* Always include in core dumps */ diff --git a/mm/memory.c b/mm/memory.c index 70a7d6cb..ff11d46d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1082,7 +1082,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, #ifdef CONFIG_XEN if (vma && (vma->vm_flags & VM_FOREIGN)) { - struct page **map = vma->vm_private_data; + struct vm_foreign_map *foreign_map = + vma->vm_private_data; + struct page **map = foreign_map->map; int offset = (start - vma->vm_start) >> PAGE_SHIFT; if (map[offset] != NULL) { if (pages) {