*/
static struct tap_blkif *tapfds[MAX_TAP_DEV];
-static int blktap_next_minor;
module_param(blkif_reqs, int, 0);
/* Run-time switchable: /sys/module/blktap/parameters/ */
pending_req_t *pending_reqs[MAX_PENDING_REQS];
static struct list_head pending_free;
static DEFINE_SPINLOCK(pending_free_lock);
-static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+DECLARE_WAIT_QUEUE_HEAD(backdev_setup_wq);
static int alloc_pending_reqs;
#define BLKBACK_INVALID_HANDLE (~0)
* GRANT HANDLES
*/
-/* When using grant tables to map a frame for device access then the
- * handle returned must be used to unmap the frame. This is needed to
- * drop the ref count on the frame.
- */
-struct grant_handle_pair
-{
- grant_handle_t kernel;
- grant_handle_t user;
-};
-#define INVALID_GRANT_HANDLE 0xFFFF
-
-static struct grant_handle_pair
+struct grant_handle_pair
pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
-#define pending_handle(_id, _idx, _i) \
- (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
- + (_i)])
-
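/*
 * NB: later hunks still use pending_handle() and INVALID_GRANT_HANDLE,
 * so replacements for the definitions removed above must survive
 * elsewhere (plausibly a shared header, now that pending_grant_handles
 * has external linkage). A minimal sketch of the lookup, assuming the
 * same flat indexing as the macro deleted above:
 */
#define pending_handle(_id, _idx, _i) \
	(pending_grant_handles[_id][(_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST \
				    + (_i)])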
static int blktap_read_ufe_ring(tap_blkif_t *info); /* local prototypes */
#define BLKTAP_IOCTL_PRINT_IDXS 100
#define BLKTAP_IOCTL_BACKDEV_SETUP 200
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
-#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
-#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */
-
-#define BLKTAP_MODE_INTERPOSE \
- (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
- return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
- (arg == BLKTAP_MODE_INTERCEPT_FE) ||
- (arg == BLKTAP_MODE_INTERPOSE ));
-}
-
static inline int OFFSET_TO_USR_IDX(int offset)
{
return offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
pte_t copy;
tap_blkif_t *info;
int offset, seg, usr_idx, pending_idx, mmap_idx;
- unsigned long uvstart = vma->vm_start + (RING_PAGES << PAGE_SHIFT);
unsigned long kvaddr;
+ struct vm_foreign_map *foreign_map;
struct page **map;
- struct page *pg;
struct grant_handle_pair *khandle;
struct gnttab_unmap_grant_ref unmap[2];
int count = 0;
+ info = vma->vm_private_data;
+
/*
- * If the address is before the start of the grant mapped region or
- * if vm_file is NULL (meaning mmap failed and we have nothing to do)
+ * Zap entry if the address is before the start of the grant
+ * mapped region.
*/
- if (uvaddr < uvstart || vma->vm_file == NULL)
- return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
+ if (uvaddr < info->user_vstart)
+ return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
ptep, is_fullmm);
- info = vma->vm_file->private_data;
- map = vma->vm_private_data;
+ foreign_map = vma->vm_private_data;
+ map = foreign_map->map;
/* TODO: should these BUG_ON()s be changed to if statements with error returns? */
BUG_ON(!info);
BUG_ON(!info->idx_map);
BUG_ON(!map);
- offset = (int) ((uvaddr - uvstart) >> PAGE_SHIFT);
+ offset = (int)((uvaddr - info->user_vstart) >> PAGE_SHIFT);
usr_idx = OFFSET_TO_USR_IDX(offset);
seg = OFFSET_TO_SEG(offset);
pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
- kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg);
- pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
- ClearPageReserved(pg);
- map[offset + RING_PAGES] = NULL;
+ offset = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT);
+ ClearPageReserved(map[offset]);
+ map[offset] = NULL;
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, seg);
khandle = &pending_handle(mmap_idx, pending_idx, seg);
if (khandle->kernel != INVALID_GRANT_HANDLE) {
khandle->user);
count++;
} else {
- BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap));
-
- /* USING SHADOW PAGE TABLES. */
copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
is_fullmm);
}
return copy;
}
+static void blktap_vm_close(struct vm_area_struct *vma)
+{
+ struct tap_blkif *info = vma->vm_private_data;
+
+ down_write(&info->vm_update_sem);
+
+ zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
+
+ kfree(info->foreign_map.map);
+
+ /* Free the ring page. */
+ ClearPageReserved(virt_to_page(info->ufe_ring.sring));
+ free_page((unsigned long)info->ufe_ring.sring);
+
+ kfree(info->idx_map);
+ info->idx_map = NULL;
+
+ info->vma = NULL;
+ clear_bit(2, &info->dev_inuse);
+
+ up_write(&info->vm_update_sem);
+}
+
+static
struct vm_operations_struct blktap_vm_ops = {
+ close: blktap_vm_close,
nopage: blktap_nopage,
zap_pte: blktap_clear_pte,
};
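/*
 * A hedged legend for the dev_inuse bits used throughout this patch;
 * the symbolic names are illustrative only (the source uses bare bit
 * numbers):
 */
enum {
	TAP_DEV_RESERVED = 0,	/* minor claimed in get_next_free_dev(),
				   released at open or FREEINTF */
	TAP_DEV_OPEN     = 1,	/* chardev held open (blktap_open/release) */
	TAP_DEV_MMAPED   = 2,	/* rings mapped (blktap_mmap/vm_close) */
	TAP_DEV_THREAD   = 3,	/* tap_blkif_schedule() kthread running */
};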
/* tapfds[0] is always NULL */
- for (minor = 1; minor < blktap_next_minor; minor++) {
+ for (minor = 1; minor < MAX_TAP_DEV; minor++) {
info = tapfds[minor];
/* we could have failed a previous attempt. */
- if (!info ||
- ((info->dev_inuse == 0) &&
- (info->backdev == NULL) &&
- (info->dev_pending == 0)) ) {
- info->dev_pending = 1;
+ if (info == NULL ||
+ (info->dev_inuse == 0 &&
+ !test_and_set_bit(0, &info->dev_inuse)))
goto found;
- }
}
info = NULL;
- minor = -1;
- /*
- * We didn't find free device. If we can still allocate
- * more, then we grab the next device minor that is
- * available. This is done while we are still under
- * the protection of the pending_free_lock.
- */
- if (blktap_next_minor < MAX_TAP_DEV)
- minor = blktap_next_minor++;
found:
spin_unlock_irq(&pending_free_lock);
- if (!info && minor > 0) {
+ if (info == NULL) {
info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (unlikely(!info)) {
- /*
- * If we failed here, try to put back
- * the next minor number. But if one
- * was just taken, then we just lose this
- * minor. We can try to allocate this
- * minor again later.
- */
- spin_lock_irq(&pending_free_lock);
- if (blktap_next_minor == minor+1)
- blktap_next_minor--;
+ if (unlikely(!info))
+ goto out;
+
+ init_rwsem(&info->vm_update_sem);
+ set_bit(0, &info->dev_inuse);
+
+ spin_lock_irq(&pending_free_lock);
+ for (; minor < MAX_TAP_DEV; minor++)
+ if (tapfds[minor] == NULL)
+ break;
+ if (minor == MAX_TAP_DEV) {
+ kfree(info);
+ info = NULL;
spin_unlock_irq(&pending_free_lock);
goto out;
}
info->minor = minor;
- /*
- * Make sure that we have a minor before others can
- * see us.
- */
- wmb();
tapfds[minor] = info;
+ spin_unlock_irq(&pending_free_lock);
+
if ((class = get_xen_class()) != NULL)
class_device_create(class, NULL,
MKDEV(blktap_major, minor), NULL,
return info;
}
-int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif)
+struct tap_blkif *
+associate_blkif(domid_t domid, int xenbus_id, blkif_t *blkif)
{
tap_blkif_t *info;
int i;
- for (i = 1; i < blktap_next_minor; i++) {
+ for (i = 1; i < MAX_TAP_DEV; i++) {
info = tapfds[i];
- if ( info &&
- (info->trans.domid == domid) &&
- (info->trans.busid == xenbus_id) ) {
+ if (info && info->trans.domid == domid &&
+ info->trans.busid == xenbus_id) {
info->blkif = blkif;
info->status = RUNNING;
- return i;
+ return info;
}
}
- return -1;
-}
-
-void signal_tapdisk(int idx)
-{
- tap_blkif_t *info;
- struct task_struct *ptask;
-
- info = tapfds[idx];
- if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
- return;
-
- if (info->pid > 0) {
- ptask = find_task_by_pid(info->pid);
- if (ptask)
- info->status = CLEANSHUTDOWN;
- }
- info->blkif = NULL;
-
- return;
+ return NULL;
}
static int blktap_open(struct inode *inode, struct file *filp)
{
- blkif_sring_t *sring;
int idx = iminor(inode) - BLKTAP_MINOR;
tap_blkif_t *info;
- int i;
/* ctrl device, treat differently */
- if (!idx)
+ if (idx == 0)
return 0;
- info = tapfds[idx];
-
- if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) {
+ if (idx < 1 || idx >= MAX_TAP_DEV || tapfds[idx] == NULL) {
WPRINTK("Unable to open device /dev/xen/blktap%d\n",
idx);
return -ENODEV;
}
- DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
-
- /*Only one process can access device at a time*/
- if (test_and_set_bit(0, &info->dev_inuse))
+ info = tapfds[idx];
+
+ DPRINTK("Opening device /dev/xen/blktap%d\n", idx);
+
+ /* Only one process can access device at a time */
+ if (test_and_set_bit(1, &info->dev_inuse))
return -EBUSY;
- info->dev_pending = 0;
-
- /* Allocate the fe ring. */
- sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
- if (sring == NULL)
- goto fail_nomem;
+ clear_bit(0, &info->dev_inuse);
- SetPageReserved(virt_to_page(sring));
-
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
-
filp->private_data = info;
- info->vma = NULL;
- info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS,
- GFP_KERNEL);
-
- if (info->idx_map == NULL)
- goto fail_nomem;
+ DPRINTK("Tap open: device /dev/xen/blktap%d\n", idx);
- if (idx > 0) {
- init_waitqueue_head(&info->wait);
- for (i = 0; i < MAX_PENDING_REQS; i++)
- info->idx_map[i] = INVALID_REQ;
- }
-
- DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
return 0;
-
- fail_nomem:
- return -ENOMEM;
}
static int blktap_release(struct inode *inode, struct file *filp)
{
tap_blkif_t *info = filp->private_data;
- int ret;
/* check for control device */
- if (!info)
+ if (info == NULL)
return 0;
- info->dev_inuse = 0;
DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
-
- /* Free the ring page. */
- ClearPageReserved(virt_to_page(info->ufe_ring.sring));
- free_page((unsigned long) info->ufe_ring.sring);
-
- /* Clear any active mappings and free foreign map table */
- if (info->vma) {
- zap_page_range(
- info->vma, info->vma->vm_start,
- info->vma->vm_end - info->vma->vm_start, NULL);
-
- kfree(info->vma->vm_private_data);
-
- info->vma = NULL;
- }
-
- if (info->idx_map) {
- kfree(info->idx_map);
- info->idx_map = NULL;
- }
-
- if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
- if (info->blkif->xenblkd != NULL) {
- kthread_stop(info->blkif->xenblkd);
- info->blkif->xenblkd = NULL;
- }
- info->status = CLEANSHUTDOWN;
- }
-
- ret = destroy_backdev(info);
- if (ret && ret != -EBUSY)
- WPRINTK("destroy_backdev failed %d\n", ret);
+ clear_bit(1, &info->dev_inuse);
filp->private_data = NULL;
return 0;
}
-
/* Note on mmap:
* We need to map pages to user space in a way that will allow the block
* subsystem to set up direct I/O to them. This couldn't be done before, because
*/
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
{
+ tap_blkif_t *info = filp->private_data;
+ blkif_sring_t *sring = NULL;
+ struct page **map = NULL;
int size;
- struct page **map;
int i;
- tap_blkif_t *info = filp->private_data;
int ret;
- if (info == NULL) {
- WPRINTK("blktap: mmap, retrieving idx failed\n");
+ if (info == NULL)
return -ENOMEM;
+ if (test_and_set_bit(2, &info->dev_inuse))
+ return -EBUSY;
- }
-
- vma->vm_flags |= VM_RESERVED;
- vma->vm_ops = &blktap_vm_ops;
- size = vma->vm_end - vma->vm_start;
- if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
+ size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ if (size != (mmap_pages + RING_PAGES)) {
WPRINTK("you _must_ map exactly %d pages!\n",
- mmap_pages + RING_PAGES);
+ mmap_pages + RING_PAGES);
return -EAGAIN;
}
- size >>= PAGE_SHIFT;
+ /* Allocate the fe ring. */
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL) {
+ WPRINTK("Couldn't alloc sring.\n");
+ goto fail_mem;
+ }
+ info->idx_map = kmalloc(sizeof(unsigned long) *
+ MAX_PENDING_REQS, GFP_KERNEL);
+ if (info->idx_map == NULL) {
+ WPRINTK("Couldn't alloc idx_map.\n");
+ goto fail_mem;
+ }
+ map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
+ * sizeof(struct page *),
+ GFP_KERNEL);
+ if (map == NULL) {
+ WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
+ goto fail_mem;
+ }
+
+ SetPageReserved(virt_to_page(sring));
+
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
+
+ init_waitqueue_head(&info->wait);
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ info->idx_map[i] = INVALID_REQ;
+
info->rings_vstart = vma->vm_start;
info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
__pa(info->ufe_ring.sring) >> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot);
if (ret) {
- WPRINTK("Mapping user ring failed!\n");
+ WPRINTK("Mapping user ring failed.\n");
goto fail;
}
/* Mark this VM as containing foreign pages, and set up mappings. */
- map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
- * sizeof(struct page *),
- GFP_KERNEL);
- if (map == NULL) {
- WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
- goto fail;
- }
-
- for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
- map[i] = NULL;
-
- vma->vm_private_data = map;
+ info->foreign_map.map = map;
+ vma->vm_private_data = &info->foreign_map;
vma->vm_flags |= VM_FOREIGN;
vma->vm_flags |= VM_DONTCOPY;
+ vma->vm_flags |= VM_RESERVED;
+ vma->vm_ops = &blktap_vm_ops;
#ifdef CONFIG_X86
vma->vm_mm->context.has_foreign_mappings = 1;
#endif
info->vma = vma;
- info->ring_ok = 1;
return 0;
+
fail:
/* Clear any active mappings. */
zap_page_range(vma, vma->vm_start,
vma->vm_end - vma->vm_start, NULL);
+ ClearPageReserved(virt_to_page(sring));
+ fail_mem:
+ free_page((unsigned long)sring);
+ kfree(info->idx_map);
+ info->idx_map = NULL;
+ kfree(map);
+ clear_bit(2, &info->dev_inuse);
return -ENOMEM;
}
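/*
 * For reference: the VMA populated above holds RING_PAGES shared-ring
 * pages followed by mmap_pages data pages, so user_vstart points just
 * past the ring. Segment addresses are then derived with a helper
 * along these lines (a sketch consistent with the MMAP_VADDR() and
 * OFFSET_TO_USR_IDX() uses in this file, not necessarily the verbatim
 * definition):
 */
#define MMAP_VADDR(_start, _req, _seg)					\
	((_start) +							\
	 ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +	\
	 ((_seg) * PAGE_SIZE))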
-
static int blktap_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case BLKTAP_IOCTL_KICK_FE:
- {
+ if (info == NULL)
+ return -ENOENT;
+
/* There are fe messages to process. */
return blktap_read_ufe_ring(info);
- }
+
case BLKTAP_IOCTL_SETMODE:
- {
- if (info) {
- if (BLKTAP_MODE_VALID(arg)) {
- info->mode = arg;
- /* XXX: may need to flush rings here. */
- DPRINTK("blktap: set mode to %lx\n",
- arg);
- return 0;
- }
- }
+ /* deprecated */
return 0;
- }
+
case BLKTAP_IOCTL_PRINT_IDXS:
- {
if (info) {
printk("User Rings: \n-----------\n");
printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
info->ufe_ring.sring->rsp_prod);
}
return 0;
- }
+
case BLKTAP_IOCTL_SENDPID:
- {
if (info) {
info->pid = (pid_t)arg;
DPRINTK("blktap: pid received %d\n",
info->pid);
}
return 0;
- }
+
case BLKTAP_IOCTL_NEWINTF:
{
uint64_t val = (uint64_t)arg;
info->trans.busid = tr->busid;
return info->minor;
}
+
case BLKTAP_IOCTL_FREEINTF:
{
unsigned long dev = arg;
- unsigned long flags;
DPRINTK("FREEINTF Req for dev %ld\n", dev);
- info = tapfds[dev];
-
- if ((dev > MAX_TAP_DEV) || !info)
- return 0; /* should this be an error? */
+ if (dev >= MAX_TAP_DEV || tapfds[dev] == NULL)
+ return -EINVAL;
- spin_lock_irqsave(&pending_free_lock, flags);
- if (info->dev_pending)
- info->dev_pending = 0;
- spin_unlock_irqrestore(&pending_free_lock, flags);
+ clear_bit(0, &tapfds[dev]->dev_inuse);
return 0;
}
+
case BLKTAP_IOCTL_MINOR:
{
unsigned long dev = arg;
- info = tapfds[dev];
-
- if ((dev > MAX_TAP_DEV) || !info)
+ if (dev >= MAX_TAP_DEV || tapfds[dev] == NULL)
return -EINVAL;
- return info->minor;
+ return tapfds[dev]->minor;
}
+
case BLKTAP_IOCTL_MAJOR:
return blktap_major;
case BLKTAP_QUERY_ALLOC_REQS:
- {
WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
alloc_pending_reqs, blkif_reqs);
return (alloc_pending_reqs * 100) / blkif_reqs;
- }
case BLKTAP_IOCTL_BACKDEV_SETUP:
{
unsigned long dev = arg;
+ int ret;
DPRINTK("BLKTAP_IOCTL_BACKDEV_SETUP ioctl: %ld\n", dev);
- info = tapfds[dev];
+ if (dev >= MAX_TAP_DEV || tapfds[dev] == NULL)
+ return -EINVAL;
- return create_backdev(info);
+ while (tapfds[dev]->backdev == NULL) {
+ ret = wait_event_interruptible(backdev_setup_wq,
+ tapfds[dev]->backdev);
+ if (ret)
+ return ret;
+ }
+ return 0;
}
}
+
return -ENOIOCTLCMD;
}
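/*
 * A sketch of how a userspace daemon (e.g. tapdisk) might drive the
 * ioctls above; the fd, header, and error handling are illustrative
 * assumptions, not part of this patch. With the reworked
 * BLKTAP_IOCTL_BACKDEV_SETUP, the call now sleeps on backdev_setup_wq
 * until the backdev exists or a signal arrives.
 */
#include <sys/ioctl.h>		/* userspace side */

static int wait_for_backdev(int ctrl_fd, unsigned long dev)
{
	if (ioctl(ctrl_fd, BLKTAP_IOCTL_MINOR, dev) < 0)
		return -1;	/* no such minor */
	return ioctl(ctrl_fd, BLKTAP_IOCTL_BACKDEV_SETUP, dev);
}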
tap_blkif_t *info = filp->private_data;
/* do not work on the control device */
- if (!info)
+ if (info == NULL)
return 0;
poll_wait(filp, &info->wait, wait);
return 0;
}
-void blktap_kick_user(int idx)
+void blktap_kick_user(struct tap_blkif *info)
{
- tap_blkif_t *info;
-
- info = tapfds[idx];
-
- if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
- return;
wake_up_interruptible(&info->wait);
-
- return;
}
-static int do_block_io_op(blkif_t *blkif);
+static int do_block_io_op(tap_blkif_t *info);
static void dispatch_rw_block_io(blkif_t *blkif,
blkif_request_t *req,
- pending_req_t *pending_req);
+ pending_req_t *pending_req,
+ int usr_idx);
static void make_response(blkif_t *blkif, u64 id,
unsigned short op, int st);
wake_up(&pending_free_wq);
}
-static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
- int tapidx)
+#define DO_FAST_FLUSH
+static void fast_flush_area(pending_req_t *req, int pending_idx, int usr_idx,
+ tap_blkif_t *info)
{
+#ifdef DO_FAST_FLUSH
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
unsigned int i, invcount = 0;
struct grant_handle_pair *khandle;
uint64_t ptep;
int ret, mmap_idx;
unsigned long kvaddr, uvaddr;
- tap_blkif_t *info;
-
-
- info = tapfds[tapidx];
-
- if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) {
- WPRINTK("fast_flush: Couldn't get info!\n");
- return;
- }
+ struct page **map;
+ int offset;
+#endif
- if (info->vma != NULL &&
- xen_feature(XENFEAT_auto_translated_physmap)) {
- down_write(&info->vma->vm_mm->mmap_sem);
+ if (xen_feature(XENFEAT_auto_translated_physmap))
zap_page_range(info->vma,
- MMAP_VADDR(info->user_vstart, u_idx, 0),
+ MMAP_VADDR(info->user_vstart, usr_idx, 0),
req->nr_pages << PAGE_SHIFT, NULL);
- up_write(&info->vma->vm_mm->mmap_sem);
- return;
- }
+#ifdef DO_FAST_FLUSH
mmap_idx = req->mem_idx;
+ map = info->foreign_map.map;
+
for (i = 0; i < req->nr_pages; i++) {
- kvaddr = idx_to_kaddr(mmap_idx, k_idx, i);
- uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
- khandle = &pending_handle(mmap_idx, k_idx, i);
+ khandle = &pending_handle(mmap_idx, pending_idx, i);
if (khandle->kernel != INVALID_GRANT_HANDLE) {
- gnttab_set_unmap_op(&unmap[invcount],
- idx_to_kaddr(mmap_idx, k_idx, i),
+ gnttab_set_unmap_op(&unmap[invcount], kvaddr,
GNTMAP_host_map, khandle->kernel);
invcount++;
- set_phys_to_machine(
- __pa(idx_to_kaddr(mmap_idx, k_idx, i))
- >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+ set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
+ INVALID_P2M_ENTRY);
}
if (khandle->user != INVALID_GRANT_HANDLE) {
BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
- if (create_lookup_pte_addr(
- info->vma->vm_mm,
- MMAP_VADDR(info->user_vstart, u_idx, i),
- &ptep) !=0) {
+ if (create_lookup_pte_addr(info->vma->vm_mm,
+ uvaddr, &ptep) != 0) {
WPRINTK("Couldn't get a pte addr!\n");
return;
}
invcount++;
}
+ offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
+ ClearPageReserved(map[offset]);
+ map[offset] = NULL;
+
BLKTAP_INVALIDATE_HANDLE(khandle);
}
- ret = HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, unmap, invcount);
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap,
+ invcount);
BUG_ON(ret);
-
- if (info->vma != NULL && !xen_feature(XENFEAT_auto_translated_physmap))
+#endif
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
zap_page_range(info->vma,
- MMAP_VADDR(info->user_vstart, u_idx, 0),
+ MMAP_VADDR(info->user_vstart, usr_idx, 0),
req->nr_pages << PAGE_SHIFT, NULL);
}
int tap_blkif_schedule(void *arg)
{
blkif_t *blkif = arg;
+ struct tap_blkif *info;
blkif_get(blkif);
+ info = blkif->tapif;
+
+ set_bit(3, &info->dev_inuse);
+
+ create_backdev(info);
+
if (debug_lvl)
printk(KERN_DEBUG "%s: started\n", current->comm);
- while (!kthread_should_stop()) {
+ while (1) {
if (try_to_freeze())
continue;
pending_free_wq,
!list_empty(&pending_free) || kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
blkif->waiting_reqs = 0;
smp_mb(); /* clear flag *before* checking for work */
- if (do_block_io_op(blkif))
+ down_read(&info->vm_update_sem);
+ if (info->vma == NULL) {
+ up_read(&info->vm_update_sem);
+ break;
+ }
+
+ if (do_block_io_op(info))
blkif->waiting_reqs = 1;
else
- backdev_restart_queue(tapfds[blkif->dev_num]);
+ backdev_restart_queue(info);
+
+ up_read(&info->vm_update_sem);
if (log_stats && time_after(jiffies, blkif->st_print))
print_stats(blkif);
if (debug_lvl)
printk(KERN_DEBUG "%s: exiting\n", current->comm);
+ destroy_backdev(info);
+
blkif->xenblkd = NULL;
blkif_put(blkif);
+ clear_bit(3, &info->dev_inuse);
+
return 0;
}
static int blktap_read_ufe_ring(tap_blkif_t *info)
{
/* This is called to read responses from the UFE ring. */
- RING_IDX i, j, rp;
- blkif_response_t *resp;
- blkif_t *blkif=NULL;
+ RING_IDX rc, rp;
int pending_idx, usr_idx, mmap_idx;
+ blkif_response_t res;
pending_req_t *pending_req;
- struct page **map;
+ blkif_t *blkif = info->blkif;
- if (!info)
- return 0;
-
- /* We currently only forward packets in INTERCEPT_FE mode. */
- if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
+ down_read(&info->vm_update_sem);
+ if (info->vma == NULL) {
+ up_read(&info->vm_update_sem);
return 0;
+ }
/* for each outstanding message on the UFEring */
rp = info->ufe_ring.sring->rsp_prod;
rmb();
-
- for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
- blkif_response_t res;
- resp = RING_GET_RESPONSE(&info->ufe_ring, i);
- memcpy(&res, resp, sizeof(res));
+
+ for (rc = info->ufe_ring.rsp_cons; rc != rp; rc++) {
+ memcpy(&res, RING_GET_RESPONSE(&info->ufe_ring, rc),
+ sizeof(res));
mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
++info->ufe_ring.rsp_cons;
/*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
usr_idx = (int)res.id;
+ DPRINTK("response %d id %x idx_map %p\n", rc, usr_idx,
+ info->idx_map);
pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
ID_TO_IDX(info->idx_map[usr_idx])));
pending_req = &pending_reqs[mmap_idx][pending_idx];
- blkif = pending_req->blkif;
- map = info->vma->vm_private_data;
-
- for (j = 0; j < pending_req->nr_pages; j++) {
- unsigned long uvaddr;
- int offset;
- uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
-
- offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
- ClearPageReserved(map[offset]);
- map[offset] = NULL;
- }
if (pending_req->inuse == 2)
backdev_finish_req(info, usr_idx, &res, pending_req);
else {
fast_flush_area(pending_req, pending_idx, usr_idx,
- info->minor);
+ info);
info->idx_map[usr_idx] = INVALID_REQ;
- make_response(blkif, pending_req->id, res.operation,
- res.status);
+ make_response(blkif, pending_req->id,
+ res.operation, res.status);
}
- blkif_put(pending_req->blkif);
+ blkif->waiting_reqs = 1;
+
+ blkif_put(blkif);
free_req(pending_req);
}
-
- if (info->blkif && info->blkif->waiting_reqs)
- wake_up(&info->blkif->wq);
+
+ up_read(&info->vm_update_sem);
+
+ if (blkif->waiting_reqs)
+ wake_up(&blkif->wq);
return 0;
}
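/*
 * The usr_idx <-> (mmap_idx, pending_idx) translation above relies on
 * packing macros defined earlier in the file (outside this hunk).
 * Assuming the conventional blktap encoding of mmap_idx in the high
 * half and the request index in the low half, they are roughly:
 */
#define MASK_PEND_IDX(_i)	((_i) & (MAX_PENDING_REQS - 1))
#define MAKE_ID(_mi, _i)	(((_mi) << 16) | (_i))
#define ID_TO_IDX(_id)		((_id) & 0xffff)
#define ID_TO_MIDX(_id)		((_id) >> 16)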
/******************************************************************
* DOWNWARD CALLS -- These interface with the block-device layer proper.
*/
-static int print_dbug = 1;
-static int do_block_io_op(blkif_t *blkif)
+static int do_block_io_op(tap_blkif_t *info)
{
+ blkif_t *blkif = info->blkif;
blkif_back_rings_t *blk_rings = &blkif->blk_rings;
blkif_request_t req;
pending_req_t *pending_req;
RING_IDX rc, rp;
int more_to_do = 0;
- tap_blkif_t *info;
+ int usr_idx;
if (!tap_blkif_connected(blkif))
return 0;
rp = blk_rings->common.sring->req_prod;
rmb(); /* Ensure we see queued requests up to 'rp'. */
- /*Check blkif has corresponding UE ring*/
- if (blkif->dev_num < 0) {
- /*oops*/
- if (print_dbug) {
- WPRINTK("Corresponding UE "
- "ring does not exist!\n");
- print_dbug = 0; /*We only print this message once*/
- }
- return 0;
- }
-
- info = tapfds[blkif->dev_num];
-
- if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) {
- if (print_dbug) {
- WPRINTK("Can't get UE info!\n");
- print_dbug = 0;
- }
- return 0;
- }
-
while (rc != rp) {
-
+ if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
+ break;
+
if (RING_FULL(&info->ufe_ring)) {
WPRINTK("RING_FULL! More to do\n");
- more_to_do = 1;
break;
}
- if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) {
- WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
- " More to do\n");
- more_to_do = 1;
- break;
- }
+ usr_idx = GET_NEXT_REQ(info->idx_map);
+ if (usr_idx == INVALID_REQ)
+ break;
pending_req = alloc_req();
if (NULL == pending_req) {
switch (req.operation) {
case BLKIF_OP_READ:
blkif->st_rd_req++;
- dispatch_rw_block_io(blkif, &req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req,
+ usr_idx);
break;
case BLKIF_OP_WRITE:
blkif->st_wr_req++;
- dispatch_rw_block_io(blkif, &req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req,
+ usr_idx);
break;
default:
/* Yield point for this unbounded loop. */
cond_resched();
}
-
- blktap_kick_user(blkif->dev_num);
+
+ blktap_kick_user(info);
return more_to_do;
}
static void dispatch_rw_block_io(blkif_t *blkif,
blkif_request_t *req,
- pending_req_t *pending_req)
+ pending_req_t *pending_req,
+ int usr_idx)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
tap_blkif_t *info;
blkif_request_t *target;
int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
- int usr_idx;
uint16_t mmap_idx = pending_req->mem_idx;
- if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV)
- goto fail_response;
-
- info = tapfds[blkif->dev_num];
- if (info == NULL)
- goto fail_response;
-
- /* Check we have space on user ring - should never fail. */
- usr_idx = GET_NEXT_REQ(info->idx_map);
- if (usr_idx == INVALID_REQ) {
- BUG();
- goto fail_response;
- }
+ info = blkif->tapif;
/* Check that number of segments is sane. */
nseg = req->nr_segments;
- if ( unlikely(nseg == 0) ||
- unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
+ if (unlikely(nseg == 0) ||
+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
WPRINTK("Bad number of segments in request (%d)\n", nseg);
goto fail_response;
}
- /* Make sure userspace is ready. */
- if (!info->ring_ok) {
- WPRINTK("blktap: ring not ready for requests!\n");
- goto fail_response;
- }
-
- if (RING_FULL(&info->ufe_ring)) {
- WPRINTK("blktap: fe_ring is full, can't add "
- "IO Request will be dropped. %d %d\n",
- RING_SIZE(&info->ufe_ring),
- RING_SIZE(&blkif->blk_rings.common));
- goto fail_response;
- }
-
- pending_req->blkif = blkif;
pending_req->id = req->id;
pending_req->operation = operation;
pending_req->status = BLKIF_RSP_OKAY;
map[i+1].handle = INVALID_GRANT_HANDLE;
}
- pending_handle(mmap_idx, pending_idx, i/2).kernel
- = map[i].handle;
- pending_handle(mmap_idx, pending_idx, i/2).user
- = map[i+1].handle;
+ pending_handle(mmap_idx, pending_idx, i/2).kernel =
+ map[i].handle;
+ pending_handle(mmap_idx, pending_idx, i/2).user =
+ map[i+1].handle;
if (ret)
continue;
>> PAGE_SHIFT));
offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
- ((struct page **)info->vma->vm_private_data)[offset] =
- pg;
+ SetPageReserved(pg);
+ info->foreign_map.map[offset] = pg;
}
} else {
for (i = 0; i < nseg; i++) {
map[i].handle = INVALID_GRANT_HANDLE;
}
- pending_handle(mmap_idx, pending_idx, i).kernel
- = map[i].handle;
+ pending_handle(mmap_idx, pending_idx, i).kernel =
+ map[i].handle;
if (ret)
continue;
offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
- ((struct page **)info->vma->vm_private_data)[offset] =
- pg;
+ info->foreign_map.map[offset] = pg;
+ SetPageReserved(pg);
+
+ if (vm_insert_page(info->vma, uvaddr, pg))
+ ret |= 1;
}
}
if (ret)
goto fail_flush;
- if (xen_feature(XENFEAT_auto_translated_physmap))
- down_write(&info->vma->vm_mm->mmap_sem);
- /* Mark mapped pages as reserved: */
- for (i = 0; i < req->nr_segments; i++) {
- unsigned long kvaddr;
- struct page *pg;
-
- kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
- pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
- SetPageReserved(pg);
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- ret = vm_insert_page(info->vma,
- MMAP_VADDR(info->user_vstart,
- usr_idx, i), pg);
- if (ret) {
- up_write(&info->vma->vm_mm->mmap_sem);
- goto fail_flush;
- }
- }
- }
- if (xen_feature(XENFEAT_auto_translated_physmap))
- up_write(&info->vma->vm_mm->mmap_sem);
-
/*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
return;
+
fail_flush:
WPRINTK("Reached Fail_flush\n");
- fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
+ fast_flush_area(pending_req, pending_idx, usr_idx, info);
fail_response:
make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
free_req(pending_req);
msleep(1); /* back off a bit */
-}
-
+}
/******************************************************************
return -ENODEV;
INIT_LIST_HEAD(&pending_free);
- for(i = 0; i < 2; i++) {
+ for (i = 0; i < 2; i++) {
ret = req_increase();
if (ret)
- break;
+ return ret;
}
- if (i == 0)
- return ret;
tap_blkif_interface_init();
/* Dynamically allocate a major for this device */
ret = register_chrdev(0, "blktap", &blktap_fops);
-
if (ret < 0) {
WPRINTK("Couldn't register /dev/xen/blktap\n");
return -ENOMEM;
blktap_major = ret;
- /* tapfds[0] is always NULL */
- blktap_next_minor++;
-
- DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
+ DPRINTK("Created misc_dev [/dev/xen/blktap0]\n");
/* Make sure the xen class exists */
if ((class = get_xen_class()) != NULL) {