ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c @ 11681:0c29e0d1c67b

[BLKTAP] Remove unnecessary TLB flush from blktap driver.

blktap_poll() calls tlb_flush_all() in its main ring-buffer polling
loop. This seems to be superfluous: the hypervisor should perform any
necessary TLB flushes for the grant table operations performed by the
back-end. Even a simple memory barrier is unnecessary here, as the
RING_PUSH_REQUESTS() call performs a wmb() anyway.

And tlb_flush_all() is not exported to modules, so this call prevents
blktap from building as a module. Just remove it.
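
For reference, a sketch of the tail of blktap_poll() as it reads after
this change (assuming the removed call sat just inside the ring-push
branch); the full function is at line 680 of the file below:

    poll_wait(filp, &info->wait, wait);
    if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
            /* tlb_flush_all() went here; RING_PUSH_REQUESTS() already
             * issues the wmb() this path needs, and the hypervisor
             * handles TLB flushing for the back-end's grant table
             * operations. */
            RING_PUSH_REQUESTS(&info->ufe_ring);
            return POLLIN | POLLRDNORM;
    }
    return 0;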

Signed-off-by: Stephen Tweedie <sct@redhat.com>
author kfraser@localhost.localdomain
date Fri Sep 29 11:17:29 2006 +0100 (2006-09-29)
parents d90be316e5f5
children 3971f49ce592
line source
1 /******************************************************************************
2 * drivers/xen/blktap/blktap.c
3 *
4 * Back-end driver for user level virtual block devices. This portion of the
5 * driver exports a 'unified' block-device interface that can be accessed
6 * by any operating system that implements a compatible front end. Requests
7 * are remapped to a user-space memory region.
8 *
9 * Based on the blkback driver code.
10 *
11 * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation; or, when distributed
16 * separately from the Linux kernel or incorporated into other
17 * software packages, subject to the following license:
18 *
19 * Permission is hereby granted, free of charge, to any person obtaining a copy
20 * of this source file (the "Software"), to deal in the Software without
21 * restriction, including without limitation the rights to use, copy, modify,
22 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
23 * and to permit persons to whom the Software is furnished to do so, subject to
24 * the following conditions:
25 *
26 * The above copyright notice and this permission notice shall be included in
27 * all copies or substantial portions of the Software.
28 *
29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35 * IN THE SOFTWARE.
36 */
38 #include <linux/spinlock.h>
39 #include <linux/kthread.h>
40 #include <linux/list.h>
41 #include <asm/hypervisor.h>
42 #include "common.h"
43 #include <xen/balloon.h>
44 #include <linux/kernel.h>
45 #include <linux/fs.h>
46 #include <linux/mm.h>
47 #include <linux/errno.h>
48 #include <linux/major.h>
49 #include <linux/gfp.h>
50 #include <linux/poll.h>
51 #include <asm/tlbflush.h>
52 #include <linux/devfs_fs_kernel.h>
54 #define MAX_TAP_DEV 100 /*the maximum number of tapdisk ring devices */
55 #define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */
58 struct class *xen_class;
59 EXPORT_SYMBOL_GPL(xen_class);
61 /*
62 * Setup the xen class. This should probably go in another file, but
63 * since blktap is the only user of it so far, it gets to keep it.
64 */
65 int setup_xen_class(void)
66 {
67 int ret;
69 if (xen_class)
70 return 0;
72 xen_class = class_create(THIS_MODULE, "xen");
73 if ((ret = IS_ERR(xen_class))) {
74 xen_class = NULL;
75 return ret;
76 }
78 return 0;
79 }
81 /*
82 * The maximum number of requests that can be outstanding at any time
83 * is determined by
84 *
85 * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST]
86 *
87 * where mmap_alloc < MAX_DYNAMIC_MEM.
88 *
89 * TODO:
90 * mmap_alloc is initialised to 2 and should be adjustable on the fly via
91 * sysfs.
92 */
93 #define MAX_DYNAMIC_MEM 64
94 #define MAX_PENDING_REQS 64
95 #define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
96 #define MMAP_VADDR(_start, _req,_seg) \
97 (_start + \
98 ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
99 ((_seg) * PAGE_SIZE))
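/*
 * Worked example of the sizing above, assuming BLKIF_MAX_SEGMENTS_PER_REQUEST
 * is 11 (as in the blkif headers of this era) and PAGE_SIZE is 4 KiB:
 *
 *   MMAP_PAGES = 64 * 11 = 704 pages per mmap_alloc slot (~2.75 MiB), so the
 *   default mmap_alloc of 2 allows up to 1408 foreign pages mapped at once.
 *
 *   MMAP_VADDR(start, 3, 2) = start + (3 * 11 + 2) * PAGE_SIZE, i.e. segment 2
 *   of pending request 3 lands 35 pages into the region.
 */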
100 static int blkif_reqs = MAX_PENDING_REQS;
101 static int mmap_pages = MMAP_PAGES;
103 #define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we
104 * have a bunch of pages reserved for shared
105 * memory rings.
106 */
108 /*Data struct associated with each of the tapdisk devices*/
109 typedef struct tap_blkif {
110 struct vm_area_struct *vma; /*Shared memory area */
111 unsigned long rings_vstart; /*Kernel memory mapping */
112 unsigned long user_vstart; /*User memory mapping */
113 unsigned long dev_inuse; /*One process opens device at a time. */
114 unsigned long dev_pending; /*In process of being opened */
115 unsigned long ring_ok; /*make this ring->state */
116 blkif_front_ring_t ufe_ring; /*Rings up to user space. */
117 wait_queue_head_t wait; /*for poll */
118 unsigned long mode; /*current switching mode */
119 int minor; /*Minor number for tapdisk device */
120 pid_t pid; /*tapdisk process id */
121 enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace
122 shutdown */
123 unsigned long *idx_map; /*Record the user ring id to kern
124 [req id, idx] tuple */
125 blkif_t *blkif; /*Associate blkif with tapdev */
126 int sysfs_set; /*Set if it has a class device. */
127 } tap_blkif_t;
129 /*Data struct handed back to userspace for tapdisk device to VBD mapping*/
130 typedef struct domid_translate {
131 unsigned short domid;
132 unsigned short busid;
133 } domid_translate_t ;
135 static domid_translate_t translate_domid[MAX_TAP_DEV];
136 static tap_blkif_t *tapfds[MAX_TAP_DEV];
138 static int __init set_blkif_reqs(char *str)
139 {
140 get_option(&str, &blkif_reqs);
141 return 1;
142 }
143 __setup("blkif_reqs=", set_blkif_reqs);
145 /* Run-time switchable: /sys/module/blktap/parameters/ */
146 static unsigned int log_stats = 0;
147 static unsigned int debug_lvl = 0;
148 module_param(log_stats, int, 0644);
149 module_param(debug_lvl, int, 0644);
151 /*
152 * Each outstanding request that we've passed to the lower device layers has a
153 * 'pending_req' allocated to it. Each buffer_head that completes decrements
154 * the pendcnt towards zero. When it hits zero, the specified domain has a
155 * response queued for it, with the saved 'id' passed back.
156 */
157 typedef struct {
158 blkif_t *blkif;
159 unsigned long id;
160 unsigned short mem_idx;
161 int nr_pages;
162 atomic_t pendcnt;
163 unsigned short operation;
164 int status;
165 struct list_head free_list;
166 int inuse;
167 } pending_req_t;
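/*
 * Lifecycle sketch of a pending_req_t, mirroring the code below (blk_req
 * stands in for the guest's blkif_request_t): do_block_io_op() pulls one off
 * the free list, dispatch_rw_block_io() maps its pages and pushes it to
 * tapdisk, and blktap_read_ufe_ring() retires it when the matching response
 * comes back on the user ring:
 *
 *	pending_req_t *req = alloc_req();	(off pending_free)
 *	dispatch_rw_block_io(blkif, blk_req, req);
 *	...					(later, on the response path)
 *	make_response(blkif, req->id, op, st);
 *	free_req(req);				(back onto pending_free)
 */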
169 static pending_req_t *pending_reqs[MAX_PENDING_REQS];
170 static struct list_head pending_free;
171 static DEFINE_SPINLOCK(pending_free_lock);
172 static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
173 static int alloc_pending_reqs;
175 typedef unsigned int PEND_RING_IDX;
177 static inline int MASK_PEND_IDX(int i) {
178 return (i & (MAX_PENDING_REQS-1));
179 }
181 static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
182 return (req - pending_reqs[idx]);
183 }
185 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
187 #define BLKBACK_INVALID_HANDLE (~0)
189 typedef struct mmap_page {
190 unsigned long start;
191 struct page *mpage;
192 } mmap_page_t;
194 static mmap_page_t mmap_start[MAX_DYNAMIC_MEM];
195 static unsigned short mmap_alloc = 0;
196 static unsigned short mmap_lock = 0;
197 static unsigned short mmap_inuse = 0;
198 static unsigned long *pending_addrs[MAX_DYNAMIC_MEM];
200 /******************************************************************
201 * GRANT HANDLES
202 */
204 /* When using grant tables to map a frame for device access then the
205 * handle returned must be used to unmap the frame. This is needed to
206 * drop the ref count on the frame.
207 */
208 struct grant_handle_pair
209 {
210 grant_handle_t kernel;
211 grant_handle_t user;
212 };
214 static struct grant_handle_pair
215 pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
216 #define pending_handle(_id, _idx, _i) \
217 (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
218 + (_i)])
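/*
 * Minimal sketch of how these handle pairs are used, mirroring
 * dispatch_rw_block_io() and fast_flush_area() below (kvaddr, gref, domid,
 * mem_idx, req_idx and seg are placeholders): the handle returned by
 * GNTTABOP_map_grant_ref is the only token that lets the mapping be torn
 * down again.
 *
 *	struct gnttab_map_grant_ref map;
 *	struct gnttab_unmap_grant_ref unmap;
 *
 *	gnttab_set_map_op(&map, kvaddr, GNTMAP_host_map, gref, domid);
 *	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map, 1) ||
 *	    map.status)
 *		return;				(map failed)
 *	pending_handle(mem_idx, req_idx, seg).kernel = map.handle;
 *	...
 *	gnttab_set_unmap_op(&unmap, kvaddr, GNTMAP_host_map,
 *			    pending_handle(mem_idx, req_idx, seg).kernel);
 *	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
 */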
221 static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/
223 #define BLKTAP_MINOR 0 /*/dev/xen/blktap has a dynamic major */
224 #define BLKTAP_DEV_DIR "/dev/xen"
226 static int blktap_major;
228 /* blktap IOCTLs: */
229 #define BLKTAP_IOCTL_KICK_FE 1
230 #define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */
231 #define BLKTAP_IOCTL_SETMODE 3
232 #define BLKTAP_IOCTL_SENDPID 4
233 #define BLKTAP_IOCTL_NEWINTF 5
234 #define BLKTAP_IOCTL_MINOR 6
235 #define BLKTAP_IOCTL_MAJOR 7
236 #define BLKTAP_QUERY_ALLOC_REQS 8
237 #define BLKTAP_IOCTL_FREEINTF 9
238 #define BLKTAP_IOCTL_PRINT_IDXS 100
240 /* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
241 #define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
242 #define BLKTAP_MODE_INTERCEPT_FE 0x00000001
243 #define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */
245 #define BLKTAP_MODE_INTERPOSE \
246 (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
249 static inline int BLKTAP_MODE_VALID(unsigned long arg)
250 {
251 return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
252 (arg == BLKTAP_MODE_INTERCEPT_FE) ||
253 (arg == BLKTAP_MODE_INTERPOSE ));
254 }
256 /* Requests passing through the tap to userspace are re-assigned an ID.
257 * We must record a mapping between the BE [IDX,ID] tuple and the userspace
258 * ring ID.
259 */
261 static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
262 {
263 return ((fe_dom << 16) | MASK_PEND_IDX(idx));
264 }
266 extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id)
267 {
268 return (PEND_RING_IDX)(id & 0x0000ffff);
269 }
271 extern inline int ID_TO_MIDX(unsigned long id)
272 {
273 return (int)(id >> 16);
274 }
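/*
 * Example of the encoding above: dispatch_rw_block_io() stores
 * MAKE_ID(mmap_idx, pending_idx) in info->idx_map[usr_idx], and
 * blktap_read_ufe_ring() later recovers both halves:
 *
 *	unsigned long id = MAKE_ID(2, 37);	(mmap pool 2, pending slot 37)
 *	ID_TO_MIDX(id)			== 2	(high 16 bits)
 *	MASK_PEND_IDX(ID_TO_IDX(id))	== 37	(low 16 bits)
 */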
276 #define INVALID_REQ 0xdead0000
278 /*TODO: Convert to a free list*/
279 static inline int GET_NEXT_REQ(unsigned long *idx_map)
280 {
281 int i;
282 for (i = 0; i < MAX_PENDING_REQS; i++)
283 if (idx_map[i] == INVALID_REQ)
284 return i;
286 return INVALID_REQ;
287 }
290 #define BLKTAP_INVALID_HANDLE(_g) \
291 (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF))
293 #define BLKTAP_INVALIDATE_HANDLE(_g) do { \
294 (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
295 } while(0)
298 /******************************************************************
299 * BLKTAP VM OPS
300 */
302 static struct page *blktap_nopage(struct vm_area_struct *vma,
303 unsigned long address,
304 int *type)
305 {
306 /*
307 * if the page has not been mapped in by the driver then return
308 * NOPAGE_SIGBUS to the domain.
309 */
311 return NOPAGE_SIGBUS;
312 }
314 struct vm_operations_struct blktap_vm_ops = {
315 nopage: blktap_nopage,
316 };
318 /******************************************************************
319 * BLKTAP FILE OPS
320 */
322 /*Function Declarations*/
323 static int get_next_free_dev(void);
324 static int blktap_open(struct inode *inode, struct file *filp);
325 static int blktap_release(struct inode *inode, struct file *filp);
326 static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
327 static int blktap_ioctl(struct inode *inode, struct file *filp,
328 unsigned int cmd, unsigned long arg);
329 static unsigned int blktap_poll(struct file *file, poll_table *wait);
331 static struct file_operations blktap_fops = {
332 .owner = THIS_MODULE,
333 .poll = blktap_poll,
334 .ioctl = blktap_ioctl,
335 .open = blktap_open,
336 .release = blktap_release,
337 .mmap = blktap_mmap,
338 };
341 static int get_next_free_dev(void)
342 {
343 tap_blkif_t *info;
344 int i = 0, ret = -1;
345 unsigned long flags;
347 spin_lock_irqsave(&pending_free_lock, flags);
349 while (i < MAX_TAP_DEV) {
350 info = tapfds[i];
351 if ( (tapfds[i] != NULL) && (info->dev_inuse == 0)
352 && (info->dev_pending == 0) ) {
353 info->dev_pending = 1;
354 ret = i;
355 goto done;
356 }
357 i++;
358 }
360 done:
361 spin_unlock_irqrestore(&pending_free_lock, flags);
363 /*
364 * We are protected by having the dev_pending set.
365 */
366 if (!tapfds[i]->sysfs_set && xen_class) {
367 class_device_create(xen_class, NULL,
368 MKDEV(blktap_major, ret), NULL,
369 "blktap%d", ret);
370 tapfds[i]->sysfs_set = 1;
371 }
372 return ret;
373 }
375 int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif)
376 {
377 int i;
379 for (i = 0; i < MAX_TAP_DEV; i++)
380 if ( (translate_domid[i].domid == domid)
381 && (translate_domid[i].busid == xenbus_id) ) {
382 tapfds[i]->blkif = blkif;
383 tapfds[i]->status = RUNNING;
384 return i;
385 }
386 return -1;
387 }
389 void signal_tapdisk(int idx)
390 {
391 tap_blkif_t *info;
392 struct task_struct *ptask;
394 info = tapfds[idx];
395 if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) {
396 ptask = find_task_by_pid(info->pid);
397 if (ptask)
398 info->status = CLEANSHUTDOWN;
399 }
400 info->blkif = NULL;
401 return;
402 }
404 static int blktap_open(struct inode *inode, struct file *filp)
405 {
406 blkif_sring_t *sring;
407 int idx = iminor(inode) - BLKTAP_MINOR;
408 tap_blkif_t *info;
409 int i;
411 if (tapfds[idx] == NULL) {
412 WPRINTK("Unable to open device /dev/xen/blktap%d\n",
413 idx);
414 return -ENOMEM;
415 }
416 DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
418 info = tapfds[idx];
420 /*Only one process can access device at a time*/
421 if (test_and_set_bit(0, &info->dev_inuse))
422 return -EBUSY;
424 info->dev_pending = 0;
426 /* Allocate the fe ring. */
427 sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
428 if (sring == NULL)
429 goto fail_nomem;
431 SetPageReserved(virt_to_page(sring));
433 SHARED_RING_INIT(sring);
434 FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
436 filp->private_data = info;
437 info->vma = NULL;
439 info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS,
440 GFP_KERNEL);
442 if (idx > 0) {
443 init_waitqueue_head(&info->wait);
444 for (i = 0; i < MAX_PENDING_REQS; i++)
445 info->idx_map[i] = INVALID_REQ;
446 }
448 DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
449 return 0;
451 fail_nomem:
452 return -ENOMEM;
453 }
455 static int blktap_release(struct inode *inode, struct file *filp)
456 {
457 tap_blkif_t *info = filp->private_data;
459 /* can this ever happen? - sdr */
460 if (!info) {
461 WPRINTK("Trying to free device that doesn't exist "
462 "[/dev/xen/blktap%d]\n",iminor(inode) - BLKTAP_MINOR);
463 return -EBADF;
464 }
465 info->dev_inuse = 0;
466 DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
468 /* Free the ring page. */
469 ClearPageReserved(virt_to_page(info->ufe_ring.sring));
470 free_page((unsigned long) info->ufe_ring.sring);
472 /* Clear any active mappings and free foreign map table */
473 if (info->vma) {
474 zap_page_range(
475 info->vma, info->vma->vm_start,
476 info->vma->vm_end - info->vma->vm_start, NULL);
477 info->vma = NULL;
478 }
480 if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
481 kthread_stop(info->blkif->xenblkd);
482 info->blkif->xenblkd = NULL;
483 info->status = CLEANSHUTDOWN;
484 }
485 return 0;
486 }
489 /* Note on mmap:
490 * We need to map pages to user space in a way that will allow the block
491 * subsystem set up direct IO to them. This couldn't be done before, because
492 * there isn't really a sane way to translate a user virtual address down to a
493 * physical address when the page belongs to another domain.
494 *
495 * My first approach was to map the page in to kernel memory, add an entry
496 * for it in the physical frame list (using alloc_lomem_region as in blkback)
497 * and then attempt to map that page up to user space. This is disallowed
498 * by xen though, which realizes that we don't really own the machine frame
499 * underlying the physical page.
500 *
501 * The new approach is to provide explicit support for this in xen linux.
502 * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
503 * mapped from other vms. vma->vm_private_data is set up as a mapping
504 * from pages to actual page structs. There is a new clause in get_user_pages
505 * that does the right thing for this sort of mapping.
506 */
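/*
 * Rough sketch of the get_user_pages() clause referred to above (paraphrased;
 * the exact xen-sparse implementation differs in detail, and start, i and
 * pages are get_user_pages() locals): when a VMA carries VM_FOREIGN, the
 * normal page-table walk is skipped and the struct page comes straight from
 * the vm_private_data map that blktap_mmap() fills in.
 *
 *	if (vma->vm_flags & VM_FOREIGN) {
 *		struct page **fmap = vma->vm_private_data;
 *		int idx = (start - vma->vm_start) >> PAGE_SHIFT;
 *		if (fmap[idx] == NULL)
 *			return i ? i : -EFAULT;
 *		if (pages)
 *			pages[i] = fmap[idx];
 *		continue;			(next page in the request)
 *	}
 */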
507 static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
508 {
509 int size;
510 struct page **map;
511 int i;
512 tap_blkif_t *info = filp->private_data;
514 if (info == NULL) {
515 WPRINTK("blktap: mmap, retrieving idx failed\n");
516 return -ENOMEM;
517 }
519 vma->vm_flags |= VM_RESERVED;
520 vma->vm_ops = &blktap_vm_ops;
522 size = vma->vm_end - vma->vm_start;
523 if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
524 WPRINTK("you _must_ map exactly %d pages!\n",
525 mmap_pages + RING_PAGES);
526 return -EAGAIN;
527 }
529 size >>= PAGE_SHIFT;
530 info->rings_vstart = vma->vm_start;
531 info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
533 /* Map the ring pages to the start of the region and reserve it. */
534 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
536 if (remap_pfn_range(vma, vma->vm_start,
537 __pa(info->ufe_ring.sring) >> PAGE_SHIFT,
538 PAGE_SIZE, vma->vm_page_prot)) {
539 WPRINTK("Mapping user ring failed!\n");
540 goto fail;
541 }
543 /* Mark this VM as containing foreign pages, and set up mappings. */
544 map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
545 * sizeof(struct page_struct*),
546 GFP_KERNEL);
547 if (map == NULL) {
548 WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
549 goto fail;
550 }
552 for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
553 map[i] = NULL;
555 vma->vm_private_data = map;
556 vma->vm_flags |= VM_FOREIGN;
558 info->vma = vma;
559 info->ring_ok = 1;
560 return 0;
561 fail:
562 /* Clear any active mappings. */
563 zap_page_range(vma, vma->vm_start,
564 vma->vm_end - vma->vm_start, NULL);
566 return -ENOMEM;
567 }
570 static int blktap_ioctl(struct inode *inode, struct file *filp,
571 unsigned int cmd, unsigned long arg)
572 {
573 tap_blkif_t *info = filp->private_data;
575 switch(cmd) {
576 case BLKTAP_IOCTL_KICK_FE:
577 {
578 /* There are fe messages to process. */
579 return blktap_read_ufe_ring(info);
580 }
581 case BLKTAP_IOCTL_SETMODE:
582 {
583 if (info) {
584 if (BLKTAP_MODE_VALID(arg)) {
585 info->mode = arg;
586 /* XXX: may need to flush rings here. */
587 DPRINTK("blktap: set mode to %lx\n",
588 arg);
589 return 0;
590 }
591 }
592 return 0;
593 }
594 case BLKTAP_IOCTL_PRINT_IDXS:
595 {
596 if (info) {
597 printk("User Rings: \n-----------\n");
598 printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
599 "| req_prod: %2d, rsp_prod: %2d\n",
600 info->ufe_ring.rsp_cons,
601 info->ufe_ring.req_prod_pvt,
602 info->ufe_ring.sring->req_prod,
603 info->ufe_ring.sring->rsp_prod);
604 }
605 return 0;
606 }
607 case BLKTAP_IOCTL_SENDPID:
608 {
609 if (info) {
610 info->pid = (pid_t)arg;
611 DPRINTK("blktap: pid received %d\n",
612 info->pid);
613 }
614 return 0;
615 }
616 case BLKTAP_IOCTL_NEWINTF:
617 {
618 uint64_t val = (uint64_t)arg;
619 domid_translate_t *tr = (domid_translate_t *)&val;
620 int newdev;
622 DPRINTK("NEWINTF Req for domid %d and bus id %d\n",
623 tr->domid, tr->busid);
624 newdev = get_next_free_dev();
625 if (newdev < 1) {
626 WPRINTK("Error initialising /dev/xen/blktap - "
627 "No more devices\n");
628 return -1;
629 }
630 translate_domid[newdev].domid = tr->domid;
631 translate_domid[newdev].busid = tr->busid;
632 return newdev;
633 }
634 case BLKTAP_IOCTL_FREEINTF:
635 {
636 unsigned long dev = arg;
637 unsigned long flags;
639 /* Looking at another device */
640 info = NULL;
642 if ( (dev > 0) && (dev < MAX_TAP_DEV) )
643 info = tapfds[dev];
645 spin_lock_irqsave(&pending_free_lock, flags);
646 if ( (info != NULL) && (info->dev_pending) )
647 info->dev_pending = 0;
648 spin_unlock_irqrestore(&pending_free_lock, flags);
650 return 0;
651 }
652 case BLKTAP_IOCTL_MINOR:
653 {
654 unsigned long dev = arg;
656 /* Looking at another device */
657 info = NULL;
659 if ( (dev > 0) && (dev < MAX_TAP_DEV) )
660 info = tapfds[dev];
662 if (info != NULL)
663 return info->minor;
664 else
665 return -1;
666 }
667 case BLKTAP_IOCTL_MAJOR:
668 return blktap_major;
670 case BLKTAP_QUERY_ALLOC_REQS:
671 {
672 WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
673 alloc_pending_reqs, blkif_reqs);
674 return (alloc_pending_reqs/blkif_reqs) * 100;
675 }
676 }
677 return -ENOIOCTLCMD;
678 }
680 static unsigned int blktap_poll(struct file *filp, poll_table *wait)
681 {
682 tap_blkif_t *info = filp->private_data;
684 if (!info) {
685 WPRINTK(" poll, retrieving idx failed\n");
686 return 0;
687 }
689 /* do not work on the control device */
690 if (!info->minor)
691 return 0;
693 poll_wait(filp, &info->wait, wait);
694 if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
695 RING_PUSH_REQUESTS(&info->ufe_ring);
696 return POLLIN | POLLRDNORM;
697 }
698 return 0;
699 }
701 void blktap_kick_user(int idx)
702 {
703 tap_blkif_t *info;
705 if (idx == 0)
706 return;
708 info = tapfds[idx];
710 if (info != NULL)
711 wake_up_interruptible(&info->wait);
713 return;
714 }
716 static int do_block_io_op(blkif_t *blkif);
717 static void dispatch_rw_block_io(blkif_t *blkif,
718 blkif_request_t *req,
719 pending_req_t *pending_req);
720 static void make_response(blkif_t *blkif, unsigned long id,
721 unsigned short op, int st);
723 /******************************************************************
724 * misc small helpers
725 */
726 static int req_increase(void)
727 {
728 int i, j;
729 struct page *page;
730 int ret;
732 ret = -EINVAL;
733 if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock)
734 goto done;
736 #ifdef __ia64__
737 extern unsigned long alloc_empty_foreign_map_page_range(
738 unsigned long pages);
739 mmap_start[mmap_alloc].start = (unsigned long)
740 alloc_empty_foreign_map_page_range(mmap_pages);
741 #else /* ! ia64 */
742 page = balloon_alloc_empty_page_range(mmap_pages);
743 ret = -ENOMEM;
744 if (page == NULL) {
745 printk("%s balloon_alloc_empty_page_range gave NULL\n", __FUNCTION__);
746 goto done;
747 }
749 /* Pin all of the pages. */
750 for (i=0; i<mmap_pages; i++)
751 get_page(&page[i]);
753 mmap_start[mmap_alloc].start =
754 (unsigned long)pfn_to_kaddr(page_to_pfn(page));
755 mmap_start[mmap_alloc].mpage = page;
757 #endif
759 pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) *
760 blkif_reqs, GFP_KERNEL);
761 pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) *
762 mmap_pages, GFP_KERNEL);
764 ret = -ENOMEM;
765 if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) {
766 kfree(pending_reqs[mmap_alloc]);
767 kfree(pending_addrs[mmap_alloc]);
768 WPRINTK("%s: out of memory\n", __FUNCTION__);
769 ret = -ENOMEM;
770 goto done;
771 }
773 ret = 0;
775 DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
776 __FUNCTION__, blkif_reqs, mmap_pages,
777 mmap_start[mmap_alloc].start);
779 BUG_ON(mmap_start[mmap_alloc].start == 0);
781 for (i = 0; i < mmap_pages; i++)
782 pending_addrs[mmap_alloc][i] =
783 mmap_start[mmap_alloc].start + (i << PAGE_SHIFT);
785 for (i = 0; i < MAX_PENDING_REQS ; i++) {
786 list_add_tail(&pending_reqs[mmap_alloc][i].free_list,
787 &pending_free);
788 pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
789 for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
790 BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc,
791 i, j));
792 }
794 mmap_alloc++;
795 DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
796 done:
797 return ret;
798 }
800 static void mmap_req_del(int mmap)
801 {
802 int i;
803 struct page *page;
805 /*Spinlock already acquired*/
806 kfree(pending_reqs[mmap]);
807 kfree(pending_addrs[mmap]);
809 #ifdef __ia64__
810 /*Not sure what goes here yet!*/
811 #else
813 /* Unpin all of the pages. */
814 page = mmap_start[mmap].mpage;
815 for (i=0; i<mmap_pages; i++)
816 put_page(&page[i]);
818 balloon_dealloc_empty_page_range(mmap_start[mmap].mpage, mmap_pages);
819 #endif
821 mmap_lock = 0;
822 DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
823 mmap_alloc--;
824 }
826 static pending_req_t* alloc_req(void)
827 {
828 pending_req_t *req = NULL;
829 unsigned long flags;
831 spin_lock_irqsave(&pending_free_lock, flags);
833 if (!list_empty(&pending_free)) {
834 req = list_entry(pending_free.next, pending_req_t, free_list);
835 list_del(&req->free_list);
836 }
838 if (req) {
839 req->inuse = 1;
840 alloc_pending_reqs++;
841 }
842 spin_unlock_irqrestore(&pending_free_lock, flags);
844 return req;
845 }
847 static void free_req(pending_req_t *req)
848 {
849 unsigned long flags;
850 int was_empty;
852 spin_lock_irqsave(&pending_free_lock, flags);
854 alloc_pending_reqs--;
855 req->inuse = 0;
856 if (mmap_lock && (req->mem_idx == mmap_alloc-1)) {
857 mmap_inuse--;
858 if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
859 spin_unlock_irqrestore(&pending_free_lock, flags);
860 return;
861 }
862 was_empty = list_empty(&pending_free);
863 list_add(&req->free_list, &pending_free);
865 spin_unlock_irqrestore(&pending_free_lock, flags);
867 if (was_empty)
868 wake_up(&pending_free_wq);
869 }
871 static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int
872 tapidx)
873 {
874 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
875 unsigned int i, invcount = 0;
876 struct grant_handle_pair *khandle;
877 uint64_t ptep;
878 int ret, mmap_idx;
879 unsigned long kvaddr, uvaddr;
881 tap_blkif_t *info = tapfds[tapidx];
883 if (info == NULL) {
884 WPRINTK("fast_flush: Couldn't get info!\n");
885 return;
886 }
887 mmap_idx = req->mem_idx;
889 for (i = 0; i < req->nr_pages; i++) {
890 kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i);
891 uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
893 khandle = &pending_handle(mmap_idx, k_idx, i);
894 if (BLKTAP_INVALID_HANDLE(khandle)) {
895 WPRINTK("BLKTAP_INVALID_HANDLE\n");
896 continue;
897 }
898 gnttab_set_unmap_op(&unmap[invcount],
899 MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i),
900 GNTMAP_host_map, khandle->kernel);
901 invcount++;
903 if (create_lookup_pte_addr(
904 info->vma->vm_mm,
905 MMAP_VADDR(info->user_vstart, u_idx, i),
906 &ptep) !=0) {
907 WPRINTK("Couldn't get a pte addr!\n");
908 return;
909 }
911 gnttab_set_unmap_op(&unmap[invcount],
912 ptep, GNTMAP_host_map,
913 khandle->user);
914 invcount++;
916 BLKTAP_INVALIDATE_HANDLE(khandle);
917 }
918 ret = HYPERVISOR_grant_table_op(
919 GNTTABOP_unmap_grant_ref, unmap, invcount);
920 BUG_ON(ret);
922 if (info->vma != NULL)
923 zap_page_range(info->vma,
924 MMAP_VADDR(info->user_vstart, u_idx, 0),
925 req->nr_pages << PAGE_SHIFT, NULL);
926 }
928 /******************************************************************
929 * SCHEDULER FUNCTIONS
930 */
932 static void print_stats(blkif_t *blkif)
933 {
934 printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
935 current->comm, blkif->st_oo_req,
936 blkif->st_rd_req, blkif->st_wr_req);
937 blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
938 blkif->st_rd_req = 0;
939 blkif->st_wr_req = 0;
940 blkif->st_oo_req = 0;
941 }
943 int tap_blkif_schedule(void *arg)
944 {
945 blkif_t *blkif = arg;
947 blkif_get(blkif);
949 if (debug_lvl)
950 printk(KERN_DEBUG "%s: started\n", current->comm);
952 while (!kthread_should_stop()) {
953 wait_event_interruptible(
954 blkif->wq,
955 blkif->waiting_reqs || kthread_should_stop());
956 wait_event_interruptible(
957 pending_free_wq,
958 !list_empty(&pending_free) || kthread_should_stop());
960 blkif->waiting_reqs = 0;
961 smp_mb(); /* clear flag *before* checking for work */
963 if (do_block_io_op(blkif))
964 blkif->waiting_reqs = 1;
966 if (log_stats && time_after(jiffies, blkif->st_print))
967 print_stats(blkif);
968 }
970 if (log_stats)
971 print_stats(blkif);
972 if (debug_lvl)
973 printk(KERN_DEBUG "%s: exiting\n", current->comm);
975 blkif->xenblkd = NULL;
976 blkif_put(blkif);
978 return 0;
979 }
981 /******************************************************************
982 * COMPLETION CALLBACK -- Called by user level ioctl()
983 */
985 static int blktap_read_ufe_ring(tap_blkif_t *info)
986 {
987 /* This is called to read responses from the UFE ring. */
988 RING_IDX i, j, rp;
989 blkif_response_t *resp;
990 blkif_t *blkif=NULL;
991 int pending_idx, usr_idx, mmap_idx;
992 pending_req_t *pending_req;
994 if (!info)
995 return 0;
997 /* We currently only forward packets in INTERCEPT_FE mode. */
998 if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
999 return 0;
1001 /* for each outstanding message on the UFEring */
1002 rp = info->ufe_ring.sring->rsp_prod;
1003 rmb();
1005 for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
1006 resp = RING_GET_RESPONSE(&info->ufe_ring, i);
1007 ++info->ufe_ring.rsp_cons;
1009 /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
1010 usr_idx = (int)resp->id;
1011 pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
1012 mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
1014 if ( (mmap_idx >= mmap_alloc) ||
1015 (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) )
1016 WPRINTK("Incorrect req map"
1017 "[%d], internal map [%d,%d (%d)]\n",
1018 usr_idx, mmap_idx,
1019 ID_TO_IDX(info->idx_map[usr_idx]),
1020 MASK_PEND_IDX(
1021 ID_TO_IDX(info->idx_map[usr_idx])));
1023 pending_req = &pending_reqs[mmap_idx][pending_idx];
1024 blkif = pending_req->blkif;
1026 for (j = 0; j < pending_req->nr_pages; j++) {
1028 unsigned long kvaddr, uvaddr;
1029 struct page **map = info->vma->vm_private_data;
1030 struct page *pg;
1031 int offset;
1033 uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
1034 kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
1035 pending_idx, j);
1037 pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
1038 ClearPageReserved(pg);
1039 offset = (uvaddr - info->vma->vm_start)
1040 >> PAGE_SHIFT;
1041 map[offset] = NULL;
1042 }
1043 fast_flush_area(pending_req, pending_idx, usr_idx, info->minor);
1044 make_response(blkif, pending_req->id, resp->operation,
1045 resp->status);
1046 info->idx_map[usr_idx] = INVALID_REQ;
1047 blkif_put(pending_req->blkif);
1048 free_req(pending_req);
1049 }
1051 return 0;
1052 }
1055 /******************************************************************************
1056 * NOTIFICATION FROM GUEST OS.
1057 */
1059 static void blkif_notify_work(blkif_t *blkif)
1060 {
1061 blkif->waiting_reqs = 1;
1062 wake_up(&blkif->wq);
1063 }
1065 irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
1066 {
1067 blkif_notify_work(dev_id);
1068 return IRQ_HANDLED;
1069 }
1073 /******************************************************************
1074 * DOWNWARD CALLS -- These interface with the block-device layer proper.
1075 */
1076 static int print_dbug = 1;
1077 static int do_block_io_op(blkif_t *blkif)
1078 {
1079 blkif_back_ring_t *blk_ring = &blkif->blk_ring;
1080 blkif_request_t *req;
1081 pending_req_t *pending_req;
1082 RING_IDX rc, rp;
1083 int more_to_do = 0;
1084 tap_blkif_t *info;
1086 rc = blk_ring->req_cons;
1087 rp = blk_ring->sring->req_prod;
1088 rmb(); /* Ensure we see queued requests up to 'rp'. */
1090 /*Check blkif has corresponding UE ring*/
1091 if (blkif->dev_num == -1) {
1092 /*oops*/
1093 if (print_dbug) {
1094 WPRINTK("Corresponding UE "
1095 "ring does not exist!\n");
1096 print_dbug = 0; /*We only print this message once*/
1097 }
1098 return 0;
1099 }
1101 info = tapfds[blkif->dev_num];
1102 if (info == NULL || !info->dev_inuse) {
1103 if (print_dbug) {
1104 WPRINTK("Can't get UE info!\n");
1105 print_dbug = 0;
1106 }
1107 return 0;
1108 }
1110 while (rc != rp) {
1112 if (RING_FULL(&info->ufe_ring)) {
1113 WPRINTK("RING_FULL! More to do\n");
1114 more_to_do = 1;
1115 break;
1116 }
1118 if (RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
1119 WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
1120 " More to do\n");
1121 more_to_do = 1;
1122 break;
1123 }
1125 pending_req = alloc_req();
1126 if (NULL == pending_req) {
1127 blkif->st_oo_req++;
1128 more_to_do = 1;
1129 break;
1130 }
1132 req = RING_GET_REQUEST(blk_ring, rc);
1133 blk_ring->req_cons = ++rc; /* before make_response() */
1135 switch (req->operation) {
1136 case BLKIF_OP_READ:
1137 blkif->st_rd_req++;
1138 dispatch_rw_block_io(blkif, req, pending_req);
1139 break;
1141 case BLKIF_OP_WRITE:
1142 blkif->st_wr_req++;
1143 dispatch_rw_block_io(blkif, req, pending_req);
1144 break;
1146 default:
1147 WPRINTK("unknown operation [%d]\n",
1148 req->operation);
1149 make_response(blkif, req->id, req->operation,
1150 BLKIF_RSP_ERROR);
1151 free_req(pending_req);
1152 break;
1153 }
1154 }
1156 blktap_kick_user(blkif->dev_num);
1158 return more_to_do;
1159 }
1161 static void dispatch_rw_block_io(blkif_t *blkif,
1162 blkif_request_t *req,
1163 pending_req_t *pending_req)
1164 {
1165 extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
1166 int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
1167 struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
1168 unsigned int nseg;
1169 int ret, i;
1170 tap_blkif_t *info = tapfds[blkif->dev_num];
1171 uint64_t sector;
1173 blkif_request_t *target;
1174 int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
1175 int usr_idx = GET_NEXT_REQ(info->idx_map);
1176 uint16_t mmap_idx = pending_req->mem_idx;
1178 /*Check we have space on user ring - should never fail*/
1179 if(usr_idx == INVALID_REQ) goto fail_flush;
1181 /* Check that number of segments is sane. */
1182 nseg = req->nr_segments;
1183 if ( unlikely(nseg == 0) ||
1184 unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
1185 WPRINTK("Bad number of segments in request (%d)\n", nseg);
1186 goto fail_response;
1187 }
1189 /* Make sure userspace is ready. */
1190 if (!info->ring_ok) {
1191 WPRINTK("blktap: ring not ready for requests!\n");
1192 goto fail_response;
1193 }
1195 if (RING_FULL(&info->ufe_ring)) {
1196 WPRINTK("blktap: fe_ring is full, can't add "
1197 "IO Request will be dropped. %d %d\n",
1198 RING_SIZE(&info->ufe_ring),
1199 RING_SIZE(&blkif->blk_ring));
1200 goto fail_response;
1201 }
1203 pending_req->blkif = blkif;
1204 pending_req->id = req->id;
1205 pending_req->operation = operation;
1206 pending_req->status = BLKIF_RSP_OKAY;
1207 pending_req->nr_pages = nseg;
1208 op = 0;
1209 for (i = 0; i < nseg; i++) {
1210 unsigned long uvaddr;
1211 unsigned long kvaddr;
1212 uint64_t ptep;
1213 struct page *page;
1214 uint32_t flags;
1216 uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
1217 kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
1218 pending_idx, i);
1219 page = virt_to_page(kvaddr);
1221 sector = req->sector_number + (8*i);
1222 if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) {
1223 WPRINTK("BLKTAP: Sector request greater"
1224 "than size\n");
1225 WPRINTK("BLKTAP: %s request sector"
1226 "[%llu,%llu], Total [%llu]\n",
1227 (req->operation ==
1228 BLKIF_OP_WRITE ? "WRITE" : "READ"),
1229 (long long unsigned) sector,
1230 (long long unsigned) sector>>9,
1230 blkif->sectors);
1231 }
1234 flags = GNTMAP_host_map;
1235 if (operation == WRITE)
1236 flags |= GNTMAP_readonly;
1237 gnttab_set_map_op(&map[op], kvaddr, flags,
1238 req->seg[i].gref, blkif->domid);
1239 op++;
1241 /* Now map it to user. */
1242 ret = create_lookup_pte_addr(info->vma->vm_mm,
1243 uvaddr, &ptep);
1244 if (ret) {
1245 WPRINTK("Couldn't get a pte addr!\n");
1246 fast_flush_area(pending_req, pending_idx, usr_idx,
1247 blkif->dev_num);
1248 goto fail_flush;
1249 }
1251 flags = GNTMAP_host_map | GNTMAP_application_map
1252 | GNTMAP_contains_pte;
1253 if (operation == WRITE)
1254 flags |= GNTMAP_readonly;
1255 gnttab_set_map_op(&map[op], ptep, flags,
1256 req->seg[i].gref, blkif->domid);
1257 op++;
1258 }
1260 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
1261 BUG_ON(ret);
1263 for (i = 0; i < (nseg*2); i+=2) {
1264 unsigned long uvaddr;
1265 unsigned long kvaddr;
1266 unsigned long offset;
1267 struct page *pg;
1269 uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
1270 kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
1271 pending_idx, i/2);
1273 if (unlikely(map[i].status != 0)) {
1274 WPRINTK("invalid kernel buffer -- "
1275 "could not remap it\n");
1276 goto fail_flush;
1277 }
1279 if (unlikely(map[i+1].status != 0)) {
1280 WPRINTK("invalid user buffer -- "
1281 "could not remap it\n");
1282 goto fail_flush;
1283 }
1285 pending_handle(mmap_idx, pending_idx, i/2).kernel
1286 = map[i].handle;
1287 pending_handle(mmap_idx, pending_idx, i/2).user
1288 = map[i+1].handle;
1289 set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
1290 FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
1291 offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
1292 pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
1293 ((struct page **)info->vma->vm_private_data)[offset] =
1294 pg;
1295 }
1296 /* Mark mapped pages as reserved: */
1297 for (i = 0; i < req->nr_segments; i++) {
1298 unsigned long kvaddr;
1299 struct page *pg;
1301 kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
1302 pending_idx, i);
1303 pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
1304 SetPageReserved(pg);
1305 }
1307 /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
1308 info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
1310 blkif_get(blkif);
1311 /* Finally, write the request message to the user ring. */
1312 target = RING_GET_REQUEST(&info->ufe_ring,
1313 info->ufe_ring.req_prod_pvt);
1314 memcpy(target, req, sizeof(*req));
1315 target->id = usr_idx;
1316 info->ufe_ring.req_prod_pvt++;
1317 return;
1319 fail_flush:
1320 WPRINTK("Reached Fail_flush\n");
1321 fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
1322 fail_response:
1323 make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
1324 free_req(pending_req);
1325 }
1329 /******************************************************************
1330 * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
1331 */
1334 static void make_response(blkif_t *blkif, unsigned long id,
1335 unsigned short op, int st)
1336 {
1337 blkif_response_t *resp;
1338 unsigned long flags;
1339 blkif_back_ring_t *blk_ring = &blkif->blk_ring;
1340 int more_to_do = 0;
1341 int notify;
1343 spin_lock_irqsave(&blkif->blk_ring_lock, flags);
1344 /* Place on the response ring for the relevant domain. */
1345 resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
1346 resp->id = id;
1347 resp->operation = op;
1348 resp->status = st;
1349 blk_ring->rsp_prod_pvt++;
1350 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);
1352 if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
1353 /*
1354 * Tail check for pending requests. Allows frontend to avoid
1355 * notifications if requests are already in flight (lower
1356 * overheads and promotes batching).
1357 */
1358 RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
1359 } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
1360 more_to_do = 1;
1361 }
1363 spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
1364 if (more_to_do)
1365 blkif_notify_work(blkif);
1366 if (notify)
1367 notify_remote_via_irq(blkif->irq);
1368 }
1370 static int __init blkif_init(void)
1371 {
1372 int i,ret,blktap_dir;
1373 tap_blkif_t *info;
1375 if (!is_running_on_xen())
1376 return -ENODEV;
1378 INIT_LIST_HEAD(&pending_free);
1379 for(i = 0; i < 2; i++) {
1380 ret = req_increase();
1381 if (ret)
1382 break;
1383 }
1384 if (i == 0)
1385 return ret;
1387 tap_blkif_interface_init();
1389 alloc_pending_reqs = 0;
1391 tap_blkif_xenbus_init();
1393 /*Create the blktap devices, but do not map memory or waitqueue*/
1394 for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF;
1396 /* Dynamically allocate a major for this device */
1397 ret = register_chrdev(0, "blktap", &blktap_fops);
1398 blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL);
1400 if ( (ret < 0)||(blktap_dir < 0) ) {
1401 WPRINTK("Couldn't register /dev/xen/blktap\n");
1402 return -ENOMEM;
1403 }
1405 blktap_major = ret;
1407 for(i = 0; i < MAX_TAP_DEV; i++ ) {
1408 info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
1409 if(tapfds[i] == NULL)
1410 return -ENOMEM;
1411 info->minor = i;
1412 info->pid = 0;
1413 info->blkif = NULL;
1415 ret = devfs_mk_cdev(MKDEV(blktap_major, i),
1416 S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
1418 if(ret != 0)
1419 return -ENOMEM;
1420 info->dev_pending = info->dev_inuse = 0;
1422 DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
1425 /* Make sure the xen class exists */
1426 if (!setup_xen_class()) {
1427 /*
1428 * This will allow udev to create the blktap ctrl device.
1429 * We only want to create blktap0 first. We don't want
1430 * to flood the sysfs system with needless blktap devices.
1431 * We only create the device when a request of a new device is
1432 * made.
1433 */
1434 class_device_create(xen_class, NULL,
1435 MKDEV(blktap_major, 0), NULL,
1436 "blktap0");
1437 tapfds[0]->sysfs_set = 1;
1438 } else {
1439 /* this is bad, but not fatal */
1440 WPRINTK("blktap: sysfs xen_class not created\n");
1443 DPRINTK("Blktap device successfully created\n");
1445 return 0;
1446 }
1448 module_init(blkif_init);
1450 MODULE_LICENSE("Dual BSD/GPL");