ia64/linux-2.6.18-xen.hg

view drivers/xen/blktap2/blktap.h @ 878:eba6fe6d8d53

blktap2: a completely rewritten blktap implementation

Benefits to blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, IO retry logic to protect
guests from transient block device failures. This has been tested
and is known to work on weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:23:16 2009 +0100 (2009-05-26)
parents
children f59c5daed527
line source
1 #ifndef _BLKTAP_H_
2 #define _BLKTAP_H_
4 #include <linux/fs.h>
5 #include <linux/poll.h>
6 #include <linux/cdev.h>
7 #include <xen/blkif.h>
8 #include <xen/gnttab.h>
10 //#define ENABLE_PASSTHROUGH
/* Verbosity threshold consulted by BTPRINTK(); defined outside this header. */
extern int blktap_debug_level;

/*
 * BTPRINTK - conditionally printk() a message prefixed with the caller's name.
 * @level: the message is emitted only while blktap_debug_level > @level
 * @tag:   string literal pasted in front of the format (a KERN_* prefix)
 * @force: non-zero bypasses the printk_ratelimit() check
 * @_f:    format string literal; any further arguments are its parameters
 *
 * @level and @force are parenthesized in the expansion so that a compound
 * argument (for instance a conditional expression) is evaluated as a whole
 * instead of being re-associated with the surrounding '>' and '||'.
 */
#define BTPRINTK(level, tag, force, _f, _a...)				\
	do {								\
		if (blktap_debug_level > (level) &&			\
		    ((force) || printk_ratelimit()))			\
			printk(tag "%s: " _f, __func__, ##_a);		\
	} while (0)

/* Debug messages: require blktap_debug_level > 8 and skip rate limiting. */
#define BTDBG(_f, _a...)	BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a)

/* Info, warning and error messages: require blktap_debug_level > 0 and
 * are subject to printk_ratelimit(). */
#define BTINFO(_f, _a...)	BTPRINTK(0, KERN_INFO, 0, _f, ##_a)
#define BTWARN(_f, _a...)	BTPRINTK(0, KERN_WARNING, 0, _f, ##_a)
#define BTERR(_f, _a...)	BTPRINTK(0, KERN_ERR, 0, _f, ##_a)
/* Number of slots in the blktaps[] table declared further down. */
#define MAX_BLKTAP_DEVICE		256

/*
 * State flags.  BLKTAP_RING_VMA is used as a bit number in
 * blktap::dev_inuse by blktap_active().
 * NOTE(review): the other values presumably index the same bitmap --
 * confirm against the users of dev_inuse.
 */
#define BLKTAP_CONTROL			1
#define BLKTAP_RING_FD			2
#define BLKTAP_RING_VMA			3
#define BLKTAP_DEVICE			4
#define BLKTAP_SYSFS			5
#define BLKTAP_PAUSE_REQUESTED		6
#define BLKTAP_PAUSED			7
#define BLKTAP_SHUTDOWN_REQUESTED	8
#define BLKTAP_PASSTHROUGH		9
#define BLKTAP_DEFERRED			10

/* blktap IOCTLs: */
#define BLKTAP2_IOCTL_KICK_FE		1
#define BLKTAP2_IOCTL_ALLOC_TAP		200
#define BLKTAP2_IOCTL_FREE_TAP		201
#define BLKTAP2_IOCTL_CREATE_DEVICE	202
#define BLKTAP2_IOCTL_SET_PARAMS	203
#define BLKTAP2_IOCTL_PAUSE		204
#define BLKTAP2_IOCTL_REOPEN		205
#define BLKTAP2_IOCTL_RESUME		206

/* Size in bytes of blktap_params::name. */
#define BLKTAP2_MAX_MESSAGE_LEN		256

/* Ring message codes.
 * NOTE(review): where these are produced/consumed is not visible here. */
#define BLKTAP2_RING_MESSAGE_PAUSE	1
#define BLKTAP2_RING_MESSAGE_RESUME	2
#define BLKTAP2_RING_MESSAGE_CLOSE	3

/* Request slot states.
 * NOTE(review): presumably the values of blktap_request::status -- confirm. */
#define BLKTAP_REQUEST_FREE		0
#define BLKTAP_REQUEST_PENDING		1
/*
 * The maximum number of requests that can be outstanding at any time
 * is determined by
 *
 *   [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST]
 *
 * where mmap_alloc < MAX_DYNAMIC_MEM.
 *
 * TODO:
 * mmap_alloc is initialised to 2 and should be adjustable on the fly via
 * sysfs.
 */
#define BLK_RING_SIZE		__RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
#define MAX_DYNAMIC_MEM		BLK_RING_SIZE
#define MAX_PENDING_REQS	BLK_RING_SIZE
#define MMAP_PAGES		(MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)

/*
 * MMAP_VADDR - address of segment @_seg of request @_req in a region
 * starting at @_start.  Each request owns BLKIF_MAX_SEGMENTS_PER_REQUEST
 * consecutive pages and each segment occupies one page.
 *
 * Every argument is parenthesized so that a compound expression passed as
 * @_start (e.g. a conditional) is added as a whole rather than absorbing
 * the offset terms.
 */
#define MMAP_VADDR(_start, _req, _seg)					\
	((_start) +							\
	 ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +	\
	 ((_seg) * PAGE_SIZE))
/*
 * Reference counting on a tap.
 *
 * blktap_get() increments (_b)->refcnt.
 * blktap_put() decrements it and, when the count reaches zero, wakes up
 * anything sleeping on (_b)->wq.
 *
 * blktap_put() captures its argument in a local first so that the argument
 * expression is evaluated exactly once, even if it has side effects.
 */
#define blktap_get(_b) (atomic_inc(&(_b)->refcnt))
#define blktap_put(_b)							\
	do {								\
		__typeof__(_b) __blktap_put_tap = (_b);			\
		if (atomic_dec_and_test(&__blktap_put_tap->refcnt))	\
			wake_up(&__blktap_put_tap->wq);			\
	} while (0)
86 struct blktap;
88 struct grant_handle_pair {
89 grant_handle_t kernel;
90 grant_handle_t user;
91 };
92 #define INVALID_GRANT_HANDLE 0xFFFF
/*
 * Triple of unsigned identifiers describing a tap.
 * NOTE(review): presumably the ring device number, block device number and
 * minor handed back to user space when a tap is allocated -- confirm
 * against the ioctl implementation.  Field order and types are kept as-is
 * because the layout may be shared with user space.
 */
struct blktap_handle {
	unsigned int		ring;
	unsigned int		device;
	unsigned int		minor;
};
100 struct blktap_params {
101 char name[BLKTAP2_MAX_MESSAGE_LEN];
102 unsigned long long capacity;
103 unsigned long sector_size;
104 };
106 struct blktap_device {
107 int users;
108 spinlock_t lock;
109 struct gendisk *gd;
111 #ifdef ENABLE_PASSTHROUGH
112 struct block_device *bdev;
113 #endif
114 };
116 struct blktap_ring {
117 struct vm_area_struct *vma;
118 blkif_front_ring_t ring;
119 struct vm_foreign_map foreign_map;
120 unsigned long ring_vstart;
121 unsigned long user_vstart;
123 int response;
125 wait_queue_head_t poll_wait;
127 dev_t devno;
128 struct class_device *dev;
129 atomic_t sysfs_refcnt;
130 struct mutex sysfs_mutex;
131 };
133 struct blktap_statistics {
134 unsigned long st_print;
135 int st_rd_req;
136 int st_wr_req;
137 int st_oo_req;
138 int st_rd_sect;
139 int st_wr_sect;
140 s64 st_rd_cnt;
141 s64 st_rd_sum_usecs;
142 s64 st_rd_max_usecs;
143 s64 st_wr_cnt;
144 s64 st_wr_sum_usecs;
145 s64 st_wr_max_usecs;
146 };
148 struct blktap_request {
149 uint64_t id;
150 uint16_t usr_idx;
152 uint8_t status;
153 atomic_t pendcnt;
154 uint8_t nr_pages;
155 unsigned short operation;
157 struct timeval time;
158 struct grant_handle_pair handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
159 struct list_head free_list;
160 };
162 struct blktap {
163 int minor;
164 pid_t pid;
165 atomic_t refcnt;
166 unsigned long dev_inuse;
168 struct blktap_params params;
170 struct rw_semaphore tap_sem;
172 struct blktap_ring ring;
173 struct blktap_device device;
175 int pending_cnt;
176 struct blktap_request *pending_requests[MAX_PENDING_REQS];
178 wait_queue_head_t wq;
179 struct list_head deferred_queue;
181 struct blktap_statistics stats;
182 };
184 extern struct blktap *blktaps[MAX_BLKTAP_DEVICE];
186 static inline int
187 blktap_active(struct blktap *tap)
188 {
189 return test_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
190 }
192 static inline int
193 blktap_validate_params(struct blktap *tap, struct blktap_params *params)
194 {
195 /* TODO: sanity check */
196 params->name[sizeof(params->name) - 1] = '\0';
197 BTINFO("%s: capacity: %llu, sector-size: %lu\n",
198 params->name, params->capacity, params->sector_size);
199 return 0;
200 }
202 int blktap_control_destroy_device(struct blktap *);
204 int blktap_ring_init(int *);
205 int blktap_ring_free(void);
206 int blktap_ring_create(struct blktap *);
207 int blktap_ring_destroy(struct blktap *);
208 int blktap_ring_pause(struct blktap *);
209 int blktap_ring_resume(struct blktap *);
210 void blktap_ring_kick_user(struct blktap *);
212 int blktap_sysfs_init(void);
213 void blktap_sysfs_free(void);
214 int blktap_sysfs_create(struct blktap *);
215 int blktap_sysfs_destroy(struct blktap *);
217 int blktap_device_init(int *);
218 void blktap_device_free(void);
219 int blktap_device_create(struct blktap *);
220 int blktap_device_destroy(struct blktap *);
221 int blktap_device_pause(struct blktap *);
222 int blktap_device_resume(struct blktap *);
223 void blktap_device_restart(struct blktap *);
224 void blktap_device_finish_request(struct blktap *,
225 blkif_response_t *,
226 struct blktap_request *);
227 void blktap_device_fail_pending_requests(struct blktap *);
228 #ifdef ENABLE_PASSTHROUGH
229 int blktap_device_enable_passthrough(struct blktap *,
230 unsigned, unsigned);
231 #endif
233 void blktap_defer(struct blktap *);
234 void blktap_run_deferred(void);
236 int blktap_request_pool_init(void);
237 void blktap_request_pool_free(void);
238 int blktap_request_pool_grow(void);
239 int blktap_request_pool_shrink(void);
240 struct blktap_request *blktap_request_allocate(struct blktap *);
241 void blktap_request_free(struct blktap *, struct blktap_request *);
242 unsigned long request_to_kaddr(struct blktap_request *, int);
244 #endif