ia64/linux-2.6.18-xen.hg

view drivers/xen/blktap2/sysfs.c @ 878:eba6fe6d8d53

blktap2: a completely rewritten blktap implementation

Benefits to blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, IO retry logic to protect
guests from transient block device failures. This has been tested
and is known to work on weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:23:16 2009 +0100 (2009-05-26)
parents
children 85a4c18ad9aa
line source
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/module.h>

#include <asm/page.h>

#include "blktap.h"
/*
 * Debug verbosity level.  Read and written through the class-level
 * "verbosity" attribute; not static, so presumably consumed by the
 * logging macros in other blktap2 files (confirm in blktap.h).
 */
int blktap_debug_level = 1;

/*
 * Sleepers waiting for a tap's sysfs_refcnt to drain; woken by
 * blktap_sysfs_put() when a count reaches zero.
 */
static DECLARE_WAIT_QUEUE_HEAD(sysfs_wq);

/* The "blktap2" device class; NULL until blktap_sysfs_init() has run. */
static struct class *class;
12 static inline void
13 blktap_sysfs_get(struct blktap *tap)
14 {
15 atomic_inc(&tap->ring.sysfs_refcnt);
16 }
18 static inline void
19 blktap_sysfs_put(struct blktap *tap)
20 {
21 if (atomic_dec_and_test(&tap->ring.sysfs_refcnt))
22 wake_up(&sysfs_wq);
23 }
25 static inline void
26 blktap_sysfs_enter(struct blktap *tap)
27 {
28 blktap_sysfs_get(tap); /* pin sysfs device */
29 mutex_lock(&tap->ring.sysfs_mutex); /* serialize sysfs operations */
30 }
32 static inline void
33 blktap_sysfs_exit(struct blktap *tap)
34 {
35 mutex_unlock(&tap->ring.sysfs_mutex);
36 blktap_sysfs_put(tap);
37 }
39 static ssize_t blktap_sysfs_pause_device(struct class_device *, const char *, size_t);
40 CLASS_DEVICE_ATTR(pause, S_IWUSR, NULL, blktap_sysfs_pause_device);
41 static ssize_t blktap_sysfs_resume_device(struct class_device *, const char *, size_t);
42 CLASS_DEVICE_ATTR(resume, S_IWUSR, NULL, blktap_sysfs_resume_device);
44 static ssize_t
45 blktap_sysfs_set_name(struct class_device *dev, const char *buf, size_t size)
46 {
47 int err;
48 struct blktap *tap = (struct blktap *)dev->class_data;
50 blktap_sysfs_enter(tap);
52 if (!tap->ring.dev ||
53 test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
54 err = -ENODEV;
55 goto out;
56 }
58 if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
59 err = -EPERM;
60 goto out;
61 }
63 if (size > BLKTAP2_MAX_MESSAGE_LEN) {
64 err = -ENAMETOOLONG;
65 goto out;
66 }
68 if (strnlen(buf, BLKTAP2_MAX_MESSAGE_LEN) >= BLKTAP2_MAX_MESSAGE_LEN) {
69 err = -EINVAL;
70 goto out;
71 }
73 snprintf(tap->params.name, sizeof(tap->params.name) - 1, "%s", buf);
74 err = size;
76 out:
77 blktap_sysfs_exit(tap);
78 return err;
79 }
81 static ssize_t
82 blktap_sysfs_get_name(struct class_device *dev, char *buf)
83 {
84 ssize_t size;
85 struct blktap *tap = (struct blktap *)dev->class_data;
87 blktap_sysfs_enter(tap);
89 if (!tap->ring.dev)
90 size = -ENODEV;
91 else if (tap->params.name[0])
92 size = sprintf(buf, "%s\n", tap->params.name);
93 else
94 size = sprintf(buf, "%d\n", tap->minor);
96 blktap_sysfs_exit(tap);
98 return size;
99 }
100 CLASS_DEVICE_ATTR(name, S_IRUSR | S_IWUSR,
101 blktap_sysfs_get_name, blktap_sysfs_set_name);
103 static ssize_t
104 blktap_sysfs_remove_device(struct class_device *dev,
105 const char *buf, size_t size)
106 {
107 int err;
108 struct blktap *tap = (struct blktap *)dev->class_data;
110 if (!tap->ring.dev)
111 return size;
113 if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
114 return -EBUSY;
116 err = blktap_control_destroy_device(tap);
118 return (err ? : size);
119 }
120 CLASS_DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device);
122 static ssize_t
123 blktap_sysfs_pause_device(struct class_device *dev,
124 const char *buf, size_t size)
125 {
126 int err;
127 struct blktap *tap = (struct blktap *)dev->class_data;
129 blktap_sysfs_enter(tap);
131 BTDBG("pausing %u:%u: dev_inuse: %lu\n",
132 MAJOR(tap->ring.devno), MINOR(tap->ring.devno), tap->dev_inuse);
134 if (!tap->ring.dev ||
135 test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
136 err = -ENODEV;
137 goto out;
138 }
140 if (test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
141 err = -EBUSY;
142 goto out;
143 }
145 if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
146 err = 0;
147 goto out;
148 }
150 err = blktap_device_pause(tap);
151 if (!err) {
152 class_device_remove_file(dev, &class_device_attr_pause);
153 class_device_create_file(dev, &class_device_attr_resume);
154 }
156 out:
157 blktap_sysfs_exit(tap);
159 return (err ? err : size);
160 }
162 static ssize_t
163 blktap_sysfs_resume_device(struct class_device *dev,
164 const char *buf, size_t size)
165 {
166 int err;
167 struct blktap *tap = (struct blktap *)dev->class_data;
169 blktap_sysfs_enter(tap);
171 if (!tap->ring.dev ||
172 test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
173 err = -ENODEV;
174 goto out;
175 }
177 if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
178 err = -EINVAL;
179 goto out;
180 }
182 err = blktap_device_resume(tap);
183 if (!err) {
184 class_device_remove_file(dev, &class_device_attr_resume);
185 class_device_create_file(dev, &class_device_attr_pause);
186 }
188 out:
189 blktap_sysfs_exit(tap);
191 BTDBG("returning %d\n", (err ? err : size));
192 return (err ? err : size);
193 }
#ifdef ENABLE_PASSTHROUGH
/*
 * sysfs store handler that switches a paused tap to passthrough mode.
 *
 * @buf must hold "<major>:<minor>" in hexadecimal; the pair is handed to
 * blktap_device_enable_passthrough().
 *
 * Returns @size on success, -ENODEV if the sysfs device is gone or a
 * shutdown was requested, -EINVAL if the tap is not paused, is already
 * in passthrough mode, or @buf does not parse, or the error from
 * blktap_device_enable_passthrough().
 */
static ssize_t
blktap_sysfs_enable_passthrough(struct class_device *dev,
				const char *buf, size_t size)
{
	int err;
	ssize_t ret;
	unsigned major, minor;
	struct blktap *tap = (struct blktap *)dev->class_data;

	BTINFO("passthrough request enabled\n");

	blktap_sysfs_enter(tap);

	if (!tap->ring.dev ||
	    test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
		err = -ENODEV;
		goto out;
	}

	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
		err = -EINVAL;
		goto out;
	}

	if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
		err = -EINVAL;
		goto out;
	}

	err = sscanf(buf, "%x:%x", &major, &minor);
	if (err != 2) {
		err = -EINVAL;
		goto out;
	}

	err = blktap_device_enable_passthrough(tap, major, minor);

out:
	blktap_sysfs_exit(tap);

	ret = err ? err : (ssize_t)size;

	/*
	 * "err ? err : size" has type size_t, which does not match "%d" on
	 * 64-bit builds; hand the format an int explicitly.
	 */
	BTDBG("returning %d\n", (int)ret);
	return ret;
}
#endif
239 static ssize_t
240 blktap_sysfs_debug_device(struct class_device *dev, char *buf)
241 {
242 char *tmp;
243 int i, ret;
244 struct blktap *tap = (struct blktap *)dev->class_data;
246 tmp = buf;
247 blktap_sysfs_get(tap);
249 if (!tap->ring.dev) {
250 ret = sprintf(tmp, "no device\n");
251 goto out;
252 }
254 tmp += sprintf(tmp, "%s (%u:%u), refcnt: %d, dev_inuse: 0x%08lx\n",
255 tap->params.name, MAJOR(tap->ring.devno),
256 MINOR(tap->ring.devno), atomic_read(&tap->refcnt),
257 tap->dev_inuse);
258 tmp += sprintf(tmp, "capacity: 0x%llx, sector size: 0x%lx, "
259 "device users: %d\n", tap->params.capacity,
260 tap->params.sector_size, tap->device.users);
262 down_read(&tap->tap_sem);
264 tmp += sprintf(tmp, "pending requests: %d\n", tap->pending_cnt);
265 for (i = 0; i < MAX_PENDING_REQS; i++) {
266 struct blktap_request *req = tap->pending_requests[i];
267 if (!req)
268 continue;
270 tmp += sprintf(tmp, "req %d: id: %llu, usr_idx: %d, "
271 "status: 0x%02x, pendcnt: %d, "
272 "nr_pages: %u, op: %d, time: %lu:%lu\n",
273 i, req->id, req->usr_idx,
274 req->status, atomic_read(&req->pendcnt),
275 req->nr_pages, req->operation, req->time.tv_sec,
276 req->time.tv_usec);
277 }
279 up_read(&tap->tap_sem);
280 ret = (tmp - buf) + 1;
282 out:
283 blktap_sysfs_put(tap);
284 BTDBG("%s\n", buf);
286 return ret;
287 }
288 CLASS_DEVICE_ATTR(debug, S_IRUSR, blktap_sysfs_debug_device, NULL);
290 int
291 blktap_sysfs_create(struct blktap *tap)
292 {
293 struct blktap_ring *ring;
294 struct class_device *dev;
296 if (!class)
297 return -ENODEV;
299 ring = &tap->ring;
301 dev = class_device_create(class, NULL, ring->devno,
302 NULL, "blktap%d", tap->minor);
303 if (IS_ERR(dev))
304 return PTR_ERR(dev);
306 ring->dev = dev;
307 dev->class_data = tap;
309 mutex_init(&ring->sysfs_mutex);
310 atomic_set(&ring->sysfs_refcnt, 0);
311 set_bit(BLKTAP_SYSFS, &tap->dev_inuse);
313 class_device_create_file(dev, &class_device_attr_name);
314 class_device_create_file(dev, &class_device_attr_remove);
315 class_device_create_file(dev, &class_device_attr_pause);
316 class_device_create_file(dev, &class_device_attr_debug);
318 return 0;
319 }
321 int
322 blktap_sysfs_destroy(struct blktap *tap)
323 {
324 struct blktap_ring *ring;
325 struct class_device *dev;
327 ring = &tap->ring;
328 dev = ring->dev;
329 if (!class || !dev)
330 return 0;
332 ring->dev = NULL;
333 if (wait_event_interruptible(sysfs_wq,
334 !atomic_read(&tap->ring.sysfs_refcnt)))
335 return -EAGAIN;
337 /* XXX: is it safe to remove the class from a sysfs attribute? */
338 class_device_remove_file(dev, &class_device_attr_name);
339 class_device_remove_file(dev, &class_device_attr_remove);
340 class_device_remove_file(dev, &class_device_attr_pause);
341 class_device_remove_file(dev, &class_device_attr_resume);
342 class_device_remove_file(dev, &class_device_attr_debug);
343 class_device_destroy(class, ring->devno);
345 clear_bit(BLKTAP_SYSFS, &tap->dev_inuse);
347 return 0;
348 }
350 static ssize_t
351 blktap_sysfs_show_verbosity(struct class *class, char *buf)
352 {
353 return sprintf(buf, "%d\n", blktap_debug_level);
354 }
356 static ssize_t
357 blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size)
358 {
359 int level;
361 if (sscanf(buf, "%d", &level) == 1) {
362 blktap_debug_level = level;
363 return size;
364 }
366 return -EINVAL;
367 }
368 CLASS_ATTR(verbosity, S_IRUSR | S_IWUSR,
369 blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity);
371 static ssize_t
372 blktap_sysfs_show_devices(struct class *class, char *buf)
373 {
374 int i, ret;
375 struct blktap *tap;
377 ret = 0;
378 for (i = 0; i < MAX_BLKTAP_DEVICE; i++) {
379 tap = blktaps[i];
380 if (!tap)
381 continue;
383 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
384 continue;
386 ret += sprintf(buf + ret, "%d ", tap->minor);
387 ret += snprintf(buf + ret, sizeof(tap->params.name) - 1,
388 tap->params.name);
389 ret += sprintf(buf + ret, "\n");
390 }
392 return ret;
393 }
394 CLASS_ATTR(devices, S_IRUSR, blktap_sysfs_show_devices, NULL);
396 void
397 blktap_sysfs_free(void)
398 {
399 if (!class)
400 return;
402 class_remove_file(class, &class_attr_verbosity);
403 class_remove_file(class, &class_attr_devices);
405 class_destroy(class);
406 }
408 int
409 blktap_sysfs_init(void)
410 {
411 struct class *cls;
413 if (class)
414 return -EEXIST;
416 cls = class_create(THIS_MODULE, "blktap2");
417 if (IS_ERR(cls))
418 return PTR_ERR(cls);
420 class_create_file(cls, &class_attr_verbosity);
421 class_create_file(cls, &class_attr_devices);
423 class = cls;
424 return 0;
425 }