ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c @ 7514:5a4893a537ca

Minor driver code changes/rearrangement to reduce ia64-specific patches
Signed-off-by: Dan Magenheimer <dan.magenheimer@hp.com>
author djm@kirby.fc.hp.com
date Fri Nov 04 10:40:29 2005 -0600 (2005-11-04)
parents 895149d24048
children b6cce4237ded
/******************************************************************************
 * blkfront.c
 *
 * XenLinux virtual block-device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 * Copyright (c) 2004, Andrew Warfield
 * Copyright (c) 2005, Christopher Clark
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#if 1
#define ASSERT(p)                                                           \
        if (!(p)) { printk("Assertion '%s' failed, line %d, file %s", #p , \
        __LINE__, __FILE__); *(int*)0=0; }
#else
#define ASSERT(_p)
#endif

#include <linux/version.h>
#include "block.h"
#include <linux/cdrom.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <scsi/scsi.h>
#include <asm-xen/evtchn.h>
#include <asm-xen/xenbus.h>
#include <asm-xen/xen-public/grant_table.h>
#include <asm-xen/gnttab.h>
#include <asm/hypervisor.h>

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
        (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
#define GRANT_INVALID_REF 0

static void kick_pending_request_queues(struct blkfront_info *info);

static void blkif_completion(struct blk_shadow *s);

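/*
 * The shadow entries double as a free list: req.id of an unused entry
 * holds the index of the next free entry, so allocating and releasing
 * request ids is O(1).
 */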
static inline int GET_ID_FROM_FREELIST(
        struct blkfront_info *info)
{
        unsigned long free = info->shadow_free;
        BUG_ON(free > BLK_RING_SIZE);
        info->shadow_free = info->shadow[free].req.id;
        info->shadow[free].req.id = 0x0fffffee; /* debug */
        return free;
}

static inline void ADD_ID_TO_FREELIST(
        struct blkfront_info *info, unsigned long id)
{
        info->shadow[id].req.id  = info->shadow_free;
        info->shadow[id].request = 0;
        info->shadow_free = id;
}

static inline void flush_requests(struct blkfront_info *info)
{
        RING_PUSH_REQUESTS(&info->ring);
        notify_remote_via_irq(info->irq);
}

static void kick_pending_request_queues(struct blkfront_info *info)
{
        if (!RING_FULL(&info->ring)) {
                /* Re-enable calldowns. */
                blk_start_queue(info->rq);
                /* Kick things off immediately. */
                do_blkif_request(info->rq);
        }
}

static void blkif_restart_queue(void *arg)
{
        struct blkfront_info *info = (struct blkfront_info *)arg;
        spin_lock_irq(&blkif_io_lock);
        kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);
}

static void blkif_restart_queue_callback(void *arg)
{
        struct blkfront_info *info = (struct blkfront_info *)arg;
        schedule_work(&info->work);
}

int blkif_open(struct inode *inode, struct file *filep)
{
        return 0;
}

int blkif_release(struct inode *inode, struct file *filep)
{
        return 0;
}

int blkif_ioctl(struct inode *inode, struct file *filep,
                unsigned command, unsigned long argument)
{
        int i;

        DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                      command, (long)argument, inode->i_rdev);

        switch ( command )
        {
        case HDIO_GETGEO:
                /* return ENOSYS to use defaults */
                return -ENOSYS;

        case CDROMMULTISESSION:
                DPRINTK("FIXME: support multisession CDs later\n");
                for (i = 0; i < sizeof(struct cdrom_multisession); i++)
                        if (put_user(0, (char *)(argument + i)))
                                return -EFAULT;
                return 0;

        default:
                /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
                  command);*/
                return -EINVAL; /* same return as native Linux */
        }

        return 0;
}

/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int blkif_queue_request(struct request *req)
{
        struct blkfront_info *info = req->rq_disk->private_data;
        unsigned long buffer_mfn;
        blkif_request_t *ring_req;
        struct bio *bio;
        struct bio_vec *bvec;
        int idx;
        unsigned long id;
        unsigned int fsect, lsect;
        int ref;
        grant_ref_t gref_head;

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                return 1;

        if (gnttab_alloc_grant_references(
                BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
                gnttab_request_free_callback(
                        &info->callback,
                        blkif_restart_queue_callback,
                        info,
                        BLKIF_MAX_SEGMENTS_PER_REQUEST);
                return 1;
        }

        /* Fill out a communications ring structure. */
        ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
        id = GET_ID_FROM_FREELIST(info);
        info->shadow[id].request = (unsigned long)req;

        ring_req->id = id;
        ring_req->operation = rq_data_dir(req) ?
                BLKIF_OP_WRITE : BLKIF_OP_READ;
        ring_req->sector_number = (blkif_sector_t)req->sector;
        ring_req->handle = info->handle;

        ring_req->nr_segments = 0;
        rq_for_each_bio (bio, req) {
                bio_for_each_segment (bvec, bio, idx) {
                        BUG_ON(ring_req->nr_segments
                               == BLKIF_MAX_SEGMENTS_PER_REQUEST);
                        buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
                        fsect = bvec->bv_offset >> 9;
                        lsect = fsect + (bvec->bv_len >> 9) - 1;
                        /* install a grant reference. */
                        ref = gnttab_claim_grant_reference(&gref_head);
                        ASSERT(ref != -ENOSPC);

                        gnttab_grant_foreign_access_ref(
                                ref,
                                info->backend_id,
                                buffer_mfn,
                                rq_data_dir(req) );

                        info->shadow[id].frame[ring_req->nr_segments] =
                                mfn_to_pfn(buffer_mfn);

                        ring_req->frame_and_sects[ring_req->nr_segments] =
                                blkif_fas_from_gref(ref, fsect, lsect);

                        ring_req->nr_segments++;
                }
        }

        info->ring.req_prod_pvt++;

        /* Keep a private copy so we can reissue requests when recovering. */
        info->shadow[id].req = *ring_req;

        gnttab_free_grant_references(gref_head);

        return 0;
}

/*
 * do_blkif_request
 *  read/write a block; the request is taken from the request queue
 */
void do_blkif_request(request_queue_t *rq)
{
        struct blkfront_info *info = NULL;
        struct request *req;
        int queued;

        DPRINTK("Entered do_blkif_request\n");

        queued = 0;

        while ((req = elv_next_request(rq)) != NULL) {
                info = req->rq_disk->private_data;

                if (!blk_fs_request(req)) {
                        end_request(req, 0);
                        continue;
                }

                if (RING_FULL(&info->ring))
                        goto wait;

                DPRINTK("do_blk_req %p: cmd %p, sec %lx, "
                        "(%u/%li) buffer:%p [%s]\n",
                        req, req->cmd, req->sector, req->current_nr_sectors,
                        req->nr_sectors, req->buffer,
                        rq_data_dir(req) ? "write" : "read");

                blkdev_dequeue_request(req);
                if (blkif_queue_request(req)) {
                        blk_requeue_request(rq, req);
                wait:
                        /* Avoid pointless unplugs. */
                        blk_stop_queue(rq);
                        break;
                }

                queued++;
        }

        if (queued != 0)
                flush_requests(info);
}

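/*
 * Event-channel interrupt handler.  Walks the response ring from
 * rsp_cons up to the backend's rsp_prod, completes the corresponding
 * block-layer requests, returns their shadow slots to the free list,
 * and then restarts the request queue.
 */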
static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
        struct request *req;
        blkif_response_t *bret;
        RING_IDX i, rp;
        unsigned long flags;
        struct blkfront_info *info = (struct blkfront_info *)dev_id;

        spin_lock_irqsave(&blkif_io_lock, flags);

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
                spin_unlock_irqrestore(&blkif_io_lock, flags);
                return IRQ_HANDLED;
        }

        rp = info->ring.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */

        for (i = info->ring.rsp_cons; i != rp; i++) {
                unsigned long id;

                bret = RING_GET_RESPONSE(&info->ring, i);
                id   = bret->id;
                req  = (struct request *)info->shadow[id].request;

                blkif_completion(&info->shadow[id]);

                ADD_ID_TO_FREELIST(info, id);

                switch (bret->operation) {
                case BLKIF_OP_READ:
                case BLKIF_OP_WRITE:
                        if (unlikely(bret->status != BLKIF_RSP_OKAY))
                                DPRINTK("Bad return from blkdev data "
                                        "request: %x\n", bret->status);

                        BUG_ON(end_that_request_first(
                                req, (bret->status == BLKIF_RSP_OKAY),
                                req->hard_nr_sectors));
                        end_that_request_last(req);
                        break;
                default:
                        BUG();
                }
        }

        info->ring.rsp_cons = i;

        kick_pending_request_queues(info);

        spin_unlock_irqrestore(&blkif_io_lock, flags);

        return IRQ_HANDLED;
}

static void blkif_free(struct blkfront_info *info)
{
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = BLKIF_STATE_DISCONNECTED;
        spin_unlock_irq(&blkif_io_lock);

        /* Free resources associated with old device channel. */
        if (info->ring.sring != NULL) {
                free_page((unsigned long)info->ring.sring);
                info->ring.sring = NULL;
        }
        if (info->ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->ring_ref, 0);
        info->ring_ref = GRANT_INVALID_REF;
        if (info->irq)
                unbind_evtchn_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
}

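/*
 * Called after resume, once a new ring and event channel have been set
 * up.  Rebuilds the shadow free list and requeues every request that was
 * still in flight at suspend time, re-granting the backend access to
 * each segment's page.
 */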
static void blkif_recover(struct blkfront_info *info)
{
        int i;
        blkif_request_t *req;
        struct blk_shadow *copy;
        int j;

        /* Stage 1: Make a safe copy of the shadow state. */
        copy = (struct blk_shadow *)kmalloc(sizeof(info->shadow), GFP_KERNEL);
        BUG_ON(copy == NULL);
        memcpy(copy, info->shadow, sizeof(info->shadow));

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
        for (i = 0; i < BLK_RING_SIZE; i++) {
                /* Not in use? */
                if (copy[i].request == 0)
                        continue;

                /* Grab a request slot and copy shadow state into it. */
                req = RING_GET_REQUEST(
                        &info->ring, info->ring.req_prod_pvt);
                *req = copy[i].req;

                /* We get a new request id, and must reset the shadow state. */
                req->id = GET_ID_FROM_FREELIST(info);
                memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

                /* Rewrite any grant references invalidated by susp/resume. */
                for (j = 0; j < req->nr_segments; j++)
                        gnttab_grant_foreign_access_ref(
                                blkif_gref_from_fas(req->frame_and_sects[j]),
                                info->backend_id,
                                pfn_to_mfn(info->shadow[req->id].frame[j]),
                                rq_data_dir(
                                        (struct request *)
                                        info->shadow[req->id].request));
                info->shadow[req->id].req = *req;

                info->ring.req_prod_pvt++;
        }

        kfree(copy);

        /* info->ring->req_prod will be set when we flush_requests().*/
        wmb();

        /* Kicks things back into life. */
        flush_requests(info);

        /* Now safe to let other people use the interface. */
        info->connected = BLKIF_STATE_CONNECTED;
}

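/*
 * Bind the event channel allocated in setup_blkring() to a local irq
 * and install blkif_int() as its handler.
 */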
static void blkif_connect(struct blkfront_info *info, u16 evtchn)
{
        int err = 0;

        info->evtchn = evtchn;

        err = bind_evtchn_to_irqhandler(
                info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
        if (err <= 0) {
                WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
                return;
        }

        info->irq = err;
}

static struct xenbus_device_id blkfront_ids[] = {
        { "vbd" },
        { "" }
};

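/*
 * Xenbus watch callback, fired whenever something changes under the
 * backend's directory.  Once the backend has published "sectors", "info"
 * and "sector-size", register the virtual block device with xlvbd_add()
 * and mark the connection as up.
 */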
static void watch_for_status(struct xenbus_watch *watch,
                             const char **vec, unsigned int len)
{
        struct blkfront_info *info;
        unsigned int binfo;
        unsigned long sectors, sector_size;
        int err;
        const char *node;

        node = vec[XS_WATCH_PATH];

        info = container_of(watch, struct blkfront_info, watch);
        node += strlen(watch->node);

        /* FIXME: clean up when error on the other end. */
        if (info->connected == BLKIF_STATE_CONNECTED)
                return;

        err = xenbus_gather(NULL, watch->node,
                            "sectors", "%lu", &sectors,
                            "info", "%u", &binfo,
                            "sector-size", "%lu", &sector_size,
                            NULL);
        if (err) {
                xenbus_dev_error(info->xbdev, err,
                                 "reading backend fields at %s", watch->node);
                return;
        }

        info->connected = BLKIF_STATE_CONNECTED;
        xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);

        xenbus_dev_ok(info->xbdev);

        /* Kick pending requests. */
        spin_lock_irq(&blkif_io_lock);
        kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);
}

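/*
 * Allocate the shared ring page, grant the backend access to it, and
 * allocate an unbound event channel for the backend to bind to.  On
 * failure the partially set up resources are released before returning.
 */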
static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
{
        blkif_sring_t *sring;
        int err;
        evtchn_op_t op = {
                .cmd = EVTCHNOP_alloc_unbound,
                .u.alloc_unbound.dom = DOMID_SELF,
                .u.alloc_unbound.remote_dom = info->backend_id };

        info->ring_ref = GRANT_INVALID_REF;

        sring = (void *)__get_free_page(GFP_KERNEL);
        if (!sring) {
                xenbus_dev_error(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

        err = gnttab_grant_foreign_access(info->backend_id,
                                          virt_to_mfn(info->ring.sring), 0);
        if (err == -ENOSPC) {
                free_page((unsigned long)info->ring.sring);
                info->ring.sring = 0;
                xenbus_dev_error(dev, err, "granting access to ring page");
                return err;
        }
        info->ring_ref = err;

        err = HYPERVISOR_event_channel_op(&op);
        if (err) {
                gnttab_end_foreign_access(info->ring_ref, 0);
                info->ring_ref = GRANT_INVALID_REF;
                free_page((unsigned long)info->ring.sring);
                info->ring.sring = 0;
                xenbus_dev_error(dev, err, "allocating event channel");
                return err;
        }

        blkif_connect(info, op.u.alloc_unbound.port);

        return 0;
}

/* Common code used when first setting up, and when resuming. */
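/*
 * Reads backend and backend-id from the frontend's xenstore directory,
 * sets up the shared ring and event channel, then, in a transaction
 * retried on -EAGAIN, writes ring-ref and event-channel so the backend
 * can locate the ring.  Finally registers a watch on the backend
 * directory so watch_for_status() hears about size/info updates.
 */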
static int talk_to_backend(struct xenbus_device *dev,
                           struct blkfront_info *info)
{
        char *backend;
        const char *message;
        struct xenbus_transaction *xbt;
        int err;

        backend = NULL;
        err = xenbus_gather(NULL, dev->nodename,
                            "backend-id", "%i", &info->backend_id,
                            "backend", NULL, &backend,
                            NULL);
        if (XENBUS_EXIST_ERR(err))
                goto out;
        if (backend && strlen(backend) == 0) {
                err = -ENOENT;
                goto out;
        }
        if (err < 0) {
                xenbus_dev_error(dev, err, "reading %s/backend or backend-id",
                                 dev->nodename);
                goto out;
        }

        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err) {
                xenbus_dev_error(dev, err, "setting up block ring");
                goto out;
        }

again:
        xbt = xenbus_transaction_start();
        if (IS_ERR(xbt)) {
                xenbus_dev_error(dev, err, "starting transaction");
                goto destroy_blkring;
        }

        err = xenbus_printf(xbt, dev->nodename,
                            "ring-ref","%u", info->ring_ref);
        if (err) {
                message = "writing ring-ref";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }

        err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
                        goto again;
                xenbus_dev_error(dev, err, "completing transaction");
                goto destroy_blkring;
        }

        info->watch.node = backend;
        info->watch.callback = watch_for_status;
        err = register_xenbus_watch(&info->watch);
        if (err) {
                message = "registering watch on backend";
                goto destroy_blkring;
        }

        info->backend = backend;

        return 0;

 abort_transaction:
        xenbus_transaction_end(xbt, 1);
        xenbus_dev_error(dev, err, "%s", message);
 destroy_blkring:
        blkif_free(info);
 out:
        if (backend)
                kfree(backend);
        return err;
}

/* Probe entry point: xenstore supplies the backend dir and virtual
   device.  We set up an event channel and shared ring, then watch the
   backend directory and wait for it to report the device is ready. */
static int blkfront_probe(struct xenbus_device *dev,
                          const struct xenbus_device_id *id)
{
        int err, vdevice, i;
        struct blkfront_info *info;

        /* FIXME: Use dynamic device id if this is not set. */
        err = xenbus_scanf(NULL, dev->nodename,
                           "virtual-device", "%i", &vdevice);
        if (XENBUS_EXIST_ERR(err))
                return err;
        if (err < 0) {
                xenbus_dev_error(dev, err, "reading virtual-device");
                return err;
        }

        info = kmalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                err = -ENOMEM;
                xenbus_dev_error(dev, err, "allocating info structure");
                return err;
        }
        info->xbdev = dev;
        info->vdevice = vdevice;
        info->connected = BLKIF_STATE_DISCONNECTED;
        info->mi = NULL;
        INIT_WORK(&info->work, blkif_restart_queue, (void *)info);

        info->shadow_free = 0;
        memset(info->shadow, 0, sizeof(info->shadow));
        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.id = i+1;
        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
        dev->data = info;

        err = talk_to_backend(dev, info);
        if (err) {
                kfree(info);
                dev->data = NULL;
                return err;
        }

        {
                unsigned int len = max(XS_WATCH_PATH, XS_WATCH_TOKEN) + 1;
                const char *vec[len];

                vec[XS_WATCH_PATH] = info->watch.node;
                vec[XS_WATCH_TOKEN] = NULL;

                /* Call once in case entries already there. */
                watch_for_status(&info->watch, vec, len);
        }

        return 0;
}

static int blkfront_remove(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->data;

        if (info->backend)
                unregister_xenbus_watch(&info->watch);

        if (info->mi)
                xlvbd_del(info);

        blkif_free(info);

        kfree(info->backend);
        kfree(info);

        return 0;
}

static int blkfront_suspend(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->data;

        unregister_xenbus_watch(&info->watch);
        kfree(info->backend);
        info->backend = NULL;

        return 0;
}

static int blkfront_resume(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->data;
        int err;

        blkif_free(info);

        err = talk_to_backend(dev, info);
        if (!err)
                blkif_recover(info);

        return err;
}

static struct xenbus_driver blkfront = {
        .name = "vbd",
        .owner = THIS_MODULE,
        .ids = blkfront_ids,
        .probe = blkfront_probe,
        .remove = blkfront_remove,
        .resume = blkfront_resume,
        .suspend = blkfront_suspend,
};

static int __init xlblk_init(void)
{
        if (xen_init() < 0)
                return -ENODEV;

        xenbus_register_driver(&blkfront);
        return 0;
}

module_init(xlblk_init);

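/*
 * Called for each completed response: revoke the backend's access to
 * every page that was granted for the request.
 */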
static void blkif_completion(struct blk_shadow *s)
{
        int i;
        for (i = 0; i < s->req.nr_segments; i++)
                gnttab_end_foreign_access(
                        blkif_gref_from_fas(s->req.frame_and_sects[i]), 0);
}

/*
 * Local variables:
 *  c-file-style: "linux"
 *  indent-tabs-mode: t
 *  c-indent-level: 8
 *  c-basic-offset: 8
 *  tab-width: 8
 * End:
 */