ia64/xen-unstable
linux-2.6.7-xen-sparse/drivers/xen/block/block.c @ 1820:3d4f8eb89670

bitkeeper revision 1.1106.1.2 (40faa780dekT3E5arFwcbQDu1MbX6g)
Cleaned up Xen's instruction emulator.

author   kaf24@scramble.cl.cam.ac.uk
date     Sun Jul 18 16:38:24 2004 +0000 (2004-07-18)
parents  002fc84add90

/******************************************************************************
 * block.c
 *
 * XenLinux virtual block-device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 */

#include "block.h"
#include <linux/cdrom.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <scsi/scsi.h>
#include <asm-xen/ctrl_if.h>

typedef unsigned char byte; /* from linux/ide.h */

#define BLKIF_STATE_CLOSED       0
#define BLKIF_STATE_DISCONNECTED 1
#define BLKIF_STATE_CONNECTED    2
static unsigned int blkif_state = BLKIF_STATE_CLOSED;
static unsigned int blkif_evtchn, blkif_irq;

static int blkif_control_rsp_valid;
static blkif_response_t blkif_control_rsp;

static blkif_ring_t *blk_ring;
static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
static BLKIF_RING_IDX req_prod;  /* Private request producer.         */

static blkif_ring_t *blk_ring_rec;   /* Private copy of requests, used for
                                      * recovery. Responses not stored here. */
static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for
                                      * recovery */
static int recovery = 0;             /* "Recovery in progress" flag.  Protected
                                      * by the blkif_io_lock */

/* We plug the I/O ring if the driver is suspended or if the ring is full. */
#define BLKIF_RING_FULL (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
                         (blkif_state != BLKIF_STATE_CONNECTED))

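/*
 * Note: req_prod and resp_cons are free-running counters that are only
 * reduced modulo the ring size (via MASK_BLKIF_IDX) when indexing the ring,
 * so (req_prod - resp_cons) is the number of requests in flight and equals
 * BLKIF_RING_SIZE exactly when the ring is full.
 */
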
/*
 * Request queues with outstanding work, but ring is currently full.
 * We need no special lock here, as we always access this with the
 * blkif_io_lock held. We only need a small maximum list.
 */
#define MAX_PENDING 8
static request_queue_t *pending_queues[MAX_PENDING];
static int nr_pending;

static inline void flush_requests(void)
{
    blk_ring->req_prod = req_prod;
    notify_via_evtchn(blkif_evtchn);
}

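/*
 * flush_requests() publishes the privately accumulated req_prod to the
 * shared ring and notifies the backend over the event channel; one
 * notification covers all requests queued since the previous flush.
 */
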
#if 0
/*
 * blkif_update_int/update_vbds_task - handle VBD update events.
 *  Schedule a task for keventd to run, which will update the VBDs and perform
 *  the corresponding updates to our view of VBD state.
 */
static struct tq_struct update_tq;
static void update_vbds_task(void *unused)
{
    xlvbd_update_vbds();
}
#endif

int blkif_open(struct inode *inode, struct file *filep)
{
    struct gendisk *gd = inode->i_bdev->bd_disk;
    struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;

    /* Update of usage count is protected by per-device semaphore. */
    di->mi->usage++;

    return 0;
}


int blkif_release(struct inode *inode, struct file *filep)
{
    struct gendisk *gd = inode->i_bdev->bd_disk;
    struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;

    /*
     * When usage drops to zero it may allow more VBD updates to occur.
     * Update of usage count is protected by a per-device semaphore.
     */
    if (--di->mi->usage == 0) {
#if 0
        update_tq.routine = update_vbds_task;
        schedule_task(&update_tq);
#endif
    }

    return 0;
}


int blkif_ioctl(struct inode *inode, struct file *filep,
                unsigned command, unsigned long argument)
{
    /* struct gendisk *gd = inode->i_bdev->bd_disk; */

    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                  command, (long)argument, inode->i_rdev);

    switch (command) {

    case HDIO_GETGEO:
        /* return ENOSYS to use defaults */
        return -ENOSYS;

    default:
        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
               command);
        return -ENOSYS;
    }

    return 0;
}

#if 0
/* check media change: should probably do something here in some cases :-) */
int blkif_check(kdev_t dev)
{
    DPRINTK("blkif_check\n");
    return 0;
}

int blkif_revalidate(kdev_t dev)
{
    struct block_device *bd;
    struct gendisk *gd;
    xen_block_t *disk;
    unsigned long capacity;
    int i, rc = 0;

    if ( (bd = bdget(dev)) == NULL )
        return -EINVAL;

    /*
     * Update of partition info, and check of usage count, is protected
     * by the per-block-device semaphore.
     */
    down(&bd->bd_sem);

    if ( ((gd = get_gendisk(dev)) == NULL) ||
         ((disk = xldev_to_xldisk(dev)) == NULL) ||
         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
    {
        rc = -EINVAL;
        goto out;
    }

    if ( disk->usage > 1 )
    {
        rc = -EBUSY;
        goto out;
    }

    /* Only reread partition table if VBDs aren't mapped to partitions. */
    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
    {
        for ( i = gd->max_p - 1; i >= 0; i-- )
        {
            invalidate_device(dev+i, 1);
            gd->part[MINOR(dev+i)].start_sect = 0;
            gd->part[MINOR(dev+i)].nr_sects   = 0;
            gd->sizes[MINOR(dev+i)]           = 0;
        }

        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
    }

 out:
    up(&bd->bd_sem);
    bdput(bd);
    return rc;
}
#endif


/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
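/*
 * Note: each bio segment below becomes one frame_and_sects descriptor in the
 * ring request: the segment's machine address (which must be sector-aligned)
 * is combined with the first and last 512-byte sectors it covers within its
 * page, packed into the low bits as (fsect << 3) | lsect.  A nonzero return
 * value means the interface is not connected and the caller should stop the
 * queue and retry later.
 */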
static int blkif_queue_request(struct request *req)
{
    struct xlbd_disk_info *di =
        (struct xlbd_disk_info *)req->rq_disk->private_data;
    unsigned long buffer_ma;
    blkif_request_t *ring_req;
    struct bio *bio;
    struct bio_vec *bvec;
    int idx, s;
    unsigned int fsect, lsect;

    if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
        return 1;

    /* Fill out a communications ring structure. */
    ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
    ring_req->id = (unsigned long)req;
    ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
        BLKIF_OP_READ;
    ring_req->sector_number = (blkif_sector_t)req->sector;
    ring_req->device = di->xd_device;

    s = 0;
    ring_req->nr_segments = 0;
    rq_for_each_bio(bio, req) {
        bio_for_each_segment(bvec, bio, idx) {
            buffer_ma =
                phys_to_machine(page_to_phys(bvec->bv_page));
            if (unlikely((buffer_ma & ((1<<9)-1)) != 0))
                BUG();

            fsect = bvec->bv_offset >> 9;
            lsect = fsect + (bvec->bv_len >> 9) - 1;
            if (unlikely(lsect > 7))
                BUG();

            ring_req->frame_and_sects[ring_req->nr_segments++] =
                buffer_ma | (fsect << 3) | lsect;
            s += bvec->bv_len >> 9;
        }
    }

    req_prod++;

    /* Keep a private copy so we can reissue requests when recovering. */
    blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req =
        *ring_req;
    blk_ring_rec->req_prod++;

    return 0;
}


/*
 * do_blkif_request
 *  read a block; request is in a request queue
 */
void do_blkif_request(request_queue_t *rq)
{
    struct request *req;
    int queued;

    DPRINTK("Entered do_blkif_request\n");

    queued = 0;

    while ((req = elv_next_request(rq)) != NULL) {
        if (!blk_fs_request(req)) {
            end_request(req, 0);
            continue;
        }

        if (BLKIF_RING_FULL) {
            blk_stop_queue(rq);
            break;
        }
        DPRINTK("do_blkif_request %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
                req, req->cmd, req->sector, req->current_nr_sectors,
                req->nr_sectors, req->buffer,
                rq_data_dir(req) ? "write" : "read");
        blkdev_dequeue_request(req);
        if (blkif_queue_request(req)) {
            blk_stop_queue(rq);
            break;
        }
        queued++;
    }

    if (queued != 0)
        flush_requests();
}

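/*
 * If the shared ring is full (or the interface is not connected) the queue
 * is stopped above; it is restarted either from the interrupt handler, via
 * blk_start_queue() in blkif_int(), or when the interface (re)connects, via
 * kick_pending_request_queues() in blkif_status_change().
 */
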
static void kick_pending_request_queues(void)
{
    /* We kick pending request queues if the ring is reasonably empty. */
    if ( (nr_pending != 0) &&
         ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) )
    {
        /* Attempt to drain the queue, but bail if the ring becomes full. */
        while ( (nr_pending != 0) && !BLKIF_RING_FULL )
            do_blkif_request(pending_queues[--nr_pending]);
    }
}


static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
    struct request *req;
    blkif_response_t *bret;
    BLKIF_RING_IDX i;
    unsigned long flags;

    spin_lock_irqsave(&blkif_io_lock, flags);

    if (unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery)) {
        printk("Bailed out\n");

        spin_unlock_irqrestore(&blkif_io_lock, flags);
        return IRQ_HANDLED;
    }

    for (i = resp_cons; i != blk_ring->resp_prod; i++) {
        bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
        switch (bret->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
            if (unlikely(bret->status != BLKIF_RSP_OKAY))
                DPRINTK("Bad return from blkdev data request: %lx\n",
                        bret->status);
            req = (struct request *)bret->id;
            /* XXXcl pass up status */
            if (unlikely(end_that_request_first(req, 1,
                                                req->hard_nr_sectors)))
                BUG();

            end_that_request_last(req);
            break;
        case BLKIF_OP_PROBE:
            memcpy(&blkif_control_rsp, bret, sizeof(*bret));
            blkif_control_rsp_valid = 1;
            break;
        default:
            BUG();
        }
    }

    resp_cons = i;
    resp_cons_rec = i;

    if (xlbd_blk_queue &&
        test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags)) {
        blk_start_queue(xlbd_blk_queue);
        /* XXXcl call to request_fn should not be needed but
         * we get stuck without... needs investigating
         */
        xlbd_blk_queue->request_fn(xlbd_blk_queue);
    }

    spin_unlock_irqrestore(&blkif_io_lock, flags);

    return IRQ_HANDLED;
}

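/*
 * Note: resp_cons_rec shadows resp_cons so that, after a backend disconnect,
 * the recovery code in blkif_status_change() can tell which requests had
 * already completed and reissue only the entries between resp_cons_rec and
 * blk_ring_rec->req_prod.
 */
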
void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
{
    unsigned long flags;

 retry:
    while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
    {
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(1);
    }

    spin_lock_irqsave(&blkif_io_lock, flags);
    if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
    {
        spin_unlock_irqrestore(&blkif_io_lock, flags);
        goto retry;
    }

    memcpy(&blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req, req, sizeof(*req));
    memcpy(&blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,
           req, sizeof(*req));
    req_prod++;
    flush_requests();

    spin_unlock_irqrestore(&blkif_io_lock, flags);

    while ( !blkif_control_rsp_valid )
    {
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(1);
    }

    memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
    blkif_control_rsp_valid = 0;
}

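/*
 * Control requests are completed by polling: blkif_int() copies a
 * BLKIF_OP_PROBE response into blkif_control_rsp and sets
 * blkif_control_rsp_valid, which the loop above waits for.  With a single
 * response buffer and flag, this path appears to assume that only one
 * control request is in flight at a time.
 */
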
static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
{
    ctrl_msg_t                   cmsg;
    blkif_fe_interface_connect_t up;

    if ( status->handle != 0 )
    {
        printk(KERN_WARNING "Status change on unsupported blkif %d\n",
               status->handle);
        return;
    }

    switch ( status->status )
    {
    case BLKIF_INTERFACE_STATUS_DESTROYED:
        printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n",
               blkif_state);
        break;

    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
        if ( blkif_state != BLKIF_STATE_CLOSED )
        {
            printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
                   " in state %d\n", blkif_state);

            printk(KERN_INFO "VBD driver recovery in progress\n");

            /* Prevent new requests being issued until we fix things up. */
            spin_lock_irq(&blkif_io_lock);
            recovery = 1;
            blkif_state = BLKIF_STATE_DISCONNECTED;
            spin_unlock_irq(&blkif_io_lock);

            /* Free resources associated with old device channel. */
            free_page((unsigned long)blk_ring);
            free_irq(blkif_irq, NULL);
            unbind_evtchn_from_irq(blkif_evtchn);
        }

        /* Move from CLOSED to DISCONNECTED state. */
        blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
        blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
        blkif_state = BLKIF_STATE_DISCONNECTED;

        /* Construct an interface-CONNECT message for the domain controller. */
        cmsg.type      = CMSG_BLKIF_FE;
        cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_CONNECT;
        cmsg.length    = sizeof(blkif_fe_interface_connect_t);
        up.handle      = 0;
        up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
        memcpy(cmsg.msg, &up, sizeof(up));

        /* Tell the controller to bring up the interface. */
        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
        break;

    case BLKIF_INTERFACE_STATUS_CONNECTED:
        if ( blkif_state == BLKIF_STATE_CLOSED )
        {
            printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
                   " in state %d\n", blkif_state);
            break;
        }

        blkif_evtchn = status->evtchn;
        blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
        (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);

        if ( recovery )
        {
            int i;

            /* Shouldn't need the blkif_io_lock here - the device is
             * plugged and the recovery flag prevents the interrupt handler
             * changing anything. */

            /* Reissue requests from the private block ring. */
            for ( i = 0;
                  resp_cons_rec < blk_ring_rec->req_prod;
                  resp_cons_rec++, i++ )
            {
                blk_ring->ring[i].req
                    = blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req;
            }

            /* Reset the private block ring to match the new ring. */
            memcpy(blk_ring_rec, blk_ring, sizeof(*blk_ring));
            resp_cons_rec = 0;

            /* blk_ring->req_prod will be set when we flush_requests().*/
            blk_ring_rec->req_prod = req_prod = i;

            wmb();

            /* Switch off recovery mode, using a memory barrier to ensure that
             * it's seen before we flush requests - we don't want to miss any
             * interrupts. */
            recovery = 0;
            wmb();

            /* Kicks things back into life. */
            flush_requests();
        }
        else
        {
            /* Probe for discs that are attached to the interface. */
            xlvbd_init();
        }

        blkif_state = BLKIF_STATE_CONNECTED;

        /* Kick pending requests. */
        spin_lock_irq(&blkif_io_lock);
        kick_pending_request_queues();
        spin_unlock_irq(&blkif_io_lock);

        break;

    default:
        printk(KERN_WARNING "Status change to unknown value %d\n",
               status->status);
        break;
    }
}

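/*
 * Summary of the state machine driven above: CLOSED -> DISCONNECTED once a
 * shared ring page has been allocated and an interface-CONNECT message sent
 * to the domain controller; DISCONNECTED -> CONNECTED once the controller
 * supplies an event channel.  A DISCONNECTED message received while not
 * CLOSED starts the recovery path, which reissues any requests still
 * recorded in blk_ring_rec over the new ring.
 */
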
static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
    switch ( msg->subtype )
    {
    case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
        if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) )
            goto parse_error;
        blkif_status_change((blkif_fe_interface_status_changed_t *)
                            &msg->msg[0]);
        break;
#if 0
    case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
        update_tq.routine = update_vbds_task;
        schedule_task(&update_tq);
        break;
#endif
    default:
        goto parse_error;
    }

    ctrl_if_send_response(msg);
    return;

 parse_error:
    msg->length = 0;
    ctrl_if_send_response(msg);
}


int __init xlblk_init(void)
{
    ctrl_msg_t                       cmsg;
    blkif_fe_driver_status_changed_t st;

    if ( (start_info.flags & SIF_INITDOMAIN)
         || (start_info.flags & SIF_BLK_BE_DOMAIN) )
        return 0;

    printk(KERN_INFO "Initialising Xen virtual block device\n");

    blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
    memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));

    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
                                    CALLBACK_IN_BLOCKING_CONTEXT);

    /* Send a driver-UP notification to the domain controller. */
    cmsg.type      = CMSG_BLKIF_FE;
    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
    cmsg.length    = sizeof(blkif_fe_driver_status_changed_t);
    st.status      = BLKIF_DRIVER_STATUS_UP;
    memcpy(cmsg.msg, &st, sizeof(st));
    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);

    /*
     * We should read 'nr_interfaces' from response message and wait
     * for notifications before proceeding. For now we assume that we
     * will be notified of exactly one interface.
     */
    while ( blkif_state != BLKIF_STATE_CONNECTED )
    {
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(1);
    }

    return 0;
#if 0
    int error;

    reset_xlblk_interface();

    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
    xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);

    error = request_irq(xlblk_response_irq, xlblk_response_int,
                        SA_SAMPLE_RANDOM, "blkdev", NULL);
    if (error) {
        printk(KERN_ALERT "Could not allocate receive interrupt\n");
        goto fail;
    }

    error = request_irq(xlblk_update_irq, xlblk_update_int,
                        0, "blkdev", NULL);
    if (error) {
        printk(KERN_ALERT
               "Could not allocate block update interrupt\n");
        goto fail;
    }

    (void)xlvbd_init();

    return 0;

 fail:
    return error;
#endif
}


static void __exit xlblk_cleanup(void)
{
    /* XXX FIXME */
    BUG();
#if 0
    /* xlvbd_cleanup(); */
    free_irq(xlblk_response_irq, NULL);
    free_irq(xlblk_update_irq, NULL);
    unbind_virq_from_irq(VIRQ_BLKDEV);
    unbind_virq_from_irq(VIRQ_VBD_UPD);
#endif
}


module_init(xlblk_init);
module_exit(xlblk_cleanup);


void blkdev_suspend(void)
{
}


void blkdev_resume(void)
{
}