ia64/xen-unstable

view xenolinux-2.4.22-sparse/arch/xeno/drivers/block/xl_block.c @ 926:0a901de56d7c

bitkeeper revision 1.588 (3fafd2ccYgSbWe9z2kLiH-DeviUaIA)

Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into labyrinth.cl.cam.ac.uk:/local/scratch/smh22/xeno.bk
author smh22@labyrinth.cl.cam.ac.uk
date Mon Nov 10 18:02:52 2003 +0000 (2003-11-10)
parents 352a82eb57ad 4aba3a48d64f
children cfd9961afd8b 7ae6b4359a0d
/******************************************************************************
 * xl_block.c
 *
 * Xenolinux virtual block-device driver.
 *
 */

#include "xl_block.h"
#include <linux/blk.h>
#include <linux/cdrom.h>

typedef unsigned char byte; /* from linux/ide.h */

#define XLBLK_RESPONSE_IRQ _EVENT_BLKDEV
#define DEBUG_IRQ          _EVENT_DEBUG

#define STATE_ACTIVE    0
#define STATE_SUSPENDED 1
#define STATE_CLOSED    2
static unsigned int state = STATE_SUSPENDED;

static blk_ring_t *blk_ring;
static unsigned int resp_cons; /* Response consumer for comms ring. */
static unsigned int req_prod;  /* Private request producer. */
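/*
 * Shared-ring bookkeeping: blk_ring points at the descriptor ring shared
 * with Xen. req_prod is our private producer index; it is published to
 * blk_ring->req_prod (and Xen notified) in signal_requests_to_xen().
 * resp_cons trails blk_ring->resp_prod and marks how far the response
 * handler has processed.
 */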
#define XDI_MAX 64
static xen_disk_info_t xlblk_disk_info; /* information about our disks/VBDs */

#if 0
static int xlblk_control_msg_pending;
#endif

/* We plug the I/O ring if the driver is suspended or if the ring is full. */
#define RING_PLUGGED ((BLK_RING_INC(req_prod) == resp_cons) || \
                      (state != STATE_ACTIVE))

/*
 * Request queues with outstanding work, but ring is currently full.
 * We need no special lock here, as we always access this with the
 * io_request_lock held. We only need a small maximum list.
 */
#define MAX_PENDING 8
static request_queue_t *pending_queues[MAX_PENDING];
static int nr_pending;

static kdev_t        sg_dev;
static int           sg_operation = -1;
static unsigned long sg_next_sect;
#define DISABLE_SCATTERGATHER() (sg_operation = -1)
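/*
 * sg_dev/sg_operation/sg_next_sect remember the device, direction and next
 * sector of the most recently queued request, so that a follow-on buffer
 * head continuing the same run can be folded into that ring entry as an
 * extra segment rather than consuming a new slot (see hypervisor_request()).
 */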
static inline void signal_requests_to_xen(void)
{
    block_io_op_t op;

    DISABLE_SCATTERGATHER();
    blk_ring->req_prod = req_prod;

    op.cmd = BLOCK_IO_OP_SIGNAL;
    HYPERVISOR_block_io_op(&op);
    return;
}

static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
{
    struct gendisk *gd = xldev_to_gendisk(xldev);
    return (xl_disk_t *)gd->real_devices +
        (MINOR(xldev) >> gd->minor_shift);
}


int xenolinux_block_open(struct inode *inode, struct file *filep)
{
    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
    disk->usage++;
    DPRINTK("xenolinux_block_open\n");
    return 0;
}


int xenolinux_block_release(struct inode *inode, struct file *filep)
{
    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
    disk->usage--;
    DPRINTK("xenolinux_block_release\n");
    return 0;
}


int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
                          unsigned command, unsigned long argument)
{
    kdev_t dev = inode->i_rdev;
    struct hd_geometry *geo = (struct hd_geometry *)argument;
    struct gendisk *gd;
    struct hd_struct *part;

    /* NB. No need to check permissions. That is done for us. */

    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                  command, (long) argument, dev);

    gd = xldev_to_gendisk(dev);
    part = &gd->part[MINOR(dev)];

    switch ( command )
    {
    case BLKGETSIZE:
        DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
        return put_user(part->nr_sects, (unsigned long *) argument);

    case BLKRRPART: /* re-read partition table */
        DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
        return xenolinux_block_revalidate(dev);

    case BLKSSZGET:
        return hardsect_size[MAJOR(dev)][MINOR(dev)];

    case BLKBSZGET: /* get block size */
        DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
        break;

    case BLKBSZSET: /* set block size */
        DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET);
        break;

    case BLKRASET: /* set read-ahead */
        DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET);
        break;

    case BLKRAGET: /* get read-ahead */
        DPRINTK_IOCTL(" BLKRAGET: %x\n", BLKRAGET);
        break;

    case HDIO_GETGEO:
        /* note: these values are complete garbage */
        DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO);
        if (!argument) return -EINVAL;
        if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
        if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
        if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
        return 0;

    case HDIO_GETGEO_BIG:
        /* note: these values are complete garbage */
        DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
        if (!argument) return -EINVAL;
        if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
        if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
        if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
        return 0;

    case CDROMMULTISESSION:
        DPRINTK("FIXME: support multisession CDs later\n");
        memset((struct cdrom_multisession *)argument, 0,
               sizeof(struct cdrom_multisession));
        return 0;

    default:
        printk("ioctl %08x not supported by xl_block\n", command);
        return -ENOSYS;
    }

    return 0;
}

/* check media change: should probably do something here in some cases :-) */
int xenolinux_block_check(kdev_t dev)
{
    DPRINTK("xenolinux_block_check\n");
    return 0;
}

int xenolinux_block_revalidate(kdev_t dev)
{
    struct gendisk *gd = xldev_to_gendisk(dev);
    xl_disk_t *disk = xldev_to_xldisk(dev);
    unsigned long flags;
    int i, disk_nr = MINOR(dev) >> gd->minor_shift;

    DPRINTK("xenolinux_block_revalidate: %d\n", dev);

    spin_lock_irqsave(&io_request_lock, flags);
    if ( disk->usage > 1 )
    {
        spin_unlock_irqrestore(&io_request_lock, flags);
        return -EBUSY;
    }
    spin_unlock_irqrestore(&io_request_lock, flags);

    for ( i = gd->nr_real - 1; i >= 0; i-- )
    {
        invalidate_device(dev+i, 1);
        gd->part[MINOR(dev+i)].start_sect = 0;
        gd->part[MINOR(dev+i)].nr_sects = 0;
    }

#if 0
    /* VBDs can change under our feet. Check if that has happened. */
    if ( MAJOR(dev) == XLVIRT_MAJOR )
    {
        xen_disk_info_t *xdi = kmalloc(sizeof(*xdi), GFP_KERNEL);
        if ( xdi != NULL )
        {
            memset(xdi, 0, sizeof(*xdi));
            xenolinux_control_msg(XEN_BLOCK_PROBE,
                                  (char *)xdi, sizeof(*xdi));
            for ( i = 0; i < xdi->count; i++ )
                if ( IS_VIRTUAL_XENDEV(xdi->disks[i].device) &&
                     ((xdi->disks[i].device & XENDEV_IDX_MASK) == disk_nr) )
                    ((xl_disk_t *)gd->real_devices)[disk_nr].capacity =
                        xdi->disks[i].capacity;
            kfree(xdi);
        }
    }
#endif

    grok_partitions(gd, disk_nr, gd->nr_real, disk->capacity);

    return 0;
}


/*
 * hypervisor_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int hypervisor_request(unsigned long id,
                              int operation,
                              char * buffer,
                              unsigned long sector_number,
                              unsigned short nr_sectors,
                              kdev_t device)
{
    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
    struct gendisk *gd;
    blk_ring_req_entry_t *req;
    struct buffer_head *bh;

    if ( nr_sectors >= (1<<9) ) BUG();
    if ( (buffer_ma & ((1<<9)-1)) != 0 ) BUG();

    if ( state == STATE_CLOSED )
        return 1;

    switch ( operation )
    {
#if 0
    case XEN_BLOCK_PROBE:
        if ( RING_PLUGGED ) return 1;
        sector_number = 0;
        DISABLE_SCATTERGATHER();
        break;
#endif

    case XEN_BLOCK_READ:
    case XEN_BLOCK_WRITE:
        gd = xldev_to_gendisk(device);
        sector_number += gd->part[MINOR(device)].start_sect;
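        /*
         * Scatter-gather fast path: if this buffer head continues the run
         * described by sg_{operation,dev,next_sect}, append it to the most
         * recently queued ring entry as an additional segment instead of
         * starting a new request.
         */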
        if ( (sg_operation == operation) &&
             (sg_dev == device) &&
             (sg_next_sect == sector_number) )
        {
            req = &blk_ring->ring[(req_prod-1)&(BLK_RING_SIZE-1)].req;
            bh = (struct buffer_head *)id;
            bh->b_reqnext = (struct buffer_head *)req->id;
            req->id = id;
            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
            if ( ++req->nr_segments < MAX_BLK_SEGS )
                sg_next_sect += nr_sectors;
            else
                DISABLE_SCATTERGATHER();
            return 0;
        }
        else if ( RING_PLUGGED )
        {
            return 1;
        }
        else
        {
            sg_operation = operation;
            sg_dev = device;
            sg_next_sect = sector_number + nr_sectors;
        }
        break;

    default:
        panic("unknown op %d\n", operation);
    }

    /* Fill out a communications ring structure. */
    req = &blk_ring->ring[req_prod].req;
    req->id = id;
    req->operation = operation;
    req->sector_number = sector_number;
    req->device = device;
    req->nr_segments = 1;
    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
    req_prod = BLK_RING_INC(req_prod);

    return 0;
}


/*
 * do_xlblk_request
 *  read a block; request is in a request queue
 */
void do_xlblk_request(request_queue_t *rq)
{
    struct request *req;
    struct buffer_head *bh, *next_bh;
    int rw, nsect, full, queued = 0;

    DPRINTK("xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME);

    while ( !rq->plugged && !list_empty(&rq->queue_head))
    {
        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
            goto out;

        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
                req, req->cmd, req->sector,
                req->current_nr_sectors, req->nr_sectors, req->bh);

        rw = req->cmd;
        if ( rw == READA ) rw = READ;
        if ((rw != READ) && (rw != WRITE))
            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);

        req->errors = 0;
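        /*
         * Hand each buffer head of this request to Xen individually;
         * hypervisor_request() merges contiguous ones into multi-segment
         * ring entries. If the ring fills, re-link the buffer head and
         * park this queue on pending_queues[] for later.
         */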
        bh = req->bh;
        while ( bh != NULL )
        {
            next_bh = bh->b_reqnext;
            bh->b_reqnext = NULL;

            full = hypervisor_request(
                (unsigned long)bh,
                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE,
                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);

            if ( full )
            {
                bh->b_reqnext = next_bh;
                pending_queues[nr_pending++] = rq;
                if ( nr_pending >= MAX_PENDING ) BUG();
                goto out;
            }

            queued++;

            /* Dequeue the buffer head from the request. */
            nsect = bh->b_size >> 9;
            bh = req->bh = next_bh;

            if ( bh != NULL )
            {
                /* There's another buffer head to do. Update the request. */
                req->hard_sector += nsect;
                req->hard_nr_sectors -= nsect;
                req->sector = req->hard_sector;
                req->nr_sectors = req->hard_nr_sectors;
                req->current_nr_sectors = bh->b_size >> 9;
                req->buffer = bh->b_data;
            }
            else
            {
                /* That was the last buffer head. Finalise the request. */
                if ( end_that_request_first(req, 1, "XenBlk") ) BUG();
                blkdev_dequeue_request(req);
                end_that_request_last(req);
            }
        }
    }

 out:
    if ( queued != 0 ) signal_requests_to_xen();
}


static void kick_pending_request_queues(void)
{
    /* We kick pending request queues if the ring is reasonably empty. */
    if ( (nr_pending != 0) &&
         (((req_prod - resp_cons) & (BLK_RING_SIZE - 1)) <
          (BLK_RING_SIZE >> 1)) )
    {
        /* Attempt to drain the queue, but bail if the ring becomes full. */
        while ( nr_pending != 0 )
        {
            do_xlblk_request(pending_queues[--nr_pending]);
            if ( RING_PLUGGED ) break;
        }
    }
}


static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
    int i;
    unsigned long flags;
    struct buffer_head *bh, *next_bh;

    if ( state == STATE_CLOSED )
        return;

    spin_lock_irqsave(&io_request_lock, flags);
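    /*
     * Walk newly arrived responses, from our consumer index up to Xen's
     * resp_prod, and complete the chain of buffer heads recorded in each
     * response's id field.
     */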
    for ( i = resp_cons;
          i != blk_ring->resp_prod;
          i = BLK_RING_INC(i) )
    {
        blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp;
        switch (bret->operation)
        {
        case XEN_BLOCK_READ:
        case XEN_BLOCK_WRITE:
            if ( bret->status )
                DPRINTK("Bad return from blkdev data request: %lx\n",
                        bret->status);
            for ( bh = (struct buffer_head *)bret->id;
                  bh != NULL;
                  bh = next_bh )
            {
                next_bh = bh->b_reqnext;
                bh->b_reqnext = NULL;
                bh->b_end_io(bh, !bret->status);
            }
            break;

#if 0
        case XEN_BLOCK_PROBE:
            xlblk_control_msg_pending = bret->status;
            break;
#endif

        default:
            BUG();
        }
    }

    resp_cons = i;

    kick_pending_request_queues();

    spin_unlock_irqrestore(&io_request_lock, flags);
}


#if 0
/* Send a synchronous message to Xen. */
int xenolinux_control_msg(int operation, char *buffer, int size)
{
    unsigned long flags;
    char *aligned_buf;

    /* We copy from an aligned buffer, as interface needs sector alignment. */
    aligned_buf = (char *)get_free_page(GFP_KERNEL);
    if ( aligned_buf == NULL ) BUG();
    memcpy(aligned_buf, buffer, size);

    xlblk_control_msg_pending = 2;
    spin_lock_irqsave(&io_request_lock, flags);
    /* Note that size gets rounded up to a sector-sized boundary. */
    if ( hypervisor_request(0, operation, aligned_buf, 0, (size+511)/512, 0) )
    {
        spin_unlock_irqrestore(&io_request_lock, flags);
        free_page((unsigned long)aligned_buf);
        return -EAGAIN;
    }
    signal_requests_to_xen();
    spin_unlock_irqrestore(&io_request_lock, flags);
    while ( xlblk_control_msg_pending == 2 ) barrier();

    memcpy(buffer, aligned_buf, size);
    free_page((unsigned long)aligned_buf);

    return xlblk_control_msg_pending ? -EINVAL : 0;
}
#endif


static void reset_xlblk_interface(void)
{
    block_io_op_t op;

    // xlblk_control_msg_pending = 0;
    nr_pending = 0;

    op.cmd = BLOCK_IO_OP_RESET;
    if ( HYPERVISOR_block_io_op(&op) != 0 )
        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");

    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
    (void)HYPERVISOR_block_io_op(&op);
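    /*
     * Xen returns the machine frame number of the shared descriptor ring in
     * op.u.ring_mfn; map that frame at a fixed virtual address so the ring
     * can be accessed directly, then reset both ends' indexes.
     */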
    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;

    wmb();
    state = STATE_ACTIVE;
}


int __init xlblk_init(void)
{
    int error;
    block_io_op_t op;

    reset_xlblk_interface();

    error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int,
                        SA_SAMPLE_RANDOM, "blkdev", NULL);
    if ( error )
    {
        printk(KERN_ALERT "Could not allocate receive interrupt\n");
        goto fail;
    }

    /* Setup our [empty] disk information structure */
    xlblk_disk_info.max = XDI_MAX;
    xlblk_disk_info.disks = kmalloc(XDI_MAX * sizeof(xen_disk_t), GFP_KERNEL);
    xlblk_disk_info.count = 0;

    /* Probe for disk information. */
    memset(&op, 0, sizeof(op));
    op.cmd = BLOCK_IO_OP_VBD_PROBE;
    op.u.probe_params.domain = 0;
    memcpy(&op.u.probe_params.xdi, &xlblk_disk_info, sizeof(xlblk_disk_info));

    error = HYPERVISOR_block_io_op(&op);

    if ( error )
    {
        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
        free_irq(XLBLK_RESPONSE_IRQ, NULL);
        goto fail;
    }

    /* copy back the [updated] count parameter */
    xlblk_disk_info.count = op.u.probe_params.xdi.count;
    /* Pass the information to our virtual block device subsystem. */
    xlvbd_init(&xlblk_disk_info);

    return 0;

 fail:
    return error;
}

static void __exit xlblk_cleanup(void)
{
    xlvbd_cleanup();
    free_irq(XLBLK_RESPONSE_IRQ, NULL);
}


#ifdef MODULE
module_init(xlblk_init);
module_exit(xlblk_cleanup);
#endif


void blkdev_suspend(void)
{
    state = STATE_SUSPENDED;
    wmb();
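    /*
     * Mark the interface suspended, then wait for the response handler to
     * consume a response for every published request before declaring the
     * device closed and unmapping the shared ring page.
     */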
    while ( resp_cons != blk_ring->req_prod )
    {
        barrier();
        current->state = TASK_INTERRUPTIBLE;
        schedule_timeout(1);
    }

    wmb();
    state = STATE_CLOSED;
    wmb();

    clear_fixmap(FIX_BLKRING_BASE);
}


void blkdev_resume(void)
{
    reset_xlblk_interface();
    spin_lock_irq(&io_request_lock);
    kick_pending_request_queues();
    spin_unlock_irq(&io_request_lock);
}