direct-io.hg
changeset 2217:afbab8dc06bd
bitkeeper revision 1.1159.1.47 (411bf0a4rED_ZoNymijokJkGCt-MHg)
initial merge of 2.4 and 2.6 blk device frontend.
author | iap10@labyrinth.cl.cam.ac.uk |
---|---|
date | Thu Aug 12 22:35:16 2004 +0000 (2004-08-12) |
parents | 36edd9229334 |
children | 9166d5ce37f3 |
files | .rootkeys linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/main.c linux-2.4.26-xen-sparse/mkbuildtree linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c |
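The merged blkfront.c serves both kernel series from a single source file by bracketing the version-specific halves with preprocessor guards (see the guards added in hunks 5.7-5.15 and the "KERNEL VERSION 2.6" / "KERNEL VERSION 2.4" / "COMMON CODE" banners further down). A minimal sketch of the idiom, using only the includes that actually appear in the changeset:

    /* Version-split idiom from the merged driver: one source file,
     * three regions -- 2.6-only, 2.4-only, and common code after the
     * final #endif.  KERNEL_VERSION(a,b,c) expands to
     * (a << 16) + (b << 8) + c, so the guard is an ordinary integer
     * comparison resolved by the preprocessor. */
    #include <linux/version.h>

    #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
    #include "block.h"            /* 2.6 frontend definitions */
    #else
    #include "common.h"           /* 2.4 frontend definitions */
    #include <linux/blk.h>
    #include <linux/tqueue.h>
    #endif

    /* ...version-specific open/ioctl/request handling... */

    /* helpers shared by both series follow the last #endif */

Forward declarations for the shared helpers (translate_req_to_pfn/mfn, flush_requests, kick_pending_request_queues) are hoisted above both regions, so either half can call into the common section at the bottom of the file.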
line diff
1.1 --- a/.rootkeys Thu Aug 12 18:24:13 2004 +0000 1.2 +++ b/.rootkeys Thu Aug 12 22:35:16 2004 +0000 1.3 @@ -54,7 +54,6 @@ 4083dc16z0jvZEH4PiVDbDRreaNp6w linux-2.4 1.4 4083dc16KQus88a4U3uCV6qVCA6_8Q linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/backend/Makefile 1.5 4075806dI5kfeMD5RV-DA0PYoThx_w linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile 1.6 4075806d4-j7vN0Mn0bklI1cRUX1vQ linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/common.h 1.7 -4075806d3fJqqDC1pYYPTZPc575iKg linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/main.c 1.8 4075806dibjCcfuXv6CINMhxWTw3jQ linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c 1.9 3e5a4e65G3e2s0ghPMgiJ-gBTUJ0uQ linux-2.4.26-xen-sparse/arch/xen/drivers/console/Makefile 1.10 3e5a4e656nfFISThfbyXQOA6HN6YHw linux-2.4.26-xen-sparse/arch/xen/drivers/dom0/Makefile
2.1 --- a/linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile Thu Aug 12 18:24:13 2004 +0000 2.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile Thu Aug 12 22:35:16 2004 +0000 2.3 @@ -1,3 +1,3 @@ 2.4 O_TARGET := drv.o 2.5 -obj-y := main.o vbd.o 2.6 +obj-y := blkfront.o vbd.o 2.7 include $(TOPDIR)/Rules.make
3.1 --- a/linux-2.4.26-xen-sparse/arch/xen/drivers/blkif/frontend/main.c Thu Aug 12 18:24:13 2004 +0000 3.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 3.3 @@ -1,846 +0,0 @@ 3.4 -/****************************************************************************** 3.5 - * arch/xen/drivers/blkif/frontend/main.c 3.6 - * 3.7 - * Xenolinux virtual block-device driver. 3.8 - * 3.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 3.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 3.11 - */ 3.12 - 3.13 -#include "common.h" 3.14 -#include <linux/blk.h> 3.15 -#include <linux/cdrom.h> 3.16 -#include <linux/tqueue.h> 3.17 -#include <linux/sched.h> 3.18 -#include <linux/interrupt.h> 3.19 -#include <scsi/scsi.h> 3.20 -#include <asm/ctrl_if.h> 3.21 - 3.22 -typedef unsigned char byte; /* from linux/ide.h */ 3.23 - 3.24 -#define BLKIF_STATE_CLOSED 0 3.25 -#define BLKIF_STATE_DISCONNECTED 1 3.26 -#define BLKIF_STATE_CONNECTED 2 3.27 -static unsigned int blkif_state = BLKIF_STATE_CLOSED; 3.28 -static unsigned int blkif_evtchn, blkif_irq; 3.29 - 3.30 -static int blkif_control_rsp_valid; 3.31 -static blkif_response_t blkif_control_rsp; 3.32 - 3.33 -static blkif_ring_t *blk_ring; 3.34 -static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */ 3.35 -static BLKIF_RING_IDX req_prod; /* Private request producer. */ 3.36 - 3.37 -static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for 3.38 - * recovery. Responses not stored here. */ 3.39 -static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for 3.40 - * recovery */ 3.41 -static int recovery = 0; /* "Recovery in progress" flag. Protected 3.42 - * by the io_request_lock */ 3.43 - 3.44 - 3.45 -/* We plug the I/O ring if the driver is suspended or if the ring is full. */ 3.46 -#define RING_PLUGGED (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \ 3.47 - (blkif_state != BLKIF_STATE_CONNECTED)) 3.48 - 3.49 - 3.50 -/* 3.51 - * Request queues with outstanding work, but ring is currently full. 3.52 - * We need no special lock here, as we always access this with the 3.53 - * io_request_lock held. We only need a small maximum list. 3.54 - */ 3.55 -#define MAX_PENDING 8 3.56 -static request_queue_t *pending_queues[MAX_PENDING]; 3.57 -static int nr_pending; 3.58 - 3.59 -static kdev_t sg_dev; 3.60 -static int sg_operation = -1; 3.61 -static unsigned long sg_next_sect; 3.62 -#define DISABLE_SCATTERGATHER() (sg_operation = -1) 3.63 - 3.64 - 3.65 -static inline void translate_req_to_pfn(blkif_request_t *xreq, 3.66 - blkif_request_t *req) 3.67 -{ 3.68 - int i; 3.69 - 3.70 - *xreq = *req; 3.71 - 3.72 - for ( i = 0; i < req->nr_segments; i++ ) 3.73 - { 3.74 - xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) | 3.75 - (machine_to_phys_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] << 3.76 - PAGE_SHIFT); 3.77 - } 3.78 -} 3.79 - 3.80 -static inline void translate_req_to_mfn(blkif_request_t *xreq, 3.81 - blkif_request_t *req) 3.82 -{ 3.83 - int i; 3.84 - 3.85 - *xreq = *req; 3.86 - 3.87 - for ( i = 0; i < req->nr_segments; i++ ) 3.88 - { 3.89 - xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) | 3.90 - (phys_to_machine_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] << 3.91 - PAGE_SHIFT); 3.92 - } 3.93 -} 3.94 - 3.95 - 3.96 -static inline void flush_requests(void) 3.97 -{ 3.98 - DISABLE_SCATTERGATHER(); 3.99 - wmb(); /* Ensure that the frontend can see the requests. 
*/ 3.100 - blk_ring->req_prod = req_prod; 3.101 - notify_via_evtchn(blkif_evtchn); 3.102 -} 3.103 - 3.104 - 3.105 -/* 3.106 - * blkif_update_int/update-vbds_task - handle VBD update events. 3.107 - * Schedule a task for keventd to run, which will update the VBDs and perform 3.108 - * the corresponding updates to our view of VBD state. 3.109 - */ 3.110 -static struct tq_struct update_tq; 3.111 -static void update_vbds_task(void *unused) 3.112 -{ 3.113 - xlvbd_update_vbds(); 3.114 -} 3.115 - 3.116 - 3.117 -int blkif_open(struct inode *inode, struct file *filep) 3.118 -{ 3.119 - short xldev = inode->i_rdev; 3.120 - struct gendisk *gd = get_gendisk(xldev); 3.121 - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 3.122 - short minor = MINOR(xldev); 3.123 - 3.124 - if ( gd->part[minor].nr_sects == 0 ) 3.125 - { 3.126 - /* 3.127 - * Device either doesn't exist, or has zero capacity; we use a few 3.128 - * cheesy heuristics to return the relevant error code 3.129 - */ 3.130 - if ( (gd->sizes[minor >> gd->minor_shift] != 0) || 3.131 - ((minor & (gd->max_p - 1)) != 0) ) 3.132 - { 3.133 - /* 3.134 - * We have a real device, but no such partition, or we just have a 3.135 - * partition number so guess this is the problem. 3.136 - */ 3.137 - return -ENXIO; /* no such device or address */ 3.138 - } 3.139 - else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) 3.140 - { 3.141 - /* This is a removable device => assume that media is missing. */ 3.142 - return -ENOMEDIUM; /* media not present (this is a guess) */ 3.143 - } 3.144 - else 3.145 - { 3.146 - /* Just go for the general 'no such device' error. */ 3.147 - return -ENODEV; /* no such device */ 3.148 - } 3.149 - } 3.150 - 3.151 - /* Update of usage count is protected by per-device semaphore. */ 3.152 - disk->usage++; 3.153 - 3.154 - return 0; 3.155 -} 3.156 - 3.157 - 3.158 -int blkif_release(struct inode *inode, struct file *filep) 3.159 -{ 3.160 - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 3.161 - 3.162 - /* 3.163 - * When usage drops to zero it may allow more VBD updates to occur. 3.164 - * Update of usage count is protected by a per-device semaphore. 3.165 - */ 3.166 - if ( --disk->usage == 0 ) 3.167 - { 3.168 -#if 0 3.169 - update_tq.routine = update_vbds_task; 3.170 - schedule_task(&update_tq); 3.171 -#endif 3.172 - } 3.173 - 3.174 - return 0; 3.175 -} 3.176 - 3.177 - 3.178 -int blkif_ioctl(struct inode *inode, struct file *filep, 3.179 - unsigned command, unsigned long argument) 3.180 -{ 3.181 - kdev_t dev = inode->i_rdev; 3.182 - struct hd_geometry *geo = (struct hd_geometry *)argument; 3.183 - struct gendisk *gd; 3.184 - struct hd_struct *part; 3.185 - int i; 3.186 - 3.187 - /* NB. No need to check permissions. That is done for us. 
*/ 3.188 - 3.189 - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 3.190 - command, (long) argument, dev); 3.191 - 3.192 - gd = get_gendisk(dev); 3.193 - part = &gd->part[MINOR(dev)]; 3.194 - 3.195 - switch ( command ) 3.196 - { 3.197 - case BLKGETSIZE: 3.198 - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 3.199 - return put_user(part->nr_sects, (unsigned long *) argument); 3.200 - 3.201 - case BLKGETSIZE64: 3.202 - DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, 3.203 - (u64)part->nr_sects * 512); 3.204 - return put_user((u64)part->nr_sects * 512, (u64 *) argument); 3.205 - 3.206 - case BLKRRPART: /* re-read partition table */ 3.207 - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); 3.208 - return blkif_revalidate(dev); 3.209 - 3.210 - case BLKSSZGET: 3.211 - return hardsect_size[MAJOR(dev)][MINOR(dev)]; 3.212 - 3.213 - case BLKBSZGET: /* get block size */ 3.214 - DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); 3.215 - break; 3.216 - 3.217 - case BLKBSZSET: /* set block size */ 3.218 - DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); 3.219 - break; 3.220 - 3.221 - case BLKRASET: /* set read-ahead */ 3.222 - DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); 3.223 - break; 3.224 - 3.225 - case BLKRAGET: /* get read-ahead */ 3.226 - DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); 3.227 - break; 3.228 - 3.229 - case HDIO_GETGEO: 3.230 - /* note: these values are complete garbage */ 3.231 - DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); 3.232 - if (!argument) return -EINVAL; 3.233 - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 3.234 - if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; 3.235 - if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; 3.236 - if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; 3.237 - return 0; 3.238 - 3.239 - case HDIO_GETGEO_BIG: 3.240 - /* note: these values are complete garbage */ 3.241 - DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); 3.242 - if (!argument) return -EINVAL; 3.243 - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 3.244 - if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; 3.245 - if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; 3.246 - if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; 3.247 - return 0; 3.248 - 3.249 - case CDROMMULTISESSION: 3.250 - DPRINTK("FIXME: support multisession CDs later\n"); 3.251 - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) 3.252 - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; 3.253 - return 0; 3.254 - 3.255 - case SCSI_IOCTL_GET_BUS_NUMBER: 3.256 - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif"); 3.257 - return -ENOSYS; 3.258 - 3.259 - default: 3.260 - printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command); 3.261 - return -ENOSYS; 3.262 - } 3.263 - 3.264 - return 0; 3.265 -} 3.266 - 3.267 -/* check media change: should probably do something here in some cases :-) */ 3.268 -int blkif_check(kdev_t dev) 3.269 -{ 3.270 - DPRINTK("blkif_check\n"); 3.271 - return 0; 3.272 -} 3.273 - 3.274 -int blkif_revalidate(kdev_t dev) 3.275 -{ 3.276 - struct block_device *bd; 3.277 - struct gendisk *gd; 3.278 - xl_disk_t *disk; 3.279 - unsigned long capacity; 3.280 - int i, rc = 0; 3.281 - 3.282 - if ( (bd = bdget(dev)) == NULL ) 3.283 - return -EINVAL; 3.284 - 3.285 - /* 3.286 - * Update of partition info, and check of usage count, is protected 3.287 - * by the per-block-device semaphore. 
3.288 - */ 3.289 - down(&bd->bd_sem); 3.290 - 3.291 - if ( ((gd = get_gendisk(dev)) == NULL) || 3.292 - ((disk = xldev_to_xldisk(dev)) == NULL) || 3.293 - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) 3.294 - { 3.295 - rc = -EINVAL; 3.296 - goto out; 3.297 - } 3.298 - 3.299 - if ( disk->usage > 1 ) 3.300 - { 3.301 - rc = -EBUSY; 3.302 - goto out; 3.303 - } 3.304 - 3.305 - /* Only reread partition table if VBDs aren't mapped to partitions. */ 3.306 - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) 3.307 - { 3.308 - for ( i = gd->max_p - 1; i >= 0; i-- ) 3.309 - { 3.310 - invalidate_device(dev+i, 1); 3.311 - gd->part[MINOR(dev+i)].start_sect = 0; 3.312 - gd->part[MINOR(dev+i)].nr_sects = 0; 3.313 - gd->sizes[MINOR(dev+i)] = 0; 3.314 - } 3.315 - 3.316 - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); 3.317 - } 3.318 - 3.319 - out: 3.320 - up(&bd->bd_sem); 3.321 - bdput(bd); 3.322 - return rc; 3.323 -} 3.324 - 3.325 - 3.326 -/* 3.327 - * blkif_queue_request 3.328 - * 3.329 - * request block io 3.330 - * 3.331 - * id: for guest use only. 3.332 - * operation: BLKIF_OP_{READ,WRITE,PROBE} 3.333 - * buffer: buffer to read/write into. this should be a 3.334 - * virtual address in the guest os. 3.335 - */ 3.336 -static int blkif_queue_request(unsigned long id, 3.337 - int operation, 3.338 - char * buffer, 3.339 - unsigned long sector_number, 3.340 - unsigned short nr_sectors, 3.341 - kdev_t device) 3.342 -{ 3.343 - unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 3.344 - struct gendisk *gd; 3.345 - blkif_request_t *req; 3.346 - struct buffer_head *bh; 3.347 - unsigned int fsect, lsect; 3.348 - 3.349 - fsect = (buffer_ma & ~PAGE_MASK) >> 9; 3.350 - lsect = fsect + nr_sectors - 1; 3.351 - 3.352 - /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */ 3.353 - if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) 3.354 - BUG(); 3.355 - if ( lsect > 7 ) 3.356 - BUG(); 3.357 - 3.358 - buffer_ma &= PAGE_MASK; 3.359 - 3.360 - if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) 3.361 - return 1; 3.362 - 3.363 - switch ( operation ) 3.364 - { 3.365 - 3.366 - case BLKIF_OP_READ: 3.367 - case BLKIF_OP_WRITE: 3.368 - gd = get_gendisk(device); 3.369 - 3.370 - /* 3.371 - * Update the sector_number we'll pass down as appropriate; note that 3.372 - * we could sanity check that resulting sector will be in this 3.373 - * partition, but this will happen in driver backend anyhow. 3.374 - */ 3.375 - sector_number += gd->part[MINOR(device)].start_sect; 3.376 - 3.377 - /* 3.378 - * If this unit doesn't consist of virtual partitions then we clear 3.379 - * the partn bits from the device number. 3.380 - */ 3.381 - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 3.382 - GENHD_FL_VIRT_PARTNS) ) 3.383 - device &= ~(gd->max_p - 1); 3.384 - 3.385 - if ( (sg_operation == operation) && 3.386 - (sg_dev == device) && 3.387 - (sg_next_sect == sector_number) ) 3.388 - { 3.389 - req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req; 3.390 - bh = (struct buffer_head *)id; 3.391 - bh->b_reqnext = (struct buffer_head *)req->id; 3.392 - req->id = id; 3.393 - req->frame_and_sects[req->nr_segments] = 3.394 - buffer_ma | (fsect<<3) | lsect; 3.395 - if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) 3.396 - sg_next_sect += nr_sectors; 3.397 - else 3.398 - DISABLE_SCATTERGATHER(); 3.399 - 3.400 - /* Update the copy of the request in the recovery ring. 
*/ 3.401 - translate_req_to_pfn(&blk_ring_rec->ring[ 3.402 - MASK_BLKIF_IDX(blk_ring_rec->req_prod - 1)].req, req); 3.403 - 3.404 - return 0; 3.405 - } 3.406 - else if ( RING_PLUGGED ) 3.407 - { 3.408 - return 1; 3.409 - } 3.410 - else 3.411 - { 3.412 - sg_operation = operation; 3.413 - sg_dev = device; 3.414 - sg_next_sect = sector_number + nr_sectors; 3.415 - } 3.416 - break; 3.417 - 3.418 - default: 3.419 - panic("unknown op %d\n", operation); 3.420 - } 3.421 - 3.422 - /* Fill out a communications ring structure. */ 3.423 - req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req; 3.424 - req->id = id; 3.425 - req->operation = operation; 3.426 - req->sector_number = (blkif_sector_t)sector_number; 3.427 - req->device = device; 3.428 - req->nr_segments = 1; 3.429 - req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect; 3.430 - req_prod++; 3.431 - 3.432 - /* Keep a private copy so we can reissue requests when recovering. */ 3.433 - translate_req_to_pfn(&blk_ring_rec->ring[ 3.434 - MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req, req); 3.435 - blk_ring_rec->req_prod++; 3.436 - 3.437 - return 0; 3.438 -} 3.439 - 3.440 - 3.441 -/* 3.442 - * do_blkif_request 3.443 - * read a block; request is in a request queue 3.444 - */ 3.445 -void do_blkif_request(request_queue_t *rq) 3.446 -{ 3.447 - struct request *req; 3.448 - struct buffer_head *bh, *next_bh; 3.449 - int rw, nsect, full, queued = 0; 3.450 - 3.451 - DPRINTK("Entered do_blkif_request\n"); 3.452 - 3.453 - while ( !rq->plugged && !list_empty(&rq->queue_head)) 3.454 - { 3.455 - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 3.456 - goto out; 3.457 - 3.458 - DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", 3.459 - req, req->cmd, req->sector, 3.460 - req->current_nr_sectors, req->nr_sectors, req->bh); 3.461 - 3.462 - rw = req->cmd; 3.463 - if ( rw == READA ) 3.464 - rw = READ; 3.465 - if ( unlikely((rw != READ) && (rw != WRITE)) ) 3.466 - panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); 3.467 - 3.468 - req->errors = 0; 3.469 - 3.470 - bh = req->bh; 3.471 - while ( bh != NULL ) 3.472 - { 3.473 - next_bh = bh->b_reqnext; 3.474 - bh->b_reqnext = NULL; 3.475 - 3.476 - full = blkif_queue_request( 3.477 - (unsigned long)bh, 3.478 - (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, 3.479 - bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); 3.480 - 3.481 - if ( full ) 3.482 - { 3.483 - bh->b_reqnext = next_bh; 3.484 - pending_queues[nr_pending++] = rq; 3.485 - if ( unlikely(nr_pending >= MAX_PENDING) ) 3.486 - BUG(); 3.487 - goto out; 3.488 - } 3.489 - 3.490 - queued++; 3.491 - 3.492 - /* Dequeue the buffer head from the request. */ 3.493 - nsect = bh->b_size >> 9; 3.494 - bh = req->bh = next_bh; 3.495 - 3.496 - if ( bh != NULL ) 3.497 - { 3.498 - /* There's another buffer head to do. Update the request. */ 3.499 - req->hard_sector += nsect; 3.500 - req->hard_nr_sectors -= nsect; 3.501 - req->sector = req->hard_sector; 3.502 - req->nr_sectors = req->hard_nr_sectors; 3.503 - req->current_nr_sectors = bh->b_size >> 9; 3.504 - req->buffer = bh->b_data; 3.505 - } 3.506 - else 3.507 - { 3.508 - /* That was the last buffer head. Finalise the request. 
*/ 3.509 - if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) 3.510 - BUG(); 3.511 - blkdev_dequeue_request(req); 3.512 - end_that_request_last(req); 3.513 - } 3.514 - } 3.515 - } 3.516 - 3.517 - out: 3.518 - if ( queued != 0 ) 3.519 - flush_requests(); 3.520 -} 3.521 - 3.522 - 3.523 -static void kick_pending_request_queues(void) 3.524 -{ 3.525 - /* We kick pending request queues if the ring is reasonably empty. */ 3.526 - if ( (nr_pending != 0) && 3.527 - ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) ) 3.528 - { 3.529 - /* Attempt to drain the queue, but bail if the ring becomes full. */ 3.530 - while ( (nr_pending != 0) && !RING_PLUGGED ) 3.531 - do_blkif_request(pending_queues[--nr_pending]); 3.532 - } 3.533 -} 3.534 - 3.535 - 3.536 -static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) 3.537 -{ 3.538 - BLKIF_RING_IDX i, rp; 3.539 - unsigned long flags; 3.540 - struct buffer_head *bh, *next_bh; 3.541 - 3.542 - spin_lock_irqsave(&io_request_lock, flags); 3.543 - 3.544 - if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) ) 3.545 - { 3.546 - spin_unlock_irqrestore(&io_request_lock, flags); 3.547 - return; 3.548 - } 3.549 - 3.550 - rp = blk_ring->resp_prod; 3.551 - rmb(); /* Ensure we see queued responses up to 'rp'. */ 3.552 - 3.553 - for ( i = resp_cons; i != rp; i++ ) 3.554 - { 3.555 - blkif_response_t *bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp; 3.556 - switch ( bret->operation ) 3.557 - { 3.558 - case BLKIF_OP_READ: 3.559 - case BLKIF_OP_WRITE: 3.560 - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) 3.561 - DPRINTK("Bad return from blkdev data request: %lx\n", 3.562 - bret->status); 3.563 - for ( bh = (struct buffer_head *)bret->id; 3.564 - bh != NULL; 3.565 - bh = next_bh ) 3.566 - { 3.567 - next_bh = bh->b_reqnext; 3.568 - bh->b_reqnext = NULL; 3.569 - bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY); 3.570 - } 3.571 - break; 3.572 - case BLKIF_OP_PROBE: 3.573 - memcpy(&blkif_control_rsp, bret, sizeof(*bret)); 3.574 - blkif_control_rsp_valid = 1; 3.575 - break; 3.576 - default: 3.577 - BUG(); 3.578 - } 3.579 - } 3.580 - 3.581 - resp_cons = i; 3.582 - resp_cons_rec = i; 3.583 - 3.584 - kick_pending_request_queues(); 3.585 - 3.586 - spin_unlock_irqrestore(&io_request_lock, flags); 3.587 -} 3.588 - 3.589 - 3.590 -void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) 3.591 -{ 3.592 - unsigned long flags; 3.593 - 3.594 - retry: 3.595 - while ( (req_prod - resp_cons) == BLKIF_RING_SIZE ) 3.596 - { 3.597 - set_current_state(TASK_INTERRUPTIBLE); 3.598 - schedule_timeout(1); 3.599 - } 3.600 - 3.601 - spin_lock_irqsave(&io_request_lock, flags); 3.602 - if ( (req_prod - resp_cons) == BLKIF_RING_SIZE ) 3.603 - { 3.604 - spin_unlock_irqrestore(&io_request_lock, flags); 3.605 - goto retry; 3.606 - } 3.607 - 3.608 - DISABLE_SCATTERGATHER(); 3.609 - blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req; 3.610 - 3.611 - translate_req_to_pfn(&blk_ring_rec->ring[ 3.612 - MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,req); 3.613 - 3.614 - req_prod++; 3.615 - flush_requests(); 3.616 - 3.617 - spin_unlock_irqrestore(&io_request_lock, flags); 3.618 - 3.619 - while ( !blkif_control_rsp_valid ) 3.620 - { 3.621 - set_current_state(TASK_INTERRUPTIBLE); 3.622 - schedule_timeout(1); 3.623 - } 3.624 - 3.625 - memcpy(rsp, &blkif_control_rsp, sizeof(*rsp)); 3.626 - blkif_control_rsp_valid = 0; 3.627 -} 3.628 - 3.629 - 3.630 -static void blkif_status_change(blkif_fe_interface_status_changed_t *status) 3.631 -{ 3.632 - ctrl_msg_t cmsg; 3.633 - 
blkif_fe_interface_connect_t up; 3.634 - 3.635 - if ( status->handle != 0 ) 3.636 - { 3.637 - printk(KERN_WARNING "Status change on unsupported blkif %d\n", 3.638 - status->handle); 3.639 - return; 3.640 - } 3.641 - 3.642 - switch ( status->status ) 3.643 - { 3.644 - case BLKIF_INTERFACE_STATUS_DESTROYED: 3.645 - printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n", 3.646 - blkif_state); 3.647 - break; 3.648 - 3.649 - case BLKIF_INTERFACE_STATUS_DISCONNECTED: 3.650 - if ( blkif_state != BLKIF_STATE_CLOSED ) 3.651 - { 3.652 - printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message" 3.653 - " in state %d\n", blkif_state); 3.654 - 3.655 - printk(KERN_INFO "VBD driver recovery in progress\n"); 3.656 - 3.657 - /* Prevent new requests being issued until we fix things up. */ 3.658 - spin_lock_irq(&io_request_lock); 3.659 - recovery = 1; 3.660 - blkif_state = BLKIF_STATE_DISCONNECTED; 3.661 - spin_unlock_irq(&io_request_lock); 3.662 - 3.663 - /* Free resources associated with old device channel. */ 3.664 - free_page((unsigned long)blk_ring); 3.665 - free_irq(blkif_irq, NULL); 3.666 - unbind_evtchn_from_irq(blkif_evtchn); 3.667 - } 3.668 - 3.669 - /* Move from CLOSED to DISCONNECTED state. */ 3.670 - blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL); 3.671 - blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; 3.672 - blkif_state = BLKIF_STATE_DISCONNECTED; 3.673 - 3.674 - /* Construct an interface-CONNECT message for the domain controller. */ 3.675 - cmsg.type = CMSG_BLKIF_FE; 3.676 - cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT; 3.677 - cmsg.length = sizeof(blkif_fe_interface_connect_t); 3.678 - up.handle = 0; 3.679 - up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT; 3.680 - memcpy(cmsg.msg, &up, sizeof(up)); 3.681 - 3.682 - /* Tell the controller to bring up the interface. */ 3.683 - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 3.684 - break; 3.685 - 3.686 - case BLKIF_INTERFACE_STATUS_CONNECTED: 3.687 - if ( blkif_state == BLKIF_STATE_CLOSED ) 3.688 - { 3.689 - printk(KERN_WARNING "Unexpected blkif-CONNECTED message" 3.690 - " in state %d\n", blkif_state); 3.691 - break; 3.692 - } 3.693 - 3.694 - blkif_evtchn = status->evtchn; 3.695 - blkif_irq = bind_evtchn_to_irq(blkif_evtchn); 3.696 - (void)request_irq(blkif_irq, blkif_int, 3.697 - SA_SAMPLE_RANDOM, "blkif", NULL); 3.698 - 3.699 - if ( recovery ) 3.700 - { 3.701 - int i,j; 3.702 - 3.703 - /* 3.704 - * Shouldn't need the io_request_lock here - the device is plugged 3.705 - * and the recovery flag prevents the interrupt handler changing 3.706 - * anything. 3.707 - */ 3.708 - 3.709 - /* Reissue requests from the private block ring. */ 3.710 - for ( i = 0; 3.711 - resp_cons_rec < blk_ring_rec->req_prod; 3.712 - resp_cons_rec++, i++ ) 3.713 - { 3.714 - translate_req_to_mfn( 3.715 - &blk_ring->ring[i].req, 3.716 - &blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req); 3.717 - } 3.718 - 3.719 - /* Reset the private block ring to match the new ring. */ 3.720 - for ( j = 0; j < i; j++ ) 3.721 - { 3.722 - translate_req_to_pfn( 3.723 - &blk_ring_rec->ring[j].req, 3.724 - &blk_ring->ring[j].req); 3.725 - } 3.726 - 3.727 - resp_cons_rec = 0; 3.728 - 3.729 - /* blk_ring->req_prod will be set when we flush_requests().*/ 3.730 - blk_ring_rec->req_prod = req_prod = i; 3.731 - wmb(); 3.732 - 3.733 - /* Switch off recovery mode, using a memory barrier to ensure that 3.734 - * it's seen before we flush requests - we don't want to miss any 3.735 - * interrupts. 
*/ 3.736 - recovery = 0; 3.737 - wmb(); 3.738 - 3.739 - /* Kicks things back into life. */ 3.740 - flush_requests(); 3.741 - } 3.742 - else 3.743 - { 3.744 - /* Probe for discs that are attached to the interface. */ 3.745 - xlvbd_init(); 3.746 - } 3.747 - 3.748 - blkif_state = BLKIF_STATE_CONNECTED; 3.749 - 3.750 - /* Kick pending requests. */ 3.751 - spin_lock_irq(&io_request_lock); 3.752 - kick_pending_request_queues(); 3.753 - spin_unlock_irq(&io_request_lock); 3.754 - 3.755 - break; 3.756 - 3.757 - default: 3.758 - printk(KERN_WARNING "Status change to unknown value %d\n", 3.759 - status->status); 3.760 - break; 3.761 - } 3.762 -} 3.763 - 3.764 - 3.765 -static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) 3.766 -{ 3.767 - switch ( msg->subtype ) 3.768 - { 3.769 - case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED: 3.770 - if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) ) 3.771 - goto parse_error; 3.772 - blkif_status_change((blkif_fe_interface_status_changed_t *) 3.773 - &msg->msg[0]); 3.774 - break; 3.775 -#if 0 3.776 - case CMSG_BLKIF_FE_VBD_STATUS_CHANGED: 3.777 - update_tq.routine = update_vbds_task; 3.778 - schedule_task(&update_tq); 3.779 - break; 3.780 -#endif 3.781 - default: 3.782 - goto parse_error; 3.783 - } 3.784 - 3.785 - ctrl_if_send_response(msg); 3.786 - return; 3.787 - 3.788 - parse_error: 3.789 - msg->length = 0; 3.790 - ctrl_if_send_response(msg); 3.791 -} 3.792 - 3.793 - 3.794 -int __init xlblk_init(void) 3.795 -{ 3.796 - ctrl_msg_t cmsg; 3.797 - blkif_fe_driver_status_changed_t st; 3.798 - 3.799 - if ( (start_info.flags & SIF_INITDOMAIN) 3.800 - || (start_info.flags & SIF_BLK_BE_DOMAIN) ) 3.801 - return 0; 3.802 - 3.803 - printk(KERN_INFO "Initialising Xen virtual block device\n"); 3.804 - 3.805 - blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL); 3.806 - memset(blk_ring_rec, 0, sizeof(*blk_ring_rec)); 3.807 - 3.808 - (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, 3.809 - CALLBACK_IN_BLOCKING_CONTEXT); 3.810 - 3.811 - /* Send a driver-UP notification to the domain controller. */ 3.812 - cmsg.type = CMSG_BLKIF_FE; 3.813 - cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED; 3.814 - cmsg.length = sizeof(blkif_fe_driver_status_changed_t); 3.815 - st.status = BLKIF_DRIVER_STATUS_UP; 3.816 - memcpy(cmsg.msg, &st, sizeof(st)); 3.817 - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 3.818 - 3.819 - /* 3.820 - * We should read 'nr_interfaces' from response message and wait 3.821 - * for notifications before proceeding. For now we assume that we 3.822 - * will be notified of exactly one interface. 3.823 - */ 3.824 - while ( blkif_state != BLKIF_STATE_CONNECTED ) 3.825 - { 3.826 - set_current_state(TASK_INTERRUPTIBLE); 3.827 - schedule_timeout(1); 3.828 - } 3.829 - 3.830 - return 0; 3.831 -} 3.832 - 3.833 -void blkdev_suspend(void) 3.834 -{ 3.835 -} 3.836 - 3.837 -void blkdev_resume(void) 3.838 -{ 3.839 - ctrl_msg_t cmsg; 3.840 - blkif_fe_driver_status_changed_t st; 3.841 - 3.842 - /* Send a driver-UP notification to the domain controller. */ 3.843 - cmsg.type = CMSG_BLKIF_FE; 3.844 - cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED; 3.845 - cmsg.length = sizeof(blkif_fe_driver_status_changed_t); 3.846 - st.status = BLKIF_DRIVER_STATUS_UP; 3.847 - memcpy(cmsg.msg, &st, sizeof(st)); 3.848 - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 3.849 -}
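The deleted 2.4 main.c above and the merged blkfront.c below gate submissions on the same free-running ring-index arithmetic: RING_PLUGGED here, BLKIF_RING_FULL in the new file. A standalone sketch of the idiom, with RING_SIZE, MASK_IDX and ring_idx_t as stand-ins for the real BLKIF_RING_SIZE, MASK_BLKIF_IDX and BLKIF_RING_IDX:

    /* Free-running producer/consumer indices: never reset, only
     * masked on access.  Because both counters are unsigned and only
     * ever grow, req_prod - resp_cons is the number of requests in
     * flight even after the counters wrap, so "full" is a single
     * subtraction and compare. */
    #include <stdint.h>

    #define RING_SIZE 64                  /* must be a power of two */
    #define MASK_IDX(i) ((i) & (RING_SIZE - 1))

    typedef uint32_t ring_idx_t;

    static ring_idx_t req_prod;           /* private request producer */
    static ring_idx_t resp_cons;          /* response consumer */

    static int ring_full(void)
    {
        return (req_prod - resp_cons) == RING_SIZE;
    }

    static unsigned int next_slot(void)
    {
        /* caller fills ring[next_slot()] and then does req_prod++ */
        return MASK_IDX(req_prod);
    }

This is also why kick_pending_request_queues() can cheaply wait for the ring to drain below half full, (req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1), before restarting plugged queues.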
4.1 --- a/linux-2.4.26-xen-sparse/mkbuildtree Thu Aug 12 18:24:13 2004 +0000 4.2 +++ b/linux-2.4.26-xen-sparse/mkbuildtree Thu Aug 12 22:35:16 2004 +0000 4.3 @@ -267,3 +267,8 @@ ln -sf ../../../../../${LINUX_26}/driver 4.4 ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/control.c 4.5 ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/interface.c 4.6 ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/vbd.c 4.7 + 4.8 +cd ${AD}/arch/xen/drivers/blkif/frontend 4.9 +ln -sf ../../../../../${LINUX_26}/drivers/xen/blkfront/blkfront.c 4.10 + 4.11 +
5.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Aug 12 18:24:13 2004 +0000 5.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Aug 12 22:35:16 2004 +0000 5.3 @@ -8,7 +8,16 @@ 5.4 * Copyright (c) 2004, Christian Limpach 5.5 */ 5.6 5.7 +#include <linux/version.h> 5.8 + 5.9 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 5.10 #include "block.h" 5.11 +#else 5.12 +#include "common.h" 5.13 +#include <linux/blk.h> 5.14 +#include <linux/tqueue.h> 5.15 +#endif 5.16 + 5.17 #include <linux/cdrom.h> 5.18 #include <linux/sched.h> 5.19 #include <linux/interrupt.h> 5.20 @@ -50,56 +59,21 @@ static int recovery = 0; /* "R 5.21 static request_queue_t *pending_queues[MAX_PENDING]; 5.22 static int nr_pending; 5.23 5.24 -static inline void translate_req_to_pfn(blkif_request_t *xreq, 5.25 - blkif_request_t *req) 5.26 -{ 5.27 - int i; 5.28 - 5.29 - *xreq = *req; 5.30 +static inline void translate_req_to_mfn(blkif_request_t *xreq, 5.31 + blkif_request_t *req); 5.32 5.33 - for ( i = 0; i < req->nr_segments; i++ ) 5.34 - { 5.35 - xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) | 5.36 - (machine_to_phys_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] << 5.37 - PAGE_SHIFT); 5.38 - } 5.39 -} 5.40 +static inline void translate_req_to_pfn(blkif_request_t *xreq, 5.41 + blkif_request_t *req); 5.42 5.43 -static inline void translate_req_to_mfn(blkif_request_t *xreq, 5.44 - blkif_request_t *req) 5.45 -{ 5.46 - int i; 5.47 - 5.48 - *xreq = *req; 5.49 +static inline void flush_requests(void); 5.50 5.51 - for ( i = 0; i < req->nr_segments; i++ ) 5.52 - { 5.53 - xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) | 5.54 - (phys_to_machine_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] << 5.55 - PAGE_SHIFT); 5.56 - } 5.57 -} 5.58 +static void kick_pending_request_queues(void); 5.59 + 5.60 +/************************** KERNEL VERSION 2.6 **************************/ 5.61 5.62 -static inline void flush_requests(void) 5.63 -{ 5.64 - wmb(); /* Ensure that the frontend can see the requests. */ 5.65 - blk_ring->req_prod = req_prod; 5.66 - notify_via_evtchn(blkif_evtchn); 5.67 -} 5.68 - 5.69 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 5.70 5.71 -#if 0 5.72 -/* 5.73 - * blkif_update_int/update-vbds_task - handle VBD update events. 5.74 - * Schedule a task for keventd to run, which will update the VBDs and perform 5.75 - * the corresponding updates to our view of VBD state. 5.76 - */ 5.77 -static struct tq_struct update_tq; 5.78 -static void update_vbds_task(void *unused) 5.79 -{ 5.80 - xlvbd_update_vbds(); 5.81 -} 5.82 -#endif 5.83 +#define DISABLE_SCATTERGATHER() 5.84 5.85 5.86 int blkif_open(struct inode *inode, struct file *filep) 5.87 @@ -217,7 +191,6 @@ int blkif_revalidate(kdev_t dev) 5.88 } 5.89 #endif 5.90 5.91 - 5.92 /* 5.93 * blkif_queue_request 5.94 * 5.95 @@ -281,6 +254,7 @@ static int blkif_queue_request(struct re 5.96 return 0; 5.97 } 5.98 5.99 + 5.100 /* 5.101 * do_blkif_request 5.102 * read a block; request is in a request queue 5.103 @@ -321,45 +295,6 @@ void do_blkif_request(request_queue_t *r 5.104 } 5.105 5.106 5.107 -static void kick_pending_request_queues(void) 5.108 -{ 5.109 - /* We kick pending request queues if the ring is reasonably empty. */ 5.110 - if ( (nr_pending != 0) && 5.111 - ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) ) 5.112 - { 5.113 - /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ 5.114 - while ( (nr_pending != 0) && !BLKIF_RING_FULL ) 5.115 - do_blkif_request(pending_queues[--nr_pending]); 5.116 - } 5.117 -} 5.118 - 5.119 - 5.120 -/* Upon block read completion, issue a dummy machphys update for the 5.121 -pages in the buffer, just in case we're being migrated. 5.122 -THIS CODE SHOULD BE REMOVED WHEN WE HAVE GRANT TABLES */ 5.123 - 5.124 -static void blkif_completion(blkif_response_t *bret, struct request *req) 5.125 -{ 5.126 -#if 0 5.127 - struct bio *bio; 5.128 - struct bio_vec *bvec; 5.129 - int idx; 5.130 - unsigned long mfn, pfn; 5.131 - 5.132 - if( bret->operation == BLKIF_OP_READ ) 5.133 - { 5.134 - rq_for_each_bio(bio, req) { 5.135 - bio_for_each_segment(bvec, bio, idx) { 5.136 - mfn = page_to_phys(bvec->bv_page)>>PAGE_SHIFT; 5.137 - pfn = machine_to_phys_mapping[mfn]; 5.138 - queue_machphys_update(mfn, pfn); 5.139 - } 5.140 - } 5.141 - } 5.142 -#endif 5.143 -} 5.144 - 5.145 - 5.146 static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) 5.147 { 5.148 struct request *req; 5.149 @@ -396,7 +331,6 @@ static irqreturn_t blkif_int(int irq, vo 5.150 req->hard_nr_sectors)) ) 5.151 BUG(); 5.152 end_that_request_last(req); 5.153 - blkif_completion(bret, req); 5.154 break; 5.155 case BLKIF_OP_PROBE: 5.156 memcpy(&blkif_control_rsp, bret, sizeof(*bret)); 5.157 @@ -425,6 +359,549 @@ static irqreturn_t blkif_int(int irq, vo 5.158 return IRQ_HANDLED; 5.159 } 5.160 5.161 +#else 5.162 +/************************** KERNEL VERSION 2.4 **************************/ 5.163 + 5.164 +static kdev_t sg_dev; 5.165 +static int sg_operation = -1; 5.166 +static unsigned long sg_next_sect; 5.167 + 5.168 +#define DISABLE_SCATTERGATHER() (sg_operation = -1) 5.169 + 5.170 +#define blkif_io_lock io_request_lock 5.171 + 5.172 +/* 5.173 + * blkif_update_int/update-vbds_task - handle VBD update events. 5.174 + * Schedule a task for keventd to run, which will update the VBDs and perform 5.175 + * the corresponding updates to our view of VBD state. 5.176 + */ 5.177 + 5.178 +#if 0 5.179 +static struct tq_struct update_tq; 5.180 +static void update_vbds_task(void *unused) 5.181 +{ 5.182 + xlvbd_update_vbds(); 5.183 +} 5.184 +#endif 5.185 + 5.186 +int blkif_open(struct inode *inode, struct file *filep) 5.187 +{ 5.188 + short xldev = inode->i_rdev; 5.189 + struct gendisk *gd = get_gendisk(xldev); 5.190 + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 5.191 + short minor = MINOR(xldev); 5.192 + 5.193 + if ( gd->part[minor].nr_sects == 0 ) 5.194 + { 5.195 + /* 5.196 + * Device either doesn't exist, or has zero capacity; we use a few 5.197 + * cheesy heuristics to return the relevant error code 5.198 + */ 5.199 + if ( (gd->sizes[minor >> gd->minor_shift] != 0) || 5.200 + ((minor & (gd->max_p - 1)) != 0) ) 5.201 + { 5.202 + /* 5.203 + * We have a real device, but no such partition, or we just have a 5.204 + * partition number so guess this is the problem. 5.205 + */ 5.206 + return -ENXIO; /* no such device or address */ 5.207 + } 5.208 + else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) 5.209 + { 5.210 + /* This is a removable device => assume that media is missing. */ 5.211 + return -ENOMEDIUM; /* media not present (this is a guess) */ 5.212 + } 5.213 + else 5.214 + { 5.215 + /* Just go for the general 'no such device' error. */ 5.216 + return -ENODEV; /* no such device */ 5.217 + } 5.218 + } 5.219 + 5.220 + /* Update of usage count is protected by per-device semaphore. 
*/ 5.221 + disk->usage++; 5.222 + 5.223 + return 0; 5.224 +} 5.225 + 5.226 + 5.227 +int blkif_release(struct inode *inode, struct file *filep) 5.228 +{ 5.229 + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 5.230 + 5.231 + /* 5.232 + * When usage drops to zero it may allow more VBD updates to occur. 5.233 + * Update of usage count is protected by a per-device semaphore. 5.234 + */ 5.235 + if ( --disk->usage == 0 ) 5.236 + { 5.237 +#if 0 5.238 + update_tq.routine = update_vbds_task; 5.239 + schedule_task(&update_tq); 5.240 +#endif 5.241 + } 5.242 + 5.243 + return 0; 5.244 +} 5.245 + 5.246 + 5.247 +int blkif_ioctl(struct inode *inode, struct file *filep, 5.248 + unsigned command, unsigned long argument) 5.249 +{ 5.250 + kdev_t dev = inode->i_rdev; 5.251 + struct hd_geometry *geo = (struct hd_geometry *)argument; 5.252 + struct gendisk *gd; 5.253 + struct hd_struct *part; 5.254 + int i; 5.255 + 5.256 + /* NB. No need to check permissions. That is done for us. */ 5.257 + 5.258 + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 5.259 + command, (long) argument, dev); 5.260 + 5.261 + gd = get_gendisk(dev); 5.262 + part = &gd->part[MINOR(dev)]; 5.263 + 5.264 + switch ( command ) 5.265 + { 5.266 + case BLKGETSIZE: 5.267 + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 5.268 + return put_user(part->nr_sects, (unsigned long *) argument); 5.269 + 5.270 + case BLKGETSIZE64: 5.271 + DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, 5.272 + (u64)part->nr_sects * 512); 5.273 + return put_user((u64)part->nr_sects * 512, (u64 *) argument); 5.274 + 5.275 + case BLKRRPART: /* re-read partition table */ 5.276 + DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); 5.277 + return blkif_revalidate(dev); 5.278 + 5.279 + case BLKSSZGET: 5.280 + return hardsect_size[MAJOR(dev)][MINOR(dev)]; 5.281 + 5.282 + case BLKBSZGET: /* get block size */ 5.283 + DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); 5.284 + break; 5.285 + 5.286 + case BLKBSZSET: /* set block size */ 5.287 + DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); 5.288 + break; 5.289 + 5.290 + case BLKRASET: /* set read-ahead */ 5.291 + DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); 5.292 + break; 5.293 + 5.294 + case BLKRAGET: /* get read-ahead */ 5.295 + DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); 5.296 + break; 5.297 + 5.298 + case HDIO_GETGEO: 5.299 + /* note: these values are complete garbage */ 5.300 + DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); 5.301 + if (!argument) return -EINVAL; 5.302 + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 5.303 + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; 5.304 + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; 5.305 + if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; 5.306 + return 0; 5.307 + 5.308 + case HDIO_GETGEO_BIG: 5.309 + /* note: these values are complete garbage */ 5.310 + DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); 5.311 + if (!argument) return -EINVAL; 5.312 + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 5.313 + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; 5.314 + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; 5.315 + if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; 5.316 + return 0; 5.317 + 5.318 + case CDROMMULTISESSION: 5.319 + DPRINTK("FIXME: support multisession CDs later\n"); 5.320 + for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) 5.321 + if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; 5.322 + return 
0; 5.323 + 5.324 + case SCSI_IOCTL_GET_BUS_NUMBER: 5.325 + DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif"); 5.326 + return -ENOSYS; 5.327 + 5.328 + default: 5.329 + printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command); 5.330 + return -ENOSYS; 5.331 + } 5.332 + 5.333 + return 0; 5.334 +} 5.335 + 5.336 + 5.337 + 5.338 +/* check media change: should probably do something here in some cases :-) */ 5.339 +int blkif_check(kdev_t dev) 5.340 +{ 5.341 + DPRINTK("blkif_check\n"); 5.342 + return 0; 5.343 +} 5.344 + 5.345 +int blkif_revalidate(kdev_t dev) 5.346 +{ 5.347 + struct block_device *bd; 5.348 + struct gendisk *gd; 5.349 + xl_disk_t *disk; 5.350 + unsigned long capacity; 5.351 + int i, rc = 0; 5.352 + 5.353 + if ( (bd = bdget(dev)) == NULL ) 5.354 + return -EINVAL; 5.355 + 5.356 + /* 5.357 + * Update of partition info, and check of usage count, is protected 5.358 + * by the per-block-device semaphore. 5.359 + */ 5.360 + down(&bd->bd_sem); 5.361 + 5.362 + if ( ((gd = get_gendisk(dev)) == NULL) || 5.363 + ((disk = xldev_to_xldisk(dev)) == NULL) || 5.364 + ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) 5.365 + { 5.366 + rc = -EINVAL; 5.367 + goto out; 5.368 + } 5.369 + 5.370 + if ( disk->usage > 1 ) 5.371 + { 5.372 + rc = -EBUSY; 5.373 + goto out; 5.374 + } 5.375 + 5.376 + /* Only reread partition table if VBDs aren't mapped to partitions. */ 5.377 + if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) 5.378 + { 5.379 + for ( i = gd->max_p - 1; i >= 0; i-- ) 5.380 + { 5.381 + invalidate_device(dev+i, 1); 5.382 + gd->part[MINOR(dev+i)].start_sect = 0; 5.383 + gd->part[MINOR(dev+i)].nr_sects = 0; 5.384 + gd->sizes[MINOR(dev+i)] = 0; 5.385 + } 5.386 + 5.387 + grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); 5.388 + } 5.389 + 5.390 + out: 5.391 + up(&bd->bd_sem); 5.392 + bdput(bd); 5.393 + return rc; 5.394 +} 5.395 + 5.396 + 5.397 + 5.398 + 5.399 +/* 5.400 + * blkif_queue_request 5.401 + * 5.402 + * request block io 5.403 + * 5.404 + * id: for guest use only. 5.405 + * operation: BLKIF_OP_{READ,WRITE,PROBE} 5.406 + * buffer: buffer to read/write into. this should be a 5.407 + * virtual address in the guest os. 5.408 + */ 5.409 +static int blkif_queue_request(unsigned long id, 5.410 + int operation, 5.411 + char * buffer, 5.412 + unsigned long sector_number, 5.413 + unsigned short nr_sectors, 5.414 + kdev_t device) 5.415 +{ 5.416 + unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 5.417 + struct gendisk *gd; 5.418 + blkif_request_t *req; 5.419 + struct buffer_head *bh; 5.420 + unsigned int fsect, lsect; 5.421 + 5.422 + fsect = (buffer_ma & ~PAGE_MASK) >> 9; 5.423 + lsect = fsect + nr_sectors - 1; 5.424 + 5.425 + /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */ 5.426 + if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) 5.427 + BUG(); 5.428 + if ( lsect > 7 ) 5.429 + BUG(); 5.430 + 5.431 + buffer_ma &= PAGE_MASK; 5.432 + 5.433 + if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) 5.434 + return 1; 5.435 + 5.436 + switch ( operation ) 5.437 + { 5.438 + 5.439 + case BLKIF_OP_READ: 5.440 + case BLKIF_OP_WRITE: 5.441 + gd = get_gendisk(device); 5.442 + 5.443 + /* 5.444 + * Update the sector_number we'll pass down as appropriate; note that 5.445 + * we could sanity check that resulting sector will be in this 5.446 + * partition, but this will happen in driver backend anyhow. 
5.447 + */ 5.448 + sector_number += gd->part[MINOR(device)].start_sect; 5.449 + 5.450 + /* 5.451 + * If this unit doesn't consist of virtual partitions then we clear 5.452 + * the partn bits from the device number. 5.453 + */ 5.454 + if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 5.455 + GENHD_FL_VIRT_PARTNS) ) 5.456 + device &= ~(gd->max_p - 1); 5.457 + 5.458 + if ( (sg_operation == operation) && 5.459 + (sg_dev == device) && 5.460 + (sg_next_sect == sector_number) ) 5.461 + { 5.462 + req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req; 5.463 + bh = (struct buffer_head *)id; 5.464 + bh->b_reqnext = (struct buffer_head *)req->id; 5.465 + req->id = id; 5.466 + req->frame_and_sects[req->nr_segments] = 5.467 + buffer_ma | (fsect<<3) | lsect; 5.468 + if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) 5.469 + sg_next_sect += nr_sectors; 5.470 + else 5.471 + DISABLE_SCATTERGATHER(); 5.472 + 5.473 + /* Update the copy of the request in the recovery ring. */ 5.474 + translate_req_to_pfn(&blk_ring_rec->ring[ 5.475 + MASK_BLKIF_IDX(blk_ring_rec->req_prod - 1)].req, req); 5.476 + 5.477 + return 0; 5.478 + } 5.479 + else if ( BLKIF_RING_FULL ) 5.480 + { 5.481 + return 1; 5.482 + } 5.483 + else 5.484 + { 5.485 + sg_operation = operation; 5.486 + sg_dev = device; 5.487 + sg_next_sect = sector_number + nr_sectors; 5.488 + } 5.489 + break; 5.490 + 5.491 + default: 5.492 + panic("unknown op %d\n", operation); 5.493 + } 5.494 + 5.495 + /* Fill out a communications ring structure. */ 5.496 + req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req; 5.497 + req->id = id; 5.498 + req->operation = operation; 5.499 + req->sector_number = (blkif_sector_t)sector_number; 5.500 + req->device = device; 5.501 + req->nr_segments = 1; 5.502 + req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect; 5.503 + req_prod++; 5.504 + 5.505 + /* Keep a private copy so we can reissue requests when recovering. */ 5.506 + translate_req_to_pfn(&blk_ring_rec->ring[ 5.507 + MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req, req); 5.508 + blk_ring_rec->req_prod++; 5.509 + 5.510 + return 0; 5.511 +} 5.512 + 5.513 + 5.514 +/* 5.515 + * do_blkif_request 5.516 + * read a block; request is in a request queue 5.517 + */ 5.518 +void do_blkif_request(request_queue_t *rq) 5.519 +{ 5.520 + struct request *req; 5.521 + struct buffer_head *bh, *next_bh; 5.522 + int rw, nsect, full, queued = 0; 5.523 + 5.524 + DPRINTK("Entered do_blkif_request\n"); 5.525 + 5.526 + while ( !rq->plugged && !list_empty(&rq->queue_head)) 5.527 + { 5.528 + if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 5.529 + goto out; 5.530 + 5.531 + DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", 5.532 + req, req->cmd, req->sector, 5.533 + req->current_nr_sectors, req->nr_sectors, req->bh); 5.534 + 5.535 + rw = req->cmd; 5.536 + if ( rw == READA ) 5.537 + rw = READ; 5.538 + if ( unlikely((rw != READ) && (rw != WRITE)) ) 5.539 + panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); 5.540 + 5.541 + req->errors = 0; 5.542 + 5.543 + bh = req->bh; 5.544 + while ( bh != NULL ) 5.545 + { 5.546 + next_bh = bh->b_reqnext; 5.547 + bh->b_reqnext = NULL; 5.548 + 5.549 + full = blkif_queue_request( 5.550 + (unsigned long)bh, 5.551 + (rw == READ) ? 
BLKIF_OP_READ : BLKIF_OP_WRITE, 5.552 + bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); 5.553 + 5.554 + if ( full ) 5.555 + { 5.556 + bh->b_reqnext = next_bh; 5.557 + pending_queues[nr_pending++] = rq; 5.558 + if ( unlikely(nr_pending >= MAX_PENDING) ) 5.559 + BUG(); 5.560 + goto out; 5.561 + } 5.562 + 5.563 + queued++; 5.564 + 5.565 + /* Dequeue the buffer head from the request. */ 5.566 + nsect = bh->b_size >> 9; 5.567 + bh = req->bh = next_bh; 5.568 + 5.569 + if ( bh != NULL ) 5.570 + { 5.571 + /* There's another buffer head to do. Update the request. */ 5.572 + req->hard_sector += nsect; 5.573 + req->hard_nr_sectors -= nsect; 5.574 + req->sector = req->hard_sector; 5.575 + req->nr_sectors = req->hard_nr_sectors; 5.576 + req->current_nr_sectors = bh->b_size >> 9; 5.577 + req->buffer = bh->b_data; 5.578 + } 5.579 + else 5.580 + { 5.581 + /* That was the last buffer head. Finalise the request. */ 5.582 + if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) 5.583 + BUG(); 5.584 + blkdev_dequeue_request(req); 5.585 + end_that_request_last(req); 5.586 + } 5.587 + } 5.588 + } 5.589 + 5.590 + out: 5.591 + if ( queued != 0 ) 5.592 + flush_requests(); 5.593 +} 5.594 + 5.595 + 5.596 +static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) 5.597 +{ 5.598 + BLKIF_RING_IDX i, rp; 5.599 + unsigned long flags; 5.600 + struct buffer_head *bh, *next_bh; 5.601 + 5.602 + spin_lock_irqsave(&io_request_lock, flags); 5.603 + 5.604 + if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) ) 5.605 + { 5.606 + spin_unlock_irqrestore(&io_request_lock, flags); 5.607 + return; 5.608 + } 5.609 + 5.610 + rp = blk_ring->resp_prod; 5.611 + rmb(); /* Ensure we see queued responses up to 'rp'. */ 5.612 + 5.613 + for ( i = resp_cons; i != rp; i++ ) 5.614 + { 5.615 + blkif_response_t *bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp; 5.616 + switch ( bret->operation ) 5.617 + { 5.618 + case BLKIF_OP_READ: 5.619 + case BLKIF_OP_WRITE: 5.620 + if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) 5.621 + DPRINTK("Bad return from blkdev data request: %lx\n", 5.622 + bret->status); 5.623 + for ( bh = (struct buffer_head *)bret->id; 5.624 + bh != NULL; 5.625 + bh = next_bh ) 5.626 + { 5.627 + next_bh = bh->b_reqnext; 5.628 + bh->b_reqnext = NULL; 5.629 + bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY); 5.630 + } 5.631 + break; 5.632 + case BLKIF_OP_PROBE: 5.633 + memcpy(&blkif_control_rsp, bret, sizeof(*bret)); 5.634 + blkif_control_rsp_valid = 1; 5.635 + break; 5.636 + default: 5.637 + BUG(); 5.638 + } 5.639 + } 5.640 + 5.641 + resp_cons = i; 5.642 + resp_cons_rec = i; 5.643 + 5.644 + kick_pending_request_queues(); 5.645 + 5.646 + spin_unlock_irqrestore(&io_request_lock, flags); 5.647 +} 5.648 + 5.649 +#endif 5.650 + 5.651 +/***************************** COMMON CODE *******************************/ 5.652 + 5.653 + 5.654 +static inline void translate_req_to_pfn(blkif_request_t *xreq, 5.655 + blkif_request_t *req) 5.656 +{ 5.657 + int i; 5.658 + 5.659 + *xreq = *req; 5.660 + 5.661 + for ( i = 0; i < req->nr_segments; i++ ) 5.662 + { 5.663 + xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) | 5.664 + (machine_to_phys_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] << 5.665 + PAGE_SHIFT); 5.666 + } 5.667 +} 5.668 + 5.669 +static inline void translate_req_to_mfn(blkif_request_t *xreq, 5.670 + blkif_request_t *req) 5.671 +{ 5.672 + int i; 5.673 + 5.674 + *xreq = *req; 5.675 + 5.676 + for ( i = 0; i < req->nr_segments; i++ ) 5.677 + { 5.678 + xreq->frame_and_sects[i] = (req->frame_and_sects[i] & 
~PAGE_MASK) | 5.679 + (phys_to_machine_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] << 5.680 + PAGE_SHIFT); 5.681 + } 5.682 +} 5.683 + 5.684 +static inline void flush_requests(void) 5.685 +{ 5.686 + DISABLE_SCATTERGATHER(); 5.687 + wmb(); /* Ensure that the frontend can see the requests. */ 5.688 + blk_ring->req_prod = req_prod; 5.689 + notify_via_evtchn(blkif_evtchn); 5.690 +} 5.691 + 5.692 + 5.693 +static void kick_pending_request_queues(void) 5.694 +{ 5.695 + /* We kick pending request queues if the ring is reasonably empty. */ 5.696 + if ( (nr_pending != 0) && 5.697 + ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) ) 5.698 + { 5.699 + /* Attempt to drain the queue, but bail if the ring becomes full. */ 5.700 + while ( (nr_pending != 0) && !BLKIF_RING_FULL ) 5.701 + do_blkif_request(pending_queues[--nr_pending]); 5.702 + } 5.703 +} 5.704 5.705 void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) 5.706 { 5.707 @@ -444,10 +921,11 @@ void blkif_control_send(blkif_request_t 5.708 goto retry; 5.709 } 5.710 5.711 + DISABLE_SCATTERGATHER(); 5.712 blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req; 5.713 - translate_req_to_pfn( 5.714 - &blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req, 5.715 - req); 5.716 + 5.717 + translate_req_to_pfn(&blk_ring_rec->ring[ 5.718 + MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,req); 5.719 5.720 req_prod++; 5.721 flush_requests();
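The translate_req_to_pfn()/translate_req_to_mfn() pair that this hunk moves into the common section keeps the private recovery ring migration-safe: queued requests are stored with pseudo-physical frame numbers and rewritten to machine frames only when they are reissued after reconnect. A sketch of the per-entry rewrite, with m2p[] and p2m[] as illustrative stand-ins for Xen's machine_to_phys_mapping and phys_to_machine_mapping tables:

    /* Each frame_and_sects entry packs a frame number in the
     * page-aligned upper bits and the first/last sector numbers in
     * the low bits (fsect<<3 | lsect).  Only the frame part is
     * translated; the low sector bits (~PAGE_MASK) pass through
     * unchanged. */
    #include <stdint.h>

    #define PAGE_SHIFT 12
    #define PAGE_MASK  (~((1UL << PAGE_SHIFT) - 1))

    /* sized for 4GB of 4KB pages in this sketch */
    static unsigned long m2p[1UL << 20];  /* machine -> pseudo-phys */
    static unsigned long p2m[1UL << 20];  /* pseudo-phys -> machine */

    static unsigned long entry_to_pfn(unsigned long e)
    {
        return (e & ~PAGE_MASK) | (m2p[e >> PAGE_SHIFT] << PAGE_SHIFT);
    }

    static unsigned long entry_to_mfn(unsigned long e)
    {
        return (e & ~PAGE_MASK) | (p2m[e >> PAGE_SHIFT] << PAGE_SHIFT);
    }

Only the frame numbers change across a migration; the packed sector bits are preserved verbatim, so recovery can translate each saved request back to machine frames and push it straight onto the new shared ring.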