ia64/xen-unstable
changeset 2017:767b46546305
bitkeeper revision 1.1108.47.1 (410d0894HhCNQH1pLGY2q0pUmuKCfQ)
add block backend driver for Linux 2.6
line diff
1.1 --- a/.rootkeys Sat Jul 31 16:13:15 2004 +0000 1.2 +++ b/.rootkeys Sun Aug 01 15:13:24 2004 +0000 1.3 @@ -190,11 +190,17 @@ 40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6 1.4 3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.7-xen-sparse/arch/xen/kernel/xen_proc.c 1.5 4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.7-xen-sparse/drivers/char/mem.c 1.6 40f56239Dp_vMTgz8TEbvo1hjHGc3w linux-2.6.7-xen-sparse/drivers/xen/Makefile 1.7 -40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.7-xen-sparse/drivers/xen/block/Kconfig 1.8 -40f562395atl9x4suKGhPkjqLOXESg linux-2.6.7-xen-sparse/drivers/xen/block/Makefile 1.9 -40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.7-xen-sparse/drivers/xen/block/block.c 1.10 -40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.7-xen-sparse/drivers/xen/block/block.h 1.11 -40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.7-xen-sparse/drivers/xen/block/vbd.c 1.12 +410d0893otFGghmv4dUXDUBBdY5aIA linux-2.6.7-xen-sparse/drivers/xen/blkback/Makefile 1.13 +4087cf0d1XgMkooTZAiJS6NrcpLQNQ linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c 1.14 +4087cf0dZadZ8r6CEt4fNN350Yle3A linux-2.6.7-xen-sparse/drivers/xen/blkback/common.h 1.15 +4087cf0dxlh29iw0w-9rxOCEGCjPcw linux-2.6.7-xen-sparse/drivers/xen/blkback/control.c 1.16 +4087cf0dbuoH20fMjNZjcgrRK-1msQ linux-2.6.7-xen-sparse/drivers/xen/blkback/interface.c 1.17 +4087cf0dk97tacDzxfByWV7JifUYqA linux-2.6.7-xen-sparse/drivers/xen/blkback/vbd.c 1.18 +40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.7-xen-sparse/drivers/xen/blkfront/Kconfig 1.19 +40f562395atl9x4suKGhPkjqLOXESg linux-2.6.7-xen-sparse/drivers/xen/blkfront/Makefile 1.20 +40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c 1.21 +40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.7-xen-sparse/drivers/xen/blkfront/block.h 1.22 +40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.7-xen-sparse/drivers/xen/blkfront/vbd.c 1.23 40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.7-xen-sparse/drivers/xen/console/Makefile 1.24 3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.7-xen-sparse/drivers/xen/console/console.c 1.25 40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.7-xen-sparse/drivers/xen/evtchn/Makefile
2.1 --- a/linux-2.6.7-xen-sparse/arch/xen/Kconfig Sat Jul 31 16:13:15 2004 +0000 2.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/Kconfig Sun Aug 01 15:13:24 2004 +0000 2.3 @@ -38,6 +38,11 @@ config XEN_PHYSDEV_ACCESS 2.4 2.5 endmenu 2.6 2.7 +# Xen's block device backend driver needs 2^12 pages 2.8 +config FORCE_MAX_ZONEORDER 2.9 + int 2.10 + default "12" if XEN_PHYSDEV_ACCESS 2.11 + default "11" if !XEN_PHYSDEV_ACCESS 2.12 2.13 #config VT 2.14 # bool
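The two defaults above size the buddy allocator for the backend's reserved mapping region (see blkback.c below). A standalone back-of-envelope check of why order 12 is needed; BLKIF_MAX_SEGMENTS_PER_REQUEST == 11 is an assumption taken from the blkif interface headers of this era, not something this diff shows:

    /* Illustrative only: why FORCE_MAX_ZONEORDER must reach 12.
     * BLKIF_MAX_SEGMENTS_PER_REQUEST == 11 is assumed here. */
    #include <stdio.h>

    #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
    #define MAX_PENDING_REQS               64
    #define MMAP_PAGES_PER_REQUEST (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1))
    #define MMAP_PAGES             (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)

    int main(void)
    {
        int order = 0;
        while ((1 << order) < MMAP_PAGES)
            order++;                 /* smallest order covering MMAP_PAGES */
        /* 64 * 24 = 1536 pages rounds up to 2^11, so the buddy allocator
         * must hand out order-11 blocks, i.e. MAX_ORDER (set by
         * FORCE_MAX_ZONEORDER) must be at least 12. */
        printf("MMAP_PAGES=%d -> order-%d block -> MAX_ORDER >= %d\n",
               MMAP_PAGES, order, order + 1);
        return 0;
    }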
3.1 --- a/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig Sat Jul 31 16:13:15 2004 +0000 3.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig Sun Aug 01 15:13:24 2004 +0000 3.3 @@ -10,6 +10,7 @@ CONFIG_NO_IDLE_HZ=y 3.4 # 3.5 CONFIG_XEN_PRIVILEGED_GUEST=y 3.6 CONFIG_XEN_PHYSDEV_ACCESS=y 3.7 +CONFIG_FORCE_MAX_ZONEORDER=12 3.8 CONFIG_X86=y 3.9 # CONFIG_X86_64 is not set 3.10
4.1 --- a/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig Sat Jul 31 16:13:15 2004 +0000 4.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig Sun Aug 01 15:13:24 2004 +0000 4.3 @@ -10,6 +10,7 @@ CONFIG_NO_IDLE_HZ=y 4.4 # 4.5 # CONFIG_XEN_PRIVILEGED_GUEST is not set 4.6 # CONFIG_XEN_PHYSDEV_ACCESS is not set 4.7 +CONFIG_FORCE_MAX_ZONEORDER=11 4.8 CONFIG_X86=y 4.9 # CONFIG_X86_64 is not set 4.10
5.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/Makefile Sat Jul 31 16:13:15 2004 +0000 5.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/Makefile Sun Aug 01 15:13:24 2004 +0000 5.3 @@ -1,9 +1,11 @@ 5.4 5.5 5.6 -obj-y += block/ 5.7 +obj-y += blkfront/ 5.8 obj-y += console/ 5.9 obj-y += evtchn/ 5.10 obj-y += netfront/ 5.11 obj-y += privcmd/ 5.12 5.13 +obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += blkback/ 5.14 obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += netback/ 5.15 +
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/Makefile Sun Aug 01 15:13:24 2004 +0000 6.3 @@ -0,0 +1,2 @@ 6.4 + 6.5 +obj-y := blkback.o control.o interface.o vbd.o
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c Sun Aug 01 15:13:24 2004 +0000 7.3 @@ -0,0 +1,588 @@ 7.4 +/****************************************************************************** 7.5 + * arch/xen/drivers/blkif/backend/main.c 7.6 + * 7.7 + * Back-end of the driver for virtual block devices. This portion of the 7.8 + * driver exports a 'unified' block-device interface that can be accessed 7.9 + * by any operating system that implements a compatible front end. A 7.10 + * reference front-end implementation can be found in: 7.11 + * arch/xen/drivers/blkif/frontend 7.12 + * 7.13 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 7.14 + */ 7.15 + 7.16 +#include "common.h" 7.17 + 7.18 +/* 7.19 + * These are rather arbitrary. They are fairly large because adjacent requests 7.20 + * pulled from a communication ring are quite likely to end up being part of 7.21 + * the same scatter/gather request at the disc. 7.22 + * 7.23 + * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW ** 7.24 + * This will increase the chances of being able to write whole tracks. 7.25 + * 64 should be enough to keep us competitive with Linux. 7.26 + */ 7.27 +#define MAX_PENDING_REQS 64 7.28 +#define BATCH_PER_DOMAIN 16 7.29 + 7.30 +/* 7.31 + * NB. We place a page of padding between each buffer page to avoid incorrect 7.32 + * merging of requests by the IDE and SCSI merging routines. Otherwise, two 7.33 + * adjacent buffers in a scatter-gather request would have adjacent page 7.34 + * numbers: since the merge routines don't realise that this is in *pseudophys* 7.35 + * space, not real space, they may collapse the s-g elements! 7.36 + */ 7.37 +static unsigned long mmap_vstart; 7.38 +#define MMAP_PAGES_PER_REQUEST \ 7.39 + (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)) 7.40 +#define MMAP_PAGES \ 7.41 + (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) 7.42 +#define MMAP_VADDR(_req,_seg) \ 7.43 + (mmap_vstart + \ 7.44 + ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ 7.45 + ((_seg) * 2 * PAGE_SIZE)) 7.46 + 7.47 +/* 7.48 + * Each outstanding request that we've passed to the lower device layers has a 7.49 + * 'pending_req' allocated to it. Each buffer_head that completes decrements 7.50 + * the pendcnt towards zero. When it hits zero, the specified domain has a 7.51 + * response queued for it, with the saved 'id' passed back. 7.52 + */ 7.53 +typedef struct { 7.54 + blkif_t *blkif; 7.55 + unsigned long id; 7.56 + int nr_pages; 7.57 + atomic_t pendcnt; 7.58 + unsigned short operation; 7.59 + int status; 7.60 +} pending_req_t; 7.61 + 7.62 +/* 7.63 + * We can't allocate pending_req's in order, since they may complete out of 7.64 + * order. We therefore maintain an allocation ring. This ring also indicates 7.65 + * when enough work has been passed down -- at that point the allocation ring 7.66 + * will be empty. 7.67 + */ 7.68 +static pending_req_t pending_reqs[MAX_PENDING_REQS]; 7.69 +static unsigned char pending_ring[MAX_PENDING_REQS]; 7.70 +static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED; 7.71 +/* NB. We use a different index type to differentiate from shared blk rings. 
*/ 7.72 +typedef unsigned int PEND_RING_IDX; 7.73 +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) 7.74 +static PEND_RING_IDX pending_prod, pending_cons; 7.75 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) 7.76 + 7.77 +#if 0 7.78 +static kmem_cache_t *buffer_head_cachep; 7.79 +#endif 7.80 + 7.81 +static int do_block_io_op(blkif_t *blkif, int max_to_do); 7.82 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req); 7.83 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req); 7.84 +static void make_response(blkif_t *blkif, unsigned long id, 7.85 + unsigned short op, int st); 7.86 + 7.87 +static void fast_flush_area(int idx, int nr_pages) 7.88 +{ 7.89 + multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST]; 7.90 + int i; 7.91 + 7.92 + for ( i = 0; i < nr_pages; i++ ) 7.93 + { 7.94 + mcl[i].op = __HYPERVISOR_update_va_mapping; 7.95 + mcl[i].args[0] = MMAP_VADDR(idx, i) >> PAGE_SHIFT; 7.96 + mcl[i].args[1] = 0; 7.97 + mcl[i].args[2] = 0; 7.98 + } 7.99 + 7.100 + mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB; 7.101 + if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) 7.102 + BUG(); 7.103 +} 7.104 + 7.105 + 7.106 +/****************************************************************** 7.107 + * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE 7.108 + */ 7.109 + 7.110 +static struct list_head blkio_schedule_list; 7.111 +static spinlock_t blkio_schedule_list_lock; 7.112 + 7.113 +static int __on_blkdev_list(blkif_t *blkif) 7.114 +{ 7.115 + return blkif->blkdev_list.next != NULL; 7.116 +} 7.117 + 7.118 +static void remove_from_blkdev_list(blkif_t *blkif) 7.119 +{ 7.120 + unsigned long flags; 7.121 + if ( !__on_blkdev_list(blkif) ) return; 7.122 + spin_lock_irqsave(&blkio_schedule_list_lock, flags); 7.123 + if ( __on_blkdev_list(blkif) ) 7.124 + { 7.125 + list_del(&blkif->blkdev_list); 7.126 + blkif->blkdev_list.next = NULL; 7.127 + blkif_put(blkif); 7.128 + } 7.129 + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); 7.130 +} 7.131 + 7.132 +static void add_to_blkdev_list_tail(blkif_t *blkif) 7.133 +{ 7.134 + unsigned long flags; 7.135 + if ( __on_blkdev_list(blkif) ) return; 7.136 + spin_lock_irqsave(&blkio_schedule_list_lock, flags); 7.137 + if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) 7.138 + { 7.139 + list_add_tail(&blkif->blkdev_list, &blkio_schedule_list); 7.140 + blkif_get(blkif); 7.141 + } 7.142 + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); 7.143 +} 7.144 + 7.145 + 7.146 +/****************************************************************** 7.147 + * SCHEDULER FUNCTIONS 7.148 + */ 7.149 + 7.150 +static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait); 7.151 + 7.152 +static int blkio_schedule(void *arg) 7.153 +{ 7.154 + DECLARE_WAITQUEUE(wq, current); 7.155 + 7.156 + blkif_t *blkif; 7.157 + struct list_head *ent; 7.158 + 7.159 + for ( ; ; ) 7.160 + { 7.161 + /* Wait for work to do. */ 7.162 + add_wait_queue(&blkio_schedule_wait, &wq); 7.163 + set_current_state(TASK_INTERRUPTIBLE); 7.164 + if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 7.165 + list_empty(&blkio_schedule_list) ) 7.166 + schedule(); 7.167 + __set_current_state(TASK_RUNNING); 7.168 + remove_wait_queue(&blkio_schedule_wait, &wq); 7.169 + 7.170 + /* Queue up a batch of requests. 
*/ 7.171 + while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && 7.172 + !list_empty(&blkio_schedule_list) ) 7.173 + { 7.174 + ent = blkio_schedule_list.next; 7.175 + blkif = list_entry(ent, blkif_t, blkdev_list); 7.176 + blkif_get(blkif); 7.177 + remove_from_blkdev_list(blkif); 7.178 + if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) 7.179 + add_to_blkdev_list_tail(blkif); 7.180 + blkif_put(blkif); 7.181 + } 7.182 + 7.183 +#if 0 /* XXXcl tq */ 7.184 + /* Push the batch through to disc. */ 7.185 + run_task_queue(&tq_disk); 7.186 +#endif 7.187 + } 7.188 +} 7.189 + 7.190 +static void maybe_trigger_blkio_schedule(void) 7.191 +{ 7.192 + /* 7.193 + * Needed so that two processes, who together make the following predicate 7.194 + * true, don't both read stale values and evaluate the predicate 7.195 + * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... 7.196 + */ 7.197 + smp_mb(); 7.198 + 7.199 + if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && 7.200 + !list_empty(&blkio_schedule_list) ) 7.201 + wake_up(&blkio_schedule_wait); 7.202 +} 7.203 + 7.204 + 7.205 + 7.206 +/****************************************************************** 7.207 + * COMPLETION CALLBACK -- Called as bh->b_end_io() 7.208 + */ 7.209 + 7.210 +static void __end_block_io_op(pending_req_t *pending_req, int uptodate) 7.211 +{ 7.212 + unsigned long flags; 7.213 + 7.214 + /* An error fails the entire request. */ 7.215 + if ( !uptodate ) 7.216 + { 7.217 + DPRINTK("Buffer not up-to-date at end of operation\n"); 7.218 + pending_req->status = BLKIF_RSP_ERROR; 7.219 + } 7.220 + 7.221 + if ( atomic_dec_and_test(&pending_req->pendcnt) ) 7.222 + { 7.223 + int pending_idx = pending_req - pending_reqs; 7.224 + fast_flush_area(pending_idx, pending_req->nr_pages); 7.225 + make_response(pending_req->blkif, pending_req->id, 7.226 + pending_req->operation, pending_req->status); 7.227 + blkif_put(pending_req->blkif); 7.228 + spin_lock_irqsave(&pend_prod_lock, flags); 7.229 + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; 7.230 + spin_unlock_irqrestore(&pend_prod_lock, flags); 7.231 + maybe_trigger_blkio_schedule(); 7.232 + } 7.233 +} 7.234 + 7.235 +static int end_block_io_op(struct bio *bio, unsigned int done, int error) 7.236 +{ 7.237 + if (done || error) /* XXXcl */ 7.238 + __end_block_io_op(bio->bi_private, done); 7.239 +#if 0 7.240 + kmem_cache_free(buffer_head_cachep, bh); 7.241 +#else 7.242 + bio_put(bio); 7.243 +#endif 7.244 + return error; 7.245 +} 7.246 + 7.247 + 7.248 + 7.249 +/****************************************************************************** 7.250 + * NOTIFICATION FROM GUEST OS. 7.251 + */ 7.252 + 7.253 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) 7.254 +{ 7.255 + blkif_t *blkif = dev_id; 7.256 + add_to_blkdev_list_tail(blkif); 7.257 + maybe_trigger_blkio_schedule(); 7.258 + return IRQ_HANDLED; 7.259 +} 7.260 + 7.261 + 7.262 + 7.263 +/****************************************************************** 7.264 + * DOWNWARD CALLS -- These interface with the block-device layer proper. 7.265 + */ 7.266 + 7.267 +static int do_block_io_op(blkif_t *blkif, int max_to_do) 7.268 +{ 7.269 + blkif_ring_t *blk_ring = blkif->blk_ring_base; 7.270 + blkif_request_t *req; 7.271 + BLKIF_RING_IDX i; 7.272 + int more_to_do = 0; 7.273 + 7.274 + /* Take items off the comms ring, taking care not to overflow. 
*/ 7.275 + for ( i = blkif->blk_req_cons; 7.276 + (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != 7.277 + BLKIF_RING_SIZE); 7.278 + i++ ) 7.279 + { 7.280 + if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) ) 7.281 + { 7.282 + more_to_do = 1; 7.283 + break; 7.284 + } 7.285 + 7.286 + req = &blk_ring->ring[MASK_BLKIF_IDX(i)].req; 7.287 + switch ( req->operation ) 7.288 + { 7.289 + case BLKIF_OP_READ: 7.290 + case BLKIF_OP_WRITE: 7.291 + dispatch_rw_block_io(blkif, req); 7.292 + break; 7.293 + 7.294 + case BLKIF_OP_PROBE: 7.295 + dispatch_probe(blkif, req); 7.296 + break; 7.297 + 7.298 + default: 7.299 + DPRINTK("error: unknown block io operation [%d]\n", 7.300 + blk_ring->ring[i].req.operation); 7.301 + make_response(blkif, blk_ring->ring[i].req.id, 7.302 + blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR); 7.303 + break; 7.304 + } 7.305 + } 7.306 + 7.307 + blkif->blk_req_cons = i; 7.308 + return more_to_do; 7.309 +} 7.310 + 7.311 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req) 7.312 +{ 7.313 + int rsp = BLKIF_RSP_ERROR; 7.314 + int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; 7.315 + 7.316 + /* We expect one buffer only. */ 7.317 + if ( unlikely(req->nr_segments != 1) ) 7.318 + goto out; 7.319 + 7.320 + /* Make sure the buffer is page-sized. */ 7.321 + if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || 7.322 + (blkif_last_sect(req->frame_and_sects[0]) != 7) ) 7.323 + goto out; 7.324 + 7.325 + if ( HYPERVISOR_update_va_mapping_otherdomain( 7.326 + MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT, 7.327 + (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL }, 7.328 + 0, blkif->domid) ) 7.329 + goto out; 7.330 + 7.331 + rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 7.332 + PAGE_SIZE / sizeof(vdisk_t)); 7.333 + 7.334 + out: 7.335 + fast_flush_area(pending_idx, 1); 7.336 + make_response(blkif, req->id, req->operation, rsp); 7.337 +} 7.338 + 7.339 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) 7.340 +{ 7.341 + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 7.342 +#if 0 7.343 + struct buffer_head *bh; 7.344 +#else 7.345 + struct bio *bio; 7.346 +#endif 7.347 + int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ; 7.348 + short nr_sects; 7.349 + unsigned long buffer, fas; 7.350 + int i, j, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; 7.351 + pending_req_t *pending_req; 7.352 + unsigned long remap_prot; 7.353 + multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST]; 7.354 + 7.355 + /* We map virtual scatter/gather segments to physical segments. */ 7.356 + int new_segs, nr_psegs = 0; 7.357 + phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST + 1]; 7.358 + 7.359 + /* Check that number of segments is sane. */ 7.360 + if ( unlikely(req->nr_segments == 0) || 7.361 + unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) 7.362 + { 7.363 + DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments); 7.364 + goto bad_descriptor; 7.365 + } 7.366 + 7.367 + /* 7.368 + * Check each address/size pair is sane, and convert into a 7.369 + * physical device and block offset. Note that if the offset and size 7.370 + * crosses a virtual extent boundary, we may end up with more 7.371 + * physical scatter/gather segments than virtual segments. 
7.372 + */ 7.373 + for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects ) 7.374 + { 7.375 + fas = req->frame_and_sects[i]; 7.376 + buffer = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9); 7.377 + nr_sects = blkif_last_sect(fas) - blkif_first_sect(fas) + 1; 7.378 + 7.379 + if ( nr_sects <= 0 ) 7.380 + goto bad_descriptor; 7.381 + 7.382 + phys_seg[nr_psegs].ps_device = req->device; 7.383 + phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects; 7.384 + phys_seg[nr_psegs].buffer = buffer; 7.385 + phys_seg[nr_psegs].nr_sects = nr_sects; 7.386 + 7.387 + /* Translate the request into the relevant 'physical device' */ 7.388 + new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation); 7.389 + if ( new_segs < 0 ) 7.390 + { 7.391 + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 7.392 + operation == READ ? "read" : "write", 7.393 + req->sector_number + tot_sects, 7.394 + req->sector_number + tot_sects + nr_sects, 7.395 + req->device); 7.396 + goto bad_descriptor; 7.397 + } 7.398 + 7.399 + nr_psegs += new_segs; 7.400 + ASSERT(nr_psegs <= (BLKIF_MAX_SEGMENTS_PER_REQUEST+1)); 7.401 + } 7.402 + 7.403 + /* Nonsensical zero-sized request? */ 7.404 + if ( unlikely(nr_psegs == 0) ) 7.405 + goto bad_descriptor; 7.406 + 7.407 + if ( operation == READ ) 7.408 + remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; 7.409 + else 7.410 + remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED; 7.411 + 7.412 + for ( i = 0; i < nr_psegs; i++ ) 7.413 + { 7.414 + mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain; 7.415 + mcl[i].args[0] = MMAP_VADDR(pending_idx, i) >> PAGE_SHIFT; 7.416 + mcl[i].args[1] = (phys_seg[i].buffer & PAGE_MASK) | remap_prot; 7.417 + mcl[i].args[2] = 0; 7.418 + mcl[i].args[3] = blkif->domid; 7.419 + 7.420 + phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = 7.421 + phys_seg[i].buffer >> PAGE_SHIFT; 7.422 + } 7.423 + 7.424 + if ( unlikely(HYPERVISOR_multicall(mcl, nr_psegs) != 0) ) 7.425 + BUG(); 7.426 + 7.427 + for ( i = 0; i < nr_psegs; i++ ) 7.428 + { 7.429 + if ( unlikely(mcl[i].args[5] != 0) ) 7.430 + { 7.431 + DPRINTK("invalid buffer -- could not remap it\n"); 7.432 + fast_flush_area(pending_idx, nr_psegs); 7.433 + goto bad_descriptor; 7.434 + } 7.435 + } 7.436 + 7.437 + pending_req = &pending_reqs[pending_idx]; 7.438 + pending_req->blkif = blkif; 7.439 + pending_req->id = req->id; 7.440 + pending_req->operation = operation; 7.441 + pending_req->status = BLKIF_RSP_OKAY; 7.442 + pending_req->nr_pages = nr_psegs; 7.443 + atomic_set(&pending_req->pendcnt, nr_psegs); 7.444 + pending_cons++; 7.445 + 7.446 + blkif_get(blkif); 7.447 + 7.448 + /* Now we pass each segment down to the real blkdev layer. */ 7.449 +#if 0 7.450 + for ( i = 0; i < nr_psegs; i++ ) 7.451 + { 7.452 + bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC); 7.453 + if ( unlikely(bh == NULL) ) 7.454 + { 7.455 + __end_block_io_op(pending_req, 0); 7.456 + continue; /* XXXcl continue!? 
*/ 7.457 + } 7.458 + memset(bh, 0, sizeof (struct buffer_head)); 7.459 + 7.460 + init_waitqueue_head(&bh->b_wait); 7.461 + bh->b_size = phys_seg[i].nr_sects << 9; 7.462 + bh->b_dev = phys_seg[i].dev; 7.463 + bh->b_rdev = phys_seg[i].dev; 7.464 + bh->b_rsector = (unsigned long)phys_seg[i].sector_number; 7.465 + bh->b_data = (char *)MMAP_VADDR(pending_idx, i) + 7.466 + (phys_seg[i].buffer & ~PAGE_MASK); 7.467 + bh->b_page = virt_to_page(MMAP_VADDR(pending_idx, i)); 7.468 + bh->b_end_io = end_block_io_op; 7.469 + bh->b_private = pending_req; 7.470 + 7.471 + bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | 7.472 + (1 << BH_Req) | (1 << BH_Launder); 7.473 + if ( operation == WRITE ) 7.474 + bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate); 7.475 + 7.476 + atomic_set(&bh->b_count, 1); 7.477 + 7.478 + /* Dispatch a single request. We'll flush it to disc later. */ 7.479 + generic_make_request(operation, bh); 7.480 + } 7.481 +#else 7.482 + for ( i = 0; i < nr_psegs; i++ ) 7.483 + { 7.484 + int nr_iovecs = PFN_UP(phys_seg[i].nr_sects << 9); 7.485 + ASSERT(nr_iovecs == 1); 7.486 + bio = bio_alloc(GFP_ATOMIC, nr_iovecs); 7.487 + if ( unlikely(bio == NULL) ) 7.488 + { 7.489 + __end_block_io_op(pending_req, 0); 7.490 + break; 7.491 + } 7.492 + bio->bi_bdev = phys_seg[i].ps_bdev; 7.493 + bio->bi_private = pending_req; 7.494 + bio->bi_end_io = end_block_io_op; 7.495 + bio->bi_sector = phys_seg[i].sector_number; 7.496 + bio->bi_rw = operation; 7.497 + 7.498 + bio->bi_size = 0; 7.499 + 7.500 + for ( j = 0; j < nr_iovecs; j++ ) 7.501 + { 7.502 + struct bio_vec *bv = bio_iovec_idx(bio, j); 7.503 + 7.504 + bv->bv_page = virt_to_page(MMAP_VADDR(pending_idx, i)); 7.505 + bv->bv_len = phys_seg[i].nr_sects << 9; 7.506 + bv->bv_offset = phys_seg[i].buffer & ~PAGE_MASK; 7.507 + 7.508 + bio->bi_size += bv->bv_len; 7.509 + bio->bi_vcnt++; 7.510 + } 7.511 + 7.512 + submit_bio(operation, bio); 7.513 + } 7.514 +#endif 7.515 + 7.516 + return; 7.517 + 7.518 + bad_descriptor: 7.519 + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); 7.520 +} 7.521 + 7.522 + 7.523 + 7.524 +/****************************************************************** 7.525 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING 7.526 + */ 7.527 + 7.528 + 7.529 +static void make_response(blkif_t *blkif, unsigned long id, 7.530 + unsigned short op, int st) 7.531 +{ 7.532 + blkif_response_t *resp; 7.533 + unsigned long flags; 7.534 + 7.535 + /* Place on the response ring for the relevant domain. */ 7.536 + spin_lock_irqsave(&blkif->blk_ring_lock, flags); 7.537 + resp = &blkif->blk_ring_base-> 7.538 + ring[MASK_BLKIF_IDX(blkif->blk_resp_prod)].resp; 7.539 + resp->id = id; 7.540 + resp->operation = op; 7.541 + resp->status = st; 7.542 + wmb(); 7.543 + blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod; 7.544 + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); 7.545 + 7.546 + /* Kick the relevant domain. 
*/ 7.547 + notify_via_evtchn(blkif->evtchn); 7.548 +} 7.549 + 7.550 +void blkif_deschedule(blkif_t *blkif) 7.551 +{ 7.552 + remove_from_blkdev_list(blkif); 7.553 +} 7.554 + 7.555 +static int __init blkif_init(void) 7.556 +{ 7.557 + int i; 7.558 + 7.559 + if ( !(start_info.flags & SIF_INITDOMAIN) 7.560 + && !(start_info.flags & SIF_BLK_BE_DOMAIN) ) 7.561 + return 0; 7.562 + 7.563 + blkif_interface_init(); 7.564 + 7.565 + if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) 7.566 + BUG(); 7.567 + 7.568 + pending_cons = 0; 7.569 + pending_prod = MAX_PENDING_REQS; 7.570 + memset(pending_reqs, 0, sizeof(pending_reqs)); 7.571 + for ( i = 0; i < MAX_PENDING_REQS; i++ ) 7.572 + pending_ring[i] = i; 7.573 + 7.574 + spin_lock_init(&blkio_schedule_list_lock); 7.575 + INIT_LIST_HEAD(&blkio_schedule_list); 7.576 + 7.577 + if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) 7.578 + BUG(); 7.579 + 7.580 +#if 0 7.581 + buffer_head_cachep = kmem_cache_create( 7.582 + "buffer_head_cache", sizeof(struct buffer_head), 7.583 + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); 7.584 +#endif 7.585 + 7.586 + blkif_ctrlif_init(); 7.587 + 7.588 + return 0; 7.589 +} 7.590 + 7.591 +__initcall(blkif_init);
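The pending_ring in blkback.c above doubles as a free-slot allocator: slot indices are taken at pending_cons on the dispatch path and returned at pending_prod on the completion path, and NR_PENDING_REQS falls out of the index arithmetic. A standalone sketch of the same scheme (locking omitted; the driver guards pending_prod with pend_prod_lock):

    #include <stdio.h>

    #define MAX_PENDING_REQS 64                 /* must be a power of two */
    typedef unsigned int PEND_RING_IDX;
    #define MASK_PEND_IDX(_i) ((_i) & (MAX_PENDING_REQS - 1))

    static unsigned char pending_ring[MAX_PENDING_REQS];
    static PEND_RING_IDX pending_prod, pending_cons;
    /* Outstanding requests = slots taken but not yet returned. */
    #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

    static void pending_init(void)
    {
        int i;
        pending_cons = 0;
        pending_prod = MAX_PENDING_REQS;        /* all slots start free */
        for (i = 0; i < MAX_PENDING_REQS; i++)
            pending_ring[i] = i;
    }

    static int pending_alloc(void)              /* dispatch path */
    {
        if (NR_PENDING_REQS == MAX_PENDING_REQS)
            return -1;                          /* no free slot */
        return pending_ring[MASK_PEND_IDX(pending_cons++)];
    }

    static void pending_free(int idx)           /* completion path */
    {
        pending_ring[MASK_PEND_IDX(pending_prod++)] = idx;
    }

    int main(void)
    {
        int a, b;
        pending_init();
        a = pending_alloc();
        b = pending_alloc();
        pending_free(a);
        printf("allocated %d,%d; outstanding=%u\n", a, b, NR_PENDING_REQS);
        return 0;
    }

The unmasked indices wrap via unsigned arithmetic exactly as in the driver, which is why MAX_PENDING_REQS must be a power of two.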
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/common.h Sun Aug 01 15:13:24 2004 +0000 8.3 @@ -0,0 +1,124 @@ 8.4 +/****************************************************************************** 8.5 + * arch/xen/drivers/blkif/backend/common.h 8.6 + */ 8.7 + 8.8 +#ifndef __BLKIF__BACKEND__COMMON_H__ 8.9 +#define __BLKIF__BACKEND__COMMON_H__ 8.10 + 8.11 +#include <linux/config.h> 8.12 +#include <linux/version.h> 8.13 +#include <linux/module.h> 8.14 +#include <linux/rbtree.h> 8.15 +#include <linux/interrupt.h> 8.16 +#include <linux/slab.h> 8.17 +#include <linux/blkdev.h> 8.18 +#include <asm-xen/ctrl_if.h> 8.19 +#include <asm/io.h> 8.20 +#include <asm/setup.h> 8.21 +#include <asm/pgalloc.h> 8.22 + 8.23 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 8.24 +#include <asm-xen/blkif.h> 8.25 +#else 8.26 +#include "../blkif.h" 8.27 +#define irqreturn_t void 8.28 +#define IRQ_HANDLED 8.29 +#endif 8.30 + 8.31 +#if 0 8.32 +#define ASSERT(_p) \ 8.33 + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ 8.34 + __LINE__, __FILE__); *(int*)0=0; } 8.35 +#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ 8.36 + __FILE__ , __LINE__ , ## _a ) 8.37 +#else 8.38 +#define ASSERT(_p) ((void)0) 8.39 +#define DPRINTK(_f, _a...) ((void)0) 8.40 +#endif 8.41 + 8.42 +typedef struct blkif_st { 8.43 + /* Unique identifier for this interface. */ 8.44 + domid_t domid; 8.45 + unsigned int handle; 8.46 + /* Physical parameters of the comms window. */ 8.47 + unsigned long shmem_frame; 8.48 + unsigned int evtchn; 8.49 + int irq; 8.50 + /* Comms information. */ 8.51 + blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */ 8.52 + BLKIF_RING_IDX blk_req_cons; /* Request consumer. */ 8.53 + BLKIF_RING_IDX blk_resp_prod; /* Private version of resp. producer. */ 8.54 + /* VBDs attached to this interface. */ 8.55 + struct rb_root vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */ 8.56 + spinlock_t vbd_lock; /* Protects VBD mapping. */ 8.57 + /* Private fields. */ 8.58 + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; 8.59 + /* 8.60 + * DISCONNECT response is deferred until pending requests are ack'ed. 8.61 + * We therefore need to store the id from the original request. 8.62 + */ 8.63 + u8 disconnect_rspid; 8.64 + struct blkif_st *hash_next; 8.65 + struct list_head blkdev_list; 8.66 + spinlock_t blk_ring_lock; 8.67 + atomic_t refcnt; 8.68 +} blkif_t; 8.69 + 8.70 +void blkif_create(blkif_be_create_t *create); 8.71 +void blkif_destroy(blkif_be_destroy_t *destroy); 8.72 +void blkif_connect(blkif_be_connect_t *connect); 8.73 +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id); 8.74 +void __blkif_disconnect_complete(blkif_t *blkif); 8.75 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); 8.76 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) 8.77 +#define blkif_put(_b) \ 8.78 + do { \ 8.79 + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ 8.80 + __blkif_disconnect_complete(_b); \ 8.81 + } while (0) 8.82 + 8.83 +/* An entry in a list of xen_extents. 
*/ 8.84 +typedef struct _blkif_extent_le { 8.85 + blkif_extent_t extent; /* an individual extent */ 8.86 + struct _blkif_extent_le *next; /* and a pointer to the next */ 8.87 + struct block_device *bdev; 8.88 +} blkif_extent_le_t; 8.89 + 8.90 +typedef struct _vbd { 8.91 + blkif_vdev_t vdevice; /* what the domain refers to this vbd as */ 8.92 + unsigned char readonly; /* Non-zero -> read-only */ 8.93 + unsigned char type; /* VDISK_TYPE_xxx */ 8.94 + blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */ 8.95 + struct rb_node rb; /* for linking into R-B tree lookup struct */ 8.96 +} vbd_t; 8.97 + 8.98 +void vbd_create(blkif_be_vbd_create_t *create); 8.99 +void vbd_grow(blkif_be_vbd_grow_t *grow); 8.100 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink); 8.101 +void vbd_destroy(blkif_be_vbd_destroy_t *delete); 8.102 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds); 8.103 +void destroy_all_vbds(blkif_t *blkif); 8.104 + 8.105 +/* Describes a [partial] disk extent (part of a block io request) */ 8.106 +typedef struct { 8.107 + union { 8.108 + unsigned short dev; 8.109 + struct block_device *bdev; 8.110 + } _dev; 8.111 + unsigned short nr_sects; 8.112 + unsigned long buffer; 8.113 + blkif_sector_t sector_number; 8.114 +} phys_seg_t; 8.115 +#define ps_device _dev.dev 8.116 +#define ps_bdev _dev.bdev 8.117 + 8.118 +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 8.119 + 8.120 +void blkif_interface_init(void); 8.121 +void blkif_ctrlif_init(void); 8.122 + 8.123 +void blkif_deschedule(blkif_t *blkif); 8.124 + 8.125 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); 8.126 + 8.127 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
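The blkif_get/blkif_put pair above implements deferred teardown: the DISCONNECT response is held back until the last reference drops, so in-flight requests always complete against a live blkif_t. A minimal userspace model of that idiom (the names here are illustrative, not the driver's):

    #include <stdatomic.h>
    #include <stdio.h>

    typedef struct blkif_model {
        atomic_int refcnt;
        /* ...ring mapping, event channel, etc... */
    } blkif_model_t;

    static void disconnect_complete(blkif_model_t *b)
    {
        /* Only now is it safe to unmap the ring and send the deferred
         * DISCONNECT response (cf. __blkif_disconnect_complete). */
        (void)b;
        printf("teardown\n");
    }

    static void model_get(blkif_model_t *b)
    {
        atomic_fetch_add(&b->refcnt, 1);
    }

    static void model_put(blkif_model_t *b)
    {
        /* fetch_sub returning 1 means we dropped the last reference,
         * mirroring the kernel's atomic_dec_and_test(). */
        if (atomic_fetch_sub(&b->refcnt, 1) == 1)
            disconnect_complete(b);
    }

    int main(void)
    {
        blkif_model_t b;
        atomic_init(&b.refcnt, 1);   /* reference held by CONNECT */
        model_get(&b);               /* an in-flight request */
        model_put(&b);               /* the request completes */
        model_put(&b);               /* disconnect drops the last ref */
        return 0;
    }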
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/control.c Sun Aug 01 15:13:24 2004 +0000 9.3 @@ -0,0 +1,87 @@ 9.4 +/****************************************************************************** 9.5 + * arch/xen/drivers/blkif/backend/control.c 9.6 + * 9.7 + * Routines for interfacing with the control plane. 9.8 + * 9.9 + * Copyright (c) 2004, Keir Fraser 9.10 + */ 9.11 + 9.12 +#include "common.h" 9.13 + 9.14 +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) 9.15 +{ 9.16 + DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype); 9.17 + 9.18 + switch ( msg->subtype ) 9.19 + { 9.20 + case CMSG_BLKIF_BE_CREATE: 9.21 + if ( msg->length != sizeof(blkif_be_create_t) ) 9.22 + goto parse_error; 9.23 + blkif_create((blkif_be_create_t *)&msg->msg[0]); 9.24 + break; 9.25 + case CMSG_BLKIF_BE_DESTROY: 9.26 + if ( msg->length != sizeof(blkif_be_destroy_t) ) 9.27 + goto parse_error; 9.28 + blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]); 9.29 + break; 9.30 + case CMSG_BLKIF_BE_CONNECT: 9.31 + if ( msg->length != sizeof(blkif_be_connect_t) ) 9.32 + goto parse_error; 9.33 + blkif_connect((blkif_be_connect_t *)&msg->msg[0]); 9.34 + break; 9.35 + case CMSG_BLKIF_BE_DISCONNECT: 9.36 + if ( msg->length != sizeof(blkif_be_disconnect_t) ) 9.37 + goto parse_error; 9.38 + if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) ) 9.39 + return; /* Sending the response is deferred until later. */ 9.40 + break; 9.41 + case CMSG_BLKIF_BE_VBD_CREATE: 9.42 + if ( msg->length != sizeof(blkif_be_vbd_create_t) ) 9.43 + goto parse_error; 9.44 + vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]); 9.45 + break; 9.46 + case CMSG_BLKIF_BE_VBD_DESTROY: 9.47 + if ( msg->length != sizeof(blkif_be_vbd_destroy_t) ) 9.48 + goto parse_error; 9.49 + vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]); 9.50 + break; 9.51 + case CMSG_BLKIF_BE_VBD_GROW: 9.52 + if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) 9.53 + goto parse_error; 9.54 + vbd_grow((blkif_be_vbd_grow_t *)&msg->msg[0]); 9.55 + break; 9.56 + case CMSG_BLKIF_BE_VBD_SHRINK: 9.57 + if ( msg->length != sizeof(blkif_be_vbd_shrink_t) ) 9.58 + goto parse_error; 9.59 + vbd_shrink((blkif_be_vbd_shrink_t *)&msg->msg[0]); 9.60 + break; 9.61 + default: 9.62 + goto parse_error; 9.63 + } 9.64 + 9.65 + ctrl_if_send_response(msg); 9.66 + return; 9.67 + 9.68 + parse_error: 9.69 + DPRINTK("Parse error while reading message subtype %d, len %d\n", 9.70 + msg->subtype, msg->length); 9.71 + msg->length = 0; 9.72 + ctrl_if_send_response(msg); 9.73 +} 9.74 + 9.75 +void blkif_ctrlif_init(void) 9.76 +{ 9.77 + ctrl_msg_t cmsg; 9.78 + blkif_be_driver_status_changed_t st; 9.79 + 9.80 + (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 9.81 + CALLBACK_IN_BLOCKING_CONTEXT); 9.82 + 9.83 + /* Send a driver-UP notification to the domain controller. */ 9.84 + cmsg.type = CMSG_BLKIF_BE; 9.85 + cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED; 9.86 + cmsg.length = sizeof(blkif_be_driver_status_changed_t); 9.87 + st.status = BLKIF_DRIVER_STATUS_UP; 9.88 + memcpy(cmsg.msg, &st, sizeof(st)); 9.89 + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 9.90 +}
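Every case in blkif_ctrlif_rx above performs the same two steps: validate msg->length against the payload struct, then pass &msg->msg[0] to a handler. A table-driven dispatcher is an equivalent shape that keeps the size check in one place; the sketch below is self-contained with stand-in types, and none of its names are from the driver:

    #include <stdio.h>

    /* Stand-in types so the sketch compiles on its own. */
    typedef struct { int subtype; unsigned int length; unsigned char msg[60]; } msg_t;
    typedef struct { unsigned int domid, blkif_handle; } create_t;
    typedef struct { unsigned int domid, blkif_handle; } destroy_t;
    enum { BE_CREATE, BE_DESTROY };

    static void do_create(void *p)  { (void)p; printf("create\n"); }
    static void do_destroy(void *p) { (void)p; printf("destroy\n"); }

    static const struct {
        int          subtype;
        unsigned int length;
        void       (*fn)(void *payload);
    } table[] = {
        { BE_CREATE,  sizeof(create_t),  do_create  },
        { BE_DESTROY, sizeof(destroy_t), do_destroy },
    };

    static int dispatch(msg_t *m)
    {
        unsigned int i;
        for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
            if (table[i].subtype != m->subtype)
                continue;
            if (m->length != table[i].length)
                return -1;             /* parse error: wrong payload size */
            table[i].fn(&m->msg[0]);
            return 0;
        }
        return -1;                     /* unknown subtype */
    }

    int main(void)
    {
        msg_t m = { BE_CREATE, sizeof(create_t), { 0 } };
        return dispatch(&m);
    }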
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/interface.c Sun Aug 01 15:13:24 2004 +0000 10.3 @@ -0,0 +1,239 @@ 10.4 +/****************************************************************************** 10.5 + * arch/xen/drivers/blkif/backend/interface.c 10.6 + * 10.7 + * Block-device interface management. 10.8 + * 10.9 + * Copyright (c) 2004, Keir Fraser 10.10 + */ 10.11 + 10.12 +#include "common.h" 10.13 + 10.14 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 10.15 +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) 10.16 +#endif 10.17 + 10.18 +#define BLKIF_HASHSZ 1024 10.19 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) 10.20 + 10.21 +static kmem_cache_t *blkif_cachep; 10.22 +static blkif_t *blkif_hash[BLKIF_HASHSZ]; 10.23 + 10.24 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) 10.25 +{ 10.26 + blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; 10.27 + while ( (blkif != NULL) && 10.28 + ((blkif->domid != domid) || (blkif->handle != handle)) ) 10.29 + blkif = blkif->hash_next; 10.30 + return blkif; 10.31 +} 10.32 + 10.33 +void __blkif_disconnect_complete(blkif_t *blkif) 10.34 +{ 10.35 + ctrl_msg_t cmsg; 10.36 + blkif_be_disconnect_t disc; 10.37 + 10.38 + /* 10.39 + * These can't be done in __blkif_disconnect() because at that point there 10.40 + * may be outstanding requests at the disc whose asynchronous responses 10.41 + * must still be notified to the remote driver. 10.42 + */ 10.43 + unbind_evtchn_from_irq(blkif->evtchn); 10.44 + vfree(blkif->blk_ring_base); 10.45 + 10.46 + /* Construct the deferred response message. */ 10.47 + cmsg.type = CMSG_BLKIF_BE; 10.48 + cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; 10.49 + cmsg.id = blkif->disconnect_rspid; 10.50 + cmsg.length = sizeof(blkif_be_disconnect_t); 10.51 + disc.domid = blkif->domid; 10.52 + disc.blkif_handle = blkif->handle; 10.53 + disc.status = BLKIF_BE_STATUS_OKAY; 10.54 + memcpy(cmsg.msg, &disc, sizeof(disc)); 10.55 + 10.56 + /* 10.57 + * Make sure message is constructed /before/ status change, because 10.58 + * after the status change the 'blkif' structure could be deallocated at 10.59 + * any time. Also make sure we send the response /after/ status change, 10.60 + * as otherwise a subsequent CONNECT request could spuriously fail if 10.61 + * another CPU doesn't see the status change yet. 10.62 + */ 10.63 + mb(); 10.64 + if ( blkif->status != DISCONNECTING ) 10.65 + BUG(); 10.66 + blkif->status = DISCONNECTED; 10.67 + mb(); 10.68 + 10.69 + /* Send the successful response. 
*/ 10.70 + ctrl_if_send_response(&cmsg); 10.71 +} 10.72 + 10.73 +void blkif_create(blkif_be_create_t *create) 10.74 +{ 10.75 + domid_t domid = create->domid; 10.76 + unsigned int handle = create->blkif_handle; 10.77 + blkif_t **pblkif, *blkif; 10.78 + 10.79 + if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL ) 10.80 + { 10.81 + DPRINTK("Could not create blkif: out of memory\n"); 10.82 + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 10.83 + return; 10.84 + } 10.85 + 10.86 + memset(blkif, 0, sizeof(*blkif)); 10.87 + blkif->domid = domid; 10.88 + blkif->handle = handle; 10.89 + blkif->status = DISCONNECTED; 10.90 + spin_lock_init(&blkif->vbd_lock); 10.91 + spin_lock_init(&blkif->blk_ring_lock); 10.92 + atomic_set(&blkif->refcnt, 0); 10.93 + 10.94 + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 10.95 + while ( *pblkif != NULL ) 10.96 + { 10.97 + if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) 10.98 + { 10.99 + DPRINTK("Could not create blkif: already exists\n"); 10.100 + create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; 10.101 + kmem_cache_free(blkif_cachep, blkif); 10.102 + return; 10.103 + } 10.104 + pblkif = &(*pblkif)->hash_next; 10.105 + } 10.106 + 10.107 + blkif->hash_next = *pblkif; 10.108 + *pblkif = blkif; 10.109 + 10.110 + DPRINTK("Successfully created blkif\n"); 10.111 + create->status = BLKIF_BE_STATUS_OKAY; 10.112 +} 10.113 + 10.114 +void blkif_destroy(blkif_be_destroy_t *destroy) 10.115 +{ 10.116 + domid_t domid = destroy->domid; 10.117 + unsigned int handle = destroy->blkif_handle; 10.118 + blkif_t **pblkif, *blkif; 10.119 + 10.120 + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 10.121 + while ( (blkif = *pblkif) != NULL ) 10.122 + { 10.123 + if ( (blkif->domid == domid) && (blkif->handle == handle) ) 10.124 + { 10.125 + if ( blkif->status != DISCONNECTED ) 10.126 + goto still_connected; 10.127 + goto destroy; 10.128 + } 10.129 + pblkif = &blkif->hash_next; 10.130 + } 10.131 + 10.132 + destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 10.133 + return; 10.134 + 10.135 + still_connected: 10.136 + destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; 10.137 + return; 10.138 + 10.139 + destroy: 10.140 + *pblkif = blkif->hash_next; 10.141 + destroy_all_vbds(blkif); 10.142 + kmem_cache_free(blkif_cachep, blkif); 10.143 + destroy->status = BLKIF_BE_STATUS_OKAY; 10.144 +} 10.145 + 10.146 +void blkif_connect(blkif_be_connect_t *connect) 10.147 +{ 10.148 + domid_t domid = connect->domid; 10.149 + unsigned int handle = connect->blkif_handle; 10.150 + unsigned int evtchn = connect->evtchn; 10.151 + unsigned long shmem_frame = connect->shmem_frame; 10.152 + struct vm_struct *vma; 10.153 + pgprot_t prot; 10.154 + int error; 10.155 + blkif_t *blkif; 10.156 + 10.157 + blkif = blkif_find_by_handle(domid, handle); 10.158 + if ( unlikely(blkif == NULL) ) 10.159 + { 10.160 + DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", 10.161 + connect->domid, connect->blkif_handle); 10.162 + connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 10.163 + return; 10.164 + } 10.165 + 10.166 + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) 10.167 + { 10.168 + connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 10.169 + return; 10.170 + } 10.171 + 10.172 + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); 10.173 + error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), 10.174 + shmem_frame<<PAGE_SHIFT, PAGE_SIZE, 10.175 + prot, domid); 10.176 + if ( error != 0 ) 10.177 + { 10.178 + if ( error == -ENOMEM ) 10.179 + connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 10.180 + else if ( error == -EFAULT ) 10.181 + connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; 10.182 + else 10.183 + connect->status = BLKIF_BE_STATUS_ERROR; 10.184 + vfree(vma->addr); 10.185 + return; 10.186 + } 10.187 + 10.188 + if ( blkif->status != DISCONNECTED ) 10.189 + { 10.190 + connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; 10.191 + vfree(vma->addr); 10.192 + return; 10.193 + } 10.194 + 10.195 + blkif->evtchn = evtchn; 10.196 + blkif->irq = bind_evtchn_to_irq(evtchn); 10.197 + blkif->shmem_frame = shmem_frame; 10.198 + blkif->blk_ring_base = (blkif_ring_t *)vma->addr; 10.199 + blkif->status = CONNECTED; 10.200 + blkif_get(blkif); 10.201 + 10.202 + request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif); 10.203 + 10.204 + connect->status = BLKIF_BE_STATUS_OKAY; 10.205 +} 10.206 + 10.207 +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id) 10.208 +{ 10.209 + domid_t domid = disconnect->domid; 10.210 + unsigned int handle = disconnect->blkif_handle; 10.211 + blkif_t *blkif; 10.212 + 10.213 + blkif = blkif_find_by_handle(domid, handle); 10.214 + if ( unlikely(blkif == NULL) ) 10.215 + { 10.216 + DPRINTK("blkif_disconnect attempted for non-existent blkif" 10.217 + " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); 10.218 + disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 10.219 + return 1; /* Caller will send response error message. */ 10.220 + } 10.221 + 10.222 + if ( blkif->status == CONNECTED ) 10.223 + { 10.224 + blkif->status = DISCONNECTING; 10.225 + blkif->disconnect_rspid = rsp_id; 10.226 + wmb(); /* Let other CPUs see the status change. */ 10.227 + free_irq(blkif->irq, blkif); 10.228 + blkif_deschedule(blkif); 10.229 + blkif_put(blkif); 10.230 + return 0; /* Caller should not send response message. */ 10.231 + } 10.232 + 10.233 + disconnect->status = BLKIF_BE_STATUS_OKAY; 10.234 + return 1; 10.235 +} 10.236 + 10.237 +void __init blkif_interface_init(void) 10.238 +{ 10.239 + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 10.240 + 0, 0, NULL, NULL); 10.241 + memset(blkif_hash, 0, sizeof(blkif_hash)); 10.242 +}
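blkif_find_by_handle and blkif_create above maintain a fixed-size hash keyed on (domid, handle), with collisions chained through hash_next. The same structure modeled standalone:

    #include <stdio.h>
    #include <stdlib.h>

    #define HASHSZ 1024                    /* power of two, as in BLKIF_HASHSZ */
    #define HASH(d, h) (((unsigned)(d) ^ (unsigned)(h)) & (HASHSZ - 1))

    typedef struct node {
        unsigned int domid, handle;
        struct node *hash_next;            /* collision chain */
    } node_t;

    static node_t *tbl[HASHSZ];

    static node_t *find(unsigned int domid, unsigned int handle)
    {
        node_t *n = tbl[HASH(domid, handle)];
        while (n != NULL && (n->domid != domid || n->handle != handle))
            n = n->hash_next;
        return n;
    }

    static int insert(unsigned int domid, unsigned int handle)
    {
        node_t *n;
        if (find(domid, handle) != NULL)
            return -1;                     /* already exists */
        if ((n = calloc(1, sizeof(*n))) == NULL)
            return -1;
        n->domid = domid;
        n->handle = handle;
        n->hash_next = tbl[HASH(domid, handle)];
        tbl[HASH(domid, handle)] = n;      /* push at head of chain */
        return 0;
    }

    int main(void)
    {
        insert(1, 0);
        insert(2, 0);
        printf("found dom1: %s\n", find(1, 0) ? "yes" : "no");
        return 0;
    }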
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/vbd.c Sun Aug 01 15:13:24 2004 +0000 11.3 @@ -0,0 +1,540 @@ 11.4 +/****************************************************************************** 11.5 + * arch/xen/drivers/blkif/backend/vbd.c 11.6 + * 11.7 + * Routines for managing virtual block devices (VBDs). 11.8 + * 11.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 11.10 + */ 11.11 + 11.12 +#include "common.h" 11.13 + 11.14 +static dev_t vbd_map_devnum(blkif_pdev_t); 11.15 + 11.16 +void vbd_create(blkif_be_vbd_create_t *create) 11.17 +{ 11.18 + vbd_t *vbd; 11.19 + struct rb_node **rb_p, *rb_parent = NULL; 11.20 + blkif_t *blkif; 11.21 + blkif_vdev_t vdevice = create->vdevice; 11.22 + 11.23 + blkif = blkif_find_by_handle(create->domid, create->blkif_handle); 11.24 + if ( unlikely(blkif == NULL) ) 11.25 + { 11.26 + DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n", 11.27 + create->domid, create->blkif_handle); 11.28 + create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 11.29 + return; 11.30 + } 11.31 + 11.32 + spin_lock(&blkif->vbd_lock); 11.33 + 11.34 + rb_p = &blkif->vbd_rb.rb_node; 11.35 + while ( *rb_p != NULL ) 11.36 + { 11.37 + rb_parent = *rb_p; 11.38 + vbd = rb_entry(rb_parent, vbd_t, rb); 11.39 + if ( vdevice < vbd->vdevice ) 11.40 + { 11.41 + rb_p = &rb_parent->rb_left; 11.42 + } 11.43 + else if ( vdevice > vbd->vdevice ) 11.44 + { 11.45 + rb_p = &rb_parent->rb_right; 11.46 + } 11.47 + else 11.48 + { 11.49 + DPRINTK("vbd_create attempted for already existing vbd\n"); 11.50 + create->status = BLKIF_BE_STATUS_VBD_EXISTS; 11.51 + goto out; 11.52 + } 11.53 + } 11.54 + 11.55 + if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) ) 11.56 + { 11.57 + DPRINTK("vbd_create: out of memory\n"); 11.58 + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 11.59 + goto out; 11.60 + } 11.61 + 11.62 + vbd->vdevice = vdevice; 11.63 + vbd->readonly = create->readonly; 11.64 + vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; 11.65 + vbd->extents = NULL; 11.66 + 11.67 + rb_link_node(&vbd->rb, rb_parent, rb_p); 11.68 + rb_insert_color(&vbd->rb, &blkif->vbd_rb); 11.69 + 11.70 + DPRINTK("Successful creation of vdev=%04x (dom=%u)\n", 11.71 + vdevice, create->domid); 11.72 + create->status = BLKIF_BE_STATUS_OKAY; 11.73 + 11.74 + out: 11.75 + spin_unlock(&blkif->vbd_lock); 11.76 +} 11.77 + 11.78 + 11.79 +/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. 
*/ 11.80 +void vbd_grow(blkif_be_vbd_grow_t *grow) 11.81 +{ 11.82 + blkif_t *blkif; 11.83 + blkif_extent_le_t **px, *x; 11.84 + vbd_t *vbd = NULL; 11.85 + struct rb_node *rb; 11.86 + blkif_vdev_t vdevice = grow->vdevice; 11.87 +#if 0 11.88 + unsigned long sz; 11.89 +#endif 11.90 + 11.91 + 11.92 + blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle); 11.93 + if ( unlikely(blkif == NULL) ) 11.94 + { 11.95 + DPRINTK("vbd_grow attempted for non-existent blkif (%u,%u)\n", 11.96 + grow->domid, grow->blkif_handle); 11.97 + grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 11.98 + return; 11.99 + } 11.100 + 11.101 + spin_lock(&blkif->vbd_lock); 11.102 + 11.103 + rb = blkif->vbd_rb.rb_node; 11.104 + while ( rb != NULL ) 11.105 + { 11.106 + vbd = rb_entry(rb, vbd_t, rb); 11.107 + if ( vdevice < vbd->vdevice ) 11.108 + rb = rb->rb_left; 11.109 + else if ( vdevice > vbd->vdevice ) 11.110 + rb = rb->rb_right; 11.111 + else 11.112 + break; 11.113 + } 11.114 + 11.115 + if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) 11.116 + { 11.117 + DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n"); 11.118 + grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 11.119 + goto out; 11.120 + } 11.121 + 11.122 + if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), 11.123 + GFP_KERNEL)) == NULL) ) 11.124 + { 11.125 + DPRINTK("vbd_grow: out of memory\n"); 11.126 + grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 11.127 + goto out; 11.128 + } 11.129 + 11.130 + x->extent.device = grow->extent.device; 11.131 + /* XXXcl see comments at top of open_by_devnum */ 11.132 +#if 01 11.133 + x->bdev = open_by_devnum(vbd_map_devnum(x->extent.device), 11.134 + vbd->readonly ? FMODE_READ : FMODE_WRITE); 11.135 +#endif 11.136 + /* XXXcl maybe bd_claim? */ 11.137 + x->extent.sector_start = grow->extent.sector_start; 11.138 + x->extent.sector_length = grow->extent.sector_length; 11.139 + x->next = (blkif_extent_le_t *)NULL; 11.140 + 11.141 +#if 0 11.142 + if( !blk_size[MAJOR(x->extent.device)] ) 11.143 + { 11.144 + DPRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device); 11.145 + grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND; 11.146 + goto out; 11.147 + } 11.148 + 11.149 + /* convert blocks (1KB) to sectors */ 11.150 + sz = blk_size[MAJOR(x->extent.device)][MINOR(x->extent.device)] * 2; 11.151 +#endif 11.152 + 11.153 + if ( x->extent.sector_start > 0 ) 11.154 + { 11.155 + DPRINTK("vbd_grow: device %08x start not zero!\n", x->extent.device); 11.156 + grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND; 11.157 + goto out; 11.158 + } 11.159 + 11.160 +#if 0 11.161 + /* 11.162 + * NB. This test assumes sector_start == 0, which is always the case 11.163 + * in Xen 1.3. In fact the whole grow/shrink interface could do with 11.164 + * some simplification. 
11.165 + */ 11.166 + if ( x->extent.sector_length > sz ) 11.167 + x->extent.sector_length = sz; 11.168 + 11.169 + DPRINTK("vbd_grow: requested_len %llu actual_len %lu\n", 11.170 + x->extent.sector_length, sz); 11.171 +#endif 11.172 + 11.173 + for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 11.174 + continue; 11.175 + 11.176 + *px = x; 11.177 + 11.178 + DPRINTK("Successful grow of vdev=%04x (dom=%u)\n", 11.179 + vdevice, grow->domid); 11.180 + 11.181 + grow->status = BLKIF_BE_STATUS_OKAY; 11.182 + 11.183 + out: 11.184 + spin_unlock(&blkif->vbd_lock); 11.185 +} 11.186 + 11.187 + 11.188 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink) 11.189 +{ 11.190 + blkif_t *blkif; 11.191 + blkif_extent_le_t **px, *x; 11.192 + vbd_t *vbd = NULL; 11.193 + struct rb_node *rb; 11.194 + blkif_vdev_t vdevice = shrink->vdevice; 11.195 + 11.196 + blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle); 11.197 + if ( unlikely(blkif == NULL) ) 11.198 + { 11.199 + DPRINTK("vbd_shrink attempted for non-existent blkif (%u,%u)\n", 11.200 + shrink->domid, shrink->blkif_handle); 11.201 + shrink->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 11.202 + return; 11.203 + } 11.204 + 11.205 + spin_lock(&blkif->vbd_lock); 11.206 + 11.207 + rb = blkif->vbd_rb.rb_node; 11.208 + while ( rb != NULL ) 11.209 + { 11.210 + vbd = rb_entry(rb, vbd_t, rb); 11.211 + if ( vdevice < vbd->vdevice ) 11.212 + rb = rb->rb_left; 11.213 + else if ( vdevice > vbd->vdevice ) 11.214 + rb = rb->rb_right; 11.215 + else 11.216 + break; 11.217 + } 11.218 + 11.219 + if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) 11.220 + { 11.221 + shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 11.222 + goto out; 11.223 + } 11.224 + 11.225 + if ( unlikely(vbd->extents == NULL) ) 11.226 + { 11.227 + shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND; 11.228 + goto out; 11.229 + } 11.230 + 11.231 + /* Find the last extent. We now know that there is at least one. 
*/ 11.232 + for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next ) 11.233 + continue; 11.234 + 11.235 + x = *px; 11.236 + *px = x->next; 11.237 + kfree(x); 11.238 + 11.239 + shrink->status = BLKIF_BE_STATUS_OKAY; 11.240 + 11.241 + out: 11.242 + spin_unlock(&blkif->vbd_lock); 11.243 +} 11.244 + 11.245 + 11.246 +void vbd_destroy(blkif_be_vbd_destroy_t *destroy) 11.247 +{ 11.248 + blkif_t *blkif; 11.249 + vbd_t *vbd; 11.250 + struct rb_node *rb; 11.251 + blkif_extent_le_t *x, *t; 11.252 + blkif_vdev_t vdevice = destroy->vdevice; 11.253 + 11.254 + blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle); 11.255 + if ( unlikely(blkif == NULL) ) 11.256 + { 11.257 + DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 11.258 + destroy->domid, destroy->blkif_handle); 11.259 + destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 11.260 + return; 11.261 + } 11.262 + 11.263 + spin_lock(&blkif->vbd_lock); 11.264 + 11.265 + rb = blkif->vbd_rb.rb_node; 11.266 + while ( rb != NULL ) 11.267 + { 11.268 + vbd = rb_entry(rb, vbd_t, rb); 11.269 + if ( vdevice < vbd->vdevice ) 11.270 + rb = rb->rb_left; 11.271 + else if ( vdevice > vbd->vdevice ) 11.272 + rb = rb->rb_right; 11.273 + else 11.274 + goto found; 11.275 + } 11.276 + 11.277 + destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 11.278 + goto out; 11.279 + 11.280 + found: 11.281 + rb_erase(rb, &blkif->vbd_rb); 11.282 + x = vbd->extents; 11.283 + kfree(vbd); 11.284 + 11.285 + while ( x != NULL ) 11.286 + { 11.287 + t = x->next; 11.288 + kfree(x); 11.289 + x = t; 11.290 + } 11.291 + 11.292 + out: 11.293 + spin_unlock(&blkif->vbd_lock); 11.294 +} 11.295 + 11.296 + 11.297 +void destroy_all_vbds(blkif_t *blkif) 11.298 +{ 11.299 + vbd_t *vbd; 11.300 + struct rb_node *rb; 11.301 + blkif_extent_le_t *x, *t; 11.302 + 11.303 + spin_lock(&blkif->vbd_lock); 11.304 + 11.305 + while ( (rb = blkif->vbd_rb.rb_node) != NULL ) 11.306 + { 11.307 + vbd = rb_entry(rb, vbd_t, rb); 11.308 + 11.309 + rb_erase(rb, &blkif->vbd_rb); 11.310 + x = vbd->extents; 11.311 + kfree(vbd); 11.312 + 11.313 + while ( x != NULL ) 11.314 + { 11.315 + t = x->next; 11.316 + kfree(x); 11.317 + x = t; 11.318 + } 11.319 + } 11.320 + 11.321 + spin_unlock(&blkif->vbd_lock); 11.322 +} 11.323 + 11.324 + 11.325 +static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd) 11.326 +{ 11.327 + blkif_extent_le_t *x; 11.328 + 11.329 + vbd_info->device = vbd->vdevice; 11.330 + vbd_info->info = vbd->type; 11.331 + if ( vbd->readonly ) 11.332 + vbd_info->info |= VDISK_FLAG_RO; 11.333 + vbd_info->capacity = 0ULL; 11.334 + for ( x = vbd->extents; x != NULL; x = x->next ) 11.335 + vbd_info->capacity += x->extent.sector_length; 11.336 + 11.337 + return 0; 11.338 +} 11.339 + 11.340 + 11.341 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds) 11.342 +{ 11.343 + int rc = 0, nr_vbds = 0; 11.344 + struct rb_node *rb; 11.345 + 11.346 + spin_lock(&blkif->vbd_lock); 11.347 + 11.348 + if ( (rb = blkif->vbd_rb.rb_node) == NULL ) 11.349 + goto out; 11.350 + 11.351 + new_subtree: 11.352 + /* STEP 1. Find least node (it'll be left-most). */ 11.353 + while ( rb->rb_left != NULL ) 11.354 + rb = rb->rb_left; 11.355 + 11.356 + for ( ; ; ) 11.357 + { 11.358 + /* STEP 2. Dealt with left subtree. Now process current node. */ 11.359 + if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds], 11.360 + rb_entry(rb, vbd_t, rb))) != 0 ) 11.361 + goto out; 11.362 + if ( ++nr_vbds == max_vbds ) 11.363 + goto out; 11.364 + 11.365 + /* STEP 3. Process right subtree, if any. 
*/ 11.366 + if ( rb->rb_right != NULL ) 11.367 + { 11.368 + rb = rb->rb_right; 11.369 + goto new_subtree; 11.370 + } 11.371 + 11.372 + /* STEP 4. Done both subtrees. Head back through ancestors. */ 11.373 + for ( ; ; ) 11.374 + { 11.375 + /* We're done when we get back to the root node. */ 11.376 + if ( rb->rb_parent == NULL ) 11.377 + goto out; 11.378 + /* If we are left of parent, then parent is next to process. */ 11.379 + if ( rb->rb_parent->rb_left == rb ) 11.380 + break; 11.381 + /* If we are right of parent, then we climb to grandparent. */ 11.382 + rb = rb->rb_parent; 11.383 + } 11.384 + 11.385 + rb = rb->rb_parent; 11.386 + } 11.387 + 11.388 + out: 11.389 + spin_unlock(&blkif->vbd_lock); 11.390 + return (rc == 0) ? nr_vbds : rc; 11.391 +} 11.392 + 11.393 + 11.394 +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation) 11.395 +{ 11.396 + blkif_extent_le_t *x; 11.397 + vbd_t *vbd; 11.398 + struct rb_node *rb; 11.399 + blkif_sector_t sec_off; 11.400 + unsigned long nr_secs; 11.401 + 11.402 + spin_lock(&blkif->vbd_lock); 11.403 + 11.404 + rb = blkif->vbd_rb.rb_node; 11.405 + while ( rb != NULL ) 11.406 + { 11.407 + vbd = rb_entry(rb, vbd_t, rb); 11.408 + if ( pseg->ps_device < vbd->vdevice ) 11.409 + rb = rb->rb_left; 11.410 + else if ( pseg->ps_device > vbd->vdevice ) 11.411 + rb = rb->rb_right; 11.412 + else 11.413 + goto found; 11.414 + } 11.415 + 11.416 + DPRINTK("vbd_translate: domain %u attempted to access " 11.417 + "non-existent VBD.\n", blkif->domid); 11.418 + 11.419 + spin_unlock(&blkif->vbd_lock); 11.420 + return -ENODEV; 11.421 + 11.422 + found: 11.423 + 11.424 + if ( (operation == WRITE) && vbd->readonly ) 11.425 + { 11.426 + spin_unlock(&blkif->vbd_lock); 11.427 + return -EACCES; 11.428 + } 11.429 + 11.430 + /* 11.431 + * Now iterate through the list of blkif_extents, working out which should 11.432 + * be used to perform the translation. 11.433 + */ 11.434 + sec_off = pseg->sector_number; 11.435 + nr_secs = pseg->nr_sects; 11.436 + for ( x = vbd->extents; x != NULL; x = x->next ) 11.437 + { 11.438 + if ( sec_off < x->extent.sector_length ) 11.439 + { 11.440 +#if 0 11.441 + pseg->ps_device = x->extent.device; 11.442 +#else 11.443 + pseg->ps_bdev = x->bdev; 11.444 +#endif 11.445 + pseg->sector_number = x->extent.sector_start + sec_off; 11.446 + if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) ) 11.447 + goto overrun; 11.448 + spin_unlock(&blkif->vbd_lock); 11.449 + return 1; 11.450 + } 11.451 + sec_off -= x->extent.sector_length; 11.452 + } 11.453 + 11.454 + DPRINTK("vbd_translate: end of vbd.\n"); 11.455 + spin_unlock(&blkif->vbd_lock); 11.456 + return -EACCES; 11.457 + 11.458 + /* 11.459 + * Here we deal with overrun onto the following extent. We don't deal with 11.460 + * overrun of more than one boundary since each request is restricted to 11.461 + * 2^9 512-byte sectors, so it should be trivial for control software to 11.462 + * ensure that extents are large enough to prevent excessive overrun. 11.463 + */ 11.464 + overrun: 11.465 + 11.466 + /* Adjust length of first chunk to run to end of first extent. */ 11.467 + pseg[0].nr_sects = x->extent.sector_length - sec_off; 11.468 + 11.469 + /* Set second chunk buffer and length to start where first chunk ended. */ 11.470 + pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9); 11.471 + pseg[1].nr_sects = nr_secs - pseg[0].nr_sects; 11.472 + 11.473 + /* Now move to the next extent. Check it exists and is long enough! 
*/ 11.474 + if ( unlikely((x = x->next) == NULL) || 11.475 + unlikely(x->extent.sector_length < pseg[1].nr_sects) ) 11.476 + { 11.477 + DPRINTK("vbd_translate: multiple overruns or end of vbd.\n"); 11.478 + spin_unlock(&blkif->vbd_lock); 11.479 + return -EACCES; 11.480 + } 11.481 + 11.482 + /* Store the real device and start sector for the second chunk. */ 11.483 +#if 0 11.484 + pseg[1].ps_device = x->extent.device; 11.485 +#else 11.486 + pseg[1].ps_bdev = x->bdev; 11.487 +#endif 11.488 + pseg[1].sector_number = x->extent.sector_start; 11.489 + 11.490 + spin_unlock(&blkif->vbd_lock); 11.491 + return 2; 11.492 +} 11.493 + 11.494 +#define MAJOR_XEN(dev) ((dev)>>8) 11.495 +#define MINOR_XEN(dev) ((dev) & 0xff) 11.496 + 11.497 +#define XEN_IDE0_MAJOR IDE0_MAJOR 11.498 +#define XEN_IDE1_MAJOR IDE1_MAJOR 11.499 +#define XEN_IDE2_MAJOR IDE2_MAJOR 11.500 +#define XEN_IDE3_MAJOR IDE3_MAJOR 11.501 +#define XEN_IDE4_MAJOR IDE4_MAJOR 11.502 +#define XEN_IDE5_MAJOR IDE5_MAJOR 11.503 +#define XEN_IDE6_MAJOR IDE6_MAJOR 11.504 +#define XEN_IDE7_MAJOR IDE7_MAJOR 11.505 +#define XEN_IDE8_MAJOR IDE8_MAJOR 11.506 +#define XEN_IDE9_MAJOR IDE9_MAJOR 11.507 +#define XEN_SCSI_DISK0_MAJOR SCSI_DISK0_MAJOR 11.508 +#define XEN_SCSI_DISK1_MAJOR SCSI_DISK1_MAJOR 11.509 +#define XEN_SCSI_DISK2_MAJOR SCSI_DISK2_MAJOR 11.510 +#define XEN_SCSI_DISK3_MAJOR SCSI_DISK3_MAJOR 11.511 +#define XEN_SCSI_DISK4_MAJOR SCSI_DISK4_MAJOR 11.512 +#define XEN_SCSI_DISK5_MAJOR SCSI_DISK5_MAJOR 11.513 +#define XEN_SCSI_DISK6_MAJOR SCSI_DISK6_MAJOR 11.514 +#define XEN_SCSI_DISK7_MAJOR SCSI_DISK7_MAJOR 11.515 +#define XEN_SCSI_CDROM_MAJOR SCSI_CDROM_MAJOR 11.516 + 11.517 +static dev_t vbd_map_devnum(blkif_pdev_t cookie) 11.518 +{ 11.519 + int new_major; 11.520 + int major = MAJOR_XEN(cookie); 11.521 + int minor = MINOR_XEN(cookie); 11.522 + 11.523 + switch (major) { 11.524 + case XEN_IDE0_MAJOR: new_major = IDE0_MAJOR; break; 11.525 + case XEN_IDE1_MAJOR: new_major = IDE1_MAJOR; break; 11.526 + case XEN_IDE2_MAJOR: new_major = IDE2_MAJOR; break; 11.527 + case XEN_IDE3_MAJOR: new_major = IDE3_MAJOR; break; 11.528 + case XEN_IDE4_MAJOR: new_major = IDE4_MAJOR; break; 11.529 + case XEN_IDE5_MAJOR: new_major = IDE5_MAJOR; break; 11.530 + case XEN_IDE6_MAJOR: new_major = IDE6_MAJOR; break; 11.531 + case XEN_IDE7_MAJOR: new_major = IDE7_MAJOR; break; 11.532 + case XEN_IDE8_MAJOR: new_major = IDE8_MAJOR; break; 11.533 + case XEN_IDE9_MAJOR: new_major = IDE9_MAJOR; break; 11.534 + case XEN_SCSI_DISK0_MAJOR: new_major = SCSI_DISK0_MAJOR; break; 11.535 + case XEN_SCSI_DISK1_MAJOR ... XEN_SCSI_DISK7_MAJOR: 11.536 + new_major = SCSI_DISK1_MAJOR + major - XEN_SCSI_DISK1_MAJOR; 11.537 + break; 11.538 + case XEN_SCSI_CDROM_MAJOR: new_major = SCSI_CDROM_MAJOR; break; 11.539 + default: new_major = 0; break; 11.540 + } 11.541 + 11.542 + return MKDEV(new_major, minor); 11.543 +}
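vbd_translate above resolves a virtual sector range by walking the extent list, subtracting each extent's length from the offset until the request lands inside one, and permits at most one overrun onto the following extent. The same walk modeled outside the kernel (sector arithmetic only; device fields and locking dropped):

    #include <stdio.h>

    typedef struct extent {
        unsigned long long start, length;  /* physical start and length, in sectors */
        struct extent *next;
    } extent_t;

    typedef struct {
        unsigned long long sector;         /* physical start sector */
        unsigned long nr_sects;
    } pseg_t;

    static int translate(extent_t *x, unsigned long long sec_off,
                         unsigned long nr_secs, pseg_t pseg[2])
    {
        for (; x != NULL; x = x->next) {
            if (sec_off < x->length) {
                pseg[0].sector = x->start + sec_off;
                if (sec_off + nr_secs <= x->length) {
                    pseg[0].nr_sects = nr_secs;
                    return 1;              /* wholly inside one extent */
                }
                pseg[0].nr_sects = x->length - sec_off;
                pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
                x = x->next;
                if (x == NULL || x->length < pseg[1].nr_sects)
                    return -1;             /* multiple overruns: refused */
                pseg[1].sector = x->start;
                return 2;                  /* split across two extents */
            }
            sec_off -= x->length;          /* skip this extent entirely */
        }
        return -1;                         /* ran off the end of the vbd */
    }

    int main(void)
    {
        extent_t e2 = { 5000, 100, NULL }, e1 = { 1000, 100, &e2 };
        pseg_t ps[2];
        int n = translate(&e1, 90, 20, ps);   /* straddles the e1/e2 boundary */
        printf("%d segs: [%llu,+%lu] [%llu,+%lu]\n", n,
               ps[0].sector, ps[0].nr_sects, ps[1].sector, ps[1].nr_sects);
        return 0;
    }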
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 12.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/Kconfig Sun Aug 01 15:13:24 2004 +0000 12.3 @@ -0,0 +1,6 @@ 12.4 + 12.5 +config XENBLOCK 12.6 + tristate "Block device driver" 12.7 + depends on ARCH_XEN 12.8 + help 12.9 + Block device driver for Xen
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 13.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/Makefile Sun Aug 01 15:13:24 2004 +0000 13.3 @@ -0,0 +1,3 @@ 13.4 + 13.5 +obj-y := blkfront.o vbd.o 13.6 +
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
14.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c Sun Aug 01 15:13:24 2004 +0000
14.3 @@ -0,0 +1,653 @@
14.4 +/******************************************************************************
14.5 + * blkfront.c
14.6 + *
14.7 + * XenLinux virtual block-device driver.
14.8 + *
14.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
14.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
14.11 + * Copyright (c) 2004, Christian Limpach
14.12 + */
14.13 +
14.14 +#include "block.h"
14.15 +#include <linux/cdrom.h>
14.16 +#include <linux/sched.h>
14.17 +#include <linux/interrupt.h>
14.18 +#include <scsi/scsi.h>
14.19 +#include <asm-xen/ctrl_if.h>
14.20 +
14.21 +typedef unsigned char byte; /* from linux/ide.h */
14.22 +
14.23 +#define BLKIF_STATE_CLOSED 0
14.24 +#define BLKIF_STATE_DISCONNECTED 1
14.25 +#define BLKIF_STATE_CONNECTED 2
14.26 +static unsigned int blkif_state = BLKIF_STATE_CLOSED;
14.27 +static unsigned int blkif_evtchn, blkif_irq;
14.28 +
14.29 +static int blkif_control_rsp_valid;
14.30 +static blkif_response_t blkif_control_rsp;
14.31 +
14.32 +static blkif_ring_t *blk_ring;
14.33 +static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
14.34 +static BLKIF_RING_IDX req_prod; /* Private request producer. */
14.35 +
14.36 +static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for
14.37 + * recovery. Responses not stored here. */
14.38 +static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for
14.39 + * recovery */
14.40 +static int recovery = 0; /* "Recovery in progress" flag. Protected
14.41 + * by the blkif_io_lock */
14.42 +
14.43 +/* We plug the I/O ring if the driver is suspended or if the ring is full. */
14.44 +#define BLKIF_RING_FULL (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
14.45 + (blkif_state != BLKIF_STATE_CONNECTED))
14.46 +
14.47 +/*
14.48 + * Request queues with outstanding work, but ring is currently full.
14.49 + * We need no special lock here, as we always access this with the
14.50 + * blkif_io_lock held. We only need a small maximum list.
14.51 + */
14.52 +#define MAX_PENDING 8
14.53 +static request_queue_t *pending_queues[MAX_PENDING];
14.54 +static int nr_pending;
14.55 +
14.56 +static inline void flush_requests(void)
14.57 +{
14.58 +
14.59 + blk_ring->req_prod = req_prod;
14.60 +
14.61 + notify_via_evtchn(blkif_evtchn);
14.62 +}
14.63 +
14.64 +
14.65 +#if 0
14.66 +/*
14.67 + * blkif_update_int/update_vbds_task - handle VBD update events.
14.68 + * Schedule a task for keventd to run, which will update the VBDs and perform
14.69 + * the corresponding updates to our view of VBD state.
14.70 + */
14.71 +static struct tq_struct update_tq;
14.72 +static void update_vbds_task(void *unused)
14.73 +{
14.74 + xlvbd_update_vbds();
14.75 +}
14.76 +#endif
14.77 +
14.78 +
14.79 +int blkif_open(struct inode *inode, struct file *filep)
14.80 +{
14.81 + struct gendisk *gd = inode->i_bdev->bd_disk;
14.82 + struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
14.83 +
14.84 + /* Update of usage count is protected by per-device semaphore. */
14.85 + di->mi->usage++;
14.86 +
14.87 + return 0;
14.88 +}
14.89 +
14.90 +
14.91 +int blkif_release(struct inode *inode, struct file *filep)
14.92 +{
14.93 + struct gendisk *gd = inode->i_bdev->bd_disk;
14.94 + struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
14.95 +
14.96 + /*
14.97 + * When usage drops to zero it may allow more VBD updates to occur.
14.98 + * Update of usage count is protected by a per-device semaphore. 14.99 + */ 14.100 + if (--di->mi->usage == 0) { 14.101 +#if 0 14.102 + update_tq.routine = update_vbds_task; 14.103 + schedule_task(&update_tq); 14.104 +#endif 14.105 + } 14.106 + 14.107 + return 0; 14.108 +} 14.109 + 14.110 + 14.111 +int blkif_ioctl(struct inode *inode, struct file *filep, 14.112 + unsigned command, unsigned long argument) 14.113 +{ 14.114 + /* struct gendisk *gd = inode->i_bdev->bd_disk; */ 14.115 + 14.116 + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 14.117 + command, (long)argument, inode->i_rdev); 14.118 + 14.119 + switch (command) { 14.120 + 14.121 + case HDIO_GETGEO: 14.122 + /* return ENOSYS to use defaults */ 14.123 + return -ENOSYS; 14.124 + 14.125 + default: 14.126 + printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 14.127 + command); 14.128 + return -ENOSYS; 14.129 + } 14.130 + 14.131 + return 0; 14.132 +} 14.133 + 14.134 +#if 0 14.135 +/* check media change: should probably do something here in some cases :-) */ 14.136 +int blkif_check(kdev_t dev) 14.137 +{ 14.138 + DPRINTK("blkif_check\n"); 14.139 + return 0; 14.140 +} 14.141 + 14.142 +int blkif_revalidate(kdev_t dev) 14.143 +{ 14.144 + struct block_device *bd; 14.145 + struct gendisk *gd; 14.146 + xen_block_t *disk; 14.147 + unsigned long capacity; 14.148 + int i, rc = 0; 14.149 + 14.150 + if ( (bd = bdget(dev)) == NULL ) 14.151 + return -EINVAL; 14.152 + 14.153 + /* 14.154 + * Update of partition info, and check of usage count, is protected 14.155 + * by the per-block-device semaphore. 14.156 + */ 14.157 + down(&bd->bd_sem); 14.158 + 14.159 + if ( ((gd = get_gendisk(dev)) == NULL) || 14.160 + ((disk = xldev_to_xldisk(dev)) == NULL) || 14.161 + ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) 14.162 + { 14.163 + rc = -EINVAL; 14.164 + goto out; 14.165 + } 14.166 + 14.167 + if ( disk->usage > 1 ) 14.168 + { 14.169 + rc = -EBUSY; 14.170 + goto out; 14.171 + } 14.172 + 14.173 + /* Only reread partition table if VBDs aren't mapped to partitions. */ 14.174 + if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) 14.175 + { 14.176 + for ( i = gd->max_p - 1; i >= 0; i-- ) 14.177 + { 14.178 + invalidate_device(dev+i, 1); 14.179 + gd->part[MINOR(dev+i)].start_sect = 0; 14.180 + gd->part[MINOR(dev+i)].nr_sects = 0; 14.181 + gd->sizes[MINOR(dev+i)] = 0; 14.182 + } 14.183 + 14.184 + grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); 14.185 + } 14.186 + 14.187 + out: 14.188 + up(&bd->bd_sem); 14.189 + bdput(bd); 14.190 + return rc; 14.191 +} 14.192 +#endif 14.193 + 14.194 + 14.195 +/* 14.196 + * blkif_queue_request 14.197 + * 14.198 + * request block io 14.199 + * 14.200 + * id: for guest use only. 14.201 + * operation: BLKIF_OP_{READ,WRITE,PROBE} 14.202 + * buffer: buffer to read/write into. this should be a 14.203 + * virtual address in the guest os. 14.204 + */ 14.205 +static int blkif_queue_request(struct request *req) 14.206 +{ 14.207 + struct xlbd_disk_info *di = 14.208 + (struct xlbd_disk_info *)req->rq_disk->private_data; 14.209 + unsigned long buffer_ma; 14.210 + blkif_request_t *ring_req; 14.211 + struct bio *bio; 14.212 + struct bio_vec *bvec; 14.213 + int idx, s; 14.214 + unsigned int fsect, lsect; 14.215 + 14.216 + if (unlikely(blkif_state != BLKIF_STATE_CONNECTED)) 14.217 + return 1; 14.218 + 14.219 + /* Fill out a communications ring structure. 
*/ 14.220 + ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req; 14.221 + ring_req->id = (unsigned long)req; 14.222 + ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : 14.223 + BLKIF_OP_READ; 14.224 + ring_req->sector_number = (blkif_sector_t)req->sector; 14.225 + ring_req->device = di->xd_device; 14.226 + 14.227 + s = 0; 14.228 + ring_req->nr_segments = 0; 14.229 + rq_for_each_bio(bio, req) { 14.230 + bio_for_each_segment(bvec, bio, idx) { 14.231 + buffer_ma = 14.232 + phys_to_machine(page_to_phys(bvec->bv_page)); 14.233 + if (unlikely((buffer_ma & ((1<<9)-1)) != 0)) 14.234 + BUG(); 14.235 + 14.236 + fsect = bvec->bv_offset >> 9; 14.237 + lsect = fsect + (bvec->bv_len >> 9) - 1; 14.238 + if (unlikely(lsect > 7)) 14.239 + BUG(); 14.240 + 14.241 + ring_req->frame_and_sects[ring_req->nr_segments++] = 14.242 + buffer_ma | (fsect << 3) | lsect; 14.243 + s += bvec->bv_len >> 9; 14.244 + } 14.245 + } 14.246 + 14.247 + req_prod++; 14.248 + 14.249 + /* Keep a private copy so we can reissue requests when recovering. */ 14.250 + blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req = 14.251 + *ring_req; 14.252 + blk_ring_rec->req_prod++; 14.253 + 14.254 + return 0; 14.255 +} 14.256 + 14.257 +/* 14.258 + * do_blkif_request 14.259 + * read a block; request is in a request queue 14.260 + */ 14.261 +void do_blkif_request(request_queue_t *rq) 14.262 +{ 14.263 + struct request *req; 14.264 + int queued; 14.265 + 14.266 + DPRINTK("Entered do_blkif_request\n"); 14.267 + 14.268 + queued = 0; 14.269 + 14.270 + while ((req = elv_next_request(rq)) != NULL) { 14.271 + if (!blk_fs_request(req)) { 14.272 + end_request(req, 0); 14.273 + continue; 14.274 + } 14.275 + 14.276 + if (BLKIF_RING_FULL) { 14.277 + blk_stop_queue(rq); 14.278 + break; 14.279 + } 14.280 + DPRINTK("do_blkif_request %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n", 14.281 + req, req->cmd, req->sector, req->current_nr_sectors, 14.282 + req->nr_sectors, req->buffer, 14.283 + rq_data_dir(req) ? "write" : "read"); 14.284 + blkdev_dequeue_request(req); 14.285 + if (blkif_queue_request(req)) { 14.286 + blk_stop_queue(rq); 14.287 + break; 14.288 + } 14.289 + queued++; 14.290 + } 14.291 + 14.292 + if (queued != 0) 14.293 + flush_requests(); 14.294 +} 14.295 + 14.296 + 14.297 +static void kick_pending_request_queues(void) 14.298 +{ 14.299 + /* We kick pending request queues if the ring is reasonably empty. */ 14.300 + if ( (nr_pending != 0) && 14.301 + ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) ) 14.302 + { 14.303 + /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ 14.304 + while ( (nr_pending != 0) && !BLKIF_RING_FULL ) 14.305 + do_blkif_request(pending_queues[--nr_pending]); 14.306 + } 14.307 +} 14.308 + 14.309 + 14.310 +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) 14.311 +{ 14.312 + struct request *req; 14.313 + blkif_response_t *bret; 14.314 + BLKIF_RING_IDX i; 14.315 + unsigned long flags; 14.316 + 14.317 + spin_lock_irqsave(&blkif_io_lock, flags); 14.318 + 14.319 + if (unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery)) { 14.320 + printk("Bailed out\n"); 14.321 + 14.322 + spin_unlock_irqrestore(&blkif_io_lock, flags); 14.323 + return IRQ_HANDLED; 14.324 + } 14.325 + 14.326 + for (i = resp_cons; i != blk_ring->resp_prod; i++) { 14.327 + bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp; 14.328 + switch (bret->operation) { 14.329 + case BLKIF_OP_READ: 14.330 + case BLKIF_OP_WRITE: 14.331 + if (unlikely(bret->status != BLKIF_RSP_OKAY)) 14.332 + DPRINTK("Bad return from blkdev data request: %lx\n", 14.333 + bret->status); 14.334 + req = (struct request *)bret->id; 14.335 + /* XXXcl pass up status */ 14.336 + if (unlikely(end_that_request_first(req, 1, 14.337 + req->hard_nr_sectors))) 14.338 + BUG(); 14.339 + 14.340 + end_that_request_last(req); 14.341 + break; 14.342 + case BLKIF_OP_PROBE: 14.343 + memcpy(&blkif_control_rsp, bret, sizeof(*bret)); 14.344 + blkif_control_rsp_valid = 1; 14.345 + break; 14.346 + default: 14.347 + BUG(); 14.348 + } 14.349 + } 14.350 + 14.351 + resp_cons = i; 14.352 + resp_cons_rec = i; 14.353 + 14.354 + if (xlbd_blk_queue && 14.355 + test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags)) { 14.356 + blk_start_queue(xlbd_blk_queue); 14.357 + /* XXXcl call to request_fn should not be needed but 14.358 + * we get stuck without... needs investigating 14.359 + */ 14.360 + xlbd_blk_queue->request_fn(xlbd_blk_queue); 14.361 + } 14.362 + 14.363 + spin_unlock_irqrestore(&blkif_io_lock, flags); 14.364 + 14.365 + return IRQ_HANDLED; 14.366 +} 14.367 + 14.368 + 14.369 +void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) 14.370 +{ 14.371 + unsigned long flags; 14.372 + 14.373 + retry: 14.374 + while ( (req_prod - resp_cons) == BLKIF_RING_SIZE ) 14.375 + { 14.376 + set_current_state(TASK_INTERRUPTIBLE); 14.377 + schedule_timeout(1); 14.378 + } 14.379 + 14.380 + spin_lock_irqsave(&blkif_io_lock, flags); 14.381 + if ( (req_prod - resp_cons) == BLKIF_RING_SIZE ) 14.382 + { 14.383 + spin_unlock_irqrestore(&blkif_io_lock, flags); 14.384 + goto retry; 14.385 + } 14.386 + 14.387 + memcpy(&blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req, req, sizeof(*req)); 14.388 + memcpy(&blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req, 14.389 + req, sizeof(*req)); 14.390 + req_prod++; 14.391 + flush_requests(); 14.392 + 14.393 + spin_unlock_irqrestore(&blkif_io_lock, flags); 14.394 + 14.395 + while ( !blkif_control_rsp_valid ) 14.396 + { 14.397 + set_current_state(TASK_INTERRUPTIBLE); 14.398 + schedule_timeout(1); 14.399 + } 14.400 + 14.401 + memcpy(rsp, &blkif_control_rsp, sizeof(*rsp)); 14.402 + blkif_control_rsp_valid = 0; 14.403 +} 14.404 + 14.405 + 14.406 +static void blkif_status_change(blkif_fe_interface_status_changed_t *status) 14.407 +{ 14.408 + ctrl_msg_t cmsg; 14.409 + blkif_fe_interface_connect_t up; 14.410 + 14.411 + if ( status->handle != 0 ) 14.412 + { 14.413 + printk(KERN_WARNING "Status change on unsupported blkif %d\n", 14.414 + status->handle); 14.415 + return; 14.416 + } 14.417 + 14.418 + switch ( status->status ) 14.419 + { 14.420 + case 
BLKIF_INTERFACE_STATUS_DESTROYED: 14.421 + printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n", 14.422 + blkif_state); 14.423 + break; 14.424 + 14.425 + case BLKIF_INTERFACE_STATUS_DISCONNECTED: 14.426 + if ( blkif_state != BLKIF_STATE_CLOSED ) 14.427 + { 14.428 + printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message" 14.429 + " in state %d\n", blkif_state); 14.430 + 14.431 + printk(KERN_INFO "VBD driver recovery in progress\n"); 14.432 + 14.433 + /* Prevent new requests being issued until we fix things up. */ 14.434 + spin_lock_irq(&blkif_io_lock); 14.435 + recovery = 1; 14.436 + blkif_state = BLKIF_STATE_DISCONNECTED; 14.437 + spin_unlock_irq(&blkif_io_lock); 14.438 + 14.439 + /* Free resources associated with old device channel. */ 14.440 + free_page((unsigned long)blk_ring); 14.441 + free_irq(blkif_irq, NULL); 14.442 + unbind_evtchn_from_irq(blkif_evtchn); 14.443 + } 14.444 + 14.445 + /* Move from CLOSED to DISCONNECTED state. */ 14.446 + blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL); 14.447 + blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; 14.448 + blkif_state = BLKIF_STATE_DISCONNECTED; 14.449 + 14.450 + /* Construct an interface-CONNECT message for the domain controller. */ 14.451 + cmsg.type = CMSG_BLKIF_FE; 14.452 + cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT; 14.453 + cmsg.length = sizeof(blkif_fe_interface_connect_t); 14.454 + up.handle = 0; 14.455 + up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT; 14.456 + memcpy(cmsg.msg, &up, sizeof(up)); 14.457 + 14.458 + /* Tell the controller to bring up the interface. */ 14.459 + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 14.460 + break; 14.461 + 14.462 + case BLKIF_INTERFACE_STATUS_CONNECTED: 14.463 + if ( blkif_state == BLKIF_STATE_CLOSED ) 14.464 + { 14.465 + printk(KERN_WARNING "Unexpected blkif-CONNECTED message" 14.466 + " in state %d\n", blkif_state); 14.467 + break; 14.468 + } 14.469 + 14.470 + blkif_evtchn = status->evtchn; 14.471 + blkif_irq = bind_evtchn_to_irq(blkif_evtchn); 14.472 + (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL); 14.473 + 14.474 + if ( recovery ) 14.475 + { 14.476 + int i; 14.477 + 14.478 + /* Shouldn't need the blkif_io_lock here - the device is 14.479 + * plugged and the recovery flag prevents the interrupt handler 14.480 + * changing anything. */ 14.481 + 14.482 + /* Reissue requests from the private block ring. */ 14.483 + for ( i = 0; 14.484 + resp_cons_rec < blk_ring_rec->req_prod; 14.485 + resp_cons_rec++, i++ ) 14.486 + { 14.487 + blk_ring->ring[i].req 14.488 + = blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req; 14.489 + } 14.490 + 14.491 + /* Reset the private block ring to match the new ring. */ 14.492 + memcpy(blk_ring_rec, blk_ring, sizeof(*blk_ring)); 14.493 + resp_cons_rec = 0; 14.494 + 14.495 + /* blk_ring->req_prod will be set when we flush_requests().*/ 14.496 + blk_ring_rec->req_prod = req_prod = i; 14.497 + 14.498 + wmb(); 14.499 + 14.500 + /* Switch off recovery mode, using a memory barrier to ensure that 14.501 + * it's seen before we flush requests - we don't want to miss any 14.502 + * interrupts. */ 14.503 + recovery = 0; 14.504 + wmb(); 14.505 + 14.506 + /* Kicks things back into life. */ 14.507 + flush_requests(); 14.508 + } 14.509 + else 14.510 + { 14.511 + /* Probe for discs that are attached to the interface. */ 14.512 + xlvbd_init(); 14.513 + } 14.514 + 14.515 + blkif_state = BLKIF_STATE_CONNECTED; 14.516 + 14.517 + /* Kick pending requests. 
*/ 14.518 + spin_lock_irq(&blkif_io_lock); 14.519 + kick_pending_request_queues(); 14.520 + spin_unlock_irq(&blkif_io_lock); 14.521 + 14.522 + break; 14.523 + 14.524 + default: 14.525 + printk(KERN_WARNING "Status change to unknown value %d\n", 14.526 + status->status); 14.527 + break; 14.528 + } 14.529 +} 14.530 + 14.531 + 14.532 +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) 14.533 +{ 14.534 + switch ( msg->subtype ) 14.535 + { 14.536 + case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED: 14.537 + if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) ) 14.538 + goto parse_error; 14.539 + blkif_status_change((blkif_fe_interface_status_changed_t *) 14.540 + &msg->msg[0]); 14.541 + break; 14.542 +#if 0 14.543 + case CMSG_BLKIF_FE_VBD_STATUS_CHANGED: 14.544 + update_tq.routine = update_vbds_task; 14.545 + schedule_task(&update_tq); 14.546 + break; 14.547 +#endif 14.548 + default: 14.549 + goto parse_error; 14.550 + } 14.551 + 14.552 + ctrl_if_send_response(msg); 14.553 + return; 14.554 + 14.555 + parse_error: 14.556 + msg->length = 0; 14.557 + ctrl_if_send_response(msg); 14.558 +} 14.559 + 14.560 + 14.561 +int __init xlblk_init(void) 14.562 +{ 14.563 + ctrl_msg_t cmsg; 14.564 + blkif_fe_driver_status_changed_t st; 14.565 + 14.566 + if ( (start_info.flags & SIF_INITDOMAIN) 14.567 + || (start_info.flags & SIF_BLK_BE_DOMAIN) ) 14.568 + return 0; 14.569 + 14.570 + printk(KERN_INFO "Initialising Xen virtual block device\n"); 14.571 + 14.572 + blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL); 14.573 + memset(blk_ring_rec, 0, sizeof(*blk_ring_rec)); 14.574 + 14.575 + (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, 14.576 + CALLBACK_IN_BLOCKING_CONTEXT); 14.577 + 14.578 + /* Send a driver-UP notification to the domain controller. */ 14.579 + cmsg.type = CMSG_BLKIF_FE; 14.580 + cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED; 14.581 + cmsg.length = sizeof(blkif_fe_driver_status_changed_t); 14.582 + st.status = BLKIF_DRIVER_STATUS_UP; 14.583 + memcpy(cmsg.msg, &st, sizeof(st)); 14.584 + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 14.585 + 14.586 + /* 14.587 + * We should read 'nr_interfaces' from response message and wait 14.588 + * for notifications before proceeding. For now we assume that we 14.589 + * will be notified of exactly one interface. 
14.590 + */ 14.591 + while ( blkif_state != BLKIF_STATE_CONNECTED ) 14.592 + { 14.593 + set_current_state(TASK_INTERRUPTIBLE); 14.594 + schedule_timeout(1); 14.595 + } 14.596 + 14.597 + return 0; 14.598 +#if 0 14.599 + int error; 14.600 + 14.601 + reset_xlblk_interface(); 14.602 + 14.603 + xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV); 14.604 + xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD); 14.605 + 14.606 + error = request_irq(xlblk_response_irq, xlblk_response_int, 14.607 + SA_SAMPLE_RANDOM, "blkdev", NULL); 14.608 + if (error) { 14.609 + printk(KERN_ALERT "Could not allocate receive interrupt\n"); 14.610 + goto fail; 14.611 + } 14.612 + 14.613 + error = request_irq(xlblk_update_irq, xlblk_update_int, 14.614 + 0, "blkdev", NULL); 14.615 + if (error) { 14.616 + printk(KERN_ALERT 14.617 + "Could not allocate block update interrupt\n"); 14.618 + goto fail; 14.619 + } 14.620 + 14.621 + (void)xlvbd_init(); 14.622 + 14.623 + return 0; 14.624 + 14.625 + fail: 14.626 + return error; 14.627 +#endif 14.628 +} 14.629 + 14.630 + 14.631 +static void __exit xlblk_cleanup(void) 14.632 +{ 14.633 + /* XXX FIXME */ 14.634 + BUG(); 14.635 +#if 0 14.636 + /* xlvbd_cleanup(); */ 14.637 + free_irq(xlblk_response_irq, NULL); 14.638 + free_irq(xlblk_update_irq, NULL); 14.639 + unbind_virq_from_irq(VIRQ_BLKDEV); 14.640 + unbind_virq_from_irq(VIRQ_VBD_UPD); 14.641 +#endif 14.642 +} 14.643 + 14.644 + 14.645 +module_init(xlblk_init); 14.646 +module_exit(xlblk_cleanup); 14.647 + 14.648 + 14.649 +void blkdev_suspend(void) 14.650 +{ 14.651 +} 14.652 + 14.653 + 14.654 +void blkdev_resume(void) 14.655 +{ 14.656 +}
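BLKIF_RING_FULL above depends on req_prod and resp_cons being free-running unsigned indices: they are never reduced modulo the ring size, only masked down to a slot number (MASK_BLKIF_IDX) when a slot is actually touched, so their difference counts in-flight requests correctly even across wraparound. The harness below is an illustrative sketch of that arithmetic only; RING_SIZE, RING_IDX and MASK_IDX are stand-ins for BLKIF_RING_SIZE, BLKIF_RING_IDX and MASK_BLKIF_IDX, and the only real assumption is that the ring size is a power of two.

#include <stdio.h>

#define RING_SIZE 8                    /* stand-in for BLKIF_RING_SIZE */
typedef unsigned int RING_IDX;         /* stand-in for BLKIF_RING_IDX  */
#define MASK_IDX(i) ((i) & (RING_SIZE - 1))

int main(void)
{
    RING_IDX req_prod  = 0xfffffffe;   /* producer about to wrap       */
    RING_IDX resp_cons = 0xfffffffa;

    /* Unsigned subtraction gives the in-flight count even though
     * req_prod will shortly be numerically smaller than resp_cons. */
    printf("in flight: %u\n", req_prod - resp_cons);            /* 4 */
    printf("full?     %d\n", (req_prod - resp_cons) == RING_SIZE);

    req_prod += 4;                     /* wraps past zero */
    printf("in flight: %u, next slot %u\n",
           req_prod - resp_cons, MASK_IDX(req_prod));  /* 8, slot 2 */
    return 0;
}

After the wrap the difference is 8, i.e. the ring is exactly full, and masking still yields the correct slot; this is why the driver can compare `(req_prod - resp_cons) == BLKIF_RING_SIZE` without ever normalising the indices.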
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 15.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/block.h Sun Aug 01 15:13:24 2004 +0000 15.3 @@ -0,0 +1,92 @@ 15.4 +/****************************************************************************** 15.5 + * block.h 15.6 + * 15.7 + * Shared definitions between all levels of XenLinux Virtual block devices. 15.8 + */ 15.9 + 15.10 +#ifndef __XEN_DRIVERS_BLOCK_H__ 15.11 +#define __XEN_DRIVERS_BLOCK_H__ 15.12 + 15.13 +#include <linux/config.h> 15.14 +#include <linux/module.h> 15.15 + 15.16 +#include <linux/kernel.h> 15.17 +#include <linux/sched.h> 15.18 +#include <linux/slab.h> 15.19 +#include <linux/string.h> 15.20 +#include <linux/errno.h> 15.21 + 15.22 +#include <linux/fs.h> 15.23 +#include <linux/hdreg.h> 15.24 +#include <linux/blkdev.h> 15.25 +#include <linux/major.h> 15.26 + 15.27 +#include <linux/devfs_fs_kernel.h> 15.28 + 15.29 +#include <asm/hypervisor-ifs/hypervisor-if.h> 15.30 +#include <asm/io.h> 15.31 +#include <asm/atomic.h> 15.32 +#include <asm/uaccess.h> 15.33 + 15.34 +#include <asm-xen/blkif.h> 15.35 + 15.36 +#if 0 15.37 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) 15.38 +#else 15.39 +#define DPRINTK(_f, _a...) ((void)0) 15.40 +#endif 15.41 + 15.42 +#if 0 15.43 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) 15.44 +#else 15.45 +#define DPRINTK_IOCTL(_f, _a...) ((void)0) 15.46 +#endif 15.47 + 15.48 +struct xlbd_type_info { 15.49 + int partn_shift; 15.50 + int devs_per_major; 15.51 + int hardsect_size; 15.52 + int max_sectors; 15.53 + char *name; 15.54 +}; 15.55 + 15.56 +/* 15.57 + * We have one of these per vbd, whether ide, scsi or 'other'. They 15.58 + * hang in private_data off the gendisk structure. We may end up 15.59 + * putting all kinds of interesting stuff here :-) 15.60 + */ 15.61 +struct xlbd_major_info { 15.62 + int major; 15.63 + int usage; 15.64 + int xd_device; 15.65 + struct xlbd_type_info *type; 15.66 +}; 15.67 + 15.68 +struct xlbd_disk_info { 15.69 + int xd_device; 15.70 + struct xlbd_major_info *mi; 15.71 +}; 15.72 + 15.73 +typedef struct xen_block { 15.74 + int usage; 15.75 +} xen_block_t; 15.76 + 15.77 +extern struct request_queue *xlbd_blk_queue; 15.78 +extern spinlock_t blkif_io_lock; 15.79 + 15.80 +extern int blkif_open(struct inode *inode, struct file *filep); 15.81 +extern int blkif_release(struct inode *inode, struct file *filep); 15.82 +extern int blkif_ioctl(struct inode *inode, struct file *filep, 15.83 + unsigned command, unsigned long argument); 15.84 +extern int blkif_check(dev_t dev); 15.85 +extern int blkif_revalidate(dev_t dev); 15.86 +extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp); 15.87 +extern void do_blkif_request (request_queue_t *rq); 15.88 + 15.89 +extern void xlvbd_update_vbds(void); 15.90 + 15.91 +/* Virtual block-device subsystem. */ 15.92 +extern int xlvbd_init(void); 15.93 +extern void xlvbd_cleanup(void); 15.94 + 15.95 +#endif /* __XEN_DRIVERS_BLOCK_H__ */
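The partn_shift field declared in block.h above, together with the MAJOR_XEN/MINOR_XEN macros in the vbd.c that follows, is what turns a 16-bit Xen device number into a Linux major/minor pair and an "hda3"-style disk name (major in the top 8 bits, minor in the bottom 8; partn_shift of 6 for "hd" and 4 for "sd"/"xvd" splits the minor into unit and partition). A sketch of that decoding; name_device is a hypothetical helper for illustration, not a function in the driver.

#include <stdio.h>

#define MAJOR_XEN(dev) ((dev) >> 8)
#define MINOR_XEN(dev) ((dev) & 0xff)

/* Mirror the disk_name sprintf in xlvbd_get_gendisk(): unit letter
 * from the high bits of the minor, partition from the low bits. */
static void name_device(unsigned int xd_device, int partn_shift,
                        const char *prefix, char out[32])
{
    int minor = MINOR_XEN(xd_device);
    int unit  = minor >> partn_shift;
    int part  = minor & ((1 << partn_shift) - 1);
    snprintf(out, 32, "%s%c%d", prefix, 'a' + unit, part);
}

int main(void)
{
    char name[32];
    /* IDE major 3 ("hd"), minor 65 -> unit 1, partition 1: "hdb1". */
    name_device((3 << 8) | 65, 6, "hd", name);
    printf("major %d, name %s\n", MAJOR_XEN((3 << 8) | 65), name);
    return 0;
}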
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 16.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/vbd.c Sun Aug 01 15:13:24 2004 +0000 16.3 @@ -0,0 +1,530 @@ 16.4 +/****************************************************************************** 16.5 + * vbd.c 16.6 + * 16.7 + * XenLinux virtual block-device driver (xvd). 16.8 + * 16.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 16.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 16.11 + * Copyright (c) 2004, Christian Limpach 16.12 + */ 16.13 + 16.14 +#include "block.h" 16.15 +#include <linux/blkdev.h> 16.16 + 16.17 +/* 16.18 + * For convenience we distinguish between ide, scsi and 'other' (i.e. 16.19 + * potentially combinations of the two) in the naming scheme and in a few 16.20 + * other places (like default readahead, etc). 16.21 + */ 16.22 + 16.23 +#define NUM_IDE_MAJORS 10 16.24 +#define NUM_SCSI_MAJORS 9 16.25 +#define NUM_VBD_MAJORS 1 16.26 + 16.27 +static struct xlbd_type_info xlbd_ide_type = { 16.28 + .partn_shift = 6, 16.29 + // XXXcl todo blksize_size[major] = 1024; 16.30 + .hardsect_size = 512, 16.31 + .max_sectors = 128, /* 'hwif->rqsize' if we knew it */ 16.32 + // XXXcl todo read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */ 16.33 + .name = "hd", 16.34 +}; 16.35 + 16.36 +static struct xlbd_type_info xlbd_scsi_type = { 16.37 + .partn_shift = 4, 16.38 + // XXXcl todo blksize_size[major] = 1024; /* XXX 512; */ 16.39 + .hardsect_size = 512, 16.40 + .max_sectors = 128*8, /* XXX 128; */ 16.41 + // XXXcl todo read_ahead[major] = 0; /* XXX 8; -- guessing */ 16.42 + .name = "sd", 16.43 +}; 16.44 + 16.45 +static struct xlbd_type_info xlbd_vbd_type = { 16.46 + .partn_shift = 4, 16.47 + // XXXcl todo blksize_size[major] = 512; 16.48 + .hardsect_size = 512, 16.49 + .max_sectors = 128, 16.50 + // XXXcl todo read_ahead[major] = 8; 16.51 + .name = "xvd", 16.52 +}; 16.53 + 16.54 +/* XXXcl handle cciss after finding out why it's "hacked" in */ 16.55 + 16.56 +static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS + 16.57 + NUM_VBD_MAJORS]; 16.58 + 16.59 +/* Information about our VBDs. 
*/ 16.60 +#define MAX_VBDS 64 16.61 +static int nr_vbds; 16.62 +static vdisk_t *vbd_info; 16.63 + 16.64 +struct request_queue *xlbd_blk_queue = NULL; 16.65 + 16.66 +#define MAJOR_XEN(dev) ((dev)>>8) 16.67 +#define MINOR_XEN(dev) ((dev) & 0xff) 16.68 + 16.69 +static struct block_device_operations xlvbd_block_fops = 16.70 +{ 16.71 + .owner = THIS_MODULE, 16.72 + .open = blkif_open, 16.73 + .release = blkif_release, 16.74 + .ioctl = blkif_ioctl, 16.75 +#if 0 16.76 + check_media_change: blkif_check, 16.77 + revalidate: blkif_revalidate, 16.78 +#endif 16.79 +}; 16.80 + 16.81 +spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED; 16.82 + 16.83 +static int xlvbd_get_vbd_info(vdisk_t *disk_info) 16.84 +{ 16.85 + vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL); 16.86 + blkif_request_t req; 16.87 + blkif_response_t rsp; 16.88 + int nr; 16.89 + 16.90 + memset(&req, 0, sizeof(req)); 16.91 + req.operation = BLKIF_OP_PROBE; 16.92 + req.nr_segments = 1; 16.93 + req.frame_and_sects[0] = virt_to_machine(buf) | 7; 16.94 + 16.95 + blkif_control_send(&req, &rsp); 16.96 + 16.97 + if ( rsp.status <= 0 ) 16.98 + { 16.99 + printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status); 16.100 + return -1; 16.101 + } 16.102 + 16.103 + if ( (nr = rsp.status) > MAX_VBDS ) 16.104 + nr = MAX_VBDS; 16.105 + memcpy(disk_info, buf, nr * sizeof(vdisk_t)); 16.106 + 16.107 + return nr; 16.108 +} 16.109 + 16.110 +static struct xlbd_major_info *xlbd_get_major_info(int xd_device, int *minor) 16.111 +{ 16.112 + int mi_idx, new_major; 16.113 + int xd_major = MAJOR_XEN(xd_device); 16.114 + int xd_minor = MINOR_XEN(xd_device); 16.115 + 16.116 + *minor = xd_minor; 16.117 + 16.118 + switch (xd_major) { 16.119 + case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break; 16.120 + case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break; 16.121 + case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break; 16.122 + case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break; 16.123 + case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break; 16.124 + case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break; 16.125 + case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break; 16.126 + case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break; 16.127 + case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break; 16.128 + case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break; 16.129 + case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break; 16.130 + case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR: 16.131 + mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR; 16.132 + new_major = SCSI_DISK1_MAJOR + xd_major - SCSI_DISK1_MAJOR; 16.133 + break; 16.134 + case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break; 16.135 + default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break; 16.136 + } 16.137 + 16.138 + if (major_info[mi_idx]) 16.139 + return major_info[mi_idx]; 16.140 + 16.141 + major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL); 16.142 + if (major_info[mi_idx] == NULL) 16.143 + return NULL; 16.144 + 16.145 + memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info)); 16.146 + 16.147 + switch (mi_idx) { 16.148 + case 0 ... (NUM_IDE_MAJORS - 1): 16.149 + major_info[mi_idx]->type = &xlbd_ide_type; 16.150 + break; 16.151 + case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1): 16.152 + major_info[mi_idx]->type = &xlbd_scsi_type; 16.153 + break; 16.154 + case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ... 
16.155 + (NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1): 16.156 + major_info[mi_idx]->type = &xlbd_vbd_type; 16.157 + break; 16.158 + } 16.159 + major_info[mi_idx]->major = new_major; 16.160 + 16.161 + if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) { 16.162 + printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n", 16.163 + major_info[mi_idx]->major, major_info[mi_idx]->type->name); 16.164 + goto out; 16.165 + } 16.166 + 16.167 + devfs_mk_dir(major_info[mi_idx]->type->name); 16.168 + 16.169 + return major_info[mi_idx]; 16.170 + 16.171 + out: 16.172 + kfree(major_info[mi_idx]); 16.173 + major_info[mi_idx] = NULL; 16.174 + return NULL; 16.175 +} 16.176 + 16.177 +static struct gendisk *xlvbd_get_gendisk(struct xlbd_major_info *mi, 16.178 + int xd_minor, vdisk_t *xd) 16.179 +{ 16.180 + struct gendisk *gd; 16.181 + struct xlbd_disk_info *di; 16.182 + int device, partno; 16.183 + 16.184 + device = MKDEV(mi->major, xd_minor); 16.185 + gd = get_gendisk(device, &partno); 16.186 + if (gd) 16.187 + return gd; 16.188 + 16.189 + di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL); 16.190 + if (di == NULL) 16.191 + return NULL; 16.192 + di->mi = mi; 16.193 + di->xd_device = xd->device; 16.194 + 16.195 + /* Construct an appropriate gendisk structure. */ 16.196 + gd = alloc_disk(1); 16.197 + if (gd == NULL) 16.198 + goto out; 16.199 + 16.200 + gd->major = mi->major; 16.201 + gd->first_minor = xd_minor; 16.202 + gd->fops = &xlvbd_block_fops; 16.203 + gd->private_data = di; 16.204 + sprintf(gd->disk_name, "%s%c%d", mi->type->name, 16.205 + 'a' + (xd_minor >> mi->type->partn_shift), 16.206 + xd_minor & ((1 << mi->type->partn_shift) - 1)); 16.207 + /* sprintf(gd->devfs_name, "%s%s/disc%d", mi->type->name, , ); XXXdevfs */ 16.208 + 16.209 + set_capacity(gd, xd->capacity); 16.210 + 16.211 + if (xlbd_blk_queue == NULL) { 16.212 + xlbd_blk_queue = blk_init_queue(do_blkif_request, 16.213 + &blkif_io_lock); 16.214 + if (xlbd_blk_queue == NULL) 16.215 + goto out; 16.216 + elevator_init(xlbd_blk_queue, &elevator_noop); 16.217 + 16.218 + /* 16.219 + * Turn off barking 'headactive' mode. We dequeue 16.220 + * buffer heads as soon as we pass them to back-end 16.221 + * driver. 16.222 + */ 16.223 + blk_queue_headactive(xlbd_blk_queue, 0); /* XXXcl: noop according to blkdev.h */ 16.224 + 16.225 + blk_queue_hardsect_size(xlbd_blk_queue, 16.226 + mi->type->hardsect_size); 16.227 + blk_queue_max_sectors(xlbd_blk_queue, mi->type->max_sectors); /* 'hwif->rqsize' if we knew it */ 16.228 + 16.229 + /* XXXcl: set mask to PAGE_SIZE for now, to improve either use 16.230 + - blk_queue_merge_bvec to merge requests with adjacent ma's 16.231 + - the tags infrastructure 16.232 + - the dma infrastructure 16.233 + */ 16.234 + blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1); 16.235 + 16.236 + blk_queue_max_phys_segments(xlbd_blk_queue, 16.237 + BLKIF_MAX_SEGMENTS_PER_REQUEST); 16.238 + blk_queue_max_hw_segments(xlbd_blk_queue, 16.239 + BLKIF_MAX_SEGMENTS_PER_REQUEST); /* XXXcl not needed? */ 16.240 + 16.241 + 16.242 + } 16.243 + gd->queue = xlbd_blk_queue; 16.244 + 16.245 + add_disk(gd); 16.246 + 16.247 + return gd; 16.248 + 16.249 + out: 16.250 + if (gd) 16.251 + del_gendisk(gd); 16.252 + kfree(di); 16.253 + return NULL; 16.254 +} 16.255 + 16.256 +/* 16.257 + * xlvbd_init_device - initialise a VBD device 16.258 + * @disk: a vdisk_t describing the VBD 16.259 + * 16.260 + * Takes a vdisk_t * that describes a VBD the domain has access to. 
16.261 + * Performs appropriate initialisation and registration of the device.
16.262 + *
16.263 + * Care needs to be taken when making re-entrant calls to ensure that
16.264 + * corruption does not occur. Also, devices that are in use should not have
16.265 + * their details updated. This is the caller's responsibility.
16.266 + */
16.267 +static int xlvbd_init_device(vdisk_t *xd)
16.268 +{
16.269 + struct block_device *bd;
16.270 + struct gendisk *gd;
16.271 + struct xlbd_major_info *mi;
16.272 + int device;
16.273 + int minor;
16.274 +
16.275 + int err = -ENOMEM;
16.276 +
16.277 + mi = xlbd_get_major_info(xd->device, &minor);
16.278 + if (mi == NULL)
16.279 + return -EPERM;
16.280 +
16.281 + device = MKDEV(mi->major, minor);
16.282 +
16.283 + if ((bd = bdget(device)) == NULL)
16.284 + return -EPERM;
16.285 +
16.286 + /*
16.287 + * Update of partition info, and check of usage count, is protected
16.288 + * by the per-block-device semaphore.
16.289 + */
16.290 + down(&bd->bd_sem);
16.291 +
16.292 + gd = xlvbd_get_gendisk(mi, minor, xd);
16.293 + if (gd == NULL) {
16.294 + err = -EPERM;
16.295 + goto out;
16.296 + }
16.297 +
16.298 + if (VDISK_READONLY(xd->info))
16.299 + set_disk_ro(gd, 1);
16.300 +
16.301 + /* Some final fix-ups depending on the device type */
16.302 + switch (VDISK_TYPE(xd->info)) {
16.303 + case VDISK_TYPE_CDROM:
16.304 + gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD;
16.305 + /* FALLTHROUGH */
16.306 + case VDISK_TYPE_FLOPPY:
16.307 + case VDISK_TYPE_TAPE:
16.308 + gd->flags |= GENHD_FL_REMOVABLE;
16.309 + break;
16.310 +
16.311 + case VDISK_TYPE_DISK:
16.312 + break;
16.313 +
16.314 + default:
16.315 + printk(KERN_ALERT "XenLinux: unknown device type %d\n",
16.316 + VDISK_TYPE(xd->info));
16.317 + break;
16.318 + }
16.319 +
16.320 + err = 0;
16.321 + out:
16.322 + up(&bd->bd_sem);
16.323 + bdput(bd);
16.324 + return err;
16.325 +}
16.326 +
16.327 +#if 0
16.328 +/*
16.329 + * xlvbd_remove_device - remove a device node if possible
16.330 + * @device: numeric device ID
16.331 + *
16.332 + * Updates the gendisk structure and invalidates devices.
16.333 + *
16.334 + * This is OK for now but in future, should perhaps consider where this should
16.335 + * deallocate gendisks / unregister devices.
16.336 + */
16.337 +static int xlvbd_remove_device(int device)
16.338 +{
16.339 + int i, rc = 0, minor = MINOR(device);
16.340 + struct gendisk *gd;
16.341 + struct block_device *bd;
16.342 + xen_block_t *disk = NULL;
16.343 +
16.344 + if ( (bd = bdget(device)) == NULL )
16.345 + return -1;
16.346 +
16.347 + /*
16.348 + * Update of partition info, and check of usage count, is protected
16.349 + * by the per-block-device semaphore.
16.350 + */
16.351 + down(&bd->bd_sem);
16.352 +
16.353 + if ( ((gd = get_gendisk(device)) == NULL) ||
16.354 + ((disk = xldev_to_xldisk(device)) == NULL) )
16.355 + BUG();
16.356 +
16.357 + if ( disk->usage != 0 )
16.358 + {
16.359 + printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
16.360 + rc = -1;
16.361 + goto out;
16.362 + }
16.363 +
16.364 + if ( (minor & (gd->max_p-1)) != 0 )
16.365 + {
16.366 + /* 1: The VBD is mapped to a partition rather than a whole unit. */
16.367 + invalidate_device(device, 1);
16.368 + gd->part[minor].start_sect = 0;
16.369 + gd->part[minor].nr_sects = 0;
16.370 + gd->sizes[minor] = 0;
16.371 +
16.372 + /* Clear the consists-of-virtual-partitions flag if possible.
*/
16.373 + gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
16.374 + for ( i = 1; i < gd->max_p; i++ )
16.375 + if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
16.376 + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
16.377 +
16.378 + /*
16.379 + * If all virtual partitions are now gone, and a 'whole unit' VBD is
16.380 + * present, then we can try to grok the unit's real partition table.
16.381 + */
16.382 + if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
16.383 + (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
16.384 + !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
16.385 + {
16.386 + register_disk(gd,
16.387 + device&~(gd->max_p-1),
16.388 + gd->max_p,
16.389 + &xlvbd_block_fops,
16.390 + gd->part[minor&~(gd->max_p-1)].nr_sects);
16.391 + }
16.392 + }
16.393 + else
16.394 + {
16.395 + /*
16.396 + * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
16.397 + * NB. The partition entries are only cleared if there are no VBDs
16.398 + * mapped to individual partitions on this unit.
16.399 + */
16.400 + i = gd->max_p - 1; /* Default: clear subpartitions as well. */
16.401 + if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
16.402 + i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
16.403 + while ( i >= 0 )
16.404 + {
16.405 + invalidate_device(device+i, 1);
16.406 + gd->part[minor+i].start_sect = 0;
16.407 + gd->part[minor+i].nr_sects = 0;
16.408 + gd->sizes[minor+i] = 0;
16.409 + i--;
16.410 + }
16.411 + }
16.412 +
16.413 + out:
16.414 + up(&bd->bd_sem);
16.415 + bdput(bd);
16.416 + return rc;
16.417 +}
16.418 +
16.419 +/*
16.420 + * xlvbd_update_vbds - reprobes the VBD status and updates the driver
16.421 + * state. The VBDs need to be updated in this way when the domain is
16.422 + * initialised and also each time we receive an XLBLK_UPDATE event.
16.423 + */
16.424 +void xlvbd_update_vbds(void)
16.425 +{
16.426 + int i, j, k, old_nr, new_nr;
16.427 + vdisk_t *old_info, *new_info, *merged_info;
16.428 +
16.429 + old_info = vbd_info;
16.430 + old_nr = nr_vbds;
16.431 +
16.432 + new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
16.433 + if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
16.434 + {
16.435 + kfree(new_info);
16.436 + return;
16.437 + }
16.438 +
16.439 + /*
16.440 + * Final list maximum size is old list + new list. This occurs only when
16.441 + * old list and new list do not overlap at all, and we cannot yet destroy
16.442 + * VBDs in the old list because the usage counts are busy.
16.443 + */
16.444 + merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
16.445 +
16.446 + /* @i tracks old list; @j tracks new list; @k tracks merged list.
*/
16.447 + i = j = k = 0;
16.448 +
16.449 + while ( (i < old_nr) && (j < new_nr) )
16.450 + {
16.451 + if ( old_info[i].device < new_info[j].device )
16.452 + {
16.453 + if ( xlvbd_remove_device(old_info[i].device) != 0 )
16.454 + memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
16.455 + i++;
16.456 + }
16.457 + else if ( old_info[i].device > new_info[j].device )
16.458 + {
16.459 + if ( xlvbd_init_device(&new_info[j]) == 0 )
16.460 + memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
16.461 + j++;
16.462 + }
16.463 + else
16.464 + {
16.465 + if ( ((old_info[i].capacity == new_info[j].capacity) &&
16.466 + (old_info[i].info == new_info[j].info)) ||
16.467 + (xlvbd_remove_device(old_info[i].device) != 0) )
16.468 + memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
16.469 + else if ( xlvbd_init_device(&new_info[j]) == 0 )
16.470 + memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
16.471 + i++; j++;
16.472 + }
16.473 + }
16.474 +
16.475 + for ( ; i < old_nr; i++ )
16.476 + {
16.477 + if ( xlvbd_remove_device(old_info[i].device) != 0 )
16.478 + memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
16.479 + }
16.480 +
16.481 + for ( ; j < new_nr; j++ )
16.482 + {
16.483 + if ( xlvbd_init_device(&new_info[j]) == 0 )
16.484 + memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
16.485 + }
16.486 +
16.487 + vbd_info = merged_info;
16.488 + nr_vbds = k;
16.489 +
16.490 + kfree(old_info);
16.491 + kfree(new_info);
16.492 +}
16.493 +#endif
16.494 +
16.495 +/*
16.496 + * Set up all the linux device goop for the virtual block devices
16.497 + * (vbd's) that we know about. Note that although from the backend
16.498 + * driver's p.o.v. VBDs are addressed simply as an opaque 16-bit device
16.499 + * number, the domain creation tools conventionally allocate these
16.500 + * numbers to correspond to those used by 'real' linux -- this is just
16.501 + * for convenience as it means e.g. that the same /etc/fstab can be
16.502 + * used when booting with or without Xen.
16.503 + */
16.504 +int xlvbd_init(void)
16.505 +{
16.506 + int i;
16.507 +
16.508 + /*
16.509 + * If compiled as a module, we don't support unloading yet. We
16.510 + * therefore permanently increment the reference count to
16.511 + * disallow it.
16.512 + */
16.513 + MOD_INC_USE_COUNT;
16.514 +
16.515 + memset(major_info, 0, sizeof(major_info));
16.516 +
16.517 + for (i = 0; i < sizeof(major_info) / sizeof(major_info[0]); i++) {
16.518 + }
16.519 +
16.520 + vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
16.521 + nr_vbds = xlvbd_get_vbd_info(vbd_info);
16.522 +
16.523 + if (nr_vbds < 0) {
16.524 + kfree(vbd_info);
16.525 + vbd_info = NULL;
16.526 + nr_vbds = 0;
16.527 + } else {
16.528 + for (i = 0; i < nr_vbds; i++)
16.529 + xlvbd_init_device(&vbd_info[i]);
16.530 + }
16.531 +
16.532 + return 0;
16.533 +}
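The (currently #if 0'd) xlvbd_update_vbds() above is a classic merge of two lists sorted by device number: devices present only in the old list are removed, devices present only in the new list are instantiated, and devices present in both are kept or replaced. The skeleton below shows just that three-way walk, with plain ints standing in for vdisk_t entries and printf standing in for the remove/init calls; it is an illustrative sketch, not driver code.

#include <stdio.h>

int main(void)
{
    int old_list[] = { 1, 3, 4 };          /* currently instantiated  */
    int new_list[] = { 1, 4, 7 };          /* just probed             */
    int old_nr = 3, new_nr = 3;
    int i = 0, j = 0;

    while ( (i < old_nr) && (j < new_nr) )
    {
        if ( old_list[i] < new_list[j] )
            printf("remove %d\n", old_list[i++]);   /* gone from probe */
        else if ( old_list[i] > new_list[j] )
            printf("add    %d\n", new_list[j++]);   /* newly appeared  */
        else
        {
            printf("keep   %d\n", old_list[i]);     /* in both lists   */
            i++; j++;
        }
    }
    /* Drain whichever list has entries left, as the two tail loops
     * in xlvbd_update_vbds() do. */
    while ( i < old_nr ) printf("remove %d\n", old_list[i++]);
    while ( j < new_nr ) printf("add    %d\n", new_list[j++]);
    return 0;
}

For the sample lists this prints keep 1, remove 3, keep 4, add 7 -- each device is visited exactly once, which is why the merged list can never exceed old_nr + new_nr entries.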
17.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/Kconfig Sat Jul 31 16:13:15 2004 +0000 17.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 17.3 @@ -1,6 +0,0 @@ 17.4 - 17.5 -config XENBLOCK 17.6 - tristate "Block device driver" 17.7 - depends on ARCH_XEN 17.8 - help 17.9 - Block device driver for Xen
18.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/Makefile Sat Jul 31 16:13:15 2004 +0000 18.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 18.3 @@ -1,3 +0,0 @@ 18.4 - 18.5 -obj-y := vbd.o block.o 18.6 -
19.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/block.c Sat Jul 31 16:13:15 2004 +0000 19.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 19.3 @@ -1,653 +0,0 @@ 19.4 -/****************************************************************************** 19.5 - * block.c 19.6 - * 19.7 - * XenLinux virtual block-device driver. 19.8 - * 19.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 19.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 19.11 - * Copyright (c) 2004, Christian Limpach 19.12 - */ 19.13 - 19.14 -#include "block.h" 19.15 -#include <linux/cdrom.h> 19.16 -#include <linux/sched.h> 19.17 -#include <linux/interrupt.h> 19.18 -#include <scsi/scsi.h> 19.19 -#include <asm-xen/ctrl_if.h> 19.20 - 19.21 -typedef unsigned char byte; /* from linux/ide.h */ 19.22 - 19.23 -#define BLKIF_STATE_CLOSED 0 19.24 -#define BLKIF_STATE_DISCONNECTED 1 19.25 -#define BLKIF_STATE_CONNECTED 2 19.26 -static unsigned int blkif_state = BLKIF_STATE_CLOSED; 19.27 -static unsigned int blkif_evtchn, blkif_irq; 19.28 - 19.29 -static int blkif_control_rsp_valid; 19.30 -static blkif_response_t blkif_control_rsp; 19.31 - 19.32 -static blkif_ring_t *blk_ring; 19.33 -static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */ 19.34 -static BLKIF_RING_IDX req_prod; /* Private request producer. */ 19.35 - 19.36 -static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for 19.37 - * recovery. Responses not stored here. */ 19.38 -static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for 19.39 - * recovery */ 19.40 -static int recovery = 0; /* "Recovery in progress" flag. Protected 19.41 - * by the blkif_io_lock */ 19.42 - 19.43 -/* We plug the I/O ring if the driver is suspended or if the ring is full. */ 19.44 -#define BLKIF_RING_FULL (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \ 19.45 - (blkif_state != BLKIF_STATE_CONNECTED)) 19.46 - 19.47 -/* 19.48 - * Request queues with outstanding work, but ring is currently full. 19.49 - * We need no special lock here, as we always access this with the 19.50 - * blkif_io_lock held. We only need a small maximum list. 19.51 - */ 19.52 -#define MAX_PENDING 8 19.53 -static request_queue_t *pending_queues[MAX_PENDING]; 19.54 -static int nr_pending; 19.55 - 19.56 -static inline void flush_requests(void) 19.57 -{ 19.58 - 19.59 - blk_ring->req_prod = req_prod; 19.60 - 19.61 - notify_via_evtchn(blkif_evtchn); 19.62 -} 19.63 - 19.64 - 19.65 -#if 0 19.66 -/* 19.67 - * blkif_update_int/update-vbds_task - handle VBD update events. 19.68 - * Schedule a task for keventd to run, which will update the VBDs and perform 19.69 - * the corresponding updates to our view of VBD state. 19.70 - */ 19.71 -static struct tq_struct update_tq; 19.72 -static void update_vbds_task(void *unused) 19.73 -{ 19.74 - xlvbd_update_vbds(); 19.75 -} 19.76 -#endif 19.77 - 19.78 - 19.79 -int blkif_open(struct inode *inode, struct file *filep) 19.80 -{ 19.81 - struct gendisk *gd = inode->i_bdev->bd_disk; 19.82 - struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; 19.83 - 19.84 - /* Update of usage count is protected by per-device semaphore. */ 19.85 - di->mi->usage++; 19.86 - 19.87 - return 0; 19.88 -} 19.89 - 19.90 - 19.91 -int blkif_release(struct inode *inode, struct file *filep) 19.92 -{ 19.93 - struct gendisk *gd = inode->i_bdev->bd_disk; 19.94 - struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; 19.95 - 19.96 - /* 19.97 - * When usage drops to zero it may allow more VBD updates to occur. 
19.98 - * Update of usage count is protected by a per-device semaphore. 19.99 - */ 19.100 - if (--di->mi->usage == 0) { 19.101 -#if 0 19.102 - update_tq.routine = update_vbds_task; 19.103 - schedule_task(&update_tq); 19.104 -#endif 19.105 - } 19.106 - 19.107 - return 0; 19.108 -} 19.109 - 19.110 - 19.111 -int blkif_ioctl(struct inode *inode, struct file *filep, 19.112 - unsigned command, unsigned long argument) 19.113 -{ 19.114 - /* struct gendisk *gd = inode->i_bdev->bd_disk; */ 19.115 - 19.116 - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 19.117 - command, (long)argument, inode->i_rdev); 19.118 - 19.119 - switch (command) { 19.120 - 19.121 - case HDIO_GETGEO: 19.122 - /* return ENOSYS to use defaults */ 19.123 - return -ENOSYS; 19.124 - 19.125 - default: 19.126 - printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 19.127 - command); 19.128 - return -ENOSYS; 19.129 - } 19.130 - 19.131 - return 0; 19.132 -} 19.133 - 19.134 -#if 0 19.135 -/* check media change: should probably do something here in some cases :-) */ 19.136 -int blkif_check(kdev_t dev) 19.137 -{ 19.138 - DPRINTK("blkif_check\n"); 19.139 - return 0; 19.140 -} 19.141 - 19.142 -int blkif_revalidate(kdev_t dev) 19.143 -{ 19.144 - struct block_device *bd; 19.145 - struct gendisk *gd; 19.146 - xen_block_t *disk; 19.147 - unsigned long capacity; 19.148 - int i, rc = 0; 19.149 - 19.150 - if ( (bd = bdget(dev)) == NULL ) 19.151 - return -EINVAL; 19.152 - 19.153 - /* 19.154 - * Update of partition info, and check of usage count, is protected 19.155 - * by the per-block-device semaphore. 19.156 - */ 19.157 - down(&bd->bd_sem); 19.158 - 19.159 - if ( ((gd = get_gendisk(dev)) == NULL) || 19.160 - ((disk = xldev_to_xldisk(dev)) == NULL) || 19.161 - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) 19.162 - { 19.163 - rc = -EINVAL; 19.164 - goto out; 19.165 - } 19.166 - 19.167 - if ( disk->usage > 1 ) 19.168 - { 19.169 - rc = -EBUSY; 19.170 - goto out; 19.171 - } 19.172 - 19.173 - /* Only reread partition table if VBDs aren't mapped to partitions. */ 19.174 - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) 19.175 - { 19.176 - for ( i = gd->max_p - 1; i >= 0; i-- ) 19.177 - { 19.178 - invalidate_device(dev+i, 1); 19.179 - gd->part[MINOR(dev+i)].start_sect = 0; 19.180 - gd->part[MINOR(dev+i)].nr_sects = 0; 19.181 - gd->sizes[MINOR(dev+i)] = 0; 19.182 - } 19.183 - 19.184 - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); 19.185 - } 19.186 - 19.187 - out: 19.188 - up(&bd->bd_sem); 19.189 - bdput(bd); 19.190 - return rc; 19.191 -} 19.192 -#endif 19.193 - 19.194 - 19.195 -/* 19.196 - * blkif_queue_request 19.197 - * 19.198 - * request block io 19.199 - * 19.200 - * id: for guest use only. 19.201 - * operation: BLKIF_OP_{READ,WRITE,PROBE} 19.202 - * buffer: buffer to read/write into. this should be a 19.203 - * virtual address in the guest os. 19.204 - */ 19.205 -static int blkif_queue_request(struct request *req) 19.206 -{ 19.207 - struct xlbd_disk_info *di = 19.208 - (struct xlbd_disk_info *)req->rq_disk->private_data; 19.209 - unsigned long buffer_ma; 19.210 - blkif_request_t *ring_req; 19.211 - struct bio *bio; 19.212 - struct bio_vec *bvec; 19.213 - int idx, s; 19.214 - unsigned int fsect, lsect; 19.215 - 19.216 - if (unlikely(blkif_state != BLKIF_STATE_CONNECTED)) 19.217 - return 1; 19.218 - 19.219 - /* Fill out a communications ring structure. 
*/ 19.220 - ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req; 19.221 - ring_req->id = (unsigned long)req; 19.222 - ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : 19.223 - BLKIF_OP_READ; 19.224 - ring_req->sector_number = (blkif_sector_t)req->sector; 19.225 - ring_req->device = di->xd_device; 19.226 - 19.227 - s = 0; 19.228 - ring_req->nr_segments = 0; 19.229 - rq_for_each_bio(bio, req) { 19.230 - bio_for_each_segment(bvec, bio, idx) { 19.231 - buffer_ma = 19.232 - phys_to_machine(page_to_phys(bvec->bv_page)); 19.233 - if (unlikely((buffer_ma & ((1<<9)-1)) != 0)) 19.234 - BUG(); 19.235 - 19.236 - fsect = bvec->bv_offset >> 9; 19.237 - lsect = fsect + (bvec->bv_len >> 9) - 1; 19.238 - if (unlikely(lsect > 7)) 19.239 - BUG(); 19.240 - 19.241 - ring_req->frame_and_sects[ring_req->nr_segments++] = 19.242 - buffer_ma | (fsect << 3) | lsect; 19.243 - s += bvec->bv_len >> 9; 19.244 - } 19.245 - } 19.246 - 19.247 - req_prod++; 19.248 - 19.249 - /* Keep a private copy so we can reissue requests when recovering. */ 19.250 - blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req = 19.251 - *ring_req; 19.252 - blk_ring_rec->req_prod++; 19.253 - 19.254 - return 0; 19.255 -} 19.256 - 19.257 -/* 19.258 - * do_blkif_request 19.259 - * read a block; request is in a request queue 19.260 - */ 19.261 -void do_blkif_request(request_queue_t *rq) 19.262 -{ 19.263 - struct request *req; 19.264 - int queued; 19.265 - 19.266 - DPRINTK("Entered do_blkif_request\n"); 19.267 - 19.268 - queued = 0; 19.269 - 19.270 - while ((req = elv_next_request(rq)) != NULL) { 19.271 - if (!blk_fs_request(req)) { 19.272 - end_request(req, 0); 19.273 - continue; 19.274 - } 19.275 - 19.276 - if (BLKIF_RING_FULL) { 19.277 - blk_stop_queue(rq); 19.278 - break; 19.279 - } 19.280 - DPRINTK("do_blkif_request %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n", 19.281 - req, req->cmd, req->sector, req->current_nr_sectors, 19.282 - req->nr_sectors, req->buffer, 19.283 - rq_data_dir(req) ? "write" : "read"); 19.284 - blkdev_dequeue_request(req); 19.285 - if (blkif_queue_request(req)) { 19.286 - blk_stop_queue(rq); 19.287 - break; 19.288 - } 19.289 - queued++; 19.290 - } 19.291 - 19.292 - if (queued != 0) 19.293 - flush_requests(); 19.294 -} 19.295 - 19.296 - 19.297 -static void kick_pending_request_queues(void) 19.298 -{ 19.299 - /* We kick pending request queues if the ring is reasonably empty. */ 19.300 - if ( (nr_pending != 0) && 19.301 - ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) ) 19.302 - { 19.303 - /* Attempt to drain the queue, but bail if the ring becomes full. 
*/
19.304 -    while ( (nr_pending != 0) && !BLKIF_RING_FULL )
19.305 -        do_blkif_request(pending_queues[--nr_pending]);
19.306 -    }
19.307 -}
19.308 -
19.309 -
19.310 -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
19.311 -{
19.312 -    struct request *req;
19.313 -    blkif_response_t *bret;
19.314 -    BLKIF_RING_IDX i;
19.315 -    unsigned long flags;
19.316 -
19.317 -    spin_lock_irqsave(&blkif_io_lock, flags);
19.318 -
19.319 -    if (unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery)) {
19.320 -        printk("Bailed out\n");
19.321 -
19.322 -        spin_unlock_irqrestore(&blkif_io_lock, flags);
19.323 -        return IRQ_HANDLED;
19.324 -    }
19.325 -
19.326 -    for (i = resp_cons; i != blk_ring->resp_prod; i++) {
19.327 -        bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
19.328 -        switch (bret->operation) {
19.329 -        case BLKIF_OP_READ:
19.330 -        case BLKIF_OP_WRITE:
19.331 -            if (unlikely(bret->status != BLKIF_RSP_OKAY))
19.332 -                DPRINTK("Bad return from blkdev data request: %lx\n",
19.333 -                        bret->status);
19.334 -            req = (struct request *)bret->id;
19.335 -            /* XXXcl pass up status */
19.336 -            if (unlikely(end_that_request_first(req, 1,
19.337 -                                                req->hard_nr_sectors)))
19.338 -                BUG();
19.339 -
19.340 -            end_that_request_last(req);
19.341 -            break;
19.342 -        case BLKIF_OP_PROBE:
19.343 -            memcpy(&blkif_control_rsp, bret, sizeof(*bret));
19.344 -            blkif_control_rsp_valid = 1;
19.345 -            break;
19.346 -        default:
19.347 -            BUG();
19.348 -        }
19.349 -    }
19.350 -
19.351 -    resp_cons = i;
19.352 -    resp_cons_rec = i;
19.353 -
19.354 -    if (xlbd_blk_queue &&
19.355 -        test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags)) {
19.356 -        blk_start_queue(xlbd_blk_queue);
19.357 -        /* XXXcl call to request_fn should not be needed but
19.358 -         * we get stuck without... needs investigating
19.359 -         */
19.360 -        xlbd_blk_queue->request_fn(xlbd_blk_queue);
19.361 -    }
19.362 -
19.363 -    spin_unlock_irqrestore(&blkif_io_lock, flags);
19.364 -
19.365 -    return IRQ_HANDLED;
19.366 -}
19.367 -
19.368 -
19.369 -void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
19.370 -{
19.371 -    unsigned long flags;
19.372 -
19.373 - retry:
19.374 -    while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
19.375 -    {
19.376 -        set_current_state(TASK_INTERRUPTIBLE);
19.377 -        schedule_timeout(1);
19.378 -    }
19.379 -
19.380 -    spin_lock_irqsave(&blkif_io_lock, flags);
19.381 -    if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
19.382 -    {
19.383 -        spin_unlock_irqrestore(&blkif_io_lock, flags);
19.384 -        goto retry;
19.385 -    }
19.386 -
19.387 -    memcpy(&blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req, req, sizeof(*req));
19.388 -    memcpy(&blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,
19.389 -           req, sizeof(*req));
19.390 -    req_prod++;
19.391 -    flush_requests();
19.392 -
19.393 -    spin_unlock_irqrestore(&blkif_io_lock, flags);
19.394 -
19.395 -    while ( !blkif_control_rsp_valid )
19.396 -    {
19.397 -        set_current_state(TASK_INTERRUPTIBLE);
19.398 -        schedule_timeout(1);
19.399 -    }
19.400 -
19.401 -    memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
19.402 -    blkif_control_rsp_valid = 0;
19.403 -}
19.404 -
19.405 -
19.406 -static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
19.407 -{
19.408 -    ctrl_msg_t cmsg;
19.409 -    blkif_fe_interface_connect_t up;
19.410 -
19.411 -    if ( status->handle != 0 )
19.412 -    {
19.413 -        printk(KERN_WARNING "Status change on unsupported blkif %d\n",
19.414 -               status->handle);
19.415 -        return;
19.416 -    }
19.417 -
19.418 -    switch ( status->status )
19.419 -    {
19.420 -    case BLKIF_INTERFACE_STATUS_DESTROYED:
19.421 -        printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n",
19.422 -               blkif_state);
19.423 -        break;
19.424 -
19.425 -    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
19.426 -        if ( blkif_state != BLKIF_STATE_CLOSED )
19.427 -        {
19.428 -            printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
19.429 -                   " in state %d\n", blkif_state);
19.430 -
19.431 -            printk(KERN_INFO "VBD driver recovery in progress\n");
19.432 -
19.433 -            /* Prevent new requests being issued until we fix things up. */
19.434 -            spin_lock_irq(&blkif_io_lock);
19.435 -            recovery = 1;
19.436 -            blkif_state = BLKIF_STATE_DISCONNECTED;
19.437 -            spin_unlock_irq(&blkif_io_lock);
19.438 -
19.439 -            /* Free resources associated with old device channel. */
19.440 -            free_page((unsigned long)blk_ring);
19.441 -            free_irq(blkif_irq, NULL);
19.442 -            unbind_evtchn_from_irq(blkif_evtchn);
19.443 -        }
19.444 -
19.445 -        /* Move from CLOSED to DISCONNECTED state. */
19.446 -        blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
19.447 -        blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
19.448 -        blkif_state = BLKIF_STATE_DISCONNECTED;
19.449 -
19.450 -        /* Construct an interface-CONNECT message for the domain controller. */
19.451 -        cmsg.type = CMSG_BLKIF_FE;
19.452 -        cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT;
19.453 -        cmsg.length = sizeof(blkif_fe_interface_connect_t);
19.454 -        up.handle = 0;
19.455 -        up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
19.456 -        memcpy(cmsg.msg, &up, sizeof(up));
19.457 -
19.458 -        /* Tell the controller to bring up the interface. */
19.459 -        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
19.460 -        break;
19.461 -
19.462 -    case BLKIF_INTERFACE_STATUS_CONNECTED:
19.463 -        if ( blkif_state == BLKIF_STATE_CLOSED )
19.464 -        {
19.465 -            printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
19.466 -                   " in state %d\n", blkif_state);
19.467 -            break;
19.468 -        }
19.469 -
19.470 -        blkif_evtchn = status->evtchn;
19.471 -        blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
19.472 -        (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
19.473 -
19.474 -        if ( recovery )
19.475 -        {
19.476 -            int i;
19.477 -
19.478 -            /* Shouldn't need the blkif_io_lock here - the device is
19.479 -             * plugged and the recovery flag prevents the interrupt handler
19.480 -             * changing anything. */
19.481 -
19.482 -            /* Reissue requests from the private block ring. */
19.483 -            for ( i = 0;
19.484 -                  resp_cons_rec < blk_ring_rec->req_prod;
19.485 -                  resp_cons_rec++, i++ )
19.486 -            {
19.487 -                blk_ring->ring[i].req
19.488 -                    = blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req;
19.489 -            }
19.490 -
19.491 -            /* Reset the private block ring to match the new ring. */
19.492 -            memcpy(blk_ring_rec, blk_ring, sizeof(*blk_ring));
19.493 -            resp_cons_rec = 0;
19.494 -
19.495 -            /* blk_ring->req_prod will be set when we flush_requests(). */
19.496 -            blk_ring_rec->req_prod = req_prod = i;
19.497 -
19.498 -            wmb();
19.499 -
19.500 -            /* Switch off recovery mode, using a memory barrier to ensure that
19.501 -             * it's seen before we flush requests - we don't want to miss any
19.502 -             * interrupts. */
19.503 -            recovery = 0;
19.504 -            wmb();
19.505 -
19.506 -            /* Kicks things back into life. */
19.507 -            flush_requests();
19.508 -        }
19.509 -        else
19.510 -        {
19.511 -            /* Probe for discs that are attached to the interface. */
19.512 -            xlvbd_init();
19.513 -        }
19.514 -
19.515 -        blkif_state = BLKIF_STATE_CONNECTED;
19.516 -
19.517 -        /* Kick pending requests. */
19.518 -        spin_lock_irq(&blkif_io_lock);
19.519 -        kick_pending_request_queues();
19.520 -        spin_unlock_irq(&blkif_io_lock);
19.521 -
19.522 -        break;
19.523 -
19.524 -    default:
19.525 -        printk(KERN_WARNING "Status change to unknown value %d\n",
19.526 -               status->status);
19.527 -        break;
19.528 -    }
19.529 -}
19.530 -
19.531 -
19.532 -static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
19.533 -{
19.534 -    switch ( msg->subtype )
19.535 -    {
19.536 -    case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
19.537 -        if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) )
19.538 -            goto parse_error;
19.539 -        blkif_status_change((blkif_fe_interface_status_changed_t *)
19.540 -                            &msg->msg[0]);
19.541 -        break;
19.542 -#if 0
19.543 -    case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
19.544 -        update_tq.routine = update_vbds_task;
19.545 -        schedule_task(&update_tq);
19.546 -        break;
19.547 -#endif
19.548 -    default:
19.549 -        goto parse_error;
19.550 -    }
19.551 -
19.552 -    ctrl_if_send_response(msg);
19.553 -    return;
19.554 -
19.555 - parse_error:
19.556 -    msg->length = 0;
19.557 -    ctrl_if_send_response(msg);
19.558 -}
19.559 -
19.560 -
19.561 -int __init xlblk_init(void)
19.562 -{
19.563 -    ctrl_msg_t cmsg;
19.564 -    blkif_fe_driver_status_changed_t st;
19.565 -
19.566 -    if ( (start_info.flags & SIF_INITDOMAIN)
19.567 -         || (start_info.flags & SIF_BLK_BE_DOMAIN) )
19.568 -        return 0;
19.569 -
19.570 -    printk(KERN_INFO "Initialising Xen virtual block device\n");
19.571 -
19.572 -    blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
19.573 -    memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));
19.574 -
19.575 -    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
19.576 -                                    CALLBACK_IN_BLOCKING_CONTEXT);
19.577 -
19.578 -    /* Send a driver-UP notification to the domain controller. */
19.579 -    cmsg.type = CMSG_BLKIF_FE;
19.580 -    cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
19.581 -    cmsg.length = sizeof(blkif_fe_driver_status_changed_t);
19.582 -    st.status = BLKIF_DRIVER_STATUS_UP;
19.583 -    memcpy(cmsg.msg, &st, sizeof(st));
19.584 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
19.585 -
19.586 -    /*
19.587 -     * We should read 'nr_interfaces' from response message and wait
19.588 -     * for notifications before proceeding. For now we assume that we
19.589 -     * will be notified of exactly one interface.
19.590 -     */
19.591 -    while ( blkif_state != BLKIF_STATE_CONNECTED )
19.592 -    {
19.593 -        set_current_state(TASK_INTERRUPTIBLE);
19.594 -        schedule_timeout(1);
19.595 -    }
19.596 -
19.597 -    return 0;
19.598 -#if 0
19.599 -    int error;
19.600 -
19.601 -    reset_xlblk_interface();
19.602 -
19.603 -    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
19.604 -    xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD);
19.605 -
19.606 -    error = request_irq(xlblk_response_irq, xlblk_response_int,
19.607 -                        SA_SAMPLE_RANDOM, "blkdev", NULL);
19.608 -    if (error) {
19.609 -        printk(KERN_ALERT "Could not allocate receive interrupt\n");
19.610 -        goto fail;
19.611 -    }
19.612 -
19.613 -    error = request_irq(xlblk_update_irq, xlblk_update_int,
19.614 -                        0, "blkdev", NULL);
19.615 -    if (error) {
19.616 -        printk(KERN_ALERT
19.617 -               "Could not allocate block update interrupt\n");
19.618 -        goto fail;
19.619 -    }
19.620 -
19.621 -    (void)xlvbd_init();
19.622 -
19.623 -    return 0;
19.624 -
19.625 - fail:
19.626 -    return error;
19.627 -#endif
19.628 -}
19.629 -
19.630 -
19.631 -static void __exit xlblk_cleanup(void)
19.632 -{
19.633 -    /* XXX FIXME */
19.634 -    BUG();
19.635 -#if 0
19.636 -    /* xlvbd_cleanup(); */
19.637 -    free_irq(xlblk_response_irq, NULL);
19.638 -    free_irq(xlblk_update_irq, NULL);
19.639 -    unbind_virq_from_irq(VIRQ_BLKDEV);
19.640 -    unbind_virq_from_irq(VIRQ_VBD_UPD);
19.641 -#endif
19.642 -}
19.643 -
19.644 -
19.645 -module_init(xlblk_init);
19.646 -module_exit(xlblk_cleanup);
19.647 -
19.648 -
19.649 -void blkdev_suspend(void)
19.650 -{
19.651 -}
19.652 -
19.653 -
19.654 -void blkdev_resume(void)
19.655 -{
19.656 -}
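The frontend ring above is driven by free-running producer/consumer indices (req_prod, resp_cons) that are never wrapped explicitly; they are masked down with MASK_BLKIF_IDX only when used as array subscripts, and the ring counts as full once the producer has run a whole ring ahead of the consumer. A minimal standalone sketch of that index discipline, assuming a power-of-two BLKIF_RING_SIZE of 64 (the real value comes from asm-xen/blkif.h):

    #include <stdio.h>

    #define BLKIF_RING_SIZE   64   /* assumed here; must be a power of two */
    #define MASK_BLKIF_IDX(i) ((i) & (BLKIF_RING_SIZE - 1))

    typedef unsigned int BLKIF_RING_IDX;

    int main(void)
    {
        BLKIF_RING_IDX req_prod = 70, resp_cons = 10;

        /* Unsigned subtraction stays correct even after the counters
         * wrap around, which is why the indices can run free. */
        int full = ((req_prod - resp_cons) == BLKIF_RING_SIZE);

        printf("slot=%u full=%d\n", MASK_BLKIF_IDX(req_prod), full);
        return 0;
    }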
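blkif_control_send() above must not sleep while holding blkif_io_lock, so it waits for ring space outside the lock and then re-checks under it, retrying if another requester won the race. The shape of that pattern, reduced to a sketch with stand-in globals (kernel context, 2.6.7-era APIs; names mirror the driver but are not the driver's own):

    #include <linux/sched.h>
    #include <linux/spinlock.h>

    #define BLKIF_RING_SIZE 64                   /* stand-in; see asm-xen/blkif.h */

    static unsigned int req_prod, resp_cons;     /* stand-ins for the driver's globals */
    static spinlock_t ring_lock = SPIN_LOCK_UNLOCKED;

    /* Sleep outside the lock, confirm under it; returns with the lock
     * held and at least one request slot free. */
    static void wait_for_ring_slot(void)
    {
     retry:
        while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
        {
            set_current_state(TASK_INTERRUPTIBLE);
            schedule_timeout(1);                 /* sleep one tick, re-test */
        }

        spin_lock_irq(&ring_lock);
        if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
        {
            spin_unlock_irq(&ring_lock);         /* lost the race; try again */
            goto retry;
        }
    }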
20.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/block.h Sat Jul 31 16:13:15 2004 +0000
20.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
20.3 @@ -1,92 +0,0 @@
20.4 -/******************************************************************************
20.5 - * block.h
20.6 - *
20.7 - * Shared definitions between all levels of XenLinux Virtual block devices.
20.8 - */
20.9 -
20.10 -#ifndef __XEN_DRIVERS_BLOCK_H__
20.11 -#define __XEN_DRIVERS_BLOCK_H__
20.12 -
20.13 -#include <linux/config.h>
20.14 -#include <linux/module.h>
20.15 -
20.16 -#include <linux/kernel.h>
20.17 -#include <linux/sched.h>
20.18 -#include <linux/slab.h>
20.19 -#include <linux/string.h>
20.20 -#include <linux/errno.h>
20.21 -
20.22 -#include <linux/fs.h>
20.23 -#include <linux/hdreg.h>
20.24 -#include <linux/blkdev.h>
20.25 -#include <linux/major.h>
20.26 -
20.27 -#include <linux/devfs_fs_kernel.h>
20.28 -
20.29 -#include <asm/hypervisor-ifs/hypervisor-if.h>
20.30 -#include <asm/io.h>
20.31 -#include <asm/atomic.h>
20.32 -#include <asm/uaccess.h>
20.33 -
20.34 -#include <asm-xen/blkif.h>
20.35 -
20.36 -#if 0
20.37 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
20.38 -#else
20.39 -#define DPRINTK(_f, _a...) ((void)0)
20.40 -#endif
20.41 -
20.42 -#if 0
20.43 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
20.44 -#else
20.45 -#define DPRINTK_IOCTL(_f, _a...) ((void)0)
20.46 -#endif
20.47 -
20.48 -struct xlbd_type_info {
20.49 -    int partn_shift;
20.50 -    int devs_per_major;
20.51 -    int hardsect_size;
20.52 -    int max_sectors;
20.53 -    char *name;
20.54 -};
20.55 -
20.56 -/*
20.57 - * We have one of these per vbd, whether ide, scsi or 'other'. They
20.58 - * hang in private_data off the gendisk structure. We may end up
20.59 - * putting all kinds of interesting stuff here :-)
20.60 - */
20.61 -struct xlbd_major_info {
20.62 -    int major;
20.63 -    int usage;
20.64 -    int xd_device;
20.65 -    struct xlbd_type_info *type;
20.66 -};
20.67 -
20.68 -struct xlbd_disk_info {
20.69 -    int xd_device;
20.70 -    struct xlbd_major_info *mi;
20.71 -};
20.72 -
20.73 -typedef struct xen_block {
20.74 -    int usage;
20.75 -} xen_block_t;
20.76 -
20.77 -extern struct request_queue *xlbd_blk_queue;
20.78 -extern spinlock_t blkif_io_lock;
20.79 -
20.80 -extern int blkif_open(struct inode *inode, struct file *filep);
20.81 -extern int blkif_release(struct inode *inode, struct file *filep);
20.82 -extern int blkif_ioctl(struct inode *inode, struct file *filep,
20.83 -                       unsigned command, unsigned long argument);
20.84 -extern int blkif_check(dev_t dev);
20.85 -extern int blkif_revalidate(dev_t dev);
20.86 -extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
20.87 -extern void do_blkif_request (request_queue_t *rq);
20.88 -
20.89 -extern void xlvbd_update_vbds(void);
20.90 -
20.91 -/* Virtual block-device subsystem. */
20.92 -extern int xlvbd_init(void);
20.93 -extern void xlvbd_cleanup(void);
20.94 -
20.95 -#endif /* __XEN_DRIVERS_BLOCK_H__ */
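The DPRINTK and DPRINTK_IOCTL macros in the header above expand to ((void)0) when disabled, so debug statements vanish entirely from production builds instead of being tested at run time. A variant keyed off a hypothetical config symbol, rather than a hand-edited '#if 0', would look like:

    #include <linux/kernel.h>

    #ifdef CONFIG_XEN_BLKDEV_DEBUG               /* hypothetical symbol */
    #define DPRINTK(_f, _a...) printk(KERN_ALERT _f , ## _a)
    #else
    #define DPRINTK(_f, _a...) ((void)0)         /* compiles away */
    #endif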
21.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/vbd.c Sat Jul 31 16:13:15 2004 +0000
21.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
21.3 @@ -1,530 +0,0 @@
21.4 -/******************************************************************************
21.5 - * vbd.c
21.6 - *
21.7 - * XenLinux virtual block-device driver (xvd).
21.8 - *
21.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
21.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
21.11 - * Copyright (c) 2004, Christian Limpach
21.12 - */
21.13 -
21.14 -#include "block.h"
21.15 -#include <linux/blkdev.h>
21.16 -
21.17 -/*
21.18 - * For convenience we distinguish between ide, scsi and 'other' (i.e.
21.19 - * potentially combinations of the two) in the naming scheme and in a few
21.20 - * other places (like default readahead, etc).
21.21 - */
21.22 -
21.23 -#define NUM_IDE_MAJORS 10
21.24 -#define NUM_SCSI_MAJORS 9
21.25 -#define NUM_VBD_MAJORS 1
21.26 -
21.27 -static struct xlbd_type_info xlbd_ide_type = {
21.28 -    .partn_shift = 6,
21.29 -    // XXXcl todo blksize_size[major] = 1024;
21.30 -    .hardsect_size = 512,
21.31 -    .max_sectors = 128,  /* 'hwif->rqsize' if we knew it */
21.32 -    // XXXcl todo read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */
21.33 -    .name = "hd",
21.34 -};
21.35 -
21.36 -static struct xlbd_type_info xlbd_scsi_type = {
21.37 -    .partn_shift = 4,
21.38 -    // XXXcl todo blksize_size[major] = 1024; /* XXX 512; */
21.39 -    .hardsect_size = 512,
21.40 -    .max_sectors = 128*8, /* XXX 128; */
21.41 -    // XXXcl todo read_ahead[major] = 0; /* XXX 8; -- guessing */
21.42 -    .name = "sd",
21.43 -};
21.44 -
21.45 -static struct xlbd_type_info xlbd_vbd_type = {
21.46 -    .partn_shift = 4,
21.47 -    // XXXcl todo blksize_size[major] = 512;
21.48 -    .hardsect_size = 512,
21.49 -    .max_sectors = 128,
21.50 -    // XXXcl todo read_ahead[major] = 8;
21.51 -    .name = "xvd",
21.52 -};
21.53 -
21.54 -/* XXXcl handle cciss after finding out why it's "hacked" in */
21.55 -
21.56 -static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
21.57 -                                          NUM_VBD_MAJORS];
21.58 -
21.59 -/* Information about our VBDs. */
21.60 -#define MAX_VBDS 64
21.61 -static int nr_vbds;
21.62 -static vdisk_t *vbd_info;
21.63 -
21.64 -struct request_queue *xlbd_blk_queue = NULL;
21.65 -
21.66 -#define MAJOR_XEN(dev) ((dev)>>8)
21.67 -#define MINOR_XEN(dev) ((dev) & 0xff)
21.68 -
21.69 -static struct block_device_operations xlvbd_block_fops =
21.70 -{
21.71 -    .owner = THIS_MODULE,
21.72 -    .open = blkif_open,
21.73 -    .release = blkif_release,
21.74 -    .ioctl = blkif_ioctl,
21.75 -#if 0
21.76 -    check_media_change: blkif_check,
21.77 -    revalidate: blkif_revalidate,
21.78 -#endif
21.79 -};
21.80 -
21.81 -spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
21.82 -
21.83 -static int xlvbd_get_vbd_info(vdisk_t *disk_info)
21.84 -{
21.85 -    vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
21.86 -    blkif_request_t req;
21.87 -    blkif_response_t rsp;
21.88 -    int nr;
21.89 -
21.90 -    memset(&req, 0, sizeof(req));
21.91 -    req.operation = BLKIF_OP_PROBE;
21.92 -    req.nr_segments = 1;
21.93 -    req.frame_and_sects[0] = virt_to_machine(buf) | 7;
21.94 -
21.95 -    blkif_control_send(&req, &rsp);
21.96 -
21.97 -    if ( rsp.status <= 0 )
21.98 -    {
21.99 -        printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
21.100 -        return -1;
21.101 -    }
21.102 -
21.103 -    if ( (nr = rsp.status) > MAX_VBDS )
21.104 -        nr = MAX_VBDS;
21.105 -    memcpy(disk_info, buf, nr * sizeof(vdisk_t));
21.106 -
21.107 -    return nr;
21.108 -}
21.109 -
21.110 -static struct xlbd_major_info *xlbd_get_major_info(int xd_device, int *minor)
21.111 -{
21.112 -    int mi_idx, new_major;
21.113 -    int xd_major = MAJOR_XEN(xd_device);
21.114 -    int xd_minor = MINOR_XEN(xd_device);
21.115 -
21.116 -    *minor = xd_minor;
21.117 -
21.118 -    switch (xd_major) {
21.119 -    case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break;
21.120 -    case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break;
21.121 -    case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break;
21.122 -    case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break;
21.123 -    case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break;
21.124 -    case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break;
21.125 -    case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break;
21.126 -    case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break;
21.127 -    case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break;
21.128 -    case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break;
21.129 -    case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break;
21.130 -    case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
21.131 -        mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR;
21.132 -        new_major = SCSI_DISK1_MAJOR + xd_major - SCSI_DISK1_MAJOR;
21.133 -        break;
21.134 -    case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break;
21.135 -    default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break;
21.136 -    }
21.137 -
21.138 -    if (major_info[mi_idx])
21.139 -        return major_info[mi_idx];
21.140 -
21.141 -    major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
21.142 -    if (major_info[mi_idx] == NULL)
21.143 -        return NULL;
21.144 -
21.145 -    memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info));
21.146 -
21.147 -    switch (mi_idx) {
21.148 -    case 0 ... (NUM_IDE_MAJORS - 1):
21.149 -        major_info[mi_idx]->type = &xlbd_ide_type;
21.150 -        break;
21.151 -    case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1):
21.152 -        major_info[mi_idx]->type = &xlbd_scsi_type;
21.153 -        break;
21.154 -    case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ...
21.155 -        (NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1):
21.156 -        major_info[mi_idx]->type = &xlbd_vbd_type;
21.157 -        break;
21.158 -    }
21.159 -    major_info[mi_idx]->major = new_major;
21.160 -
21.161 -    if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) {
21.162 -        printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n",
21.163 -               major_info[mi_idx]->major, major_info[mi_idx]->type->name);
21.164 -        goto out;
21.165 -    }
21.166 -
21.167 -    devfs_mk_dir(major_info[mi_idx]->type->name);
21.168 -
21.169 -    return major_info[mi_idx];
21.170 -
21.171 - out:
21.172 -    kfree(major_info[mi_idx]);
21.173 -    major_info[mi_idx] = NULL;
21.174 -    return NULL;
21.175 -}
21.176 -
21.177 -static struct gendisk *xlvbd_get_gendisk(struct xlbd_major_info *mi,
21.178 -                                         int xd_minor, vdisk_t *xd)
21.179 -{
21.180 -    struct gendisk *gd;
21.181 -    struct xlbd_disk_info *di;
21.182 -    int device, partno;
21.183 -
21.184 -    device = MKDEV(mi->major, xd_minor);
21.185 -    gd = get_gendisk(device, &partno);
21.186 -    if (gd)
21.187 -        return gd;
21.188 -
21.189 -    di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
21.190 -    if (di == NULL)
21.191 -        return NULL;
21.192 -    di->mi = mi;
21.193 -    di->xd_device = xd->device;
21.194 -
21.195 -    /* Construct an appropriate gendisk structure. */
21.196 -    gd = alloc_disk(1);
21.197 -    if (gd == NULL)
21.198 -        goto out;
21.199 -
21.200 -    gd->major = mi->major;
21.201 -    gd->first_minor = xd_minor;
21.202 -    gd->fops = &xlvbd_block_fops;
21.203 -    gd->private_data = di;
21.204 -    sprintf(gd->disk_name, "%s%c%d", mi->type->name,
21.205 -            'a' + (xd_minor >> mi->type->partn_shift),
21.206 -            xd_minor & ((1 << mi->type->partn_shift) - 1));
21.207 -    /* sprintf(gd->devfs_name, "%s%s/disc%d", mi->type->name, , ); XXXdevfs */
21.208 -
21.209 -    set_capacity(gd, xd->capacity);
21.210 -
21.211 -    if (xlbd_blk_queue == NULL) {
21.212 -        xlbd_blk_queue = blk_init_queue(do_blkif_request,
21.213 -                                        &blkif_io_lock);
21.214 -        if (xlbd_blk_queue == NULL)
21.215 -            goto out;
21.216 -        elevator_init(xlbd_blk_queue, &elevator_noop);
21.217 -
21.218 -        /*
21.219 -         * Turn off barking 'headactive' mode. We dequeue
21.220 -         * buffer heads as soon as we pass them to back-end
21.221 -         * driver.
21.222 -         */
21.223 -        blk_queue_headactive(xlbd_blk_queue, 0); /* XXXcl: noop according to blkdev.h */
21.224 -
21.225 -        blk_queue_hardsect_size(xlbd_blk_queue,
21.226 -                                mi->type->hardsect_size);
21.227 -        blk_queue_max_sectors(xlbd_blk_queue, mi->type->max_sectors); /* 'hwif->rqsize' if we knew it */
21.228 -
21.229 -        /* XXXcl: set mask to PAGE_SIZE for now; to improve, either use
21.230 -           - blk_queue_merge_bvec to merge requests with adjacent ma's
21.231 -           - the tags infrastructure
21.232 -           - the dma infrastructure
21.233 -        */
21.234 -        blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
21.235 -
21.236 -        blk_queue_max_phys_segments(xlbd_blk_queue,
21.237 -                                    BLKIF_MAX_SEGMENTS_PER_REQUEST);
21.238 -        blk_queue_max_hw_segments(xlbd_blk_queue,
21.239 -                                  BLKIF_MAX_SEGMENTS_PER_REQUEST); /* XXXcl not needed? */
21.240 -
21.241 -
21.242 -    }
21.243 -    gd->queue = xlbd_blk_queue;
21.244 -
21.245 -    add_disk(gd);
21.246 -
21.247 -    return gd;
21.248 -
21.249 - out:
21.250 -    if (gd)
21.251 -        del_gendisk(gd);
21.252 -    kfree(di);
21.253 -    return NULL;
21.254 -}
21.255 -
21.256 -/*
21.257 - * xlvbd_init_device - initialise a VBD device
21.258 - * @xd: a vdisk_t describing the VBD
21.259 - *
21.260 - * Takes a vdisk_t * that describes a VBD the domain has access to.
21.261 - * Performs appropriate initialisation and registration of the device.
21.262 - *
21.263 - * Care needs to be taken when making re-entrant calls to ensure that
21.264 - * corruption does not occur. Also, devices that are in use should not have
21.265 - * their details updated. This is the caller's responsibility.
21.266 - */
21.267 -static int xlvbd_init_device(vdisk_t *xd)
21.268 -{
21.269 -    struct block_device *bd;
21.270 -    struct gendisk *gd;
21.271 -    struct xlbd_major_info *mi;
21.272 -    int device;
21.273 -    int minor;
21.274 -
21.275 -    int err = -ENOMEM;
21.276 -
21.277 -    mi = xlbd_get_major_info(xd->device, &minor);
21.278 -    if (mi == NULL)
21.279 -        return -EPERM;
21.280 -
21.281 -    device = MKDEV(mi->major, minor);
21.282 -
21.283 -    if ((bd = bdget(device)) == NULL)
21.284 -        return -EPERM;
21.285 -
21.286 -    /*
21.287 -     * Update of partition info, and check of usage count, is protected
21.288 -     * by the per-block-device semaphore.
21.289 -     */
21.290 -    down(&bd->bd_sem);
21.291 -
21.292 -    gd = xlvbd_get_gendisk(mi, minor, xd);
21.293 -    if (gd == NULL) {
21.294 -        err = -EPERM;
21.295 -        goto out;
21.296 -    }
21.297 -
21.298 -    if (VDISK_READONLY(xd->info))
21.299 -        set_disk_ro(gd, 1);
21.300 -
21.301 -    /* Some final fix-ups depending on the device type */
21.302 -    switch (VDISK_TYPE(xd->info)) {
21.303 -    case VDISK_TYPE_CDROM:
21.304 -        gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD;
21.305 -        /* FALLTHROUGH */
21.306 -    case VDISK_TYPE_FLOPPY:
21.307 -    case VDISK_TYPE_TAPE:
21.308 -        gd->flags |= GENHD_FL_REMOVABLE;
21.309 -        break;
21.310 -
21.311 -    case VDISK_TYPE_DISK:
21.312 -        break;
21.313 -
21.314 -    default:
21.315 -        printk(KERN_ALERT "XenLinux: unknown device type %d\n",
21.316 -               VDISK_TYPE(xd->info));
21.317 -        break;
21.318 -    }
21.319 -
21.320 -    err = 0;
21.321 - out:
21.322 -    up(&bd->bd_sem);
21.323 -    bdput(bd);
21.324 -    return err;
21.325 -}
21.326 -
21.327 -#if 0
21.328 -/*
21.329 - * xlvbd_remove_device - remove a device node if possible
21.330 - * @device: numeric device ID
21.331 - *
21.332 - * Updates the gendisk structure and invalidates devices.
21.333 - *
21.334 - * This is OK for now but in future, should perhaps consider where this should
21.335 - * deallocate gendisks / unregister devices.
21.336 - */
21.337 -static int xlvbd_remove_device(int device)
21.338 -{
21.339 -    int i, rc = 0, minor = MINOR(device);
21.340 -    struct gendisk *gd;
21.341 -    struct block_device *bd;
21.342 -    xen_block_t *disk = NULL;
21.343 -
21.344 -    if ( (bd = bdget(device)) == NULL )
21.345 -        return -1;
21.346 -
21.347 -    /*
21.348 -     * Update of partition info, and check of usage count, is protected
21.349 -     * by the per-block-device semaphore.
21.350 -     */
21.351 -    down(&bd->bd_sem);
21.352 -
21.353 -    if ( ((gd = get_gendisk(device)) == NULL) ||
21.354 -         ((disk = xldev_to_xldisk(device)) == NULL) )
21.355 -        BUG();
21.356 -
21.357 -    if ( disk->usage != 0 )
21.358 -    {
21.359 -        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
21.360 -        rc = -1;
21.361 -        goto out;
21.362 -    }
21.363 -
21.364 -    if ( (minor & (gd->max_p-1)) != 0 )
21.365 -    {
21.366 -        /* 1: The VBD is mapped to a partition rather than a whole unit. */
21.367 -        invalidate_device(device, 1);
21.368 -        gd->part[minor].start_sect = 0;
21.369 -        gd->part[minor].nr_sects = 0;
21.370 -        gd->sizes[minor] = 0;
21.371 -
21.372 -        /* Clear the consists-of-virtual-partitions flag if possible. */
21.373 -        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
21.374 -        for ( i = 1; i < gd->max_p; i++ )
21.375 -            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
21.376 -                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
21.377 -
21.378 -        /*
21.379 -         * If all virtual partitions are now gone, and a 'whole unit' VBD is
21.380 -         * present, then we can try to grok the unit's real partition table.
21.381 -         */
21.382 -        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
21.383 -             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
21.384 -             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
21.385 -        {
21.386 -            register_disk(gd,
21.387 -                          device&~(gd->max_p-1),
21.388 -                          gd->max_p,
21.389 -                          &xlvbd_block_fops,
21.390 -                          gd->part[minor&~(gd->max_p-1)].nr_sects);
21.391 -        }
21.392 -    }
21.393 -    else
21.394 -    {
21.395 -        /*
21.396 -         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
21.397 -         * NB. The partition entries are only cleared if there are no VBDs
21.398 -         * mapped to individual partitions on this unit.
21.399 -         */
21.400 -        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
21.401 -        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
21.402 -            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
21.403 -        while ( i >= 0 )
21.404 -        {
21.405 -            invalidate_device(device+i, 1);
21.406 -            gd->part[minor+i].start_sect = 0;
21.407 -            gd->part[minor+i].nr_sects = 0;
21.408 -            gd->sizes[minor+i] = 0;
21.409 -            i--;
21.410 -        }
21.411 -    }
21.412 -
21.413 - out:
21.414 -    up(&bd->bd_sem);
21.415 -    bdput(bd);
21.416 -    return rc;
21.417 -}
21.418 -
21.419 -/*
21.420 - * xlvbd_update_vbds - reprobes the VBD status and updates the driver
21.421 - * state accordingly. The VBDs need to be updated in this way when the domain
21.422 - * is initialised and also each time we receive an XLBLK_UPDATE event.
21.423 - */
21.424 -void xlvbd_update_vbds(void)
21.425 -{
21.426 -    int i, j, k, old_nr, new_nr;
21.427 -    vdisk_t *old_info, *new_info, *merged_info;
21.428 -
21.429 -    old_info = vbd_info;
21.430 -    old_nr = nr_vbds;
21.431 -
21.432 -    new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
21.433 -    if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
21.434 -    {
21.435 -        kfree(new_info);
21.436 -        return;
21.437 -    }
21.438 -
21.439 -    /*
21.440 -     * Final list maximum size is old list + new list. This occurs only when
21.441 -     * old list and new list do not overlap at all, and we cannot yet destroy
21.442 -     * VBDs in the old list because the usage counts are busy.
21.443 -     */
21.444 -    merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
21.445 -
21.446 -    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
21.447 -    i = j = k = 0;
21.448 -
21.449 -    while ( (i < old_nr) && (j < new_nr) )
21.450 -    {
21.451 -        if ( old_info[i].device < new_info[j].device )
21.452 -        {
21.453 -            if ( xlvbd_remove_device(old_info[i].device) != 0 )
21.454 -                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
21.455 -            i++;
21.456 -        }
21.457 -        else if ( old_info[i].device > new_info[j].device )
21.458 -        {
21.459 -            if ( xlvbd_init_device(&new_info[j]) == 0 )
21.460 -                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
21.461 -            j++;
21.462 -        }
21.463 -        else
21.464 -        {
21.465 -            if ( ((old_info[i].capacity == new_info[j].capacity) &&
21.466 -                  (old_info[i].info == new_info[j].info)) ||
21.467 -                 (xlvbd_remove_device(old_info[i].device) != 0) )
21.468 -                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
21.469 -            else if ( xlvbd_init_device(&new_info[j]) == 0 )
21.470 -                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
21.471 -            i++; j++;
21.472 -        }
21.473 -    }
21.474 -
21.475 -    for ( ; i < old_nr; i++ )
21.476 -    {
21.477 -        if ( xlvbd_remove_device(old_info[i].device) != 0 )
21.478 -            memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
21.479 -    }
21.480 -
21.481 -    for ( ; j < new_nr; j++ )
21.482 -    {
21.483 -        if ( xlvbd_init_device(&new_info[j]) == 0 )
21.484 -            memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
21.485 -    }
21.486 -
21.487 -    vbd_info = merged_info;
21.488 -    nr_vbds = k;
21.489 -
21.490 -    kfree(old_info);
21.491 -    kfree(new_info);
21.492 -}
21.493 -#endif
21.494 -
21.495 -/*
21.496 - * Set up all the linux device goop for the virtual block devices
21.497 - * (vbd's) that we know about. Note that although from the backend
21.498 - * driver's p.o.v. VBDs are addressed simply as an opaque 16-bit device
21.499 - * number, the domain creation tools conventionally allocate these
21.500 - * numbers to correspond to those used by 'real' linux -- this is just
21.501 - * for convenience as it means e.g. that the same /etc/fstab can be
21.502 - * used when booting with or without Xen.
21.503 - */
21.504 -int xlvbd_init(void)
21.505 -{
21.506 -    int i;
21.507 -
21.508 -    /*
21.509 -     * If compiled as a module, we don't support unloading yet. We
21.510 -     * therefore permanently increment the reference count to
21.511 -     * disallow it.
21.512 -     */
21.513 -    MOD_INC_USE_COUNT;
21.514 -
21.515 -    memset(major_info, 0, sizeof(major_info));
21.516 -
21.517 -    for (i = 0; i < sizeof(major_info) / sizeof(major_info[0]); i++) {
21.518 -    }
21.519 -
21.520 -    vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
21.521 -    nr_vbds = xlvbd_get_vbd_info(vbd_info);
21.522 -
21.523 -    if (nr_vbds < 0) {
21.524 -        kfree(vbd_info);
21.525 -        vbd_info = NULL;
21.526 -        nr_vbds = 0;
21.527 -    } else {
21.528 -        for (i = 0; i < nr_vbds; i++)
21.529 -            xlvbd_init_device(&vbd_info[i]);
21.530 -    }
21.531 -
21.532 -    return 0;
21.533 -}
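xlvbd_get_vbd_info() above packs the probe page's machine address and a sector range into frame_and_sects[0]: ORing in 7 selects sectors 0 through 7, i.e. the whole 4KiB page as eight 512-byte sectors. A sketch of that packing, with the field layout (last sector in the low bits, first sector just above) inferred from the '| 7' usage rather than quoted from blkif.h:

    #include <stdio.h>

    /* Hypothetical helper; assumes 3-bit first/last sector fields below
     * a page-aligned machine address, as the probe request implies. */
    unsigned long make_probe_segment(unsigned long machine_addr)
    {
        unsigned long first_sect = 0, last_sect = 7;        /* whole page */
        return (machine_addr & ~0xfffUL) | (first_sect << 3) | last_sect;
    }

    int main(void)
    {
        printf("%#lx\n", make_probe_segment(0x12345000UL)); /* 0x12345007 */
        return 0;
    }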
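The device naming in xlvbd_get_gendisk() is pure minor-number arithmetic: the unit letter is the minor shifted down by partn_shift, and the partition number is the remainder. A standalone worked example, using the IDE shift of 6 and a hypothetical minor of 65, which yields "hdb1":

    #include <stdio.h>

    int main(void)
    {
        int partn_shift = 6;     /* xlbd_ide_type.partn_shift */
        int xd_minor = 65;       /* hypothetical: unit 1, partition 1 */
        char name[32];

        snprintf(name, sizeof(name), "%s%c%d", "hd",
                 'a' + (xd_minor >> partn_shift),
                 xd_minor & ((1 << partn_shift) - 1));
        printf("%s\n", name);    /* prints "hdb1" */
        return 0;
    }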