direct-io.hg
changeset 6611:f59e0163540e
Updates to blktap driver and user code.
Mostly this makes the tap code work again with all of the changes that
have happened to the block drivers recently. We now use a shared page
per VBD (to the driver), and handle control information through the
store. The taplib interfaces have changed to be based around per-VBD
data structures to which you can attach arbitrary handlers.
There is also initial code for a user-level blockback driver, which
aims to get around the use of loopback devices for file-based VBDs.
Still plenty of work to do here -- this is a working incremental
check-in, and I'm away from this for the next four weeks.
Signed-off-by: Andrew Warfield <andrew.warfield@cl.cam.ac.uk>
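
For orientation, here is a minimal sketch of how a user-level driver such as the new ublkback might sit on top of the /dev/xen/blktap character device introduced below: map the ring page plus the data area, poll() to learn of queued requests, service them, and return responses with BLKTAP_IOCTL_KICK_FE. This is a sketch, not the shipped taplib code; process_ring() is a hypothetical stand-in for walking the shared ring with the RING_* macros and doing pread()/pwrite() against the image file, and the MMAP_PAGES arithmetic assumes MAX_PENDING_REQS (64) and BLKIF_MAX_SEGMENTS_PER_REQUEST (11) as used in this tree.

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

/* Values as defined in blktap.c below. */
#define BLKTAP_IOCTL_KICK_FE     1
#define BLKTAP_IOCTL_SETMODE     3
#define BLKTAP_MODE_INTERCEPT_FE 0x00000001

#define MAX_PENDING_REQS 64
#define SEGS_PER_REQ     11   /* BLKIF_MAX_SEGMENTS_PER_REQUEST */
#define MMAP_PAGES       (MAX_PENDING_REQS * SEGS_PER_REQ)
#define RING_PAGES       1

/* Hypothetical: consume requests from the mapped ring, service them
 * against the image file, and queue responses in place. */
static void process_ring(void *ring_page, void *data_area)
{
    (void)ring_page; (void)data_area;
}

int main(void)
{
    int fd = open("/dev/xen/blktap", O_RDWR);
    if (fd < 0) { perror("open"); return 1; }

    /* Route frontend requests up to us instead of straight through. */
    if (ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERCEPT_FE) < 0)
        perror("SETMODE");

    /* blktap_mmap() insists on exactly this many pages: one ring page
     * followed by the foreign data area. */
    size_t len = (size_t)(MMAP_PAGES + RING_PAGES) * getpagesize();
    void *area = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (area == MAP_FAILED) { perror("mmap"); return 1; }

    struct pollfd pfd = { .fd = fd, .events = POLLIN };
    while (poll(&pfd, 1, -1) >= 0) {
        if (pfd.revents & POLLIN) {
            process_ring(area, (char *)area + getpagesize());
            /* Tell the driver there are responses on the user ring. */
            ioctl(fd, BLKTAP_IOCTL_KICK_FE);
        }
    }
    return 0;
}

Note the size check in blktap_mmap() below: the driver refuses any mapping that is not exactly MMAP_PAGES + RING_PAGES pages.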
line diff
1.1  --- a/.hgignore	Sun Sep 04 15:08:16 2005 +0000
1.2  +++ b/.hgignore	Sun Sep 04 21:19:44 2005 +0000
1.3  @@ -82,6 +82,7 @@
1.4   ^tools/blktap/parallax/vdi_validate$
1.5   ^tools/blktap/parallax/parallax$
1.6   ^tools/blktap/parallax/blockstored$
1.7  +^tools/blktap/ublkback/ublkback$
1.8   ^tools/blktap/xen/.*$
1.9   ^tools/check/\..*$
1.10  ^tools/cmdline/.*$
2.1  --- a/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile	Sun Sep 04 15:08:16 2005 +0000
2.2  +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile	Sun Sep 04 21:19:44 2005 +0000
2.3  @@ -1,3 +1,3 @@
2.4  
2.5  -obj-y := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o
2.6  +obj-y := xenbus.o interface.o blktap.o
2.7  
3.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sun Sep 04 15:08:16 2005 +0000 3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sun Sep 04 21:19:44 2005 +0000 3.3 @@ -1,90 +1,916 @@ 3.4 /****************************************************************************** 3.5 - * blktap.c 3.6 - * 3.7 - * XenLinux virtual block-device tap. 3.8 + * arch/xen/drivers/blkif/blktap/blktap.c 3.9 * 3.10 - * Copyright (c) 2004, Andrew Warfield 3.11 - * 3.12 - * Based on the original split block driver: 3.13 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 3.14 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 3.15 - * Copyright (c) 2004, Christian Limpach 3.16 + * This is a modified version of the block backend driver that remaps requests 3.17 + * to a user-space memory region. It is intended to be used to write 3.18 + * application-level servers that provide block interfaces to client VMs. 3.19 * 3.20 - * Note that unlike the split block driver code, this driver has been developed 3.21 - * strictly for Linux 2.6 3.22 */ 3.23 3.24 -#include "blktap.h" 3.25 +#include <linux/kernel.h> 3.26 +#include <linux/spinlock.h> 3.27 +#include <asm-xen/balloon.h> 3.28 +#include <linux/kernel.h> 3.29 +#include <linux/fs.h> 3.30 +#include <linux/mm.h> 3.31 +#include <linux/miscdevice.h> 3.32 +#include <linux/errno.h> 3.33 +#include <linux/major.h> 3.34 +#include <linux/gfp.h> 3.35 +#include <linux/poll.h> 3.36 +#include <asm/tlbflush.h> 3.37 +#include "common.h" 3.38 + 3.39 +/* Only one process may open /dev/xen/blktap at any time. */ 3.40 +static unsigned long blktap_dev_inuse; 3.41 +unsigned long blktap_ring_ok; /* make this ring->state */ 3.42 + 3.43 +/* Rings up to user space. */ 3.44 +static blkif_front_ring_t blktap_ufe_ring; 3.45 + 3.46 +/* for poll: */ 3.47 +static wait_queue_head_t blktap_wait; 3.48 + 3.49 +/* current switching mode */ 3.50 +static unsigned long blktap_mode; 3.51 + 3.52 +/* local prototypes */ 3.53 +static int blktap_read_ufe_ring(void); 3.54 3.55 -int __init xlblktap_init(void) 3.56 + 3.57 +/* /dev/xen/blktap resides at device number major=10, minor=200 */ 3.58 +#define BLKTAP_MINOR 202 3.59 + 3.60 +/* blktap IOCTLs: */ 3.61 +#define BLKTAP_IOCTL_KICK_FE 1 3.62 +#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */ 3.63 +#define BLKTAP_IOCTL_SETMODE 3 3.64 +#define BLKTAP_IOCTL_PRINT_IDXS 100 3.65 + 3.66 +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ 3.67 +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ 3.68 +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 3.69 +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */ 3.70 +#define BLKTAP_MODE_COPY_FE 0x00000004 /* unimp. */ 3.71 +#define BLKTAP_MODE_COPY_BE 0x00000008 /* unimp. */ 3.72 +#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010 /* unimp. */ 3.73 +#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020 /* unimp. 
*/ 3.74 + 3.75 +#define BLKTAP_MODE_INTERPOSE \ 3.76 + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) 3.77 + 3.78 +#define BLKTAP_MODE_COPY_BOTH \ 3.79 + (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE) 3.80 + 3.81 +#define BLKTAP_MODE_COPY_BOTH_PAGES \ 3.82 + (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES) 3.83 + 3.84 +static inline int BLKTAP_MODE_VALID(unsigned long arg) 3.85 { 3.86 - ctrl_msg_t cmsg; 3.87 - blkif_fe_driver_status_t fe_st; 3.88 - blkif_be_driver_status_t be_st; 3.89 + return ( 3.90 + ( arg == BLKTAP_MODE_PASSTHROUGH ) || 3.91 + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || 3.92 + ( arg == BLKTAP_MODE_INTERPOSE ) ); 3.93 +/* 3.94 + return ( 3.95 + ( arg == BLKTAP_MODE_PASSTHROUGH ) || 3.96 + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || 3.97 + ( arg == BLKTAP_MODE_INTERCEPT_BE ) || 3.98 + ( arg == BLKTAP_MODE_INTERPOSE ) || 3.99 + ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || 3.100 + ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || 3.101 + ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) 3.102 + ); 3.103 +*/ 3.104 +} 3.105 + 3.106 + 3.107 +/****************************************************************** 3.108 + * MMAP REGION 3.109 + */ 3.110 3.111 - printk(KERN_INFO "Initialising Xen block tap device\n"); 3.112 -#ifdef CONFIG_XEN_BLKDEV_GRANT 3.113 - printk(KERN_INFO "Block tap is using grant tables.\n"); 3.114 -#endif 3.115 +/* 3.116 + * We use a big chunk of address space to map in-flight requests into, 3.117 + * and export this region up to user-space. See the comments in blkback 3.118 + * about this -- the two must be kept in sync if the tap is used as a 3.119 + * passthrough. 3.120 + */ 3.121 + 3.122 +#define MAX_PENDING_REQS 64 3.123 +#define BATCH_PER_DOMAIN 16 3.124 3.125 - DPRINTK(" tap - Backend connection init:\n"); 3.126 +/* immediately before the mmap area, we have a bunch of pages reserved 3.127 + * for shared memory rings. 3.128 + */ 3.129 +#define RING_PAGES 1 /* Front */ 3.130 + 3.131 +/* Where things are inside the device mapping. */ 3.132 +struct vm_area_struct *blktap_vma = NULL; 3.133 +unsigned long mmap_vstart; /* Kernel pages for mapping in data. */ 3.134 +unsigned long rings_vstart; /* start of mmaped vma */ 3.135 +unsigned long user_vstart; /* start of user mappings */ 3.136 + 3.137 +#define MMAP_PAGES \ 3.138 + (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) 3.139 +#define MMAP_VADDR(_start, _req,_seg) \ 3.140 + (_start + \ 3.141 + ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ 3.142 + ((_seg) * PAGE_SIZE)) 3.143 + 3.144 3.145 3.146 - (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, 3.147 - CALLBACK_IN_BLOCKING_CONTEXT); 3.148 +/* 3.149 + * Each outstanding request that we've passed to the lower device layers has a 3.150 + * 'pending_req' allocated to it. Each buffer_head that completes decrements 3.151 + * the pendcnt towards zero. When it hits zero, the specified domain has a 3.152 + * response queued for it, with the saved 'id' passed back. 3.153 + */ 3.154 +typedef struct { 3.155 + blkif_t *blkif; 3.156 + unsigned long id; 3.157 + int nr_pages; 3.158 + atomic_t pendcnt; 3.159 + unsigned short operation; 3.160 + int status; 3.161 +} pending_req_t; 3.162 + 3.163 +/* 3.164 + * We can't allocate pending_req's in order, since they may complete out of 3.165 + * order. We therefore maintain an allocation ring. This ring also indicates 3.166 + * when enough work has been passed down -- at that point the allocation ring 3.167 + * will be empty. 
3.168 + */ 3.169 +static pending_req_t pending_reqs[MAX_PENDING_REQS]; 3.170 +static unsigned char pending_ring[MAX_PENDING_REQS]; 3.171 +static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED; 3.172 +/* NB. We use a different index type to differentiate from shared blk rings. */ 3.173 +typedef unsigned int PEND_RING_IDX; 3.174 +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) 3.175 +static PEND_RING_IDX pending_prod, pending_cons; 3.176 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) 3.177 + 3.178 +/* Requests passing through the tap to the backend hijack the id field 3.179 + * in the request message. In it we put the AR index _AND_ the fe domid. 3.180 + * the domid is used by the backend to map the pages properly. 3.181 + */ 3.182 + 3.183 +static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx) 3.184 +{ 3.185 + return ( (fe_dom << 16) | MASK_PEND_IDX(idx) ); 3.186 +} 3.187 + 3.188 +extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id) 3.189 +{ 3.190 + return (PEND_RING_IDX)( id & 0x0000ffff ); 3.191 +} 3.192 + 3.193 +extern inline domid_t ID_TO_DOM(unsigned long id) 3.194 +{ 3.195 + return (domid_t)(id >> 16); 3.196 +} 3.197 + 3.198 + 3.199 3.200 - /* Send a driver-UP notification to the domain controller. */ 3.201 - cmsg.type = CMSG_BLKIF_FE; 3.202 - cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; 3.203 - cmsg.length = sizeof(blkif_fe_driver_status_t); 3.204 - fe_st.status = BLKIF_DRIVER_STATUS_UP; 3.205 - memcpy(cmsg.msg, &fe_st, sizeof(fe_st)); 3.206 - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 3.207 +/****************************************************************** 3.208 + * GRANT HANDLES 3.209 + */ 3.210 + 3.211 +/* When using grant tables to map a frame for device access then the 3.212 + * handle returned must be used to unmap the frame. This is needed to 3.213 + * drop the ref count on the frame. 3.214 + */ 3.215 +struct grant_handle_pair 3.216 +{ 3.217 + u16 kernel; 3.218 + u16 user; 3.219 +}; 3.220 +static struct grant_handle_pair pending_grant_handles[MMAP_PAGES]; 3.221 +#define pending_handle(_idx, _i) \ 3.222 + (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) 3.223 +#define BLKTAP_INVALID_HANDLE(_g) \ 3.224 + (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF)) 3.225 +#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ 3.226 + (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \ 3.227 + } while(0) 3.228 + 3.229 3.230 - DPRINTK(" tap - Frontend connection init:\n"); 3.231 - 3.232 - active_reqs_init(); 3.233 - blkif_interface_init(); 3.234 - blkdev_schedule_init(); 3.235 +/****************************************************************** 3.236 + * BLKTAP VM OPS 3.237 + */ 3.238 + 3.239 +static struct page *blktap_nopage(struct vm_area_struct *vma, 3.240 + unsigned long address, 3.241 + int *type) 3.242 +{ 3.243 + /* 3.244 + * if the page has not been mapped in by the driver then generate 3.245 + * a SIGBUS to the domain. 
3.246 + */ 3.247 + 3.248 + force_sig(SIGBUS, current); 3.249 + 3.250 + return 0; 3.251 +} 3.252 + 3.253 +struct vm_operations_struct blktap_vm_ops = { 3.254 + nopage: blktap_nopage, 3.255 +}; 3.256 + 3.257 +/****************************************************************** 3.258 + * BLKTAP FILE OPS 3.259 + */ 3.260 + 3.261 +static int blktap_open(struct inode *inode, struct file *filp) 3.262 +{ 3.263 + blkif_sring_t *sring; 3.264 3.265 - (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 3.266 - CALLBACK_IN_BLOCKING_CONTEXT); 3.267 + if ( test_and_set_bit(0, &blktap_dev_inuse) ) 3.268 + return -EBUSY; 3.269 + 3.270 + /* Allocate the fe ring. */ 3.271 + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); 3.272 + if (sring == NULL) 3.273 + goto fail_nomem; 3.274 + 3.275 + SetPageReserved(virt_to_page(sring)); 3.276 + 3.277 + SHARED_RING_INIT(sring); 3.278 + FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE); 3.279 + 3.280 + return 0; 3.281 3.282 - /* Send a driver-UP notification to the domain controller. */ 3.283 - cmsg.type = CMSG_BLKIF_BE; 3.284 - cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS; 3.285 - cmsg.length = sizeof(blkif_be_driver_status_t); 3.286 - be_st.status = BLKIF_DRIVER_STATUS_UP; 3.287 - memcpy(cmsg.msg, &be_st, sizeof(be_st)); 3.288 - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 3.289 + fail_nomem: 3.290 + return -ENOMEM; 3.291 +} 3.292 + 3.293 +static int blktap_release(struct inode *inode, struct file *filp) 3.294 +{ 3.295 + blktap_dev_inuse = 0; 3.296 + blktap_ring_ok = 0; 3.297 3.298 - DPRINTK(" tap - Userland channel init:\n"); 3.299 + /* Free the ring page. */ 3.300 + ClearPageReserved(virt_to_page(blktap_ufe_ring.sring)); 3.301 + free_page((unsigned long) blktap_ufe_ring.sring); 3.302 3.303 - blktap_init(); 3.304 - 3.305 - DPRINTK("Blkif tap device initialized.\n"); 3.306 + /* Clear any active mappings and free foreign map table */ 3.307 + if (blktap_vma != NULL) { 3.308 + zap_page_range(blktap_vma, blktap_vma->vm_start, 3.309 + blktap_vma->vm_end - blktap_vma->vm_start, NULL); 3.310 + blktap_vma = NULL; 3.311 + } 3.312 3.313 return 0; 3.314 } 3.315 3.316 -#if 0 /* tap doesn't handle suspend/resume */ 3.317 -void blkdev_suspend(void) 3.318 + 3.319 +/* Note on mmap: 3.320 + * We need to map pages to user space in a way that will allow the block 3.321 + * subsystem set up direct IO to them. This couldn't be done before, because 3.322 + * there isn't really a sane way to translate a user virtual address down to a 3.323 + * physical address when the page belongs to another domain. 3.324 + * 3.325 + * My first approach was to map the page in to kernel memory, add an entry 3.326 + * for it in the physical frame list (using alloc_lomem_region as in blkback) 3.327 + * and then attempt to map that page up to user space. This is disallowed 3.328 + * by xen though, which realizes that we don't really own the machine frame 3.329 + * underlying the physical page. 3.330 + * 3.331 + * The new approach is to provide explicit support for this in xen linux. 3.332 + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages 3.333 + * mapped from other vms. vma->vm_private_data is set up as a mapping 3.334 + * from pages to actual page structs. There is a new clause in get_user_pages 3.335 + * that does the right thing for this sort of mapping. 
3.336 + */ 3.337 +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) 3.338 +{ 3.339 + int size; 3.340 + struct page **map; 3.341 + int i; 3.342 + 3.343 + DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n", 3.344 + vma->vm_start, vma->vm_end); 3.345 + 3.346 + vma->vm_flags |= VM_RESERVED; 3.347 + vma->vm_ops = &blktap_vm_ops; 3.348 + 3.349 + size = vma->vm_end - vma->vm_start; 3.350 + if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) { 3.351 + printk(KERN_INFO 3.352 + "blktap: you _must_ map exactly %d pages!\n", 3.353 + MMAP_PAGES + RING_PAGES); 3.354 + return -EAGAIN; 3.355 + } 3.356 + 3.357 + size >>= PAGE_SHIFT; 3.358 + DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1); 3.359 + 3.360 + rings_vstart = vma->vm_start; 3.361 + user_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT); 3.362 + 3.363 + /* Map the ring pages to the start of the region and reserve it. */ 3.364 + 3.365 + /* not sure if I really need to do this... */ 3.366 + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 3.367 + 3.368 + if (remap_pfn_range(vma, vma->vm_start, 3.369 + __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 3.370 + PAGE_SIZE, vma->vm_page_prot)) 3.371 + { 3.372 + WPRINTK("Mapping user ring failed!\n"); 3.373 + goto fail; 3.374 + } 3.375 + 3.376 + /* Mark this VM as containing foreign pages, and set up mappings. */ 3.377 + map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) 3.378 + * sizeof(struct page_struct*), 3.379 + GFP_KERNEL); 3.380 + if (map == NULL) 3.381 + { 3.382 + WPRINTK("Couldn't alloc VM_FOREIGH map.\n"); 3.383 + goto fail; 3.384 + } 3.385 + 3.386 + for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++) 3.387 + map[i] = NULL; 3.388 + 3.389 + vma->vm_private_data = map; 3.390 + vma->vm_flags |= VM_FOREIGN; 3.391 + 3.392 + blktap_vma = vma; 3.393 + blktap_ring_ok = 1; 3.394 + 3.395 + return 0; 3.396 + fail: 3.397 + /* Clear any active mappings. */ 3.398 + zap_page_range(vma, vma->vm_start, 3.399 + vma->vm_end - vma->vm_start, NULL); 3.400 + 3.401 + return -ENOMEM; 3.402 +} 3.403 + 3.404 +static int blktap_ioctl(struct inode *inode, struct file *filp, 3.405 + unsigned int cmd, unsigned long arg) 3.406 +{ 3.407 + switch(cmd) { 3.408 + case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */ 3.409 + return blktap_read_ufe_ring(); 3.410 + 3.411 + case BLKTAP_IOCTL_SETMODE: 3.412 + if (BLKTAP_MODE_VALID(arg)) { 3.413 + blktap_mode = arg; 3.414 + /* XXX: may need to flush rings here. 
*/ 3.415 + printk(KERN_INFO "blktap: set mode to %lx\n", arg); 3.416 + return 0; 3.417 + } 3.418 + case BLKTAP_IOCTL_PRINT_IDXS: 3.419 + { 3.420 + //print_fe_ring_idxs(); 3.421 + WPRINTK("User Rings: \n-----------\n"); 3.422 + WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d " 3.423 + "| req_prod: %2d, rsp_prod: %2d\n", 3.424 + blktap_ufe_ring.rsp_cons, 3.425 + blktap_ufe_ring.req_prod_pvt, 3.426 + blktap_ufe_ring.sring->req_prod, 3.427 + blktap_ufe_ring.sring->rsp_prod); 3.428 + 3.429 + } 3.430 + } 3.431 + return -ENOIOCTLCMD; 3.432 +} 3.433 + 3.434 +static unsigned int blktap_poll(struct file *file, poll_table *wait) 3.435 +{ 3.436 + poll_wait(file, &blktap_wait, wait); 3.437 + if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ) 3.438 + { 3.439 + flush_tlb_all(); 3.440 + 3.441 + RING_PUSH_REQUESTS(&blktap_ufe_ring); 3.442 + return POLLIN | POLLRDNORM; 3.443 + } 3.444 + 3.445 + return 0; 3.446 +} 3.447 + 3.448 +void blktap_kick_user(void) 3.449 { 3.450 + /* blktap_ring->req_prod = blktap_req_prod; */ 3.451 + wake_up_interruptible(&blktap_wait); 3.452 +} 3.453 + 3.454 +static struct file_operations blktap_fops = { 3.455 + owner: THIS_MODULE, 3.456 + poll: blktap_poll, 3.457 + ioctl: blktap_ioctl, 3.458 + open: blktap_open, 3.459 + release: blktap_release, 3.460 + mmap: blktap_mmap, 3.461 +}; 3.462 + 3.463 + 3.464 + 3.465 +static int do_block_io_op(blkif_t *blkif, int max_to_do); 3.466 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req); 3.467 +static void make_response(blkif_t *blkif, unsigned long id, 3.468 + unsigned short op, int st); 3.469 + 3.470 + 3.471 +static void fast_flush_area(int idx, int nr_pages) 3.472 +{ 3.473 + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; 3.474 + unsigned int i, op = 0; 3.475 + struct grant_handle_pair *handle; 3.476 + unsigned long ptep; 3.477 + 3.478 + for (i=0; i<nr_pages; i++) 3.479 + { 3.480 + handle = &pending_handle(idx, i); 3.481 + if (!BLKTAP_INVALID_HANDLE(handle)) 3.482 + { 3.483 + 3.484 + unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i); 3.485 + unmap[op].dev_bus_addr = 0; 3.486 + unmap[op].handle = handle->kernel; 3.487 + op++; 3.488 + 3.489 + if (create_lookup_pte_addr(blktap_vma->vm_mm, 3.490 + MMAP_VADDR(user_vstart, idx, i), 3.491 + &ptep) !=0) { 3.492 + DPRINTK("Couldn't get a pte addr!\n"); 3.493 + return; 3.494 + } 3.495 + unmap[op].host_addr = ptep; 3.496 + unmap[op].dev_bus_addr = 0; 3.497 + unmap[op].handle = handle->user; 3.498 + op++; 3.499 + 3.500 + BLKTAP_INVALIDATE_HANDLE(handle); 3.501 + } 3.502 + } 3.503 + if ( unlikely(HYPERVISOR_grant_table_op( 3.504 + GNTTABOP_unmap_grant_ref, unmap, op))) 3.505 + BUG(); 3.506 + 3.507 + if (blktap_vma != NULL) 3.508 + zap_page_range(blktap_vma, 3.509 + MMAP_VADDR(user_vstart, idx, 0), 3.510 + nr_pages << PAGE_SHIFT, NULL); 3.511 +} 3.512 + 3.513 +/****************************************************************** 3.514 + * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE 3.515 + */ 3.516 + 3.517 +static struct list_head blkio_schedule_list; 3.518 +static spinlock_t blkio_schedule_list_lock; 3.519 + 3.520 +static int __on_blkdev_list(blkif_t *blkif) 3.521 +{ 3.522 + return blkif->blkdev_list.next != NULL; 3.523 +} 3.524 + 3.525 +static void remove_from_blkdev_list(blkif_t *blkif) 3.526 +{ 3.527 + unsigned long flags; 3.528 + if ( !__on_blkdev_list(blkif) ) return; 3.529 + spin_lock_irqsave(&blkio_schedule_list_lock, flags); 3.530 + if ( __on_blkdev_list(blkif) ) 3.531 + { 3.532 + list_del(&blkif->blkdev_list); 3.533 + blkif->blkdev_list.next = NULL; 
3.534 + blkif_put(blkif); 3.535 + } 3.536 + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); 3.537 +} 3.538 + 3.539 +static void add_to_blkdev_list_tail(blkif_t *blkif) 3.540 +{ 3.541 + unsigned long flags; 3.542 + if ( __on_blkdev_list(blkif) ) return; 3.543 + spin_lock_irqsave(&blkio_schedule_list_lock, flags); 3.544 + if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) 3.545 + { 3.546 + list_add_tail(&blkif->blkdev_list, &blkio_schedule_list); 3.547 + blkif_get(blkif); 3.548 + } 3.549 + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); 3.550 +} 3.551 + 3.552 + 3.553 +/****************************************************************** 3.554 + * SCHEDULER FUNCTIONS 3.555 + */ 3.556 + 3.557 +static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait); 3.558 + 3.559 +static int blkio_schedule(void *arg) 3.560 +{ 3.561 + DECLARE_WAITQUEUE(wq, current); 3.562 + 3.563 + blkif_t *blkif; 3.564 + struct list_head *ent; 3.565 + 3.566 + daemonize("xenblkd"); 3.567 + 3.568 + for ( ; ; ) 3.569 + { 3.570 + /* Wait for work to do. */ 3.571 + add_wait_queue(&blkio_schedule_wait, &wq); 3.572 + set_current_state(TASK_INTERRUPTIBLE); 3.573 + if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 3.574 + list_empty(&blkio_schedule_list) ) 3.575 + schedule(); 3.576 + __set_current_state(TASK_RUNNING); 3.577 + remove_wait_queue(&blkio_schedule_wait, &wq); 3.578 + 3.579 + /* Queue up a batch of requests. */ 3.580 + while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && 3.581 + !list_empty(&blkio_schedule_list) ) 3.582 + { 3.583 + ent = blkio_schedule_list.next; 3.584 + blkif = list_entry(ent, blkif_t, blkdev_list); 3.585 + blkif_get(blkif); 3.586 + remove_from_blkdev_list(blkif); 3.587 + if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) 3.588 + add_to_blkdev_list_tail(blkif); 3.589 + blkif_put(blkif); 3.590 + } 3.591 + } 3.592 +} 3.593 + 3.594 +static void maybe_trigger_blkio_schedule(void) 3.595 +{ 3.596 + /* 3.597 + * Needed so that two processes, who together make the following predicate 3.598 + * true, don't both read stale values and evaluate the predicate 3.599 + * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... 3.600 + */ 3.601 + smp_mb(); 3.602 + 3.603 + if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && 3.604 + !list_empty(&blkio_schedule_list) ) 3.605 + wake_up(&blkio_schedule_wait); 3.606 } 3.607 3.608 -void blkdev_resume(void) 3.609 + 3.610 + 3.611 +/****************************************************************** 3.612 + * COMPLETION CALLBACK -- Called as bh->b_end_io() 3.613 + */ 3.614 + 3.615 + 3.616 +static int blktap_read_ufe_ring(void) 3.617 +{ 3.618 + /* This is called to read responses from the UFE ring. 
*/ 3.619 + 3.620 + RING_IDX i, j, rp; 3.621 + blkif_response_t *resp; 3.622 + blkif_t *blkif; 3.623 + int pending_idx; 3.624 + pending_req_t *pending_req; 3.625 + unsigned long flags; 3.626 + 3.627 + /* if we are forwarding from UFERring to FERing */ 3.628 + if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { 3.629 + 3.630 + /* for each outstanding message on the UFEring */ 3.631 + rp = blktap_ufe_ring.sring->rsp_prod; 3.632 + rmb(); 3.633 + 3.634 + for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ ) 3.635 + { 3.636 + resp = RING_GET_RESPONSE(&blktap_ufe_ring, i); 3.637 + pending_idx = MASK_PEND_IDX(ID_TO_IDX(resp->id)); 3.638 + pending_req = &pending_reqs[pending_idx]; 3.639 + 3.640 + blkif = pending_req->blkif; 3.641 + for (j = 0; j < pending_req->nr_pages; j++) { 3.642 + unsigned long vaddr; 3.643 + struct page **map = blktap_vma->vm_private_data; 3.644 + int offset; 3.645 + 3.646 + vaddr = MMAP_VADDR(user_vstart, pending_idx, j); 3.647 + offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT; 3.648 + 3.649 + //ClearPageReserved(virt_to_page(vaddr)); 3.650 + ClearPageReserved((struct page *)map[offset]); 3.651 + map[offset] = NULL; 3.652 + } 3.653 + 3.654 + fast_flush_area(pending_idx, pending_req->nr_pages); 3.655 + make_response(blkif, pending_req->id, resp->operation, 3.656 + resp->status); 3.657 + blkif_put(pending_req->blkif); 3.658 + spin_lock_irqsave(&pend_prod_lock, flags); 3.659 + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; 3.660 + spin_unlock_irqrestore(&pend_prod_lock, flags); 3.661 + } 3.662 + blktap_ufe_ring.rsp_cons = i; 3.663 + maybe_trigger_blkio_schedule(); 3.664 + } 3.665 + return 0; 3.666 +} 3.667 + 3.668 + 3.669 +/****************************************************************************** 3.670 + * NOTIFICATION FROM GUEST OS. 3.671 + */ 3.672 + 3.673 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) 3.674 +{ 3.675 + blkif_t *blkif = dev_id; 3.676 + add_to_blkdev_list_tail(blkif); 3.677 + maybe_trigger_blkio_schedule(); 3.678 + return IRQ_HANDLED; 3.679 +} 3.680 + 3.681 + 3.682 + 3.683 +/****************************************************************** 3.684 + * DOWNWARD CALLS -- These interface with the block-device layer proper. 3.685 + */ 3.686 + 3.687 +static int do_block_io_op(blkif_t *blkif, int max_to_do) 3.688 { 3.689 - ctrl_msg_t cmsg; 3.690 - blkif_fe_driver_status_t st; 3.691 + blkif_back_ring_t *blk_ring = &blkif->blk_ring; 3.692 + blkif_request_t *req; 3.693 + RING_IDX i, rp; 3.694 + int more_to_do = 0; 3.695 + 3.696 + rp = blk_ring->sring->req_prod; 3.697 + rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ 3.698 + 3.699 + for ( i = blk_ring->req_cons; 3.700 + (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i); 3.701 + i++ ) 3.702 + { 3.703 + if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) ) 3.704 + { 3.705 + more_to_do = 1; 3.706 + break; 3.707 + } 3.708 + 3.709 + req = RING_GET_REQUEST(blk_ring, i); 3.710 + switch ( req->operation ) 3.711 + { 3.712 + case BLKIF_OP_READ: 3.713 + case BLKIF_OP_WRITE: 3.714 + dispatch_rw_block_io(blkif, req); 3.715 + break; 3.716 + 3.717 + default: 3.718 + DPRINTK("error: unknown block io operation [%d]\n", 3.719 + req->operation); 3.720 + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); 3.721 + break; 3.722 + } 3.723 + } 3.724 + 3.725 + blk_ring->req_cons = i; 3.726 + blktap_kick_user(); 3.727 + 3.728 + return more_to_do; 3.729 +} 3.730 + 3.731 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) 3.732 +{ 3.733 + blkif_request_t *target; 3.734 + int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; 3.735 + pending_req_t *pending_req; 3.736 + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; 3.737 + int op, ret; 3.738 + unsigned int nseg; 3.739 + 3.740 + /* Check that number of segments is sane. */ 3.741 + nseg = req->nr_segments; 3.742 + if ( unlikely(nseg == 0) || 3.743 + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) 3.744 + { 3.745 + DPRINTK("Bad number of segments in request (%d)\n", nseg); 3.746 + goto bad_descriptor; 3.747 + } 3.748 + 3.749 + /* Make sure userspace is ready. */ 3.750 + if (!blktap_ring_ok) { 3.751 + DPRINTK("blktap: ring not ready for requests!\n"); 3.752 + goto bad_descriptor; 3.753 + } 3.754 + 3.755 + 3.756 + if ( RING_FULL(&blktap_ufe_ring) ) { 3.757 + WPRINTK("blktap: fe_ring is full, can't add (very broken!).\n"); 3.758 + goto bad_descriptor; 3.759 + } 3.760 + 3.761 + flush_cache_all(); /* a noop on intel... */ 3.762 + 3.763 + /* Map the foreign pages directly in to the application */ 3.764 + op = 0; 3.765 + for (i=0; i<req->nr_segments; i++) { 3.766 + 3.767 + unsigned long uvaddr; 3.768 + unsigned long kvaddr; 3.769 + unsigned long ptep; 3.770 + 3.771 + uvaddr = MMAP_VADDR(user_vstart, pending_idx, i); 3.772 + kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i); 3.773 + 3.774 + /* Map the remote page to kernel. */ 3.775 + map[op].host_addr = kvaddr; 3.776 + map[op].dom = blkif->domid; 3.777 + map[op].ref = blkif_gref_from_fas(req->frame_and_sects[i]); 3.778 + map[op].flags = GNTMAP_host_map; 3.779 + /* This needs a bit more thought in terms of interposition: 3.780 + * If we want to be able to modify pages during write using 3.781 + * grant table mappings, the guest will either need to allow 3.782 + * it, or we'll need to incur a copy. Bit of an fbufs moment. ;) */ 3.783 + if (req->operation == BLKIF_OP_WRITE) 3.784 + map[op].flags |= GNTMAP_readonly; 3.785 + op++; 3.786 3.787 - /* Send a driver-UP notification to the domain controller. */ 3.788 - cmsg.type = CMSG_BLKIF_FE; 3.789 - cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; 3.790 - cmsg.length = sizeof(blkif_fe_driver_status_t); 3.791 - st.status = BLKIF_DRIVER_STATUS_UP; 3.792 - memcpy(cmsg.msg, &st, sizeof(st)); 3.793 - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 3.794 + /* Now map it to user. 
*/ 3.795 + ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); 3.796 + if (ret) 3.797 + { 3.798 + DPRINTK("Couldn't get a pte addr!\n"); 3.799 + fast_flush_area(pending_idx, req->nr_segments); 3.800 + goto bad_descriptor; 3.801 + } 3.802 + 3.803 + map[op].host_addr = ptep; 3.804 + map[op].dom = blkif->domid; 3.805 + map[op].ref = blkif_gref_from_fas(req->frame_and_sects[i]); 3.806 + map[op].flags = GNTMAP_host_map | GNTMAP_application_map 3.807 + | GNTMAP_contains_pte; 3.808 + /* Above interposition comment applies here as well. */ 3.809 + if (req->operation == BLKIF_OP_WRITE) 3.810 + map[op].flags |= GNTMAP_readonly; 3.811 + op++; 3.812 + } 3.813 + 3.814 + if ( unlikely(HYPERVISOR_grant_table_op( 3.815 + GNTTABOP_map_grant_ref, map, op))) 3.816 + BUG(); 3.817 + 3.818 + op = 0; 3.819 + for (i=0; i<(req->nr_segments*2); i+=2) { 3.820 + unsigned long uvaddr; 3.821 + unsigned long kvaddr; 3.822 + unsigned long offset; 3.823 + int cancel = 0; 3.824 + 3.825 + uvaddr = MMAP_VADDR(user_vstart, pending_idx, i/2); 3.826 + kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i/2); 3.827 + 3.828 + if ( unlikely(map[i].handle < 0) ) 3.829 + { 3.830 + DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle); 3.831 + ret = map[i].handle; 3.832 + cancel = 1; 3.833 + } 3.834 + 3.835 + if ( unlikely(map[i+1].handle < 0) ) 3.836 + { 3.837 + DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle); 3.838 + ret = map[i+1].handle; 3.839 + cancel = 1; 3.840 + } 3.841 + 3.842 + if (cancel) 3.843 + { 3.844 + fast_flush_area(pending_idx, req->nr_segments); 3.845 + goto bad_descriptor; 3.846 + } 3.847 + 3.848 + /* Set the necessary mappings in p2m and in the VM_FOREIGN 3.849 + * vm_area_struct to allow user vaddr -> struct page lookups 3.850 + * to work. This is needed for direct IO to foreign pages. */ 3.851 + phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] = 3.852 + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT); 3.853 + 3.854 + offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; 3.855 + ((struct page **)blktap_vma->vm_private_data)[offset] = 3.856 + pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); 3.857 + 3.858 + /* Save handles for unmapping later. */ 3.859 + pending_handle(pending_idx, i/2).kernel = map[i].handle; 3.860 + pending_handle(pending_idx, i/2).user = map[i+1].handle; 3.861 + } 3.862 + 3.863 + /* Mark mapped pages as reserved: */ 3.864 + for ( i = 0; i < req->nr_segments; i++ ) 3.865 + { 3.866 + unsigned long kvaddr; 3.867 + 3.868 + kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i); 3.869 + SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT)); 3.870 + } 3.871 + 3.872 + pending_req = &pending_reqs[pending_idx]; 3.873 + pending_req->blkif = blkif; 3.874 + pending_req->id = req->id; 3.875 + pending_req->operation = req->operation; 3.876 + pending_req->status = BLKIF_RSP_OKAY; 3.877 + pending_req->nr_pages = nseg; 3.878 + req->id = MAKE_ID(blkif->domid, pending_idx); 3.879 + //atomic_set(&pending_req->pendcnt, nbio); 3.880 + pending_cons++; 3.881 + blkif_get(blkif); 3.882 + 3.883 + /* Finally, write the request message to the user ring. 
*/ 3.884 + target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt); 3.885 + memcpy(target, req, sizeof(*req)); 3.886 + blktap_ufe_ring.req_prod_pvt++; 3.887 + return; 3.888 + 3.889 + bad_descriptor: 3.890 + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); 3.891 +} 3.892 + 3.893 + 3.894 + 3.895 +/****************************************************************** 3.896 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING 3.897 + */ 3.898 + 3.899 + 3.900 +static void make_response(blkif_t *blkif, unsigned long id, 3.901 + unsigned short op, int st) 3.902 +{ 3.903 + blkif_response_t *resp; 3.904 + unsigned long flags; 3.905 + blkif_back_ring_t *blk_ring = &blkif->blk_ring; 3.906 + 3.907 + /* Place on the response ring for the relevant domain. */ 3.908 + spin_lock_irqsave(&blkif->blk_ring_lock, flags); 3.909 + resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt); 3.910 + resp->id = id; 3.911 + resp->operation = op; 3.912 + resp->status = st; 3.913 + wmb(); /* Ensure other side can see the response fields. */ 3.914 + blk_ring->rsp_prod_pvt++; 3.915 + RING_PUSH_RESPONSES(blk_ring); 3.916 + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); 3.917 + 3.918 + /* Kick the relevant domain. */ 3.919 + notify_via_evtchn(blkif->evtchn); 3.920 } 3.921 -#endif 3.922 + 3.923 +static struct miscdevice blktap_miscdev = { 3.924 + .minor = BLKTAP_MINOR, 3.925 + .name = "blktap", 3.926 + .fops = &blktap_fops, 3.927 + .devfs_name = "misc/blktap", 3.928 +}; 3.929 + 3.930 +void blkif_deschedule(blkif_t *blkif) 3.931 +{ 3.932 + remove_from_blkdev_list(blkif); 3.933 +} 3.934 + 3.935 +static int __init blkif_init(void) 3.936 +{ 3.937 + int i, j, err; 3.938 + struct page *page; 3.939 +/* 3.940 + if ( !(xen_start_info.flags & SIF_INITDOMAIN) && 3.941 + !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) 3.942 + return 0; 3.943 +*/ 3.944 + blkif_interface_init(); 3.945 + 3.946 + page = balloon_alloc_empty_page_range(MMAP_PAGES); 3.947 + BUG_ON(page == NULL); 3.948 + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 3.949 3.950 -__initcall(xlblktap_init); 3.951 + pending_cons = 0; 3.952 + pending_prod = MAX_PENDING_REQS; 3.953 + memset(pending_reqs, 0, sizeof(pending_reqs)); 3.954 + for ( i = 0; i < MAX_PENDING_REQS; i++ ) 3.955 + pending_ring[i] = i; 3.956 + 3.957 + spin_lock_init(&blkio_schedule_list_lock); 3.958 + INIT_LIST_HEAD(&blkio_schedule_list); 3.959 + 3.960 + if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) 3.961 + BUG(); 3.962 + 3.963 + blkif_xenbus_init(); 3.964 + 3.965 + for (i=0; i<MAX_PENDING_REQS ; i++) 3.966 + for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) 3.967 + BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j)); 3.968 + 3.969 + err = misc_register(&blktap_miscdev); 3.970 + if ( err != 0 ) 3.971 + { 3.972 + printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err); 3.973 + return err; 3.974 + } 3.975 + 3.976 + init_waitqueue_head(&blktap_wait); 3.977 + 3.978 + return 0; 3.979 +} 3.980 + 3.981 +__initcall(blkif_init);
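
Two mechanisms in the new blktap.c above are worth distilling. Because requests complete out of order, free pending_req slots are kept in a ring rather than a stack, and the ring's fill level doubles as the backpressure signal (NR_PENDING_REQS). Meanwhile the request id is hijacked to carry both the frontend domid and the slot index, so the backend can map pages for the correct domain and the tap can find its bookkeeping when the response returns. A standalone sketch using the driver's constants (the masking requires MAX_PENDING_REQS to be a power of two):

#include <assert.h>
#include <stdio.h>

#define MAX_PENDING_REQS 64
#define MASK_PEND_IDX(i) ((i) & (MAX_PENDING_REQS - 1))

typedef unsigned int PEND_RING_IDX;
typedef unsigned short domid_t;

static unsigned char pending_ring[MAX_PENDING_REQS]; /* free slot indices */
static PEND_RING_IDX pending_prod, pending_cons;
/* In-flight count: zero when every slot is free. */
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

/* domid in the top 16 bits, pending-ring index in the bottom 16. */
static unsigned long make_id(domid_t fe_dom, PEND_RING_IDX idx)
{
    return ((unsigned long)fe_dom << 16) | MASK_PEND_IDX(idx);
}
static PEND_RING_IDX id_to_idx(unsigned long id) { return id & 0xffff; }
static domid_t       id_to_dom(unsigned long id) { return (domid_t)(id >> 16); }

int main(void)
{
    /* Initialise exactly as blkif_init() does: every slot starts free. */
    pending_cons = 0;
    pending_prod = MAX_PENDING_REQS;
    for (int i = 0; i < MAX_PENDING_REQS; i++)
        pending_ring[i] = i;

    /* Allocate a slot for a request from (say) domain 5. */
    assert(NR_PENDING_REQS < MAX_PENDING_REQS);
    int idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
    unsigned long id = make_id(5, idx);
    printf("in flight: dom %u, slot %u\n",
           (unsigned)id_to_dom(id), id_to_idx(id));

    /* Completion can arrive in any order; just push the slot back. */
    pending_ring[MASK_PEND_IDX(pending_prod++)] = id_to_idx(id);
    return 0;
}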
4.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Sun Sep 04 15:08:16 2005 +0000 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,573 +0,0 @@ 4.4 -/****************************************************************************** 4.5 - * blktap_controlmsg.c 4.6 - * 4.7 - * XenLinux virtual block-device tap. 4.8 - * Control interfaces to the frontend and backend drivers. 4.9 - * 4.10 - * Copyright (c) 2004, Andrew Warfield 4.11 - * 4.12 - */ 4.13 - 4.14 -#include "blktap.h" 4.15 -#include <asm-xen/evtchn.h> 4.16 - 4.17 -static char *blkif_state_name[] = { 4.18 - [BLKIF_STATE_CLOSED] = "closed", 4.19 - [BLKIF_STATE_DISCONNECTED] = "disconnected", 4.20 - [BLKIF_STATE_CONNECTED] = "connected", 4.21 -}; 4.22 - 4.23 -static char *blkif_status_name[] = { 4.24 - [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", 4.25 - [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", 4.26 - [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", 4.27 - [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", 4.28 -}; 4.29 - 4.30 -unsigned int blktap_be_state = BLKIF_STATE_CLOSED; 4.31 -unsigned int blktap_be_evtchn; 4.32 - 4.33 -/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/ 4.34 - 4.35 -#define BLKIF_HASHSZ 1024 4.36 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) 4.37 - 4.38 -static kmem_cache_t *blkif_cachep; 4.39 -static blkif_t *blkif_hash[BLKIF_HASHSZ]; 4.40 - 4.41 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) 4.42 -{ 4.43 - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; 4.44 - while ( (blkif != NULL) && 4.45 - ((blkif->domid != domid) || (blkif->handle != handle)) ) 4.46 - blkif = blkif->hash_next; 4.47 - return blkif; 4.48 -} 4.49 - 4.50 -static void __blkif_disconnect_complete(void *arg) 4.51 -{ 4.52 - blkif_t *blkif = (blkif_t *)arg; 4.53 - ctrl_msg_t cmsg; 4.54 - blkif_be_disconnect_t disc; 4.55 -#ifdef CONFIG_XEN_BLKDEV_GRANT 4.56 - struct gnttab_unmap_grant_ref op; 4.57 -#endif 4.58 - 4.59 - /* 4.60 - * These can't be done in blkif_disconnect() because at that point there 4.61 - * may be outstanding requests at the disc whose asynchronous responses 4.62 - * must still be notified to the remote driver. 4.63 - */ 4.64 -#ifdef CONFIG_XEN_BLKDEV_GRANT 4.65 - op.host_addr = blkif->shmem_vaddr; 4.66 - op.handle = blkif->shmem_handle; 4.67 - op.dev_bus_addr = 0; 4.68 - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); 4.69 -#endif 4.70 - vfree(blkif->blk_ring.sring); 4.71 - 4.72 - /* Construct the deferred response message. */ 4.73 - cmsg.type = CMSG_BLKIF_BE; 4.74 - cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; 4.75 - cmsg.id = blkif->disconnect_rspid; 4.76 - cmsg.length = sizeof(blkif_be_disconnect_t); 4.77 - disc.domid = blkif->domid; 4.78 - disc.blkif_handle = blkif->handle; 4.79 - disc.status = BLKIF_BE_STATUS_OKAY; 4.80 - memcpy(cmsg.msg, &disc, sizeof(disc)); 4.81 - 4.82 - /* 4.83 - * Make sure message is constructed /before/ status change, because 4.84 - * after the status change the 'blkif' structure could be deallocated at 4.85 - * any time. Also make sure we send the response /after/ status change, 4.86 - * as otherwise a subsequent CONNECT request could spuriously fail if 4.87 - * another CPU doesn't see the status change yet. 4.88 - */ 4.89 - mb(); 4.90 - if ( blkif->status != DISCONNECTING ) 4.91 - BUG(); 4.92 - blkif->status = DISCONNECTED; 4.93 - mb(); 4.94 - 4.95 - /* Send the successful response. 
*/ 4.96 - ctrl_if_send_response(&cmsg); 4.97 -} 4.98 - 4.99 -void blkif_disconnect_complete(blkif_t *blkif) 4.100 -{ 4.101 - INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif); 4.102 - schedule_work(&blkif->work); 4.103 -} 4.104 - 4.105 -void blkif_ptfe_create(blkif_be_create_t *create) 4.106 -{ 4.107 - blkif_t *blkif, **pblkif; 4.108 - domid_t domid = create->domid; 4.109 - unsigned int handle = create->blkif_handle; 4.110 - 4.111 - 4.112 - /* May want to store info on the connecting domain here. */ 4.113 - 4.114 - DPRINTK("PT got BE_CREATE\n"); 4.115 - 4.116 - if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL ) 4.117 - { 4.118 - WPRINTK("Could not create blkif: out of memory\n"); 4.119 - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 4.120 - return; 4.121 - } 4.122 - 4.123 - /* blkif struct init code from blkback.c */ 4.124 - memset(blkif, 0, sizeof(*blkif)); 4.125 - blkif->domid = domid; 4.126 - blkif->handle = handle; 4.127 - blkif->status = DISCONNECTED; 4.128 - spin_lock_init(&blkif->blk_ring_lock); 4.129 - atomic_set(&blkif->refcnt, 0); 4.130 - 4.131 - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 4.132 - while ( *pblkif != NULL ) 4.133 - { 4.134 - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) 4.135 - { 4.136 - WPRINTK("Could not create blkif: already exists\n"); 4.137 - create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; 4.138 - kmem_cache_free(blkif_cachep, blkif); 4.139 - return; 4.140 - } 4.141 - pblkif = &(*pblkif)->hash_next; 4.142 - } 4.143 - 4.144 - blkif->hash_next = *pblkif; 4.145 - *pblkif = blkif; 4.146 - 4.147 - create->status = BLKIF_BE_STATUS_OKAY; 4.148 -} 4.149 - 4.150 - 4.151 -void blkif_ptfe_destroy(blkif_be_destroy_t *destroy) 4.152 -{ 4.153 - /* Clear anything that we initialized above. 
*/ 4.154 - 4.155 - domid_t domid = destroy->domid; 4.156 - unsigned int handle = destroy->blkif_handle; 4.157 - blkif_t **pblkif, *blkif; 4.158 - 4.159 - DPRINTK("PT got BE_DESTROY\n"); 4.160 - 4.161 - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 4.162 - while ( (blkif = *pblkif) != NULL ) 4.163 - { 4.164 - if ( (blkif->domid == domid) && (blkif->handle == handle) ) 4.165 - { 4.166 - if ( blkif->status != DISCONNECTED ) 4.167 - goto still_connected; 4.168 - goto destroy; 4.169 - } 4.170 - pblkif = &blkif->hash_next; 4.171 - } 4.172 - 4.173 - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 4.174 - return; 4.175 - 4.176 - still_connected: 4.177 - destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; 4.178 - return; 4.179 - 4.180 - destroy: 4.181 - *pblkif = blkif->hash_next; 4.182 - kmem_cache_free(blkif_cachep, blkif); 4.183 - destroy->status = BLKIF_BE_STATUS_OKAY; 4.184 -} 4.185 - 4.186 -void blkif_ptfe_connect(blkif_be_connect_t *connect) 4.187 -{ 4.188 - domid_t domid = connect->domid; 4.189 - unsigned int handle = connect->blkif_handle; 4.190 - unsigned int evtchn = connect->evtchn; 4.191 - unsigned long shmem_frame = connect->shmem_frame; 4.192 - struct vm_struct *vma; 4.193 -#ifdef CONFIG_XEN_BLKDEV_GRANT 4.194 - int ref = connect->shmem_ref; 4.195 -#else 4.196 - pgprot_t prot; 4.197 - int error; 4.198 -#endif 4.199 - blkif_t *blkif; 4.200 - blkif_sring_t *sring; 4.201 - 4.202 - DPRINTK("PT got BE_CONNECT\n"); 4.203 - 4.204 - blkif = blkif_find_by_handle(domid, handle); 4.205 - if ( unlikely(blkif == NULL) ) 4.206 - { 4.207 - WPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", 4.208 - connect->domid, connect->blkif_handle); 4.209 - connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 4.210 - return; 4.211 - } 4.212 - 4.213 - if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) 4.214 - { 4.215 - connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 4.216 - return; 4.217 - } 4.218 - 4.219 -#ifndef CONFIG_XEN_BLKDEV_GRANT 4.220 - prot = __pgprot(_KERNPG_TABLE); 4.221 - error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), 4.222 - shmem_frame<<PAGE_SHIFT, PAGE_SIZE, 4.223 - prot, domid); 4.224 - if ( error != 0 ) 4.225 - { 4.226 - if ( error == -ENOMEM ) 4.227 - connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 4.228 - else if ( error == -EFAULT ) 4.229 - connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; 4.230 - else 4.231 - connect->status = BLKIF_BE_STATUS_ERROR; 4.232 - vfree(vma->addr); 4.233 - return; 4.234 - } 4.235 -#else 4.236 - { /* Map: Use the Grant table reference */ 4.237 - struct gnttab_map_grant_ref op; 4.238 - op.host_addr = VMALLOC_VMADDR(vma->addr); 4.239 - op.flags = GNTMAP_host_map; 4.240 - op.ref = ref; 4.241 - op.dom = domid; 4.242 - 4.243 - BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); 4.244 - 4.245 - handle = op.handle; 4.246 - 4.247 - if (op.handle < 0) { 4.248 - DPRINTK(" Grant table operation failure !\n"); 4.249 - connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; 4.250 - vfree(vma->addr); 4.251 - return; 4.252 - } 4.253 - 4.254 - blkif->shmem_ref = ref; 4.255 - blkif->shmem_handle = handle; 4.256 - blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr); 4.257 - } 4.258 -#endif 4.259 - 4.260 - if ( blkif->status != DISCONNECTED ) 4.261 - { 4.262 - connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; 4.263 - vfree(vma->addr); 4.264 - return; 4.265 - } 4.266 - 4.267 - sring = (blkif_sring_t *)vma->addr; 4.268 - SHARED_RING_INIT(sring); 4.269 - BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); 4.270 - 
4.271 - blkif->evtchn = evtchn; 4.272 - blkif->shmem_frame = shmem_frame; 4.273 - blkif->status = CONNECTED; 4.274 - blkif_get(blkif); 4.275 - 4.276 - bind_evtchn_to_irqhandler( 4.277 - evtchn, blkif_ptfe_int, 0, "blkif-pt-backend", blkif); 4.278 - 4.279 - connect->status = BLKIF_BE_STATUS_OKAY; 4.280 -} 4.281 - 4.282 -int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id) 4.283 -{ 4.284 - domid_t domid = disconnect->domid; 4.285 - unsigned int handle = disconnect->blkif_handle; 4.286 - blkif_t *blkif; 4.287 - 4.288 - DPRINTK("PT got BE_DISCONNECT\n"); 4.289 - 4.290 - blkif = blkif_find_by_handle(domid, handle); 4.291 - if ( unlikely(blkif == NULL) ) 4.292 - { 4.293 - WPRINTK("blkif_disconnect attempted for non-existent blkif" 4.294 - " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); 4.295 - disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 4.296 - return 1; /* Caller will send response error message. */ 4.297 - } 4.298 - 4.299 - if ( blkif->status == CONNECTED ) 4.300 - { 4.301 - blkif->status = DISCONNECTING; 4.302 - blkif->disconnect_rspid = rsp_id; 4.303 - wmb(); /* Let other CPUs see the status change. */ 4.304 - unbind_evtchn_from_irqhandler(blkif->evtchn, blkif); 4.305 - blkif_deschedule(blkif); 4.306 - blkif_put(blkif); 4.307 - return 0; /* Caller should not send response message. */ 4.308 - } 4.309 - 4.310 - disconnect->status = BLKIF_BE_STATUS_OKAY; 4.311 - return 1; 4.312 -} 4.313 - 4.314 -/*-----[ Control Messages to/from Backend VM ]----------------------------*/ 4.315 - 4.316 -/* Tell the controller to bring up the interface. */ 4.317 -static void blkif_ptbe_send_interface_connect(void) 4.318 -{ 4.319 - ctrl_msg_t cmsg = { 4.320 - .type = CMSG_BLKIF_FE, 4.321 - .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, 4.322 - .length = sizeof(blkif_fe_interface_connect_t), 4.323 - }; 4.324 - blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; 4.325 - msg->handle = 0; 4.326 - msg->shmem_frame = virt_to_mfn(blktap_be_ring.sring); 4.327 - 4.328 - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 4.329 -} 4.330 - 4.331 -static void blkif_ptbe_close(void) 4.332 -{ 4.333 -} 4.334 - 4.335 -/* Move from CLOSED to DISCONNECTED state. 
*/ 4.336 -static void blkif_ptbe_disconnect(void) 4.337 -{ 4.338 - blkif_sring_t *sring; 4.339 - 4.340 - sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL); 4.341 - SHARED_RING_INIT(sring); 4.342 - FRONT_RING_INIT(&blktap_be_ring, sring, PAGE_SIZE); 4.343 - blktap_be_state = BLKIF_STATE_DISCONNECTED; 4.344 - DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n"); 4.345 - blkif_ptbe_send_interface_connect(); 4.346 -} 4.347 - 4.348 -static void blkif_ptbe_connect(blkif_fe_interface_status_t *status) 4.349 -{ 4.350 - int err = 0; 4.351 - 4.352 - blktap_be_evtchn = status->evtchn; 4.353 - 4.354 - err = bind_evtchn_to_irqhandler( 4.355 - blktap_be_evtchn, blkif_ptbe_int, SA_SAMPLE_RANDOM, "blkif", NULL); 4.356 - if ( err ) { 4.357 - WPRINTK("blkfront bind_evtchn_to_irqhandler failed (%d)\n", err); 4.358 - return; 4.359 - } else { 4.360 - /* transtion to connected in case we need to do a 4.361 - a partion probe on a whole disk */ 4.362 - blktap_be_state = BLKIF_STATE_CONNECTED; 4.363 - } 4.364 -} 4.365 - 4.366 -static void unexpected(blkif_fe_interface_status_t *status) 4.367 -{ 4.368 - WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", 4.369 - blkif_status_name[status->status], 4.370 - blkif_state_name[blktap_be_state]); 4.371 -} 4.372 - 4.373 -static void blkif_ptbe_status( 4.374 - blkif_fe_interface_status_t *status) 4.375 -{ 4.376 - if ( status->handle != 0 ) 4.377 - { 4.378 - DPRINTK("Status change on unsupported blkif %d\n", 4.379 - status->handle); 4.380 - return; 4.381 - } 4.382 - 4.383 - DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]); 4.384 - 4.385 - switch ( status->status ) 4.386 - { 4.387 - case BLKIF_INTERFACE_STATUS_CLOSED: 4.388 - switch ( blktap_be_state ) 4.389 - { 4.390 - case BLKIF_STATE_CLOSED: 4.391 - unexpected(status); 4.392 - break; 4.393 - case BLKIF_STATE_DISCONNECTED: 4.394 - case BLKIF_STATE_CONNECTED: 4.395 - unexpected(status); 4.396 - blkif_ptbe_close(); 4.397 - break; 4.398 - } 4.399 - break; 4.400 - 4.401 - case BLKIF_INTERFACE_STATUS_DISCONNECTED: 4.402 - switch ( blktap_be_state ) 4.403 - { 4.404 - case BLKIF_STATE_CLOSED: 4.405 - blkif_ptbe_disconnect(); 4.406 - break; 4.407 - case BLKIF_STATE_DISCONNECTED: 4.408 - case BLKIF_STATE_CONNECTED: 4.409 - printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n"); 4.410 - unexpected(status); 4.411 - break; 4.412 - } 4.413 - break; 4.414 - 4.415 - case BLKIF_INTERFACE_STATUS_CONNECTED: 4.416 - switch ( blktap_be_state ) 4.417 - { 4.418 - case BLKIF_STATE_CLOSED: 4.419 - unexpected(status); 4.420 - blkif_ptbe_disconnect(); 4.421 - blkif_ptbe_connect(status); 4.422 - break; 4.423 - case BLKIF_STATE_DISCONNECTED: 4.424 - blkif_ptbe_connect(status); 4.425 - break; 4.426 - case BLKIF_STATE_CONNECTED: 4.427 - unexpected(status); 4.428 - blkif_ptbe_connect(status); 4.429 - break; 4.430 - } 4.431 - break; 4.432 - 4.433 - case BLKIF_INTERFACE_STATUS_CHANGED: 4.434 - switch ( blktap_be_state ) 4.435 - { 4.436 - case BLKIF_STATE_CLOSED: 4.437 - case BLKIF_STATE_DISCONNECTED: 4.438 - unexpected(status); 4.439 - break; 4.440 - case BLKIF_STATE_CONNECTED: 4.441 - /* vbd_update(); */ 4.442 - /* tap doesn't really get state changes... 
*/ 4.443 - unexpected(status); 4.444 - break; 4.445 - } 4.446 - break; 4.447 - 4.448 - default: 4.449 - DPRINTK("Status change to unknown value %d\n", status->status); 4.450 - break; 4.451 - } 4.452 -} 4.453 - 4.454 -/*-----[ All control messages enter here: ]-------------------------------*/ 4.455 - 4.456 -void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) 4.457 -{ 4.458 - switch ( msg->type ) 4.459 - { 4.460 - case CMSG_BLKIF_FE: 4.461 - 4.462 - switch ( msg->subtype ) 4.463 - { 4.464 - case CMSG_BLKIF_FE_INTERFACE_STATUS: 4.465 - blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]); 4.466 - break; 4.467 - 4.468 - default: 4.469 - goto parse_error; 4.470 - } 4.471 - 4.472 - break; 4.473 - 4.474 - case CMSG_BLKIF_BE: 4.475 - 4.476 - /* send a copy of the message to user if wanted */ 4.477 - 4.478 - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || 4.479 - (blktap_mode & BLKTAP_MODE_COPY_FE) ) { 4.480 - 4.481 - blktap_write_ctrl_ring(msg); 4.482 - blktap_kick_user(); 4.483 - } 4.484 - 4.485 - switch ( msg->subtype ) 4.486 - { 4.487 - case CMSG_BLKIF_BE_CREATE: 4.488 - blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]); 4.489 - break; 4.490 - case CMSG_BLKIF_BE_DESTROY: 4.491 - blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]); 4.492 - break; 4.493 - case CMSG_BLKIF_BE_CONNECT: 4.494 - blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]); 4.495 - break; 4.496 - case CMSG_BLKIF_BE_DISCONNECT: 4.497 - if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0], 4.498 - msg->id) ) 4.499 - return; 4.500 - break; 4.501 - 4.502 - /* We just ignore anything to do with vbds for now. */ 4.503 - 4.504 - case CMSG_BLKIF_BE_VBD_CREATE: 4.505 - DPRINTK("PT got VBD_CREATE\n"); 4.506 - ((blkif_be_vbd_create_t *)&msg->msg[0])->status 4.507 - = BLKIF_BE_STATUS_OKAY; 4.508 - break; 4.509 - case CMSG_BLKIF_BE_VBD_DESTROY: 4.510 - DPRINTK("PT got VBD_DESTROY\n"); 4.511 - ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status 4.512 - = BLKIF_BE_STATUS_OKAY; 4.513 - break; 4.514 - default: 4.515 - goto parse_error; 4.516 - } 4.517 - 4.518 - break; 4.519 - } 4.520 - 4.521 - ctrl_if_send_response(msg); 4.522 - return; 4.523 - 4.524 - parse_error: 4.525 - msg->length = 0; 4.526 - ctrl_if_send_response(msg); 4.527 -} 4.528 - 4.529 -/*-----[ Initialization ]-------------------------------------------------*/ 4.530 - 4.531 -void __init blkif_interface_init(void) 4.532 -{ 4.533 - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 4.534 - 0, 0, NULL, NULL); 4.535 - memset(blkif_hash, 0, sizeof(blkif_hash)); 4.536 - 4.537 - blktap_be_ring.sring = NULL; 4.538 -} 4.539 - 4.540 - 4.541 - 4.542 -/* Debug : print the current ring indices. 
*/ 4.543 - 4.544 -void print_fe_ring_idxs(void) 4.545 -{ 4.546 - int i; 4.547 - blkif_t *blkif; 4.548 - 4.549 - WPRINTK("FE Rings: \n---------\n"); 4.550 - for ( i = 0; i < BLKIF_HASHSZ; i++) { 4.551 - blkif = blkif_hash[i]; 4.552 - while (blkif != NULL) { 4.553 - if (blkif->status == DISCONNECTED) { 4.554 - WPRINTK("(%2d,%2d) DISCONNECTED\n", 4.555 - blkif->domid, blkif->handle); 4.556 - } else if (blkif->status == DISCONNECTING) { 4.557 - WPRINTK("(%2d,%2d) DISCONNECTING\n", 4.558 - blkif->domid, blkif->handle); 4.559 - } else if (blkif->blk_ring.sring == NULL) { 4.560 - WPRINTK("(%2d,%2d) CONNECTED, but null sring!\n", 4.561 - blkif->domid, blkif->handle); 4.562 - } else { 4.563 - blkif_get(blkif); 4.564 - WPRINTK("(%2d,%2d): req_cons: %2d, rsp_prod_prv: %2d " 4.565 - "| req_prod: %2d, rsp_prod: %2d\n", 4.566 - blkif->domid, blkif->handle, 4.567 - blkif->blk_ring.req_cons, 4.568 - blkif->blk_ring.rsp_prod_pvt, 4.569 - blkif->blk_ring.sring->req_prod, 4.570 - blkif->blk_ring.sring->rsp_prod); 4.571 - blkif_put(blkif); 4.572 - } 4.573 - blkif = blkif->hash_next; 4.574 - } 4.575 - } 4.576 -}
5.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c Sun Sep 04 15:08:16 2005 +0000 5.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 5.3 @@ -1,449 +0,0 @@ 5.4 -/****************************************************************************** 5.5 - * blktap_datapath.c 5.6 - * 5.7 - * XenLinux virtual block-device tap. 5.8 - * Block request routing data path. 5.9 - * 5.10 - * Copyright (c) 2004, Andrew Warfield 5.11 - * -- see full header in blktap.c 5.12 - */ 5.13 - 5.14 -#include "blktap.h" 5.15 -#include <asm-xen/evtchn.h> 5.16 - 5.17 -/*-----[ The data paths ]-------------------------------------------------*/ 5.18 - 5.19 -/* Connection to a single backend domain. */ 5.20 -blkif_front_ring_t blktap_be_ring; 5.21 - 5.22 -/*-----[ Tracking active requests ]---------------------------------------*/ 5.23 - 5.24 -/* this must be the same as MAX_PENDING_REQS in blkback.c */ 5.25 -#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U) 5.26 - 5.27 -active_req_t active_reqs[MAX_ACTIVE_REQS]; 5.28 -ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS]; 5.29 -spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED; 5.30 -ACTIVE_RING_IDX active_prod, active_cons; 5.31 -#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1)) 5.32 -#define ACTIVE_IDX(_ar) (_ar - active_reqs) 5.33 -#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons) 5.34 - 5.35 -inline active_req_t *get_active_req(void) 5.36 -{ 5.37 - ACTIVE_RING_IDX idx; 5.38 - active_req_t *ar; 5.39 - unsigned long flags; 5.40 - 5.41 - ASSERT(active_cons != active_prod); 5.42 - 5.43 - spin_lock_irqsave(&active_req_lock, flags); 5.44 - idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)]; 5.45 - ar = &active_reqs[idx]; 5.46 - spin_unlock_irqrestore(&active_req_lock, flags); 5.47 - 5.48 - return ar; 5.49 -} 5.50 - 5.51 -inline void free_active_req(active_req_t *ar) 5.52 -{ 5.53 - unsigned long flags; 5.54 - 5.55 - spin_lock_irqsave(&active_req_lock, flags); 5.56 - active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar); 5.57 - spin_unlock_irqrestore(&active_req_lock, flags); 5.58 -} 5.59 - 5.60 -active_req_t *lookup_active_req(ACTIVE_RING_IDX idx) 5.61 -{ 5.62 - return &active_reqs[idx]; 5.63 -} 5.64 - 5.65 -void active_reqs_init(void) 5.66 -{ 5.67 - ACTIVE_RING_IDX i; 5.68 - 5.69 - active_cons = 0; 5.70 - active_prod = MAX_ACTIVE_REQS; 5.71 - memset(active_reqs, 0, sizeof(active_reqs)); 5.72 - for ( i = 0; i < MAX_ACTIVE_REQS; i++ ) 5.73 - active_req_ring[i] = i; 5.74 -} 5.75 - 5.76 -/* Requests passing through the tap to the backend hijack the id field 5.77 - * in the request message. In it we put the AR index _AND_ the fe domid. 5.78 - * the domid is used by the backend to map the pages properly. 
5.79 - */ 5.80 - 5.81 -static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx) 5.82 -{ 5.83 - return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) ); 5.84 -} 5.85 - 5.86 -/*-----[ Ring helpers ]---------------------------------------------------*/ 5.87 - 5.88 -static void maybe_trigger_blktap_schedule(void); 5.89 - 5.90 -inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp) 5.91 -{ 5.92 - blkif_response_t *resp_d; 5.93 - active_req_t *ar; 5.94 - 5.95 - ar = &active_reqs[ID_TO_IDX(rsp->id)]; 5.96 - rsp->id = ar->id; 5.97 - 5.98 - resp_d = RING_GET_RESPONSE(&blkif->blk_ring, 5.99 - blkif->blk_ring.rsp_prod_pvt); 5.100 - memcpy(resp_d, rsp, sizeof(blkif_response_t)); 5.101 - wmb(); 5.102 - blkif->blk_ring.rsp_prod_pvt++; 5.103 - 5.104 - blkif_put(ar->blkif); 5.105 - free_active_req(ar); 5.106 - 5.107 - return 0; 5.108 -} 5.109 - 5.110 -inline int write_req_to_be_ring(blkif_request_t *req) 5.111 -{ 5.112 - blkif_request_t *req_d; 5.113 - 5.114 - if ( blktap_be_state != BLKIF_STATE_CONNECTED ) { 5.115 - WPRINTK("Tap trying to access an unconnected backend!\n"); 5.116 - return 0; 5.117 - } 5.118 - 5.119 - req_d = RING_GET_REQUEST(&blktap_be_ring, 5.120 - blktap_be_ring.req_prod_pvt); 5.121 - memcpy(req_d, req, sizeof(blkif_request_t)); 5.122 - wmb(); 5.123 - blktap_be_ring.req_prod_pvt++; 5.124 - 5.125 - return 0; 5.126 -} 5.127 - 5.128 -void kick_fe_domain(blkif_t *blkif) 5.129 -{ 5.130 - RING_PUSH_RESPONSES(&blkif->blk_ring); 5.131 - notify_via_evtchn(blkif->evtchn); 5.132 - DPRINTK("notified FE(dom %u)\n", blkif->domid); 5.133 - 5.134 - /* We just feed up a batch of request slots... */ 5.135 - maybe_trigger_blktap_schedule(); 5.136 - 5.137 -} 5.138 - 5.139 -void kick_be_domain(void) 5.140 -{ 5.141 - if ( blktap_be_state != BLKIF_STATE_CONNECTED ) 5.142 - return; 5.143 - 5.144 - wmb(); /* Ensure that the frontend can see the requests. 
*/ 5.145 - RING_PUSH_REQUESTS(&blktap_be_ring); 5.146 - notify_via_evtchn(blktap_be_evtchn); 5.147 - DPRINTK("notified BE\n"); 5.148 -} 5.149 - 5.150 -/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/ 5.151 - 5.152 -/*-----[ Scheduler list maint -from blkback ]--- */ 5.153 - 5.154 -static struct list_head blkio_schedule_list; 5.155 -static spinlock_t blkio_schedule_list_lock; 5.156 - 5.157 -static int __on_blkdev_list(blkif_t *blkif) 5.158 -{ 5.159 - return blkif->blkdev_list.next != NULL; 5.160 -} 5.161 - 5.162 -static void remove_from_blkdev_list(blkif_t *blkif) 5.163 -{ 5.164 - unsigned long flags; 5.165 - if ( !__on_blkdev_list(blkif) ) return; 5.166 - spin_lock_irqsave(&blkio_schedule_list_lock, flags); 5.167 - if ( __on_blkdev_list(blkif) ) 5.168 - { 5.169 - list_del(&blkif->blkdev_list); 5.170 - blkif->blkdev_list.next = NULL; 5.171 - blkif_put(blkif); 5.172 - } 5.173 - spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); 5.174 -} 5.175 - 5.176 -static void add_to_blkdev_list_tail(blkif_t *blkif) 5.177 -{ 5.178 - unsigned long flags; 5.179 - if ( __on_blkdev_list(blkif) ) return; 5.180 - spin_lock_irqsave(&blkio_schedule_list_lock, flags); 5.181 - if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) 5.182 - { 5.183 - list_add_tail(&blkif->blkdev_list, &blkio_schedule_list); 5.184 - blkif_get(blkif); 5.185 - } 5.186 - spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); 5.187 -} 5.188 - 5.189 - 5.190 -/*-----[ Scheduler functions - from blkback ]--- */ 5.191 - 5.192 -static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait); 5.193 - 5.194 -static int do_block_io_op(blkif_t *blkif, int max_to_do); 5.195 - 5.196 -static int blkio_schedule(void *arg) 5.197 -{ 5.198 - DECLARE_WAITQUEUE(wq, current); 5.199 - 5.200 - blkif_t *blkif; 5.201 - struct list_head *ent; 5.202 - 5.203 - daemonize( 5.204 - "xentapd" 5.205 - ); 5.206 - 5.207 - for ( ; ; ) 5.208 - { 5.209 - /* Wait for work to do. */ 5.210 - add_wait_queue(&blkio_schedule_wait, &wq); 5.211 - set_current_state(TASK_INTERRUPTIBLE); 5.212 - if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || 5.213 - list_empty(&blkio_schedule_list) ) 5.214 - schedule(); 5.215 - __set_current_state(TASK_RUNNING); 5.216 - remove_wait_queue(&blkio_schedule_wait, &wq); 5.217 - 5.218 - /* Queue up a batch of requests. */ 5.219 - while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) && 5.220 - !list_empty(&blkio_schedule_list) ) 5.221 - { 5.222 - ent = blkio_schedule_list.next; 5.223 - blkif = list_entry(ent, blkif_t, blkdev_list); 5.224 - blkif_get(blkif); 5.225 - remove_from_blkdev_list(blkif); 5.226 - if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) 5.227 - add_to_blkdev_list_tail(blkif); 5.228 - blkif_put(blkif); 5.229 - } 5.230 - } 5.231 -} 5.232 - 5.233 -static void maybe_trigger_blktap_schedule(void) 5.234 -{ 5.235 - /* 5.236 - * Needed so that two processes, who together make the following predicate 5.237 - * true, don't both read stale values and evaluate the predicate 5.238 - * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... 
5.239 - */ 5.240 - smp_mb(); 5.241 - 5.242 - if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS/2)) && 5.243 - !list_empty(&blkio_schedule_list) ) 5.244 - wake_up(&blkio_schedule_wait); 5.245 -} 5.246 - 5.247 -void blkif_deschedule(blkif_t *blkif) 5.248 -{ 5.249 - remove_from_blkdev_list(blkif); 5.250 -} 5.251 - 5.252 -void __init blkdev_schedule_init(void) 5.253 -{ 5.254 - spin_lock_init(&blkio_schedule_list_lock); 5.255 - INIT_LIST_HEAD(&blkio_schedule_list); 5.256 - 5.257 - if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) 5.258 - BUG(); 5.259 -} 5.260 - 5.261 -/*-----[ Interrupt entry from a frontend ]------ */ 5.262 - 5.263 -irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs) 5.264 -{ 5.265 - blkif_t *blkif = dev_id; 5.266 - 5.267 - add_to_blkdev_list_tail(blkif); 5.268 - maybe_trigger_blktap_schedule(); 5.269 - return IRQ_HANDLED; 5.270 -} 5.271 - 5.272 -/*-----[ Other Frontend Ring functions ]-------- */ 5.273 - 5.274 -/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/ 5.275 -static int do_block_io_op(blkif_t *blkif, int max_to_do) 5.276 -{ 5.277 - /* we have pending messages from the real frontend. */ 5.278 - 5.279 - blkif_request_t *req_s; 5.280 - RING_IDX i, rp; 5.281 - unsigned long flags; 5.282 - active_req_t *ar; 5.283 - int more_to_do = 0; 5.284 - int notify_be = 0, notify_user = 0; 5.285 - 5.286 - /* lock both rings */ 5.287 - spin_lock_irqsave(&blkif_io_lock, flags); 5.288 - 5.289 - rp = blkif->blk_ring.sring->req_prod; 5.290 - rmb(); 5.291 - 5.292 - for ( i = blkif->blk_ring.req_cons; 5.293 - (i != rp) && 5.294 - !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i); 5.295 - i++ ) 5.296 - { 5.297 - 5.298 - if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) 5.299 - { 5.300 - more_to_do = 1; 5.301 - break; 5.302 - } 5.303 - 5.304 - req_s = RING_GET_REQUEST(&blkif->blk_ring, i); 5.305 - /* This is a new request: 5.306 - * Assign an active request record, and remap the id. 5.307 - */ 5.308 - ar = get_active_req(); 5.309 - ar->id = req_s->id; 5.310 - ar->nr_pages = req_s->nr_segments; 5.311 - blkif_get(blkif); 5.312 - ar->blkif = blkif; 5.313 - req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar)); 5.314 - /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */ 5.315 - 5.316 - /* FE -> BE interposition point is here. */ 5.317 - 5.318 - /* ------------------------------------------------------------- */ 5.319 - /* BLKIF_OP_PROBE_HACK: */ 5.320 - /* Signal to the backend that we are a tap domain. 
*/ 5.321 - 5.322 - if (req_s->operation == BLKIF_OP_PROBE) { 5.323 - DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n"); 5.324 - req_s->frame_and_sects[1] = BLKTAP_COOKIE; 5.325 - } 5.326 - 5.327 - /* ------------------------------------------------------------- */ 5.328 - 5.329 - /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */ 5.330 - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || 5.331 - (blktap_mode & BLKTAP_MODE_COPY_FE) ) { 5.332 - 5.333 - /* Copy the response message to UFERing */ 5.334 - /* In MODE_INTERCEPT_FE, map attached pages into the app vma */ 5.335 - /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */ 5.336 - 5.337 - DPRINTK("req->UFERing\n"); 5.338 - blktap_write_fe_ring(req_s); 5.339 - notify_user = 1; 5.340 - } 5.341 - 5.342 - /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */ 5.343 - if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || 5.344 - (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) { 5.345 - 5.346 - /* be included to prevent noise from the fe when its off */ 5.347 - /* copy the request message to the BERing */ 5.348 - 5.349 - DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 5.350 - (unsigned)i & (RING_SIZE(&blktap_be_ring)-1), 5.351 - (unsigned)blktap_be_ring.req_prod_pvt & 5.352 - (RING_SIZE((&blktap_be_ring)-1))); 5.353 - 5.354 - write_req_to_be_ring(req_s); 5.355 - notify_be = 1; 5.356 - } 5.357 - } 5.358 - 5.359 - blkif->blk_ring.req_cons = i; 5.360 - 5.361 - /* unlock rings */ 5.362 - spin_unlock_irqrestore(&blkif_io_lock, flags); 5.363 - 5.364 - if (notify_user) 5.365 - blktap_kick_user(); 5.366 - if (notify_be) 5.367 - kick_be_domain(); 5.368 - 5.369 - return more_to_do; 5.370 -} 5.371 - 5.372 -/*-----[ Data to/from Backend (server) VM ]------------------------------*/ 5.373 - 5.374 - 5.375 -irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 5.376 - struct pt_regs *ptregs) 5.377 -{ 5.378 - blkif_response_t *resp_s; 5.379 - blkif_t *blkif; 5.380 - RING_IDX rp, i; 5.381 - unsigned long flags; 5.382 - 5.383 - DPRINTK("PT got BE interrupt.\n"); 5.384 - 5.385 - /* lock both rings */ 5.386 - spin_lock_irqsave(&blkif_io_lock, flags); 5.387 - 5.388 - rp = blktap_be_ring.sring->rsp_prod; 5.389 - rmb(); 5.390 - 5.391 - for ( i = blktap_be_ring.rsp_cons; i != rp; i++) 5.392 - { 5.393 - resp_s = RING_GET_RESPONSE(&blktap_be_ring, i); 5.394 - 5.395 - /* BE -> FE interposition point is here. 
*/ 5.396 - 5.397 - blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif; 5.398 - 5.399 - /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */ 5.400 - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || 5.401 - (blktap_mode & BLKTAP_MODE_COPY_BE) ) { 5.402 - 5.403 - /* Copy the response message to UBERing */ 5.404 - /* In MODE_INTERCEPT_BE, map attached pages into the app vma */ 5.405 - /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */ 5.406 - 5.407 - DPRINTK("rsp->UBERing\n"); 5.408 - blktap_write_be_ring(resp_s); 5.409 - blktap_kick_user(); 5.410 - 5.411 - } 5.412 - 5.413 - /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */ 5.414 - if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || 5.415 - (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) { 5.416 - 5.417 - /* (fe included to prevent random interference from the BE) */ 5.418 - /* Copy the response message to FERing */ 5.419 - 5.420 - DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 5.421 - (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1), 5.422 - (unsigned)blkif->blk_ring.rsp_prod_pvt & 5.423 - (RING_SIZE((&blkif->blk_ring)-1))); 5.424 - 5.425 - write_resp_to_fe_ring(blkif, resp_s); 5.426 - kick_fe_domain(blkif); 5.427 - 5.428 - } 5.429 - } 5.430 - 5.431 - blktap_be_ring.rsp_cons = i; 5.432 - 5.433 - 5.434 - spin_unlock_irqrestore(&blkif_io_lock, flags); 5.435 - 5.436 - return IRQ_HANDLED; 5.437 -} 5.438 - 5.439 -/* Debug : print the current ring indices. */ 5.440 - 5.441 -void print_be_ring_idxs(void) 5.442 -{ 5.443 - if (blktap_be_ring.sring != NULL) { 5.444 - WPRINTK("BE Ring: \n--------\n"); 5.445 - WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d " 5.446 - "| req_prod: %2d, rsp_prod: %2d\n", 5.447 - blktap_be_ring.rsp_cons, 5.448 - blktap_be_ring.req_prod_pvt, 5.449 - blktap_be_ring.sring->req_prod, 5.450 - blktap_be_ring.sring->rsp_prod); 5.451 - } 5.452 -}
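The datapath removed above multiplexed every frontend onto a single backend ring by hijacking the request id field, as the comment before MAKE_ID explains: the frontend domid goes in the bits above 16 and the active-request index in the low 16 bits. A minimal sketch of the unpack side, assuming ID_TO_IDX and ID_TO_DOM (defined in blktap.h, which this changeset does not show) are the inverses of MAKE_ID:

/* Sketch: recover the two halves packed by MAKE_ID above.  The real
 * definitions live in blktap.h and are assumed to match. */
static inline ACTIVE_RING_IDX ID_TO_IDX(unsigned long id)
{
    return (ACTIVE_RING_IDX)(id & 0xffff);  /* low 16 bits: active-req index */
}

static inline domid_t ID_TO_DOM(unsigned long id)
{
    return (domid_t)(id >> 16);             /* upper bits: frontend domid */
}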
6.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Sun Sep 04 15:08:16 2005 +0000 6.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 6.3 @@ -1,801 +0,0 @@ 6.4 -/****************************************************************************** 6.5 - * blktap_userdev.c 6.6 - * 6.7 - * XenLinux virtual block-device tap. 6.8 - * Control interface between the driver and a character device. 6.9 - * 6.10 - * Copyright (c) 2004, Andrew Warfield 6.11 - */ 6.12 - 6.13 -#include <linux/config.h> 6.14 -#include <linux/module.h> 6.15 -#include <linux/kernel.h> 6.16 -#include <linux/fs.h> 6.17 -#include <linux/mm.h> 6.18 -#include <linux/miscdevice.h> 6.19 -#include <linux/errno.h> 6.20 -#include <linux/major.h> 6.21 -#include <linux/gfp.h> 6.22 -#include <linux/poll.h> 6.23 -#include <asm/pgalloc.h> 6.24 -#include <asm/tlbflush.h> 6.25 -#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */ 6.26 -#ifdef CONFIG_XEN_BLKDEV_GRANT 6.27 -#include <asm-xen/xen-public/grant_table.h> 6.28 -#endif 6.29 - 6.30 -#include "blktap.h" 6.31 - 6.32 - 6.33 -unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH; 6.34 - 6.35 -/* Only one process may open /dev/xen/blktap at any time. */ 6.36 -static unsigned long blktap_dev_inuse; 6.37 -unsigned long blktap_ring_ok; /* make this ring->state */ 6.38 - 6.39 -/* for poll: */ 6.40 -static wait_queue_head_t blktap_wait; 6.41 - 6.42 -/* Rings up to user space. */ 6.43 -static blkif_front_ring_t blktap_ufe_ring; 6.44 -static blkif_back_ring_t blktap_ube_ring; 6.45 -static ctrl_front_ring_t blktap_uctrl_ring; 6.46 - 6.47 -/* local prototypes */ 6.48 -static int blktap_read_fe_ring(void); 6.49 -static int blktap_read_be_ring(void); 6.50 - 6.51 - 6.52 -/* -------[ mmap region ]--------------------------------------------- */ 6.53 -/* 6.54 - * We use a big chunk of address space to map in-flight requests into, 6.55 - * and export this region up to user-space. See the comments in blkback 6.56 - * about this -- the two must be kept in sync if the tap is used as a 6.57 - * passthrough. 6.58 - */ 6.59 - 6.60 -#define MAX_PENDING_REQS 64 6.61 - 6.62 -/* immediately before the mmap area, we have a bunch of pages reserved 6.63 - * for shared memory rings. 6.64 - */ 6.65 -#define RING_PAGES 3 /* Ctrl, Front, and Back */ 6.66 - 6.67 -/* Where things are inside the device mapping. */ 6.68 -struct vm_area_struct *blktap_vma = NULL; 6.69 -unsigned long mmap_vstart; /* Kernel pages for mapping in data. */ 6.70 -unsigned long rings_vstart; /* start of mmaped vma */ 6.71 -unsigned long user_vstart; /* start of user mappings */ 6.72 - 6.73 -#define MMAP_PAGES_PER_REQUEST \ 6.74 - (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) 6.75 -#define MMAP_PAGES \ 6.76 - (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) 6.77 -#define MMAP_VADDR(_start, _req,_seg) \ 6.78 - ( _start + \ 6.79 - ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ 6.80 - ((_seg) * PAGE_SIZE)) 6.81 - 6.82 -/* -------[ grant handles ]------------------------------------------- */ 6.83 - 6.84 -#ifdef CONFIG_XEN_BLKDEV_GRANT 6.85 -/* When using grant tables to map a frame for device access then the 6.86 - * handle returned must be used to unmap the frame. This is needed to 6.87 - * drop the ref count on the frame. 
6.88 - */ 6.89 -struct grant_handle_pair 6.90 -{ 6.91 - u16 kernel; 6.92 - u16 user; 6.93 -}; 6.94 -static struct grant_handle_pair pending_grant_handles[MMAP_PAGES]; 6.95 -#define pending_handle(_idx, _i) \ 6.96 - (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) 6.97 -#define BLKTAP_INVALID_HANDLE(_g) \ 6.98 - (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF)) 6.99 -#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ 6.100 - (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \ 6.101 - } while(0) 6.102 - 6.103 -#endif 6.104 - 6.105 - 6.106 -/* -------[ blktap vm ops ]------------------------------------------- */ 6.107 - 6.108 -static struct page *blktap_nopage(struct vm_area_struct *vma, 6.109 - unsigned long address, 6.110 - int *type) 6.111 -{ 6.112 - /* 6.113 - * if the page has not been mapped in by the driver then generate 6.114 - * a SIGBUS to the domain. 6.115 - */ 6.116 - 6.117 - force_sig(SIGBUS, current); 6.118 - 6.119 - return 0; 6.120 -} 6.121 - 6.122 -struct vm_operations_struct blktap_vm_ops = { 6.123 - nopage: blktap_nopage, 6.124 -}; 6.125 - 6.126 -/* -------[ blktap file ops ]----------------------------------------- */ 6.127 - 6.128 -static int blktap_open(struct inode *inode, struct file *filp) 6.129 -{ 6.130 - blkif_sring_t *sring; 6.131 - ctrl_sring_t *csring; 6.132 - 6.133 - if ( test_and_set_bit(0, &blktap_dev_inuse) ) 6.134 - return -EBUSY; 6.135 - 6.136 - /* Allocate the ctrl ring. */ 6.137 - csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL); 6.138 - if (csring == NULL) 6.139 - goto fail_nomem; 6.140 - 6.141 - SetPageReserved(virt_to_page(csring)); 6.142 - 6.143 - SHARED_RING_INIT(csring); 6.144 - FRONT_RING_INIT(&blktap_uctrl_ring, csring, PAGE_SIZE); 6.145 - 6.146 - /* Allocate the fe ring. */ 6.147 - sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); 6.148 - if (sring == NULL) 6.149 - goto fail_free_ctrl; 6.150 - 6.151 - SetPageReserved(virt_to_page(sring)); 6.152 - 6.153 - SHARED_RING_INIT(sring); 6.154 - FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE); 6.155 - 6.156 - /* Allocate the be ring. */ 6.157 - sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); 6.158 - if (sring == NULL) 6.159 - goto fail_free_fe; 6.160 - 6.161 - SetPageReserved(virt_to_page(sring)); 6.162 - 6.163 - SHARED_RING_INIT(sring); 6.164 - BACK_RING_INIT(&blktap_ube_ring, sring, PAGE_SIZE); 6.165 - 6.166 - DPRINTK(KERN_ALERT "blktap open.\n"); 6.167 - 6.168 - return 0; 6.169 - 6.170 - fail_free_ctrl: 6.171 - free_page( (unsigned long) blktap_uctrl_ring.sring); 6.172 - 6.173 - fail_free_fe: 6.174 - free_page( (unsigned long) blktap_ufe_ring.sring); 6.175 - 6.176 - fail_nomem: 6.177 - return -ENOMEM; 6.178 -} 6.179 - 6.180 -static int blktap_release(struct inode *inode, struct file *filp) 6.181 -{ 6.182 - blktap_dev_inuse = 0; 6.183 - blktap_ring_ok = 0; 6.184 - 6.185 - DPRINTK(KERN_ALERT "blktap closed.\n"); 6.186 - 6.187 - /* Free the ring page. 
*/ 6.188 - ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring)); 6.189 - free_page((unsigned long) blktap_uctrl_ring.sring); 6.190 - 6.191 - ClearPageReserved(virt_to_page(blktap_ufe_ring.sring)); 6.192 - free_page((unsigned long) blktap_ufe_ring.sring); 6.193 - 6.194 - ClearPageReserved(virt_to_page(blktap_ube_ring.sring)); 6.195 - free_page((unsigned long) blktap_ube_ring.sring); 6.196 - 6.197 - /* Clear any active mappings and free foreign map table */ 6.198 - if (blktap_vma != NULL) { 6.199 - zap_page_range(blktap_vma, blktap_vma->vm_start, 6.200 - blktap_vma->vm_end - blktap_vma->vm_start, NULL); 6.201 - blktap_vma = NULL; 6.202 - } 6.203 - 6.204 - return 0; 6.205 -} 6.206 - 6.207 -/* Note on mmap: 6.208 - * We need to map pages to user space in a way that will allow the block 6.209 - * subsystem set up direct IO to them. This couldn't be done before, because 6.210 - * there isn't really a sane way to make a user virtual address down to a 6.211 - * physical address when the page belongs to another domain. 6.212 - * 6.213 - * My first approach was to map the page in to kernel memory, add an entry 6.214 - * for it in the physical frame list (using alloc_lomem_region as in blkback) 6.215 - * and then attempt to map that page up to user space. This is disallowed 6.216 - * by xen though, which realizes that we don't really own the machine frame 6.217 - * underlying the physical page. 6.218 - * 6.219 - * The new approach is to provide explicit support for this in xen linux. 6.220 - * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages 6.221 - * mapped from other vms. vma->vm_private_data is set up as a mapping 6.222 - * from pages to actual page structs. There is a new clause in get_user_pages 6.223 - * that does the right thing for this sort of mapping. 6.224 - * 6.225 - * blktap_mmap sets up this mapping. Most of the real work is done in 6.226 - * blktap_write_fe_ring below. 6.227 - */ 6.228 -static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) 6.229 -{ 6.230 - int size; 6.231 - struct page **map; 6.232 - int i; 6.233 - 6.234 - DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n", 6.235 - vma->vm_start, vma->vm_end); 6.236 - 6.237 - vma->vm_flags |= VM_RESERVED; 6.238 - vma->vm_ops = &blktap_vm_ops; 6.239 - 6.240 - size = vma->vm_end - vma->vm_start; 6.241 - if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) { 6.242 - printk(KERN_INFO 6.243 - "blktap: you _must_ map exactly %d pages!\n", 6.244 - MMAP_PAGES + RING_PAGES); 6.245 - return -EAGAIN; 6.246 - } 6.247 - 6.248 - size >>= PAGE_SHIFT; 6.249 - DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1); 6.250 - 6.251 - rings_vstart = vma->vm_start; 6.252 - user_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT); 6.253 - 6.254 - /* Map the ring pages to the start of the region and reserve it. */ 6.255 - 6.256 - /* not sure if I really need to do this... 
*/ 6.257 - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 6.258 - 6.259 - DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring)); 6.260 - if (remap_pfn_range(vma, vma->vm_start, 6.261 - __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, 6.262 - PAGE_SIZE, vma->vm_page_prot)) 6.263 - goto fail; 6.264 - 6.265 - 6.266 - DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring)); 6.267 - if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, 6.268 - __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, 6.269 - PAGE_SIZE, vma->vm_page_prot)) 6.270 - goto fail; 6.271 - 6.272 - DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring)); 6.273 - if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), 6.274 - __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 6.275 - PAGE_SIZE, vma->vm_page_prot)) 6.276 - goto fail; 6.277 - 6.278 - /* Mark this VM as containing foreign pages, and set up mappings. */ 6.279 - map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) 6.280 - * sizeof(struct page_struct*), 6.281 - GFP_KERNEL); 6.282 - if (map == NULL) goto fail; 6.283 - 6.284 - for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++) 6.285 - map[i] = NULL; 6.286 - 6.287 - vma->vm_private_data = map; 6.288 - vma->vm_flags |= VM_FOREIGN; 6.289 - 6.290 - blktap_vma = vma; 6.291 - blktap_ring_ok = 1; 6.292 - 6.293 - return 0; 6.294 - fail: 6.295 - /* Clear any active mappings. */ 6.296 - zap_page_range(vma, vma->vm_start, 6.297 - vma->vm_end - vma->vm_start, NULL); 6.298 - 6.299 - return -ENOMEM; 6.300 -} 6.301 - 6.302 -static int blktap_ioctl(struct inode *inode, struct file *filp, 6.303 - unsigned int cmd, unsigned long arg) 6.304 -{ 6.305 - switch(cmd) { 6.306 - case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */ 6.307 - return blktap_read_fe_ring(); 6.308 - 6.309 - case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */ 6.310 - return blktap_read_be_ring(); 6.311 - 6.312 - case BLKTAP_IOCTL_SETMODE: 6.313 - if (BLKTAP_MODE_VALID(arg)) { 6.314 - blktap_mode = arg; 6.315 - /* XXX: may need to flush rings here. 
*/ 6.316 - printk(KERN_INFO "blktap: set mode to %lx\n", arg); 6.317 - return 0; 6.318 - } 6.319 - case BLKTAP_IOCTL_PRINT_IDXS: 6.320 - { 6.321 - print_be_ring_idxs(); 6.322 - print_fe_ring_idxs(); 6.323 - WPRINTK("User Rings: \n-----------\n"); 6.324 - WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d " 6.325 - "| req_prod: %2d, rsp_prod: %2d\n", 6.326 - blktap_ufe_ring.rsp_cons, 6.327 - blktap_ufe_ring.req_prod_pvt, 6.328 - blktap_ufe_ring.sring->req_prod, 6.329 - blktap_ufe_ring.sring->rsp_prod); 6.330 - WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d " 6.331 - "| req_prod: %2d, rsp_prod: %2d\n", 6.332 - blktap_ube_ring.req_cons, 6.333 - blktap_ube_ring.rsp_prod_pvt, 6.334 - blktap_ube_ring.sring->req_prod, 6.335 - blktap_ube_ring.sring->rsp_prod); 6.336 - 6.337 - } 6.338 - } 6.339 - return -ENOIOCTLCMD; 6.340 -} 6.341 - 6.342 -static unsigned int blktap_poll(struct file *file, poll_table *wait) 6.343 -{ 6.344 - poll_wait(file, &blktap_wait, wait); 6.345 - 6.346 - if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) || 6.347 - RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) || 6.348 - RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) { 6.349 - 6.350 - flush_tlb_all(); 6.351 - 6.352 - RING_PUSH_REQUESTS(&blktap_uctrl_ring); 6.353 - RING_PUSH_REQUESTS(&blktap_ufe_ring); 6.354 - RING_PUSH_RESPONSES(&blktap_ube_ring); 6.355 - return POLLIN | POLLRDNORM; 6.356 - } 6.357 - 6.358 - return 0; 6.359 -} 6.360 - 6.361 -void blktap_kick_user(void) 6.362 -{ 6.363 - /* blktap_ring->req_prod = blktap_req_prod; */ 6.364 - wake_up_interruptible(&blktap_wait); 6.365 -} 6.366 - 6.367 -static struct file_operations blktap_fops = { 6.368 - owner: THIS_MODULE, 6.369 - poll: blktap_poll, 6.370 - ioctl: blktap_ioctl, 6.371 - open: blktap_open, 6.372 - release: blktap_release, 6.373 - mmap: blktap_mmap, 6.374 -}; 6.375 - 6.376 -/*-----[ Data to/from user space ]----------------------------------------*/ 6.377 - 6.378 -static void fast_flush_area(int idx, int nr_pages) 6.379 -{ 6.380 -#ifdef CONFIG_XEN_BLKDEV_GRANT 6.381 - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; 6.382 - unsigned int i, op = 0; 6.383 - struct grant_handle_pair *handle; 6.384 - unsigned long ptep; 6.385 - 6.386 - for (i=0; i<nr_pages; i++) 6.387 - { 6.388 - handle = &pending_handle(idx, i); 6.389 - if (!BLKTAP_INVALID_HANDLE(handle)) 6.390 - { 6.391 - 6.392 - unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i); 6.393 - unmap[op].dev_bus_addr = 0; 6.394 - unmap[op].handle = handle->kernel; 6.395 - op++; 6.396 - 6.397 - if (create_lookup_pte_addr(blktap_vma->vm_mm, 6.398 - MMAP_VADDR(user_vstart, idx, i), 6.399 - &ptep) !=0) { 6.400 - DPRINTK("Couldn't get a pte addr!\n"); 6.401 - return; 6.402 - } 6.403 - unmap[op].host_addr = ptep; 6.404 - unmap[op].dev_bus_addr = 0; 6.405 - unmap[op].handle = handle->user; 6.406 - op++; 6.407 - 6.408 - BLKTAP_INVALIDATE_HANDLE(handle); 6.409 - } 6.410 - } 6.411 - if ( unlikely(HYPERVISOR_grant_table_op( 6.412 - GNTTABOP_unmap_grant_ref, unmap, op))) 6.413 - BUG(); 6.414 -#else 6.415 - multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 6.416 - int i; 6.417 - 6.418 - for ( i = 0; i < nr_pages; i++ ) 6.419 - { 6.420 - MULTI_update_va_mapping(mcl+i, MMAP_VADDR(mmap_vstart, idx, i), 6.421 - __pte(0), 0); 6.422 - } 6.423 - 6.424 - mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; 6.425 - if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) 6.426 - BUG(); 6.427 -#endif 6.428 -} 6.429 - 6.430 - 6.431 -int blktap_write_fe_ring(blkif_request_t *req) 6.432 -{ 6.433 - 
blkif_request_t *target; 6.434 - int i, ret = 0; 6.435 -#ifdef CONFIG_XEN_BLKDEV_GRANT 6.436 - struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; 6.437 - int op; 6.438 -#else 6.439 - unsigned long remap_prot; 6.440 - multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1]; 6.441 - mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 6.442 -#endif 6.443 - 6.444 - /* 6.445 - * This is called to pass a request from the real frontend domain's 6.446 - * blkif ring to the character device. 6.447 - */ 6.448 - 6.449 - if ( ! blktap_ring_ok ) { 6.450 - DPRINTK("blktap: ufe_ring not ready for a request!\n"); 6.451 - return 0; 6.452 - } 6.453 - 6.454 - if ( RING_FULL(&blktap_ufe_ring) ) { 6.455 - PRINTK("blktap: fe_ring is full, can't add.\n"); 6.456 - return 0; 6.457 - } 6.458 - 6.459 - flush_cache_all(); /* a noop on intel... */ 6.460 - 6.461 - target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt); 6.462 - memcpy(target, req, sizeof(*req)); 6.463 - 6.464 - /* Map the foreign pages directly in to the application */ 6.465 -#ifdef CONFIG_XEN_BLKDEV_GRANT 6.466 - op = 0; 6.467 - for (i=0; i<target->nr_segments; i++) { 6.468 - 6.469 - unsigned long uvaddr; 6.470 - unsigned long kvaddr; 6.471 - unsigned long ptep; 6.472 - 6.473 - uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); 6.474 - kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); 6.475 - 6.476 - /* Map the remote page to kernel. */ 6.477 - map[op].host_addr = kvaddr; 6.478 - map[op].dom = ID_TO_DOM(req->id); 6.479 - map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); 6.480 - map[op].flags = GNTMAP_host_map; 6.481 - /* This needs a bit more thought in terms of interposition: 6.482 - * If we want to be able to modify pages during write using 6.483 - * grant table mappings, the guest will either need to allow 6.484 - * it, or we'll need to incur a copy. */ 6.485 - if (req->operation == BLKIF_OP_WRITE) 6.486 - map[op].flags |= GNTMAP_readonly; 6.487 - op++; 6.488 - 6.489 - /* Now map it to user. */ 6.490 - ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); 6.491 - if (ret) 6.492 - { 6.493 - DPRINTK("Couldn't get a pte addr!\n"); 6.494 - goto fail; 6.495 - } 6.496 - 6.497 - map[op].host_addr = ptep; 6.498 - map[op].dom = ID_TO_DOM(req->id); 6.499 - map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); 6.500 - map[op].flags = GNTMAP_host_map | GNTMAP_application_map 6.501 - | GNTMAP_contains_pte; 6.502 - /* Above interposition comment applies here as well. 
*/ 6.503 - if (req->operation == BLKIF_OP_WRITE) 6.504 - map[op].flags |= GNTMAP_readonly; 6.505 - op++; 6.506 - } 6.507 - 6.508 - if ( unlikely(HYPERVISOR_grant_table_op( 6.509 - GNTTABOP_map_grant_ref, map, op))) 6.510 - BUG(); 6.511 - 6.512 - op = 0; 6.513 - for (i=0; i<(target->nr_segments*2); i+=2) { 6.514 - unsigned long uvaddr; 6.515 - unsigned long kvaddr; 6.516 - unsigned long offset; 6.517 - int cancel = 0; 6.518 - 6.519 - uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i/2); 6.520 - kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i/2); 6.521 - 6.522 - if ( unlikely(map[i].handle < 0) ) { 6.523 - DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle); 6.524 - ret = map[i].handle; 6.525 - cancel = 1; 6.526 - } 6.527 - 6.528 - if ( unlikely(map[i+1].handle < 0) ) { 6.529 - DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle); 6.530 - ret = map[i+1].handle; 6.531 - cancel = 1; 6.532 - } 6.533 - 6.534 - if (cancel) 6.535 - goto fail; 6.536 - 6.537 - /* Set the necessary mappings in p2m and in the VM_FOREIGN 6.538 - * vm_area_struct to allow user vaddr -> struct page lookups 6.539 - * to work. This is needed for direct IO to foreign pages. */ 6.540 - phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] = 6.541 - FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT); 6.542 - 6.543 - offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; 6.544 - ((struct page **)blktap_vma->vm_private_data)[offset] = 6.545 - pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); 6.546 - 6.547 - /* Save handles for unmapping later. */ 6.548 - pending_handle(ID_TO_IDX(req->id), i/2).kernel = map[i].handle; 6.549 - pending_handle(ID_TO_IDX(req->id), i/2).user = map[i+1].handle; 6.550 - } 6.551 - 6.552 -#else 6.553 - 6.554 - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; 6.555 - 6.556 - for (i=0; i<target->nr_segments; i++) { 6.557 - unsigned long buf; 6.558 - unsigned long uvaddr; 6.559 - unsigned long kvaddr; 6.560 - unsigned long offset; 6.561 - unsigned long ptep; 6.562 - 6.563 - buf = target->frame_and_sects[i] & PAGE_MASK; 6.564 - uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); 6.565 - kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); 6.566 - 6.567 - MULTI_update_va_mapping_otherdomain( 6.568 - mcl+i, 6.569 - kvaddr, 6.570 - pfn_pte_ma(buf >> PAGE_SHIFT, __pgprot(remap_prot)), 6.571 - 0, 6.572 - ID_TO_DOM(req->id)); 6.573 - 6.574 - phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = 6.575 - FOREIGN_FRAME(buf >> PAGE_SHIFT); 6.576 - 6.577 - ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); 6.578 - if (ret) 6.579 - { 6.580 - DPRINTK("error getting pte\n"); 6.581 - goto fail; 6.582 - } 6.583 - 6.584 - mmu[i].ptr = ptep; 6.585 - mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK) 6.586 - | pgprot_val(blktap_vma->vm_page_prot); 6.587 - 6.588 - offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; 6.589 - ((struct page **)blktap_vma->vm_private_data)[offset] = 6.590 - pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); 6.591 - } 6.592 - 6.593 - /* Add the mmu_update call. */ 6.594 - mcl[i].op = __HYPERVISOR_mmu_update; 6.595 - mcl[i].args[0] = (unsigned long)mmu; 6.596 - mcl[i].args[1] = target->nr_segments; 6.597 - mcl[i].args[2] = 0; 6.598 - mcl[i].args[3] = ID_TO_DOM(req->id); 6.599 - 6.600 - BUG_ON(HYPERVISOR_multicall(mcl, target->nr_segments+1) != 0); 6.601 - 6.602 - /* Make sure it all worked. 
*/ 6.603 - for ( i = 0; i < target->nr_segments; i++ ) 6.604 - { 6.605 - if ( unlikely(mcl[i].result != 0) ) 6.606 - { 6.607 - DPRINTK("invalid buffer -- could not remap it\n"); 6.608 - ret = mcl[i].result; 6.609 - goto fail; 6.610 - } 6.611 - } 6.612 - if ( unlikely(mcl[i].result != 0) ) 6.613 - { 6.614 - DPRINTK("direct remapping of pages to /dev/blktap failed.\n"); 6.615 - ret = mcl[i].result; 6.616 - goto fail; 6.617 - } 6.618 -#endif /* CONFIG_XEN_BLKDEV_GRANT */ 6.619 - 6.620 - /* Mark mapped pages as reserved: */ 6.621 - for ( i = 0; i < target->nr_segments; i++ ) 6.622 - { 6.623 - unsigned long kvaddr; 6.624 - 6.625 - kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); 6.626 - SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT)); 6.627 - } 6.628 - 6.629 - 6.630 - blktap_ufe_ring.req_prod_pvt++; 6.631 - 6.632 - return 0; 6.633 - 6.634 - fail: 6.635 - fast_flush_area(ID_TO_IDX(req->id), target->nr_segments); 6.636 - return ret; 6.637 -} 6.638 - 6.639 -int blktap_write_be_ring(blkif_response_t *rsp) 6.640 -{ 6.641 - blkif_response_t *target; 6.642 - 6.643 - /* 6.644 - * This is called to pass a request from the real backend domain's 6.645 - * blkif ring to the character device. 6.646 - */ 6.647 - 6.648 - if ( ! blktap_ring_ok ) { 6.649 - DPRINTK("blktap: be_ring not ready for a request!\n"); 6.650 - return 0; 6.651 - } 6.652 - 6.653 - /* No test for fullness in the response direction. */ 6.654 - 6.655 - target = RING_GET_RESPONSE(&blktap_ube_ring, 6.656 - blktap_ube_ring.rsp_prod_pvt); 6.657 - memcpy(target, rsp, sizeof(*rsp)); 6.658 - 6.659 - /* no mapping -- pages were mapped in blktap_write_fe_ring() */ 6.660 - 6.661 - blktap_ube_ring.rsp_prod_pvt++; 6.662 - 6.663 - return 0; 6.664 -} 6.665 - 6.666 -static int blktap_read_fe_ring(void) 6.667 -{ 6.668 - /* This is called to read responses from the UFE ring. */ 6.669 - 6.670 - RING_IDX i, j, rp; 6.671 - blkif_response_t *resp_s; 6.672 - blkif_t *blkif; 6.673 - active_req_t *ar; 6.674 - 6.675 - DPRINTK("blktap_read_fe_ring()\n"); 6.676 - 6.677 - /* if we are forwarding from UFERring to FERing */ 6.678 - if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { 6.679 - 6.680 - /* for each outstanding message on the UFEring */ 6.681 - rp = blktap_ufe_ring.sring->rsp_prod; 6.682 - rmb(); 6.683 - 6.684 - for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ ) 6.685 - { 6.686 - resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i); 6.687 - 6.688 - DPRINTK("resp->fe_ring\n"); 6.689 - ar = lookup_active_req(ID_TO_IDX(resp_s->id)); 6.690 - blkif = ar->blkif; 6.691 - for (j = 0; j < ar->nr_pages; j++) { 6.692 - unsigned long vaddr; 6.693 - struct page **map = blktap_vma->vm_private_data; 6.694 - int offset; 6.695 - 6.696 - vaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), j); 6.697 - offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT; 6.698 - 6.699 - ClearPageReserved(virt_to_page(vaddr)); 6.700 - map[offset] = NULL; 6.701 - } 6.702 - 6.703 - fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); 6.704 - zap_page_range(blktap_vma, 6.705 - MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0), 6.706 - ar->nr_pages << PAGE_SHIFT, NULL); 6.707 - write_resp_to_fe_ring(blkif, resp_s); 6.708 - blktap_ufe_ring.rsp_cons = i + 1; 6.709 - kick_fe_domain(blkif); 6.710 - } 6.711 - } 6.712 - return 0; 6.713 -} 6.714 - 6.715 -static int blktap_read_be_ring(void) 6.716 -{ 6.717 - /* This is called to read requests from the UBE ring. 
*/ 6.718 - 6.719 - RING_IDX i, rp; 6.720 - blkif_request_t *req_s; 6.721 - 6.722 - DPRINTK("blktap_read_be_ring()\n"); 6.723 - 6.724 - /* if we are forwarding from UFERring to FERing */ 6.725 - if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) { 6.726 - 6.727 - /* for each outstanding message on the UFEring */ 6.728 - rp = blktap_ube_ring.sring->req_prod; 6.729 - rmb(); 6.730 - for ( i = blktap_ube_ring.req_cons; i != rp; i++ ) 6.731 - { 6.732 - req_s = RING_GET_REQUEST(&blktap_ube_ring, i); 6.733 - 6.734 - DPRINTK("req->be_ring\n"); 6.735 - write_req_to_be_ring(req_s); 6.736 - kick_be_domain(); 6.737 - } 6.738 - 6.739 - blktap_ube_ring.req_cons = i; 6.740 - } 6.741 - 6.742 - return 0; 6.743 -} 6.744 - 6.745 -int blktap_write_ctrl_ring(ctrl_msg_t *msg) 6.746 -{ 6.747 - ctrl_msg_t *target; 6.748 - 6.749 - if ( ! blktap_ring_ok ) { 6.750 - DPRINTK("blktap: be_ring not ready for a request!\n"); 6.751 - return 0; 6.752 - } 6.753 - 6.754 - /* No test for fullness in the response direction. */ 6.755 - 6.756 - target = RING_GET_REQUEST(&blktap_uctrl_ring, 6.757 - blktap_uctrl_ring.req_prod_pvt); 6.758 - memcpy(target, msg, sizeof(*msg)); 6.759 - 6.760 - blktap_uctrl_ring.req_prod_pvt++; 6.761 - 6.762 - /* currently treat the ring as unidirectional. */ 6.763 - blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod; 6.764 - 6.765 - return 0; 6.766 - 6.767 -} 6.768 - 6.769 -/* -------[ blktap module setup ]------------------------------------- */ 6.770 - 6.771 -static struct miscdevice blktap_miscdev = { 6.772 - .minor = BLKTAP_MINOR, 6.773 - .name = "blktap", 6.774 - .fops = &blktap_fops, 6.775 - .devfs_name = "misc/blktap", 6.776 -}; 6.777 - 6.778 -int blktap_init(void) 6.779 -{ 6.780 - int err, i, j; 6.781 - struct page *page; 6.782 - 6.783 - page = balloon_alloc_empty_page_range(MMAP_PAGES); 6.784 - BUG_ON(page == NULL); 6.785 - mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 6.786 - 6.787 -#ifdef CONFIG_XEN_BLKDEV_GRANT 6.788 - for (i=0; i<MAX_PENDING_REQS ; i++) 6.789 - for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) 6.790 - BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j)); 6.791 -#endif 6.792 - 6.793 - err = misc_register(&blktap_miscdev); 6.794 - if ( err != 0 ) 6.795 - { 6.796 - printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err); 6.797 - return err; 6.798 - } 6.799 - 6.800 - init_waitqueue_head(&blktap_wait); 6.801 - 6.802 - 6.803 - return 0; 6.804 -}
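The long mmap note in the removed code is still the best description of how the tap gets direct IO to foreign pages: the character device exports a few ring pages followed by a fixed run of pages per in-flight request, and VM_FOREIGN plus the vm_private_data page array lets get_user_pages() resolve user addresses inside that region. A self-contained sketch of the address arithmetic, with constants mirroring the removed macros (the mmap base address is hypothetical):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11          /* per blkif.h of this era */
#define MMAP_PAGES_PER_REQUEST (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
#define RING_PAGES 3                               /* ctrl, front, back */

/* Same computation as the removed MMAP_VADDR() macro. */
static unsigned long mmap_vaddr(unsigned long start, unsigned long req,
                                unsigned long seg)
{
    return start + (req * MMAP_PAGES_PER_REQUEST * PAGE_SIZE)
                 + (seg * PAGE_SIZE);
}

int main(void)
{
    unsigned long rings_vstart = 0x40000000UL;     /* hypothetical mmap base */
    unsigned long user_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);

    /* User page backing segment 2 of in-flight request 5. */
    printf("%#lx\n", mmap_vaddr(user_vstart, 5, 2));
    return 0;
}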
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Sun Sep 04 21:19:44 2005 +0000 7.3 @@ -0,0 +1,112 @@ 7.4 + 7.5 +#ifndef __BLKIF__BACKEND__COMMON_H__ 7.6 +#define __BLKIF__BACKEND__COMMON_H__ 7.7 + 7.8 +#include <linux/config.h> 7.9 +#include <linux/version.h> 7.10 +#include <linux/module.h> 7.11 +#include <linux/interrupt.h> 7.12 +#include <linux/slab.h> 7.13 +#include <linux/blkdev.h> 7.14 +#include <linux/vmalloc.h> 7.15 +#include <asm/io.h> 7.16 +#include <asm/setup.h> 7.17 +#include <asm/pgalloc.h> 7.18 +#include <asm-xen/evtchn.h> 7.19 +#include <asm-xen/hypervisor.h> 7.20 +#include <asm-xen/xen-public/io/blkif.h> 7.21 +#include <asm-xen/xen-public/io/ring.h> 7.22 +#include <asm-xen/gnttab.h> 7.23 + 7.24 +#if 0 7.25 +#define ASSERT(_p) \ 7.26 + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ 7.27 + __LINE__, __FILE__); *(int*)0=0; } 7.28 +#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ 7.29 + __FILE__ , __LINE__ , ## _a ) 7.30 +#else 7.31 +#define ASSERT(_p) ((void)0) 7.32 +#define DPRINTK(_f, _a...) ((void)0) 7.33 +#endif 7.34 + 7.35 +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) 7.36 + 7.37 +struct vbd { 7.38 + blkif_vdev_t handle; /* what the domain refers to this vbd as */ 7.39 + unsigned char readonly; /* Non-zero -> read-only */ 7.40 + unsigned char type; /* VDISK_xxx */ 7.41 + blkif_pdev_t pdevice; /* phys device that this vbd maps to */ 7.42 + struct block_device *bdev; 7.43 +}; 7.44 + 7.45 +typedef struct blkif_st { 7.46 + /* Unique identifier for this interface. */ 7.47 + domid_t domid; 7.48 + unsigned int handle; 7.49 + /* Physical parameters of the comms window. */ 7.50 + unsigned long shmem_frame; 7.51 + unsigned int evtchn; 7.52 + unsigned int remote_evtchn; 7.53 + /* Comms information. */ 7.54 + blkif_back_ring_t blk_ring; 7.55 + /* VBDs attached to this interface. */ 7.56 + struct vbd vbd; 7.57 + /* Private fields. */ 7.58 + enum { DISCONNECTED, CONNECTED } status; 7.59 +#ifdef CONFIG_XEN_BLKDEV_TAP_BE 7.60 + /* Is this a blktap frontend */ 7.61 + unsigned int is_blktap; 7.62 +#endif 7.63 + struct list_head blkdev_list; 7.64 + spinlock_t blk_ring_lock; 7.65 + atomic_t refcnt; 7.66 + 7.67 + struct work_struct free_work; 7.68 + u16 shmem_handle; 7.69 + unsigned long shmem_vaddr; 7.70 + grant_ref_t shmem_ref; 7.71 +} blkif_t; 7.72 + 7.73 +void blkif_create(blkif_be_create_t *create); 7.74 +void blkif_destroy(blkif_be_destroy_t *destroy); 7.75 +void blkif_connect(blkif_be_connect_t *connect); 7.76 +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id); 7.77 +void blkif_disconnect_complete(blkif_t *blkif); 7.78 +blkif_t *alloc_blkif(domid_t domid); 7.79 +void free_blkif_callback(blkif_t *blkif); 7.80 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); 7.81 + 7.82 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) 7.83 +#define blkif_put(_b) \ 7.84 + do { \ 7.85 + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ 7.86 + free_blkif_callback(_b); \ 7.87 + } while (0) 7.88 + 7.89 +/* Create a vbd. 
*/ 7.90 +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, 7.91 + int readonly); 7.92 +void vbd_free(struct vbd *vbd); 7.93 + 7.94 +unsigned long vbd_size(struct vbd *vbd); 7.95 +unsigned int vbd_info(struct vbd *vbd); 7.96 +unsigned long vbd_secsize(struct vbd *vbd); 7.97 + 7.98 +struct phys_req { 7.99 + unsigned short dev; 7.100 + unsigned short nr_sects; 7.101 + struct block_device *bdev; 7.102 + blkif_sector_t sector_number; 7.103 +}; 7.104 + 7.105 +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 7.106 + 7.107 +void blkif_interface_init(void); 7.108 + 7.109 +void blkif_deschedule(blkif_t *blkif); 7.110 + 7.111 +void blkif_xenbus_init(void); 7.112 + 7.113 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); 7.114 + 7.115 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
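common.h pairs blkif_get()/blkif_put() with a deferred destructor: the final put schedules free_blkif_callback(), which tears the interface down from a workqueue rather than in whatever context dropped the last reference. A usage sketch under those conventions (the two function names here are illustrative):

#include "common.h"

static void example_queue_request(blkif_t *blkif)
{
    blkif_get(blkif);       /* pin the interface while a request is in flight */
    /* ... dispatch the request ... */
}

static void example_complete_request(blkif_t *blkif)
{
    /* ... push the response onto blkif->blk_ring ... */
    blkif_put(blkif);       /* last put schedules free_blkif_callback() */
}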
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Sun Sep 04 21:19:44 2005 +0000 8.3 @@ -0,0 +1,141 @@ 8.4 +/****************************************************************************** 8.5 + * arch/xen/drivers/blkif/backend/interface.c 8.6 + * 8.7 + * Block-device interface management. 8.8 + * 8.9 + * Copyright (c) 2004, Keir Fraser 8.10 + */ 8.11 + 8.12 +#include "common.h" 8.13 +#include <asm-xen/evtchn.h> 8.14 + 8.15 +static kmem_cache_t *blkif_cachep; 8.16 + 8.17 +blkif_t *alloc_blkif(domid_t domid) 8.18 +{ 8.19 + blkif_t *blkif; 8.20 + 8.21 + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); 8.22 + if (!blkif) 8.23 + return ERR_PTR(-ENOMEM); 8.24 + 8.25 + memset(blkif, 0, sizeof(*blkif)); 8.26 + blkif->domid = domid; 8.27 + blkif->status = DISCONNECTED; 8.28 + spin_lock_init(&blkif->blk_ring_lock); 8.29 + atomic_set(&blkif->refcnt, 1); 8.30 + 8.31 + return blkif; 8.32 +} 8.33 + 8.34 +static int map_frontend_page(blkif_t *blkif, unsigned long localaddr, 8.35 + unsigned long shared_page) 8.36 +{ 8.37 + struct gnttab_map_grant_ref op; 8.38 + op.host_addr = localaddr; 8.39 + op.flags = GNTMAP_host_map; 8.40 + op.ref = shared_page; 8.41 + op.dom = blkif->domid; 8.42 + 8.43 + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); 8.44 + 8.45 + if (op.handle < 0) { 8.46 + DPRINTK(" Grant table operation failure !\n"); 8.47 + return op.handle; 8.48 + } 8.49 + 8.50 + blkif->shmem_ref = shared_page; 8.51 + blkif->shmem_handle = op.handle; 8.52 + blkif->shmem_vaddr = localaddr; 8.53 + return 0; 8.54 +} 8.55 + 8.56 +static void unmap_frontend_page(blkif_t *blkif) 8.57 +{ 8.58 + struct gnttab_unmap_grant_ref op; 8.59 + 8.60 + op.host_addr = blkif->shmem_vaddr; 8.61 + op.handle = blkif->shmem_handle; 8.62 + op.dev_bus_addr = 0; 8.63 + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); 8.64 +} 8.65 + 8.66 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) 8.67 +{ 8.68 + struct vm_struct *vma; 8.69 + blkif_sring_t *sring; 8.70 + evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; 8.71 + int err; 8.72 + 8.73 + BUG_ON(blkif->remote_evtchn); 8.74 + 8.75 + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) 8.76 + return -ENOMEM; 8.77 + 8.78 + err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page); 8.79 + if (err) { 8.80 + vfree(vma->addr); 8.81 + return err; 8.82 + } 8.83 + 8.84 + op.u.bind_interdomain.dom1 = DOMID_SELF; 8.85 + op.u.bind_interdomain.dom2 = blkif->domid; 8.86 + op.u.bind_interdomain.port1 = 0; 8.87 + op.u.bind_interdomain.port2 = evtchn; 8.88 + err = HYPERVISOR_event_channel_op(&op); 8.89 + if (err) { 8.90 + unmap_frontend_page(blkif); 8.91 + vfree(vma->addr); 8.92 + return err; 8.93 + } 8.94 + 8.95 + blkif->evtchn = op.u.bind_interdomain.port1; 8.96 + blkif->remote_evtchn = evtchn; 8.97 + 8.98 + sring = (blkif_sring_t *)vma->addr; 8.99 + SHARED_RING_INIT(sring); 8.100 + BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); 8.101 + 8.102 + bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend", 8.103 + blkif); 8.104 + blkif->status = CONNECTED; 8.105 + blkif->shmem_frame = shared_page; 8.106 + 8.107 + return 0; 8.108 +} 8.109 + 8.110 +static void free_blkif(void *arg) 8.111 +{ 8.112 + evtchn_op_t op = { .cmd = EVTCHNOP_close }; 8.113 + blkif_t *blkif = (blkif_t *)arg; 8.114 + 8.115 + op.u.close.port = blkif->evtchn; 8.116 + op.u.close.dom = DOMID_SELF; 8.117 + HYPERVISOR_event_channel_op(&op); 8.118 + op.u.close.port = 
blkif->remote_evtchn; 8.119 + op.u.close.dom = blkif->domid; 8.120 + HYPERVISOR_event_channel_op(&op); 8.121 + 8.122 + if (blkif->evtchn) 8.123 + unbind_evtchn_from_irqhandler(blkif->evtchn, blkif); 8.124 + 8.125 + if (blkif->blk_ring.sring) { 8.126 + unmap_frontend_page(blkif); 8.127 + vfree(blkif->blk_ring.sring); 8.128 + blkif->blk_ring.sring = NULL; 8.129 + } 8.130 + 8.131 + kmem_cache_free(blkif_cachep, blkif); 8.132 +} 8.133 + 8.134 +void free_blkif_callback(blkif_t *blkif) 8.135 +{ 8.136 + INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif); 8.137 + schedule_work(&blkif->free_work); 8.138 +} 8.139 + 8.140 +void __init blkif_interface_init(void) 8.141 +{ 8.142 + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 8.143 + 0, 0, NULL, NULL); 8.144 +}
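interface.c splits connection setup in two: alloc_blkif() returns a DISCONNECTED blkif holding one reference, and blkif_map() grant-maps the frontend's shared ring page and binds the interdomain event channel. A hedged sketch of the calling sequence; the real caller is the xenbus code in the next file, and ring_ref/evtchn stand in for values read from the store:

#include "common.h"

static int example_connect(domid_t frontend_domid, unsigned long ring_ref,
                           unsigned int evtchn)
{
    blkif_t *blkif = alloc_blkif(frontend_domid);  /* refcnt==1, DISCONNECTED */
    int err;

    if (IS_ERR(blkif))
        return PTR_ERR(blkif);

    /* Grant-maps the shared page, binds the event channel, inits the ring. */
    err = blkif_map(blkif, ring_ref, evtchn);
    if (err)
        blkif_put(blkif);   /* drops the initial ref; freed via workqueue */
    return err;
}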
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Sun Sep 04 21:19:44 2005 +0000 9.3 @@ -0,0 +1,225 @@ 9.4 +/* Xenbus code for blkif tap 9.5 + 9.6 + A Warfield. 9.7 + 9.8 + Hastily modified from the original backend code: 9.9 + 9.10 + Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> 9.11 + 9.12 + This program is free software; you can redistribute it and/or modify 9.13 + it under the terms of the GNU General Public License as published by 9.14 + the Free Software Foundation; either version 2 of the License, or 9.15 + (at your option) any later version. 9.16 + 9.17 + This program is distributed in the hope that it will be useful, 9.18 + but WITHOUT ANY WARRANTY; without even the implied warranty of 9.19 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9.20 + GNU General Public License for more details. 9.21 + 9.22 + You should have received a copy of the GNU General Public License 9.23 + along with this program; if not, write to the Free Software 9.24 + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 9.25 +*/ 9.26 + 9.27 +#include <stdarg.h> 9.28 +#include <linux/module.h> 9.29 +#include <asm-xen/xenbus.h> 9.30 +#include "common.h" 9.31 + 9.32 +struct backend_info 9.33 +{ 9.34 + struct xenbus_device *dev; 9.35 + 9.36 + /* our communications channel */ 9.37 + blkif_t *blkif; 9.38 + 9.39 + long int frontend_id; 9.40 + 9.41 + /* watch back end for changes */ 9.42 + struct xenbus_watch backend_watch; 9.43 + 9.44 + /* watch front end for changes */ 9.45 + struct xenbus_watch watch; 9.46 + char *frontpath; 9.47 +}; 9.48 + 9.49 +static int blkback_remove(struct xenbus_device *dev) 9.50 +{ 9.51 + struct backend_info *be = dev->data; 9.52 + 9.53 + if (be->watch.node) 9.54 + unregister_xenbus_watch(&be->watch); 9.55 + unregister_xenbus_watch(&be->backend_watch); 9.56 + if (be->blkif) 9.57 + blkif_put(be->blkif); 9.58 + if (be->frontpath) 9.59 + kfree(be->frontpath); 9.60 + kfree(be); 9.61 + return 0; 9.62 +} 9.63 + 9.64 +/* Front end tells us frame. */ 9.65 +static void frontend_changed(struct xenbus_watch *watch, const char *node) 9.66 +{ 9.67 + unsigned long ring_ref; 9.68 + unsigned int evtchn; 9.69 + int err; 9.70 + struct backend_info *be 9.71 + = container_of(watch, struct backend_info, watch); 9.72 + 9.73 + /* If other end is gone, delete ourselves. */ 9.74 + if (node && !xenbus_exists(be->frontpath, "")) { 9.75 + xenbus_rm(be->dev->nodename, ""); 9.76 + device_unregister(&be->dev->dev); 9.77 + return; 9.78 + } 9.79 + if (be->blkif == NULL || be->blkif->status == CONNECTED) 9.80 + return; 9.81 + 9.82 + err = xenbus_gather(be->frontpath, "ring-ref", "%lu", &ring_ref, 9.83 + "event-channel", "%u", &evtchn, NULL); 9.84 + if (err) { 9.85 + xenbus_dev_error(be->dev, err, 9.86 + "reading %s/ring-ref and event-channel", 9.87 + be->frontpath); 9.88 + return; 9.89 + } 9.90 + 9.91 + /* Map the shared frame, irq etc. */ 9.92 + err = blkif_map(be->blkif, ring_ref, evtchn); 9.93 + if (err) { 9.94 + xenbus_dev_error(be->dev, err, "mapping ring-ref %lu port %u", 9.95 + ring_ref, evtchn); 9.96 + goto abort; 9.97 + } 9.98 + 9.99 + xenbus_dev_ok(be->dev); 9.100 + 9.101 + return; 9.102 + 9.103 +abort: 9.104 + xenbus_transaction_end(1); 9.105 +} 9.106 + 9.107 +/* 9.108 + Setup supplies physical device. 9.109 + We provide event channel and device details to front end. 9.110 + Frontend supplies shared frame and event channel.
9.111 + */ 9.112 +static void backend_changed(struct xenbus_watch *watch, const char *node) 9.113 +{ 9.114 + int err; 9.115 + char *p; 9.116 + long int handle; 9.117 + struct backend_info *be 9.118 + = container_of(watch, struct backend_info, backend_watch); 9.119 + struct xenbus_device *dev = be->dev; 9.120 + 9.121 + if (be->blkif == NULL) { 9.122 + /* Front end dir is a number, which is used as the handle. */ 9.123 + p = strrchr(be->frontpath, '/') + 1; 9.124 + handle = simple_strtoul(p, NULL, 0); 9.125 + 9.126 + be->blkif = alloc_blkif(be->frontend_id); 9.127 + if (IS_ERR(be->blkif)) { 9.128 + err = PTR_ERR(be->blkif); 9.129 + be->blkif = NULL; 9.130 + xenbus_dev_error(dev, err, "creating block interface"); 9.131 + return; 9.132 + } 9.133 + 9.134 + /* Pass in NULL node to skip exist test. */ 9.135 + frontend_changed(&be->watch, NULL); 9.136 + } 9.137 +} 9.138 + 9.139 +static int blkback_probe(struct xenbus_device *dev, 9.140 + const struct xenbus_device_id *id) 9.141 +{ 9.142 + struct backend_info *be; 9.143 + char *frontend; 9.144 + int err; 9.145 + 9.146 + be = kmalloc(sizeof(*be), GFP_KERNEL); 9.147 + if (!be) { 9.148 + xenbus_dev_error(dev, -ENOMEM, "allocating backend structure"); 9.149 + return -ENOMEM; 9.150 + } 9.151 + memset(be, 0, sizeof(*be)); 9.152 + 9.153 + frontend = NULL; 9.154 + err = xenbus_gather(dev->nodename, 9.155 + "frontend-id", "%li", &be->frontend_id, 9.156 + "frontend", NULL, &frontend, 9.157 + NULL); 9.158 + if (XENBUS_EXIST_ERR(err)) 9.159 + goto free_be; 9.160 + if (err < 0) { 9.161 + xenbus_dev_error(dev, err, 9.162 + "reading %s/frontend or frontend-id", 9.163 + dev->nodename); 9.164 + goto free_be; 9.165 + } 9.166 + if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) { 9.167 + /* If we can't get a frontend path and a frontend-id, 9.168 + * then our bus-id is no longer valid and we need to 9.169 + * destroy the backend device. 9.170 + */ 9.171 + err = -ENOENT; 9.172 + goto free_be; 9.173 + } 9.174 + 9.175 + be->dev = dev; 9.176 + be->backend_watch.node = dev->nodename; 9.177 + be->backend_watch.callback = backend_changed; 9.178 + err = register_xenbus_watch(&be->backend_watch); 9.179 + if (err) { 9.180 + be->backend_watch.node = NULL; 9.181 + xenbus_dev_error(dev, err, "adding backend watch on %s", 9.182 + dev->nodename); 9.183 + goto free_be; 9.184 + } 9.185 + 9.186 + be->frontpath = frontend; 9.187 + be->watch.node = be->frontpath; 9.188 + be->watch.callback = frontend_changed; 9.189 + err = register_xenbus_watch(&be->watch); 9.190 + if (err) { 9.191 + be->watch.node = NULL; 9.192 + xenbus_dev_error(dev, err, 9.193 + "adding frontend watch on %s", 9.194 + be->frontpath); 9.195 + goto free_be; 9.196 + } 9.197 + 9.198 + dev->data = be; 9.199 + 9.200 + backend_changed(&be->backend_watch, dev->nodename); 9.201 + return 0; 9.202 + 9.203 + free_be: 9.204 + if (be->backend_watch.node) 9.205 + unregister_xenbus_watch(&be->backend_watch); 9.206 + if (frontend) 9.207 + kfree(frontend); 9.208 + kfree(be); 9.209 + return err; 9.210 +} 9.211 + 9.212 +static struct xenbus_device_id blkback_ids[] = { 9.213 + { "vbd" }, 9.214 + { "" } 9.215 +}; 9.216 + 9.217 +static struct xenbus_driver blkback = { 9.218 + .name = "vbd", 9.219 + .owner = THIS_MODULE, 9.220 + .ids = blkback_ids, 9.221 + .probe = blkback_probe, 9.222 + .remove = blkback_remove, 9.223 +}; 9.224 + 9.225 +void blkif_xenbus_init(void) 9.226 +{ 9.227 + xenbus_register_backend(&blkback); 9.228 +}
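backend_changed() derives the VBD handle from the last component of the frontend path, and frontend_changed() then gathers ring-ref and event-channel from that directory. For reference, the store layout this code expects looks roughly like the following; the exact paths are written by the tools, and the values shown are illustrative:

/local/domain/0/backend/vbd/<frontend-id>/<handle>/
    frontend      = "/local/domain/<frontend-id>/device/vbd/<handle>"
    frontend-id   = "<frontend-id>"
/local/domain/<frontend-id>/device/vbd/<handle>/
    ring-ref      = "<grant reference of the shared ring page>"
    event-channel = "<event channel port>"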
12.1 --- a/linux-2.6-xen-sparse/mm/memory.c Sun Sep 04 15:08:16 2005 +0000 12.2 +++ b/linux-2.6-xen-sparse/mm/memory.c Sun Sep 04 21:19:44 2005 +0000 12.3 @@ -954,10 +954,8 @@ int get_user_pages(struct task_struct *t 12.4 i++; 12.5 start += PAGE_SIZE; 12.6 len--; 12.7 -printk(KERN_ALERT "HIT 0x%lx\n", start); 12.8 continue; 12.9 } 12.10 -else printk(KERN_ALERT "MISS 0x%lx\n", start); 12.11 } 12.12 12.13 if (!vma || (vma->vm_flags & VM_IO)
13.1 --- a/tools/blktap/Makefile Sun Sep 04 15:08:16 2005 +0000 13.2 +++ b/tools/blktap/Makefile Sun Sep 04 21:19:44 2005 +0000 13.3 @@ -6,7 +6,8 @@ XEN_ROOT = ../.. 13.4 include $(XEN_ROOT)/tools/Rules.mk 13.5 13.6 SUBDIRS := 13.7 -SUBDIRS += parallax 13.8 +SUBDIRS += ublkback 13.9 +#SUBDIRS += parallax 13.10 13.11 BLKTAP_INSTALL_DIR = /usr/sbin 13.12 13.13 @@ -14,12 +15,12 @@ INSTALL = install 13.14 INSTALL_PROG = $(INSTALL) -m0755 13.15 INSTALL_DIR = $(INSTALL) -d -m0755 13.16 13.17 -INCLUDES += -I. -I $(XEN_LIBXC) 13.18 +INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE) 13.19 13.20 LIBS := -lpthread -lz 13.21 13.22 SRCS := 13.23 -SRCS += blktaplib.c 13.24 +SRCS += blktaplib.c xenbus.c blkif.c 13.25 13.26 CFLAGS += -Wall 13.27 CFLAGS += -Werror 13.28 @@ -28,17 +29,20 @@ CFLAGS += -Wno-unused 13.29 CFLAGS += -g3 13.30 CFLAGS += -fno-strict-aliasing 13.31 CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE 13.32 +# get asprintf(): 13.33 +CFLAGS += -D _GNU_SOURCE 13.34 # Get gcc to generate the dependencies for us. 13.35 CFLAGS += -Wp,-MD,.$(@F).d 13.36 CFLAGS += $(INCLUDES) 13.37 DEPS = .*.d 13.38 13.39 OBJS = $(patsubst %.c,%.o,$(SRCS)) 13.40 -IBINS = blkdump 13.41 +IBINS := 13.42 +#IBINS += blkdump 13.43 13.44 LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR) 13.45 13.46 -all: mk-symlinks libblktap.so blkdump 13.47 +all: mk-symlinks libblktap.so #blkdump 13.48 @set -e; for subdir in $(SUBDIRS); do \ 13.49 $(MAKE) -C $$subdir $@; \ 13.50 done 13.51 @@ -59,7 +63,7 @@ install: all 13.52 $(INSTALL_DIR) -p $(DESTDIR)/usr/include 13.53 $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR) 13.54 $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include 13.55 - $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR) 13.56 + #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR) 13.57 @set -e; for subdir in $(SUBDIRS); do \ 13.58 $(MAKE) -C $$subdir $@; \ 13.59 done 13.60 @@ -79,14 +83,16 @@ rpm: all 13.61 mv staging/i386/*.rpm . 13.62 rm -rf staging 13.63 13.64 -libblktap.so: $(OBJS) 13.65 - $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared -o \ 13.66 - libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS) 13.67 +libblktap.so: $(OBJS) 13.68 + $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared \ 13.69 + -L$(XEN_XENSTORE) -l xenstore \ 13.70 + -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS) 13.71 ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR) 13.72 ln -sf libblktap.so.$(MAJOR) $@ 13.73 13.74 blkdump: libblktap.so 13.75 - $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. -l blktap blkdump.c 13.76 + $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \ 13.77 + -l blktap blkdump.c 13.78 13.79 .PHONY: TAGS clean install mk-symlinks rpm 13.80
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 14.2 +++ b/tools/blktap/README.sept05 Sun Sep 04 21:19:44 2005 +0000 14.3 @@ -0,0 +1,33 @@ 14.4 +The blktap has been rewritten substantially based on the current 14.5 +blkback driver. I've removed passthrough support, as this is broken 14.6 +by the move to grant tables and the lack of transitive grants. A 14.7 +blktap VM is now only capable of terminating block requests in 14.8 +userspace. 14.9 + 14.10 +ublkback/ contains a _very_ initial cut at a user-level version of the block 14.11 +backend driver. It gives a working example of how the current tap 14.12 +interfaces are used, in particular w.r.t. the vbd directories in 14.13 +xenstore. 14.14 + 14.15 +parallax/ contains fairly recent parallax code. This does not run on 14.16 +the changed blktap interface, but should only be a couple of hours 14.17 +work to get going again. 14.18 + 14.19 +All of the tricky bits are done, but there is plenty of cleaning to 14.20 +do, and the top-level functionality is not here yet. At the moment, 14.21 +the daemon ignores the pdev requested by the tools and opens the file 14.22 +or device specified by TMP_IMAGE_FILE_NAME in ublkback.c. 14.23 + 14.24 +TODO: 14.25 +1. Fix to allow pdev in the store to specify the device to open. 14.26 +2. Add support (to tools as well) to mount arbitrary files... 14.27 + just write the filename to mount into the store, instead of pdev. 14.28 +3. Reexamine blkif refcounting; it is almost certainly broken at the moment. 14.29 + - creating a blkif should take a reference. 14.30 + - each inflight request should take a reference on dequeue in blktaplib. 14.31 + - sending responses should drop refs. 14.32 + - blkif should be implicitly freed when refcounts fall to 0. 14.33 +4. Modify the parallax req/rsp code as per ublkback to use the new tap 14.34 + interfaces. 14.35 +5. Write a front end that allows parallax and normal mounts to coexist. 14.36 +6. Allow blkback and blktap to run at the same time.
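TODO item 3 compresses a whole refcounting discipline into four bullet points; a compact sketch of what it implies for taplib follows. The refcnt field and both helpers are hypothetical additions, since the userspace blkif_t in this checkin carries no counter:

/* Hypothetical refcounting for the userspace blkif_t (see TODO item 3). */
static void blkif_hold(blkif_t *blkif)
{
    blkif->refcnt++;           /* taken at create, and per request on dequeue */
}

static void blkif_drop(blkif_t *blkif)
{
    if (--blkif->refcnt == 0)  /* dropped as each response is sent */
        free_blkif(blkif);     /* implicit free when the count falls to 0 */
}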
15.1 --- a/tools/blktap/blkdump.c Sun Sep 04 15:08:16 2005 +0000 15.2 +++ b/tools/blktap/blkdump.c Sun Sep 04 21:19:44 2005 +0000 15.3 @@ -8,85 +8,18 @@ 15.4 #include <stdio.h> 15.5 #include "blktaplib.h" 15.6 15.7 -int control_print(control_msg_t *msg) 15.8 -{ 15.9 - if (msg->type != CMSG_BLKIF_BE) 15.10 - { 15.11 - printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); 15.12 - return 0; 15.13 - } 15.14 - 15.15 - switch(msg->subtype) 15.16 - { 15.17 - case CMSG_BLKIF_BE_CREATE: 15.18 - if ( msg->length != sizeof(blkif_be_create_t) ) 15.19 - goto parse_error; 15.20 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", 15.21 - ((blkif_be_create_t *)msg->msg)->domid, 15.22 - ((blkif_be_create_t *)msg->msg)->blkif_handle); 15.23 - break; 15.24 - case CMSG_BLKIF_BE_DESTROY: 15.25 - if ( msg->length != sizeof(blkif_be_destroy_t) ) 15.26 - goto parse_error; 15.27 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", 15.28 - ((blkif_be_destroy_t *)msg->msg)->domid, 15.29 - ((blkif_be_destroy_t *)msg->msg)->blkif_handle); 15.30 - break; 15.31 - case CMSG_BLKIF_BE_CONNECT: 15.32 - if ( msg->length != sizeof(blkif_be_connect_t) ) 15.33 - goto parse_error; 15.34 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_CONNECT(d:%d,h:%d)\n", 15.35 - ((blkif_be_connect_t *)msg->msg)->domid, 15.36 - ((blkif_be_connect_t *)msg->msg)->blkif_handle); 15.37 - break; 15.38 - case CMSG_BLKIF_BE_DISCONNECT: 15.39 - if ( msg->length != sizeof(blkif_be_disconnect_t) ) 15.40 - goto parse_error; 15.41 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_DISCONNECT(d:%d,h:%d)\n", 15.42 - ((blkif_be_disconnect_t *)msg->msg)->domid, 15.43 - ((blkif_be_disconnect_t *)msg->msg)->blkif_handle); 15.44 - break; 15.45 - case CMSG_BLKIF_BE_VBD_CREATE: 15.46 - if ( msg->length != sizeof(blkif_be_vbd_create_t) ) 15.47 - goto parse_error; 15.48 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_CREATE(d:%d,h:%d,v:%d)\n", 15.49 - ((blkif_be_vbd_create_t *)msg->msg)->domid, 15.50 - ((blkif_be_vbd_create_t *)msg->msg)->blkif_handle, 15.51 - ((blkif_be_vbd_create_t *)msg->msg)->vdevice); 15.52 - break; 15.53 - case CMSG_BLKIF_BE_VBD_DESTROY: 15.54 - if ( msg->length != sizeof(blkif_be_vbd_destroy_t) ) 15.55 - goto parse_error; 15.56 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_DESTROY(d:%d,h:%d,v:%d)\n", 15.57 - ((blkif_be_vbd_destroy_t *)msg->msg)->domid, 15.58 - ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle, 15.59 - ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice); 15.60 - break; 15.61 - default: 15.62 - goto parse_error; 15.63 - } 15.64 - 15.65 - return 0; 15.66 - 15.67 -parse_error: 15.68 - printf("[CONTROL_MSG] Bad message type or length!\n"); 15.69 - return 0; 15.70 -} 15.71 - 15.72 int request_print(blkif_request_t *req) 15.73 { 15.74 int i; 15.75 unsigned long fas; 15.76 15.77 - if ( req->operation == BLKIF_OP_PROBE ) { 15.78 - printf("[%2u:%2u<%s]\n", ID_TO_DOM(req->id), ID_TO_IDX(req->id), 15.79 - blkif_op_name[req->operation]); 15.80 - return BLKTAP_PASS; 15.81 - } else { 15.82 + if ( (req->operation == BLKIF_OP_READ) || 15.83 + (req->operation == BLKIF_OP_WRITE) ) 15.84 + { 15.85 printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 15.86 ID_TO_DOM(req->id), ID_TO_IDX(req->id), 15.87 blkif_op_name[req->operation], 15.88 - req->nr_segments, req->device, 15.89 + req->nr_segments, req->handle, 15.90 req->sector_number); 15.91 15.92 15.93 @@ -99,6 +32,8 @@ int request_print(blkif_request_t *req) 15.94 ); 15.95 } 15.96 15.97 + } else { 15.98 + printf("Unknown request message type.\n"); 15.99 } 15.100 15.101 return BLKTAP_PASS; 
15.102 @@ -106,23 +41,22 @@ int request_print(blkif_request_t *req) 15.103 15.104 int response_print(blkif_response_t *rsp) 15.105 { 15.106 - if ( rsp->operation == BLKIF_OP_PROBE ) { 15.107 - printf("[%2u:%2u>%s]\n", ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 15.108 - blkif_op_name[rsp->operation]); 15.109 - return BLKTAP_PASS; 15.110 - } else { 15.111 + if ( (rsp->operation == BLKIF_OP_READ) || 15.112 + (rsp->operation == BLKIF_OP_WRITE) ) 15.113 + { 15.114 printf("[%2u:%2u>%5s] (status: %d)\n", 15.115 ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 15.116 blkif_op_name[rsp->operation], 15.117 rsp->status); 15.118 15.119 + } else { 15.120 + printf("Unknown response message type.\n"); 15.121 + } 15.122 return BLKTAP_PASS; 15.123 } 15.124 15.125 int main(int argc, char *argv[]) 15.126 { 15.127 - blktap_register_ctrl_hook("control_print", control_print); 15.128 blktap_register_request_hook("request_print", request_print); 15.129 blktap_register_response_hook("response_print", response_print); 15.130 blktap_listen();
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 16.2 +++ b/tools/blktap/blkif.c Sun Sep 04 21:19:44 2005 +0000 16.3 @@ -0,0 +1,213 @@ 16.4 +/* 16.5 + * blkif.c 16.6 + * 16.7 + * The blkif interface for blktap. A blkif describes an in-use virtual disk. 16.8 + */ 16.9 + 16.10 +#include <stdio.h> 16.11 +#include <stdlib.h> 16.12 +#include <errno.h> 16.13 +#include <string.h> 16.14 +#include <err.h> 16.15 + 16.16 +#include "blktaplib.h" 16.17 + 16.18 +#if 1 16.19 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 16.20 +#else 16.21 +#define DPRINTF(_f, _a...) ((void)0) 16.22 +#endif 16.23 + 16.24 +#define BLKIF_HASHSZ 1024 16.25 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) 16.26 + 16.27 +static blkif_t *blkif_hash[BLKIF_HASHSZ]; 16.28 + 16.29 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) 16.30 +{ 16.31 + blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; 16.32 + while ( (blkif != NULL) && 16.33 + ((blkif->domid != domid) || (blkif->handle != handle)) ) 16.34 + blkif = blkif->hash_next; 16.35 + return blkif; 16.36 +} 16.37 + 16.38 +blkif_t *alloc_blkif(domid_t domid) 16.39 +{ 16.40 + blkif_t *blkif; 16.41 + 16.42 + blkif = (blkif_t *)malloc(sizeof(blkif_t)); 16.43 + if (!blkif) 16.44 + return NULL; 16.45 + 16.46 + memset(blkif, 0, sizeof(*blkif)); 16.47 + blkif->domid = domid; 16.48 + 16.49 + return blkif; 16.50 +} 16.51 + 16.52 +static int (*new_blkif_hook)(blkif_t *blkif) = NULL; 16.53 +void register_new_blkif_hook(int (*fn)(blkif_t *blkif)) 16.54 +{ 16.55 + new_blkif_hook = fn; 16.56 +} 16.57 + 16.58 +int blkif_init(blkif_t *blkif, long int handle, long int pdev, 16.59 + long int readonly) 16.60 +{ 16.61 + domid_t domid; 16.62 + blkif_t **pblkif; 16.63 + 16.64 + if (blkif == NULL) 16.65 + return -EINVAL; 16.66 + 16.67 + domid = blkif->domid; 16.68 + blkif->handle = handle; 16.69 + blkif->pdev = pdev; 16.70 + blkif->readonly = readonly; 16.71 + 16.72 + /* 16.73 + * Call out to the new_blkif_hook. The tap application should define this, 16.74 + * and it should return having set blkif->ops 16.75 + * 16.76 + */ 16.77 + if (new_blkif_hook == NULL) 16.78 + { 16.79 + warn("Probe detected a new blkif, but no new_blkif_hook!"); 16.80 + return -1; 16.81 + } 16.82 + new_blkif_hook(blkif); 16.83 + 16.84 + /* Now wire it in. 
*/ 16.85 + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 16.86 + while ( *pblkif != NULL ) 16.87 + { 16.88 + if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) 16.89 + { 16.90 + DPRINTF("Could not create blkif: already exists\n"); 16.91 + return -1; 16.92 + } 16.93 + pblkif = &(*pblkif)->hash_next; 16.94 + } 16.95 + blkif->hash_next = NULL; 16.96 + *pblkif = blkif; 16.97 + 16.98 + return 0; 16.99 +} 16.100 + 16.101 +void free_blkif(blkif_t *blkif) 16.102 +{ 16.103 + blkif_t **pblkif, *curs; 16.104 + 16.105 + pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)]; 16.106 + while ( (curs = *pblkif) != NULL ) 16.107 + { 16.108 + if ( blkif == curs ) 16.109 + { 16.110 + *pblkif = curs->hash_next; 16.111 + } 16.112 + pblkif = &curs->hash_next; 16.113 + } 16.114 + if (blkif != NULL) 16.115 + free(blkif); 16.116 +} 16.117 + 16.118 +void blkif_register_request_hook(blkif_t *blkif, char *name, 16.119 + int (*rh)(blkif_t *, blkif_request_t *, int)) 16.120 +{ 16.121 + request_hook_t *rh_ent, **c; 16.122 + 16.123 + rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t)); 16.124 + if (!rh_ent) 16.125 + { 16.126 + warn("couldn't allocate a new hook"); 16.127 + return; 16.128 + } 16.129 + 16.130 + rh_ent->func = rh; 16.131 + rh_ent->next = NULL; 16.132 + if (asprintf(&rh_ent->name, "%s", name) == -1) 16.133 + { 16.134 + free(rh_ent); 16.135 + warn("couldn't allocate a new hook name"); 16.136 + return; 16.137 + } 16.138 + 16.139 + c = &blkif->request_hook_chain; 16.140 + while (*c != NULL) { 16.141 + c = &(*c)->next; 16.142 + } 16.143 + *c = rh_ent; 16.144 +} 16.145 + 16.146 +void blkif_register_response_hook(blkif_t *blkif, char *name, 16.147 + int (*rh)(blkif_t *, blkif_response_t *, int)) 16.148 +{ 16.149 + response_hook_t *rh_ent, **c; 16.150 + 16.151 + rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t)); 16.152 + if (!rh_ent) 16.153 + { 16.154 + warn("couldn't allocate a new hook"); 16.155 + return; 16.156 + } 16.157 + 16.158 + rh_ent->func = rh; 16.159 + rh_ent->next = NULL; 16.160 + if (asprintf(&rh_ent->name, "%s", name) == -1) 16.161 + { 16.162 + free(rh_ent); 16.163 + warn("couldn't allocate a new hook name"); 16.164 + return; 16.165 + } 16.166 + 16.167 + c = &blkif->response_hook_chain; 16.168 + while (*c != NULL) { 16.169 + c = &(*c)->next; 16.170 + } 16.171 + *c = rh_ent; 16.172 +} 16.173 + 16.174 +void blkif_print_hooks(blkif_t *blkif) 16.175 +{ 16.176 + request_hook_t *req_hook; 16.177 + response_hook_t *rsp_hook; 16.178 + 16.179 + DPRINTF("Request Hooks:\n"); 16.180 + req_hook = blkif->request_hook_chain; 16.181 + while (req_hook != NULL) 16.182 + { 16.183 + DPRINTF(" [0x%p] %s\n", req_hook->func, req_hook->name); 16.184 + req_hook = req_hook->next; 16.185 + } 16.186 + 16.187 + DPRINTF("Response Hooks:\n"); 16.188 + rsp_hook = blkif->response_hook_chain; 16.189 + while (rsp_hook != NULL) 16.190 + { 16.191 + DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name); 16.192 + rsp_hook = rsp_hook->next; 16.193 + } 16.194 +} 16.195 + 16.196 + 16.197 +long int vbd_size(blkif_t *blkif) 16.198 +{ 16.199 + return 1000000000; 16.200 +} 16.201 + 16.202 +long int vbd_secsize(blkif_t *blkif) 16.203 +{ 16.204 + return 512; 16.205 +} 16.206 + 16.207 +unsigned vbd_info(blkif_t *blkif) 16.208 +{ 16.209 + return 0; 16.210 +} 16.211 + 16.212 + 16.213 +void __init_blkif(void) 16.214 +{ 16.215 + memset(blkif_hash, 0, sizeof(blkif_hash)); 16.216 +}
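The lifecycle of these records, as driven by xenbus.c later in this checkin: alloc_blkif() makes a zeroed record for a domain; blkif_init() fills in handle/pdev/readonly, calls the application's new_blkif_hook (which must set blkif->ops), and links the record into the (domid, handle) hash; blkif_find_by_handle() is the per-request lookup; free_blkif() unhashes and frees. A condensed sketch, with error handling elided:

    blkif_t *blkif = alloc_blkif(domid);          /* zeroed record    */
    blkif_init(blkif, handle, pdev, readonly);    /* hook + hash link */
    ...
    blkif = blkif_find_by_handle(domid, handle);  /* data-path lookup */
    ...
    free_blkif(blkif);                            /* unhash and free  */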
17.1 --- a/tools/blktap/blktaplib.c Sun Sep 04 15:08:16 2005 +0000 17.2 +++ b/tools/blktap/blktaplib.c Sun Sep 04 21:19:44 2005 +0000 17.3 @@ -24,7 +24,7 @@ 17.4 #include <string.h> 17.5 #include <unistd.h> 17.6 #include <pthread.h> 17.7 - 17.8 +#include <xs.h> 17.9 17.10 #define __COMPILING_BLKTAP_LIB 17.11 #include "blktaplib.h" 17.12 @@ -34,29 +34,26 @@ 17.13 #else 17.14 #define DPRINTF(_f, _a...) ((void)0) 17.15 #endif 17.16 -#define DEBUG_RING_IDXS 1 17.17 +#define DEBUG_RING_IDXS 0 17.18 17.19 #define POLLRDNORM 0x040 17.20 17.21 #define BLKTAP_IOCTL_KICK 1 17.22 17.23 + 17.24 void got_sig_bus(); 17.25 void got_sig_int(); 17.26 17.27 /* in kernel these are opposite, but we are a consumer now. */ 17.28 blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */ 17.29 blkif_front_ring_t be_ring; 17.30 -ctrl_back_ring_t ctrl_ring; 17.31 17.32 unsigned long mmap_vstart = 0; 17.33 char *blktap_mem; 17.34 int fd = 0; 17.35 17.36 -#define BLKTAP_RING_PAGES 3 /* Ctrl, Back, Front */ 17.37 -/*#define BLKTAP_MMAP_PAGES ((11 + 1) * 64)*/ 17.38 -#define BLKTAP_MMAP_PAGES \ 17.39 - ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE) 17.40 -#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES) 17.41 +#define BLKTAP_RING_PAGES 1 /* Front */ 17.42 +#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES) 17.43 17.44 int bad_count = 0; 17.45 void bad(void) 17.46 @@ -79,126 +76,13 @@ inline unsigned int ID_TO_IDX(unsigned l 17.47 } 17.48 17.49 inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); } 17.50 -/* 17.51 + 17.52 static int (*request_hook)(blkif_request_t *req) = NULL; 17.53 static int (*response_hook)(blkif_response_t *req) = NULL; 17.54 -*/ 17.55 - 17.56 -/*-----[ Request/Response hook chains.]----------------------------------*/ 17.57 - 17.58 -#define HOOK_NAME_MAX 50 17.59 - 17.60 -typedef struct ctrl_hook_st { 17.61 - char name[HOOK_NAME_MAX]; 17.62 - int (*func)(control_msg_t *); 17.63 - struct ctrl_hook_st *next; 17.64 -} ctrl_hook_t; 17.65 - 17.66 -typedef struct request_hook_st { 17.67 - char name[HOOK_NAME_MAX]; 17.68 - int (*func)(blkif_request_t *); 17.69 - struct request_hook_st *next; 17.70 -} request_hook_t; 17.71 - 17.72 -typedef struct response_hook_st { 17.73 - char name[HOOK_NAME_MAX]; 17.74 - int (*func)(blkif_response_t *); 17.75 - struct response_hook_st *next; 17.76 -} response_hook_t; 17.77 - 17.78 -static ctrl_hook_t *ctrl_hook_chain = NULL; 17.79 -static request_hook_t *request_hook_chain = NULL; 17.80 -static response_hook_t *response_hook_chain = NULL; 17.81 - 17.82 -void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)) 17.83 -{ 17.84 - ctrl_hook_t *ch_ent, **c; 17.85 - 17.86 - ch_ent = (ctrl_hook_t *)malloc(sizeof(ctrl_hook_t)); 17.87 - if (!ch_ent) { printf("couldn't allocate a new hook\n"); exit(-1); } 17.88 - 17.89 - ch_ent->func = ch; 17.90 - ch_ent->next = NULL; 17.91 - strncpy(ch_ent->name, name, HOOK_NAME_MAX); 17.92 - ch_ent->name[HOOK_NAME_MAX-1] = '\0'; 17.93 - 17.94 - c = &ctrl_hook_chain; 17.95 - while (*c != NULL) { 17.96 - c = &(*c)->next; 17.97 - } 17.98 - *c = ch_ent; 17.99 -} 17.100 - 17.101 -void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)) 17.102 -{ 17.103 - request_hook_t *rh_ent, **c; 17.104 - 17.105 - rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t)); 17.106 - if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); } 17.107 - 17.108 - rh_ent->func = rh; 17.109 - rh_ent->next = NULL; 17.110 - strncpy(rh_ent->name, name, HOOK_NAME_MAX); 
17.111 - 17.112 - c = &request_hook_chain; 17.113 - while (*c != NULL) { 17.114 - c = &(*c)->next; 17.115 - } 17.116 - *c = rh_ent; 17.117 -} 17.118 - 17.119 -void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)) 17.120 -{ 17.121 - response_hook_t *rh_ent, **c; 17.122 - 17.123 - rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t)); 17.124 - if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); } 17.125 - 17.126 - rh_ent->func = rh; 17.127 - rh_ent->next = NULL; 17.128 - strncpy(rh_ent->name, name, HOOK_NAME_MAX); 17.129 - 17.130 - c = &response_hook_chain; 17.131 - while (*c != NULL) { 17.132 - c = &(*c)->next; 17.133 - } 17.134 - *c = rh_ent; 17.135 -} 17.136 - 17.137 -void print_hooks(void) 17.138 -{ 17.139 - request_hook_t *req_hook; 17.140 - response_hook_t *rsp_hook; 17.141 - ctrl_hook_t *ctrl_hook; 17.142 - 17.143 - DPRINTF("Control Hooks:\n"); 17.144 - ctrl_hook = ctrl_hook_chain; 17.145 - while (ctrl_hook != NULL) 17.146 - { 17.147 - DPRINTF(" [0x%p] %s\n", ctrl_hook->func, ctrl_hook->name); 17.148 - ctrl_hook = ctrl_hook->next; 17.149 - } 17.150 - 17.151 - DPRINTF("Request Hooks:\n"); 17.152 - req_hook = request_hook_chain; 17.153 - while (req_hook != NULL) 17.154 - { 17.155 - DPRINTF(" [0x%p] %s\n", req_hook->func, req_hook->name); 17.156 - req_hook = req_hook->next; 17.157 - } 17.158 - 17.159 - DPRINTF("Response Hooks:\n"); 17.160 - rsp_hook = response_hook_chain; 17.161 - while (rsp_hook != NULL) 17.162 - { 17.163 - DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name); 17.164 - rsp_hook = rsp_hook->next; 17.165 - } 17.166 -} 17.167 17.168 /*-----[ Data to/from Backend (server) VM ]------------------------------*/ 17.169 17.170 - 17.171 +/* 17.172 17.173 inline int write_req_to_be_ring(blkif_request_t *req) 17.174 { 17.175 @@ -214,6 +98,7 @@ inline int write_req_to_be_ring(blkif_re 17.176 17.177 return 0; 17.178 } 17.179 +*/ 17.180 17.181 inline int write_rsp_to_fe_ring(blkif_response_t *rsp) 17.182 { 17.183 @@ -230,14 +115,14 @@ inline int write_rsp_to_fe_ring(blkif_re 17.184 return 0; 17.185 } 17.186 17.187 -static void apply_rsp_hooks(blkif_response_t *rsp) 17.188 +static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp) 17.189 { 17.190 response_hook_t *rsp_hook; 17.191 17.192 - rsp_hook = response_hook_chain; 17.193 + rsp_hook = blkif->response_hook_chain; 17.194 while (rsp_hook != NULL) 17.195 { 17.196 - switch(rsp_hook->func(rsp)) 17.197 + switch(rsp_hook->func(blkif, rsp, 1)) 17.198 { 17.199 case BLKTAP_PASS: 17.200 break; 17.201 @@ -248,15 +133,19 @@ static void apply_rsp_hooks(blkif_respon 17.202 } 17.203 } 17.204 17.205 + 17.206 static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER; 17.207 17.208 -void blktap_inject_response(blkif_response_t *rsp) 17.209 +void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp) 17.210 { 17.211 17.212 - apply_rsp_hooks(rsp); 17.213 - 17.214 + apply_rsp_hooks(blkif, rsp); 17.215 + 17.216 write_rsp_to_fe_ring(rsp); 17.217 - 17.218 +} 17.219 + 17.220 +void blktap_kick_responses(void) 17.221 +{ 17.222 pthread_mutex_lock(&push_mutex); 17.223 17.224 RING_PUSH_RESPONSES(&fe_ring); 17.225 @@ -277,7 +166,7 @@ typedef struct { 17.226 int active; 17.227 } pollhook_t; 17.228 17.229 -static struct pollfd pfd[MAX_POLLFDS+1]; 17.230 +static struct pollfd pfd[MAX_POLLFDS+2]; /* tap and store are extra */ 17.231 static pollhook_t pollhooks[MAX_POLLFDS]; 17.232 static unsigned int ph_freelist[MAX_POLLFDS]; 17.233 static unsigned int ph_cons, ph_prod; 17.234 @@ -344,65 +233,65 @@ 
void __attribute__ ((constructor)) blkta 17.235 17.236 int blktap_listen(void) 17.237 { 17.238 - int notify_be, notify_fe, tap_pfd; 17.239 - 17.240 + int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret; 17.241 + struct xs_handle *h; 17.242 + blkif_t *blkif; 17.243 + 17.244 /* comms rings: */ 17.245 blkif_request_t *req; 17.246 blkif_response_t *rsp; 17.247 - control_msg_t *msg; 17.248 blkif_sring_t *sring; 17.249 - ctrl_sring_t *csring; 17.250 RING_IDX rp, i, pfd_count; 17.251 17.252 /* pending rings */ 17.253 blkif_request_t req_pending[BLKIF_RING_SIZE]; 17.254 - blkif_response_t rsp_pending[BLKIF_RING_SIZE]; 17.255 + /* blkif_response_t rsp_pending[BLKIF_RING_SIZE] */; 17.256 17.257 /* handler hooks: */ 17.258 request_hook_t *req_hook; 17.259 response_hook_t *rsp_hook; 17.260 - ctrl_hook_t *ctrl_hook; 17.261 17.262 signal (SIGBUS, got_sig_bus); 17.263 signal (SIGINT, got_sig_int); 17.264 17.265 - print_hooks(); 17.266 - 17.267 + __init_blkif(); 17.268 + 17.269 fd = open("/dev/blktap", O_RDWR); 17.270 - if (fd == -1) { 17.271 - printf("open failed! (%d)\n", errno); 17.272 - goto open_failed; 17.273 - } 17.274 + if (fd == -1) 17.275 + err(-1, "open failed!"); 17.276 17.277 blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 17.278 PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 17.279 17.280 - if ((int)blktap_mem == -1) { 17.281 - printf("mmap failed! (%d)\n", errno); 17.282 - goto mmap_failed; 17.283 - } 17.284 + if ((int)blktap_mem == -1) 17.285 + err(-1, "mmap failed!"); 17.286 17.287 /* assign the rings to the mapped memory */ 17.288 - csring = (ctrl_sring_t *)blktap_mem; 17.289 - BACK_RING_INIT(&ctrl_ring, csring, PAGE_SIZE); 17.290 - 17.291 +/* 17.292 sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE); 17.293 FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE); 17.294 - 17.295 - sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 *PAGE_SIZE)); 17.296 +*/ 17.297 + sring = (blkif_sring_t *)((unsigned long)blktap_mem); 17.298 BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE); 17.299 17.300 mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT); 17.301 17.302 + 17.303 + /* Set up store connection and watch. 
*/ 17.304 + h = xs_daemon_open(); 17.305 + if (h == NULL) 17.306 + err(-1, "xs_daemon_open"); 17.307 + 17.308 + ret = add_blockdevice_probe_watch(h, "Domain-0"); 17.309 + if (ret != 0) 17.310 + err(1, "adding device probe watch"); 17.311 + 17.312 ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); 17.313 17.314 while(1) { 17.315 int ret; 17.316 17.317 /* build the poll list */ 17.318 - 17.319 - DPRINTF("Building poll list.\n"); 17.320 - 17.321 pfd_count = 0; 17.322 for ( i=0; i < MAX_POLLFDS; i++ ) { 17.323 pollhook_t *ph = &pollhooks[i]; 17.324 @@ -415,49 +304,31 @@ int blktap_listen(void) 17.325 } 17.326 } 17.327 17.328 - tap_pfd = pfd_count; 17.329 + tap_pfd = pfd_count++; 17.330 pfd[tap_pfd].fd = fd; 17.331 pfd[tap_pfd].events = POLLIN; 17.332 17.333 - DPRINTF("poll() %d fds.\n", pfd_count); 17.334 + store_pfd = pfd_count++; 17.335 + pfd[store_pfd].fd = xs_fileno(h); 17.336 + pfd[store_pfd].events = POLLIN; 17.337 17.338 - if ( (ret = (poll(pfd, pfd_count+1, 10000)) == 0) ) { 17.339 + if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) { 17.340 if (DEBUG_RING_IDXS) 17.341 ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS); 17.342 continue; 17.343 } 17.344 17.345 - DPRINTF("poll returned %d\n", ret); 17.346 - 17.347 for (i=0; i < MAX_POLLFDS; i++) { 17.348 if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) ) 17.349 pollhooks[i].func(pollhooks[i].pfd->fd); 17.350 } 17.351 17.352 - if (pfd[tap_pfd].revents) { 17.353 - 17.354 - /* empty the control ring */ 17.355 - rp = ctrl_ring.sring->req_prod; 17.356 - rmb(); 17.357 - for (i = ctrl_ring.req_cons; i < rp; i++) 17.358 - { 17.359 - msg = RING_GET_REQUEST(&ctrl_ring, i); 17.360 + if (pfd[store_pfd].revents) { 17.361 + ret = xs_fire_next_watch(h); 17.362 + } 17.363 17.364 - ctrl_hook = ctrl_hook_chain; 17.365 - while (ctrl_hook != NULL) 17.366 - { 17.367 - DPRINTF("CTRL_HOOK: %s\n", ctrl_hook->name); 17.368 - /* We currently don't respond to ctrl messages. */ 17.369 - ctrl_hook->func(msg); 17.370 - ctrl_hook = ctrl_hook->next; 17.371 - } 17.372 - } 17.373 - /* Using this as a unidirectional ring.
*/ 17.374 - ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i; 17.375 -pthread_mutex_lock(&push_mutex); 17.376 - RING_PUSH_RESPONSES(&ctrl_ring); 17.377 -pthread_mutex_unlock(&push_mutex); 17.378 - 17.379 + if (pfd[tap_pfd].revents) 17.380 + { 17.381 /* empty the fe_ring */ 17.382 notify_fe = 0; 17.383 notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring); 17.384 @@ -465,44 +336,62 @@ pthread_mutex_unlock(&push_mutex); 17.385 rmb(); 17.386 for (i = fe_ring.req_cons; i != rp; i++) 17.387 { 17.388 - int done = 0; /* stop forwarding this request */ 17.389 + int done = 0; 17.390 17.391 req = RING_GET_REQUEST(&fe_ring, i); 17.392 memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req)); 17.393 req = &req_pending[ID_TO_IDX(req->id)]; 17.394 17.395 - DPRINTF("copying an fe request\n"); 17.396 + blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle); 17.397 17.398 - req_hook = request_hook_chain; 17.399 - while (req_hook != NULL) 17.400 + if (blkif != NULL) 17.401 { 17.402 - DPRINTF("REQ_HOOK: %s\n", req_hook->name); 17.403 - switch(req_hook->func(req)) 17.404 + req_hook = blkif->request_hook_chain; 17.405 + while (req_hook != NULL) 17.406 { 17.407 - case BLKTAP_RESPOND: 17.408 - apply_rsp_hooks((blkif_response_t *)req); 17.409 - write_rsp_to_fe_ring((blkif_response_t *)req); 17.410 - notify_fe = 1; 17.411 - done = 1; 17.412 - break; 17.413 - case BLKTAP_STOLEN: 17.414 - done = 1; 17.415 - break; 17.416 - case BLKTAP_PASS: 17.417 - break; 17.418 - default: 17.419 - printf("Unknown request hook return value!\n"); 17.420 + switch(req_hook->func(blkif, req, ((i+1) == rp))) 17.421 + { 17.422 + case BLKTAP_RESPOND: 17.423 + apply_rsp_hooks(blkif, (blkif_response_t *)req); 17.424 + write_rsp_to_fe_ring((blkif_response_t *)req); 17.425 + notify_fe = 1; 17.426 + done = 1; 17.427 + break; 17.428 + case BLKTAP_STOLEN: 17.429 + done = 1; 17.430 + break; 17.431 + case BLKTAP_PASS: 17.432 + break; 17.433 + default: 17.434 + printf("Unknown request hook return value!\n"); 17.435 + } 17.436 + if (done) break; 17.437 + req_hook = req_hook->next; 17.438 } 17.439 - if (done) break; 17.440 - req_hook = req_hook->next; 17.441 } 17.442 17.443 - if (done == 0) write_req_to_be_ring(req); 17.444 + if (done == 0) 17.445 + { 17.446 + /* this was: */ 17.447 + /* write_req_to_be_ring(req); */ 17.448 + 17.449 + unsigned long id = req->id; 17.450 + unsigned short operation = req->operation; 17.451 + printf("Unterminated request!\n"); 17.452 + rsp = (blkif_response_t *)req; 17.453 + rsp->id = id; 17.454 + rsp->operation = operation; 17.455 + rsp->status = BLKIF_RSP_ERROR; 17.456 + write_rsp_to_fe_ring(rsp); 17.457 + notify_fe = 1; 17.458 + done = 1; 17.459 + } 17.460 17.461 } 17.462 fe_ring.req_cons = i; 17.463 17.464 /* empty the be_ring */ 17.465 +/* 17.466 notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring); 17.467 rp = be_ring.sring->rsp_prod; 17.468 rmb(); 17.469 @@ -519,9 +408,9 @@ pthread_mutex_unlock(&push_mutex); 17.470 write_rsp_to_fe_ring(rsp); 17.471 } 17.472 be_ring.rsp_cons = i; 17.473 - 17.474 +*/ 17.475 /* notify the domains */ 17.476 - 17.477 +/* 17.478 if (notify_be) { 17.479 DPRINTF("notifying be\n"); 17.480 pthread_mutex_lock(&push_mutex); 17.481 @@ -529,13 +418,13 @@ pthread_mutex_lock(&push_mutex); 17.482 ioctl(fd, BLKTAP_IOCTL_KICK_BE); 17.483 pthread_mutex_unlock(&push_mutex); 17.484 } 17.485 - 17.486 +*/ 17.487 if (notify_fe) { 17.488 DPRINTF("notifying fe\n"); 17.489 -pthread_mutex_lock(&push_mutex); 17.490 + pthread_mutex_lock(&push_mutex); 17.491 RING_PUSH_RESPONSES(&fe_ring); 17.492 ioctl(fd, 
BLKTAP_IOCTL_KICK_FE); 17.493 -pthread_mutex_unlock(&push_mutex); 17.494 + pthread_mutex_unlock(&push_mutex); 17.495 } 17.496 } 17.497 }
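The dispatch loop above understands three request-hook return values: BLKTAP_PASS hands the request on to the next hook in the chain, BLKTAP_STOLEN means the hook has taken ownership and will answer later through blkif_inject_response()/blktap_kick_responses(), and BLKTAP_RESPOND means the hook has rewritten the request buffer into a response in place. An illustrative hook, not part of this checkin, that fails every request the same way the loop's own "Unterminated request" fallback does:

    static int fail_all(blkif_t *blkif, blkif_request_t *req, int batch_done)
    {
        blkif_response_t *rsp = (blkif_response_t *)req; /* reuse in place */
        unsigned long id = req->id;
        unsigned short op = req->operation;  /* save before overwriting */

        rsp->id        = id;
        rsp->operation = op;
        rsp->status    = BLKIF_RSP_ERROR;
        return BLKTAP_RESPOND;  /* loop applies response hooks and queues it */
    }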
18.1 --- a/tools/blktap/blktaplib.h Sun Sep 04 15:08:16 2005 +0000 18.2 +++ b/tools/blktap/blktaplib.h Sun Sep 04 21:19:44 2005 +0000 18.3 @@ -2,6 +2,9 @@ 18.4 * 18.5 * userland accessors to the block tap. 18.6 * 18.7 + * Sept 2/05 -- I'm scaling this back to only support block remappings 18.8 + * to userspace in a backend domain. Passthrough and interposition can be 18.9 + * re-added once transitive grants are available. 18.10 */ 18.11 18.12 #ifndef __BLKTAPLIB_H__ 18.13 @@ -13,6 +16,7 @@ 18.14 #include <xen/io/blkif.h> 18.15 #include <xen/io/ring.h> 18.16 #include <xen/io/domain_controller.h> 18.17 +#include <xs.h> 18.18 18.19 /* /dev/xen/blktap resides at device number major=10, minor=202 */ 18.20 #define BLKTAP_MINOR 202 18.21 @@ -49,12 +53,18 @@ static inline int BLKTAP_MODE_VALID(unsi 18.22 return ( 18.23 ( arg == BLKTAP_MODE_PASSTHROUGH ) || 18.24 ( arg == BLKTAP_MODE_INTERCEPT_FE ) || 18.25 + ( arg == BLKTAP_MODE_INTERPOSE ) ); 18.26 +/* 18.27 + return ( 18.28 + ( arg == BLKTAP_MODE_PASSTHROUGH ) || 18.29 + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || 18.30 ( arg == BLKTAP_MODE_INTERCEPT_BE ) || 18.31 ( arg == BLKTAP_MODE_INTERPOSE ) || 18.32 ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || 18.33 ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || 18.34 ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) 18.35 ); 18.36 +*/ 18.37 } 18.38 18.39 /* Return values for handling messages in hooks. */ 18.40 @@ -62,30 +72,89 @@ static inline int BLKTAP_MODE_VALID(unsi 18.41 #define BLKTAP_RESPOND 1 /* Request is now a reply. Return it. */ 18.42 #define BLKTAP_STOLEN 2 /* Hook has stolen request. */ 18.43 18.44 -#define domid_t unsigned short 18.45 +//#define domid_t unsigned short 18.46 18.47 inline unsigned int ID_TO_IDX(unsigned long id); 18.48 inline domid_t ID_TO_DOM(unsigned long id); 18.49 18.50 -void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)); 18.51 -void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)); 18.52 -void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)); 18.53 -void blktap_inject_response(blkif_response_t *); 18.54 int blktap_attach_poll(int fd, short events, int (*func)(int)); 18.55 void blktap_detach_poll(int fd); 18.56 int blktap_listen(void); 18.57 18.58 +struct blkif; 18.59 + 18.60 +typedef struct request_hook_st { 18.61 + char *name; 18.62 + int (*func)(struct blkif *, blkif_request_t *, int); 18.63 + struct request_hook_st *next; 18.64 +} request_hook_t; 18.65 + 18.66 +typedef struct response_hook_st { 18.67 + char *name; 18.68 + int (*func)(struct blkif *, blkif_response_t *, int); 18.69 + struct response_hook_st *next; 18.70 +} response_hook_t; 18.71 + 18.72 +struct blkif_ops { 18.73 + long int (*get_size)(struct blkif *blkif); 18.74 + long int (*get_secsize)(struct blkif *blkif); 18.75 + unsigned (*get_info)(struct blkif *blkif); 18.76 +}; 18.77 + 18.78 +typedef struct blkif { 18.79 + domid_t domid; 18.80 + long int handle; 18.81 + 18.82 + long int pdev; 18.83 + long int readonly; 18.84 + 18.85 + enum { DISCONNECTED, CONNECTED } state; 18.86 + 18.87 + struct blkif_ops *ops; 18.88 + request_hook_t *request_hook_chain; 18.89 + response_hook_t *response_hook_chain; 18.90 + 18.91 + struct blkif *hash_next; 18.92 + 18.93 + void *prv; /* device-specific data */ 18.94 +} blkif_t; 18.95 + 18.96 +void register_new_blkif_hook(int (*fn)(blkif_t *blkif)); 18.97 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); 18.98 +blkif_t *alloc_blkif(domid_t domid); 18.99
+int blkif_init(blkif_t *blkif, long int handle, long int pdev, 18.100 + long int readonly); 18.101 +void free_blkif(blkif_t *blkif); 18.102 +void __init_blkif(void); 18.103 + 18.104 + 18.105 +/* xenstore/xenbus: */ 18.106 +extern int add_blockdevice_probe_watch(struct xs_handle *h, 18.107 + const char *domname); 18.108 +int xs_fire_next_watch(struct xs_handle *h); 18.109 + 18.110 + 18.111 +void blkif_print_hooks(blkif_t *blkif); 18.112 +void blkif_register_request_hook(blkif_t *blkif, char *name, 18.113 + int (*rh)(blkif_t *, blkif_request_t *, int)); 18.114 +void blkif_register_response_hook(blkif_t *blkif, char *name, 18.115 + int (*rh)(blkif_t *, blkif_response_t *, int)); 18.116 +void blkif_inject_response(blkif_t *blkif, blkif_response_t *); 18.117 +void blktap_kick_responses(void); 18.118 + 18.119 +/* this must match the underlying driver... */ 18.120 +#define MAX_PENDING_REQS 64 18.121 + 18.122 /* Accessing attached data page mappings */ 18.123 -#define MMAP_PAGES_PER_REQUEST \ 18.124 - (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) 18.125 -#define MMAP_VADDR(_req,_seg) \ 18.126 - (mmap_vstart + \ 18.127 - ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ 18.128 +#define MMAP_PAGES \ 18.129 + (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) 18.130 +#define MMAP_VADDR(_req,_seg) \ 18.131 + (mmap_vstart + \ 18.132 + ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ 18.133 ((_seg) * PAGE_SIZE)) 18.134 18.135 extern unsigned long mmap_vstart; 18.136 18.137 - 18.138 /* Defines that are only used by library clients */ 18.139 18.140 #ifndef __COMPILING_BLKTAP_LIB 18.141 @@ -93,7 +162,6 @@ extern unsigned long mmap_vstart; 18.142 static char *blkif_op_name[] = { 18.143 [BLKIF_OP_READ] = "READ", 18.144 [BLKIF_OP_WRITE] = "WRITE", 18.145 - [BLKIF_OP_PROBE] = "PROBE", 18.146 }; 18.147 18.148 #endif /* __COMPILING_BLKTAP_LIB */
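MMAP_VADDR() above locates a request's data pages in the tap process: the mapped region holds BLKIF_MAX_SEGMENTS_PER_REQUEST pages per request slot, laid out after the ring page. To reach the data for segment i of a request, offset into that page by the segment's first sector, as ublkbacklib.c does below (SECTOR_SHIFT is 9 there):

    /* User-space address of segment i's data for this request. */
    char *page = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
    page += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;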
19.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 19.2 +++ b/tools/blktap/list.h Sun Sep 04 21:19:44 2005 +0000 19.3 @@ -0,0 +1,55 @@ 19.4 +/* 19.5 + * list.h 19.6 + * 19.7 + * This is a subset of linux's list.h intended to be used in user-space. 19.8 + * 19.9 + */ 19.10 + 19.11 +#ifndef __LIST_H__ 19.12 +#define __LIST_H__ 19.13 + 19.14 +#define LIST_POISON1 ((void *) 0x00100100) 19.15 +#define LIST_POISON2 ((void *) 0x00200200) 19.16 + 19.17 +struct list_head { 19.18 + struct list_head *next, *prev; 19.19 +}; 19.20 + 19.21 +#define LIST_HEAD_INIT(name) { &(name), &(name) } 19.22 + 19.23 +#define LIST_HEAD(name) \ 19.24 + struct list_head name = LIST_HEAD_INIT(name) 19.25 + 19.26 +static inline void __list_add(struct list_head *new, 19.27 + struct list_head *prev, 19.28 + struct list_head *next) 19.29 +{ 19.30 + next->prev = new; 19.31 + new->next = next; 19.32 + new->prev = prev; 19.33 + prev->next = new; 19.34 +} 19.35 + 19.36 +static inline void list_add(struct list_head *new, struct list_head *head) 19.37 +{ 19.38 + __list_add(new, head, head->next); 19.39 +} 19.40 +static inline void __list_del(struct list_head * prev, struct list_head * next) 19.41 +{ 19.42 + next->prev = prev; 19.43 + prev->next = next; 19.44 +} 19.45 +static inline void list_del(struct list_head *entry) 19.46 +{ 19.47 + __list_del(entry->prev, entry->next); 19.48 + entry->next = LIST_POISON1; 19.49 + entry->prev = LIST_POISON2; 19.50 +} 19.51 +#define list_entry(ptr, type, member) \ 19.52 + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) 19.53 +#define list_for_each_entry(pos, head, member) \ 19.54 + for (pos = list_entry((head)->next, typeof(*pos), member); \ 19.55 + &pos->member != (head); \ 19.56 + pos = list_entry(pos->member.next, typeof(*pos), member)) 19.57 + 19.58 +#endif /* __LIST_H__ */
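xenbus.c below keeps its watch and backend lists with these macros; a brief usage sketch with a hypothetical element type:

    struct item { int value; struct list_head list; };
    static LIST_HEAD(items);                 /* empty, self-linked head */

    struct item a = { .value = 1 };
    list_add(&a.list, &items);               /* insert at head */

    struct item *pos;
    list_for_each_entry(pos, &items, list)   /* walk every element */
        printf("%d\n", pos->value);

    list_del(&a.list);                       /* unlink and poison */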
20.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 20.2 +++ b/tools/blktap/ublkback/Makefile Sun Sep 04 21:19:44 2005 +0000 20.3 @@ -0,0 +1,42 @@ 20.4 + 20.5 +XEN_ROOT = ../../.. 20.6 +include $(XEN_ROOT)/tools/Rules.mk 20.7 + 20.8 +INCLUDES += -I.. 20.9 + 20.10 +INSTALL = install 20.11 +INSTALL_PROG = $(INSTALL) -m0755 20.12 +IBIN = ublkback 20.13 +INSTALL_DIR = /usr/sbin 20.14 + 20.15 +CFLAGS += -Wall 20.16 +CFLAGS += -Werror 20.17 +CFLAGS += -Wno-unused 20.18 +#CFLAGS += -O3 20.19 +CFLAGS += -g3 20.20 +CFLAGS += -fno-strict-aliasing 20.21 +CFLAGS += -I $(XEN_LIBXC) 20.22 +CFLAGS += $(INCLUDES) -I. 20.23 +CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE 20.24 +# Get gcc to generate the dependencies for us. 20.25 +CFLAGS += -Wp,-MD,.$(@F).d 20.26 +DEPS = .*.d 20.27 + 20.28 +OBJS = $(patsubst %.c,%.o,$(SRCS)) 20.29 + 20.30 +all: $(IBIN) 20.31 + 20.32 +LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse) 20.33 + 20.34 +install: 20.35 + $(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR) 20.36 +clean: 20.37 + rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) 20.38 + 20.39 +ublkback: 20.40 + $(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L.. \ 20.41 + -lblktap -laio ublkback.c ublkbacklib.c -pg 20.42 + 20.43 +.PHONY: clean install 20.44 + 20.45 +-include $(DEPS)
21.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 21.2 +++ b/tools/blktap/ublkback/ublkback.c Sun Sep 04 21:19:44 2005 +0000 21.3 @@ -0,0 +1,18 @@ 21.4 +/* ublkback.c 21.5 + * 21.6 + * libaio-based userlevel backend. 21.7 + */ 21.8 + 21.9 +#include "blktaplib.h" 21.10 +#include "ublkbacklib.h" 21.11 + 21.12 + 21.13 +int main(int argc, char *argv[]) 21.14 +{ 21.15 + ublkback_init(); 21.16 + 21.17 + register_new_blkif_hook(ublkback_new_blkif); 21.18 + blktap_listen(); 21.19 + 21.20 + return 0; 21.21 +}
22.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 22.2 +++ b/tools/blktap/ublkback/ublkbacklib.c Sun Sep 04 21:19:44 2005 +0000 22.3 @@ -0,0 +1,477 @@ 22.4 +/* ublkbacklib.c 22.5 + * 22.6 + * file/device image-backed block device -- using linux libaio. 22.7 + * 22.8 + * (c) 2004 Andrew Warfield. 22.9 + * 22.10 + * Xend has been modified to use an amorfs:[fsid] disk tag. 22.11 + * This will show up as device type (maj:240,min:0) = 61440. 22.12 + * 22.13 + * The fsid is placed in the sec_start field of the disk extent. 22.14 + * 22.15 + * NOTE: This doesn't work. Grrr. 22.16 + */ 22.17 + 22.18 +#define _GNU_SOURCE 22.19 +#define __USE_LARGEFILE64 22.20 + 22.21 +#include <stdio.h> 22.22 +#include <stdlib.h> 22.23 +#include <fcntl.h> 22.24 +#include <string.h> 22.25 +#include <db.h> 22.26 +#include <sys/stat.h> 22.27 +#include <sys/types.h> 22.28 +#include <sys/poll.h> 22.29 +#include <unistd.h> 22.30 +#include <errno.h> 22.31 +#include <libaio.h> 22.32 +#include <pthread.h> 22.33 +#include <time.h> 22.34 +#include <err.h> 22.35 +#include "blktaplib.h" 22.36 + 22.37 +/* XXXX: */ 22.38 +/* Current code just mounts this file/device to any requests that come in. */ 22.39 +//#define TMP_IMAGE_FILE_NAME "/dev/sda1" 22.40 +#define TMP_IMAGE_FILE_NAME "fc3.image" 22.41 + 22.42 +#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ 22.43 +#define MAX_SEGMENTS_PER_REQ 11 22.44 +#define SECTOR_SHIFT 9 22.45 +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) 22.46 + 22.47 +#if 0 22.48 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 22.49 +#else 22.50 +#define DPRINTF(_f, _a...) ((void)0) 22.51 +#endif 22.52 + 22.53 +#if 1 22.54 +#define ASSERT(_p) \ 22.55 + if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \ 22.56 + __LINE__, __FILE__); *(int*)0=0; } 22.57 +#else 22.58 +#define ASSERT(_p) ((void)0) 22.59 +#endif 22.60 + 22.61 +/* Note on pending_reqs: I assume all reqs are queued before they start to 22.62 + * get filled. So a count of 0 is an unused record. 22.63 + */ 22.64 +typedef struct { 22.65 + blkif_request_t req; 22.66 + blkif_t *blkif; 22.67 + int count; 22.68 +} pending_req_t; 22.69 + 22.70 +static pending_req_t pending_list[MAX_REQUESTS]; 22.71 +static io_context_t ctx; 22.72 +static struct iocb *iocb_free[MAX_AIO_REQS]; 22.73 +static int iocb_free_count; 22.74 + 22.75 +/* ---[ Notification mechanism ]--------------------------------------- */ 22.76 + 22.77 +enum { 22.78 + READ = 0, 22.79 + WRITE = 1 22.80 +}; 22.81 + 22.82 +static int aio_notify[2]; 22.83 +static volatile int aio_listening = 0; 22.84 +static pthread_mutex_t notifier_sem = PTHREAD_MUTEX_INITIALIZER; 22.85 + 22.86 +static struct io_event aio_events[MAX_AIO_REQS]; 22.87 +static int aio_event_count = 0; 22.88 + 22.89 +/* this is commented out in libaio.h for some reason. */ 22.90 +extern int io_queue_wait(io_context_t ctx, struct timespec *timeout); 22.91 + 22.92 +static void *notifier_thread(void *arg) 22.93 +{ 22.94 + int ret; 22.95 + int msg = 0x00feeb00; 22.96 + 22.97 + DPRINTF("Notifier thread started.\n"); 22.98 + for (;;) { 22.99 + pthread_mutex_lock(&notifier_sem); 22.100 + if ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0) { 22.101 + aio_event_count = ret; 22.102 + write(aio_notify[WRITE], &msg, sizeof(msg)); 22.103 + } else { 22.104 + printf("[io_getevents error!
%d]\n", errno); 22.105 + pthread_mutex_unlock(¬ifier_sem); 22.106 + } 22.107 + } 22.108 +} 22.109 + 22.110 +/* --- Talking to xenstore: ------------------------------------------- */ 22.111 + 22.112 +int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done); 22.113 +int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done); 22.114 + 22.115 +typedef struct image { 22.116 + /* These need to turn into an array/rbtree for multi-disk support. */ 22.117 + int fd; 22.118 + u64 fsid; 22.119 + blkif_vdev_t vdevice; 22.120 + long int size; 22.121 + long int secsize; 22.122 + long int info; 22.123 +} image_t; 22.124 + 22.125 +long int ublkback_get_size(blkif_t *blkif) 22.126 +{ 22.127 + image_t *img = (image_t *)blkif->prv; 22.128 + return img->size; 22.129 +} 22.130 + 22.131 +long int ublkback_get_secsize(blkif_t *blkif) 22.132 +{ 22.133 + image_t *img = (image_t *)blkif->prv; 22.134 + return img->secsize; 22.135 +} 22.136 + 22.137 +unsigned ublkback_get_info(blkif_t *blkif) 22.138 +{ 22.139 + image_t *img = (image_t *)blkif->prv; 22.140 + return img->info; 22.141 +} 22.142 + 22.143 +static struct blkif_ops ublkback_ops = { 22.144 + get_size: ublkback_get_size, 22.145 + get_secsize: ublkback_get_secsize, 22.146 + get_info: ublkback_get_info, 22.147 +}; 22.148 + 22.149 +int ublkback_new_blkif(blkif_t *blkif) 22.150 +{ 22.151 + image_t *image; 22.152 + struct stat stat; 22.153 + int ret; 22.154 + 22.155 + image = (image_t *)malloc(sizeof(image_t)); 22.156 + if (image == NULL) { 22.157 + printf("error allocating image record.\n"); 22.158 + return -ENOMEM; 22.159 + } 22.160 + 22.161 + /* Open it. */ 22.162 + image->fd = open(TMP_IMAGE_FILE_NAME, 22.163 + O_RDWR | O_DIRECT | O_LARGEFILE); 22.164 + 22.165 + if ((image->fd < 0) && (errno == EINVAL)) { 22.166 + /* Maybe O_DIRECT isn't supported. */ 22.167 + warn("open() failed on '%s', trying again without O_DIRECT", 22.168 + TMP_IMAGE_FILE_NAME); 22.169 + image->fd = open(TMP_IMAGE_FILE_NAME, O_RDWR | O_LARGEFILE); 22.170 + } 22.171 + 22.172 + if (image->fd < 0) { 22.173 + warn("Couldn't open image file!"); 22.174 + free(image); 22.175 + return -EINVAL; 22.176 + } 22.177 + 22.178 + /* Size it. */ 22.179 + ret = fstat(image->fd, &stat); 22.180 + if (ret != 0) { 22.181 + printf("Couldn't stat image in PROBE!"); 22.182 + return -EINVAL; 22.183 + } 22.184 + 22.185 + image->size = (stat.st_size >> SECTOR_SHIFT); 22.186 + 22.187 + /* TODO: IOCTL to get size of raw device. */ 22.188 +/* 22.189 + ret = ioctl(img->fd, BLKGETSIZE, &blksize); 22.190 + if (ret != 0) { 22.191 + printf("Couldn't ioctl image in PROBE!\n"); 22.192 + goto err; 22.193 + } 22.194 +*/ 22.195 + if (image->size == 0) 22.196 + image->size =((u64) 16836057); 22.197 + image->secsize = 512; 22.198 + image->info = 0; 22.199 + 22.200 + /* Register the hooks */ 22.201 + blkif_register_request_hook(blkif, "Ublkback req.", ublkback_request); 22.202 + blkif_register_response_hook(blkif, "Ublkback resp.", ublkback_response); 22.203 + 22.204 + 22.205 + printf(">X<Created a new blkif! 
pdev was %ld, but you got %s\n", 22.206 + blkif->pdev, TMP_IMAGE_FILE_NAME); 22.207 + 22.208 + blkif->ops = &ublkback_ops; 22.209 + blkif->prv = (void *)image; 22.210 + 22.211 + return 0; 22.212 +} 22.213 + 22.214 + 22.215 +/* --- Moving the bits: ----------------------------------------------- */ 22.216 + 22.217 +static int batch_count = 0; 22.218 +int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done) 22.219 +{ 22.220 + int fd; 22.221 + u64 sector; 22.222 + char *spage, *dpage; 22.223 + int ret, i, idx; 22.224 + blkif_response_t *rsp; 22.225 + domid_t dom = ID_TO_DOM(req->id); 22.226 + static struct iocb *ioq[MAX_SEGMENTS_PER_REQ*MAX_REQUESTS]; 22.227 + static int io_idx = 0; 22.228 + struct iocb *io; 22.229 + image_t *img; 22.230 + 22.231 + img = (image_t *)blkif->prv; 22.232 + fd = img->fd; 22.233 + 22.234 + switch (req->operation) 22.235 + { 22.236 + case BLKIF_OP_WRITE: 22.237 + { 22.238 + unsigned long size; 22.239 + 22.240 + 22.241 + batch_count++; 22.242 + 22.243 + idx = ID_TO_IDX(req->id); 22.244 + ASSERT(pending_list[idx].count == 0); 22.245 + memcpy(&pending_list[idx].req, req, sizeof(*req)); 22.246 + pending_list[idx].count = req->nr_segments; 22.247 + pending_list[idx].blkif = blkif; 22.248 + 22.249 + for (i = 0; i < req->nr_segments; i++) { 22.250 + 22.251 + sector = req->sector_number + (8*i); 22.252 + 22.253 + size = blkif_last_sect (req->frame_and_sects[i]) - 22.254 + blkif_first_sect(req->frame_and_sects[i]) + 1; 22.255 + 22.256 + if (blkif_first_sect(req->frame_and_sects[i]) != 0) 22.257 + DPRINTF("iWR: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 22.258 + req->sector_number, sector, 22.259 + blkif_first_sect(req->frame_and_sects[i]), 22.260 + blkif_last_sect (req->frame_and_sects[i]), 22.261 + (long)(sector << SECTOR_SHIFT)); 22.262 + 22.263 + spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 22.264 + spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; 22.265 + 22.266 + /*convert size and sector to byte offsets */ 22.267 + size <<= SECTOR_SHIFT; 22.268 + sector <<= SECTOR_SHIFT; 22.269 + 22.270 + io = iocb_free[--iocb_free_count]; 22.271 + io_prep_pwrite(io, fd, spage, size, sector); 22.272 + io->data = (void *)idx; 22.273 + //ioq[i] = io; 22.274 + ioq[io_idx++] = io; 22.275 + } 22.276 + 22.277 + if (batch_done) { 22.278 + ret = io_submit(ctx, io_idx, ioq); 22.279 + batch_count = 0; 22.280 + if (ret < 0) 22.281 + printf("BADNESS: io_submit error! 
(%d)\n", errno); 22.282 + io_idx = 0; 22.283 + } 22.284 + 22.285 + return BLKTAP_STOLEN; 22.286 + 22.287 + } 22.288 + case BLKIF_OP_READ: 22.289 + { 22.290 + unsigned long size; 22.291 + 22.292 + batch_count++; 22.293 + idx = ID_TO_IDX(req->id); 22.294 + ASSERT(pending_list[idx].count == 0); 22.295 + memcpy(&pending_list[idx].req, req, sizeof(*req)); 22.296 + pending_list[idx].count = req->nr_segments; 22.297 + pending_list[idx].blkif = blkif; 22.298 + 22.299 + for (i = 0; i < req->nr_segments; i++) { 22.300 + 22.301 + sector = req->sector_number + (8*i); 22.302 + 22.303 + size = blkif_last_sect (req->frame_and_sects[i]) - 22.304 + blkif_first_sect(req->frame_and_sects[i]) + 1; 22.305 + 22.306 + dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 22.307 + dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; 22.308 + 22.309 + if (blkif_first_sect(req->frame_and_sects[i]) != 0) 22.310 + DPRINTF("iRD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) " 22.311 + "pos: %15lu dpage: %p\n", 22.312 + req->sector_number, sector, 22.313 + blkif_first_sect(req->frame_and_sects[i]), 22.314 + blkif_last_sect (req->frame_and_sects[i]), 22.315 + (long)(sector << SECTOR_SHIFT), dpage); 22.316 + 22.317 + /*convert size and sector to byte offsets */ 22.318 + size <<= SECTOR_SHIFT; 22.319 + sector <<= SECTOR_SHIFT; 22.320 + 22.321 + 22.322 + /* 22.323 + * NB: Looks like AIO now has non-page aligned support, this path 22.324 + * can probably be removed... Only really used for hunting 22.325 + * superblocks anyway... ;) 22.326 + */ 22.327 + if ( ((unsigned long)dpage % PAGE_SIZE) != 0 ) { 22.328 + /* AIO to raw devices must be page aligned, so do this read 22.329 + * synchronously. The OS is probably just looking for 22.330 + * a superblock or something, so this won't hurt performance. 22.331 + */ 22.332 + int ret; 22.333 + 22.334 + printf("Slow path block read.\n"); 22.335 + /* Question: do in-progress aio ops modify the file cursor? */ 22.336 + ret = lseek(fd, sector, SEEK_SET); 22.337 + if (ret == (off_t)-1) 22.338 + printf("lseek failed!\n"); 22.339 + ret = read(fd, dpage, size); 22.340 + if (ret < 0) 22.341 + printf("read problem (%d)\n", ret); 22.342 + printf("|\n|\n| read: %lld, %lu, %d\n|\n|\n", sector, size, ret); 22.343 + 22.344 + /* not an async request any more... */ 22.345 + pending_list[idx].count--; 22.346 + 22.347 + rsp = (blkif_response_t *)req; 22.348 + rsp->id = req->id; 22.349 + rsp->operation = BLKIF_OP_READ; 22.350 + rsp->status = BLKIF_RSP_OKAY; 22.351 + return BLKTAP_RESPOND; 22.352 + /* Doh -- need to flush aio if this is end-of-batch */ 22.353 + } 22.354 + 22.355 + io = iocb_free[--iocb_free_count]; 22.356 + 22.357 + io_prep_pread(io, fd, dpage, size, sector); 22.358 + io->data = (void *)idx; 22.359 + 22.360 + ioq[io_idx++] = io; 22.361 + //ioq[i] = io; 22.362 + } 22.363 + 22.364 + if (batch_done) { 22.365 + ret = io_submit(ctx, io_idx, ioq); 22.366 + batch_count = 0; 22.367 + if (ret < 0) 22.368 + printf("BADNESS: io_submit error! 
(%d)\n", errno); 22.369 + io_idx = 0; 22.370 + } 22.371 + 22.372 + return BLKTAP_STOLEN; 22.373 + 22.374 + } 22.375 + } 22.376 + 22.377 + printf("Unknown block operation!\n"); 22.378 +err: 22.379 + rsp = (blkif_response_t *)req; 22.380 + rsp->id = req->id; 22.381 + rsp->operation = req->operation; 22.382 + rsp->status = BLKIF_RSP_ERROR; 22.383 + return BLKTAP_RESPOND; 22.384 +} 22.385 + 22.386 + 22.387 +int ublkback_pollhook(int fd) 22.388 +{ 22.389 + struct io_event *ep; 22.390 + int n, ret, idx; 22.391 + blkif_request_t *req; 22.392 + blkif_response_t *rsp; 22.393 + int responses_queued = 0; 22.394 + int pages=0; 22.395 + 22.396 + for (ep = aio_events; aio_event_count-- > 0; ep++) { 22.397 + struct iocb *io = ep->obj; 22.398 + idx = (int) ep->data; 22.399 + 22.400 + if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){ 22.401 + printf("invalid index returned(%u)!\n", idx); 22.402 + break; 22.403 + } 22.404 + 22.405 + if ((int)ep->res < 0) 22.406 + printf("***\n***aio request error! (%d,%d)\n***\n", 22.407 + (int)ep->res, (int)ep->res2); 22.408 + 22.409 + pending_list[idx].count--; 22.410 + iocb_free[iocb_free_count++] = io; 22.411 + pages++; 22.412 + 22.413 + if (pending_list[idx].count == 0) { 22.414 + blkif_request_t tmp = pending_list[idx].req; 22.415 + rsp = (blkif_response_t *)&pending_list[idx].req; 22.416 + rsp->id = tmp.id; 22.417 + rsp->operation = tmp.operation; 22.418 + rsp->status = BLKIF_RSP_OKAY; 22.419 + blkif_inject_response(pending_list[idx].blkif, rsp); 22.420 + responses_queued++; 22.421 + } 22.422 + } 22.423 + 22.424 + if (responses_queued) { 22.425 + blktap_kick_responses(); 22.426 + } 22.427 + 22.428 + read(aio_notify[READ], &idx, sizeof(idx)); 22.429 + aio_listening = 1; 22.430 + pthread_mutex_unlock(¬ifier_sem); 22.431 + 22.432 + return 0; 22.433 +} 22.434 + 22.435 +/* the image library terminates the request stream. _resp is a noop. */ 22.436 +int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done) 22.437 +{ 22.438 + return BLKTAP_PASS; 22.439 +} 22.440 + 22.441 +void ublkback_init(void) 22.442 +{ 22.443 + int i, rc; 22.444 + pthread_t p; 22.445 + 22.446 + for (i = 0; i < MAX_REQUESTS; i++) 22.447 + pending_list[i].count = 0; 22.448 + 22.449 + memset(&ctx, 0, sizeof(ctx)); 22.450 + rc = io_queue_init(MAX_AIO_REQS, &ctx); 22.451 + if (rc != 0) { 22.452 + printf("queue_init failed! (%d)\n", rc); 22.453 + exit(0); 22.454 + } 22.455 + 22.456 + for (i=0; i<MAX_AIO_REQS; i++) { 22.457 + if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) { 22.458 + printf("error allocating iocb array\n"); 22.459 + exit(0); 22.460 + } 22.461 + iocb_free_count = i; 22.462 + } 22.463 + 22.464 + rc = pipe(aio_notify); 22.465 + if (rc != 0) { 22.466 + printf("pipe failed! (%d)\n", errno); 22.467 + exit(0); 22.468 + } 22.469 + 22.470 + rc = pthread_create(&p, NULL, notifier_thread, NULL); 22.471 + if (rc != 0) { 22.472 + printf("pthread_create failed! (%d)\n", errno); 22.473 + exit(0); 22.474 + } 22.475 + 22.476 + aio_listening = 1; 22.477 + 22.478 + blktap_attach_poll(aio_notify[READ], POLLIN, ublkback_pollhook); 22.479 +} 22.480 +
23.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 23.2 +++ b/tools/blktap/ublkback/ublkbacklib.h Sun Sep 04 21:19:44 2005 +0000 23.3 @@ -0,0 +1,16 @@ 23.4 +/* ublkbacklib.h 23.5 + * 23.6 + * aio image-backed block device. 23.7 + * 23.8 + * (c) 2004 Andrew Warfield. 23.9 + * 23.10 + * Xend has been modified to use an amorfs:[fsid] disk tag. 23.11 + * This will show up as device type (maj:240,min:0) = 61440. 23.12 + * 23.13 + * The fsid is placed in the sec_start field of the disk extent. 23.14 + */ 23.15 + 23.16 +int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done); 23.17 +int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done); /* noop */ 23.18 +int ublkback_new_blkif(blkif_t *blkif); 23.19 +void ublkback_init(void);
24.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 24.2 +++ b/tools/blktap/xenbus.c Sun Sep 04 21:19:44 2005 +0000 24.3 @@ -0,0 +1,578 @@ 24.4 +/* 24.5 + * xenbus.c 24.6 + * 24.7 + * xenbus interface to the blocktap. 24.8 + * 24.9 + * This handles the top-half of integration with block devices through the 24.10 + * store -- the tap driver negotiates the device channel etc, while the 24.11 + * userland tap client needs to sort out the disk parameters etc. 24.12 + * 24.13 + * A. Warfield 2005 Based primarily on the blkback and xenbus driver code. 24.14 + * Comments there apply here... 24.15 + */ 24.16 + 24.17 +#include <stdio.h> 24.18 +#include <stdlib.h> 24.19 +#include <string.h> 24.20 +#include <err.h> 24.21 +#include <stdarg.h> 24.22 +#include <errno.h> 24.23 +#include <xs.h> 24.24 +#include <sys/types.h> 24.25 +#include <sys/stat.h> 24.26 +#include <fcntl.h> 24.27 +#include <poll.h> 24.28 +#include "blktaplib.h" 24.29 +#include "list.h" 24.30 + 24.31 +#if 0 24.32 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 24.33 +#else 24.34 +#define DPRINTF(_f, _a...) ((void)0) 24.35 +#endif 24.36 + 24.37 +/* --- Xenstore / Xenbus helpers ---------------------------------------- */ 24.38 +/* 24.39 + * These should all be pulled out into the xenstore API. I'm faulting commands 24.40 + * in from the xenbus interface as I need them. 24.41 + */ 24.42 + 24.43 + 24.44 +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ 24.45 +int xs_gather(struct xs_handle *xs, const char *dir, ...) 24.46 +{ 24.47 + va_list ap; 24.48 + const char *name; 24.49 + char *path; 24.50 + int ret = 0; 24.51 + 24.52 + va_start(ap, dir); 24.53 + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { 24.54 + const char *fmt = va_arg(ap, char *); 24.55 + void *result = va_arg(ap, void *); 24.56 + char *p; 24.57 + 24.58 + if (asprintf(&path, "%s/%s", dir, name) == -1) 24.59 + { 24.60 + warn("allocation error in xs_gather!"); 24.61 + ret = ENOMEM; 24.62 + break; 24.63 + } 24.64 + p = xs_read(xs, path, NULL); 24.65 + free(path); 24.66 + if (p == NULL) { 24.67 + ret = ENOENT; 24.68 + break; 24.69 + } 24.70 + if (fmt) { 24.71 + if (sscanf(p, fmt, result) == 0) 24.72 + ret = EINVAL; 24.73 + free(p); 24.74 + } else 24.75 + *(char **)result = p; 24.76 + } 24.77 + va_end(ap); 24.78 + return ret; 24.79 +} 24.80 + 24.81 +/* Single printf and write: returns -errno or 0. */ 24.82 +int xs_printf(struct xs_handle *h, const char *dir, const char *node, 24.83 + const char *fmt, ...) 24.84 +{ 24.85 + char *buf, *path; 24.86 + va_list ap; 24.87 + int ret; 24.88 + 24.89 + va_start(ap, fmt); 24.90 + ret = vasprintf(&buf, fmt, ap); 24.91 + va_end(ap); 24.92 + 24.93 + asprintf(&path, "%s/%s", dir, node); 24.94 + 24.95 + if ((path == NULL) || (buf == NULL)) 24.96 + return 0; 24.97 + 24.98 + ret = xs_write(h, path, buf, strlen(buf)+1, O_CREAT); 24.99 + 24.100 + free(buf); 24.101 + free(path); 24.102 + 24.103 + return ret; 24.104 +} 24.105 + 24.106 + 24.107 +int xs_exists(struct xs_handle *h, const char *path) 24.108 +{ 24.109 + char **d; 24.110 + int num; 24.111 + 24.112 + d = xs_directory(h, path, &num); 24.113 + if (d == NULL) 24.114 + return 0; 24.115 + free(d); 24.116 + return 1; 24.117 +} 24.118 + 24.119 + 24.120 + 24.121 +/* This assumes that the domain name we are looking for is unique!
*/ 24.122 +char *get_dom_uuid(struct xs_handle *h, const char *name) 24.123 +{ 24.124 + char **e, *val, *uuid = NULL; 24.125 + int num, i, len; 24.126 + char *path; 24.127 + 24.128 + e = xs_directory(h, "/domain", &num); 24.129 + 24.130 + i=0; 24.131 + while (i < num) { 24.132 + asprintf(&path, "/domain/%s/name", e[i]); 24.133 + val = xs_read(h, path, &len); 24.134 + free(path); 24.135 + if (val == NULL) 24.136 + { i++; continue; } /* don't spin on an unreadable entry */ 24.137 + if (strcmp(val, name) == 0) { 24.138 + /* match! */ 24.139 + asprintf(&path, "/domain/%s/uuid", e[i]); 24.140 + uuid = xs_read(h, path, &len); 24.141 + free(val); 24.142 + free(path); 24.143 + break; 24.144 + } 24.145 + free(val); 24.146 + i++; 24.147 + } 24.148 + 24.149 + free(e); 24.150 + return uuid; 24.151 +} 24.152 + 24.153 +static int strsep_len(const char *str, char c, unsigned int len) 24.154 +{ 24.155 + unsigned int i; 24.156 + 24.157 + for (i = 0; str[i]; i++) 24.158 + if (str[i] == c) { 24.159 + if (len == 0) 24.160 + return i; 24.161 + len--; 24.162 + } 24.163 + return (len == 0) ? i : -ERANGE; 24.164 +} 24.165 + 24.166 + 24.167 +/* xenbus watches: */ 24.168 +/* Register callback to watch this node. */ 24.169 +struct xenbus_watch 24.170 +{ 24.171 + struct list_head list; 24.172 + char *node; 24.173 + void (*callback)(struct xs_handle *h, 24.174 + struct xenbus_watch *, 24.175 + const char *node); 24.176 +}; 24.177 + 24.178 +static LIST_HEAD(watches); 24.179 + 24.180 +/* A little paranoia: we don't just trust token. */ 24.181 +static struct xenbus_watch *find_watch(const char *token) 24.182 +{ 24.183 + struct xenbus_watch *i, *cmp; 24.184 + 24.185 + cmp = (void *)strtoul(token, NULL, 16); 24.186 + 24.187 + list_for_each_entry(i, &watches, list) 24.188 + if (i == cmp) 24.189 + return i; 24.190 + return NULL; 24.191 +} 24.192 + 24.193 +/* Register callback to watch this node. Like xs_watch, returns 0 on failure */ 24.194 +int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch) 24.195 +{ 24.196 + /* Pointer in ascii is the token. */ 24.197 + char token[sizeof(watch) * 2 + 1]; 24.198 + int er; 24.199 + 24.200 + sprintf(token, "%lX", (long)watch); 24.201 + if (find_watch(token)) 24.202 + { 24.203 + warn("watch collision!"); 24.204 + return -EINVAL; 24.205 + } 24.206 + 24.207 + er = xs_watch(h, watch->node, token); 24.208 + if (er != 0) { 24.209 + list_add(&watch->list, &watches); 24.210 + } 24.211 + 24.212 + return er; 24.213 +} 24.214 + 24.215 +int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch) 24.216 +{ 24.217 + char token[sizeof(watch) * 2 + 1]; 24.218 + int er; 24.219 + 24.220 + sprintf(token, "%lX", (long)watch); 24.221 + if (!find_watch(token)) 24.222 + { 24.223 + warn("no such watch!"); 24.224 + return -EINVAL; 24.225 + } 24.226 + 24.227 + 24.228 + er = xs_unwatch(h, watch->node, token); 24.229 + list_del(&watch->list); 24.230 + 24.231 + if (er == 0) 24.232 + warn("XENBUS Failed to release watch %s: %i", 24.233 + watch->node, er); 24.234 + return 0; 24.235 +} 24.236 + 24.237 +/* Re-register callbacks to all watches.
*/ 24.238 +void reregister_xenbus_watches(struct xs_handle *h) 24.239 +{ 24.240 + struct xenbus_watch *watch; 24.241 + char token[sizeof(watch) * 2 + 1]; 24.242 + 24.243 + list_for_each_entry(watch, &watches, list) { 24.244 + sprintf(token, "%lX", (long)watch); 24.245 + xs_watch(h, watch->node, token); 24.246 + } 24.247 +} 24.248 + 24.249 +/* based on watch_thread() */ 24.250 +int xs_fire_next_watch(struct xs_handle *h) 24.251 +{ 24.252 + char **res; 24.253 + char *token; 24.254 + char *node = NULL; 24.255 + struct xenbus_watch *w; 24.256 + int er; 24.257 + 24.258 + res = xs_read_watch(h); 24.259 + if (res == NULL) 24.260 + return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */ 24.261 + 24.262 + node = res[0]; 24.263 + token = res[1]; 24.264 + 24.265 + er = xs_acknowledge_watch(h, token); 24.266 + if (er == 0) 24.267 + warn("Couldn't acknowledge watch (%s)", token); 24.268 + 24.269 + w = find_watch(token); 24.270 + if (!w) 24.271 + { 24.272 + warn("unregistered watch fired"); 24.273 + goto done; 24.274 + } 24.275 + w->callback(h, w, node); 24.276 + 24.277 + done: 24.278 + free(res); 24.279 + return 1; 24.280 +} 24.281 + 24.282 + 24.283 + 24.284 + 24.285 +/* ---------------------------------------------------------------------- */ 24.286 + 24.287 +struct backend_info 24.288 +{ 24.289 + /* our communications channel */ 24.290 + blkif_t *blkif; 24.291 + 24.292 + long int frontend_id; 24.293 + long int pdev; 24.294 + long int readonly; 24.295 + 24.296 + /* watch back end for changes */ 24.297 + struct xenbus_watch backend_watch; 24.298 + char *backpath; 24.299 + 24.300 + /* watch front end for changes */ 24.301 + struct xenbus_watch watch; 24.302 + char *frontpath; 24.303 + 24.304 + struct list_head list; 24.305 +}; 24.306 + 24.307 +static LIST_HEAD(belist); 24.308 + 24.309 +static struct backend_info *be_lookup_be(const char *bepath) 24.310 +{ 24.311 + struct backend_info *be; 24.312 + 24.313 + list_for_each_entry(be, &belist, list) 24.314 + if (strcmp(bepath, be->backpath) == 0) 24.315 + return be; 24.316 + return (struct backend_info *)NULL; 24.317 +} 24.318 + 24.319 +static int be_exists_be(const char *bepath) 24.320 +{ 24.321 + return ( be_lookup_be(bepath) != NULL ); 24.322 +} 24.323 + 24.324 +static struct backend_info *be_lookup_fe(const char *fepath) 24.325 +{ 24.326 + struct backend_info *be; 24.327 + 24.328 + list_for_each_entry(be, &belist, list) 24.329 + if (strcmp(fepath, be->frontpath) == 0) 24.330 + return be; 24.331 + return (struct backend_info *)NULL; 24.332 +} 24.333 + 24.334 +static int backend_remove(struct xs_handle *h, struct backend_info *be) 24.335 +{ 24.336 + /* Turn off watches. */ 24.337 + if (be->watch.node) 24.338 + unregister_xenbus_watch(h, &be->watch); 24.339 + if (be->backend_watch.node) 24.340 + unregister_xenbus_watch(h, &be->backend_watch); 24.341 + 24.342 + /* Unhook from be list. */ 24.343 + list_del(&be->list); 24.344 + 24.345 + /* Free everything else. */ 24.346 + if (be->blkif) 24.347 + free_blkif(be->blkif); 24.348 + if (be->frontpath) 24.349 + free(be->frontpath); 24.350 + if (be->backpath) 24.351 + free(be->backpath); 24.352 + free(be); 24.353 + return 0; 24.354 +} 24.355 + 24.356 +static void frontend_changed(struct xs_handle *h, struct xenbus_watch *w, 24.357 + const char *fepath_im) 24.358 +{ 24.359 + struct backend_info *be; 24.360 + char *fepath = NULL; 24.361 + int er; 24.362 + 24.363 + be = be_lookup_fe(w->node); 24.364 + if (be == NULL) 24.365 + { 24.366 + warn("frontend changed called for nonexistent backend! 
(%s)", fepath); 24.367 + goto fail; 24.368 + } 24.369 + 24.370 + /* If other end is gone, delete ourself. */ 24.371 + if (w->node && !xs_exists(h, be->frontpath)) { 24.372 + DPRINTF("DELETING BE: %s\n", be->backpath); 24.373 + backend_remove(h, be); 24.374 + return; 24.375 + } 24.376 + 24.377 + if (be->blkif == NULL || (be->blkif->state == CONNECTED)) 24.378 + return; 24.379 + 24.380 + /* Supply the information about the device the frontend needs */ 24.381 + er = xs_transaction_start(h, be->backpath); 24.382 + if (er == 0) { 24.383 + warn("starting transaction"); 24.384 + goto fail; 24.385 + } 24.386 + 24.387 + er = xs_printf(h, be->backpath, "sectors", "%lu", 24.388 + be->blkif->ops->get_size(be->blkif)); 24.389 + if (er == 0) { 24.390 + warn("writing sectors"); 24.391 + goto fail; 24.392 + } 24.393 + 24.394 + er = xs_printf(h, be->backpath, "info", "%u", 24.395 + be->blkif->ops->get_info(be->blkif)); 24.396 + if (er == 0) { 24.397 + warn("writing info"); 24.398 + goto fail; 24.399 + } 24.400 + 24.401 + er = xs_printf(h, be->backpath, "sector-size", "%lu", 24.402 + be->blkif->ops->get_secsize(be->blkif)); 24.403 + if (er == 0) { 24.404 + warn("writing sector-size"); 24.405 + goto fail; 24.406 + } 24.407 + 24.408 + be->blkif->state = CONNECTED; 24.409 + 24.410 + xs_transaction_end(h, 0); 24.411 + 24.412 + return; 24.413 + 24.414 + fail: 24.415 + if (fepath) 24.416 + free(fepath); 24.417 +} 24.418 + 24.419 + 24.420 +static void backend_changed(struct xs_handle *h, struct xenbus_watch *w, 24.421 + const char *bepath_im) 24.422 +{ 24.423 + struct backend_info *be; 24.424 + char *path = NULL, *p; 24.425 + int len, er; 24.426 + long int pdev = 0, handle; 24.427 + 24.428 + be = be_lookup_be(w->node); 24.429 + if (be == NULL) 24.430 + { 24.431 + warn("backend changed called for nonexistent backend! (%s)", w->node); 24.432 + goto fail; 24.433 + } 24.434 + 24.435 + er = xs_gather(h, be->backpath, "physical-device", "%li", &pdev, NULL); 24.436 + if (er != 0) 24.437 + goto fail; 24.438 + 24.439 + if (be->pdev && be->pdev != pdev) { 24.440 + warn("changing physical-device not supported"); 24.441 + goto fail; 24.442 + } 24.443 + be->pdev = pdev; 24.444 + 24.445 + asprintf(&path, "%s/%s", w->node, "read-only"); 24.446 + if (xs_exists(h, path)) 24.447 + be->readonly = 1; 24.448 + 24.449 + if (be->blkif == NULL) { 24.450 + /* Front end dir is a number, which is used as the handle. */ 24.451 + p = strrchr(be->frontpath, '/') + 1; 24.452 + handle = strtoul(p, NULL, 0); 24.453 + 24.454 + be->blkif = alloc_blkif(be->frontend_id); 24.455 + if (be->blkif == NULL) 24.456 + goto fail; 24.457 + 24.458 + er = blkif_init(be->blkif, handle, be->pdev, be->readonly); 24.459 + if (er) 24.460 + goto fail; 24.461 + 24.462 + DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", w->node); 24.463 + 24.464 + /* Pass in NULL node to skip exist test. */ 24.465 + frontend_changed(h, &be->watch, NULL); 24.466 + } 24.467 + 24.468 + fail: 24.469 + if (path) 24.470 + free(path); 24.471 + 24.472 +} 24.473 + 24.474 +static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w, 24.475 + const char *bepath_im) 24.476 +{ 24.477 + struct backend_info *be = NULL; 24.478 + char *frontend = NULL, *bepath = NULL; 24.479 + int er, len; 24.480 + 24.481 + bepath = strdup(bepath_im); 24.482 + if (!bepath) 24.483 + return; 24.484 + len = strsep_len(bepath, '/', 6); 24.485 + if (len < 0) 24.486 + goto free_be; 24.487 + 24.488 + bepath[len] = '\0'; /*truncate the passed-in string with predjudice. 
*/ 24.489 + 24.490 + be = malloc(sizeof(*be)); 24.491 + if (!be) { 24.492 + warn("allocating backend structure"); 24.493 + goto free_be; 24.494 + } 24.495 + memset(be, 0, sizeof(*be)); 24.496 + 24.497 + frontend = NULL; 24.498 + er = xs_gather(h, bepath, 24.499 + "frontend-id", "%li", &be->frontend_id, 24.500 + "frontend", NULL, &frontend, 24.501 + NULL); 24.502 + if (er) 24.503 + goto free_be; 24.504 + 24.505 + if (strlen(frontend) == 0 || !xs_exists(h, frontend)) { 24.506 + /* If we can't get a frontend path and a frontend-id, 24.507 + * then our bus-id is no longer valid and we need to 24.508 + * destroy the backend device. 24.509 + */ 24.510 + DPRINTF("No frontend (%s)\n", frontend); 24.511 + goto free_be; 24.512 + } 24.513 + 24.514 + /* Are we already tracking this device? */ 24.515 + if (be_exists_be(bepath)) 24.516 + goto free_be; 24.517 + 24.518 + be->backpath = bepath; 24.519 + be->backend_watch.node = be->backpath; 24.520 + be->backend_watch.callback = backend_changed; 24.521 + er = register_xenbus_watch(h, &be->backend_watch); 24.522 + if (er == 0) { 24.523 + be->backend_watch.node = NULL; 24.524 + warn("error adding backend watch on %s", bepath); 24.525 + goto free_be; 24.526 + } 24.527 + 24.528 + be->frontpath = frontend; 24.529 + be->watch.node = be->frontpath; 24.530 + be->watch.callback = frontend_changed; 24.531 + er = register_xenbus_watch(h, &be->watch); 24.532 + if (er == 0) { 24.533 + be->watch.node = NULL; 24.534 + warn("adding frontend watch on %s", be->frontpath); 24.535 + goto free_be; 24.536 + } 24.537 + 24.538 + list_add(&be->list, &belist); 24.539 + 24.540 + DPRINTF("[PROBE]: ADDED NEW DEVICE (%s)\n", bepath_im); 24.541 + 24.542 + backend_changed(h, &be->backend_watch, bepath); 24.543 + return; 24.544 + 24.545 + free_be: 24.546 + if ((be) && (be->backend_watch.node)) 24.547 + unregister_xenbus_watch(h, &be->backend_watch); 24.548 + if (frontend) 24.549 + free(frontend); 24.550 + if (bepath) 24.551 + free(bepath); 24.552 + free(be); 24.553 + return; 24.554 +} 24.555 + 24.556 + 24.557 +int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname) 24.558 +{ 24.559 + char *uuid, *path; 24.560 + struct xenbus_watch *vbd_watch; 24.561 + int er; 24.562 + 24.563 + uuid = get_dom_uuid(h, domname); 24.564 + 24.565 + DPRINTF("%s: %s\n", domname, (uuid != NULL) ? uuid : "[ not found! ]"); 24.566 + 24.567 + asprintf(&path, "/domain/%s/backend/vbd", uuid); 24.568 + if (path == NULL) 24.569 + return -ENOMEM; 24.570 + 24.571 + vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch)); 24.572 + vbd_watch->node = path; 24.573 + vbd_watch->callback = blkback_probe; 24.574 + er = register_xenbus_watch(h, vbd_watch); 24.575 + if (er == 0) { 24.576 + warn("Error adding vbd probe watch %s", path); 24.577 + return -EINVAL; 24.578 + } 24.579 + 24.580 + return 0; 24.581 +}