ia64/xen-unstable

changeset 17223:f07a7ba63885

Use ioemu block drivers through blktap.

Add support for a tap:ioemu pseudo driver. Devices using this driver
no longer go through tapdisk (which contains the duplicated code), but
instead connect to the qemu-dm of the domain. No currently working
configuration should be broken by this change, as you can still choose
to use the tapdisk drivers.

Signed-off-by: Kevin Wolf <kwolf@suse.de>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Mar 18 10:58:47 2008 +0000 (2008-03-18)
parents bab6afd2a29f
children 3633eae53572
files tools/blktap/drivers/blktapctrl.c tools/blktap/drivers/tapdisk.h tools/blktap/lib/blktaplib.h tools/ioemu/Makefile.target tools/ioemu/hw/xen_blktap.c tools/ioemu/hw/xen_blktap.h tools/ioemu/hw/xen_machine_pv.c tools/ioemu/vl.c tools/python/xen/xend/server/BlktapController.py
line diff
     1.1 --- a/tools/blktap/drivers/blktapctrl.c	Tue Mar 18 10:53:20 2008 +0000
     1.2 +++ b/tools/blktap/drivers/blktapctrl.c	Tue Mar 18 10:58:47 2008 +0000
     1.3 @@ -501,6 +501,80 @@ int launch_tapdisk(char *wrctldev, char 
     1.4  	return 0;
     1.5  }
     1.6  
     1.7 +/* Connect to qemu-dm */
     1.8 +static int connect_qemu(blkif_t *blkif)
     1.9 +{
    1.10 +	char *rdctldev, *wrctldev;
    1.11 +	
    1.12 +	if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", 
    1.13 +			blkif->domid) < 0)
    1.14 +		return -1;
    1.15 +
    1.16 +	if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", 
    1.17 +			blkif->domid) < 0) {
    1.18 +		free(rdctldev);
    1.19 +		return -1;
    1.20 +	}
    1.21 +
    1.22 +	DPRINTF("Using qemu blktap pipe: %s\n", rdctldev);
    1.23 +	
    1.24 +	blkif->fds[READ] = open_ctrl_socket(wrctldev);
    1.25 +	blkif->fds[WRITE] = open_ctrl_socket(rdctldev);
    1.26 +	
    1.27 +	free(rdctldev);
    1.28 +	free(wrctldev);
    1.29 +	
    1.30 +	if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1)
    1.31 +		return -1;
    1.32 +
    1.33 +	DPRINTF("Attached to qemu blktap pipes\n");
    1.34 +	return 0;
    1.35 +}
    1.36 +
    1.37 +/* Launch tapdisk instance */
    1.38 +static int connect_tapdisk(blkif_t *blkif, int minor)
    1.39 +{
    1.40 +	char *rdctldev = NULL, *wrctldev = NULL;
    1.41 +	int ret = -1;
    1.42 +
    1.43 +	DPRINTF("tapdisk process does not exist:\n");
    1.44 +
    1.45 +	if (asprintf(&rdctldev,
    1.46 +		     "%s/tapctrlread%d", BLKTAP_CTRL_DIR, minor) == -1)
    1.47 +		goto fail;
    1.48 +
    1.49 +	if (asprintf(&wrctldev,
    1.50 +		     "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, minor) == -1)
    1.51 +		goto fail;
    1.52 +	
    1.53 +	blkif->fds[READ] = open_ctrl_socket(rdctldev);
    1.54 +	blkif->fds[WRITE] = open_ctrl_socket(wrctldev);
    1.55 +	
    1.56 +	if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1)
    1.57 +		goto fail;
    1.58 +
    1.59 +	/*launch the new process*/
    1.60 +	DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n",
    1.61 +			wrctldev, rdctldev);
    1.62 +
    1.63 +	if (launch_tapdisk(wrctldev, rdctldev) == -1) {
    1.64 +		DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n",
    1.65 +				wrctldev, rdctldev);
    1.66 +		goto fail;
    1.67 +	}
    1.68 +
    1.69 +	ret = 0;
    1.70 +	
    1.71 +fail:
    1.72 +	if (rdctldev)
    1.73 +		free(rdctldev);
    1.74 +
    1.75 +	if (wrctldev)
    1.76 +		free(wrctldev);
    1.77 +
    1.78 +	return ret;
    1.79 +}
    1.80 +
    1.81  int blktapctrl_new_blkif(blkif_t *blkif)
    1.82  {
    1.83  	blkif_info_t *blk;
    1.84 @@ -524,30 +598,14 @@ int blktapctrl_new_blkif(blkif_t *blkif)
    1.85  		blkif->cookie = next_cookie++;
    1.86  
    1.87  		if (!exist) {
    1.88 -			DPRINTF("Process does not exist:\n");
    1.89 -			if (asprintf(&rdctldev,
    1.90 -				     "%s/tapctrlread%d", BLKTAP_CTRL_DIR, minor) == -1)
    1.91 -				goto fail;
    1.92 -			if (asprintf(&wrctldev,
    1.93 -				     "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, minor) == -1) {
    1.94 -				free(rdctldev);
    1.95 -				goto fail;
    1.96 -			}
    1.97 -			blkif->fds[READ] = open_ctrl_socket(rdctldev);
    1.98 -			blkif->fds[WRITE] = open_ctrl_socket(wrctldev);
    1.99 -			
   1.100 -			if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) 
   1.101 -				goto fail;
   1.102 -
   1.103 -			/*launch the new process*/
   1.104 - 			DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n",wrctldev, rdctldev);
   1.105 - 			if (launch_tapdisk(wrctldev, rdctldev) == -1) {
   1.106 - 				DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n",wrctldev, rdctldev);
   1.107 -				goto fail;
   1.108 +			if (type == DISK_TYPE_IOEMU) {
   1.109 +				if (connect_qemu(blkif))
   1.110 +					goto fail;
   1.111 +			} else {
   1.112 +				if (connect_tapdisk(blkif, minor))
   1.113 +					goto fail;
   1.114  			}
   1.115  
   1.116 -			free(rdctldev);
   1.117 -			free(wrctldev);
   1.118  		} else {
   1.119  			DPRINTF("Process exists!\n");
   1.120  			blkif->fds[READ] = exist->fds[READ];
     2.1 --- a/tools/blktap/drivers/tapdisk.h	Tue Mar 18 10:53:20 2008 +0000
     2.2 +++ b/tools/blktap/drivers/tapdisk.h	Tue Mar 18 10:58:47 2008 +0000
     2.3 @@ -167,6 +167,7 @@ extern struct tap_disk tapdisk_qcow2;
     2.4  #define DISK_TYPE_RAM      3
     2.5  #define DISK_TYPE_QCOW     4
     2.6  #define DISK_TYPE_QCOW2    5
     2.7 +#define DISK_TYPE_IOEMU    6
     2.8  
     2.9  
    2.10  /*Define Individual Disk Parameters here */
    2.11 @@ -230,6 +231,16 @@ static disk_info_t qcow2_disk = {
    2.12  #endif
    2.13  };
    2.14  
    2.15 +static disk_info_t ioemu_disk = {
    2.16 +	DISK_TYPE_IOEMU,
    2.17 +	"ioemu disk",
    2.18 +	"ioemu",
    2.19 +	0,
    2.20 +#ifdef TAPDISK
    2.21 +	NULL
    2.22 +#endif
    2.23 +};
    2.24 +
    2.25  /*Main disk info array */
    2.26  static disk_info_t *dtypes[] = {
    2.27  	&aio_disk,
    2.28 @@ -238,6 +249,7 @@ static disk_info_t *dtypes[] = {
    2.29  	&ram_disk,
    2.30  	&qcow_disk,
    2.31  	&qcow2_disk,
    2.32 +	&ioemu_disk,
    2.33  };
    2.34  
    2.35  typedef struct driver_list_entry {
     3.1 --- a/tools/blktap/lib/blktaplib.h	Tue Mar 18 10:53:20 2008 +0000
     3.2 +++ b/tools/blktap/lib/blktaplib.h	Tue Mar 18 10:58:47 2008 +0000
     3.3 @@ -221,15 +221,5 @@ int xs_fire_next_watch(struct xs_handle 
     3.4       ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * getpagesize()) +    \
     3.5       ((_seg) * getpagesize()))
     3.6  
     3.7 -/* Defines that are only used by library clients */
     3.8 -
     3.9 -#ifndef __COMPILING_BLKTAP_LIB
    3.10 -
    3.11 -static char *blkif_op_name[] = {
    3.12 -	[BLKIF_OP_READ]       = "READ",
    3.13 -	[BLKIF_OP_WRITE]      = "WRITE",
    3.14 -};
    3.15 -
    3.16 -#endif /* __COMPILING_BLKTAP_LIB */
    3.17  
    3.18  #endif /* __BLKTAPLIB_H__ */
     4.1 --- a/tools/ioemu/Makefile.target	Tue Mar 18 10:53:20 2008 +0000
     4.2 +++ b/tools/ioemu/Makefile.target	Tue Mar 18 10:58:47 2008 +0000
     4.3 @@ -17,6 +17,7 @@ TARGET_PATH=$(SRC_PATH)/target-$(TARGET_
     4.4  VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/audio
     4.5  CPPFLAGS+=-I. -I.. -I$(TARGET_PATH) -I$(SRC_PATH)
     4.6  CPPFLAGS+= -I$(XEN_ROOT)/tools/libxc
     4.7 +CPPFLAGS+= -I$(XEN_ROOT)/tools/blktap/lib
     4.8  CPPFLAGS+= -I$(XEN_ROOT)/tools/xenstore
     4.9  CPPFLAGS+= -I$(XEN_ROOT)/tools/include
    4.10  ifdef CONFIG_DARWIN_USER
    4.11 @@ -429,6 +430,7 @@ VL_OBJS+= cirrus_vga.o parallel.o acpi.o
    4.12  VL_OBJS+= usb-uhci.o smbus_eeprom.o
    4.13  VL_OBJS+= piix4acpi.o
    4.14  VL_OBJS+= xenstore.o
    4.15 +VL_OBJS+= xen_blktap.o
    4.16  VL_OBJS+= xen_platform.o
    4.17  VL_OBJS+= xen_machine_fv.o
    4.18  VL_OBJS+= xen_machine_pv.o
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/tools/ioemu/hw/xen_blktap.c	Tue Mar 18 10:58:47 2008 +0000
     5.3 @@ -0,0 +1,686 @@
     5.4 +/* xen_blktap.c
     5.5 + *
     5.6 + * Interface to blktapctrl to allow use of qemu block drivers with blktap.
     5.7 + * This file is based on tools/blktap/drivers/tapdisk.c
     5.8 + * 
     5.9 + * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield.
    5.10 + * Copyright (c) 2008 Kevin Wolf
    5.11 + */
    5.12 +
    5.13 +/*
    5.14 + * There are several communication channels which are used by this interface:
    5.15 + *
    5.16 + *   - A pair of pipes for receiving and sending general control messages
    5.17 + *     (qemu-read-N and qemu-write-N in /var/run/tap, where N is the domain ID).
    5.18 + *     These control messages are handled by handle_blktap_ctrlmsg().
    5.19 + *
    5.20 + *   - One file descriptor per attached disk (/dev/xen/blktapN) for disk
    5.21 + *     specific control messages. A callback is triggered on this fd if there
    5.22 + *     is a new IO request. The callback function is handle_blktap_iomsg().
    5.23 + *
    5.24 + *   - A shared ring for each attached disk containing the actual IO requests 
    5.25 + *     and responses. Whenever handle_blktap_iomsg() is triggered it processes
    5.26 + *     the requests on this ring.
    5.27 + */
    5.28 +
    5.29 +#include <sys/stat.h>
    5.30 +#include <sys/types.h>
    5.31 +#include <sys/mman.h>
    5.32 +#include <sys/ioctl.h>
    5.33 +#include <fcntl.h>
    5.34 +#include <stdio.h>
    5.35 +#include <errno.h>
    5.36 +#include <stdlib.h>
    5.37 +
    5.38 +#include "vl.h"
    5.39 +#include "blktaplib.h"
    5.40 +#include "xen_blktap.h"
    5.41 +#include "block_int.h"
    5.42 +
    5.43 +#define MSG_SIZE 4096
    5.44 +
    5.45 +#define BLKTAP_CTRL_DIR "/var/run/tap"
    5.46 +
    5.47 +/* If enabled, print debug messages to stderr */
    5.48 +#if 1
    5.49 +#define DPRINTF(_f, _a...) fprintf(stderr, __FILE__ ":%d: " _f, __LINE__, ##_a)
    5.50 +#else
    5.51 +#define DPRINTF(_f, _a...) ((void)0)
    5.52 +#endif
    5.53 +
    5.54 +#if 1                                                                        
    5.55 +#define ASSERT(_p) \
    5.56 +    if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s\n", #_p , \
    5.57 +        __LINE__, __FILE__); *(int*)0=0; }
    5.58 +#else
    5.59 +#define ASSERT(_p) ((void)0)
    5.60 +#endif 
    5.61 +
    5.62 +
    5.63 +extern int domid;
    5.64 +
    5.65 +int read_fd;
    5.66 +int write_fd;
    5.67 +
    5.68 +static pid_t process;
    5.69 +fd_list_entry_t *fd_start = NULL;
    5.70 +
    5.71 +static void handle_blktap_iomsg(void* private);
    5.72 +
    5.73 +struct aiocb_info {
    5.74 +	struct td_state	*s;
    5.75 +	uint64_t sector;
    5.76 +	int nr_secs;
    5.77 +	int idx;
    5.78 +	long i;
    5.79 +};
    5.80 +
    5.81 +static void unmap_disk(struct td_state *s)
    5.82 +{
    5.83 +	tapdev_info_t *info = s->ring_info;
    5.84 +	fd_list_entry_t *entry;
    5.85 +	
    5.86 +	bdrv_close(s->bs);
    5.87 +
    5.88 +	if (info != NULL && info->mem > 0)
    5.89 +	        munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
    5.90 +
    5.91 +	entry = s->fd_entry;
    5.92 +	*entry->pprev = entry->next;
    5.93 +	if (entry->next)
    5.94 +		entry->next->pprev = entry->pprev;
    5.95 +
    5.96 +	qemu_set_fd_handler2(info->fd, NULL, NULL, NULL, NULL);
    5.97 +	close(info->fd);
    5.98 +
    5.99 +	free(s->fd_entry);
   5.100 +	free(s->blkif);
   5.101 +	free(s->ring_info);
   5.102 +	free(s);
   5.103 +
   5.104 +	return;
   5.105 +}
   5.106 +
   5.107 +static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s)
   5.108 +{
   5.109 +	fd_list_entry_t **pprev, *entry;
   5.110 +
   5.111 +	DPRINTF("Adding fd_list_entry\n");
   5.112 +
   5.113 +	/*Add to linked list*/
   5.114 +	s->fd_entry   = entry = malloc(sizeof(fd_list_entry_t));
   5.115 +	entry->tap_fd = tap_fd;
   5.116 +	entry->s      = s;
   5.117 +	entry->next   = NULL;
   5.118 +
   5.119 +	pprev = &fd_start;
   5.120 +	while (*pprev != NULL)
   5.121 +		pprev = &(*pprev)->next;
   5.122 +
   5.123 +	*pprev = entry;
   5.124 +	entry->pprev = pprev;
   5.125 +
   5.126 +	return entry;
   5.127 +}
   5.128 +
   5.129 +static inline struct td_state *get_state(int cookie)
   5.130 +{
   5.131 +	fd_list_entry_t *ptr;
   5.132 +
   5.133 +	ptr = fd_start;
   5.134 +	while (ptr != NULL) {
   5.135 +		if (ptr->cookie == cookie) return ptr->s;
   5.136 +		ptr = ptr->next;
   5.137 +	}
   5.138 +	return NULL;
   5.139 +}
   5.140 +
   5.141 +static struct td_state *state_init(void)
   5.142 +{
   5.143 +	int i;
   5.144 +	struct td_state *s;
   5.145 +	blkif_t *blkif;
   5.146 +
   5.147 +	s = malloc(sizeof(struct td_state));
   5.148 +	blkif = s->blkif = malloc(sizeof(blkif_t));
   5.149 +	s->ring_info = calloc(1, sizeof(tapdev_info_t));
   5.150 +
   5.151 +	for (i = 0; i < MAX_REQUESTS; i++) {
   5.152 +		blkif->pending_list[i].secs_pending = 0;
   5.153 +		blkif->pending_list[i].submitting = 0;
   5.154 +	}
   5.155 +
   5.156 +	return s;
   5.157 +}
   5.158 +
   5.159 +static int map_new_dev(struct td_state *s, int minor)
   5.160 +{
   5.161 +	int tap_fd;
   5.162 +	tapdev_info_t *info = s->ring_info;
   5.163 +	char *devname;
   5.164 +	fd_list_entry_t *ptr;
   5.165 +	int page_size;
   5.166 +
   5.167 +	if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, minor) == -1)
   5.168 +		return -1;
   5.169 +	tap_fd = open(devname, O_RDWR);
   5.170 +	if (tap_fd == -1) 
   5.171 +	{
   5.172 +		DPRINTF("open failed on dev %s!\n",devname);
   5.173 +		goto fail;
   5.174 +	} 
   5.175 +	info->fd = tap_fd;
   5.176 +
   5.177 +	/*Map the shared memory*/
   5.178 +	page_size = getpagesize();
   5.179 +	info->mem = mmap(0, page_size * BLKTAP_MMAP_REGION_SIZE, 
   5.180 +			  PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0);
   5.181 +	if ((long int)info->mem == -1) 
   5.182 +	{
   5.183 +		DPRINTF("mmap failed on dev %s!\n",devname);
   5.184 +		goto fail;
   5.185 +	}
   5.186 +
   5.187 +	/* assign the rings to the mapped memory */ 
   5.188 +	info->sring = (blkif_sring_t *)((unsigned long)info->mem);
   5.189 +	BACK_RING_INIT(&info->fe_ring, info->sring, page_size);
   5.190 +	
   5.191 +	info->vstart = 
   5.192 +	        (unsigned long)info->mem + (BLKTAP_RING_PAGES * page_size);
   5.193 +
   5.194 +	ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process );
   5.195 +	ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
   5.196 +	free(devname);
   5.197 +
   5.198 +	/*Update the fd entry*/
   5.199 +	ptr = fd_start;
   5.200 +	while (ptr != NULL) {
   5.201 +		if (s == ptr->s) {
   5.202 +			ptr->tap_fd = tap_fd;
   5.203 +
   5.204 +			/* Setup fd_handler for qemu main loop */
   5.205 +			DPRINTF("set tap_fd = %d\n", tap_fd);
   5.206 +			qemu_set_fd_handler2(tap_fd, NULL, &handle_blktap_iomsg, NULL, s);
   5.207 +
   5.208 +			break;
   5.209 +		}
   5.210 +		ptr = ptr->next;
   5.211 +	}	
   5.212 +
   5.213 +
   5.214 +	DPRINTF("map_new_dev = %d\n", minor);
   5.215 +	return minor;
   5.216 +
   5.217 + fail:
   5.218 +	free(devname);
   5.219 +	return -1;
   5.220 +}
   5.221 +
   5.222 +static int open_disk(struct td_state *s, char *path, int readonly)
   5.223 +{
   5.224 +	struct disk_id id;
   5.225 +	BlockDriverState* bs;
   5.226 +
   5.227 +	DPRINTF("Opening %s\n", path);
   5.228 +	bs = calloc(1, sizeof(*bs));
   5.229 +
   5.230 +	memset(&id, 0, sizeof(struct disk_id));
   5.231 +
   5.232 +	if (bdrv_open(bs, path, 0) != 0) {
   5.233 +		fprintf(stderr, "Could not open image file %s\n", path);
   5.234 +		return -ENOMEM;
   5.235 +	}
   5.236 +
   5.237 +	s->bs = bs;
   5.238 +	s->flags = readonly ? TD_RDONLY : 0;
   5.239 +	s->size = bs->total_sectors;
   5.240 +	s->sector_size = 512;
   5.241 +
   5.242 +	s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0);
   5.243 +
   5.244 +	return 0;
   5.245 +}
   5.246 +
   5.247 +static inline void write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp)
   5.248 +{
   5.249 +	tapdev_info_t *info = s->ring_info;
   5.250 +	blkif_response_t *rsp_d;
   5.251 +	
   5.252 +	rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt);
   5.253 +	memcpy(rsp_d, rsp, sizeof(blkif_response_t));
   5.254 +	info->fe_ring.rsp_prod_pvt++;
   5.255 +}
   5.256 +
   5.257 +static inline void kick_responses(struct td_state *s)
   5.258 +{
   5.259 +	tapdev_info_t *info = s->ring_info;
   5.260 +
   5.261 +	if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) 
   5.262 +	{
   5.263 +		RING_PUSH_RESPONSES(&info->fe_ring);
   5.264 +		ioctl(info->fd, BLKTAP_IOCTL_KICK_FE);
   5.265 +	}
   5.266 +}
   5.267 +
   5.268 +static int send_responses(struct td_state *s, int res, 
   5.269 +		   uint64_t sector, int nr_secs, int idx, void *private)
   5.270 +{
   5.271 +	pending_req_t   *preq;
   5.272 +	blkif_request_t *req;
   5.273 +	int responses_queued = 0;
   5.274 +	blkif_t *blkif = s->blkif;
   5.275 +	int secs_done = nr_secs;
   5.276 +
   5.277 +	if ( (idx > MAX_REQUESTS-1) )
   5.278 +	{
   5.279 +		DPRINTF("invalid index returned(%u)!\n", idx);
   5.280 +		return 0;
   5.281 +	}
   5.282 +	preq = &blkif->pending_list[idx];
   5.283 +	req  = &preq->req;
   5.284 +
   5.285 +	preq->secs_pending -= secs_done;
   5.286 +
   5.287 +	if (res == -EBUSY && preq->submitting) 
   5.288 +		return -EBUSY;  /* propagate -EBUSY back to higher layers */
   5.289 +	if (res) 
   5.290 +		preq->status = BLKIF_RSP_ERROR;
   5.291 +	
   5.292 +	if (!preq->submitting && preq->secs_pending == 0) 
   5.293 +	{
   5.294 +		blkif_request_t tmp;
   5.295 +		blkif_response_t *rsp;
   5.296 +
   5.297 +		tmp = preq->req;
   5.298 +		rsp = (blkif_response_t *)req;
   5.299 +		
   5.300 +		rsp->id = tmp.id;
   5.301 +		rsp->operation = tmp.operation;
   5.302 +		rsp->status = preq->status;
   5.303 +		
   5.304 +		write_rsp_to_ring(s, rsp);
   5.305 +		responses_queued++;
   5.306 +
   5.307 +		kick_responses(s);
   5.308 +	}
   5.309 +	
   5.310 +	return responses_queued;
   5.311 +}
   5.312 +
   5.313 +static void qemu_send_responses(void* opaque, int ret)
   5.314 +{
   5.315 +	struct aiocb_info* info = opaque;
   5.316 +
   5.317 +	if (ret != 0) {
   5.318 +		DPRINTF("ERROR: ret = %d (%s)\n", ret, strerror(-ret));
   5.319 +	}
   5.320 +
   5.321 +	send_responses(info->s, ret, info->sector, info->nr_secs, 
   5.322 +		info->idx, (void*) info->i);
   5.323 +	free(info);
   5.324 +}
   5.325 +
   5.326 +/**
   5.327 + * Callback function for the IO message pipe. Reads requests from the ring
   5.328 + * and processes them (call qemu read/write functions).
   5.329 + *
   5.330 + * The private parameter points to the struct td_state representing the
   5.331 + * disk the request is targeted at.
   5.332 + */
   5.333 +static void handle_blktap_iomsg(void* private)
   5.334 +{
   5.335 +	struct td_state* s = private;
   5.336 +
   5.337 +	RING_IDX          rp, j, i;
   5.338 +	blkif_request_t  *req;
   5.339 +	int idx, nsects, ret;
   5.340 +	uint64_t sector_nr;
   5.341 +	uint8_t *page;
   5.342 +	blkif_t *blkif = s->blkif;
   5.343 +	tapdev_info_t *info = s->ring_info;
   5.344 +	int page_size = getpagesize();
   5.345 +
   5.346 +	struct aiocb_info *aiocb_info;
   5.347 +
   5.348 +	if (info->fe_ring.sring == NULL) {
   5.349 +		DPRINTF("  sring == NULL, ignoring IO request\n");
   5.350 +		return;
   5.351 +	}
   5.352 +
   5.353 +	rp = info->fe_ring.sring->req_prod; 
   5.354 +	xen_rmb();
   5.355 +
   5.356 +	for (j = info->fe_ring.req_cons; j != rp; j++)
   5.357 +	{
   5.358 +		int start_seg = 0; 
   5.359 +
   5.360 +		req = NULL;
   5.361 +		req = RING_GET_REQUEST(&info->fe_ring, j);
   5.362 +		++info->fe_ring.req_cons;
   5.363 +		
   5.364 +		if (req == NULL)
   5.365 +			continue;
   5.366 +
   5.367 +		idx = req->id;
   5.368 +
   5.369 +		ASSERT(blkif->pending_list[idx].secs_pending == 0);
   5.370 +		memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));
   5.371 +		blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
   5.372 +		blkif->pending_list[idx].submitting = 1;
   5.373 +		sector_nr = req->sector_number;
   5.374 +
   5.375 +		/* Don't allow writes on readonly devices */
   5.376 +		if ((s->flags & TD_RDONLY) && 
   5.377 +		    (req->operation == BLKIF_OP_WRITE)) {
   5.378 +			blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
   5.379 +			goto send_response;
   5.380 +		}
   5.381 +
   5.382 +		for (i = start_seg; i < req->nr_segments; i++) {
   5.383 +			nsects = req->seg[i].last_sect - 
   5.384 +				 req->seg[i].first_sect + 1;
   5.385 +	
   5.386 +			if ((req->seg[i].last_sect >= page_size >> 9) ||
   5.387 +					(nsects <= 0))
   5.388 +				continue;
   5.389 +
   5.390 +			page  = (uint8_t*) MMAP_VADDR(info->vstart, 
   5.391 +						   (unsigned long)req->id, i);
   5.392 +			page += (req->seg[i].first_sect << SECTOR_SHIFT);
   5.393 +
   5.394 +			if (sector_nr >= s->size) {
   5.395 +				DPRINTF("Sector request failed:\n");
   5.396 +				DPRINTF("%s request, idx [%d,%d] size [%llu], "
   5.397 +					"sector [%llu,%llu]\n",
   5.398 +					(req->operation == BLKIF_OP_WRITE ? 
   5.399 +					 "WRITE" : "READ"),
   5.400 +					idx,i,
   5.401 +					(long long unsigned) 
   5.402 +						nsects<<SECTOR_SHIFT,
   5.403 +					(long long unsigned) 
   5.404 +						sector_nr<<SECTOR_SHIFT,
   5.405 +					(long long unsigned) sector_nr);
   5.406 +				continue;
   5.407 +			}
   5.408 +
   5.409 +			blkif->pending_list[idx].secs_pending += nsects;
   5.410 +
   5.411 +			switch (req->operation) 
   5.412 +			{
   5.413 +			case BLKIF_OP_WRITE:
   5.414 +				aiocb_info = malloc(sizeof(*aiocb_info));
   5.415 +
   5.416 +				aiocb_info->s = s;
   5.417 +				aiocb_info->sector = sector_nr;
   5.418 +				aiocb_info->nr_secs = nsects;
   5.419 +				aiocb_info->idx = idx;
   5.420 +				aiocb_info->i = i;
   5.421 +
   5.422 +				ret = (NULL == bdrv_aio_write(s->bs, sector_nr,
   5.423 +							  page, nsects,
   5.424 +							  qemu_send_responses,
   5.425 +							  aiocb_info));
   5.426 +
   5.427 +				if (ret) {
   5.428 +					blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
   5.429 +					DPRINTF("ERROR: bdrv_write() == NULL\n");
   5.430 +					goto send_response;
   5.431 +				}
   5.432 +				break;
   5.433 +
   5.434 +			case BLKIF_OP_READ:
   5.435 +				aiocb_info = malloc(sizeof(*aiocb_info));
   5.436 +
   5.437 +				aiocb_info->s = s;
   5.438 +				aiocb_info->sector = sector_nr;
   5.439 +				aiocb_info->nr_secs = nsects;
   5.440 +				aiocb_info->idx = idx;
   5.441 +				aiocb_info->i = i;
   5.442 +
   5.443 +				ret = (NULL == bdrv_aio_read(s->bs, sector_nr,
   5.444 +							 page, nsects,
   5.445 +							 qemu_send_responses,
   5.446 +							 aiocb_info));
   5.447 +
   5.448 +				if (ret) {
   5.449 +					blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
   5.450 +					DPRINTF("ERROR: bdrv_read() == NULL\n");
   5.451 +					goto send_response;
   5.452 +				}
   5.453 +				break;
   5.454 +
   5.455 +			default:
   5.456 +				DPRINTF("Unknown block operation\n");
   5.457 +				break;
   5.458 +			}
   5.459 +			sector_nr += nsects;
   5.460 +		}
   5.461 +	send_response:
   5.462 +		blkif->pending_list[idx].submitting = 0;
   5.463 +
   5.464 +		/* force write_rsp_to_ring for synchronous case */
   5.465 +		if (blkif->pending_list[idx].secs_pending == 0)
   5.466 +			send_responses(s, 0, 0, 0, idx, (void *)(long)0);
   5.467 +	}
   5.468 +}
   5.469 +
   5.470 +/**
   5.471 + * Callback function for the qemu-read pipe. Reads and processes control 
   5.472 + * message from the pipe.
   5.473 + *
   5.474 + * The parameter private is unused.
   5.475 + */
   5.476 +static void handle_blktap_ctrlmsg(void* private)
   5.477 +{
   5.478 +	int length, len, msglen;
   5.479 +	char *ptr, *path;
   5.480 +	image_t *img;
   5.481 +	msg_hdr_t *msg;
   5.482 +	msg_newdev_t *msg_dev;
   5.483 +	msg_pid_t *msg_pid;
   5.484 +	int ret = -1;
   5.485 +	struct td_state *s = NULL;
   5.486 +	fd_list_entry_t *entry;
   5.487 +
   5.488 +	char buf[MSG_SIZE];
   5.489 +
   5.490 +	length = read(read_fd, buf, MSG_SIZE);
   5.491 +
   5.492 +	if (length > 0 && length >= sizeof(msg_hdr_t)) 
   5.493 +	{
   5.494 +		msg = (msg_hdr_t *)buf;
   5.495 +		DPRINTF("blktap: Received msg, len %d, type %d, UID %d\n",
   5.496 +			length,msg->type,msg->cookie);
   5.497 +
   5.498 +		switch (msg->type) {
   5.499 +		case CTLMSG_PARAMS: 			
   5.500 +			ptr = buf + sizeof(msg_hdr_t);
   5.501 +			len = (length - sizeof(msg_hdr_t));
   5.502 +			path = calloc(1, len + 1);
   5.503 +			
   5.504 +			memcpy(path, ptr, len); 
   5.505 +			DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path);
   5.506 +
   5.507 +			/* Allocate the disk structs */
   5.508 +			s = state_init();
   5.509 +
   5.510 +			/*Open file*/
   5.511 +			if (s == NULL || open_disk(s, path, msg->readonly)) {
   5.512 +				msglen = sizeof(msg_hdr_t);
   5.513 +				msg->type = CTLMSG_IMG_FAIL;
   5.514 +				msg->len = msglen;
   5.515 +			} else {
   5.516 +				entry = add_fd_entry(0, s);
   5.517 +				entry->cookie = msg->cookie;
   5.518 +				DPRINTF("Entered cookie %d\n", entry->cookie);
   5.519 +				
   5.520 +				memset(buf, 0x00, MSG_SIZE); 
   5.521 +			
   5.522 +				msglen = sizeof(msg_hdr_t) + sizeof(image_t);
   5.523 +				msg->type = CTLMSG_IMG;
   5.524 +				img = (image_t *)(buf + sizeof(msg_hdr_t));
   5.525 +				img->size = s->size;
   5.526 +				img->secsize = s->sector_size;
   5.527 +				img->info = s->info;
   5.528 +				DPRINTF("Writing (size, secsize, info) = "
   5.529 +					"(%#" PRIx64 ", %#" PRIx64 ", %d)\n",
   5.530 +					s->size, s->sector_size, s->info);
   5.531 +			}
   5.532 +			len = write(write_fd, buf, msglen);
   5.533 +			free(path);
   5.534 +			break;
   5.535 +			
   5.536 +		case CTLMSG_NEWDEV:
   5.537 +			msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
   5.538 +
   5.539 +			s = get_state(msg->cookie);
   5.540 +			DPRINTF("Retrieving state, cookie %d.....[%s]\n",
   5.541 +				msg->cookie, (s == NULL ? "FAIL":"OK"));
   5.542 +			if (s != NULL) {
   5.543 +				ret = ((map_new_dev(s, msg_dev->devnum) 
   5.544 +					== msg_dev->devnum ? 0: -1));
   5.545 +			}	
   5.546 +
   5.547 +			memset(buf, 0x00, MSG_SIZE); 
   5.548 +			msglen = sizeof(msg_hdr_t);
   5.549 +			msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP 
   5.550 +				              : CTLMSG_NEWDEV_FAIL);
   5.551 +			msg->len = msglen;
   5.552 +
   5.553 +			len = write(write_fd, buf, msglen);
   5.554 +			break;
   5.555 +
   5.556 +		case CTLMSG_CLOSE:
   5.557 +			s = get_state(msg->cookie);
   5.558 +			if (s) unmap_disk(s);
   5.559 +			break;			
   5.560 +
   5.561 +		case CTLMSG_PID:
   5.562 +			memset(buf, 0x00, MSG_SIZE);
   5.563 +			msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t);
   5.564 +			msg->type = CTLMSG_PID_RSP;
   5.565 +			msg->len = msglen;
   5.566 +
   5.567 +			msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t));
   5.568 +			process = getpid();
   5.569 +			msg_pid->pid = process;
   5.570 +
   5.571 +			len = write(write_fd, buf, msglen);
   5.572 +			break;
   5.573 +
   5.574 +		default:
   5.575 +			break;
   5.576 +		}
   5.577 +	}
   5.578 +}
   5.579 +
   5.580 +/**
   5.581 + * Opens a control socket, i.e. a pipe to communicate with blktapctrl.
   5.582 + *
   5.583 + * Returns the file descriptor number for the pipe; -1 in error case
   5.584 + */
   5.585 +static int open_ctrl_socket(char *devname)
   5.586 +{
   5.587 +	int ret;
   5.588 +	int ipc_fd;
   5.589 +
   5.590 +	if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0)
   5.591 +		DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR);
   5.592 +
   5.593 +	ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO);
   5.594 +	if ( (ret != 0) && (errno != EEXIST) ) {
   5.595 +		DPRINTF("ERROR: pipe failed (%d)\n", errno);
   5.596 +		return -1;
   5.597 +	}
   5.598 +
   5.599 +	ipc_fd = open(devname,O_RDWR|O_NONBLOCK);
   5.600 +
   5.601 +	if (ipc_fd < 0) {
   5.602 +		DPRINTF("FD open failed\n");
   5.603 +		return -1;
   5.604 +	}
   5.605 +
   5.606 +	return ipc_fd;
   5.607 +}
   5.608 +
   5.609 +/**
   5.610 + * Unmaps all disks and closes their pipes
   5.611 + */
   5.612 +void shutdown_blktap(void)
   5.613 +{
   5.614 +	fd_list_entry_t *ptr;
   5.615 +	struct td_state *s;
   5.616 +	char *devname;
   5.617 +
   5.618 +	DPRINTF("Shutdown blktap\n");
   5.619 +
   5.620 +	/* Unmap all disks */
   5.621 +	ptr = fd_start;
   5.622 +	while (ptr != NULL) {
   5.623 +		s = ptr->s;
   5.624 +		unmap_disk(s);
   5.625 +		close(ptr->tap_fd);
   5.626 +		ptr = ptr->next;
   5.627 +	}
   5.628 +
   5.629 +	/* Delete control pipes */
   5.630 +	if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) {
   5.631 +		DPRINTF("Delete %s\n", devname);
   5.632 +		if (unlink(devname))
   5.633 +			DPRINTF("Could not delete: %s\n", strerror(errno));
   5.634 +		free(devname);
   5.635 +	}
   5.636 +	
   5.637 +	if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) {	
   5.638 +		DPRINTF("Delete %s\n", devname);
   5.639 +		if (unlink(devname))
   5.640 +			DPRINTF("Could not delete: %s\n", strerror(errno));
   5.641 +		free(devname);
   5.642 +	}
   5.643 +}
   5.644 +
   5.645 +/**
   5.646 + * Initialize the blktap interface, i.e. open a pair of pipes in /var/run/tap
   5.647 + * and register a fd handler.
   5.648 + *
   5.649 + * Returns 0 on success.
   5.650 + */
   5.651 +int init_blktap(void)
   5.652 +{
   5.653 +	char* devname;	
   5.654 +
   5.655 +	DPRINTF("Init blktap pipes\n");
   5.656 +
   5.657 +	/* Open the read pipe */
   5.658 +	if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) {	
   5.659 +		read_fd = open_ctrl_socket(devname);		
   5.660 +		free(devname);
   5.661 +		
   5.662 +		if (read_fd == -1) {
   5.663 +			fprintf(stderr, "Could not open %s/qemu-read-%d\n",
   5.664 +				BLKTAP_CTRL_DIR, domid);
   5.665 +			return -1;
   5.666 +		}
   5.667 +	}
   5.668 +	
   5.669 +	/* Open the write pipe */
   5.670 +	if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) {
   5.671 +		write_fd = open_ctrl_socket(devname);
   5.672 +		free(devname);
   5.673 +		
   5.674 +		if (write_fd == -1) {
   5.675 +			fprintf(stderr, "Could not open %s/qemu-write-%d\n",
   5.676 +				BLKTAP_CTRL_DIR, domid);
   5.677 +			close(read_fd);
   5.678 +			return -1;
   5.679 +		}
   5.680 +	}
   5.681 +
   5.682 +	/* Attach a handler to the read pipe (called from qemu main loop) */
   5.683 +	qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL);
   5.684 +
   5.685 +	/* Register handler to clean up when the domain is destroyed */
   5.686 +	atexit(&shutdown_blktap);
   5.687 +
   5.688 +	return 0;
   5.689 +}
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/tools/ioemu/hw/xen_blktap.h	Tue Mar 18 10:58:47 2008 +0000
     6.3 @@ -0,0 +1,57 @@
     6.4 +/* xen_blktap.h
     6.5 + *
     6.6 + * Generic disk interface for blktap-based image adapters.
     6.7 + *
     6.8 + * (c) 2006 Andrew Warfield and Julian Chesterfield
     6.9 + */
    6.10 +
    6.11 +#ifndef XEN_BLKTAP_H_ 
    6.12 +#define XEN_BLKTAP_H_
    6.13 +
    6.14 +#include <stdint.h>
    6.15 +#include <syslog.h>
    6.16 +#include <stdio.h>
    6.17 +
    6.18 +#include "block_int.h"
    6.19 +
    6.20 +/* Things disks need to know about, these should probably be in a higher-level
    6.21 + * header. */
    6.22 +#define MAX_SEGMENTS_PER_REQ    11
    6.23 +#define SECTOR_SHIFT             9
    6.24 +#define DEFAULT_SECTOR_SIZE    512
    6.25 +
    6.26 +#define MAX_IOFD                 2
    6.27 +
    6.28 +#define BLK_NOT_ALLOCATED       99
    6.29 +#define TD_NO_PARENT             1
    6.30 +
    6.31 +typedef uint32_t td_flag_t;
    6.32 +
    6.33 +#define TD_RDONLY                1
    6.34 +
    6.35 +struct disk_id {
    6.36 +	char *name;
    6.37 +	int drivertype;
    6.38 +};
    6.39 +
    6.40 +/* This structure represents the state of an active virtual disk.           */
    6.41 +struct td_state {
    6.42 +	BlockDriverState* bs;
    6.43 +	td_flag_t flags;
    6.44 +	void *blkif;
    6.45 +	void *image;
    6.46 +	void *ring_info;
    6.47 +	void *fd_entry;
    6.48 +	uint64_t sector_size;
    6.49 +	uint64_t size;
    6.50 +	unsigned int       info;
    6.51 +};
    6.52 +
    6.53 +typedef struct fd_list_entry {
    6.54 +	int cookie;
    6.55 +	int  tap_fd;
    6.56 +	struct td_state *s;
    6.57 +	struct fd_list_entry **pprev, *next;
    6.58 +} fd_list_entry_t;
    6.59 +
    6.60 +#endif /*XEN_BLKTAP_H_*/
     7.1 --- a/tools/ioemu/hw/xen_machine_pv.c	Tue Mar 18 10:53:20 2008 +0000
     7.2 +++ b/tools/ioemu/hw/xen_machine_pv.c	Tue Mar 18 10:58:47 2008 +0000
     7.3 @@ -26,6 +26,9 @@
     7.4  #include "xen_console.h"
     7.5  #include "xenfb.h"
     7.6  
     7.7 +extern void init_blktap(void);
     7.8 +
     7.9 +
    7.10  /* The Xen PV machine currently provides
    7.11   *   - a virtual framebuffer
    7.12   *   - ....
    7.13 @@ -41,6 +44,10 @@ static void xen_init_pv(uint64_t ram_siz
    7.14      struct xenfb *xenfb;
    7.15      extern int domid;
    7.16  
    7.17 +
    7.18 +    /* Initialize tapdisk client */
    7.19 +    init_blktap();
    7.20 +
    7.21      /* Connect to text console */
    7.22      if (serial_hds[0]) {
    7.23          if (xencons_init(domid, serial_hds[0]) < 0) {
     8.1 --- a/tools/ioemu/vl.c	Tue Mar 18 10:53:20 2008 +0000
     8.2 +++ b/tools/ioemu/vl.c	Tue Mar 18 10:58:47 2008 +0000
     8.3 @@ -6272,6 +6272,12 @@ void qemu_system_powerdown_request(void)
     8.4          cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
     8.5  }
     8.6  
     8.7 +static void qemu_sighup_handler(int signal)
     8.8 +{
     8.9 +    fprintf(stderr, "Received SIGHUP, terminating.\n");
    8.10 +    exit(0);
    8.11 +}
    8.12 +
    8.13  void main_loop_wait(int timeout)
    8.14  {
    8.15      IOHandlerRecord *ioh;
    8.16 @@ -7980,7 +7986,7 @@ int main(int argc, char **argv)
    8.17  
    8.18  #ifndef CONFIG_STUBDOM
    8.19      /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller */
    8.20 -    signal(SIGHUP, SIG_DFL);
    8.21 +    signal(SIGHUP, qemu_sighup_handler);
    8.22      sigemptyset(&set);
    8.23      sigaddset(&set, SIGTERM);
    8.24      sigaddset(&set, SIGHUP);
     9.1 --- a/tools/python/xen/xend/server/BlktapController.py	Tue Mar 18 10:53:20 2008 +0000
     9.2 +++ b/tools/python/xen/xend/server/BlktapController.py	Tue Mar 18 10:58:47 2008 +0000
     9.3 @@ -13,7 +13,9 @@ blktap_disk_types = [
     9.4      'vmdk',
     9.5      'ram',
     9.6      'qcow',
     9.7 -    'qcow2'
     9.8 +    'qcow2',
     9.9 +
    9.10 +    'ioemu'
    9.11      ]
    9.12  
    9.13  class BlktapController(BlkifController):