ia64/xen-unstable

view tools/blktap2/drivers/block-ram.c @ 19647:1c627434605e

blktap2: a completely rewritten blktap implementation

Benefits to blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, IO retry logic to protect
guests from transient block device failures. This has been tested
and is known to work on weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:52:31 2009 +0100 (2009-05-26)
parents
children b7f73a7f3078
line source
1 /*
2 * Copyright (c) 2007, XenSource Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of XenSource Inc. nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <sys/statvfs.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <linux/fs.h>
38 #include <string.h>
40 #include "tapdisk.h"
41 #include "tapdisk-driver.h"
42 #include "tapdisk-interface.h"
/*
 * Driver-wide state shared by every open instance of the RAM disk.
 */

/* In-memory copy of the disk image; filled by tdram_open(). */
char *img;

/* Geometry cached by get_image_info() and handed to later openers. */
long disksector_size;
long disksize;
long diskinfo;

/* Number of tdram_open() calls not yet balanced by tdram_close(). */
static int connections = 0;
/* Per-driver-instance private data for the RAM disk driver. */
struct tdram_state {
	/* Descriptor of the backing image, or -1 when this instance
	 * merely shares an image that another instance loaded. */
	int fd;
};
54 /*Get Image size, secsize*/
55 static int get_image_info(int fd, td_disk_info_t *info)
56 {
57 int ret;
58 long size;
59 unsigned long total_size;
60 struct statvfs statBuf;
61 struct stat stat;
63 ret = fstat(fd, &stat);
64 if (ret != 0) {
65 DPRINTF("ERROR: fstat failed, Couldn't stat image");
66 return -EINVAL;
67 }
69 if (S_ISBLK(stat.st_mode)) {
70 /*Accessing block device directly*/
71 info->size = 0;
72 if (ioctl(fd,BLKGETSIZE,&info->size)!=0) {
73 DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
74 return -EINVAL;
75 }
77 DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost "
78 "sector_shift [%llu]\n",
79 (long long unsigned)(info->size << SECTOR_SHIFT),
80 (long long unsigned)info->size);
82 /*Get the sector size*/
83 #if defined(BLKSSZGET)
84 {
85 int arg;
86 info->sector_size = DEFAULT_SECTOR_SIZE;
87 ioctl(fd, BLKSSZGET, &info->sector_size);
89 if (info->sector_size != DEFAULT_SECTOR_SIZE)
90 DPRINTF("Note: sector size is %ld (not %d)\n",
91 info->sector_size, DEFAULT_SECTOR_SIZE);
92 }
93 #else
94 info->sector_size = DEFAULT_SECTOR_SIZE;
95 #endif
97 } else {
98 /*Local file? try fstat instead*/
99 info->size = (stat.st_size >> SECTOR_SHIFT);
100 info->sector_size = DEFAULT_SECTOR_SIZE;
101 DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost "
102 "sector_shift [%llu]\n",
103 (long long unsigned)(info->size << SECTOR_SHIFT),
104 (long long unsigned)info->size);
105 }
107 if (info->size == 0) {
108 info->size =((uint64_t) MAX_RAMDISK_SIZE);
109 info->sector_size = DEFAULT_SECTOR_SIZE;
110 }
111 info->info = 0;
113 /*Store variables locally*/
114 disksector_size = info->sector_size;
115 disksize = info->size;
116 diskinfo = info->info;
117 DPRINTF("Image sector_size: \n\t[%lu]\n",
118 info->sector_size);
120 return 0;
121 }
123 /* Open the disk file and initialize ram state. */
124 int tdram_open (td_driver_t *driver, const char *name, td_flag_t flags)
125 {
126 char *p;
127 uint64_t size;
128 int i, fd, ret = 0, count = 0, o_flags;
129 struct tdram_state *prv = (struct tdram_state *)driver->data;
131 connections++;
133 if (connections > 1) {
134 driver->info.sector_size = disksector_size;
135 driver->info.size = disksize;
136 driver->info.info = diskinfo;
137 DPRINTF("Image already open, returning parameters:\n");
138 DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost "
139 "sector_shift [%llu]\n",
140 (long long unsigned)(driver->info.size << SECTOR_SHIFT),
141 (long long unsigned)driver->info.size);
142 DPRINTF("Image sector_size: \n\t[%lu]\n",
143 driver->info.sector_size);
145 prv->fd = -1;
146 goto done;
147 }
149 /* Open the file */
150 o_flags = O_DIRECT | O_LARGEFILE |
151 ((flags == TD_OPEN_RDONLY) ? O_RDONLY : O_RDWR);
152 fd = open(name, o_flags);
154 if ((fd == -1) && (errno == EINVAL)) {
156 /* Maybe O_DIRECT isn't supported. */
157 o_flags &= ~O_DIRECT;
158 fd = open(name, o_flags);
159 if (fd != -1) DPRINTF("WARNING: Accessing image without"
160 "O_DIRECT! (%s)\n", name);
162 } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
164 if (fd == -1) {
165 DPRINTF("Unable to open [%s]!\n",name);
166 ret = 0 - errno;
167 goto done;
168 }
170 prv->fd = fd;
172 ret = get_image_info(fd, &driver->info);
173 size = MAX_RAMDISK_SIZE;
175 if (driver->info.size > size) {
176 DPRINTF("Disk exceeds limit, must be less than [%d]MB",
177 (MAX_RAMDISK_SIZE<<SECTOR_SHIFT)>>20);
178 return -ENOMEM;
179 }
181 /*Read the image into memory*/
182 if (posix_memalign((void **)&img,
183 DEFAULT_SECTOR_SIZE,
184 driver->info.size << SECTOR_SHIFT)) {
185 DPRINTF("Mem malloc failed\n");
186 return -errno;
187 }
188 p = img;
189 DPRINTF("Reading %llu bytes.......",
190 (long long unsigned)driver->info.size << SECTOR_SHIFT);
192 for (i = 0; i < driver->info.size; i++) {
193 ret = read(prv->fd, p, driver->info.sector_size);
194 if (ret != driver->info.sector_size) {
195 DPRINTF("ret = %d, errno = %d\n", ret, errno);
196 ret = 0 - errno;
197 break;
198 } else {
199 count += ret;
200 p = img + count;
201 }
202 }
203 DPRINTF("[%d]\n",count);
204 if (count != driver->info.size << SECTOR_SHIFT) {
205 ret = -1;
206 } else {
207 ret = 0;
208 }
210 done:
211 return ret;
212 }
214 void tdram_queue_read(td_driver_t *driver, td_request_t treq)
215 {
216 struct tdram_state *prv = (struct tdram_state *)driver->data;
217 int size = treq.secs * driver->info.sector_size;
218 uint64_t offset = treq.sec * (uint64_t)driver->info.sector_size;
220 memcpy(treq.buf, img + offset, size);
222 td_complete_request(treq, 0);
223 }
225 void tdram_queue_write(td_driver_t *driver, td_request_t treq)
226 {
227 struct tdram_state *prv = (struct tdram_state *)driver->data;
228 int size = treq.secs * driver->info.sector_size;
229 uint64_t offset = treq.sec * (uint64_t)driver->info.sector_size;
231 /* We assume that write access is controlled
232 * at a higher level for multiple disks */
233 memcpy(img + offset, treq.buf, size);
235 td_complete_request(treq, 0);
236 }
238 int tdram_close(td_driver_t *driver)
239 {
240 struct tdram_state *prv = (struct tdram_state *)driver->data;
242 connections--;
244 return 0;
245 }
247 int tdram_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
248 {
249 return TD_NO_PARENT;
250 }
252 int tdram_validate_parent(td_driver_t *driver,
253 td_driver_t *pdriver, td_flag_t flags)
254 {
255 return -EINVAL;
256 }
258 struct tap_disk tapdisk_ram = {
259 .disk_type = "tapdisk_ram",
260 .flags = 0,
261 .private_data_size = sizeof(struct tdram_state),
262 .td_open = tdram_open,
263 .td_close = tdram_close,
264 .td_queue_read = tdram_queue_read,
265 .td_queue_write = tdram_queue_write,
266 .td_get_parent_id = tdram_get_parent_id,
267 .td_validate_parent = tdram_validate_parent,
268 .td_debug = NULL,
269 };