ia64/xen-unstable

view tools/blktap2/vhd/lib/vhd-util-coalesce.c @ 19647:1c627434605e

blktap2: a completely rewritten blktap implementation

Benefits to blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, and IO retry logic to protect
guests from transient block device failures. This has been tested
and is known to work on weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image
format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:52:31 2009 +0100 (2009-05-26)
parents
children b7f73a7f3078
line source
1 /* Copyright (c) 2008, XenSource Inc.
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * * Neither the name of XenSource Inc. nor the names of its contributors
12 * may be used to endorse or promote products derived from this software
13 * without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
33 #include "libvhd.h"
/*
 * Write 'secs' sectors from 'buf' to the raw parent image open on 'fd',
 * starting at sector offset 'sec'.
 *
 * Returns 0 when the whole request was written.  On failure returns the
 * negated errno of the failing call, or -EIO if write() came up short
 * without setting errno.
 */
static int
__raw_io_write(int fd, char* buf, uint64_t sec, uint32_t secs)
{
	int err;
	off64_t off;
	ssize_t ret;

	errno = 0;
	off = lseek64(fd, vhd_sectors_to_bytes(sec), SEEK_SET);
	if (off == (off64_t)-1) {
		/* latch errno now: printf() is allowed to overwrite it */
		err = -errno;
		printf("raw parent: seek(0x%08"PRIx64") failed: %d\n",
		       vhd_sectors_to_bytes(sec), err);
		return err;
	}

	ret = write(fd, buf, vhd_sectors_to_bytes(secs));
	if (ret >= 0 && (uint64_t)ret == vhd_sectors_to_bytes(secs))
		return 0;

	/* errno is still 0 here if write() was merely short */
	err = -errno;
	printf("raw parent: write of 0x%"PRIx64" returned %zd, errno: %d\n",
	       vhd_sectors_to_bytes(secs), ret, err);
	return (err ? err : -EIO);
}
58 /*
59 * Use 'parent' if the parent is VHD, and 'parent_fd' if the parent is raw
60 */
61 static int
62 vhd_util_coalesce_block(vhd_context_t *vhd, vhd_context_t *parent,
63 int parent_fd, uint64_t block)
64 {
65 int i, err;
66 char *buf, *map;
67 uint64_t sec, secs;
69 buf = NULL;
70 map = NULL;
71 sec = block * vhd->spb;
73 if (vhd->bat.bat[block] == DD_BLK_UNUSED)
74 return 0;
76 err = posix_memalign((void **)&buf, 4096, vhd->header.block_size);
77 if (err)
78 return -err;
80 err = vhd_io_read(vhd, buf, sec, vhd->spb);
81 if (err)
82 goto done;
84 if (vhd_has_batmap(vhd) && vhd_batmap_test(vhd, &vhd->batmap, block)) {
85 if (parent->file)
86 err = vhd_io_write(parent, buf, sec, vhd->spb);
87 else
88 err = __raw_io_write(parent_fd, buf, sec, vhd->spb);
89 goto done;
90 }
92 err = vhd_read_bitmap(vhd, block, &map);
93 if (err)
94 goto done;
96 for (i = 0; i < vhd->spb; i++) {
97 if (!vhd_bitmap_test(vhd, map, i))
98 continue;
100 for (secs = 0; i + secs < vhd->spb; secs++)
101 if (!vhd_bitmap_test(vhd, map, i + secs))
102 break;
104 if (parent->file)
105 err = vhd_io_write(parent,
106 buf + vhd_sectors_to_bytes(i),
107 sec + i, secs);
108 else
109 err = __raw_io_write(parent_fd,
110 buf + vhd_sectors_to_bytes(i),
111 sec + i, secs);
112 if (err)
113 goto done;
115 i += secs;
116 }
118 err = 0;
120 done:
121 free(buf);
122 free(map);
123 return err;
124 }
126 int
127 vhd_util_coalesce(int argc, char **argv)
128 {
129 int err, c;
130 uint64_t i;
131 char *name, *pname;
132 vhd_context_t vhd, parent;
133 int parent_fd = -1;
135 name = NULL;
136 pname = NULL;
137 parent.file = NULL;
139 if (!argc || !argv)
140 goto usage;
142 optind = 0;
143 while ((c = getopt(argc, argv, "n:h")) != -1) {
144 switch (c) {
145 case 'n':
146 name = optarg;
147 break;
148 case 'h':
149 default:
150 goto usage;
151 }
152 }
154 if (!name || optind != argc)
155 goto usage;
157 err = vhd_open(&vhd, name, VHD_OPEN_RDONLY);
158 if (err) {
159 printf("error opening %s: %d\n", name, err);
160 return err;
161 }
163 err = vhd_parent_locator_get(&vhd, &pname);
164 if (err) {
165 printf("error finding %s parent: %d\n", name, err);
166 vhd_close(&vhd);
167 return err;
168 }
170 if (vhd_parent_raw(&vhd)) {
171 parent_fd = open(pname, O_RDWR | O_DIRECT | O_LARGEFILE, 0644);
172 if (parent_fd == -1) {
173 err = -errno;
174 printf("failed to open parent %s: %d\n", pname, err);
175 vhd_close(&vhd);
176 return err;
177 }
178 } else {
179 err = vhd_open(&parent, pname, VHD_OPEN_RDWR);
180 if (err) {
181 printf("error opening %s: %d\n", pname, err);
182 free(pname);
183 vhd_close(&vhd);
184 return err;
185 }
186 }
188 err = vhd_get_bat(&vhd);
189 if (err)
190 goto done;
192 if (vhd_has_batmap(&vhd)) {
193 err = vhd_get_batmap(&vhd);
194 if (err)
195 goto done;
196 }
198 for (i = 0; i < vhd.bat.entries; i++) {
199 err = vhd_util_coalesce_block(&vhd, &parent, parent_fd, i);
200 if (err)
201 goto done;
202 }
204 err = 0;
206 done:
207 free(pname);
208 vhd_close(&vhd);
209 if (parent.file)
210 vhd_close(&parent);
211 else
212 close(parent_fd);
213 return err;
215 usage:
216 printf("options: <-n name> [-h help]\n");
217 return -EINVAL;
218 }