ia64/xen-unstable

view tools/blktap2/include/libvhd.h @ 19647:1c627434605e

blktap2: a completely rewritten blktap implementation

Benefits to blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, IO retry logic to protect guests
from transient block device failures. This has been tested and is known
to work on weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD image
format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:52:31 2009 +0100 (2009-05-26)
parents
children b7f73a7f3078
line source
1 /* Copyright (c) 2008, XenSource Inc.
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * * Neither the name of XenSource Inc. nor the names of its contributors
12 * may be used to endorse or promote products derived from this software
13 * without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #ifndef _VHD_LIB_H_
28 #define _VHD_LIB_H_
30 #include <string.h>
31 #include <endian.h>
32 #include <byteswap.h>
33 #include <uuid/uuid.h>
35 #include "vhd.h"
/*
 * In-place byte-order conversion helpers.
 *
 * Each macro takes a pointer to a 16-, 32- or 64-bit integer.  When
 * BYTE_ORDER == LITTLE_ENDIAN the pointed-to value is byte-swapped in place;
 * otherwise the macros expand to a no-op expression.  The _IN and _OUT
 * variants perform the same operation (a byte swap is its own inverse).
 * Presumably these are used on big-endian on-disk VHD fields -- confirm
 * against the callers.
 *
 * NOTE: in the swapping variants the argument is evaluated twice, so it must
 * not have side effects.  In the no-op variants it is not evaluated at all.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
  #define BE16_IN(foo)   ((*(foo)) = bswap_16(*(foo)))
  #define BE32_IN(foo)   ((*(foo)) = bswap_32(*(foo)))
  #define BE64_IN(foo)   ((*(foo)) = bswap_64(*(foo)))
  #define BE16_OUT(foo)  ((*(foo)) = bswap_16(*(foo)))
  #define BE32_OUT(foo)  ((*(foo)) = bswap_32(*(foo)))
  #define BE64_OUT(foo)  ((*(foo)) = bswap_64(*(foo)))
#else
  #define BE16_IN(foo)   ((void)0)
  #define BE32_IN(foo)   ((void)0)
  #define BE64_IN(foo)   ((void)0)
  /* BE16_OUT used to be missing here: BE32_OUT was defined twice instead. */
  #define BE16_OUT(foo)  ((void)0)
  #define BE32_OUT(foo)  ((void)0)
  #define BE64_OUT(foo)  ((void)0)
#endif
/*
 * Two-argument minimum / maximum.
 * The selected argument is evaluated twice, so avoid arguments with side
 * effects.
 */
#define MIN(a, b)  ((a) < (b) ? (a) : (b))
#define MAX(a, b)  ((a) > (b) ? (a) : (b))
/* Upper bound used for VHD names (units are not stated in this header). */
#define VHD_MAX_NAME_LEN                1024

/* VHD_BLOCK_SIZE is 2^VHD_BLOCK_SHIFT, i.e. 2 MiB, as an unsigned long long. */
#define VHD_BLOCK_SHIFT                 21
#define VHD_BLOCK_SIZE                  (1ULL << VHD_BLOCK_SHIFT)

/* Character-set names (presumably passed to iconv -- confirm in libvhd.c). */
#define UTF_16                          "UTF-16"
#define UTF_16LE                        "UTF-16LE"
#define UTF_16BE                        "UTF-16BE"

/* VHD_OPEN_* flag bits; each occupies a distinct bit. */
#define VHD_OPEN_RDONLY                 0x01
#define VHD_OPEN_RDWR                   0x02
#define VHD_OPEN_FAST                   0x04
#define VHD_OPEN_STRICT                 0x08
#define VHD_OPEN_IGNORE_DISABLED        0x10

/* Creation flag bit (see vhd_flag_creat_t). */
#define VHD_FLAG_CREAT_PARENT_RAW       0x01

/*
 * Bit-flag helpers.  set/clear modify `word` in place; test yields the
 * masked bits (non-zero when any bit of `flag` is set), not a 0/1 value.
 */
#define vhd_flag_set(word, flag)        ((word) |= (flag))
#define vhd_flag_clear(word, flag)      ((word) &= ~(flag))
#define vhd_flag_test(word, flag)       ((word) & (flag))
/*
 * Failure-injection support.
 *
 * ENABLE_FAILURE_TESTING is defined unconditionally here, so the active
 * definitions below are always the ones in the #ifdef branch.  The FAIL_*
 * constants index the TEST_FAIL[] / ENV_VAR_FAIL[] arrays, and
 * NUM_FAIL_TESTS is the number of such indices.
 */
#define ENABLE_FAILURE_TESTING
#define FAIL_REPARENT_BEGIN             0
#define FAIL_REPARENT_LOCATOR           1
#define FAIL_REPARENT_END               2
#define FAIL_RESIZE_BEGIN               3
#define FAIL_RESIZE_DATA_MOVED          4
#define FAIL_RESIZE_METADATA_MOVED      5
#define FAIL_RESIZE_END                 6
#define NUM_FAIL_TESTS                  7

#ifdef ENABLE_FAILURE_TESTING
/*
 * TEST_FAIL_AT(point): if TEST_FAIL[point] is non-zero, print the matching
 * ENV_VAR_FAIL[point] string and terminate the process with exit status
 * EINVAL.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement; the previous bare `if { }` form broke (or silently
 * captured the `else`) when used as `if (c) TEST_FAIL_AT(p); else ...`.
 * Invoke it with a trailing semicolon.
 *
 * The user must have TEST_FAIL[] and ENV_VAR_FAIL[] in scope (see
 * TEST_FAIL_EXTERN_VARS) as well as printf(), exit() and EINVAL.
 */
#define TEST_FAIL_AT(point)                                             \
	do {                                                            \
		if (TEST_FAIL[point]) {                                 \
			printf("Failing at %s\n", ENV_VAR_FAIL[point]); \
			exit(EINVAL);                                   \
		}                                                       \
	} while (0)
/* Declares the two arrays consulted by TEST_FAIL_AT(). */
#define TEST_FAIL_EXTERN_VARS                                           \
	extern const char* ENV_VAR_FAIL[];                              \
	extern int TEST_FAIL[];
#else
#define TEST_FAIL_AT(point)             do { } while (0)
#define TEST_FAIL_EXTERN_VARS
#endif // ENABLE_FAILURE_TESTING
/* Footer cookie value that vhd_disabled() compares against. */
static const char VHD_POISON_COOKIE[] = "v_poison";

/* Short names for structures whose tags are not defined in this header. */
typedef struct hd_ftr           vhd_footer_t;
typedef struct dd_hdr           vhd_header_t;
typedef struct dd_batmap_hdr    vhd_batmap_header_t;
typedef struct prt_loc          vhd_parent_locator_t;

/* Short names for the structures defined in this header. */
typedef struct vhd_bat          vhd_bat_t;
typedef struct vhd_batmap       vhd_batmap_t;
typedef struct vhd_context      vhd_context_t;

/* Flag word passed to vhd_create() / vhd_snapshot(). */
typedef uint32_t                vhd_flag_creat_t;
/*
 * In-memory block allocation table.
 * NOTE(review): field meanings below are inferred from the names only --
 * confirm against libvhd.c.
 */
struct vhd_bat {
	uint32_t        spb;            /* presumably sectors per block */
	uint32_t        entries;        /* presumably number of elements in bat[] */
	uint32_t       *bat;            /* table storage */
};
118 struct vhd_batmap {
119 vhd_batmap_header_t header;
120 char *map;
121 };
123 struct vhd_context {
124 int fd;
125 char *file;
126 int oflags;
127 int is_block;
129 uint32_t spb;
130 uint32_t bm_secs;
132 vhd_header_t header;
133 vhd_footer_t footer;
134 vhd_bat_t bat;
135 vhd_batmap_t batmap;
136 };
138 static inline uint32_t
139 secs_round_up(uint64_t bytes)
140 {
141 return ((bytes + (VHD_SECTOR_SIZE - 1)) >> VHD_SECTOR_SHIFT);
142 }
/*
 * Same as secs_round_up(), except that a result of 0 is replaced by 1.
 */
static inline uint32_t
secs_round_up_no_zero(uint64_t bytes)
{
	const uint32_t secs = secs_round_up(bytes);

	return secs ? secs : 1;
}
150 static inline uint64_t
151 vhd_sectors_to_bytes(uint64_t sectors)
152 {
153 return sectors << VHD_SECTOR_SHIFT;
154 }
/*
 * Pad a byte count: convert it to sectors with secs_round_up_no_zero()
 * (so the result covers at least one sector), then back to bytes.
 */
static inline uint64_t
vhd_bytes_padded(uint64_t bytes)
{
	const uint32_t secs = secs_round_up_no_zero(bytes);

	return vhd_sectors_to_bytes(secs);
}
162 static inline int
163 vhd_type_dynamic(vhd_context_t *ctx)
164 {
165 return (ctx->footer.type == HD_TYPE_DYNAMIC ||
166 ctx->footer.type == HD_TYPE_DIFF);
167 }
169 static inline int
170 vhd_creator_tapdisk(vhd_context_t *ctx)
171 {
172 return !strncmp(ctx->footer.crtr_app, "tap", 3);
173 }
175 static inline int
176 vhd_disabled(vhd_context_t *ctx)
177 {
178 return (!memcmp(ctx->footer.cookie,
179 VHD_POISON_COOKIE, sizeof(ctx->footer.cookie)));
180 }
182 static inline size_t
183 vhd_parent_locator_size(vhd_parent_locator_t *loc)
184 {
185 /*
186 * MICROSOFT_COMPAT
187 * data_space *should* be in sectors,
188 * but sometimes we find it in bytes
189 */
190 if (loc->data_space < 512)
191 return vhd_sectors_to_bytes(loc->data_space);
192 else if (loc->data_space % 512 == 0)
193 return loc->data_space;
194 else
195 return 0;
196 }
198 static inline int
199 vhd_parent_raw(vhd_context_t *ctx)
200 {
201 return uuid_is_null(ctx->header.prt_uuid);
202 }
204 void libvhd_set_log_level(int);
206 int vhd_test_file_fixed(const char *, int *);
208 uint32_t vhd_time(time_t time);
209 size_t vhd_time_to_string(uint32_t timestamp, char *target);
210 uint32_t vhd_chs(uint64_t size);
212 uint32_t vhd_checksum_footer(vhd_footer_t *);
213 uint32_t vhd_checksum_header(vhd_header_t *);
214 uint32_t vhd_checksum_batmap(vhd_batmap_t *);
216 void vhd_footer_in(vhd_footer_t *);
217 void vhd_footer_out(vhd_footer_t *);
218 void vhd_header_in(vhd_header_t *);
219 void vhd_header_out(vhd_header_t *);
220 void vhd_bat_in(vhd_bat_t *);
221 void vhd_bat_out(vhd_bat_t *);
222 void vhd_batmap_header_in(vhd_batmap_t *);
223 void vhd_batmap_header_out(vhd_batmap_t *);
225 int vhd_validate_footer(vhd_footer_t *footer);
226 int vhd_validate_header(vhd_header_t *header);
227 int vhd_validate_batmap_header(vhd_batmap_t *batmap);
228 int vhd_validate_batmap(vhd_batmap_t *batmap);
229 int vhd_validate_platform_code(uint32_t code);
231 int vhd_open(vhd_context_t *, const char *file, int flags);
232 void vhd_close(vhd_context_t *);
233 int vhd_create(const char *name, uint64_t bytes, int type, vhd_flag_creat_t);
234 /* vhd_snapshot: the bytes parameter is optional and can be 0 if the snapshot
235 * is to have the same size as the (first non-empty) parent */
236 int vhd_snapshot(const char *snapshot, uint64_t bytes, const char *parent,
237 vhd_flag_creat_t);
239 int vhd_hidden(vhd_context_t *, int *);
240 int vhd_chain_depth(vhd_context_t *, int *);
242 off64_t vhd_position(vhd_context_t *);
243 int vhd_seek(vhd_context_t *, off64_t, int);
244 int vhd_read(vhd_context_t *, void *, size_t);
245 int vhd_write(vhd_context_t *, void *, size_t);
247 int vhd_offset(vhd_context_t *, uint32_t, uint32_t *);
249 int vhd_end_of_headers(vhd_context_t *ctx, off64_t *off);
250 int vhd_end_of_data(vhd_context_t *ctx, off64_t *off);
251 int vhd_batmap_header_offset(vhd_context_t *ctx, off64_t *off);
253 int vhd_get_header(vhd_context_t *);
254 int vhd_get_footer(vhd_context_t *);
255 int vhd_get_bat(vhd_context_t *);
256 int vhd_get_batmap(vhd_context_t *);
258 void vhd_put_header(vhd_context_t *);
259 void vhd_put_footer(vhd_context_t *);
260 void vhd_put_bat(vhd_context_t *);
261 void vhd_put_batmap(vhd_context_t *);
263 int vhd_has_batmap(vhd_context_t *);
264 int vhd_batmap_test(vhd_context_t *, vhd_batmap_t *, uint32_t);
265 void vhd_batmap_set(vhd_context_t *, vhd_batmap_t *, uint32_t);
266 void vhd_batmap_clear(vhd_context_t *, vhd_batmap_t *, uint32_t);
268 int vhd_get_phys_size(vhd_context_t *, off64_t *);
269 int vhd_set_phys_size(vhd_context_t *, off64_t);
271 int vhd_bitmap_test(vhd_context_t *, char *, uint32_t);
272 void vhd_bitmap_set(vhd_context_t *, char *, uint32_t);
273 void vhd_bitmap_clear(vhd_context_t *, char *, uint32_t);
275 int vhd_parent_locator_count(vhd_context_t *);
276 int vhd_parent_locator_get(vhd_context_t *, char **);
277 int vhd_parent_locator_read(vhd_context_t *, vhd_parent_locator_t *, char **);
278 int vhd_find_parent(vhd_context_t *, const char *, char **);
279 int vhd_parent_locator_write_at(vhd_context_t *, const char *,
280 off64_t, uint32_t, size_t,
281 vhd_parent_locator_t *);
283 int vhd_header_decode_parent(vhd_context_t *, vhd_header_t *, char **);
284 int vhd_change_parent(vhd_context_t *, char *parent_path, int raw);
286 int vhd_read_footer(vhd_context_t *, vhd_footer_t *);
287 int vhd_read_footer_at(vhd_context_t *, vhd_footer_t *, off64_t);
288 int vhd_read_footer_strict(vhd_context_t *, vhd_footer_t *);
289 int vhd_read_header(vhd_context_t *, vhd_header_t *);
290 int vhd_read_header_at(vhd_context_t *, vhd_header_t *, off64_t);
291 int vhd_read_bat(vhd_context_t *, vhd_bat_t *);
292 int vhd_read_batmap(vhd_context_t *, vhd_batmap_t *);
293 int vhd_read_bitmap(vhd_context_t *, uint32_t block, char **bufp);
294 int vhd_read_block(vhd_context_t *, uint32_t block, char **bufp);
296 int vhd_write_footer(vhd_context_t *, vhd_footer_t *);
297 int vhd_write_footer_at(vhd_context_t *, vhd_footer_t *, off64_t);
298 int vhd_write_header(vhd_context_t *, vhd_header_t *);
299 int vhd_write_header_at(vhd_context_t *, vhd_header_t *, off64_t);
300 int vhd_write_bat(vhd_context_t *, vhd_bat_t *);
301 int vhd_write_batmap(vhd_context_t *, vhd_batmap_t *);
302 int vhd_write_bitmap(vhd_context_t *, uint32_t block, char *bitmap);
303 int vhd_write_block(vhd_context_t *, uint32_t block, char *data);
305 int vhd_io_read(vhd_context_t *, char *, uint64_t, uint32_t);
306 int vhd_io_write(vhd_context_t *, char *, uint64_t, uint32_t);
308 #endif