ia64/xen-unstable

tools/blktap2/drivers/block-vhd.c @ 19647:1c627434605e

blktap2: a completely rewritten blktap implementation

Benefits of blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the Linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, IO retry logic to protect
guests from transient block device failures. This has been tested
and is known to work in weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests; blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:52:31 2009 +0100 (2009-05-26)
1 /*
2 * Copyright (c) 2008, XenSource Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of XenSource Inc. nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * A note on write transactions:
29 * Writes that require updating the BAT or bitmaps cannot be signaled
30 * as complete until all updates have reached disk. Transactions are
31 * used to ensure proper ordering in these cases. The two types of
32 * transactions are as follows:
33 * - Bitmap updates only: data writes that require updates to the same
34 * bitmap are grouped in a transaction. Only after all data writes
35 * in a transaction complete does the bitmap write commence. Only
36 * after the bitmap write finishes are the data writes signalled as
37 * complete.
38 * - BAT and bitmap updates: data writes are grouped in transactions
39 * as above, but a special extra write is included in the transaction,
40 * which zeros out the newly allocated bitmap on disk. When the data
41 * writes and the zero-bitmap write complete, the BAT and bitmap writes
42 * are started in parallel. The transaction is completed only after both
43 * the BAT and bitmap writes successfully return.
44 */
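/*
 * Editorial sketch (not part of the original source): a rough timeline of
 * how the note above maps onto the functions defined later in this file.
 *
 * Bitmap-only transaction (block already allocated):
 *
 *   schedule_data_write()     data writes join bm->tx via add_to_transaction()
 *   finish_data_write()       bumps tx->finished and sets bits in bm->shadow;
 *                             once transaction_completed(tx),
 *                             finish_data_transaction() runs
 *   schedule_bitmap_write()   flushes bm->shadow to disk
 *   finish_bitmap_write()     finish_bitmap_transaction() copies shadow into
 *                             map and signals the queued data requests
 *
 * BAT + bitmap transaction (new block, non-preallocating path):
 *
 *   update_bat()              reserves pbw_blk/pbw_offset and locks the BAT
 *   schedule_zero_bm_write()  zeroes the new bitmap region on disk
 *   finish_zero_bm_write()    then issues schedule_bat_write()
 *   finish_data_transaction() starts the bitmap write in parallel
 *   finish_bat_write() +      only when both have completed does
 *   finish_bitmap_write()     finish_bitmap_transaction() signal the
 *                             original requests as complete
 *
 * The VHD_FLAG_OPEN_PREALLOCATE path (allocate_block()) differs slightly:
 * it zeroes the new block synchronously and adds the BAT write itself to
 * the transaction.
 */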
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <unistd.h>
51 #include <sys/stat.h>
52 #include <sys/ioctl.h>
53 #include <uuid/uuid.h> /* For whatever reason, Linux packages this in */
54 /* e2fsprogs-devel. */
55 #include <string.h> /* for memset. */
56 #include <libaio.h>
57 #include <sys/mman.h>
59 #include "libvhd.h"
60 #include "tapdisk.h"
61 #include "tapdisk-driver.h"
62 #include "tapdisk-interface.h"
64 unsigned int SPB;
66 #define DEBUGGING 2
67 #define ASSERTING 1
68 #define MICROSOFT_COMPAT
70 #define VHD_BATMAP_MAX_RETRIES 10
72 #define __TRACE(s) \
73 do { \
74 DBG(TLOG_DBG, "%s: QUEUED: %" PRIu64 ", COMPLETED: %" \
75 PRIu64", RETURNED: %" PRIu64 ", DATA_ALLOCATED: " \
76 "%lu, BBLK: 0x%04x\n", \
77 s->vhd.file, s->queued, s->completed, s->returned, \
78 VHD_REQS_DATA - s->vreq_free_count, \
79 s->bat.pbw_blk); \
80 } while(0)
82 #define __ASSERT(_p) \
83 if (!(_p)) { \
84 DPRINTF("%s:%d: FAILED ASSERTION: '%s'\n", \
85 __FILE__, __LINE__, #_p); \
86 DBG(TLOG_WARN, "%s:%d: FAILED ASSERTION: '%s'\n", \
87 __FILE__, __LINE__, #_p); \
88 tlog_flush(); \
89 *(int*)0 = 0; \
90 }
92 #if (DEBUGGING == 1)
93 #define DBG(level, _f, _a...) DPRINTF(_f, ##_a)
94 #define ERR(err, _f, _a...) DPRINTF("ERROR: %d: " _f, err, ##_a)
95 #define TRACE(s) ((void)0)
96 #elif (DEBUGGING == 2)
97 #define DBG(level, _f, _a...) tlog_write(level, _f, ##_a)
98 #define ERR(err, _f, _a...) tlog_error(err, _f, ##_a)
99 #define TRACE(s) __TRACE(s)
100 #else
101 #define DBG(level, _f, _a...) ((void)0)
102 #define ERR(err, _f, _a...) ((void)0)
103 #define TRACE(s) ((void)0)
104 #endif
106 #if (ASSERTING == 1)
107 #define ASSERT(_p) __ASSERT(_p)
108 #else
109 #define ASSERT(_p) ((void)0)
110 #endif
112 /******VHD DEFINES******/
113 #define VHD_CACHE_SIZE 32
115 #define VHD_REQS_DATA TAPDISK_DATA_REQUESTS
116 #define VHD_REQS_META (VHD_CACHE_SIZE + 2)
117 #define VHD_REQS_TOTAL (VHD_REQS_DATA + VHD_REQS_META)
119 #define VHD_OP_BAT_WRITE 0
120 #define VHD_OP_DATA_READ 1
121 #define VHD_OP_DATA_WRITE 2
122 #define VHD_OP_BITMAP_READ 3
123 #define VHD_OP_BITMAP_WRITE 4
124 #define VHD_OP_ZERO_BM_WRITE 5
126 #define VHD_BM_BAT_LOCKED 0
127 #define VHD_BM_BAT_CLEAR 1
128 #define VHD_BM_BIT_CLEAR 2
129 #define VHD_BM_BIT_SET 3
130 #define VHD_BM_NOT_CACHED 4
131 #define VHD_BM_READ_PENDING 5
133 #define VHD_FLAG_OPEN_RDONLY 1
134 #define VHD_FLAG_OPEN_NO_CACHE 2
135 #define VHD_FLAG_OPEN_QUIET 4
136 #define VHD_FLAG_OPEN_STRICT 8
137 #define VHD_FLAG_OPEN_QUERY 16
138 #define VHD_FLAG_OPEN_PREALLOCATE 32
140 #define VHD_FLAG_BAT_LOCKED 1
141 #define VHD_FLAG_BAT_WRITE_STARTED 2
143 #define VHD_FLAG_BM_UPDATE_BAT 1
144 #define VHD_FLAG_BM_WRITE_PENDING 2
145 #define VHD_FLAG_BM_READ_PENDING 4
146 #define VHD_FLAG_BM_LOCKED 8
148 #define VHD_FLAG_REQ_UPDATE_BAT 1
149 #define VHD_FLAG_REQ_UPDATE_BITMAP 2
150 #define VHD_FLAG_REQ_QUEUED 4
151 #define VHD_FLAG_REQ_FINISHED 8
153 #define VHD_FLAG_TX_LIVE 1
154 #define VHD_FLAG_TX_UPDATE_BAT 2
156 typedef uint8_t vhd_flag_t;
158 struct vhd_state;
159 struct vhd_request;
161 struct vhd_req_list {
162 struct vhd_request *head;
163 struct vhd_request *tail;
164 };
166 struct vhd_transaction {
167 int error;
168 int closed;
169 int started;
170 int finished;
171 vhd_flag_t status;
172 struct vhd_req_list requests;
173 };
175 struct vhd_request {
176 int error;
177 uint8_t op;
178 vhd_flag_t flags;
179 td_request_t treq;
180 struct tiocb tiocb;
181 struct vhd_state *state;
182 struct vhd_request *next;
183 struct vhd_transaction *tx;
184 };
186 struct vhd_bat_state {
187 vhd_bat_t bat;
188 vhd_batmap_t batmap;
189 vhd_flag_t status;
190 uint32_t pbw_blk; /* blk num of pending write */
191 uint64_t pbw_offset; /* file offset of same */
192 struct vhd_request req; /* for writing bat table */
193 struct vhd_request zero_req; /* for initializing bitmaps */
194 char *bat_buf;
195 };
197 struct vhd_bitmap {
198 u32 blk;
199 u64 seqno; /* lru sequence number */
200 vhd_flag_t status;
202 char *map; /* map should only be modified
203 * in finish_bitmap_write */
204 char *shadow; /* in-memory bitmap changes are
205 * made to shadow and copied to
206 * map only after having been
207 * flushed to disk */
208 struct vhd_transaction tx; /* transaction data structure
209 * encapsulating data, bitmap,
210 * and bat writes */
211 struct vhd_req_list queue; /* data writes waiting for next
212 * transaction */
213 struct vhd_req_list waiting; /* pending requests that cannot
214 * be serviced until this bitmap
215 * is read from disk */
216 struct vhd_request req;
217 };
219 struct vhd_state {
220 vhd_flag_t flags;
222 /* VHD stuff */
223 vhd_context_t vhd;
224 u32 spp; /* sectors per page */
225 u32 spb; /* sectors per block */
226 u64 next_db; /* pointer to the next
227 * (unallocated) datablock */
229 struct vhd_bat_state bat;
231 u64 bm_lru; /* lru sequence number */
232 u32 bm_secs; /* size of bitmap, in sectors */
233 struct vhd_bitmap *bitmap[VHD_CACHE_SIZE];
235 int bm_free_count;
236 struct vhd_bitmap *bitmap_free[VHD_CACHE_SIZE];
237 struct vhd_bitmap bitmap_list[VHD_CACHE_SIZE];
239 int vreq_free_count;
240 struct vhd_request *vreq_free[VHD_REQS_DATA];
241 struct vhd_request vreq_list[VHD_REQS_DATA];
243 td_driver_t *driver;
245 uint64_t queued;
246 uint64_t completed;
247 uint64_t returned;
248 uint64_t reads;
249 uint64_t read_size;
250 uint64_t writes;
251 uint64_t write_size;
252 };
254 #define test_vhd_flag(word, flag) ((word) & (flag))
255 #define set_vhd_flag(word, flag) ((word) |= (flag))
256 #define clear_vhd_flag(word, flag) ((word) &= ~(flag))
258 #define bat_entry(s, blk) ((s)->bat.bat.bat[(blk)])
260 static void vhd_complete(void *, struct tiocb *, int);
261 static void finish_data_transaction(struct vhd_state *, struct vhd_bitmap *);
263 static struct vhd_state *_vhd_master;
264 static unsigned long _vhd_zsize;
265 static char *_vhd_zeros;
267 static int
268 vhd_initialize(struct vhd_state *s)
269 {
270 if (_vhd_zeros)
271 return 0;
273 _vhd_zsize = 2 * getpagesize();
274 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE))
275 _vhd_zsize += VHD_BLOCK_SIZE;
277 _vhd_zeros = mmap(0, _vhd_zsize, PROT_READ,
278 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
279 if (_vhd_zeros == MAP_FAILED) {
280 EPRINTF("vhd_initialize failed: %d\n", -errno);
281 _vhd_zeros = NULL;
282 _vhd_zsize = 0;
283 return -errno;
284 }
286 _vhd_master = s;
287 return 0;
288 }
290 static void
291 vhd_free(struct vhd_state *s)
292 {
293 if (_vhd_master != s || !_vhd_zeros)
294 return;
296 munmap(_vhd_zeros, _vhd_zsize);
297 _vhd_zsize = 0;
298 _vhd_zeros = NULL;
299 _vhd_master = NULL;
300 }
302 static char *
303 _get_vhd_zeros(const char *func, unsigned long size)
304 {
305 if (!_vhd_zeros || _vhd_zsize < size) {
306 EPRINTF("invalid zero request from %s: %lu, %lu, %p\n",
307 func, size, _vhd_zsize, _vhd_zeros);
308 ASSERT(0);
309 }
311 return _vhd_zeros;
312 }
314 #define vhd_zeros(size) _get_vhd_zeros(__func__, size)
316 static inline void
317 set_batmap(struct vhd_state *s, uint32_t blk)
318 {
319 if (s->bat.batmap.map) {
320 vhd_batmap_set(&s->vhd, &s->bat.batmap, blk);
321 DBG(TLOG_DBG, "block 0x%x completely full\n", blk);
322 }
323 }
325 static inline int
326 test_batmap(struct vhd_state *s, uint32_t blk)
327 {
328 if (!s->bat.batmap.map)
329 return 0;
330 return vhd_batmap_test(&s->vhd, &s->bat.batmap, blk);
331 }
333 static int
334 vhd_kill_footer(struct vhd_state *s)
335 {
336 int err;
337 off64_t end;
338 char *zeros;
340 if (s->vhd.footer.type == HD_TYPE_FIXED)
341 return 0;
343 err = posix_memalign((void **)&zeros, 512, 512);
344 if (err)
345 return -err;
347 err = 1;
348 memset(zeros, 0xc7c7c7c7, 512);
350 if ((end = lseek64(s->vhd.fd, 0, SEEK_END)) == -1)
351 goto fail;
353 if (lseek64(s->vhd.fd, (end - 512), SEEK_SET) == -1)
354 goto fail;
356 if (write(s->vhd.fd, zeros, 512) != 512)
357 goto fail;
359 err = 0;
361 fail:
362 free(zeros);
363 if (err)
364 return (errno ? -errno : -EIO);
365 return 0;
366 }
368 static inline int
369 find_next_free_block(struct vhd_state *s)
370 {
371 int err;
372 off64_t eom;
373 uint32_t i, entry;
375 err = vhd_end_of_headers(&s->vhd, &eom);
376 if (err)
377 return err;
379 s->next_db = secs_round_up(eom);
381 for (i = 0; i < s->bat.bat.entries; i++) {
382 entry = bat_entry(s, i);
383 if (entry != DD_BLK_UNUSED && entry >= s->next_db)
384 s->next_db = entry + s->spb + s->bm_secs;
385 }
387 return 0;
388 }
390 static void
391 vhd_free_bat(struct vhd_state *s)
392 {
393 free(s->bat.bat.bat);
394 free(s->bat.batmap.map);
395 free(s->bat.bat_buf);
396 memset(&s->bat, 0, sizeof(struct vhd_bat));
397 }
399 static int
400 vhd_initialize_bat(struct vhd_state *s)
401 {
402 int err, psize, batmap_required, i;
404 memset(&s->bat, 0, sizeof(struct vhd_bat));
406 psize = getpagesize();
408 err = vhd_read_bat(&s->vhd, &s->bat.bat);
409 if (err) {
410 EPRINTF("%s: reading bat: %d\n", s->vhd.file, err);
411 return err;
412 }
414 batmap_required = 1;
415 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY)) {
416 batmap_required = 0;
417 } else {
418 err = find_next_free_block(s);
419 if (err)
420 goto fail;
421 }
423 if (vhd_has_batmap(&s->vhd)) {
424 for (i = 0; i < VHD_BATMAP_MAX_RETRIES; i++) {
425 err = vhd_read_batmap(&s->vhd, &s->bat.batmap);
426 if (err) {
427 EPRINTF("%s: reading batmap: %d\n",
428 s->vhd.file, err);
429 if (batmap_required)
430 goto fail;
431 } else {
432 break;
433 }
434 }
435 if (err)
436 EPRINTF("%s: ignoring non-critical batmap error\n",
437 s->vhd.file);
438 }
440 err = posix_memalign((void **)&s->bat.bat_buf,
441 VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
442 if (err) {
443 s->bat.bat_buf = NULL;
444 goto fail;
445 }
447 return 0;
449 fail:
450 vhd_free_bat(s);
451 return err;
452 }
454 static void
455 vhd_free_bitmap_cache(struct vhd_state *s)
456 {
457 int i;
458 struct vhd_bitmap *bm;
460 for (i = 0; i < VHD_CACHE_SIZE; i++) {
461 bm = s->bitmap_list + i;
462 free(bm->map);
463 free(bm->shadow);
464 s->bitmap_free[i] = NULL;
465 }
467 memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
468 }
470 static int
471 vhd_initialize_bitmap_cache(struct vhd_state *s)
472 {
473 int i, err, map_size;
474 struct vhd_bitmap *bm;
476 memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
478 s->bm_lru = 0;
479 map_size = vhd_sectors_to_bytes(s->bm_secs);
480 s->bm_free_count = VHD_CACHE_SIZE;
482 for (i = 0; i < VHD_CACHE_SIZE; i++) {
483 bm = s->bitmap_list + i;
485 err = posix_memalign((void **)&bm->map, 512, map_size);
486 if (err) {
487 bm->map = NULL;
488 goto fail;
489 }
491 err = posix_memalign((void **)&bm->shadow, 512, map_size);
492 if (err) {
493 bm->shadow = NULL;
494 goto fail;
495 }
497 memset(bm->map, 0, map_size);
498 memset(bm->shadow, 0, map_size);
499 s->bitmap_free[i] = bm;
500 }
502 return 0;
504 fail:
505 vhd_free_bitmap_cache(s);
506 return err;
507 }
509 static int
510 vhd_initialize_dynamic_disk(struct vhd_state *s)
511 {
512 int err;
514 err = vhd_get_header(&s->vhd);
515 if (err) {
516 if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
517 EPRINTF("Error reading VHD DD header.\n");
518 return err;
519 }
521 if (s->vhd.header.hdr_ver != 0x00010000) {
522 EPRINTF("unsupported header version! (0x%x)\n",
523 s->vhd.header.hdr_ver);
524 return -EINVAL;
525 }
527 s->spp = getpagesize() >> VHD_SECTOR_SHIFT;
528 s->spb = s->vhd.header.block_size >> VHD_SECTOR_SHIFT;
529 s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
531 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
532 return 0;
534 err = vhd_initialize_bat(s);
535 if (err)
536 return err;
538 err = vhd_initialize_bitmap_cache(s);
539 if (err) {
540 vhd_free_bat(s);
541 return err;
542 }
544 return 0;
545 }
547 static int
548 vhd_check_version(struct vhd_state *s)
549 {
550 if (strncmp(s->vhd.footer.crtr_app, "tap", 3))
551 return 0;
553 if (s->vhd.footer.crtr_ver > VHD_CURRENT_VERSION) {
554 if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
555 EPRINTF("WARNING: %s vhd creator version 0x%08x, "
556 "but only versions up to 0x%08x are "
557 "supported for IO\n", s->vhd.file,
558 s->vhd.footer.crtr_ver, VHD_CURRENT_VERSION);
560 return -EINVAL;
561 }
563 return 0;
564 }
566 static void
567 vhd_log_open(struct vhd_state *s)
568 {
569 char buf[5];
570 uint32_t i, allocated, full;
572 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
573 return;
575 snprintf(buf, sizeof(buf), "%s", s->vhd.footer.crtr_app);
576 if (!vhd_type_dynamic(&s->vhd)) {
577 DPRINTF("%s version: %s 0x%08x\n",
578 s->vhd.file, buf, s->vhd.footer.crtr_ver);
579 return;
580 }
582 allocated = 0;
583 full = 0;
585 for (i = 0; i < s->bat.bat.entries; i++) {
586 if (bat_entry(s, i) != DD_BLK_UNUSED)
587 allocated++;
588 if (test_batmap(s, i))
589 full++;
590 }
592 DPRINTF("%s version: %s 0x%08x, b: %u, a: %u, f: %u, n: %"PRIu64"\n",
593 s->vhd.file, buf, s->vhd.footer.crtr_ver, s->bat.bat.entries,
594 allocated, full, s->next_db);
595 }
597 static int
598 __vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags)
599 {
600 int i, o_flags, err;
601 struct vhd_state *s;
603 DBG(TLOG_INFO, "vhd_open: %s\n", name);
604 if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT))
605 libvhd_set_log_level(1);
607 s = (struct vhd_state *)driver->data;
608 memset(s, 0, sizeof(struct vhd_state));
610 s->flags = flags;
611 s->driver = driver;
613 err = vhd_initialize(s);
614 if (err)
615 return err;
617 o_flags = ((test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) ?
618 VHD_OPEN_RDONLY : VHD_OPEN_RDWR);
620 err = vhd_open(&s->vhd, name, o_flags);
621 if (err) {
622 libvhd_set_log_level(1);
623 err = vhd_open(&s->vhd, name, o_flags);
624 if (err) {
625 EPRINTF("Unable to open [%s] (%d)!\n", name, err);
626 return err;
627 }
628 }
630 err = vhd_check_version(s);
631 if (err)
632 goto fail;
634 s->spb = s->spp = 1;
636 if (vhd_type_dynamic(&s->vhd)) {
637 err = vhd_initialize_dynamic_disk(s);
638 if (err)
639 goto fail;
640 }
642 vhd_log_open(s);
644 SPB = s->spb;
646 s->vreq_free_count = VHD_REQS_DATA;
647 for (i = 0; i < VHD_REQS_DATA; i++)
648 s->vreq_free[i] = s->vreq_list + i;
650 driver->info.size = s->vhd.footer.curr_size >> VHD_SECTOR_SHIFT;
651 driver->info.sector_size = VHD_SECTOR_SIZE;
652 driver->info.info = 0;
654 DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%lu, inf:%u)\n",
655 driver->info.size, driver->info.sector_size, driver->info.info);
657 if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT) &&
658 !test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) {
659 err = vhd_kill_footer(s);
660 if (err) {
661 DPRINTF("ERROR killing footer: %d\n", err);
662 goto fail;
663 }
664 s->writes++;
665 }
667 return 0;
669 fail:
670 vhd_free_bat(s);
671 vhd_free_bitmap_cache(s);
672 vhd_close(&s->vhd);
673 vhd_free(s);
674 return err;
675 }
677 static int
678 _vhd_open(td_driver_t *driver, const char *name, td_flag_t flags)
679 {
680 vhd_flag_t vhd_flags = 0;
682 if (flags & TD_OPEN_RDONLY)
683 vhd_flags |= VHD_FLAG_OPEN_RDONLY;
684 if (flags & TD_OPEN_QUIET)
685 vhd_flags |= VHD_FLAG_OPEN_QUIET;
686 if (flags & TD_OPEN_STRICT)
687 vhd_flags |= VHD_FLAG_OPEN_STRICT;
688 if (flags & TD_OPEN_QUERY)
689 vhd_flags |= (VHD_FLAG_OPEN_QUERY |
690 VHD_FLAG_OPEN_QUIET |
691 VHD_FLAG_OPEN_RDONLY |
692 VHD_FLAG_OPEN_NO_CACHE);
694 /* pre-allocate for all but NFS and LVM storage */
695 if (driver->storage != TAPDISK_STORAGE_TYPE_NFS &&
696 driver->storage != TAPDISK_STORAGE_TYPE_LVM)
697 vhd_flags |= VHD_FLAG_OPEN_PREALLOCATE;
699 return __vhd_open(driver, name, vhd_flags);
700 }
702 static void
703 vhd_log_close(struct vhd_state *s)
704 {
705 uint32_t i, allocated, full;
707 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
708 return;
710 allocated = 0;
711 full = 0;
713 for (i = 0; i < s->bat.bat.entries; i++) {
714 if (bat_entry(s, i) != DD_BLK_UNUSED)
715 allocated++;
716 if (test_batmap(s, i))
717 full++;
718 }
720 DPRINTF("%s: b: %u, a: %u, f: %u, n: %"PRIu64"\n",
721 s->vhd.file, s->bat.bat.entries, allocated, full, s->next_db);
722 }
724 static int
725 _vhd_close(td_driver_t *driver)
726 {
727 int err;
728 struct vhd_state *s;
729 struct vhd_bitmap *bm;
731 DBG(TLOG_WARN, "vhd_close\n");
732 s = (struct vhd_state *)driver->data;
734 /* don't write footer if tapdisk is read-only */
735 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
736 goto free;
738 /*
739 * write footer if:
740 * - we killed it on open (opened with strict)
741 * - we've written data since opening
742 */
743 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_STRICT) || s->writes) {
744 memcpy(&s->vhd.bat, &s->bat.bat, sizeof(vhd_bat_t));
745 err = vhd_write_footer(&s->vhd, &s->vhd.footer);
746 memset(&s->vhd.bat, 0, sizeof(vhd_bat_t));
748 if (err)
749 EPRINTF("writing %s footer: %d\n", s->vhd.file, err);
751 if (!vhd_has_batmap(&s->vhd))
752 goto free;
754 err = vhd_write_batmap(&s->vhd, &s->bat.batmap);
755 if (err)
756 EPRINTF("writing %s batmap: %d\n", s->vhd.file, err);
757 }
759 free:
760 vhd_log_close(s);
761 vhd_free_bat(s);
762 vhd_free_bitmap_cache(s);
763 vhd_close(&s->vhd);
764 vhd_free(s);
766 memset(s, 0, sizeof(struct vhd_state));
768 return 0;
769 }
771 int
772 vhd_validate_parent(td_driver_t *child_driver,
773 td_driver_t *parent_driver, td_flag_t flags)
774 {
775 struct stat stats;
776 struct vhd_state *child = (struct vhd_state *)child_driver->data;
777 struct vhd_state *parent;
779 if (parent_driver->type != DISK_TYPE_VHD) {
780 if (child_driver->type != DISK_TYPE_VHD)
781 return -EINVAL;
782 if (child->vhd.footer.type != HD_TYPE_DIFF)
783 return -EINVAL;
784 if (!vhd_parent_raw(&child->vhd))
785 return -EINVAL;
786 return 0;
787 }
789 parent = (struct vhd_state *)parent_driver->data;
791 /*
792 * This check removed because of cases like:
793 * - parent VHD marked as 'hidden'
794 * - parent VHD modified during coalesce
795 */
796 /*
797 if (stat(parent->vhd.file, &stats)) {
798 DPRINTF("ERROR stating parent file %s\n", parent->vhd.file);
799 return -errno;
800 }
802 if (child->hdr.prt_ts != vhd_time(stats.st_mtime)) {
803 DPRINTF("ERROR: parent file has been modified since "
804 "snapshot. Child image no longer valid.\n");
805 return -EINVAL;
806 }
807 */
809 if (uuid_compare(child->vhd.header.prt_uuid, parent->vhd.footer.uuid)) {
810 DPRINTF("ERROR: %s: %s, %s: parent uuid has changed since "
811 "snapshot. Child image no longer valid.\n",
812 __func__, child->vhd.file, parent->vhd.file);
813 return -EINVAL;
814 }
816 /* TODO: compare sizes */
818 return 0;
819 }
821 int
822 vhd_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
823 {
824 int err;
825 char *parent;
826 struct vhd_state *s;
828 DBG(TLOG_DBG, "\n");
829 memset(id, 0, sizeof(td_disk_id_t));
831 s = (struct vhd_state *)driver->data;
833 if (s->vhd.footer.type != HD_TYPE_DIFF)
834 return TD_NO_PARENT;
836 err = vhd_parent_locator_get(&s->vhd, &parent);
837 if (err)
838 return err;
840 id->name = parent;
841 id->drivertype = DISK_TYPE_VHD;
842 if (vhd_parent_raw(&s->vhd)) {
843 DPRINTF("VHD: parent is raw\n");
844 id->drivertype = DISK_TYPE_AIO;
845 }
846 return 0;
847 }
849 static inline void
850 clear_req_list(struct vhd_req_list *list)
851 {
852 list->head = list->tail = NULL;
853 }
855 static inline void
856 add_to_tail(struct vhd_req_list *list, struct vhd_request *e)
857 {
858 if (!list->head)
859 list->head = list->tail = e;
860 else
861 list->tail = list->tail->next = e;
862 }
864 static inline int
865 remove_from_req_list(struct vhd_req_list *list, struct vhd_request *e)
866 {
867 struct vhd_request *i = list->head;
869 if (list->head == e) {
870 if (list->tail == e)
871 clear_req_list(list);
872 else
873 list->head = list->head->next;
874 return 0;
875 }
877 while (i->next) {
878 if (i->next == e) {
879 if (list->tail == e) {
880 i->next = NULL;
881 list->tail = i;
882 } else
883 i->next = i->next->next;
884 return 0;
885 }
886 i = i->next;
887 }
889 return -EINVAL;
890 }
892 static inline void
893 init_vhd_request(struct vhd_state *s, struct vhd_request *req)
894 {
895 memset(req, 0, sizeof(struct vhd_request));
896 req->state = s;
897 }
899 static inline void
900 init_tx(struct vhd_transaction *tx)
901 {
902 memset(tx, 0, sizeof(struct vhd_transaction));
903 }
905 static inline void
906 add_to_transaction(struct vhd_transaction *tx, struct vhd_request *r)
907 {
908 ASSERT(!tx->closed);
910 r->tx = tx;
911 tx->started++;
912 add_to_tail(&tx->requests, r);
913 set_vhd_flag(tx->status, VHD_FLAG_TX_LIVE);
915 DBG(TLOG_DBG, "blk: 0x%04"PRIx64", lsec: 0x%08"PRIx64", tx: %p, "
916 "started: %d, finished: %d, status: %u\n",
917 r->treq.sec / SPB, r->treq.sec, tx,
918 tx->started, tx->finished, tx->status);
919 }
921 static inline int
922 transaction_completed(struct vhd_transaction *tx)
923 {
924 return (tx->started == tx->finished);
925 }
927 static inline void
928 init_bat(struct vhd_state *s)
929 {
930 s->bat.req.tx = NULL;
931 s->bat.req.next = NULL;
932 s->bat.req.error = 0;
933 s->bat.pbw_blk = 0;
934 s->bat.pbw_offset = 0;
935 s->bat.status = 0;
936 }
938 static inline void
939 lock_bat(struct vhd_state *s)
940 {
941 set_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
942 }
944 static inline void
945 unlock_bat(struct vhd_state *s)
946 {
947 clear_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
948 }
950 static inline int
951 bat_locked(struct vhd_state *s)
952 {
953 return test_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
954 }
956 static inline void
957 init_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
958 {
959 bm->blk = 0;
960 bm->seqno = 0;
961 bm->status = 0;
962 init_tx(&bm->tx);
963 clear_req_list(&bm->queue);
964 clear_req_list(&bm->waiting);
965 memset(bm->map, 0, vhd_sectors_to_bytes(s->bm_secs));
966 memset(bm->shadow, 0, vhd_sectors_to_bytes(s->bm_secs));
967 init_vhd_request(s, &bm->req);
968 }
970 static inline struct vhd_bitmap *
971 get_bitmap(struct vhd_state *s, uint32_t block)
972 {
973 int i;
974 struct vhd_bitmap *bm;
976 for (i = 0; i < VHD_CACHE_SIZE; i++) {
977 bm = s->bitmap[i];
978 if (bm && bm->blk == block)
979 return bm;
980 }
982 return NULL;
983 }
985 static inline void
986 lock_bitmap(struct vhd_bitmap *bm)
987 {
988 set_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
989 }
991 static inline void
992 unlock_bitmap(struct vhd_bitmap *bm)
993 {
994 clear_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
995 }
997 static inline int
998 bitmap_locked(struct vhd_bitmap *bm)
999 {
1000 return test_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
1003 static inline int
1004 bitmap_valid(struct vhd_bitmap *bm)
1006 return !test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING);
1009 static inline int
1010 bitmap_in_use(struct vhd_bitmap *bm)
1012 return (test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING) ||
1013 test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING) ||
1014 test_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT) ||
1015 bm->waiting.head || bm->tx.requests.head || bm->queue.head);
1018 static inline int
1019 bitmap_full(struct vhd_state *s, struct vhd_bitmap *bm)
1021 int i, n;
1023 n = s->spb >> 3;
1024 for (i = 0; i < n; i++)
1025 if (bm->map[i] != (char)0xFF)
1026 return 0;
1028 DBG(TLOG_DBG, "bitmap 0x%04x full\n", bm->blk);
1029 return 1;
1032 static struct vhd_bitmap *
1033 remove_lru_bitmap(struct vhd_state *s)
1035 int i, idx = 0;
1036 u64 seq = s->bm_lru;
1037 struct vhd_bitmap *bm, *lru = NULL;
1039 for (i = 0; i < VHD_CACHE_SIZE; i++) {
1040 bm = s->bitmap[i];
1041 if (bm && bm->seqno < seq && !bitmap_locked(bm)) {
1042 idx = i;
1043 lru = bm;
1044 seq = lru->seqno;
1048 if (lru) {
1049 s->bitmap[idx] = NULL;
1050 ASSERT(!bitmap_in_use(lru));
1053 return lru;
1056 static int
1057 alloc_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap **bitmap, uint32_t blk)
1059 struct vhd_bitmap *bm;
1061 *bitmap = NULL;
1063 if (s->bm_free_count > 0) {
1064 bm = s->bitmap_free[--s->bm_free_count];
1065 } else {
1066 bm = remove_lru_bitmap(s);
1067 if (!bm)
1068 return -EBUSY;
1071 init_vhd_bitmap(s, bm);
1072 bm->blk = blk;
1073 *bitmap = bm;
1075 return 0;
1078 static inline uint64_t
1079 __bitmap_lru_seqno(struct vhd_state *s)
1081 int i;
1082 struct vhd_bitmap *bm;
1084 if (s->bm_lru == 0xffffffff) {
1085 s->bm_lru = 0;
1086 for (i = 0; i < VHD_CACHE_SIZE; i++) {
1087 bm = s->bitmap[i];
1088 if (bm) {
1089 bm->seqno >>= 1;
1090 if (bm->seqno > s->bm_lru)
1091 s->bm_lru = bm->seqno;
1096 return ++s->bm_lru;
1099 static inline void
1100 touch_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
1102 bm->seqno = __bitmap_lru_seqno(s);
1105 static inline void
1106 install_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
1108 int i;
1109 for (i = 0; i < VHD_CACHE_SIZE; i++) {
1110 if (!s->bitmap[i]) {
1111 touch_bitmap(s, bm);
1112 s->bitmap[i] = bm;
1113 return;
1117 ASSERT(0);
1120 static inline void
1121 free_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
1123 int i;
1125 for (i = 0; i < VHD_CACHE_SIZE; i++)
1126 if (s->bitmap[i] == bm)
1127 break;
1129 ASSERT(!bitmap_locked(bm));
1130 ASSERT(!bitmap_in_use(bm));
1131 ASSERT(i < VHD_CACHE_SIZE);
1133 s->bitmap[i] = NULL;
1134 s->bitmap_free[s->bm_free_count++] = bm;
1137 static int
1138 read_bitmap_cache(struct vhd_state *s, uint64_t sector, uint8_t op)
1140 u32 blk, sec;
1141 struct vhd_bitmap *bm;
1143 /* in fixed disks, every block is present */
1144 if (s->vhd.footer.type == HD_TYPE_FIXED)
1145 return VHD_BM_BIT_SET;
1147 blk = sector / s->spb;
1148 sec = sector % s->spb;
1150 if (blk > s->vhd.header.max_bat_size) {
1151 DPRINTF("ERROR: sec %"PRIu64" out of range, op = %d\n",
1152 sector, op);
1153 return -EINVAL;
1156 if (bat_entry(s, blk) == DD_BLK_UNUSED) {
1157 if (op == VHD_OP_DATA_WRITE &&
1158 s->bat.pbw_blk != blk && bat_locked(s))
1159 return VHD_BM_BAT_LOCKED;
1161 return VHD_BM_BAT_CLEAR;
1164 if (test_batmap(s, blk)) {
1165 DBG(TLOG_DBG, "batmap set for 0x%04x\n", blk);
1166 return VHD_BM_BIT_SET;
1169 bm = get_bitmap(s, blk);
1170 if (!bm)
1171 return VHD_BM_NOT_CACHED;
1173 /* bump lru count */
1174 touch_bitmap(s, bm);
1176 if (test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING))
1177 return VHD_BM_READ_PENDING;
1179 return ((vhd_bitmap_test(&s->vhd, bm->map, sec)) ?
1180 VHD_BM_BIT_SET : VHD_BM_BIT_CLEAR);
1183 static int
1184 read_bitmap_cache_span(struct vhd_state *s,
1185 uint64_t sector, int nr_secs, int value)
1187 int ret;
1188 u32 blk, sec;
1189 struct vhd_bitmap *bm;
1191 /* in fixed disks, every block is present */
1192 if (s->vhd.footer.type == HD_TYPE_FIXED)
1193 return nr_secs;
1195 sec = sector % s->spb;
1196 blk = sector / s->spb;
1198 if (test_batmap(s, blk))
1199 return MIN(nr_secs, s->spb - sec);
1201 bm = get_bitmap(s, blk);
1203 ASSERT(bm && bitmap_valid(bm));
1205 for (ret = 0; sec < s->spb && ret < nr_secs; sec++, ret++)
1206 if (vhd_bitmap_test(&s->vhd, bm->map, sec) != value)
1207 break;
1209 return ret;
1212 static inline struct vhd_request *
1213 alloc_vhd_request(struct vhd_state *s)
1215 struct vhd_request *req = NULL;
1217 if (s->vreq_free_count > 0) {
1218 req = s->vreq_free[--s->vreq_free_count];
1219 ASSERT(req->treq.secs == 0);
1220 init_vhd_request(s, req);
1221 return req;
1224 return NULL;
1227 static inline void
1228 free_vhd_request(struct vhd_state *s, struct vhd_request *req)
1230 memset(req, 0, sizeof(struct vhd_request));
1231 s->vreq_free[s->vreq_free_count++] = req;
1234 static inline void
1235 aio_read(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
1237 struct tiocb *tiocb = &req->tiocb;
1239 td_prep_read(tiocb, s->vhd.fd, req->treq.buf,
1240 vhd_sectors_to_bytes(req->treq.secs),
1241 offset, vhd_complete, req);
1242 td_queue_tiocb(s->driver, tiocb);
1244 s->queued++;
1245 s->reads++;
1246 s->read_size += req->treq.secs;
1247 TRACE(s);
1250 static inline void
1251 aio_write(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
1253 struct tiocb *tiocb = &req->tiocb;
1255 td_prep_write(tiocb, s->vhd.fd, req->treq.buf,
1256 vhd_sectors_to_bytes(req->treq.secs),
1257 offset, vhd_complete, req);
1258 td_queue_tiocb(s->driver, tiocb);
1260 s->queued++;
1261 s->writes++;
1262 s->write_size += req->treq.secs;
1263 TRACE(s);
1266 static inline uint64_t
1267 reserve_new_block(struct vhd_state *s, uint32_t blk)
1269 int gap = 0;
1271 ASSERT(!test_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED));
1273 /* data region of segment should begin on page boundary */
1274 if ((s->next_db + s->bm_secs) % s->spp)
1275 gap = (s->spp - ((s->next_db + s->bm_secs) % s->spp));
1277 s->bat.pbw_blk = blk;
1278 s->bat.pbw_offset = s->next_db + gap;
1280 return s->next_db;
1283 static int
1284 schedule_bat_write(struct vhd_state *s)
1286 int i;
1287 u32 blk;
1288 char *buf;
1289 u64 offset;
1290 struct vhd_request *req;
1292 ASSERT(bat_locked(s));
1294 req = &s->bat.req;
1295 buf = s->bat.bat_buf;
1296 blk = s->bat.pbw_blk;
1298 init_vhd_request(s, req);
1299 memcpy(buf, &bat_entry(s, blk - (blk % 128)), 512);
1301 ((u32 *)buf)[blk % 128] = s->bat.pbw_offset;
1303 for (i = 0; i < 128; i++)
1304 BE32_OUT(&((u32 *)buf)[i]);
1306 offset = s->vhd.header.table_offset + (blk - (blk % 128)) * 4;
1307 req->treq.secs = 1;
1308 req->treq.buf = buf;
1309 req->op = VHD_OP_BAT_WRITE;
1310 req->next = NULL;
1312 aio_write(s, req, offset);
1313 set_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED);
1315 DBG(TLOG_DBG, "blk: 0x%04x, pbwo: 0x%08"PRIx64", "
1316 "table_offset: 0x%08"PRIx64"\n", blk, s->bat.pbw_offset, offset);
1318 return 0;
1321 static void
1322 schedule_zero_bm_write(struct vhd_state *s,
1323 struct vhd_bitmap *bm, uint64_t lb_end)
1325 uint64_t offset;
1326 struct vhd_request *req = &s->bat.zero_req;
1328 init_vhd_request(s, req);
1330 offset = vhd_sectors_to_bytes(lb_end);
1331 req->op = VHD_OP_ZERO_BM_WRITE;
1332 req->treq.sec = s->bat.pbw_blk * s->spb;
1333 req->treq.secs = (s->bat.pbw_offset - lb_end) + s->bm_secs;
1334 req->treq.buf = vhd_zeros(vhd_sectors_to_bytes(req->treq.secs));
1335 req->next = NULL;
1337 DBG(TLOG_DBG, "blk: 0x%04x, writing zero bitmap at 0x%08"PRIx64"\n",
1338 s->bat.pbw_blk, offset);
1340 lock_bitmap(bm);
1341 add_to_transaction(&bm->tx, req);
1342 aio_write(s, req, offset);
1345 static int
1346 update_bat(struct vhd_state *s, uint32_t blk)
1348 int err;
1349 uint64_t lb_end;
1350 struct vhd_bitmap *bm;
1352 ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
1354 if (bat_locked(s)) {
1355 ASSERT(s->bat.pbw_blk == blk);
1356 return 0;
1359 /* empty bitmap could already be in
1360 * cache if earlier bat update failed */
1361 bm = get_bitmap(s, blk);
1362 if (!bm) {
1363 /* install empty bitmap in cache */
1364 err = alloc_vhd_bitmap(s, &bm, blk);
1365 if (err)
1366 return err;
1368 install_bitmap(s, bm);
1371 lock_bat(s);
1372 lb_end = reserve_new_block(s, blk);
1373 schedule_zero_bm_write(s, bm, lb_end);
1374 set_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT);
1376 return 0;
1379 static int
1380 allocate_block(struct vhd_state *s, uint32_t blk)
1382 char *zeros;
1383 int err, gap;
1384 uint64_t offset, size;
1385 struct vhd_bitmap *bm;
1387 ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
1389 if (bat_locked(s)) {
1390 ASSERT(s->bat.pbw_blk == blk);
1391 if (s->bat.req.error)
1392 return -EBUSY;
1393 return 0;
1396 gap = 0;
1397 s->bat.pbw_blk = blk;
1398 offset = vhd_sectors_to_bytes(s->next_db);
1400 /* data region of segment should begin on page boundary */
1401 if ((s->next_db + s->bm_secs) % s->spp) {
1402 gap = (s->spp - ((s->next_db + s->bm_secs) % s->spp));
1403 s->next_db += gap;
1406 s->bat.pbw_offset = s->next_db;
1408 DBG(TLOG_DBG, "blk: 0x%04x, pbwo: 0x%08"PRIx64"\n",
1409 blk, s->bat.pbw_offset);
1411 if (lseek(s->vhd.fd, offset, SEEK_SET) == (off_t)-1) {
1412 ERR(errno, "lseek failed\n");
1413 return -errno;
1416 size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
1417 err = write(s->vhd.fd, vhd_zeros(size), size);
1418 if (err != size) {
1419 err = (err == -1 ? -errno : -EIO);
1420 ERR(err, "write failed");
1421 return err;
1424 /* empty bitmap could already be in
1425 * cache if earlier bat update failed */
1426 bm = get_bitmap(s, blk);
1427 if (!bm) {
1428 /* install empty bitmap in cache */
1429 err = alloc_vhd_bitmap(s, &bm, blk);
1430 if (err)
1431 return err;
1433 install_bitmap(s, bm);
1436 lock_bat(s);
1437 lock_bitmap(bm);
1438 schedule_bat_write(s);
1439 add_to_transaction(&bm->tx, &s->bat.req);
1441 return 0;
1444 static int
1445 schedule_data_read(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
1447 u64 offset;
1448 u32 blk = 0, sec = 0;
1449 struct vhd_bitmap *bm;
1450 struct vhd_request *req;
1452 if (s->vhd.footer.type == HD_TYPE_FIXED) {
1453 offset = vhd_sectors_to_bytes(treq.sec);
1454 goto make_request;
1457 blk = treq.sec / s->spb;
1458 sec = treq.sec % s->spb;
1459 bm = get_bitmap(s, blk);
1460 offset = bat_entry(s, blk);
1462 ASSERT(offset != DD_BLK_UNUSED);
1463 ASSERT(test_batmap(s, blk) || (bm && bitmap_valid(bm)));
1465 offset += s->bm_secs + sec;
1466 offset = vhd_sectors_to_bytes(offset);
1468 make_request:
1469 req = alloc_vhd_request(s);
1470 if (!req)
1471 return -EBUSY;
1473 req->treq = treq;
1474 req->flags = flags;
1475 req->op = VHD_OP_DATA_READ;
1476 req->next = NULL;
1478 aio_read(s, req, offset);
1480 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, sec: 0x%04x, "
1481 "nr_secs: 0x%04x, offset: 0x%08"PRIx64", flags: 0x%08x, buf: %p\n",
1482 s->vhd.file, treq.sec, blk, sec, treq.secs, offset, req->flags,
1483 treq.buf);
1485 return 0;
1488 static int
1489 schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
1491 int err;
1492 u64 offset;
1493 u32 blk = 0, sec = 0;
1494 struct vhd_bitmap *bm = NULL;
1495 struct vhd_request *req;
1497 if (s->vhd.footer.type == HD_TYPE_FIXED) {
1498 offset = vhd_sectors_to_bytes(treq.sec);
1499 goto make_request;
1502 blk = treq.sec / s->spb;
1503 sec = treq.sec % s->spb;
1504 offset = bat_entry(s, blk);
1506 if (test_vhd_flag(flags, VHD_FLAG_REQ_UPDATE_BAT)) {
1507 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE))
1508 err = allocate_block(s, blk);
1509 else
1510 err = update_bat(s, blk);
1512 if (err)
1513 return err;
1515 offset = s->bat.pbw_offset;
1518 offset += s->bm_secs + sec;
1519 offset = vhd_sectors_to_bytes(offset);
1521 make_request:
1522 req = alloc_vhd_request(s);
1523 if (!req)
1524 return -EBUSY;
1526 req->treq = treq;
1527 req->flags = flags;
1528 req->op = VHD_OP_DATA_WRITE;
1529 req->next = NULL;
1531 if (test_vhd_flag(flags, VHD_FLAG_REQ_UPDATE_BITMAP)) {
1532 bm = get_bitmap(s, blk);
1533 ASSERT(bm && bitmap_valid(bm));
1534 lock_bitmap(bm);
1536 if (bm->tx.closed) {
1537 add_to_tail(&bm->queue, req);
1538 set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
1539 } else
1540 add_to_transaction(&bm->tx, req);
1543 aio_write(s, req, offset);
1545 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, sec: 0x%04x, "
1546 "nr_secs: 0x%04x, offset: 0x%08"PRIx64", flags: 0x%08x\n",
1547 s->vhd.file, treq.sec, blk, sec, treq.secs, offset, req->flags);
1549 return 0;
1552 static int
1553 schedule_bitmap_read(struct vhd_state *s, uint32_t blk)
1555 int err;
1556 u64 offset;
1557 struct vhd_bitmap *bm;
1558 struct vhd_request *req = NULL;
1560 ASSERT(vhd_type_dynamic(&s->vhd));
1562 offset = bat_entry(s, blk);
1564 ASSERT(offset != DD_BLK_UNUSED);
1565 ASSERT(!get_bitmap(s, blk));
1567 offset = vhd_sectors_to_bytes(offset);
1569 err = alloc_vhd_bitmap(s, &bm, blk);
1570 if (err)
1571 return err;
1573 req = &bm->req;
1574 init_vhd_request(s, req);
1576 req->treq.sec = blk * s->spb;
1577 req->treq.secs = s->bm_secs;
1578 req->treq.buf = bm->map;
1579 req->treq.cb = NULL;
1580 req->op = VHD_OP_BITMAP_READ;
1581 req->next = NULL;
1583 aio_read(s, req, offset);
1584 lock_bitmap(bm);
1585 install_bitmap(s, bm);
1586 set_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING);
1588 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, nr_secs: 0x%04x, "
1589 "offset: 0x%08"PRIx64"\n", s->vhd.file, req->treq.sec, blk,
1590 req->treq.secs, offset);
1592 return 0;
1595 static void
1596 schedule_bitmap_write(struct vhd_state *s, uint32_t blk)
1598 u64 offset;
1599 struct vhd_bitmap *bm;
1600 struct vhd_request *req;
1602 bm = get_bitmap(s, blk);
1603 offset = bat_entry(s, blk);
1605 ASSERT(vhd_type_dynamic(&s->vhd));
1606 ASSERT(bm && bitmap_valid(bm) &&
1607 !test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING));
1609 if (offset == DD_BLK_UNUSED) {
1610 ASSERT(bat_locked(s) && s->bat.pbw_blk == blk);
1611 offset = s->bat.pbw_offset;
1614 offset = vhd_sectors_to_bytes(offset);
1616 req = &bm->req;
1617 init_vhd_request(s, req);
1619 req->treq.sec = blk * s->spb;
1620 req->treq.secs = s->bm_secs;
1621 req->treq.buf = bm->shadow;
1622 req->treq.cb = NULL;
1623 req->op = VHD_OP_BITMAP_WRITE;
1624 req->next = NULL;
1626 aio_write(s, req, offset);
1627 lock_bitmap(bm);
1628 touch_bitmap(s, bm); /* bump lru count */
1629 set_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING);
1631 DBG(TLOG_DBG, "%s: blk: 0x%04x, sec: 0x%08"PRIx64", nr_secs: 0x%04x, "
1632 "offset: 0x%"PRIx64"\n", s->vhd.file, blk, req->treq.sec,
1633 req->treq.secs, offset);
1636 /*
1637 * queued requests will be submitted once the bitmap
1638 * describing them is read and the requests are validated.
1639 */
1640 static int
1641 __vhd_queue_request(struct vhd_state *s, uint8_t op, td_request_t treq)
1643 u32 blk;
1644 struct vhd_bitmap *bm;
1645 struct vhd_request *req;
1647 ASSERT(vhd_type_dynamic(&s->vhd));
1649 blk = treq.sec / s->spb;
1650 bm = get_bitmap(s, blk);
1652 ASSERT(bm && test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING));
1654 req = alloc_vhd_request(s);
1655 if (!req)
1656 return -EBUSY;
1658 req->treq = treq;
1659 req->op = op;
1660 req->next = NULL;
1662 add_to_tail(&bm->waiting, req);
1663 lock_bitmap(bm);
1665 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x nr_secs: 0x%04x, "
1666 "op: %u\n", s->vhd.file, treq.sec, blk, treq.secs, op);
1668 TRACE(s);
1669 return 0;
1672 static void
1673 vhd_queue_read(td_driver_t *driver, td_request_t treq)
1675 struct vhd_state *s = (struct vhd_state *)driver->data;
1677 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", secs: 0x%04x (seg: %d)\n",
1678 s->vhd.file, treq.sec, treq.secs, treq.sidx);
1680 while (treq.secs) {
1681 int err;
1682 td_request_t clone;
1684 err = 0;
1685 clone = treq;
1687 switch (read_bitmap_cache(s, clone.sec, VHD_OP_DATA_READ)) {
1688 case -EINVAL:
1689 err = -EINVAL;
1690 goto fail;
1692 case VHD_BM_BAT_CLEAR:
1693 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1694 td_forward_request(clone);
1695 break;
1697 case VHD_BM_BIT_CLEAR:
1698 clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 0);
1699 td_forward_request(clone);
1700 break;
1702 case VHD_BM_BIT_SET:
1703 clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 1);
1704 err = schedule_data_read(s, clone, 0);
1705 if (err)
1706 goto fail;
1707 break;
1709 case VHD_BM_NOT_CACHED:
1710 err = schedule_bitmap_read(s, clone.sec / s->spb);
1711 if (err)
1712 goto fail;
1714 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1715 err = __vhd_queue_request(s, VHD_OP_DATA_READ, clone);
1716 if (err)
1717 goto fail;
1718 break;
1720 case VHD_BM_READ_PENDING:
1721 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1722 err = __vhd_queue_request(s, VHD_OP_DATA_READ, clone);
1723 if (err)
1724 goto fail;
1725 break;
1727 case VHD_BM_BAT_LOCKED:
1728 default:
1729 ASSERT(0);
1730 break;
1733 treq.sec += clone.secs;
1734 treq.secs -= clone.secs;
1735 treq.buf += vhd_sectors_to_bytes(clone.secs);
1736 continue;
1738 fail:
1739 clone.secs = treq.secs;
1740 td_complete_request(clone, err);
1741 break;
1745 static void
1746 vhd_queue_write(td_driver_t *driver, td_request_t treq)
1748 struct vhd_state *s = (struct vhd_state *)driver->data;
1750 DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", secs: 0x%04x, (seg: %d)\n",
1751 s->vhd.file, treq.sec, treq.secs, treq.sidx);
1753 while (treq.secs) {
1754 int err;
1755 uint8_t flags;
1756 td_request_t clone;
1758 err = 0;
1759 flags = 0;
1760 clone = treq;
1762 switch (read_bitmap_cache(s, clone.sec, VHD_OP_DATA_WRITE)) {
1763 case -EINVAL:
1764 err = -EINVAL;
1765 goto fail;
1767 case VHD_BM_BAT_LOCKED:
1768 err = -EBUSY;
1769 clone.blocked = 1;
1770 goto fail;
1772 case VHD_BM_BAT_CLEAR:
1773 flags = (VHD_FLAG_REQ_UPDATE_BAT |
1774 VHD_FLAG_REQ_UPDATE_BITMAP);
1775 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1776 err = schedule_data_write(s, clone, flags);
1777 if (err)
1778 goto fail;
1779 break;
1781 case VHD_BM_BIT_CLEAR:
1782 flags = VHD_FLAG_REQ_UPDATE_BITMAP;
1783 clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 0);
1784 err = schedule_data_write(s, clone, flags);
1785 if (err)
1786 goto fail;
1787 break;
1789 case VHD_BM_BIT_SET:
1790 clone.secs = read_bitmap_cache_span(s, clone.sec, clone.secs, 1);
1791 err = schedule_data_write(s, clone, 0);
1792 if (err)
1793 goto fail;
1794 break;
1796 case VHD_BM_NOT_CACHED:
1797 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1798 err = schedule_bitmap_read(s, clone.sec / s->spb);
1799 if (err)
1800 goto fail;
1802 err = __vhd_queue_request(s, VHD_OP_DATA_WRITE, clone);
1803 if (err)
1804 goto fail;
1805 break;
1807 case VHD_BM_READ_PENDING:
1808 clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
1809 err = __vhd_queue_request(s, VHD_OP_DATA_WRITE, clone);
1810 if (err)
1811 goto fail;
1812 break;
1814 default:
1815 ASSERT(0);
1816 break;
1819 treq.sec += clone.secs;
1820 treq.secs -= clone.secs;
1821 treq.buf += vhd_sectors_to_bytes(clone.secs);
1822 continue;
1824 fail:
1825 clone.secs = treq.secs;
1826 td_complete_request(clone, err);
1827 break;
1831 static inline void
1832 signal_completion(struct vhd_request *list, int error)
1834 struct vhd_state *s;
1835 struct vhd_request *r, *next;
1837 if (!list)
1838 return;
1840 r = list;
1841 s = list->state;
1843 while (r) {
1844 int err;
1846 err = (error ? error : r->error);
1847 next = r->next;
1848 td_complete_request(r->treq, err);
1849 DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x%04"PRIx64", "
1850 "err: %d\n", r->treq.sec, r->treq.sec / s->spb, err);
1851 free_vhd_request(s, r);
1852 r = next;
1854 s->returned++;
1855 TRACE(s);
1859 static void
1860 start_new_bitmap_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
1862 int i, error = 0;
1863 struct vhd_transaction *tx;
1864 struct vhd_request *r, *next;
1866 if (!bm->queue.head)
1867 return;
1869 DBG(TLOG_DBG, "blk: 0x%04x\n", bm->blk);
1871 r = bm->queue.head;
1872 tx = &bm->tx;
1873 clear_req_list(&bm->queue);
1875 if (r && bat_entry(s, bm->blk) == DD_BLK_UNUSED)
1876 tx->error = -EIO;
1878 while (r) {
1879 next = r->next;
1880 r->next = NULL;
1881 clear_vhd_flag(r->flags, VHD_FLAG_REQ_QUEUED);
1883 add_to_transaction(tx, r);
1884 if (test_vhd_flag(r->flags, VHD_FLAG_REQ_FINISHED)) {
1885 tx->finished++;
1886 if (!r->error) {
1887 u32 sec = r->treq.sec % s->spb;
1888 for (i = 0; i < r->treq.secs; i++)
1889 vhd_bitmap_set(&s->vhd,
1890 bm->shadow, sec + i);
1893 r = next;
1896 /* perhaps all the queued writes already completed? */
1897 if (tx->started && transaction_completed(tx))
1898 finish_data_transaction(s, bm);
1901 static void
1902 finish_bat_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
1904 struct vhd_transaction *tx = &bm->tx;
1906 if (!bat_locked(s))
1907 return;
1909 if (s->bat.pbw_blk != bm->blk)
1910 return;
1912 if (!s->bat.req.error)
1913 goto release;
1915 if (!test_vhd_flag(tx->status, VHD_FLAG_TX_LIVE))
1916 goto release;
1918 tx->closed = 1;
1919 return;
1921 release:
1922 DBG(TLOG_DBG, "blk: 0x%04x\n", bm->blk);
1923 unlock_bat(s);
1924 init_bat(s);
1927 static void
1928 finish_bitmap_transaction(struct vhd_state *s,
1929 struct vhd_bitmap *bm, int error)
1931 int map_size;
1932 struct vhd_transaction *tx = &bm->tx;
1934 DBG(TLOG_DBG, "blk: 0x%04x, err: %d\n", bm->blk, error);
1935 tx->error = (tx->error ? tx->error : error);
1936 map_size = vhd_sectors_to_bytes(s->bm_secs);
1938 if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE)) {
1939 if (test_vhd_flag(tx->status, VHD_FLAG_TX_UPDATE_BAT)) {
1940 /* still waiting for bat write */
1941 ASSERT(bm->blk == s->bat.pbw_blk);
1942 ASSERT(test_vhd_flag(s->bat.status,
1943 VHD_FLAG_BAT_WRITE_STARTED));
1944 s->bat.req.tx = tx;
1945 return;
1949 if (tx->error) {
1950 /* undo changes to shadow */
1951 memcpy(bm->shadow, bm->map, map_size);
1952 } else {
1953 /* complete atomic write */
1954 memcpy(bm->map, bm->shadow, map_size);
1955 if (!test_batmap(s, bm->blk) && bitmap_full(s, bm))
1956 set_batmap(s, bm->blk);
1959 /* transaction done; signal completions */
1960 signal_completion(tx->requests.head, tx->error);
1961 init_tx(tx);
1962 start_new_bitmap_transaction(s, bm);
1964 if (!bitmap_in_use(bm))
1965 unlock_bitmap(bm);
1967 finish_bat_transaction(s, bm);
1970 static void
1971 finish_data_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
1973 struct vhd_transaction *tx = &bm->tx;
1975 DBG(TLOG_DBG, "blk: 0x%04x\n", bm->blk);
1977 tx->closed = 1;
1979 if (!tx->error)
1980 return schedule_bitmap_write(s, bm->blk);
1982 return finish_bitmap_transaction(s, bm, 0);
1985 static void
1986 finish_bat_write(struct vhd_request *req)
1988 struct vhd_bitmap *bm;
1989 struct vhd_transaction *tx;
1990 struct vhd_state *s = req->state;
1992 s->returned++;
1993 TRACE(s);
1995 bm = get_bitmap(s, s->bat.pbw_blk);
1997 DBG(TLOG_DBG, "blk 0x%04x, pbwo: 0x%08"PRIx64", err %d\n",
1998 s->bat.pbw_blk, s->bat.pbw_offset, req->error);
1999 ASSERT(bm && bitmap_valid(bm));
2000 ASSERT(bat_locked(s) &&
2001 test_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED));
2003 tx = &bm->tx;
2004 ASSERT(test_vhd_flag(tx->status, VHD_FLAG_TX_LIVE));
2006 if (!req->error) {
2007 bat_entry(s, s->bat.pbw_blk) = s->bat.pbw_offset;
2008 s->next_db = s->bat.pbw_offset + s->spb + s->bm_secs;
2009 } else
2010 tx->error = req->error;
2012 if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE)) {
2013 tx->finished++;
2014 remove_from_req_list(&tx->requests, req);
2015 if (transaction_completed(tx))
2016 finish_data_transaction(s, bm);
2017 } else {
2018 clear_vhd_flag(tx->status, VHD_FLAG_TX_UPDATE_BAT);
2019 if (s->bat.req.tx)
2020 finish_bitmap_transaction(s, bm, req->error);
2023 finish_bat_transaction(s, bm);
2026 static void
2027 finish_zero_bm_write(struct vhd_request *req)
2029 u32 blk;
2030 struct vhd_bitmap *bm;
2031 struct vhd_transaction *tx = req->tx;
2032 struct vhd_state *s = req->state;
2034 s->returned++;
2035 TRACE(s);
2037 blk = req->treq.sec / s->spb;
2038 bm = get_bitmap(s, blk);
2040 DBG(TLOG_DBG, "blk: 0x%04x\n", blk);
2041 ASSERT(bat_locked(s));
2042 ASSERT(s->bat.pbw_blk == blk);
2043 ASSERT(bm && bitmap_valid(bm) && bitmap_locked(bm));
2045 tx->finished++;
2046 remove_from_req_list(&tx->requests, req);
2048 if (req->error) {
2049 unlock_bat(s);
2050 init_bat(s);
2051 tx->error = req->error;
2052 clear_vhd_flag(tx->status, VHD_FLAG_TX_UPDATE_BAT);
2053 } else
2054 schedule_bat_write(s);
2056 if (transaction_completed(tx))
2057 finish_data_transaction(s, bm);
2060 static void
2061 finish_bitmap_read(struct vhd_request *req)
2063 u32 blk;
2064 struct vhd_bitmap *bm;
2065 struct vhd_request *r, *next;
2066 struct vhd_state *s = req->state;
2068 s->returned++;
2069 TRACE(s);
2071 blk = req->treq.sec / s->spb;
2072 bm = get_bitmap(s, blk);
2074 DBG(TLOG_DBG, "blk: 0x%04x\n", blk);
2075 ASSERT(bm && test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING));
2077 r = bm->waiting.head;
2078 clear_req_list(&bm->waiting);
2079 clear_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING);
2081 if (!req->error) {
2082 memcpy(bm->shadow, bm->map, vhd_sectors_to_bytes(s->bm_secs));
2084 while (r) {
2085 struct vhd_request tmp;
2087 tmp = *r;
2088 next = r->next;
2089 free_vhd_request(s, r);
2091 ASSERT(tmp.op == VHD_OP_DATA_READ ||
2092 tmp.op == VHD_OP_DATA_WRITE);
2094 if (tmp.op == VHD_OP_DATA_READ)
2095 vhd_queue_read(s->driver, tmp.treq);
2096 else if (tmp.op == VHD_OP_DATA_WRITE)
2097 vhd_queue_write(s->driver, tmp.treq);
2099 r = next;
2101 } else {
2102 int err = req->error;
2103 unlock_bitmap(bm);
2104 free_vhd_bitmap(s, bm);
2105 return signal_completion(r, err);
2108 if (!bitmap_in_use(bm))
2109 unlock_bitmap(bm);
2112 static void
2113 finish_bitmap_write(struct vhd_request *req)
2115 u32 blk;
2116 struct vhd_bitmap *bm;
2117 struct vhd_transaction *tx;
2118 struct vhd_state *s = req->state;
2120 s->returned++;
2121 TRACE(s);
2123 blk = req->treq.sec / s->spb;
2124 bm = get_bitmap(s, blk);
2125 tx = &bm->tx;
2127 DBG(TLOG_DBG, "blk: 0x%04x, started: %d, finished: %d\n",
2128 blk, tx->started, tx->finished);
2129 ASSERT(tx->closed);
2130 ASSERT(bm && bitmap_valid(bm));
2131 ASSERT(test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING));
2133 clear_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING);
2135 finish_bitmap_transaction(s, bm, req->error);
2138 static void
2139 finish_data_read(struct vhd_request *req)
2141 struct vhd_state *s = req->state;
2143 DBG(TLOG_DBG, "lsec 0x%08"PRIx64", blk: 0x%04"PRIx64"\n",
2144 req->treq.sec, req->treq.sec / s->spb);
2145 signal_completion(req, 0);
2148 static void
2149 finish_data_write(struct vhd_request *req)
2151 int i;
2152 struct vhd_transaction *tx = req->tx;
2153 struct vhd_state *s = (struct vhd_state *)req->state;
2155 set_vhd_flag(req->flags, VHD_FLAG_REQ_FINISHED);
2157 if (tx) {
2158 u32 blk, sec;
2159 struct vhd_bitmap *bm;
2161 blk = req->treq.sec / s->spb;
2162 sec = req->treq.sec % s->spb;
2163 bm = get_bitmap(s, blk);
2165 ASSERT(bm && bitmap_valid(bm) && bitmap_locked(bm));
2167 tx->finished++;
2169 DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x04%"PRIx64", "
2170 "tx->started: %d, tx->finished: %d\n", req->treq.sec,
2171 req->treq.sec / s->spb, tx->started, tx->finished);
2173 if (!req->error)
2174 for (i = 0; i < req->treq.secs; i++)
2175 vhd_bitmap_set(&s->vhd, bm->shadow, sec + i);
2177 if (transaction_completed(tx))
2178 finish_data_transaction(s, bm);
2180 } else if (!test_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED)) {
2181 ASSERT(!req->next);
2182 DBG(TLOG_DBG, "lsec: 0x%08"PRIx64", blk: 0x%04"PRIx64"\n",
2183 req->treq.sec, req->treq.sec / s->spb);
2184 signal_completion(req, 0);
2188 void
2189 vhd_complete(void *arg, struct tiocb *tiocb, int err)
2191 struct vhd_request *req = (struct vhd_request *)arg;
2192 struct vhd_state *s = req->state;
2193 struct iocb *io = &tiocb->iocb;
2195 s->completed++;
2196 TRACE(s);
2198 req->error = err;
2200 if (req->error)
2201 ERR(req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
2202 "nbytes: %lu, blk: %"PRIu64", blk_offset: %u",
2203 s->vhd.file, req->op, req->treq.sec, req->treq.secs,
2204 io->u.c.nbytes, req->treq.sec / s->spb,
2205 bat_entry(s, req->treq.sec / s->spb));
2207 switch (req->op) {
2208 case VHD_OP_DATA_READ:
2209 finish_data_read(req);
2210 break;
2212 case VHD_OP_DATA_WRITE:
2213 finish_data_write(req);
2214 break;
2216 case VHD_OP_BITMAP_READ:
2217 finish_bitmap_read(req);
2218 break;
2220 case VHD_OP_BITMAP_WRITE:
2221 finish_bitmap_write(req);
2222 break;
2224 case VHD_OP_ZERO_BM_WRITE:
2225 finish_zero_bm_write(req);
2226 break;
2228 case VHD_OP_BAT_WRITE:
2229 finish_bat_write(req);
2230 break;
2232 default:
2233 ASSERT(0);
2234 break;
2238 void
2239 vhd_debug(td_driver_t *driver)
2241 int i;
2242 struct vhd_state *s = (struct vhd_state *)driver->data;
2244 DBG(TLOG_WARN, "%s: QUEUED: 0x%08"PRIx64", COMPLETED: 0x%08"PRIx64", "
2245 "RETURNED: 0x%08"PRIx64"\n", s->vhd.file, s->queued, s->completed,
2246 s->returned);
2247 DBG(TLOG_WARN, "WRITES: 0x%08"PRIx64", AVG_WRITE_SIZE: %f\n",
2248 s->writes, (s->writes ? ((float)s->write_size / s->writes) : 0.0));
2249 DBG(TLOG_WARN, "READS: 0x%08"PRIx64", AVG_READ_SIZE: %f\n",
2250 s->reads, (s->reads ? ((float)s->read_size / s->reads) : 0.0));
2252 DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%lu total)\n", VHD_REQS_DATA);
2253 for (i = 0; i < VHD_REQS_DATA; i++) {
2254 struct vhd_request *r = &s->vreq_list[i];
2255 td_request_t *t = &r->treq;
2256 if (t->secs)
2257 DBG(TLOG_WARN, "%d: id: 0x%04"PRIx64", err: %d, op: %d,"
2258 " lsec: 0x%08"PRIx64", flags: %d, this: %p, "
2259 "next: %p, tx: %p\n", i, t->id, r->error, r->op,
2260 t->sec, r->flags, r, r->next, r->tx);
2263 DBG(TLOG_WARN, "BITMAP CACHE:\n");
2264 for (i = 0; i < VHD_CACHE_SIZE; i++) {
2265 int qnum = 0, wnum = 0, rnum = 0;
2266 struct vhd_bitmap *bm = s->bitmap[i];
2267 struct vhd_transaction *tx;
2268 struct vhd_request *r;
2270 if (!bm)
2271 continue;
2273 tx = &bm->tx;
2274 r = bm->queue.head;
2275 while (r) {
2276 qnum++;
2277 r = r->next;
2280 r = bm->waiting.head;
2281 while (r) {
2282 wnum++;
2283 r = r->next;
2286 r = tx->requests.head;
2287 while (r) {
2288 rnum++;
2289 r = r->next;
2292 DBG(TLOG_WARN, "%d: blk: 0x%04x, status: 0x%08x, q: %p, qnum: %d, w: %p, "
2293 "wnum: %d, locked: %d, in use: %d, tx: %p, tx_error: %d, "
2294 "started: %d, finished: %d, status: %u, reqs: %p, nreqs: %d\n",
2295 i, bm->blk, bm->status, bm->queue.head, qnum, bm->waiting.head,
2296 wnum, bitmap_locked(bm), bitmap_in_use(bm), tx, tx->error,
2297 tx->started, tx->finished, tx->status, tx->requests.head, rnum);
2300 DBG(TLOG_WARN, "BAT: status: 0x%08x, pbw_blk: 0x%04x, "
2301 "pbw_off: 0x%08"PRIx64", tx: %p\n", s->bat.status, s->bat.pbw_blk,
2302 s->bat.pbw_offset, s->bat.req.tx);
2304 /*
2305 for (i = 0; i < s->hdr.max_bat_size; i++)
2306 DPRINTF("%d: %u\n", i, s->bat.bat[i]);
2307 */
2310 struct tap_disk tapdisk_vhd = {
2311 .disk_type = "tapdisk_vhd",
2312 .flags = 0,
2313 .private_data_size = sizeof(struct vhd_state),
2314 .td_open = _vhd_open,
2315 .td_close = _vhd_close,
2316 .td_queue_read = vhd_queue_read,
2317 .td_queue_write = vhd_queue_write,
2318 .td_get_parent_id = vhd_get_parent_id,
2319 .td_validate_parent = vhd_validate_parent,
2320 .td_debug = vhd_debug,
2321 };