ia64/xen-unstable

view tools/blktap2/drivers/block-qcow.c @ 19817:b7f73a7f3078

blktap2: portability fixes for NetBSD

- Use standard off_t and lseek() instead of non-portable off64_t and
lseek64()
- Use uuid API as documented in DCE 1.1 RPC specification
- Add NetBSD implementation for blk_getimagesize() and
blk_getsectorsize()
- Use blk_getimagesize() and blk_getsectorsize()
- Fix uuid header check

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jun 23 17:24:14 2009 +0100 (2009-06-23)
parents 50e048b77ad1
children
line source
1 /* block-qcow.c
2 *
3 * Asynchronous Qemu copy-on-write disk implementation.
4 * Code based on the Qemu implementation
5 * (see copyright notice below)
6 *
7 * (c) 2006 Andrew Warfield and Julian Chesterfield
8 *
9 */
11 /*
12 * Block driver for the QCOW format
13 *
14 * Copyright (c) 2004 Fabrice Bellard
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this software and associated documentation files(the "Software"), to deal
18 * in the Software without restriction, including without limitation the rights
19 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
20 * copies of the Software, and to permit persons to whom the Software is
21 * furnished to do so, subject to the following conditions:
22 */
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <sys/statvfs.h>
30 #include <sys/stat.h>
31 #include <sys/ioctl.h>
32 #include <string.h>
33 #include <zlib.h>
34 #include <inttypes.h>
35 #include <libaio.h>
36 #include <openssl/md5.h>
37 #include <limits.h>
38 #include "bswap.h"
39 #include "aes.h"
41 #include "tapdisk.h"
42 #include "tapdisk-driver.h"
43 #include "tapdisk-interface.h"
44 #include "qcow.h"
45 #include "blk.h"
46 #include "atomicio.h"
/* *BSD has no O_LARGEFILE */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif

#if 1
/*
 * Debug assertion: log the failed expression with its location, then
 * deliberately dereference NULL to crash and leave a core dump.
 * Wrapped in do { } while (0) so that `ASSERT(x);` is a single
 * statement and composes safely inside unbraced if/else bodies
 * (the original form left a dangling `;` after the if-block).
 */
#define ASSERT(_p)							\
	do {								\
		if (!(_p)) {						\
			DPRINTF("Assertion '%s' failed, line %d, file %s", \
				#_p, __LINE__, __FILE__);		\
			*(int *)0 = 0;					\
		}							\
	} while (0)
#else
#define ASSERT(_p) ((void)0)
#endif
/*
 * Bookkeeping for one pending asynchronous request: the completion
 * callback plus the buffer/sector information needed to finish it.
 * NOTE(review): not referenced by any code visible in this file
 * (active requests use struct qcow_request) -- confirm it is still needed.
 */
61 struct pending_aio {
62 td_callback_t cb;
63 int id;
64 void *private;
65 int nb_sectors;
66 char *buf;
67 uint64_t sector;
68 };
#undef IOCB_IDX
/* Index of AIO control block _io within state _s's iocb_list array
 * (plain pointer arithmetic; both macros expansion-safe). */
#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)

/* OR with zero: yields _b unchanged (zero-detection placeholder).
 * Argument now parenthesized so low-precedence expressions such as
 * ZERO_TEST(a ? b : c) expand correctly. */
#define ZERO_TEST(_b) ((_b) | 0x00)
/*
 * One in-flight tapdisk request: the original td_request_t (returned
 * to the layer above on completion), the AIO control block submitted
 * for it, and a back-pointer to the owning driver state.
 */
75 struct qcow_request {
76 td_request_t treq;
77 struct tiocb tiocb;
78 struct tdqcow_state *state;
79 };
/* Forward declaration: defined later in this file. */
81 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
83 #ifdef USE_GCRYPT
85 #include <gcrypt.h>
87 uint32_t gen_cksum(char *ptr, int len)
88 {
89 int i;
90 uint32_t md[4];
92 /* Generate checksum */
93 gcry_md_hash_buffer(GCRY_MD_MD5, md, ptr, len);
95 return md[0];
96 }
98 #else /* use libcrypto */
100 #include <openssl/md5.h>
102 uint32_t gen_cksum(char *ptr, int len)
103 {
104 int i;
105 unsigned char *md;
106 uint32_t ret;
108 md = malloc(MD5_DIGEST_LENGTH);
109 if(!md) return 0;
111 /* Generate checksum */
112 if (MD5((unsigned char *)ptr, len, md) != md)
113 ret = 0;
114 else
115 memcpy(&ret, md, sizeof(uint32_t));
117 free(md);
118 return ret;
119 }
121 #endif
124 static void free_aio_state(struct tdqcow_state* s)
125 {
126 free(s->aio_requests);
127 free(s->aio_free_list);
128 }
130 static int init_aio_state(td_driver_t *driver)
131 {
132 int i, ret;
133 td_disk_info_t *bs = &(driver->info);
134 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
136 // A segment (i.e. a page) can span multiple clusters
137 s->max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
138 MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
140 s->aio_free_count = s->max_aio_reqs;
142 if (!(s->aio_requests = calloc(s->max_aio_reqs, sizeof(struct qcow_request))) ||
143 !(s->aio_free_list = calloc(s->max_aio_reqs, sizeof(struct qcow_request)))) {
144 DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
145 s->max_aio_reqs);
146 goto fail;
147 }
149 for (i = 0; i < s->max_aio_reqs; i++)
150 s->aio_free_list[i] = &s->aio_requests[i];
152 DPRINTF("AIO state initialised\n");
154 return 0;
155 fail:
156 return -1;
157 }
159 int get_filesize(char *filename, uint64_t *size, struct stat *st)
160 {
161 int fd;
162 QCowHeader header;
164 /*Set to the backing file size*/
165 fd = open(filename, O_RDONLY);
166 if (fd < 0)
167 return -1;
168 if (read(fd, &header, sizeof(header)) < sizeof(header)) {
169 close(fd);
170 return -1;
171 }
172 close(fd);
174 be32_to_cpus(&header.magic);
175 be64_to_cpus(&header.size);
176 if (header.magic == QCOW_MAGIC) {
177 *size = header.size >> SECTOR_SHIFT;
178 return 0;
179 }
181 if(S_ISBLK(st->st_mode)) {
182 fd = open(filename, O_RDONLY);
183 if (fd < 0)
184 return -1;
185 if (blk_getimagesize(fd, size) != 0) {
186 printf("Unable to get Block device size\n");
187 close(fd);
188 return -1;
189 }
190 close(fd);
191 } else *size = (st->st_size >> SECTOR_SHIFT);
192 return 0;
193 }
195 static int qcow_set_key(struct tdqcow_state *s, const char *key)
196 {
197 uint8_t keybuf[16];
198 int len, i;
200 memset(keybuf, 0, 16);
201 len = strlen(key);
202 if (len > 16)
203 len = 16;
204 /* XXX: we could compress the chars to 7 bits to increase
205 entropy */
206 for (i = 0; i < len; i++) {
207 keybuf[i] = key[i];
208 }
209 s->crypt_method = s->crypt_method_header;
211 if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
212 return -1;
213 if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
214 return -1;
215 #if 0
216 /* test */
217 {
218 uint8_t in[16];
219 uint8_t out[16];
220 uint8_t tmp[16];
221 for (i=0; i<16; i++)
222 in[i] = i;
223 AES_encrypt(in, tmp, &s->aes_encrypt_key);
224 AES_decrypt(tmp, out, &s->aes_decrypt_key);
225 for (i = 0; i < 16; i++)
226 DPRINTF(" %02x", tmp[i]);
227 DPRINTF("\n");
228 for (i = 0; i < 16; i++)
229 DPRINTF(" %02x", out[i]);
230 DPRINTF("\n");
231 }
232 #endif
233 return 0;
234 }
/*
 * AIO completion callback: report the request's result to the layer
 * above, then return the qcow_request slot to the free list.
 */
236 void tdqcow_complete(void *arg, struct tiocb *tiocb, int err)
237 {
238 struct qcow_request *aio = (struct qcow_request *)arg;
239 struct tdqcow_state *s = aio->state;
241 td_complete_request(aio->treq, err);
243 s->aio_free_list[s->aio_free_count++] = aio;
244 }
246 static void async_read(td_driver_t *driver, td_request_t treq)
247 {
248 int size;
249 uint64_t offset;
250 struct qcow_request *aio;
251 struct tdqcow_state *prv;
253 prv = (struct tdqcow_state *)driver->data;
254 size = treq.secs * driver->info.sector_size;
255 offset = treq.sec * (uint64_t)driver->info.sector_size;
257 if (prv->aio_free_count == 0)
258 goto fail;
260 aio = prv->aio_free_list[--prv->aio_free_count];
261 aio->treq = treq;
262 aio->state = prv;
264 td_prep_read(&aio->tiocb, prv->fd, treq.buf,
265 size, offset, tdqcow_complete, aio);
266 td_queue_tiocb(driver, &aio->tiocb);
268 return;
270 fail:
271 td_complete_request(treq, -EBUSY);
272 }
274 static void async_write(td_driver_t *driver, td_request_t treq)
275 {
276 int size;
277 uint64_t offset;
278 struct qcow_request *aio;
279 struct tdqcow_state *prv;
281 prv = (struct tdqcow_state *)driver->data;
282 size = treq.secs * driver->info.sector_size;
283 offset = treq.sec * (uint64_t)driver->info.sector_size;
285 if (prv->aio_free_count == 0)
286 goto fail;
288 aio = prv->aio_free_list[--prv->aio_free_count];
289 aio->treq = treq;
290 aio->state = prv;
292 td_prep_write(&aio->tiocb, prv->fd, treq.buf,
293 size, offset, tdqcow_complete, aio);
294 td_queue_tiocb(driver, &aio->tiocb);
296 return;
298 fail:
299 td_complete_request(treq, -EBUSY);
300 }
302 /*
303 * The crypt function is compatible with the linux cryptoloop
304 * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
305 * supported .
306 */
/* AES-CBC over each 512-byte sector; the IV for a sector is its
 * little-endian sector number zero-padded to 16 bytes.  'enc' selects
 * encryption vs decryption for AES_cbc_encrypt().  Parameter 's' is
 * unused here and kept for signature symmetry with callers. */
307 static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
308 uint8_t *out_buf, const uint8_t *in_buf,
309 int nb_sectors, int enc,
310 const AES_KEY *key)
311 {
312 union {
313 uint64_t ll[2];
314 uint8_t b[16];
315 } ivec;
316 int i;
318 for (i = 0; i < nb_sectors; i++) {
/* Fresh IV each sector: low 8 bytes = sector number (LE), high 8 = 0. */
319 ivec.ll[0] = cpu_to_le64(sector_num);
320 ivec.ll[1] = 0;
321 AES_cbc_encrypt(in_buf, out_buf, 512, key,
322 ivec.b, enc);
323 sector_num++;
324 in_buf += 512;
325 out_buf += 512;
326 }
327 }
329 int qtruncate(int fd, off_t length, int sparse)
330 {
331 int ret, i;
332 int current = 0, rem = 0;
333 uint64_t sectors;
334 struct stat st;
335 char *buf;
337 /* If length is greater than the current file len
338 * we synchronously write zeroes to the end of the
339 * file, otherwise we truncate the length down
340 */
341 ret = fstat(fd, &st);
342 if (ret == -1)
343 return -1;
344 if (S_ISBLK(st.st_mode))
345 return 0;
347 sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
348 current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
349 rem = st.st_size % DEFAULT_SECTOR_SIZE;
351 /* If we are extending this file, we write zeros to the end --
352 * this tries to ensure that the extents allocated wind up being
353 * contiguous on disk.
354 */
355 if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
356 /*We are extending the file*/
357 if ((ret = posix_memalign((void **)&buf,
358 512, DEFAULT_SECTOR_SIZE))) {
359 DPRINTF("posix_memalign failed: %d\n", ret);
360 return -1;
361 }
362 memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
363 if (lseek(fd, 0, SEEK_END)==-1) {
364 DPRINTF("Lseek EOF failed (%d), internal error\n",
365 errno);
366 free(buf);
367 return -1;
368 }
369 if (rem) {
370 ret = write(fd, buf, rem);
371 if (ret != rem) {
372 DPRINTF("write failed: ret = %d, err = %s\n",
373 ret, strerror(errno));
374 free(buf);
375 return -1;
376 }
377 }
378 for (i = current; i < sectors; i++ ) {
379 ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
380 if (ret != DEFAULT_SECTOR_SIZE) {
381 DPRINTF("write failed: ret = %d, err = %s\n",
382 ret, strerror(errno));
383 free(buf);
384 return -1;
385 }
386 }
387 free(buf);
388 } else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
389 if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
390 DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
391 return -1;
392 }
393 return 0;
394 }
396 /* 'allocate' is:
397 *
398 * 0 to not allocate.
399 *
400 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
401 * 'n_end')
402 *
403 * 2 to allocate a compressed cluster of size
404 * 'compressed_size'. 'compressed_size' must be > 0 and <
405 * cluster_size
406 *
407 * return 0 if not allocated.
408 */
/* Translate a virtual byte offset to its file offset via the two-level
 * L1/L2 lookup, optionally allocating the cluster on the way.
 * NOTE(review): the function returns uint64_t, yet several error paths
 * below `return -1`, which wraps to UINT64_MAX -- callers that only
 * test for 0 will treat that as a valid offset; confirm and fix. */
409 static uint64_t get_cluster_offset(struct tdqcow_state *s,
410 uint64_t offset, int allocate,
411 int compressed_size,
412 int n_start, int n_end)
413 {
414 int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
415 char *tmp_ptr2, *l2_ptr, *l1_ptr;
416 uint64_t *tmp_ptr;
417 uint64_t l2_offset, *l2_table, cluster_offset, tmp;
418 uint32_t min_count;
419 int new_l2_table;
421 /*Check L1 table for the extent offset*/
422 l1_index = offset >> (s->l2_bits + s->cluster_bits);
423 l2_offset = s->l1_table[l1_index];
424 new_l2_table = 0;
425 if (!l2_offset) {
426 if (!allocate)
427 return 0;
428 /*
429 * allocating a new l2 entry + extent
430 * at the end of the file, we must also
431 * update the L1 entry safely.
432 */
433 l2_offset = s->fd_end;
435 /* round to cluster size */
436 l2_offset = (l2_offset + s->cluster_size - 1)
437 & ~(s->cluster_size - 1);
439 /* update the L1 entry */
440 s->l1_table[l1_index] = l2_offset;
442 /*Truncate file for L2 table
443 *(initialised to zero in case we crash)*/
444 if (qtruncate(s->fd,
445 l2_offset + (s->l2_size * sizeof(uint64_t)),
446 s->sparse) != 0) {
447 DPRINTF("ERROR truncating file\n");
448 return 0;
449 }
450 s->fd_end = l2_offset + (s->l2_size * sizeof(uint64_t));
452 /*Update the L1 table entry on disk
453 * (for O_DIRECT we write 4KByte blocks)*/
454 l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
455 l1_ptr = (char *)s->l1_table + (l1_sector << 12);
/* NOTE(review): posix_memalign failure is only logged; tmp_ptr is then
 * used uninitialized by the memcpy below -- undefined behavior. */
457 if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
458 DPRINTF("ERROR allocating memory for L1 table\n");
459 }
460 memcpy(tmp_ptr, l1_ptr, 4096);
462 /* Convert block to write to big endian */
463 for(i = 0; i < 4096 / sizeof(uint64_t); i++) {
464 cpu_to_be64s(&tmp_ptr[i]);
465 }
467 /*
468 * Issue non-asynchronous L1 write.
469 * For safety, we must ensure that
470 * entry is written before blocks.
471 */
472 lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
473 if (write(s->fd, tmp_ptr, 4096) != 4096) {
474 free(tmp_ptr);
475 return 0;
476 }
477 free(tmp_ptr);
479 new_l2_table = 1;
480 goto cache_miss;
481 } else if (s->min_cluster_alloc == s->l2_size) {
482 /*Fast-track the request*/
483 cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
484 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
485 return cluster_offset + (l2_index * s->cluster_size);
486 }
488 /*Check to see if L2 entry is already cached*/
489 for (i = 0; i < L2_CACHE_SIZE; i++) {
490 if (l2_offset == s->l2_cache_offsets[i]) {
491 /* increment the hit count */
492 if (++s->l2_cache_counts[i] == 0xffffffff) {
/* Saturating: halve all hit counts so relative ordering is kept. */
493 for (j = 0; j < L2_CACHE_SIZE; j++) {
494 s->l2_cache_counts[j] >>= 1;
495 }
496 }
497 l2_table = s->l2_cache + (i << s->l2_bits);
498 goto found;
499 }
500 }
502 cache_miss:
503 /* not found: load a new entry in the least used one */
504 min_index = 0;
505 min_count = 0xffffffff;
506 for (i = 0; i < L2_CACHE_SIZE; i++) {
507 if (s->l2_cache_counts[i] < min_count) {
508 min_count = s->l2_cache_counts[i];
509 min_index = i;
510 }
511 }
512 l2_table = s->l2_cache + (min_index << s->l2_bits);
514 /*If extent pre-allocated, read table from disk,
515 *otherwise write new table to disk*/
516 if (new_l2_table) {
517 /*Should we allocate the whole extent? Adjustable parameter.*/
518 if (s->cluster_alloc == s->l2_size) {
519 cluster_offset = l2_offset +
520 (s->l2_size * sizeof(uint64_t));
521 cluster_offset = (cluster_offset + s->cluster_size - 1)
522 & ~(s->cluster_size - 1);
523 if (qtruncate(s->fd, cluster_offset +
524 (s->cluster_size * s->l2_size),
525 s->sparse) != 0) {
526 DPRINTF("ERROR truncating file\n");
527 return 0;
528 }
529 s->fd_end = cluster_offset +
530 (s->cluster_size * s->l2_size);
/* Pre-populate the new L2 table: entry i points at cluster i. */
531 for (i = 0; i < s->l2_size; i++) {
532 l2_table[i] = cpu_to_be64(cluster_offset +
533 (i*s->cluster_size));
534 }
535 } else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
537 lseek(s->fd, l2_offset, SEEK_SET);
538 if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
539 s->l2_size * sizeof(uint64_t))
540 return 0;
541 } else {
542 lseek(s->fd, l2_offset, SEEK_SET);
543 if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
544 s->l2_size * sizeof(uint64_t))
545 return 0;
546 }
548 /*Update the cache entries*/
549 s->l2_cache_offsets[min_index] = l2_offset;
550 s->l2_cache_counts[min_index] = 1;
552 found:
553 /*The extent is split into 's->l2_size' blocks of
554 *size 's->cluster_size'*/
555 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
556 cluster_offset = be64_to_cpu(l2_table[l2_index]);
558 if (!cluster_offset ||
559 ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
560 if (!allocate)
561 return 0;
563 if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
564 (n_end - n_start) < s->cluster_sectors) {
565 /* cluster is already allocated but compressed, we must
566 decompress it in the case it is not completely
567 overwritten */
568 if (decompress_cluster(s, cluster_offset) < 0)
569 return 0;
/* Re-write the decompressed cluster at a fresh, cluster-aligned
 * position at the end of the file. */
570 cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
571 cluster_offset = (cluster_offset + s->cluster_size - 1)
572 & ~(s->cluster_size - 1);
573 /* write the cluster content - not asynchronous */
574 lseek(s->fd, cluster_offset, SEEK_SET);
575 if (write(s->fd, s->cluster_cache, s->cluster_size) !=
576 s->cluster_size)
577 return -1;
578 } else {
579 /* allocate a new cluster */
580 cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
581 if (allocate == 1) {
582 /* round to cluster size */
583 cluster_offset =
584 (cluster_offset + s->cluster_size - 1)
585 & ~(s->cluster_size - 1);
586 if (qtruncate(s->fd, cluster_offset +
587 s->cluster_size, s->sparse)!=0) {
588 DPRINTF("ERROR truncating file\n");
589 return 0;
590 }
591 s->fd_end = (cluster_offset + s->cluster_size);
592 /* if encrypted, we must initialize the cluster
593 content which won't be written */
594 if (s->crypt_method &&
595 (n_end - n_start) < s->cluster_sectors) {
596 uint64_t start_sect;
597 start_sect = (offset &
598 ~(s->cluster_size - 1))
599 >> 9;
/* 0xaa fill pattern for the sectors outside [n_start, n_end). */
600 memset(s->cluster_data + 512,
601 0xaa, 512);
602 for (i = 0; i < s->cluster_sectors;i++)
603 {
604 if (i < n_start || i >= n_end)
605 {
606 encrypt_sectors(s, start_sect + i,
607 s->cluster_data,
608 s->cluster_data + 512, 1, 1,
609 &s->aes_encrypt_key);
610 lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
611 if (write(s->fd, s->cluster_data, 512) != 512)
612 return -1;
613 }
614 }
615 }
616 } else {
/* allocate == 2: record a compressed cluster; the size is packed
 * into the top bits of the entry. */
617 cluster_offset |= QCOW_OFLAG_COMPRESSED |
618 (uint64_t)compressed_size
619 << (63 - s->cluster_bits);
620 }
621 }
622 /* update L2 table */
623 tmp = cpu_to_be64(cluster_offset);
624 l2_table[l2_index] = tmp;
626 /*For IO_DIRECT we write 4KByte blocks*/
627 l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
628 l2_ptr = (char *)l2_table + (l2_sector << 12);
/* NOTE(review): as with the L1 path above, posix_memalign failure is
 * only logged and tmp_ptr2 is then used uninitialized. */
630 if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
631 DPRINTF("ERROR allocating memory for L1 table\n");
632 }
633 memcpy(tmp_ptr2, l2_ptr, 4096);
634 lseek(s->fd, l2_offset + (l2_sector << 12), SEEK_SET);
635 if (write(s->fd, tmp_ptr2, 4096) != 4096) {
636 free(tmp_ptr2);
637 return -1;
638 }
639 free(tmp_ptr2);
640 }
641 return cluster_offset;
642 }
644 static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num,
645 int nb_sectors, int *pnum)
646 {
647 int index_in_cluster, n;
648 uint64_t cluster_offset;
650 cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0);
651 index_in_cluster = sector_num & (s->cluster_sectors - 1);
652 n = s->cluster_sectors - index_in_cluster;
653 if (n > nb_sectors)
654 n = nb_sectors;
655 *pnum = n;
656 return (cluster_offset != 0);
657 }
/*
 * Inflate a raw (headerless) deflate stream of 'buf_size' bytes into
 * out_buf, which must be exactly 'out_buf_size' bytes of decompressed
 * data.  Returns 0 on success, -1 on any inflate error or size
 * mismatch.
 */
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
			     const uint8_t *buf, int buf_size)
{
	z_stream zs;
	int status, produced;

	memset(&zs, 0, sizeof(zs));
	zs.next_in = (uint8_t *)buf;
	zs.avail_in = buf_size;
	zs.next_out = out_buf;
	zs.avail_out = out_buf_size;

	/* -12: raw deflate data with a 2^12-byte window, no zlib header. */
	if (inflateInit2(&zs, -12) != Z_OK)
		return -1;

	status = inflate(&zs, Z_FINISH);
	produced = zs.next_out - out_buf;
	inflateEnd(&zs);

	/* Z_BUF_ERROR is tolerated as long as the full output was
	 * produced: compressed clusters are padded to sector size, so the
	 * input may carry trailing garbage. */
	if ((status != Z_STREAM_END && status != Z_BUF_ERROR) ||
	    produced != out_buf_size)
		return -1;

	return 0;
}
686 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
687 {
688 int ret, csize;
689 uint64_t coffset;
691 coffset = cluster_offset & s->cluster_offset_mask;
692 if (s->cluster_cache_offset != coffset) {
693 csize = cluster_offset >> (63 - s->cluster_bits);
694 csize &= (s->cluster_size - 1);
695 lseek(s->fd, coffset, SEEK_SET);
696 ret = read(s->fd, s->cluster_data, csize);
697 if (ret != csize)
698 return -1;
699 if (decompress_buffer(s->cluster_cache, s->cluster_size,
700 s->cluster_data, csize) < 0) {
701 return -1;
702 }
703 s->cluster_cache_offset = coffset;
704 }
705 return 0;
706 }
/*
 * Read the qcow header from the start of 'fd' into *header, byte-
 * swapping the multi-byte fields to native endianness.  The read is
 * done through a 512-aligned bounce buffer so it works with O_DIRECT
 * descriptors.  Returns 0 on success, a negative errno on failure.
 * NOTE(review): 'err' (int) holds lseek()/read() results (off_t /
 * ssize_t); fine for these small offsets, but worth confirming.
 */
708 static int
709 tdqcow_read_header(int fd, QCowHeader *header)
710 {
711 int err;
712 char *buf;
713 struct stat st;
714 size_t size, expected;
716 memset(header, 0, sizeof(*header));
718 err = fstat(fd, &st);
719 if (err)
720 return -errno;
722 err = lseek(fd, 0, SEEK_SET);
723 if (err == (off_t)-1)
724 return -errno;
726 size = (sizeof(*header) + 511) & ~511;
727 err = posix_memalign((void **)&buf, 512, size);
728 if (err)
729 return err;
/* A file shorter than the aligned read size yields a short read. */
731 expected = size;
732 if (st.st_size < size)
733 expected = st.st_size;
735 errno = 0;
736 err = read(fd, buf, size);
737 if (err != expected) {
738 err = (errno ? -errno : -EIO);
739 goto out;
740 }
742 memcpy(header, buf, sizeof(*header));
743 be32_to_cpus(&header->magic);
744 be32_to_cpus(&header->version);
745 be64_to_cpus(&header->backing_file_offset);
746 be32_to_cpus(&header->backing_file_size);
747 be32_to_cpus(&header->mtime);
748 be64_to_cpus(&header->size);
749 be32_to_cpus(&header->crypt_method);
750 be64_to_cpus(&header->l1_table_offset);
752 err = 0;
754 out:
755 free(buf);
756 return err;
757 }
759 static int
760 tdqcow_load_l1_table(struct tdqcow_state *s, QCowHeader *header)
761 {
762 char *buf;
763 struct stat st;
764 size_t expected;
765 int i, err, shift;
766 QCowHeader_ext *exthdr;
767 uint32_t l1_table_bytes, l1_table_block, l1_table_size;
769 buf = NULL;
770 s->l1_table = NULL;
772 shift = s->cluster_bits + s->l2_bits;
774 s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
775 s->l1_table_offset = header->l1_table_offset;
777 s->min_cluster_alloc = 1; /* default */
779 l1_table_bytes = s->l1_size * sizeof(uint64_t);
780 l1_table_size = (l1_table_bytes + 4095) & ~4095;
781 l1_table_block = (l1_table_bytes + s->l1_table_offset + 4095) & ~4095;
783 DPRINTF("L1 Table offset detected: %"PRIu64", size %d (%d)\n",
784 (uint64_t)s->l1_table_offset,
785 (int) (s->l1_size * sizeof(uint64_t)),
786 l1_table_size);
788 err = fstat(s->fd, &st);
789 if (err) {
790 err = -errno;
791 goto out;
792 }
794 err = lseek(s->fd, 0, SEEK_SET);
795 if (err == (off_t)-1) {
796 err = -errno;
797 goto out;
798 }
800 err = posix_memalign((void **)&buf, 512, l1_table_block);
801 if (err) {
802 buf = NULL;
803 goto out;
804 }
806 err = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
807 if (err) {
808 s->l1_table = NULL;
809 goto out;
810 }
812 memset(buf, 0, l1_table_block);
813 memset(s->l1_table, 0, l1_table_size);
815 expected = l1_table_block;
816 if (st.st_size < l1_table_block)
817 expected = st.st_size;
819 errno = 0;
820 err = read(s->fd, buf, l1_table_block);
821 if (err != expected) {
822 err = (errno ? -errno : -EIO);
823 goto out;
824 }
826 memcpy(s->l1_table, buf + s->l1_table_offset, l1_table_size);
827 exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
829 /* check for xen extended header */
830 if (s->l1_table_offset % 4096 == 0 &&
831 be32_to_cpu(exthdr->xmagic) == XEN_MAGIC) {
832 uint32_t flags = be32_to_cpu(exthdr->flags);
833 uint32_t cksum = be32_to_cpu(exthdr->cksum);
835 /*
836 * Try to detect old tapdisk images. They have to be fixed
837 * because they use big endian rather than native endian for
838 * the L1 table. After this block, the l1 table will
839 * definitely be in BIG endian.
840 */
841 if (!(flags & EXTHDR_L1_BIG_ENDIAN)) {
842 DPRINTF("qcow: converting to big endian L1 table\n");
844 /* convert to big endian */
845 for (i = 0; i < s->l1_size; i++)
846 cpu_to_be64s(&s->l1_table[i]);
848 flags |= EXTHDR_L1_BIG_ENDIAN;
849 exthdr->flags = cpu_to_be32(flags);
851 memcpy(buf + s->l1_table_offset,
852 s->l1_table, l1_table_size);
854 err = lseek(s->fd, 0, SEEK_SET);
855 if (err == (off_t)-1) {
856 err = -errno;
857 goto out;
858 }
860 err = atomicio(vwrite, s->fd, buf, l1_table_block);
861 if (err != l1_table_block) {
862 err = -errno;
863 goto out;
864 }
865 }
867 /* check the L1 table checksum */
868 if (cksum != gen_cksum((char *)s->l1_table,
869 s->l1_size * sizeof(uint64_t)))
870 DPRINTF("qcow: bad L1 checksum\n");
871 else {
872 s->extended = 1;
873 s->sparse = (be32_to_cpu(exthdr->flags) & SPARSE_FILE);
874 s->min_cluster_alloc =
875 be32_to_cpu(exthdr->min_cluster_alloc);
876 }
877 }
879 /* convert L1 table to native endian for operation */
880 for (i = 0; i < s->l1_size; i++)
881 be64_to_cpus(&s->l1_table[i]);
883 err = 0;
885 out:
886 if (err) {
887 free(buf);
888 free(s->l1_table);
889 s->l1_table = NULL;
890 }
891 return err;
892 }
894 /* Open the disk file and initialize qcow state. */
/* Returns 0 on success, -1 on failure (all partial state released).
 * NOTE(review): if init_aio_state() fails, free_aio_state() is called
 * both here and again via the `fail:` path -- with the current
 * free_aio_state() that is a double free; confirm.
 * NOTE(review): this view of the file appears to be missing the
 * function's closing brace (dropped line); confirm against the repo. */
895 int tdqcow_open (td_driver_t *driver, const char *name, td_flag_t flags)
896 {
897 int fd, len, i, ret, size, o_flags;
898 td_disk_info_t *bs = &(driver->info);
899 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
900 QCowHeader header;
901 uint64_t final_cluster = 0;
903 DPRINTF("QCOW: Opening %s\n", name);
/* O_DIRECT: all reads/writes in this driver use aligned bounce buffers. */
905 o_flags = O_DIRECT | O_LARGEFILE |
906 ((flags == TD_OPEN_RDONLY) ? O_RDONLY : O_RDWR);
907 fd = open(name, o_flags);
908 if (fd < 0) {
909 DPRINTF("Unable to open %s (%d)\n", name, -errno);
910 return -1;
911 }
913 s->fd = fd;
914 s->name = strdup(name);
915 if (!s->name)
916 goto fail;
918 if (tdqcow_read_header(fd, &header))
919 goto fail;
921 if (header.magic != QCOW_MAGIC)
922 goto fail;
924 switch (header.version) {
925 case QCOW_VERSION:
926 break;
927 case 2:
928 //TODO: Port qcow2 to new blktap framework.
929 // close(fd);
930 // dd->drv = &tapdisk_qcow2;
931 // return dd->drv->td_open(dd, name, flags);
932 goto fail;
933 default:
934 goto fail;
935 }
/* Basic sanity: nonzero size, clusters at least one sector. */
937 if (header.size <= 1 || header.cluster_bits < 9)
938 goto fail;
939 if (header.crypt_method > QCOW_CRYPT_AES)
940 goto fail;
941 s->crypt_method_header = header.crypt_method;
942 if (s->crypt_method_header)
943 s->encrypted = 1;
944 s->cluster_bits = header.cluster_bits;
945 s->cluster_size = 1 << s->cluster_bits;
946 s->cluster_sectors = 1 << (s->cluster_bits - 9);
947 s->l2_bits = header.l2_bits;
948 s->l2_size = 1 << s->l2_bits;
949 s->cluster_alloc = s->l2_size;
950 bs->size = header.size / 512;
951 s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
952 s->backing_file_offset = header.backing_file_offset;
953 s->backing_file_size = header.backing_file_size;
955 /* allocate and load l1 table */
956 if (tdqcow_load_l1_table(s, &header))
957 goto fail;
959 /* alloc L2 cache */
960 size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
961 ret = posix_memalign((void **)&s->l2_cache, 4096, size);
962 if(ret != 0) goto fail;
964 size = s->cluster_size;
965 ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
966 if(ret != 0) goto fail;
968 ret = posix_memalign((void **)&s->cluster_data, 4096, size);
969 if(ret != 0) goto fail;
/* -1 == "nothing cached yet" for the compressed-cluster cache. */
970 s->cluster_cache_offset = -1;
972 if (s->backing_file_offset != 0)
973 s->cluster_alloc = 1; /*Cannot use pre-alloc*/
975 bs->sector_size = 512;
976 bs->info = 0;
/* Find the highest L2 table offset to estimate the end of the file. */
978 for(i = 0; i < s->l1_size; i++)
979 if (s->l1_table[i] > final_cluster)
980 final_cluster = s->l1_table[i];
982 if (init_aio_state(driver)!=0) {
983 DPRINTF("Unable to initialise AIO state\n");
984 free_aio_state(s);
985 goto fail;
986 }
988 if (!final_cluster)
989 s->fd_end = s->l1_table_offset +
990 ((s->l1_size * sizeof(uint64_t) + 4095) & ~4095);
991 else {
992 s->fd_end = lseek(fd, 0, SEEK_END);
993 if (s->fd_end == (off_t)-1)
994 goto fail;
995 }
997 return 0;
999 fail:
1000 DPRINTF("QCOW Open failed\n");
1002 free_aio_state(s);
1003 free(s->l1_table);
1004 free(s->l2_cache);
1005 free(s->cluster_cache);
1006 free(s->cluster_data);
1007 close(fd);
1008 return -1;
/*
 * Handle a tapdisk read request: split it at cluster boundaries and,
 * per fragment, either forward to the backing image (unallocated),
 * decompress and complete synchronously (compressed), or issue an
 * async read at the mapped file offset.
 * NOTE(review): several brace-only lines appear to have been dropped
 * from this view of the file (e.g. after the EBUSY return and the
 * decompress failure) -- confirm against the repository before edits.
 */
1011 void tdqcow_queue_read(td_driver_t *driver, td_request_t treq)
1013 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
1014 int ret = 0, index_in_cluster, n, i;
1015 uint64_t cluster_offset, sector, nb_sectors;
1016 struct qcow_prv* prv;
1017 td_request_t clone = treq;
1018 char* buf = treq.buf;
1020 sector = treq.sec;
1021 nb_sectors = treq.secs;
1023 /*We store a local record of the request*/
1024 while (nb_sectors > 0) {
/* allocate == 0: lookup only, never grows the image. */
1025 cluster_offset =
1026 get_cluster_offset(s, sector << 9, 0, 0, 0, 0);
1027 index_in_cluster = sector & (s->cluster_sectors - 1);
1028 n = s->cluster_sectors - index_in_cluster;
1029 if (n > nb_sectors)
1030 n = nb_sectors;
1032 if (s->aio_free_count == 0) {
1033 td_complete_request(treq, -EBUSY);
1034 return;
1037 if(!cluster_offset) {
/* Hole: let the layer above satisfy this from the backing image. */
1038 treq.buf = buf;
1039 treq.sec = sector;
1040 treq.secs = n;
1041 td_forward_request(treq);
1043 } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
1044 if (decompress_cluster(s, cluster_offset) < 0) {
1045 td_complete_request(treq, -EIO);
1046 goto done;
/* Serve the fragment straight from the decompression cache. */
1048 memcpy(buf, s->cluster_cache + index_in_cluster * 512,
1049 512 * n);
1051 treq.buf = buf;
1052 treq.sec = sector;
1053 treq.secs = n;
1054 td_complete_request(treq, 0);
1055 } else {
1056 clone.buf = buf;
1057 clone.sec = (cluster_offset>>9)+index_in_cluster;
1058 clone.secs = n;
1059 async_read(driver, clone);
1061 nb_sectors -= n;
1062 sector += n;
1063 buf += n * 512;
1065 done:
1066 return;
/*
 * Handle a tapdisk write request: split it at cluster boundaries,
 * allocate the target cluster if needed, encrypt in place when the
 * image is encrypted, and issue an async write per fragment.
 * NOTE(review): brace-only lines appear dropped from this view (after
 * the EBUSY/EIO returns and the final else) -- confirm against the
 * repository.  The if/else arms are otherwise identical except for
 * the encryption step.
 */
1069 void tdqcow_queue_write(td_driver_t *driver, td_request_t treq)
1071 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
1072 int ret = 0, index_in_cluster, n, i;
1073 uint64_t cluster_offset, sector, nb_sectors;
1074 td_callback_t cb;
1075 struct qcow_prv* prv;
1076 char* buf = treq.buf;
1077 td_request_t clone=treq;
1079 sector = treq.sec;
1080 nb_sectors = treq.secs;
1082 /*We store a local record of the request*/
1083 while (nb_sectors > 0) {
1084 index_in_cluster = sector & (s->cluster_sectors - 1);
1085 n = s->cluster_sectors - index_in_cluster;
1086 if (n > nb_sectors)
1087 n = nb_sectors;
1089 if (s->aio_free_count == 0) {
1090 td_complete_request(treq, -EBUSY);
1091 return;
/* allocate == 1: map this fragment, allocating the cluster if absent. */
1094 cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
1095 index_in_cluster,
1096 index_in_cluster+n);
1097 if (!cluster_offset) {
1098 DPRINTF("Ooops, no write cluster offset!\n");
1099 td_complete_request(treq, -EIO);
1100 return;
1103 if (s->crypt_method) {
/* Encrypt into the shared bounce buffer before submitting. */
1104 encrypt_sectors(s, sector, s->cluster_data,
1105 (unsigned char *)buf, n, 1,
1106 &s->aes_encrypt_key);
1108 clone.buf = buf;
1109 clone.sec = (cluster_offset>>9) + index_in_cluster;
1110 clone.secs = n;
1111 async_write(driver, clone);
1112 } else {
1113 clone.buf = buf;
1114 clone.sec = (cluster_offset>>9) + index_in_cluster;
1115 clone.secs = n;
1117 async_write(driver, clone);
1120 nb_sectors -= n;
1121 sector += n;
1122 buf += n * 512;
1124 s->cluster_cache_offset = -1; /* disable compressed cache */
1126 return;
/*
 * Recompute the L1 table checksum and write it into the Xen extended
 * header on disk.  Only meaningful for images with the extension
 * (s->extended).  The file is reopened without O_DIRECT for this small
 * unaligned write.  Returns 0 on success, a positive errno on failure.
 * NOTE(review): brace-only lines appear dropped from this view of the
 * function -- confirm against the repository.
 */
1129 static int
1130 tdqcow_update_checksum(struct tdqcow_state *s)
1132 int i, fd, err;
1133 uint32_t offset, cksum, out;
1135 if (!s->extended)
1136 return 0;
1138 fd = open(s->name, O_WRONLY | O_LARGEFILE); /* open without O_DIRECT */
1139 if (fd == -1) {
1140 err = errno;
1141 goto out;
1144 offset = sizeof(QCowHeader) + offsetof(QCowHeader_ext, cksum);
1145 if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
1146 err = errno;
1147 goto out;
/* The on-disk checksum is over the big-endian form of the table. */
1150 /* convert to big endian for checksum */
1151 for (i = 0; i < s->l1_size; i++)
1152 cpu_to_be64s(&s->l1_table[i]);
1154 cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
1156 /* and back again... */
1157 for (i = 0; i < s->l1_size; i++)
1158 be64_to_cpus(&s->l1_table[i]);
1160 DPRINTF("Writing cksum: %d", cksum);
1162 out = cpu_to_be32(cksum);
1163 if (write(fd, &out, sizeof(out)) != sizeof(out)) {
1164 err = errno;
1165 goto out;
1168 err = 0;
1170 out:
1171 if (err)
1172 DPRINTF("failed to update checksum: %d\n", err);
1173 if (fd != -1)
1174 close(fd);
1175 return err;
/*
 * Tear down the driver: persist the L1 checksum, then release all
 * state allocated by tdqcow_open() and close the image.  Always
 * returns 0 (checksum-update failures are logged, not propagated).
 * NOTE(review): the closing brace of this function appears dropped
 * from this view of the file.
 */
1178 int tdqcow_close(td_driver_t *driver)
1180 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
1182 /*Update the hdr cksum*/
1183 tdqcow_update_checksum(s);
1185 free_aio_state(s);
1186 free(s->name);
1187 free(s->l1_table);
1188 free(s->l2_cache);
1189 free(s->cluster_cache);
1190 free(s->cluster_data);
1191 close(s->fd);
1192 return 0;
1195 int qcow_create(const char *filename, uint64_t total_size,
1196 const char *backing_file, int sparse)
1198 int fd, header_size, backing_filename_len, l1_size, i;
1199 int shift, length, adjust, flags = 0, ret = 0;
1200 QCowHeader header;
1201 QCowHeader_ext exthdr;
1202 char backing_filename[PATH_MAX], *ptr;
1203 uint64_t tmp, size, total_length;
1204 struct stat st;
1206 DPRINTF("Qcow_create: size %"PRIu64"\n",total_size);
1208 fd = open(filename,
1209 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1210 0644);
1211 if (fd < 0)
1212 return -1;
1214 memset(&header, 0, sizeof(header));
1215 header.magic = cpu_to_be32(QCOW_MAGIC);
1216 header.version = cpu_to_be32(QCOW_VERSION);
1218 /*Create extended header fields*/
1219 exthdr.xmagic = cpu_to_be32(XEN_MAGIC);
1221 header_size = sizeof(header) + sizeof(QCowHeader_ext);
1222 backing_filename_len = 0;
1223 size = (total_size >> SECTOR_SHIFT);
1224 if (backing_file) {
1225 if (strcmp(backing_file, "fat:")) {
1226 const char *p;
1227 /* XXX: this is a hack: we do not attempt to
1228 *check for URL like syntax */
1229 p = strchr(backing_file, ':');
1230 if (p && (p - backing_file) >= 2) {
1231 /* URL like but exclude "c:" like filenames */
1232 strncpy(backing_filename, backing_file,
1233 sizeof(backing_filename));
1234 } else {
1235 if (realpath(backing_file, backing_filename) == NULL ||
1236 stat(backing_filename, &st) != 0) {
1237 return -1;
1240 header.backing_file_offset = cpu_to_be64(header_size);
1241 backing_filename_len = strlen(backing_filename);
1242 header.backing_file_size = cpu_to_be32(
1243 backing_filename_len);
1244 header_size += backing_filename_len;
1246 /*Set to the backing file size*/
1247 if(get_filesize(backing_filename, &size, &st)) {
1248 return -1;
1250 DPRINTF("Backing file size detected: %"PRId64" sectors"
1251 "(total %"PRId64" [%"PRId64" MB])\n",
1252 size,
1253 (uint64_t)(size << SECTOR_SHIFT),
1254 (uint64_t)(size >> 11));
1255 } else {
1256 backing_file = NULL;
1257 DPRINTF("Setting file size: %"PRId64" (total %"PRId64")\n",
1258 total_size,
1259 (uint64_t) (total_size << SECTOR_SHIFT));
1261 header.mtime = cpu_to_be32(st.st_mtime);
1262 header.cluster_bits = 9; /* 512 byte cluster to avoid copying
1263 unmodifyed sectors */
1264 header.l2_bits = 12; /* 32 KB L2 tables */
1265 exthdr.min_cluster_alloc = cpu_to_be32(1);
1266 } else {
1267 DPRINTF("Setting file size: %"PRId64" sectors"
1268 "(total %"PRId64" [%"PRId64" MB])\n",
1269 size,
1270 (uint64_t) (size << SECTOR_SHIFT),
1271 (uint64_t) (size >> 11));
1272 header.cluster_bits = 12; /* 4 KB clusters */
1273 header.l2_bits = 9; /* 4 KB L2 tables */
1274 exthdr.min_cluster_alloc = cpu_to_be32(1 << 9);
1276 /*Set the header size value*/
1277 header.size = cpu_to_be64(size * 512);
1279 header_size = (header_size + 7) & ~7;
1280 if (header_size % 4096 > 0) {
1281 header_size = ((header_size >> 12) + 1) << 12;
1284 shift = header.cluster_bits + header.l2_bits;
1285 l1_size = ((size * 512) + (1LL << shift) - 1) >> shift;
1287 header.l1_table_offset = cpu_to_be64(header_size);
1288 DPRINTF("L1 Table offset: %d, size %d\n",
1289 header_size,
1290 (int)(l1_size * sizeof(uint64_t)));
1291 header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
1293 ptr = calloc(1, l1_size * sizeof(uint64_t));
1294 exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_size * sizeof(uint64_t)));
1295 printf("Created cksum: %d\n",exthdr.cksum);
1296 free(ptr);
1298 /*adjust file length to system page size boundary*/
1299 length = ROUNDUP(header_size + (l1_size * sizeof(uint64_t)),
1300 getpagesize());
1301 if (qtruncate(fd, length, 0)!=0) {
1302 DPRINTF("ERROR truncating file\n");
1303 return -1;
1306 if (sparse == 0) {
1307 /*Filesize is length+l1_size*(1 << s->l2_bits)+(size*512)*/
1308 total_length = length + (l1_size * (1 << 9)) + (size * 512);
1309 if (qtruncate(fd, total_length, 0)!=0) {
1310 DPRINTF("ERROR truncating file\n");
1311 return -1;
1313 printf("File truncated to length %"PRIu64"\n",total_length);
1314 } else
1315 flags = SPARSE_FILE;
1317 flags |= EXTHDR_L1_BIG_ENDIAN;
1318 exthdr.flags = cpu_to_be32(flags);
1320 /* write all the data */
1321 lseek(fd, 0, SEEK_SET);
1322 ret += write(fd, &header, sizeof(header));
1323 ret += write(fd, &exthdr, sizeof(exthdr));
1324 if (backing_file)
1325 ret += write(fd, backing_filename, backing_filename_len);
1327 lseek(fd, header_size, SEEK_SET);
1328 tmp = 0;
1329 for (i = 0;i < l1_size; i++) {
1330 ret += write(fd, &tmp, sizeof(tmp));
1333 close(fd);
1335 return 0;
1338 static int qcow_make_empty(struct tdqcow_state *s)
1340 uint32_t l1_length = s->l1_size * sizeof(uint64_t);
1342 memset(s->l1_table, 0, l1_length);
1343 lseek(s->fd, s->l1_table_offset, SEEK_SET);
1344 if (write(s->fd, s->l1_table, l1_length) < 0)
1345 return -1;
1346 if (qtruncate(s->fd, s->l1_table_offset + l1_length, s->sparse)!=0) {
1347 DPRINTF("ERROR truncating file\n");
1348 return -1;
1351 memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
1352 memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
1353 memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
1355 return 0;
/* Accessor: return this image's cluster size in bytes. */
static int qcow_get_cluster_size(struct tdqcow_state *s)
{
	return s->cluster_size;
}
/* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
/*
 * Deflate one cluster (raw zlib stream, no header) and write it to a
 * newly allocated compressed-cluster slot.  If the data does not
 * shrink below the cluster size, nothing is written (the uncompressed
 * write path is expected to handle it — see commented-out call).
 *
 * Returns 0 on success (including the "could not compress" case),
 * -1 on allocation, compression, or I/O failure.
 */
static int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num,
				 const uint8_t *buf)
{
	z_stream strm;
	int ret, out_len;
	uint8_t *out_buf;
	uint64_t cluster_offset;

	/* worst case: zlib overhead of ~0.1% plus a small constant */
	out_buf = malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
	if (!out_buf)
		return -1;

	/* best compression, small window, no zlib header */
	memset(&strm, 0, sizeof(strm));
	ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
			   Z_DEFLATED, -12,
			   9, Z_DEFAULT_STRATEGY);
	if (ret != 0) {
		free(out_buf);
		return -1;
	}

	strm.avail_in = s->cluster_size;
	strm.next_in = (uint8_t *)buf;
	/* cap output at cluster_size: anything larger is not worth storing */
	strm.avail_out = s->cluster_size;
	strm.next_out = out_buf;

	ret = deflate(&strm, Z_FINISH);
	if (ret != Z_STREAM_END && ret != Z_OK) {
		free(out_buf);
		deflateEnd(&strm);
		return -1;
	}
	out_len = strm.next_out - out_buf;

	deflateEnd(&strm);

	if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
		/* could not compress: write normal cluster */
		//tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
	} else {
		cluster_offset = get_cluster_offset(s, sector_num << 9, 2,
						    out_len, 0, 0);
		/* 0 signals allocation failure; was used unchecked, and a
		 * masked offset of 0 would overwrite the image header */
		if (cluster_offset == 0) {
			free(out_buf);
			return -1;
		}
		cluster_offset &= s->cluster_offset_mask;
		if (lseek(s->fd, cluster_offset, SEEK_SET) == (off_t)-1) {
			free(out_buf);
			return -1;
		}
		if (write(s->fd, out_buf, out_len) != out_len) {
			free(out_buf);
			return -1;
		}
	}

	free(out_buf);
	return 0;
}
1420 static int
1421 tdqcow_get_image_type(const char *file, int *type)
1423 int fd;
1424 size_t size;
1425 QCowHeader header;
1427 fd = open(file, O_RDONLY);
1428 if (fd == -1)
1429 return -errno;
1431 size = read(fd, &header, sizeof(header));
1432 close(fd);
1433 if (size != sizeof(header))
1434 return (errno ? -errno : -EIO);
1436 be32_to_cpus(&header.magic);
1437 if (header.magic == QCOW_MAGIC)
1438 *type = DISK_TYPE_QCOW;
1439 else
1440 *type = DISK_TYPE_AIO;
1442 return 0;
1445 int tdqcow_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
1447 off_t off;
1448 char *buf, *filename;
1449 int len, secs, type, err = -EINVAL;
1450 struct tdqcow_state *child = (struct tdqcow_state *)driver->data;
1452 if (!child->backing_file_offset)
1453 return TD_NO_PARENT;
1455 /* read the backing file name */
1456 len = child->backing_file_size;
1457 off = child->backing_file_offset - (child->backing_file_offset % 512);
1458 secs = (len + (child->backing_file_offset - off) + 511) >> 9;
1460 if (posix_memalign((void **)&buf, 512, secs << 9))
1461 return -1;
1463 if (lseek(child->fd, off, SEEK_SET) == (off_t)-1)
1464 goto out;
1466 if (read(child->fd, buf, secs << 9) != secs << 9)
1467 goto out;
1468 filename = buf + (child->backing_file_offset - off);
1469 filename[len] = '\0';
1471 if (tdqcow_get_image_type(filename, &type))
1472 goto out;
1474 id->name = strdup(filename);
1475 id->drivertype = type;
1476 err = 0;
1477 out:
1478 free(buf);
1479 return err;
1482 int tdqcow_validate_parent(td_driver_t *driver,
1483 td_driver_t *pdriver, td_flag_t flags)
1485 struct stat stats;
1486 uint64_t psize, csize;
1487 struct tdqcow_state *c = (struct tdqcow_state *)driver->data;
1488 struct tdqcow_state *p = (struct tdqcow_state *)pdriver->data;
1490 if (stat(p->name, &stats))
1491 return -EINVAL;
1492 if (get_filesize(p->name, &psize, &stats))
1493 return -EINVAL;
1495 if (stat(c->name, &stats))
1496 return -EINVAL;
1497 if (get_filesize(c->name, &csize, &stats))
1498 return -EINVAL;
1500 if (csize != psize)
1501 return -EINVAL;
1503 return 0;
/* Dispatch table registering the qcow format driver with tapdisk. */
struct tap_disk tapdisk_qcow = {
	.disk_type          = "tapdisk_qcow",
	.flags              = 0,
	.private_data_size  = sizeof(struct tdqcow_state),
	.td_open            = tdqcow_open,
	.td_close           = tdqcow_close,
	.td_queue_read      = tdqcow_queue_read,
	.td_queue_write     = tdqcow_queue_write,
	.td_get_parent_id   = tdqcow_get_parent_id,
	.td_validate_parent = tdqcow_validate_parent,
	.td_debug           = NULL,	/* no debug hook for qcow */
};