ia64/xen-unstable

view tools/blktap/drivers/block-qcow.c @ 17391:633099ff88a8

tools: Use PATH_MAX for pathname char arrays.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Apr 04 14:49:37 2008 +0100 (2008-04-04)
parents 17e30b91b9e2
children fbccdd7e2a86
line source
1 /* block-qcow.c
2 *
3 * Asynchronous Qemu copy-on-write disk implementation.
4 * Code based on the Qemu implementation
5 * (see copyright notice below)
6 *
7 * (c) 2006 Andrew Warfield and Julian Chesterfield
8 *
9 */
11 /*
12 * Block driver for the QCOW format
13 *
14 * Copyright (c) 2004 Fabrice Bellard
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this software and associated documentation files(the "Software"), to deal
18 * in the Software without restriction, including without limitation the rights
19 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
20 * copies of the Software, and to permit persons to whom the Software is
21 * furnished to do so, subject to the following conditions:
22 */
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <sys/statvfs.h>
30 #include <sys/stat.h>
31 #include <sys/ioctl.h>
32 #include <string.h>
33 #include <zlib.h>
34 #include <inttypes.h>
35 #include <libaio.h>
36 #include <openssl/md5.h>
37 #include "bswap.h"
38 #include "aes.h"
39 #include "tapdisk.h"
40 #include "tapaio.h"
41 #include "blk.h"
/* *BSD has no O_LARGEFILE */
#ifndef O_LARGEFILE
#define O_LARGEFILE     0
#endif

#if 1
/*
 * Debug assertion: logs the failed predicate with its source location,
 * then deliberately dereferences NULL so the process dies at the fault
 * site (useful with a core dump / attached debugger).
 */
#define ASSERT(_p) \
    if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
    __LINE__, __FILE__); *(int*)0=0; }
#else
/* Release variant: assertions compile to nothing */
#define ASSERT(_p) ((void)0)
#endif
/*
 * Round 'l' up to the next multiple of 's' (result as uint64_t).
 * All macro arguments are parenthesized: the original expanded '% s'
 * unparenthesized, so e.g. ROUNDUP(x, 4 + 4) computed '(... % 4) + 4'.
 */
#define ROUNDUP(l, s) \
({ \
    (uint64_t)( \
        ((l) + ((s) - 1)) - (((l) + ((s) - 1)) % (s))); \
})
#undef IOCB_IDX
/* Index of an iocb within the state's preallocated iocb_list array */
#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)

#define ZERO_TEST(_b) (_b | 0x00)

/**************************************************************/
/* QEMU COW block driver with compression and encryption support */

/* "QFI\xfb": standard qcow image magic */
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
/* "XEN\xfb": marks the Xen extended header (QCowHeader_ext) */
#define XEN_MAGIC  (('X' << 24) | ('E' << 16) | ('N' << 8) | 0xfb)
#define QCOW_VERSION 1

#define QCOW_CRYPT_NONE 0x00
#define QCOW_CRYPT_AES  0x01

/* Top bit of an L2 entry flags a compressed cluster */
#define QCOW_OFLAG_COMPRESSED (1LL << 63)
/* QCowHeader_ext.flags bits */
#define SPARSE_FILE 0x01
#define EXTHDR_L1_BIG_ENDIAN 0x02

#ifndef O_BINARY
#define O_BINARY 0
#endif
/*
 * On-disk QCOW (version 1) header. Multi-byte fields are stored
 * big-endian on disk and byte-swapped on load (see tdqcow_open).
 */
typedef struct QCowHeader {
	uint32_t magic;			/* QCOW_MAGIC */
	uint32_t version;		/* QCOW_VERSION (1) */
	uint64_t backing_file_offset;	/* byte offset of backing file name; 0 if none */
	uint32_t backing_file_size;	/* length of backing file name in bytes */
	uint32_t mtime;
	uint64_t size; /* in bytes */
	uint8_t cluster_bits;		/* log2 of cluster size */
	uint8_t l2_bits;		/* log2 of entries per L2 table */
	uint32_t crypt_method;		/* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
	uint64_t l1_table_offset;	/* byte offset of the L1 table */
} QCowHeader;

/*Extended header for Xen enhancements*/
/* Immediately follows QCowHeader on disk; valid iff xmagic == XEN_MAGIC. */
typedef struct QCowHeader_ext {
	uint32_t xmagic;		/* XEN_MAGIC */
	uint32_t cksum;			/* checksum of the L1 table (see gen_cksum) */
	uint32_t min_cluster_alloc;	/* historical extent allocation unit */
	uint32_t flags;			/* SPARSE_FILE | EXTHDR_L1_BIG_ENDIAN */
} QCowHeader_ext;
#define L2_CACHE_SIZE 16	/*Fixed allocation in Qemu*/

/* Per-image driver state for one open qcow (v1) file. */
struct tdqcow_state {
	int fd;			/*Main Qcow file descriptor */
	uint64_t fd_end;	/*Store a local record of file length */
	char *name;		/*Record of the filename*/
	uint32_t backing_file_size;
	uint64_t backing_file_offset;
	int encrypted;		/*File contents are encrypted or plain*/
	int cluster_bits;	/*Determines length of cluster as
				 *indicated by file hdr*/
	int cluster_size;	/*Length of cluster*/
	int cluster_sectors;	/*Number of sectors per cluster*/
	int cluster_alloc;	/*Blktap fix for allocating full
				 *extents*/
	int min_cluster_alloc;	/*Blktap historical extent alloc*/
	int sparse;		/*Indicates whether to preserve sparseness*/
	int l2_bits;		/*Size of L2 table entry*/
	int l2_size;		/*Full table size*/
	int l1_size;		/*L1 table size*/
	uint64_t cluster_offset_mask;
	uint64_t l1_table_offset; /*L1 table offset from beginning of
				   *file*/
	uint64_t *l1_table;	/*L1 table entries*/
	uint64_t *l2_cache;	/*We maintain a cache of size
				 *L2_CACHE_SIZE of most read entries*/
	uint64_t l2_cache_offsets[L2_CACHE_SIZE];	/*L2 cache entries*/
	uint32_t l2_cache_counts[L2_CACHE_SIZE];	/*Cache access record*/
	uint8_t *cluster_cache;		/* decompressed cluster scratch buffer */
	uint8_t *cluster_data;		/* raw/encrypted cluster scratch buffer */
	uint64_t cluster_cache_offset;	/* coffset currently held in cluster_cache; -1 if none */
	uint32_t crypt_method;	/*current crypt method, 0 if no
				 *key yet */
	uint32_t crypt_method_header;	/* crypt_method from the file header */
	AES_KEY aes_encrypt_key;	/*AES key*/
	AES_KEY aes_decrypt_key;	/*AES key*/

	/* libaio state */
	tap_aio_context_t	aio;
};

/* Forward declaration: needed by get_cluster_offset for compressed clusters. */
static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
149 static uint32_t gen_cksum(char *ptr, int len)
150 {
151 int i;
152 unsigned char *md;
153 uint32_t ret;
155 md = malloc(MD5_DIGEST_LENGTH);
157 if(!md) return 0;
159 /* Convert L1 table to big endian */
160 for(i = 0; i < len / sizeof(uint64_t); i++) {
161 cpu_to_be64s(&((uint64_t*) ptr)[i]);
162 }
164 /* Generate checksum */
165 if (MD5((unsigned char *)ptr, len, md) != md)
166 ret = 0;
167 else
168 memcpy(&ret, md, sizeof(uint32_t));
170 /* Convert L1 table back to native endianess */
171 for(i = 0; i < len / sizeof(uint64_t); i++) {
172 be64_to_cpus(&((uint64_t*) ptr)[i]);
173 }
175 free(md);
176 return ret;
177 }
179 static int get_filesize(char *filename, uint64_t *size, struct stat *st)
180 {
181 int fd;
182 QCowHeader header;
184 /*Set to the backing file size*/
185 fd = open(filename, O_RDONLY);
186 if (fd < 0)
187 return -1;
188 if (read(fd, &header, sizeof(header)) < sizeof(header)) {
189 close(fd);
190 return -1;
191 }
192 close(fd);
194 be32_to_cpus(&header.magic);
195 be64_to_cpus(&header.size);
196 if (header.magic == QCOW_MAGIC) {
197 *size = header.size >> SECTOR_SHIFT;
198 return 0;
199 }
201 if(S_ISBLK(st->st_mode)) {
202 fd = open(filename, O_RDONLY);
203 if (fd < 0)
204 return -1;
205 if (blk_getimagesize(fd, size) != 0) {
206 close(fd);
207 return -1;
208 }
209 close(fd);
210 } else *size = (st->st_size >> SECTOR_SHIFT);
211 return 0;
212 }
/*
 * Derive the image's AES-128 encryption and decryption keys from the
 * passphrase 'key' (zero-padded or truncated to 16 bytes) and activate
 * encryption by copying crypt_method_header into crypt_method.
 * Returns 0 on success, -1 if OpenSSL rejects the key schedule.
 */
static int qcow_set_key(struct tdqcow_state *s, const char *key)
{
	uint8_t keybuf[16];
	int len, i;

	memset(keybuf, 0, 16);
	len = strlen(key);
	if (len > 16)
		len = 16;
	/* XXX: we could compress the chars to 7 bits to increase
	   entropy */
	for (i = 0; i < len; i++) {
		keybuf[i] = key[i];
	}
	/* From here on crypt_method is non-zero => I/O paths en/decrypt */
	s->crypt_method = s->crypt_method_header;

	if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
		return -1;
	if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
		return -1;
#if 0
	/* test */
	{
		uint8_t in[16];
		uint8_t out[16];
		uint8_t tmp[16];
		for (i=0; i<16; i++)
			in[i] = i;
		AES_encrypt(in, tmp, &s->aes_encrypt_key);
		AES_decrypt(tmp, out, &s->aes_decrypt_key);
		for (i = 0; i < 16; i++)
			DPRINTF(" %02x", tmp[i]);
		DPRINTF("\n");
		for (i = 0; i < 16; i++)
			DPRINTF(" %02x", out[i]);
		DPRINTF("\n");
	}
#endif
	return 0;
}
/*
 * The crypt function is compatible with the Linux cryptoloop
 * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
 * supported.
 */
/*
 * En/decrypt 'nb_sectors' 512-byte sectors with AES-CBC, one independent
 * CBC chain per sector. The IV is the little-endian sector number in the
 * low 8 bytes, zero in the high 8 — the cryptoloop-compatible scheme
 * described above. 'enc' selects direction (passed through to
 * AES_cbc_encrypt); out_buf may alias in_buf.
 */
static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
                            uint8_t *out_buf, const uint8_t *in_buf,
                            int nb_sectors, int enc,
                            const AES_KEY *key)
{
	union {
		uint64_t ll[2];
		uint8_t b[16];
	} ivec;
	int i;

	for (i = 0; i < nb_sectors; i++) {
		/* fresh IV per sector: LE sector number, upper half zero */
		ivec.ll[0] = cpu_to_le64(sector_num);
		ivec.ll[1] = 0;
		AES_cbc_encrypt(in_buf, out_buf, 512, key,
				ivec.b, enc);
		sector_num++;
		in_buf += 512;
		out_buf += 512;
	}
}
/*
 * Resize the file behind 'fd' to 'length' bytes, rounded up to a whole
 * number of DEFAULT_SECTOR_SIZE sectors. Growing is done by writing
 * zeros synchronously (to encourage contiguous extent allocation);
 * shrinking uses ftruncate and only happens when 'sparse' is set.
 * Block devices are left untouched. Returns 0 on success, -1 on error.
 */
static int qtruncate(int fd, off_t length, int sparse)
{
	int ret, i;
	int current = 0, rem = 0;
	uint64_t sectors;
	struct stat st;
	char *buf;

	/* If length is greater than the current file len
	 * we synchronously write zeroes to the end of the
	 * file, otherwise we truncate the length down
	 */
	ret = fstat(fd, &st);
	if (ret == -1)
		return -1;
	if (S_ISBLK(st.st_mode))
		return 0;

	/* target size in sectors / current size in sectors / bytes past
	 * the last full sector of the current file */
	sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
	current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
	rem = st.st_size % DEFAULT_SECTOR_SIZE;

	/* If we are extending this file, we write zeros to the end --
	 * this tries to ensure that the extents allocated wind up being
	 * contiguous on disk.
	 */
	if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
		/*We are extending the file*/
		if ((ret = posix_memalign((void **)&buf,
					  512, DEFAULT_SECTOR_SIZE))) {
			DPRINTF("posix_memalign failed: %d\n", ret);
			return -1;
		}
		memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
		if (lseek(fd, 0, SEEK_END)==-1) {
			DPRINTF("Lseek EOF failed (%d), internal error\n",
				errno);
			free(buf);
			return -1;
		}
		/* first pad the trailing partial sector with zeros... */
		if (rem) {
			ret = write(fd, buf, rem);
			if (ret != rem) {
				DPRINTF("write failed: ret = %d, err = %s\n",
					ret, strerror(errno));
				free(buf);
				return -1;
			}
		}
		/* ...then append whole zero sectors up to the target size */
		for (i = current; i < sectors; i++ ) {
			ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
			if (ret != DEFAULT_SECTOR_SIZE) {
				DPRINTF("write failed: ret = %d, err = %s\n",
					ret, strerror(errno));
				free(buf);
				return -1;
			}
		}
		free(buf);
	} else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
		if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
			DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
			return -1;
		}
	return 0;
}
350 /* 'allocate' is:
351 *
352 * 0 to not allocate.
353 *
354 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
355 * 'n_end')
356 *
357 * 2 to allocate a compressed cluster of size
358 * 'compressed_size'. 'compressed_size' must be > 0 and <
359 * cluster_size
360 *
361 * return 0 if not allocated.
362 */
363 static uint64_t get_cluster_offset(struct tdqcow_state *s,
364 uint64_t offset, int allocate,
365 int compressed_size,
366 int n_start, int n_end)
367 {
368 int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
369 char *tmp_ptr2, *l2_ptr, *l1_ptr;
370 uint64_t *tmp_ptr;
371 uint64_t l2_offset, *l2_table, cluster_offset, tmp;
372 uint32_t min_count;
373 int new_l2_table;
375 /*Check L1 table for the extent offset*/
376 l1_index = offset >> (s->l2_bits + s->cluster_bits);
377 l2_offset = s->l1_table[l1_index];
378 new_l2_table = 0;
379 if (!l2_offset) {
380 if (!allocate)
381 return 0;
382 /*
383 * allocating a new l2 entry + extent
384 * at the end of the file, we must also
385 * update the L1 entry safely.
386 */
387 l2_offset = s->fd_end;
389 /* round to cluster size */
390 l2_offset = (l2_offset + s->cluster_size - 1)
391 & ~(s->cluster_size - 1);
393 /* update the L1 entry */
394 s->l1_table[l1_index] = l2_offset;
395 tmp = cpu_to_be64(l2_offset);
397 /*Truncate file for L2 table
398 *(initialised to zero in case we crash)*/
399 if (qtruncate(s->fd,
400 l2_offset + (s->l2_size * sizeof(uint64_t)),
401 s->sparse) != 0) {
402 DPRINTF("ERROR truncating file\n");
403 return 0;
404 }
405 s->fd_end = l2_offset + (s->l2_size * sizeof(uint64_t));
407 /*Update the L1 table entry on disk
408 * (for O_DIRECT we write 4KByte blocks)*/
409 l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
410 l1_ptr = (char *)s->l1_table + (l1_sector << 12);
412 if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
413 DPRINTF("ERROR allocating memory for L1 table\n");
414 }
415 memcpy(tmp_ptr, l1_ptr, 4096);
417 /* Convert block to write to big endian */
418 for(i = 0; i < 4096 / sizeof(uint64_t); i++) {
419 cpu_to_be64s(&tmp_ptr[i]);
420 }
422 /*
423 * Issue non-asynchronous L1 write.
424 * For safety, we must ensure that
425 * entry is written before blocks.
426 */
427 lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
428 if (write(s->fd, tmp_ptr, 4096) != 4096) {
429 free(tmp_ptr);
430 return 0;
431 }
432 free(tmp_ptr);
434 new_l2_table = 1;
435 goto cache_miss;
436 } else if (s->min_cluster_alloc == s->l2_size) {
437 /*Fast-track the request*/
438 cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
439 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
440 return cluster_offset + (l2_index * s->cluster_size);
441 }
443 /*Check to see if L2 entry is already cached*/
444 for (i = 0; i < L2_CACHE_SIZE; i++) {
445 if (l2_offset == s->l2_cache_offsets[i]) {
446 /* increment the hit count */
447 if (++s->l2_cache_counts[i] == 0xffffffff) {
448 for (j = 0; j < L2_CACHE_SIZE; j++) {
449 s->l2_cache_counts[j] >>= 1;
450 }
451 }
452 l2_table = s->l2_cache + (i << s->l2_bits);
453 goto found;
454 }
455 }
457 cache_miss:
458 /* not found: load a new entry in the least used one */
459 min_index = 0;
460 min_count = 0xffffffff;
461 for (i = 0; i < L2_CACHE_SIZE; i++) {
462 if (s->l2_cache_counts[i] < min_count) {
463 min_count = s->l2_cache_counts[i];
464 min_index = i;
465 }
466 }
467 l2_table = s->l2_cache + (min_index << s->l2_bits);
469 /*If extent pre-allocated, read table from disk,
470 *otherwise write new table to disk*/
471 if (new_l2_table) {
472 /*Should we allocate the whole extent? Adjustable parameter.*/
473 if (s->cluster_alloc == s->l2_size) {
474 cluster_offset = l2_offset +
475 (s->l2_size * sizeof(uint64_t));
476 cluster_offset = (cluster_offset + s->cluster_size - 1)
477 & ~(s->cluster_size - 1);
478 if (qtruncate(s->fd, cluster_offset +
479 (s->cluster_size * s->l2_size),
480 s->sparse) != 0) {
481 DPRINTF("ERROR truncating file\n");
482 return 0;
483 }
484 s->fd_end = cluster_offset +
485 (s->cluster_size * s->l2_size);
486 for (i = 0; i < s->l2_size; i++) {
487 l2_table[i] = cpu_to_be64(cluster_offset +
488 (i*s->cluster_size));
489 }
490 } else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
492 lseek(s->fd, l2_offset, SEEK_SET);
493 if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
494 s->l2_size * sizeof(uint64_t))
495 return 0;
496 } else {
497 lseek(s->fd, l2_offset, SEEK_SET);
498 if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
499 s->l2_size * sizeof(uint64_t))
500 return 0;
501 }
503 /*Update the cache entries*/
504 s->l2_cache_offsets[min_index] = l2_offset;
505 s->l2_cache_counts[min_index] = 1;
507 found:
508 /*The extent is split into 's->l2_size' blocks of
509 *size 's->cluster_size'*/
510 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
511 cluster_offset = be64_to_cpu(l2_table[l2_index]);
513 if (!cluster_offset ||
514 ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
515 if (!allocate)
516 return 0;
518 if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
519 (n_end - n_start) < s->cluster_sectors) {
520 /* cluster is already allocated but compressed, we must
521 decompress it in the case it is not completely
522 overwritten */
523 if (decompress_cluster(s, cluster_offset) < 0)
524 return 0;
525 cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
526 cluster_offset = (cluster_offset + s->cluster_size - 1)
527 & ~(s->cluster_size - 1);
528 /* write the cluster content - not asynchronous */
529 lseek(s->fd, cluster_offset, SEEK_SET);
530 if (write(s->fd, s->cluster_cache, s->cluster_size) !=
531 s->cluster_size)
532 return -1;
533 } else {
534 /* allocate a new cluster */
535 cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
536 if (allocate == 1) {
537 /* round to cluster size */
538 cluster_offset =
539 (cluster_offset + s->cluster_size - 1)
540 & ~(s->cluster_size - 1);
541 if (qtruncate(s->fd, cluster_offset +
542 s->cluster_size, s->sparse)!=0) {
543 DPRINTF("ERROR truncating file\n");
544 return 0;
545 }
546 s->fd_end = (cluster_offset + s->cluster_size);
547 /* if encrypted, we must initialize the cluster
548 content which won't be written */
549 if (s->crypt_method &&
550 (n_end - n_start) < s->cluster_sectors) {
551 uint64_t start_sect;
552 start_sect = (offset &
553 ~(s->cluster_size - 1))
554 >> 9;
555 memset(s->cluster_data + 512,
556 0xaa, 512);
557 for (i = 0; i < s->cluster_sectors;i++)
558 {
559 if (i < n_start || i >= n_end)
560 {
561 encrypt_sectors(s, start_sect + i,
562 s->cluster_data,
563 s->cluster_data + 512, 1, 1,
564 &s->aes_encrypt_key);
565 lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
566 if (write(s->fd, s->cluster_data, 512) != 512)
567 return -1;
568 }
569 }
570 }
571 } else {
572 cluster_offset |= QCOW_OFLAG_COMPRESSED |
573 (uint64_t)compressed_size
574 << (63 - s->cluster_bits);
575 }
576 }
577 /* update L2 table */
578 tmp = cpu_to_be64(cluster_offset);
579 l2_table[l2_index] = tmp;
581 /*For IO_DIRECT we write 4KByte blocks*/
582 l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
583 l2_ptr = (char *)l2_table + (l2_sector << 12);
585 if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
586 DPRINTF("ERROR allocating memory for L1 table\n");
587 }
588 memcpy(tmp_ptr2, l2_ptr, 4096);
589 lseek(s->fd, l2_offset + (l2_sector << 12), SEEK_SET);
590 if (write(s->fd, tmp_ptr2, 4096) != 4096) {
591 free(tmp_ptr2);
592 return -1;
593 }
594 free(tmp_ptr2);
595 }
596 return cluster_offset;
597 }
599 static void init_cluster_cache(struct disk_driver *dd)
600 {
601 struct td_state *bs = dd->td_state;
602 struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
603 uint32_t count = 0;
604 int i, cluster_entries;
606 cluster_entries = s->cluster_size / 512;
607 DPRINTF("Initialising Cluster cache, %d sectors per cluster (%d cluster size)\n",
608 cluster_entries, s->cluster_size);
610 for (i = 0; i < bs->size; i += cluster_entries) {
611 if (get_cluster_offset(s, i << 9, 0, 0, 0, 1)) count++;
612 if (count >= L2_CACHE_SIZE) return;
613 }
614 DPRINTF("Finished cluster initialisation, added %d entries\n", count);
615 return;
616 }
618 static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num,
619 int nb_sectors, int *pnum)
620 {
621 int index_in_cluster, n;
622 uint64_t cluster_offset;
624 cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0);
625 index_in_cluster = sector_num & (s->cluster_sectors - 1);
626 n = s->cluster_sectors - index_in_cluster;
627 if (n > nb_sectors)
628 n = nb_sectors;
629 *pnum = n;
630 return (cluster_offset != 0);
631 }
633 static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
634 const uint8_t *buf, int buf_size)
635 {
636 z_stream strm1, *strm = &strm1;
637 int ret, out_len;
639 memset(strm, 0, sizeof(*strm));
641 strm->next_in = (uint8_t *)buf;
642 strm->avail_in = buf_size;
643 strm->next_out = out_buf;
644 strm->avail_out = out_buf_size;
646 ret = inflateInit2(strm, -12);
647 if (ret != Z_OK)
648 return -1;
649 ret = inflate(strm, Z_FINISH);
650 out_len = strm->next_out - out_buf;
651 if ( (ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
652 (out_len != out_buf_size) ) {
653 inflateEnd(strm);
654 return -1;
655 }
656 inflateEnd(strm);
657 return 0;
658 }
/*
 * Ensure the compressed cluster identified by 'cluster_offset' is
 * decompressed into s->cluster_cache. The L2 entry packs the byte
 * offset (low bits, cluster_offset_mask) and the compressed size
 * (bits just below QCOW_OFLAG_COMPRESSED). A one-entry cache keyed by
 * s->cluster_cache_offset avoids re-reading the same cluster.
 * Returns 0 on success, -1 on read/decompress failure.
 */
static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
{
	int ret, csize;
	uint64_t coffset;

	coffset = cluster_offset & s->cluster_offset_mask;
	if (s->cluster_cache_offset != coffset) {
		/* extract compressed byte count from the entry's upper bits */
		csize = cluster_offset >> (63 - s->cluster_bits);
		csize &= (s->cluster_size - 1);
		lseek(s->fd, coffset, SEEK_SET);
		ret = read(s->fd, s->cluster_data, csize);
		if (ret != csize)
			return -1;
		if (decompress_buffer(s->cluster_cache, s->cluster_size,
				      s->cluster_data, csize) < 0) {
			return -1;
		}
		/* mark the cache as holding this cluster */
		s->cluster_cache_offset = coffset;
	}
	return 0;
}
682 static inline void init_fds(struct disk_driver *dd)
683 {
684 int i;
685 struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
687 for(i = 0; i < MAX_IOFD; i++)
688 dd->io_fd[i] = 0;
690 dd->io_fd[0] = s->aio.aio_ctx.pollfd;
691 }
693 /* Open the disk file and initialize qcow state. */
694 int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flags)
695 {
696 int fd, len, i, shift, ret, size, l1_table_size, o_flags;
697 int max_aio_reqs;
698 struct td_state *bs = dd->td_state;
699 struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
700 char *buf;
701 QCowHeader *header;
702 QCowHeader_ext *exthdr;
703 uint32_t cksum;
704 uint64_t final_cluster = 0;
706 DPRINTF("QCOW: Opening %s\n",name);
708 o_flags = O_DIRECT | O_LARGEFILE |
709 ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR);
710 fd = open(name, o_flags);
711 if (fd < 0) {
712 DPRINTF("Unable to open %s (%d)\n",name,0 - errno);
713 return -1;
714 }
716 s->fd = fd;
717 if (asprintf(&s->name,"%s", name) == -1) {
718 close(fd);
719 return -1;
720 }
722 ASSERT(sizeof(QCowHeader) + sizeof(QCowHeader_ext) < 512);
724 ret = posix_memalign((void **)&buf, 512, 512);
725 if (ret != 0) goto fail;
727 if (read(fd, buf, 512) != 512)
728 goto fail;
730 header = (QCowHeader *)buf;
731 be32_to_cpus(&header->magic);
732 be32_to_cpus(&header->version);
733 be64_to_cpus(&header->backing_file_offset);
734 be32_to_cpus(&header->backing_file_size);
735 be32_to_cpus(&header->mtime);
736 be64_to_cpus(&header->size);
737 be32_to_cpus(&header->crypt_method);
738 be64_to_cpus(&header->l1_table_offset);
740 if (header->magic != QCOW_MAGIC)
741 goto fail;
743 switch (header->version) {
744 case QCOW_VERSION:
745 break;
746 case 2:
747 close(fd);
748 dd->drv = &tapdisk_qcow2;
749 return dd->drv->td_open(dd, name, flags);
750 default:
751 goto fail;
752 }
754 if (header->size <= 1 || header->cluster_bits < 9)
755 goto fail;
756 if (header->crypt_method > QCOW_CRYPT_AES)
757 goto fail;
758 s->crypt_method_header = header->crypt_method;
759 if (s->crypt_method_header)
760 s->encrypted = 1;
761 s->cluster_bits = header->cluster_bits;
762 s->cluster_size = 1 << s->cluster_bits;
763 s->cluster_sectors = 1 << (s->cluster_bits - 9);
764 s->l2_bits = header->l2_bits;
765 s->l2_size = 1 << s->l2_bits;
766 s->cluster_alloc = s->l2_size;
767 bs->size = header->size / 512;
768 s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
769 s->backing_file_offset = header->backing_file_offset;
770 s->backing_file_size = header->backing_file_size;
772 /* read the level 1 table */
773 shift = s->cluster_bits + s->l2_bits;
774 s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
776 s->l1_table_offset = header->l1_table_offset;
778 /*allocate a 4Kbyte multiple of memory*/
779 l1_table_size = s->l1_size * sizeof(uint64_t);
780 if (l1_table_size % 4096 > 0) {
781 l1_table_size = ((l1_table_size >> 12) + 1) << 12;
782 }
783 ret = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
784 if (ret != 0) goto fail;
786 memset(s->l1_table, 0x00, l1_table_size);
788 DPRINTF("L1 Table offset detected: %llu, size %d (%d)\n",
789 (long long)s->l1_table_offset,
790 (int) (s->l1_size * sizeof(uint64_t)),
791 l1_table_size);
793 lseek(fd, s->l1_table_offset, SEEK_SET);
794 if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
795 goto fail;
797 for(i = 0; i < s->l1_size; i++) {
798 be64_to_cpus(&s->l1_table[i]);
799 //DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
800 if (s->l1_table[i] > final_cluster)
801 final_cluster = s->l1_table[i];
802 }
804 /* alloc L2 cache */
805 size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
806 ret = posix_memalign((void **)&s->l2_cache, 4096, size);
807 if(ret != 0) goto fail;
809 size = s->cluster_size;
810 ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
811 if(ret != 0) goto fail;
813 ret = posix_memalign((void **)&s->cluster_data, 4096, size);
814 if(ret != 0) goto fail;
815 s->cluster_cache_offset = -1;
817 if (s->backing_file_offset != 0)
818 s->cluster_alloc = 1; /*Cannot use pre-alloc*/
820 bs->sector_size = 512;
821 bs->info = 0;
823 /*Detect min_cluster_alloc*/
824 s->min_cluster_alloc = 1; /*Default*/
825 if (s->backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) {
826 /*We test to see if the xen magic # exists*/
827 exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
828 be32_to_cpus(&exthdr->xmagic);
829 if(exthdr->xmagic != XEN_MAGIC)
830 goto end_xenhdr;
832 /* Try to detect old tapdisk images. They have to be fixed because
833 * they don't use big endian but native endianess for the L1 table */
834 if ((exthdr->flags & EXTHDR_L1_BIG_ENDIAN) == 0) {
836 /*
837 The image is broken. Fix it. The L1 table has already been
838 byte-swapped, so we can write it to the image file as it is
839 currently in memory. Then swap it back to native endianess
840 for operation.
841 */
843 DPRINTF("qcow: Converting image to big endian L1 table\n");
845 lseek(fd, s->l1_table_offset, SEEK_SET);
846 if (write(fd, s->l1_table, l1_table_size) != l1_table_size) {
847 DPRINTF("qcow: Failed to write new L1 table\n");
848 goto fail;
849 }
851 for(i = 0;i < s->l1_size; i++) {
852 cpu_to_be64s(&s->l1_table[i]);
853 }
855 /* Write the big endian flag to the extended header */
856 exthdr->flags |= EXTHDR_L1_BIG_ENDIAN;
858 if (write(fd, buf, 512) != 512) {
859 DPRINTF("qcow: Failed to write extended header\n");
860 goto fail;
861 }
862 }
864 /*Finally check the L1 table cksum*/
865 be32_to_cpus(&exthdr->cksum);
866 cksum = gen_cksum((char *)s->l1_table,
867 s->l1_size * sizeof(uint64_t));
868 if(exthdr->cksum != cksum)
869 goto end_xenhdr;
871 be32_to_cpus(&exthdr->min_cluster_alloc);
872 be32_to_cpus(&exthdr->flags);
873 s->sparse = (exthdr->flags & SPARSE_FILE);
874 s->min_cluster_alloc = exthdr->min_cluster_alloc;
875 }
877 end_xenhdr:
879 /* A segment (i.e. a page) can span multiple clusters */
880 max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
881 MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
883 if (tap_aio_init(&s->aio, bs->size, max_aio_reqs)!=0) {
884 DPRINTF("Unable to initialise AIO state\n");
885 tap_aio_free(&s->aio);
886 goto fail;
887 }
888 init_fds(dd);
890 if (!final_cluster)
891 s->fd_end = s->l1_table_offset + l1_table_size;
892 else {
893 s->fd_end = lseek(fd, 0, SEEK_END);
894 if (s->fd_end == (off_t)-1)
895 goto fail;
896 }
898 return 0;
900 fail:
901 DPRINTF("QCOW Open failed\n");
902 tap_aio_free(&s->aio);
903 free(s->l1_table);
904 free(s->l2_cache);
905 free(s->cluster_cache);
906 free(s->cluster_data);
907 close(fd);
908 return -1;
909 }
/*
 * Queue an asynchronous read of 'nb_sectors' starting at 'sector'.
 * The request is split at cluster boundaries; per chunk:
 *   - unallocated  -> report BLK_NOT_ALLOCATED synchronously via cb
 *   - compressed   -> decompress synchronously and copy from the cache
 *   - allocated    -> submit an AIO read (cb fires on completion)
 * Returns the accumulated synchronous callback result, or cb(-EBUSY,...)
 * if a sector lock or iocb slot is unavailable.
 */
int tdqcow_queue_read(struct disk_driver *dd, uint64_t sector,
		      int nb_sectors, char *buf, td_callback_t cb,
		      int id, void *private)
{
	struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
	int ret = 0, index_in_cluster, n, i, rsp = 0;
	uint64_t cluster_offset, sec, nr_secs;

	/* original request bounds (kept for reference; not used below) */
	sec     = sector;
	nr_secs = nb_sectors;

	/*Check we can get a lock*/
	for (i = 0; i < nb_sectors; i++)
		if (!tap_aio_can_lock(&s->aio, sector + i))
			return cb(dd, -EBUSY, sector, nb_sectors, id, private);

	/*We store a local record of the request*/
	while (nb_sectors > 0) {
		/* read-only lookup: allocate == 0 */
		cluster_offset =
			get_cluster_offset(s, sector << 9, 0, 0, 0, 0);
		index_in_cluster = sector & (s->cluster_sectors - 1);
		n = s->cluster_sectors - index_in_cluster;
		if (n > nb_sectors)
			n = nb_sectors;

		if (s->aio.iocb_free_count == 0 || !tap_aio_lock(&s->aio, sector))
			return cb(dd, -EBUSY, sector, nb_sectors, id, private);

		if (!cluster_offset) {
			/* hole: nothing on disk; report synchronously */
			tap_aio_unlock(&s->aio, sector);
			ret = cb(dd, BLK_NOT_ALLOCATED,
				 sector, n, id, private);
			if (ret == -EBUSY) {
				/* mark remainder of request
				 * as busy and try again later */
				return cb(dd, -EBUSY, sector + n,
					  nb_sectors - n, id, private);
			} else
				rsp += ret;
		} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
			/* compressed cluster: synchronous decompress + copy */
			tap_aio_unlock(&s->aio, sector);
			if (decompress_cluster(s, cluster_offset) < 0) {
				rsp += cb(dd, -EIO, sector,
					  nb_sectors, id, private);
				goto done;
			}
			memcpy(buf, s->cluster_cache + index_in_cluster * 512,
			       512 * n);
			rsp += cb(dd, 0, sector, n, id, private);
		} else {
			/* plain cluster: asynchronous read */
			tap_aio_read(&s->aio, s->fd, n * 512,
				     (cluster_offset + index_in_cluster * 512),
				     buf, cb, id, sector, private);
		}
		nb_sectors -= n;
		sector += n;
		buf += n * 512;
	}
done:
	return rsp;
}
973 int tdqcow_queue_write(struct disk_driver *dd, uint64_t sector,
974 int nb_sectors, char *buf, td_callback_t cb,
975 int id, void *private)
976 {
977 struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
978 int ret = 0, index_in_cluster, n, i;
979 uint64_t cluster_offset, sec, nr_secs;
981 sec = sector;
982 nr_secs = nb_sectors;
984 /*Check we can get a lock*/
985 for (i = 0; i < nb_sectors; i++)
986 if (!tap_aio_can_lock(&s->aio, sector + i))
987 return cb(dd, -EBUSY, sector, nb_sectors, id, private);
989 /*We store a local record of the request*/
990 while (nb_sectors > 0) {
991 index_in_cluster = sector & (s->cluster_sectors - 1);
992 n = s->cluster_sectors - index_in_cluster;
993 if (n > nb_sectors)
994 n = nb_sectors;
996 if (s->aio.iocb_free_count == 0 || !tap_aio_lock(&s->aio, sector))
997 return cb(dd, -EBUSY, sector, nb_sectors, id, private);
999 cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
1000 index_in_cluster,
1001 index_in_cluster+n);
1002 if (!cluster_offset) {
1003 DPRINTF("Ooops, no write cluster offset!\n");
1004 tap_aio_unlock(&s->aio, sector);
1005 return cb(dd, -EIO, sector, nb_sectors, id, private);
1008 if (s->crypt_method) {
1009 encrypt_sectors(s, sector, s->cluster_data,
1010 (unsigned char *)buf, n, 1,
1011 &s->aes_encrypt_key);
1012 tap_aio_write(&s->aio, s->fd, n * 512,
1013 (cluster_offset + index_in_cluster*512),
1014 (char *)s->cluster_data, cb, id, sector,
1015 private);
1016 } else {
1017 tap_aio_write(&s->aio, s->fd, n * 512,
1018 (cluster_offset + index_in_cluster*512),
1019 buf, cb, id, sector, private);
1022 nb_sectors -= n;
1023 sector += n;
1024 buf += n * 512;
1026 s->cluster_cache_offset = -1; /* disable compressed cache */
1028 return 0;
1031 int tdqcow_submit(struct disk_driver *dd)
1033 struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
1035 return tap_aio_submit(&prv->aio);
1038 int tdqcow_close(struct disk_driver *dd)
1040 struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
1041 uint32_t cksum, out;
1042 int fd, offset;
1044 /*Update the hdr cksum*/
1045 if(s->min_cluster_alloc == s->l2_size) {
1046 cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
1047 printf("Writing cksum: %d",cksum);
1048 fd = open(s->name, O_WRONLY | O_LARGEFILE); /*Open without O_DIRECT*/
1049 offset = sizeof(QCowHeader) + sizeof(uint32_t);
1050 lseek(fd, offset, SEEK_SET);
1051 out = cpu_to_be32(cksum);
1052 if (write(fd, &out, sizeof(uint32_t))) ;
1053 close(fd);
1056 io_destroy(s->aio.aio_ctx.aio_ctx);
1057 free(s->name);
1058 free(s->l1_table);
1059 free(s->l2_cache);
1060 free(s->cluster_cache);
1061 free(s->cluster_data);
1062 close(s->fd);
1063 return 0;
1066 int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
1068 int ret, i, nr_events, rsp = 0,*ptr;
1069 struct io_event *ep;
1070 struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
1072 if (sid > MAX_IOFD) return 1;
1074 nr_events = tap_aio_get_events(&prv->aio.aio_ctx);
1075 repeat:
1076 for (ep = prv->aio.aio_events, i = nr_events; i-- > 0; ep++) {
1077 struct iocb *io = ep->obj;
1078 struct pending_aio *pio;
1080 pio = &prv->aio.pending_aio[(long)io->data];
1082 tap_aio_unlock(&prv->aio, pio->sector);
1084 if (prv->crypt_method)
1085 encrypt_sectors(prv, pio->sector,
1086 (unsigned char *)pio->buf,
1087 (unsigned char *)pio->buf,
1088 pio->nb_sectors, 0,
1089 &prv->aes_decrypt_key);
1091 rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1,
1092 pio->sector, pio->nb_sectors,
1093 pio->id, pio->private);
1095 prv->aio.iocb_free[prv->aio.iocb_free_count++] = io;
1098 if (nr_events) {
1099 nr_events = tap_aio_more_events(&prv->aio.aio_ctx);
1100 goto repeat;
1103 tap_aio_continue(&prv->aio.aio_ctx);
1105 return rsp;
1108 int qcow_create(const char *filename, uint64_t total_size,
1109 const char *backing_file, int sparse)
1111 int fd, header_size, backing_filename_len, l1_size, i;
1112 int shift, length, adjust, flags = 0, ret = 0;
1113 QCowHeader header;
1114 QCowHeader_ext exthdr;
1115 char backing_filename[PATH_MAX], *ptr;
1116 uint64_t tmp, size, total_length;
1117 struct stat st;
1119 DPRINTF("Qcow_create: size %llu\n",(long long unsigned)total_size);
1121 fd = open(filename,
1122 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1123 0644);
1124 if (fd < 0)
1125 return -1;
1127 memset(&header, 0, sizeof(header));
1128 header.magic = cpu_to_be32(QCOW_MAGIC);
1129 header.version = cpu_to_be32(QCOW_VERSION);
1131 /*Create extended header fields*/
1132 exthdr.xmagic = cpu_to_be32(XEN_MAGIC);
1134 header_size = sizeof(header) + sizeof(QCowHeader_ext);
1135 backing_filename_len = 0;
1136 size = (total_size >> SECTOR_SHIFT);
1137 if (backing_file) {
1138 if (strcmp(backing_file, "fat:")) {
1139 const char *p;
1140 /* XXX: this is a hack: we do not attempt to
1141 *check for URL like syntax */
1142 p = strchr(backing_file, ':');
1143 if (p && (p - backing_file) >= 2) {
1144 /* URL like but exclude "c:" like filenames */
1145 strncpy(backing_filename, backing_file,
1146 sizeof(backing_filename));
1147 } else {
1148 if (realpath(backing_file, backing_filename) == NULL ||
1149 stat(backing_filename, &st) != 0) {
1150 return -1;
1153 header.backing_file_offset = cpu_to_be64(header_size);
1154 backing_filename_len = strlen(backing_filename);
1155 header.backing_file_size = cpu_to_be32(
1156 backing_filename_len);
1157 header_size += backing_filename_len;
1159 /*Set to the backing file size*/
1160 if(get_filesize(backing_filename, &size, &st)) {
1161 return -1;
1163 DPRINTF("Backing file size detected: %lld sectors"
1164 "(total %lld [%lld MB])\n",
1165 (long long)size,
1166 (long long)(size << SECTOR_SHIFT),
1167 (long long)(size >> 11));
1168 } else {
1169 backing_file = NULL;
1170 DPRINTF("Setting file size: %lld (total %lld)\n",
1171 (long long) total_size,
1172 (long long) (total_size << SECTOR_SHIFT));
1174 header.mtime = cpu_to_be32(st.st_mtime);
1175 header.cluster_bits = 9; /* 512 byte cluster to avoid copying
1176 unmodifyed sectors */
1177 header.l2_bits = 12; /* 32 KB L2 tables */
1178 exthdr.min_cluster_alloc = cpu_to_be32(1);
1179 } else {
1180 DPRINTF("Setting file size: %lld sectors"
1181 "(total %lld [%lld MB])\n",
1182 (long long) size,
1183 (long long) (size << SECTOR_SHIFT),
1184 (long long) (size >> 11));
1185 header.cluster_bits = 12; /* 4 KB clusters */
1186 header.l2_bits = 9; /* 4 KB L2 tables */
1187 exthdr.min_cluster_alloc = cpu_to_be32(1 << 9);
1189 /*Set the header size value*/
1190 header.size = cpu_to_be64(size * 512);
1192 header_size = (header_size + 7) & ~7;
1193 if (header_size % 4096 > 0) {
1194 header_size = ((header_size >> 12) + 1) << 12;
1197 shift = header.cluster_bits + header.l2_bits;
1198 l1_size = ((size * 512) + (1LL << shift) - 1) >> shift;
1200 header.l1_table_offset = cpu_to_be64(header_size);
1201 DPRINTF("L1 Table offset: %d, size %d\n",
1202 header_size,
1203 (int)(l1_size * sizeof(uint64_t)));
1204 header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
1206 ptr = calloc(1, l1_size * sizeof(uint64_t));
1207 exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_size * sizeof(uint64_t)));
1208 printf("Created cksum: %d\n",exthdr.cksum);
1209 free(ptr);
1211 /*adjust file length to system page size boundary*/
1212 length = ROUNDUP(header_size + (l1_size * sizeof(uint64_t)),
1213 getpagesize());
1214 if (qtruncate(fd, length, 0)!=0) {
1215 DPRINTF("ERROR truncating file\n");
1216 return -1;
1219 if (sparse == 0) {
1220 /*Filesize is length+l1_size*(1 << s->l2_bits)+(size*512)*/
1221 total_length = length + (l1_size * (1 << 9)) + (size * 512);
1222 if (qtruncate(fd, total_length, 0)!=0) {
1223 DPRINTF("ERROR truncating file\n");
1224 return -1;
1226 printf("File truncated to length %"PRIu64"\n",total_length);
1227 } else
1228 flags = SPARSE_FILE;
1230 exthdr.flags = cpu_to_be32(flags);
1232 /* write all the data */
1233 lseek(fd, 0, SEEK_SET);
1234 ret += write(fd, &header, sizeof(header));
1235 ret += write(fd, &exthdr, sizeof(exthdr));
1236 if (backing_file)
1237 ret += write(fd, backing_filename, backing_filename_len);
1239 lseek(fd, header_size, SEEK_SET);
1240 tmp = 0;
1241 for (i = 0;i < l1_size; i++) {
1242 ret += write(fd, &tmp, sizeof(tmp));
1245 close(fd);
1247 return 0;
1250 int qcow_make_empty(struct tdqcow_state *s)
1252 uint32_t l1_length = s->l1_size * sizeof(uint64_t);
1254 memset(s->l1_table, 0, l1_length);
1255 lseek(s->fd, s->l1_table_offset, SEEK_SET);
1256 if (write(s->fd, s->l1_table, l1_length) < 0)
1257 return -1;
1258 if (qtruncate(s->fd, s->l1_table_offset + l1_length, s->sparse)!=0) {
1259 DPRINTF("ERROR truncating file\n");
1260 return -1;
1263 memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
1264 memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
1265 memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
1267 return 0;
/* Accessor: size in bytes of one cluster of this open image. */
int qcow_get_cluster_size(struct tdqcow_state *s)
{
	return s->cluster_size;
}
/* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
/*
 * Deflate one cluster of guest data and write it to the image.
 *
 * @s:          open qcow state
 * @sector_num: first guest sector of the cluster
 * @buf:        s->cluster_size bytes of uncompressed data
 *
 * Uses raw deflate (negative windowBits => no zlib header).  Output is
 * capped at s->cluster_size so that data which does not compress below
 * one cluster is detected; in that case nothing is written here (the
 * "write normal cluster" path is commented out in this driver).
 *
 * Returns 0 on success or on the incompressible no-op path, -1 on
 * allocation, zlib, or write failure.
 */
int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num,
			  const uint8_t *buf)
{
	z_stream strm;
	int ret, out_len;
	uint8_t *out_buf;
	uint64_t cluster_offset;

	/* worst-case deflate expansion bound for incompressible input */
	out_buf = malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
	if (!out_buf)
		return -1;

	/* best compression, small window, no zlib header */
	memset(&strm, 0, sizeof(strm));
	ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
			   Z_DEFLATED, -12,
			   9, Z_DEFAULT_STRATEGY);
	if (ret != 0) {	/* Z_OK == 0 */
		free(out_buf);
		return -1;
	}
	strm.avail_in = s->cluster_size;
	strm.next_in = (uint8_t *)buf;
	strm.avail_out = s->cluster_size;	/* deliberately < buffer size */
	strm.next_out = out_buf;

	ret = deflate(&strm, Z_FINISH);
	if (ret != Z_STREAM_END && ret != Z_OK) {
		free(out_buf);
		deflateEnd(&strm);
		return -1;
	}
	/* bytes actually produced by deflate */
	out_len = strm.next_out - out_buf;

	deflateEnd(&strm);

	/* Z_OK after Z_FINISH means output did not fit in one cluster */
	if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
		/* could not compress: write normal cluster */
		//tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
	} else {
		/* mode 2: allocate a compressed cluster of out_len bytes.
		 * NOTE(review): a zero return (failure?) is not checked
		 * before the lseek/write below — confirm
		 * get_cluster_offset's failure convention. */
		cluster_offset = get_cluster_offset(s, sector_num << 9, 2,
						    out_len, 0, 0);
		cluster_offset &= s->cluster_offset_mask;
		lseek(s->fd, cluster_offset, SEEK_SET);
		if (write(s->fd, out_buf, out_len) != out_len) {
			free(out_buf);
			return -1;
		}
	}

	free(out_buf);
	return 0;
}
1332 int tdqcow_get_parent_id(struct disk_driver *dd, struct disk_id *id)
1334 off_t off;
1335 char *buf, *filename;
1336 int len, secs, err = -EINVAL;
1337 struct tdqcow_state *child = (struct tdqcow_state *)dd->private;
1339 if (!child->backing_file_offset)
1340 return TD_NO_PARENT;
1342 /* read the backing file name */
1343 len = child->backing_file_size;
1344 off = child->backing_file_offset - (child->backing_file_offset % 512);
1345 secs = (len + (child->backing_file_offset - off) + 511) >> 9;
1347 if (posix_memalign((void **)&buf, 512, secs << 9))
1348 return -1;
1350 if (lseek(child->fd, off, SEEK_SET) == (off_t)-1)
1351 goto out;
1353 if (read(child->fd, buf, secs << 9) != secs << 9)
1354 goto out;
1355 filename = buf + (child->backing_file_offset - off);
1356 filename[len] = '\0';
1358 id->name = strdup(filename);
1359 id->drivertype = DISK_TYPE_QCOW;
1360 err = 0;
1361 out:
1362 free(buf);
1363 return err;
1366 int tdqcow_validate_parent(struct disk_driver *child,
1367 struct disk_driver *parent, td_flag_t flags)
1369 struct stat stats;
1370 uint64_t psize, csize;
1371 struct tdqcow_state *c = (struct tdqcow_state *)child->private;
1372 struct tdqcow_state *p = (struct tdqcow_state *)parent->private;
1374 if (stat(p->name, &stats))
1375 return -EINVAL;
1376 if (get_filesize(p->name, &psize, &stats))
1377 return -EINVAL;
1379 if (stat(c->name, &stats))
1380 return -EINVAL;
1381 if (get_filesize(c->name, &csize, &stats))
1382 return -EINVAL;
1384 if (csize != psize)
1385 return -EINVAL;
1387 return 0;
/* Driver operations table exported to the tapdisk framework. */
struct tap_disk tapdisk_qcow = {
	.disk_type = "tapdisk_qcow",
	.private_data_size = sizeof(struct tdqcow_state),
	.td_open = tdqcow_open,
	.td_queue_read = tdqcow_queue_read,
	.td_queue_write = tdqcow_queue_write,
	.td_submit = tdqcow_submit,
	.td_close = tdqcow_close,
	.td_do_callbacks = tdqcow_do_callbacks,
	.td_get_parent_id = tdqcow_get_parent_id,
	.td_validate_parent = tdqcow_validate_parent
};