ia64/xen-unstable

view tools/blktap/drivers/block-qcow.c @ 11429:323a89a3c01c

[blktap] Fix block device getsize bug in qcow when backing_filename is a block device.
author jchesterfield@dhcp92.uk.xensource.com
date Tue Sep 05 15:35:42 2006 +0100 (2006-09-05)
parents 7299065fac68
children 69efe6730fb1
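
The fix above concerns how the backing file's size is detected: for a block device, stat() does not report a usable st_size, so the size has to be obtained from the BLKGETSIZE ioctl (which returns a count of 512-byte sectors), as done by get_filesize() in the listing below. A minimal standalone sketch of that logic, using a hypothetical helper name, could look like this:

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/fs.h>

/* Hypothetical helper mirroring get_filesize() below: return the size of
 * 'path' in 512-byte sectors, whether it is a regular file or a block device. */
static int backing_size_in_sectors(const char *path, uint64_t *sectors)
{
	struct stat st;

	if (stat(path, &st) != 0)
		return -1;
	if (S_ISBLK(st.st_mode)) {
		/* Block device: st_size is not meaningful; ask the kernel.
		 * BLKGETSIZE reports the size in 512-byte sectors. */
		unsigned long sz;
		int fd = open(path, O_RDONLY);
		if (fd < 0)
			return -1;
		if (ioctl(fd, BLKGETSIZE, &sz) != 0) {
			close(fd);
			return -1;
		}
		close(fd);
		*sectors = sz;
	} else {
		/* Regular file: derive the sector count from st_size. */
		*sectors = st.st_size >> 9;
	}
	return 0;
}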
1 /* block-qcow.c
2 *
3 * Asynchronous Qemu copy-on-write disk implementation.
4 * Code based on the Qemu implementation
5 * (see copyright notice below)
6 *
7 * (c) 2006 Andrew Warfield and Julian Chesterfield
8 *
9 */
11 /*
12 * Block driver for the QCOW format
13 *
14 * Copyright (c) 2004 Fabrice Bellard
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this software and associated documentation files(the "Software"), to deal
18 * in the Software without restriction, including without limitation the rights
19 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
20 * copies of the Software, and to permit persons to whom the Software is
21 * furnished to do so, subject to the following conditions:
22 */
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <sys/statvfs.h>
30 #include <sys/stat.h>
31 #include <sys/ioctl.h>
32 #include <linux/fs.h>
33 #include <string.h>
34 #include <zlib.h>
35 #include <inttypes.h>
36 #include <libaio.h>
37 #include <openssl/md5.h>
38 #include "bswap.h"
39 #include "aes.h"
40 #include "tapdisk.h"
42 #if 1
43 #define ASSERT(_p) \
44 if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
45 __LINE__, __FILE__); *(int*)0=0; }
46 #else
47 #define ASSERT(_p) ((void)0)
48 #endif
51 /******AIO DEFINES******/
52 #define REQUEST_ASYNC_FD 1
53 #define MAX_QCOW_IDS 0xFFFF
54 #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
56 struct pending_aio {
57 td_callback_t cb;
58 int id;
59 void *private;
60 int nb_sectors;
61 char *buf;
62 uint64_t sector;
63 int qcow_idx;
64 };
66 #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
68 #define ZERO_TEST(_b) (_b | 0x00)
70 /**************************************************************/
71 /* QEMU COW block driver with compression and encryption support */
73 #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
74 #define XEN_MAGIC (('X' << 24) | ('E' << 16) | ('N' << 8) | 0xfb)
75 #define QCOW_VERSION 1
77 #define QCOW_CRYPT_NONE 0
78 #define QCOW_CRYPT_AES 1
80 #define QCOW_OFLAG_COMPRESSED (1LL << 63)
82 #ifndef O_BINARY
83 #define O_BINARY 0
84 #endif
86 typedef struct QCowHeader {
87 uint32_t magic;
88 uint32_t version;
89 uint64_t backing_file_offset;
90 uint32_t backing_file_size;
91 uint32_t mtime;
92 uint64_t size; /* in bytes */
93 uint8_t cluster_bits;
94 uint8_t l2_bits;
95 uint32_t crypt_method;
96 uint64_t l1_table_offset;
97 } QCowHeader;
99 /*Extended header for Xen enhancements*/
100 typedef struct QCowHeader_ext {
101 uint32_t xmagic;
102 uint32_t cksum;
103 uint32_t min_cluster_alloc;
104 } QCowHeader_ext;
106 #define L2_CACHE_SIZE 16 /*Fixed allocation in Qemu*/
108 struct tdqcow_state {
109 int fd; /*Main Qcow file descriptor */
110 uint64_t fd_end; /*Store a local record of file length */
111 int bfd; /*Backing file descriptor*/
112 char *name; /*Record of the filename*/
113 int poll_pipe[2]; /*dummy fd for polling on */
114 int encrypted; /*File contents are encrypted or plain*/
115 int cluster_bits; /*Determines length of cluster as
116 *indicated by file hdr*/
117 int cluster_size; /*Length of cluster*/
118 int cluster_sectors; /*Number of sectors per cluster*/
119 int cluster_alloc; /*Blktap fix for allocating full
120 *extents*/
121 int min_cluster_alloc; /*Blktap historical extent alloc*/
122 int l2_bits; /*Size of L2 table entry*/
123 int l2_size; /*Full table size*/
124 int l1_size; /*L1 table size*/
125 uint64_t cluster_offset_mask;
126 uint64_t l1_table_offset; /*L1 table offset from beginning of
127 *file*/
128 uint64_t *l1_table; /*L1 table entries*/
129 uint64_t *l2_cache; /*We maintain a cache of size
130 *L2_CACHE_SIZE of most read entries*/
131 uint64_t l2_cache_offsets[L2_CACHE_SIZE]; /*L2 cache entries*/
132 uint32_t l2_cache_counts[L2_CACHE_SIZE]; /*Cache access record*/
133 uint8_t *cluster_cache;
134 uint8_t *cluster_data;
135 uint8_t *sector_lock; /*Locking bitmap for AIO reads/writes*/
136 uint64_t cluster_cache_offset; /**/
137 uint32_t crypt_method; /*current crypt method, 0 if no
138 *key yet */
139 uint32_t crypt_method_header; /**/
140 AES_KEY aes_encrypt_key; /*AES key*/
141 AES_KEY aes_decrypt_key; /*AES key*/
142 /* libaio state */
143 io_context_t aio_ctx;
144 int nr_reqs [MAX_QCOW_IDS];
145 struct iocb iocb_list [MAX_AIO_REQS];
146 struct iocb *iocb_free [MAX_AIO_REQS];
147 struct pending_aio pending_aio[MAX_AIO_REQS];
148 int iocb_free_count;
149 struct iocb *iocb_queue[MAX_AIO_REQS];
150 int iocb_queued;
151 int poll_fd; /* NB: we require aio_poll support */
152 struct io_event aio_events[MAX_AIO_REQS];
153 };
155 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
157 static int init_aio_state(struct td_state *bs)
158 {
159 int i;
160 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
161 long ioidx;
163 /*Initialize Locking bitmap*/
164 s->sector_lock = calloc(1, bs->size);
166 if (!s->sector_lock) {
167 DPRINTF("Failed to allocate sector lock\n");
168 goto fail;
169 }
171 /* Initialize AIO */
172 s->iocb_free_count = MAX_AIO_REQS;
173 s->iocb_queued = 0;
175 /*Signal kernel to create Poll FD for Async completion events*/
176 s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
177 s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx);
179 if (s->poll_fd < 0) {
180 if (s->poll_fd == -EAGAIN) {
181 DPRINTF("Couldn't setup AIO context. If you are "
182 "trying to concurrently use a large number "
183 "of blktap-based disks, you may need to "
184 "increase the system-wide aio request limit. "
185 "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
186 "aio-max-nr')\n");
187 } else {
188 DPRINTF("Couldn't get fd for AIO poll support. This "
189 "is probably because your kernel does not "
190 "have the aio-poll patch applied.\n");
191 }
192 goto fail;
193 }
195 for (i=0;i<MAX_AIO_REQS;i++)
196 s->iocb_free[i] = &s->iocb_list[i];
197 for (i=0;i<MAX_QCOW_IDS;i++)
198 s->nr_reqs[i] = 0;
199 DPRINTF("AIO state initialised\n");
201 return 0;
203 fail:
204 return -1;
205 }
207 /*
208 *Test if block is zero.
209 * Return:
210 * 1 for TRUE
211 * 0 for FALSE
212 */
213 static inline int IS_ZERO(char *buf, int len)
214 {
215 int i;
217 for (i = 0; i < len; i++) {
218 /*if not zero, return false*/
219 if (ZERO_TEST(*(buf + i))) return 0;
220 }
221 return 1;
222 }
224 static uint32_t gen_cksum(char *ptr, int len)
225 {
226 unsigned char *md;
227 uint32_t ret;
229 md = malloc(MD5_DIGEST_LENGTH);
231 if(!md) return 0;
233 if (MD5((unsigned char *)ptr, len, md) != md) return 0;
235 memcpy(&ret, md, sizeof(uint32_t));
236 free(md);
237 return ret;
238 }
240 static int get_filesize(char *filename, uint64_t *size, struct stat *st)
241 {
242 int blockfd;
244 /*Set to the backing file size*/
245 if(S_ISBLK(st->st_mode)) {
246 blockfd = open(filename, O_RDONLY);
247 if (blockfd < 0)
248 return -1;
249 if (ioctl(blockfd,BLKGETSIZE,size)!=0) {
250 printf("Unable to get Block device size\n");
251 close(blockfd);
252 return -1;
253 }
254 close(blockfd);
255 } else *size = (st->st_size >> SECTOR_SHIFT);
256 return 0;
257 }
259 static int qcow_set_key(struct td_state *bs, const char *key)
260 {
261 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
262 uint8_t keybuf[16];
263 int len, i;
265 memset(keybuf, 0, 16);
266 len = strlen(key);
267 if (len > 16)
268 len = 16;
269 /* XXX: we could compress the chars to 7 bits to increase
270 entropy */
271 for (i = 0; i < len; i++) {
272 keybuf[i] = key[i];
273 }
274 s->crypt_method = s->crypt_method_header;
276 if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
277 return -1;
278 if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
279 return -1;
280 #if 0
281 /* test */
282 {
283 uint8_t in[16];
284 uint8_t out[16];
285 uint8_t tmp[16];
286 for (i=0; i<16; i++)
287 in[i] = i;
288 AES_encrypt(in, tmp, &s->aes_encrypt_key);
289 AES_decrypt(tmp, out, &s->aes_decrypt_key);
290 for (i = 0; i < 16; i++)
291 DPRINTF(" %02x", tmp[i]);
292 DPRINTF("\n");
293 for (i = 0; i < 16; i++)
294 DPRINTF(" %02x", out[i]);
295 DPRINTF("\n");
296 }
297 #endif
298 return 0;
299 }
301 static int async_read(struct tdqcow_state *s, int fd, int size,
302 uint64_t offset,
303 char *buf, td_callback_t cb,
304 int id, uint64_t sector, int qcow_idx, void *private)
305 {
306 struct iocb *io;
307 struct pending_aio *pio;
308 long ioidx;
310 io = s->iocb_free[--s->iocb_free_count];
312 ioidx = IOCB_IDX(s, io);
313 pio = &s->pending_aio[ioidx];
314 pio->cb = cb;
315 pio->id = id;
316 pio->private = private;
317 pio->nb_sectors = size/512;
318 pio->buf = buf;
319 pio->sector = sector;
320 pio->qcow_idx = qcow_idx;
322 io_prep_pread(io, fd, buf, size, offset);
323 io->data = (void *)ioidx;
325 s->iocb_queue[s->iocb_queued++] = io;
327 return 1;
328 }
330 static int async_write(struct tdqcow_state *s, int fd, int size,
331 uint64_t offset,
332 char *buf, td_callback_t cb,
333 int id, uint64_t sector, int qcow_idx, void *private)
334 {
335 struct iocb *io;
336 struct pending_aio *pio;
337 long ioidx;
339 io = s->iocb_free[--s->iocb_free_count];
341 ioidx = IOCB_IDX(s, io);
342 pio = &s->pending_aio[ioidx];
343 pio->cb = cb;
344 pio->id = id;
345 pio->private = private;
346 pio->nb_sectors = size/512;
347 pio->buf = buf;
348 pio->sector = sector;
349 pio->qcow_idx = qcow_idx;
351 io_prep_pwrite(io, fd, buf, size, offset);
352 io->data = (void *)ioidx;
354 s->iocb_queue[s->iocb_queued++] = io;
356 return 1;
357 }
359 /*TODO: Fix sector span!*/
360 static int aio_can_lock(struct tdqcow_state *s, uint64_t sector)
361 {
362 return (s->sector_lock[sector] ? 0 : 1);
363 }
365 static int aio_lock(struct tdqcow_state *s, uint64_t sector)
366 {
367 return ++s->sector_lock[sector];
368 }
370 static void aio_unlock(struct tdqcow_state *s, uint64_t sector)
371 {
372 if (!s->sector_lock[sector]) return;
374 --s->sector_lock[sector];
375 return;
376 }
378 /*TODO - Use a freelist*/
379 static int get_free_idx(struct tdqcow_state *s)
380 {
381 int i;
383 for(i = 0; i < MAX_QCOW_IDS; i++) {
384 if(s->nr_reqs[i] == 0) return i;
385 }
386 return -1;
387 }
389 /*
390 * The crypt function is compatible with the linux cryptoloop
391 * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
392 * supported.
393 */
394 static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
395 uint8_t *out_buf, const uint8_t *in_buf,
396 int nb_sectors, int enc,
397 const AES_KEY *key)
398 {
399 union {
400 uint64_t ll[2];
401 uint8_t b[16];
402 } ivec;
403 int i;
405 for (i = 0; i < nb_sectors; i++) {
406 ivec.ll[0] = cpu_to_le64(sector_num);
407 ivec.ll[1] = 0;
408 AES_cbc_encrypt(in_buf, out_buf, 512, key,
409 ivec.b, enc);
410 sector_num++;
411 in_buf += 512;
412 out_buf += 512;
413 }
414 }
417 /* 'allocate' is:
418 *
419 * 0 to not allocate.
420 *
421 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
422 * 'n_end')
423 *
424 * 2 to allocate a compressed cluster of size
425 * 'compressed_size'. 'compressed_size' must be > 0 and <
426 * cluster_size
427 *
428 * return 0 if not allocated.
429 */
430 static uint64_t get_cluster_offset(struct td_state *bs,
431 uint64_t offset, int allocate,
432 int compressed_size,
433 int n_start, int n_end)
434 {
435 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
436 int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
437 char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr;
438 uint64_t l2_offset, *l2_table, cluster_offset, tmp;
439 uint32_t min_count;
440 int new_l2_table;
442 /*Check L1 table for the extent offset*/
443 l1_index = offset >> (s->l2_bits + s->cluster_bits);
444 l2_offset = s->l1_table[l1_index];
445 new_l2_table = 0;
446 if (!l2_offset) {
447 if (!allocate)
448 return 0;
449 /*
450 * allocating a new l2 entry + extent
451 * at the end of the file, we must also
452 * update the L1 entry safely.
453 */
454 l2_offset = s->fd_end;
456 /* round to cluster size */
457 l2_offset = (l2_offset + s->cluster_size - 1)
458 & ~(s->cluster_size - 1);
460 /* update the L1 entry */
461 s->l1_table[l1_index] = l2_offset;
462 tmp = cpu_to_be64(l2_offset);
464 /*Truncate file for L2 table
465 *(initialised to zero in case we crash)*/
466 ftruncate(s->fd, l2_offset + (s->l2_size * sizeof(uint64_t)));
467 s->fd_end += (s->l2_size * sizeof(uint64_t));
469 /*Update the L1 table entry on disk
470 * (for O_DIRECT we write 4KByte blocks)*/
471 l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
472 l1_ptr = (char *)s->l1_table + (l1_sector << 12);
474 if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
475 DPRINTF("ERROR allocating memory for L1 table\n");
476 }
477 memcpy(tmp_ptr, l1_ptr, 4096);
479 /*
480 * Issue non-asynchronous L1 write.
481 * For safety, we must ensure that
482 * entry is written before blocks.
483 */
484 lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
485 if (write(s->fd, tmp_ptr, 4096) != 4096)
486 return 0;
487 free(tmp_ptr);
489 new_l2_table = 1;
490 goto cache_miss;
491 } else if (s->min_cluster_alloc == s->l2_size) {
492 /*Fast-track the request*/
493 cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
494 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
495 return cluster_offset + (l2_index * s->cluster_size);
496 }
498 /*Check to see if L2 entry is already cached*/
499 for (i = 0; i < L2_CACHE_SIZE; i++) {
500 if (l2_offset == s->l2_cache_offsets[i]) {
501 /* increment the hit count */
502 if (++s->l2_cache_counts[i] == 0xffffffff) {
503 for (j = 0; j < L2_CACHE_SIZE; j++) {
504 s->l2_cache_counts[j] >>= 1;
505 }
506 }
507 l2_table = s->l2_cache + (i << s->l2_bits);
508 goto found;
509 }
510 }
512 cache_miss:
513 /* not found: load a new entry in the least used one */
514 min_index = 0;
515 min_count = 0xffffffff;
516 for (i = 0; i < L2_CACHE_SIZE; i++) {
517 if (s->l2_cache_counts[i] < min_count) {
518 min_count = s->l2_cache_counts[i];
519 min_index = i;
520 }
521 }
522 l2_table = s->l2_cache + (min_index << s->l2_bits);
524 /*If extent pre-allocated, read table from disk,
525 *otherwise write new table to disk*/
526 if (new_l2_table) {
527 /*Should we allocate the whole extent? Adjustable parameter.*/
528 if (s->cluster_alloc == s->l2_size) {
529 cluster_offset = l2_offset +
530 (s->l2_size * sizeof(uint64_t));
531 cluster_offset = (cluster_offset + s->cluster_size - 1)
532 & ~(s->cluster_size - 1);
533 ftruncate(s->fd, cluster_offset +
534 (s->cluster_size * s->l2_size));
535 s->fd_end = cluster_offset +
536 (s->cluster_size * s->l2_size);
537 for (i = 0; i < s->l2_size; i++) {
538 l2_table[i] = cpu_to_be64(cluster_offset +
539 (i*s->cluster_size));
540 }
541 } else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
543 lseek(s->fd, l2_offset, SEEK_SET);
544 if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
545 s->l2_size * sizeof(uint64_t))
546 return 0;
547 } else {
548 lseek(s->fd, l2_offset, SEEK_SET);
549 if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
550 s->l2_size * sizeof(uint64_t))
551 return 0;
552 }
554 /*Update the cache entries*/
555 s->l2_cache_offsets[min_index] = l2_offset;
556 s->l2_cache_counts[min_index] = 1;
558 found:
559 /*The extent is split into 's->l2_size' blocks of
560 *size 's->cluster_size'*/
561 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
562 cluster_offset = be64_to_cpu(l2_table[l2_index]);
564 if (!cluster_offset ||
565 ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
566 if (!allocate)
567 return 0;
569 if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
570 (n_end - n_start) < s->cluster_sectors) {
571 /* cluster is already allocated but compressed, we must
572 decompress it in the case it is not completely
573 overwritten */
574 if (decompress_cluster(s, cluster_offset) < 0)
575 return 0;
576 cluster_offset = lseek(s->fd, 0, SEEK_END);
577 cluster_offset = (cluster_offset + s->cluster_size - 1)
578 & ~(s->cluster_size - 1);
579 /* write the cluster content - not asynchronous */
580 lseek(s->fd, cluster_offset, SEEK_SET);
581 if (write(s->fd, s->cluster_cache, s->cluster_size) !=
582 s->cluster_size)
583 return -1;
584 } else {
585 /* allocate a new cluster */
586 cluster_offset = lseek(s->fd, 0, SEEK_END);
587 if (allocate == 1) {
588 /* round to cluster size */
589 cluster_offset =
590 (cluster_offset + s->cluster_size - 1)
591 & ~(s->cluster_size - 1);
592 ftruncate(s->fd, cluster_offset +
593 s->cluster_size);
594 /* if encrypted, we must initialize the cluster
595 content which won't be written */
596 if (s->crypt_method &&
597 (n_end - n_start) < s->cluster_sectors) {
598 uint64_t start_sect;
599 start_sect = (offset &
600 ~(s->cluster_size - 1))
601 >> 9;
602 memset(s->cluster_data + 512,
603 0xaa, 512);
604 for (i = 0; i < s->cluster_sectors;i++)
605 {
606 if (i < n_start || i >= n_end)
607 {
608 encrypt_sectors(s, start_sect + i,
609 s->cluster_data,
610 s->cluster_data + 512, 1, 1,
611 &s->aes_encrypt_key);
612 lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
613 if (write(s->fd, s->cluster_data, 512) != 512)
614 return -1;
615 }
616 }
617 }
618 } else {
619 cluster_offset |= QCOW_OFLAG_COMPRESSED |
620 (uint64_t)compressed_size
621 << (63 - s->cluster_bits);
622 }
623 }
624 /* update L2 table */
625 tmp = cpu_to_be64(cluster_offset);
626 l2_table[l2_index] = tmp;
628 /*For IO_DIRECT we write 4KByte blocks*/
629 l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
630 l2_ptr = (char *)l2_table + (l2_sector << 12);
632 if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
633 DPRINTF("ERROR allocating memory for L2 table\n");
634 }
635 memcpy(tmp_ptr2, l2_ptr, 4096);
636 aio_lock(s, offset >> 9);
637 async_write(s, s->fd, 4096, l2_offset + (l2_sector << 12),
638 tmp_ptr2, 0, -2, offset >> 9, 0, NULL);
639 }
640 return cluster_offset;
641 }
643 static void init_cluster_cache(struct td_state *bs)
644 {
645 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
646 uint32_t count = 0;
647 int i, cluster_entries;
649 cluster_entries = s->cluster_size / 512;
650 DPRINTF("Initialising Cluster cache, %d sectors per cluster (%d cluster size)\n",
651 cluster_entries, s->cluster_size);
653 for (i = 0; i < bs->size; i += cluster_entries) {
654 if (get_cluster_offset(bs, i << 9, 0, 0, 0, 1)) count++;
655 if (count >= L2_CACHE_SIZE) return;
656 }
657 DPRINTF("Finished cluster initialisation, added %d entries\n", count);
658 return;
659 }
661 static int qcow_is_allocated(struct td_state *bs, int64_t sector_num,
662 int nb_sectors, int *pnum)
663 {
664 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
666 int index_in_cluster, n;
667 uint64_t cluster_offset;
669 cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
670 index_in_cluster = sector_num & (s->cluster_sectors - 1);
671 n = s->cluster_sectors - index_in_cluster;
672 if (n > nb_sectors)
673 n = nb_sectors;
674 *pnum = n;
675 return (cluster_offset != 0);
676 }
678 static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
679 const uint8_t *buf, int buf_size)
680 {
681 z_stream strm1, *strm = &strm1;
682 int ret, out_len;
684 memset(strm, 0, sizeof(*strm));
686 strm->next_in = (uint8_t *)buf;
687 strm->avail_in = buf_size;
688 strm->next_out = out_buf;
689 strm->avail_out = out_buf_size;
691 ret = inflateInit2(strm, -12);
692 if (ret != Z_OK)
693 return -1;
694 ret = inflate(strm, Z_FINISH);
695 out_len = strm->next_out - out_buf;
696 if ( (ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
697 (out_len != out_buf_size) ) {
698 inflateEnd(strm);
699 return -1;
700 }
701 inflateEnd(strm);
702 return 0;
703 }
705 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
706 {
707 int ret, csize;
708 uint64_t coffset;
710 coffset = cluster_offset & s->cluster_offset_mask;
711 if (s->cluster_cache_offset != coffset) {
712 csize = cluster_offset >> (63 - s->cluster_bits);
713 csize &= (s->cluster_size - 1);
714 lseek(s->fd, coffset, SEEK_SET);
715 ret = read(s->fd, s->cluster_data, csize);
716 if (ret != csize)
717 return -1;
718 if (decompress_buffer(s->cluster_cache, s->cluster_size,
719 s->cluster_data, csize) < 0) {
720 return -1;
721 }
722 s->cluster_cache_offset = coffset;
723 }
724 return 0;
725 }
727 /* Open the disk file and initialize qcow state. */
728 int tdqcow_open (struct td_state *bs, const char *name)
729 {
730 int fd, len, i, shift, ret, size, l1_table_size;
731 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
732 char *buf;
733 QCowHeader *header;
734 QCowHeader_ext *exthdr;
735 uint32_t cksum;
737 DPRINTF("QCOW: Opening %s\n",name);
738 /* set up a pipe so that we can hand back a poll fd that won't fire.*/
739 ret = pipe(s->poll_pipe);
740 if (ret != 0)
741 return (0 - errno);
743 fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
744 if (fd < 0) {
745 DPRINTF("Unable to open %s (%d)\n",name,0 - errno);
746 return -1;
747 }
749 s->fd = fd;
750 asprintf(&s->name,"%s", name);
752 ASSERT(sizeof(QCowHeader) < 512);
754 ret = posix_memalign((void **)&buf, 512, 512);
755 if (ret != 0) goto fail;
757 if (read(fd, buf, 512) != 512)
758 goto fail;
760 header = (QCowHeader *)buf;
761 be32_to_cpus(&header->magic);
762 be32_to_cpus(&header->version);
763 be64_to_cpus(&header->backing_file_offset);
764 be32_to_cpus(&header->backing_file_size);
765 be32_to_cpus(&header->mtime);
766 be64_to_cpus(&header->size);
767 be32_to_cpus(&header->crypt_method);
768 be64_to_cpus(&header->l1_table_offset);
770 if (header->magic != QCOW_MAGIC || header->version > QCOW_VERSION)
771 goto fail;
772 if (header->size <= 1 || header->cluster_bits < 9)
773 goto fail;
774 if (header->crypt_method > QCOW_CRYPT_AES)
775 goto fail;
776 s->crypt_method_header = header->crypt_method;
777 if (s->crypt_method_header)
778 s->encrypted = 1;
779 s->cluster_bits = header->cluster_bits;
780 s->cluster_size = 1 << s->cluster_bits;
781 s->cluster_sectors = 1 << (s->cluster_bits - 9);
782 s->l2_bits = header->l2_bits;
783 s->l2_size = 1 << s->l2_bits;
784 s->cluster_alloc = s->l2_size;
785 bs->size = header->size / 512;
786 s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
788 /* read the level 1 table */
789 shift = s->cluster_bits + s->l2_bits;
790 s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
792 s->l1_table_offset = header->l1_table_offset;
794 /*allocate a 4Kbyte multiple of memory*/
795 l1_table_size = s->l1_size * sizeof(uint64_t);
796 if (l1_table_size % 4096 > 0) {
797 l1_table_size = ((l1_table_size >> 12) + 1) << 12;
798 }
799 ret = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
800 if (ret != 0) goto fail;
801 memset(s->l1_table, 0x00, l1_table_size);
803 DPRINTF("L1 Table offset detected: %llu, size %d (%d)\n",
804 (long long)s->l1_table_offset,
805 (int) (s->l1_size * sizeof(uint64_t)),
806 l1_table_size);
808 lseek(fd, s->l1_table_offset, SEEK_SET);
809 if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
810 goto fail;
811 /* for(i = 0;i < s->l1_size; i++) {
812 //be64_to_cpus(&s->l1_table[i]);
813 DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
814 }*/
816 /* alloc L2 cache */
817 size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
818 ret = posix_memalign((void **)&s->l2_cache, 4096, size);
819 if(ret != 0) goto fail;
821 size = s->cluster_size;
822 ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
823 if(ret != 0) goto fail;
825 ret = posix_memalign((void **)&s->cluster_data, 4096, size);
826 if(ret != 0) goto fail;
827 s->cluster_cache_offset = -1;
829 /* read the backing file name */
830 s->bfd = -1;
831 if (header->backing_file_offset != 0) {
832 DPRINTF("Reading backing file data\n");
833 len = header->backing_file_size;
834 if (len > 1023)
835 len = 1023;
837 /*TODO - Fix read size for O_DIRECT and use original fd!*/
838 fd = open(name, O_RDONLY | O_LARGEFILE);
840 lseek(fd, header->backing_file_offset, SEEK_SET);
841 if (read(fd, bs->backing_file, len) != len)
842 goto fail;
843 bs->backing_file[len] = '\0';
844 close(fd);
845 /***********************************/
847 /*Open backing file*/
848 fd = open(bs->backing_file, O_RDONLY | O_DIRECT | O_LARGEFILE);
849 if (fd < 0) {
850 DPRINTF("Unable to open backing file: %s\n",
851 bs->backing_file);
852 goto fail;
853 }
854 s->bfd = fd;
855 s->cluster_alloc = 1; /*Cannot use pre-alloc*/
856 }
858 bs->sector_size = 512;
859 bs->info = 0;
861 /*Detect min_cluster_alloc*/
862 s->min_cluster_alloc = 1; /*Default*/
863 if (s->bfd == -1 && (s->l1_table_offset % 4096 == 0) ) {
864 /*We test to see if the xen magic # exists*/
865 exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
866 be32_to_cpus(&exthdr->xmagic);
867 if(exthdr->xmagic != XEN_MAGIC)
868 goto end_xenhdr;
870 /*Finally check the L1 table cksum*/
871 be32_to_cpus(&exthdr->cksum);
872 cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
873 if(exthdr->cksum != cksum)
874 goto end_xenhdr;
876 be32_to_cpus(&exthdr->min_cluster_alloc);
877 s->min_cluster_alloc = exthdr->min_cluster_alloc;
878 }
880 end_xenhdr:
881 if (init_aio_state(bs)!=0) {
882 DPRINTF("Unable to initialise AIO state\n");
883 goto fail;
884 }
885 s->fd_end = lseek(s->fd, 0, SEEK_END);
887 return 0;
889 fail:
890 DPRINTF("QCOW Open failed\n");
891 free(s->l1_table);
892 free(s->l2_cache);
893 free(s->cluster_cache);
894 free(s->cluster_data);
895 close(fd);
896 return -1;
897 }
899 int tdqcow_queue_read(struct td_state *bs, uint64_t sector,
900 int nb_sectors, char *buf, td_callback_t cb,
901 int id, void *private)
902 {
903 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
904 int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
905 uint64_t cluster_offset;
907 /*Check we can get a lock*/
908 for (i = 0; i < nb_sectors; i++)
909 if (!aio_can_lock(s, sector + i)) {
910 DPRINTF("AIO_CAN_LOCK failed [%llu]\n",
911 (long long) (sector + i));
912 return -EBUSY;
913 }
915 /*We store a local record of the request*/
916 qcow_idx = get_free_idx(s);
917 while (nb_sectors > 0) {
918 cluster_offset =
919 get_cluster_offset(bs, sector << 9, 0, 0, 0, 0);
920 index_in_cluster = sector & (s->cluster_sectors - 1);
921 n = s->cluster_sectors - index_in_cluster;
922 if (n > nb_sectors)
923 n = nb_sectors;
925 if (s->iocb_free_count == 0 || !aio_lock(s, sector)) {
926 DPRINTF("AIO_LOCK or iocb_free_count (%d) failed"
927 "[%llu]\n", s->iocb_free_count,
928 (long long) sector);
929 return -ENOMEM;
930 }
932 if (!cluster_offset && (s->bfd > 0)) {
933 s->nr_reqs[qcow_idx]++;
934 asubmit += async_read(s, s->bfd, n * 512, sector << 9,
935 buf, cb, id, sector,
936 qcow_idx, private);
937 } else if(!cluster_offset) {
938 memset(buf, 0, 512 * n);
939 aio_unlock(s, sector);
940 } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
941 if (decompress_cluster(s, cluster_offset) < 0) {
942 ret = -1;
943 goto done;
944 }
945 memcpy(buf, s->cluster_cache + index_in_cluster * 512,
946 512 * n);
947 } else {
948 s->nr_reqs[qcow_idx]++;
949 asubmit += async_read(s, s->fd, n * 512,
950 (cluster_offset +
951 index_in_cluster * 512),
952 buf, cb, id, sector,
953 qcow_idx, private);
954 }
955 nb_sectors -= n;
956 sector += n;
957 buf += n * 512;
958 }
959 done:
960 /*Callback if no async requests outstanding*/
961 if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
963 return 0;
964 }
966 int tdqcow_queue_write(struct td_state *bs, uint64_t sector,
967 int nb_sectors, char *buf, td_callback_t cb,
968 int id, void *private)
969 {
970 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
971 int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
972 uint64_t cluster_offset;
974 /*Check we can get a lock*/
975 for (i = 0; i < nb_sectors; i++)
976 if (!aio_can_lock(s, sector + i)) {
977 DPRINTF("AIO_CAN_LOCK failed [%llu]\n",
978 (long long) (sector + i));
979 return -EBUSY;
980 }
982 /*We store a local record of the request*/
983 qcow_idx = get_free_idx(s);
984 while (nb_sectors > 0) {
985 index_in_cluster = sector & (s->cluster_sectors - 1);
986 n = s->cluster_sectors - index_in_cluster;
987 if (n > nb_sectors)
988 n = nb_sectors;
990 if (s->iocb_free_count == 0 || !aio_lock(s, sector)){
991 DPRINTF("AIO_LOCK or iocb_free_count (%d) failed"
992 "[%llu]\n", s->iocb_free_count,
993 (long long) sector);
994 return -ENOMEM;
995 }
997 if (!IS_ZERO(buf,n * 512)) {
999 cluster_offset = get_cluster_offset(bs, sector << 9,
1000 1, 0,
1001 index_in_cluster,
1002 index_in_cluster+n
1003 );
1004 if (!cluster_offset) {
1005 DPRINTF("Ooops, no write cluster offset!\n");
1006 ret = -1;
1007 goto done;
1008 }
1010 if (s->crypt_method) {
1011 encrypt_sectors(s, sector, s->cluster_data,
1012 (unsigned char *)buf, n, 1,
1013 &s->aes_encrypt_key);
1014 s->nr_reqs[qcow_idx]++;
1015 asubmit += async_write(s, s->fd, n * 512,
1016 (cluster_offset +
1017 index_in_cluster*512),
1018 (char *)s->cluster_data,
1019 cb, id, sector,
1020 qcow_idx, private);
1021 } else {
1022 s->nr_reqs[qcow_idx]++;
1023 asubmit += async_write(s, s->fd, n * 512,
1024 (cluster_offset +
1025 index_in_cluster*512),
1026 buf, cb, id, sector,
1027 qcow_idx, private);
1028 }
1029 } else {
1030 /*Write data contains zeros, but we must check to see
1031 if cluster already allocated*/
1032 cluster_offset = get_cluster_offset(bs, sector << 9,
1033 0, 0,
1034 index_in_cluster,
1035 index_in_cluster+n
1036 );
1037 if(cluster_offset) {
1038 if (s->crypt_method) {
1039 encrypt_sectors(s, sector,
1040 s->cluster_data,
1041 (unsigned char *)buf,
1042 n, 1,
1043 &s->aes_encrypt_key);
1044 s->nr_reqs[qcow_idx]++;
1045 asubmit += async_write(s, s->fd,
1046 n * 512,
1047 (cluster_offset+
1048 index_in_cluster * 512),
1049 (char *)s->cluster_data, cb, id, sector,
1050 qcow_idx, private);
1051 } else {
1052 s->nr_reqs[qcow_idx]++;
1053 asubmit += async_write(s, s->fd, n*512,
1054 cluster_offset + index_in_cluster * 512,
1055 buf, cb, id, sector,
1056 qcow_idx, private);
1057 }
1058 }
1059 else aio_unlock(s, sector);
1060 }
1061 nb_sectors -= n;
1062 sector += n;
1063 buf += n * 512;
1064 }
1065 s->cluster_cache_offset = -1; /* disable compressed cache */
1067 done:
1068 /*Callback if no async requests outstanding*/
1069 if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
1071 return 0;
1072 }
1074 int tdqcow_submit(struct td_state *bs)
1075 {
1076 int ret;
1077 struct tdqcow_state *prv = (struct tdqcow_state *)bs->private;
1079 ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
1081 /* XXX: TODO: Handle error conditions here. */
1083 /* Success case: */
1084 prv->iocb_queued = 0;
1086 return ret;
1087 }
1090 int *tdqcow_get_fd(struct td_state *bs)
1091 {
1092 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
1093 int *fds, i;
1095 fds = malloc(sizeof(int) * MAX_IOFD);
1096 /*initialise the FD array*/
1097 for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
1099 fds[0] = s->poll_fd;
1100 return fds;
1101 }
1103 int tdqcow_close(struct td_state *bs)
1104 {
1105 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
1106 uint32_t cksum, out;
1107 int fd, offset;
1109 /*Update the hdr cksum*/
1110 if(s->min_cluster_alloc == s->l2_size) {
1111 cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
1112 printf("Writing cksum: %d",cksum);
1113 fd = open(s->name, O_WRONLY | O_LARGEFILE); /*Open without O_DIRECT*/
1114 offset = sizeof(QCowHeader) + sizeof(uint32_t);
1115 lseek(fd, offset, SEEK_SET);
1116 out = cpu_to_be32(cksum);
1117 write(fd, &out, sizeof(uint32_t));
1118 close(fd);
1119 }
1121 free(s->name);
1122 free(s->l1_table);
1123 free(s->l2_cache);
1124 free(s->cluster_cache);
1125 free(s->cluster_data);
1126 close(s->fd);
1127 return 0;
1128 }
1130 int tdqcow_do_callbacks(struct td_state *s, int sid)
1131 {
1132 int ret, i, rsp = 0,*ptr;
1133 struct io_event *ep;
1134 struct tdqcow_state *prv = (struct tdqcow_state *)s->private;
1136 if (sid > MAX_IOFD) return 1;
1138 /* Non-blocking test for completed io. */
1139 ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
1140 NULL);
1142 for (ep=prv->aio_events, i = ret; i-->0; ep++) {
1143 struct iocb *io = ep->obj;
1144 struct pending_aio *pio;
1146 pio = &prv->pending_aio[(long)io->data];
1148 if (ep->res != io->u.c.nbytes) {
1149 /* TODO: handle this case better. */
1150 ptr = (int *)&ep->res;
1151 DPRINTF("AIO did less than I asked it to "
1152 "[%lu,%lu,%d]\n",
1153 ep->res, io->u.c.nbytes, *ptr);
1154 }
1155 aio_unlock(prv, pio->sector);
1156 if (pio->id >= 0) {
1157 if (prv->crypt_method)
1158 encrypt_sectors(prv, pio->sector,
1159 (unsigned char *)pio->buf,
1160 (unsigned char *)pio->buf,
1161 pio->nb_sectors, 0,
1162 &prv->aes_decrypt_key);
1163 prv->nr_reqs[pio->qcow_idx]--;
1164 if (prv->nr_reqs[pio->qcow_idx] == 0)
1165 rsp += pio->cb(s, ep->res2, pio->id,
1166 pio->private);
1167 } else if (pio->id == -2) free(pio->buf);
1169 prv->iocb_free[prv->iocb_free_count++] = io;
1170 }
1171 return rsp;
1172 }
1174 int qcow_create(const char *filename, uint64_t total_size,
1175 const char *backing_file, int flags)
1176 {
1177 int fd, header_size, backing_filename_len, l1_size, i;
1178 int shift, length, adjust, ret = 0;
1179 QCowHeader header;
1180 QCowHeader_ext exthdr;
1181 char backing_filename[1024], *ptr;
1182 uint64_t tmp, size;
1183 struct stat st;
1185 DPRINTF("Qcow_create: size %llu\n",(long long unsigned)total_size);
1187 fd = open(filename,
1188 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1189 0644);
1190 if (fd < 0)
1191 return -1;
1193 memset(&header, 0, sizeof(header));
1194 header.magic = cpu_to_be32(QCOW_MAGIC);
1195 header.version = cpu_to_be32(QCOW_VERSION);
1197 /*Create extended header fields*/
1198 exthdr.xmagic = cpu_to_be32(XEN_MAGIC);
1200 header_size = sizeof(header) + sizeof(QCowHeader_ext);
1201 backing_filename_len = 0;
1202 size = (total_size >> SECTOR_SHIFT);
1203 if (backing_file) {
1204 if (strcmp(backing_file, "fat:")) {
1205 const char *p;
1206 /* XXX: this is a hack: we do not attempt to
1207 *check for URL like syntax */
1208 p = strchr(backing_file, ':');
1209 if (p && (p - backing_file) >= 2) {
1210 /* URL like but exclude "c:" like filenames */
1211 strncpy(backing_filename, backing_file,
1212 sizeof(backing_filename));
1213 } else {
1214 realpath(backing_file, backing_filename);
1215 if (stat(backing_filename, &st) != 0) {
1216 return -1;
1217 }
1218 }
1219 header.backing_file_offset = cpu_to_be64(header_size);
1220 backing_filename_len = strlen(backing_filename);
1221 header.backing_file_size = cpu_to_be32(
1222 backing_filename_len);
1223 header_size += backing_filename_len;
1225 /*Set to the backing file size*/
1226 if(get_filesize(backing_filename, &size, &st)) {
1227 return -1;
1228 }
1229 DPRINTF("Backing file size detected: %lld sectors"
1230 "(total %lld [%lld MB])\n",
1231 (long long)size,
1232 (long long)(size << SECTOR_SHIFT),
1233 (long long)(size >> 11));
1234 } else {
1235 backing_file = NULL;
1236 DPRINTF("Setting file size: %lld (total %lld)\n",
1237 (long long) total_size,
1238 (long long) (total_size << SECTOR_SHIFT));
1239 }
1240 header.mtime = cpu_to_be32(st.st_mtime);
1241 header.cluster_bits = 9; /* 512 byte cluster to avoid copying
1242 unmodified sectors */
1243 header.l2_bits = 12; /* 32 KB L2 tables */
1244 exthdr.min_cluster_alloc = cpu_to_be32(1);
1245 } else {
1246 DPRINTF("Setting file size: %lld sectors"
1247 "(total %lld [%lld MB])\n",
1248 (long long) size,
1249 (long long) (size << SECTOR_SHIFT),
1250 (long long) (size >> 11));
1251 header.cluster_bits = 12; /* 4 KB clusters */
1252 header.l2_bits = 9; /* 4 KB L2 tables */
1253 exthdr.min_cluster_alloc = cpu_to_be32(1 << 9);
1254 }
1255 /*Set the header size value*/
1256 header.size = cpu_to_be64(size * 512);
1258 header_size = (header_size + 7) & ~7;
1259 if (header_size % 4096 > 0) {
1260 header_size = ((header_size >> 12) + 1) << 12;
1261 }
1263 shift = header.cluster_bits + header.l2_bits;
1264 l1_size = ((size * 512) + (1LL << shift) - 1) >> shift;
1266 header.l1_table_offset = cpu_to_be64(header_size);
1267 DPRINTF("L1 Table offset: %d, size %d\n",
1268 header_size,
1269 (int)(l1_size * sizeof(uint64_t)));
1270 if (flags) {
1271 header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
1272 } else {
1273 header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
1274 }
1276 ptr = calloc(1, l1_size * sizeof(uint64_t));
1277 exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_size * sizeof(uint64_t)));
1278 printf("Created cksum: %d\n",exthdr.cksum);
1279 free(ptr);
1281 /* write all the data */
1282 ret += write(fd, &header, sizeof(header));
1283 ret += write(fd, &exthdr, sizeof(exthdr));
1284 if (backing_file) {
1285 ret += write(fd, backing_filename, backing_filename_len);
1286 }
1287 lseek(fd, header_size, SEEK_SET);
1288 tmp = 0;
1289 for (i = 0;i < l1_size; i++) {
1290 ret += write(fd, &tmp, sizeof(tmp));
1291 }
1293 /*adjust file length to 4 KByte boundary*/
1294 length = header_size + l1_size * sizeof(uint64_t);
1295 if (length % 4096 > 0) {
1296 length = ((length >> 12) + 1) << 12;
1297 ftruncate(fd, length);
1298 DPRINTF("Adjusted filelength to %d for 4 "
1299 "Kbyte alignment\n",length);
1300 }
1302 close(fd);
1304 return 0;
1305 }
1307 int qcow_make_empty(struct td_state *bs)
1308 {
1309 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
1310 uint32_t l1_length = s->l1_size * sizeof(uint64_t);
1312 memset(s->l1_table, 0, l1_length);
1313 lseek(s->fd, s->l1_table_offset, SEEK_SET);
1314 if (write(s->fd, s->l1_table, l1_length) < 0)
1315 return -1;
1316 ftruncate(s->fd, s->l1_table_offset + l1_length);
1318 memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
1319 memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
1320 memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
1322 return 0;
1323 }
1325 int qcow_get_cluster_size(struct td_state *bs)
1326 {
1327 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
1329 return s->cluster_size;
1330 }
1332 /* XXX: put compressed sectors first, then all the cluster aligned
1333 tables to avoid losing bytes in alignment */
1334 int qcow_compress_cluster(struct td_state *bs, int64_t sector_num,
1335 const uint8_t *buf)
1336 {
1337 struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
1338 z_stream strm;
1339 int ret, out_len;
1340 uint8_t *out_buf;
1341 uint64_t cluster_offset;
1343 out_buf = malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
1344 if (!out_buf)
1345 return -1;
1347 /* best compression, small window, no zlib header */
1348 memset(&strm, 0, sizeof(strm));
1349 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
1350 Z_DEFLATED, -12,
1351 9, Z_DEFAULT_STRATEGY);
1352 if (ret != 0) {
1353 free(out_buf);
1354 return -1;
1355 }
1357 strm.avail_in = s->cluster_size;
1358 strm.next_in = (uint8_t *)buf;
1359 strm.avail_out = s->cluster_size;
1360 strm.next_out = out_buf;
1362 ret = deflate(&strm, Z_FINISH);
1363 if (ret != Z_STREAM_END && ret != Z_OK) {
1364 free(out_buf);
1365 deflateEnd(&strm);
1366 return -1;
1367 }
1368 out_len = strm.next_out - out_buf;
1370 deflateEnd(&strm);
1372 if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
1373 /* could not compress: write normal cluster */
1374 //tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
1375 } else {
1376 cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
1377 out_len, 0, 0);
1378 cluster_offset &= s->cluster_offset_mask;
1379 lseek(s->fd, cluster_offset, SEEK_SET);
1380 if (write(s->fd, out_buf, out_len) != out_len) {
1381 free(out_buf);
1382 return -1;
1383 }
1384 }
1386 free(out_buf);
1387 return 0;
1388 }
1390 struct tap_disk tapdisk_qcow = {
1391 "tapdisk_qcow",
1392 sizeof(struct tdqcow_state),
1393 tdqcow_open,
1394 tdqcow_queue_read,
1395 tdqcow_queue_write,
1396 tdqcow_submit,
1397 tdqcow_get_fd,
1398 tdqcow_close,
1399 tdqcow_do_callbacks,
1400 };