ia64/xen-unstable

view tools/blktap2/drivers/block-log.c @ 19817:b7f73a7f3078

blktap2: portability fixes for NetBSD

- Use standard off_t and lseek() instead of non-portable off64_t and
  lseek64()
- Use uuid API as documented in DCE 1.1 RPC specification
- Add NetBSD implementation for blk_getimagesize() and
  blk_getsectorsize()
- Use blk_getimagesize() and blk_getsectorsize()
- Fix uuid header check

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jun 23 17:24:14 2009 +0100 (2009-06-23)
parents 1c627434605e
children
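As context for the first bullet in the changeset description, here is a minimal, hypothetical sketch of the off_t/lseek() pattern that replaces off64_t/lseek64(); the helper image_size_bytes() is illustrative and is not a hunk from this changeset:

#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

/* Illustrative only: query an image's size through the standard
 * lseek()/off_t interface rather than lseek64()/off64_t. NetBSD's off_t
 * is already 64 bits wide; on 32-bit Linux, building with
 * -D_FILE_OFFSET_BITS=64 keeps off_t 64 bits as well. */
static int image_size_bytes(int fd, uint64_t *bytes)
{
  off_t end = lseek(fd, 0, SEEK_END);

  if (end == (off_t)-1)
    return -1;

  *bytes = (uint64_t)end;
  return 0;
}

The file at this revision follows.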
/*
 * Copyright (c) 2008, XenSource Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of XenSource Inc. nor the names of its contributors
 *       may be used to endorse or promote products derived from this software
 *       without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/* Driver to sit on top of another disk and log writes, in order
 * to synchronize two distinct disks
 *
 * On receipt of a control request it can export a list of dirty
 * sectors in the following format:
 *   struct writerange {
 *     u64 sector;
 *     u32 count;
 *   }
 * terminated by { 0, 0 }
 */
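
/* Editorial illustration, not part of the original source: a consumer that
 * has mapped the shared memory region could walk the exported list roughly
 * as follows, stopping at the { 0, 0 } terminator described above. In the
 * code below the same layout is called struct disk_range (from log.h);
 * handle_dirty() is a hypothetical callback.
 *
 *   struct writerange *r = (struct writerange *)shm;
 *   while (r->sector != 0 || r->count != 0) {
 *     handle_dirty(r->sector, r->count);
 *     r++;
 *   }
 */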
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "log.h"
#include "tapdisk.h"
#include "tapdisk-server.h"
#include "tapdisk-driver.h"
#include "tapdisk-interface.h"
#define MAX_CONNECTIONS 1

typedef struct poll_fd {
  int fd;
  event_id_t id;
} poll_fd_t;

struct tdlog_state {
  uint64_t size;

  void* writelog;

  char* ctlpath;
  poll_fd_t ctl;

  int connected;
  poll_fd_t connections[MAX_CONNECTIONS];

  char* shmpath;
  void* shm;

  log_sring_t* sring;
  log_back_ring_t bring;
};

#define BDPRINTF(_f, _a...) syslog (LOG_DEBUG, "log: " _f "\n", ## _a)

#define BWPRINTF(_f, _a...) syslog (LOG_WARNING, "log: " _f "\n", ## _a)

static void ctl_accept(event_id_t, char, void *);
static void ctl_request(event_id_t, char, void *);

/* -- write log -- */

/* large flat bitmaps don't scale particularly well either in size or scan
 * time, but they'll do for now */
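
/* Editorial note, for scale: bitmap_size() below allocates one bit per
 * sector (sectors >> 3 bytes), so a 1 TiB disk with 512-byte sectors has
 * 2^31 sectors and therefore needs a 256 MiB dirty bitmap. */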
#define BITS_PER_LONG (sizeof(unsigned long) * 8)
#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)

#define BITMAP_ENTRY(_nr, _bmap) ((unsigned long*)(_bmap))[(_nr)/BITS_PER_LONG]
#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)

static inline int test_bit(int nr, void* bmap)
{
  return (BITMAP_ENTRY(nr, bmap) >> BITMAP_SHIFT(nr)) & 1;
}

static inline void clear_bit(int nr, void* bmap)
{
  BITMAP_ENTRY(nr, bmap) &= ~(1UL << BITMAP_SHIFT(nr));
}

static inline void set_bit(int nr, void* bmap)
{
  BITMAP_ENTRY(nr, bmap) |= (1UL << BITMAP_SHIFT(nr));
}

static inline int bitmap_size(uint64_t sz)
{
  return sz >> 3;
}

static int writelog_create(struct tdlog_state *s)
{
  uint64_t bmsize;

  bmsize = bitmap_size(s->size);

  BDPRINTF("allocating %"PRIu64" bytes for dirty bitmap", bmsize);

  if (!(s->writelog = calloc(bmsize, 1))) {
    BWPRINTF("could not allocate dirty bitmap of size %"PRIu64, bmsize);
    return -1;
  }

  return 0;
}

static int writelog_free(struct tdlog_state *s)
{
  if (s->writelog)
    free(s->writelog);

  return 0;
}
static int writelog_set(struct tdlog_state* s, uint64_t sector, int count)
{
  int i;

  for (i = 0; i < count; i++)
    set_bit(sector + i, s->writelog);

  return 0;
}

/* if end is 0, clear to end of disk */
int writelog_clear(struct tdlog_state* s, uint64_t start, uint64_t end)
{
  if (!end)
    end = s->size;

  /* clear to word boundaries */
  while (BITMAP_SHIFT(start))
    clear_bit(start++, s->writelog);
  while (BITMAP_SHIFT(end))
    clear_bit(end--, s->writelog);

  memset(s->writelog + start / BITS_PER_LONG, 0, (end - start) >> 3);

  return 0;
}

/* returns last block exported (may not be end of disk if shm region
 * overflows) */
static uint64_t writelog_export(struct tdlog_state* s)
{
  struct disk_range* range = s->shm;
  uint64_t i = 0;

  BDPRINTF("sector count: %"PRIu64, s->size);

  for (i = 0; i < s->size; i++) {
    if (test_bit(i, s->writelog)) {
      /* range start */
      range->sector = i;
      range->count = 1;
      /* find end */
      for (i++; i < s->size && test_bit(i, s->writelog); i++)
        range->count++;

      BDPRINTF("export: dirty extent %"PRIu64":%u",
               range->sector, range->count);
      range++;

      /* out of space in shared memory region */
      if ((void*)range >= bmend(s->shm)) {
        BDPRINTF("out of space in shm region at sector %"PRIu64, i);
        return i;
      }

      /* undo forloop increment */
      i--;
    }
  }

  /* NULL-terminate range list */
  range->sector = 0;
  range->count = 0;

  return i;
}
/* -- communication channel -- */

/* remove FS special characters in up to len bytes of path */
static inline void path_escape(char* path, size_t len) {
  int i;

  for (i = 0; i < len && path[i]; i++)
    if (strchr(":/", path[i]))
      path[i] = '_';
}

static char* ctl_makepath(const char* name, const char* ext)
{
  char* res;
  char *file;

  file = strrchr(name, '/');
  if (!file) {
    BWPRINTF("invalid name %s\n", name);
    return NULL;
  }

  if (asprintf(&res, BLKTAP_CTRL_DIR "/log_%s.%s", file, ext) < 0) {
    BWPRINTF("could not allocate path");
    return NULL;
  }

  path_escape(res + strlen(BLKTAP_CTRL_DIR) + 5, strlen(file));

  return res;
}

static int shmem_open(struct tdlog_state* s, const char* name)
{
  int i, l, fd;

  /* device name -> path */
  if (asprintf(&s->shmpath, "/log_%s.wlog", name) < 0) {
    BWPRINTF("could not allocate shm path");
    return -1;
  }

  path_escape(s->shmpath + 5, strlen(name));

  if ((fd = shm_open(s->shmpath, O_CREAT|O_RDWR, 0750)) < 0) {
    BWPRINTF("could not open shared memory file %s: %s", s->shmpath,
             strerror(errno));
    goto err;
  }
  if (ftruncate(fd, SHMSIZE) < 0) {
    BWPRINTF("error truncating shmem to size %u", SHMSIZE);
    close(fd);
    goto err;
  }

  s->shm = mmap(NULL, SHMSIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
  close(fd);
  if (s->shm == MAP_FAILED) {
    BWPRINTF("could not mmap write log shm: %s", strerror(errno));
    goto err;
  }
  return 0;

err:
  s->shm = NULL;
  free(s->shmpath);
  s->shmpath = NULL;
  return -1;
}

static int shmem_close(struct tdlog_state* s)
{
  if (s->shm) {
    munmap(s->shm, SHMSIZE);
    s->shm = NULL;
  }

  if (s->shmpath) {
    shm_unlink(s->shmpath);
    s->shmpath = NULL;
  }

  return 0;
}
/* control socket */

static int ctl_open(struct tdlog_state* s, const char* name)
{
  struct sockaddr_un saddr;

  if (!(s->ctlpath = ctl_makepath(name, "ctl")))
    return -1;

  if ((s->ctl.fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
    BWPRINTF("error opening control socket: %s", strerror(errno));
    goto err;
  }

  memset(&saddr, 0, sizeof(saddr));
  saddr.sun_family = AF_UNIX;
  memcpy(saddr.sun_path, s->ctlpath, strlen(s->ctlpath));
  if (unlink(s->ctlpath) && errno != ENOENT) {
    BWPRINTF("error unlinking old socket path %s: %s", s->ctlpath,
             strerror(errno));
    goto err_sock;
  }

  if (bind(s->ctl.fd, (const struct sockaddr *)&saddr, sizeof(saddr)) < 0) {
    BWPRINTF("error binding control socket to %s: %s", s->ctlpath,
             strerror(errno));
    goto err_sock;
  }

  if (listen(s->ctl.fd, 1) < 0) {
    BWPRINTF("error listening on control socket: %s", strerror(errno));
    goto err_sock;
  }

  s->ctl.id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
                                            s->ctl.fd, 0, ctl_accept, s);
  if (s->ctl.id < 0) {
    BWPRINTF("error register event handler: %s", strerror(s->ctl.id));
    goto err_sock;
  }

  return 0;

err_sock:
  close(s->ctl.fd);
  s->ctl.fd = -1;
err:
  free(s->ctlpath);
  s->ctlpath = NULL;

  return -1;
}

static int ctl_close(struct tdlog_state* s)
{
  while (s->connected) {
    tapdisk_server_unregister_event(s->connections[s->connected].id);
    close(s->connections[s->connected].fd);
    s->connections[s->connected].fd = -1;
    s->connections[s->connected].id = 0;
    s->connected--;
  }

  if (s->ctl.fd >= 0) {
    tapdisk_server_unregister_event(s->ctl.id);
    close(s->ctl.fd);
    s->ctl.fd = -1;
    s->ctl.id = 0;
  }

  if (s->ctlpath) {
    unlink(s->ctlpath);
    free(s->ctlpath);
    s->ctlpath = NULL;
  }

  /* XXX this must be fixed once requests are actually in flight */
  /* could just drain the existing ring here first */
  if (s->sring) {
    SHARED_RING_INIT(s->sring);
    BACK_RING_INIT(&s->bring, s->sring, SRINGSIZE);
  }

  return 0;
}
/* walk list of open sockets, close matching fd */
static int ctl_close_sock(struct tdlog_state* s, int fd)
{
  int i;

  for (i = 0; i <= s->connected; i++) {
    if (s->connections[i].fd == fd) {
      tapdisk_server_unregister_event(s->connections[i].id);
      close(s->connections[i].fd);
      s->connections[i].fd = -1;
      s->connections[i].id = 0;
      s->connected--;
      return 0;
    }
  }

  BWPRINTF("requested to close unknown socket %d", fd);
  return -1;
}

static void ctl_accept(event_id_t id, char mode, void *private)
{
  struct tdlog_state* s = (struct tdlog_state *)private;
  int fd;
  event_id_t cid;

  if ((fd = accept(s->ctl.fd, NULL, NULL)) < 0) {
    BWPRINTF("error accepting control connection: %s", strerror(errno));
    return;
  }

  if (s->connected) {
    BWPRINTF("control session in progress, closing new connection");
    close(fd);
    return;
  }

  cid = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
                                      fd, 0, ctl_request, s);
  if (cid < 0) {
    BWPRINTF("error registering connection event handler: %s", strerror(cid));
    close(fd);
    return;
  }

  s->connections[s->connected].fd = fd;
  s->connections[s->connected].id = cid;
  s->connected++;
}
/* response format: 4 bytes shmsize, 0-terminated path */
static int ctl_get_shmpath(struct tdlog_state* s, int fd)
{
  char msg[CTLRSPLEN_SHMP + 1];
  uint32_t sz;
  int rc;

  BDPRINTF("ctl: sending shared memory parameters (size: %u, path: %s)",
           SHMSIZE, s->shmpath);

  /* TMP: sanity-check shm */
  sz = 0xdeadbeef;
  memcpy(s->shm, &sz, sizeof(sz));

  sz = SHMSIZE;
  memcpy(msg, &sz, sizeof(sz));
  snprintf(msg + sizeof(sz), sizeof(msg) - sizeof(sz), "%s", s->shmpath);
  if ((rc = write(fd, msg, CTLRSPLEN_SHMP)) < 0) {
    BWPRINTF("error writing shmpath: %s", strerror(errno));
    return -1;
  }

  return 0;
}

static int ctl_peek_writes(struct tdlog_state* s, int fd)
{
  int rc;

  BDPRINTF("ctl: peeking bitmap");

  writelog_export(s);

  if ((rc = write(fd, "done", CTLRSPLEN_PEEK)) < 0) {
    BWPRINTF("error writing peek ack: %s", strerror(errno));
    return -1;
  }

  return 0;
}

static int ctl_clear_writes(struct tdlog_state* s, int fd)
{
  int rc;

  BDPRINTF("ctl: clearing bitmap");

  writelog_clear(s, 0, 0);

  if ((rc = write(fd, "done", CTLRSPLEN_CLEAR)) < 0) {
    BWPRINTF("error writing clear ack: %s", strerror(errno));
    return -1;
  }

  return 0;
}

/* get dirty bitmap and clear it atomically */
static int ctl_get_writes(struct tdlog_state* s, int fd)
{
  int rc;

  BDPRINTF("ctl: getting bitmap");

  writelog_export(s);
  writelog_clear(s, 0, 0);

  if ((rc = write(fd, "done", CTLRSPLEN_GET)) < 0) {
    BWPRINTF("error writing get ack: %s", strerror(errno));
    return -1;
  }

  return 0;
}
/* get requests from ring */
static int ctl_kick(struct tdlog_state* s, int fd)
{
  RING_IDX reqstart, reqend;
  log_request_t req;

  /* XXX testing */
  RING_IDX rspstart, rspend;
  log_response_t rsp;
  struct log_ctlmsg msg;
  int rc;

  reqstart = s->bring.req_cons;
  reqend = s->sring->req_prod;

  BDPRINTF("ctl: ring kicked (start = %u, end = %u)", reqstart, reqend);

  while (reqstart != reqend) {
    /* XXX actually submit these! */
    memcpy(&req, RING_GET_REQUEST(&s->bring, reqstart), sizeof(req));
    BDPRINTF("ctl: read request %"PRIu64":%u", req.sector, req.count);
    s->bring.req_cons = ++reqstart;

    rsp.sector = req.sector;
    rsp.count = req.count;
    memcpy(RING_GET_RESPONSE(&s->bring, s->bring.rsp_prod_pvt), &rsp,
           sizeof(rsp));
    s->bring.rsp_prod_pvt++;
  }

  RING_PUSH_RESPONSES(&s->bring);
  memset(&msg, 0, sizeof(msg));
  memcpy(msg.msg, LOGCMD_KICK, 4);
  if ((rc = write(fd, &msg, sizeof(msg))) < 0) {
    BWPRINTF("error sending notify: %s", strerror(errno));
    return -1;
  } else if (rc < sizeof(msg)) {
    BWPRINTF("short notify write (%d/%zd)", rc, sizeof(msg));
    return -1;
  }

  return 0;
}
static int ctl_do_request(struct tdlog_state* s, int fd, struct log_ctlmsg* msg)
{
  if (!strncmp(msg->msg, LOGCMD_SHMP, 4)) {
    return ctl_get_shmpath(s, fd);
  } else if (!strncmp(msg->msg, LOGCMD_PEEK, 4)) {
    return ctl_peek_writes(s, fd);
  } else if (!strncmp(msg->msg, LOGCMD_CLEAR, 4)) {
    return ctl_clear_writes(s, fd);
  } else if (!strncmp(msg->msg, LOGCMD_GET, 4)) {
    return ctl_get_writes(s, fd);
  } else if (!strncmp(msg->msg, LOGCMD_KICK, 4)) {
    return ctl_kick(s, fd);
  }

  BWPRINTF("unknown control request %.4s", msg->msg);
  return -1;
}

static inline int ctl_find_connection(struct tdlog_state *s, event_id_t id)
{
  int i;

  for (i = 0; i < s->connected; i++)
    if (s->connections[i].id == id)
      return s->connections[i].fd;

  BWPRINTF("unrecognized event callback id %d", id);
  return -1;
}

static void ctl_request(event_id_t id, char mode, void *private)
{
  struct tdlog_state* s = (struct tdlog_state*)private;
  struct log_ctlmsg msg;
  int rc, i, fd = -1;

  fd = ctl_find_connection(s, id);
  if (fd == -1)
    return;

  if ((rc = read(fd, &msg, sizeof(msg))) < 0) {
    BWPRINTF("error reading from ctl socket %d, closing: %s", fd,
             strerror(errno));
    ctl_close_sock(s, fd);
    return;
  } else if (rc == 0) {
    BDPRINTF("ctl_request: EOF, closing socket");
    ctl_close_sock(s, fd);
    return;
  } else if (rc < sizeof(msg)) {
    BWPRINTF("short request received (%d/%zd bytes), ignoring", rc,
             sizeof(msg));
    return;
  }

  ctl_do_request(s, fd, &msg);
}
/* -- interface -- */

static int tdlog_close(td_driver_t*);

static int tdlog_open(td_driver_t* driver, const char* name, td_flag_t flags)
{
  struct tdlog_state* s = (struct tdlog_state*)driver->data;
  int rc;

  memset(s, 0, sizeof(*s));

  s->size = driver->info.size;

  if ((rc = writelog_create(s))) {
    tdlog_close(driver);
    return rc;
  }
  if ((rc = shmem_open(s, name))) {
    tdlog_close(driver);
    return rc;
  }
  if ((rc = ctl_open(s, name))) {
    tdlog_close(driver);
    return rc;
  }

  s->sring = (log_sring_t*)sringstart(s->shm);
  SHARED_RING_INIT(s->sring);
  BACK_RING_INIT(&s->bring, s->sring, SRINGSIZE);

  BDPRINTF("opened ctl socket");

  return 0;
}

static int tdlog_close(td_driver_t* driver)
{
  struct tdlog_state* s = (struct tdlog_state*)driver->data;

  ctl_close(s);
  shmem_close(s);
  writelog_free(s);

  return 0;
}
static void tdlog_queue_read(td_driver_t* driver, td_request_t treq)
{
  td_forward_request(treq);
}

static void tdlog_queue_write(td_driver_t* driver, td_request_t treq)
{
  struct tdlog_state* s = (struct tdlog_state*)driver->data;
  int rc;

  writelog_set(s, treq.sec, treq.secs);
  td_forward_request(treq);
}

static int tdlog_get_parent_id(td_driver_t* driver, td_disk_id_t* id)
{
  return -EINVAL;
}

static int tdlog_validate_parent(td_driver_t *driver,
                                 td_driver_t *parent, td_flag_t flags)
{
  return 0;
}

struct tap_disk tapdisk_log = {
  .disk_type = "tapdisk_log",
  .private_data_size = sizeof(struct tdlog_state),
  .flags = 0,
  .td_open = tdlog_open,
  .td_close = tdlog_close,
  .td_queue_read = tdlog_queue_read,
  .td_queue_write = tdlog_queue_write,
  .td_get_parent_id = tdlog_get_parent_id,
  .td_validate_parent = tdlog_validate_parent,
};