ia64/xen-unstable

view tools/xenstore/xenstored_core.c @ 9542:601d0229a40e

Further attempts to recover from a corrupt store, this one triggered when a
tdb_store fails.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@leeni.uk.xensource.com
date Fri Mar 31 00:10:54 2006 +0100 (2006-03-31)
parents 40e3df4cffe4
children e7d769001b4b
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored_core.h"
48 #include "xenstored_watch.h"
49 #include "xenstored_transaction.h"
50 #include "xenstored_domain.h"
51 #include "xenctrl.h"
52 #include "tdb.h"
54 #include "hashtable.h"
57 extern int eventchn_fd; /* in xenstored_domain.c */
/* When set, message traffic is also reported through xprintf(). */
59 static bool verbose = false;
/* List of all live connections; new_connection() appends to it. */
60 LIST_HEAD(connections);
/* Descriptor trace() writes to; a negative value disables tracing. */
61 static int tracefd = -1;
/* NOTE(review): recovery and remove_local are not referenced in the part of
 * the file visible here -- presumably command-line switches; confirm. */
62 static bool recovery = true;
63 static bool remove_local = true;
/* Pipe written by the SIGHUP handler; the read end is polled in the main
 * loop's fd set (see initialize_set). */
64 static int reopen_log_pipe[2];
/* Path reopened by reopen_log(); NULL means no trace file was requested. */
65 static char *tracefile = NULL;
/* Main store handle, returned by tdb_context() outside a transaction. */
66 static TDB_CONTEXT *tdb_ctx;
/* Forward declarations: both are defined outside the visible part of the file. */
68 static void corrupt(struct connection *conn, const char *fmt, ...);
69 static void check_store(void);
/*
 * Format a printf-style message once and send it to both the trace file
 * and syslog (LOG_ERR).  If the message cannot be formatted because the
 * allocation failed, a fixed string is logged instead so that NULL is
 * never handed to a "%s" conversion.
 */
#define log(...)							\
	do {								\
		char *s = talloc_asprintf(NULL, __VA_ARGS__);		\
		if (s) {						\
			trace("%s\n", s);				\
			syslog(LOG_ERR, "%s", s);			\
			talloc_free(s);					\
		} else {						\
			trace("talloc failure during logging\n");	\
			syslog(LOG_ERR, "talloc failure during logging"); \
		}							\
	} while (0)
80 #ifdef TESTING
81 static bool failtest = false;
83 /* We override talloc's malloc. */
84 void *test_malloc(size_t size)
85 {
86 /* 1 in 20 means only about 50% of connections establish. */
87 if (failtest && (random() % 32) == 0)
88 return NULL;
89 return malloc(size);
90 }
/* Turn failure injection off.  The signature (int argument, void return)
 * matches a signal handler -- presumably installed as one; confirm at the
 * registration site, which is not visible here. */
92 static void stop_failtest(int signum __attribute__((unused)))
93 {
94 failtest = false;
95 }
97 /* Need these before we #define away write_all/mkdir in testing.h */
98 bool test_write_all(int fd, void *contents, unsigned int len);
99 bool test_write_all(int fd, void *contents, unsigned int len)
100 {
101 if (failtest && (random() % 8) == 0) {
102 if (len)
103 len = random() % len;
104 write(fd, contents, len);
105 errno = ENOSPC;
106 return false;
107 }
108 return xs_write_all(fd, contents, len);
109 }
111 int test_mkdir(const char *dir, int perms);
112 int test_mkdir(const char *dir, int perms)
113 {
114 if (failtest && (random() % 8) == 0) {
115 errno = ENOSPC;
116 return -1;
117 }
118 return mkdir(dir, perms);
119 }
120 #endif /* TESTING */
122 #include "xenstored_test.h"
124 TDB_CONTEXT *tdb_context(struct connection *conn)
125 {
126 /* conn = NULL used in manual_node at setup. */
127 if (!conn || !conn->transaction)
128 return tdb_ctx;
129 return tdb_transaction_context(conn->transaction);
130 }
132 bool replace_tdb(const char *newname, TDB_CONTEXT *newtdb)
133 {
134 if (rename(newname, xs_daemon_tdb()) != 0)
135 return false;
136 tdb_close(tdb_ctx);
137 tdb_ctx = talloc_steal(talloc_autofree_context(), newtdb);
138 return true;
139 }
141 static char *sockmsg_string(enum xsd_sockmsg_type type)
142 {
143 switch (type) {
144 case XS_DEBUG: return "DEBUG";
145 case XS_DIRECTORY: return "DIRECTORY";
146 case XS_READ: return "READ";
147 case XS_GET_PERMS: return "GET_PERMS";
148 case XS_WATCH: return "WATCH";
149 case XS_UNWATCH: return "UNWATCH";
150 case XS_TRANSACTION_START: return "TRANSACTION_START";
151 case XS_TRANSACTION_END: return "TRANSACTION_END";
152 case XS_INTRODUCE: return "INTRODUCE";
153 case XS_RELEASE: return "RELEASE";
154 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
155 case XS_WRITE: return "WRITE";
156 case XS_MKDIR: return "MKDIR";
157 case XS_RM: return "RM";
158 case XS_SET_PERMS: return "SET_PERMS";
159 case XS_WATCH_EVENT: return "WATCH_EVENT";
160 case XS_ERROR: return "ERROR";
161 case XS_IS_DOMAIN_INTRODUCED: return "XS_IS_DOMAIN_INTRODUCED";
162 default:
163 return "**UNKNOWN**";
164 }
165 }
167 void trace(const char *fmt, ...)
168 {
169 va_list arglist;
170 char *str;
171 char sbuf[1024];
172 int ret;
174 if (tracefd < 0)
175 return;
177 /* try to use a static buffer */
178 va_start(arglist, fmt);
179 ret = vsnprintf(sbuf, 1024, fmt, arglist);
180 va_end(arglist);
182 if (ret <= 1024) {
183 write(tracefd, sbuf, ret);
184 return;
185 }
187 /* fail back to dynamic allocation */
188 va_start(arglist, fmt);
189 str = talloc_vasprintf(NULL, fmt, arglist);
190 va_end(arglist);
191 write(tracefd, str, strlen(str));
192 talloc_free(str);
193 }
195 static void trace_io(const struct connection *conn,
196 const char *prefix,
197 const struct buffered_data *data)
198 {
199 unsigned int i;
200 time_t now;
201 struct tm *tm;
203 if (tracefd < 0)
204 return;
206 now = time(NULL);
207 tm = localtime(&now);
209 trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (", prefix, conn,
210 tm->tm_year + 1900, tm->tm_mon + 1,
211 tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
212 sockmsg_string(data->hdr.msg.type));
214 for (i = 0; i < data->hdr.msg.len; i++)
215 trace("%c", (data->buffer[i] != '\0') ? data->buffer[i] : ' ');
216 trace(")\n");
217 }
/* Record in the trace file that an object was created: writes the given
 * type label and the object's address. */
219 void trace_create(const void *data, const char *type)
220 {
221 trace("CREATE %s %p\n", type, data);
222 }
/* Record in the trace file that an object was destroyed: writes the given
 * type label and the object's address. */
224 void trace_destroy(const void *data, const char *type)
225 {
226 trace("DESTROY %s %p\n", type, data);
227 }
229 /**
230 * Signal handler for SIGHUP, which requests that the trace log is reopened
231 * (in the main loop). A single byte is written to reopen_log_pipe, to awaken
232 * the select() in the main loop.
233 */
234 static void trigger_reopen_log(int signal __attribute__((unused)))
235 {
236 char c = 'A';
237 write(reopen_log_pipe[1], &c, 1);
238 }
241 static void reopen_log(void)
242 {
243 if (tracefile) {
244 if (tracefd > 0)
245 close(tracefd);
247 tracefd = open(tracefile, O_WRONLY|O_CREAT|O_APPEND, 0600);
249 if (tracefd < 0)
250 perror("Could not open tracefile");
251 else
252 trace("\n***\n");
253 }
254 }
/*
 * Try to send the first queued reply on conn->out_list.
 *
 * The header is written first, then the payload; out->used counts the
 * bytes already sent for the part currently in progress, so a partial
 * write is resumed on the next call.  Once the whole message has gone out
 * it is traced, unlinked from the queue and freed.
 *
 * Returns false only when conn->write reports an error (negative result);
 * returns true otherwise, including when nothing is queued or only part
 * of the message could be written.
 */
257 static bool write_messages(struct connection *conn)
258 {
259 int ret;
260 struct buffered_data *out;
262 out = list_top(&conn->out_list, struct buffered_data, list);
263 if (out == NULL)
264 return true;
266 if (out->inhdr) {
267 if (verbose)
268 xprintf("Writing msg %s (%.*s) out to %p\n",
269 sockmsg_string(out->hdr.msg.type),
270 out->hdr.msg.len,
271 out->buffer, conn);
272 ret = conn->write(conn, out->hdr.raw + out->used,
273 sizeof(out->hdr) - out->used);
274 if (ret < 0)
275 return false;
277 out->used += ret;
278 if (out->used < sizeof(out->hdr))
279 return true;
/* Header complete: restart the byte counter for the payload. */
281 out->inhdr = false;
282 out->used = 0;
284 /* Second write might block if non-zero. */
/* For non-domain connections with a payload, return now so the caller
 * can wait for writability again before the payload is attempted. */
285 if (out->hdr.msg.len && !conn->domain)
286 return true;
287 }
289 ret = conn->write(conn, out->buffer + out->used,
290 out->hdr.msg.len - out->used);
291 if (ret < 0)
292 return false;
294 out->used += ret;
295 if (out->used != out->hdr.msg.len)
296 return true;
298 trace_io(conn, "OUT", out);
300 list_del(&out->list);
301 talloc_free(out);
303 return true;
304 }
306 static int destroy_conn(void *_conn)
307 {
308 struct connection *conn = _conn;
310 /* Flush outgoing if possible, but don't block. */
311 if (!conn->domain) {
312 fd_set set;
313 struct timeval none;
315 FD_ZERO(&set);
316 FD_SET(conn->fd, &set);
317 none.tv_sec = none.tv_usec = 0;
319 while (!list_empty(&conn->out_list)
320 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
321 if (!write_messages(conn))
322 break;
323 close(conn->fd);
324 }
325 list_del(&conn->list);
326 trace_destroy(conn, "connection");
327 return 0;
328 }
/*
 * Add fd to set and raise *max to fd if it is larger.
 *
 * Negative descriptors ("not open") are ignored.  Descriptors at or above
 * FD_SETSIZE are ignored too: FD_SET on such a value writes outside the
 * fd_set, so refusing them is the only safe option with select().
 */
static void set_fd(int fd, fd_set *set, int *max)
{
	if (fd < 0 || fd >= FD_SETSIZE)
		return;

	FD_SET(fd, set);
	if (fd > *max)
		*max = fd;
}
341 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock)
342 {
343 struct connection *i;
344 int max = -1;
346 FD_ZERO(inset);
347 FD_ZERO(outset);
349 set_fd(sock, inset, &max);
350 set_fd(ro_sock, inset, &max);
351 set_fd(eventchn_fd, inset, &max);
352 set_fd(reopen_log_pipe[0], inset, &max);
353 list_for_each_entry(i, &connections, list) {
354 if (i->domain)
355 continue;
356 set_fd(i->fd, inset, &max);
357 if (!list_empty(&i->out_list))
358 FD_SET(i->fd, outset);
359 }
360 return max;
361 }
/* talloc destructor: close the descriptor stored in the freed int. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
370 /* Return a pointer to an fd, self-closing and attached to this pathname. */
371 int *talloc_open(const char *pathname, int flags, int mode)
372 {
373 int *fd;
375 fd = talloc(pathname, int);
376 *fd = open(pathname, flags, mode);
377 if (*fd < 0) {
378 int saved_errno = errno;
379 talloc_free(fd);
380 errno = saved_errno;
381 return NULL;
382 }
383 talloc_set_destructor(fd, destroy_fd);
384 return fd;
385 }
/*
 * Is child a subnode of parent, or equal?
 *
 * A path counts as a child when it starts with parent and the character
 * right after that prefix is '/' or the terminator.  The root "/" is
 * special-cased as the parent of everything.
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen = strlen(parent);
	char next;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (strcmp(parent, "/") == 0)
		return true;

	if (strncmp(child, parent, plen) != 0)
		return false;

	next = child[plen];
	return next == '\0' || next == '/';
}
403 /* If it fails, returns NULL and sets errno. */
404 static struct node *read_node(struct connection *conn, const char *name)
405 {
406 TDB_DATA key, data;
407 uint32_t *p;
408 struct node *node;
409 TDB_CONTEXT * context = tdb_context(conn);
411 key.dptr = (void *)name;
412 key.dsize = strlen(name);
413 data = tdb_fetch(context, key);
415 if (data.dptr == NULL) {
416 if (tdb_error(context) == TDB_ERR_NOEXIST)
417 errno = ENOENT;
418 else {
419 log("TDB error on read: %s", tdb_errorstr(context));
420 errno = EIO;
421 }
422 return NULL;
423 }
425 node = talloc(name, struct node);
426 node->name = talloc_strdup(node, name);
427 node->parent = NULL;
428 node->tdb = tdb_context(conn);
429 talloc_steal(node, data.dptr);
431 /* Datalen, childlen, number of permissions */
432 p = (uint32_t *)data.dptr;
433 node->num_perms = p[0];
434 node->datalen = p[1];
435 node->childlen = p[2];
437 /* Permissions are struct xs_permissions. */
438 node->perms = (void *)&p[3];
439 /* Data is binary blob (usually ascii, no nul). */
440 node->data = node->perms + node->num_perms;
441 /* Children is strings, nul separated. */
442 node->children = node->data + node->datalen;
444 return node;
445 }
447 static bool write_node(struct connection *conn, const struct node *node)
448 {
449 TDB_DATA key, data;
450 void *p;
452 key.dptr = (void *)node->name;
453 key.dsize = strlen(node->name);
455 data.dsize = 3*sizeof(uint32_t)
456 + node->num_perms*sizeof(node->perms[0])
457 + node->datalen + node->childlen;
458 data.dptr = talloc_size(node, data.dsize);
459 ((uint32_t *)data.dptr)[0] = node->num_perms;
460 ((uint32_t *)data.dptr)[1] = node->datalen;
461 ((uint32_t *)data.dptr)[2] = node->childlen;
462 p = data.dptr + 3 * sizeof(uint32_t);
464 memcpy(p, node->perms, node->num_perms*sizeof(node->perms[0]));
465 p += node->num_perms*sizeof(node->perms[0]);
466 memcpy(p, node->data, node->datalen);
467 p += node->datalen;
468 memcpy(p, node->children, node->childlen);
470 /* TDB should set errno, but doesn't even set ecode AFAICT. */
471 if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
472 corrupt(conn, "Write of %s = %s failed", key, data);
473 errno = ENOSPC;
474 return false;
475 }
476 return true;
477 }
479 static enum xs_perm_type perm_for_conn(struct connection *conn,
480 struct xs_permissions *perms,
481 unsigned int num)
482 {
483 unsigned int i;
484 enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
486 if (!conn->can_write)
487 mask &= ~XS_PERM_WRITE;
489 /* Owners and tools get it all... */
490 if (!conn->id || perms[0].id == conn->id)
491 return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
493 for (i = 1; i < num; i++)
494 if (perms[i].id == conn->id)
495 return perms[i].perms & mask;
497 return perms[0].perms & mask;
498 }
/*
 * Return the parent path of node as a new string allocated as a talloc
 * child of node.  Paths with no '/' after the first character have "/"
 * as their parent.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last != NULL)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);

	return talloc_strdup(node, "/");
}
508 /* What do parents say? */
509 static enum xs_perm_type ask_parents(struct connection *conn, const char *name)
510 {
511 struct node *node;
513 do {
514 name = get_parent(name);
515 node = read_node(conn, name);
516 if (node)
517 break;
518 } while (!streq(name, "/"));
520 /* No permission at root? We're in trouble. */
521 if (!node)
522 corrupt(conn, "No permissions file at root");
524 return perm_for_conn(conn, node->perms, node->num_perms);
525 }
527 /* We have a weird permissions system. You can allow someone into a
528 * specific node without allowing it in the parents. If it's going to
529 * fail, however, we don't want the errno to indicate any information
530 * about the node. */
531 static int errno_from_parents(struct connection *conn, const char *node,
532 int errnum, enum xs_perm_type perm)
533 {
534 /* We always tell them about memory failures. */
535 if (errnum == ENOMEM)
536 return errnum;
538 if (ask_parents(conn, node) & perm)
539 return errnum;
540 return EACCES;
541 }
543 /* If it fails, returns NULL and sets errno. */
544 struct node *get_node(struct connection *conn,
545 const char *name,
546 enum xs_perm_type perm)
547 {
548 struct node *node;
550 if (!name || !is_valid_nodename(name)) {
551 errno = EINVAL;
552 return NULL;
553 }
554 node = read_node(conn, name);
555 /* If we don't have permission, we don't have node. */
556 if (node) {
557 if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
558 != perm)
559 node = NULL;
560 }
561 /* Clean up errno if they weren't supposed to know. */
562 if (!node)
563 errno = errno_from_parents(conn, name, errno, perm);
564 return node;
565 }
567 static struct buffered_data *new_buffer(void *ctx)
568 {
569 struct buffered_data *data;
571 data = talloc_zero(ctx, struct buffered_data);
572 if (data == NULL)
573 return NULL;
575 data->inhdr = true;
576 return data;
577 }
579 /* Return length of string (including nul) at this offset. */
580 static unsigned int get_string(const struct buffered_data *data,
581 unsigned int offset)
582 {
583 const char *nul;
585 if (offset >= data->used)
586 return 0;
588 nul = memchr(data->buffer + offset, 0, data->used - offset);
589 if (!nul)
590 return 0;
592 return nul - (data->buffer + offset) + 1;
593 }
595 /* Break input into vectors, return the number, fill in up to num of them. */
596 unsigned int get_strings(struct buffered_data *data,
597 char *vec[], unsigned int num)
598 {
599 unsigned int off, i, len;
601 off = i = 0;
602 while ((len = get_string(data, off)) != 0) {
603 if (i < num)
604 vec[i] = data->buffer + off;
605 i++;
606 off += len;
607 }
608 return i;
609 }
611 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
612 const void *data, unsigned int len)
613 {
614 struct buffered_data *bdata;
616 /* Message is a child of the connection context for auto-cleanup. */
617 bdata = new_buffer(conn);
618 bdata->buffer = talloc_array(bdata, char, len);
620 /* Echo request header in reply unless this is an async watch event. */
621 if (type != XS_WATCH_EVENT) {
622 memcpy(&bdata->hdr.msg, &conn->in->hdr.msg,
623 sizeof(struct xsd_sockmsg));
624 } else {
625 memset(&bdata->hdr.msg, 0, sizeof(struct xsd_sockmsg));
626 }
628 /* Update relevant header fields and fill in the message body. */
629 bdata->hdr.msg.type = type;
630 bdata->hdr.msg.len = len;
631 memcpy(bdata->buffer, data, len);
633 /* Queue for later transmission. */
634 list_add_tail(&bdata->list, &conn->out_list);
635 }
637 /* Some routines (write, mkdir, etc) just need a non-error return */
/* Queues a reply of the given type whose payload is the three bytes
 * "OK" plus its nul terminator. */
638 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
639 {
640 send_reply(conn, type, "OK", sizeof("OK"));
641 }
643 void send_error(struct connection *conn, int error)
644 {
645 unsigned int i;
647 for (i = 0; error != xsd_errors[i].errnum; i++) {
648 if (i == ARRAY_SIZE(xsd_errors) - 1) {
649 eprintf("xenstored: error %i untranslatable", error);
650 i = 0; /* EINVAL */
651 break;
652 }
653 }
654 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
655 strlen(xsd_errors[i].errstring) + 1);
656 }
/*
 * True if node consists only of ASCII letters, digits and the characters
 * '-', '/', '_' and '@'.  The empty string is accepted.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix covers the whole string iff it ends at the nul. */
	return node[strspn(node, allowed)] == '\0';
}
/*
 * Check that node is a well-formed absolute path: it starts with '/',
 * does not end in '/' unless it is exactly "/", has no empty component
 * ("//") and uses only the characters valid_chars() accepts.
 */
bool is_valid_nodename(const char *node)
{
	/* Must start in /. */
	if (!strstarts(node, "/"))
		return false;

	/* Cannot end in / (unless it's just "/"). */
	if (!streq(node, "/") && strends(node, "/"))
		return false;

	/* No double //. */
	if (strstr(node, "//") != NULL)
		return false;

	return valid_chars(node);
}
684 /* We expect one arg in the input: return NULL otherwise. */
685 static const char *onearg(struct buffered_data *in)
686 {
687 if (!in->used || get_string(in, 0) != in->used)
688 return NULL;
689 return in->buffer;
690 }
692 static char *perms_to_strings(const void *ctx,
693 struct xs_permissions *perms, unsigned int num,
694 unsigned int *len)
695 {
696 unsigned int i;
697 char *strings = NULL;
698 char buffer[MAX_STRLEN(unsigned int) + 1];
700 for (*len = 0, i = 0; i < num; i++) {
701 if (!xs_perm_to_string(&perms[i], buffer))
702 return NULL;
704 strings = talloc_realloc(ctx, strings, char,
705 *len + strlen(buffer) + 1);
706 strcpy(strings + *len, buffer);
707 *len += strlen(buffer) + 1;
708 }
709 return strings;
710 }
712 char *canonicalize(struct connection *conn, const char *node)
713 {
714 const char *prefix;
716 if (!node || strstarts(node, "/"))
717 return (char *)node;
718 prefix = get_implicit_path(conn);
719 if (prefix)
720 return talloc_asprintf(node, "%s/%s", prefix, node);
721 return (char *)node;
722 }
/*
 * True if node names a special event node, i.e. is non-NULL and starts
 * with '@'.  Otherwise sets errno to EINVAL and returns false.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
733 static void send_directory(struct connection *conn, const char *name)
734 {
735 struct node *node;
737 name = canonicalize(conn, name);
738 node = get_node(conn, name, XS_PERM_READ);
739 if (!node) {
740 send_error(conn, errno);
741 return;
742 }
744 send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
745 }
747 static void do_read(struct connection *conn, const char *name)
748 {
749 struct node *node;
751 name = canonicalize(conn, name);
752 node = get_node(conn, name, XS_PERM_READ);
753 if (!node) {
754 send_error(conn, errno);
755 return;
756 }
758 send_reply(conn, XS_READ, node->data, node->datalen);
759 }
761 static void delete_node_single(struct connection *conn, struct node *node)
762 {
763 TDB_DATA key;
765 key.dptr = (void *)node->name;
766 key.dsize = strlen(node->name);
768 if (tdb_delete(tdb_context(conn), key) != 0)
769 corrupt(conn, "Could not delete '%s'", node->name);
770 }
/* Must not be / */
/* Returns a pointer to the last path component, i.e. just past the final
 * '/' in name.  name must contain at least one '/'. */
static char *basename(const char *name)
{
	char *last_slash = strrchr(name, '/');

	return last_slash + 1;
}
778 static struct node *construct_node(struct connection *conn, const char *name)
779 {
780 const char *base;
781 unsigned int baselen;
782 struct node *parent, *node;
783 char *children, *parentname = get_parent(name);
785 /* If parent doesn't exist, create it. */
786 parent = read_node(conn, parentname);
787 if (!parent)
788 parent = construct_node(conn, parentname);
789 if (!parent)
790 return NULL;
792 /* Add child to parent. */
793 base = basename(name);
794 baselen = strlen(base) + 1;
795 children = talloc_array(name, char, parent->childlen + baselen);
796 memcpy(children, parent->children, parent->childlen);
797 memcpy(children + parent->childlen, base, baselen);
798 parent->children = children;
799 parent->childlen += baselen;
801 /* Allocate node */
802 node = talloc(name, struct node);
803 node->tdb = tdb_context(conn);
804 node->name = talloc_strdup(node, name);
806 /* Inherit permissions, except domains own what they create */
807 node->num_perms = parent->num_perms;
808 node->perms = talloc_memdup(node, parent->perms,
809 node->num_perms * sizeof(node->perms[0]));
810 if (conn && conn->id)
811 node->perms[0].id = conn->id;
813 /* No children, no data */
814 node->children = node->data = NULL;
815 node->childlen = node->datalen = 0;
816 node->parent = parent;
817 return node;
818 }
820 static int destroy_node(void *_node)
821 {
822 struct node *node = _node;
823 TDB_DATA key;
825 if (streq(node->name, "/"))
826 corrupt(NULL, "Destroying root node!");
828 key.dptr = (void *)node->name;
829 key.dsize = strlen(node->name);
831 tdb_delete(node->tdb, key);
832 return 0;
833 }
835 static struct node *create_node(struct connection *conn,
836 const char *name,
837 void *data, unsigned int datalen)
838 {
839 struct node *node, *i;
841 node = construct_node(conn, name);
842 if (!node)
843 return NULL;
845 node->data = data;
846 node->datalen = datalen;
848 /* We write out the nodes down, setting destructor in case
849 * something goes wrong. */
850 for (i = node; i; i = i->parent) {
851 if (!write_node(conn, i))
852 return NULL;
853 talloc_set_destructor(i, destroy_node);
854 }
856 /* OK, now remove destructors so they stay around */
857 for (i = node; i; i = i->parent)
858 talloc_set_destructor(i, NULL);
859 return node;
860 }
862 /* path, data... */
863 static void do_write(struct connection *conn, struct buffered_data *in)
864 {
865 unsigned int offset, datalen;
866 struct node *node;
867 char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
868 char *name;
870 /* Extra "strings" can be created by binary data. */
871 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
872 send_error(conn, EINVAL);
873 return;
874 }
876 offset = strlen(vec[0]) + 1;
877 datalen = in->used - offset;
879 name = canonicalize(conn, vec[0]);
880 node = get_node(conn, name, XS_PERM_WRITE);
881 if (!node) {
882 /* No permissions, invalid input? */
883 if (errno != ENOENT) {
884 send_error(conn, errno);
885 return;
886 }
887 node = create_node(conn, name, in->buffer + offset, datalen);
888 if (!node) {
889 send_error(conn, errno);
890 return;
891 }
892 } else {
893 node->data = in->buffer + offset;
894 node->datalen = datalen;
895 if (!write_node(conn, node)){
896 send_error(conn, errno);
897 return;
898 }
899 }
901 add_change_node(conn->transaction, name, false);
902 fire_watches(conn, name, false);
903 send_ack(conn, XS_WRITE);
904 }
906 static void do_mkdir(struct connection *conn, const char *name)
907 {
908 struct node *node;
910 name = canonicalize(conn, name);
911 node = get_node(conn, name, XS_PERM_WRITE);
913 /* If it already exists, fine. */
914 if (!node) {
915 /* No permissions? */
916 if (errno != ENOENT) {
917 send_error(conn, errno);
918 return;
919 }
920 node = create_node(conn, name, NULL, 0);
921 if (!node) {
922 send_error(conn, errno);
923 return;
924 }
925 add_change_node(conn->transaction, name, false);
926 fire_watches(conn, name, false);
927 }
928 send_ack(conn, XS_MKDIR);
929 }
931 static void delete_node(struct connection *conn, struct node *node)
932 {
933 unsigned int i;
935 /* Delete self, then delete children. If we crash, then the worst
936 that can happen is the children will continue to take up space, but
937 will otherwise be unreachable. */
938 delete_node_single(conn, node);
940 /* Delete children, too. */
941 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
942 struct node *child;
944 child = read_node(conn,
945 talloc_asprintf(node, "%s/%s", node->name,
946 node->children + i));
947 if (child) {
948 delete_node(conn, child);
949 }
950 else {
951 trace("delete_node: No child '%s/%s' found!\n",
952 node->name, node->children + i);
953 /* Skip it, we've already deleted the parent. */
954 }
955 }
956 }
/*
 * Delete memory using memmove: remove len bytes at offset off from a
 * region of total bytes by sliding the tail down over them.  The bytes
 * past the new end of the data are left as they were.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	/* Arithmetic on void * is a GNU extension; use a byte pointer. */
	unsigned char *bytes = mem;

	memmove(bytes + off, bytes + off + len, total - off - len);
}
966 static bool remove_child_entry(struct connection *conn, struct node *node,
967 size_t offset)
968 {
969 size_t childlen = strlen(node->children + offset);
970 memdel(node->children, offset, childlen + 1, node->childlen);
971 node->childlen -= childlen + 1;
972 return write_node(conn, node);
973 }
976 static bool delete_child(struct connection *conn,
977 struct node *node, const char *childname)
978 {
979 unsigned int i;
981 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
982 if (streq(node->children+i, childname)) {
983 return remove_child_entry(conn, node, i);
984 }
985 }
986 corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
987 return false;
988 }
/*
 * Remove node (called name) and everything below it.  Returns 1 on
 * success.  If the parent cannot be read or does not list the node,
 * EINVAL is sent to conn and 0 is returned.
 */
static int _rm(struct connection *conn, struct node *node, const char *name)
{
	/* Delete from parent first, then if we crash, the worst that can
	   happen is the child will continue to take up space, but will
	   otherwise be unreachable. */
	struct node *parent = read_node(conn, get_parent(name));

	if (parent && delete_child(conn, parent, basename(name))) {
		delete_node(conn, node);
		return 1;
	}

	send_error(conn, EINVAL);
	return 0;
}
1012 static void internal_rm(const char *name)
1014 char *tname = talloc_strdup(NULL, name);
1015 struct node *node = read_node(NULL, tname);
1016 if (node)
1017 _rm(NULL, node, tname);
1018 talloc_free(node);
1019 talloc_free(tname);
1023 static void do_rm(struct connection *conn, const char *name)
1025 struct node *node;
1027 name = canonicalize(conn, name);
1028 node = get_node(conn, name, XS_PERM_WRITE);
1029 if (!node) {
1030 /* Didn't exist already? Fine, if parent exists. */
1031 if (errno == ENOENT) {
1032 node = read_node(conn, get_parent(name));
1033 if (node) {
1034 send_ack(conn, XS_RM);
1035 return;
1037 /* Restore errno, just in case. */
1038 errno = ENOENT;
1040 send_error(conn, errno);
1041 return;
1044 if (streq(name, "/")) {
1045 send_error(conn, EINVAL);
1046 return;
1049 if (_rm(conn, node, name)) {
1050 add_change_node(conn->transaction, name, true);
1051 fire_watches(conn, name, true);
1052 send_ack(conn, XS_RM);
1057 static void do_get_perms(struct connection *conn, const char *name)
1059 struct node *node;
1060 char *strings;
1061 unsigned int len;
1063 name = canonicalize(conn, name);
1064 node = get_node(conn, name, XS_PERM_READ);
1065 if (!node) {
1066 send_error(conn, errno);
1067 return;
1070 strings = perms_to_strings(node, node->perms, node->num_perms, &len);
1071 if (!strings)
1072 send_error(conn, errno);
1073 else
1074 send_reply(conn, XS_GET_PERMS, strings, len);
1077 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1079 unsigned int num;
1080 char *name, *permstr;
1081 struct node *node;
1083 num = xs_count_strings(in->buffer, in->used);
1084 if (num < 2) {
1085 send_error(conn, EINVAL);
1086 return;
1089 /* First arg is node name. */
1090 name = canonicalize(conn, in->buffer);
1091 permstr = in->buffer + strlen(in->buffer) + 1;
1092 num--;
1094 /* We must own node to do this (tools can do this too). */
1095 node = get_node(conn, name, XS_PERM_WRITE|XS_PERM_OWNER);
1096 if (!node) {
1097 send_error(conn, errno);
1098 return;
1101 node->perms = talloc_array(node, struct xs_permissions, num);
1102 node->num_perms = num;
1103 if (!xs_strings_to_perms(node->perms, num, permstr)) {
1104 send_error(conn, errno);
1105 return;
1107 if (!write_node(conn, node)) {
1108 send_error(conn, errno);
1109 return;
1112 add_change_node(conn->transaction, name, false);
1113 fire_watches(conn, name, false);
1114 send_ack(conn, XS_SET_PERMS);
/*
 * Handle XS_DEBUG.  The first string of the message selects the action:
 * "print" writes the second string through xprintf (EINVAL if there is
 * no second string), "check" runs check_store().  TESTING builds also
 * accept "setid" (set the connection id from the second string) and
 * "failtest" (optionally seed random() from the second string, then turn
 * failure injection on).  An ack is sent unless "print" was rejected.
 *
 * NOTE(review): this listing has lost its brace-only lines (see the gaps
 * in the embedded line numbers); the statements below are otherwise
 * complete.
 */
1117 static void do_debug(struct connection *conn, struct buffered_data *in)
1119 int num;
1121 num = xs_count_strings(in->buffer, in->used);
1123 if (streq(in->buffer, "print")) {
1124 if (num < 2) {
1125 send_error(conn, EINVAL);
1126 return;
/* get_string(in, 0) is the length of the first string including its nul,
 * so buffer + that length is the start of the second string. */
1128 xprintf("debug: %s", in->buffer + get_string(in, 0));
1130 if (streq(in->buffer, "check"))
1131 check_store();
1132 #ifdef TESTING
1133 /* For testing, we allow them to set id. */
1134 if (streq(in->buffer, "setid")) {
1135 conn->id = atoi(in->buffer + get_string(in, 0));
1136 } else if (streq(in->buffer, "failtest")) {
1137 if (get_string(in, 0) < in->used)
1138 srandom(atoi(in->buffer + get_string(in, 0)));
1139 failtest = true;
1141 #endif /* TESTING */
1142 send_ack(conn, XS_DEBUG);
1145 /* Process "in" for conn: "in" will vanish after this conversation, so
1146 * we can talloc off it for temporary variables. May free "conn".
1147 */
/*
 * The transaction named by the header's tx_id is looked up first; a
 * failed lookup is answered with the corresponding error and nothing
 * else happens.  Otherwise the transaction is attached to the connection
 * for the duration of the handler and detached again afterwards.
 * Handlers that take a single string get it through onearg(), which
 * yields NULL for malformed input.  Unknown message types are answered
 * with ENOSYS.
 *
 * NOTE(review): this listing has lost its brace-only lines (see the gaps
 * in the embedded line numbers); the statements below are otherwise
 * complete.
 */
1148 static void process_message(struct connection *conn, struct buffered_data *in)
1150 struct transaction *trans;
1152 trans = transaction_lookup(conn, in->hdr.msg.tx_id);
1153 if (IS_ERR(trans)) {
1154 send_error(conn, -PTR_ERR(trans));
1155 return;
1158 assert(conn->transaction == NULL);
1159 conn->transaction = trans;
1161 switch (in->hdr.msg.type) {
1162 case XS_DIRECTORY:
1163 send_directory(conn, onearg(in));
1164 break;
1166 case XS_READ:
1167 do_read(conn, onearg(in));
1168 break;
1170 case XS_WRITE:
1171 do_write(conn, in);
1172 break;
1174 case XS_MKDIR:
1175 do_mkdir(conn, onearg(in));
1176 break;
1178 case XS_RM:
1179 do_rm(conn, onearg(in));
1180 break;
1182 case XS_GET_PERMS:
1183 do_get_perms(conn, onearg(in));
1184 break;
1186 case XS_SET_PERMS:
1187 do_set_perms(conn, in);
1188 break;
1190 case XS_DEBUG:
1191 do_debug(conn, in);
1192 break;
1194 case XS_WATCH:
1195 do_watch(conn, in);
1196 break;
1198 case XS_UNWATCH:
1199 do_unwatch(conn, in);
1200 break;
1202 case XS_TRANSACTION_START:
1203 do_transaction_start(conn, in);
1204 break;
1206 case XS_TRANSACTION_END:
1207 do_transaction_end(conn, onearg(in));
1208 break;
1210 case XS_INTRODUCE:
1211 do_introduce(conn, in);
1212 break;
1214 case XS_IS_DOMAIN_INTRODUCED:
1215 do_is_domain_introduced(conn, onearg(in));
1216 break;
1218 case XS_RELEASE:
1219 do_release(conn, onearg(in));
1220 break;
1222 case XS_GET_DOMAIN_PATH:
1223 do_get_domain_path(conn, onearg(in));
1224 break;
1226 default:
1227 eprintf("Client unknown operation %i", in->hdr.msg.type);
1228 send_error(conn, ENOSYS);
1229 break;
1232 conn->transaction = NULL;
1235 static void consider_message(struct connection *conn)
1237 if (verbose)
1238 xprintf("Got message %s len %i from %p\n",
1239 sockmsg_string(conn->in->hdr.msg.type),
1240 conn->in->hdr.msg.len, conn);
1242 process_message(conn, conn->in);
1244 talloc_free(conn->in);
1245 conn->in = new_buffer(conn);
1248 /* Errors in reading or allocating here mean we get out of sync, so we
1249 * drop the whole client connection. */
/*
 * Reads whatever is available for the connection's current input
 * message.  While in->inhdr is set, bytes go into the header; once the
 * header is complete the payload buffer is sized from hdr.msg.len and
 * the function returns.  Later calls fill the payload, and a complete
 * message is traced and handed to consider_message().  in->used counts
 * the bytes received for the part currently being read.
 *
 * NOTE(review): this listing has lost its brace-only lines (see the gaps
 * in the embedded line numbers); the statements below are otherwise
 * complete.
 */
1250 static void handle_input(struct connection *conn)
1252 int bytes;
1253 struct buffered_data *in = conn->in;
1255 /* Not finished header yet? */
1256 if (in->inhdr) {
1257 bytes = conn->read(conn, in->hdr.raw + in->used,
1258 sizeof(in->hdr) - in->used);
/* A zero-byte header read is treated like an error here; the payload
 * read below only rejects negative results. */
1259 if (bytes <= 0)
1260 goto bad_client;
1261 in->used += bytes;
1262 if (in->used != sizeof(in->hdr))
1263 return;
/* Payloads longer than PATH_MAX are refused and the client is dropped. */
1265 if (in->hdr.msg.len > PATH_MAX) {
1266 #ifndef TESTING
1267 syslog(LOG_ERR, "Client tried to feed us %i",
1268 in->hdr.msg.len);
1269 #endif
1270 goto bad_client;
1273 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1274 if (!in->buffer)
1275 goto bad_client;
1276 in->used = 0;
1277 in->inhdr = false;
1278 return;
1281 bytes = conn->read(conn, in->buffer + in->used,
1282 in->hdr.msg.len - in->used);
1283 if (bytes < 0)
1284 goto bad_client;
1286 in->used += bytes;
1287 if (in->used != in->hdr.msg.len)
1288 return;
1290 trace_io(conn, "IN ", in);
1291 consider_message(conn);
1292 return;
1294 bad_client:
1295 /* Kill it. */
1296 talloc_free(conn);
/* Flush queued output for conn; a failed write drops the connection. */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	talloc_free(conn);
}
1305 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1307 struct connection *new;
1309 new = talloc_zero(talloc_autofree_context(), struct connection);
1310 if (!new)
1311 return NULL;
1313 new->fd = -1;
1314 new->write = write;
1315 new->read = read;
1316 new->can_write = true;
1317 INIT_LIST_HEAD(&new->out_list);
1318 INIT_LIST_HEAD(&new->watches);
1319 INIT_LIST_HEAD(&new->transaction_list);
1321 new->in = new_buffer(new);
1322 if (new->in == NULL) {
1323 talloc_free(new);
1324 return NULL;
1327 list_add_tail(&new->list, &connections);
1328 talloc_set_destructor(new, destroy_conn);
1329 trace_create(new, "connection");
1330 return new;
1333 static int writefd(struct connection *conn, const void *data, unsigned int len)
1335 return write(conn->fd, data, len);
1338 static int readfd(struct connection *conn, void *data, unsigned int len)
1340 return read(conn->fd, data, len);
1343 static void accept_connection(int sock, bool canwrite)
1345 int fd;
1346 struct connection *conn;
1348 fd = accept(sock, NULL, NULL);
1349 if (fd < 0)
1350 return;
1352 conn = new_connection(writefd, readfd);
1353 if (conn) {
1354 conn->fd = fd;
1355 conn->can_write = canwrite;
1356 } else
1357 close(fd);
#ifdef TESTING
/* Valgrind can check our writes better if we don't use mmap */
#define TDB_FLAGS TDB_NOMMAP

/*
 * Print a summary of every connection on the global list to stdout.
 * Useful for running under debugger.
 */
void dump_connection(void)
{
	struct connection *conn;

	list_for_each_entry(conn, &connections, list) {
		const char *state =
			list_empty(&conn->out_list) ? "OK" : "BUSY";

		printf("Connection %p:\n", conn);
		printf(" state = %s\n", state);

		if (conn->id != 0)
			printf(" id = %i\n", conn->id);

		/* Only mention input once some of it has been read. */
		if (!(conn->in->inhdr && !conn->in->used)) {
			const char *phase =
				conn->in->inhdr ? "header" : "data";

			printf(" got %i bytes of %s\n",
			       conn->in->used, phase);
		}
#if 0
		if (conn->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(conn->out->hdr.msg.type),
			       conn->out->buffer);
		if (conn->transaction)
			dump_transaction(conn);
		if (conn->domain)
			dump_domain(conn);
#endif
		dump_watches(conn);
	}
}
#else
#define TDB_FLAGS 0
#endif
1394 /* We create initial nodes manually. */
1395 static void manual_node(const char *name, const char *child)
1397 struct node *node;
1398 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
1400 node = talloc_zero(NULL, struct node);
1401 node->name = name;
1402 node->perms = &perms;
1403 node->num_perms = 1;
1404 node->children = (char *)child;
1405 if (child)
1406 node->childlen = strlen(child) + 1;
1408 if (!write_node(NULL, node))
1409 barf_perror("Could not create initial node %s", name);
1410 talloc_free(node);
1413 static void setup_structure(void)
1415 char *tdbname;
1416 tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1417 tdb_ctx = tdb_open(tdbname, 0, TDB_FLAGS, O_RDWR, 0);
1419 if (tdb_ctx) {
1420 /* XXX When we make xenstored able to restart, this will have
1421 to become cleverer, checking for existing domains and not
1422 removing the corresponding entries, but for now xenstored
1423 cannot be restarted without losing all the registered
1424 watches, which breaks all the backend drivers anyway. We
1425 can therefore get away with just clearing /local and
1426 expecting Xend to put the appropriate entries back in.
1428 When this change is made it is important to note that
1429 dom0's entries must be cleaned up on reboot _before_ this
1430 daemon starts, otherwise the backend drivers and dom0's
1431 balloon driver will pick up stale entries. In the case of
1432 the balloon driver, this can be fatal.
1433 */
1434 char *tlocal = talloc_strdup(NULL, "/local");
1436 check_store();
1438 if (remove_local) {
1439 internal_rm("/local");
1440 create_node(NULL, tlocal, NULL, 0);
1442 check_store();
1445 talloc_free(tlocal);
1447 else {
1448 tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
1449 0640);
1450 if (!tdb_ctx)
1451 barf_perror("Could not create tdb file %s", tdbname);
1453 manual_node("/", "tool");
1454 manual_node("/tool", "xenstored");
1455 manual_node("/tool/xenstored", NULL);
1457 check_store();
/*
 * Hash callback for the string-keyed hashtables: starts from 5381 and, for
 * each character of the NUL-terminated key, multiplies the running hash by
 * 33 and adds the character.
 */
static unsigned int hash_from_key_fn(void *k)
{
	const char *p;
	unsigned int h = 5381;

	for (p = k; *p != '\0'; p++)
		h = h * 33 + (unsigned int)*p;

	return h;
}
/* Equality callback for the string-keyed hashtables: 1 when both
 * NUL-terminated keys compare equal, 0 otherwise. */
static int keys_equal_fn(void *key1, void *key2)
{
	const char *lhs = key1;
	const char *rhs = key2;

	return strcmp(lhs, rhs) == 0;
}
/*
 * Build the full path of child `s2` under parent `s1` as a newly
 * talloc-allocated string.  The root ("/") is special-cased so that no
 * double slash is produced.
 */
static char *child_name(const char *s1, const char *s2)
{
	if (strcmp(s1, "/") == 0)
		return talloc_asprintf(NULL, "/%s", s2);

	return talloc_asprintf(NULL, "%s/%s", s1, s2);
}
1492 static void remember_string(struct hashtable *hash, const char *str)
1494 char *k = malloc(strlen(str) + 1);
1495 strcpy(k, str);
1496 hashtable_insert(hash, k, (void *)1);
1500 /**
1501 * A node has a children field that names the children of the node, separated
1502 * by NULs. We check whether there are entries in there that are duplicated
1503 * (and if so, delete the second one), and whether there are any that do not
1504 * have a corresponding child node (and if so, delete them). Each valid child
1505 * is then recursively checked.
1507 * No deleting is performed if the recovery flag is cleared (i.e. -R was
1508 * passed on the command line).
1510 * As we go, we record each node in the given reachable hashtable. These
1511 * entries will be used later in clean_store.
1512 */
1513 static void check_store_(const char *name, struct hashtable *reachable)
1515 struct node *node = read_node(NULL, name);
1517 if (node) {
1518 size_t i = 0;
1520 struct hashtable * children =
1521 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1523 remember_string(reachable, name);
1525 while (i < node->childlen) {
1526 size_t childlen = strlen(node->children + i);
1527 char * childname = child_name(node->name,
1528 node->children + i);
1529 struct node *childnode = read_node(NULL, childname);
1531 if (childnode) {
1532 if (hashtable_search(children, childname)) {
1533 log("check_store: '%s' is duplicated!",
1534 childname);
1536 if (recovery) {
1537 remove_child_entry(NULL, node,
1538 i);
1539 i -= childlen + 1;
1542 else {
1543 remember_string(children, childname);
1544 check_store_(childname, reachable);
1547 else {
1548 log("check_store: No child '%s' found!\n",
1549 childname);
1551 if (recovery) {
1552 remove_child_entry(NULL, node, i);
1553 i -= childlen + 1;
1557 talloc_free(childnode);
1558 talloc_free(childname);
1559 i += childlen + 1;
1562 hashtable_destroy(children, 0 /* Don't free values (they are
1563 all (void *)1) */);
1564 talloc_free(node);
1566 else {
1567 /* Impossible, because no database should ever be without the
1568 root, and otherwise, we've just checked in our caller
1569 (which made a recursive call to get here). */
1571 log("check_store: No child '%s' found: impossible!", name);
1576 /**
1577 * Helper to clean_store below.
1578 */
1579 static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
1580 void *private)
1582 struct hashtable *reachable = private;
1583 char * name = talloc_strndup(NULL, key.dptr, key.dsize);
1585 if (!hashtable_search(reachable, name)) {
1586 log("clean_store: '%s' is orphaned!", name);
1587 if (recovery) {
1588 tdb_delete(tdb, key);
1592 talloc_free(name);
1594 return 0;
1598 /**
1599 * Given the list of reachable nodes, iterate over the whole store, and
1600 * remove any that were not reached.
1601 */
1602 static void clean_store(struct hashtable *reachable)
1604 tdb_traverse(tdb_ctx, &clean_store_, reachable);
1608 static void check_store(void)
1610 char * root = talloc_strdup(NULL, "/");
1611 struct hashtable * reachable =
1612 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1614 log("Checking store ...");
1615 check_store_(root, reachable);
1616 clean_store(reachable);
1617 log("Checking store complete.");
1619 hashtable_destroy(reachable, 0 /* Don't free values (they are all
1620 (void *)1) */);
1621 talloc_free(root);
1625 /* Something is horribly wrong: check the store. */
1626 static void corrupt(struct connection *conn, const char *fmt, ...)
1628 va_list arglist;
1629 char *str;
1630 int saved_errno = errno;
1632 va_start(arglist, fmt);
1633 str = talloc_vasprintf(NULL, fmt, arglist);
1634 va_end(arglist);
1636 log("corruption detected by connection %i: err %s: %s",
1637 conn ? (int)conn->id : -1, strerror(saved_errno), str);
1639 #ifdef TESTING
1640 /* Allow them to attach debugger. */
1641 sleep(30);
1642 #endif
1643 check_store();
1647 static void write_pidfile(const char *pidfile)
1649 char buf[100];
1650 int len;
1651 int fd;
1653 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1654 if (fd == -1)
1655 barf_perror("Opening pid file %s", pidfile);
1657 /* We exit silently if daemon already running. */
1658 if (lockf(fd, F_TLOCK, 0) == -1)
1659 exit(0);
1661 len = sprintf(buf, "%d\n", getpid());
1662 write(fd, buf, len);
/*
 * Turn the calling process into a daemon (Stevens): fork twice with a
 * setsid() in between, move to "/" (except in TESTING builds) and clear the
 * umask.  Both parents exit with status 0; only the final child returns.
 * A failed fork or chdir is fatal.
 */
static void daemonize(void)
{
	pid_t pid;

	/* Separate from our parent via fork, so init inherits us. */
	if ((pid = fork()) < 0)
		barf_perror("Failed to fork daemon");
	if (pid != 0)
		exit(0);

	/* Session leader so ^C doesn't whack us. */
	setsid();

	/* Let session leader exit so child cannot regain CTTY */
	if ((pid = fork()) < 0)
		barf_perror("Failed to fork daemon");
	if (pid != 0)
		exit(0);

#ifndef TESTING	/* Relative paths for socket names */
	/* Move off any mount points we might be in. */
	if (chdir("/") == -1)
		barf_perror("Failed to chdir");
#endif
	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/* Print the command-line help text to stderr. */
static void usage(void)
{
	static const char help_text[] =
		"Usage:\n"
		"\n"
		" xenstored <options>\n"
		"\n"
		"where options may include:\n"
		"\n"
		" --no-domain-init to state that xenstored should not initialise dom0,\n"
		" --pid-file <file> giving a file for the daemon's pid to be written,\n"
		" --help to output this message,\n"
		" --no-fork to request that the daemon does not fork,\n"
		" --output-pid to request that the pid of the daemon is output,\n"
		" --trace-file <file> giving the file for logging, and\n"
		" --no-recovery to request that no recovery should be attempted when\n"
		" the store is corrupted (debug only),\n"
		" --preserve-local to request that /local is preserved on start-up,\n"
		" --verbose to request verbose execution.\n";

	/* The text contains no conversion specifiers, so it can be written
	 * out verbatim. */
	fputs(help_text, stderr);
}
/*
 * Long options accepted by main().  Each one maps to the single-letter
 * option handled in main()'s getopt_long() switch; only --pid-file and
 * --trace-file take an argument.
 */
static struct option options[] = {
	{ .name = "no-domain-init", .has_arg = no_argument,
	  .flag = NULL, .val = 'D' },
	{ .name = "pid-file", .has_arg = required_argument,
	  .flag = NULL, .val = 'F' },
	{ .name = "help", .has_arg = no_argument,
	  .flag = NULL, .val = 'H' },
	{ .name = "no-fork", .has_arg = no_argument,
	  .flag = NULL, .val = 'N' },
	{ .name = "output-pid", .has_arg = no_argument,
	  .flag = NULL, .val = 'P' },
	{ .name = "trace-file", .has_arg = required_argument,
	  .flag = NULL, .val = 'T' },
	{ .name = "no-recovery", .has_arg = no_argument,
	  .flag = NULL, .val = 'R' },
	{ .name = "preserve-local", .has_arg = no_argument,
	  .flag = NULL, .val = 'L' },
	{ .name = "verbose", .has_arg = no_argument,
	  .flag = NULL, .val = 'V' },
	/* Terminator required by getopt_long(). */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
1728 extern void dump_conn(struct connection *conn);
1730 int main(int argc, char *argv[])
1732 int opt, *sock, *ro_sock, max;
1733 struct sockaddr_un addr;
1734 fd_set inset, outset;
1735 bool dofork = true;
1736 bool outputpid = false;
1737 bool no_domain_init = false;
1738 const char *pidfile = NULL;
1740 while ((opt = getopt_long(argc, argv, "DF:HNPT:RLV", options,
1741 NULL)) != -1) {
1742 switch (opt) {
1743 case 'D':
1744 no_domain_init = true;
1745 break;
1746 case 'F':
1747 pidfile = optarg;
1748 break;
1749 case 'H':
1750 usage();
1751 return 0;
1752 case 'N':
1753 dofork = false;
1754 break;
1755 case 'P':
1756 outputpid = true;
1757 break;
1758 case 'R':
1759 recovery = false;
1760 break;
1761 case 'L':
1762 remove_local = false;
1763 break;
1764 case 'T':
1765 tracefile = optarg;
1766 break;
1767 case 'V':
1768 verbose = true;
1769 break;
1772 if (optind != argc)
1773 barf("%s: No arguments desired", argv[0]);
1775 reopen_log();
1777 if (dofork) {
1778 openlog("xenstored", 0, LOG_DAEMON);
1779 daemonize();
1781 if (pidfile)
1782 write_pidfile(pidfile);
1784 talloc_enable_leak_report_full();
1786 /* Create sockets for them to listen to. */
1787 sock = talloc(talloc_autofree_context(), int);
1788 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1789 if (*sock < 0)
1790 barf_perror("Could not create socket");
1791 ro_sock = talloc(talloc_autofree_context(), int);
1792 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1793 if (*ro_sock < 0)
1794 barf_perror("Could not create socket");
1795 talloc_set_destructor(sock, destroy_fd);
1796 talloc_set_destructor(ro_sock, destroy_fd);
1798 /* Don't kill us with SIGPIPE. */
1799 signal(SIGPIPE, SIG_IGN);
1801 /* FIXME: Be more sophisticated, don't mug running daemon. */
1802 unlink(xs_daemon_socket());
1803 unlink(xs_daemon_socket_ro());
1805 addr.sun_family = AF_UNIX;
1806 strcpy(addr.sun_path, xs_daemon_socket());
1807 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1808 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1809 strcpy(addr.sun_path, xs_daemon_socket_ro());
1810 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1811 barf_perror("Could not bind socket to %s",
1812 xs_daemon_socket_ro());
1813 if (chmod(xs_daemon_socket(), 0600) != 0
1814 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1815 barf_perror("Could not chmod sockets");
1817 if (listen(*sock, 1) != 0
1818 || listen(*ro_sock, 1) != 0)
1819 barf_perror("Could not listen on sockets");
1821 if (pipe(reopen_log_pipe)) {
1822 barf_perror("pipe");
1825 /* Setup the database */
1826 setup_structure();
1828 /* Listen to hypervisor. */
1829 if (!no_domain_init)
1830 domain_init();
1832 /* Restore existing connections. */
1833 restore_existing_connections();
1835 if (outputpid) {
1836 printf("%i\n", getpid());
1837 fflush(stdout);
1840 /* close stdin/stdout now we're ready to accept connections */
1841 if (dofork) {
1842 close(STDIN_FILENO);
1843 close(STDOUT_FILENO);
1844 close(STDERR_FILENO);
1847 signal(SIGHUP, trigger_reopen_log);
1849 #ifdef TESTING
1850 signal(SIGUSR1, stop_failtest);
1851 #endif
1853 /* Get ready to listen to the tools. */
1854 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1856 /* Main loop. */
1857 /* FIXME: Rewrite so noone can starve. */
1858 for (;;) {
1859 struct connection *i;
1861 if (select(max+1, &inset, &outset, NULL, NULL) < 0) {
1862 if (errno == EINTR)
1863 continue;
1864 barf_perror("Select failed");
1867 if (FD_ISSET(reopen_log_pipe[0], &inset)) {
1868 char c;
1869 read(reopen_log_pipe[0], &c, 1);
1870 reopen_log();
1873 if (FD_ISSET(*sock, &inset))
1874 accept_connection(*sock, true);
1876 if (FD_ISSET(*ro_sock, &inset))
1877 accept_connection(*ro_sock, false);
1879 if (eventchn_fd > 0 && FD_ISSET(eventchn_fd, &inset))
1880 handle_event();
1882 list_for_each_entry(i, &connections, list) {
1883 if (i->domain)
1884 continue;
1886 /* Operations can delete themselves or others
1887 * (xs_release): list is not safe after input,
1888 * so break. */
1889 if (FD_ISSET(i->fd, &inset)) {
1890 handle_input(i);
1891 break;
1893 if (FD_ISSET(i->fd, &outset)) {
1894 handle_output(i);
1895 break;
1899 /* Handle all possible I/O for domain connections. */
1900 more:
1901 list_for_each_entry(i, &connections, list) {
1902 if (!i->domain)
1903 continue;
1905 if (domain_can_read(i)) {
1906 handle_input(i);
1907 goto more;
1910 if (domain_can_write(i) && !list_empty(&i->out_list)) {
1911 handle_output(i);
1912 goto more;
1916 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1920 /*
1921 * Local variables:
1922 * c-file-style: "linux"
1923 * indent-tabs-mode: t
1924 * c-indent-level: 8
1925 * c-basic-offset: 8
1926 * tab-width: 8
1927 * End:
1928 */