ia64/xen-unstable

view tools/xenstore/xenstored_core.c @ 9293:250ff103c291

fix buffer overflow in print XS_DEBUG command

Signed-off-by: Vincent Hanquez <vincent@xensource.com>
author vhanquez@kneesa.uk.xensource.com
date Wed Mar 15 12:24:34 2006 +0000 (2006-03-15)
parents 51b0d4c2d4d9
children 40e3df4cffe4
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored_core.h"
48 #include "xenstored_watch.h"
49 #include "xenstored_transaction.h"
50 #include "xenstored_domain.h"
51 #include "xenctrl.h"
52 #include "tdb.h"
54 #include "hashtable.h"
57 extern int eventchn_fd; /* in xenstored_domain.c */
59 static bool verbose = false;
60 LIST_HEAD(connections);
61 static int tracefd = -1;
62 static bool recovery = true;
63 static bool remove_local = true;
64 static int reopen_log_pipe[2];
65 static char *tracefile = NULL;
66 static TDB_CONTEXT *tdb_ctx;
68 static void corrupt(struct connection *conn, const char *fmt, ...);
69 static void check_store(void);
/*
 * Format a message once, then send it to both the trace file and syslog
 * (at LOG_ERR).  The temporary string is talloc'ed and freed again here.
 */
#define log(...)							\
	do {								\
		char *s = talloc_asprintf(NULL, __VA_ARGS__);		\
		trace("%s\n", s);					\
		syslog(LOG_ERR, "%s", s);				\
		talloc_free(s);						\
	} while (0)
80 #ifdef TESTING
/* Fault injection is active only while this flag is set. */
static bool failtest = false;

/*
 * We override talloc's malloc.  While failtest is set, about one call in
 * 32 reports an allocation failure; otherwise this is plain malloc().
 */
void *test_malloc(size_t size)
{
	bool inject = failtest && (random() % 32) == 0;

	return inject ? NULL : malloc(size);
}
92 static void stop_failtest(int signum __attribute__((unused)))
93 {
94 failtest = false;
95 }
97 /* Need these before we #define away write_all/mkdir in testing.h */
98 bool test_write_all(int fd, void *contents, unsigned int len);
99 bool test_write_all(int fd, void *contents, unsigned int len)
100 {
101 if (failtest && (random() % 8) == 0) {
102 if (len)
103 len = random() % len;
104 write(fd, contents, len);
105 errno = ENOSPC;
106 return false;
107 }
108 return xs_write_all(fd, contents, len);
109 }
111 int test_mkdir(const char *dir, int perms);
112 int test_mkdir(const char *dir, int perms)
113 {
114 if (failtest && (random() % 8) == 0) {
115 errno = ENOSPC;
116 return -1;
117 }
118 return mkdir(dir, perms);
119 }
120 #endif /* TESTING */
122 #include "xenstored_test.h"
124 TDB_CONTEXT *tdb_context(struct connection *conn)
125 {
126 /* conn = NULL used in manual_node at setup. */
127 if (!conn || !conn->transaction)
128 return tdb_ctx;
129 return tdb_transaction_context(conn->transaction);
130 }
132 bool replace_tdb(const char *newname, TDB_CONTEXT *newtdb)
133 {
134 if (rename(newname, xs_daemon_tdb()) != 0)
135 return false;
136 tdb_close(tdb_ctx);
137 tdb_ctx = talloc_steal(talloc_autofree_context(), newtdb);
138 return true;
139 }
141 static char *sockmsg_string(enum xsd_sockmsg_type type)
142 {
143 switch (type) {
144 case XS_DEBUG: return "DEBUG";
145 case XS_DIRECTORY: return "DIRECTORY";
146 case XS_READ: return "READ";
147 case XS_GET_PERMS: return "GET_PERMS";
148 case XS_WATCH: return "WATCH";
149 case XS_UNWATCH: return "UNWATCH";
150 case XS_TRANSACTION_START: return "TRANSACTION_START";
151 case XS_TRANSACTION_END: return "TRANSACTION_END";
152 case XS_INTRODUCE: return "INTRODUCE";
153 case XS_RELEASE: return "RELEASE";
154 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
155 case XS_WRITE: return "WRITE";
156 case XS_MKDIR: return "MKDIR";
157 case XS_RM: return "RM";
158 case XS_SET_PERMS: return "SET_PERMS";
159 case XS_WATCH_EVENT: return "WATCH_EVENT";
160 case XS_ERROR: return "ERROR";
161 case XS_IS_DOMAIN_INTRODUCED: return "XS_IS_DOMAIN_INTRODUCED";
162 default:
163 return "**UNKNOWN**";
164 }
165 }
167 void trace(const char *fmt, ...)
168 {
169 va_list arglist;
170 char *str;
171 char sbuf[1024];
172 int ret;
174 if (tracefd < 0)
175 return;
177 /* try to use a static buffer */
178 va_start(arglist, fmt);
179 ret = vsnprintf(sbuf, 1024, fmt, arglist);
180 va_end(arglist);
182 if (ret <= 1024) {
183 write(tracefd, sbuf, ret);
184 return;
185 }
187 /* fail back to dynamic allocation */
188 va_start(arglist, fmt);
189 str = talloc_vasprintf(NULL, fmt, arglist);
190 va_end(arglist);
191 write(tracefd, str, strlen(str));
192 talloc_free(str);
193 }
195 static void trace_io(const struct connection *conn,
196 const char *prefix,
197 const struct buffered_data *data)
198 {
199 unsigned int i;
200 time_t now;
201 struct tm *tm;
203 if (tracefd < 0)
204 return;
206 now = time(NULL);
207 tm = localtime(&now);
209 trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (", prefix, conn,
210 tm->tm_year + 1900, tm->tm_mon + 1,
211 tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
212 sockmsg_string(data->hdr.msg.type));
214 for (i = 0; i < data->hdr.msg.len; i++)
215 trace("%c", (data->buffer[i] != '\0') ? data->buffer[i] : ' ');
216 trace(")\n");
217 }
/* Record in the trace log that an object of the given type now exists. */
void trace_create(const void *data, const char *type)
{
	trace("CREATE %s %p\n", type, data);
}
/* Record in the trace log that an object of the given type is going away. */
void trace_destroy(const void *data, const char *type)
{
	trace("DESTROY %s %p\n", type, data);
}
229 /**
230 * Signal handler for SIGHUP, which requests that the trace log is reopened
231 * (in the main loop). A single byte is written to reopen_log_pipe, to awaken
232 * the select() in the main loop.
233 */
234 static void trigger_reopen_log(int signal __attribute__((unused)))
235 {
236 char c = 'A';
237 write(reopen_log_pipe[1], &c, 1);
238 }
241 static void reopen_log(void)
242 {
243 if (tracefile) {
244 if (tracefd > 0)
245 close(tracefd);
247 tracefd = open(tracefile, O_WRONLY|O_CREAT|O_APPEND, 0600);
249 if (tracefd < 0)
250 perror("Could not open tracefile");
251 else
252 trace("\n***\n");
253 }
254 }
257 static bool write_messages(struct connection *conn)
258 {
259 int ret;
260 struct buffered_data *out;
262 out = list_top(&conn->out_list, struct buffered_data, list);
263 if (out == NULL)
264 return true;
266 if (out->inhdr) {
267 if (verbose)
268 xprintf("Writing msg %s (%.*s) out to %p\n",
269 sockmsg_string(out->hdr.msg.type),
270 out->hdr.msg.len,
271 out->buffer, conn);
272 ret = conn->write(conn, out->hdr.raw + out->used,
273 sizeof(out->hdr) - out->used);
274 if (ret < 0)
275 return false;
277 out->used += ret;
278 if (out->used < sizeof(out->hdr))
279 return true;
281 out->inhdr = false;
282 out->used = 0;
284 /* Second write might block if non-zero. */
285 if (out->hdr.msg.len && !conn->domain)
286 return true;
287 }
289 ret = conn->write(conn, out->buffer + out->used,
290 out->hdr.msg.len - out->used);
291 if (ret < 0)
292 return false;
294 out->used += ret;
295 if (out->used != out->hdr.msg.len)
296 return true;
298 trace_io(conn, "OUT", out);
300 list_del(&out->list);
301 talloc_free(out);
303 return true;
304 }
306 static int destroy_conn(void *_conn)
307 {
308 struct connection *conn = _conn;
310 /* Flush outgoing if possible, but don't block. */
311 if (!conn->domain) {
312 fd_set set;
313 struct timeval none;
315 FD_ZERO(&set);
316 FD_SET(conn->fd, &set);
317 none.tv_sec = none.tv_usec = 0;
319 while (!list_empty(&conn->out_list)
320 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
321 if (!write_messages(conn))
322 break;
323 close(conn->fd);
324 }
325 list_del(&conn->list);
326 trace_destroy(conn, "connection");
327 return 0;
328 }
/*
 * Add fd to set and raise *max to fd if fd is larger.  Negative
 * descriptors are ignored, leaving both set and *max untouched.
 */
static void set_fd(int fd, fd_set *set, int *max)
{
	if (fd >= 0) {
		FD_SET(fd, set);
		*max = (fd > *max) ? fd : *max;
	}
}
341 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock)
342 {
343 struct connection *i;
344 int max = -1;
346 FD_ZERO(inset);
347 FD_ZERO(outset);
349 set_fd(sock, inset, &max);
350 set_fd(ro_sock, inset, &max);
351 set_fd(eventchn_fd, inset, &max);
352 set_fd(reopen_log_pipe[0], inset, &max);
353 list_for_each_entry(i, &connections, list) {
354 if (i->domain)
355 continue;
356 set_fd(i->fd, inset, &max);
357 if (!list_empty(&i->out_list))
358 FD_SET(i->fd, outset);
359 }
360 return max;
361 }
/* talloc destructor: closes the descriptor stored in the freed int. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
370 /* Return a pointer to an fd, self-closing and attached to this pathname. */
371 int *talloc_open(const char *pathname, int flags, int mode)
372 {
373 int *fd;
375 fd = talloc(pathname, int);
376 *fd = open(pathname, flags, mode);
377 if (*fd < 0) {
378 int saved_errno = errno;
379 talloc_free(fd);
380 errno = saved_errno;
381 return NULL;
382 }
383 talloc_set_destructor(fd, destroy_fd);
384 return fd;
385 }
/*
 * Is child a subnode of parent, or equal?
 *
 * True when parent is "/", or when child begins with parent and the
 * character right after that prefix is '/' or the end of the string.
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen = strlen(parent);
	char next;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (streq(parent, "/"))
		return true;

	if (strncmp(child, parent, plen) != 0)
		return false;

	next = child[plen];
	return next == '/' || next == '\0';
}
403 /* If it fails, returns NULL and sets errno. */
404 static struct node *read_node(struct connection *conn, const char *name)
405 {
406 TDB_DATA key, data;
407 uint32_t *p;
408 struct node *node;
409 TDB_CONTEXT * context = tdb_context(conn);
411 key.dptr = (void *)name;
412 key.dsize = strlen(name);
413 data = tdb_fetch(context, key);
415 if (data.dptr == NULL) {
416 if (tdb_error(context) == TDB_ERR_NOEXIST)
417 errno = ENOENT;
418 else {
419 log("TDB error on read: %s", tdb_errorstr(context));
420 errno = EIO;
421 }
422 return NULL;
423 }
425 node = talloc(name, struct node);
426 node->name = talloc_strdup(node, name);
427 node->parent = NULL;
428 node->tdb = tdb_context(conn);
429 talloc_steal(node, data.dptr);
431 /* Datalen, childlen, number of permissions */
432 p = (uint32_t *)data.dptr;
433 node->num_perms = p[0];
434 node->datalen = p[1];
435 node->childlen = p[2];
437 /* Permissions are struct xs_permissions. */
438 node->perms = (void *)&p[3];
439 /* Data is binary blob (usually ascii, no nul). */
440 node->data = node->perms + node->num_perms;
441 /* Children is strings, nul separated. */
442 node->children = node->data + node->datalen;
444 return node;
445 }
447 static bool write_node(struct connection *conn, const struct node *node)
448 {
449 TDB_DATA key, data;
450 void *p;
452 key.dptr = (void *)node->name;
453 key.dsize = strlen(node->name);
455 data.dsize = 3*sizeof(uint32_t)
456 + node->num_perms*sizeof(node->perms[0])
457 + node->datalen + node->childlen;
458 data.dptr = talloc_size(node, data.dsize);
459 ((uint32_t *)data.dptr)[0] = node->num_perms;
460 ((uint32_t *)data.dptr)[1] = node->datalen;
461 ((uint32_t *)data.dptr)[2] = node->childlen;
462 p = data.dptr + 3 * sizeof(uint32_t);
464 memcpy(p, node->perms, node->num_perms*sizeof(node->perms[0]));
465 p += node->num_perms*sizeof(node->perms[0]);
466 memcpy(p, node->data, node->datalen);
467 p += node->datalen;
468 memcpy(p, node->children, node->childlen);
470 /* TDB should set errno, but doesn't even set ecode AFAICT. */
471 if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
472 errno = ENOSPC;
473 return false;
474 }
475 return true;
476 }
478 static enum xs_perm_type perm_for_conn(struct connection *conn,
479 struct xs_permissions *perms,
480 unsigned int num)
481 {
482 unsigned int i;
483 enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
485 if (!conn->can_write)
486 mask &= ~XS_PERM_WRITE;
488 /* Owners and tools get it all... */
489 if (!conn->id || perms[0].id == conn->id)
490 return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
492 for (i = 1; i < num; i++)
493 if (perms[i].id == conn->id)
494 return perms[i].perms & mask;
496 return perms[0].perms & mask;
497 }
/*
 * Return the parent path of node as a new string talloc'ed off node:
 * everything before the last '/' (searched from the second character
 * on), or "/" when there is no such slash.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);
	return talloc_strdup(node, "/");
}
507 /* What do parents say? */
508 static enum xs_perm_type ask_parents(struct connection *conn, const char *name)
509 {
510 struct node *node;
512 do {
513 name = get_parent(name);
514 node = read_node(conn, name);
515 if (node)
516 break;
517 } while (!streq(name, "/"));
519 /* No permission at root? We're in trouble. */
520 if (!node)
521 corrupt(conn, "No permissions file at root");
523 return perm_for_conn(conn, node->perms, node->num_perms);
524 }
526 /* We have a weird permissions system. You can allow someone into a
527 * specific node without allowing it in the parents. If it's going to
528 * fail, however, we don't want the errno to indicate any information
529 * about the node. */
530 static int errno_from_parents(struct connection *conn, const char *node,
531 int errnum, enum xs_perm_type perm)
532 {
533 /* We always tell them about memory failures. */
534 if (errnum == ENOMEM)
535 return errnum;
537 if (ask_parents(conn, node) & perm)
538 return errnum;
539 return EACCES;
540 }
542 /* If it fails, returns NULL and sets errno. */
543 struct node *get_node(struct connection *conn,
544 const char *name,
545 enum xs_perm_type perm)
546 {
547 struct node *node;
549 if (!name || !is_valid_nodename(name)) {
550 errno = EINVAL;
551 return NULL;
552 }
553 node = read_node(conn, name);
554 /* If we don't have permission, we don't have node. */
555 if (node) {
556 if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
557 != perm)
558 node = NULL;
559 }
560 /* Clean up errno if they weren't supposed to know. */
561 if (!node)
562 errno = errno_from_parents(conn, name, errno, perm);
563 return node;
564 }
566 static struct buffered_data *new_buffer(void *ctx)
567 {
568 struct buffered_data *data;
570 data = talloc_zero(ctx, struct buffered_data);
571 if (data == NULL)
572 return NULL;
574 data->inhdr = true;
575 return data;
576 }
578 /* Return length of string (including nul) at this offset. */
579 static unsigned int get_string(const struct buffered_data *data,
580 unsigned int offset)
581 {
582 const char *nul;
584 if (offset >= data->used)
585 return 0;
587 nul = memchr(data->buffer + offset, 0, data->used - offset);
588 if (!nul)
589 return 0;
591 return nul - (data->buffer + offset) + 1;
592 }
594 /* Break input into vectors, return the number, fill in up to num of them. */
595 unsigned int get_strings(struct buffered_data *data,
596 char *vec[], unsigned int num)
597 {
598 unsigned int off, i, len;
600 off = i = 0;
601 while ((len = get_string(data, off)) != 0) {
602 if (i < num)
603 vec[i] = data->buffer + off;
604 i++;
605 off += len;
606 }
607 return i;
608 }
610 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
611 const void *data, unsigned int len)
612 {
613 struct buffered_data *bdata;
615 /* Message is a child of the connection context for auto-cleanup. */
616 bdata = new_buffer(conn);
617 bdata->buffer = talloc_array(bdata, char, len);
619 /* Echo request header in reply unless this is an async watch event. */
620 if (type != XS_WATCH_EVENT) {
621 memcpy(&bdata->hdr.msg, &conn->in->hdr.msg,
622 sizeof(struct xsd_sockmsg));
623 } else {
624 memset(&bdata->hdr.msg, 0, sizeof(struct xsd_sockmsg));
625 }
627 /* Update relevant header fields and fill in the message body. */
628 bdata->hdr.msg.type = type;
629 bdata->hdr.msg.len = len;
630 memcpy(bdata->buffer, data, len);
632 /* Queue for later transmission. */
633 list_add_tail(&bdata->list, &conn->out_list);
634 }
636 /* Some routines (write, mkdir, etc) just need a non-error return */
637 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
638 {
639 send_reply(conn, type, "OK", sizeof("OK"));
640 }
642 void send_error(struct connection *conn, int error)
643 {
644 unsigned int i;
646 for (i = 0; error != xsd_errors[i].errnum; i++) {
647 if (i == ARRAY_SIZE(xsd_errors) - 1) {
648 eprintf("xenstored: error %i untranslatable", error);
649 i = 0; /* EINVAL */
650 break;
651 }
652 }
653 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
654 strlen(xsd_errors[i].errstring) + 1);
655 }
/*
 * True when node consists only of ASCII letters, digits and the
 * characters '-', '/', '_' and '@' (the empty string qualifies).
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix spans the whole string iff it ends at the nul. */
	return node[strspn(node, allowed)] == '\0';
}
/*
 * A node name is valid when it starts with '/', does not end with '/'
 * (the root "/" excepted), contains no "//" and uses only the characters
 * accepted by valid_chars().
 */
bool is_valid_nodename(const char *node)
{
	/* Must start in /. */
	if (!strstarts(node, "/"))
		return false;

	/* Cannot end in / (unless it's just "/"). */
	if (!streq(node, "/") && strends(node, "/"))
		return false;

	/* No double //, and only permitted characters. */
	return !strstr(node, "//") && valid_chars(node);
}
683 /* We expect one arg in the input: return NULL otherwise. */
684 static const char *onearg(struct buffered_data *in)
685 {
686 if (!in->used || get_string(in, 0) != in->used)
687 return NULL;
688 return in->buffer;
689 }
691 static char *perms_to_strings(const void *ctx,
692 struct xs_permissions *perms, unsigned int num,
693 unsigned int *len)
694 {
695 unsigned int i;
696 char *strings = NULL;
697 char buffer[MAX_STRLEN(unsigned int) + 1];
699 for (*len = 0, i = 0; i < num; i++) {
700 if (!xs_perm_to_string(&perms[i], buffer))
701 return NULL;
703 strings = talloc_realloc(ctx, strings, char,
704 *len + strlen(buffer) + 1);
705 strcpy(strings + *len, buffer);
706 *len += strlen(buffer) + 1;
707 }
708 return strings;
709 }
/*
 * Turn a relative node name into an absolute one by prepending the
 * connection's implicit path.  NULL, names already starting with '/',
 * and names on connections without an implicit path are returned
 * unchanged; otherwise the result is a new string talloc'ed off node.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *prefix;

	if (!node || strstarts(node, "/"))
		return (char *)node;

	prefix = get_implicit_path(conn);
	return prefix ? talloc_asprintf(node, "%s/%s", prefix, node)
		      : (char *)node;
}
/*
 * True when node names an event, i.e. is non-NULL and starts with '@'.
 * Otherwise errno is set to EINVAL and false is returned.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
732 static void send_directory(struct connection *conn, const char *name)
733 {
734 struct node *node;
736 name = canonicalize(conn, name);
737 node = get_node(conn, name, XS_PERM_READ);
738 if (!node) {
739 send_error(conn, errno);
740 return;
741 }
743 send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
744 }
746 static void do_read(struct connection *conn, const char *name)
747 {
748 struct node *node;
750 name = canonicalize(conn, name);
751 node = get_node(conn, name, XS_PERM_READ);
752 if (!node) {
753 send_error(conn, errno);
754 return;
755 }
757 send_reply(conn, XS_READ, node->data, node->datalen);
758 }
760 static void delete_node_single(struct connection *conn, struct node *node)
761 {
762 TDB_DATA key;
764 key.dptr = (void *)node->name;
765 key.dsize = strlen(node->name);
767 if (tdb_delete(tdb_context(conn), key) != 0)
768 corrupt(conn, "Could not delete '%s'", node->name);
769 }
/*
 * Return the part of name after its last '/'.  Must not be / — and name
 * must contain a '/', since the result of strrchr() is used unchecked.
 */
static char *basename(const char *name)
{
	char *slash = strrchr(name, '/');

	return slash + 1;
}
777 static struct node *construct_node(struct connection *conn, const char *name)
778 {
779 const char *base;
780 unsigned int baselen;
781 struct node *parent, *node;
782 char *children, *parentname = get_parent(name);
784 /* If parent doesn't exist, create it. */
785 parent = read_node(conn, parentname);
786 if (!parent)
787 parent = construct_node(conn, parentname);
788 if (!parent)
789 return NULL;
791 /* Add child to parent. */
792 base = basename(name);
793 baselen = strlen(base) + 1;
794 children = talloc_array(name, char, parent->childlen + baselen);
795 memcpy(children, parent->children, parent->childlen);
796 memcpy(children + parent->childlen, base, baselen);
797 parent->children = children;
798 parent->childlen += baselen;
800 /* Allocate node */
801 node = talloc(name, struct node);
802 node->tdb = tdb_context(conn);
803 node->name = talloc_strdup(node, name);
805 /* Inherit permissions, except domains own what they create */
806 node->num_perms = parent->num_perms;
807 node->perms = talloc_memdup(node, parent->perms,
808 node->num_perms * sizeof(node->perms[0]));
809 if (conn && conn->id)
810 node->perms[0].id = conn->id;
812 /* No children, no data */
813 node->children = node->data = NULL;
814 node->childlen = node->datalen = 0;
815 node->parent = parent;
816 return node;
817 }
819 static int destroy_node(void *_node)
820 {
821 struct node *node = _node;
822 TDB_DATA key;
824 if (streq(node->name, "/"))
825 corrupt(NULL, "Destroying root node!");
827 key.dptr = (void *)node->name;
828 key.dsize = strlen(node->name);
830 tdb_delete(node->tdb, key);
831 return 0;
832 }
834 static struct node *create_node(struct connection *conn,
835 const char *name,
836 void *data, unsigned int datalen)
837 {
838 struct node *node, *i;
840 node = construct_node(conn, name);
841 if (!node)
842 return NULL;
844 node->data = data;
845 node->datalen = datalen;
847 /* We write out the nodes down, setting destructor in case
848 * something goes wrong. */
849 for (i = node; i; i = i->parent) {
850 if (!write_node(conn, i))
851 return NULL;
852 talloc_set_destructor(i, destroy_node);
853 }
855 /* OK, now remove destructors so they stay around */
856 for (i = node; i; i = i->parent)
857 talloc_set_destructor(i, NULL);
858 return node;
859 }
861 /* path, data... */
862 static void do_write(struct connection *conn, struct buffered_data *in)
863 {
864 unsigned int offset, datalen;
865 struct node *node;
866 char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
867 char *name;
869 /* Extra "strings" can be created by binary data. */
870 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
871 send_error(conn, EINVAL);
872 return;
873 }
875 offset = strlen(vec[0]) + 1;
876 datalen = in->used - offset;
878 name = canonicalize(conn, vec[0]);
879 node = get_node(conn, name, XS_PERM_WRITE);
880 if (!node) {
881 /* No permissions, invalid input? */
882 if (errno != ENOENT) {
883 send_error(conn, errno);
884 return;
885 }
886 node = create_node(conn, name, in->buffer + offset, datalen);
887 if (!node) {
888 send_error(conn, errno);
889 return;
890 }
891 } else {
892 node->data = in->buffer + offset;
893 node->datalen = datalen;
894 if (!write_node(conn, node)){
895 send_error(conn, errno);
896 return;
897 }
898 }
900 add_change_node(conn->transaction, name, false);
901 fire_watches(conn, name, false);
902 send_ack(conn, XS_WRITE);
903 }
905 static void do_mkdir(struct connection *conn, const char *name)
906 {
907 struct node *node;
909 name = canonicalize(conn, name);
910 node = get_node(conn, name, XS_PERM_WRITE);
912 /* If it already exists, fine. */
913 if (!node) {
914 /* No permissions? */
915 if (errno != ENOENT) {
916 send_error(conn, errno);
917 return;
918 }
919 node = create_node(conn, name, NULL, 0);
920 if (!node) {
921 send_error(conn, errno);
922 return;
923 }
924 add_change_node(conn->transaction, name, false);
925 fire_watches(conn, name, false);
926 }
927 send_ack(conn, XS_MKDIR);
928 }
930 static void delete_node(struct connection *conn, struct node *node)
931 {
932 unsigned int i;
934 /* Delete self, then delete children. If we crash, then the worst
935 that can happen is the children will continue to take up space, but
936 will otherwise be unreachable. */
937 delete_node_single(conn, node);
939 /* Delete children, too. */
940 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
941 struct node *child;
943 child = read_node(conn,
944 talloc_asprintf(node, "%s/%s", node->name,
945 node->children + i));
946 if (child) {
947 delete_node(conn, child);
948 }
949 else {
950 trace("delete_node: No child '%s/%s' found!\n",
951 node->name, node->children + i);
952 /* Skip it, we've already deleted the parent. */
953 }
954 }
955 }
/*
 * Delete memory using memmove: drop len bytes at offset off from a
 * region of total bytes by sliding the tail down over them.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	char *base = mem;

	memmove(base + off, base + off + len, total - off - len);
}
965 static bool remove_child_entry(struct connection *conn, struct node *node,
966 size_t offset)
967 {
968 size_t childlen = strlen(node->children + offset);
969 memdel(node->children, offset, childlen + 1, node->childlen);
970 node->childlen -= childlen + 1;
971 return write_node(conn, node);
972 }
975 static bool delete_child(struct connection *conn,
976 struct node *node, const char *childname)
977 {
978 unsigned int i;
980 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
981 if (streq(node->children+i, childname)) {
982 return remove_child_entry(conn, node, i);
983 }
984 }
985 corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
986 return false;
987 }
/*
 * Remove node (called name) and its whole subtree.  Returns 1 on
 * success.  On failure an EINVAL error reply has already been sent and 0
 * is returned.
 */
static int _rm(struct connection *conn, struct node *node, const char *name)
{
	/* Delete from parent first, then if we crash, the worst that can
	   happen is the child will continue to take up space, but will
	   otherwise be unreachable. */
	struct node *parent = read_node(conn, get_parent(name));
	if (!parent) {
		send_error(conn, EINVAL);
		return 0;
	}

	if (!delete_child(conn, parent, basename(name))) {
		send_error(conn, EINVAL);
		return 0;
	}

	delete_node(conn, node);
	return 1;
}
1011 static void internal_rm(const char *name)
1013 char *tname = talloc_strdup(NULL, name);
1014 struct node *node = read_node(NULL, tname);
1015 if (node)
1016 _rm(NULL, node, tname);
1017 talloc_free(node);
1018 talloc_free(tname);
1022 static void do_rm(struct connection *conn, const char *name)
1024 struct node *node;
1026 name = canonicalize(conn, name);
1027 node = get_node(conn, name, XS_PERM_WRITE);
1028 if (!node) {
1029 /* Didn't exist already? Fine, if parent exists. */
1030 if (errno == ENOENT) {
1031 node = read_node(conn, get_parent(name));
1032 if (node) {
1033 send_ack(conn, XS_RM);
1034 return;
1036 /* Restore errno, just in case. */
1037 errno = ENOENT;
1039 send_error(conn, errno);
1040 return;
1043 if (streq(name, "/")) {
1044 send_error(conn, EINVAL);
1045 return;
1048 if (_rm(conn, node, name)) {
1049 add_change_node(conn->transaction, name, true);
1050 fire_watches(conn, name, true);
1051 send_ack(conn, XS_RM);
1056 static void do_get_perms(struct connection *conn, const char *name)
1058 struct node *node;
1059 char *strings;
1060 unsigned int len;
1062 name = canonicalize(conn, name);
1063 node = get_node(conn, name, XS_PERM_READ);
1064 if (!node) {
1065 send_error(conn, errno);
1066 return;
1069 strings = perms_to_strings(node, node->perms, node->num_perms, &len);
1070 if (!strings)
1071 send_error(conn, errno);
1072 else
1073 send_reply(conn, XS_GET_PERMS, strings, len);
1076 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1078 unsigned int num;
1079 char *name, *permstr;
1080 struct node *node;
1082 num = xs_count_strings(in->buffer, in->used);
1083 if (num < 2) {
1084 send_error(conn, EINVAL);
1085 return;
1088 /* First arg is node name. */
1089 name = canonicalize(conn, in->buffer);
1090 permstr = in->buffer + strlen(in->buffer) + 1;
1091 num--;
1093 /* We must own node to do this (tools can do this too). */
1094 node = get_node(conn, name, XS_PERM_WRITE|XS_PERM_OWNER);
1095 if (!node) {
1096 send_error(conn, errno);
1097 return;
1100 node->perms = talloc_array(node, struct xs_permissions, num);
1101 node->num_perms = num;
1102 if (!xs_strings_to_perms(node->perms, num, permstr)) {
1103 send_error(conn, errno);
1104 return;
1106 if (!write_node(conn, node)) {
1107 send_error(conn, errno);
1108 return;
1111 add_change_node(conn->transaction, name, false);
1112 fire_watches(conn, name, false);
1113 send_ack(conn, XS_SET_PERMS);
1116 static void do_debug(struct connection *conn, struct buffered_data *in)
1118 int num;
1120 num = xs_count_strings(in->buffer, in->used);
1122 if (streq(in->buffer, "print")) {
1123 if (num < 2) {
1124 send_error(conn, EINVAL);
1125 return;
1127 xprintf("debug: %s", in->buffer + get_string(in, 0));
1129 if (streq(in->buffer, "check"))
1130 check_store();
1131 #ifdef TESTING
1132 /* For testing, we allow them to set id. */
1133 if (streq(in->buffer, "setid")) {
1134 conn->id = atoi(in->buffer + get_string(in, 0));
1135 } else if (streq(in->buffer, "failtest")) {
1136 if (get_string(in, 0) < in->used)
1137 srandom(atoi(in->buffer + get_string(in, 0)));
1138 failtest = true;
1140 #endif /* TESTING */
1141 send_ack(conn, XS_DEBUG);
1144 /* Process "in" for conn: "in" will vanish after this conversation, so
1145 * we can talloc off it for temporary variables. May free "conn".
1146 */
1147 static void process_message(struct connection *conn, struct buffered_data *in)
1149 struct transaction *trans;
1151 trans = transaction_lookup(conn, in->hdr.msg.tx_id);
1152 if (IS_ERR(trans)) {
1153 send_error(conn, -PTR_ERR(trans));
1154 return;
1157 assert(conn->transaction == NULL);
1158 conn->transaction = trans;
1160 switch (in->hdr.msg.type) {
1161 case XS_DIRECTORY:
1162 send_directory(conn, onearg(in));
1163 break;
1165 case XS_READ:
1166 do_read(conn, onearg(in));
1167 break;
1169 case XS_WRITE:
1170 do_write(conn, in);
1171 break;
1173 case XS_MKDIR:
1174 do_mkdir(conn, onearg(in));
1175 break;
1177 case XS_RM:
1178 do_rm(conn, onearg(in));
1179 break;
1181 case XS_GET_PERMS:
1182 do_get_perms(conn, onearg(in));
1183 break;
1185 case XS_SET_PERMS:
1186 do_set_perms(conn, in);
1187 break;
1189 case XS_DEBUG:
1190 do_debug(conn, in);
1191 break;
1193 case XS_WATCH:
1194 do_watch(conn, in);
1195 break;
1197 case XS_UNWATCH:
1198 do_unwatch(conn, in);
1199 break;
1201 case XS_TRANSACTION_START:
1202 do_transaction_start(conn, in);
1203 break;
1205 case XS_TRANSACTION_END:
1206 do_transaction_end(conn, onearg(in));
1207 break;
1209 case XS_INTRODUCE:
1210 do_introduce(conn, in);
1211 break;
1213 case XS_IS_DOMAIN_INTRODUCED:
1214 do_is_domain_introduced(conn, onearg(in));
1215 break;
1217 case XS_RELEASE:
1218 do_release(conn, onearg(in));
1219 break;
1221 case XS_GET_DOMAIN_PATH:
1222 do_get_domain_path(conn, onearg(in));
1223 break;
1225 default:
1226 eprintf("Client unknown operation %i", in->hdr.msg.type);
1227 send_error(conn, ENOSYS);
1228 break;
1231 conn->transaction = NULL;
/*
 * talloc failure handler: data points at a jmp_buf, and control jumps
 * back to the matching setjmp() with value 1.  Never returns normally.
 */
static int out_of_mem(void *data)
{
	longjmp(*(jmp_buf *)data, 1);
}
1239 static void consider_message(struct connection *conn)
1241 jmp_buf talloc_fail;
1243 if (verbose)
1244 xprintf("Got message %s len %i from %p\n",
1245 sockmsg_string(conn->in->hdr.msg.type),
1246 conn->in->hdr.msg.len, conn);
1248 /* For simplicity, we kill the connection on OOM. */
1249 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1250 if (setjmp(talloc_fail)) {
1251 talloc_free(conn);
1252 goto end;
1255 process_message(conn, conn->in);
1257 talloc_free(conn->in);
1258 conn->in = new_buffer(conn);
1260 end:
1261 talloc_set_fail_handler(NULL, NULL);
1262 if (talloc_total_blocks(NULL)
1263 != talloc_total_blocks(talloc_autofree_context()) + 1) {
1264 talloc_report_full(NULL, stderr);
1265 abort();
1269 /* Errors in reading or allocating here mean we get out of sync, so we
1270 * drop the whole client connection. */
1271 static void handle_input(struct connection *conn)
1273 int bytes;
1274 struct buffered_data *in = conn->in;
1276 /* Not finished header yet? */
1277 if (in->inhdr) {
1278 bytes = conn->read(conn, in->hdr.raw + in->used,
1279 sizeof(in->hdr) - in->used);
1280 if (bytes <= 0)
1281 goto bad_client;
1282 in->used += bytes;
1283 if (in->used != sizeof(in->hdr))
1284 return;
1286 if (in->hdr.msg.len > PATH_MAX) {
1287 #ifndef TESTING
1288 syslog(LOG_ERR, "Client tried to feed us %i",
1289 in->hdr.msg.len);
1290 #endif
1291 goto bad_client;
1294 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1295 if (!in->buffer)
1296 goto bad_client;
1297 in->used = 0;
1298 in->inhdr = false;
1299 return;
1302 bytes = conn->read(conn, in->buffer + in->used,
1303 in->hdr.msg.len - in->used);
1304 if (bytes < 0)
1305 goto bad_client;
1307 in->used += bytes;
1308 if (in->used != in->hdr.msg.len)
1309 return;
1311 trace_io(conn, "IN ", in);
1312 consider_message(conn);
1313 return;
1315 bad_client:
1316 /* Kill it. */
1317 talloc_free(conn);
/*
 * Push pending output for the connection; if write_messages() reports
 * failure the connection is freed.
 */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	talloc_free(conn);
}
1326 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1328 struct connection *new;
1330 new = talloc_zero(talloc_autofree_context(), struct connection);
1331 if (!new)
1332 return NULL;
1334 new->fd = -1;
1335 new->write = write;
1336 new->read = read;
1337 new->can_write = true;
1338 INIT_LIST_HEAD(&new->out_list);
1339 INIT_LIST_HEAD(&new->watches);
1340 INIT_LIST_HEAD(&new->transaction_list);
1342 new->in = new_buffer(new);
1343 if (new->in == NULL) {
1344 talloc_free(new);
1345 return NULL;
1348 list_add_tail(&new->list, &connections);
1349 talloc_set_destructor(new, destroy_conn);
1350 trace_create(new, "connection");
1351 return new;
1354 static int writefd(struct connection *conn, const void *data, unsigned int len)
1356 return write(conn->fd, data, len);
1359 static int readfd(struct connection *conn, void *data, unsigned int len)
1361 return read(conn->fd, data, len);
1364 static void accept_connection(int sock, bool canwrite)
1366 int fd;
1367 struct connection *conn;
1369 fd = accept(sock, NULL, NULL);
1370 if (fd < 0)
1371 return;
1373 conn = new_connection(writefd, readfd);
1374 if (conn) {
1375 conn->fd = fd;
1376 conn->can_write = canwrite;
1377 } else
1378 close(fd);
#ifdef TESTING
/* Valgrind can check our writes better if we don't use mmap */
#define TDB_FLAGS TDB_NOMMAP

/*
 * Print a summary of every connection on the global list to stdout,
 * followed by its watches.  Useful for running under debugger.
 */
void dump_connection(void)
{
	struct connection *conn;

	list_for_each_entry(conn, &connections, list) {
		const char *state = list_empty(&conn->out_list) ? "OK" : "BUSY";

		printf("Connection %p:\n", conn);
		printf(" state = %s\n", state);

		if (conn->id)
			printf(" id = %i\n", conn->id);

		/* Only report input progress once something has arrived. */
		if (!conn->in->inhdr || conn->in->used)
			printf(" got %i bytes of %s\n",
			       conn->in->used,
			       conn->in->inhdr ? "header" : "data");
#if 0
		if (conn->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(conn->out->hdr.msg.type),
			       conn->out->buffer);
		if (conn->transaction)
			dump_transaction(conn);
		if (conn->domain)
			dump_domain(conn);
#endif
		dump_watches(conn);
	}
}
#else
#define TDB_FLAGS 0
#endif
1415 /* We create initial nodes manually. */
1416 static void manual_node(const char *name, const char *child)
1418 struct node *node;
1419 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
1421 node = talloc_zero(NULL, struct node);
1422 node->name = name;
1423 node->perms = &perms;
1424 node->num_perms = 1;
1425 node->children = (char *)child;
1426 if (child)
1427 node->childlen = strlen(child) + 1;
1429 if (!write_node(NULL, node))
1430 barf_perror("Could not create initial node %s", name);
1431 talloc_free(node);
1434 static void setup_structure(void)
1436 char *tdbname;
1437 tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1438 tdb_ctx = tdb_open(tdbname, 0, TDB_FLAGS, O_RDWR, 0);
1440 if (tdb_ctx) {
1441 /* XXX When we make xenstored able to restart, this will have
1442 to become cleverer, checking for existing domains and not
1443 removing the corresponding entries, but for now xenstored
1444 cannot be restarted without losing all the registered
1445 watches, which breaks all the backend drivers anyway. We
1446 can therefore get away with just clearing /local and
1447 expecting Xend to put the appropriate entries back in.
1449 When this change is made it is important to note that
1450 dom0's entries must be cleaned up on reboot _before_ this
1451 daemon starts, otherwise the backend drivers and dom0's
1452 balloon driver will pick up stale entries. In the case of
1453 the balloon driver, this can be fatal.
1454 */
1455 char *tlocal = talloc_strdup(NULL, "/local");
1457 check_store();
1459 if (remove_local) {
1460 internal_rm("/local");
1461 create_node(NULL, tlocal, NULL, 0);
1463 check_store();
1466 talloc_free(tlocal);
1468 else {
1469 tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
1470 0640);
1471 if (!tdb_ctx)
1472 barf_perror("Could not create tdb file %s", tdbname);
1474 manual_node("/", "tool");
1475 manual_node("/tool", "xenstored");
1476 manual_node("/tool/xenstored", NULL);
1478 check_store();
/*
 * Hash callback for string-keyed hashtables: starts from 5381 and, for
 * each character of the NUL-terminated key K, multiplies the running hash
 * by 33 and adds the character.
 */
static unsigned int hash_from_key_fn(void *k)
{
	const char *p;
	unsigned int hash = 5381;

	for (p = k; *p != '\0'; p++)
		hash = hash * 33 + (unsigned int)*p;

	return hash;
}
/*
 * Equality callback for string-keyed hashtables: returns 1 when the two
 * NUL-terminated keys compare equal, 0 otherwise.
 */
static int keys_equal_fn(void *key1, void *key2)
{
	const char *lhs = key1;
	const char *rhs = key2;

	return strcmp(lhs, rhs) == 0;
}
/*
 * Build the path of child S2 under parent S1 as a new talloc string on the
 * NULL context.  The root "/" is special-cased so the result does not get
 * a doubled slash.
 */
static char *child_name(const char *s1, const char *s2)
{
	if (strcmp(s1, "/") == 0)
		return talloc_asprintf(NULL, "/%s", s2);

	return talloc_asprintf(NULL, "%s/%s", s1, s2);
}
/*
 * Record STR in HASH: a malloc'ed copy of the string is inserted as the
 * key, with the dummy value (void *)1.
 *
 * Running out of memory for the copy is fatal, because a node silently
 * missing from the table would later look unreachable to clean_store_().
 */
static void remember_string(struct hashtable *hash, const char *str)
{
	size_t size = strlen(str) + 1;
	char *k = malloc(size);

	if (!k)
		barf_perror("Could not allocate memory to record '%s'", str);

	memcpy(k, str, size);
	/* NOTE(review): hashtable_insert's result is still ignored here;
	 * its failure semantics are not visible from this file. */
	hashtable_insert(hash, k, (void *)1);
}
1521 /**
1522 * A node has a children field that names the children of the node, separated
1523 * by NULs. We check whether there are entries in there that are duplicated
1524 * (and if so, delete the second one), and whether there are any that do not
1525 * have a corresponding child node (and if so, delete them). Each valid child
1526 * is then recursively checked.
1528 * No deleting is performed if the recovery flag is cleared (i.e. -R was
1529 * passed on the command line).
1531 * As we go, we record each node in the given reachable hashtable. These
1532 * entries will be used later in clean_store.
1533 */
1534 static void check_store_(const char *name, struct hashtable *reachable)
1536 struct node *node = read_node(NULL, name);
1538 if (node) {
1539 size_t i = 0;
1541 struct hashtable * children =
1542 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1544 remember_string(reachable, name);
1546 while (i < node->childlen) {
1547 size_t childlen = strlen(node->children + i);
1548 char * childname = child_name(node->name,
1549 node->children + i);
1550 struct node *childnode = read_node(NULL, childname);
1552 if (childnode) {
1553 if (hashtable_search(children, childname)) {
1554 log("check_store: '%s' is duplicated!",
1555 childname);
1557 if (recovery) {
1558 remove_child_entry(NULL, node,
1559 i);
1560 i -= childlen + 1;
1563 else {
1564 remember_string(children, childname);
1565 check_store_(childname, reachable);
1568 else {
1569 log("check_store: No child '%s' found!\n",
1570 childname);
1572 if (recovery) {
1573 remove_child_entry(NULL, node, i);
1574 i -= childlen + 1;
1578 talloc_free(childnode);
1579 talloc_free(childname);
1580 i += childlen + 1;
1583 hashtable_destroy(children, 0 /* Don't free values (they are
1584 all (void *)1) */);
1585 talloc_free(node);
1587 else {
1588 /* Impossible, because no database should ever be without the
1589 root, and otherwise, we've just checked in our caller
1590 (which made a recursive call to get here). */
1592 log("check_store: No child '%s' found: impossible!", name);
1597 /**
1598 * Helper to clean_store below.
1599 */
1600 static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
1601 void *private)
1603 struct hashtable *reachable = private;
1604 char * name = talloc_strndup(NULL, key.dptr, key.dsize);
1606 if (!hashtable_search(reachable, name)) {
1607 log("clean_store: '%s' is orphaned!", name);
1608 if (recovery) {
1609 tdb_delete(tdb, key);
1613 talloc_free(name);
1615 return 0;
1619 /**
1620 * Given the list of reachable nodes, iterate over the whole store, and
1621 * remove any that were not reached.
1622 */
1623 static void clean_store(struct hashtable *reachable)
1625 tdb_traverse(tdb_ctx, &clean_store_, reachable);
1629 static void check_store(void)
1631 char * root = talloc_strdup(NULL, "/");
1632 struct hashtable * reachable =
1633 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1635 log("Checking store ...");
1636 check_store_(root, reachable);
1637 clean_store(reachable);
1638 log("Checking store complete.");
1640 hashtable_destroy(reachable, 0 /* Don't free values (they are all
1641 (void *)1) */);
1642 talloc_free(root);
1646 /* Something is horribly wrong: check the store. */
1647 static void corrupt(struct connection *conn, const char *fmt, ...)
1649 va_list arglist;
1650 char *str;
1651 int saved_errno = errno;
1653 va_start(arglist, fmt);
1654 str = talloc_vasprintf(NULL, fmt, arglist);
1655 va_end(arglist);
1657 log("corruption detected by connection %i: err %s: %s",
1658 conn ? (int)conn->id : -1, strerror(saved_errno), str);
1660 #ifdef TESTING
1661 /* Allow them to attach debugger. */
1662 sleep(30);
1663 #endif
1664 check_store();
1668 static void write_pidfile(const char *pidfile)
1670 char buf[100];
1671 int len;
1672 int fd;
1674 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1675 if (fd == -1)
1676 barf_perror("Opening pid file %s", pidfile);
1678 /* We exit silently if daemon already running. */
1679 if (lockf(fd, F_TLOCK, 0) == -1)
1680 exit(0);
1682 len = sprintf(buf, "%d\n", getpid());
1683 write(fd, buf, len);
/*
 * Detach from the invoking process (after Stevens): fork, start a new
 * session, fork again, then (outside TESTING builds) change directory to
 * "/" and clear the umask.  Only the final child returns; both
 * intermediate parents exit with status 0.
 */
static void daemonize(void)
{
	pid_t pid;

	/* Separate from our parent via fork, so init inherits us. */
	pid = fork();
	if (pid < 0)
		barf_perror("Failed to fork daemon");
	if (pid > 0)
		exit(0);

	/* Session leader so ^C doesn't whack us. */
	setsid();

	/* Let session leader exit so child cannot regain CTTY */
	pid = fork();
	if (pid < 0)
		barf_perror("Failed to fork daemon");
	if (pid > 0)
		exit(0);

#ifndef TESTING	/* Relative paths for socket names */
	/* Move off any mount points we might be in. */
	chdir("/");
#endif
	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/* Print the command-line help text to stderr. */
static void usage(void)
{
	static const char help[] =
"Usage:\n"
"\n"
" xenstored <options>\n"
"\n"
"where options may include:\n"
"\n"
" --no-domain-init to state that xenstored should not initialise dom0,\n"
" --pid-file <file> giving a file for the daemon's pid to be written,\n"
" --help to output this message,\n"
" --no-fork to request that the daemon does not fork,\n"
" --output-pid to request that the pid of the daemon is output,\n"
" --trace-file <file> giving the file for logging, and\n"
" --no-recovery to request that no recovery should be attempted when\n"
" the store is corrupted (debug only),\n"
" --preserve-local to request that /local is preserved on start-up,\n"
" --verbose to request verbose execution.\n";

	/* The text contains no conversion specifiers, so it is written
	 * out verbatim. */
	fputs(help, stderr);
}
/*
 * Long options accepted by main().  Each maps to the short option letter
 * in its last field; "pid-file" and "trace-file" take an argument.
 */
static struct option options[] = {
	{ "no-domain-init", no_argument,       NULL, 'D' },
	{ "pid-file",       required_argument, NULL, 'F' },
	{ "help",           no_argument,       NULL, 'H' },
	{ "no-fork",        no_argument,       NULL, 'N' },
	{ "output-pid",     no_argument,       NULL, 'P' },
	{ "trace-file",     required_argument, NULL, 'T' },
	{ "no-recovery",    no_argument,       NULL, 'R' },
	{ "preserve-local", no_argument,       NULL, 'L' },
	{ "verbose",        no_argument,       NULL, 'V' },
	{ NULL,             no_argument,       NULL, 0 }	/* terminator */
};
/* NOTE(review): declared here but not referenced in the visible part of
 * this file; presumably a debugging helper defined elsewhere - confirm. */
extern void dump_conn(struct connection *conn);
1751 int main(int argc, char *argv[])
1753 int opt, *sock, *ro_sock, max;
1754 struct sockaddr_un addr;
1755 fd_set inset, outset;
1756 bool dofork = true;
1757 bool outputpid = false;
1758 bool no_domain_init = false;
1759 const char *pidfile = NULL;
1761 while ((opt = getopt_long(argc, argv, "DF:HNPT:RLV", options,
1762 NULL)) != -1) {
1763 switch (opt) {
1764 case 'D':
1765 no_domain_init = true;
1766 break;
1767 case 'F':
1768 pidfile = optarg;
1769 break;
1770 case 'H':
1771 usage();
1772 return 0;
1773 case 'N':
1774 dofork = false;
1775 break;
1776 case 'P':
1777 outputpid = true;
1778 break;
1779 case 'R':
1780 recovery = false;
1781 break;
1782 case 'L':
1783 remove_local = false;
1784 break;
1785 case 'T':
1786 tracefile = optarg;
1787 break;
1788 case 'V':
1789 verbose = true;
1790 break;
1793 if (optind != argc)
1794 barf("%s: No arguments desired", argv[0]);
1796 reopen_log();
1798 if (dofork) {
1799 openlog("xenstored", 0, LOG_DAEMON);
1800 daemonize();
1802 if (pidfile)
1803 write_pidfile(pidfile);
1805 talloc_enable_leak_report_full();
1807 /* Create sockets for them to listen to. */
1808 sock = talloc(talloc_autofree_context(), int);
1809 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1810 if (*sock < 0)
1811 barf_perror("Could not create socket");
1812 ro_sock = talloc(talloc_autofree_context(), int);
1813 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1814 if (*ro_sock < 0)
1815 barf_perror("Could not create socket");
1816 talloc_set_destructor(sock, destroy_fd);
1817 talloc_set_destructor(ro_sock, destroy_fd);
1819 /* Don't kill us with SIGPIPE. */
1820 signal(SIGPIPE, SIG_IGN);
1822 /* FIXME: Be more sophisticated, don't mug running daemon. */
1823 unlink(xs_daemon_socket());
1824 unlink(xs_daemon_socket_ro());
1826 addr.sun_family = AF_UNIX;
1827 strcpy(addr.sun_path, xs_daemon_socket());
1828 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1829 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1830 strcpy(addr.sun_path, xs_daemon_socket_ro());
1831 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1832 barf_perror("Could not bind socket to %s",
1833 xs_daemon_socket_ro());
1834 if (chmod(xs_daemon_socket(), 0600) != 0
1835 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1836 barf_perror("Could not chmod sockets");
1838 if (listen(*sock, 1) != 0
1839 || listen(*ro_sock, 1) != 0)
1840 barf_perror("Could not listen on sockets");
1842 if (pipe(reopen_log_pipe)) {
1843 barf_perror("pipe");
1846 /* Setup the database */
1847 setup_structure();
1849 /* Listen to hypervisor. */
1850 if (!no_domain_init)
1851 domain_init();
1853 /* Restore existing connections. */
1854 restore_existing_connections();
1856 if (outputpid) {
1857 printf("%i\n", getpid());
1858 fflush(stdout);
1861 /* close stdin/stdout now we're ready to accept connections */
1862 if (dofork) {
1863 close(STDIN_FILENO);
1864 close(STDOUT_FILENO);
1865 close(STDERR_FILENO);
1868 signal(SIGHUP, trigger_reopen_log);
1870 #ifdef TESTING
1871 signal(SIGUSR1, stop_failtest);
1872 #endif
1874 /* Get ready to listen to the tools. */
1875 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1877 /* Main loop. */
1878 /* FIXME: Rewrite so noone can starve. */
1879 for (;;) {
1880 struct connection *i;
1882 if (select(max+1, &inset, &outset, NULL, NULL) < 0) {
1883 if (errno == EINTR)
1884 continue;
1885 barf_perror("Select failed");
1888 if (FD_ISSET(reopen_log_pipe[0], &inset)) {
1889 char c;
1890 read(reopen_log_pipe[0], &c, 1);
1891 reopen_log();
1894 if (FD_ISSET(*sock, &inset))
1895 accept_connection(*sock, true);
1897 if (FD_ISSET(*ro_sock, &inset))
1898 accept_connection(*ro_sock, false);
1900 if (eventchn_fd > 0 && FD_ISSET(eventchn_fd, &inset))
1901 handle_event();
1903 list_for_each_entry(i, &connections, list) {
1904 if (i->domain)
1905 continue;
1907 /* Operations can delete themselves or others
1908 * (xs_release): list is not safe after input,
1909 * so break. */
1910 if (FD_ISSET(i->fd, &inset)) {
1911 handle_input(i);
1912 break;
1914 if (FD_ISSET(i->fd, &outset)) {
1915 handle_output(i);
1916 break;
1920 /* Handle all possible I/O for domain connections. */
1921 more:
1922 list_for_each_entry(i, &connections, list) {
1923 if (!i->domain)
1924 continue;
1926 if (domain_can_read(i)) {
1927 handle_input(i);
1928 goto more;
1931 if (domain_can_write(i) && !list_empty(&i->out_list)) {
1932 handle_output(i);
1933 goto more;
1937 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1941 /*
1942 * Local variables:
1943 * c-file-style: "linux"
1944 * indent-tabs-mode: t
1945 * c-indent-level: 8
1946 * c-basic-offset: 8
1947 * tab-width: 8
1948 * End:
1949 */