direct-io.hg

view tools/xenstore/xenstored_core.c @ 10355:7fba181c8531

[TOOLS] Introduce xc_evtchn_*() interface for interacting with /dev/xen/evtchn.
No longer open the device as non-blocking: all reads immediately follow
a select() on the device indicating it's ready to read.

Signed-off-by: John Levon <john.levon@sun.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Jun 15 13:11:31 2006 +0100 (2006-06-15)
parents d809359ab32d
children c471b326b75e
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored_core.h"
48 #include "xenstored_watch.h"
49 #include "xenstored_transaction.h"
50 #include "xenstored_domain.h"
51 #include "xenctrl.h"
52 #include "tdb.h"
54 #include "hashtable.h"
57 extern int xce_handle; /* in xenstored_domain.c */
59 static bool verbose = false;
60 LIST_HEAD(connections);
61 static int tracefd = -1;
62 static bool recovery = true;
63 static bool remove_local = true;
64 static int reopen_log_pipe[2];
65 static char *tracefile = NULL;
66 static TDB_CONTEXT *tdb_ctx;
68 static void corrupt(struct connection *conn, const char *fmt, ...);
69 static void check_store(void);
71 #define log(...) \
72 do { \
73 char *s = talloc_asprintf(NULL, __VA_ARGS__); \
74 trace("%s\n", s); \
75 syslog(LOG_ERR, "%s", s); \
76 talloc_free(s); \
77 } while (0)
80 int quota_nb_entry_per_domain = 1000;
81 int quota_nb_watch_per_domain = 128;
82 int quota_max_entry_size = 2048; /* 2K */
84 #ifdef TESTING
85 static bool failtest = false;
87 /* We override talloc's malloc. */
88 void *test_malloc(size_t size)
89 {
90 /* 1 in 20 means only about 50% of connections establish. */
91 if (failtest && (random() % 32) == 0)
92 return NULL;
93 return malloc(size);
94 }
96 static void stop_failtest(int signum __attribute__((unused)))
97 {
98 failtest = false;
99 }
101 /* Need these before we #define away write_all/mkdir in testing.h */
102 bool test_write_all(int fd, void *contents, unsigned int len);
103 bool test_write_all(int fd, void *contents, unsigned int len)
104 {
105 if (failtest && (random() % 8) == 0) {
106 if (len)
107 len = random() % len;
108 write(fd, contents, len);
109 errno = ENOSPC;
110 return false;
111 }
112 return xs_write_all(fd, contents, len);
113 }
115 int test_mkdir(const char *dir, int perms);
116 int test_mkdir(const char *dir, int perms)
117 {
118 if (failtest && (random() % 8) == 0) {
119 errno = ENOSPC;
120 return -1;
121 }
122 return mkdir(dir, perms);
123 }
124 #endif /* TESTING */
126 #include "xenstored_test.h"
128 TDB_CONTEXT *tdb_context(struct connection *conn)
129 {
130 /* conn = NULL used in manual_node at setup. */
131 if (!conn || !conn->transaction)
132 return tdb_ctx;
133 return tdb_transaction_context(conn->transaction);
134 }
136 bool replace_tdb(const char *newname, TDB_CONTEXT *newtdb)
137 {
138 if (rename(newname, xs_daemon_tdb()) != 0)
139 return false;
140 tdb_close(tdb_ctx);
141 tdb_ctx = talloc_steal(talloc_autofree_context(), newtdb);
142 return true;
143 }
145 static char *sockmsg_string(enum xsd_sockmsg_type type)
146 {
147 switch (type) {
148 case XS_DEBUG: return "DEBUG";
149 case XS_DIRECTORY: return "DIRECTORY";
150 case XS_READ: return "READ";
151 case XS_GET_PERMS: return "GET_PERMS";
152 case XS_WATCH: return "WATCH";
153 case XS_UNWATCH: return "UNWATCH";
154 case XS_TRANSACTION_START: return "TRANSACTION_START";
155 case XS_TRANSACTION_END: return "TRANSACTION_END";
156 case XS_INTRODUCE: return "INTRODUCE";
157 case XS_RELEASE: return "RELEASE";
158 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
159 case XS_WRITE: return "WRITE";
160 case XS_MKDIR: return "MKDIR";
161 case XS_RM: return "RM";
162 case XS_SET_PERMS: return "SET_PERMS";
163 case XS_WATCH_EVENT: return "WATCH_EVENT";
164 case XS_ERROR: return "ERROR";
165 case XS_IS_DOMAIN_INTRODUCED: return "XS_IS_DOMAIN_INTRODUCED";
166 default:
167 return "**UNKNOWN**";
168 }
169 }
171 void trace(const char *fmt, ...)
172 {
173 va_list arglist;
174 char *str;
175 char sbuf[1024];
176 int ret, dummy;
178 if (tracefd < 0)
179 return;
181 /* try to use a static buffer */
182 va_start(arglist, fmt);
183 ret = vsnprintf(sbuf, 1024, fmt, arglist);
184 va_end(arglist);
186 if (ret <= 1024) {
187 dummy = write(tracefd, sbuf, ret);
188 return;
189 }
191 /* fail back to dynamic allocation */
192 va_start(arglist, fmt);
193 str = talloc_vasprintf(NULL, fmt, arglist);
194 va_end(arglist);
195 dummy = write(tracefd, str, strlen(str));
196 talloc_free(str);
197 }
199 static void trace_io(const struct connection *conn,
200 const char *prefix,
201 const struct buffered_data *data)
202 {
203 unsigned int i;
204 time_t now;
205 struct tm *tm;
207 if (tracefd < 0)
208 return;
210 now = time(NULL);
211 tm = localtime(&now);
213 trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (", prefix, conn,
214 tm->tm_year + 1900, tm->tm_mon + 1,
215 tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
216 sockmsg_string(data->hdr.msg.type));
218 for (i = 0; i < data->hdr.msg.len; i++)
219 trace("%c", (data->buffer[i] != '\0') ? data->buffer[i] : ' ');
220 trace(")\n");
221 }
/* Record in the trace log that an object of the given type was created. */
void trace_create(const void *data, const char *type)
{
	trace("CREATE %s %p\n", type, data);
}
/* Record in the trace log that an object of the given type was destroyed. */
void trace_destroy(const void *data, const char *type)
{
	trace("DESTROY %s %p\n", type, data);
}
233 /**
234 * Signal handler for SIGHUP, which requests that the trace log is reopened
235 * (in the main loop). A single byte is written to reopen_log_pipe, to awaken
236 * the select() in the main loop.
237 */
238 static void trigger_reopen_log(int signal __attribute__((unused)))
239 {
240 char c = 'A';
241 int dummy;
242 dummy = write(reopen_log_pipe[1], &c, 1);
243 }
246 static void reopen_log(void)
247 {
248 if (tracefile) {
249 if (tracefd > 0)
250 close(tracefd);
252 tracefd = open(tracefile, O_WRONLY|O_CREAT|O_APPEND, 0600);
254 if (tracefd < 0)
255 perror("Could not open tracefile");
256 else
257 trace("\n***\n");
258 }
259 }
262 static bool write_messages(struct connection *conn)
263 {
264 int ret;
265 struct buffered_data *out;
267 out = list_top(&conn->out_list, struct buffered_data, list);
268 if (out == NULL)
269 return true;
271 if (out->inhdr) {
272 if (verbose)
273 xprintf("Writing msg %s (%.*s) out to %p\n",
274 sockmsg_string(out->hdr.msg.type),
275 out->hdr.msg.len,
276 out->buffer, conn);
277 ret = conn->write(conn, out->hdr.raw + out->used,
278 sizeof(out->hdr) - out->used);
279 if (ret < 0)
280 return false;
282 out->used += ret;
283 if (out->used < sizeof(out->hdr))
284 return true;
286 out->inhdr = false;
287 out->used = 0;
289 /* Second write might block if non-zero. */
290 if (out->hdr.msg.len && !conn->domain)
291 return true;
292 }
294 ret = conn->write(conn, out->buffer + out->used,
295 out->hdr.msg.len - out->used);
296 if (ret < 0)
297 return false;
299 out->used += ret;
300 if (out->used != out->hdr.msg.len)
301 return true;
303 trace_io(conn, "OUT", out);
305 list_del(&out->list);
306 talloc_free(out);
308 return true;
309 }
311 static int destroy_conn(void *_conn)
312 {
313 struct connection *conn = _conn;
315 /* Flush outgoing if possible, but don't block. */
316 if (!conn->domain) {
317 fd_set set;
318 struct timeval none;
320 FD_ZERO(&set);
321 FD_SET(conn->fd, &set);
322 none.tv_sec = none.tv_usec = 0;
324 while (!list_empty(&conn->out_list)
325 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
326 if (!write_messages(conn))
327 break;
328 close(conn->fd);
329 }
330 list_del(&conn->list);
331 trace_destroy(conn, "connection");
332 return 0;
333 }
/*
 * Add fd to set and raise *max to fd if fd is larger.  Negative descriptors
 * are ignored.
 */
static void set_fd(int fd, fd_set *set, int *max)
{
	if (fd >= 0) {
		FD_SET(fd, set);
		*max = (fd > *max) ? fd : *max;
	}
}
346 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock)
347 {
348 struct connection *i;
349 int max = -1;
351 FD_ZERO(inset);
352 FD_ZERO(outset);
354 set_fd(sock, inset, &max);
355 set_fd(ro_sock, inset, &max);
356 set_fd(reopen_log_pipe[0], inset, &max);
358 if (xce_handle != -1)
359 set_fd(xc_evtchn_fd(xce_handle), inset, &max);
361 list_for_each_entry(i, &connections, list) {
362 if (i->domain)
363 continue;
364 set_fd(i->fd, inset, &max);
365 if (!list_empty(&i->out_list))
366 FD_SET(i->fd, outset);
367 }
368 return max;
369 }
/* talloc destructor: close the descriptor stored in the int at _fd. */
static int destroy_fd(void *_fd)
{
	const int *fdp = _fd;

	close(*fdp);
	return 0;
}
378 /* Return a pointer to an fd, self-closing and attached to this pathname. */
379 int *talloc_open(const char *pathname, int flags, int mode)
380 {
381 int *fd;
383 fd = talloc(pathname, int);
384 *fd = open(pathname, flags, mode);
385 if (*fd < 0) {
386 int saved_errno = errno;
387 talloc_free(fd);
388 errno = saved_errno;
389 return NULL;
390 }
391 talloc_set_destructor(fd, destroy_fd);
392 return fd;
393 }
/*
 * True when child names the same node as parent or a node below it.
 * "/" is treated as the ancestor of everything.
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (strcmp(parent, "/") == 0)
		return true;

	plen = strlen(parent);
	if (strncmp(child, parent, plen) != 0)
		return false;

	/* The prefix must end on a path-component boundary. */
	switch (child[plen]) {
	case '/':
	case '\0':
		return true;
	default:
		return false;
	}
}
411 /* If it fails, returns NULL and sets errno. */
412 static struct node *read_node(struct connection *conn, const char *name)
413 {
414 TDB_DATA key, data;
415 uint32_t *p;
416 struct node *node;
417 TDB_CONTEXT * context = tdb_context(conn);
419 key.dptr = (void *)name;
420 key.dsize = strlen(name);
421 data = tdb_fetch(context, key);
423 if (data.dptr == NULL) {
424 if (tdb_error(context) == TDB_ERR_NOEXIST)
425 errno = ENOENT;
426 else {
427 log("TDB error on read: %s", tdb_errorstr(context));
428 errno = EIO;
429 }
430 return NULL;
431 }
433 node = talloc(name, struct node);
434 node->name = talloc_strdup(node, name);
435 node->parent = NULL;
436 node->tdb = tdb_context(conn);
437 talloc_steal(node, data.dptr);
439 /* Datalen, childlen, number of permissions */
440 p = (uint32_t *)data.dptr;
441 node->num_perms = p[0];
442 node->datalen = p[1];
443 node->childlen = p[2];
445 /* Permissions are struct xs_permissions. */
446 node->perms = (void *)&p[3];
447 /* Data is binary blob (usually ascii, no nul). */
448 node->data = node->perms + node->num_perms;
449 /* Children is strings, nul separated. */
450 node->children = node->data + node->datalen;
452 return node;
453 }
455 static bool write_node(struct connection *conn, const struct node *node)
456 {
457 /*
458 * conn will be null when this is called from manual_node.
459 * tdb_context copes with this.
460 */
462 TDB_DATA key, data;
463 void *p;
465 key.dptr = (void *)node->name;
466 key.dsize = strlen(node->name);
468 data.dsize = 3*sizeof(uint32_t)
469 + node->num_perms*sizeof(node->perms[0])
470 + node->datalen + node->childlen;
472 if (domain_is_unprivileged(conn) && data.dsize >= quota_max_entry_size)
473 goto error;
475 data.dptr = talloc_size(node, data.dsize);
476 ((uint32_t *)data.dptr)[0] = node->num_perms;
477 ((uint32_t *)data.dptr)[1] = node->datalen;
478 ((uint32_t *)data.dptr)[2] = node->childlen;
479 p = data.dptr + 3 * sizeof(uint32_t);
481 memcpy(p, node->perms, node->num_perms*sizeof(node->perms[0]));
482 p += node->num_perms*sizeof(node->perms[0]);
483 memcpy(p, node->data, node->datalen);
484 p += node->datalen;
485 memcpy(p, node->children, node->childlen);
487 /* TDB should set errno, but doesn't even set ecode AFAICT. */
488 if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
489 corrupt(conn, "Write of %s failed", key.dptr);
490 goto error;
491 }
492 return true;
493 error:
494 errno = ENOSPC;
495 return false;
496 }
498 static enum xs_perm_type perm_for_conn(struct connection *conn,
499 struct xs_permissions *perms,
500 unsigned int num)
501 {
502 unsigned int i;
503 enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
505 if (!conn->can_write)
506 mask &= ~XS_PERM_WRITE;
508 /* Owners and tools get it all... */
509 if (!conn->id || perms[0].id == conn->id)
510 return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
512 for (i = 1; i < num; i++)
513 if (perms[i].id == conn->id)
514 return perms[i].perms & mask;
516 return perms[0].perms & mask;
517 }
/*
 * Return the parent path of node as a new string allocated under node
 * (so node must be a talloc pointer).  Paths with no '/' after the first
 * character have "/" as their parent.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);

	return talloc_strdup(node, "/");
}
527 /* What do parents say? */
528 static enum xs_perm_type ask_parents(struct connection *conn, const char *name)
529 {
530 struct node *node;
532 do {
533 name = get_parent(name);
534 node = read_node(conn, name);
535 if (node)
536 break;
537 } while (!streq(name, "/"));
539 /* No permission at root? We're in trouble. */
540 if (!node)
541 corrupt(conn, "No permissions file at root");
543 return perm_for_conn(conn, node->perms, node->num_perms);
544 }
546 /* We have a weird permissions system. You can allow someone into a
547 * specific node without allowing it in the parents. If it's going to
548 * fail, however, we don't want the errno to indicate any information
549 * about the node. */
550 static int errno_from_parents(struct connection *conn, const char *node,
551 int errnum, enum xs_perm_type perm)
552 {
553 /* We always tell them about memory failures. */
554 if (errnum == ENOMEM)
555 return errnum;
557 if (ask_parents(conn, node) & perm)
558 return errnum;
559 return EACCES;
560 }
562 /* If it fails, returns NULL and sets errno. */
563 struct node *get_node(struct connection *conn,
564 const char *name,
565 enum xs_perm_type perm)
566 {
567 struct node *node;
569 if (!name || !is_valid_nodename(name)) {
570 errno = EINVAL;
571 return NULL;
572 }
573 node = read_node(conn, name);
574 /* If we don't have permission, we don't have node. */
575 if (node) {
576 if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
577 != perm)
578 node = NULL;
579 }
580 /* Clean up errno if they weren't supposed to know. */
581 if (!node)
582 errno = errno_from_parents(conn, name, errno, perm);
583 return node;
584 }
586 static struct buffered_data *new_buffer(void *ctx)
587 {
588 struct buffered_data *data;
590 data = talloc_zero(ctx, struct buffered_data);
591 if (data == NULL)
592 return NULL;
594 data->inhdr = true;
595 return data;
596 }
598 /* Return length of string (including nul) at this offset. */
599 static unsigned int get_string(const struct buffered_data *data,
600 unsigned int offset)
601 {
602 const char *nul;
604 if (offset >= data->used)
605 return 0;
607 nul = memchr(data->buffer + offset, 0, data->used - offset);
608 if (!nul)
609 return 0;
611 return nul - (data->buffer + offset) + 1;
612 }
614 /* Break input into vectors, return the number, fill in up to num of them. */
615 unsigned int get_strings(struct buffered_data *data,
616 char *vec[], unsigned int num)
617 {
618 unsigned int off, i, len;
620 off = i = 0;
621 while ((len = get_string(data, off)) != 0) {
622 if (i < num)
623 vec[i] = data->buffer + off;
624 i++;
625 off += len;
626 }
627 return i;
628 }
630 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
631 const void *data, unsigned int len)
632 {
633 struct buffered_data *bdata;
635 /* Message is a child of the connection context for auto-cleanup. */
636 bdata = new_buffer(conn);
637 bdata->buffer = talloc_array(bdata, char, len);
639 /* Echo request header in reply unless this is an async watch event. */
640 if (type != XS_WATCH_EVENT) {
641 memcpy(&bdata->hdr.msg, &conn->in->hdr.msg,
642 sizeof(struct xsd_sockmsg));
643 } else {
644 memset(&bdata->hdr.msg, 0, sizeof(struct xsd_sockmsg));
645 }
647 /* Update relevant header fields and fill in the message body. */
648 bdata->hdr.msg.type = type;
649 bdata->hdr.msg.len = len;
650 memcpy(bdata->buffer, data, len);
652 /* Queue for later transmission. */
653 list_add_tail(&bdata->list, &conn->out_list);
654 }
656 /* Some routines (write, mkdir, etc) just need a non-error return */
657 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
658 {
659 send_reply(conn, type, "OK", sizeof("OK"));
660 }
662 void send_error(struct connection *conn, int error)
663 {
664 unsigned int i;
666 for (i = 0; error != xsd_errors[i].errnum; i++) {
667 if (i == ARRAY_SIZE(xsd_errors) - 1) {
668 eprintf("xenstored: error %i untranslatable", error);
669 i = 0; /* EINVAL */
670 break;
671 }
672 }
673 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
674 strlen(xsd_errors[i].errstring) + 1);
675 }
/*
 * True when node consists only of ASCII letters, digits and the characters
 * '-', '/', '_' and '@'.  The empty string passes.
 */
static bool valid_chars(const char *node)
{
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix must run all the way to the terminator. */
	return node[strspn(node, allowed)] == '\0';
}
/*
 * A valid node name starts with '/', does not end in '/' unless it is
 * exactly "/", contains no "//" and uses only the characters accepted by
 * valid_chars().
 */
bool is_valid_nodename(const char *node)
{
	bool absolute = strstarts(node, "/");
	bool trailing_slash;

	if (!absolute)
		return false;

	/* Cannot end in / (unless it's just "/"). */
	trailing_slash = strends(node, "/") && !streq(node, "/");
	if (trailing_slash)
		return false;

	/* No double //. */
	if (strstr(node, "//") != NULL)
		return false;

	return valid_chars(node);
}
703 /* We expect one arg in the input: return NULL otherwise. */
704 static const char *onearg(struct buffered_data *in)
705 {
706 if (!in->used || get_string(in, 0) != in->used)
707 return NULL;
708 return in->buffer;
709 }
711 static char *perms_to_strings(const void *ctx,
712 struct xs_permissions *perms, unsigned int num,
713 unsigned int *len)
714 {
715 unsigned int i;
716 char *strings = NULL;
717 char buffer[MAX_STRLEN(unsigned int) + 1];
719 for (*len = 0, i = 0; i < num; i++) {
720 if (!xs_perm_to_string(&perms[i], buffer))
721 return NULL;
723 strings = talloc_realloc(ctx, strings, char,
724 *len + strlen(buffer) + 1);
725 strcpy(strings + *len, buffer);
726 *len += strlen(buffer) + 1;
727 }
728 return strings;
729 }
/*
 * Turn a relative node name into an absolute one by prefixing the
 * connection's implicit path, when it has one.  NULL and absolute names
 * are returned unchanged; a new string is allocated under node.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *prefix;

	if (node && !strstarts(node, "/")) {
		prefix = get_implicit_path(conn);
		if (prefix)
			return talloc_asprintf(node, "%s/%s", prefix, node);
	}
	return (char *)node;
}
/*
 * True when node is a non-NULL name starting with '@'.  Otherwise sets
 * errno to EINVAL and returns false.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
752 static void send_directory(struct connection *conn, const char *name)
753 {
754 struct node *node;
756 name = canonicalize(conn, name);
757 node = get_node(conn, name, XS_PERM_READ);
758 if (!node) {
759 send_error(conn, errno);
760 return;
761 }
763 send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
764 }
766 static void do_read(struct connection *conn, const char *name)
767 {
768 struct node *node;
770 name = canonicalize(conn, name);
771 node = get_node(conn, name, XS_PERM_READ);
772 if (!node) {
773 send_error(conn, errno);
774 return;
775 }
777 send_reply(conn, XS_READ, node->data, node->datalen);
778 }
780 static void delete_node_single(struct connection *conn, struct node *node)
781 {
782 TDB_DATA key;
784 key.dptr = (void *)node->name;
785 key.dsize = strlen(node->name);
787 if (tdb_delete(tdb_context(conn), key) != 0) {
788 corrupt(conn, "Could not delete '%s'", node->name);
789 return;
790 }
791 domain_entry_dec(conn);
792 }
/*
 * Return the final path component: whatever follows the last '/'.
 * name must contain a '/' and must not be "/" itself.
 */
static char *basename(const char *name)
{
	char *last_slash = strrchr(name, '/');

	return last_slash + 1;
}
800 static struct node *construct_node(struct connection *conn, const char *name)
801 {
802 const char *base;
803 unsigned int baselen;
804 struct node *parent, *node;
805 char *children, *parentname = get_parent(name);
807 /* If parent doesn't exist, create it. */
808 parent = read_node(conn, parentname);
809 if (!parent)
810 parent = construct_node(conn, parentname);
811 if (!parent)
812 return NULL;
814 if (domain_entry(conn) >= quota_nb_entry_per_domain)
815 return NULL;
817 /* Add child to parent. */
818 base = basename(name);
819 baselen = strlen(base) + 1;
820 children = talloc_array(name, char, parent->childlen + baselen);
821 memcpy(children, parent->children, parent->childlen);
822 memcpy(children + parent->childlen, base, baselen);
823 parent->children = children;
824 parent->childlen += baselen;
826 /* Allocate node */
827 node = talloc(name, struct node);
828 node->tdb = tdb_context(conn);
829 node->name = talloc_strdup(node, name);
831 /* Inherit permissions, except domains own what they create */
832 node->num_perms = parent->num_perms;
833 node->perms = talloc_memdup(node, parent->perms,
834 node->num_perms * sizeof(node->perms[0]));
835 if (conn && conn->id)
836 node->perms[0].id = conn->id;
838 /* No children, no data */
839 node->children = node->data = NULL;
840 node->childlen = node->datalen = 0;
841 node->parent = parent;
842 domain_entry_inc(conn);
843 return node;
844 }
846 static int destroy_node(void *_node)
847 {
848 struct node *node = _node;
849 TDB_DATA key;
851 if (streq(node->name, "/"))
852 corrupt(NULL, "Destroying root node!");
854 key.dptr = (void *)node->name;
855 key.dsize = strlen(node->name);
857 tdb_delete(node->tdb, key);
858 return 0;
859 }
861 static struct node *create_node(struct connection *conn,
862 const char *name,
863 void *data, unsigned int datalen)
864 {
865 struct node *node, *i;
867 node = construct_node(conn, name);
868 if (!node)
869 return NULL;
871 node->data = data;
872 node->datalen = datalen;
874 /* We write out the nodes down, setting destructor in case
875 * something goes wrong. */
876 for (i = node; i; i = i->parent) {
877 if (!write_node(conn, i)) {
878 domain_entry_dec(conn);
879 return NULL;
880 }
881 talloc_set_destructor(i, destroy_node);
882 }
884 /* OK, now remove destructors so they stay around */
885 for (i = node; i; i = i->parent)
886 talloc_set_destructor(i, NULL);
887 return node;
888 }
890 /* path, data... */
891 static void do_write(struct connection *conn, struct buffered_data *in)
892 {
893 unsigned int offset, datalen;
894 struct node *node;
895 char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
896 char *name;
898 /* Extra "strings" can be created by binary data. */
899 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
900 send_error(conn, EINVAL);
901 return;
902 }
904 offset = strlen(vec[0]) + 1;
905 datalen = in->used - offset;
907 name = canonicalize(conn, vec[0]);
908 node = get_node(conn, name, XS_PERM_WRITE);
909 if (!node) {
910 /* No permissions, invalid input? */
911 if (errno != ENOENT) {
912 send_error(conn, errno);
913 return;
914 }
915 node = create_node(conn, name, in->buffer + offset, datalen);
916 if (!node) {
917 send_error(conn, errno);
918 return;
919 }
920 } else {
921 node->data = in->buffer + offset;
922 node->datalen = datalen;
923 if (!write_node(conn, node)){
924 send_error(conn, errno);
925 return;
926 }
927 }
929 add_change_node(conn->transaction, name, false);
930 fire_watches(conn, name, false);
931 send_ack(conn, XS_WRITE);
932 }
934 static void do_mkdir(struct connection *conn, const char *name)
935 {
936 struct node *node;
938 name = canonicalize(conn, name);
939 node = get_node(conn, name, XS_PERM_WRITE);
941 /* If it already exists, fine. */
942 if (!node) {
943 /* No permissions? */
944 if (errno != ENOENT) {
945 send_error(conn, errno);
946 return;
947 }
948 node = create_node(conn, name, NULL, 0);
949 if (!node) {
950 send_error(conn, errno);
951 return;
952 }
953 add_change_node(conn->transaction, name, false);
954 fire_watches(conn, name, false);
955 }
956 send_ack(conn, XS_MKDIR);
957 }
959 static void delete_node(struct connection *conn, struct node *node)
960 {
961 unsigned int i;
963 /* Delete self, then delete children. If we crash, then the worst
964 that can happen is the children will continue to take up space, but
965 will otherwise be unreachable. */
966 delete_node_single(conn, node);
968 /* Delete children, too. */
969 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
970 struct node *child;
972 child = read_node(conn,
973 talloc_asprintf(node, "%s/%s", node->name,
974 node->children + i));
975 if (child) {
976 delete_node(conn, child);
977 }
978 else {
979 trace("delete_node: No child '%s/%s' found!\n",
980 node->name, node->children + i);
981 /* Skip it, we've already deleted the parent. */
982 }
983 }
984 }
/*
 * Delete len bytes at offset off from a buffer holding total bytes by
 * sliding the tail down with memmove.  The last len bytes of the buffer
 * keep their old contents; the caller accounts for the shorter length.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	/* Arithmetic on void * is a GNU extension; use a byte pointer. */
	unsigned char *bytes = mem;

	memmove(bytes + off, bytes + off + len, total - off - len);
}
994 static bool remove_child_entry(struct connection *conn, struct node *node,
995 size_t offset)
996 {
997 size_t childlen = strlen(node->children + offset);
998 memdel(node->children, offset, childlen + 1, node->childlen);
999 node->childlen -= childlen + 1;
1000 return write_node(conn, node);
1004 static bool delete_child(struct connection *conn,
1005 struct node *node, const char *childname)
1007 unsigned int i;
1009 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
1010 if (streq(node->children+i, childname)) {
1011 return remove_child_entry(conn, node, i);
1014 corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
1015 return false;
/*
 * Remove node (called name) and its subtree.  Returns 1 on success.  On
 * failure returns 0 after sending EINVAL to the client; when conn is NULL
 * (internal removal) there is no client, so nothing is sent.
 */
static int _rm(struct connection *conn, struct node *node, const char *name)
{
	/* Delete from parent first, then if we crash, the worst that can
	   happen is the child will continue to take up space, but will
	   otherwise be unreachable. */
	struct node *parent = read_node(conn, get_parent(name));
	if (!parent) {
		if (conn)
			send_error(conn, EINVAL);
		return 0;
	}

	if (!delete_child(conn, parent, basename(name))) {
		if (conn)
			send_error(conn, EINVAL);
		return 0;
	}

	delete_node(conn, node);
	return 1;
}
/*
 * Remove node name from the global store on the daemon's own behalf (no
 * client connection).  A missing node is silently ignored.  name is copied
 * into a talloc string first because the lookup helpers allocate under it.
 */
static void internal_rm(const char *name)
{
	char *tname = talloc_strdup(NULL, name);
	struct node *node;

	/* Out of memory: nothing can be looked up, so give up quietly. */
	if (!tname)
		return;

	node = read_node(NULL, tname);
	if (node)
		_rm(NULL, node, tname);
	talloc_free(node);
	talloc_free(tname);
}
1051 static void do_rm(struct connection *conn, const char *name)
1053 struct node *node;
1055 name = canonicalize(conn, name);
1056 node = get_node(conn, name, XS_PERM_WRITE);
1057 if (!node) {
1058 /* Didn't exist already? Fine, if parent exists. */
1059 if (errno == ENOENT) {
1060 node = read_node(conn, get_parent(name));
1061 if (node) {
1062 send_ack(conn, XS_RM);
1063 return;
1065 /* Restore errno, just in case. */
1066 errno = ENOENT;
1068 send_error(conn, errno);
1069 return;
1072 if (streq(name, "/")) {
1073 send_error(conn, EINVAL);
1074 return;
1077 if (_rm(conn, node, name)) {
1078 add_change_node(conn->transaction, name, true);
1079 fire_watches(conn, name, true);
1080 send_ack(conn, XS_RM);
1085 static void do_get_perms(struct connection *conn, const char *name)
1087 struct node *node;
1088 char *strings;
1089 unsigned int len;
1091 name = canonicalize(conn, name);
1092 node = get_node(conn, name, XS_PERM_READ);
1093 if (!node) {
1094 send_error(conn, errno);
1095 return;
1098 strings = perms_to_strings(node, node->perms, node->num_perms, &len);
1099 if (!strings)
1100 send_error(conn, errno);
1101 else
1102 send_reply(conn, XS_GET_PERMS, strings, len);
1105 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1107 unsigned int num;
1108 char *name, *permstr;
1109 struct node *node;
1111 num = xs_count_strings(in->buffer, in->used);
1112 if (num < 2) {
1113 send_error(conn, EINVAL);
1114 return;
1117 /* First arg is node name. */
1118 name = canonicalize(conn, in->buffer);
1119 permstr = in->buffer + strlen(in->buffer) + 1;
1120 num--;
1122 /* We must own node to do this (tools can do this too). */
1123 node = get_node(conn, name, XS_PERM_WRITE|XS_PERM_OWNER);
1124 if (!node) {
1125 send_error(conn, errno);
1126 return;
1129 node->perms = talloc_array(node, struct xs_permissions, num);
1130 node->num_perms = num;
1131 if (!xs_strings_to_perms(node->perms, num, permstr)) {
1132 send_error(conn, errno);
1133 return;
1135 if (!write_node(conn, node)) {
1136 send_error(conn, errno);
1137 return;
1140 add_change_node(conn->transaction, name, false);
1141 fire_watches(conn, name, false);
1142 send_ack(conn, XS_SET_PERMS);
1145 static void do_debug(struct connection *conn, struct buffered_data *in)
1147 int num;
1149 num = xs_count_strings(in->buffer, in->used);
1151 if (streq(in->buffer, "print")) {
1152 if (num < 2) {
1153 send_error(conn, EINVAL);
1154 return;
1156 xprintf("debug: %s", in->buffer + get_string(in, 0));
1158 if (streq(in->buffer, "check"))
1159 check_store();
1160 #ifdef TESTING
1161 /* For testing, we allow them to set id. */
1162 if (streq(in->buffer, "setid")) {
1163 conn->id = atoi(in->buffer + get_string(in, 0));
1164 } else if (streq(in->buffer, "failtest")) {
1165 if (get_string(in, 0) < in->used)
1166 srandom(atoi(in->buffer + get_string(in, 0)));
1167 failtest = true;
1169 #endif /* TESTING */
1170 send_ack(conn, XS_DEBUG);
1173 /* Process "in" for conn: "in" will vanish after this conversation, so
1174 * we can talloc off it for temporary variables. May free "conn".
1175 */
1176 static void process_message(struct connection *conn, struct buffered_data *in)
1178 struct transaction *trans;
1180 trans = transaction_lookup(conn, in->hdr.msg.tx_id);
1181 if (IS_ERR(trans)) {
1182 send_error(conn, -PTR_ERR(trans));
1183 return;
1186 assert(conn->transaction == NULL);
1187 conn->transaction = trans;
1189 switch (in->hdr.msg.type) {
1190 case XS_DIRECTORY:
1191 send_directory(conn, onearg(in));
1192 break;
1194 case XS_READ:
1195 do_read(conn, onearg(in));
1196 break;
1198 case XS_WRITE:
1199 do_write(conn, in);
1200 break;
1202 case XS_MKDIR:
1203 do_mkdir(conn, onearg(in));
1204 break;
1206 case XS_RM:
1207 do_rm(conn, onearg(in));
1208 break;
1210 case XS_GET_PERMS:
1211 do_get_perms(conn, onearg(in));
1212 break;
1214 case XS_SET_PERMS:
1215 do_set_perms(conn, in);
1216 break;
1218 case XS_DEBUG:
1219 do_debug(conn, in);
1220 break;
1222 case XS_WATCH:
1223 do_watch(conn, in);
1224 break;
1226 case XS_UNWATCH:
1227 do_unwatch(conn, in);
1228 break;
1230 case XS_TRANSACTION_START:
1231 do_transaction_start(conn, in);
1232 break;
1234 case XS_TRANSACTION_END:
1235 do_transaction_end(conn, onearg(in));
1236 break;
1238 case XS_INTRODUCE:
1239 do_introduce(conn, in);
1240 break;
1242 case XS_IS_DOMAIN_INTRODUCED:
1243 do_is_domain_introduced(conn, onearg(in));
1244 break;
1246 case XS_RELEASE:
1247 do_release(conn, onearg(in));
1248 break;
1250 case XS_GET_DOMAIN_PATH:
1251 do_get_domain_path(conn, onearg(in));
1252 break;
1254 default:
1255 eprintf("Client unknown operation %i", in->hdr.msg.type);
1256 send_error(conn, ENOSYS);
1257 break;
1260 conn->transaction = NULL;
1263 static void consider_message(struct connection *conn)
1265 if (verbose)
1266 xprintf("Got message %s len %i from %p\n",
1267 sockmsg_string(conn->in->hdr.msg.type),
1268 conn->in->hdr.msg.len, conn);
1270 process_message(conn, conn->in);
1272 talloc_free(conn->in);
1273 conn->in = new_buffer(conn);
1276 /* Errors in reading or allocating here mean we get out of sync, so we
1277 * drop the whole client connection. */
1278 static void handle_input(struct connection *conn)
1280 int bytes;
1281 struct buffered_data *in = conn->in;
1283 /* Not finished header yet? */
1284 if (in->inhdr) {
1285 bytes = conn->read(conn, in->hdr.raw + in->used,
1286 sizeof(in->hdr) - in->used);
1287 if (bytes <= 0)
1288 goto bad_client;
1289 in->used += bytes;
1290 if (in->used != sizeof(in->hdr))
1291 return;
1293 if (in->hdr.msg.len > PATH_MAX) {
1294 #ifndef TESTING
1295 syslog(LOG_ERR, "Client tried to feed us %i",
1296 in->hdr.msg.len);
1297 #endif
1298 goto bad_client;
1301 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1302 if (!in->buffer)
1303 goto bad_client;
1304 in->used = 0;
1305 in->inhdr = false;
1306 return;
1309 bytes = conn->read(conn, in->buffer + in->used,
1310 in->hdr.msg.len - in->used);
1311 if (bytes < 0)
1312 goto bad_client;
1314 in->used += bytes;
1315 if (in->used != in->hdr.msg.len)
1316 return;
1318 trace_io(conn, "IN ", in);
1319 consider_message(conn);
1320 return;
1322 bad_client:
1323 /* Kill it. */
1324 talloc_free(conn);
/* Run write_messages() for conn; if it returns a false value the
 * connection is freed.
 */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	talloc_free(conn);
}
1333 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1335 struct connection *new;
1337 new = talloc_zero(talloc_autofree_context(), struct connection);
1338 if (!new)
1339 return NULL;
1341 new->fd = -1;
1342 new->write = write;
1343 new->read = read;
1344 new->can_write = true;
1345 INIT_LIST_HEAD(&new->out_list);
1346 INIT_LIST_HEAD(&new->watches);
1347 INIT_LIST_HEAD(&new->transaction_list);
1349 new->in = new_buffer(new);
1350 if (new->in == NULL) {
1351 talloc_free(new);
1352 return NULL;
1355 list_add_tail(&new->list, &connections);
1356 talloc_set_destructor(new, destroy_conn);
1357 trace_create(new, "connection");
1358 return new;
1361 static int writefd(struct connection *conn, const void *data, unsigned int len)
1363 return write(conn->fd, data, len);
1366 static int readfd(struct connection *conn, void *data, unsigned int len)
1368 return read(conn->fd, data, len);
1371 static void accept_connection(int sock, bool canwrite)
1373 int fd;
1374 struct connection *conn;
1376 fd = accept(sock, NULL, NULL);
1377 if (fd < 0)
1378 return;
1380 conn = new_connection(writefd, readfd);
1381 if (conn) {
1382 conn->fd = fd;
1383 conn->can_write = canwrite;
1384 } else
1385 close(fd);
#ifdef TESTING
/* Valgrind can check our writes better if we don't use mmap */
#define TDB_FLAGS TDB_NOMMAP

/* Print every entry of the global connection list to stdout.
 * Useful for running under debugger.
 */
void dump_connection(void)
{
	struct connection *conn;

	list_for_each_entry(conn, &connections, list) {
		const struct buffered_data *in = conn->in;
		const char *state = list_empty(&conn->out_list) ? "OK" : "BUSY";

		printf("Connection %p:\n", conn);
		printf(" state = %s\n", state);
		if (conn->id)
			printf(" id = %i\n", conn->id);
		/* Only mention input once something has been received. */
		if (!in->inhdr || in->used)
			printf(" got %i bytes of %s\n",
			       in->used, in->inhdr ? "header" : "data");
#if 0
		if (conn->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(conn->out->hdr.msg.type),
			       conn->out->buffer);
		if (conn->transaction)
			dump_transaction(conn);
		if (conn->domain)
			dump_domain(conn);
#endif
		dump_watches(conn);
	}
}
#else
#define TDB_FLAGS 0
#endif
1422 /* We create initial nodes manually. */
1423 static void manual_node(const char *name, const char *child)
1425 struct node *node;
1426 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
1428 node = talloc_zero(NULL, struct node);
1429 node->name = name;
1430 node->perms = &perms;
1431 node->num_perms = 1;
1432 node->children = (char *)child;
1433 if (child)
1434 node->childlen = strlen(child) + 1;
1436 if (!write_node(NULL, node))
1437 barf_perror("Could not create initial node %s", name);
1438 talloc_free(node);
1441 static void setup_structure(void)
1443 char *tdbname;
1444 tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1445 tdb_ctx = tdb_open(tdbname, 0, TDB_FLAGS, O_RDWR, 0);
1447 if (tdb_ctx) {
1448 /* XXX When we make xenstored able to restart, this will have
1449 to become cleverer, checking for existing domains and not
1450 removing the corresponding entries, but for now xenstored
1451 cannot be restarted without losing all the registered
1452 watches, which breaks all the backend drivers anyway. We
1453 can therefore get away with just clearing /local and
1454 expecting Xend to put the appropriate entries back in.
1456 When this change is made it is important to note that
1457 dom0's entries must be cleaned up on reboot _before_ this
1458 daemon starts, otherwise the backend drivers and dom0's
1459 balloon driver will pick up stale entries. In the case of
1460 the balloon driver, this can be fatal.
1461 */
1462 char *tlocal = talloc_strdup(NULL, "/local");
1464 check_store();
1466 if (remove_local) {
1467 internal_rm("/local");
1468 create_node(NULL, tlocal, NULL, 0);
1470 check_store();
1473 talloc_free(tlocal);
1475 else {
1476 tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
1477 0640);
1478 if (!tdb_ctx)
1479 barf_perror("Could not create tdb file %s", tdbname);
1481 manual_node("/", "tool");
1482 manual_node("/tool", "xenstored");
1483 manual_node("/tool/xenstored", NULL);
1485 check_store();
/* Hash callback for the string-keyed hashtables: starts from 5381 and,
 * for every character of the NUL-terminated key, multiplies the running
 * value by 33 and adds the character (converted from plain char).
 */
static unsigned int hash_from_key_fn(void *k)
{
	const char *p;
	unsigned int hash = 5381;

	for (p = k; *p != '\0'; p++)
		hash = hash * 33u + (unsigned int)*p;

	return hash;
}
/* Equality callback for the string-keyed hashtables: returns 1 when the
 * two NUL-terminated keys compare equal with strcmp(), 0 otherwise.
 */
static int keys_equal_fn(void *key1, void *key2)
{
	const char *lhs = key1;
	const char *rhs = key2;

	return strcmp(lhs, rhs) == 0;
}
/* Build the path of child s2 below parent s1 as a new talloc string
 * (NULL context).  The root parent "/" yields "/<s2>" rather than
 * "//<s2>".  Returns whatever talloc_asprintf() returns.
 */
static char *child_name(const char *s1, const char *s2)
{
	bool parent_is_root = (strcmp(s1, "/") == 0);

	if (parent_is_root)
		return talloc_asprintf(NULL, "/%s", s2);

	return talloc_asprintf(NULL, "%s/%s", s1, s2);
}
/* Insert a malloc()ed copy of str into hash as a key; the associated
 * value is the dummy (void *)1.
 *
 * Running out of memory for the copy is fatal (barf_perror()) instead
 * of copying through a NULL pointer.
 * NOTE(review): the copy is presumably released by hashtable_destroy();
 * confirm against the hashtable implementation.
 */
static void remember_string(struct hashtable *hash, const char *str)
{
	size_t size = strlen(str) + 1;
	char *k = malloc(size);

	if (!k)
		barf_perror("Could not allocate hashtable key for %s", str);

	memcpy(k, str, size);
	hashtable_insert(hash, k, (void *)1);
}
1528 /**
1529 * A node has a children field that names the children of the node, separated
1530 * by NULs. We check whether there are entries in there that are duplicated
1531 * (and if so, delete the second one), and whether there are any that do not
1532 * have a corresponding child node (and if so, delete them). Each valid child
1533 * is then recursively checked.
1535 * No deleting is performed if the recovery flag is cleared (i.e. -R was
1536 * passed on the command line).
1538 * As we go, we record each node in the given reachable hashtable. These
1539 * entries will be used later in clean_store.
1540 */
1541 static void check_store_(const char *name, struct hashtable *reachable)
1543 struct node *node = read_node(NULL, name);
1545 if (node) {
1546 size_t i = 0;
1548 struct hashtable * children =
1549 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1551 remember_string(reachable, name);
1553 while (i < node->childlen) {
1554 size_t childlen = strlen(node->children + i);
1555 char * childname = child_name(node->name,
1556 node->children + i);
1557 struct node *childnode = read_node(NULL, childname);
1559 if (childnode) {
1560 if (hashtable_search(children, childname)) {
1561 log("check_store: '%s' is duplicated!",
1562 childname);
1564 if (recovery) {
1565 remove_child_entry(NULL, node,
1566 i);
1567 i -= childlen + 1;
1570 else {
1571 remember_string(children, childname);
1572 check_store_(childname, reachable);
1575 else {
1576 log("check_store: No child '%s' found!\n",
1577 childname);
1579 if (recovery) {
1580 remove_child_entry(NULL, node, i);
1581 i -= childlen + 1;
1585 talloc_free(childnode);
1586 talloc_free(childname);
1587 i += childlen + 1;
1590 hashtable_destroy(children, 0 /* Don't free values (they are
1591 all (void *)1) */);
1592 talloc_free(node);
1594 else {
1595 /* Impossible, because no database should ever be without the
1596 root, and otherwise, we've just checked in our caller
1597 (which made a recursive call to get here). */
1599 log("check_store: No child '%s' found: impossible!", name);
1604 /**
1605 * Helper to clean_store below.
1606 */
1607 static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
1608 void *private)
1610 struct hashtable *reachable = private;
1611 char * name = talloc_strndup(NULL, key.dptr, key.dsize);
1613 if (!hashtable_search(reachable, name)) {
1614 log("clean_store: '%s' is orphaned!", name);
1615 if (recovery) {
1616 tdb_delete(tdb, key);
1620 talloc_free(name);
1622 return 0;
1626 /**
1627 * Given the list of reachable nodes, iterate over the whole store, and
1628 * remove any that were not reached.
1629 */
1630 static void clean_store(struct hashtable *reachable)
1632 tdb_traverse(tdb_ctx, &clean_store_, reachable);
1636 static void check_store(void)
1638 char * root = talloc_strdup(NULL, "/");
1639 struct hashtable * reachable =
1640 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1642 log("Checking store ...");
1643 check_store_(root, reachable);
1644 clean_store(reachable);
1645 log("Checking store complete.");
1647 hashtable_destroy(reachable, 0 /* Don't free values (they are all
1648 (void *)1) */);
1649 talloc_free(root);
1653 /* Something is horribly wrong: check the store. */
1654 static void corrupt(struct connection *conn, const char *fmt, ...)
1656 va_list arglist;
1657 char *str;
1658 int saved_errno = errno;
1660 va_start(arglist, fmt);
1661 str = talloc_vasprintf(NULL, fmt, arglist);
1662 va_end(arglist);
1664 log("corruption detected by connection %i: err %s: %s",
1665 conn ? (int)conn->id : -1, strerror(saved_errno), str);
1667 #ifdef TESTING
1668 /* Allow them to attach debugger. */
1669 sleep(30);
1670 #endif
1671 check_store();
1675 static void write_pidfile(const char *pidfile)
1677 char buf[100];
1678 int len;
1679 int fd;
1681 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1682 if (fd == -1)
1683 barf_perror("Opening pid file %s", pidfile);
1685 /* We exit silently if daemon already running. */
1686 if (lockf(fd, F_TLOCK, 0) == -1)
1687 exit(0);
1689 len = sprintf(buf, "%d\n", getpid());
1690 if (write(fd, buf, len) != len)
1691 barf_perror("Writing pid file %s", pidfile);
/* Detach from the invoking process with the fork / setsid / fork
 * sequence (Stevens).  Unless built for TESTING the working directory
 * moves to "/"; the umask is cleared.
 */
static void daemonize(void)
{
	pid_t pid;

	/* Separate from our parent via fork, so init inherits us. */
	pid = fork();
	if (pid < 0)
		barf_perror("Failed to fork daemon");
	if (pid != 0)
		exit(0);

	/* Session leader so ^C doesn't whack us. */
	setsid();

	/* Let session leader exit so child cannot regain CTTY */
	pid = fork();
	if (pid < 0)
		barf_perror("Failed to fork daemon");
	if (pid != 0)
		exit(0);

#ifndef TESTING	/* Relative paths for socket names */
	/* Move off any mount points we might be in. */
	if (chdir("/") == -1)
		barf_perror("Failed to chdir");
#endif
	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/* Print the command-line help to stderr.  The watch quota option is
 * listed under the name the option table actually accepts, --watch-nb.
 */
static void usage(void)
{
	fprintf(stderr,
"Usage:\n"
"\n"
" xenstored <options>\n"
"\n"
"where options may include:\n"
"\n"
" --no-domain-init to state that xenstored should not initialise dom0,\n"
" --pid-file <file> giving a file for the daemon's pid to be written,\n"
" --help to output this message,\n"
" --no-fork to request that the daemon does not fork,\n"
" --output-pid to request that the pid of the daemon is output,\n"
" --trace-file <file> giving the file for logging, and\n"
" --entry-nb <nb> limit the number of entries per domain,\n"
" --entry-size <size> limit the size of entry per domain, and\n"
" --watch-nb <nb> limit the number of watches per domain,\n"
" --no-recovery to request that no recovery should be attempted when\n"
" the store is corrupted (debug only),\n"
" --preserve-local to request that /local is preserved on start-up,\n"
" --verbose to request verbose execution.\n");
}
/* Long options accepted by main(); each maps to the short option letter
 * handled in its getopt_long() switch.  The all-zero entry ends the table.
 */
static struct option options[] = {
	{ "no-domain-init", no_argument,       NULL, 'D' },
	{ "entry-nb",       required_argument, NULL, 'E' },
	{ "pid-file",       required_argument, NULL, 'F' },
	{ "help",           no_argument,       NULL, 'H' },
	{ "no-fork",        no_argument,       NULL, 'N' },
	{ "output-pid",     no_argument,       NULL, 'P' },
	{ "entry-size",     required_argument, NULL, 'S' },
	{ "trace-file",     required_argument, NULL, 'T' },
	{ "no-recovery",    no_argument,       NULL, 'R' },
	{ "preserve-local", no_argument,       NULL, 'L' },
	{ "verbose",        no_argument,       NULL, 'V' },
	{ "watch-nb",       required_argument, NULL, 'W' },
	{ NULL, 0, NULL, 0 }
};
1764 extern void dump_conn(struct connection *conn);
1766 int main(int argc, char *argv[])
1768 int opt, *sock, *ro_sock, max;
1769 struct sockaddr_un addr;
1770 fd_set inset, outset;
1771 bool dofork = true;
1772 bool outputpid = false;
1773 bool no_domain_init = false;
1774 const char *pidfile = NULL;
1775 int evtchn_fd = -1;
1777 while ((opt = getopt_long(argc, argv, "DE:F:HNPS:T:RLVW:", options,
1778 NULL)) != -1) {
1779 switch (opt) {
1780 case 'D':
1781 no_domain_init = true;
1782 break;
1783 case 'E':
1784 quota_nb_entry_per_domain = strtol(optarg, NULL, 10);
1785 break;
1786 case 'F':
1787 pidfile = optarg;
1788 break;
1789 case 'H':
1790 usage();
1791 return 0;
1792 case 'N':
1793 dofork = false;
1794 break;
1795 case 'P':
1796 outputpid = true;
1797 break;
1798 case 'R':
1799 recovery = false;
1800 break;
1801 case 'L':
1802 remove_local = false;
1803 break;
1804 case 'S':
1805 quota_max_entry_size = strtol(optarg, NULL, 10);
1806 break;
1807 case 'T':
1808 tracefile = optarg;
1809 break;
1810 case 'V':
1811 verbose = true;
1812 break;
1813 case 'W':
1814 quota_nb_watch_per_domain = strtol(optarg, NULL, 10);
1815 break;
1818 if (optind != argc)
1819 barf("%s: No arguments desired", argv[0]);
1821 reopen_log();
1823 /* make sure xenstored directory exists */
1824 if (mkdir(xs_daemon_rundir(), 0755)) {
1825 if (errno != EEXIST) {
1826 perror("error: mkdir daemon rundir");
1827 exit(-1);
1831 if (mkdir(xs_daemon_rootdir(), 0755)) {
1832 if (errno != EEXIST) {
1833 perror("error: mkdir daemon rootdir");
1834 exit(-1);
1838 if (dofork) {
1839 openlog("xenstored", 0, LOG_DAEMON);
1840 daemonize();
1842 if (pidfile)
1843 write_pidfile(pidfile);
1845 talloc_enable_leak_report_full();
1847 /* Create sockets for them to listen to. */
1848 sock = talloc(talloc_autofree_context(), int);
1849 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1850 if (*sock < 0)
1851 barf_perror("Could not create socket");
1852 ro_sock = talloc(talloc_autofree_context(), int);
1853 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1854 if (*ro_sock < 0)
1855 barf_perror("Could not create socket");
1856 talloc_set_destructor(sock, destroy_fd);
1857 talloc_set_destructor(ro_sock, destroy_fd);
1859 /* Don't kill us with SIGPIPE. */
1860 signal(SIGPIPE, SIG_IGN);
1862 /* FIXME: Be more sophisticated, don't mug running daemon. */
1863 unlink(xs_daemon_socket());
1864 unlink(xs_daemon_socket_ro());
1866 addr.sun_family = AF_UNIX;
1867 strcpy(addr.sun_path, xs_daemon_socket());
1868 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1869 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1870 strcpy(addr.sun_path, xs_daemon_socket_ro());
1871 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1872 barf_perror("Could not bind socket to %s",
1873 xs_daemon_socket_ro());
1874 if (chmod(xs_daemon_socket(), 0600) != 0
1875 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1876 barf_perror("Could not chmod sockets");
1878 if (listen(*sock, 1) != 0
1879 || listen(*ro_sock, 1) != 0)
1880 barf_perror("Could not listen on sockets");
1882 if (pipe(reopen_log_pipe)) {
1883 barf_perror("pipe");
1886 /* Setup the database */
1887 setup_structure();
1889 /* Listen to hypervisor. */
1890 if (!no_domain_init)
1891 domain_init();
1893 /* Restore existing connections. */
1894 restore_existing_connections();
1896 if (outputpid) {
1897 printf("%i\n", getpid());
1898 fflush(stdout);
1901 /* close stdin/stdout now we're ready to accept connections */
1902 if (dofork) {
1903 close(STDIN_FILENO);
1904 close(STDOUT_FILENO);
1905 close(STDERR_FILENO);
1908 signal(SIGHUP, trigger_reopen_log);
1910 #ifdef TESTING
1911 signal(SIGUSR1, stop_failtest);
1912 #endif
1914 if (xce_handle != -1)
1915 evtchn_fd = xc_evtchn_fd(xce_handle);
1917 /* Get ready to listen to the tools. */
1918 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1920 /* Main loop. */
1921 /* FIXME: Rewrite so noone can starve. */
1922 for (;;) {
1923 struct connection *i;
1925 if (select(max+1, &inset, &outset, NULL, NULL) < 0) {
1926 if (errno == EINTR)
1927 continue;
1928 barf_perror("Select failed");
1931 if (FD_ISSET(reopen_log_pipe[0], &inset)) {
1932 char c;
1933 if (read(reopen_log_pipe[0], &c, 1) != 1)
1934 barf_perror("read failed");
1935 reopen_log();
1938 if (FD_ISSET(*sock, &inset))
1939 accept_connection(*sock, true);
1941 if (FD_ISSET(*ro_sock, &inset))
1942 accept_connection(*ro_sock, false);
1944 if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
1945 handle_event();
1947 list_for_each_entry(i, &connections, list) {
1948 if (i->domain)
1949 continue;
1951 /* Operations can delete themselves or others
1952 * (xs_release): list is not safe after input,
1953 * so break. */
1954 if (FD_ISSET(i->fd, &inset)) {
1955 handle_input(i);
1956 break;
1958 if (FD_ISSET(i->fd, &outset)) {
1959 handle_output(i);
1960 break;
1964 /* Handle all possible I/O for domain connections. */
1965 more:
1966 list_for_each_entry(i, &connections, list) {
1967 if (!i->domain)
1968 continue;
1970 if (domain_can_read(i)) {
1971 handle_input(i);
1972 goto more;
1975 if (domain_can_write(i) && !list_empty(&i->out_list)) {
1976 handle_output(i);
1977 goto more;
1981 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1985 /*
1986 * Local variables:
1987 * c-file-style: "linux"
1988 * indent-tabs-mode: t
1989 * c-indent-level: 8
1990 * c-basic-offset: 8
1991 * tab-width: 8
1992 * End:
1993 */