ia64/xen-unstable

view tools/xenstore/xenstored_core.c @ 10878:c471b326b75e

Add a transaction_started field to the xenstored connection structure instead of
walking the list of transactions each time.
Bump the default transaction quota to 10, and make it configurable through the command line.

Signed-off-by: Vincent Hanquez <vincent@xensource.com>
author vhanquez@gwig.uk.xensource.com
date Mon Jul 31 09:30:36 2006 +0000 (2006-07-31)
parents 7fba181c8531
children 1e6d52d06fa4
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored_core.h"
48 #include "xenstored_watch.h"
49 #include "xenstored_transaction.h"
50 #include "xenstored_domain.h"
51 #include "xenctrl.h"
52 #include "tdb.h"
54 #include "hashtable.h"
57 extern int xce_handle; /* in xenstored_domain.c */
59 static bool verbose = false;
60 LIST_HEAD(connections);
61 static int tracefd = -1;
62 static bool recovery = true;
63 static bool remove_local = true;
64 static int reopen_log_pipe[2];
65 static char *tracefile = NULL;
66 static TDB_CONTEXT *tdb_ctx;
68 static void corrupt(struct connection *conn, const char *fmt, ...);
69 static void check_store(void);
/*
 * Format a message once and emit it to both the trace file (via trace())
 * and syslog at LOG_ERR.  If the message cannot be allocated, a fixed
 * fallback line is sent to syslog instead of passing NULL to "%s".
 */
#define log(...)							\
	do {								\
		char *s = talloc_asprintf(NULL, __VA_ARGS__);		\
		if (s) {						\
			trace("%s\n", s);				\
			syslog(LOG_ERR, "%s", s);			\
			talloc_free(s);					\
		} else {						\
			syslog(LOG_ERR,					\
			       "xenstored: out of memory formatting log message"); \
		}							\
	} while (0)
80 int quota_nb_entry_per_domain = 1000;
81 int quota_nb_watch_per_domain = 128;
82 int quota_max_entry_size = 2048; /* 2K */
83 int quota_max_transaction = 10;
85 #ifdef TESTING
86 static bool failtest = false;
88 /* We override talloc's malloc. */
89 void *test_malloc(size_t size)
90 {
91 /* 1 in 20 means only about 50% of connections establish. */
92 if (failtest && (random() % 32) == 0)
93 return NULL;
94 return malloc(size);
95 }
97 static void stop_failtest(int signum __attribute__((unused)))
98 {
99 failtest = false;
100 }
102 /* Need these before we #define away write_all/mkdir in testing.h */
103 bool test_write_all(int fd, void *contents, unsigned int len);
104 bool test_write_all(int fd, void *contents, unsigned int len)
105 {
106 if (failtest && (random() % 8) == 0) {
107 if (len)
108 len = random() % len;
109 write(fd, contents, len);
110 errno = ENOSPC;
111 return false;
112 }
113 return xs_write_all(fd, contents, len);
114 }
116 int test_mkdir(const char *dir, int perms);
117 int test_mkdir(const char *dir, int perms)
118 {
119 if (failtest && (random() % 8) == 0) {
120 errno = ENOSPC;
121 return -1;
122 }
123 return mkdir(dir, perms);
124 }
125 #endif /* TESTING */
127 #include "xenstored_test.h"
129 TDB_CONTEXT *tdb_context(struct connection *conn)
130 {
131 /* conn = NULL used in manual_node at setup. */
132 if (!conn || !conn->transaction)
133 return tdb_ctx;
134 return tdb_transaction_context(conn->transaction);
135 }
137 bool replace_tdb(const char *newname, TDB_CONTEXT *newtdb)
138 {
139 if (rename(newname, xs_daemon_tdb()) != 0)
140 return false;
141 tdb_close(tdb_ctx);
142 tdb_ctx = talloc_steal(talloc_autofree_context(), newtdb);
143 return true;
144 }
146 static char *sockmsg_string(enum xsd_sockmsg_type type)
147 {
148 switch (type) {
149 case XS_DEBUG: return "DEBUG";
150 case XS_DIRECTORY: return "DIRECTORY";
151 case XS_READ: return "READ";
152 case XS_GET_PERMS: return "GET_PERMS";
153 case XS_WATCH: return "WATCH";
154 case XS_UNWATCH: return "UNWATCH";
155 case XS_TRANSACTION_START: return "TRANSACTION_START";
156 case XS_TRANSACTION_END: return "TRANSACTION_END";
157 case XS_INTRODUCE: return "INTRODUCE";
158 case XS_RELEASE: return "RELEASE";
159 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
160 case XS_WRITE: return "WRITE";
161 case XS_MKDIR: return "MKDIR";
162 case XS_RM: return "RM";
163 case XS_SET_PERMS: return "SET_PERMS";
164 case XS_WATCH_EVENT: return "WATCH_EVENT";
165 case XS_ERROR: return "ERROR";
166 case XS_IS_DOMAIN_INTRODUCED: return "XS_IS_DOMAIN_INTRODUCED";
167 default:
168 return "**UNKNOWN**";
169 }
170 }
172 void trace(const char *fmt, ...)
173 {
174 va_list arglist;
175 char *str;
176 char sbuf[1024];
177 int ret, dummy;
179 if (tracefd < 0)
180 return;
182 /* try to use a static buffer */
183 va_start(arglist, fmt);
184 ret = vsnprintf(sbuf, 1024, fmt, arglist);
185 va_end(arglist);
187 if (ret <= 1024) {
188 dummy = write(tracefd, sbuf, ret);
189 return;
190 }
192 /* fail back to dynamic allocation */
193 va_start(arglist, fmt);
194 str = talloc_vasprintf(NULL, fmt, arglist);
195 va_end(arglist);
196 dummy = write(tracefd, str, strlen(str));
197 talloc_free(str);
198 }
200 static void trace_io(const struct connection *conn,
201 const char *prefix,
202 const struct buffered_data *data)
203 {
204 unsigned int i;
205 time_t now;
206 struct tm *tm;
208 if (tracefd < 0)
209 return;
211 now = time(NULL);
212 tm = localtime(&now);
214 trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (", prefix, conn,
215 tm->tm_year + 1900, tm->tm_mon + 1,
216 tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
217 sockmsg_string(data->hdr.msg.type));
219 for (i = 0; i < data->hdr.msg.len; i++)
220 trace("%c", (data->buffer[i] != '\0') ? data->buffer[i] : ' ');
221 trace(")\n");
222 }
/* Record in the trace file that an object of the given type now exists. */
void trace_create(const void *data, const char *type)
{
	trace("CREATE %s %p\n", type, data);
}
/* Record in the trace file that an object of the given type is going away. */
void trace_destroy(const void *data, const char *type)
{
	trace("DESTROY %s %p\n", type, data);
}
234 /**
235 * Signal handler for SIGHUP, which requests that the trace log is reopened
236 * (in the main loop). A single byte is written to reopen_log_pipe, to awaken
237 * the select() in the main loop.
238 */
239 static void trigger_reopen_log(int signal __attribute__((unused)))
240 {
241 char c = 'A';
242 int dummy;
243 dummy = write(reopen_log_pipe[1], &c, 1);
244 }
247 static void reopen_log(void)
248 {
249 if (tracefile) {
250 if (tracefd > 0)
251 close(tracefd);
253 tracefd = open(tracefile, O_WRONLY|O_CREAT|O_APPEND, 0600);
255 if (tracefd < 0)
256 perror("Could not open tracefile");
257 else
258 trace("\n***\n");
259 }
260 }
263 static bool write_messages(struct connection *conn)
264 {
265 int ret;
266 struct buffered_data *out;
268 out = list_top(&conn->out_list, struct buffered_data, list);
269 if (out == NULL)
270 return true;
272 if (out->inhdr) {
273 if (verbose)
274 xprintf("Writing msg %s (%.*s) out to %p\n",
275 sockmsg_string(out->hdr.msg.type),
276 out->hdr.msg.len,
277 out->buffer, conn);
278 ret = conn->write(conn, out->hdr.raw + out->used,
279 sizeof(out->hdr) - out->used);
280 if (ret < 0)
281 return false;
283 out->used += ret;
284 if (out->used < sizeof(out->hdr))
285 return true;
287 out->inhdr = false;
288 out->used = 0;
290 /* Second write might block if non-zero. */
291 if (out->hdr.msg.len && !conn->domain)
292 return true;
293 }
295 ret = conn->write(conn, out->buffer + out->used,
296 out->hdr.msg.len - out->used);
297 if (ret < 0)
298 return false;
300 out->used += ret;
301 if (out->used != out->hdr.msg.len)
302 return true;
304 trace_io(conn, "OUT", out);
306 list_del(&out->list);
307 talloc_free(out);
309 return true;
310 }
312 static int destroy_conn(void *_conn)
313 {
314 struct connection *conn = _conn;
316 /* Flush outgoing if possible, but don't block. */
317 if (!conn->domain) {
318 fd_set set;
319 struct timeval none;
321 FD_ZERO(&set);
322 FD_SET(conn->fd, &set);
323 none.tv_sec = none.tv_usec = 0;
325 while (!list_empty(&conn->out_list)
326 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
327 if (!write_messages(conn))
328 break;
329 close(conn->fd);
330 }
331 list_del(&conn->list);
332 trace_destroy(conn, "connection");
333 return 0;
334 }
/*
 * Add a descriptor to an fd_set and keep *max at the highest descriptor
 * seen so far.  Negative descriptors are ignored.
 */
static void set_fd(int fd, fd_set *set, int *max)
{
	if (fd >= 0) {
		FD_SET(fd, set);
		if (*max < fd)
			*max = fd;
	}
}
347 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock)
348 {
349 struct connection *i;
350 int max = -1;
352 FD_ZERO(inset);
353 FD_ZERO(outset);
355 set_fd(sock, inset, &max);
356 set_fd(ro_sock, inset, &max);
357 set_fd(reopen_log_pipe[0], inset, &max);
359 if (xce_handle != -1)
360 set_fd(xc_evtchn_fd(xce_handle), inset, &max);
362 list_for_each_entry(i, &connections, list) {
363 if (i->domain)
364 continue;
365 set_fd(i->fd, inset, &max);
366 if (!list_empty(&i->out_list))
367 FD_SET(i->fd, outset);
368 }
369 return max;
370 }
/* talloc destructor for a talloc_open() handle: closes the descriptor. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
379 /* Return a pointer to an fd, self-closing and attached to this pathname. */
380 int *talloc_open(const char *pathname, int flags, int mode)
381 {
382 int *fd;
384 fd = talloc(pathname, int);
385 *fd = open(pathname, flags, mode);
386 if (*fd < 0) {
387 int saved_errno = errno;
388 talloc_free(fd);
389 errno = saved_errno;
390 return NULL;
391 }
392 talloc_set_destructor(fd, destroy_fd);
393 return fd;
394 }
/*
 * Is child a subnode of parent, or equal?  True when `parent` is "/" or
 * when `child` starts with `parent` and the match ends on a path
 * component boundary.
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen = strlen(parent);
	char next;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (streq(parent, "/"))
		return true;

	if (strncmp(child, parent, plen) != 0)
		return false;

	next = child[plen];
	return next == '/' || next == '\0';
}
412 /* If it fails, returns NULL and sets errno. */
413 static struct node *read_node(struct connection *conn, const char *name)
414 {
415 TDB_DATA key, data;
416 uint32_t *p;
417 struct node *node;
418 TDB_CONTEXT * context = tdb_context(conn);
420 key.dptr = (void *)name;
421 key.dsize = strlen(name);
422 data = tdb_fetch(context, key);
424 if (data.dptr == NULL) {
425 if (tdb_error(context) == TDB_ERR_NOEXIST)
426 errno = ENOENT;
427 else {
428 log("TDB error on read: %s", tdb_errorstr(context));
429 errno = EIO;
430 }
431 return NULL;
432 }
434 node = talloc(name, struct node);
435 node->name = talloc_strdup(node, name);
436 node->parent = NULL;
437 node->tdb = tdb_context(conn);
438 talloc_steal(node, data.dptr);
440 /* Datalen, childlen, number of permissions */
441 p = (uint32_t *)data.dptr;
442 node->num_perms = p[0];
443 node->datalen = p[1];
444 node->childlen = p[2];
446 /* Permissions are struct xs_permissions. */
447 node->perms = (void *)&p[3];
448 /* Data is binary blob (usually ascii, no nul). */
449 node->data = node->perms + node->num_perms;
450 /* Children is strings, nul separated. */
451 node->children = node->data + node->datalen;
453 return node;
454 }
456 static bool write_node(struct connection *conn, const struct node *node)
457 {
458 /*
459 * conn will be null when this is called from manual_node.
460 * tdb_context copes with this.
461 */
463 TDB_DATA key, data;
464 void *p;
466 key.dptr = (void *)node->name;
467 key.dsize = strlen(node->name);
469 data.dsize = 3*sizeof(uint32_t)
470 + node->num_perms*sizeof(node->perms[0])
471 + node->datalen + node->childlen;
473 if (domain_is_unprivileged(conn) && data.dsize >= quota_max_entry_size)
474 goto error;
476 data.dptr = talloc_size(node, data.dsize);
477 ((uint32_t *)data.dptr)[0] = node->num_perms;
478 ((uint32_t *)data.dptr)[1] = node->datalen;
479 ((uint32_t *)data.dptr)[2] = node->childlen;
480 p = data.dptr + 3 * sizeof(uint32_t);
482 memcpy(p, node->perms, node->num_perms*sizeof(node->perms[0]));
483 p += node->num_perms*sizeof(node->perms[0]);
484 memcpy(p, node->data, node->datalen);
485 p += node->datalen;
486 memcpy(p, node->children, node->childlen);
488 /* TDB should set errno, but doesn't even set ecode AFAICT. */
489 if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
490 corrupt(conn, "Write of %s failed", key.dptr);
491 goto error;
492 }
493 return true;
494 error:
495 errno = ENOSPC;
496 return false;
497 }
499 static enum xs_perm_type perm_for_conn(struct connection *conn,
500 struct xs_permissions *perms,
501 unsigned int num)
502 {
503 unsigned int i;
504 enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
506 if (!conn->can_write)
507 mask &= ~XS_PERM_WRITE;
509 /* Owners and tools get it all... */
510 if (!conn->id || perms[0].id == conn->id)
511 return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
513 for (i = 1; i < num; i++)
514 if (perms[i].id == conn->id)
515 return perms[i].perms & mask;
517 return perms[0].perms & mask;
518 }
/*
 * Return the parent path of `node` as a new string allocated as a talloc
 * child of `node`.  Paths with no '/' after the first character have "/"
 * as their parent.
 */
static char *get_parent(const char *node)
{
	const char *last_slash = strrchr(node + 1, '/');

	if (last_slash)
		return talloc_asprintf(node, "%.*s",
				       (int)(last_slash - node), node);

	return talloc_strdup(node, "/");
}
528 /* What do parents say? */
529 static enum xs_perm_type ask_parents(struct connection *conn, const char *name)
530 {
531 struct node *node;
533 do {
534 name = get_parent(name);
535 node = read_node(conn, name);
536 if (node)
537 break;
538 } while (!streq(name, "/"));
540 /* No permission at root? We're in trouble. */
541 if (!node)
542 corrupt(conn, "No permissions file at root");
544 return perm_for_conn(conn, node->perms, node->num_perms);
545 }
547 /* We have a weird permissions system. You can allow someone into a
548 * specific node without allowing it in the parents. If it's going to
549 * fail, however, we don't want the errno to indicate any information
550 * about the node. */
551 static int errno_from_parents(struct connection *conn, const char *node,
552 int errnum, enum xs_perm_type perm)
553 {
554 /* We always tell them about memory failures. */
555 if (errnum == ENOMEM)
556 return errnum;
558 if (ask_parents(conn, node) & perm)
559 return errnum;
560 return EACCES;
561 }
563 /* If it fails, returns NULL and sets errno. */
564 struct node *get_node(struct connection *conn,
565 const char *name,
566 enum xs_perm_type perm)
567 {
568 struct node *node;
570 if (!name || !is_valid_nodename(name)) {
571 errno = EINVAL;
572 return NULL;
573 }
574 node = read_node(conn, name);
575 /* If we don't have permission, we don't have node. */
576 if (node) {
577 if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
578 != perm)
579 node = NULL;
580 }
581 /* Clean up errno if they weren't supposed to know. */
582 if (!node)
583 errno = errno_from_parents(conn, name, errno, perm);
584 return node;
585 }
587 static struct buffered_data *new_buffer(void *ctx)
588 {
589 struct buffered_data *data;
591 data = talloc_zero(ctx, struct buffered_data);
592 if (data == NULL)
593 return NULL;
595 data->inhdr = true;
596 return data;
597 }
599 /* Return length of string (including nul) at this offset. */
600 static unsigned int get_string(const struct buffered_data *data,
601 unsigned int offset)
602 {
603 const char *nul;
605 if (offset >= data->used)
606 return 0;
608 nul = memchr(data->buffer + offset, 0, data->used - offset);
609 if (!nul)
610 return 0;
612 return nul - (data->buffer + offset) + 1;
613 }
615 /* Break input into vectors, return the number, fill in up to num of them. */
616 unsigned int get_strings(struct buffered_data *data,
617 char *vec[], unsigned int num)
618 {
619 unsigned int off, i, len;
621 off = i = 0;
622 while ((len = get_string(data, off)) != 0) {
623 if (i < num)
624 vec[i] = data->buffer + off;
625 i++;
626 off += len;
627 }
628 return i;
629 }
631 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
632 const void *data, unsigned int len)
633 {
634 struct buffered_data *bdata;
636 /* Message is a child of the connection context for auto-cleanup. */
637 bdata = new_buffer(conn);
638 bdata->buffer = talloc_array(bdata, char, len);
640 /* Echo request header in reply unless this is an async watch event. */
641 if (type != XS_WATCH_EVENT) {
642 memcpy(&bdata->hdr.msg, &conn->in->hdr.msg,
643 sizeof(struct xsd_sockmsg));
644 } else {
645 memset(&bdata->hdr.msg, 0, sizeof(struct xsd_sockmsg));
646 }
648 /* Update relevant header fields and fill in the message body. */
649 bdata->hdr.msg.type = type;
650 bdata->hdr.msg.len = len;
651 memcpy(bdata->buffer, data, len);
653 /* Queue for later transmission. */
654 list_add_tail(&bdata->list, &conn->out_list);
655 }
657 /* Some routines (write, mkdir, etc) just need a non-error return */
658 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
659 {
660 send_reply(conn, type, "OK", sizeof("OK"));
661 }
663 void send_error(struct connection *conn, int error)
664 {
665 unsigned int i;
667 for (i = 0; error != xsd_errors[i].errnum; i++) {
668 if (i == ARRAY_SIZE(xsd_errors) - 1) {
669 eprintf("xenstored: error %i untranslatable", error);
670 i = 0; /* EINVAL */
671 break;
672 }
673 }
674 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
675 strlen(xsd_errors[i].errstring) + 1);
676 }
/*
 * True if every character of `node` is an ASCII letter, a digit, or one
 * of '-', '/', '_', '@'.  The empty string is accepted.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix covers the whole string iff it ends at the nul. */
	return node[strspn(node, allowed)] == '\0';
}
/*
 * A valid node name starts with '/', does not end in '/' unless it is
 * exactly "/", contains no "//", and uses only the characters accepted by
 * valid_chars().
 */
bool is_valid_nodename(const char *node)
{
	return strstarts(node, "/")			/* Must start in /. */
		&& !(strends(node, "/") && !streq(node, "/")) /* No trailing /. */
		&& !strstr(node, "//")			/* No double //. */
		&& valid_chars(node);
}
704 /* We expect one arg in the input: return NULL otherwise. */
705 static const char *onearg(struct buffered_data *in)
706 {
707 if (!in->used || get_string(in, 0) != in->used)
708 return NULL;
709 return in->buffer;
710 }
712 static char *perms_to_strings(const void *ctx,
713 struct xs_permissions *perms, unsigned int num,
714 unsigned int *len)
715 {
716 unsigned int i;
717 char *strings = NULL;
718 char buffer[MAX_STRLEN(unsigned int) + 1];
720 for (*len = 0, i = 0; i < num; i++) {
721 if (!xs_perm_to_string(&perms[i], buffer))
722 return NULL;
724 strings = talloc_realloc(ctx, strings, char,
725 *len + strlen(buffer) + 1);
726 strcpy(strings + *len, buffer);
727 *len += strlen(buffer) + 1;
728 }
729 return strings;
730 }
/*
 * Turn a relative node name into an absolute one by prefixing the
 * connection's implicit path.  NULL names, names already starting with
 * '/', and connections without an implicit path get the input back
 * unchanged.  A newly built string is a talloc child of `node`.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *prefix;

	if (!node || strstarts(node, "/"))
		return (char *)node;

	prefix = get_implicit_path(conn);
	return prefix ? talloc_asprintf(node, "%s/%s", prefix, node)
		      : (char *)node;
}
/*
 * True if `node` names a special event node, i.e. is non-NULL and starts
 * with '@'.  Otherwise sets errno to EINVAL and returns false.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
753 static void send_directory(struct connection *conn, const char *name)
754 {
755 struct node *node;
757 name = canonicalize(conn, name);
758 node = get_node(conn, name, XS_PERM_READ);
759 if (!node) {
760 send_error(conn, errno);
761 return;
762 }
764 send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
765 }
767 static void do_read(struct connection *conn, const char *name)
768 {
769 struct node *node;
771 name = canonicalize(conn, name);
772 node = get_node(conn, name, XS_PERM_READ);
773 if (!node) {
774 send_error(conn, errno);
775 return;
776 }
778 send_reply(conn, XS_READ, node->data, node->datalen);
779 }
781 static void delete_node_single(struct connection *conn, struct node *node)
782 {
783 TDB_DATA key;
785 key.dptr = (void *)node->name;
786 key.dsize = strlen(node->name);
788 if (tdb_delete(tdb_context(conn), key) != 0) {
789 corrupt(conn, "Could not delete '%s'", node->name);
790 return;
791 }
792 domain_entry_dec(conn);
793 }
/*
 * Return the final path component, i.e. whatever follows the last '/'.
 * Must not be /, and the name must contain a '/'.
 */
static char *basename(const char *name)
{
	char *last_slash = strrchr(name, '/');

	return last_slash + 1;
}
801 static struct node *construct_node(struct connection *conn, const char *name)
802 {
803 const char *base;
804 unsigned int baselen;
805 struct node *parent, *node;
806 char *children, *parentname = get_parent(name);
808 /* If parent doesn't exist, create it. */
809 parent = read_node(conn, parentname);
810 if (!parent)
811 parent = construct_node(conn, parentname);
812 if (!parent)
813 return NULL;
815 if (domain_entry(conn) >= quota_nb_entry_per_domain)
816 return NULL;
818 /* Add child to parent. */
819 base = basename(name);
820 baselen = strlen(base) + 1;
821 children = talloc_array(name, char, parent->childlen + baselen);
822 memcpy(children, parent->children, parent->childlen);
823 memcpy(children + parent->childlen, base, baselen);
824 parent->children = children;
825 parent->childlen += baselen;
827 /* Allocate node */
828 node = talloc(name, struct node);
829 node->tdb = tdb_context(conn);
830 node->name = talloc_strdup(node, name);
832 /* Inherit permissions, except domains own what they create */
833 node->num_perms = parent->num_perms;
834 node->perms = talloc_memdup(node, parent->perms,
835 node->num_perms * sizeof(node->perms[0]));
836 if (conn && conn->id)
837 node->perms[0].id = conn->id;
839 /* No children, no data */
840 node->children = node->data = NULL;
841 node->childlen = node->datalen = 0;
842 node->parent = parent;
843 domain_entry_inc(conn);
844 return node;
845 }
847 static int destroy_node(void *_node)
848 {
849 struct node *node = _node;
850 TDB_DATA key;
852 if (streq(node->name, "/"))
853 corrupt(NULL, "Destroying root node!");
855 key.dptr = (void *)node->name;
856 key.dsize = strlen(node->name);
858 tdb_delete(node->tdb, key);
859 return 0;
860 }
862 static struct node *create_node(struct connection *conn,
863 const char *name,
864 void *data, unsigned int datalen)
865 {
866 struct node *node, *i;
868 node = construct_node(conn, name);
869 if (!node)
870 return NULL;
872 node->data = data;
873 node->datalen = datalen;
875 /* We write out the nodes down, setting destructor in case
876 * something goes wrong. */
877 for (i = node; i; i = i->parent) {
878 if (!write_node(conn, i)) {
879 domain_entry_dec(conn);
880 return NULL;
881 }
882 talloc_set_destructor(i, destroy_node);
883 }
885 /* OK, now remove destructors so they stay around */
886 for (i = node; i; i = i->parent)
887 talloc_set_destructor(i, NULL);
888 return node;
889 }
891 /* path, data... */
892 static void do_write(struct connection *conn, struct buffered_data *in)
893 {
894 unsigned int offset, datalen;
895 struct node *node;
896 char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
897 char *name;
899 /* Extra "strings" can be created by binary data. */
900 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
901 send_error(conn, EINVAL);
902 return;
903 }
905 offset = strlen(vec[0]) + 1;
906 datalen = in->used - offset;
908 name = canonicalize(conn, vec[0]);
909 node = get_node(conn, name, XS_PERM_WRITE);
910 if (!node) {
911 /* No permissions, invalid input? */
912 if (errno != ENOENT) {
913 send_error(conn, errno);
914 return;
915 }
916 node = create_node(conn, name, in->buffer + offset, datalen);
917 if (!node) {
918 send_error(conn, errno);
919 return;
920 }
921 } else {
922 node->data = in->buffer + offset;
923 node->datalen = datalen;
924 if (!write_node(conn, node)){
925 send_error(conn, errno);
926 return;
927 }
928 }
930 add_change_node(conn->transaction, name, false);
931 fire_watches(conn, name, false);
932 send_ack(conn, XS_WRITE);
933 }
935 static void do_mkdir(struct connection *conn, const char *name)
936 {
937 struct node *node;
939 name = canonicalize(conn, name);
940 node = get_node(conn, name, XS_PERM_WRITE);
942 /* If it already exists, fine. */
943 if (!node) {
944 /* No permissions? */
945 if (errno != ENOENT) {
946 send_error(conn, errno);
947 return;
948 }
949 node = create_node(conn, name, NULL, 0);
950 if (!node) {
951 send_error(conn, errno);
952 return;
953 }
954 add_change_node(conn->transaction, name, false);
955 fire_watches(conn, name, false);
956 }
957 send_ack(conn, XS_MKDIR);
958 }
960 static void delete_node(struct connection *conn, struct node *node)
961 {
962 unsigned int i;
964 /* Delete self, then delete children. If we crash, then the worst
965 that can happen is the children will continue to take up space, but
966 will otherwise be unreachable. */
967 delete_node_single(conn, node);
969 /* Delete children, too. */
970 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
971 struct node *child;
973 child = read_node(conn,
974 talloc_asprintf(node, "%s/%s", node->name,
975 node->children + i));
976 if (child) {
977 delete_node(conn, child);
978 }
979 else {
980 trace("delete_node: No child '%s/%s' found!\n",
981 node->name, node->children + i);
982 /* Skip it, we've already deleted the parent. */
983 }
984 }
985 }
/*
 * Delete memory using memmove: remove `len` bytes at offset `off` from a
 * region of `total` bytes by sliding the tail down over them.  The last
 * `len` bytes of the region keep their old contents; the caller is
 * expected to shrink its notion of the size.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	/* Arithmetic on void * is a GNU extension; go through char *. */
	char *bytes = mem;

	memmove(bytes + off, bytes + off + len, total - off - len);
}
995 static bool remove_child_entry(struct connection *conn, struct node *node,
996 size_t offset)
997 {
998 size_t childlen = strlen(node->children + offset);
999 memdel(node->children, offset, childlen + 1, node->childlen);
1000 node->childlen -= childlen + 1;
1001 return write_node(conn, node);
1005 static bool delete_child(struct connection *conn,
1006 struct node *node, const char *childname)
1008 unsigned int i;
1010 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
1011 if (streq(node->children+i, childname)) {
1012 return remove_child_entry(conn, node, i);
1015 corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
1016 return false;
/*
 * Remove `node` (named `name`) and its subtree.  Returns 1 on success.
 * If the parent cannot be read or does not list the child, an EINVAL
 * error is sent on `conn` and 0 is returned.
 */
static int _rm(struct connection *conn, struct node *node, const char *name)
{
	/* Delete from parent first, then if we crash, the worst that can
	   happen is the child will continue to take up space, but will
	   otherwise be unreachable. */
	struct node *parent = read_node(conn, get_parent(name));

	if (!parent || !delete_child(conn, parent, basename(name))) {
		send_error(conn, EINVAL);
		return 0;
	}

	delete_node(conn, node);
	return 1;
}
/*
 * Remove a node on the daemon's own behalf (no connection).  The name is
 * copied into a private talloc string first, which read_node() and its
 * helpers use as their allocation parent.
 */
static void internal_rm(const char *name)
{
	char *owned_name = talloc_strdup(NULL, name);
	struct node *target = read_node(NULL, owned_name);

	if (target)
		_rm(NULL, target, owned_name);

	talloc_free(target);
	talloc_free(owned_name);
}
1052 static void do_rm(struct connection *conn, const char *name)
1054 struct node *node;
1056 name = canonicalize(conn, name);
1057 node = get_node(conn, name, XS_PERM_WRITE);
1058 if (!node) {
1059 /* Didn't exist already? Fine, if parent exists. */
1060 if (errno == ENOENT) {
1061 node = read_node(conn, get_parent(name));
1062 if (node) {
1063 send_ack(conn, XS_RM);
1064 return;
1066 /* Restore errno, just in case. */
1067 errno = ENOENT;
1069 send_error(conn, errno);
1070 return;
1073 if (streq(name, "/")) {
1074 send_error(conn, EINVAL);
1075 return;
1078 if (_rm(conn, node, name)) {
1079 add_change_node(conn->transaction, name, true);
1080 fire_watches(conn, name, true);
1081 send_ack(conn, XS_RM);
1086 static void do_get_perms(struct connection *conn, const char *name)
1088 struct node *node;
1089 char *strings;
1090 unsigned int len;
1092 name = canonicalize(conn, name);
1093 node = get_node(conn, name, XS_PERM_READ);
1094 if (!node) {
1095 send_error(conn, errno);
1096 return;
1099 strings = perms_to_strings(node, node->perms, node->num_perms, &len);
1100 if (!strings)
1101 send_error(conn, errno);
1102 else
1103 send_reply(conn, XS_GET_PERMS, strings, len);
1106 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1108 unsigned int num;
1109 char *name, *permstr;
1110 struct node *node;
1112 num = xs_count_strings(in->buffer, in->used);
1113 if (num < 2) {
1114 send_error(conn, EINVAL);
1115 return;
1118 /* First arg is node name. */
1119 name = canonicalize(conn, in->buffer);
1120 permstr = in->buffer + strlen(in->buffer) + 1;
1121 num--;
1123 /* We must own node to do this (tools can do this too). */
1124 node = get_node(conn, name, XS_PERM_WRITE|XS_PERM_OWNER);
1125 if (!node) {
1126 send_error(conn, errno);
1127 return;
1130 node->perms = talloc_array(node, struct xs_permissions, num);
1131 node->num_perms = num;
1132 if (!xs_strings_to_perms(node->perms, num, permstr)) {
1133 send_error(conn, errno);
1134 return;
1136 if (!write_node(conn, node)) {
1137 send_error(conn, errno);
1138 return;
1141 add_change_node(conn->transaction, name, false);
1142 fire_watches(conn, name, false);
1143 send_ack(conn, XS_SET_PERMS);
1146 static void do_debug(struct connection *conn, struct buffered_data *in)
1148 int num;
1150 num = xs_count_strings(in->buffer, in->used);
1152 if (streq(in->buffer, "print")) {
1153 if (num < 2) {
1154 send_error(conn, EINVAL);
1155 return;
1157 xprintf("debug: %s", in->buffer + get_string(in, 0));
1159 if (streq(in->buffer, "check"))
1160 check_store();
1161 #ifdef TESTING
1162 /* For testing, we allow them to set id. */
1163 if (streq(in->buffer, "setid")) {
1164 conn->id = atoi(in->buffer + get_string(in, 0));
1165 } else if (streq(in->buffer, "failtest")) {
1166 if (get_string(in, 0) < in->used)
1167 srandom(atoi(in->buffer + get_string(in, 0)));
1168 failtest = true;
1170 #endif /* TESTING */
1171 send_ack(conn, XS_DEBUG);
1174 /* Process "in" for conn: "in" will vanish after this conversation, so
1175 * we can talloc off it for temporary variables. May free "conn".
1176 */
1177 static void process_message(struct connection *conn, struct buffered_data *in)
1179 struct transaction *trans;
1181 trans = transaction_lookup(conn, in->hdr.msg.tx_id);
1182 if (IS_ERR(trans)) {
1183 send_error(conn, -PTR_ERR(trans));
1184 return;
1187 assert(conn->transaction == NULL);
1188 conn->transaction = trans;
1190 switch (in->hdr.msg.type) {
1191 case XS_DIRECTORY:
1192 send_directory(conn, onearg(in));
1193 break;
1195 case XS_READ:
1196 do_read(conn, onearg(in));
1197 break;
1199 case XS_WRITE:
1200 do_write(conn, in);
1201 break;
1203 case XS_MKDIR:
1204 do_mkdir(conn, onearg(in));
1205 break;
1207 case XS_RM:
1208 do_rm(conn, onearg(in));
1209 break;
1211 case XS_GET_PERMS:
1212 do_get_perms(conn, onearg(in));
1213 break;
1215 case XS_SET_PERMS:
1216 do_set_perms(conn, in);
1217 break;
1219 case XS_DEBUG:
1220 do_debug(conn, in);
1221 break;
1223 case XS_WATCH:
1224 do_watch(conn, in);
1225 break;
1227 case XS_UNWATCH:
1228 do_unwatch(conn, in);
1229 break;
1231 case XS_TRANSACTION_START:
1232 do_transaction_start(conn, in);
1233 break;
1235 case XS_TRANSACTION_END:
1236 do_transaction_end(conn, onearg(in));
1237 break;
1239 case XS_INTRODUCE:
1240 do_introduce(conn, in);
1241 break;
1243 case XS_IS_DOMAIN_INTRODUCED:
1244 do_is_domain_introduced(conn, onearg(in));
1245 break;
1247 case XS_RELEASE:
1248 do_release(conn, onearg(in));
1249 break;
1251 case XS_GET_DOMAIN_PATH:
1252 do_get_domain_path(conn, onearg(in));
1253 break;
1255 default:
1256 eprintf("Client unknown operation %i", in->hdr.msg.type);
1257 send_error(conn, ENOSYS);
1258 break;
1261 conn->transaction = NULL;
1264 static void consider_message(struct connection *conn)
1266 if (verbose)
1267 xprintf("Got message %s len %i from %p\n",
1268 sockmsg_string(conn->in->hdr.msg.type),
1269 conn->in->hdr.msg.len, conn);
1271 process_message(conn, conn->in);
1273 talloc_free(conn->in);
1274 conn->in = new_buffer(conn);
1277 /* Errors in reading or allocating here mean we get out of sync, so we
1278 * drop the whole client connection. */
1279 static void handle_input(struct connection *conn)
1281 int bytes;
1282 struct buffered_data *in = conn->in;
1284 /* Not finished header yet? */
1285 if (in->inhdr) {
1286 bytes = conn->read(conn, in->hdr.raw + in->used,
1287 sizeof(in->hdr) - in->used);
1288 if (bytes <= 0)
1289 goto bad_client;
1290 in->used += bytes;
1291 if (in->used != sizeof(in->hdr))
1292 return;
1294 if (in->hdr.msg.len > PATH_MAX) {
1295 #ifndef TESTING
1296 syslog(LOG_ERR, "Client tried to feed us %i",
1297 in->hdr.msg.len);
1298 #endif
1299 goto bad_client;
1302 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1303 if (!in->buffer)
1304 goto bad_client;
1305 in->used = 0;
1306 in->inhdr = false;
1307 return;
1310 bytes = conn->read(conn, in->buffer + in->used,
1311 in->hdr.msg.len - in->used);
1312 if (bytes < 0)
1313 goto bad_client;
1315 in->used += bytes;
1316 if (in->used != in->hdr.msg.len)
1317 return;
1319 trace_io(conn, "IN ", in);
1320 consider_message(conn);
1321 return;
1323 bad_client:
1324 /* Kill it. */
1325 talloc_free(conn);
/* Flush pending output for conn; drop the connection if writing fails. */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	talloc_free(conn);
}
1334 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1336 struct connection *new;
1338 new = talloc_zero(talloc_autofree_context(), struct connection);
1339 if (!new)
1340 return NULL;
1342 new->fd = -1;
1343 new->write = write;
1344 new->read = read;
1345 new->can_write = true;
1346 new->transaction_started = 0;
1347 INIT_LIST_HEAD(&new->out_list);
1348 INIT_LIST_HEAD(&new->watches);
1349 INIT_LIST_HEAD(&new->transaction_list);
1351 new->in = new_buffer(new);
1352 if (new->in == NULL) {
1353 talloc_free(new);
1354 return NULL;
1357 list_add_tail(&new->list, &connections);
1358 talloc_set_destructor(new, destroy_conn);
1359 trace_create(new, "connection");
1360 return new;
1363 static int writefd(struct connection *conn, const void *data, unsigned int len)
1365 return write(conn->fd, data, len);
1368 static int readfd(struct connection *conn, void *data, unsigned int len)
1370 return read(conn->fd, data, len);
1373 static void accept_connection(int sock, bool canwrite)
1375 int fd;
1376 struct connection *conn;
1378 fd = accept(sock, NULL, NULL);
1379 if (fd < 0)
1380 return;
1382 conn = new_connection(writefd, readfd);
1383 if (conn) {
1384 conn->fd = fd;
1385 conn->can_write = canwrite;
1386 } else
1387 close(fd);
#ifdef TESTING
/* Valgrind can check our writes better if we don't use mmap */
#define TDB_FLAGS TDB_NOMMAP

/*
 * Debugger aid: print every known connection, whether it has output
 * queued, its id (when non-zero), how far the current input message has
 * progressed, and its watches.
 */
void dump_connection(void)
{
	struct connection *conn;

	list_for_each_entry(conn, &connections, list) {
		const char *state = list_empty(&conn->out_list) ? "OK" : "BUSY";

		printf("Connection %p:\n", conn);
		printf(" state = %s\n", state);

		if (conn->id)
			printf(" id = %i\n", conn->id);

		/* Only report input once something has actually been read. */
		if (conn->in->used || !conn->in->inhdr)
			printf(" got %i bytes of %s\n",
			       conn->in->used,
			       conn->in->inhdr ? "header" : "data");
#if 0
		if (conn->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(conn->out->hdr.msg.type),
			       conn->out->buffer);
		if (conn->transaction)
			dump_transaction(conn);
		if (conn->domain)
			dump_domain(conn);
#endif
		dump_watches(conn);
	}
}
#else
#define TDB_FLAGS 0
#endif
1424 /* We create initial nodes manually. */
1425 static void manual_node(const char *name, const char *child)
1427 struct node *node;
1428 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
1430 node = talloc_zero(NULL, struct node);
1431 node->name = name;
1432 node->perms = &perms;
1433 node->num_perms = 1;
1434 node->children = (char *)child;
1435 if (child)
1436 node->childlen = strlen(child) + 1;
1438 if (!write_node(NULL, node))
1439 barf_perror("Could not create initial node %s", name);
1440 talloc_free(node);
1443 static void setup_structure(void)
1445 char *tdbname;
1446 tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1447 tdb_ctx = tdb_open(tdbname, 0, TDB_FLAGS, O_RDWR, 0);
1449 if (tdb_ctx) {
1450 /* XXX When we make xenstored able to restart, this will have
1451 to become cleverer, checking for existing domains and not
1452 removing the corresponding entries, but for now xenstored
1453 cannot be restarted without losing all the registered
1454 watches, which breaks all the backend drivers anyway. We
1455 can therefore get away with just clearing /local and
1456 expecting Xend to put the appropriate entries back in.
1458 When this change is made it is important to note that
1459 dom0's entries must be cleaned up on reboot _before_ this
1460 daemon starts, otherwise the backend drivers and dom0's
1461 balloon driver will pick up stale entries. In the case of
1462 the balloon driver, this can be fatal.
1463 */
1464 char *tlocal = talloc_strdup(NULL, "/local");
1466 check_store();
1468 if (remove_local) {
1469 internal_rm("/local");
1470 create_node(NULL, tlocal, NULL, 0);
1472 check_store();
1475 talloc_free(tlocal);
1477 else {
1478 tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
1479 0640);
1480 if (!tdb_ctx)
1481 barf_perror("Could not create tdb file %s", tdbname);
1483 manual_node("/", "tool");
1484 manual_node("/tool", "xenstored");
1485 manual_node("/tool/xenstored", NULL);
1487 check_store();
/*
 * Hash a NUL-terminated string key: start from 5381 and, for each
 * character, multiply the running value by 33 and add the character.
 */
static unsigned int hash_from_key_fn(void *k)
{
	const char *p;
	unsigned int h = 5381;

	for (p = k; *p != '\0'; p++)
		h = h * 33 + (unsigned int)*p;

	return h;
}
/* Key comparison for the hashtable: 1 if both strings match, else 0. */
static int keys_equal_fn(void *key1, void *key2)
{
	const char *lhs = key1;
	const char *rhs = key2;

	return strcmp(lhs, rhs) == 0;
}
/*
 * Build the full path of child s2 under parent s1 as a new talloc string
 * (NULL context).  The root parent "/" is special-cased so the result
 * never starts with a double slash.
 */
static char *child_name(const char *s1, const char *s2)
{
	int parent_is_root = (strcmp(s1, "/") == 0);

	if (parent_is_root)
		return talloc_asprintf(NULL, "/%s", s2);

	return talloc_asprintf(NULL, "%s/%s", s1, s2);
}
/*
 * Record str in hash by inserting a private malloc'ed copy of it as the
 * key, with the dummy value (void *)1.
 *
 * Running out of memory is fatal.  Quietly skipping the entry is not an
 * option: clean_store_ treats any key missing from the table as orphaned
 * and, in recovery mode, deletes it from the database.
 *
 * NOTE(review): the result of hashtable_insert is still ignored, as in
 * the original; confirm how it reports failure and handle it likewise.
 */
static void remember_string(struct hashtable *hash, const char *str)
{
	size_t size = strlen(str) + 1;
	char *k = malloc(size);

	if (!k)
		barf_perror("Could not allocate memory to remember '%s'", str);

	memcpy(k, str, size);
	hashtable_insert(hash, k, (void *)1);
}
1530 /**
1531 * A node has a children field that names the children of the node, separated
1532 * by NULs. We check whether there are entries in there that are duplicated
1533 * (and if so, delete the second one), and whether there are any that do not
1534 * have a corresponding child node (and if so, delete them). Each valid child
1535 * is then recursively checked.
1537 * No deleting is performed if the recovery flag is cleared (i.e. -R was
1538 * passed on the command line).
1540 * As we go, we record each node in the given reachable hashtable. These
1541 * entries will be used later in clean_store.
1542 */
1543 static void check_store_(const char *name, struct hashtable *reachable)
1545 struct node *node = read_node(NULL, name);
1547 if (node) {
1548 size_t i = 0;
1550 struct hashtable * children =
1551 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1553 remember_string(reachable, name);
1555 while (i < node->childlen) {
1556 size_t childlen = strlen(node->children + i);
1557 char * childname = child_name(node->name,
1558 node->children + i);
1559 struct node *childnode = read_node(NULL, childname);
1561 if (childnode) {
1562 if (hashtable_search(children, childname)) {
1563 log("check_store: '%s' is duplicated!",
1564 childname);
1566 if (recovery) {
1567 remove_child_entry(NULL, node,
1568 i);
1569 i -= childlen + 1;
1572 else {
1573 remember_string(children, childname);
1574 check_store_(childname, reachable);
1577 else {
1578 log("check_store: No child '%s' found!\n",
1579 childname);
1581 if (recovery) {
1582 remove_child_entry(NULL, node, i);
1583 i -= childlen + 1;
1587 talloc_free(childnode);
1588 talloc_free(childname);
1589 i += childlen + 1;
1592 hashtable_destroy(children, 0 /* Don't free values (they are
1593 all (void *)1) */);
1594 talloc_free(node);
1596 else {
1597 /* Impossible, because no database should ever be without the
1598 root, and otherwise, we've just checked in our caller
1599 (which made a recursive call to get here). */
1601 log("check_store: No child '%s' found: impossible!", name);
1606 /**
1607 * Helper to clean_store below.
1608 */
1609 static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
1610 void *private)
1612 struct hashtable *reachable = private;
1613 char * name = talloc_strndup(NULL, key.dptr, key.dsize);
1615 if (!hashtable_search(reachable, name)) {
1616 log("clean_store: '%s' is orphaned!", name);
1617 if (recovery) {
1618 tdb_delete(tdb, key);
1622 talloc_free(name);
1624 return 0;
1628 /**
1629 * Given the list of reachable nodes, iterate over the whole store, and
1630 * remove any that were not reached.
1631 */
1632 static void clean_store(struct hashtable *reachable)
1634 tdb_traverse(tdb_ctx, &clean_store_, reachable);
1638 static void check_store(void)
1640 char * root = talloc_strdup(NULL, "/");
1641 struct hashtable * reachable =
1642 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1644 log("Checking store ...");
1645 check_store_(root, reachable);
1646 clean_store(reachable);
1647 log("Checking store complete.");
1649 hashtable_destroy(reachable, 0 /* Don't free values (they are all
1650 (void *)1) */);
1651 talloc_free(root);
1655 /* Something is horribly wrong: check the store. */
1656 static void corrupt(struct connection *conn, const char *fmt, ...)
1658 va_list arglist;
1659 char *str;
1660 int saved_errno = errno;
1662 va_start(arglist, fmt);
1663 str = talloc_vasprintf(NULL, fmt, arglist);
1664 va_end(arglist);
1666 log("corruption detected by connection %i: err %s: %s",
1667 conn ? (int)conn->id : -1, strerror(saved_errno), str);
1669 #ifdef TESTING
1670 /* Allow them to attach debugger. */
1671 sleep(30);
1672 #endif
1673 check_store();
1677 static void write_pidfile(const char *pidfile)
1679 char buf[100];
1680 int len;
1681 int fd;
1683 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1684 if (fd == -1)
1685 barf_perror("Opening pid file %s", pidfile);
1687 /* We exit silently if daemon already running. */
1688 if (lockf(fd, F_TLOCK, 0) == -1)
1689 exit(0);
1691 len = sprintf(buf, "%d\n", getpid());
1692 if (write(fd, buf, len) != len)
1693 barf_perror("Writing pid file %s", pidfile);
/* Fork; the parent exits with status 0, only the child returns. */
static void fork_and_exit_parent(void)
{
	pid_t pid = fork();

	if (pid < 0)
		barf_perror("Failed to fork daemon");
	if (pid != 0)
		exit(0);
}

/* Detach from the invoking terminal and session (after Stevens). */
static void daemonize(void)
{
	/* Separate from our parent via fork, so init inherits us. */
	fork_and_exit_parent();

	/* Session leader so ^C doesn't whack us. */
	setsid();

	/* Let session leader exit so child cannot regain CTTY */
	fork_and_exit_parent();

#ifndef TESTING	/* Relative paths for socket names */
	/* Move off any mount points we might be in. */
	if (chdir("/") == -1)
		barf_perror("Failed to chdir");
#endif
	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/*
 * Print the command-line help to stderr.
 *
 * The watch quota is listed as --watch-nb, which is the name the option
 * table actually accepts.
 */
static void usage(void)
{
	fprintf(stderr,
"Usage:\n"
"\n"
" xenstored <options>\n"
"\n"
"where options may include:\n"
"\n"
" --no-domain-init to state that xenstored should not initialise dom0,\n"
" --pid-file <file> giving a file for the daemon's pid to be written,\n"
" --help to output this message,\n"
" --no-fork to request that the daemon does not fork,\n"
" --output-pid to request that the pid of the daemon is output,\n"
" --trace-file <file> giving the file for logging, and\n"
" --entry-nb <nb> limit the number of entries per domain,\n"
" --entry-size <size> limit the size of entry per domain, and\n"
" --watch-nb <nb> limit the number of watches per domain,\n"
" --transaction <nb> limit the number of transaction allowed per domain,\n"
" --no-recovery to request that no recovery should be attempted when\n"
" the store is corrupted (debug only),\n"
" --preserve-local to request that /local is preserved on start-up,\n"
" --verbose to request verbose execution.\n");
}
/*
 * Long options understood by main().  Each entry maps to the short
 * option letter main() switches on; entries marked required_argument
 * take a value.
 */
static struct option options[] = {
	{ "no-domain-init", no_argument,       NULL, 'D' },
	{ "entry-nb",       required_argument, NULL, 'E' },
	{ "pid-file",       required_argument, NULL, 'F' },
	{ "help",           no_argument,       NULL, 'H' },
	{ "no-fork",        no_argument,       NULL, 'N' },
	{ "output-pid",     no_argument,       NULL, 'P' },
	{ "entry-size",     required_argument, NULL, 'S' },
	{ "trace-file",     required_argument, NULL, 'T' },
	{ "transaction",    required_argument, NULL, 't' },
	{ "no-recovery",    no_argument,       NULL, 'R' },
	{ "preserve-local", no_argument,       NULL, 'L' },
	{ "verbose",        no_argument,       NULL, 'V' },
	{ "watch-nb",       required_argument, NULL, 'W' },
	{ NULL,             0,                 NULL, 0 }
};
1768 extern void dump_conn(struct connection *conn);
1770 int main(int argc, char *argv[])
1772 int opt, *sock, *ro_sock, max;
1773 struct sockaddr_un addr;
1774 fd_set inset, outset;
1775 bool dofork = true;
1776 bool outputpid = false;
1777 bool no_domain_init = false;
1778 const char *pidfile = NULL;
1779 int evtchn_fd = -1;
1781 while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
1782 NULL)) != -1) {
1783 switch (opt) {
1784 case 'D':
1785 no_domain_init = true;
1786 break;
1787 case 'E':
1788 quota_nb_entry_per_domain = strtol(optarg, NULL, 10);
1789 break;
1790 case 'F':
1791 pidfile = optarg;
1792 break;
1793 case 'H':
1794 usage();
1795 return 0;
1796 case 'N':
1797 dofork = false;
1798 break;
1799 case 'P':
1800 outputpid = true;
1801 break;
1802 case 'R':
1803 recovery = false;
1804 break;
1805 case 'L':
1806 remove_local = false;
1807 break;
1808 case 'S':
1809 quota_max_entry_size = strtol(optarg, NULL, 10);
1810 break;
1811 case 't':
1812 quota_max_transaction = strtol(optarg, NULL, 10);
1813 break;
1814 case 'T':
1815 tracefile = optarg;
1816 break;
1817 case 'V':
1818 verbose = true;
1819 break;
1820 case 'W':
1821 quota_nb_watch_per_domain = strtol(optarg, NULL, 10);
1822 break;
1825 if (optind != argc)
1826 barf("%s: No arguments desired", argv[0]);
1828 reopen_log();
1830 /* make sure xenstored directory exists */
1831 if (mkdir(xs_daemon_rundir(), 0755)) {
1832 if (errno != EEXIST) {
1833 perror("error: mkdir daemon rundir");
1834 exit(-1);
1838 if (mkdir(xs_daemon_rootdir(), 0755)) {
1839 if (errno != EEXIST) {
1840 perror("error: mkdir daemon rootdir");
1841 exit(-1);
1845 if (dofork) {
1846 openlog("xenstored", 0, LOG_DAEMON);
1847 daemonize();
1849 if (pidfile)
1850 write_pidfile(pidfile);
1852 talloc_enable_leak_report_full();
1854 /* Create sockets for them to listen to. */
1855 sock = talloc(talloc_autofree_context(), int);
1856 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1857 if (*sock < 0)
1858 barf_perror("Could not create socket");
1859 ro_sock = talloc(talloc_autofree_context(), int);
1860 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1861 if (*ro_sock < 0)
1862 barf_perror("Could not create socket");
1863 talloc_set_destructor(sock, destroy_fd);
1864 talloc_set_destructor(ro_sock, destroy_fd);
1866 /* Don't kill us with SIGPIPE. */
1867 signal(SIGPIPE, SIG_IGN);
1869 /* FIXME: Be more sophisticated, don't mug running daemon. */
1870 unlink(xs_daemon_socket());
1871 unlink(xs_daemon_socket_ro());
1873 addr.sun_family = AF_UNIX;
1874 strcpy(addr.sun_path, xs_daemon_socket());
1875 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1876 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1877 strcpy(addr.sun_path, xs_daemon_socket_ro());
1878 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1879 barf_perror("Could not bind socket to %s",
1880 xs_daemon_socket_ro());
1881 if (chmod(xs_daemon_socket(), 0600) != 0
1882 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1883 barf_perror("Could not chmod sockets");
1885 if (listen(*sock, 1) != 0
1886 || listen(*ro_sock, 1) != 0)
1887 barf_perror("Could not listen on sockets");
1889 if (pipe(reopen_log_pipe)) {
1890 barf_perror("pipe");
1893 /* Setup the database */
1894 setup_structure();
1896 /* Listen to hypervisor. */
1897 if (!no_domain_init)
1898 domain_init();
1900 /* Restore existing connections. */
1901 restore_existing_connections();
1903 if (outputpid) {
1904 printf("%i\n", getpid());
1905 fflush(stdout);
1908 /* close stdin/stdout now we're ready to accept connections */
1909 if (dofork) {
1910 close(STDIN_FILENO);
1911 close(STDOUT_FILENO);
1912 close(STDERR_FILENO);
1915 signal(SIGHUP, trigger_reopen_log);
1917 #ifdef TESTING
1918 signal(SIGUSR1, stop_failtest);
1919 #endif
1921 if (xce_handle != -1)
1922 evtchn_fd = xc_evtchn_fd(xce_handle);
1924 /* Get ready to listen to the tools. */
1925 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1927 /* Main loop. */
1928 /* FIXME: Rewrite so noone can starve. */
1929 for (;;) {
1930 struct connection *i;
1932 if (select(max+1, &inset, &outset, NULL, NULL) < 0) {
1933 if (errno == EINTR)
1934 continue;
1935 barf_perror("Select failed");
1938 if (FD_ISSET(reopen_log_pipe[0], &inset)) {
1939 char c;
1940 if (read(reopen_log_pipe[0], &c, 1) != 1)
1941 barf_perror("read failed");
1942 reopen_log();
1945 if (FD_ISSET(*sock, &inset))
1946 accept_connection(*sock, true);
1948 if (FD_ISSET(*ro_sock, &inset))
1949 accept_connection(*ro_sock, false);
1951 if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
1952 handle_event();
1954 list_for_each_entry(i, &connections, list) {
1955 if (i->domain)
1956 continue;
1958 /* Operations can delete themselves or others
1959 * (xs_release): list is not safe after input,
1960 * so break. */
1961 if (FD_ISSET(i->fd, &inset)) {
1962 handle_input(i);
1963 break;
1965 if (FD_ISSET(i->fd, &outset)) {
1966 handle_output(i);
1967 break;
1971 /* Handle all possible I/O for domain connections. */
1972 more:
1973 list_for_each_entry(i, &connections, list) {
1974 if (!i->domain)
1975 continue;
1977 if (domain_can_read(i)) {
1978 handle_input(i);
1979 goto more;
1982 if (domain_can_write(i) && !list_empty(&i->out_list)) {
1983 handle_output(i);
1984 goto more;
1988 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1992 /*
1993 * Local variables:
1994 * c-file-style: "linux"
1995 * indent-tabs-mode: t
1996 * c-indent-level: 8
1997 * c-basic-offset: 8
1998 * tab-width: 8
1999 * End:
2000 */