ia64/xen-unstable

view tools/xenstore/xenstored_core.c @ 14978:18844eff507a

Handle EOF on xenstored socket.
Suggested by Pavel Kankovsky <peak@argo.troja.mff.cuni.cz>.

Signed-off-by: Steven Hand <steven@xensource.com>
author Steven Hand <steven@xensource.com>
date Fri Apr 27 16:17:44 2007 +0100 (2007-04-27)
parents 9e44519ee9a2
children eb71f258e855
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored_core.h"
48 #include "xenstored_watch.h"
49 #include "xenstored_transaction.h"
50 #include "xenstored_domain.h"
51 #include "xenctrl.h"
52 #include "tdb.h"
54 #include "hashtable.h"
57 extern int xce_handle; /* in xenstored_domain.c */
59 static bool verbose = false;
60 LIST_HEAD(connections);
61 static int tracefd = -1;
62 static bool recovery = true;
63 static bool remove_local = true;
64 static int reopen_log_pipe[2];
65 static char *tracefile = NULL;
66 static TDB_CONTEXT *tdb_ctx;
68 static void corrupt(struct connection *conn, const char *fmt, ...);
69 static void check_store(void);
/*
 * Format a message and send it to both the trace file and syslog.
 * If the formatted string cannot be allocated the message is dropped
 * instead of handing a NULL pointer to a "%s" conversion.
 */
#define log(...)							\
	do {								\
		char *s = talloc_asprintf(NULL, __VA_ARGS__);		\
		if (s) {						\
			trace("%s\n", s);				\
			syslog(LOG_ERR, "%s", s);			\
			talloc_free(s);					\
		}							\
	} while (0)
80 int quota_nb_entry_per_domain = 1000;
81 int quota_nb_watch_per_domain = 128;
82 int quota_max_entry_size = 2048; /* 2K */
83 int quota_max_transaction = 10;
85 #ifdef TESTING
86 static bool failtest = false;
88 /* We override talloc's malloc. */
89 void *test_malloc(size_t size)
90 {
91 /* 1 in 20 means only about 50% of connections establish. */
92 if (failtest && (random() % 32) == 0)
93 return NULL;
94 return malloc(size);
95 }
97 static void stop_failtest(int signum __attribute__((unused)))
98 {
99 failtest = false;
100 }
102 /* Need these before we #define away write_all/mkdir in testing.h */
103 bool test_write_all(int fd, void *contents, unsigned int len);
104 bool test_write_all(int fd, void *contents, unsigned int len)
105 {
106 if (failtest && (random() % 8) == 0) {
107 if (len)
108 len = random() % len;
109 write(fd, contents, len);
110 errno = ENOSPC;
111 return false;
112 }
113 return xs_write_all(fd, contents, len);
114 }
116 int test_mkdir(const char *dir, int perms);
117 int test_mkdir(const char *dir, int perms)
118 {
119 if (failtest && (random() % 8) == 0) {
120 errno = ENOSPC;
121 return -1;
122 }
123 return mkdir(dir, perms);
124 }
125 #endif /* TESTING */
127 #include "xenstored_test.h"
129 TDB_CONTEXT *tdb_context(struct connection *conn)
130 {
131 /* conn = NULL used in manual_node at setup. */
132 if (!conn || !conn->transaction)
133 return tdb_ctx;
134 return tdb_transaction_context(conn->transaction);
135 }
137 bool replace_tdb(const char *newname, TDB_CONTEXT *newtdb)
138 {
139 if (rename(newname, xs_daemon_tdb()) != 0)
140 return false;
141 tdb_close(tdb_ctx);
142 tdb_ctx = talloc_steal(talloc_autofree_context(), newtdb);
143 return true;
144 }
146 static char *sockmsg_string(enum xsd_sockmsg_type type)
147 {
148 switch (type) {
149 case XS_DEBUG: return "DEBUG";
150 case XS_DIRECTORY: return "DIRECTORY";
151 case XS_READ: return "READ";
152 case XS_GET_PERMS: return "GET_PERMS";
153 case XS_WATCH: return "WATCH";
154 case XS_UNWATCH: return "UNWATCH";
155 case XS_TRANSACTION_START: return "TRANSACTION_START";
156 case XS_TRANSACTION_END: return "TRANSACTION_END";
157 case XS_INTRODUCE: return "INTRODUCE";
158 case XS_RELEASE: return "RELEASE";
159 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
160 case XS_WRITE: return "WRITE";
161 case XS_MKDIR: return "MKDIR";
162 case XS_RM: return "RM";
163 case XS_SET_PERMS: return "SET_PERMS";
164 case XS_WATCH_EVENT: return "WATCH_EVENT";
165 case XS_ERROR: return "ERROR";
166 case XS_IS_DOMAIN_INTRODUCED: return "XS_IS_DOMAIN_INTRODUCED";
167 case XS_RESUME: return "RESUME";
168 default:
169 return "**UNKNOWN**";
170 }
171 }
173 void trace(const char *fmt, ...)
174 {
175 va_list arglist;
176 char *str;
177 char sbuf[1024];
178 int ret, dummy;
180 if (tracefd < 0)
181 return;
183 /* try to use a static buffer */
184 va_start(arglist, fmt);
185 ret = vsnprintf(sbuf, 1024, fmt, arglist);
186 va_end(arglist);
188 if (ret <= 1024) {
189 dummy = write(tracefd, sbuf, ret);
190 return;
191 }
193 /* fail back to dynamic allocation */
194 va_start(arglist, fmt);
195 str = talloc_vasprintf(NULL, fmt, arglist);
196 va_end(arglist);
197 dummy = write(tracefd, str, strlen(str));
198 talloc_free(str);
199 }
201 static void trace_io(const struct connection *conn,
202 const char *prefix,
203 const struct buffered_data *data)
204 {
205 unsigned int i;
206 time_t now;
207 struct tm *tm;
209 if (tracefd < 0)
210 return;
212 now = time(NULL);
213 tm = localtime(&now);
215 trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (", prefix, conn,
216 tm->tm_year + 1900, tm->tm_mon + 1,
217 tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
218 sockmsg_string(data->hdr.msg.type));
220 for (i = 0; i < data->hdr.msg.len; i++)
221 trace("%c", (data->buffer[i] != '\0') ? data->buffer[i] : ' ');
222 trace(")\n");
223 }
/* Record in the trace file that an object of the given type was created. */
void trace_create(const void *data, const char *type)
{
	trace("CREATE %s %p\n", type, data);
}
/* Record in the trace file that an object of the given type was destroyed. */
void trace_destroy(const void *data, const char *type)
{
	trace("DESTROY %s %p\n", type, data);
}
235 /**
236 * Signal handler for SIGHUP, which requests that the trace log is reopened
237 * (in the main loop). A single byte is written to reopen_log_pipe, to awaken
238 * the select() in the main loop.
239 */
240 static void trigger_reopen_log(int signal __attribute__((unused)))
241 {
242 char c = 'A';
243 int dummy;
244 dummy = write(reopen_log_pipe[1], &c, 1);
245 }
248 static void reopen_log(void)
249 {
250 if (tracefile) {
251 if (tracefd > 0)
252 close(tracefd);
254 tracefd = open(tracefile, O_WRONLY|O_CREAT|O_APPEND, 0600);
256 if (tracefd < 0)
257 perror("Could not open tracefile");
258 else
259 trace("\n***\n");
260 }
261 }
264 static bool write_messages(struct connection *conn)
265 {
266 int ret;
267 struct buffered_data *out;
269 out = list_top(&conn->out_list, struct buffered_data, list);
270 if (out == NULL)
271 return true;
273 if (out->inhdr) {
274 if (verbose)
275 xprintf("Writing msg %s (%.*s) out to %p\n",
276 sockmsg_string(out->hdr.msg.type),
277 out->hdr.msg.len,
278 out->buffer, conn);
279 ret = conn->write(conn, out->hdr.raw + out->used,
280 sizeof(out->hdr) - out->used);
281 if (ret < 0)
282 return false;
284 out->used += ret;
285 if (out->used < sizeof(out->hdr))
286 return true;
288 out->inhdr = false;
289 out->used = 0;
291 /* Second write might block if non-zero. */
292 if (out->hdr.msg.len && !conn->domain)
293 return true;
294 }
296 ret = conn->write(conn, out->buffer + out->used,
297 out->hdr.msg.len - out->used);
298 if (ret < 0)
299 return false;
301 out->used += ret;
302 if (out->used != out->hdr.msg.len)
303 return true;
305 trace_io(conn, "OUT", out);
307 list_del(&out->list);
308 talloc_free(out);
310 return true;
311 }
313 static int destroy_conn(void *_conn)
314 {
315 struct connection *conn = _conn;
317 /* Flush outgoing if possible, but don't block. */
318 if (!conn->domain) {
319 fd_set set;
320 struct timeval none;
322 FD_ZERO(&set);
323 FD_SET(conn->fd, &set);
324 none.tv_sec = none.tv_usec = 0;
326 while (!list_empty(&conn->out_list)
327 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
328 if (!write_messages(conn))
329 break;
330 close(conn->fd);
331 }
332 list_del(&conn->list);
333 trace_destroy(conn, "connection");
334 return 0;
335 }
/*
 * Add fd to set and raise *max to fd if fd is larger.
 * Negative descriptors are ignored.
 */
static void set_fd(int fd, fd_set *set, int *max)
{
	if (fd >= 0) {
		FD_SET(fd, set);
		if (*max < fd)
			*max = fd;
	}
}
348 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock)
349 {
350 struct connection *i;
351 int max = -1;
353 FD_ZERO(inset);
354 FD_ZERO(outset);
356 set_fd(sock, inset, &max);
357 set_fd(ro_sock, inset, &max);
358 set_fd(reopen_log_pipe[0], inset, &max);
360 if (xce_handle != -1)
361 set_fd(xc_evtchn_fd(xce_handle), inset, &max);
363 list_for_each_entry(i, &connections, list) {
364 if (i->domain)
365 continue;
366 set_fd(i->fd, inset, &max);
367 if (!list_empty(&i->out_list))
368 FD_SET(i->fd, outset);
369 }
370 return max;
371 }
/* Destructor: close the descriptor stored in the int that _fd points to. */
static int destroy_fd(void *_fd)
{
	const int *slot = _fd;

	close(*slot);
	return 0;
}
380 /* Return a pointer to an fd, self-closing and attached to this pathname. */
381 int *talloc_open(const char *pathname, int flags, int mode)
382 {
383 int *fd;
385 fd = talloc(pathname, int);
386 *fd = open(pathname, flags, mode);
387 if (*fd < 0) {
388 int saved_errno = errno;
389 talloc_free(fd);
390 errno = saved_errno;
391 return NULL;
392 }
393 talloc_set_destructor(fd, destroy_fd);
394 return fd;
395 }
/*
 * Is child a subnode of parent, or equal?
 * Every path counts as a child of "/".  Otherwise child must start
 * with parent and continue with '/' or end right there.
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (streq(parent, "/"))
		return true;

	plen = strlen(parent);
	if (strncmp(child, parent, plen) != 0)
		return false;

	switch (child[plen]) {
	case '/':
	case '\0':
		return true;
	default:
		return false;
	}
}
413 /* If it fails, returns NULL and sets errno. */
414 static struct node *read_node(struct connection *conn, const char *name)
415 {
416 TDB_DATA key, data;
417 uint32_t *p;
418 struct node *node;
419 TDB_CONTEXT * context = tdb_context(conn);
421 key.dptr = (void *)name;
422 key.dsize = strlen(name);
423 data = tdb_fetch(context, key);
425 if (data.dptr == NULL) {
426 if (tdb_error(context) == TDB_ERR_NOEXIST)
427 errno = ENOENT;
428 else {
429 log("TDB error on read: %s", tdb_errorstr(context));
430 errno = EIO;
431 }
432 return NULL;
433 }
435 node = talloc(name, struct node);
436 node->name = talloc_strdup(node, name);
437 node->parent = NULL;
438 node->tdb = tdb_context(conn);
439 talloc_steal(node, data.dptr);
441 /* Datalen, childlen, number of permissions */
442 p = (uint32_t *)data.dptr;
443 node->num_perms = p[0];
444 node->datalen = p[1];
445 node->childlen = p[2];
447 /* Permissions are struct xs_permissions. */
448 node->perms = (void *)&p[3];
449 /* Data is binary blob (usually ascii, no nul). */
450 node->data = node->perms + node->num_perms;
451 /* Children is strings, nul separated. */
452 node->children = node->data + node->datalen;
454 return node;
455 }
457 static bool write_node(struct connection *conn, const struct node *node)
458 {
459 /*
460 * conn will be null when this is called from manual_node.
461 * tdb_context copes with this.
462 */
464 TDB_DATA key, data;
465 void *p;
467 key.dptr = (void *)node->name;
468 key.dsize = strlen(node->name);
470 data.dsize = 3*sizeof(uint32_t)
471 + node->num_perms*sizeof(node->perms[0])
472 + node->datalen + node->childlen;
474 if (domain_is_unprivileged(conn) && data.dsize >= quota_max_entry_size)
475 goto error;
477 data.dptr = talloc_size(node, data.dsize);
478 ((uint32_t *)data.dptr)[0] = node->num_perms;
479 ((uint32_t *)data.dptr)[1] = node->datalen;
480 ((uint32_t *)data.dptr)[2] = node->childlen;
481 p = data.dptr + 3 * sizeof(uint32_t);
483 memcpy(p, node->perms, node->num_perms*sizeof(node->perms[0]));
484 p += node->num_perms*sizeof(node->perms[0]);
485 memcpy(p, node->data, node->datalen);
486 p += node->datalen;
487 memcpy(p, node->children, node->childlen);
489 /* TDB should set errno, but doesn't even set ecode AFAICT. */
490 if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
491 corrupt(conn, "Write of %s failed", key.dptr);
492 goto error;
493 }
494 return true;
495 error:
496 errno = ENOSPC;
497 return false;
498 }
500 static enum xs_perm_type perm_for_conn(struct connection *conn,
501 struct xs_permissions *perms,
502 unsigned int num)
503 {
504 unsigned int i;
505 enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
507 if (!conn->can_write)
508 mask &= ~XS_PERM_WRITE;
510 /* Owners and tools get it all... */
511 if (!conn->id || perms[0].id == conn->id)
512 return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
514 for (i = 1; i < num; i++)
515 if (perms[i].id == conn->id)
516 return perms[i].perms & mask;
518 return perms[0].perms & mask;
519 }
/*
 * Return a newly allocated copy of the parent path of node, allocated
 * with node as its talloc parent.  Paths with no '/' after the first
 * character have "/" as their parent.
 */
static char *get_parent(const char *node)
{
	char *last_slash = strrchr(node + 1, '/');

	if (last_slash)
		return talloc_asprintf(node, "%.*s",
				       (int)(last_slash - node), node);

	return talloc_strdup(node, "/");
}
529 /* What do parents say? */
530 static enum xs_perm_type ask_parents(struct connection *conn, const char *name)
531 {
532 struct node *node;
534 do {
535 name = get_parent(name);
536 node = read_node(conn, name);
537 if (node)
538 break;
539 } while (!streq(name, "/"));
541 /* No permission at root? We're in trouble. */
542 if (!node)
543 corrupt(conn, "No permissions file at root");
545 return perm_for_conn(conn, node->perms, node->num_perms);
546 }
548 /* We have a weird permissions system. You can allow someone into a
549 * specific node without allowing it in the parents. If it's going to
550 * fail, however, we don't want the errno to indicate any information
551 * about the node. */
552 static int errno_from_parents(struct connection *conn, const char *node,
553 int errnum, enum xs_perm_type perm)
554 {
555 /* We always tell them about memory failures. */
556 if (errnum == ENOMEM)
557 return errnum;
559 if (ask_parents(conn, node) & perm)
560 return errnum;
561 return EACCES;
562 }
564 /* If it fails, returns NULL and sets errno. */
565 struct node *get_node(struct connection *conn,
566 const char *name,
567 enum xs_perm_type perm)
568 {
569 struct node *node;
571 if (!name || !is_valid_nodename(name)) {
572 errno = EINVAL;
573 return NULL;
574 }
575 node = read_node(conn, name);
576 /* If we don't have permission, we don't have node. */
577 if (node) {
578 if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
579 != perm) {
580 errno = EACCES;
581 node = NULL;
582 }
583 }
584 /* Clean up errno if they weren't supposed to know. */
585 if (!node)
586 errno = errno_from_parents(conn, name, errno, perm);
587 return node;
588 }
590 static struct buffered_data *new_buffer(void *ctx)
591 {
592 struct buffered_data *data;
594 data = talloc_zero(ctx, struct buffered_data);
595 if (data == NULL)
596 return NULL;
598 data->inhdr = true;
599 return data;
600 }
602 /* Return length of string (including nul) at this offset. */
603 static unsigned int get_string(const struct buffered_data *data,
604 unsigned int offset)
605 {
606 const char *nul;
608 if (offset >= data->used)
609 return 0;
611 nul = memchr(data->buffer + offset, 0, data->used - offset);
612 if (!nul)
613 return 0;
615 return nul - (data->buffer + offset) + 1;
616 }
618 /* Break input into vectors, return the number, fill in up to num of them. */
619 unsigned int get_strings(struct buffered_data *data,
620 char *vec[], unsigned int num)
621 {
622 unsigned int off, i, len;
624 off = i = 0;
625 while ((len = get_string(data, off)) != 0) {
626 if (i < num)
627 vec[i] = data->buffer + off;
628 i++;
629 off += len;
630 }
631 return i;
632 }
634 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
635 const void *data, unsigned int len)
636 {
637 struct buffered_data *bdata;
639 /* Message is a child of the connection context for auto-cleanup. */
640 bdata = new_buffer(conn);
641 bdata->buffer = talloc_array(bdata, char, len);
643 /* Echo request header in reply unless this is an async watch event. */
644 if (type != XS_WATCH_EVENT) {
645 memcpy(&bdata->hdr.msg, &conn->in->hdr.msg,
646 sizeof(struct xsd_sockmsg));
647 } else {
648 memset(&bdata->hdr.msg, 0, sizeof(struct xsd_sockmsg));
649 }
651 /* Update relevant header fields and fill in the message body. */
652 bdata->hdr.msg.type = type;
653 bdata->hdr.msg.len = len;
654 memcpy(bdata->buffer, data, len);
656 /* Queue for later transmission. */
657 list_add_tail(&bdata->list, &conn->out_list);
658 }
660 /* Some routines (write, mkdir, etc) just need a non-error return */
661 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
662 {
663 send_reply(conn, type, "OK", sizeof("OK"));
664 }
666 void send_error(struct connection *conn, int error)
667 {
668 unsigned int i;
670 for (i = 0; error != xsd_errors[i].errnum; i++) {
671 if (i == ARRAY_SIZE(xsd_errors) - 1) {
672 eprintf("xenstored: error %i untranslatable", error);
673 i = 0; /* EINVAL */
674 break;
675 }
676 }
677 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
678 strlen(xsd_errors[i].errstring) + 1);
679 }
/*
 * True if node consists only of ASCII letters, digits and the
 * characters '-', '/', '_' and '@'.  The empty string qualifies.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix covers the whole string iff it ends at the nul. */
	return node[strspn(node, allowed)] == '\0';
}
/*
 * Check the syntax of a node path: it must start with '/', must not
 * end with '/' unless it is exactly "/", must not contain "//" and may
 * only use the characters accepted by valid_chars().
 */
bool is_valid_nodename(const char *node)
{
	/* Must start in /. */
	if (!strstarts(node, "/"))
		return false;

	/* Cannot end in / (unless it's just "/"). */
	if (!streq(node, "/") && strends(node, "/"))
		return false;

	/* No double //. */
	if (strstr(node, "//") != NULL)
		return false;

	return valid_chars(node);
}
707 /* We expect one arg in the input: return NULL otherwise. */
708 static const char *onearg(struct buffered_data *in)
709 {
710 if (!in->used || get_string(in, 0) != in->used)
711 return NULL;
712 return in->buffer;
713 }
715 static char *perms_to_strings(const void *ctx,
716 struct xs_permissions *perms, unsigned int num,
717 unsigned int *len)
718 {
719 unsigned int i;
720 char *strings = NULL;
721 char buffer[MAX_STRLEN(unsigned int) + 1];
723 for (*len = 0, i = 0; i < num; i++) {
724 if (!xs_perm_to_string(&perms[i], buffer))
725 return NULL;
727 strings = talloc_realloc(ctx, strings, char,
728 *len + strlen(buffer) + 1);
729 strcpy(strings + *len, buffer);
730 *len += strlen(buffer) + 1;
731 }
732 return strings;
733 }
/*
 * Turn a relative node name into an absolute one by prepending the
 * connection's implicit path.  NULL names, names that already start
 * with '/' and names of connections without an implicit path are
 * returned unchanged.  A new string is allocated with node as its
 * talloc parent.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *base;

	if (node && !strstarts(node, "/")) {
		base = get_implicit_path(conn);
		if (base)
			return talloc_asprintf(node, "%s/%s", base, node);
	}
	return (char *)node;
}
/*
 * True if node is a non-NULL name starting with '@'.
 * Otherwise sets errno to EINVAL and returns false.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
756 static void send_directory(struct connection *conn, const char *name)
757 {
758 struct node *node;
760 name = canonicalize(conn, name);
761 node = get_node(conn, name, XS_PERM_READ);
762 if (!node) {
763 send_error(conn, errno);
764 return;
765 }
767 send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
768 }
770 static void do_read(struct connection *conn, const char *name)
771 {
772 struct node *node;
774 name = canonicalize(conn, name);
775 node = get_node(conn, name, XS_PERM_READ);
776 if (!node) {
777 send_error(conn, errno);
778 return;
779 }
781 send_reply(conn, XS_READ, node->data, node->datalen);
782 }
784 static void delete_node_single(struct connection *conn, struct node *node)
785 {
786 TDB_DATA key;
788 key.dptr = (void *)node->name;
789 key.dsize = strlen(node->name);
791 if (tdb_delete(tdb_context(conn), key) != 0) {
792 corrupt(conn, "Could not delete '%s'", node->name);
793 return;
794 }
795 domain_entry_dec(conn, node);
796 }
/*
 * Return the part of name after its last '/'.
 * Must not be /; name must contain at least one '/'.
 */
static char *basename(const char *name)
{
	char *last_slash = strrchr(name, '/');

	return last_slash + 1;
}
804 static struct node *construct_node(struct connection *conn, const char *name)
805 {
806 const char *base;
807 unsigned int baselen;
808 struct node *parent, *node;
809 char *children, *parentname = get_parent(name);
811 /* If parent doesn't exist, create it. */
812 parent = read_node(conn, parentname);
813 if (!parent)
814 parent = construct_node(conn, parentname);
815 if (!parent)
816 return NULL;
818 if (domain_entry(conn) >= quota_nb_entry_per_domain)
819 return NULL;
821 /* Add child to parent. */
822 base = basename(name);
823 baselen = strlen(base) + 1;
824 children = talloc_array(name, char, parent->childlen + baselen);
825 memcpy(children, parent->children, parent->childlen);
826 memcpy(children + parent->childlen, base, baselen);
827 parent->children = children;
828 parent->childlen += baselen;
830 /* Allocate node */
831 node = talloc(name, struct node);
832 node->tdb = tdb_context(conn);
833 node->name = talloc_strdup(node, name);
835 /* Inherit permissions, except domains own what they create */
836 node->num_perms = parent->num_perms;
837 node->perms = talloc_memdup(node, parent->perms,
838 node->num_perms * sizeof(node->perms[0]));
839 if (conn && conn->id)
840 node->perms[0].id = conn->id;
842 /* No children, no data */
843 node->children = node->data = NULL;
844 node->childlen = node->datalen = 0;
845 node->parent = parent;
846 domain_entry_inc(conn, node);
847 return node;
848 }
850 static int destroy_node(void *_node)
851 {
852 struct node *node = _node;
853 TDB_DATA key;
855 if (streq(node->name, "/"))
856 corrupt(NULL, "Destroying root node!");
858 key.dptr = (void *)node->name;
859 key.dsize = strlen(node->name);
861 tdb_delete(node->tdb, key);
862 return 0;
863 }
865 static struct node *create_node(struct connection *conn,
866 const char *name,
867 void *data, unsigned int datalen)
868 {
869 struct node *node, *i;
871 node = construct_node(conn, name);
872 if (!node)
873 return NULL;
875 node->data = data;
876 node->datalen = datalen;
878 /* We write out the nodes down, setting destructor in case
879 * something goes wrong. */
880 for (i = node; i; i = i->parent) {
881 if (!write_node(conn, i)) {
882 domain_entry_dec(conn, i);
883 return NULL;
884 }
885 talloc_set_destructor(i, destroy_node);
886 }
888 /* OK, now remove destructors so they stay around */
889 for (i = node; i; i = i->parent)
890 talloc_set_destructor(i, NULL);
891 return node;
892 }
894 /* path, data... */
895 static void do_write(struct connection *conn, struct buffered_data *in)
896 {
897 unsigned int offset, datalen;
898 struct node *node;
899 char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
900 char *name;
902 /* Extra "strings" can be created by binary data. */
903 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
904 send_error(conn, EINVAL);
905 return;
906 }
908 offset = strlen(vec[0]) + 1;
909 datalen = in->used - offset;
911 name = canonicalize(conn, vec[0]);
912 node = get_node(conn, name, XS_PERM_WRITE);
913 if (!node) {
914 /* No permissions, invalid input? */
915 if (errno != ENOENT) {
916 send_error(conn, errno);
917 return;
918 }
919 node = create_node(conn, name, in->buffer + offset, datalen);
920 if (!node) {
921 send_error(conn, errno);
922 return;
923 }
924 } else {
925 node->data = in->buffer + offset;
926 node->datalen = datalen;
927 if (!write_node(conn, node)){
928 send_error(conn, errno);
929 return;
930 }
931 }
933 add_change_node(conn->transaction, name, false);
934 fire_watches(conn, name, false);
935 send_ack(conn, XS_WRITE);
936 }
938 static void do_mkdir(struct connection *conn, const char *name)
939 {
940 struct node *node;
942 name = canonicalize(conn, name);
943 node = get_node(conn, name, XS_PERM_WRITE);
945 /* If it already exists, fine. */
946 if (!node) {
947 /* No permissions? */
948 if (errno != ENOENT) {
949 send_error(conn, errno);
950 return;
951 }
952 node = create_node(conn, name, NULL, 0);
953 if (!node) {
954 send_error(conn, errno);
955 return;
956 }
957 add_change_node(conn->transaction, name, false);
958 fire_watches(conn, name, false);
959 }
960 send_ack(conn, XS_MKDIR);
961 }
963 static void delete_node(struct connection *conn, struct node *node)
964 {
965 unsigned int i;
967 /* Delete self, then delete children. If we crash, then the worst
968 that can happen is the children will continue to take up space, but
969 will otherwise be unreachable. */
970 delete_node_single(conn, node);
972 /* Delete children, too. */
973 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
974 struct node *child;
976 child = read_node(conn,
977 talloc_asprintf(node, "%s/%s", node->name,
978 node->children + i));
979 if (child) {
980 delete_node(conn, child);
981 }
982 else {
983 trace("delete_node: No child '%s/%s' found!\n",
984 node->name, node->children + i);
985 /* Skip it, we've already deleted the parent. */
986 }
987 }
988 }
/*
 * Delete memory using memmove: remove len bytes at offset off from a
 * region of total bytes by shifting the tail down.  The last len bytes
 * of the region are left unchanged.
 *
 * The pointer is converted to char * before the arithmetic; arithmetic
 * on void * is a compiler extension, not standard C.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	char *bytes = mem;

	memmove(bytes + off, bytes + off + len, total - off - len);
}
998 static bool remove_child_entry(struct connection *conn, struct node *node,
999 size_t offset)
1001 size_t childlen = strlen(node->children + offset);
1002 memdel(node->children, offset, childlen + 1, node->childlen);
1003 node->childlen -= childlen + 1;
1004 return write_node(conn, node);
1008 static bool delete_child(struct connection *conn,
1009 struct node *node, const char *childname)
1011 unsigned int i;
1013 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
1014 if (streq(node->children+i, childname)) {
1015 return remove_child_entry(conn, node, i);
1018 corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
1019 return false;
/*
 * Remove node (called name) and its subtree.
 *
 * Delete from parent first, then if we crash, the worst that can
 * happen is the child will continue to take up space, but will
 * otherwise be unreachable.
 *
 * Returns 1 on success.  If the parent cannot be read or does not list
 * the child, EINVAL is sent to conn and 0 is returned.
 */
static int _rm(struct connection *conn, struct node *node, const char *name)
{
	struct node *parent = read_node(conn, get_parent(name));

	if (parent && delete_child(conn, parent, basename(name))) {
		delete_node(conn, node);
		return 1;
	}

	send_error(conn, EINVAL);
	return 0;
}
/*
 * Remove a node on the daemon's own behalf (no connection).
 * A talloc copy of name is made for the lookup; read_node() allocates
 * the node off the name it is given.
 */
static void internal_rm(const char *name)
{
	char *copy = talloc_strdup(NULL, name);
	struct node *target = read_node(NULL, copy);

	if (target != NULL)
		_rm(NULL, target, copy);

	talloc_free(target);
	talloc_free(copy);
}
1055 static void do_rm(struct connection *conn, const char *name)
1057 struct node *node;
1059 name = canonicalize(conn, name);
1060 node = get_node(conn, name, XS_PERM_WRITE);
1061 if (!node) {
1062 /* Didn't exist already? Fine, if parent exists. */
1063 if (errno == ENOENT) {
1064 node = read_node(conn, get_parent(name));
1065 if (node) {
1066 send_ack(conn, XS_RM);
1067 return;
1069 /* Restore errno, just in case. */
1070 errno = ENOENT;
1072 send_error(conn, errno);
1073 return;
1076 if (streq(name, "/")) {
1077 send_error(conn, EINVAL);
1078 return;
1081 if (_rm(conn, node, name)) {
1082 add_change_node(conn->transaction, name, true);
1083 fire_watches(conn, name, true);
1084 send_ack(conn, XS_RM);
1089 static void do_get_perms(struct connection *conn, const char *name)
1091 struct node *node;
1092 char *strings;
1093 unsigned int len;
1095 name = canonicalize(conn, name);
1096 node = get_node(conn, name, XS_PERM_READ);
1097 if (!node) {
1098 send_error(conn, errno);
1099 return;
1102 strings = perms_to_strings(node, node->perms, node->num_perms, &len);
1103 if (!strings)
1104 send_error(conn, errno);
1105 else
1106 send_reply(conn, XS_GET_PERMS, strings, len);
1109 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1111 unsigned int num;
1112 struct xs_permissions *perms;
1113 char *name, *permstr;
1114 struct node *node;
1116 num = xs_count_strings(in->buffer, in->used);
1117 if (num < 2) {
1118 send_error(conn, EINVAL);
1119 return;
1122 /* First arg is node name. */
1123 name = canonicalize(conn, in->buffer);
1124 permstr = in->buffer + strlen(in->buffer) + 1;
1125 num--;
1127 /* We must own node to do this (tools can do this too). */
1128 node = get_node(conn, name, XS_PERM_WRITE|XS_PERM_OWNER);
1129 if (!node) {
1130 send_error(conn, errno);
1131 return;
1134 perms = talloc_array(node, struct xs_permissions, num);
1135 if (!xs_strings_to_perms(perms, num, permstr)) {
1136 send_error(conn, errno);
1137 return;
1140 /* Unprivileged domains may not change the owner. */
1141 if (domain_is_unprivileged(conn) &&
1142 perms[0].id != node->perms[0].id) {
1143 send_error(conn, EPERM);
1144 return;
1147 domain_entry_dec(conn, node);
1148 node->perms = perms;
1149 node->num_perms = num;
1150 domain_entry_inc(conn, node);
1152 if (!write_node(conn, node)) {
1153 send_error(conn, errno);
1154 return;
1157 add_change_node(conn->transaction, name, false);
1158 fire_watches(conn, name, false);
1159 send_ack(conn, XS_SET_PERMS);
1162 static void do_debug(struct connection *conn, struct buffered_data *in)
1164 int num;
1166 #ifndef TESTING
1167 if (conn->id != 0) {
1168 send_error(conn, EACCES);
1169 return;
1171 #endif
1173 num = xs_count_strings(in->buffer, in->used);
1175 if (streq(in->buffer, "print")) {
1176 if (num < 2) {
1177 send_error(conn, EINVAL);
1178 return;
1180 xprintf("debug: %s", in->buffer + get_string(in, 0));
1182 if (streq(in->buffer, "check"))
1183 check_store();
1184 #ifdef TESTING
1185 /* For testing, we allow them to set id. */
1186 if (streq(in->buffer, "setid")) {
1187 conn->id = atoi(in->buffer + get_string(in, 0));
1188 } else if (streq(in->buffer, "failtest")) {
1189 if (get_string(in, 0) < in->used)
1190 srandom(atoi(in->buffer + get_string(in, 0)));
1191 failtest = true;
1193 #endif /* TESTING */
1194 send_ack(conn, XS_DEBUG);
1197 /* Process "in" for conn: "in" will vanish after this conversation, so
1198 * we can talloc off it for temporary variables. May free "conn".
1199 */
1200 static void process_message(struct connection *conn, struct buffered_data *in)
1202 struct transaction *trans;
1204 trans = transaction_lookup(conn, in->hdr.msg.tx_id);
1205 if (IS_ERR(trans)) {
1206 send_error(conn, -PTR_ERR(trans));
1207 return;
1210 assert(conn->transaction == NULL);
1211 conn->transaction = trans;
1213 switch (in->hdr.msg.type) {
1214 case XS_DIRECTORY:
1215 send_directory(conn, onearg(in));
1216 break;
1218 case XS_READ:
1219 do_read(conn, onearg(in));
1220 break;
1222 case XS_WRITE:
1223 do_write(conn, in);
1224 break;
1226 case XS_MKDIR:
1227 do_mkdir(conn, onearg(in));
1228 break;
1230 case XS_RM:
1231 do_rm(conn, onearg(in));
1232 break;
1234 case XS_GET_PERMS:
1235 do_get_perms(conn, onearg(in));
1236 break;
1238 case XS_SET_PERMS:
1239 do_set_perms(conn, in);
1240 break;
1242 case XS_DEBUG:
1243 do_debug(conn, in);
1244 break;
1246 case XS_WATCH:
1247 do_watch(conn, in);
1248 break;
1250 case XS_UNWATCH:
1251 do_unwatch(conn, in);
1252 break;
1254 case XS_TRANSACTION_START:
1255 do_transaction_start(conn, in);
1256 break;
1258 case XS_TRANSACTION_END:
1259 do_transaction_end(conn, onearg(in));
1260 break;
1262 case XS_INTRODUCE:
1263 do_introduce(conn, in);
1264 break;
1266 case XS_IS_DOMAIN_INTRODUCED:
1267 do_is_domain_introduced(conn, onearg(in));
1268 break;
1270 case XS_RELEASE:
1271 do_release(conn, onearg(in));
1272 break;
1274 case XS_GET_DOMAIN_PATH:
1275 do_get_domain_path(conn, onearg(in));
1276 break;
1278 case XS_RESUME:
1279 do_resume(conn, onearg(in));
1280 break;
1282 default:
1283 eprintf("Client unknown operation %i", in->hdr.msg.type);
1284 send_error(conn, ENOSYS);
1285 break;
1288 conn->transaction = NULL;
1291 static void consider_message(struct connection *conn)
1293 if (verbose)
1294 xprintf("Got message %s len %i from %p\n",
1295 sockmsg_string(conn->in->hdr.msg.type),
1296 conn->in->hdr.msg.len, conn);
1298 process_message(conn, conn->in);
1300 talloc_free(conn->in);
1301 conn->in = new_buffer(conn);
1304 /* Errors in reading or allocating here mean we get out of sync, so we
1305 * drop the whole client connection. */
1306 static void handle_input(struct connection *conn)
1308 int bytes;
1309 struct buffered_data *in = conn->in;
1311 /* Not finished header yet? */
1312 if (in->inhdr) {
1313 bytes = conn->read(conn, in->hdr.raw + in->used,
1314 sizeof(in->hdr) - in->used);
1315 if (bytes <= 0)
1316 goto bad_client;
1317 in->used += bytes;
1318 if (in->used != sizeof(in->hdr))
1319 return;
1321 if (in->hdr.msg.len > PATH_MAX) {
1322 #ifndef TESTING
1323 syslog(LOG_ERR, "Client tried to feed us %i",
1324 in->hdr.msg.len);
1325 #endif
1326 goto bad_client;
1329 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1330 if (!in->buffer)
1331 goto bad_client;
1332 in->used = 0;
1333 in->inhdr = false;
1334 return;
1337 bytes = conn->read(conn, in->buffer + in->used,
1338 in->hdr.msg.len - in->used);
1339 if (bytes <= 0)
1340 goto bad_client;
1342 in->used += bytes;
1343 if (in->used != in->hdr.msg.len)
1344 return;
1346 trace_io(conn, "IN ", in);
1347 consider_message(conn);
1348 return;
1350 bad_client:
1351 /* Kill it. */
1352 talloc_free(conn);
/*
 * Flush queued output for conn; if write_messages() reports failure the
 * connection is freed.
 */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	talloc_free(conn);
}
1361 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1363 struct connection *new;
1365 new = talloc_zero(talloc_autofree_context(), struct connection);
1366 if (!new)
1367 return NULL;
1369 new->fd = -1;
1370 new->write = write;
1371 new->read = read;
1372 new->can_write = true;
1373 new->transaction_started = 0;
1374 INIT_LIST_HEAD(&new->out_list);
1375 INIT_LIST_HEAD(&new->watches);
1376 INIT_LIST_HEAD(&new->transaction_list);
1378 new->in = new_buffer(new);
1379 if (new->in == NULL) {
1380 talloc_free(new);
1381 return NULL;
1384 list_add_tail(&new->list, &connections);
1385 talloc_set_destructor(new, destroy_conn);
1386 trace_create(new, "connection");
1387 return new;
1390 static int writefd(struct connection *conn, const void *data, unsigned int len)
1392 return write(conn->fd, data, len);
1395 static int readfd(struct connection *conn, void *data, unsigned int len)
1397 return read(conn->fd, data, len);
1400 static void accept_connection(int sock, bool canwrite)
1402 int fd;
1403 struct connection *conn;
1405 fd = accept(sock, NULL, NULL);
1406 if (fd < 0)
1407 return;
1409 conn = new_connection(writefd, readfd);
1410 if (conn) {
1411 conn->fd = fd;
1412 conn->can_write = canwrite;
1413 } else
1414 close(fd);
#ifdef TESTING
/* Valgrind can check our writes better if we don't use mmap */
#define TDB_FLAGS TDB_NOMMAP

/*
 * Print every known connection to stdout: output-queue state, id (when
 * non-zero), progress of a partially received message, and its watches.
 * Useful for running under debugger.
 */
void dump_connection(void)
{
	struct connection *c;

	list_for_each_entry(c, &connections, list) {
		const char *state = list_empty(&c->out_list) ? "OK" : "BUSY";

		printf("Connection %p:\n", c);
		printf(" state = %s\n", state);
		if (c->id)
			printf(" id = %i\n", c->id);
		if (!c->in->inhdr || c->in->used) {
			const char *part = c->in->inhdr ? "header" : "data";

			printf(" got %i bytes of %s\n", c->in->used, part);
		}
#if 0
		if (c->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(c->out->hdr.msg.type),
			       c->out->buffer);
		if (c->transaction)
			dump_transaction(c);
		if (c->domain)
			dump_domain(c);
#endif
		dump_watches(c);
	}
}
#else
#define TDB_FLAGS 0
#endif
1451 /* We create initial nodes manually. */
1452 static void manual_node(const char *name, const char *child)
1454 struct node *node;
1455 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
1457 node = talloc_zero(NULL, struct node);
1458 node->name = name;
1459 node->perms = &perms;
1460 node->num_perms = 1;
1461 node->children = (char *)child;
1462 if (child)
1463 node->childlen = strlen(child) + 1;
1465 if (!write_node(NULL, node))
1466 barf_perror("Could not create initial node %s", name);
1467 talloc_free(node);
1470 static void setup_structure(void)
1472 char *tdbname;
1473 tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1474 tdb_ctx = tdb_open(tdbname, 0, TDB_FLAGS, O_RDWR, 0);
1476 if (tdb_ctx) {
1477 /* XXX When we make xenstored able to restart, this will have
1478 to become cleverer, checking for existing domains and not
1479 removing the corresponding entries, but for now xenstored
1480 cannot be restarted without losing all the registered
1481 watches, which breaks all the backend drivers anyway. We
1482 can therefore get away with just clearing /local and
1483 expecting Xend to put the appropriate entries back in.
1485 When this change is made it is important to note that
1486 dom0's entries must be cleaned up on reboot _before_ this
1487 daemon starts, otherwise the backend drivers and dom0's
1488 balloon driver will pick up stale entries. In the case of
1489 the balloon driver, this can be fatal.
1490 */
1491 char *tlocal = talloc_strdup(NULL, "/local");
1493 check_store();
1495 if (remove_local) {
1496 internal_rm("/local");
1497 create_node(NULL, tlocal, NULL, 0);
1499 check_store();
1502 talloc_free(tlocal);
1504 else {
1505 tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
1506 0640);
1507 if (!tdb_ctx)
1508 barf_perror("Could not create tdb file %s", tdbname);
1510 manual_node("/", "tool");
1511 manual_node("/tool", "xenstored");
1512 manual_node("/tool/xenstored", NULL);
1514 check_store();
/*
 * Hash callback for the string-keyed hashtables used by the store checker.
 *
 * k points to a NUL-terminated string.  The hash starts at 5381 and, for
 * each byte, becomes hash * 33 + byte.  Bytes are read as unsigned char so
 * the result does not depend on whether plain char is signed on the target.
 */
static unsigned int hash_from_key_fn(void *k)
{
	const unsigned char *str = k;
	unsigned int hash = 5381;
	unsigned int c;

	while ((c = *str++) != 0)
		hash = ((hash << 5) + hash) + c;

	return hash;
}
/*
 * Equality callback for string-keyed hashtables: returns 1 when both keys
 * are identical NUL-terminated strings, 0 otherwise.
 */
static int keys_equal_fn(void *key1, void *key2)
{
	const char *lhs = key1;
	const char *rhs = key2;

	return strcmp(lhs, rhs) == 0;
}
/*
 * Build the path of child s2 under parent s1 as a new talloc string with
 * a NULL context.  The root ("/") is special-cased so the result does not
 * start with a double slash.
 */
static char *child_name(const char *s1, const char *s2)
{
	if (strcmp(s1, "/") == 0)
		return talloc_asprintf(NULL, "/%s", s2);

	return talloc_asprintf(NULL, "%s/%s", s1, s2);
}
/*
 * Record str in the set "hash" by inserting a malloc'd copy of it as key
 * with the dummy value (void *)1.
 *
 * Running out of memory here is fatal: the copy was previously written
 * through an unchecked pointer, and silently skipping the entry would make
 * a live node look unreachable to the store checker.
 */
static void remember_string(struct hashtable *hash, const char *str)
{
	size_t size = strlen(str) + 1;
	char *k = malloc(size);

	if (!k)
		barf_perror("Could not allocate memory to remember '%s'", str);

	memcpy(k, str, size);
	hashtable_insert(hash, k, (void *)1);
}
1557 /**
1558 * A node has a children field that names the children of the node, separated
1559 * by NULs. We check whether there are entries in there that are duplicated
1560 * (and if so, delete the second one), and whether there are any that do not
1561 * have a corresponding child node (and if so, delete them). Each valid child
1562 * is then recursively checked.
1564 * No deleting is performed if the recovery flag is cleared (i.e. -R was
1565 * passed on the command line).
1567 * As we go, we record each node in the given reachable hashtable. These
1568 * entries will be used later in clean_store.
1569 */
1570 static void check_store_(const char *name, struct hashtable *reachable)
1572 struct node *node = read_node(NULL, name);
1574 if (node) {
1575 size_t i = 0;
1577 struct hashtable * children =
1578 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1580 remember_string(reachable, name);
1582 while (i < node->childlen) {
1583 size_t childlen = strlen(node->children + i);
1584 char * childname = child_name(node->name,
1585 node->children + i);
1586 struct node *childnode = read_node(NULL, childname);
1588 if (childnode) {
1589 if (hashtable_search(children, childname)) {
1590 log("check_store: '%s' is duplicated!",
1591 childname);
1593 if (recovery) {
1594 remove_child_entry(NULL, node,
1595 i);
1596 i -= childlen + 1;
1599 else {
1600 remember_string(children, childname);
1601 check_store_(childname, reachable);
1604 else {
1605 log("check_store: No child '%s' found!\n",
1606 childname);
1608 if (recovery) {
1609 remove_child_entry(NULL, node, i);
1610 i -= childlen + 1;
1614 talloc_free(childnode);
1615 talloc_free(childname);
1616 i += childlen + 1;
1619 hashtable_destroy(children, 0 /* Don't free values (they are
1620 all (void *)1) */);
1621 talloc_free(node);
1623 else {
1624 /* Impossible, because no database should ever be without the
1625 root, and otherwise, we've just checked in our caller
1626 (which made a recursive call to get here). */
1628 log("check_store: No child '%s' found: impossible!", name);
1633 /**
1634 * Helper to clean_store below.
1635 */
1636 static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
1637 void *private)
1639 struct hashtable *reachable = private;
1640 char * name = talloc_strndup(NULL, key.dptr, key.dsize);
1642 if (!hashtable_search(reachable, name)) {
1643 log("clean_store: '%s' is orphaned!", name);
1644 if (recovery) {
1645 tdb_delete(tdb, key);
1649 talloc_free(name);
1651 return 0;
1655 /**
1656 * Given the list of reachable nodes, iterate over the whole store, and
1657 * remove any that were not reached.
1658 */
1659 static void clean_store(struct hashtable *reachable)
1661 tdb_traverse(tdb_ctx, &clean_store_, reachable);
1665 static void check_store(void)
1667 char * root = talloc_strdup(NULL, "/");
1668 struct hashtable * reachable =
1669 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1671 log("Checking store ...");
1672 check_store_(root, reachable);
1673 clean_store(reachable);
1674 log("Checking store complete.");
1676 hashtable_destroy(reachable, 0 /* Don't free values (they are all
1677 (void *)1) */);
1678 talloc_free(root);
1682 /* Something is horribly wrong: check the store. */
1683 static void corrupt(struct connection *conn, const char *fmt, ...)
1685 va_list arglist;
1686 char *str;
1687 int saved_errno = errno;
1689 va_start(arglist, fmt);
1690 str = talloc_vasprintf(NULL, fmt, arglist);
1691 va_end(arglist);
1693 log("corruption detected by connection %i: err %s: %s",
1694 conn ? (int)conn->id : -1, strerror(saved_errno), str);
1696 #ifdef TESTING
1697 /* Allow them to attach debugger. */
1698 sleep(30);
1699 #endif
1700 check_store();
1704 static void write_pidfile(const char *pidfile)
1706 char buf[100];
1707 int len;
1708 int fd;
1710 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1711 if (fd == -1)
1712 barf_perror("Opening pid file %s", pidfile);
1714 /* We exit silently if daemon already running. */
1715 if (lockf(fd, F_TLOCK, 0) == -1)
1716 exit(0);
1718 len = sprintf(buf, "%ld\n", (long)getpid());
1719 if (write(fd, buf, len) != len)
1720 barf_perror("Writing pid file %s", pidfile);
/*
 * Detach from the invoking terminal and process group (after Stevens):
 * fork, start a new session, fork again, then (outside TESTING builds)
 * chdir to "/" and finally clear the umask.  A failed fork or chdir is
 * reported through barf_perror.
 */
static void daemonize(void)
{
	pid_t pid;

	/* Separate from our parent via fork, so init inherits us. */
	pid = fork();
	if (pid < 0)
		barf_perror("Failed to fork daemon");
	if (pid != 0)
		exit(0);

	/* Session leader so ^C doesn't whack us. */
	setsid();

	/* Let session leader exit so child cannot regain CTTY */
	pid = fork();
	if (pid < 0)
		barf_perror("Failed to fork daemon");
	if (pid != 0)
		exit(0);

#ifndef TESTING	/* Relative paths for socket names */
	/* Move off any mount points we might be in. */
	if (chdir("/") == -1)
		barf_perror("Failed to chdir");
#endif

	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/*
 * Print the command-line help to stderr.
 *
 * The long option names listed here mirror the entries of the options[]
 * table; in particular the watch quota is spelled --watch-nb, which is the
 * name the parser actually accepts.
 */
static void usage(void)
{
	fprintf(stderr,
"Usage:\n"
"\n"
"  xenstored <options>\n"
"\n"
"where options may include:\n"
"\n"
"  --no-domain-init    to state that xenstored should not initialise dom0,\n"
"  --pid-file <file>   giving a file for the daemon's pid to be written,\n"
"  --help              to output this message,\n"
"  --no-fork           to request that the daemon does not fork,\n"
"  --output-pid        to request that the pid of the daemon is output,\n"
"  --trace-file <file> giving the file for logging,\n"
"  --entry-nb <nb>     limit the number of entries per domain,\n"
"  --entry-size <size> limit the size of entry per domain,\n"
"  --watch-nb <nb>     limit the number of watches per domain,\n"
"  --transaction <nb>  limit the number of transaction allowed per domain,\n"
"  --no-recovery       to request that no recovery should be attempted when\n"
"                      the store is corrupted (debug only),\n"
"  --preserve-local    to request that /local is preserved on start-up, and\n"
"  --verbose           to request verbose execution.\n");
}
/*
 * Long options understood by main().  Each entry maps to the short option
 * character handled in main()'s option switch; entries marked
 * required_argument take a value.
 */
static struct option options[] = {
	{ "no-domain-init",	no_argument,		NULL, 'D' },
	{ "entry-nb",		required_argument,	NULL, 'E' },
	{ "pid-file",		required_argument,	NULL, 'F' },
	{ "help",		no_argument,		NULL, 'H' },
	{ "no-fork",		no_argument,		NULL, 'N' },
	{ "output-pid",		no_argument,		NULL, 'P' },
	{ "entry-size",		required_argument,	NULL, 'S' },
	{ "trace-file",		required_argument,	NULL, 'T' },
	{ "transaction",	required_argument,	NULL, 't' },
	{ "no-recovery",	no_argument,		NULL, 'R' },
	{ "preserve-local",	no_argument,		NULL, 'L' },
	{ "verbose",		no_argument,		NULL, 'V' },
	{ "watch-nb",		required_argument,	NULL, 'W' },
	{ NULL,			0,			NULL, 0 }
};
1795 extern void dump_conn(struct connection *conn);
1797 int main(int argc, char *argv[])
1799 int opt, *sock, *ro_sock, max;
1800 struct sockaddr_un addr;
1801 fd_set inset, outset;
1802 bool dofork = true;
1803 bool outputpid = false;
1804 bool no_domain_init = false;
1805 const char *pidfile = NULL;
1806 int evtchn_fd = -1;
1808 while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
1809 NULL)) != -1) {
1810 switch (opt) {
1811 case 'D':
1812 no_domain_init = true;
1813 break;
1814 case 'E':
1815 quota_nb_entry_per_domain = strtol(optarg, NULL, 10);
1816 break;
1817 case 'F':
1818 pidfile = optarg;
1819 break;
1820 case 'H':
1821 usage();
1822 return 0;
1823 case 'N':
1824 dofork = false;
1825 break;
1826 case 'P':
1827 outputpid = true;
1828 break;
1829 case 'R':
1830 recovery = false;
1831 break;
1832 case 'L':
1833 remove_local = false;
1834 break;
1835 case 'S':
1836 quota_max_entry_size = strtol(optarg, NULL, 10);
1837 break;
1838 case 't':
1839 quota_max_transaction = strtol(optarg, NULL, 10);
1840 break;
1841 case 'T':
1842 tracefile = optarg;
1843 break;
1844 case 'V':
1845 verbose = true;
1846 break;
1847 case 'W':
1848 quota_nb_watch_per_domain = strtol(optarg, NULL, 10);
1849 break;
1852 if (optind != argc)
1853 barf("%s: No arguments desired", argv[0]);
1855 reopen_log();
1857 /* make sure xenstored directory exists */
1858 if (mkdir(xs_daemon_rundir(), 0755)) {
1859 if (errno != EEXIST) {
1860 perror("error: mkdir daemon rundir");
1861 exit(-1);
1865 if (mkdir(xs_daemon_rootdir(), 0755)) {
1866 if (errno != EEXIST) {
1867 perror("error: mkdir daemon rootdir");
1868 exit(-1);
1872 if (dofork) {
1873 openlog("xenstored", 0, LOG_DAEMON);
1874 daemonize();
1876 if (pidfile)
1877 write_pidfile(pidfile);
1879 talloc_enable_leak_report_full();
1881 /* Create sockets for them to listen to. */
1882 sock = talloc(talloc_autofree_context(), int);
1883 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1884 if (*sock < 0)
1885 barf_perror("Could not create socket");
1886 ro_sock = talloc(talloc_autofree_context(), int);
1887 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1888 if (*ro_sock < 0)
1889 barf_perror("Could not create socket");
1890 talloc_set_destructor(sock, destroy_fd);
1891 talloc_set_destructor(ro_sock, destroy_fd);
1893 /* Don't kill us with SIGPIPE. */
1894 signal(SIGPIPE, SIG_IGN);
1896 /* FIXME: Be more sophisticated, don't mug running daemon. */
1897 unlink(xs_daemon_socket());
1898 unlink(xs_daemon_socket_ro());
1900 addr.sun_family = AF_UNIX;
1901 strcpy(addr.sun_path, xs_daemon_socket());
1902 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1903 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1904 strcpy(addr.sun_path, xs_daemon_socket_ro());
1905 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1906 barf_perror("Could not bind socket to %s",
1907 xs_daemon_socket_ro());
1908 if (chmod(xs_daemon_socket(), 0600) != 0
1909 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1910 barf_perror("Could not chmod sockets");
1912 if (listen(*sock, 1) != 0
1913 || listen(*ro_sock, 1) != 0)
1914 barf_perror("Could not listen on sockets");
1916 if (pipe(reopen_log_pipe)) {
1917 barf_perror("pipe");
1920 /* Setup the database */
1921 setup_structure();
1923 /* Listen to hypervisor. */
1924 if (!no_domain_init)
1925 domain_init();
1927 /* Restore existing connections. */
1928 restore_existing_connections();
1930 if (outputpid) {
1931 printf("%ld\n", (long)getpid());
1932 fflush(stdout);
1935 /* close stdin/stdout now we're ready to accept connections */
1936 if (dofork) {
1937 close(STDIN_FILENO);
1938 close(STDOUT_FILENO);
1939 close(STDERR_FILENO);
1942 signal(SIGHUP, trigger_reopen_log);
1944 #ifdef TESTING
1945 signal(SIGUSR1, stop_failtest);
1946 #endif
1948 if (xce_handle != -1)
1949 evtchn_fd = xc_evtchn_fd(xce_handle);
1951 /* Get ready to listen to the tools. */
1952 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1954 /* Tell the kernel we're up and running. */
1955 xenbus_notify_running();
1957 /* Main loop. */
1958 /* FIXME: Rewrite so noone can starve. */
1959 for (;;) {
1960 struct connection *i;
1962 if (select(max+1, &inset, &outset, NULL, NULL) < 0) {
1963 if (errno == EINTR)
1964 continue;
1965 barf_perror("Select failed");
1968 if (FD_ISSET(reopen_log_pipe[0], &inset)) {
1969 char c;
1970 if (read(reopen_log_pipe[0], &c, 1) != 1)
1971 barf_perror("read failed");
1972 reopen_log();
1975 if (FD_ISSET(*sock, &inset))
1976 accept_connection(*sock, true);
1978 if (FD_ISSET(*ro_sock, &inset))
1979 accept_connection(*ro_sock, false);
1981 if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
1982 handle_event();
1984 list_for_each_entry(i, &connections, list) {
1985 if (i->domain)
1986 continue;
1988 /* Operations can delete themselves or others
1989 * (xs_release): list is not safe after input,
1990 * so break. */
1991 if (FD_ISSET(i->fd, &inset)) {
1992 handle_input(i);
1993 break;
1995 if (FD_ISSET(i->fd, &outset)) {
1996 handle_output(i);
1997 break;
2001 /* Handle all possible I/O for domain connections. */
2002 more:
2003 list_for_each_entry(i, &connections, list) {
2004 if (!i->domain)
2005 continue;
2007 if (domain_can_read(i)) {
2008 handle_input(i);
2009 goto more;
2012 if (domain_can_write(i) && !list_empty(&i->out_list)) {
2013 handle_output(i);
2014 goto more;
2018 max = initialize_set(&inset, &outset, *sock, *ro_sock);
2022 /*
2023 * Local variables:
2024 * c-file-style: "linux"
2025 * indent-tabs-mode: t
2026 * c-indent-level: 8
2027 * c-basic-offset: 8
2028 * tab-width: 8
2029 * End:
2030 */