ia64/xen-unstable

view tools/xenstore/xenstored_core.c @ 19243:226031d62fc5

xenstored: fix use-after-free bug

Problem: Handling requests for one connection can not only zap the
connection itself, due to socket disconnects for example. It can also
zap *other* connections, due to domain release requests. Especially
it can zap the connection we have saved a pointer to in the "next"
variable.

From: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Sun Mar 01 14:50:04 2009 +0000 (2009-03-01)
parents 750eee596adf
children
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 #include "utils.h"
43 #include "list.h"
44 #include "talloc.h"
45 #include "xs_lib.h"
46 #include "xenstored_core.h"
47 #include "xenstored_watch.h"
48 #include "xenstored_transaction.h"
49 #include "xenstored_domain.h"
50 #include "xenctrl.h"
51 #include "tdb.h"
53 #include "hashtable.h"
55 extern int xce_handle; /* in xenstored_domain.c */
57 static bool verbose = false;
58 LIST_HEAD(connections);
59 static int tracefd = -1;
60 static bool recovery = true;
61 static bool remove_local = true;
62 static int reopen_log_pipe[2];
63 static char *tracefile = NULL;
64 static TDB_CONTEXT *tdb_ctx;
66 static void corrupt(struct connection *conn, const char *fmt, ...);
67 static void check_store(void);
/*
 * Format a message once and emit it to both the trace file and syslog
 * (at LOG_ERR).  The temporary string is talloc-allocated and freed here.
 */
#define log(...)							\
	do {								\
		char *log_text_ = talloc_asprintf(NULL, __VA_ARGS__);	\
		trace("%s\n", log_text_);				\
		syslog(LOG_ERR, "%s", log_text_);			\
		talloc_free(log_text_);					\
	} while (0)
/* Default quota values; the names indicate the resource each one limits. */
int quota_nb_entry_per_domain = 1000;	/* compared against domain_entry() */
int quota_nb_watch_per_domain = 128;
int quota_max_entry_size = 2048;	/* 2K; checked in write_node() */
int quota_max_transaction = 10;
83 TDB_CONTEXT *tdb_context(struct connection *conn)
84 {
85 /* conn = NULL used in manual_node at setup. */
86 if (!conn || !conn->transaction)
87 return tdb_ctx;
88 return tdb_transaction_context(conn->transaction);
89 }
91 bool replace_tdb(const char *newname, TDB_CONTEXT *newtdb)
92 {
93 if (rename(newname, xs_daemon_tdb()) != 0)
94 return false;
95 tdb_close(tdb_ctx);
96 tdb_ctx = talloc_steal(talloc_autofree_context(), newtdb);
97 return true;
98 }
100 static char *sockmsg_string(enum xsd_sockmsg_type type)
101 {
102 switch (type) {
103 case XS_DEBUG: return "DEBUG";
104 case XS_DIRECTORY: return "DIRECTORY";
105 case XS_READ: return "READ";
106 case XS_GET_PERMS: return "GET_PERMS";
107 case XS_WATCH: return "WATCH";
108 case XS_UNWATCH: return "UNWATCH";
109 case XS_TRANSACTION_START: return "TRANSACTION_START";
110 case XS_TRANSACTION_END: return "TRANSACTION_END";
111 case XS_INTRODUCE: return "INTRODUCE";
112 case XS_RELEASE: return "RELEASE";
113 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
114 case XS_WRITE: return "WRITE";
115 case XS_MKDIR: return "MKDIR";
116 case XS_RM: return "RM";
117 case XS_SET_PERMS: return "SET_PERMS";
118 case XS_WATCH_EVENT: return "WATCH_EVENT";
119 case XS_ERROR: return "ERROR";
120 case XS_IS_DOMAIN_INTRODUCED: return "XS_IS_DOMAIN_INTRODUCED";
121 case XS_RESUME: return "RESUME";
122 case XS_SET_TARGET: return "SET_TARGET";
123 default:
124 return "**UNKNOWN**";
125 }
126 }
128 void trace(const char *fmt, ...)
129 {
130 va_list arglist;
131 char *str;
132 char sbuf[1024];
133 int ret, dummy;
135 if (tracefd < 0)
136 return;
138 /* try to use a static buffer */
139 va_start(arglist, fmt);
140 ret = vsnprintf(sbuf, 1024, fmt, arglist);
141 va_end(arglist);
143 if (ret <= 1024) {
144 dummy = write(tracefd, sbuf, ret);
145 return;
146 }
148 /* fail back to dynamic allocation */
149 va_start(arglist, fmt);
150 str = talloc_vasprintf(NULL, fmt, arglist);
151 va_end(arglist);
152 dummy = write(tracefd, str, strlen(str));
153 talloc_free(str);
154 }
156 static void trace_io(const struct connection *conn,
157 const struct buffered_data *data,
158 int out)
159 {
160 unsigned int i;
161 time_t now;
162 struct tm *tm;
164 #ifdef HAVE_DTRACE
165 dtrace_io(conn, data, out);
166 #endif
168 if (tracefd < 0)
169 return;
171 now = time(NULL);
172 tm = localtime(&now);
174 trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (",
175 out ? "OUT" : "IN", conn,
176 tm->tm_year + 1900, tm->tm_mon + 1,
177 tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
178 sockmsg_string(data->hdr.msg.type));
180 for (i = 0; i < data->hdr.msg.len; i++)
181 trace("%c", (data->buffer[i] != '\0') ? data->buffer[i] : ' ');
182 trace(")\n");
183 }
/* Log the creation of an object: its type label and its address. */
void trace_create(const void *data, const char *type)
{
	trace("CREATE %s %p\n", type, data);
}
/* Log the destruction of an object: its type label and its address. */
void trace_destroy(const void *data, const char *type)
{
	trace("DESTROY %s %p\n", type, data);
}
195 /**
196 * Signal handler for SIGHUP, which requests that the trace log is reopened
197 * (in the main loop). A single byte is written to reopen_log_pipe, to awaken
198 * the select() in the main loop.
199 */
200 static void trigger_reopen_log(int signal __attribute__((unused)))
201 {
202 char c = 'A';
203 int dummy;
204 dummy = write(reopen_log_pipe[1], &c, 1);
205 }
208 static void reopen_log(void)
209 {
210 if (tracefile) {
211 if (tracefd > 0)
212 close(tracefd);
214 tracefd = open(tracefile, O_WRONLY|O_CREAT|O_APPEND, 0600);
216 if (tracefd < 0)
217 perror("Could not open tracefile");
218 else
219 trace("\n***\n");
220 }
221 }
224 static bool write_messages(struct connection *conn)
225 {
226 int ret;
227 struct buffered_data *out;
229 out = list_top(&conn->out_list, struct buffered_data, list);
230 if (out == NULL)
231 return true;
233 if (out->inhdr) {
234 if (verbose)
235 xprintf("Writing msg %s (%.*s) out to %p\n",
236 sockmsg_string(out->hdr.msg.type),
237 out->hdr.msg.len,
238 out->buffer, conn);
239 ret = conn->write(conn, out->hdr.raw + out->used,
240 sizeof(out->hdr) - out->used);
241 if (ret < 0)
242 return false;
244 out->used += ret;
245 if (out->used < sizeof(out->hdr))
246 return true;
248 out->inhdr = false;
249 out->used = 0;
251 /* Second write might block if non-zero. */
252 if (out->hdr.msg.len && !conn->domain)
253 return true;
254 }
256 ret = conn->write(conn, out->buffer + out->used,
257 out->hdr.msg.len - out->used);
258 if (ret < 0)
259 return false;
261 out->used += ret;
262 if (out->used != out->hdr.msg.len)
263 return true;
265 trace_io(conn, out, 1);
267 list_del(&out->list);
268 talloc_free(out);
270 return true;
271 }
273 static int destroy_conn(void *_conn)
274 {
275 struct connection *conn = _conn;
277 /* Flush outgoing if possible, but don't block. */
278 if (!conn->domain) {
279 fd_set set;
280 struct timeval none;
282 FD_ZERO(&set);
283 FD_SET(conn->fd, &set);
284 none.tv_sec = none.tv_usec = 0;
286 while (!list_empty(&conn->out_list)
287 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
288 if (!write_messages(conn))
289 break;
290 close(conn->fd);
291 }
292 if (conn->target)
293 talloc_unlink(conn, conn->target);
294 list_del(&conn->list);
295 trace_destroy(conn, "connection");
296 return 0;
297 }
/*
 * Add fd to set and raise *max to fd if fd is larger.
 * Negative descriptors are ignored.
 */
static void set_fd(int fd, fd_set *set, int *max)
{
	if (fd >= 0) {
		FD_SET(fd, set);
		if (*max < fd)
			*max = fd;
	}
}
310 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
311 struct timeval **ptimeout)
312 {
313 static struct timeval zero_timeout = { 0 };
314 struct connection *conn;
315 int max = -1;
317 *ptimeout = NULL;
319 FD_ZERO(inset);
320 FD_ZERO(outset);
322 set_fd(sock, inset, &max);
323 set_fd(ro_sock, inset, &max);
324 set_fd(reopen_log_pipe[0], inset, &max);
326 if (xce_handle != -1)
327 set_fd(xc_evtchn_fd(xce_handle), inset, &max);
329 list_for_each_entry(conn, &connections, list) {
330 if (conn->domain) {
331 if (domain_can_read(conn) ||
332 (domain_can_write(conn) &&
333 !list_empty(&conn->out_list)))
334 *ptimeout = &zero_timeout;
335 } else {
336 set_fd(conn->fd, inset, &max);
337 if (!list_empty(&conn->out_list))
338 FD_SET(conn->fd, outset);
339 }
340 }
342 return max;
343 }
/* talloc destructor: close the descriptor stored in the int being freed. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
/*
 * Is child equal to parent, or a node somewhere below it?
 *
 * "/" is treated as the ancestor of everything.  Otherwise child must
 * start with parent and continue with '/' or end right there, so "/ab" is
 * not a child of "/a".
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen = strlen(parent);

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (strcmp(parent, "/") == 0)
		return true;

	if (strncmp(child, parent, plen) == 0)
		return child[plen] == '/' || child[plen] == '\0';

	return false;
}
368 /* If it fails, returns NULL and sets errno. */
369 static struct node *read_node(struct connection *conn, const char *name)
370 {
371 TDB_DATA key, data;
372 uint32_t *p;
373 struct node *node;
374 TDB_CONTEXT * context = tdb_context(conn);
376 key.dptr = (void *)name;
377 key.dsize = strlen(name);
378 data = tdb_fetch(context, key);
380 if (data.dptr == NULL) {
381 if (tdb_error(context) == TDB_ERR_NOEXIST)
382 errno = ENOENT;
383 else {
384 log("TDB error on read: %s", tdb_errorstr(context));
385 errno = EIO;
386 }
387 return NULL;
388 }
390 node = talloc(name, struct node);
391 node->name = talloc_strdup(node, name);
392 node->parent = NULL;
393 node->tdb = tdb_context(conn);
394 talloc_steal(node, data.dptr);
396 /* Datalen, childlen, number of permissions */
397 p = (uint32_t *)data.dptr;
398 node->num_perms = p[0];
399 node->datalen = p[1];
400 node->childlen = p[2];
402 /* Permissions are struct xs_permissions. */
403 node->perms = (void *)&p[3];
404 /* Data is binary blob (usually ascii, no nul). */
405 node->data = node->perms + node->num_perms;
406 /* Children is strings, nul separated. */
407 node->children = node->data + node->datalen;
409 return node;
410 }
412 static bool write_node(struct connection *conn, const struct node *node)
413 {
414 /*
415 * conn will be null when this is called from manual_node.
416 * tdb_context copes with this.
417 */
419 TDB_DATA key, data;
420 void *p;
422 key.dptr = (void *)node->name;
423 key.dsize = strlen(node->name);
425 data.dsize = 3*sizeof(uint32_t)
426 + node->num_perms*sizeof(node->perms[0])
427 + node->datalen + node->childlen;
429 if (domain_is_unprivileged(conn) && data.dsize >= quota_max_entry_size)
430 goto error;
432 data.dptr = talloc_size(node, data.dsize);
433 ((uint32_t *)data.dptr)[0] = node->num_perms;
434 ((uint32_t *)data.dptr)[1] = node->datalen;
435 ((uint32_t *)data.dptr)[2] = node->childlen;
436 p = data.dptr + 3 * sizeof(uint32_t);
438 memcpy(p, node->perms, node->num_perms*sizeof(node->perms[0]));
439 p += node->num_perms*sizeof(node->perms[0]);
440 memcpy(p, node->data, node->datalen);
441 p += node->datalen;
442 memcpy(p, node->children, node->childlen);
444 /* TDB should set errno, but doesn't even set ecode AFAICT. */
445 if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
446 corrupt(conn, "Write of %s failed", key.dptr);
447 goto error;
448 }
449 return true;
450 error:
451 errno = ENOSPC;
452 return false;
453 }
455 static enum xs_perm_type perm_for_conn(struct connection *conn,
456 struct xs_permissions *perms,
457 unsigned int num)
458 {
459 unsigned int i;
460 enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
462 if (!conn->can_write)
463 mask &= ~XS_PERM_WRITE;
465 /* Owners and tools get it all... */
466 if (!conn->id || perms[0].id == conn->id
467 || (conn->target && perms[0].id == conn->target->id))
468 return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
470 for (i = 1; i < num; i++)
471 if (perms[i].id == conn->id
472 || (conn->target && perms[i].id == conn->target->id))
473 return perms[i].perms & mask;
475 return perms[0].perms & mask;
476 }
/*
 * Return the path of node's parent as a new talloc string whose parent
 * context is node.  A path with no '/' after its first character has "/"
 * as its parent.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);

	return talloc_strdup(node, "/");
}
486 /* What do parents say? */
487 static enum xs_perm_type ask_parents(struct connection *conn, const char *name)
488 {
489 struct node *node;
491 do {
492 name = get_parent(name);
493 node = read_node(conn, name);
494 if (node)
495 break;
496 } while (!streq(name, "/"));
498 /* No permission at root? We're in trouble. */
499 if (!node)
500 corrupt(conn, "No permissions file at root");
502 return perm_for_conn(conn, node->perms, node->num_perms);
503 }
505 /* We have a weird permissions system. You can allow someone into a
506 * specific node without allowing it in the parents. If it's going to
507 * fail, however, we don't want the errno to indicate any information
508 * about the node. */
509 static int errno_from_parents(struct connection *conn, const char *node,
510 int errnum, enum xs_perm_type perm)
511 {
512 /* We always tell them about memory failures. */
513 if (errnum == ENOMEM)
514 return errnum;
516 if (ask_parents(conn, node) & perm)
517 return errnum;
518 return EACCES;
519 }
521 /* If it fails, returns NULL and sets errno. */
522 struct node *get_node(struct connection *conn,
523 const char *name,
524 enum xs_perm_type perm)
525 {
526 struct node *node;
528 if (!name || !is_valid_nodename(name)) {
529 errno = EINVAL;
530 return NULL;
531 }
532 node = read_node(conn, name);
533 /* If we don't have permission, we don't have node. */
534 if (node) {
535 if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
536 != perm) {
537 errno = EACCES;
538 node = NULL;
539 }
540 }
541 /* Clean up errno if they weren't supposed to know. */
542 if (!node)
543 errno = errno_from_parents(conn, name, errno, perm);
544 return node;
545 }
547 static struct buffered_data *new_buffer(void *ctx)
548 {
549 struct buffered_data *data;
551 data = talloc_zero(ctx, struct buffered_data);
552 if (data == NULL)
553 return NULL;
555 data->inhdr = true;
556 return data;
557 }
559 /* Return length of string (including nul) at this offset.
560 * If there is no nul, returns 0 for failure.
561 */
562 static unsigned int get_string(const struct buffered_data *data,
563 unsigned int offset)
564 {
565 const char *nul;
567 if (offset >= data->used)
568 return 0;
570 nul = memchr(data->buffer + offset, 0, data->used - offset);
571 if (!nul)
572 return 0;
574 return nul - (data->buffer + offset) + 1;
575 }
577 /* Break input into vectors, return the number, fill in up to num of them.
578 * Always returns the actual number of nuls in the input. Stores the
579 * positions of the starts of the nul-terminated strings in vec.
580 * Callers who use this and then rely only on vec[] will
581 * ignore any data after the final nul.
582 */
583 unsigned int get_strings(struct buffered_data *data,
584 char *vec[], unsigned int num)
585 {
586 unsigned int off, i, len;
588 off = i = 0;
589 while ((len = get_string(data, off)) != 0) {
590 if (i < num)
591 vec[i] = data->buffer + off;
592 i++;
593 off += len;
594 }
595 return i;
596 }
598 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
599 const void *data, unsigned int len)
600 {
601 struct buffered_data *bdata;
603 /* Message is a child of the connection context for auto-cleanup. */
604 bdata = new_buffer(conn);
605 bdata->buffer = talloc_array(bdata, char, len);
607 /* Echo request header in reply unless this is an async watch event. */
608 if (type != XS_WATCH_EVENT) {
609 memcpy(&bdata->hdr.msg, &conn->in->hdr.msg,
610 sizeof(struct xsd_sockmsg));
611 } else {
612 memset(&bdata->hdr.msg, 0, sizeof(struct xsd_sockmsg));
613 }
615 /* Update relevant header fields and fill in the message body. */
616 bdata->hdr.msg.type = type;
617 bdata->hdr.msg.len = len;
618 memcpy(bdata->buffer, data, len);
620 /* Queue for later transmission. */
621 list_add_tail(&bdata->list, &conn->out_list);
622 }
624 /* Some routines (write, mkdir, etc) just need a non-error return */
625 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
626 {
627 send_reply(conn, type, "OK", sizeof("OK"));
628 }
630 void send_error(struct connection *conn, int error)
631 {
632 unsigned int i;
634 for (i = 0; error != xsd_errors[i].errnum; i++) {
635 if (i == ARRAY_SIZE(xsd_errors) - 1) {
636 eprintf("xenstored: error %i untranslatable", error);
637 i = 0; /* EINVAL */
638 break;
639 }
640 }
641 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
642 strlen(xsd_errors[i].errstring) + 1);
643 }
/*
 * True when node consists only of ASCII letters, digits and the characters
 * '-', '/', '_' and '@'.  The empty string passes.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix must reach the terminating nul. */
	return node[strspn(node, allowed)] == '\0';
}
654 bool is_valid_nodename(const char *node)
655 {
656 /* Must start in /. */
657 if (!strstarts(node, "/"))
658 return false;
660 /* Cannot end in / (unless it's just "/"). */
661 if (strends(node, "/") && !streq(node, "/"))
662 return false;
664 /* No double //. */
665 if (strstr(node, "//"))
666 return false;
668 if (strlen(node) > XENSTORE_ABS_PATH_MAX)
669 return false;
671 return valid_chars(node);
672 }
674 /* We expect one arg in the input: return NULL otherwise.
675 * The payload must contain exactly one nul, at the end.
676 */
677 static const char *onearg(struct buffered_data *in)
678 {
679 if (!in->used || get_string(in, 0) != in->used)
680 return NULL;
681 return in->buffer;
682 }
684 static char *perms_to_strings(const void *ctx,
685 struct xs_permissions *perms, unsigned int num,
686 unsigned int *len)
687 {
688 unsigned int i;
689 char *strings = NULL;
690 char buffer[MAX_STRLEN(unsigned int) + 1];
692 for (*len = 0, i = 0; i < num; i++) {
693 if (!xs_perm_to_string(&perms[i], buffer, sizeof(buffer)))
694 return NULL;
696 strings = talloc_realloc(ctx, strings, char,
697 *len + strlen(buffer) + 1);
698 strcpy(strings + *len, buffer);
699 *len += strlen(buffer) + 1;
700 }
701 return strings;
702 }
/*
 * Turn a possibly relative node name into an absolute one.
 *
 * NULL, names starting with '/' and names starting with '@' are returned
 * unchanged.  Anything else is prefixed with the connection's implicit
 * path, if it has one; the result is then a new talloc string whose parent
 * context is node.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *prefix;

	if (!node || node[0] == '/' || node[0] == '@')
		return (char *)node;

	prefix = get_implicit_path(conn);

	return prefix ? talloc_asprintf(node, "%s/%s", prefix, node)
		      : (char *)node;
}
/*
 * True when node is a non-NULL name starting with '@'.  Otherwise returns
 * false with errno set to EINVAL.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
725 static void send_directory(struct connection *conn, const char *name)
726 {
727 struct node *node;
729 name = canonicalize(conn, name);
730 node = get_node(conn, name, XS_PERM_READ);
731 if (!node) {
732 send_error(conn, errno);
733 return;
734 }
736 send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
737 }
739 static void do_read(struct connection *conn, const char *name)
740 {
741 struct node *node;
743 name = canonicalize(conn, name);
744 node = get_node(conn, name, XS_PERM_READ);
745 if (!node) {
746 send_error(conn, errno);
747 return;
748 }
750 send_reply(conn, XS_READ, node->data, node->datalen);
751 }
753 static void delete_node_single(struct connection *conn, struct node *node)
754 {
755 TDB_DATA key;
757 key.dptr = (void *)node->name;
758 key.dsize = strlen(node->name);
760 if (tdb_delete(tdb_context(conn), key) != 0) {
761 corrupt(conn, "Could not delete '%s'", node->name);
762 return;
763 }
764 domain_entry_dec(conn, node);
765 }
/*
 * Return the final path component: the text after the last '/'.
 * Must not be / (and name must contain a '/').
 */
static char *basename(const char *name)
{
	char *last_slash = strrchr(name, '/');

	return last_slash + 1;
}
773 static struct node *construct_node(struct connection *conn, const char *name)
774 {
775 const char *base;
776 unsigned int baselen;
777 struct node *parent, *node;
778 char *children, *parentname = get_parent(name);
780 /* If parent doesn't exist, create it. */
781 parent = read_node(conn, parentname);
782 if (!parent)
783 parent = construct_node(conn, parentname);
784 if (!parent)
785 return NULL;
787 if (domain_entry(conn) >= quota_nb_entry_per_domain)
788 return NULL;
790 /* Add child to parent. */
791 base = basename(name);
792 baselen = strlen(base) + 1;
793 children = talloc_array(name, char, parent->childlen + baselen);
794 memcpy(children, parent->children, parent->childlen);
795 memcpy(children + parent->childlen, base, baselen);
796 parent->children = children;
797 parent->childlen += baselen;
799 /* Allocate node */
800 node = talloc(name, struct node);
801 node->tdb = tdb_context(conn);
802 node->name = talloc_strdup(node, name);
804 /* Inherit permissions, except domains own what they create */
805 node->num_perms = parent->num_perms;
806 node->perms = talloc_memdup(node, parent->perms,
807 node->num_perms * sizeof(node->perms[0]));
808 if (conn && conn->id)
809 node->perms[0].id = conn->id;
811 /* No children, no data */
812 node->children = node->data = NULL;
813 node->childlen = node->datalen = 0;
814 node->parent = parent;
815 domain_entry_inc(conn, node);
816 return node;
817 }
819 static int destroy_node(void *_node)
820 {
821 struct node *node = _node;
822 TDB_DATA key;
824 if (streq(node->name, "/"))
825 corrupt(NULL, "Destroying root node!");
827 key.dptr = (void *)node->name;
828 key.dsize = strlen(node->name);
830 tdb_delete(node->tdb, key);
831 return 0;
832 }
834 static struct node *create_node(struct connection *conn,
835 const char *name,
836 void *data, unsigned int datalen)
837 {
838 struct node *node, *i;
840 node = construct_node(conn, name);
841 if (!node)
842 return NULL;
844 node->data = data;
845 node->datalen = datalen;
847 /* We write out the nodes down, setting destructor in case
848 * something goes wrong. */
849 for (i = node; i; i = i->parent) {
850 if (!write_node(conn, i)) {
851 domain_entry_dec(conn, i);
852 return NULL;
853 }
854 talloc_set_destructor(i, destroy_node);
855 }
857 /* OK, now remove destructors so they stay around */
858 for (i = node; i; i = i->parent)
859 talloc_set_destructor(i, NULL);
860 return node;
861 }
863 /* path, data... */
864 static void do_write(struct connection *conn, struct buffered_data *in)
865 {
866 unsigned int offset, datalen;
867 struct node *node;
868 char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
869 char *name;
871 /* Extra "strings" can be created by binary data. */
872 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
873 send_error(conn, EINVAL);
874 return;
875 }
877 offset = strlen(vec[0]) + 1;
878 datalen = in->used - offset;
880 name = canonicalize(conn, vec[0]);
881 node = get_node(conn, name, XS_PERM_WRITE);
882 if (!node) {
883 /* No permissions, invalid input? */
884 if (errno != ENOENT) {
885 send_error(conn, errno);
886 return;
887 }
888 node = create_node(conn, name, in->buffer + offset, datalen);
889 if (!node) {
890 send_error(conn, errno);
891 return;
892 }
893 } else {
894 node->data = in->buffer + offset;
895 node->datalen = datalen;
896 if (!write_node(conn, node)){
897 send_error(conn, errno);
898 return;
899 }
900 }
902 add_change_node(conn->transaction, name, false);
903 fire_watches(conn, name, false);
904 send_ack(conn, XS_WRITE);
905 }
907 static void do_mkdir(struct connection *conn, const char *name)
908 {
909 struct node *node;
911 name = canonicalize(conn, name);
912 node = get_node(conn, name, XS_PERM_WRITE);
914 /* If it already exists, fine. */
915 if (!node) {
916 /* No permissions? */
917 if (errno != ENOENT) {
918 send_error(conn, errno);
919 return;
920 }
921 node = create_node(conn, name, NULL, 0);
922 if (!node) {
923 send_error(conn, errno);
924 return;
925 }
926 add_change_node(conn->transaction, name, false);
927 fire_watches(conn, name, false);
928 }
929 send_ack(conn, XS_MKDIR);
930 }
932 static void delete_node(struct connection *conn, struct node *node)
933 {
934 unsigned int i;
936 /* Delete self, then delete children. If we crash, then the worst
937 that can happen is the children will continue to take up space, but
938 will otherwise be unreachable. */
939 delete_node_single(conn, node);
941 /* Delete children, too. */
942 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
943 struct node *child;
945 child = read_node(conn,
946 talloc_asprintf(node, "%s/%s", node->name,
947 node->children + i));
948 if (child) {
949 delete_node(conn, child);
950 }
951 else {
952 trace("delete_node: No child '%s/%s' found!\n",
953 node->name, node->children + i);
954 /* Skip it, we've already deleted the parent. */
955 }
956 }
957 }
/*
 * Delete len bytes at offset off from a buffer holding total bytes by
 * sliding the tail down with memmove.  The last len bytes of the buffer
 * keep their old contents; the caller is expected to shrink its length.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	/* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
	char *bytes = mem;

	memmove(bytes + off, bytes + off + len, total - off - len);
}
967 static bool remove_child_entry(struct connection *conn, struct node *node,
968 size_t offset)
969 {
970 size_t childlen = strlen(node->children + offset);
971 memdel(node->children, offset, childlen + 1, node->childlen);
972 node->childlen -= childlen + 1;
973 return write_node(conn, node);
974 }
977 static bool delete_child(struct connection *conn,
978 struct node *node, const char *childname)
979 {
980 unsigned int i;
982 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
983 if (streq(node->children+i, childname)) {
984 return remove_child_entry(conn, node, i);
985 }
986 }
987 corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
988 return false;
989 }
/*
 * Remove node (whose path is name) and its whole subtree.
 *
 * Delete from parent first, then if we crash, the worst that can happen is
 * the child will continue to take up space, but will otherwise be
 * unreachable.
 *
 * Returns 1 on success.  Returns 0 on failure, after sending EINVAL to the
 * client.  conn is NULL when called from internal_rm(); in that case there
 * is no client to notify and no error reply is queued.
 */
static int _rm(struct connection *conn, struct node *node, const char *name)
{
	struct node *parent = read_node(conn, get_parent(name));

	if (!parent) {
		if (conn)
			send_error(conn, EINVAL);
		return 0;
	}

	if (!delete_child(conn, parent, basename(name))) {
		if (conn)
			send_error(conn, EINVAL);
		return 0;
	}

	delete_node(conn, node);
	return 1;
}
/*
 * Remove a node on behalf of the daemon itself: no connection, hence no
 * permission check and no reply.  A node that does not exist is silently
 * ignored.
 */
static void internal_rm(const char *name)
{
	/* Private talloc copy: read_node() and _rm() allocate off the name. */
	char *tname = talloc_strdup(NULL, name);
	struct node *node = read_node(NULL, tname);

	if (node)
		_rm(NULL, node, tname);
	talloc_free(node);
	talloc_free(tname);
}
1024 static void do_rm(struct connection *conn, const char *name)
1026 struct node *node;
1028 name = canonicalize(conn, name);
1029 node = get_node(conn, name, XS_PERM_WRITE);
1030 if (!node) {
1031 /* Didn't exist already? Fine, if parent exists. */
1032 if (errno == ENOENT) {
1033 node = read_node(conn, get_parent(name));
1034 if (node) {
1035 send_ack(conn, XS_RM);
1036 return;
1038 /* Restore errno, just in case. */
1039 errno = ENOENT;
1041 send_error(conn, errno);
1042 return;
1045 if (streq(name, "/")) {
1046 send_error(conn, EINVAL);
1047 return;
1050 if (_rm(conn, node, name)) {
1051 add_change_node(conn->transaction, name, true);
1052 fire_watches(conn, name, true);
1053 send_ack(conn, XS_RM);
1058 static void do_get_perms(struct connection *conn, const char *name)
1060 struct node *node;
1061 char *strings;
1062 unsigned int len;
1064 name = canonicalize(conn, name);
1065 node = get_node(conn, name, XS_PERM_READ);
1066 if (!node) {
1067 send_error(conn, errno);
1068 return;
1071 strings = perms_to_strings(node, node->perms, node->num_perms, &len);
1072 if (!strings)
1073 send_error(conn, errno);
1074 else
1075 send_reply(conn, XS_GET_PERMS, strings, len);
1078 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1080 unsigned int num;
1081 struct xs_permissions *perms;
1082 char *name, *permstr;
1083 struct node *node;
1085 num = xs_count_strings(in->buffer, in->used);
1086 if (num < 2) {
1087 send_error(conn, EINVAL);
1088 return;
1091 /* First arg is node name. */
1092 name = canonicalize(conn, in->buffer);
1093 permstr = in->buffer + strlen(in->buffer) + 1;
1094 num--;
1096 /* We must own node to do this (tools can do this too). */
1097 node = get_node(conn, name, XS_PERM_WRITE|XS_PERM_OWNER);
1098 if (!node) {
1099 send_error(conn, errno);
1100 return;
1103 perms = talloc_array(node, struct xs_permissions, num);
1104 if (!xs_strings_to_perms(perms, num, permstr)) {
1105 send_error(conn, errno);
1106 return;
1109 /* Unprivileged domains may not change the owner. */
1110 if (domain_is_unprivileged(conn) &&
1111 perms[0].id != node->perms[0].id) {
1112 send_error(conn, EPERM);
1113 return;
1116 domain_entry_dec(conn, node);
1117 node->perms = perms;
1118 node->num_perms = num;
1119 domain_entry_inc(conn, node);
1121 if (!write_node(conn, node)) {
1122 send_error(conn, errno);
1123 return;
1126 add_change_node(conn->transaction, name, false);
1127 fire_watches(conn, name, false);
1128 send_ack(conn, XS_SET_PERMS);
1131 static void do_debug(struct connection *conn, struct buffered_data *in)
1133 int num;
1135 if (conn->id != 0) {
1136 send_error(conn, EACCES);
1137 return;
1140 num = xs_count_strings(in->buffer, in->used);
1142 if (streq(in->buffer, "print")) {
1143 if (num < 2) {
1144 send_error(conn, EINVAL);
1145 return;
1147 xprintf("debug: %s", in->buffer + get_string(in, 0));
1150 if (streq(in->buffer, "check"))
1151 check_store();
1153 send_ack(conn, XS_DEBUG);
1156 /* Process "in" for conn: "in" will vanish after this conversation, so
1157 * we can talloc off it for temporary variables. May free "conn".
1158 */
1159 static void process_message(struct connection *conn, struct buffered_data *in)
1161 struct transaction *trans;
1163 trans = transaction_lookup(conn, in->hdr.msg.tx_id);
1164 if (IS_ERR(trans)) {
1165 send_error(conn, -PTR_ERR(trans));
1166 return;
1169 assert(conn->transaction == NULL);
1170 conn->transaction = trans;
1172 switch (in->hdr.msg.type) {
1173 case XS_DIRECTORY:
1174 send_directory(conn, onearg(in));
1175 break;
1177 case XS_READ:
1178 do_read(conn, onearg(in));
1179 break;
1181 case XS_WRITE:
1182 do_write(conn, in);
1183 break;
1185 case XS_MKDIR:
1186 do_mkdir(conn, onearg(in));
1187 break;
1189 case XS_RM:
1190 do_rm(conn, onearg(in));
1191 break;
1193 case XS_GET_PERMS:
1194 do_get_perms(conn, onearg(in));
1195 break;
1197 case XS_SET_PERMS:
1198 do_set_perms(conn, in);
1199 break;
1201 case XS_DEBUG:
1202 do_debug(conn, in);
1203 break;
1205 case XS_WATCH:
1206 do_watch(conn, in);
1207 break;
1209 case XS_UNWATCH:
1210 do_unwatch(conn, in);
1211 break;
1213 case XS_TRANSACTION_START:
1214 do_transaction_start(conn, in);
1215 break;
1217 case XS_TRANSACTION_END:
1218 do_transaction_end(conn, onearg(in));
1219 break;
1221 case XS_INTRODUCE:
1222 do_introduce(conn, in);
1223 break;
1225 case XS_IS_DOMAIN_INTRODUCED:
1226 do_is_domain_introduced(conn, onearg(in));
1227 break;
1229 case XS_RELEASE:
1230 do_release(conn, onearg(in));
1231 break;
1233 case XS_GET_DOMAIN_PATH:
1234 do_get_domain_path(conn, onearg(in));
1235 break;
1237 case XS_RESUME:
1238 do_resume(conn, onearg(in));
1239 break;
1241 case XS_SET_TARGET:
1242 do_set_target(conn, in);
1243 break;
1245 default:
1246 eprintf("Client unknown operation %i", in->hdr.msg.type);
1247 send_error(conn, ENOSYS);
1248 break;
1251 conn->transaction = NULL;
1254 static void consider_message(struct connection *conn)
1256 if (verbose)
1257 xprintf("Got message %s len %i from %p\n",
1258 sockmsg_string(conn->in->hdr.msg.type),
1259 conn->in->hdr.msg.len, conn);
1261 process_message(conn, conn->in);
1263 talloc_free(conn->in);
1264 conn->in = new_buffer(conn);
1267 /* Errors in reading or allocating here mean we get out of sync, so we
1268 * drop the whole client connection. */
1269 static void handle_input(struct connection *conn)
1271 int bytes;
1272 struct buffered_data *in = conn->in;
1274 /* Not finished header yet? */
1275 if (in->inhdr) {
1276 bytes = conn->read(conn, in->hdr.raw + in->used,
1277 sizeof(in->hdr) - in->used);
1278 if (bytes < 0)
1279 goto bad_client;
1280 in->used += bytes;
1281 if (in->used != sizeof(in->hdr))
1282 return;
1284 if (in->hdr.msg.len > XENSTORE_PAYLOAD_MAX) {
1285 syslog(LOG_ERR, "Client tried to feed us %i",
1286 in->hdr.msg.len);
1287 goto bad_client;
1290 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1291 if (!in->buffer)
1292 goto bad_client;
1293 in->used = 0;
1294 in->inhdr = false;
1295 return;
1298 bytes = conn->read(conn, in->buffer + in->used,
1299 in->hdr.msg.len - in->used);
1300 if (bytes < 0)
1301 goto bad_client;
1303 in->used += bytes;
1304 if (in->used != in->hdr.msg.len)
1305 return;
1307 trace_io(conn, in, 0);
1308 consider_message(conn);
1309 return;
1311 bad_client:
1312 /* Kill it. */
1313 talloc_free(conn);
/*
 * Push queued output for conn via write_messages(); the connection is
 * freed when write_messages() returns a false value.
 */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	talloc_free(conn);
}
1322 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1324 struct connection *new;
1326 new = talloc_zero(talloc_autofree_context(), struct connection);
1327 if (!new)
1328 return NULL;
1330 new->fd = -1;
1331 new->write = write;
1332 new->read = read;
1333 new->can_write = true;
1334 new->transaction_started = 0;
1335 INIT_LIST_HEAD(&new->out_list);
1336 INIT_LIST_HEAD(&new->watches);
1337 INIT_LIST_HEAD(&new->transaction_list);
1339 new->in = new_buffer(new);
1340 if (new->in == NULL) {
1341 talloc_free(new);
1342 return NULL;
1345 list_add_tail(&new->list, &connections);
1346 talloc_set_destructor(new, destroy_conn);
1347 trace_create(new, "connection");
1348 return new;
1351 static int writefd(struct connection *conn, const void *data, unsigned int len)
1353 int rc;
1355 while ((rc = write(conn->fd, data, len)) < 0) {
1356 if (errno == EAGAIN) {
1357 rc = 0;
1358 break;
1360 if (errno != EINTR)
1361 break;
1364 return rc;
1367 static int readfd(struct connection *conn, void *data, unsigned int len)
1369 int rc;
1371 while ((rc = read(conn->fd, data, len)) < 0) {
1372 if (errno == EAGAIN) {
1373 rc = 0;
1374 break;
1376 if (errno != EINTR)
1377 break;
1380 /* Reading zero length means we're done with this connection. */
1381 if ((rc == 0) && (len != 0)) {
1382 errno = EBADF;
1383 rc = -1;
1386 return rc;
1389 static void accept_connection(int sock, bool canwrite)
1391 int fd;
1392 struct connection *conn;
1394 fd = accept(sock, NULL, NULL);
1395 if (fd < 0)
1396 return;
1398 conn = new_connection(writefd, readfd);
1399 if (conn) {
1400 conn->fd = fd;
1401 conn->can_write = canwrite;
1402 } else
1403 close(fd);
/* Flags argument passed to every tdb_open() call in setup_structure();
 * no flags are set. */
#define TDB_FLAGS 0
1408 /* We create initial nodes manually. */
1409 static void manual_node(const char *name, const char *child)
1411 struct node *node;
1412 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
1414 node = talloc_zero(NULL, struct node);
1415 node->name = name;
1416 node->perms = &perms;
1417 node->num_perms = 1;
1418 node->children = (char *)child;
1419 if (child)
1420 node->childlen = strlen(child) + 1;
1422 if (!write_node(NULL, node))
1423 barf_perror("Could not create initial node %s", name);
1424 talloc_free(node);
1427 static void setup_structure(void)
1429 char *tdbname;
1430 tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1431 tdb_ctx = tdb_open(tdbname, 0, TDB_FLAGS, O_RDWR, 0);
1433 if (tdb_ctx) {
1434 /* XXX When we make xenstored able to restart, this will have
1435 to become cleverer, checking for existing domains and not
1436 removing the corresponding entries, but for now xenstored
1437 cannot be restarted without losing all the registered
1438 watches, which breaks all the backend drivers anyway. We
1439 can therefore get away with just clearing /local and
1440 expecting Xend to put the appropriate entries back in.
1442 When this change is made it is important to note that
1443 dom0's entries must be cleaned up on reboot _before_ this
1444 daemon starts, otherwise the backend drivers and dom0's
1445 balloon driver will pick up stale entries. In the case of
1446 the balloon driver, this can be fatal.
1447 */
1448 char *tlocal = talloc_strdup(NULL, "/local");
1450 check_store();
1452 if (remove_local) {
1453 internal_rm("/local");
1454 create_node(NULL, tlocal, NULL, 0);
1456 check_store();
1459 talloc_free(tlocal);
1461 else {
1462 tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
1463 0640);
1464 if (!tdb_ctx)
1465 barf_perror("Could not create tdb file %s", tdbname);
1467 manual_node("/", "tool");
1468 manual_node("/tool", "xenstored");
1469 manual_node("/tool/xenstored", NULL);
1471 check_store();
/*
 * Hash callback for the string-keyed hashtables in this file.
 *
 * k: NUL-terminated string.
 *
 * Starts from 5381 and, for every character, multiplies the running
 * value by 33 and adds the character (converted from plain char).
 */
static unsigned int hash_from_key_fn(void *k)
{
	const char *p;
	unsigned int acc = 5381;

	for (p = k; *p != '\0'; p++) {
		char ch = *p;

		acc = acc * 33 + (unsigned int)ch;
	}

	return acc;
}
/*
 * Key-equality callback for the string-keyed hashtables in this file.
 * Returns 1 when both NUL-terminated strings compare equal, 0 otherwise.
 */
static int keys_equal_fn(void *key1, void *key2)
{
	const char *lhs = key1;
	const char *rhs = key2;

	return strcmp(lhs, rhs) == 0;
}
/*
 * Build the path of child s2 below parent s1.
 *
 * When s1 is "/" the result is "/<s2>"; otherwise it is "<s1>/<s2>".
 * The string comes from talloc_asprintf() with a NULL context.
 */
static char *child_name(const char *s1, const char *s2)
{
	int parent_is_root = (strcmp(s1, "/") == 0);

	if (parent_is_root)
		return talloc_asprintf(NULL, "/%s", s2);

	return talloc_asprintf(NULL, "%s/%s", s1, s2);
}
/*
 * Record str in the given hashtable.
 *
 * The key is a malloc()ed private copy of str and the stored value is
 * always (void *)1.  Running out of memory for the copy is reported
 * through barf_perror() rather than writing through a NULL pointer.
 */
static void remember_string(struct hashtable *hash, const char *str)
{
	size_t size = strlen(str) + 1;
	char *k = malloc(size);

	if (!k)
		barf_perror("Could not allocate memory to remember '%s'", str);

	memcpy(k, str, size);
	hashtable_insert(hash, k, (void *)1);
}
1514 /**
1515 * A node has a children field that names the children of the node, separated
1516 * by NULs. We check whether there are entries in there that are duplicated
1517 * (and if so, delete the second one), and whether there are any that do not
1518 * have a corresponding child node (and if so, delete them). Each valid child
1519 * is then recursively checked.
1521 * No deleting is performed if the recovery flag is cleared (i.e. -R was
1522 * passed on the command line).
1524 * As we go, we record each node in the given reachable hashtable. These
1525 * entries will be used later in clean_store.
1526 */
1527 static void check_store_(const char *name, struct hashtable *reachable)
1529 struct node *node = read_node(NULL, name);
1531 if (node) {
1532 size_t i = 0;
1534 struct hashtable * children =
1535 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1537 remember_string(reachable, name);
1539 while (i < node->childlen) {
1540 size_t childlen = strlen(node->children + i);
1541 char * childname = child_name(node->name,
1542 node->children + i);
1543 struct node *childnode = read_node(NULL, childname);
1545 if (childnode) {
1546 if (hashtable_search(children, childname)) {
1547 log("check_store: '%s' is duplicated!",
1548 childname);
1550 if (recovery) {
1551 remove_child_entry(NULL, node,
1552 i);
1553 i -= childlen + 1;
1556 else {
1557 remember_string(children, childname);
1558 check_store_(childname, reachable);
1561 else {
1562 log("check_store: No child '%s' found!\n",
1563 childname);
1565 if (recovery) {
1566 remove_child_entry(NULL, node, i);
1567 i -= childlen + 1;
1571 talloc_free(childnode);
1572 talloc_free(childname);
1573 i += childlen + 1;
1576 hashtable_destroy(children, 0 /* Don't free values (they are
1577 all (void *)1) */);
1578 talloc_free(node);
1580 else {
1581 /* Impossible, because no database should ever be without the
1582 root, and otherwise, we've just checked in our caller
1583 (which made a recursive call to get here). */
1585 log("check_store: No child '%s' found: impossible!", name);
1590 /**
1591 * Helper to clean_store below.
1592 */
1593 static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
1594 void *private)
1596 struct hashtable *reachable = private;
1597 char * name = talloc_strndup(NULL, key.dptr, key.dsize);
1599 if (!hashtable_search(reachable, name)) {
1600 log("clean_store: '%s' is orphaned!", name);
1601 if (recovery) {
1602 tdb_delete(tdb, key);
1606 talloc_free(name);
1608 return 0;
1612 /**
1613 * Given the list of reachable nodes, iterate over the whole store, and
1614 * remove any that were not reached.
1615 */
1616 static void clean_store(struct hashtable *reachable)
1618 tdb_traverse(tdb_ctx, &clean_store_, reachable);
1622 static void check_store(void)
1624 char * root = talloc_strdup(NULL, "/");
1625 struct hashtable * reachable =
1626 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1628 log("Checking store ...");
1629 check_store_(root, reachable);
1630 clean_store(reachable);
1631 log("Checking store complete.");
1633 hashtable_destroy(reachable, 0 /* Don't free values (they are all
1634 (void *)1) */);
1635 talloc_free(root);
1639 /* Something is horribly wrong: check the store. */
1640 static void corrupt(struct connection *conn, const char *fmt, ...)
1642 va_list arglist;
1643 char *str;
1644 int saved_errno = errno;
1646 va_start(arglist, fmt);
1647 str = talloc_vasprintf(NULL, fmt, arglist);
1648 va_end(arglist);
1650 log("corruption detected by connection %i: err %s: %s",
1651 conn ? (int)conn->id : -1, strerror(saved_errno), str);
1653 check_store();
1657 static void write_pidfile(const char *pidfile)
1659 char buf[100];
1660 int len;
1661 int fd;
1663 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1664 if (fd == -1)
1665 barf_perror("Opening pid file %s", pidfile);
1667 /* We exit silently if daemon already running. */
1668 if (lockf(fd, F_TLOCK, 0) == -1)
1669 exit(0);
1671 len = snprintf(buf, sizeof(buf), "%ld\n", (long)getpid());
1672 if (write(fd, buf, len) != len)
1673 barf_perror("Writing pid file %s", pidfile);
/*
 * Detach from the invoking process (Stevens): fork twice with a setsid()
 * in between, change directory to "/" and clear the umask.  A failed
 * fork() or chdir() is reported through barf_perror().
 */
static void daemonize(void)
{
	pid_t child;

	/* Separate from our parent via fork, so init inherits us. */
	child = fork();
	if (child < 0)
		barf_perror("Failed to fork daemon");
	if (child != 0)
		exit(0);

	/* Session leader so ^C doesn't whack us. */
	setsid();

	/* Let session leader exit so child cannot regain CTTY */
	child = fork();
	if (child < 0)
		barf_perror("Failed to fork daemon");
	if (child != 0)
		exit(0);

	/* Move off any mount points we might be in. */
	if (chdir("/") == -1)
		barf_perror("Failed to chdir");

	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/*
 * Print the command-line help to stderr.
 *
 * The option names listed here must match the long options in the
 * options[] table; the watch quota option is spelled --watch-nb there.
 */
static void usage(void)
{
	fprintf(stderr,
"Usage:\n"
"\n"
" xenstored <options>\n"
"\n"
"where options may include:\n"
"\n"
" --no-domain-init to state that xenstored should not initialise dom0,\n"
" --pid-file <file> giving a file for the daemon's pid to be written,\n"
" --help to output this message,\n"
" --no-fork to request that the daemon does not fork,\n"
" --output-pid to request that the pid of the daemon is output,\n"
" --trace-file <file> giving the file for logging, and\n"
" --entry-nb <nb> limit the number of entries per domain,\n"
" --entry-size <size> limit the size of entry per domain, and\n"
" --watch-nb <nb> limit the number of watches per domain,\n"
" --transaction <nb> limit the number of transaction allowed per domain,\n"
" --no-recovery to request that no recovery should be attempted when\n"
" the store is corrupted (debug only),\n"
" --preserve-local to request that /local is preserved on start-up,\n"
" --verbose to request verbose execution.\n");
}
/*
 * Long options accepted by main().  Each entry gives the long name,
 * whether it takes an argument, and the short option letter that
 * getopt_long() returns for it.
 */
static struct option options[] = {
	{ "no-domain-init", no_argument,       NULL, 'D' },
	{ "entry-nb",       required_argument, NULL, 'E' },
	{ "pid-file",       required_argument, NULL, 'F' },
	{ "help",           no_argument,       NULL, 'H' },
	{ "no-fork",        no_argument,       NULL, 'N' },
	{ "output-pid",     no_argument,       NULL, 'P' },
	{ "entry-size",     required_argument, NULL, 'S' },
	{ "trace-file",     required_argument, NULL, 'T' },
	{ "transaction",    required_argument, NULL, 't' },
	{ "no-recovery",    no_argument,       NULL, 'R' },
	{ "preserve-local", no_argument,       NULL, 'L' },
	{ "verbose",        no_argument,       NULL, 'V' },
	{ "watch-nb",       required_argument, NULL, 'W' },
	{ NULL, 0, NULL, 0 }
};

/* Declaration only; no definition is visible in this part of the file. */
extern void dump_conn(struct connection *conn);
1749 int main(int argc, char *argv[])
1751 int opt, *sock, *ro_sock, max;
1752 struct sockaddr_un addr;
1753 fd_set inset, outset;
1754 bool dofork = true;
1755 bool outputpid = false;
1756 bool no_domain_init = false;
1757 const char *pidfile = NULL;
1758 int evtchn_fd = -1;
1759 struct timeval *timeout;
1761 while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
1762 NULL)) != -1) {
1763 switch (opt) {
1764 case 'D':
1765 no_domain_init = true;
1766 break;
1767 case 'E':
1768 quota_nb_entry_per_domain = strtol(optarg, NULL, 10);
1769 break;
1770 case 'F':
1771 pidfile = optarg;
1772 break;
1773 case 'H':
1774 usage();
1775 return 0;
1776 case 'N':
1777 dofork = false;
1778 break;
1779 case 'P':
1780 outputpid = true;
1781 break;
1782 case 'R':
1783 recovery = false;
1784 break;
1785 case 'L':
1786 remove_local = false;
1787 break;
1788 case 'S':
1789 quota_max_entry_size = strtol(optarg, NULL, 10);
1790 break;
1791 case 't':
1792 quota_max_transaction = strtol(optarg, NULL, 10);
1793 break;
1794 case 'T':
1795 tracefile = optarg;
1796 break;
1797 case 'V':
1798 verbose = true;
1799 break;
1800 case 'W':
1801 quota_nb_watch_per_domain = strtol(optarg, NULL, 10);
1802 break;
1805 if (optind != argc)
1806 barf("%s: No arguments desired", argv[0]);
1808 reopen_log();
1810 /* make sure xenstored directory exists */
1811 if (mkdir(xs_daemon_rundir(), 0755)) {
1812 if (errno != EEXIST) {
1813 perror("error: mkdir daemon rundir");
1814 exit(-1);
1818 if (mkdir(xs_daemon_rootdir(), 0755)) {
1819 if (errno != EEXIST) {
1820 perror("error: mkdir daemon rootdir");
1821 exit(-1);
1825 if (dofork) {
1826 openlog("xenstored", 0, LOG_DAEMON);
1827 daemonize();
1829 if (pidfile)
1830 write_pidfile(pidfile);
1832 /* Talloc leak reports go to stderr, which is closed if we fork. */
1833 if (!dofork)
1834 talloc_enable_leak_report_full();
1836 /* Create sockets for them to listen to. */
1837 sock = talloc(talloc_autofree_context(), int);
1838 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1839 if (*sock < 0)
1840 barf_perror("Could not create socket");
1841 ro_sock = talloc(talloc_autofree_context(), int);
1842 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1843 if (*ro_sock < 0)
1844 barf_perror("Could not create socket");
1845 talloc_set_destructor(sock, destroy_fd);
1846 talloc_set_destructor(ro_sock, destroy_fd);
1848 /* Don't kill us with SIGPIPE. */
1849 signal(SIGPIPE, SIG_IGN);
1851 /* FIXME: Be more sophisticated, don't mug running daemon. */
1852 unlink(xs_daemon_socket());
1853 unlink(xs_daemon_socket_ro());
1855 addr.sun_family = AF_UNIX;
1856 strcpy(addr.sun_path, xs_daemon_socket());
1857 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1858 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1859 strcpy(addr.sun_path, xs_daemon_socket_ro());
1860 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1861 barf_perror("Could not bind socket to %s",
1862 xs_daemon_socket_ro());
1863 if (chmod(xs_daemon_socket(), 0600) != 0
1864 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1865 barf_perror("Could not chmod sockets");
1867 if (listen(*sock, 1) != 0
1868 || listen(*ro_sock, 1) != 0)
1869 barf_perror("Could not listen on sockets");
1871 if (pipe(reopen_log_pipe)) {
1872 barf_perror("pipe");
1875 /* Setup the database */
1876 setup_structure();
1878 /* Listen to hypervisor. */
1879 if (!no_domain_init)
1880 domain_init();
1882 /* Restore existing connections. */
1883 restore_existing_connections();
1885 if (outputpid) {
1886 printf("%ld\n", (long)getpid());
1887 fflush(stdout);
1890 /* redirect to /dev/null now we're ready to accept connections */
1891 if (dofork) {
1892 int devnull = open("/dev/null", O_RDWR);
1893 if (devnull == -1)
1894 barf_perror("Could not open /dev/null\n");
1895 dup2(devnull, STDIN_FILENO);
1896 dup2(devnull, STDOUT_FILENO);
1897 dup2(devnull, STDERR_FILENO);
1898 close(devnull);
1899 xprintf = trace;
1902 signal(SIGHUP, trigger_reopen_log);
1904 if (xce_handle != -1)
1905 evtchn_fd = xc_evtchn_fd(xce_handle);
1907 /* Get ready to listen to the tools. */
1908 max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout);
1910 /* Tell the kernel we're up and running. */
1911 xenbus_notify_running();
1913 /* Main loop. */
1914 for (;;) {
1915 struct connection *conn, *next;
1917 if (select(max+1, &inset, &outset, NULL, timeout) < 0) {
1918 if (errno == EINTR)
1919 continue;
1920 barf_perror("Select failed");
1923 if (FD_ISSET(reopen_log_pipe[0], &inset)) {
1924 char c;
1925 if (read(reopen_log_pipe[0], &c, 1) != 1)
1926 barf_perror("read failed");
1927 reopen_log();
1930 if (FD_ISSET(*sock, &inset))
1931 accept_connection(*sock, true);
1933 if (FD_ISSET(*ro_sock, &inset))
1934 accept_connection(*ro_sock, false);
1936 if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
1937 handle_event();
1939 next = list_entry(connections.next, typeof(*conn), list);
1940 if (&next->list != &connections)
1941 talloc_increase_ref_count(next);
1942 while (&next->list != &connections) {
1943 conn = next;
1945 next = list_entry(conn->list.next,
1946 typeof(*conn), list);
1947 if (&next->list != &connections)
1948 talloc_increase_ref_count(next);
1950 if (conn->domain) {
1951 if (domain_can_read(conn))
1952 handle_input(conn);
1953 if (talloc_free(conn) == 0)
1954 continue;
1956 talloc_increase_ref_count(conn);
1957 if (domain_can_write(conn) &&
1958 !list_empty(&conn->out_list))
1959 handle_output(conn);
1960 if (talloc_free(conn) == 0)
1961 continue;
1962 } else {
1963 if (FD_ISSET(conn->fd, &inset))
1964 handle_input(conn);
1965 if (talloc_free(conn) == 0)
1966 continue;
1968 talloc_increase_ref_count(conn);
1969 if (FD_ISSET(conn->fd, &outset))
1970 handle_output(conn);
1971 if (talloc_free(conn) == 0)
1972 continue;
1976 max = initialize_set(&inset, &outset, *sock, *ro_sock,
1977 &timeout);
1981 /*
1982 * Local variables:
1983 * c-file-style: "linux"
1984 * indent-tabs-mode: t
1985 * c-indent-level: 8
1986 * c-basic-offset: 8
1987 * tab-width: 8
1988 * End:
1989 */