ia64/xen-unstable

view tools/xenstore/xenstored_core.c @ 7493:98c6c36ac444

Added a timestamp to the logging output by trace_io.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@leeni.uk.xensource.com
date Mon Oct 24 14:12:42 2005 +0100 (2005-10-24)
parents a90d670c98b9
children 34b4068704c5
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored_core.h"
48 #include "xenstored_watch.h"
49 #include "xenstored_transaction.h"
50 #include "xenstored_domain.h"
51 #include "xenctrl.h"
52 #include "tdb.h"
54 extern int eventchn_fd; /* in xenstored_domain.c */
56 static bool verbose;
57 LIST_HEAD(connections);
58 static int tracefd = -1;
59 static TDB_CONTEXT *tdb_ctx;
61 #ifdef TESTING
62 static bool failtest = false;
64 /* We override talloc's malloc. */
65 void *test_malloc(size_t size)
66 {
67 /* 1 in 20 means only about 50% of connections establish. */
68 if (failtest && (random() % 32) == 0)
69 return NULL;
70 return malloc(size);
71 }
73 static void stop_failtest(int signum __attribute__((unused)))
74 {
75 failtest = false;
76 }
78 /* Need these before we #define away write_all/mkdir in testing.h */
79 bool test_write_all(int fd, void *contents, unsigned int len);
80 bool test_write_all(int fd, void *contents, unsigned int len)
81 {
82 if (failtest && (random() % 8) == 0) {
83 if (len)
84 len = random() % len;
85 write(fd, contents, len);
86 errno = ENOSPC;
87 return false;
88 }
89 return xs_write_all(fd, contents, len);
90 }
92 int test_mkdir(const char *dir, int perms);
93 int test_mkdir(const char *dir, int perms)
94 {
95 if (failtest && (random() % 8) == 0) {
96 errno = ENOSPC;
97 return -1;
98 }
99 return mkdir(dir, perms);
100 }
101 #endif /* TESTING */
103 #include "xenstored_test.h"
105 /* FIXME: Ideally, this should never be called. Some can be eliminated. */
106 /* Something is horribly wrong: shutdown immediately. */
107 void __attribute__((noreturn)) corrupt(struct connection *conn,
108 const char *fmt, ...)
109 {
110 va_list arglist;
111 char *str;
112 int saved_errno = errno;
114 va_start(arglist, fmt);
115 str = talloc_vasprintf(NULL, fmt, arglist);
116 va_end(arglist);
118 trace("xenstored corruption: connection id %i: err %s: %s",
119 conn ? (int)conn->id : -1, strerror(saved_errno), str);
120 eprintf("xenstored corruption: connection id %i: err %s: %s",
121 conn ? (int)conn->id : -1, strerror(saved_errno), str);
122 #ifdef TESTING
123 /* Allow them to attach debugger. */
124 sleep(30);
125 #endif
126 syslog(LOG_DAEMON,
127 "xenstored corruption: connection id %i: err %s: %s",
128 conn ? (int)conn->id : -1, strerror(saved_errno), str);
129 _exit(2);
130 }
132 TDB_CONTEXT *tdb_context(struct connection *conn)
133 {
134 /* conn = NULL used in manual_node at setup. */
135 if (!conn || !conn->transaction)
136 return tdb_ctx;
137 return tdb_transaction_context(conn->transaction);
138 }
140 bool replace_tdb(const char *newname, TDB_CONTEXT *newtdb)
141 {
142 if (rename(newname, xs_daemon_tdb()) != 0)
143 return false;
144 tdb_close(tdb_ctx);
145 tdb_ctx = talloc_steal(talloc_autofree_context(), newtdb);
146 return true;
147 }
149 static char *sockmsg_string(enum xsd_sockmsg_type type)
150 {
151 switch (type) {
152 case XS_DEBUG: return "DEBUG";
153 case XS_DIRECTORY: return "DIRECTORY";
154 case XS_READ: return "READ";
155 case XS_GET_PERMS: return "GET_PERMS";
156 case XS_WATCH: return "WATCH";
157 case XS_UNWATCH: return "UNWATCH";
158 case XS_TRANSACTION_START: return "TRANSACTION_START";
159 case XS_TRANSACTION_END: return "TRANSACTION_END";
160 case XS_INTRODUCE: return "INTRODUCE";
161 case XS_RELEASE: return "RELEASE";
162 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
163 case XS_WRITE: return "WRITE";
164 case XS_MKDIR: return "MKDIR";
165 case XS_RM: return "RM";
166 case XS_SET_PERMS: return "SET_PERMS";
167 case XS_WATCH_EVENT: return "WATCH_EVENT";
168 case XS_ERROR: return "ERROR";
169 case XS_IS_DOMAIN_INTRODUCED: return "XS_IS_DOMAIN_INTRODUCED";
170 default:
171 return "**UNKNOWN**";
172 }
173 }
175 static void trace_io(const struct connection *conn,
176 const char *prefix,
177 const struct buffered_data *data)
178 {
179 char string[64];
180 unsigned int i;
181 time_t now;
182 struct tm *tm;
184 if (tracefd < 0)
185 return;
187 now = time(NULL);
188 tm = localtime(&now);
190 write(tracefd, prefix, strlen(prefix));
191 sprintf(string, " %p %0d:%0d:%0d ", conn, tm->tm_hour, tm->tm_min,
192 tm->tm_sec);
193 write(tracefd, string, strlen(string));
194 write(tracefd, sockmsg_string(data->hdr.msg.type),
195 strlen(sockmsg_string(data->hdr.msg.type)));
196 write(tracefd, " (", 2);
197 for (i = 0; i < data->hdr.msg.len; i++) {
198 if (data->buffer[i] == '\0')
199 write(tracefd, " ", 1);
200 else
201 write(tracefd, data->buffer + i, 1);
202 }
203 write(tracefd, ")\n", 2);
204 }
206 void trace_create(const void *data, const char *type)
207 {
208 char string[64];
209 if (tracefd < 0)
210 return;
212 write(tracefd, "CREATE ", strlen("CREATE "));
213 write(tracefd, type, strlen(type));
214 sprintf(string, " %p\n", data);
215 write(tracefd, string, strlen(string));
216 }
218 void trace_destroy(const void *data, const char *type)
219 {
220 char string[64];
221 if (tracefd < 0)
222 return;
224 write(tracefd, "DESTROY ", strlen("DESTROY "));
225 write(tracefd, type, strlen(type));
226 sprintf(string, " %p\n", data);
227 write(tracefd, string, strlen(string));
228 }
230 void trace(const char *fmt, ...)
231 {
232 va_list arglist;
233 char *str;
235 if (tracefd < 0)
236 return;
238 va_start(arglist, fmt);
239 str = talloc_vasprintf(NULL, fmt, arglist);
240 va_end(arglist);
241 write(tracefd, str, strlen(str));
242 talloc_free(str);
243 }
245 static bool write_messages(struct connection *conn)
246 {
247 int ret;
248 struct buffered_data *out;
250 out = list_top(&conn->out_list, struct buffered_data, list);
251 if (out == NULL)
252 return true;
254 if (out->inhdr) {
255 if (verbose)
256 xprintf("Writing msg %s (%.*s) out to %p\n",
257 sockmsg_string(out->hdr.msg.type),
258 out->hdr.msg.len,
259 out->buffer, conn);
260 ret = conn->write(conn, out->hdr.raw + out->used,
261 sizeof(out->hdr) - out->used);
262 if (ret < 0)
263 return false;
265 out->used += ret;
266 if (out->used < sizeof(out->hdr))
267 return true;
269 out->inhdr = false;
270 out->used = 0;
272 /* Second write might block if non-zero. */
273 if (out->hdr.msg.len && !conn->domain)
274 return true;
275 }
277 ret = conn->write(conn, out->buffer + out->used,
278 out->hdr.msg.len - out->used);
279 if (ret < 0)
280 return false;
282 out->used += ret;
283 if (out->used != out->hdr.msg.len)
284 return true;
286 trace_io(conn, "OUT", out);
288 list_del(&out->list);
289 talloc_free(out);
291 return true;
292 }
294 static int destroy_conn(void *_conn)
295 {
296 struct connection *conn = _conn;
298 /* Flush outgoing if possible, but don't block. */
299 if (!conn->domain) {
300 fd_set set;
301 struct timeval none;
303 FD_ZERO(&set);
304 FD_SET(conn->fd, &set);
305 none.tv_sec = none.tv_usec = 0;
307 while (!list_empty(&conn->out_list)
308 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
309 if (!write_messages(conn))
310 break;
311 close(conn->fd);
312 }
313 list_del(&conn->list);
314 trace_destroy(conn, "connection");
315 return 0;
316 }
318 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock)
319 {
320 struct connection *i;
321 int max;
323 FD_ZERO(inset);
324 FD_ZERO(outset);
325 FD_SET(sock, inset);
326 max = sock;
327 FD_SET(ro_sock, inset);
328 if (ro_sock > max)
329 max = ro_sock;
330 FD_SET(eventchn_fd, inset);
331 if (eventchn_fd > max)
332 max = eventchn_fd;
333 list_for_each_entry(i, &connections, list) {
334 if (i->domain)
335 continue;
336 FD_SET(i->fd, inset);
337 if (!list_empty(&i->out_list))
338 FD_SET(i->fd, outset);
339 if (i->fd > max)
340 max = i->fd;
341 }
342 return max;
343 }
/* talloc destructor: close the descriptor stored in the freed int. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
352 /* Return a pointer to an fd, self-closing and attached to this pathname. */
353 int *talloc_open(const char *pathname, int flags, int mode)
354 {
355 int *fd;
357 fd = talloc(pathname, int);
358 *fd = open(pathname, flags, mode);
359 if (*fd < 0) {
360 int saved_errno = errno;
361 talloc_free(fd);
362 errno = saved_errno;
363 return NULL;
364 }
365 talloc_set_destructor(fd, destroy_fd);
366 return fd;
367 }
/*
 * Is child a subnode of parent, or equal?
 * True when child starts with parent and the next character is either a
 * '/' or the end of the string.  Everything is a child of "/".
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen = strlen(parent);
	char next;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (streq(parent, "/"))
		return true;

	if (strncmp(child, parent, plen) != 0)
		return false;

	next = child[plen];
	return next == '/' || next == '\0';
}
385 /* If it fails, returns NULL and sets errno. */
386 static struct node *read_node(struct connection *conn, const char *name)
387 {
388 TDB_DATA key, data;
389 uint32_t *p;
390 struct node *node;
392 key.dptr = (void *)name;
393 key.dsize = strlen(name);
394 data = tdb_fetch(tdb_context(conn), key);
396 if (data.dptr == NULL) {
397 if (tdb_error(tdb_context(conn)) == TDB_ERR_NOEXIST)
398 errno = ENOENT;
399 else
400 errno = EIO;
401 return NULL;
402 }
404 node = talloc(name, struct node);
405 node->name = talloc_strdup(node, name);
406 node->parent = NULL;
407 node->tdb = tdb_context(conn);
408 talloc_steal(node, data.dptr);
410 /* Datalen, childlen, number of permissions */
411 p = (uint32_t *)data.dptr;
412 node->num_perms = p[0];
413 node->datalen = p[1];
414 node->childlen = p[2];
416 /* Permissions are struct xs_permissions. */
417 node->perms = (void *)&p[3];
418 /* Data is binary blob (usually ascii, no nul). */
419 node->data = node->perms + node->num_perms;
420 /* Children is strings, nul separated. */
421 node->children = node->data + node->datalen;
423 return node;
424 }
426 static bool write_node(struct connection *conn, const struct node *node)
427 {
428 TDB_DATA key, data;
429 void *p;
431 key.dptr = (void *)node->name;
432 key.dsize = strlen(node->name);
434 data.dsize = 3*sizeof(uint32_t)
435 + node->num_perms*sizeof(node->perms[0])
436 + node->datalen + node->childlen;
437 data.dptr = talloc_size(node, data.dsize);
438 ((uint32_t *)data.dptr)[0] = node->num_perms;
439 ((uint32_t *)data.dptr)[1] = node->datalen;
440 ((uint32_t *)data.dptr)[2] = node->childlen;
441 p = data.dptr + 3 * sizeof(uint32_t);
443 memcpy(p, node->perms, node->num_perms*sizeof(node->perms[0]));
444 p += node->num_perms*sizeof(node->perms[0]);
445 memcpy(p, node->data, node->datalen);
446 p += node->datalen;
447 memcpy(p, node->children, node->childlen);
449 /* TDB should set errno, but doesn't even set ecode AFAICT. */
450 if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
451 errno = ENOSPC;
452 return false;
453 }
454 return true;
455 }
457 static enum xs_perm_type perm_for_conn(struct connection *conn,
458 struct xs_permissions *perms,
459 unsigned int num)
460 {
461 unsigned int i;
462 enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
464 if (!conn->can_write)
465 mask &= ~XS_PERM_WRITE;
467 /* Owners and tools get it all... */
468 if (!conn->id || perms[0].id == conn->id)
469 return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
471 for (i = 1; i < num; i++)
472 if (perms[i].id == conn->id)
473 return perms[i].perms & mask;
475 return perms[0].perms & mask;
476 }
/*
 * Return the path of node's parent as a new string allocated off node:
 * everything before the last '/', or "/" when the only slash is the
 * leading one.
 */
static char *get_parent(const char *node)
{
	/* Start at node + 1 so the leading slash is never the match. */
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);

	return talloc_strdup(node, "/");
}
486 /* What do parents say? */
487 static enum xs_perm_type ask_parents(struct connection *conn, const char *name)
488 {
489 struct node *node;
491 do {
492 name = get_parent(name);
493 node = read_node(conn, name);
494 if (node)
495 break;
496 } while (!streq(name, "/"));
498 /* No permission at root? We're in trouble. */
499 if (!node)
500 corrupt(conn, "No permissions file at root");
502 return perm_for_conn(conn, node->perms, node->num_perms);
503 }
505 /* We have a weird permissions system. You can allow someone into a
506 * specific node without allowing it in the parents. If it's going to
507 * fail, however, we don't want the errno to indicate any information
508 * about the node. */
509 static int errno_from_parents(struct connection *conn, const char *node,
510 int errnum, enum xs_perm_type perm)
511 {
512 /* We always tell them about memory failures. */
513 if (errnum == ENOMEM)
514 return errnum;
516 if (ask_parents(conn, node) & perm)
517 return errnum;
518 return EACCES;
519 }
521 /* If it fails, returns NULL and sets errno. */
522 struct node *get_node(struct connection *conn,
523 const char *name,
524 enum xs_perm_type perm)
525 {
526 struct node *node;
528 if (!name || !is_valid_nodename(name)) {
529 errno = EINVAL;
530 return NULL;
531 }
532 node = read_node(conn, name);
533 /* If we don't have permission, we don't have node. */
534 if (node) {
535 if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
536 != perm)
537 node = NULL;
538 }
539 /* Clean up errno if they weren't supposed to know. */
540 if (!node)
541 errno = errno_from_parents(conn, name, errno, perm);
542 return node;
543 }
545 static struct buffered_data *new_buffer(void *ctx)
546 {
547 struct buffered_data *data;
549 data = talloc(ctx, struct buffered_data);
550 if (data == NULL)
551 return NULL;
553 data->inhdr = true;
554 data->used = 0;
555 data->buffer = NULL;
557 return data;
558 }
560 /* Return length of string (including nul) at this offset. */
561 static unsigned int get_string(const struct buffered_data *data,
562 unsigned int offset)
563 {
564 const char *nul;
566 if (offset >= data->used)
567 return 0;
569 nul = memchr(data->buffer + offset, 0, data->used - offset);
570 if (!nul)
571 return 0;
573 return nul - (data->buffer + offset) + 1;
574 }
576 /* Break input into vectors, return the number, fill in up to num of them. */
577 unsigned int get_strings(struct buffered_data *data,
578 char *vec[], unsigned int num)
579 {
580 unsigned int off, i, len;
582 off = i = 0;
583 while ((len = get_string(data, off)) != 0) {
584 if (i < num)
585 vec[i] = data->buffer + off;
586 i++;
587 off += len;
588 }
589 return i;
590 }
592 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
593 const void *data, unsigned int len)
594 {
595 struct buffered_data *bdata;
597 /* Message is a child of the connection context for auto-cleanup. */
598 bdata = new_buffer(conn);
599 bdata->buffer = talloc_array(bdata, char, len);
601 /* Echo request header in reply unless this is an async watch event. */
602 if (type != XS_WATCH_EVENT) {
603 memcpy(&bdata->hdr.msg, &conn->in->hdr.msg,
604 sizeof(struct xsd_sockmsg));
605 } else {
606 memset(&bdata->hdr.msg, 0, sizeof(struct xsd_sockmsg));
607 }
609 /* Update relevant header fields and fill in the message body. */
610 bdata->hdr.msg.type = type;
611 bdata->hdr.msg.len = len;
612 memcpy(bdata->buffer, data, len);
614 /* Queue for later transmission. */
615 list_add_tail(&bdata->list, &conn->out_list);
616 }
618 /* Some routines (write, mkdir, etc) just need a non-error return */
619 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
620 {
621 send_reply(conn, type, "OK", sizeof("OK"));
622 }
624 void send_error(struct connection *conn, int error)
625 {
626 unsigned int i;
628 for (i = 0; error != xsd_errors[i].errnum; i++) {
629 if (i == ARRAY_SIZE(xsd_errors) - 1) {
630 eprintf("xenstored: error %i untranslatable", error);
631 i = 0; /* EINVAL */
632 break;
633 }
634 }
635 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
636 strlen(xsd_errors[i].errstring) + 1);
637 }
/*
 * True when node consists only of characters allowed in a path: ASCII
 * letters, digits and "-/_@".  The empty string passes.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";
	size_t good = strspn(node, allowed);

	/* The whole string is valid iff the accepted prefix ends at the nul. */
	return node[good] == '\0';
}
/*
 * A node name is valid when it starts with '/', does not end in '/'
 * (unless it is exactly "/"), contains no "//" and passes valid_chars().
 */
bool is_valid_nodename(const char *node)
{
	return strstarts(node, "/")
		&& (!strends(node, "/") || streq(node, "/"))
		&& !strstr(node, "//")
		&& valid_chars(node);
}
665 /* We expect one arg in the input: return NULL otherwise. */
666 static const char *onearg(struct buffered_data *in)
667 {
668 if (!in->used || get_string(in, 0) != in->used)
669 return NULL;
670 return in->buffer;
671 }
673 static char *perms_to_strings(const void *ctx,
674 struct xs_permissions *perms, unsigned int num,
675 unsigned int *len)
676 {
677 unsigned int i;
678 char *strings = NULL;
679 char buffer[MAX_STRLEN(unsigned int) + 1];
681 for (*len = 0, i = 0; i < num; i++) {
682 if (!xs_perm_to_string(&perms[i], buffer))
683 return NULL;
685 strings = talloc_realloc(ctx, strings, char,
686 *len + strlen(buffer) + 1);
687 strcpy(strings + *len, buffer);
688 *len += strlen(buffer) + 1;
689 }
690 return strings;
691 }
/*
 * Turn a relative node name into an absolute one by prepending the
 * connection's implicit path.  NULL, names already starting with '/', and
 * names on connections without an implicit path are returned unchanged.
 * A newly built name is allocated off node.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	if (node && !strstarts(node, "/")) {
		const char *prefix = get_implicit_path(conn);

		if (prefix)
			return talloc_asprintf(node, "%s/%s", prefix, node);
	}
	return (char *)node;
}
/*
 * Event node names must be non-NULL and start with '@'.
 * Returns false with errno set to EINVAL otherwise.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
714 static void send_directory(struct connection *conn, const char *name)
715 {
716 struct node *node;
718 name = canonicalize(conn, name);
719 node = get_node(conn, name, XS_PERM_READ);
720 if (!node) {
721 send_error(conn, errno);
722 return;
723 }
725 send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
726 }
728 static void do_read(struct connection *conn, const char *name)
729 {
730 struct node *node;
732 name = canonicalize(conn, name);
733 node = get_node(conn, name, XS_PERM_READ);
734 if (!node) {
735 send_error(conn, errno);
736 return;
737 }
739 send_reply(conn, XS_READ, node->data, node->datalen);
740 }
742 static void delete_node_single(struct connection *conn, struct node *node)
743 {
744 TDB_DATA key;
746 key.dptr = (void *)node->name;
747 key.dsize = strlen(node->name);
749 if (tdb_delete(tdb_context(conn), key) != 0)
750 corrupt(conn, "Could not delete '%s'", node->name);
751 }
/* Return the part of name after its last '/'.  Must not be /, and name
 * must contain a slash. */
static char *basename(const char *name)
{
	char *slash = strrchr(name, '/');

	return slash + 1;
}
759 static struct node *construct_node(struct connection *conn, const char *name)
760 {
761 const char *base;
762 unsigned int baselen;
763 struct node *parent, *node;
764 char *children, *parentname = get_parent(name);
766 /* If parent doesn't exist, create it. */
767 parent = read_node(conn, parentname);
768 if (!parent)
769 parent = construct_node(conn, parentname);
770 if (!parent)
771 return NULL;
773 /* Add child to parent. */
774 base = basename(name);
775 baselen = strlen(base) + 1;
776 children = talloc_array(name, char, parent->childlen + baselen);
777 memcpy(children, parent->children, parent->childlen);
778 memcpy(children + parent->childlen, base, baselen);
779 parent->children = children;
780 parent->childlen += baselen;
782 /* Allocate node */
783 node = talloc(name, struct node);
784 node->tdb = tdb_context(conn);
785 node->name = talloc_strdup(node, name);
787 /* Inherit permissions, except domains own what they create */
788 node->num_perms = parent->num_perms;
789 node->perms = talloc_memdup(node, parent->perms,
790 node->num_perms * sizeof(node->perms[0]));
791 if (conn->id)
792 node->perms[0].id = conn->id;
794 /* No children, no data */
795 node->children = node->data = NULL;
796 node->childlen = node->datalen = 0;
797 node->parent = parent;
798 return node;
799 }
801 static int destroy_node(void *_node)
802 {
803 struct node *node = _node;
804 TDB_DATA key;
806 if (streq(node->name, "/"))
807 corrupt(NULL, "Destroying root node!");
809 key.dptr = (void *)node->name;
810 key.dsize = strlen(node->name);
812 tdb_delete(node->tdb, key);
813 return 0;
814 }
816 /* Be careful: create heirarchy, put entry in existing parent *last*.
817 * This helps fsck if we die during this. */
818 static struct node *create_node(struct connection *conn,
819 const char *name,
820 void *data, unsigned int datalen)
821 {
822 struct node *node, *i;
824 node = construct_node(conn, name);
825 if (!node)
826 return NULL;
828 node->data = data;
829 node->datalen = datalen;
831 /* We write out the nodes down, setting destructor in case
832 * something goes wrong. */
833 for (i = node; i; i = i->parent) {
834 if (!write_node(conn, i))
835 return NULL;
836 talloc_set_destructor(i, destroy_node);
837 }
839 /* OK, now remove destructors so they stay around */
840 for (i = node; i; i = i->parent)
841 talloc_set_destructor(i, NULL);
842 return node;
843 }
845 /* path, data... */
846 static void do_write(struct connection *conn, struct buffered_data *in)
847 {
848 unsigned int offset, datalen;
849 struct node *node;
850 char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
851 char *name;
853 /* Extra "strings" can be created by binary data. */
854 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
855 send_error(conn, EINVAL);
856 return;
857 }
859 offset = strlen(vec[0]) + 1;
860 datalen = in->used - offset;
862 name = canonicalize(conn, vec[0]);
863 node = get_node(conn, name, XS_PERM_WRITE);
864 if (!node) {
865 /* No permissions, invalid input? */
866 if (errno != ENOENT) {
867 send_error(conn, errno);
868 return;
869 }
870 node = create_node(conn, name, in->buffer + offset, datalen);
871 if (!node) {
872 send_error(conn, errno);
873 return;
874 }
875 } else {
876 node->data = in->buffer + offset;
877 node->datalen = datalen;
878 if (!write_node(conn, node)){
879 send_error(conn, errno);
880 return;
881 }
882 }
884 add_change_node(conn->transaction, name, false);
885 fire_watches(conn, name, false);
886 send_ack(conn, XS_WRITE);
887 }
889 static void do_mkdir(struct connection *conn, const char *name)
890 {
891 struct node *node;
893 name = canonicalize(conn, name);
894 node = get_node(conn, name, XS_PERM_WRITE);
896 /* If it already exists, fine. */
897 if (!node) {
898 /* No permissions? */
899 if (errno != ENOENT) {
900 send_error(conn, errno);
901 return;
902 }
903 node = create_node(conn, name, NULL, 0);
904 if (!node) {
905 send_error(conn, errno);
906 return;
907 }
908 add_change_node(conn->transaction, name, false);
909 fire_watches(conn, name, false);
910 }
911 send_ack(conn, XS_MKDIR);
912 }
914 static void delete_node(struct connection *conn, struct node *node)
915 {
916 unsigned int i;
918 /* Delete self, then delete children. If something goes wrong,
919 * consistency check will clean up this way. */
920 delete_node_single(conn, node);
922 /* Delete children, too. */
923 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
924 struct node *child;
926 child = read_node(conn,
927 talloc_asprintf(node, "%s/%s", node->name,
928 node->children + i));
929 if (!child)
930 corrupt(conn, "No child '%s' found", child);
931 delete_node(conn, child);
932 }
933 }
/*
 * Delete memory using memmove: remove len bytes at offset off from a
 * region of total bytes by sliding the tail down over them.  Bytes past
 * the new end are left as they were.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	char *hole = (char *)mem + off;

	memmove(hole, hole + len, total - off - len);
}
941 static bool delete_child(struct connection *conn,
942 struct node *node, const char *childname)
943 {
944 unsigned int i;
946 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
947 if (streq(node->children+i, childname)) {
948 memdel(node->children, i, strlen(childname) + 1,
949 node->childlen);
950 node->childlen -= strlen(childname) + 1;
951 return write_node(conn, node);
952 }
953 }
954 corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
955 }
/*
 * Remove node (called name) and its subtree.  The entry is unlinked from
 * the parent first.  Returns 1 on success.  On failure an EINVAL error
 * reply has already been sent to conn and 0 is returned.
 */
static int _rm(struct connection *conn, struct node *node, const char *name)
{
	/* Delete from parent first, then if something explodes fsck cleans. */
	struct node *parent = read_node(conn, get_parent(name));

	if (!parent || !delete_child(conn, parent, basename(name))) {
		send_error(conn, EINVAL);
		return 0;
	}

	delete_node(conn, node);
	return 1;
}
977 static void do_rm(struct connection *conn, const char *name)
978 {
979 struct node *node;
981 name = canonicalize(conn, name);
982 node = get_node(conn, name, XS_PERM_WRITE);
983 if (!node) {
984 /* Didn't exist already? Fine, if parent exists. */
985 if (errno == ENOENT) {
986 node = read_node(conn, get_parent(name));
987 if (node) {
988 send_ack(conn, XS_RM);
989 return;
990 }
991 /* Restore errno, just in case. */
992 errno = ENOENT;
993 }
994 send_error(conn, errno);
995 return;
996 }
998 if (streq(name, "/")) {
999 send_error(conn, EINVAL);
1000 return;
1003 if (_rm(conn, node, name)) {
1004 add_change_node(conn->transaction, name, true);
1005 fire_watches(conn, name, true);
1006 send_ack(conn, XS_RM);
1011 static void do_get_perms(struct connection *conn, const char *name)
1013 struct node *node;
1014 char *strings;
1015 unsigned int len;
1017 name = canonicalize(conn, name);
1018 node = get_node(conn, name, XS_PERM_READ);
1019 if (!node) {
1020 send_error(conn, errno);
1021 return;
1024 strings = perms_to_strings(node, node->perms, node->num_perms, &len);
1025 if (!strings)
1026 send_error(conn, errno);
1027 else
1028 send_reply(conn, XS_GET_PERMS, strings, len);
1031 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1033 unsigned int num;
1034 char *name, *permstr;
1035 struct node *node;
1037 num = xs_count_strings(in->buffer, in->used);
1038 if (num < 2) {
1039 send_error(conn, EINVAL);
1040 return;
1043 /* First arg is node name. */
1044 name = canonicalize(conn, in->buffer);
1045 permstr = in->buffer + strlen(in->buffer) + 1;
1046 num--;
1048 /* We must own node to do this (tools can do this too). */
1049 node = get_node(conn, name, XS_PERM_WRITE|XS_PERM_OWNER);
1050 if (!node) {
1051 send_error(conn, errno);
1052 return;
1055 node->perms = talloc_array(node, struct xs_permissions, num);
1056 node->num_perms = num;
1057 if (!xs_strings_to_perms(node->perms, num, permstr)) {
1058 send_error(conn, errno);
1059 return;
1061 if (!write_node(conn, node)) {
1062 send_error(conn, errno);
1063 return;
1066 add_change_node(conn->transaction, name, false);
1067 fire_watches(conn, name, false);
1068 send_ack(conn, XS_SET_PERMS);
1071 /* Process "in" for conn: "in" will vanish after this conversation, so
1072 * we can talloc off it for temporary variables. May free "conn".
1073 */
1074 static void process_message(struct connection *conn, struct buffered_data *in)
1076 struct transaction *trans;
1078 trans = transaction_lookup(conn, in->hdr.msg.tx_id);
1079 if (IS_ERR(trans)) {
1080 send_error(conn, -PTR_ERR(trans));
1081 return;
1084 assert(conn->transaction == NULL);
1085 conn->transaction = trans;
1087 switch (in->hdr.msg.type) {
1088 case XS_DIRECTORY:
1089 send_directory(conn, onearg(in));
1090 break;
1092 case XS_READ:
1093 do_read(conn, onearg(in));
1094 break;
1096 case XS_WRITE:
1097 do_write(conn, in);
1098 break;
1100 case XS_MKDIR:
1101 do_mkdir(conn, onearg(in));
1102 break;
1104 case XS_RM:
1105 do_rm(conn, onearg(in));
1106 break;
1108 case XS_GET_PERMS:
1109 do_get_perms(conn, onearg(in));
1110 break;
1112 case XS_SET_PERMS:
1113 do_set_perms(conn, in);
1114 break;
1116 case XS_DEBUG:
1117 if (streq(in->buffer, "print"))
1118 xprintf("debug: %s", in->buffer + get_string(in, 0));
1119 #ifdef TESTING
1120 /* For testing, we allow them to set id. */
1121 if (streq(in->buffer, "setid")) {
1122 conn->id = atoi(in->buffer + get_string(in, 0));
1123 send_ack(conn, XS_DEBUG);
1124 } else if (streq(in->buffer, "failtest")) {
1125 if (get_string(in, 0) < in->used)
1126 srandom(atoi(in->buffer + get_string(in, 0)));
1127 send_ack(conn, XS_DEBUG);
1128 failtest = true;
1130 #endif /* TESTING */
1131 break;
1133 case XS_WATCH:
1134 do_watch(conn, in);
1135 break;
1137 case XS_UNWATCH:
1138 do_unwatch(conn, in);
1139 break;
1141 case XS_TRANSACTION_START:
1142 do_transaction_start(conn, in);
1143 break;
1145 case XS_TRANSACTION_END:
1146 do_transaction_end(conn, onearg(in));
1147 break;
1149 case XS_INTRODUCE:
1150 do_introduce(conn, in);
1151 break;
1153 case XS_IS_DOMAIN_INTRODUCED:
1154 do_is_domain_introduced(conn, onearg(in));
1155 break;
1157 case XS_RELEASE:
1158 do_release(conn, onearg(in));
1159 break;
1161 case XS_GET_DOMAIN_PATH:
1162 do_get_domain_path(conn, onearg(in));
1163 break;
1165 default:
1166 eprintf("Client unknown operation %i", in->hdr.msg.type);
1167 send_error(conn, ENOSYS);
1168 break;
1171 conn->transaction = NULL;
/*
 * talloc failure handler: data points at a jmp_buf; jump back to wherever
 * setjmp() armed it.  Does not return.
 */
static int out_of_mem(void *data)
{
	longjmp(*(jmp_buf *)data, 1);
}
1179 static void consider_message(struct connection *conn)
1181 jmp_buf talloc_fail;
1183 if (verbose)
1184 xprintf("Got message %s len %i from %p\n",
1185 sockmsg_string(conn->in->hdr.msg.type),
1186 conn->in->hdr.msg.len, conn);
1188 /* For simplicity, we kill the connection on OOM. */
1189 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1190 if (setjmp(talloc_fail)) {
1191 talloc_free(conn);
1192 goto end;
1195 process_message(conn, conn->in);
1197 talloc_free(conn->in);
1198 conn->in = new_buffer(conn);
1200 end:
1201 talloc_set_fail_handler(NULL, NULL);
1202 if (talloc_total_blocks(NULL)
1203 != talloc_total_blocks(talloc_autofree_context()) + 1) {
1204 talloc_report_full(NULL, stderr);
1205 abort();
1209 /* Errors in reading or allocating here mean we get out of sync, so we
1210 * drop the whole client connection. */
1211 static void handle_input(struct connection *conn)
1213 int bytes;
1214 struct buffered_data *in = conn->in;
1216 /* Not finished header yet? */
1217 if (in->inhdr) {
1218 bytes = conn->read(conn, in->hdr.raw + in->used,
1219 sizeof(in->hdr) - in->used);
1220 if (bytes <= 0)
1221 goto bad_client;
1222 in->used += bytes;
1223 if (in->used != sizeof(in->hdr))
1224 return;
1226 if (in->hdr.msg.len > PATH_MAX) {
1227 #ifndef TESTING
1228 syslog(LOG_DAEMON, "Client tried to feed us %i",
1229 in->hdr.msg.len);
1230 #endif
1231 goto bad_client;
1234 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1235 if (!in->buffer)
1236 goto bad_client;
1237 in->used = 0;
1238 in->inhdr = false;
1239 return;
1242 bytes = conn->read(conn, in->buffer + in->used,
1243 in->hdr.msg.len - in->used);
1244 if (bytes < 0)
1245 goto bad_client;
1247 in->used += bytes;
1248 if (in->used != in->hdr.msg.len)
1249 return;
1251 trace_io(conn, "IN ", in);
1252 consider_message(conn);
1253 return;
1255 bad_client:
1256 /* Kill it. */
1257 talloc_free(conn);
/*
 * Push queued output for a connection via write_messages(); if that
 * reports failure the connection is freed.
 */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	talloc_free(conn);
}
1266 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1268 struct connection *new;
1270 new = talloc(talloc_autofree_context(), struct connection);
1271 if (!new)
1272 return NULL;
1274 memset(new, 0, sizeof(*new));
1275 new->fd = -1;
1276 new->write = write;
1277 new->read = read;
1278 new->can_write = true;
1279 INIT_LIST_HEAD(&new->out_list);
1280 INIT_LIST_HEAD(&new->watches);
1281 INIT_LIST_HEAD(&new->transaction_list);
1283 new->in = new_buffer(new);
1284 if (new->in == NULL) {
1285 talloc_free(new);
1286 return NULL;
1289 list_add_tail(&new->list, &connections);
1290 talloc_set_destructor(new, destroy_conn);
1291 trace_create(new, "connection");
1292 return new;
1295 static int writefd(struct connection *conn, const void *data, unsigned int len)
1297 return write(conn->fd, data, len);
1300 static int readfd(struct connection *conn, void *data, unsigned int len)
1302 return read(conn->fd, data, len);
1305 static void accept_connection(int sock, bool canwrite)
1307 int fd;
1308 struct connection *conn;
1310 fd = accept(sock, NULL, NULL);
1311 if (fd < 0)
1312 return;
1314 conn = new_connection(writefd, readfd);
1315 if (conn) {
1316 conn->fd = fd;
1317 conn->can_write = canwrite;
1318 } else
1319 close(fd);
#ifdef TESTING
/* Valgrind can check our writes better if we don't use mmap */
#define TDB_FLAGS TDB_NOMMAP

/*
 * Debugger aid: print a short summary of every connection on the global
 * list (output-queue state, id if non-zero, progress of any partially
 * read message) followed by its watches.
 */
void dump_connection(void)
{
	struct connection *i;

	list_for_each_entry(i, &connections, list) {
		const char *state;

		if (list_empty(&i->out_list))
			state = "OK";
		else
			state = "BUSY";

		printf("Connection %p:\n", i);
		printf(" state = %s\n", state);

		if (i->id)
			printf(" id = %i\n", i->id);

		/* Only mention input when a message is partly received. */
		if (i->in->used || !i->in->inhdr) {
			const char *phase = i->in->inhdr ? "header" : "data";

			printf(" got %i bytes of %s\n", i->in->used, phase);
		}
#if 0
		if (i->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(i->out->hdr.msg.type),
			       i->out->buffer);
		if (i->transaction)
			dump_transaction(i);
		if (i->domain)
			dump_domain(i);
#endif
		dump_watches(i);
	}
}
#else
#define TDB_FLAGS 0
#endif
1356 /* We create initial nodes manually. */
1357 static void manual_node(const char *name, const char *child)
1359 struct node *node;
1360 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_READ };
1362 node = talloc(NULL, struct node);
1363 node->name = name;
1364 node->perms = &perms;
1365 node->num_perms = 1;
1366 node->data = NULL;
1367 node->datalen = 0;
1368 node->children = (char *)child;
1369 if (child)
1370 node->childlen = strlen(child) + 1;
1371 else
1372 node->childlen = 0;
1374 if (!write_node(NULL, node))
1375 barf_perror("Could not create initial node %s", name);
1376 talloc_free(node);
1381 static void setup_structure(void)
1383 char *tdbname;
1384 tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1385 tdb_ctx = tdb_open(tdbname, 0, TDB_FLAGS, O_RDWR, 0);
1387 if (!tdb_ctx) {
1388 tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
1389 0640);
1390 if (!tdb_ctx)
1391 barf_perror("Could not create tdb file %s", tdbname);
1393 manual_node("/", "tool");
1394 manual_node("/tool", "xenstored");
1395 manual_node("/tool/xenstored", NULL);
1398 /* FIXME: Fsck */
1401 static void write_pidfile(const char *pidfile)
1403 char buf[100];
1404 int len;
1405 int fd;
1407 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1408 if (fd == -1)
1409 barf_perror("Opening pid file %s", pidfile);
1411 /* We exit silently if daemon already running. */
1412 if (lockf(fd, F_TLOCK, 0) == -1)
1413 exit(0);
1415 len = sprintf(buf, "%d\n", getpid());
1416 write(fd, buf, len);
/*
 * Stevens.
 *
 * Detach from the invoking process: fork (the parent exits with status 0),
 * start a new session, move to "/" (skipped in TESTING builds) and clear
 * the umask.  A failed fork or chdir terminates via barf_perror().
 */
static void daemonize(void)
{
	pid_t pid;

	/* Separate from our parent via fork, so init inherits us. */
	if ((pid = fork()) < 0)
		barf_perror("Failed to fork daemon");
	if (pid != 0)
		exit(0);

	/* Session leader so ^C doesn't whack us. */
	setsid();
#ifndef TESTING	/* Relative paths for socket names */
	/* Move off any mount points we might be in. */
	if (chdir("/") == -1)
		barf_perror("Failed to chdir");
#endif
	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/* Print the command-line summary to stderr. */
static void usage(void)
{
	static const char help_text[] =
		"Usage:\n"
		"\n"
		" xenstored <options>\n"
		"\n"
		"where options may include:\n"
		"\n"
		" --no-domain-init to state that xenstored should not initialise dom0,\n"
		" --pid-file <file> giving a file for the daemon's pid to be written,\n"
		" --help to output this message,\n"
		" --no-fork to request that the daemon does not fork,\n"
		" --output-pid to request that the pid of the daemon is output,\n"
		" --trace-file <file> giving the file for logging, and\n"
		" --verbose to request verbose execution.\n";

	/* The text holds no conversion specifiers, so emit it verbatim. */
	fputs(help_text, stderr);
}
/*
 * Long options accepted by main().  Each maps to the single-character
 * code handled in main()'s option switch; the table ends with an
 * all-zero entry.
 */
static struct option options[] = {
	{ .name = "no-domain-init", .has_arg = no_argument,
	  .flag = NULL, .val = 'D' },
	{ .name = "pid-file", .has_arg = required_argument,
	  .flag = NULL, .val = 'F' },
	{ .name = "help", .has_arg = no_argument,
	  .flag = NULL, .val = 'H' },
	{ .name = "no-fork", .has_arg = no_argument,
	  .flag = NULL, .val = 'N' },
	{ .name = "output-pid", .has_arg = no_argument,
	  .flag = NULL, .val = 'P' },
	{ .name = "trace-file", .has_arg = required_argument,
	  .flag = NULL, .val = 'T' },
	{ .name = "verbose", .has_arg = no_argument,
	  .flag = NULL, .val = 'V' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
1470 extern void dump_conn(struct connection *conn);
1472 int main(int argc, char *argv[])
1474 int opt, *sock, *ro_sock, max;
1475 struct sockaddr_un addr;
1476 fd_set inset, outset;
1477 bool dofork = true;
1478 bool outputpid = false;
1479 bool no_domain_init = false;
1480 const char *pidfile = NULL;
1482 while ((opt = getopt_long(argc, argv, "DF:HNPT:V", options,
1483 NULL)) != -1) {
1484 switch (opt) {
1485 case 'D':
1486 no_domain_init = true;
1487 break;
1488 case 'F':
1489 pidfile = optarg;
1490 break;
1491 case 'H':
1492 usage();
1493 return 0;
1494 case 'N':
1495 dofork = false;
1496 break;
1497 case 'P':
1498 outputpid = true;
1499 break;
1500 case 'T':
1501 tracefd = open(optarg, O_WRONLY|O_CREAT|O_APPEND, 0600);
1502 if (tracefd < 0)
1503 barf_perror("Could not open tracefile %s",
1504 optarg);
1505 write(tracefd, "\n***\n", strlen("\n***\n"));
1506 break;
1507 case 'V':
1508 verbose = true;
1509 break;
1512 if (optind != argc)
1513 barf("%s: No arguments desired", argv[0]);
1515 if (dofork) {
1516 openlog("xenstored", 0, LOG_DAEMON);
1517 daemonize();
1519 if (pidfile)
1520 write_pidfile(pidfile);
1522 talloc_enable_leak_report_full();
1524 /* Create sockets for them to listen to. */
1525 sock = talloc(talloc_autofree_context(), int);
1526 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1527 if (*sock < 0)
1528 barf_perror("Could not create socket");
1529 ro_sock = talloc(talloc_autofree_context(), int);
1530 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1531 if (*ro_sock < 0)
1532 barf_perror("Could not create socket");
1533 talloc_set_destructor(sock, destroy_fd);
1534 talloc_set_destructor(ro_sock, destroy_fd);
1536 /* Don't kill us with SIGPIPE. */
1537 signal(SIGPIPE, SIG_IGN);
1539 /* FIXME: Be more sophisticated, don't mug running daemon. */
1540 unlink(xs_daemon_socket());
1541 unlink(xs_daemon_socket_ro());
1543 addr.sun_family = AF_UNIX;
1544 strcpy(addr.sun_path, xs_daemon_socket());
1545 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1546 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1547 strcpy(addr.sun_path, xs_daemon_socket_ro());
1548 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1549 barf_perror("Could not bind socket to %s",
1550 xs_daemon_socket_ro());
1551 if (chmod(xs_daemon_socket(), 0600) != 0
1552 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1553 barf_perror("Could not chmod sockets");
1555 if (listen(*sock, 1) != 0
1556 || listen(*ro_sock, 1) != 0)
1557 barf_perror("Could not listen on sockets");
1559 /* Setup the database */
1560 setup_structure();
1562 /* Listen to hypervisor. */
1563 if (!no_domain_init)
1564 domain_init();
1566 /* Restore existing connections. */
1567 restore_existing_connections();
1569 if (outputpid) {
1570 printf("%i\n", getpid());
1571 fflush(stdout);
1574 /* close stdin/stdout now we're ready to accept connections */
1575 if (dofork) {
1576 close(STDIN_FILENO);
1577 close(STDOUT_FILENO);
1578 close(STDERR_FILENO);
1581 #ifdef TESTING
1582 signal(SIGUSR1, stop_failtest);
1583 #endif
1585 /* Get ready to listen to the tools. */
1586 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1588 /* Main loop. */
1589 /* FIXME: Rewrite so noone can starve. */
1590 for (;;) {
1591 struct connection *i;
1593 if (select(max+1, &inset, &outset, NULL, NULL) < 0) {
1594 if (errno == EINTR)
1595 continue;
1596 barf_perror("Select failed");
1599 if (FD_ISSET(*sock, &inset))
1600 accept_connection(*sock, true);
1602 if (FD_ISSET(*ro_sock, &inset))
1603 accept_connection(*ro_sock, false);
1605 if (FD_ISSET(eventchn_fd, &inset))
1606 handle_event();
1608 list_for_each_entry(i, &connections, list) {
1609 if (i->domain)
1610 continue;
1612 /* Operations can delete themselves or others
1613 * (xs_release): list is not safe after input,
1614 * so break. */
1615 if (FD_ISSET(i->fd, &inset)) {
1616 handle_input(i);
1617 break;
1619 if (FD_ISSET(i->fd, &outset)) {
1620 handle_output(i);
1621 break;
1625 /* Handle all possible I/O for domain connections. */
1626 more:
1627 list_for_each_entry(i, &connections, list) {
1628 if (!i->domain)
1629 continue;
1631 if (domain_can_read(i)) {
1632 handle_input(i);
1633 goto more;
1636 if (domain_can_write(i) && !list_empty(&i->out_list)) {
1637 handle_output(i);
1638 goto more;
1642 max = initialize_set(&inset, &outset, *sock, *ro_sock);
1646 /*
1647 * Local variables:
1648 * c-file-style: "linux"
1649 * indent-tabs-mode: t
1650 * c-indent-level: 8
1651 * c-basic-offset: 8
1652 * tab-width: 8
1653 * End:
1654 */