ia64/xen-unstable

view tools/xenstore/xenstored_core.c @ 6946:e703abaf6e3d

Add behaviour to the remove methods to remove the transaction's path itself. This allows us to write Remove(path) to remove the specified path rather than having to slice the path ourselves.
author emellor@ewan
date Sun Sep 18 14:42:13 2005 +0100 (2005-09-18)
parents 3233e7ecfa9f
children a5d67e3fbff1 872cf6ee0594
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored.h"
48 #include "xenstored_core.h"
49 #include "xenstored_watch.h"
50 #include "xenstored_transaction.h"
51 #include "xenstored_domain.h"
52 #include "xenctrl.h"
/* When set, message traffic is also reported through xprintf(). */
54 static bool verbose;
/* List head that connections are linked onto (walked in initialize_set()). */
55 LIST_HEAD(connections);
/* Descriptor the trace helpers write to; a negative value disables tracing. */
56 static int tracefd = -1;
58 #ifdef TESTING
/* TESTING builds only: while true, the test_* wrappers inject random failures. */
59 static bool failtest = false;
61 /* We override talloc's malloc. */
62 void *test_malloc(size_t size)
63 {
64 /* 1 in 20 means only about 50% of connections establish. */
65 if (failtest && (random() % 32) == 0)
66 return NULL;
67 return malloc(size);
68 }
70 static void stop_failtest(int signum __attribute__((unused)))
71 {
72 failtest = false;
73 }
75 /* Need these before we #define away write_all/mkdir in testing.h */
76 bool test_write_all(int fd, void *contents, unsigned int len);
77 bool test_write_all(int fd, void *contents, unsigned int len)
78 {
79 if (failtest && (random() % 8) == 0) {
80 if (len)
81 len = random() % len;
82 write(fd, contents, len);
83 errno = ENOSPC;
84 return false;
85 }
86 return xs_write_all(fd, contents, len);
87 }
89 int test_mkdir(const char *dir, int perms);
90 int test_mkdir(const char *dir, int perms)
91 {
92 if (failtest && (random() % 8) == 0) {
93 errno = ENOSPC;
94 return -1;
95 }
96 return mkdir(dir, perms);
97 }
98 #endif /* TESTING */
100 #include "xenstored_test.h"
102 /* FIXME: Ideally, this should never be called. Some can be eliminated. */
103 /* Something is horribly wrong: shutdown immediately. */
104 void __attribute__((noreturn)) corrupt(struct connection *conn,
105 const char *fmt, ...)
106 {
107 va_list arglist;
108 char *str;
109 int saved_errno = errno;
111 va_start(arglist, fmt);
112 str = talloc_vasprintf(NULL, fmt, arglist);
113 va_end(arglist);
115 trace("xenstored corruption: connection id %i: err %s: %s",
116 conn ? (int)conn->id : -1, strerror(saved_errno), str);
117 eprintf("xenstored corruption: connection id %i: err %s: %s",
118 conn ? (int)conn->id : -1, strerror(saved_errno), str);
119 #ifdef TESTING
120 /* Allow them to attach debugger. */
121 sleep(30);
122 #endif
123 syslog(LOG_DAEMON,
124 "xenstored corruption: connection id %i: err %s: %s",
125 conn ? (int)conn->id : -1, strerror(saved_errno), str);
126 _exit(2);
127 }
129 static char *sockmsg_string(enum xsd_sockmsg_type type)
130 {
131 switch (type) {
132 case XS_DEBUG: return "DEBUG";
133 case XS_SHUTDOWN: return "SHUTDOWN";
134 case XS_DIRECTORY: return "DIRECTORY";
135 case XS_READ: return "READ";
136 case XS_GET_PERMS: return "GET_PERMS";
137 case XS_WATCH: return "WATCH";
138 case XS_WATCH_ACK: return "WATCH_ACK";
139 case XS_UNWATCH: return "UNWATCH";
140 case XS_TRANSACTION_START: return "TRANSACTION_START";
141 case XS_TRANSACTION_END: return "TRANSACTION_END";
142 case XS_INTRODUCE: return "INTRODUCE";
143 case XS_RELEASE: return "RELEASE";
144 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
145 case XS_WRITE: return "WRITE";
146 case XS_MKDIR: return "MKDIR";
147 case XS_RM: return "RM";
148 case XS_SET_PERMS: return "SET_PERMS";
149 case XS_WATCH_EVENT: return "WATCH_EVENT";
150 case XS_ERROR: return "ERROR";
151 default:
152 return "**UNKNOWN**";
153 }
154 }
156 static void trace_io(const struct connection *conn,
157 const char *prefix,
158 const struct buffered_data *data)
159 {
160 char string[64];
161 unsigned int i;
163 if (tracefd < 0)
164 return;
166 write(tracefd, prefix, strlen(prefix));
167 sprintf(string, " %p ", conn);
168 write(tracefd, string, strlen(string));
169 write(tracefd, sockmsg_string(data->hdr.msg.type),
170 strlen(sockmsg_string(data->hdr.msg.type)));
171 write(tracefd, " (", 2);
172 for (i = 0; i < data->hdr.msg.len; i++) {
173 if (data->buffer[i] == '\0')
174 write(tracefd, " ", 1);
175 else
176 write(tracefd, data->buffer + i, 1);
177 }
178 write(tracefd, ")\n", 2);
179 }
181 void trace_create(const void *data, const char *type)
182 {
183 char string[64];
184 if (tracefd < 0)
185 return;
187 write(tracefd, "CREATE ", strlen("CREATE "));
188 write(tracefd, type, strlen(type));
189 sprintf(string, " %p\n", data);
190 write(tracefd, string, strlen(string));
191 }
193 void trace_destroy(const void *data, const char *type)
194 {
195 char string[64];
196 if (tracefd < 0)
197 return;
199 write(tracefd, "DESTROY ", strlen("DESTROY "));
200 write(tracefd, type, strlen(type));
201 sprintf(string, " %p\n", data);
202 write(tracefd, string, strlen(string));
203 }
205 void trace_watch_timeout(const struct connection *conn, const char *node, const char *token)
206 {
207 char string[64];
208 if (tracefd < 0)
209 return;
210 write(tracefd, "WATCH_TIMEOUT ", strlen("WATCH_TIMEOUT "));
211 sprintf(string, " %p ", conn);
212 write(tracefd, string, strlen(string));
213 write(tracefd, " (", 2);
214 write(tracefd, node, strlen(node));
215 write(tracefd, " ", 1);
216 write(tracefd, token, strlen(token));
217 write(tracefd, ")\n", 2);
218 }
220 static void trace_blocked(const struct connection *conn,
221 const struct buffered_data *data)
222 {
223 char string[64];
225 if (tracefd < 0)
226 return;
228 write(tracefd, "BLOCKED", strlen("BLOCKED"));
229 sprintf(string, " %p (", conn);
230 write(tracefd, string, strlen(string));
231 write(tracefd, sockmsg_string(data->hdr.msg.type),
232 strlen(sockmsg_string(data->hdr.msg.type)));
233 write(tracefd, ")\n", 2);
234 }
236 void trace(const char *fmt, ...)
237 {
238 va_list arglist;
239 char *str;
241 if (tracefd < 0)
242 return;
244 va_start(arglist, fmt);
245 str = talloc_vasprintf(NULL, fmt, arglist);
246 va_end(arglist);
247 write(tracefd, str, strlen(str));
248 talloc_free(str);
249 }
251 static bool write_message(struct connection *conn)
252 {
253 int ret;
254 struct buffered_data *out = conn->out;
256 assert(conn->state != BLOCKED);
257 if (out->inhdr) {
258 if (verbose)
259 xprintf("Writing msg %s (%s) out to %p\n",
260 sockmsg_string(out->hdr.msg.type),
261 out->buffer, conn);
262 ret = conn->write(conn, out->hdr.raw + out->used,
263 sizeof(out->hdr) - out->used);
264 if (ret < 0)
265 return false;
267 out->used += ret;
268 if (out->used < sizeof(out->hdr))
269 return true;
271 out->inhdr = false;
272 out->used = 0;
274 /* Second write might block if non-zero. */
275 if (out->hdr.msg.len && !conn->domain)
276 return true;
277 }
279 ret = conn->write(conn, out->buffer + out->used,
280 out->hdr.msg.len - out->used);
282 if (ret < 0)
283 return false;
285 out->used += ret;
286 if (out->used != out->hdr.msg.len)
287 return true;
289 trace_io(conn, "OUT", out);
290 conn->out = NULL;
291 talloc_free(out);
293 queue_next_event(conn);
295 /* No longer busy? */
296 if (!conn->out)
297 conn->state = OK;
298 return true;
299 }
301 static int destroy_conn(void *_conn)
302 {
303 struct connection *conn = _conn;
305 /* Flush outgoing if possible, but don't block. */
306 if (!conn->domain) {
307 fd_set set;
308 struct timeval none;
310 FD_ZERO(&set);
311 FD_SET(conn->fd, &set);
312 none.tv_sec = none.tv_usec = 0;
314 while (conn->out
315 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
316 if (!write_message(conn))
317 break;
318 close(conn->fd);
319 }
320 list_del(&conn->list);
321 trace_destroy(conn, "connection");
322 return 0;
323 }
325 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
326 int event_fd)
327 {
328 struct connection *i;
329 int max;
331 FD_ZERO(inset);
332 FD_ZERO(outset);
333 FD_SET(sock, inset);
334 max = sock;
335 FD_SET(ro_sock, inset);
336 if (ro_sock > max)
337 max = ro_sock;
338 FD_SET(event_fd, inset);
339 if (event_fd > max)
340 max = event_fd;
341 list_for_each_entry(i, &connections, list) {
342 if (i->domain)
343 continue;
344 if (i->state == OK)
345 FD_SET(i->fd, inset);
346 if (i->out)
347 FD_SET(i->fd, outset);
348 if (i->fd > max)
349 max = i->fd;
350 }
351 return max;
352 }
/*
 * Read everything from a talloc_open'ed fd.
 *
 * The buffer is allocated as a talloc child of fd, starts at 4 bytes and
 * doubles whenever it fills.  *size receives the number of bytes read.
 * Returns the buffer, or NULL if read() reported an error.
 */
void *read_all(int *fd, unsigned int *size)
{
	unsigned int capacity = 4;
	void *buffer = talloc_size(fd, capacity);
	int got;

	*size = 0;
	for (;;) {
		got = read(*fd, (char *)buffer + *size, capacity - *size);
		if (got <= 0)
			break;

		*size += got;
		if (*size == capacity) {
			capacity *= 2;
			buffer = talloc_realloc_size(fd, buffer, capacity);
		}
	}

	return got < 0 ? NULL : buffer;
}
/* talloc destructor for talloc_open(): closes the stored descriptor. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
379 /* Return a pointer to an fd, self-closing and attached to this pathname. */
380 int *talloc_open(const char *pathname, int flags, int mode)
381 {
382 int *fd;
384 fd = talloc(pathname, int);
385 *fd = open(pathname, flags, mode);
386 if (*fd < 0) {
387 int saved_errno = errno;
388 talloc_free(fd);
389 errno = saved_errno;
390 return NULL;
391 }
392 talloc_set_destructor(fd, destroy_fd);
393 return fd;
394 }
/*
 * Is child a subnode of parent, or equal to it?
 *
 * Everything is considered to be under "/".  Otherwise child must begin
 * with parent and continue with either a '/' or the end of the string, so
 * "/ab" is not a child of "/a".
 */
bool is_child(const char *child, const char *parent)
{
	size_t parent_len;
	char next;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (strcmp(parent, "/") == 0)
		return true;

	parent_len = strlen(parent);
	if (strncmp(child, parent, parent_len) != 0)
		return false;

	next = child[parent_len];
	return next == '\0' || next == '/';
}
/*
 * Directory that backs a node outside any transaction: the store
 * directory from xs_daemon_store() followed by the node name.  For the
 * root node "/" the store directory itself is returned, so the answer
 * never ends in '/'.  The result is a talloc child of node.
 */
char *node_dir_outside_transaction(const char *node)
{
	const char *store = xs_daemon_store();

	if (streq(node, "/"))
		return talloc_strdup(node, store);
	return talloc_asprintf(node, "%s%s", store, node);
}
/*
 * Directory backing a node as seen through an optional transaction: the
 * transaction's copy when trans is non-NULL and the node lies within it,
 * the main store otherwise.
 */
static char *node_dir(struct transaction *trans, const char *node)
{
	if (trans && within_transaction(trans, node))
		return node_dir_inside_transaction(trans, node);
	return node_dir_outside_transaction(node);
}
/* Path of the ".data" file inside dir (talloc child of dir). */
static char *datafile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.data", dir);

	return path;
}
/* Path of the ".data" file of a node, resolved through trans if any. */
static char *node_datafile(struct transaction *trans, const char *node)
{
	char *dir = node_dir(trans, node);

	return datafile(dir);
}
/* Path of the ".perms" file inside dir (talloc child of dir). */
static char *permfile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.perms", dir);

	return path;
}
/* Path of the ".perms" file of a node, resolved through trans if any. */
static char *node_permfile(struct transaction *trans, const char *node)
{
	char *dir = node_dir(trans, node);

	return permfile(dir);
}
447 struct buffered_data *new_buffer(void *ctx)
448 {
449 struct buffered_data *data;
451 data = talloc(ctx, struct buffered_data);
452 data->inhdr = true;
453 data->used = 0;
454 data->buffer = NULL;
456 return data;
457 }
459 /* Return length of string (including nul) at this offset. */
460 unsigned int get_string(const struct buffered_data *data, unsigned int offset)
461 {
462 const char *nul;
464 if (offset >= data->used)
465 return 0;
467 nul = memchr(data->buffer + offset, 0, data->used - offset);
468 if (!nul)
469 return 0;
471 return nul - (data->buffer + offset) + 1;
472 }
474 /* Break input into vectors, return the number, fill in up to num of them. */
475 unsigned int get_strings(struct buffered_data *data,
476 char *vec[], unsigned int num)
477 {
478 unsigned int off, i, len;
480 off = i = 0;
481 while ((len = get_string(data, off)) != 0) {
482 if (i < num)
483 vec[i] = data->buffer + off;
484 i++;
485 off += len;
486 }
487 return i;
488 }
490 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
491 const void *data, unsigned int len)
492 {
493 struct buffered_data *bdata;
495 /* When data gets freed, we want list entry is destroyed (so
496 * list entry is a child). */
497 bdata = new_buffer(conn);
498 bdata->buffer = talloc_array(bdata, char, len);
500 bdata->hdr.msg.type = type;
501 bdata->hdr.msg.len = len;
502 memcpy(bdata->buffer, data, len);
504 /* There might be an event going out now. Queue behind it. */
505 if (conn->out) {
506 assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
507 assert(!conn->waiting_reply);
508 conn->waiting_reply = bdata;
509 } else
510 conn->out = bdata;
511 assert(conn->state != BLOCKED);
512 conn->state = BUSY;
513 }
515 /* Some routines (write, mkdir, etc) just need a non-error return */
516 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
517 {
518 send_reply(conn, type, "OK", sizeof("OK"));
519 }
521 void send_error(struct connection *conn, int error)
522 {
523 unsigned int i;
525 for (i = 0; error != xsd_errors[i].errnum; i++) {
526 if (i == ARRAY_SIZE(xsd_errors) - 1) {
527 eprintf("xenstored: error %i untranslatable", error);
528 i = 0; /* EINVAL */
529 break;
530 }
531 }
532 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
533 strlen(xsd_errors[i].errstring) + 1);
534 }
/*
 * True when every character of node is an ASCII letter, a digit, or one
 * of '-', '/', '_', '@'.  The empty string passes.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix must run right up to the terminator. */
	return node[strspn(node, allowed)] == '\0';
}
/*
 * A node name is valid when it starts with '/', does not end with '/'
 * (except for the root "/" itself), contains no "//", and consists only
 * of characters accepted by valid_chars().
 */
bool is_valid_nodename(const char *node)
{
	bool is_root;

	/* Must start in /. */
	if (!strstarts(node, "/"))
		return false;

	/* Cannot end in / (unless it's just "/"). */
	is_root = streq(node, "/");
	if (!is_root && strends(node, "/"))
		return false;

	/* No double //. */
	if (strstr(node, "//") != NULL)
		return false;

	return valid_chars(node);
}
562 /* We expect one arg in the input: return NULL otherwise. */
563 static const char *onearg(struct buffered_data *in)
564 {
565 if (!in->used || get_string(in, 0) != in->used)
566 return NULL;
567 return in->buffer;
568 }
570 /* If it fails, returns NULL and sets errno. */
571 static struct xs_permissions *get_perms(const char *dir, unsigned int *num)
572 {
573 unsigned int size;
574 char *strings;
575 struct xs_permissions *ret;
576 int *fd;
578 fd = talloc_open(permfile(dir), O_RDONLY, 0);
579 if (!fd)
580 return NULL;
581 strings = read_all(fd, &size);
582 if (!strings)
583 return NULL;
585 *num = xs_count_strings(strings, size);
586 ret = talloc_array(dir, struct xs_permissions, *num);
587 if (!xs_strings_to_perms(ret, *num, strings))
588 corrupt(NULL, "Permissions corrupt for %s", dir);
590 return ret;
591 }
593 static char *perms_to_strings(const void *ctx,
594 struct xs_permissions *perms, unsigned int num,
595 unsigned int *len)
596 {
597 unsigned int i;
598 char *strings = NULL;
599 char buffer[MAX_STRLEN(domid_t) + 1];
601 for (*len = 0, i = 0; i < num; i++) {
602 if (!xs_perm_to_string(&perms[i], buffer))
603 return NULL;
605 strings = talloc_realloc(ctx, strings, char,
606 *len + strlen(buffer) + 1);
607 strcpy(strings + *len, buffer);
608 *len += strlen(buffer) + 1;
609 }
610 return strings;
611 }
/*
 * Destroy this, and its children, and its children's children.
 *
 * path is a char * passed as void * so the function can serve as a talloc
 * destructor.  If path cannot be opened as a directory it is unlinked; an
 * already-missing path is fine.  Any other failure is treated as store
 * corruption.  Always returns 0.
 */
int destroy_path(void *path)
{
	const char *name = path;
	struct dirent *entry;
	DIR *dir = opendir(name);

	if (!dir) {
		if (unlink(name) != 0 && errno != ENOENT)
			corrupt(NULL, "Destroying path %s", name);
		return 0;
	}

	while ((entry = readdir(dir)) != NULL) {
		char fullpath[strlen(name) + 1 + strlen(entry->d_name) + 1];

		/* Never recurse into the directory itself or its parent. */
		if (streq(entry->d_name, ".") || streq(entry->d_name, ".."))
			continue;

		sprintf(fullpath, "%s/%s", name, entry->d_name);
		destroy_path(fullpath);
	}
	closedir(dir);

	if (rmdir(name) != 0)
		corrupt(NULL, "Destroying directory %s", name);
	return 0;
}
638 /* Create a self-destructing temporary path */
639 static char *temppath(const char *path)
640 {
641 char *tmppath = talloc_asprintf(path, "%s.tmp", path);
642 talloc_set_destructor(tmppath, destroy_path);
643 return tmppath;
644 }
646 /* Create a self-destructing temporary file */
647 static char *tempfile(const char *path, void *contents, unsigned int len)
648 {
649 int *fd;
650 char *tmppath = temppath(path);
652 fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640);
653 if (!fd)
654 return NULL;
655 if (!xs_write_all(*fd, contents, len))
656 return NULL;
658 return tmppath;
659 }
/* talloc destructor for talloc_opendir(): closes the stored DIR handle. */
static int destroy_opendir(void *_dir)
{
	closedir(*(DIR **)_dir);
	return 0;
}
668 /* Return a pointer to a DIR*, self-closing and attached to this pathname. */
669 DIR **talloc_opendir(const char *pathname)
670 {
671 DIR **dir;
673 dir = talloc(pathname, DIR *);
674 *dir = opendir(pathname);
675 if (!*dir) {
676 int saved_errno = errno;
677 talloc_free(dir);
678 errno = saved_errno;
679 return NULL;
680 }
681 talloc_set_destructor(dir, destroy_opendir);
682 return dir;
683 }
/*
 * Move a temporary file into place: path is renamed to itself with the
 * final ".suffix" cut off, and its destructor is cleared so the renamed
 * file survives.  We assume rename() doesn't fail on moves in same dir;
 * a failure is treated as store corruption.
 *
 * path must contain a '.' (true for anything built by temppath()).
 */
static void commit_tempfile(const char *path)
{
	const char *suffix = strrchr(path, '.');
	unsigned int stem_len = suffix - path;
	char realname[strlen(path) + 1];

	memcpy(realname, path, stem_len);
	realname[stem_len] = '\0';

	if (rename(path, realname) != 0)
		corrupt(NULL, "Committing %s", realname);
	talloc_set_destructor(path, NULL);
}
/*
 * Replace the permissions file of node (inside transaction, if any).
 *
 * The new contents are written to a temporary file which is then renamed
 * over the real one.  Returns false if the permissions cannot be
 * serialised or the temporary file cannot be written.
 */
static bool set_perms(struct transaction *transaction,
		      const char *node,
		      struct xs_permissions *perms, unsigned int num)
{
	unsigned int len;
	char *tmp;
	char *strings = perms_to_strings(node, perms, num, &len);

	if (!strings)
		return false;

	/* Create then move. */
	tmp = tempfile(node_permfile(transaction, node), strings, len);
	if (!tmp)
		return false;

	commit_tempfile(tmp);
	return true;
}
/*
 * Name of the parent of node: everything before the last '/' found after
 * the first character, or "/" when there is no such slash.  The result is
 * a talloc child of node.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);
	return talloc_strdup(node, "/");
}
726 static enum xs_perm_type perm_for_id(domid_t id,
727 struct xs_permissions *perms,
728 unsigned int num)
729 {
730 unsigned int i;
732 /* Owners and tools get it all... */
733 if (!id || perms[0].id == id)
734 return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
736 for (i = 1; i < num; i++)
737 if (perms[i].id == id)
738 return perms[i].perms;
740 return perms[0].perms;
741 }
743 /* What do parents say? */
744 static enum xs_perm_type ask_parents(struct connection *conn,
745 const char *node)
746 {
747 struct xs_permissions *perms;
748 unsigned int num;
750 do {
751 node = get_parent(node);
752 perms = get_perms(node_dir(conn->transaction, node), &num);
753 if (perms)
754 break;
755 } while (!streq(node, "/"));
757 /* No permission at root? We're in trouble. */
758 if (!perms)
759 corrupt(conn, "No permissions file at root");
761 return perm_for_id(conn->id, perms, num);
762 }
764 /* We have a weird permissions system. You can allow someone into a
765 * specific node without allowing it in the parents. If it's going to
766 * fail, however, we don't want the errno to indicate any information
767 * about the node. */
768 static int errno_from_parents(struct connection *conn, const char *node,
769 int errnum)
770 {
771 /* We always tell them about memory failures. */
772 if (errnum == ENOMEM)
773 return errnum;
775 if (ask_parents(conn, node) & XS_PERM_READ)
776 return errnum;
777 return EACCES;
778 }
/*
 * Turn a relative node name into an absolute one.
 *
 * NULL and names that already start with '/' are returned unchanged.
 * Other names are prefixed with the connection's implicit path plus '/'
 * when it has one (new talloc child of node); without an implicit path
 * the name is returned as is.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *prefix;

	if (node && !strstarts(node, "/")) {
		prefix = get_implicit_path(conn);
		if (prefix)
			return talloc_asprintf(node, "%s/%s", prefix, node);
	}
	return (char *)node;
}
792 bool check_node_perms(struct connection *conn, const char *node,
793 enum xs_perm_type perm)
794 {
795 struct xs_permissions *perms;
796 unsigned int num;
798 if (!node || !is_valid_nodename(node)) {
799 errno = EINVAL;
800 return false;
801 }
803 if (!conn->can_write && (perm & XS_PERM_WRITE)) {
804 errno = EROFS;
805 return false;
806 }
808 perms = get_perms(node_dir(conn->transaction, node), &num);
810 if (perms) {
811 if (perm_for_id(conn->id, perms, num) & perm)
812 return true;
813 errno = EACCES;
814 return false;
815 }
817 /* If it's OK not to exist, we consult parents. */
818 if (errno == ENOENT && (perm & XS_PERM_ENOENT_OK)) {
819 if (ask_parents(conn, node) & perm)
820 return true;
821 /* Parents say they should not know. */
822 errno = EACCES;
823 return false;
824 }
826 /* They might not have permission to even *see* this node, in
827 * which case we return EACCES even if it's ENOENT or EIO. */
828 errno = errno_from_parents(conn, node, errno);
829 return false;
830 }
/*
 * An event node name must be non-NULL and start with '@'.  Returns true
 * if so; otherwise sets errno to EINVAL and returns false.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
841 static void send_directory(struct connection *conn, const char *node)
842 {
843 char *path, *reply;
844 unsigned int reply_len = 0;
845 DIR **dir;
846 struct dirent *dirent;
848 node = canonicalize(conn, node);
849 if (!check_node_perms(conn, node, XS_PERM_READ)) {
850 send_error(conn, errno);
851 return;
852 }
854 path = node_dir(conn->transaction, node);
855 dir = talloc_opendir(path);
856 if (!dir) {
857 send_error(conn, errno);
858 return;
859 }
861 reply = talloc_strdup(node, "");
862 while ((dirent = readdir(*dir)) != NULL) {
863 int len = strlen(dirent->d_name) + 1;
865 if (!valid_chars(dirent->d_name))
866 continue;
868 reply = talloc_realloc(path, reply, char, reply_len + len);
869 strcpy(reply + reply_len, dirent->d_name);
870 reply_len += len;
871 }
873 send_reply(conn, XS_DIRECTORY, reply, reply_len);
874 }
876 static void do_read(struct connection *conn, const char *node)
877 {
878 char *value;
879 unsigned int size;
880 int *fd;
882 node = canonicalize(conn, node);
883 if (!check_node_perms(conn, node, XS_PERM_READ)) {
884 send_error(conn, errno);
885 return;
886 }
888 fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0);
889 if (!fd) {
890 /* Data file doesn't exist? We call that a directory */
891 if (errno == ENOENT)
892 errno = EISDIR;
893 send_error(conn, errno);
894 return;
895 }
897 value = read_all(fd, &size);
898 if (!value)
899 send_error(conn, errno);
900 else
901 send_reply(conn, XS_READ, value, size);
902 }
/*
 * Commit this directory, eg. committing a/b.tmp/c causes a/b.tmp -> a/b.
 *
 * dir is cut (in place) at the first '/' following its last '.', and the
 * resulting path is renamed to the same path without the ".suffix".
 * Returns true if rename() succeeded.  dir must contain a '.'.
 */
static bool commit_dir(char *dir)
{
	char *suffix = strrchr(dir, '.');
	char *tail = strchr(suffix, '/');
	char *target;

	/* Drop anything below the temporary component. */
	if (tail)
		*tail = '\0';

	target = talloc_asprintf(dir, "%.*s", (int)(suffix - dir), dir);
	return rename(dir, target) == 0;
}
918 /* Create a temporary directory. Put data in it (if data != NULL) */
919 static char *tempdir(struct connection *conn,
920 const char *node, void *data, unsigned int datalen)
921 {
922 struct xs_permissions *perms;
923 char *permstr;
924 unsigned int num, len;
925 int *fd;
926 char *dir;
928 dir = temppath(node_dir(conn->transaction, node));
929 if (mkdir(dir, 0750) != 0) {
930 if (errno != ENOENT)
931 return NULL;
933 dir = tempdir(conn, get_parent(node), NULL, 0);
934 if (!dir)
935 return NULL;
937 dir = talloc_asprintf(dir, "%s%s", dir, strrchr(node, '/'));
938 if (mkdir(dir, 0750) != 0)
939 return NULL;
940 talloc_set_destructor(dir, destroy_path);
941 }
943 perms = get_perms(get_parent(dir), &num);
944 assert(perms);
945 /* Domains own what they create. */
946 if (conn->id)
947 perms->id = conn->id;
949 permstr = perms_to_strings(dir, perms, num, &len);
950 fd = talloc_open(permfile(dir), O_WRONLY|O_CREAT|O_EXCL, 0640);
951 if (!fd || !xs_write_all(*fd, permstr, len))
952 return NULL;
954 if (data) {
955 char *datapath = datafile(dir);
957 fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
958 if (!fd || !xs_write_all(*fd, data, datalen))
959 return NULL;
960 }
961 return dir;
962 }
964 /* path, flags, data... */
965 static void do_write(struct connection *conn, struct buffered_data *in)
966 {
967 unsigned int offset, datalen;
968 char *vec[2];
969 char *node, *tmppath;
970 enum xs_perm_type mode;
971 struct stat st;
973 /* Extra "strings" can be created by binary data. */
974 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
975 send_error(conn, EINVAL);
976 return;
977 }
979 node = canonicalize(conn, vec[0]);
980 if (!within_transaction(conn->transaction, node)) {
981 send_error(conn, EROFS);
982 return;
983 }
985 if (transaction_block(conn, node))
986 return;
988 offset = strlen(vec[0]) + strlen(vec[1]) + 2;
989 datalen = in->used - offset;
991 if (streq(vec[1], XS_WRITE_NONE))
992 mode = XS_PERM_WRITE;
993 else if (streq(vec[1], XS_WRITE_CREATE))
994 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
995 else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
996 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
997 else {
998 send_error(conn, EINVAL);
999 return;
1002 if (!check_node_perms(conn, node, mode)) {
1003 send_error(conn, errno);
1004 return;
1007 if (lstat(node_dir(conn->transaction, node), &st) != 0) {
1008 char *dir;
1010 /* Does not exist... */
1011 if (errno != ENOENT) {
1012 send_error(conn, errno);
1013 return;
1016 /* Not going to create it? */
1017 if (streq(vec[1], XS_WRITE_NONE)) {
1018 send_error(conn, ENOENT);
1019 return;
1022 dir = tempdir(conn, node, in->buffer + offset, datalen);
1023 if (!dir || !commit_dir(dir)) {
1024 send_error(conn, errno);
1025 return;
1028 } else {
1029 /* Exists... */
1030 if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
1031 send_error(conn, EEXIST);
1032 return;
1035 tmppath = tempfile(node_datafile(conn->transaction, node),
1036 in->buffer + offset, datalen);
1037 if (!tmppath) {
1038 send_error(conn, errno);
1039 return;
1042 commit_tempfile(tmppath);
1045 add_change_node(conn->transaction, node, false);
1046 fire_watches(conn, node, false);
1047 send_ack(conn, XS_WRITE);
1050 static void do_mkdir(struct connection *conn, const char *node)
1052 char *dir;
1053 struct stat st;
1055 node = canonicalize(conn, node);
1056 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
1057 send_error(conn, errno);
1058 return;
1061 if (!within_transaction(conn->transaction, node)) {
1062 send_error(conn, EROFS);
1063 return;
1066 if (transaction_block(conn, node))
1067 return;
1069 /* Must not already exist. */
1070 if (lstat(node_dir(conn->transaction, node), &st) == 0) {
1071 send_error(conn, EEXIST);
1072 return;
1075 dir = tempdir(conn, node, NULL, 0);
1076 if (!dir || !commit_dir(dir)) {
1077 send_error(conn, errno);
1078 return;
1081 add_change_node(conn->transaction, node, false);
1082 fire_watches(conn, node, false);
1083 send_ack(conn, XS_MKDIR);
1086 static void do_rm(struct connection *conn, const char *node)
1088 char *tmppath, *path;
1090 node = canonicalize(conn, node);
1091 if (!check_node_perms(conn, node, XS_PERM_WRITE)) {
1092 send_error(conn, errno);
1093 return;
1096 if (!within_transaction(conn->transaction, node)) {
1097 send_error(conn, EROFS);
1098 return;
1101 if (transaction_block(conn, node))
1102 return;
1104 if (streq(node, "/")) {
1105 send_error(conn, EINVAL);
1106 return;
1109 /* We move the directory to temporary name, destructor cleans up. */
1110 path = node_dir(conn->transaction, node);
1111 tmppath = talloc_asprintf(node, "%s.tmp", path);
1112 talloc_set_destructor(tmppath, destroy_path);
1114 if (rename(path, tmppath) != 0) {
1115 send_error(conn, errno);
1116 return;
1119 add_change_node(conn->transaction, node, true);
1120 fire_watches(conn, node, true);
1121 send_ack(conn, XS_RM);
1124 static void do_get_perms(struct connection *conn, const char *node)
1126 struct xs_permissions *perms;
1127 char *strings;
1128 unsigned int len, num;
1130 node = canonicalize(conn, node);
1131 if (!check_node_perms(conn, node, XS_PERM_READ)) {
1132 send_error(conn, errno);
1133 return;
1136 perms = get_perms(node_dir(conn->transaction, node), &num);
1137 if (!perms) {
1138 send_error(conn, errno);
1139 return;
1142 strings = perms_to_strings(node, perms, num, &len);
1143 if (!strings)
1144 send_error(conn, errno);
1145 else
1146 send_reply(conn, XS_GET_PERMS, strings, len);
1149 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1151 unsigned int num;
1152 char *node, *permstr;
1153 struct xs_permissions *perms;
1155 num = xs_count_strings(in->buffer, in->used);
1156 if (num < 2) {
1157 send_error(conn, EINVAL);
1158 return;
1161 /* First arg is node name. */
1162 node = canonicalize(conn, in->buffer);
1163 permstr = in->buffer + strlen(in->buffer) + 1;
1164 num--;
1166 if (!within_transaction(conn->transaction, node)) {
1167 send_error(conn, EROFS);
1168 return;
1171 if (transaction_block(conn, node))
1172 return;
1174 /* We must own node to do this (tools can do this too). */
1175 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) {
1176 send_error(conn, errno);
1177 return;
1180 perms = talloc_array(node, struct xs_permissions, num);
1181 if (!xs_strings_to_perms(perms, num, permstr)) {
1182 send_error(conn, errno);
1183 return;
1186 if (!set_perms(conn->transaction, node, perms, num)) {
1187 send_error(conn, errno);
1188 return;
1191 add_change_node(conn->transaction, node, false);
1192 fire_watches(conn, node, false);
1193 send_ack(conn, XS_SET_PERMS);
1196 /* Process "in" for conn: "in" will vanish after this conversation, so
1197 * we can talloc off it for temporary variables. May free "conn".
1198 */
1199 static void process_message(struct connection *conn, struct buffered_data *in)
1201 switch (in->hdr.msg.type) {
1202 case XS_DIRECTORY:
1203 send_directory(conn, onearg(in));
1204 break;
1206 case XS_READ:
1207 do_read(conn, onearg(in));
1208 break;
1210 case XS_WRITE:
1211 do_write(conn, in);
1212 break;
1214 case XS_MKDIR:
1215 do_mkdir(conn, onearg(in));
1216 break;
1218 case XS_RM:
1219 do_rm(conn, onearg(in));
1220 break;
1222 case XS_GET_PERMS:
1223 do_get_perms(conn, onearg(in));
1224 break;
1226 case XS_SET_PERMS:
1227 do_set_perms(conn, in);
1228 break;
1230 case XS_SHUTDOWN:
1231 /* FIXME: Implement gentle shutdown too. */
1232 /* Only tools can do this. */
1233 if (conn->id != 0) {
1234 send_error(conn, EACCES);
1235 break;
1237 if (!conn->can_write) {
1238 send_error(conn, EROFS);
1239 break;
1241 send_ack(conn, XS_SHUTDOWN);
1242 /* Everything hangs off auto-free context, freed at exit. */
1243 exit(0);
1245 case XS_DEBUG:
1246 if (streq(in->buffer, "print"))
1247 xprintf("debug: %s", in->buffer + get_string(in, 0));
1248 #ifdef TESTING
1249 /* For testing, we allow them to set id. */
1250 if (streq(in->buffer, "setid")) {
1251 conn->id = atoi(in->buffer + get_string(in, 0));
1252 send_ack(conn, XS_DEBUG);
1253 } else if (streq(in->buffer, "failtest")) {
1254 if (get_string(in, 0) < in->used)
1255 srandom(atoi(in->buffer + get_string(in, 0)));
1256 send_ack(conn, XS_DEBUG);
1257 failtest = true;
1259 #endif /* TESTING */
1260 break;
1262 case XS_WATCH:
1263 do_watch(conn, in);
1264 break;
1266 case XS_WATCH_ACK:
1267 do_watch_ack(conn, onearg(in));
1268 break;
1270 case XS_UNWATCH:
1271 do_unwatch(conn, in);
1272 break;
1274 case XS_TRANSACTION_START:
1275 do_transaction_start(conn, onearg(in));
1276 break;
1278 case XS_TRANSACTION_END:
1279 do_transaction_end(conn, onearg(in));
1280 break;
1282 case XS_INTRODUCE:
1283 do_introduce(conn, in);
1284 break;
1286 case XS_RELEASE:
1287 do_release(conn, onearg(in));
1288 break;
1290 case XS_GET_DOMAIN_PATH:
1291 do_get_domain_path(conn, onearg(in));
1292 break;
1294 case XS_WATCH_EVENT:
1295 default:
1296 eprintf("Client unknown operation %i", in->hdr.msg.type);
1297 send_error(conn, ENOSYS);
/*
 * talloc failure handler: "data" points at the jmp_buf armed by the
 * caller; jump back to it with value 1.  Never returns normally.
 */
static int out_of_mem(void *data)
{
	jmp_buf *env = data;

	longjmp(*env, 1);
}
1306 static void consider_message(struct connection *conn)
1308 /*
1309 * 'volatile' qualifier prevents register allocation which fixes:
1310 * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
1311 */
1312 struct buffered_data *volatile in = NULL;
1313 enum xsd_sockmsg_type volatile type = conn->in->hdr.msg.type;
1314 jmp_buf talloc_fail;
1316 assert(conn->state == OK);
1318 /* For simplicity, we kill the connection on OOM. */
1319 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1320 if (setjmp(talloc_fail)) {
1321 talloc_free(conn);
1322 goto end;
1325 if (verbose)
1326 xprintf("Got message %s len %i from %p\n",
1327 sockmsg_string(type), conn->in->hdr.msg.len, conn);
1329 /* We might get a command while waiting for an ack: this means
1330 * the other end discarded it: we will re-transmit. */
1331 if (type != XS_WATCH_ACK)
1332 conn->waiting_for_ack = NULL;
1334 /* Careful: process_message may free connection. We detach
1335 * "in" beforehand and allocate the new buffer to avoid
1336 * touching conn after process_message.
1337 */
1338 in = talloc_steal(talloc_autofree_context(), conn->in);
1339 conn->in = new_buffer(conn);
1340 process_message(conn, in);
1342 if (conn->state == BLOCKED) {
1343 /* Blocked by transaction: queue for re-xmit. */
1344 talloc_free(conn->in);
1345 conn->in = in;
1346 in = NULL;
1347 trace_blocked(conn, conn->in);
1350 end:
1351 talloc_free(in);
1352 talloc_set_fail_handler(NULL, NULL);
1353 if (talloc_total_blocks(NULL)
1354 != talloc_total_blocks(talloc_autofree_context()) + 1) {
1355 talloc_report_full(NULL, stderr);
1356 abort();
1360 /* Errors in reading or allocating here mean we get out of sync, so we
1361 * drop the whole client connection. */
1362 void handle_input(struct connection *conn)
1364 int bytes;
1365 struct buffered_data *in;
1367 assert(conn->state == OK);
1368 in = conn->in;
1370 /* Not finished header yet? */
1371 if (in->inhdr) {
1372 bytes = conn->read(conn, in->hdr.raw + in->used,
1373 sizeof(in->hdr) - in->used);
1374 if (bytes <= 0)
1375 goto bad_client;
1376 in->used += bytes;
1377 if (in->used != sizeof(in->hdr))
1378 return;
1380 if (in->hdr.msg.len > PATH_MAX) {
1381 #ifndef TESTING
1382 syslog(LOG_DAEMON, "Client tried to feed us %i",
1383 in->hdr.msg.len);
1384 #endif
1385 goto bad_client;
1388 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1389 if (!in->buffer)
1390 goto bad_client;
1391 in->used = 0;
1392 in->inhdr = false;
1393 return;
1396 bytes = conn->read(conn, in->buffer + in->used,
1397 in->hdr.msg.len - in->used);
1398 if (bytes < 0)
1399 goto bad_client;
1401 in->used += bytes;
1402 if (in->used != in->hdr.msg.len)
1403 return;
1405 trace_io(conn, "IN ", in);
1406 consider_message(conn);
1407 return;
1409 bad_client:
1410 /* Kill it. */
1411 talloc_free(conn);
/* Push pending output for "conn"; free the connection if write_message()
 * reports failure. */
void handle_output(struct connection *conn)
{
	if (write_message(conn))
		return;

	talloc_free(conn);
}
1420 /* If a transaction has ended, see if we can unblock any connections. */
1421 static void unblock_connections(void)
1423 struct connection *i, *tmp;
1425 list_for_each_entry_safe(i, tmp, &connections, list) {
1426 switch (i->state) {
1427 case BLOCKED:
1428 if (!transaction_covering_node(i->blocked_by)) {
1429 talloc_free(i->blocked_by);
1430 i->blocked_by = NULL;
1431 i->state = OK;
1432 consider_message(i);
1434 break;
1435 case BUSY:
1436 case OK:
1437 break;
1441 /* To balance bias, move first entry to end. */
1442 if (!list_empty(&connections)) {
1443 i = list_top(&connections, struct connection, list);
1444 list_del(&i->list);
1445 list_add_tail(&i->list, &connections);
1449 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1451 /*
1452 * 'volatile' qualifier prevents register allocation which fixes:
1453 * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
1454 */
1455 struct connection *volatile new;
1456 jmp_buf talloc_fail;
1458 new = talloc(talloc_autofree_context(), struct connection);
1459 if (!new)
1460 return NULL;
1462 new->state = OK;
1463 new->blocked_by = NULL;
1464 new->out = new->waiting_reply = NULL;
1465 new->waiting_for_ack = NULL;
1466 new->fd = -1;
1467 new->id = 0;
1468 new->domain = NULL;
1469 new->transaction = NULL;
1470 new->write = write;
1471 new->read = read;
1472 new->can_write = true;
1473 INIT_LIST_HEAD(&new->watches);
1475 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1476 if (setjmp(talloc_fail)) {
1477 talloc_free(new);
1478 return NULL;
1480 new->in = new_buffer(new);
1481 talloc_set_fail_handler(NULL, NULL);
1483 list_add_tail(&new->list, &connections);
1484 talloc_set_destructor(new, destroy_conn);
1485 trace_create(new, "connection");
1486 return new;
1489 static int writefd(struct connection *conn, const void *data, unsigned int len)
1491 return write(conn->fd, data, len);
1494 static int readfd(struct connection *conn, void *data, unsigned int len)
1496 return read(conn->fd, data, len);
1499 static void accept_connection(int sock, bool canwrite)
1501 int fd;
1502 struct connection *conn;
1504 fd = accept(sock, NULL, NULL);
1505 if (fd < 0)
1506 return;
1508 conn = new_connection(writefd, readfd);
1509 if (conn) {
1510 conn->fd = fd;
1511 conn->can_write = canwrite;
1512 } else
1513 close(fd);
/*
 * Calc timespan from now to absolute time.
 *
 * On entry *tv is an absolute time; on return it holds the interval from
 * the current time until then, or zero if that time is already past.
 */
static void time_relative_to_now(struct timeval *tv)
{
	struct timeval now;
	int in_past;

	gettimeofday(&now, NULL);

	/* Compare seconds first; microseconds only break a tie. */
	if (now.tv_sec != tv->tv_sec)
		in_past = now.tv_sec > tv->tv_sec;
	else
		in_past = now.tv_usec > tv->tv_usec;

	if (in_past) {
		tv->tv_sec = 0;
		tv->tv_usec = 0;
		return;
	}

	tv->tv_sec -= now.tv_sec;
	if (tv->tv_usec < now.tv_usec) {
		/* Borrow one second so the subtraction stays non-negative. */
		tv->tv_sec--;
		tv->tv_usec += 1000000;
	}
	tv->tv_usec -= now.tv_usec;
}
#ifdef TESTING
/*
 * Useful for running under debugger: print to stdout, for every entry on
 * the connections list, its state, id, blocking node, input progress,
 * queued output and watches.
 */
void dump_connection(void)
{
	struct connection *i;

	list_for_each_entry(i, &connections, list) {
		const char *state;

		switch (i->state) {
		case OK:
			state = "OK";
			break;
		case BLOCKED:
			state = "BLOCKED";
			break;
		case BUSY:
			state = "BUSY";
			break;
		default:
			state = "INVALID";
			break;
		}

		/* %p requires a void pointer argument. */
		printf("Connection %p:\n", (void *)i);
		printf(" state = %s\n", state);
		if (i->id)
			printf(" id = %i\n", i->id);
		if (i->blocked_by)
			printf(" blocked on = %s\n", i->blocked_by);
		/* Only report input once some of a message has arrived. */
		if (!i->in->inhdr || i->in->used)
			printf(" got %i bytes of %s\n",
			       i->in->used, i->in->inhdr ? "header" : "data");
		if (i->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(i->out->hdr.msg.type),
			       i->out->buffer);
		if (i->waiting_reply)
			printf(" ... and behind is queued %s (%s)\n",
			       sockmsg_string(i->waiting_reply->hdr.msg.type),
			       i->waiting_reply->buffer);
#if 0
		if (i->transaction)
			dump_transaction(i);
		if (i->domain)
			dump_domain(i);
#endif
		dump_watches(i);
	}
}
#endif
1573 static void setup_structure(void)
1575 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_READ };
1576 char *root, *dir, *permfile;
1578 /* Create root directory, with permissions. */
1579 if (mkdir(xs_daemon_store(), 0750) != 0) {
1580 if (errno != EEXIST)
1581 barf_perror("Could not create root %s",
1582 xs_daemon_store());
1583 return;
1585 root = talloc_strdup(talloc_autofree_context(), "/");
1586 if (!set_perms(NULL, root, &perms, 1))
1587 barf_perror("Could not create permissions in root");
1589 /* Create tool directory, with xenstored subdir. */
1590 dir = talloc_asprintf(root, "%s/%s", xs_daemon_store(), "tool");
1591 if (mkdir(dir, 0750) != 0)
1592 barf_perror("Making dir %s", dir);
1594 permfile = talloc_strdup(root, "/tool");
1595 if (!set_perms(NULL, permfile, &perms, 1))
1596 barf_perror("Could not create permissions on %s", permfile);
1598 dir = talloc_asprintf(root, "%s/%s", dir, "xenstored");
1599 if (mkdir(dir, 0750) != 0)
1600 barf_perror("Making dir %s", dir);
1602 permfile = talloc_strdup(root, "/tool/xenstored");
1603 if (!set_perms(NULL, permfile, &perms, 1))
1604 barf_perror("Could not create permissions on %s", permfile);
1605 talloc_free(root);
1606 if (mkdir(xs_daemon_transactions(), 0750) != 0)
1607 barf_perror("Could not create transaction dir %s",
1608 xs_daemon_transactions());
1611 static void write_pidfile(const char *pidfile)
1613 char buf[100];
1614 int len;
1615 int fd;
1617 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1618 if (fd == -1)
1619 barf_perror("Opening pid file %s", pidfile);
1621 /* We exit silently if daemon already running. */
1622 if (lockf(fd, F_TLOCK, 0) == -1)
1623 exit(0);
1625 len = sprintf(buf, "%d\n", getpid());
1626 write(fd, buf, len);
/*
 * Detach from the invoking process (after Stevens): fork with the parent
 * exiting, start a new session, move to "/" (unless built for TESTING)
 * and clear the umask.
 */
static void daemonize(void)
{
	pid_t child;

	/* Separate from our parent via fork, so init inherits us. */
	child = fork();
	if (child < 0)
		barf_perror("Failed to fork daemon");
	if (child != 0)
		exit(0);

	/* Session leader so ^C doesn't whack us. */
	setsid();

#ifndef TESTING	/* Relative paths for socket names */
	/* Move off any mount points we might be in. */
	chdir("/");
#endif

	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/* Long command-line options; each maps onto the short option in .val. */
static struct option options[] = {
	{ .name = "pid-file",   .has_arg = required_argument, .flag = NULL, .val = 'F' },
	{ .name = "no-fork",    .has_arg = no_argument,       .flag = NULL, .val = 'N' },
	{ .name = "output-pid", .has_arg = no_argument,       .flag = NULL, .val = 'P' },
	{ .name = "trace-file", .has_arg = required_argument, .flag = NULL, .val = 'T' },
	{ .name = "verbose",    .has_arg = no_argument,       .flag = NULL, .val = 'V' },
	/* Terminator required by getopt_long(). */
	{ .name = NULL,         .has_arg = 0,                 .flag = NULL, .val = 0 }
};
1659 int main(int argc, char *argv[])
1661 int opt, *sock, *ro_sock, event_fd, max;
1662 struct sockaddr_un addr;
1663 fd_set inset, outset;
1664 bool dofork = true;
1665 bool outputpid = false;
1666 const char *pidfile = NULL;
1668 while ((opt = getopt_long(argc, argv, "F:NPT:V", options,
1669 NULL)) != -1) {
1670 switch (opt) {
1671 case 'F':
1672 pidfile = optarg;
1673 break;
1674 case 'N':
1675 dofork = false;
1676 break;
1677 case 'P':
1678 outputpid = true;
1679 break;
1680 case 'T':
1681 tracefd = open(optarg, O_WRONLY|O_CREAT|O_APPEND, 0600);
1682 if (tracefd < 0)
1683 barf_perror("Could not open tracefile %s",
1684 optarg);
1685 write(tracefd, "\n***\n", strlen("\n***\n"));
1686 break;
1687 case 'V':
1688 verbose = true;
1689 break;
1692 if (optind != argc)
1693 barf("%s: No arguments desired", argv[0]);
1695 if (dofork) {
1696 openlog("xenstored", 0, LOG_DAEMON);
1697 daemonize();
1699 if (pidfile)
1700 write_pidfile(pidfile);
1702 talloc_enable_leak_report_full();
1704 /* Create sockets for them to listen to. */
1705 sock = talloc(talloc_autofree_context(), int);
1706 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1707 if (*sock < 0)
1708 barf_perror("Could not create socket");
1709 ro_sock = talloc(talloc_autofree_context(), int);
1710 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1711 if (*ro_sock < 0)
1712 barf_perror("Could not create socket");
1713 talloc_set_destructor(sock, destroy_fd);
1714 talloc_set_destructor(ro_sock, destroy_fd);
1716 /* Don't kill us with SIGPIPE. */
1717 signal(SIGPIPE, SIG_IGN);
1719 /* FIXME: Be more sophisticated, don't mug running daemon. */
1720 unlink(xs_daemon_socket());
1721 unlink(xs_daemon_socket_ro());
1723 addr.sun_family = AF_UNIX;
1724 strcpy(addr.sun_path, xs_daemon_socket());
1725 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1726 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1727 strcpy(addr.sun_path, xs_daemon_socket_ro());
1728 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1729 barf_perror("Could not bind socket to %s",
1730 xs_daemon_socket_ro());
1731 if (chmod(xs_daemon_socket(), 0600) != 0
1732 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1733 barf_perror("Could not chmod sockets");
1735 if (listen(*sock, 1) != 0
1736 || listen(*ro_sock, 1) != 0)
1737 barf_perror("Could not listen on sockets");
1739 /* If we're the first, create .perms file for root. */
1740 setup_structure();
1742 /* Listen to hypervisor. */
1743 event_fd = domain_init();
1745 /* Restore existing connections. */
1746 restore_existing_connections();
1748 if (outputpid) {
1749 printf("%i\n", getpid());
1750 fflush(stdout);
1753 /* close stdin/stdout now we're ready to accept connections */
1754 if (dofork) {
1755 close(STDIN_FILENO);
1756 close(STDOUT_FILENO);
1757 close(STDERR_FILENO);
1760 #ifdef TESTING
1761 signal(SIGUSR1, stop_failtest);
1762 #endif
1764 /* Get ready to listen to the tools. */
1765 max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd);
1767 /* Main loop. */
1768 /* FIXME: Rewrite so noone can starve. */
1769 for (;;) {
1770 struct connection *i;
1771 struct timeval *tvp = NULL, tv;
1773 timerclear(&tv);
1774 shortest_transaction_timeout(&tv);
1775 shortest_watch_ack_timeout(&tv);
1776 if (timerisset(&tv)) {
1777 time_relative_to_now(&tv);
1778 tvp = &tv;
1781 if (select(max+1, &inset, &outset, NULL, tvp) < 0) {
1782 if (errno == EINTR)
1783 continue;
1784 barf_perror("Select failed");
1787 if (FD_ISSET(*sock, &inset))
1788 accept_connection(*sock, true);
1790 if (FD_ISSET(*ro_sock, &inset))
1791 accept_connection(*ro_sock, false);
1793 if (FD_ISSET(event_fd, &inset))
1794 handle_event(event_fd);
1796 list_for_each_entry(i, &connections, list) {
1797 if (i->domain)
1798 continue;
1800 /* Operations can delete themselves or others
1801 * (xs_release): list is not safe after input,
1802 * so break. */
1803 if (FD_ISSET(i->fd, &inset)) {
1804 handle_input(i);
1805 break;
1807 if (FD_ISSET(i->fd, &outset)) {
1808 handle_output(i);
1809 break;
1813 /* Handle all possible I/O for domain connections. */
1814 more:
1815 list_for_each_entry(i, &connections, list) {
1816 if (!i->domain)
1817 continue;
1819 if (domain_can_read(i)) {
1820 handle_input(i);
1821 goto more;
1824 if (domain_can_write(i)) {
1825 handle_output(i);
1826 goto more;
1830 if (tvp) {
1831 check_transaction_timeout();
1832 check_watch_ack_timeout();
1835 /* If transactions ended, we might be able to do more work. */
1836 unblock_connections();
1838 max = initialize_set(&inset, &outset, *sock, *ro_sock,
1839 event_fd);