ia64/xen-unstable

view tools/xenstore/xenstored_core.c @ 6653:158d23cbd2e6

Enable xenstored optimisations.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Sep 06 15:47:23 2005 +0000 (2005-09-06)
parents aeaa3c83f6e5
children d4d69c509371
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored.h"
48 #include "xenstored_core.h"
49 #include "xenstored_watch.h"
50 #include "xenstored_transaction.h"
51 #include "xenstored_domain.h"
/* Checked before echoing per-message diagnostics through xprintf(). */
static bool verbose;

/* Head of the list that struct connection entries join via their 'list' member. */
LIST_HEAD(connections);

/* Descriptor the trace helpers write to; a negative value disables tracing. */
static int tracefd = -1;
57 #ifdef TESTING
/* Fault-injection switch: while true, the test_* wrappers fail at random. */
static bool failtest = false;

/*
 * We override talloc's malloc.
 *
 * While failtest is set, one call in 32 (on average) reports failure by
 * returning NULL; every other call is forwarded to malloc().
 * (A 1-in-20 rate was noted to let only about 50% of connections establish.)
 */
void *test_malloc(size_t size)
{
	bool inject = failtest && (random() % 32) == 0;

	return inject ? NULL : malloc(size);
}
69 static void stop_failtest(int signum __attribute__((unused)))
70 {
71 failtest = false;
72 }
74 /* Need these before we #define away write_all/mkdir in testing.h */
75 bool test_write_all(int fd, void *contents, unsigned int len);
76 bool test_write_all(int fd, void *contents, unsigned int len)
77 {
78 if (failtest && (random() % 8) == 0) {
79 if (len)
80 len = random() % len;
81 write(fd, contents, len);
82 errno = ENOSPC;
83 return false;
84 }
85 return xs_write_all(fd, contents, len);
86 }
88 int test_mkdir(const char *dir, int perms);
89 int test_mkdir(const char *dir, int perms)
90 {
91 if (failtest && (random() % 8) == 0) {
92 errno = ENOSPC;
93 return -1;
94 }
95 return mkdir(dir, perms);
96 }
97 #endif /* TESTING */
99 #include "xenstored_test.h"
101 /* FIXME: Ideally, this should never be called. Some can be eliminated. */
102 /* Something is horribly wrong: shutdown immediately. */
103 void __attribute__((noreturn)) corrupt(struct connection *conn,
104 const char *fmt, ...)
105 {
106 va_list arglist;
107 char *str;
108 int saved_errno = errno;
110 va_start(arglist, fmt);
111 str = talloc_vasprintf(NULL, fmt, arglist);
112 va_end(arglist);
114 trace("xenstored corruption: connection id %i: err %s: %s",
115 conn ? (int)conn->id : -1, strerror(saved_errno), str);
116 eprintf("xenstored corruption: connection id %i: err %s: %s",
117 conn ? (int)conn->id : -1, strerror(saved_errno), str);
118 #ifdef TESTING
119 /* Allow them to attach debugger. */
120 sleep(30);
121 #endif
122 syslog(LOG_DAEMON,
123 "xenstored corruption: connection id %i: err %s: %s",
124 conn ? (int)conn->id : -1, strerror(saved_errno), str);
125 _exit(2);
126 }
128 static char *sockmsg_string(enum xsd_sockmsg_type type)
129 {
130 switch (type) {
131 case XS_DEBUG: return "DEBUG";
132 case XS_SHUTDOWN: return "SHUTDOWN";
133 case XS_DIRECTORY: return "DIRECTORY";
134 case XS_READ: return "READ";
135 case XS_GET_PERMS: return "GET_PERMS";
136 case XS_WATCH: return "WATCH";
137 case XS_WATCH_ACK: return "WATCH_ACK";
138 case XS_UNWATCH: return "UNWATCH";
139 case XS_TRANSACTION_START: return "TRANSACTION_START";
140 case XS_TRANSACTION_END: return "TRANSACTION_END";
141 case XS_INTRODUCE: return "INTRODUCE";
142 case XS_RELEASE: return "RELEASE";
143 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
144 case XS_WRITE: return "WRITE";
145 case XS_MKDIR: return "MKDIR";
146 case XS_RM: return "RM";
147 case XS_SET_PERMS: return "SET_PERMS";
148 case XS_WATCH_EVENT: return "WATCH_EVENT";
149 case XS_ERROR: return "ERROR";
150 default:
151 return "**UNKNOWN**";
152 }
153 }
155 static void trace_io(const struct connection *conn,
156 const char *prefix,
157 const struct buffered_data *data)
158 {
159 char string[64];
160 unsigned int i;
162 if (tracefd < 0)
163 return;
165 write(tracefd, prefix, strlen(prefix));
166 sprintf(string, " %p ", conn);
167 write(tracefd, string, strlen(string));
168 write(tracefd, sockmsg_string(data->hdr.msg.type),
169 strlen(sockmsg_string(data->hdr.msg.type)));
170 write(tracefd, " (", 2);
171 for (i = 0; i < data->hdr.msg.len; i++) {
172 if (data->buffer[i] == '\0')
173 write(tracefd, " ", 1);
174 else
175 write(tracefd, data->buffer + i, 1);
176 }
177 write(tracefd, ")\n", 2);
178 }
180 void trace_create(const void *data, const char *type)
181 {
182 char string[64];
183 if (tracefd < 0)
184 return;
186 write(tracefd, "CREATE ", strlen("CREATE "));
187 write(tracefd, type, strlen(type));
188 sprintf(string, " %p\n", data);
189 write(tracefd, string, strlen(string));
190 }
192 void trace_destroy(const void *data, const char *type)
193 {
194 char string[64];
195 if (tracefd < 0)
196 return;
198 write(tracefd, "DESTROY ", strlen("DESTROY "));
199 write(tracefd, type, strlen(type));
200 sprintf(string, " %p\n", data);
201 write(tracefd, string, strlen(string));
202 }
204 void trace_watch_timeout(const struct connection *conn, const char *node, const char *token)
205 {
206 char string[64];
207 if (tracefd < 0)
208 return;
209 write(tracefd, "WATCH_TIMEOUT ", strlen("WATCH_TIMEOUT "));
210 sprintf(string, " %p ", conn);
211 write(tracefd, string, strlen(string));
212 write(tracefd, " (", 2);
213 write(tracefd, node, strlen(node));
214 write(tracefd, " ", 1);
215 write(tracefd, token, strlen(token));
216 write(tracefd, ")\n", 2);
217 }
219 static void trace_blocked(const struct connection *conn,
220 const struct buffered_data *data)
221 {
222 char string[64];
224 if (tracefd < 0)
225 return;
227 write(tracefd, "BLOCKED", strlen("BLOCKED"));
228 sprintf(string, " %p (", conn);
229 write(tracefd, string, strlen(string));
230 write(tracefd, sockmsg_string(data->hdr.msg.type),
231 strlen(sockmsg_string(data->hdr.msg.type)));
232 write(tracefd, ")\n", 2);
233 }
235 void trace(const char *fmt, ...)
236 {
237 va_list arglist;
238 char *str;
240 if (tracefd < 0)
241 return;
243 va_start(arglist, fmt);
244 str = talloc_vasprintf(NULL, fmt, arglist);
245 va_end(arglist);
246 write(tracefd, str, strlen(str));
247 talloc_free(str);
248 }
250 static bool write_message(struct connection *conn)
251 {
252 int ret;
253 struct buffered_data *out = conn->out;
255 assert(conn->state != BLOCKED);
256 if (out->inhdr) {
257 if (verbose)
258 xprintf("Writing msg %s (%s) out to %p\n",
259 sockmsg_string(out->hdr.msg.type),
260 out->buffer, conn);
261 ret = conn->write(conn, out->hdr.raw + out->used,
262 sizeof(out->hdr) - out->used);
263 if (ret < 0)
264 return false;
266 out->used += ret;
267 if (out->used < sizeof(out->hdr))
268 return true;
270 out->inhdr = false;
271 out->used = 0;
273 /* Second write might block if non-zero. */
274 if (out->hdr.msg.len && !conn->domain)
275 return true;
276 }
278 ret = conn->write(conn, out->buffer + out->used,
279 out->hdr.msg.len - out->used);
281 if (ret < 0)
282 return false;
284 out->used += ret;
285 if (out->used != out->hdr.msg.len)
286 return true;
288 trace_io(conn, "OUT", out);
289 conn->out = NULL;
290 talloc_free(out);
292 queue_next_event(conn);
294 /* No longer busy? */
295 if (!conn->out)
296 conn->state = OK;
297 return true;
298 }
300 static int destroy_conn(void *_conn)
301 {
302 struct connection *conn = _conn;
304 /* Flush outgoing if possible, but don't block. */
305 if (!conn->domain) {
306 fd_set set;
307 struct timeval none;
309 FD_ZERO(&set);
310 FD_SET(conn->fd, &set);
311 none.tv_sec = none.tv_usec = 0;
313 while (conn->out
314 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
315 if (!write_message(conn))
316 break;
317 close(conn->fd);
318 }
319 list_del(&conn->list);
320 trace_destroy(conn, "connection");
321 return 0;
322 }
324 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
325 int event_fd)
326 {
327 struct connection *i;
328 int max;
330 FD_ZERO(inset);
331 FD_ZERO(outset);
332 FD_SET(sock, inset);
333 max = sock;
334 FD_SET(ro_sock, inset);
335 if (ro_sock > max)
336 max = ro_sock;
337 FD_SET(event_fd, inset);
338 if (event_fd > max)
339 max = event_fd;
340 list_for_each_entry(i, &connections, list) {
341 if (i->domain)
342 continue;
343 if (i->state == OK)
344 FD_SET(i->fd, inset);
345 if (i->out)
346 FD_SET(i->fd, outset);
347 if (i->fd > max)
348 max = i->fd;
349 }
350 return max;
351 }
/*
 * Read everything from a talloc_open'ed fd.
 *
 * The buffer is allocated with 'fd' as its talloc context, starts at four
 * bytes and doubles whenever it fills up.  On success the buffer is returned
 * and *size holds the number of bytes read; NULL is returned if read() fails.
 */
void *read_all(int *fd, unsigned int *size)
{
	unsigned int capacity = 4;
	void *buf = talloc_size(fd, capacity);
	int got;

	*size = 0;
	for (;;) {
		got = read(*fd, (char *)buf + *size, capacity - *size);
		if (got <= 0)
			break;

		*size += got;
		if (*size == capacity) {
			capacity *= 2;
			buf = talloc_realloc_size(fd, buf, capacity);
		}
	}

	return got < 0 ? NULL : buf;
}
/* Destructor callback: close the descriptor stored in the int that _fd points to. */
static int destroy_fd(void *_fd)
{
	const int *holder = _fd;

	close(*holder);
	return 0;
}
378 /* Return a pointer to an fd, self-closing and attached to this pathname. */
379 int *talloc_open(const char *pathname, int flags, int mode)
380 {
381 int *fd;
383 fd = talloc(pathname, int);
384 *fd = open(pathname, flags, mode);
385 if (*fd < 0) {
386 int saved_errno = errno;
387 talloc_free(fd);
388 errno = saved_errno;
389 return NULL;
390 }
391 talloc_set_destructor(fd, destroy_fd);
392 return fd;
393 }
/*
 * Is child a subnode of parent, or equal?
 *
 * "/" is treated as the parent of everything.  Otherwise child must begin
 * with parent and continue with either '/' or the end of the string, so
 * "/ab" is not a child of "/a".
 */
bool is_child(const char *child, const char *parent)
{
	size_t plen;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (strcmp(parent, "/") == 0)
		return true;

	plen = strlen(parent);
	if (strncmp(child, parent, plen) != 0)
		return false;

	return child[plen] == '/' || child[plen] == '\0';
}
/*
 * Build the directory path for a node outside any transaction: the value of
 * xs_daemon_store() followed by the node name, or that value alone for "/".
 * Answer never ends in /.  The string is allocated with 'node' as context.
 */
char *node_dir_outside_transaction(const char *node)
{
	if (!streq(node, "/"))
		return talloc_asprintf(node, "%s%s", xs_daemon_store(), node);

	return talloc_strdup(node, xs_daemon_store());
}
/*
 * Directory path for a node as seen from 'trans'.  The transaction-private
 * location is used only when a transaction is given and the node lies within
 * it; otherwise the path outside any transaction is returned.
 */
static char *node_dir(struct transaction *trans, const char *node)
{
	if (trans && within_transaction(trans, node))
		return node_dir_inside_transaction(trans, node);

	return node_dir_outside_transaction(node);
}
/* Path of the ".data" file inside 'dir'; allocated with 'dir' as context. */
static char *datafile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.data", dir);

	return path;
}
/* Path of the ".data" file for 'node' as seen from 'trans'. */
static char *node_datafile(struct transaction *trans, const char *node)
{
	const char *dir = node_dir(trans, node);

	return datafile(dir);
}
/* Path of the ".perms" file inside 'dir'; allocated with 'dir' as context. */
static char *permfile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.perms", dir);

	return path;
}
/* Path of the ".perms" file for 'node' as seen from 'trans'. */
static char *node_permfile(struct transaction *trans, const char *node)
{
	const char *dir = node_dir(trans, node);

	return permfile(dir);
}
446 struct buffered_data *new_buffer(void *ctx)
447 {
448 struct buffered_data *data;
450 data = talloc(ctx, struct buffered_data);
451 data->inhdr = true;
452 data->used = 0;
453 data->buffer = NULL;
455 return data;
456 }
458 /* Return length of string (including nul) at this offset. */
459 unsigned int get_string(const struct buffered_data *data, unsigned int offset)
460 {
461 const char *nul;
463 if (offset >= data->used)
464 return 0;
466 nul = memchr(data->buffer + offset, 0, data->used - offset);
467 if (!nul)
468 return 0;
470 return nul - (data->buffer + offset) + 1;
471 }
473 /* Break input into vectors, return the number, fill in up to num of them. */
474 unsigned int get_strings(struct buffered_data *data,
475 char *vec[], unsigned int num)
476 {
477 unsigned int off, i, len;
479 off = i = 0;
480 while ((len = get_string(data, off)) != 0) {
481 if (i < num)
482 vec[i] = data->buffer + off;
483 i++;
484 off += len;
485 }
486 return i;
487 }
489 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
490 const void *data, unsigned int len)
491 {
492 struct buffered_data *bdata;
494 /* When data gets freed, we want list entry is destroyed (so
495 * list entry is a child). */
496 bdata = new_buffer(conn);
497 bdata->buffer = talloc_array(bdata, char, len);
499 bdata->hdr.msg.type = type;
500 bdata->hdr.msg.len = len;
501 memcpy(bdata->buffer, data, len);
503 /* There might be an event going out now. Queue behind it. */
504 if (conn->out) {
505 assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
506 assert(!conn->waiting_reply);
507 conn->waiting_reply = bdata;
508 } else
509 conn->out = bdata;
510 assert(conn->state != BLOCKED);
511 conn->state = BUSY;
512 }
514 /* Some routines (write, mkdir, etc) just need a non-error return */
515 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
516 {
517 send_reply(conn, type, "OK", sizeof("OK"));
518 }
520 void send_error(struct connection *conn, int error)
521 {
522 unsigned int i;
524 for (i = 0; error != xsd_errors[i].errnum; i++) {
525 if (i == ARRAY_SIZE(xsd_errors) - 1) {
526 eprintf("xenstored: error %i untranslatable", error);
527 i = 0; /* EINVAL */
528 break;
529 }
530 }
531 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
532 strlen(xsd_errors[i].errstring) + 1);
533 }
/*
 * True when every character of 'node' is an ASCII letter, a digit, or one
 * of "-/_@".  The empty string passes.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix spans the whole string iff it stops at the nul. */
	return node[strspn(node, allowed)] == '\0';
}
/*
 * A node name is valid when it
 *  - starts with '/',
 *  - does not end in '/' (unless it is exactly "/"),
 *  - contains no "//", and
 *  - consists only of characters accepted by valid_chars().
 */
bool is_valid_nodename(const char *node)
{
	return strstarts(node, "/")
		&& (streq(node, "/") || !strends(node, "/"))
		&& strstr(node, "//") == NULL
		&& valid_chars(node);
}
561 /* We expect one arg in the input: return NULL otherwise. */
562 static const char *onearg(struct buffered_data *in)
563 {
564 if (!in->used || get_string(in, 0) != in->used)
565 return NULL;
566 return in->buffer;
567 }
569 /* If it fails, returns NULL and sets errno. */
570 static struct xs_permissions *get_perms(const char *dir, unsigned int *num)
571 {
572 unsigned int size;
573 char *strings;
574 struct xs_permissions *ret;
575 int *fd;
577 fd = talloc_open(permfile(dir), O_RDONLY, 0);
578 if (!fd)
579 return NULL;
580 strings = read_all(fd, &size);
581 if (!strings)
582 return NULL;
584 *num = xs_count_strings(strings, size);
585 ret = talloc_array(dir, struct xs_permissions, *num);
586 if (!xs_strings_to_perms(ret, *num, strings))
587 corrupt(NULL, "Permissions corrupt for %s", dir);
589 return ret;
590 }
592 static char *perms_to_strings(const void *ctx,
593 struct xs_permissions *perms, unsigned int num,
594 unsigned int *len)
595 {
596 unsigned int i;
597 char *strings = NULL;
598 char buffer[MAX_STRLEN(domid_t) + 1];
600 for (*len = 0, i = 0; i < num; i++) {
601 if (!xs_perm_to_string(&perms[i], buffer))
602 return NULL;
604 strings = talloc_realloc(ctx, strings, char,
605 *len + strlen(buffer) + 1);
606 strcpy(strings + *len, buffer);
607 *len += strlen(buffer) + 1;
608 }
609 return strings;
610 }
/*
 * Destroy this, and its children, and its children's children.
 *
 * 'path' is a filesystem path (passed as void * so the function can serve as
 * a destructor callback).  If it cannot be opened as a directory it is
 * unlinked; a path that no longer exists is not an error.  Any other failure
 * is reported through corrupt(), which does not return.  Always returns 0.
 */
int destroy_path(void *path)
{
	struct dirent *entry;
	DIR *dir = opendir(path);

	if (!dir) {
		if (unlink(path) == 0 || errno == ENOENT)
			return 0;
		corrupt(NULL, "Destroying path %s", path);
	}

	while ((entry = readdir(dir)) != NULL) {
		const char *name = entry->d_name;

		/* Never descend into the directory itself or its parent. */
		if (!streq(name, ".") && !streq(name, "..")) {
			char child[strlen(path) + 1 + strlen(name) + 1];

			sprintf(child, "%s/%s", (char *)path, name);
			destroy_path(child);
		}
	}
	closedir(dir);

	if (rmdir(path) != 0)
		corrupt(NULL, "Destroying directory %s", path);
	return 0;
}
637 /* Create a self-destructing temporary path */
638 static char *temppath(const char *path)
639 {
640 char *tmppath = talloc_asprintf(path, "%s.tmp", path);
641 talloc_set_destructor(tmppath, destroy_path);
642 return tmppath;
643 }
/*
 * Create a self-destructing temporary file "<path>.tmp" holding 'len' bytes
 * of 'contents'.  The file is created exclusively with mode 0640.
 * Returns the temporary path, or NULL if opening or writing failed.
 */
static char *tempfile(const char *path, void *contents, unsigned int len)
{
	char *tmp = temppath(path);
	int *fd = talloc_open(tmp, O_WRONLY|O_CREAT|O_EXCL, 0640);

	if (fd && xs_write_all(*fd, contents, len))
		return tmp;

	return NULL;
}
/* Destructor callback: close the DIR stream stored where _dir points. */
static int destroy_opendir(void *_dir)
{
	DIR **holder = _dir;

	closedir(*holder);
	return 0;
}
667 /* Return a pointer to a DIR*, self-closing and attached to this pathname. */
668 DIR **talloc_opendir(const char *pathname)
669 {
670 DIR **dir;
672 dir = talloc(pathname, DIR *);
673 *dir = opendir(pathname);
674 if (!*dir) {
675 int saved_errno = errno;
676 talloc_free(dir);
677 errno = saved_errno;
678 return NULL;
679 }
680 talloc_set_destructor(dir, destroy_opendir);
681 return dir;
682 }
/*
 * Move a temporary file into place: 'path' is renamed to itself with
 * everything from its last '.' onwards removed.
 *
 * We assume rename() doesn't fail on moves in same dir; if it does, that is
 * reported through corrupt().  Afterwards the destructor on 'path' is
 * cleared so the committed file is not removed when 'path' is freed.
 */
static void commit_tempfile(const char *path)
{
	char target[strlen(path) + 1];
	unsigned int stem = strrchr(path, '.') - path;

	memcpy(target, path, stem);
	target[stem] = '\0';

	if (rename(path, target) != 0)
		corrupt(NULL, "Committing %s", target);

	talloc_set_destructor(path, NULL);
}
/*
 * Replace the permission file of 'node' (as seen from 'transaction') with
 * the given list.  The new contents are written to a temporary file first
 * and then renamed into place.  Returns false if the list cannot be
 * serialised or the temporary file cannot be written.
 */
static bool set_perms(struct transaction *transaction,
		      const char *node,
		      struct xs_permissions *perms, unsigned int num)
{
	unsigned int nbytes;
	char *tmp;
	char *serialised = perms_to_strings(node, perms, num, &nbytes);

	if (!serialised)
		return false;

	/* Create then move. */
	tmp = tempfile(node_permfile(transaction, node), serialised, nbytes);
	if (!tmp)
		return false;

	commit_tempfile(tmp);
	return true;
}
/*
 * Name of the parent of 'node': everything before the last '/', where the
 * search skips the first character.  When no such '/' exists the parent is
 * "/".  The result is allocated with 'node' as context.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);

	return talloc_strdup(node, "/");
}
725 static enum xs_perm_type perm_for_id(domid_t id,
726 struct xs_permissions *perms,
727 unsigned int num)
728 {
729 unsigned int i;
731 /* Owners and tools get it all... */
732 if (!id || perms[0].id == id)
733 return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
735 for (i = 1; i < num; i++)
736 if (perms[i].id == id)
737 return perms[i].perms;
739 return perms[0].perms;
740 }
742 /* What do parents say? */
743 static enum xs_perm_type ask_parents(struct connection *conn,
744 const char *node)
745 {
746 struct xs_permissions *perms;
747 unsigned int num;
749 do {
750 node = get_parent(node);
751 perms = get_perms(node_dir(conn->transaction, node), &num);
752 if (perms)
753 break;
754 } while (!streq(node, "/"));
756 /* No permission at root? We're in trouble. */
757 if (!perms)
758 corrupt(conn, "No permissions file at root");
760 return perm_for_id(conn->id, perms, num);
761 }
763 /* We have a weird permissions system. You can allow someone into a
764 * specific node without allowing it in the parents. If it's going to
765 * fail, however, we don't want the errno to indicate any information
766 * about the node. */
767 static int errno_from_parents(struct connection *conn, const char *node,
768 int errnum)
769 {
770 /* We always tell them about memory failures. */
771 if (errnum == ENOMEM)
772 return errnum;
774 if (ask_parents(conn, node) & XS_PERM_READ)
775 return errnum;
776 return EACCES;
777 }
/*
 * Turn a possibly relative node name into an absolute one.
 *
 * NULL and names already starting with '/' are returned unchanged.  Other
 * names are prefixed with the connection's implicit path and a '/', when the
 * connection has one; without an implicit path the name is returned as is.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *prefix;

	if (node && !strstarts(node, "/")) {
		prefix = get_implicit_path(conn);
		if (prefix)
			return talloc_asprintf(node, "%s/%s", prefix, node);
	}
	return (char *)node;
}
791 bool check_node_perms(struct connection *conn, const char *node,
792 enum xs_perm_type perm)
793 {
794 struct xs_permissions *perms;
795 unsigned int num;
797 if (!node || !is_valid_nodename(node)) {
798 errno = EINVAL;
799 return false;
800 }
802 if (!conn->can_write && (perm & XS_PERM_WRITE)) {
803 errno = EROFS;
804 return false;
805 }
807 perms = get_perms(node_dir(conn->transaction, node), &num);
809 if (perms) {
810 if (perm_for_id(conn->id, perms, num) & perm)
811 return true;
812 errno = EACCES;
813 return false;
814 }
816 /* If it's OK not to exist, we consult parents. */
817 if (errno == ENOENT && (perm & XS_PERM_ENOENT_OK)) {
818 if (ask_parents(conn, node) & perm)
819 return true;
820 /* Parents say they should not know. */
821 errno = EACCES;
822 return false;
823 }
825 /* They might not have permission to even *see* this node, in
826 * which case we return EACCES even if it's ENOENT or EIO. */
827 errno = errno_from_parents(conn, node, errno);
828 return false;
829 }
/*
 * Event node names must be non-NULL and start with '@'.
 * Returns true for such a name; otherwise sets errno to EINVAL and
 * returns false.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
840 static void send_directory(struct connection *conn, const char *node)
841 {
842 char *path, *reply;
843 unsigned int reply_len = 0;
844 DIR **dir;
845 struct dirent *dirent;
847 node = canonicalize(conn, node);
848 if (!check_node_perms(conn, node, XS_PERM_READ)) {
849 send_error(conn, errno);
850 return;
851 }
853 path = node_dir(conn->transaction, node);
854 dir = talloc_opendir(path);
855 if (!dir) {
856 send_error(conn, errno);
857 return;
858 }
860 reply = talloc_strdup(node, "");
861 while ((dirent = readdir(*dir)) != NULL) {
862 int len = strlen(dirent->d_name) + 1;
864 if (!valid_chars(dirent->d_name))
865 continue;
867 reply = talloc_realloc(path, reply, char, reply_len + len);
868 strcpy(reply + reply_len, dirent->d_name);
869 reply_len += len;
870 }
872 send_reply(conn, XS_DIRECTORY, reply, reply_len);
873 }
875 static void do_read(struct connection *conn, const char *node)
876 {
877 char *value;
878 unsigned int size;
879 int *fd;
881 node = canonicalize(conn, node);
882 if (!check_node_perms(conn, node, XS_PERM_READ)) {
883 send_error(conn, errno);
884 return;
885 }
887 fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0);
888 if (!fd) {
889 /* Data file doesn't exist? We call that a directory */
890 if (errno == ENOENT)
891 errno = EISDIR;
892 send_error(conn, errno);
893 return;
894 }
896 value = read_all(fd, &size);
897 if (!value)
898 send_error(conn, errno);
899 else
900 send_reply(conn, XS_READ, value, size);
901 }
/*
 * Commit this directory, eg. committing a/b.tmp/c causes a/b.tmp -> a/b.
 *
 * 'dir' is modified in place: it is cut at the first '/' following its last
 * '.', so that it names the temporary directory itself.  That directory is
 * then renamed to the same path without the ".suffix".
 *
 * Returns true on success.  Returns false with errno set by rename() on
 * failure, or with errno == EINVAL if 'dir' contains no '.' at all.
 */
static bool commit_dir(char *dir)
{
	char *dot, *slash, *dest;

	dot = strrchr(dir, '.');
	if (!dot) {
		/* Not a temporary path: nothing sensible to rename. */
		errno = EINVAL;
		return false;
	}

	/* Drop any components below the temporary directory. */
	slash = strchr(dot, '/');
	if (slash)
		*slash = '\0';

	dest = talloc_asprintf(dir, "%.*s", (int)(dot - dir), dir);
	return rename(dir, dest) == 0;
}
917 /* Create a temporary directory. Put data in it (if data != NULL) */
918 static char *tempdir(struct connection *conn,
919 const char *node, void *data, unsigned int datalen)
920 {
921 struct xs_permissions *perms;
922 char *permstr;
923 unsigned int num, len;
924 int *fd;
925 char *dir;
927 dir = temppath(node_dir(conn->transaction, node));
928 if (mkdir(dir, 0750) != 0) {
929 if (errno != ENOENT)
930 return NULL;
932 dir = tempdir(conn, get_parent(node), NULL, 0);
933 if (!dir)
934 return NULL;
936 dir = talloc_asprintf(dir, "%s%s", dir, strrchr(node, '/'));
937 if (mkdir(dir, 0750) != 0)
938 return NULL;
939 talloc_set_destructor(dir, destroy_path);
940 }
942 perms = get_perms(get_parent(dir), &num);
943 assert(perms);
944 /* Domains own what they create. */
945 if (conn->id)
946 perms->id = conn->id;
948 permstr = perms_to_strings(dir, perms, num, &len);
949 fd = talloc_open(permfile(dir), O_WRONLY|O_CREAT|O_EXCL, 0640);
950 if (!fd || !xs_write_all(*fd, permstr, len))
951 return NULL;
953 if (data) {
954 char *datapath = datafile(dir);
956 fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
957 if (!fd || !xs_write_all(*fd, data, datalen))
958 return NULL;
959 }
960 return dir;
961 }
963 /* path, flags, data... */
964 static void do_write(struct connection *conn, struct buffered_data *in)
965 {
966 unsigned int offset, datalen;
967 char *vec[2];
968 char *node, *tmppath;
969 enum xs_perm_type mode;
970 struct stat st;
972 /* Extra "strings" can be created by binary data. */
973 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
974 send_error(conn, EINVAL);
975 return;
976 }
978 node = canonicalize(conn, vec[0]);
979 if (!within_transaction(conn->transaction, node)) {
980 send_error(conn, EROFS);
981 return;
982 }
984 if (transaction_block(conn, node))
985 return;
987 offset = strlen(vec[0]) + strlen(vec[1]) + 2;
988 datalen = in->used - offset;
990 if (streq(vec[1], XS_WRITE_NONE))
991 mode = XS_PERM_WRITE;
992 else if (streq(vec[1], XS_WRITE_CREATE))
993 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
994 else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
995 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
996 else {
997 send_error(conn, EINVAL);
998 return;
999 }
1001 if (!check_node_perms(conn, node, mode)) {
1002 send_error(conn, errno);
1003 return;
1006 if (lstat(node_dir(conn->transaction, node), &st) != 0) {
1007 char *dir;
1009 /* Does not exist... */
1010 if (errno != ENOENT) {
1011 send_error(conn, errno);
1012 return;
1015 /* Not going to create it? */
1016 if (streq(vec[1], XS_WRITE_NONE)) {
1017 send_error(conn, ENOENT);
1018 return;
1021 dir = tempdir(conn, node, in->buffer + offset, datalen);
1022 if (!dir || !commit_dir(dir)) {
1023 send_error(conn, errno);
1024 return;
1027 } else {
1028 /* Exists... */
1029 if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
1030 send_error(conn, EEXIST);
1031 return;
1034 tmppath = tempfile(node_datafile(conn->transaction, node),
1035 in->buffer + offset, datalen);
1036 if (!tmppath) {
1037 send_error(conn, errno);
1038 return;
1041 commit_tempfile(tmppath);
1044 add_change_node(conn->transaction, node, false);
1045 fire_watches(conn, node, false);
1046 send_ack(conn, XS_WRITE);
1049 static void do_mkdir(struct connection *conn, const char *node)
1051 char *dir;
1052 struct stat st;
1054 node = canonicalize(conn, node);
1055 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
1056 send_error(conn, errno);
1057 return;
1060 if (!within_transaction(conn->transaction, node)) {
1061 send_error(conn, EROFS);
1062 return;
1065 if (transaction_block(conn, node))
1066 return;
1068 /* Must not already exist. */
1069 if (lstat(node_dir(conn->transaction, node), &st) == 0) {
1070 send_error(conn, EEXIST);
1071 return;
1074 dir = tempdir(conn, node, NULL, 0);
1075 if (!dir || !commit_dir(dir)) {
1076 send_error(conn, errno);
1077 return;
1080 add_change_node(conn->transaction, node, false);
1081 fire_watches(conn, node, false);
1082 send_ack(conn, XS_MKDIR);
1085 static void do_rm(struct connection *conn, const char *node)
1087 char *tmppath, *path;
1089 node = canonicalize(conn, node);
1090 if (!check_node_perms(conn, node, XS_PERM_WRITE)) {
1091 send_error(conn, errno);
1092 return;
1095 if (!within_transaction(conn->transaction, node)) {
1096 send_error(conn, EROFS);
1097 return;
1100 if (transaction_block(conn, node))
1101 return;
1103 if (streq(node, "/")) {
1104 send_error(conn, EINVAL);
1105 return;
1108 /* We move the directory to temporary name, destructor cleans up. */
1109 path = node_dir(conn->transaction, node);
1110 tmppath = talloc_asprintf(node, "%s.tmp", path);
1111 talloc_set_destructor(tmppath, destroy_path);
1113 if (rename(path, tmppath) != 0) {
1114 send_error(conn, errno);
1115 return;
1118 add_change_node(conn->transaction, node, true);
1119 fire_watches(conn, node, true);
1120 send_ack(conn, XS_RM);
1123 static void do_get_perms(struct connection *conn, const char *node)
1125 struct xs_permissions *perms;
1126 char *strings;
1127 unsigned int len, num;
1129 node = canonicalize(conn, node);
1130 if (!check_node_perms(conn, node, XS_PERM_READ)) {
1131 send_error(conn, errno);
1132 return;
1135 perms = get_perms(node_dir(conn->transaction, node), &num);
1136 if (!perms) {
1137 send_error(conn, errno);
1138 return;
1141 strings = perms_to_strings(node, perms, num, &len);
1142 if (!strings)
1143 send_error(conn, errno);
1144 else
1145 send_reply(conn, XS_GET_PERMS, strings, len);
1148 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1150 unsigned int num;
1151 char *node, *permstr;
1152 struct xs_permissions *perms;
1154 num = xs_count_strings(in->buffer, in->used);
1155 if (num < 2) {
1156 send_error(conn, EINVAL);
1157 return;
1160 /* First arg is node name. */
1161 node = canonicalize(conn, in->buffer);
1162 permstr = in->buffer + strlen(in->buffer) + 1;
1163 num--;
1165 if (!within_transaction(conn->transaction, node)) {
1166 send_error(conn, EROFS);
1167 return;
1170 if (transaction_block(conn, node))
1171 return;
1173 /* We must own node to do this (tools can do this too). */
1174 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) {
1175 send_error(conn, errno);
1176 return;
1179 perms = talloc_array(node, struct xs_permissions, num);
1180 if (!xs_strings_to_perms(perms, num, permstr)) {
1181 send_error(conn, errno);
1182 return;
1185 if (!set_perms(conn->transaction, node, perms, num)) {
1186 send_error(conn, errno);
1187 return;
1190 add_change_node(conn->transaction, node, false);
1191 fire_watches(conn, node, false);
1192 send_ack(conn, XS_SET_PERMS);
1195 /* Process "in" for conn: "in" will vanish after this conversation, so
1196 * we can talloc off it for temporary variables. May free "conn".
1197 */
1198 static void process_message(struct connection *conn, struct buffered_data *in)
1200 switch (in->hdr.msg.type) {
1201 case XS_DIRECTORY:
1202 send_directory(conn, onearg(in));
1203 break;
1205 case XS_READ:
1206 do_read(conn, onearg(in));
1207 break;
1209 case XS_WRITE:
1210 do_write(conn, in);
1211 break;
1213 case XS_MKDIR:
1214 do_mkdir(conn, onearg(in));
1215 break;
1217 case XS_RM:
1218 do_rm(conn, onearg(in));
1219 break;
1221 case XS_GET_PERMS:
1222 do_get_perms(conn, onearg(in));
1223 break;
1225 case XS_SET_PERMS:
1226 do_set_perms(conn, in);
1227 break;
1229 case XS_SHUTDOWN:
1230 /* FIXME: Implement gentle shutdown too. */
1231 /* Only tools can do this. */
1232 if (conn->id != 0) {
1233 send_error(conn, EACCES);
1234 break;
1236 if (!conn->can_write) {
1237 send_error(conn, EROFS);
1238 break;
1240 send_ack(conn, XS_SHUTDOWN);
1241 /* Everything hangs off auto-free context, freed at exit. */
1242 exit(0);
1244 case XS_DEBUG:
1245 if (streq(in->buffer, "print"))
1246 xprintf("debug: %s", in->buffer + get_string(in, 0));
1247 #ifdef TESTING
1248 /* For testing, we allow them to set id. */
1249 if (streq(in->buffer, "setid")) {
1250 conn->id = atoi(in->buffer + get_string(in, 0));
1251 send_ack(conn, XS_DEBUG);
1252 } else if (streq(in->buffer, "failtest")) {
1253 if (get_string(in, 0) < in->used)
1254 srandom(atoi(in->buffer + get_string(in, 0)));
1255 send_ack(conn, XS_DEBUG);
1256 failtest = true;
1258 #endif /* TESTING */
1259 break;
1261 case XS_WATCH:
1262 do_watch(conn, in);
1263 break;
1265 case XS_WATCH_ACK:
1266 do_watch_ack(conn, onearg(in));
1267 break;
1269 case XS_UNWATCH:
1270 do_unwatch(conn, in);
1271 break;
1273 case XS_TRANSACTION_START:
1274 do_transaction_start(conn, onearg(in));
1275 break;
1277 case XS_TRANSACTION_END:
1278 do_transaction_end(conn, onearg(in));
1279 break;
1281 case XS_INTRODUCE:
1282 do_introduce(conn, in);
1283 break;
1285 case XS_RELEASE:
1286 do_release(conn, onearg(in));
1287 break;
1289 case XS_GET_DOMAIN_PATH:
1290 do_get_domain_path(conn, onearg(in));
1291 break;
1293 case XS_WATCH_EVENT:
1294 default:
1295 eprintf("Client unknown operation %i", in->hdr.msg.type);
1296 send_error(conn, ENOSYS);
/* talloc failure handler: "data" points at the jmp_buf armed by the
 * caller; jump back to it with value 1.  Never returns.
 */
static int out_of_mem(void *data)
{
	jmp_buf *env = data;

	longjmp(*env, 1);
}
1305 static void consider_message(struct connection *conn)
1307 /*
1308 * 'volatile' qualifier prevents register allocation which fixes:
1309 * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
1310 */
1311 struct buffered_data *volatile in = NULL;
1312 enum xsd_sockmsg_type volatile type = conn->in->hdr.msg.type;
1313 jmp_buf talloc_fail;
1315 assert(conn->state == OK);
1317 /* For simplicity, we kill the connection on OOM. */
1318 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1319 if (setjmp(talloc_fail)) {
1320 talloc_free(conn);
1321 goto end;
1324 if (verbose)
1325 xprintf("Got message %s len %i from %p\n",
1326 sockmsg_string(type), conn->in->hdr.msg.len, conn);
1328 /* We might get a command while waiting for an ack: this means
1329 * the other end discarded it: we will re-transmit. */
1330 if (type != XS_WATCH_ACK)
1331 conn->waiting_for_ack = NULL;
1333 /* Careful: process_message may free connection. We detach
1334 * "in" beforehand and allocate the new buffer to avoid
1335 * touching conn after process_message.
1336 */
1337 in = talloc_steal(talloc_autofree_context(), conn->in);
1338 conn->in = new_buffer(conn);
1339 process_message(conn, in);
1341 if (conn->state == BLOCKED) {
1342 /* Blocked by transaction: queue for re-xmit. */
1343 talloc_free(conn->in);
1344 conn->in = in;
1345 in = NULL;
1346 trace_blocked(conn, conn->in);
1349 end:
1350 talloc_free(in);
1351 talloc_set_fail_handler(NULL, NULL);
1352 if (talloc_total_blocks(NULL)
1353 != talloc_total_blocks(talloc_autofree_context()) + 1) {
1354 talloc_report_full(NULL, stderr);
1355 abort();
1359 /* Errors in reading or allocating here mean we get out of sync, so we
1360 * drop the whole client connection. */
1361 void handle_input(struct connection *conn)
1363 int bytes;
1364 struct buffered_data *in;
1366 assert(conn->state == OK);
1367 in = conn->in;
1369 /* Not finished header yet? */
1370 if (in->inhdr) {
1371 bytes = conn->read(conn, in->hdr.raw + in->used,
1372 sizeof(in->hdr) - in->used);
1373 if (bytes <= 0)
1374 goto bad_client;
1375 in->used += bytes;
1376 if (in->used != sizeof(in->hdr))
1377 return;
1379 if (in->hdr.msg.len > PATH_MAX) {
1380 #ifndef TESTING
1381 syslog(LOG_DAEMON, "Client tried to feed us %i",
1382 in->hdr.msg.len);
1383 #endif
1384 goto bad_client;
1387 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1388 if (!in->buffer)
1389 goto bad_client;
1390 in->used = 0;
1391 in->inhdr = false;
1392 return;
1395 bytes = conn->read(conn, in->buffer + in->used,
1396 in->hdr.msg.len - in->used);
1397 if (bytes < 0)
1398 goto bad_client;
1400 in->used += bytes;
1401 if (in->used != in->hdr.msg.len)
1402 return;
1404 trace_io(conn, "IN ", in);
1405 consider_message(conn);
1406 return;
1408 bad_client:
1409 /* Kill it. */
1410 talloc_free(conn);
/* Push pending output for conn; if write_message() reports failure the
 * connection is freed.
 */
void handle_output(struct connection *conn)
{
	if (write_message(conn))
		return;

	talloc_free(conn);
}
1419 /* If a transaction has ended, see if we can unblock any connections. */
1420 static void unblock_connections(void)
1422 struct connection *i, *tmp;
1424 list_for_each_entry_safe(i, tmp, &connections, list) {
1425 switch (i->state) {
1426 case BLOCKED:
1427 if (!transaction_covering_node(i->blocked_by)) {
1428 talloc_free(i->blocked_by);
1429 i->blocked_by = NULL;
1430 i->state = OK;
1431 consider_message(i);
1433 break;
1434 case BUSY:
1435 case OK:
1436 break;
1440 /* To balance bias, move first entry to end. */
1441 if (!list_empty(&connections)) {
1442 i = list_top(&connections, struct connection, list);
1443 list_del(&i->list);
1444 list_add_tail(&i->list, &connections);
1448 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1450 /*
1451 * 'volatile' qualifier prevents register allocation which fixes:
1452 * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
1453 */
1454 struct connection *volatile new;
1455 jmp_buf talloc_fail;
1457 new = talloc(talloc_autofree_context(), struct connection);
1458 if (!new)
1459 return NULL;
1461 new->state = OK;
1462 new->blocked_by = NULL;
1463 new->out = new->waiting_reply = NULL;
1464 new->waiting_for_ack = NULL;
1465 new->fd = -1;
1466 new->id = 0;
1467 new->domain = NULL;
1468 new->transaction = NULL;
1469 new->write = write;
1470 new->read = read;
1471 new->can_write = true;
1472 INIT_LIST_HEAD(&new->watches);
1474 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1475 if (setjmp(talloc_fail)) {
1476 talloc_free(new);
1477 return NULL;
1479 new->in = new_buffer(new);
1480 talloc_set_fail_handler(NULL, NULL);
1482 list_add_tail(&new->list, &connections);
1483 talloc_set_destructor(new, destroy_conn);
1484 trace_create(new, "connection");
1485 return new;
1488 static int writefd(struct connection *conn, const void *data, unsigned int len)
1490 return write(conn->fd, data, len);
1493 static int readfd(struct connection *conn, void *data, unsigned int len)
1495 return read(conn->fd, data, len);
1498 static void accept_connection(int sock, bool canwrite)
1500 int fd;
1501 struct connection *conn;
1503 fd = accept(sock, NULL, NULL);
1504 if (fd < 0)
1505 return;
1507 conn = new_connection(writefd, readfd);
1508 if (conn) {
1509 conn->fd = fd;
1510 conn->can_write = canwrite;
1511 } else
1512 close(fd);
/* Calc timespan from now to absolute time.
 *
 * On entry *tv holds an absolute time; on return it holds the interval
 * from the current time until then, or zero if that moment has already
 * passed.
 */
static void time_relative_to_now(struct timeval *tv)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	/* Deadline strictly in the past: nothing left to wait for. */
	if (now.tv_sec > tv->tv_sec
	    || (now.tv_sec == tv->tv_sec && now.tv_usec > tv->tv_usec)) {
		tv->tv_sec = 0;
		tv->tv_usec = 0;
		return;
	}

	tv->tv_sec -= now.tv_sec;
	tv->tv_usec -= now.tv_usec;
	if (tv->tv_usec < 0) {
		/* Borrow one second to keep microseconds in range. */
		tv->tv_sec--;
		tv->tv_usec += 1000000;
	}
}
#ifdef TESTING
/* Useful for running under debugger.
 *
 * Prints, for every entry on the global connection list: its state,
 * its id when non-zero, what it is blocked on, how much of the current
 * input has arrived, any outgoing and queued message, and finally its
 * watches.
 */
void dump_connection(void)
{
	struct connection *c;

	list_for_each_entry(c, &connections, list) {
		const char *state_name;

		switch (c->state) {
		case OK:
			state_name = "OK";
			break;
		case BLOCKED:
			state_name = "BLOCKED";
			break;
		case BUSY:
			state_name = "BUSY";
			break;
		default:
			state_name = "INVALID";
			break;
		}

		printf("Connection %p:\n", c);
		printf(" state = %s\n", state_name);
		if (c->id)
			printf(" id = %i\n", c->id);
		if (c->blocked_by)
			printf(" blocked on = %s\n", c->blocked_by);
		/* Skip only a header read that has not started yet. */
		if (!c->in->inhdr || c->in->used)
			printf(" got %i bytes of %s\n",
			       c->in->used,
			       c->in->inhdr ? "header" : "data");
		if (c->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(c->out->hdr.msg.type),
			       c->out->buffer);
		if (c->waiting_reply)
			printf(" ... and behind is queued %s (%s)\n",
			       sockmsg_string(c->waiting_reply->hdr.msg.type),
			       c->waiting_reply->buffer);
#if 0
		if (c->transaction)
			dump_transaction(c);
		if (c->domain)
			dump_domain(c);
#endif
		dump_watches(c);
	}
}
#endif
1572 static void setup_structure(void)
1574 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_READ };
1575 char *root, *dir, *permfile;
1577 /* Create root directory, with permissions. */
1578 if (mkdir(xs_daemon_store(), 0750) != 0) {
1579 if (errno != EEXIST)
1580 barf_perror("Could not create root %s",
1581 xs_daemon_store());
1582 return;
1584 root = talloc_strdup(talloc_autofree_context(), "/");
1585 if (!set_perms(NULL, root, &perms, 1))
1586 barf_perror("Could not create permissions in root");
1588 /* Create tool directory, with xenstored subdir. */
1589 dir = talloc_asprintf(root, "%s/%s", xs_daemon_store(), "tool");
1590 if (mkdir(dir, 0750) != 0)
1591 barf_perror("Making dir %s", dir);
1593 permfile = talloc_strdup(root, "/tool");
1594 if (!set_perms(NULL, permfile, &perms, 1))
1595 barf_perror("Could not create permissions on %s", permfile);
1597 dir = talloc_asprintf(root, "%s/%s", dir, "xenstored");
1598 if (mkdir(dir, 0750) != 0)
1599 barf_perror("Making dir %s", dir);
1601 permfile = talloc_strdup(root, "/tool/xenstored");
1602 if (!set_perms(NULL, permfile, &perms, 1))
1603 barf_perror("Could not create permissions on %s", permfile);
1604 talloc_free(root);
1605 if (mkdir(xs_daemon_transactions(), 0750) != 0)
1606 barf_perror("Could not create transaction dir %s",
1607 xs_daemon_transactions());
1610 static void write_pidfile(const char *pidfile)
1612 char buf[100];
1613 int len;
1614 int fd;
1616 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1617 if (fd == -1)
1618 barf_perror("Opening pid file %s", pidfile);
1620 /* We exit silently if daemon already running. */
1621 if (lockf(fd, F_TLOCK, 0) == -1)
1622 exit(0);
1624 len = sprintf(buf, "%d\n", getpid());
1625 write(fd, buf, len);
/* Detach into the background (after Stevens): fork and let the parent
 * exit, start a new session, change directory to "/" and clear the
 * umask.  A failed fork is fatal.
 */
static void daemonize(void)
{
	/* Separate from our parent via fork, so init inherits us. */
	pid_t child = fork();

	if (child < 0)
		barf_perror("Failed to fork daemon");
	if (child != 0)
		exit(0);	/* parent's job is done */

	/* Session leader so ^C doesn't whack us. */
	setsid();

	/* Move off any mount points we might be in. */
	chdir("/");

	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/* Long command-line options; each maps onto the short option letter
 * given in the last column.  Terminated by an all-zero entry.
 */
static struct option options[] = {
	{ "pid-file",   required_argument, NULL, 'F' },
	{ "no-fork",    no_argument,       NULL, 'N' },
	{ "output-pid", no_argument,       NULL, 'P' },
	{ "trace-file", required_argument, NULL, 'T' },
	{ "verbose",    no_argument,       NULL, 'V' },
	{ NULL,         0,                 NULL, 0   }
};
1656 int main(int argc, char *argv[])
1658 int opt, *sock, *ro_sock, event_fd, max;
1659 struct sockaddr_un addr;
1660 fd_set inset, outset;
1661 bool dofork = true;
1662 bool outputpid = false;
1663 const char *pidfile = NULL;
1665 while ((opt = getopt_long(argc, argv, "F:NPT:V", options,
1666 NULL)) != -1) {
1667 switch (opt) {
1668 case 'F':
1669 pidfile = optarg;
1670 break;
1671 case 'N':
1672 dofork = false;
1673 break;
1674 case 'P':
1675 outputpid = true;
1676 break;
1677 case 'T':
1678 tracefd = open(optarg, O_WRONLY|O_CREAT|O_APPEND, 0600);
1679 if (tracefd < 0)
1680 barf_perror("Could not open tracefile %s",
1681 optarg);
1682 write(tracefd, "\n***\n", strlen("\n***\n"));
1683 break;
1684 case 'V':
1685 verbose = true;
1686 break;
1689 if (optind != argc)
1690 barf("%s: No arguments desired", argv[0]);
1692 if (dofork) {
1693 openlog("xenstored", 0, LOG_DAEMON);
1694 daemonize();
1696 if (pidfile)
1697 write_pidfile(pidfile);
1699 talloc_enable_leak_report_full();
1701 /* Create sockets for them to listen to. */
1702 sock = talloc(talloc_autofree_context(), int);
1703 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1704 if (*sock < 0)
1705 barf_perror("Could not create socket");
1706 ro_sock = talloc(talloc_autofree_context(), int);
1707 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1708 if (*ro_sock < 0)
1709 barf_perror("Could not create socket");
1710 talloc_set_destructor(sock, destroy_fd);
1711 talloc_set_destructor(ro_sock, destroy_fd);
1713 /* Don't kill us with SIGPIPE. */
1714 signal(SIGPIPE, SIG_IGN);
1716 /* FIXME: Be more sophisticated, don't mug running daemon. */
1717 unlink(xs_daemon_socket());
1718 unlink(xs_daemon_socket_ro());
1720 addr.sun_family = AF_UNIX;
1721 strcpy(addr.sun_path, xs_daemon_socket());
1722 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1723 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1724 strcpy(addr.sun_path, xs_daemon_socket_ro());
1725 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1726 barf_perror("Could not bind socket to %s",
1727 xs_daemon_socket_ro());
1728 if (chmod(xs_daemon_socket(), 0600) != 0
1729 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1730 barf_perror("Could not chmod sockets");
1732 if (listen(*sock, 1) != 0
1733 || listen(*ro_sock, 1) != 0)
1734 barf_perror("Could not listen on sockets");
1736 /* If we're the first, create .perms file for root. */
1737 setup_structure();
1739 /* Listen to hypervisor. */
1740 event_fd = domain_init();
1742 /* Restore existing connections. */
1743 restore_existing_connections();
1745 if (outputpid) {
1746 printf("%i\n", getpid());
1747 fflush(stdout);
1750 /* close stdin/stdout now we're ready to accept connections */
1751 if (dofork) {
1752 close(STDIN_FILENO);
1753 close(STDOUT_FILENO);
1754 close(STDERR_FILENO);
1757 #ifdef TESTING
1758 signal(SIGUSR1, stop_failtest);
1759 #endif
1761 /* Get ready to listen to the tools. */
1762 max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd);
1764 /* Main loop. */
1765 /* FIXME: Rewrite so noone can starve. */
1766 for (;;) {
1767 struct connection *i;
1768 struct timeval *tvp = NULL, tv;
1770 timerclear(&tv);
1771 shortest_transaction_timeout(&tv);
1772 shortest_watch_ack_timeout(&tv);
1773 if (timerisset(&tv)) {
1774 time_relative_to_now(&tv);
1775 tvp = &tv;
1778 if (select(max+1, &inset, &outset, NULL, tvp) < 0) {
1779 if (errno == EINTR)
1780 continue;
1781 barf_perror("Select failed");
1784 if (FD_ISSET(*sock, &inset))
1785 accept_connection(*sock, true);
1787 if (FD_ISSET(*ro_sock, &inset))
1788 accept_connection(*ro_sock, false);
1790 if (FD_ISSET(event_fd, &inset))
1791 handle_event(event_fd);
1793 list_for_each_entry(i, &connections, list) {
1794 if (i->domain)
1795 continue;
1797 /* Operations can delete themselves or others
1798 * (xs_release): list is not safe after input,
1799 * so break. */
1800 if (FD_ISSET(i->fd, &inset)) {
1801 handle_input(i);
1802 break;
1804 if (FD_ISSET(i->fd, &outset)) {
1805 handle_output(i);
1806 break;
1810 /* Handle all possible I/O for domain connections. */
1811 more:
1812 list_for_each_entry(i, &connections, list) {
1813 if (!i->domain)
1814 continue;
1816 if (domain_can_read(i)) {
1817 handle_input(i);
1818 goto more;
1821 if (domain_can_write(i)) {
1822 handle_output(i);
1823 goto more;
1827 if (tvp) {
1828 check_transaction_timeout();
1829 check_watch_ack_timeout();
1832 /* If transactions ended, we might be able to do more work. */
1833 unblock_connections();
1835 max = initialize_set(&inset, &outset, *sock,*ro_sock,event_fd);