ia64/xen-unstable
changeset 9093:689da5e0a970
Added a basic integrity checker, and some basic ability to recover from store
corruption, rather than just spewing error messages and exiting.
Added a xenstore-control executable, which sends commands to xenstored.
Currently, the only command is 'check', which triggers an integrity check.
(The integrity check is also triggered whenever a corrupted store is detected).
Signed-off-by: Ewan Mellor <ewan@xensource.com>
Added a basic integrity checker, and some basic ability to recover from store
corruption, rather than just spewing error messages and exiting.
Added a xenstore-control executable, which sends commands to xenstored.
Currently, the only command is 'check', which triggers an integrity check.
(The integrity check is also triggered whenever a corrupted store is detected).
Signed-off-by: Ewan Mellor <ewan@xensource.com>
author | emellor@leeni.uk.xensource.com |
---|---|
date | Thu Mar 02 02:09:23 2006 +0100 (2006-03-02) |
parents | 849723752858 |
children | cff782f65c4d |
files | .hgignore tools/xenstore/Makefile tools/xenstore/xenstore_control.c tools/xenstore/xenstored_core.c tools/xenstore/xenstored_core.h |
line diff
1.1 --- a/.hgignore Thu Mar 02 02:01:17 2006 +0100 1.2 +++ b/.hgignore Thu Mar 02 02:09:23 2006 +0100 1.3 @@ -166,6 +166,7 @@ 1.4 ^tools/xenstore/xenstore-read$ 1.5 ^tools/xenstore/xenstore-rm$ 1.6 ^tools/xenstore/xenstore-write$ 1.7 +^tools/xenstore/xenstore-control$ 1.8 ^tools/xenstore/xenstore-ls$ 1.9 ^tools/xenstore/xenstored$ 1.10 ^tools/xenstore/xenstored_test$
2.1 --- a/tools/xenstore/Makefile Thu Mar 02 02:01:17 2006 +0100 2.2 +++ b/tools/xenstore/Makefile Thu Mar 02 02:09:23 2006 +0100 2.3 @@ -27,7 +27,10 @@ CLIENTS := xenstore-exists xenstore-list 2.4 CLIENTS += xenstore-write 2.5 CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS)) 2.6 2.7 -all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-ls 2.8 +all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls 2.9 + 2.10 +test_interleaved_transactions: test_interleaved_transactions.o 2.11 + $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@ 2.12 2.13 testcode: xs_test xenstored_test xs_random 2.14 2.15 @@ -35,13 +38,16 @@ xenstored: xenstored_core.o xenstored_wa 2.16 $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@ 2.17 2.18 $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so 2.19 - $(LINK.o) $< $(LOADLIBES) $(LDLIBS) -lxenctrl -L. -lxenstore -o $@ 2.20 + $(LINK.o) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@ 2.21 2.22 $(CLIENTS_OBJS): xenstore_%.o: xenstore_client.c 2.23 $(COMPILE.c) -DCLIENT_$(*F) -o $@ $< 2.24 2.25 +xenstore-control: xenstore_control.o libxenstore.so 2.26 + $(LINK.o) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@ 2.27 + 2.28 xenstore-ls: xsls.o libxenstore.so 2.29 - $(LINK.o) $< $(LOADLIBES) $(LDLIBS) -lxenctrl -L. -lxenstore -o $@ 2.30 + $(LINK.o) $< $(LOADLIBES) $(LDLIBS) -L. 
-lxenstore -o $@ 2.31 2.32 xenstored_test: xenstored_core_test.o xenstored_watch_test.o xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o fake_libxc.o utils.o tdb.o 2.33 $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ 2.34 @@ -77,7 +83,8 @@ libxenstore.so: xs.opic xs_lib.opic 2.35 clean: testsuite-clean 2.36 rm -f *.o *.opic *.so 2.37 rm -f xenstored xs_random xs_stress xs_crashme 2.38 - rm -f xs_test xenstored_test xs_tdb_dump xenstore-ls $(CLIENTS) 2.39 + rm -f xs_test xenstored_test xs_tdb_dump xenstore-control xenstore-ls 2.40 + rm -f $(CLIENTS) 2.41 $(RM) $(PROG_DEP) 2.42 2.43 print-dir: 2.44 @@ -129,7 +136,7 @@ TAGS: 2.45 tarball: clean 2.46 cd .. && tar -c -j -v -h -f xenstore.tar.bz2 xenstore/ 2.47 2.48 -install: libxenstore.so xenstored xenstore-ls $(CLIENTS) 2.49 +install: all 2.50 $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored 2.51 $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored 2.52 $(INSTALL_DIR) -p $(DESTDIR)/usr/bin 2.53 @@ -137,6 +144,7 @@ install: libxenstore.so xenstored xensto 2.54 $(INSTALL_DIR) -p $(DESTDIR)/usr/include 2.55 $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin 2.56 $(INSTALL_PROG) $(CLIENTS) $(DESTDIR)/usr/bin 2.57 + $(INSTALL_PROG) xenstore-control $(DESTDIR)/usr/bin 2.58 $(INSTALL_PROG) xenstore-ls $(DESTDIR)/usr/bin 2.59 $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) 2.60 $(INSTALL_DATA) libxenstore.so $(DESTDIR)/usr/$(LIBDIR)
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/tools/xenstore/xenstore_control.c Thu Mar 02 02:09:23 2006 +0100 3.3 @@ -0,0 +1,28 @@ 3.4 +#include <stdio.h> 3.5 +#include <stdlib.h> 3.6 +#include <string.h> 3.7 + 3.8 +#include "xs.h" 3.9 + 3.10 + 3.11 +int main(int argc, char **argv) 3.12 +{ 3.13 + if (argc < 2 || 3.14 + strcmp(argv[1], "check")) 3.15 + { 3.16 + fprintf(stderr, 3.17 + "Usage:\n" 3.18 + "\n" 3.19 + " %s check\n" 3.20 + "\n", argv[0]); 3.21 + return 2; 3.22 + } 3.23 + 3.24 + struct xs_handle * xsh = xs_daemon_open(); 3.25 + 3.26 + xs_debug_command(xsh, argv[1], NULL, 0); 3.27 + 3.28 + xs_daemon_close(xsh); 3.29 + 3.30 + return 0; 3.31 +}
4.1 --- a/tools/xenstore/xenstored_core.c Thu Mar 02 02:01:17 2006 +0100 4.2 +++ b/tools/xenstore/xenstored_core.c Thu Mar 02 02:09:23 2006 +0100 4.3 @@ -60,6 +60,18 @@ static int reopen_log_pipe[2]; 4.4 static char *tracefile = NULL; 4.5 static TDB_CONTEXT *tdb_ctx; 4.6 4.7 +static void corrupt(struct connection *conn, const char *fmt, ...); 4.8 +static void check_store(); 4.9 + 4.10 +#define log(...) \ 4.11 + do { \ 4.12 + char *s = talloc_asprintf(NULL, __VA_ARGS__); \ 4.13 + trace("%s\n", s); \ 4.14 + syslog(LOG_ERR, "%s", s); \ 4.15 + talloc_free(s); \ 4.16 + } while (0) 4.17 + 4.18 + 4.19 #ifdef TESTING 4.20 static bool failtest = false; 4.21 4.22 @@ -104,33 +116,6 @@ int test_mkdir(const char *dir, int perm 4.23 4.24 #include "xenstored_test.h" 4.25 4.26 -/* FIXME: Ideally, this should never be called. Some can be eliminated. */ 4.27 -/* Something is horribly wrong: shutdown immediately. */ 4.28 -void __attribute__((noreturn)) corrupt(struct connection *conn, 4.29 - const char *fmt, ...) 4.30 -{ 4.31 - va_list arglist; 4.32 - char *str; 4.33 - int saved_errno = errno; 4.34 - 4.35 - va_start(arglist, fmt); 4.36 - str = talloc_vasprintf(NULL, fmt, arglist); 4.37 - va_end(arglist); 4.38 - 4.39 - trace("xenstored corruption: connection id %i: err %s: %s", 4.40 - conn ? (int)conn->id : -1, strerror(saved_errno), str); 4.41 - eprintf("xenstored corruption: connection id %i: err %s: %s", 4.42 - conn ? (int)conn->id : -1, strerror(saved_errno), str); 4.43 -#ifdef TESTING 4.44 - /* Allow them to attach debugger. */ 4.45 - sleep(30); 4.46 -#endif 4.47 - syslog(LOG_DAEMON, 4.48 - "xenstored corruption: connection id %i: err %s: %s", 4.49 - conn ? (int)conn->id : -1, strerror(saved_errno), str); 4.50 - _exit(2); 4.51 -} 4.52 - 4.53 TDB_CONTEXT *tdb_context(struct connection *conn) 4.54 { 4.55 /* conn = NULL used in manual_node at setup. 
*/ 4.56 @@ -216,7 +201,8 @@ static void trace_io(const struct connec 4.57 now = time(NULL); 4.58 tm = localtime(&now); 4.59 4.60 - trace("%s %p %02d:%02d:%02d %s (", prefix, conn, 4.61 + trace("%s %p %p %04d%02d%02d %02d:%02d:%02d %s (", prefix, conn, 4.62 + conn->transaction, tm->year + 1900, tm->mon + 1, tm->mday, 4.63 tm->tm_hour, tm->tm_min, tm->tm_sec, 4.64 sockmsg_string(data->hdr.msg.type)); 4.65 4.66 @@ -837,8 +823,6 @@ static int destroy_node(void *_node) 4.67 return 0; 4.68 } 4.69 4.70 -/* Be careful: create heirarchy, put entry in existing parent *last*. 4.71 - * This helps fsck if we die during this. */ 4.72 static struct node *create_node(struct connection *conn, 4.73 const char *name, 4.74 void *data, unsigned int datalen) 4.75 @@ -939,8 +923,9 @@ static void delete_node(struct connectio 4.76 { 4.77 unsigned int i; 4.78 4.79 - /* Delete self, then delete children. If something goes wrong, 4.80 - * consistency check will clean up this way. */ 4.81 + /* Delete self, then delete children. If we crash, then the worst 4.82 + that can happen is the children will continue to take up space, but 4.83 + will otherwise be unreachable. */ 4.84 delete_node_single(conn, node); 4.85 4.86 /* Delete children, too. */ 4.87 @@ -950,9 +935,14 @@ static void delete_node(struct connectio 4.88 child = read_node(conn, 4.89 talloc_asprintf(node, "%s/%s", node->name, 4.90 node->children + i)); 4.91 - if (!child) 4.92 - corrupt(conn, "No child '%s' found", child); 4.93 - delete_node(conn, child); 4.94 + if (child) { 4.95 + delete_node(conn, child); 4.96 + } 4.97 + else { 4.98 + trace("delete_node: No child '%s/%s' found!\n", 4.99 + node->name, node->children + i); 4.100 + /* Skip it, we've already deleted the parent. 
*/ 4.101 + } 4.102 } 4.103 } 4.104 4.105 @@ -976,12 +966,15 @@ static bool delete_child(struct connecti 4.106 } 4.107 } 4.108 corrupt(conn, "Can't find child '%s' in %s", childname, node->name); 4.109 + return false; 4.110 } 4.111 4.112 4.113 static int _rm(struct connection *conn, struct node *node, const char *name) 4.114 { 4.115 - /* Delete from parent first, then if something explodes fsck cleans. */ 4.116 + /* Delete from parent first, then if we crash, the worst that can 4.117 + happen is the child will continue to take up space, but will 4.118 + otherwise be unreachable. */ 4.119 struct node *parent = read_node(conn, get_parent(name)); 4.120 if (!parent) { 4.121 send_error(conn, EINVAL); 4.122 @@ -1000,10 +993,11 @@ static int _rm(struct connection *conn, 4.123 4.124 static void internal_rm(const char *name) 4.125 { 4.126 - char *tname = talloc_strdup(talloc_autofree_context(), name); 4.127 + char *tname = talloc_strdup(NULL, name); 4.128 struct node *node = read_node(NULL, tname); 4.129 if (node) 4.130 _rm(NULL, node, tname); 4.131 + talloc_free(tname); 4.132 } 4.133 4.134 4.135 @@ -1149,18 +1143,19 @@ static void process_message(struct conne 4.136 case XS_DEBUG: 4.137 if (streq(in->buffer, "print")) 4.138 xprintf("debug: %s", in->buffer + get_string(in, 0)); 4.139 + if (streq(in->buffer, "check")) 4.140 + check_store(); 4.141 #ifdef TESTING 4.142 /* For testing, we allow them to set id. 
*/ 4.143 if (streq(in->buffer, "setid")) { 4.144 conn->id = atoi(in->buffer + get_string(in, 0)); 4.145 - send_ack(conn, XS_DEBUG); 4.146 } else if (streq(in->buffer, "failtest")) { 4.147 if (get_string(in, 0) < in->used) 4.148 srandom(atoi(in->buffer + get_string(in, 0))); 4.149 - send_ack(conn, XS_DEBUG); 4.150 failtest = true; 4.151 } 4.152 #endif /* TESTING */ 4.153 + send_ack(conn, XS_DEBUG); 4.154 break; 4.155 4.156 case XS_WATCH: 4.157 @@ -1258,7 +1253,7 @@ static void handle_input(struct connecti 4.158 4.159 if (in->hdr.msg.len > PATH_MAX) { 4.160 #ifndef TESTING 4.161 - syslog(LOG_DAEMON, "Client tried to feed us %i", 4.162 + syslog(LOG_ERR, "Client tried to feed us %i", 4.163 in->hdr.msg.len); 4.164 #endif 4.165 goto bad_client; 4.166 @@ -1425,10 +1420,16 @@ static void setup_structure(void) 4.167 balloon driver will pick up stale entries. In the case of 4.168 the balloon driver, this can be fatal. 4.169 */ 4.170 - char *tlocal = talloc_strdup(talloc_autofree_context(), 4.171 - "/local"); 4.172 + char *tlocal = talloc_strdup(NULL, "/local"); 4.173 + 4.174 + check_store(); 4.175 + 4.176 internal_rm("/local"); 4.177 create_node(NULL, tlocal, NULL, 0); 4.178 + 4.179 + talloc_free(tlocal); 4.180 + 4.181 + check_store(); 4.182 } 4.183 else { 4.184 tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT, 4.185 @@ -1439,11 +1440,93 @@ static void setup_structure(void) 4.186 manual_node("/", "tool"); 4.187 manual_node("/tool", "xenstored"); 4.188 manual_node("/tool/xenstored", NULL); 4.189 + 4.190 + check_store(); 4.191 } 4.192 +} 4.193 + 4.194 +static char *child_name(const char *s1, const char *s2) 4.195 +{ 4.196 + if (strcmp(s1, "/")) { 4.197 + return talloc_asprintf(NULL, "%s/%s", s1, s2); 4.198 + } 4.199 + else { 4.200 + return talloc_asprintf(NULL, "/%s", s2); 4.201 + } 4.202 +} 4.203 + 4.204 +static void check_store_(const char *name) 4.205 +{ 4.206 + struct node *node = read_node(NULL, name); 4.207 + 4.208 + if (node) { 4.209 + size_t i = 0; 4.210 + 
4.211 + while (i < node->childlen) { 4.212 + size_t childlen = strlen(node->children + i); 4.213 + char * childname = child_name(node->name, 4.214 + node->children + i); 4.215 + struct node *childnode = read_node(NULL, childname); 4.216 + 4.217 + if (childnode) { 4.218 + check_store_(childname); 4.219 + i += childlen + 1; 4.220 + } 4.221 + else { 4.222 + log("check_store: No child '%s' found!\n", 4.223 + childname); 4.224 + 4.225 + memdel(node->children, i, childlen + 1, 4.226 + node->childlen); 4.227 + node->childlen -= childlen + 1; 4.228 + write_node(NULL, node); 4.229 + } 4.230 4.231 - /* FIXME: Fsck */ 4.232 + talloc_free(childname); 4.233 + } 4.234 + } 4.235 + else { 4.236 + /* Impossible, because no database should ever be without the 4.237 + root, and otherwise, we've just checked in our caller 4.238 + (which made a recursive call to get here). */ 4.239 + 4.240 + log("check_store: No child '%s' found: impossible!", name); 4.241 + } 4.242 +} 4.243 + 4.244 + 4.245 +static void check_store() 4.246 +{ 4.247 + char * root = talloc_strdup(NULL, "/"); 4.248 + log("Checking store ..."); 4.249 + check_store_(root); 4.250 + log("Checking store complete."); 4.251 + talloc_free(root); 4.252 } 4.253 4.254 + 4.255 +/* Something is horribly wrong: check the store. */ 4.256 +static void corrupt(struct connection *conn, const char *fmt, ...) 4.257 +{ 4.258 + va_list arglist; 4.259 + char *str; 4.260 + int saved_errno = errno; 4.261 + 4.262 + va_start(arglist, fmt); 4.263 + str = talloc_vasprintf(NULL, fmt, arglist); 4.264 + va_end(arglist); 4.265 + 4.266 + log("corruption detected by connection %i: err %s: %s", 4.267 + conn ? (int)conn->id : -1, strerror(saved_errno), str); 4.268 + 4.269 +#ifdef TESTING 4.270 + /* Allow them to attach debugger. */ 4.271 + sleep(30); 4.272 +#endif 4.273 + check_store(); 4.274 +} 4.275 + 4.276 + 4.277 static void write_pidfile(const char *pidfile) 4.278 { 4.279 char buf[100];
5.1 --- a/tools/xenstore/xenstored_core.h Thu Mar 02 02:01:17 2006 +0100 5.2 +++ b/tools/xenstore/xenstored_core.h Thu Mar 02 02:09:23 2006 +0100 5.3 @@ -148,10 +148,6 @@ int destroy_tdb(void *_tdb); 5.4 /* Replace the tdb: required for transaction code */ 5.5 bool replace_tdb(const char *newname, TDB_CONTEXT *newtdb); 5.6 5.7 -/* Fail due to excessive corruption, capitalist pigdogs! */ 5.8 -void __attribute__((noreturn)) corrupt(struct connection *conn, 5.9 - const char *fmt, ...); 5.10 - 5.11 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read); 5.12 5.13