ia64/xen-unstable

changeset 9129:2274f293af41

Added further integrity checking, this time checking for duplicate directory
entries and for orphaned nodes in the database.

Added two flags, -R and -L, to disable the recovery code and the removal of
/local at start-up. This makes it much easier to analyse corrupted tdb files.

Added some missing talloc_free calls in the previous integrity checking code.

Removed the transaction handle from the trace_io message -- unfortunately,
the transaction is always null at this point, as it's not yet been looked up.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@leeni.uk.xensource.com
date Fri Mar 03 15:37:28 2006 +0100 (2006-03-03)
parents 871f768aadc6
children c8c0f99193d4
files tools/xenstore/Makefile tools/xenstore/xenstored_core.c
line diff
     1.1 --- a/tools/xenstore/Makefile	Fri Mar 03 15:32:42 2006 +0100
     1.2 +++ b/tools/xenstore/Makefile	Fri Mar 03 15:37:28 2006 +0100
     1.3 @@ -34,7 +34,7 @@ test_interleaved_transactions: test_inte
     1.4  
     1.5  testcode: xs_test xenstored_test xs_random
     1.6  
     1.7 -xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o
     1.8 +xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
     1.9  	$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
    1.10  
    1.11  $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so
     2.1 --- a/tools/xenstore/xenstored_core.c	Fri Mar 03 15:32:42 2006 +0100
     2.2 +++ b/tools/xenstore/xenstored_core.c	Fri Mar 03 15:37:28 2006 +0100
     2.3 @@ -51,11 +51,16 @@
     2.4  #include "xenctrl.h"
     2.5  #include "tdb.h"
     2.6  
     2.7 +#include "hashtable.h"
     2.8 +
     2.9 +
    2.10  extern int eventchn_fd; /* in xenstored_domain.c */
    2.11  
    2.12 -static bool verbose;
    2.13 +static bool verbose = false;
    2.14  LIST_HEAD(connections);
    2.15  static int tracefd = -1;
    2.16 +static bool recovery = true;
    2.17 +static bool remove_local = true;
    2.18  static int reopen_log_pipe[2];
    2.19  static char *tracefile = NULL;
    2.20  static TDB_CONTEXT *tdb_ctx;
    2.21 @@ -201,8 +206,8 @@ static void trace_io(const struct connec
    2.22  	now = time(NULL);
    2.23  	tm = localtime(&now);
    2.24  
    2.25 -	trace("%s %p %p %04d%02d%02d %02d:%02d:%02d %s (", prefix, conn,
    2.26 -	      conn->transaction, tm->tm_year + 1900, tm->tm_mon + 1,
    2.27 +	trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (", prefix, conn,
    2.28 +	      tm->tm_year + 1900, tm->tm_mon + 1,
    2.29  	      tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
    2.30  	      sockmsg_string(data->hdr.msg.type));
    2.31  	
    2.32 @@ -946,12 +951,24 @@ static void delete_node(struct connectio
    2.33  	}
    2.34  }
    2.35  
    2.36 +
    2.37  /* Delete memory using memmove. */
    2.38  static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
    2.39  {
    2.40  	memmove(mem + off, mem + off + len, total - off - len);
    2.41  }
    2.42  
    2.43 +
    2.44 +static bool remove_child_entry(struct connection *conn, struct node *node,
    2.45 +			       size_t offset)
    2.46 +{
    2.47 +	size_t childlen = strlen(node->children + offset);
    2.48 +	memdel(node->children, offset, childlen + 1, node->childlen);
    2.49 +	node->childlen -= childlen + 1;
    2.50 +	return write_node(conn, node);
    2.51 +}
    2.52 +
    2.53 +
    2.54  static bool delete_child(struct connection *conn,
    2.55  			 struct node *node, const char *childname)
    2.56  {
    2.57 @@ -959,10 +976,7 @@ static bool delete_child(struct connecti
    2.58  
    2.59  	for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
    2.60  		if (streq(node->children+i, childname)) {
    2.61 -			memdel(node->children, i, strlen(childname) + 1,
    2.62 -			       node->childlen);
    2.63 -			node->childlen -= strlen(childname) + 1;
    2.64 -			return write_node(conn, node);
    2.65 +			return remove_child_entry(conn, node, i);
    2.66  		}
    2.67  	}
    2.68  	corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
    2.69 @@ -997,6 +1011,7 @@ static void internal_rm(const char *name
    2.70  	struct node *node = read_node(NULL, tname);
    2.71  	if (node)
    2.72  		_rm(NULL, node, tname);
    2.73 +	talloc_free(node);
    2.74  	talloc_free(tname);
    2.75  }
    2.76  
    2.77 @@ -1424,12 +1439,14 @@ static void setup_structure(void)
    2.78  
    2.79  		check_store();
    2.80  
    2.81 -		internal_rm("/local");
    2.82 -		create_node(NULL, tlocal, NULL, 0);
    2.83 +		if (remove_local) {
    2.84 +			internal_rm("/local");
    2.85 +			create_node(NULL, tlocal, NULL, 0);
    2.86 +
    2.87 +			check_store();
    2.88 +		}
    2.89  
    2.90  		talloc_free(tlocal);
    2.91 -
    2.92 -		check_store();
    2.93  	}
    2.94  	else {
    2.95  		tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
    2.96 @@ -1445,6 +1462,26 @@ static void setup_structure(void)
    2.97  	}
    2.98  }
    2.99  
   2.100 +
   2.101 +static unsigned int hash_from_key_fn(void *k)
   2.102 +{
   2.103 +	char *str = k;
   2.104 +        unsigned int hash = 5381;
   2.105 +        char c;
   2.106 +
   2.107 +        while ((c = *str++))
   2.108 +		hash = ((hash << 5) + hash) + (unsigned int)c;
   2.109 +
   2.110 +        return hash;
   2.111 +}
   2.112 +
   2.113 +
   2.114 +static int keys_equal_fn(void *key1, void *key2)
   2.115 +{
   2.116 +	return 0 == strcmp((char *)key1, (char *)key2);
   2.117 +}
   2.118 +
   2.119 +
   2.120  static char *child_name(const char *s1, const char *s2)
   2.121  {
   2.122  	if (strcmp(s1, "/")) {
   2.123 @@ -1455,13 +1492,40 @@ static char *child_name(const char *s1, 
   2.124  	}
   2.125  }
   2.126  
   2.127 -static void check_store_(const char *name)
   2.128 +
   2.129 +static void remember_string(struct hashtable *hash, const char *str)
   2.130 +{
   2.131 +	char *k = malloc(strlen(str) + 1);
   2.132 +	strcpy(k, str);
   2.133 +	hashtable_insert(hash, k, (void *)1);
   2.134 +}
   2.135 +
   2.136 +
   2.137 +/**
   2.138 + * A node has a children field that names the children of the node, separated
   2.139 + * by NULs.  We check whether there are entries in there that are duplicated
   2.140 + * (and if so, delete the second one), and whether there are any that do not
   2.141 + * have a corresponding child node (and if so, delete them).  Each valid child
   2.142 + * is then recursively checked.
   2.143 + *
   2.144 + * No deleting is performed if the recovery flag is cleared (i.e. -R was
   2.145 + * passed on the command line).
   2.146 + *
   2.147 + * As we go, we record each node in the given reachable hashtable.  These
   2.148 + * entries will be used later in clean_store.
   2.149 + */
   2.150 +static void check_store_(const char *name, struct hashtable *reachable)
   2.151  {
   2.152  	struct node *node = read_node(NULL, name);
   2.153  
   2.154  	if (node) {
   2.155  		size_t i = 0;
   2.156  
   2.157 +		struct hashtable * children =
   2.158 +			create_hashtable(16, hash_from_key_fn, keys_equal_fn);
   2.159 +
   2.160 +		remember_string(reachable, name);
   2.161 +
   2.162  		while (i < node->childlen) {
   2.163  			size_t childlen = strlen(node->children + i);
   2.164  			char * childname = child_name(node->name,
   2.165 @@ -1469,21 +1533,39 @@ static void check_store_(const char *nam
   2.166  			struct node *childnode = read_node(NULL, childname);
   2.167  			
   2.168  			if (childnode) {
   2.169 -				check_store_(childname);
   2.170 -				i += childlen + 1;
   2.171 +				if (hashtable_search(children, childname)) {
   2.172 +					log("check_store: '%s' is duplicated!",
   2.173 +					    childname);
   2.174 +
   2.175 +					if (recovery) {
   2.176 +						remove_child_entry(NULL, node,
   2.177 +								   i);
   2.178 +						i -= childlen + 1;
   2.179 +					}
   2.180 +				}
   2.181 +				else {
   2.182 +					remember_string(children, childname);
   2.183 +					check_store_(childname, reachable);
   2.184 +				}
   2.185  			}
   2.186  			else {
   2.187  				log("check_store: No child '%s' found!\n",
   2.188  				    childname);
   2.189  
   2.190 -				memdel(node->children, i, childlen + 1,
   2.191 -				       node->childlen);
   2.192 -				node->childlen -= childlen + 1;
   2.193 -				write_node(NULL, node);
   2.194 +				if (recovery) {
   2.195 +					remove_child_entry(NULL, node, i);
   2.196 +					i -= childlen + 1;
   2.197 +				}
   2.198  			}
   2.199  
   2.200 +			talloc_free(childnode);
   2.201  			talloc_free(childname);
   2.202 +			i += childlen + 1;
   2.203  		}
   2.204 +
   2.205 +		hashtable_destroy(children, 0 /* Don't free values (they are
   2.206 +						 all (void *)1) */);
   2.207 +		talloc_free(node);
   2.208  	}
   2.209  	else {
   2.210  		/* Impossible, because no database should ever be without the
   2.211 @@ -1495,12 +1577,51 @@ static void check_store_(const char *nam
   2.212  }
   2.213  
   2.214  
   2.215 +/**
   2.216 + * Helper to clean_store below.
   2.217 + */
   2.218 +static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
   2.219 +			void *private)
   2.220 +{
   2.221 +	struct hashtable *reachable = private;
   2.222 +	char * name = talloc_strndup(NULL, key.dptr, key.dsize);
   2.223 +
   2.224 +	if (!hashtable_search(reachable, name)) {
   2.225 +		log("clean_store: '%s' is orphaned!", name);
   2.226 +		if (recovery) {
   2.227 +			tdb_delete(tdb, key);
   2.228 +		}
   2.229 +	}
   2.230 +
   2.231 +	talloc_free(name);
   2.232 +
   2.233 +	return 0;
   2.234 +}
   2.235 +
   2.236 +
   2.237 +/**
   2.238 + * Given the list of reachable nodes, iterate over the whole store, and
   2.239 + * remove any that were not reached.
   2.240 + */
   2.241 +static void clean_store(struct hashtable *reachable)
   2.242 +{
   2.243 +	tdb_traverse(tdb_ctx, &clean_store_, reachable);
   2.244 +}
   2.245 +
   2.246 +
   2.247  static void check_store()
   2.248  {
   2.249  	char * root = talloc_strdup(NULL, "/");
   2.250 +	struct hashtable * reachable =
   2.251 +		create_hashtable(16, hash_from_key_fn, keys_equal_fn);
   2.252 + 
   2.253  	log("Checking store ...");
   2.254 -	check_store_(root);
   2.255 +	check_store_(root, reachable);
   2.256 +	clean_store(reachable);
   2.257  	log("Checking store complete.");
   2.258 +
   2.259 +	hashtable_destroy(reachable, 0 /* Don't free values (they are all
   2.260 +					  (void *)1) */);
   2.261  	talloc_free(root);
   2.262  }
   2.263  
   2.264 @@ -1589,6 +1710,9 @@ static void usage(void)
   2.265  "  --no-fork           to request that the daemon does not fork,\n"
   2.266  "  --output-pid        to request that the pid of the daemon is output,\n"
   2.267  "  --trace-file <file> giving the file for logging, and\n"
   2.268 +"  --no-recovery       to request that no recovery should be attempted when\n"
   2.269 +"                      the store is corrupted (debug only),\n"
   2.270 +"  --preserve-local    to request that /local is preserved on start-up,\n"
   2.271  "  --verbose           to request verbose execution.\n");
   2.272  }
   2.273  
   2.274 @@ -1600,6 +1724,8 @@ static struct option options[] = {
   2.275  	{ "no-fork", 0, NULL, 'N' },
   2.276  	{ "output-pid", 0, NULL, 'P' },
   2.277  	{ "trace-file", 1, NULL, 'T' },
   2.278 +	{ "no-recovery", 0, NULL, 'R' },
   2.279 +	{ "preserve-local", 0, NULL, 'L' },
   2.280  	{ "verbose", 0, NULL, 'V' },
   2.281  	{ NULL, 0, NULL, 0 } };
   2.282  
   2.283 @@ -1615,7 +1741,7 @@ int main(int argc, char *argv[])
   2.284  	bool no_domain_init = false;
   2.285  	const char *pidfile = NULL;
   2.286  
   2.287 -	while ((opt = getopt_long(argc, argv, "DF:HNPT:V", options,
   2.288 +	while ((opt = getopt_long(argc, argv, "DF:HNPT:RLV", options,
   2.289  				  NULL)) != -1) {
   2.290  		switch (opt) {
   2.291  		case 'D':
   2.292 @@ -1633,6 +1759,12 @@ int main(int argc, char *argv[])
   2.293  		case 'P':
   2.294  			outputpid = true;
   2.295  			break;
   2.296 +		case 'R':
   2.297 +			recovery = false;
   2.298 +			break;
   2.299 +		case 'L':
   2.300 +			remove_local = false;
   2.301 +			break;
   2.302  		case 'T':
   2.303  			tracefile = optarg;
   2.304  			break;