ia64/xen-unstable
changeset 5877:1853a6e966bd
Remove ill-conceived concept of watches blocking reply on
connection which did write/mkdir/rm/setperm etc.
This causes deadlocks in real life, and I can't see a sane way
of avoiding them: it is reasonable for someone to ignore watch
notifications while doing other actions, and that means that
we can do other writes. These writes can block pending other
watchers; if one of these is the process blocked awaiting our
ack, we deadlock.
Signed-off-by: Rusty Russel <rusty@rustcorp.com.au>
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
connection which did write/mkdir/rm/setperm etc.
This causes deadlocks in real life, and I can't see a sane way
of avoiding them: it is reasonable for someone to ignore watch
notifications while doing other actions, and that means that
we can do other writes. These writes can block pending other
watchers; if one of these is the process blocked awaiting our
ack, we deadlock.
Signed-off-by: Rusty Russel <rusty@rustcorp.com.au>
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author | cl349@firebug.cl.cam.ac.uk |
---|---|
date | Tue Jul 26 15:26:32 2005 +0000 (2005-07-26) |
parents | b9903985e9b6 |
children | 62e13cae2f58 |
files | tools/xenstore/xenstored_core.c tools/xenstore/xenstored_core.h tools/xenstore/xenstored_transaction.c tools/xenstore/xenstored_watch.c tools/xenstore/xenstored_watch.h |
line diff
1.1 --- a/tools/xenstore/xenstored_core.c Tue Jul 26 15:24:28 2005 +0000 1.2 +++ b/tools/xenstore/xenstored_core.c Tue Jul 26 15:26:32 2005 +0000 1.3 @@ -976,10 +976,7 @@ static void do_write(struct connection * 1.4 } 1.5 1.6 add_change_node(conn->transaction, node, false); 1.7 - if (fire_watches(conn, node, false)) { 1.8 - conn->watch_ack = XS_WRITE; 1.9 - return; 1.10 - } 1.11 + fire_watches(conn, node, false); 1.12 send_ack(conn, XS_WRITE); 1.13 } 1.14 1.15 @@ -1005,10 +1002,7 @@ static void do_mkdir(struct connection * 1.16 } 1.17 1.18 add_change_node(conn->transaction, node, false); 1.19 - if (fire_watches(conn, node, false)) { 1.20 - conn->watch_ack = XS_MKDIR; 1.21 - return; 1.22 - } 1.23 + fire_watches(conn, node, false); 1.24 send_ack(conn, XS_MKDIR); 1.25 } 1.26 1.27 @@ -1046,10 +1040,7 @@ static void do_rm(struct connection *con 1.28 } 1.29 1.30 add_change_node(conn->transaction, node, true); 1.31 - if (fire_watches(conn, node, true)) { 1.32 - conn->watch_ack = XS_RM; 1.33 - return; 1.34 - } 1.35 + fire_watches(conn, node, true); 1.36 send_ack(conn, XS_RM); 1.37 } 1.38 1.39 @@ -1121,10 +1112,7 @@ static void do_set_perms(struct connecti 1.40 } 1.41 1.42 add_change_node(conn->transaction, node, false); 1.43 - if (fire_watches(conn, node, false)) { 1.44 - conn->watch_ack = XS_SET_PERMS; 1.45 - return; 1.46 - } 1.47 + fire_watches(conn, node, false); 1.48 send_ack(conn, XS_SET_PERMS); 1.49 } 1.50 1.51 @@ -1359,12 +1347,6 @@ static void unblock_connections(void) 1.52 consider_message(i); 1.53 } 1.54 break; 1.55 - case WATCHED: 1.56 - if (i->watches_unacked == 0) { 1.57 - i->state = OK; 1.58 - send_ack(i, i->watch_ack); 1.59 - } 1.60 - break; 1.61 case OK: 1.62 break; 1.63 } 1.64 @@ -1389,8 +1371,6 @@ struct connection *new_connection(connwr 1.65 1.66 new->state = OK; 1.67 new->blocked_by = NULL; 1.68 - new->watch_ack = XS_ERROR; 1.69 - new->watches_unacked = 0; 1.70 new->out = new->waiting_reply = NULL; 1.71 new->fd = -1; 1.72 new->id = 0; 1.73 @@ -1471,7 +1451,6 @@ void dump_connection(void) 1.74 printf(" state = %s\n", 1.75 i->state == OK ? "OK" 1.76 : i->state == BLOCKED ? "BLOCKED" 1.77 - : i->state == WATCHED ? "WATCHED" 1.78 : "INVALID"); 1.79 if (i->id) 1.80 printf(" id = %i\n", i->id);
2.1 --- a/tools/xenstore/xenstored_core.h Tue Jul 26 15:24:28 2005 +0000 2.2 +++ b/tools/xenstore/xenstored_core.h Tue Jul 26 15:26:32 2005 +0000 2.3 @@ -51,8 +51,6 @@ enum state 2.4 { 2.5 /* Blocked by transaction. */ 2.6 BLOCKED, 2.7 - /* Waiting for watchers to ack event we caused */ 2.8 - WATCHED, 2.9 /* Completed */ 2.10 OK, 2.11 }; 2.12 @@ -73,12 +71,6 @@ struct connection 2.13 /* Node we are waiting for (if state == BLOCKED) */ 2.14 char *blocked_by; 2.15 2.16 - /* Are we waiting for watches to be acked from an event we caused? */ 2.17 - unsigned int watches_unacked; 2.18 - 2.19 - /* Type of ack to send once watches fired. */ 2.20 - enum xsd_sockmsg_type watch_ack; 2.21 - 2.22 /* Is this a read-only connection? */ 2.23 bool can_write; 2.24
3.1 --- a/tools/xenstore/xenstored_transaction.c Tue Jul 26 15:24:28 2005 +0000 3.2 +++ b/tools/xenstore/xenstored_transaction.c Tue Jul 26 15:26:32 2005 +0000 3.3 @@ -307,7 +307,6 @@ void do_transaction_end(struct connectio 3.4 { 3.5 struct changed_node *i; 3.6 struct transaction *trans; 3.7 - bool fired = false; 3.8 3.9 if (!arg || (!streq(arg, "T") && !streq(arg, "F"))) { 3.10 send_error(conn, EINVAL); 3.11 @@ -337,12 +336,8 @@ void do_transaction_end(struct connectio 3.12 3.13 /* Fire off the watches for everything that changed. */ 3.14 list_for_each_entry(i, &trans->changes, list) 3.15 - fired |= fire_watches(conn, i->node, i->recurse); 3.16 + fire_watches(conn, i->node, i->recurse); 3.17 } 3.18 - 3.19 - if (fired) 3.20 - conn->watch_ack = XS_TRANSACTION_END; 3.21 - else 3.22 - send_ack(conn, XS_TRANSACTION_END); 3.23 + send_ack(conn, XS_TRANSACTION_END); 3.24 } 3.25
4.1 --- a/tools/xenstore/xenstored_watch.c Tue Jul 26 15:24:28 2005 +0000 4.2 +++ b/tools/xenstore/xenstored_watch.c Tue Jul 26 15:26:32 2005 +0000 4.3 @@ -41,9 +41,6 @@ struct watch_event 4.4 /* Data to send (node\0token\0). */ 4.5 unsigned int len; 4.6 char *data; 4.7 - 4.8 - /* Connection which caused watch event (which we are blocking) */ 4.9 - struct connection *cause; 4.10 }; 4.11 4.12 struct watch 4.13 @@ -95,14 +92,10 @@ static int destroy_watch_event(void *_ev 4.14 struct watch_event *event = _event; 4.15 4.16 trace_destroy(event, "watch_event"); 4.17 - assert(event->cause->watches_unacked != 0); 4.18 - /* If it hits zero, will unblock in unblock_connections. */ 4.19 - event->cause->watches_unacked--; 4.20 return 0; 4.21 } 4.22 4.23 -static void add_event(struct connection *cause, struct watch *watch, 4.24 - const char *node) 4.25 +static void add_event(struct watch *watch, const char *node) 4.26 { 4.27 struct watch_event *event; 4.28 4.29 @@ -117,22 +110,20 @@ static void add_event(struct connection 4.30 event->data = talloc_array(event, char, event->len); 4.31 strcpy(event->data, node); 4.32 strcpy(event->data + strlen(node) + 1, watch->token); 4.33 - event->cause = cause; 4.34 - cause->watches_unacked++; 4.35 talloc_set_destructor(event, destroy_watch_event); 4.36 list_add_tail(&event->list, &watch->events); 4.37 trace_create(event, "watch_event"); 4.38 } 4.39 4.40 /* FIXME: we fail to fire on out of memory. Should drop connections. */ 4.41 -bool fire_watches(struct connection *conn, const char *node, bool recurse) 4.42 +void fire_watches(struct connection *conn, const char *node, bool recurse) 4.43 { 4.44 struct connection *i; 4.45 struct watch *watch; 4.46 4.47 /* During transactions, don't fire watches. */ 4.48 if (conn->transaction) 4.49 - return false; 4.50 + return; 4.51 4.52 /* Create an event for each watch. Don't send to self. */ 4.53 list_for_each_entry(i, &connections, list) { 4.54 @@ -141,18 +132,16 @@ bool fire_watches(struct connection *con 4.55 4.56 list_for_each_entry(watch, &i->watches, list) { 4.57 if (is_child(node, watch->node)) 4.58 - add_event(conn, watch, node); 4.59 + add_event(watch, node); 4.60 else if (recurse && is_child(watch->node, node)) 4.61 - add_event(conn, watch, watch->node); 4.62 + add_event(watch, watch->node); 4.63 else 4.64 continue; 4.65 - conn->state = WATCHED; 4.66 /* If connection not doing anything, queue this. */ 4.67 if (!i->out) 4.68 queue_next_event(i); 4.69 } 4.70 } 4.71 - return conn->state == WATCHED; 4.72 } 4.73 4.74 static int destroy_watch(void *_watch)
5.1 --- a/tools/xenstore/xenstored_watch.h Tue Jul 26 15:24:28 2005 +0000 5.2 +++ b/tools/xenstore/xenstored_watch.h Tue Jul 26 15:26:32 2005 +0000 5.3 @@ -33,9 +33,8 @@ bool is_watch_event(struct connection *c 5.4 void queue_next_event(struct connection *conn); 5.5 5.6 /* Fire all watches: recurse means all the children are effected (ie. rm). 5.7 - * Returns true if there were any, meaning connection has to wait. 5.8 */ 5.9 -bool fire_watches(struct connection *conn, const char *node, bool recurse); 5.10 +void fire_watches(struct connection *conn, const char *node, bool recurse); 5.11 5.12 /* Find shortest timeout: if any, reduce tv (may already be set). */ 5.13 void shortest_watch_ack_timeout(struct timeval *tv);