]> xenbits.xensource.com Git - xen.git/commitdiff
xenstored: apply a write transaction rate limit
authorIan Jackson <ian.jackson@eu.citrix.com>
Sat, 18 Mar 2017 17:12:39 +0000 (17:12 +0000)
committerIan Jackson <Ian.Jackson@eu.citrix.com>
Wed, 5 Apr 2017 14:26:21 +0000 (15:26 +0100)
This avoids a rogue client being able to stall another client (e.g. the
toolstack) indefinitely.

This is XSA-206.

Reported-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com>
tools/xenstore/Makefile
tools/xenstore/xenstored_core.c
tools/xenstore/xenstored_core.h
tools/xenstore/xenstored_domain.c
tools/xenstore/xenstored_domain.h
tools/xenstore/xenstored_transaction.c

index 262f401146909fe0ed67a0e67363797e5629004f..0622c63bff09b803721fc21f783ac17d04e34487 100644 (file)
@@ -19,6 +19,7 @@ XENSTORED_OBJS_$(CONFIG_NetBSD) = xenstored_netbsd.o xenstored_posix.o
 XENSTORED_OBJS_$(CONFIG_MiniOS) = xenstored_minios.o
 
 XENSTORED_OBJS += $(XENSTORED_OBJS_y)
+LDLIBS_xenstored += -lrt
 
 ifneq ($(XENSTORE_STATIC_CLIENTS),y)
 LIBXENSTORE := libxenstore.so
@@ -61,7 +62,7 @@ init-xenstore-domain: init-xenstore-domain.o $(LIBXENSTORE)
        $(CC) $(LDFLAGS) $^ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) -o $@ $(APPEND_LDFLAGS)
 
 xenstored: $(XENSTORED_OBJS)
-       $(CC) $(LDFLAGS) $^ $(LDLIBS_libxenctrl) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS)
+       $(CC) $(LDFLAGS) $^ $(LDLIBS_libxenctrl) $(LDLIBS_xenstored) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS)
 
 xenstored.a: $(XENSTORED_OBJS)
        $(AR) cr $@ $^
index 2324e537910383dcca8c9fff25f97d9816d46b8a..beb630b2ed9ca12a9394a11a8b90f8187e6021fc 100644 (file)
@@ -342,6 +342,7 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx,
                           int *ptimeout)
 {
        struct connection *conn;
+       struct wrl_timestampt now;
 
        if (fds)
                memset(fds, 0, sizeof(struct pollfd) * current_array_size);
@@ -361,8 +362,11 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx,
                xce_pollfd_idx = set_fd(xc_evtchn_fd(xce_handle),
                                        POLLIN|POLLPRI);
 
+       wrl_gettime_now(&now);
+
        list_for_each_entry(conn, &connections, list) {
                if (conn->domain) {
+                       wrl_check_timeout(conn->domain, now, ptimeout);
                        if (domain_can_read(conn) ||
                            (domain_can_write(conn) &&
                             !list_empty(&conn->out_list)))
@@ -795,6 +799,7 @@ static void delete_node_single(struct connection *conn, struct node *node)
                corrupt(conn, "Could not delete '%s'", node->name);
                return;
        }
+
        domain_entry_dec(conn, node);
 }
 
@@ -934,6 +939,7 @@ static void do_write(struct connection *conn, struct buffered_data *in)
        }
 
        add_change_node(conn->transaction, name, false);
+       wrl_apply_debit_direct(conn);
        fire_watches(conn, name, false);
        send_ack(conn, XS_WRITE);
 }
@@ -958,6 +964,7 @@ static void do_mkdir(struct connection *conn, const char *name)
                        return;
                }
                add_change_node(conn->transaction, name, false);
+               wrl_apply_debit_direct(conn);
                fire_watches(conn, name, false);
        }
        send_ack(conn, XS_MKDIR);
@@ -1083,6 +1090,7 @@ static void do_rm(struct connection *conn, const char *name)
 
        if (_rm(conn, node, name)) {
                add_change_node(conn->transaction, name, true);
+               wrl_apply_debit_direct(conn);
                fire_watches(conn, name, true);
                send_ack(conn, XS_RM);
        }
@@ -1158,6 +1166,7 @@ static void do_set_perms(struct connection *conn, struct buffered_data *in)
        }
 
        add_change_node(conn->transaction, name, false);
+       wrl_apply_debit_direct(conn);
        fire_watches(conn, name, false);
        send_ack(conn, XS_SET_PERMS);
 }
index cfbcf6f3c3a7555cd610783aa5da51b2a464918c..fb4d0e0b45fbd5db5a21bd81faa70ede4d79e215 100644 (file)
 #include "list.h"
 #include "tdb.h"
 
+#define MIN(a, b) (((a) < (b))? (a) : (b)) /* NB: evaluates each argument twice */
+/* Write-rate-limit credit; signed so that an account can be in debt. */
+typedef int32_t wrl_creditt;
+#define WRL_CREDIT_MAX (1000*1000*1000)
+/* ^ satisfies non-overflow condition for wrl_xfer_credit */
+
 struct buffered_data
 {
        struct list_head list;
index f24bd6bd188bfcf12a6af63c0b54ca6100248420..16c303e51645f991b1169518355289b61032e2cc 100644 (file)
@@ -23,6 +23,7 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include <xenctrl.h>
+#include <time.h>
 
 #include "utils.h"
 #include "talloc.h"
@@ -75,6 +76,10 @@ struct domain
 
        /* number of watch for this domain */
        int nbwatch;
+
+       /* write rate limit */
+       wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */
+       struct wrl_timestampt wrl_timestamp;
 };
 
 static LIST_HEAD(domains);
@@ -207,6 +212,8 @@ static int destroy_domain(void *_domain)
 
        fire_watches(NULL, "@releaseDomain", false);
 
+       wrl_domain_destroy(domain);
+
        return 0;
 }
 
@@ -254,6 +261,9 @@ void handle_event(void)
 bool domain_can_read(struct connection *conn)
 {
        struct xenstore_domain_interface *intf = conn->domain->interface;
+       /* An unprivileged domain whose write credit is negative is not read from. */
+       if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0)
+               return false;
        return (intf->req_cons != intf->req_prod);
 }
 
@@ -285,6 +295,8 @@ static struct domain *new_domain(void *context, unsigned int domid,
        domain->domid = domid;
        domain->path = talloc_domain_path(domain, domid);
 
+       wrl_domain_new(domain);
+
        list_add(&domain->list, &domains);
        talloc_set_destructor(domain, destroy_domain);
 
@@ -748,6 +760,209 @@ int domain_watch(struct connection *conn)
                : 0;
 }
 
+static wrl_creditt wrl_config_writecost      = WRL_FACTOR; /* debited per charged write */
+static wrl_creditt wrl_config_rate           = WRL_RATE   * WRL_FACTOR; /* credit per second, shared by all domains */
+static wrl_creditt wrl_config_dburst         = WRL_DBURST * WRL_FACTOR; /* cap on one domain's credit */
+static wrl_creditt wrl_config_gburst         = WRL_GBURST * WRL_FACTOR; /* cap on the shared reserve */
+static wrl_creditt wrl_config_newdoms_dburst =
+                                WRL_DBURST * WRL_NEWDOMS * WRL_FACTOR; /* how far new domains may overdraw the reserve */
+/* Count of live transactions: incremented in do_transaction_start(), decremented in destroy_transaction(). */
+long wrl_ntransactions;
+/* Count of domains holding rate-limit state: wrl_domain_new() increments, wrl_domain_destroy() decrements. */
+static long wrl_ndomains;
+static wrl_creditt wrl_reserve; /* [-wrl_config_newdoms_dburst, +_gburst ] */
+/*
+ * Store the current CLOCK_MONOTONIC time in *now_wt as whole seconds
+ * plus a millisecond remainder; sub-millisecond precision is dropped.
+ * A clock_gettime() failure is reported through barf_perror()
+ * (presumably fatal - confirm against its definition).
+ */
+void wrl_gettime_now(struct wrl_timestampt *now_wt)
+{
+       struct timespec now_ts;
+       int r;
+
+       r = clock_gettime(CLOCK_MONOTONIC, &now_ts);
+       if (r)
+               barf_perror("Could not find time (clock_gettime failed)");
+
+       now_wt->sec = now_ts.tv_sec;
+       now_wt->msec = now_ts.tv_nsec / 1000000; /* nanoseconds -> milliseconds, truncating */
+}
+
+static void wrl_xfer_credit(wrl_creditt *debit,  wrl_creditt debit_floor,
+                           wrl_creditt *credit, wrl_creditt credit_ceil)
+       /*
+        * Transfers zero or more credit from "debit" to "credit".
+        * Transfers as much as possible while maintaining
+        * debit >= debit_floor and credit <= credit_ceil.
+        * (If that's violated already, does nothing.)
+        *
+        *  debit:       account to take from (updated in place)
+        *  debit_floor: lowest value *debit may be reduced to
+        *  credit:      account to give to (updated in place)
+        *  credit_ceil: highest value *credit may be raised to
+        *
+        * Sufficient conditions to avoid overflow, any of:
+        *  |every argument| <= 0x3fffffff
+        *  |every argument| <= 1E9
+        *  |every argument| <= WRL_CREDIT_MAX
+        * (And this condition is preserved.)
+        */
+{
+       wrl_creditt xfer = MIN( *debit      - debit_floor,
+                               credit_ceil - *credit      ); /* the tighter of the two headrooms */
+       if (xfer > 0) { /* non-positive: a bound is already reached or violated */
+               *debit -= xfer;
+               *credit += xfer;
+       }
+}
+/*
+ * Set up the write-rate-limit state of a new domain: zero credit, a
+ * timestamp of "now", and one more entry in the domain count.  The
+ * domain is then given initial credit out of the shared reserve.
+ */
+void wrl_domain_new(struct domain *domain)
+{
+       domain->wrl_credit = 0;
+       wrl_gettime_now(&domain->wrl_timestamp);
+       wrl_ndomains++;
+       /* Steal up to DBURST from the reserve; for this purpose the
+          reserve may be overdrawn down to -wrl_config_newdoms_dburst */
+       wrl_xfer_credit(&wrl_reserve, -wrl_config_newdoms_dburst,
+                       &domain->wrl_credit, wrl_config_dburst);
+}
+/*
+ * Drop a domain from the write-rate-limit accounting: decrement the
+ * domain count and hand any positive credit it still holds back to
+ * the shared reserve.
+ */
+void wrl_domain_destroy(struct domain *domain)
+{
+       wrl_ndomains--;
+       /*
+        * Don't bother recalculating domain's credit - this just
+        * means we don't give the reserve the ending domain's credit
+        * for time elapsed since last update.
+        *
+        * NOTE(review): the reserve's ceiling used here is
+        * wrl_config_dburst, whereas wrl_credit_update() caps the
+        * reserve at wrl_config_gburst - confirm this is intended.
+        */
+       wrl_xfer_credit(&domain->wrl_credit, 0,
+                       &wrl_reserve, wrl_config_dburst);
+}
+
+/*
+ * Accrue write credit to "domain" for the time elapsed between its
+ * stored wrl_timestamp and "now", then rebalance against the shared
+ * reserve:
+ *  - a domain in debt is topped up from the reserve, to at most 0;
+ *  - credit above wrl_config_dburst is moved to the reserve, which is
+ *    itself capped at wrl_config_gburst; any remainder is discarded.
+ * Finally the domain's timestamp is advanced to "now".
+ *
+ *  domain: domain whose wrl_credit and wrl_timestamp are updated
+ *  now:    current time, as filled in by wrl_gettime_now()
+ */
+void wrl_credit_update(struct domain *domain, struct wrl_timestampt now)
+{
+       /*
+        * We want to calculate
+        *    credit += (now - timestamp) * RATE / ndoms;
+        * But we want it to saturate, and to avoid floating point.
+        * To avoid rounding errors from constantly adding small
+        * amounts of credit, we only add credit for whole milliseconds.
+        */
+       long seconds      = now.sec -  domain->wrl_timestamp.sec;
+       long milliseconds = now.msec - domain->wrl_timestamp.msec;
+       long msec;
+       int64_t num, denom;
+       wrl_creditt surplus;
+
+       seconds = MIN(seconds, 1000*1000); /* arbitrary, prevents overflow */
+       msec = seconds * 1000 + milliseconds;
+
+       if (msec < 0)
+                /* shouldn't happen with CLOCK_MONOTONIC */
+               msec = 0;
+
+       /* 32x32 -> 64 cannot overflow */
+       num   = (int64_t)msec * wrl_config_rate;
+       /* wrl_ndomains is expected to be >= 1 here: it counts this domain */
+       denom = (int64_t)wrl_ndomains * 1000;
+       /* num / denom <= 1E6 * wrl_config_rate, so with
+          reasonable wrl_config_rate, num / denom << 2^64 */
+
+       /* at last! */
+       domain->wrl_credit = MIN( (int64_t)domain->wrl_credit + num / denom,
+                                 WRL_CREDIT_MAX );
+       /* (maybe briefly violating the DBURST cap on wrl_credit) */
+
+       /* maybe take from the reserve to make us nonnegative */
+       wrl_xfer_credit(&wrl_reserve,        0,
+                       &domain->wrl_credit, 0);
+
+       /* return any surplus (over DBURST) to the reserve */
+       surplus = 0;
+       wrl_xfer_credit(&domain->wrl_credit, wrl_config_dburst,
+                       &surplus,            WRL_CREDIT_MAX);
+       wrl_xfer_credit(&surplus,     0,
+                       &wrl_reserve, wrl_config_gburst);
+       /* surplus is now implicitly discarded */
+
+       domain->wrl_timestamp = now;
+
+       /* domid is printed as unsigned, matching the other wrl traces */
+       trace("wrl: dom %4u %6ld  msec  %9ld credit   %9ld reserve"
+             "  %9ld discard\n",
+             domain->domid,
+             msec,
+             (long)domain->wrl_credit, (long)wrl_reserve,
+             (long)surplus);
+}
+                       
+/*
+ * Bring the domain's write credit up to date as of "now" and, if the
+ * domain is still in debt (credit < 0), lower *ptimeout so that the
+ * caller wakes up by the time the debt is expected to be repaid.
+ *
+ *  domain:   domain whose credit is updated and examined
+ *  now:      current time, as filled in by wrl_gettime_now()
+ *  ptimeout: in/out timeout in milliseconds.  -1 is treated as "no
+ *            timeout chosen yet" and 0 as "wake immediately"; the
+ *            value is only ever lowered (or set from -1).
+ */
+void wrl_check_timeout(struct domain *domain,
+                      struct wrl_timestampt now,
+                      int *ptimeout)
+{
+       uint64_t num, denom;
+       int wakeup;
+
+       wrl_credit_update(domain, now);
+
+       if (domain->wrl_credit >= 0)
+               /* not blocked */
+               return;
+
+       if (!*ptimeout)
+               /* already decided on immediate wakeup,
+                  so no need to calculate our timeout */
+               return;
+
+       /* calculate  wakeup = now + -credit / (RATE / ndoms); */
+
+       /*
+        * Widen to 64 bits before negating and multiplying, so the
+        * arithmetic cannot overflow whatever the size of the debt
+        * (domains that domain_can_read() does not block can run up
+        * more than one transaction's worth).  credit < 0 here, so
+        * the negated value is positive.
+        */
+       num   = (uint64_t)(-(int64_t)domain->wrl_credit) * 1000 * wrl_ndomains;
+       denom = wrl_config_rate;
+
+       wakeup = MIN( num / denom /* uint64_t */, INT_MAX );
+       if (*ptimeout==-1 || wakeup < *ptimeout)
+               *ptimeout = wakeup;
+
+       trace("wrl: domain %u credit=%ld (reserve=%ld) SLEEPING for %d\n",
+             domain->domid,
+             (long)domain->wrl_credit, (long)wrl_reserve,
+             wakeup);
+}
+/*
+ * Charge "domain" the cost of one write: its credit is first brought
+ * up to date for the current time, then reduced by
+ * wrl_config_writecost (which may leave it negative).
+ * A NULL domain is not charged.
+ */
+void wrl_apply_debit_actual(struct domain *domain)
+{
+       struct wrl_timestampt now;
+
+       if (!domain)
+               /* sockets escape the write rate limit */
+               return;
+
+       wrl_gettime_now(&now);
+       wrl_credit_update(domain, now);
+
+       domain->wrl_credit -= wrl_config_writecost;
+       trace("wrl: domain %u credit=%ld (reserve=%ld)\n",
+             domain->domid,
+             (long)domain->wrl_credit, (long)wrl_reserve);
+}
+/*
+ * Charge the connection's domain for a write made outside of any
+ * transaction.  Nothing is charged when there is no connection, when
+ * the connection is inside a transaction, or when no transaction is
+ * open at all.
+ */
+void wrl_apply_debit_direct(struct connection *conn)
+{
+       if (!conn)
+               /* some writes are generated internally */
+               return;
+
+       if (conn->transaction)
+               /* these are accounted for when the transaction ends */
+               return;
+
+       if (!wrl_ntransactions)
+               /* no transaction is open, so we don't conflict with anyone */
+               return;
+
+       wrl_apply_debit_actual(conn->domain);
+}
+/*
+ * Charge the connection's domain for committing a transaction, but
+ * only if at least one other transaction is open at the same time.
+ */
+void wrl_apply_debit_trans_commit(struct connection *conn)
+{
+       if (wrl_ntransactions <= 1)
+               /* our own transaction appears in the counter */
+               return;
+
+       wrl_apply_debit_actual(conn->domain);
+}
+
 /*
  * Local variables:
  *  c-file-style: "linux"
index 9e2afaea5e2bef0658a7b14319c74f8c7ed1a2a3..a0085543cde505cf291341b67233de12470989d6 100644 (file)
@@ -66,4 +66,29 @@ void domain_watch_inc(struct connection *conn);
 void domain_watch_dec(struct connection *conn);
 int domain_watch(struct connection *conn);
 
+/*
+ * Write rate limiting
+ *
+ * The WRL_* tunables below are expressed in units of writes; the
+ * implementation scales them by WRL_FACTOR to obtain credit values.
+ */
+
+#define WRL_FACTOR   1000 /* for fixed-point arithmetic */
+#define WRL_RATE      200 /* writes per second, shared between all domains */
+#define WRL_DBURST     10 /* cap on a single domain's accumulated credit */
+#define WRL_GBURST   1000 /* cap on the shared reserve */
+#define WRL_NEWDOMS     5 /* multiples of DBURST the reserve may be overdrawn by for new domains */
+/* Point in time as stored by wrl_gettime_now(). */
+struct wrl_timestampt {
+       time_t sec; /* whole seconds */
+       int msec;   /* millisecond remainder */
+};
+/* Number of transactions currently open; maintained by the transaction code. */
+extern long wrl_ntransactions;
+
+void wrl_gettime_now(struct wrl_timestampt *now_ts);
+void wrl_domain_new(struct domain *domain);
+void wrl_domain_destroy(struct domain *domain);
+void wrl_credit_update(struct domain *domain, struct wrl_timestampt now);
+void wrl_check_timeout(struct domain *domain,
+                       struct wrl_timestampt now,
+                       int *ptimeout);
+void wrl_apply_debit_direct(struct connection *conn);
+void wrl_apply_debit_trans_commit(struct connection *conn);
+
 #endif /* _XENSTORED_DOMAIN_H */
index 50a32fbcba0648bae9e9ce0a04a1d4c471a11ac9..4ddc8c8dd13c42749b8a639d68e54a1fe0720c11 100644 (file)
@@ -117,6 +117,7 @@ static int destroy_transaction(void *_transaction)
 {
        struct transaction *trans = _transaction;
 
+       wrl_ntransactions--;
        trace_destroy(trans, "transaction");
        if (trans->tdb)
                tdb_close(trans->tdb);
@@ -180,6 +181,7 @@ void do_transaction_start(struct connection *conn, struct buffered_data *in)
        talloc_steal(conn, trans);
        talloc_set_destructor(trans, destroy_transaction);
        conn->transaction_started++;
+       wrl_ntransactions++;
 
        snprintf(id_str, sizeof(id_str), "%u", trans->id);
        send_reply(conn, XS_TRANSACTION_START, id_str, strlen(id_str)+1);
@@ -214,6 +216,9 @@ void do_transaction_end(struct connection *conn, const char *arg)
                        send_error(conn, EAGAIN);
                        return;
                }
+
+               wrl_apply_debit_trans_commit(conn);
+
                if (!replace_tdb(trans->tdb_name, trans->tdb)) {
                        send_error(conn, errno);
                        return;