]> xenbits.xensource.com Git - xen.git/commitdiff
libxl/devd: fix a race with concurrent device addition/removal
authorRoger Pau Monne <roger.pau@citrix.com>
Tue, 16 May 2017 07:59:23 +0000 (08:59 +0100)
committerWei Liu <wei.liu2@citrix.com>
Fri, 19 May 2017 13:33:22 +0000 (14:33 +0100)
Current code can free the libxl__device inside of the libxl__ddomain_device
before the addition has finished if a removal happens while an addition is
still in process:

  backend_watch_callback
            |
            v
       add_device
            |                 backend_watch_callback
    (async operation)                   |
            |                           v
            |                     remove_device
            |                           |
            |                           V
            |                    device_complete
            |                 (free libxl__device)
            v
     device_complete
  (deref libxl__device)

Fix this by creating a temporary copy of the libxl__device, that's tracked by
the GC of the nested async operation. This ensures that the libxl__device used
by the async operations cannot be freed while being used.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reported-by: Ian Jackson <ian.jackson@eu.citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Release-acked-by: Julien Grall <julien.grall@arm.com>
tools/libxl/libxl_device.c
tools/libxl/libxl_internal.h

index 5e966762c6c41f0db8114192d4476c81aca55dac..cd4ad05a6f5f10e59d111a2f2bcf56bea9aa3fa6 100644 (file)
@@ -1415,9 +1415,6 @@ static void device_complete(libxl__egc *egc, libxl__ao_device *aodev)
                libxl__device_action_to_string(aodev->action),
                aodev->rc ? "failed" : "succeed");
 
-    if (aodev->action == LIBXL__DEVICE_ACTION_REMOVE)
-        free(aodev->dev);
-
     libxl__nested_ao_free(aodev->ao);
 }
 
@@ -1521,7 +1518,12 @@ static int add_device(libxl__egc *egc, libxl__ao *ao,
 
         GCNEW(aodev);
         libxl__prepare_ao_device(ao, aodev);
-        aodev->dev = dev;
+        /*
+         * Clone the libxl__device to avoid races if remove_device is called
+         * before the device addition has finished.
+         */
+        GCNEW(aodev->dev);
+        *aodev->dev = *dev;
         aodev->action = LIBXL__DEVICE_ACTION_ADD;
         aodev->callback = device_complete;
         libxl__wait_device_connection(egc, aodev);
@@ -1564,7 +1566,12 @@ static int remove_device(libxl__egc *egc, libxl__ao *ao,
 
         GCNEW(aodev);
         libxl__prepare_ao_device(ao, aodev);
-        aodev->dev = dev;
+        /*
+         * Clone the libxl__device to avoid races if there's a add_device
+         * running in parallel.
+         */
+        GCNEW(aodev->dev);
+        *aodev->dev = *dev;
         aodev->action = LIBXL__DEVICE_ACTION_REMOVE;
         aodev->callback = device_complete;
         libxl__initiate_device_generic_remove(egc, aodev);
@@ -1576,7 +1583,6 @@ static int remove_device(libxl__egc *egc, libxl__ao *ao,
                 goto out;
         }
         libxl__device_destroy(gc, dev);
-        free(dev);
         /* Fall through to return > 0, no ao has been dispatched */
     default:
         rc = 1;
@@ -1597,7 +1603,7 @@ static void backend_watch_callback(libxl__egc *egc, libxl__ev_xswatch *watch,
     char *p, *path;
     const char *sstate, *sonline;
     int state, online, rc, num_devs;
-    libxl__device *dev = NULL;
+    libxl__device *dev;
     libxl__ddomain_device *ddev = NULL;
     libxl__ddomain_guest *dguest = NULL;
     bool free_ao = false;
@@ -1625,7 +1631,7 @@ static void backend_watch_callback(libxl__egc *egc, libxl__ev_xswatch *watch,
         goto skip;
     online = atoi(sonline);
 
-    dev = libxl__zalloc(NOGC, sizeof(*dev));
+    GCNEW(dev);
     rc = libxl__parse_backend_path(gc, path, dev);
     if (rc)
         goto skip;
@@ -1659,7 +1665,8 @@ static void backend_watch_callback(libxl__egc *egc, libxl__ev_xswatch *watch,
          * to the list of active devices for a given guest.
          */
         ddev = libxl__zalloc(NOGC, sizeof(*ddev));
-        ddev->dev = dev;
+        ddev->dev = libxl__zalloc(NOGC, sizeof(*ddev->dev));
+        *ddev->dev = *dev;
         LIBXL_SLIST_INSERT_HEAD(&dguest->devices, ddev, next);
         LOGD(DEBUG, dev->domid, "Added device %s to the list of active devices",
              path);
@@ -1670,9 +1677,6 @@ static void backend_watch_callback(libxl__egc *egc, libxl__ev_xswatch *watch,
         /*
          * Removal of an active device, remove it from the list and
          * free it's data structures if they are no longer needed.
-         *
-         * The free of the associated libxl__device is left to the
-         * helper remove_device function.
          */
         LIBXL_SLIST_REMOVE(&dguest->devices, ddev, libxl__ddomain_device,
                            next);
@@ -1682,6 +1686,7 @@ static void backend_watch_callback(libxl__egc *egc, libxl__ev_xswatch *watch,
         if (rc > 0)
             free_ao = true;
 
+        free(ddev->dev);
         free(ddev);
         /* If this was the last device in the domain, remove it from the list */
         num_devs = dguest->num_vifs + dguest->num_vbds + dguest->num_qdisks;
@@ -1703,7 +1708,8 @@ static void backend_watch_callback(libxl__egc *egc, libxl__ev_xswatch *watch,
 
 skip:
     libxl__nested_ao_free(nested_ao);
-    free(dev);
+    if (ddev)
+        free(ddev->dev);
     free(ddev);
     free(dguest);
     return;
index 5d082c570482381ce4445e0f6a28b16de7e87677..afe66528474ca4942d7dc6ac6b4d5bea73f8894c 100644 (file)
@@ -501,6 +501,10 @@ struct libxl__ctx {
     libxl_version_info version_info;
 };
 
+/*
+ * libxl__device is a transparent structure that doesn't contain private fields
+ * or external memory references, and as such can be copied by assignment.
+ */
 typedef struct {
     uint32_t backend_devid;
     uint32_t backend_domid;