]> xenbits.xensource.com Git - osstest/openstack-nova.git/commitdiff
Cleanup after any failed libvirt spawn
authorDan Smith <dansmith@redhat.com>
Thu, 8 Dec 2016 20:25:37 +0000 (12:25 -0800)
committerDan Smith <dansmith@redhat.com>
Fri, 9 Dec 2016 16:24:46 +0000 (08:24 -0800)
When we go to spawn a libvirt domain, we catch a few types of exceptions
and perform cleanup before failing the operation. For some reason, we
don't do this universally, which means that we leave things like network
devices laying around (from plug_vifs()). If a delete comes later, it
should clean those things up. However, if a subsequent failure prevents
that, and especially if we do a local delete at the API, we'll leak those
interfaces.

As seen in at least one real-world situation, this can cause us to leak
interfaces until we have tens of thousands of them on the system, which
then causes secondary failures.

Since we run the cleanup() routine for certain failures, it certainly
seems appropriate to run it always and not leave residue until a
successful delete is performed.

Closes-Bug: #1648840
Change-Id: Iab5afdf1b5b8d107ea0e5895c24d50712e7dc7b1

nova/tests/unit/virt/libvirt/test_driver.py
nova/virt/libvirt/driver.py

index c570e60e35c26c0afd100e6ec7c9ea46fad3b1db..168b97b20632dca6152d75f3f095a02b0c555774 100644 (file)
@@ -14300,6 +14300,60 @@ class LibvirtConnTestCase(test.NoDBTestCase):
     def test_create_with_network_events_non_neutron(self, is_neutron):
         self._test_create_with_network_events()
 
+    def test_create_with_other_error(self):
+        drvr = libvirt_driver.LibvirtDriver(mock.MagicMock(), False)
+
+        @mock.patch.object(drvr, 'plug_vifs')
+        @mock.patch.object(drvr, 'firewall_driver')
+        @mock.patch.object(drvr, '_create_domain')
+        @mock.patch.object(drvr, '_cleanup_failed_start')
+        def the_test(mock_cleanup, mock_create, mock_fw, mock_plug):
+            instance = objects.Instance(**self.test_instance)
+            mock_create.side_effect = test.TestingException
+            self.assertRaises(test.TestingException,
+                              drvr._create_domain_and_network,
+                              self.context, 'xml', instance, [], None)
+            mock_cleanup.assert_called_once_with(self.context, instance,
+                                                 [], None, None)
+
+        the_test()
+
+    def test_cleanup_failed_start_no_guest(self):
+        drvr = libvirt_driver.LibvirtDriver(mock.MagicMock(), False)
+        with mock.patch.object(drvr, 'cleanup') as mock_cleanup:
+            drvr._cleanup_failed_start(None, None, None, None, None)
+            self.assertTrue(mock_cleanup.called)
+
+    def test_cleanup_failed_start_inactive_guest(self):
+        drvr = libvirt_driver.LibvirtDriver(mock.MagicMock(), False)
+        guest = mock.MagicMock()
+        guest.is_active.return_value = False
+        with mock.patch.object(drvr, 'cleanup') as mock_cleanup:
+            drvr._cleanup_failed_start(None, None, None, None, guest)
+            self.assertTrue(mock_cleanup.called)
+            self.assertFalse(guest.poweroff.called)
+
+    def test_cleanup_failed_start_active_guest(self):
+        drvr = libvirt_driver.LibvirtDriver(mock.MagicMock(), False)
+        guest = mock.MagicMock()
+        guest.is_active.return_value = True
+        with mock.patch.object(drvr, 'cleanup') as mock_cleanup:
+            drvr._cleanup_failed_start(None, None, None, None, guest)
+            self.assertTrue(mock_cleanup.called)
+            self.assertTrue(guest.poweroff.called)
+
+    def test_cleanup_failed_start_failed_poweroff(self):
+        drvr = libvirt_driver.LibvirtDriver(mock.MagicMock(), False)
+        guest = mock.MagicMock()
+        guest.is_active.return_value = True
+        guest.poweroff.side_effect = test.TestingException
+        with mock.patch.object(drvr, 'cleanup') as mock_cleanup:
+            self.assertRaises(test.TestingException,
+                              drvr._cleanup_failed_start,
+                              None, None, None, None, guest)
+            self.assertTrue(mock_cleanup.called)
+            self.assertTrue(guest.poweroff.called)
+
     @mock.patch('nova.volume.encryptors.get_encryption_metadata')
     @mock.patch('nova.virt.libvirt.blockinfo.get_info_from_bdm')
     def test_create_with_bdm(self, get_info_from_bdm, get_encryption_metadata):
index 39d12b49484189be41a026aa5f49c45478e0c350..6facb6f2f76b4d20254f4a76e2b0af1863e8cd17 100644 (file)
@@ -4966,6 +4966,15 @@ class LibvirtDriver(driver.ComputeDriver):
         return [('network-vif-plugged', vif['id'])
                 for vif in network_info if vif.get('active', True) is False]
 
+    def _cleanup_failed_start(self, context, instance, network_info,
+                              block_device_info, guest):
+        try:
+            if guest and guest.is_active():
+                guest.poweroff()
+        finally:
+            self.cleanup(context, instance, network_info=network_info,
+                         block_device_info=block_device_info)
+
     def _create_domain_and_network(self, context, xml, instance, network_info,
                                    disk_info, block_device_info=None,
                                    power_on=True, reboot=False,
@@ -5021,21 +5030,24 @@ class LibvirtDriver(driver.ComputeDriver):
             # Neutron reported failure and we didn't swallow it, so
             # bail here
             with excutils.save_and_reraise_exception():
-                if guest:
-                    guest.poweroff()
-                self.cleanup(context, instance, network_info=network_info,
-                             block_device_info=block_device_info)
+                self._cleanup_failed_start(context, instance, network_info,
+                                           block_device_info, guest)
         except eventlet.timeout.Timeout:
             # We never heard from Neutron
             LOG.warning(_LW('Timeout waiting for vif plugging callback for '
                          'instance %(uuid)s'), {'uuid': instance.uuid},
                      instance=instance)
             if CONF.vif_plugging_is_fatal:
-                if guest:
-                    guest.poweroff()
-                self.cleanup(context, instance, network_info=network_info,
-                             block_device_info=block_device_info)
+                self._cleanup_failed_start(context, instance, network_info,
+                                           block_device_info, guest)
                 raise exception.VirtualInterfaceCreateException()
+        except Exception:
+            # Any other error, be sure to clean up
+            LOG.error(_LE('Failed to start libvirt guest'),
+                      instance=instance)
+            with excutils.save_and_reraise_exception():
+                self._cleanup_failed_start(context, instance, network_info,
+                                           block_device_info, guest)
 
         # Resume only if domain has been paused
         if pause: