xenbits.xensource.com Git - osstest/openstack-nova.git/commitdiff
Stop failed live-migrates getting stuck migrating
author: John Garbutt <john.garbutt@rackspace.com>
Tue, 7 Feb 2017 19:12:50 +0000 (19:12 +0000)
committer: Matt Riedemann <mriedem.os@gmail.com>
Sat, 15 Apr 2017 18:58:32 +0000 (18:58 +0000)
When there are failures in driver.cleanup, we are seeing live-migrations
that get stuck in the live-migrating state. While there has been a patch
to stop the cause listed in the bug this closes, there are other
failures (such as a token timeout when talking to cinder or neutron)
that could trigger this same failure mode.

When we hit an error this late in live-migration, it should be a very
rare event, so it's best to just put the instance and migration into an
error state, and help alert both the operator and API user to the
failure that has occurred.

Closes-Bug: #1662626

Change-Id: Idfdce9e7dd8106af01db0358ada15737cb846395
(cherry picked from commit b56f8fc2d1392f4675a5baae0977e4817a362159)

nova/compute/manager.py
nova/tests/unit/compute/test_compute.py

index c019715903a4f68b3bdfa117a419ba1761dae3e7..ed58725df3c769df86f2c487d1011da3cb714589 100644 (file)
@@ -5396,12 +5396,16 @@ class ComputeManager(manager.Manager):
                                        self._rollback_live_migration,
                                        block_migration, migrate_data)
         except Exception:
-            # Executing live migration
-            # live_migration might raises exceptions, but
-            # nothing must be recovered in this version.
             LOG.exception(_LE('Live migration failed.'), instance=instance)
             with excutils.save_and_reraise_exception():
+                # Put instance and migration into error state,
+                # as it's almost certainly too late to roll back
                 self._set_migration_status(migration, 'error')
+                # first refresh instance as it may have got updated by
+                # post_live_migration_at_destination
+                instance.refresh()
+                self._set_instance_obj_error_state(context, instance,
+                                                   clean_task_state=True)
 
     @wrap_exception()
     @wrap_instance_event(prefix='compute')
index daffcf199fac363ff2b3477614241a3b114e2770..7db8964f423ffe9ab96c5b1b2940acc3f54df247 100644 (file)
@@ -5961,6 +5961,49 @@ class ComputeTestCase(BaseTestCase):
         mock_post.assert_called_once_with(c, instance, False, dest)
         mock_clear.assert_called_once_with(mock.ANY)
 
+    @mock.patch.object(compute_rpcapi.ComputeAPI, 'pre_live_migration')
+    @mock.patch.object(compute_rpcapi.ComputeAPI,
+                       'post_live_migration_at_destination')
+    @mock.patch.object(compute_manager.InstanceEvents,
+                       'clear_events_for_instance')
+    @mock.patch.object(compute_utils, 'EventReporter')
+    @mock.patch('nova.objects.Migration.save')
+    def test_live_migration_handles_errors_correctly(self, mock_save,
+            mock_event, mock_clear, mock_post, mock_pre):
+        # Confirm a driver.cleanup failure errors the instance and migration.
+        # creating instance testdata
+        c = context.get_admin_context()
+        instance = self._create_fake_instance_obj(context=c)
+        instance.host = self.compute.host
+        dest = 'desthost'
+
+        migrate_data = migrate_data_obj.LibvirtLiveMigrateData(
+            is_shared_instance_path=False,
+            is_shared_block_storage=False)
+        mock_pre.return_value = migrate_data
+
+        # start test
+        migration = objects.Migration()
+        with mock.patch.object(self.compute.driver,
+                               'cleanup') as mock_cleanup:
+            mock_cleanup.side_effect = test.TestingException
+
+            self.assertRaises(test.TestingException,
+                self.compute.live_migration,
+                c, dest, instance, False, migration, migrate_data)
+
+        # ensure we have updated the instance and migration objects
+        self.assertEqual(vm_states.ERROR, instance.vm_state)
+        self.assertIsNone(instance.task_state)
+        self.assertEqual("error", migration.status)
+
+        mock_pre.assert_called_once_with(c, instance, False, None,
+                                         dest, migrate_data)
+        self.assertEqual(0, mock_clear.call_count)
+
+        # cleanup
+        instance.destroy()
+
     @mock.patch.object(fake.FakeDriver, 'unfilter_instance')
     @mock.patch.object(compute_rpcapi.ComputeAPI,
                        'post_live_migration_at_destination')