xenbits.xensource.com Git - osstest/openstack-nova.git/commitdiff
Cache database and message queue connection objects
author melanie witt <melwittt@gmail.com>
Tue, 16 May 2017 10:25:42 +0000 (10:25 +0000)
committer melanie witt <melwittt@gmail.com>
Fri, 19 May 2017 21:54:25 +0000 (21:54 +0000)
Recently in the gate we have seen a trace on some work-in-progress
patches:

  OperationalError: (pymysql.err.OperationalError)
                    (1040, u'Too many connections')

and at least one operator has reported that the number of database
connections increased significantly going from Mitaka to Newton.

It was suspected that the increase was caused by creating new oslo.db
transaction context managers on-the-fly when switching database
connections for cells. Comparing the dstat --tcp output of runs of the
gate-tempest-dsvm-neutron-full-ubuntu-xenial job with and without
caching of the database connections showed 445 active TCP connections
with caching versus 1495 active TCP connections without it [1].

This adds caching of the oslo.db transaction context managers and the
oslo.messaging transports to avoid creating a large number of objects
that are not being garbage-collected as expected.

Closes-Bug: #1691545

[1] https://docs.google.com/spreadsheets/d/1DIfFfX3kaA_SRoCM-aO7BN4IBEShChXLztOBFeKryt4/edit?usp=sharing

 Conflicts:
nova/context.py
nova/tests/unit/test_context.py

NOTE(melwitt): Conflicts caused by the fact that the set_target_cell
function doesn't exist in Ocata and message queue connections were
not stored on the context in Ocata.

Change-Id: I17e0eb836dd87aac5859f506e7d771d42753d31a
(cherry picked from commit 47fa88d94754fcdad6bb132b45196b4d44c0f4cd)

nova/context.py
nova/test.py
nova/tests/unit/test_context.py
releasenotes/notes/bug-1691545-1acd6512effbdffb.yaml [new file with mode: 0644]

index 02549f31b13bd4bbdd06fb44ba9508cbc0f93f8d..cbd50629b43a87977972e482370fe42d08a2f619 100644 (file)
@@ -34,6 +34,10 @@ from nova import policy
 from nova import utils
 
 LOG = logging.getLogger(__name__)
+# TODO(melwitt): This cache should be cleared whenever WSGIService receives a
+# SIGHUP and periodically based on an expiration time. Currently, none of the
+# cell caches are purged, so neither is this one, for now.
+CELL_CACHE = {}
 
 
 class _ContextAuthPlugin(plugin.BaseAuthPlugin):
@@ -366,11 +370,27 @@ def target_cell(context, cell_mapping):
     :param context: The RequestContext to add connection information
     :param cell_mapping: A objects.CellMapping object
     """
+    global CELL_CACHE
+
     original_db_connection = context.db_connection
     # avoid circular import
     from nova import db
-    db_connection_string = cell_mapping.database_connection
-    context.db_connection = db.create_context_manager(db_connection_string)
+
+    # Synchronize access to the cache by multiple API workers.
+    @utils.synchronized(cell_mapping.uuid)
+    def get_or_set_cached_cell_and_set_connections():
+        try:
+            cell_db_conn = CELL_CACHE[cell_mapping.uuid]
+        except KeyError:
+            db_connection_string = cell_mapping.database_connection
+            context.db_connection = db.create_context_manager(
+                db_connection_string)
+            CELL_CACHE[cell_mapping.uuid] = context.db_connection
+        else:
+            context.db_connection = cell_db_conn
+
+    get_or_set_cached_cell_and_set_connections()
+
     try:
         yield context
     finally:
index af5e4131bb745d95a8ef17c698211e18db9055bd..e8813aa35a720fc985fbaa62474b98640e3f557a 100644 (file)
@@ -233,6 +233,7 @@ class TestCase(testtools.TestCase):
         # NOTE(danms): Reset the cached list of cells
         from nova.compute import api
         api.CELLS = []
+        context.CELL_CACHE = {}
 
         self.cell_mappings = {}
         self.host_mappings = {}
index 33d72f7d7ce32d17749f6c35e413af7cf2b5a0a4..95e5d8372d9f06227069689a729cc42ff2a476b5 100644 (file)
@@ -20,6 +20,7 @@ from nova import context
 from nova import exception
 from nova import objects
 from nova import test
+from nova.tests import uuidsentinel as uuids
 
 
 class ContextTestCase(test.NoDBTestCase):
@@ -298,7 +299,9 @@ class ContextTestCase(test.NoDBTestCase):
                                       roles=['admin', 'weasel'])
         # Verify the existing db_connection, if any, is restored
         ctxt.db_connection = mock.sentinel.db_conn
-        mapping = objects.CellMapping(database_connection='fake://')
+        mapping = objects.CellMapping(database_connection='fake://',
+                                      transport_url='fake://',
+                                      uuid=uuids.cell)
         with context.target_cell(ctxt, mapping):
             self.assertEqual(ctxt.db_connection, mock.sentinel.cm)
         self.assertEqual(mock.sentinel.db_conn, ctxt.db_connection)
@@ -308,3 +311,20 @@ class ContextTestCase(test.NoDBTestCase):
         self.assertIsNone(ctxt.user_id)
         self.assertIsNone(ctxt.project_id)
         self.assertFalse(ctxt.is_admin)
+
+    @mock.patch('nova.db.create_context_manager')
+    def test_target_cell_caching(self, mock_create_cm):
+        mock_create_cm.return_value = mock.sentinel.db_conn_obj
+        ctxt = context.get_context()
+        mapping = objects.CellMapping(database_connection='fake://db',
+                                      transport_url='fake://mq',
+                                      uuid=uuids.cell)
+        # First call should create new connection objects.
+        with context.target_cell(ctxt, mapping):
+            self.assertEqual(mock.sentinel.db_conn_obj, ctxt.db_connection)
+        mock_create_cm.assert_called_once_with('fake://db')
+        # Second call should use cached objects.
+        mock_create_cm.reset_mock()
+        with context.target_cell(ctxt, mapping):
+            self.assertEqual(mock.sentinel.db_conn_obj, ctxt.db_connection)
+        mock_create_cm.assert_not_called()
diff --git a/releasenotes/notes/bug-1691545-1acd6512effbdffb.yaml b/releasenotes/notes/bug-1691545-1acd6512effbdffb.yaml
new file mode 100644 (file)
index 0000000..e4a0bf4
--- /dev/null
@@ -0,0 +1,10 @@
+---
+fixes:
+  - |
+    Fixes `bug 1691545`_ in which there was a significant increase in database
+    connections because of the way connections to cell databases were being
+    established. With this fix, objects related to database connections are
+    cached in the API service and reused to prevent new connections being
+    established for every communication with cell databases.
+
+    .. _bug 1691545: https://bugs.launchpad.net/nova/+bug/1691545