From 2b069b6cf82b2118bf100d89ef69be872be5d3fa Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Tue, 15 Dec 2015 15:14:34 +0000 Subject: [PATCH] Database locking: Perl: Retry all deadlocks in PostgreSQL Previously we would retry all COMMITs but nothing else. This is correct for SQLite3 but not for PostgreSQL. We got away with it before because of the heavyweight locking of even long-running read-only transactions, but now the LOCK TABLEs can fail (at least in a mixed-version system, and perhaps even in a system with only new code). So: cover all of the database work in db_retry with the eval, and explicitly ask the JobDB adaptation layer (via a new need_retry method) whether to go around again. We tell the JobDB layer whether the problem was during commit, so that we can avoid making any overall semantic change to the interaction with SQLite3. In the PostgreSQL case, the db handle can be asked whether there was an error and what the error code was. Deadlock has its own error code. (One side effect here is that db_retry_retry, which sets $db_retry_stop='retry', is now no longer affected by the retry count in db_retry. But there are no callers and that may be more right anyway. db_retry_abort always exits the loop, as before.) I have tested this with the following rune: OSSTEST_CONFIG=/u/iwj/.xen-osstest/config:local-config.test-database_iwj perl -w -MData::Dumper -e 'use strict; use Osstest::Executive; use Osstest; csreadconfig(); print Dumper($dbh_tests->{AutoCommit}); eval { $dbh_tests->do("BOGUS"); }; db_begin_work($dbh_tests, [qw(flights resources)])' adding a sleep(2) to the loop Osstest::JobDB::Executive::begin_work, and running a second copy of the rune with the tables to lock in the other order. Acked-by: Ian Campbell Signed-off-by: Ian Jackson --- v2: Mention db_retry_retry in commit message. --- Osstest.pm | 34 +++++++++++++++++++++------------- Osstest/JobDB/Executive.pm | 7 +++++++ Osstest/JobDB/Standalone.pm | 5 +++++ 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/Osstest.pm b/Osstest.pm index d4ddda7..a39ae42 100644 --- a/Osstest.pm +++ b/Osstest.pm @@ -288,20 +288,28 @@ sub db_retry ($$$;$$) { for (;;) { $pre->(); - db_begin_work($dbh, $tables); - if (defined $fl) { - die unless $dbh eq $dbh_tests; - $mjobdb->dbfl_check($fl,$flok); - } - $db_retry_stop= 0; - $r= &$body; - if ($db_retry_stop) { - $dbh->rollback(); - last if $db_retry_stop eq 'abort'; - } else { - last if eval { $dbh->commit(); 1; }; - } + my $committing = 0; + eval { + db_begin_work($dbh, $tables); + if (defined $fl) { + die unless $dbh eq $dbh_tests; + $mjobdb->dbfl_check($fl,$flok); + } + $db_retry_stop= 0; + $r= &$body; + if ($db_retry_stop) { + $dbh->rollback(); + last if $db_retry_stop eq 'abort'; + next; + } + $committing = 1; + $dbh->commit(); + }; + last if !length $@; + die $@ unless $mjobdb->need_retry($dbh, $committing); die "$dbh $body $@ ?" unless $retries-- > 0; + eval { $dbh->rollback(); }; + print STDERR "DB conflict (messages above may refer); retrying...\n"; sleep(1); } return $r; diff --git a/Osstest/JobDB/Executive.pm b/Osstest/JobDB/Executive.pm index 124e7c0..6fb77a4 100644 --- a/Osstest/JobDB/Executive.pm +++ b/Osstest/JobDB/Executive.pm @@ -47,6 +47,13 @@ sub begin_work ($$$) { #method } } +sub need_retry ($$$) { + my ($jd, $dbh,$committing) = @_; + return + $dbh_tests->err()==7 && + ($dbh_tests->state =~ m/^40P01/); # DEADLOCK DETECTED +} + sub current_flight ($) { #method return $ENV{'OSSTEST_FLIGHT'}; } diff --git a/Osstest/JobDB/Standalone.pm b/Osstest/JobDB/Standalone.pm index 431ba5a..98d0173 100644 --- a/Osstest/JobDB/Standalone.pm +++ b/Osstest/JobDB/Standalone.pm @@ -41,6 +41,11 @@ augmentconfigdefaults( sub new { return bless {}, $_[0]; }; sub begin_work { } +sub need_retry ($$$) { + my ($jd, $dbh,$committing) = @_; + return $committing; +} + sub dbfl_check { } sub open ($) { -- 2.39.5