From ac14957fbb91c22b7dca12c7a12d1180051b60c0 Mon Sep 17 00:00:00 2001 From: Greg Ward Date: Tue, 28 Jun 2016 17:46:21 -0400 Subject: [PATCH 1/5] Always detect when a connection is closed behind psycopg2's back. There's a race condition that only seems to happen over Unix-domain sockets. Sometimes, the closed socket is reported by the kernel to libpq like this (captured with strace): sendto(3, "Q\0\0\0\34select pg_backend_pid()\0", 29, MSG_NOSIGNAL, NULL, 0) = 29 recvfrom(3, "E\0\0\0mSFATAL\0C57P01\0Mterminating "..., 16384, 0, NULL, NULL) = 110 recvfrom(3, 0x12d0330, 16384, 0, 0, 0) = -1 ECONNRESET (Connection reset by peer) That is, psycopg2/libpq sees no error when sending the first query after the connection is closed, but gets an error reading the result. In that case, everything worked fine. But sometimes, the error manifests like this: sendto(3, "Q\0\0\0\34select pg_backend_pid()\0", 29, MSG_NOSIGNAL, NULL, 0) = -1 EPIPE (Broken pipe) recvfrom(3, "E\0\0\0mSFATAL\0C57P01\0Mterminating "..., 16384, 0, NULL, NULL) = 110 recvfrom(3, "", 16274, 0, NULL, NULL) = 0 recvfrom(3, "", 16274, 0, NULL, NULL) = 0 i.e. libpq received an error when sending the query. This manifests as a slightly different exception from a slightly different place. More importantly, in this case connection.closed is left at 0 rather than being set to 2, and that is the bug I'm fixing here. Note that we see almost identical behaviour for sync and async connections, and the fixes are the same. So I added extremely similar test cases. Finally, there is still a bug here: for async connections, we sometimes raise DatabaseError (incorrect) and sometimes raise OperationalError (correct). Will fix that next. --- psycopg/pqpath.c | 6 ++++++ tests/test_cursor.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 99dd40be..0a0de560 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -934,6 +934,9 @@ pq_execute(cursorObject *curs, const char *query, int async, int no_result, int /* dont let pgres = NULL go to pq_fetch() */ if (curs->pgres == NULL) { + if (CONNECTION_BAD == PQstatus(curs->conn->pgconn)) { + curs->conn->closed = 2; + } pthread_mutex_unlock(&(curs->conn->lock)); Py_BLOCK_THREADS; if (!PyErr_Occurred()) { @@ -961,6 +964,9 @@ pq_execute(cursorObject *curs, const char *query, int async, int no_result, int CLEARPGRES(curs->pgres); if (PQsendQuery(curs->conn->pgconn, query) == 0) { + if (CONNECTION_BAD == PQstatus(curs->conn->pgconn)) { + curs->conn->closed = 2; + } pthread_mutex_unlock(&(curs->conn->lock)); Py_BLOCK_THREADS; PyErr_SetString(OperationalError, diff --git a/tests/test_cursor.py b/tests/test_cursor.py index 970cc37d..69430e08 100755 --- a/tests/test_cursor.py +++ b/tests/test_cursor.py @@ -26,6 +26,7 @@ import time import pickle import psycopg2 import psycopg2.extensions +import psycopg2.extras from psycopg2.extensions import b from testutils import unittest, ConnectingTestCase, skip_before_postgres from testutils import skip_if_no_namedtuple, skip_if_no_getrefcount @@ -490,6 +491,49 @@ class CursorTests(ConnectingTestCase): cur = self.conn.cursor() self.assertRaises(TypeError, cur.callproc, 'lower', 42) + def test_external_close_sync(self): + # If a "victim" connection is closed by a "control" connection + # behind psycopg2's back, psycopg2 always handles it correctly: + # raise OperationalError, set conn.closed to 2. This reproduces + # issue #443, a race between control_conn closing victim_conn and + # psycopg2 noticing. + control_conn = self.conn + connect_func = self.connect + wait_func = lambda conn: None + self._test_external_close(control_conn, connect_func, wait_func) + + def test_external_close_async(self): + # Issue #443 is in the async code too. Since the fix is duplicated, + # so is the test. + control_conn = self.conn + connect_func = lambda: self.connect(async=True) + wait_func = psycopg2.extras.wait_select + self._test_external_close(control_conn, connect_func, wait_func) + + def _test_external_close(self, control_conn, connect_func, wait_func): + # The short sleep before using victim_conn the second time makes it + # much more likely to lose the race and see the bug. Repeating the + # test several times makes it even more likely. + for i in range(10): + victim_conn = connect_func() + wait_func(victim_conn) + + with victim_conn.cursor() as cur: + cur.execute('select pg_backend_pid()') + wait_func(victim_conn) + pid1 = cur.fetchall()[0][0] + + with control_conn.cursor() as cur: + cur.execute('select pg_terminate_backend(%s)', (pid1,)) + + time.sleep(0.001) + with self.assertRaises(psycopg2.DatabaseError): + with victim_conn.cursor() as cur: + cur.execute('select 1') + wait_func(victim_conn) + + self.assertEqual(victim_conn.closed, 2) + def test_suite(): return unittest.TestLoader().loadTestsFromName(__name__) From 53c1607644f45ddde5cf1353c95d60a09b829b74 Mon Sep 17 00:00:00 2001 From: Greg Ward Date: Tue, 28 Jun 2016 18:03:05 -0400 Subject: [PATCH 2/5] Always raise OperationalError when connection was closed externally. From the DB-API (https://www.python.org/dev/peps/pep-0249/): OperationalError Exception raised for errors that are related to the database's operation and not necessarily under the control of the programmer, e.g. an unexpected disconnect occurs, [...] Additionally, psycopg2 was inconsistent, at least in the async case: depending on how the "connection closed" error was reported from the kernel to libpq, it would sometimes raise OperationalError and sometimes DatabaseError. Now it always raises OperationalError. --- psycopg/pqpath.c | 10 ++++++---- tests/test_cursor.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 0a0de560..e3c14901 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -167,8 +167,10 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres) /* if the connection has somehow beed broken, we mark the connection object as closed but requiring cleanup */ - if (conn->pgconn != NULL && PQstatus(conn->pgconn) == CONNECTION_BAD) + if (conn->pgconn != NULL && PQstatus(conn->pgconn) == CONNECTION_BAD) { conn->closed = 2; + exc = OperationalError; + } if (pgres == NULL && curs != NULL) pgres = &curs->pgres; @@ -202,9 +204,9 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres) if (code != NULL) { exc = exception_from_sqlstate(code); } - else { - /* Fallback if there is no exception code (reported happening e.g. - * when the connection is closed). */ + else if (exc == NULL) { + /* Fallback if there is no exception code (unless we already + determined that the connection was closed). */ exc = DatabaseError; } diff --git a/tests/test_cursor.py b/tests/test_cursor.py index 69430e08..60f74ba5 100755 --- a/tests/test_cursor.py +++ b/tests/test_cursor.py @@ -527,7 +527,7 @@ class CursorTests(ConnectingTestCase): cur.execute('select pg_terminate_backend(%s)', (pid1,)) time.sleep(0.001) - with self.assertRaises(psycopg2.DatabaseError): + with self.assertRaises(psycopg2.OperationalError): with victim_conn.cursor() as cur: cur.execute('select 1') wait_func(victim_conn) From 9d4e3a06b753581eaddd84665ec9194a26b53537 Mon Sep 17 00:00:00 2001 From: Daniele Varrazzo Date: Mon, 4 Jul 2016 21:49:45 +0100 Subject: [PATCH 3/5] Skip killing test on brontosaur servers --- tests/test_cursor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_cursor.py b/tests/test_cursor.py index 60f74ba5..b2385a28 100755 --- a/tests/test_cursor.py +++ b/tests/test_cursor.py @@ -491,6 +491,7 @@ class CursorTests(ConnectingTestCase): cur = self.conn.cursor() self.assertRaises(TypeError, cur.callproc, 'lower', 42) + @skip_before_postgres(8, 4) def test_external_close_sync(self): # If a "victim" connection is closed by a "control" connection # behind psycopg2's back, psycopg2 always handles it correctly: @@ -502,6 +503,7 @@ class CursorTests(ConnectingTestCase): wait_func = lambda conn: None self._test_external_close(control_conn, connect_func, wait_func) + @skip_before_postgres(8, 4) def test_external_close_async(self): # Issue #443 is in the async code too. Since the fix is duplicated, # so is the test. From f4b57822b081d49e506ea22743391207709704d8 Mon Sep 17 00:00:00 2001 From: Daniele Varrazzo Date: Mon, 4 Jul 2016 21:58:25 +0100 Subject: [PATCH 4/5] Fixed tests on Py 2.5 --- tests/test_cursor.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_cursor.py b/tests/test_cursor.py index b2385a28..1f2c94c2 100755 --- a/tests/test_cursor.py +++ b/tests/test_cursor.py @@ -22,6 +22,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public # License for more details. +from __future__ import with_statement + import time import pickle import psycopg2 @@ -528,12 +530,13 @@ class CursorTests(ConnectingTestCase): with control_conn.cursor() as cur: cur.execute('select pg_terminate_backend(%s)', (pid1,)) - time.sleep(0.001) - with self.assertRaises(psycopg2.OperationalError): + def f(): with victim_conn.cursor() as cur: cur.execute('select 1') wait_func(victim_conn) + time.sleep(0.001) + self.assertRaises(psycopg2.OperationalError, f) self.assertEqual(victim_conn.closed, 2) From 9886a7210eb007495ca589ac781bc1177af2ac04 Mon Sep 17 00:00:00 2001 From: Daniele Varrazzo Date: Mon, 4 Jul 2016 20:49:21 +0100 Subject: [PATCH 5/5] Connection state fixed noted in the news --- NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS b/NEWS index c73d9963..e93c23d7 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,8 @@ Current release What's new in psycopg 2.6.2 ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Fixed inconsistent state in externally closed connections + (:tickets:`#263, #311, #443`). - Report the server response status on errors (such as :ticket:`#281`). - The `~psycopg2.extras.wait_select` callback allows interrupting a long-running query in an interactive shell using :kbd:`Ctrl-C`