From 75cb5d75d78b985268a7abba89313ef174583bd1 Mon Sep 17 00:00:00 2001 From: Daniele Varrazzo Date: Sun, 11 Nov 2007 08:53:44 +0000 Subject: [PATCH] Use escape string syntax for string escape if connected to a server requiring it. Added a connection flag to store whether E''-style quoting is required: this avoids repeated PQparameterStatus() calls. Added a test case to verify correct behavior on strings, unicode and binary data. Tested with PG versions from 7.4 to 8.3b2, with any server 'standard_conforming_strings' setting and with 'PSYCOPG_OWN_QUOTING' too. --- ChangeLog | 5 +++ psycopg/adapter_binary.c | 31 ++-------------- psycopg/adapter_qstring.c | 13 +++++-- psycopg/connection.h | 2 + psycopg/connection_int.c | 31 +++++++++++++++- tests/test_quote.py | 77 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 127 insertions(+), 32 deletions(-) create mode 100644 tests/test_quote.py diff --git a/ChangeLog b/ChangeLog index d445b936..51ae1d6e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2007-11-11 Daniele Varrazzo + + * Use escape string syntax for string escape if connected to a + server requiring it. 
+ 2007-11-09 Daniele Varrazzo * Use escape string syntax for binary escape if connected to a diff --git a/psycopg/adapter_binary.c b/psycopg/adapter_binary.c index eaea99ad..df193888 100644 --- a/psycopg/adapter_binary.c +++ b/psycopg/adapter_binary.c @@ -137,46 +137,23 @@ binary_quote(binaryObject *self) const char *buffer; Py_ssize_t buffer_len; size_t len = 0; - PGconn *pgconn = NULL; - const char *quotes = "'%s'"; - const char *scs; /* if we got a plain string or a buffer we escape it and save the buffer */ if (PyString_Check(self->wrapped) || PyBuffer_Check(self->wrapped)) { /* escape and build quoted buffer */ PyObject_AsCharBuffer(self->wrapped, &buffer, &buffer_len); - if (self->conn) { - pgconn = ((connectionObject*)self->conn)->pgconn; - - /* - * The presence of the 'standard_conforming_strings' parameters - * means that the server _accepts_ the E'' quote. - * - * If the paramer is off, the PQescapeByteaConn returns - * backslash escaped strings (e.g. '\001' -> "\\001"), - * so the E'' quotes are required to avoid warnings - * if 'escape_string_warning' is set. - * - * If the parameter is on, the PQescapeByteaConn returns - * not escaped strings (e.g. '\001' -> "\001"), relying on the - * fact that the '\' will pass untouched the string parser. - * In this case the E'' quotes are NOT to be used. - */ - scs = PQparameterStatus(pgconn, "standard_conforming_strings"); - if (scs && (0 == strcmp("off", scs))) - quotes = "E'%s'"; - } - to = (char *)binary_escape((unsigned char*)buffer, (size_t) buffer_len, - &len, pgconn); + &len, self->conn ? ((connectionObject*)self->conn)->pgconn : NULL); if (to == NULL) { PyErr_NoMemory(); return NULL; } if (len > 0) - self->buffer = PyString_FromFormat(quotes, to); + self->buffer = PyString_FromFormat( + (self->conn && ((connectionObject*)self->conn)->equote) + ? 
"E'%s'" : "'%s'" , to); else self->buffer = PyString_FromString("''"); diff --git a/psycopg/adapter_qstring.c b/psycopg/adapter_qstring.c index ac2c9aa1..ae95cb63 100644 --- a/psycopg/adapter_qstring.c +++ b/psycopg/adapter_qstring.c @@ -94,6 +94,7 @@ qstring_quote(qstringObject *self) PyObject *str; char *s, *buffer; Py_ssize_t len; + int equote; /* buffer offset if E'' quotes are needed */ /* if the wrapped object is an unicode object we can encode it to match self->encoding but if the encoding is not specified we don't know what @@ -141,20 +142,22 @@ qstring_quote(qstringObject *self) /* encode the string into buffer */ PyString_AsStringAndSize(str, &s, &len); - buffer = (char *)PyMem_Malloc((len*2+3) * sizeof(char)); + buffer = (char *)PyMem_Malloc((len*2+4) * sizeof(char)); if (buffer == NULL) { Py_DECREF(str); PyErr_NoMemory(); return NULL; } + equote = (self->conn && ((connectionObject*)self->conn)->equote) ? 1 : 0; + { /* Call qstring_escape with the GIL released, then reacquire the GIL * before verifying that the results can fit into a Python string; raise * an exception if not. */ size_t qstring_res; Py_BEGIN_ALLOW_THREADS - qstring_res = qstring_escape(buffer+1, s, len, + qstring_res = qstring_escape(buffer+equote+1, s, len, self->conn ? 
((connectionObject*)self->conn)->pgconn : NULL); Py_END_ALLOW_THREADS @@ -166,10 +169,12 @@ qstring_quote(qstringObject *self) return NULL; } len = (Py_ssize_t) qstring_res; - buffer[0] = '\'' ; buffer[len+1] = '\''; + if (equote) + buffer[0] = 'E'; + buffer[equote] = '\'' ; buffer[len+equote+1] = '\''; } - self->buffer = PyString_FromStringAndSize(buffer, len+2); + self->buffer = PyString_FromStringAndSize(buffer, len+equote+2); PyMem_Free(buffer); Py_DECREF(str); diff --git a/psycopg/connection.h b/psycopg/connection.h index 90dc1291..bc62381d 100644 --- a/psycopg/connection.h +++ b/psycopg/connection.h @@ -83,6 +83,8 @@ typedef struct { PyObject *string_types; /* a set of typecasters for string types */ PyObject *binary_types; /* a set of typecasters for binary types */ + int equote; /* use E''-style quotes for escaped strings */ + } connectionObject; /* C-callable functions in connection_int.c and connection_ext.c */ diff --git a/psycopg/connection_int.c b/psycopg/connection_int.c index fb4018c3..1906e82c 100644 --- a/psycopg/connection_int.c +++ b/psycopg/connection_int.c @@ -54,7 +54,7 @@ conn_notice_callback(void *args, const char *message) } } -/* conn_connect - execute a connection to the dataabase */ +/* conn_connect - execute a connection to the database */ int conn_connect(connectionObject *self) @@ -62,6 +62,7 @@ conn_connect(connectionObject *self) PGconn *pgconn; PGresult *pgres; char *data, *tmp; + const char *scs; /* standard-conforming strings */ size_t i; /* we need the initial date style to be ISO, for typecasters; if the user @@ -97,6 +98,34 @@ conn_connect(connectionObject *self) PQsetNoticeProcessor(pgconn, conn_notice_callback, (void*)self); + /* + * The presence of the 'standard_conforming_strings' parameter + * means that the server _accepts_ the E'' quote. + * + * If the parameter is off, the PQescapeByteaConn returns + * backslash escaped strings (e.g. 
'\001' -> "\\001"), + * so the E'' quotes are required to avoid warnings + * if 'escape_string_warning' is set. + * + * If the parameter is on, the PQescapeByteaConn returns + * not escaped strings (e.g. '\001' -> "\001"), relying on the + * fact that the '\' will pass through the string parser untouched. + * In this case the E'' quotes are NOT to be used. + * + * The PSYCOPG_OWN_QUOTING implementation always returns escaped strings. + */ + scs = PQparameterStatus(pgconn, "standard_conforming_strings"); + Dprintf("conn_connect: server standard_conforming_strings parameter: %s", + scs ? scs : "unavailable"); + +#ifndef PSYCOPG_OWN_QUOTING + self->equote = (scs && (0 == strcmp("off", scs))); +#else + self->equote = (scs != NULL); +#endif + Dprintf("conn_connect: server requires E'' quotes: %s", + self->equote ? "YES" : "NO"); + Py_BEGIN_ALLOW_THREADS; pgres = PQexec(pgconn, datestyle); Py_END_ALLOW_THREADS; diff --git a/tests/test_quote.py b/tests/test_quote.py new file mode 100644 index 00000000..60f1d6a8 --- /dev/null +++ b/tests/test_quote.py @@ -0,0 +1,77 @@ +import psycopg2 +import psycopg2.extensions +import unittest +import tests + +class QuotingTestCase(unittest.TestCase): + r"""Checks the correct quoting of strings and binary objects. + + Since ver. 8.1, PostgreSQL is moving towards SQL standard conforming + strings, where the backslash (\) is treated as a literal character, + not as an escape. To treat the backslash as a C-style escape, PG supports + the E'' quotes. + + This test case checks that the E'' quotes are used whenever they are + needed. The tests are expected to pass with all PostgreSQL server versions + (currently tested with 7.4 <= PG <= 8.3beta) and with any + 'standard_conforming_strings' server parameter value. + The tests also check that no warning is raised ('escape_string_warning' + should be on). 
+ + http://www.postgresql.org/docs/8.1/static/sql-syntax.html#SQL-SYNTAX-STRINGS + http://www.postgresql.org/docs/8.1/static/runtime-config-compatible.html + """ + def setUp(self): + self.conn = psycopg2.connect("dbname=%s" % tests.dbname) + + def tearDown(self): + self.conn.close() + + def test_string(self): + data = """some data with \t chars + to escape into, 'quotes' and \\ a backslash too. + """ + data += "".join(map(chr, range(1,127))) + + curs = self.conn.cursor() + curs.execute("SELECT %s;", (data,)) + res = curs.fetchone()[0] + + self.assertEqual(res, data) + self.assert_(not self.conn.notices) + + def test_binary(self): + data = """some data with \000\013 binary + stuff into, 'quotes' and \\ a backslash too. + """ + data += "".join(map(chr, range(256))) + + curs = self.conn.cursor() + curs.execute("SELECT %s::bytea;", (psycopg2.Binary(data),)) + res = str(curs.fetchone()[0]) + + self.assertEqual(res, data) + self.assert_(not self.conn.notices) + + def test_unicode(self): + data = u"""some data with \t chars + to escape into, 'quotes', \u20ac euro sign and \\ a backslash too. + """ + data += u"".join(map(unichr, [ u for u in range(1,65536) + if not 0xD800 <= u <= 0xDFFF ])) # surrogate area + self.conn.set_client_encoding('UNICODE') + + psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) + curs = self.conn.cursor() + curs.execute("SELECT %s::text;", (data,)) + res = curs.fetchone()[0] + + self.assertEqual(res, data) + self.assert_(not self.conn.notices) + +def test_suite(): + return unittest.TestLoader().loadTestsFromName(__name__) + +if __name__ == "__main__": + unittest.main() +