From 66c543b16c5d26f1a48fa71664b6fa32712b082d Mon Sep 17 00:00:00 2001 From: Daniele Varrazzo Date: Sat, 26 Mar 2011 10:59:27 +0000 Subject: [PATCH] Parse bytea output format ourselves instead of using the libpq PG 9.0 uses the hex format by default, and clients < 9.0 can't parse that format, requiring client update and great care in what is linked at runtime, and generally giving headache to users and transitively us. --- psycopg/typecast_binary.c | 229 ++++++++++++++++++++++++++------------ tests/testutils.py | 18 --- tests/types_basic.py | 10 +- 3 files changed, 160 insertions(+), 97 deletions(-) diff --git a/psycopg/typecast_binary.c b/psycopg/typecast_binary.c index fa371e2e..62b10829 100644 --- a/psycopg/typecast_binary.c +++ b/psycopg/typecast_binary.c @@ -40,7 +40,7 @@ chunk_dealloc(chunkObject *self) FORMAT_CODE_PY_SSIZE_T, self->base, self->len ); - PQfreemem(self->base); + PyMem_Free(self->base); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -127,95 +127,184 @@ PyTypeObject chunkType = { chunk_doc /* tp_doc */ }; + +static char *psycopg_parse_hex( + const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout); +static char *psycopg_parse_escape( + const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout); + static PyObject * typecast_BINARY_cast(const char *s, Py_ssize_t l, PyObject *curs) { chunkObject *chunk = NULL; PyObject *res = NULL; - char *str = NULL, *buffer = NULL; - size_t len; + char *buffer = NULL; + Py_ssize_t len; if (s == NULL) {Py_INCREF(Py_None); return Py_None;} - /* PQunescapeBytea absolutely wants a 0-terminated string and we don't - want to copy the whole buffer, right? Wrong, but there isn't any other - way */ - if (s[l] != '\0') { - if ((buffer = PyMem_Malloc(l+1)) == NULL) { - PyErr_NoMemory(); - goto fail; + if (s[0] == '\\' && s[1] == 'x') { + /* This is a buffer escaped in hex format: libpq before 9.0 can't + * parse it and we can't detect reliably the libpq version at runtime. 
+ * So the only robust option is to parse it ourselves - luckily it's + * an easy format. + */ + if (NULL == (buffer = psycopg_parse_hex(s, l, &len))) { + goto exit; } - /* Py_ssize_t->size_t cast is safe, as long as the Py_ssize_t is - * >= 0: */ - assert (l >= 0); - strncpy(buffer, s, (size_t) l); - - buffer[l] = '\0'; - s = buffer; } - str = (char*)PQunescapeBytea((unsigned char*)s, &len); - Dprintf("typecast_BINARY_cast: unescaped " FORMAT_CODE_SIZE_T " bytes", - len); - - /* The type of the second parameter to PQunescapeBytea is size_t *, so it's - * possible (especially with Python < 2.5) to get a return value too large - * to fit into a Python container. */ - if (len > (size_t) PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_IndexError, "PG buffer too large to fit in Python" - " buffer."); - goto fail; - } - - /* Check the escaping was successful */ - if (s[0] == '\\' && s[1] == 'x' /* input encoded in hex format */ - && str[0] == 'x' /* output resulted in an 'x' */ - && s[2] != '7' && s[3] != '8') /* input wasn't really an x (0x78) */ - { - PyErr_SetString(InterfaceError, - "can't receive bytea data from server >= 9.0 with the current " - "libpq client library: please update the libpq to at least 9.0 " - "or set bytea_output to 'escape' in the server config " - "or with a query"); - goto fail; + else { + /* This is a buffer in the classic bytea format. So we can hand it + * to the PQunescapeBytea to have it parsed, right? ...Wrong. We + * could, but then we'd have to record whether buffer was allocated by + * Python or by the libpq to dispose it properly. Furthermore the + * PQunescapeBytea interface is not the most brilliant as it wants a + * null-terminated string even if we have known its length thus + * requiring a useless memcpy and strlen. + * So we'll just have our better integrated parser, let's finish this + * story. 
+ */ + if (NULL == (buffer = psycopg_parse_escape(s, l, &len))) { + goto exit; + } } chunk = (chunkObject *) PyObject_New(chunkObject, &chunkType); - if (chunk == NULL) goto fail; + if (chunk == NULL) goto exit; - /* **Transfer** ownership of str's memory to the chunkObject: */ - chunk->base = str; - str = NULL; + /* **Transfer** ownership of buffer's memory to the chunkObject: */ + chunk->base = buffer; + buffer = NULL; + chunk->len = (Py_ssize_t)len; - /* size_t->Py_ssize_t cast was validated above: */ - chunk->len = (Py_ssize_t) len; #if PY_MAJOR_VERSION < 3 if ((res = PyBuffer_FromObject((PyObject *)chunk, 0, chunk->len)) == NULL) - goto fail; + goto exit; #else if ((res = PyMemoryView_FromObject((PyObject*)chunk)) == NULL) - goto fail; + goto exit; #endif - /* PyBuffer_FromObject() created a new reference. We'll release our - * reference held in 'chunk' in the 'cleanup' clause. */ - goto cleanup; - fail: - assert (PyErr_Occurred()); - if (res != NULL) { - Py_DECREF(res); - res = NULL; - } - /* Fall through to cleanup: */ - cleanup: - if (chunk != NULL) { - Py_DECREF((PyObject *) chunk); - } - if (str != NULL) { - /* str's mem was allocated by PQunescapeBytea; must use PQfreemem: */ - PQfreemem(str); - } - /* We allocated buffer with PyMem_Malloc; must use PyMem_Free: */ - PyMem_Free(buffer); +exit: + Py_XDECREF((PyObject *)chunk); + PyMem_Free(buffer); - return res; + return res; } + + +static const char hex_lut[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +/* Parse a bytea output 
buffer encoded in 'hex' format. + * + * the format is described in + * http://www.postgresql.org/docs/9.0/static/datatype-binary.html + * + * Parse the buffer in 'bufin', whose length is 'sizein'. + * Return a new buffer allocated by PyMem_Malloc and set 'sizeout' to its size. + * In case of error set an exception and return NULL. + */ +static char * +psycopg_parse_hex(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout) +{ + char *ret = NULL; + const char *bufend = bufin + sizein; + const char *pi = bufin + 2; /* past the \x */ + char *bufout; + char *po; + + po = bufout = PyMem_Malloc((sizein - 2) >> 1); /* output size upper bound */ + if (NULL == bufout) { + PyErr_NoMemory(); + goto exit; + } + + /* Implementation note: we call this function upon database response, not + * user input (because we are parsing the output format of a buffer) so we + * don't expect errors. On bad input we reserve the right to return a bad + * output, not an error. + */ + while (pi < bufend) { + char c; + while (-1 == (c = hex_lut[*pi++ & '\x7f'])) { + if (pi >= bufend) { goto endloop; } + } + *po = c << 4; + + while (-1 == (c = hex_lut[*pi++ & '\x7f'])) { + if (pi >= bufend) { goto endloop; } + } + *po++ |= c; + } +endloop: + + ret = bufout; + *sizeout = po - bufout; + +exit: + return ret; +} + +/* Parse a bytea output buffer encoded in 'escape' format. + * + * the format is described in + * http://www.postgresql.org/docs/9.0/static/datatype-binary.html + * + * Parse the buffer in 'bufin', whose length is 'sizein'. + * Return a new buffer allocated by PyMem_Malloc and set 'sizeout' to its size. + * In case of error set an exception and return NULL. 
+ */ +static char * +psycopg_parse_escape(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout) +{ + char *ret = NULL; + const char *bufend = bufin + sizein; + const char *pi = bufin; + char *bufout; + char *po; + + po = bufout = PyMem_Malloc(sizein); /* output size upper bound */ + if (NULL == bufout) { + PyErr_NoMemory(); + goto exit; + } + + while (pi < bufend) { + if (*pi != '\\') { + /* Unescaped char */ + *po++ = *pi++; + continue; + } + if ((pi[1] >= '0' && pi[1] <= '3') && + (pi[2] >= '0' && pi[2] <= '7') && + (pi[3] >= '0' && pi[3] <= '7')) + { + /* Escaped octal value */ + *po++ = ((pi[1] - '0') << 6) | + ((pi[2] - '0') << 3) | + ((pi[3] - '0')); + pi += 4; + } + else { + /* Escaped char */ + *po++ = pi[1]; + pi += 2; + } + } + + ret = bufout; + *sizeout = po - bufout; + +exit: + return ret; +} + diff --git a/tests/testutils.py b/tests/testutils.py index 2459894f..26551d4e 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -140,24 +140,6 @@ def skip_if_no_namedtuple(f): return skip_if_no_namedtuple_ -def skip_if_broken_hex_binary(f): - """Decorator to detect libpq < 9.0 unable to parse bytea in hex format""" - def cope_with_hex_binary_(self): - from psycopg2 import InterfaceError - try: - return f(self) - except InterfaceError, e: - if '9.0' in str(e) and self.conn.server_version >= 90000: - return self.skipTest( - # FIXME: we are only assuming the libpq is older here, - # but we don't have a reliable way to detect the libpq - # version, not pre-9 at least. 
- "bytea broken with server >= 9.0, libpq < 9") - else: - raise - - return cope_with_hex_binary_ - def skip_if_no_iobase(f): """Skip a test if io.TextIOBase is not available.""" def skip_if_no_iobase_(self): diff --git a/tests/types_basic.py b/tests/types_basic.py index 40106310..83d526ff 100755 --- a/tests/types_basic.py +++ b/tests/types_basic.py @@ -28,7 +28,7 @@ except: pass import sys import testutils -from testutils import unittest, skip_if_broken_hex_binary +from testutils import unittest from testconfig import dsn import psycopg2 @@ -116,7 +116,6 @@ class TypesBasicTests(unittest.TestCase): s = self.execute("SELECT %s AS foo", (float("-inf"),)) self.failUnless(str(s) == "-inf", "wrong float quoting: " + str(s)) - @skip_if_broken_hex_binary def testBinary(self): if sys.version_info[0] < 3: s = ''.join([chr(x) for x in range(256)]) @@ -143,7 +142,6 @@ class TypesBasicTests(unittest.TestCase): b = psycopg2.Binary(bytes([])) self.assertEqual(str(b), "''::bytea") - @skip_if_broken_hex_binary def testBinaryRoundTrip(self): # test to make sure buffers returned by psycopg2 are # understood by execute: @@ -191,7 +189,6 @@ class TypesBasicTests(unittest.TestCase): s = self.execute("SELECT '{}'::text AS foo") self.failUnlessEqual(s, "{}") - @skip_if_broken_hex_binary @testutils.skip_from_python(3) def testTypeRoundtripBuffer(self): o1 = buffer("".join(map(chr, range(256)))) @@ -204,7 +201,6 @@ class TypesBasicTests(unittest.TestCase): self.assertEqual(type(o1), type(o2)) self.assertEqual(str(o1), str(o2)) - @skip_if_broken_hex_binary @testutils.skip_from_python(3) def testTypeRoundtripBufferArray(self): o1 = buffer("".join(map(chr, range(256)))) @@ -213,7 +209,6 @@ class TypesBasicTests(unittest.TestCase): self.assertEqual(type(o1[0]), type(o2[0])) self.assertEqual(str(o1[0]), str(o2[0])) - @skip_if_broken_hex_binary @testutils.skip_before_python(3) def testTypeRoundtripBytes(self): o1 = bytes(range(256)) @@ -225,7 +220,6 @@ class TypesBasicTests(unittest.TestCase): 
o2 = self.execute("select %s;", (o1,)) self.assertEqual(memoryview, type(o2)) - @skip_if_broken_hex_binary @testutils.skip_before_python(3) def testTypeRoundtripBytesArray(self): o1 = bytes(range(256)) @@ -233,7 +227,6 @@ class TypesBasicTests(unittest.TestCase): o2 = self.execute("select %s;", (o1,)) self.assertEqual(memoryview, type(o2[0])) - @skip_if_broken_hex_binary @testutils.skip_before_python(2, 6) def testAdaptBytearray(self): o1 = bytearray(range(256)) @@ -258,7 +251,6 @@ class TypesBasicTests(unittest.TestCase): else: self.assertEqual(memoryview, type(o2)) - @skip_if_broken_hex_binary @testutils.skip_before_python(2, 7) def testAdaptMemoryview(self): o1 = memoryview(bytearray(range(256)))