mirror of
https://github.com/psycopg/psycopg2.git
synced 2024-11-23 01:16:34 +03:00
Merge branch 'bytea-parser' into devel
This commit is contained in:
commit
90536a187d
2
NEWS
2
NEWS
|
@ -1,6 +1,8 @@
|
|||
What's new in psycopg 2.4.1
|
||||
---------------------------
|
||||
|
||||
- Use own parser for bytea output, not requiring anymore the libpq 9.0
|
||||
to parse the hex format.
|
||||
- Correctly detect an empty query sent to the backend (ticket #46).
|
||||
|
||||
|
||||
|
|
|
@ -97,7 +97,9 @@ Psycopg converts :sql:`decimal`\/\ :sql:`numeric` database types into Python `!D
|
|||
Transferring binary data from PostgreSQL 9.0 doesn't work.
|
||||
PostgreSQL 9.0 uses by default `the "hex" format`__ to transfer
|
||||
:sql:`bytea` data: the format can't be parsed by the libpq 8.4 and
|
||||
earlier. Three options to solve the problem are:
|
||||
earlier. The problem is solved in Psycopg 2.4.1, which uses its own parser
|
||||
for the :sql:`bytea` format. For previous Psycopg releases, three options
|
||||
to solve the problem are:
|
||||
|
||||
- set the bytea_output__ parameter to ``escape`` in the server;
|
||||
- execute the database command ``SET bytea_output TO escape;`` in the
|
||||
|
|
|
@ -271,6 +271,10 @@ the SQL string that would be sent to the database.
|
|||
.. versionchanged:: 2.4
|
||||
only strings were supported before.
|
||||
|
||||
.. versionchanged:: 2.4.1
|
||||
can parse the 'hex' format from 9.0 servers without relying on the
|
||||
version of the client library.
|
||||
|
||||
.. note::
|
||||
|
||||
In Python 2, if you have binary data in a `!str` object, you can pass them
|
||||
|
@ -282,17 +286,14 @@ the SQL string that would be sent to the database.
|
|||
|
||||
.. warning::
|
||||
|
||||
PostgreSQL 9 uses by default `a new "hex" format`__ to emit :sql:`bytea`
|
||||
fields. Unfortunately this format can't be parsed by libpq versions
|
||||
before 9.0. This means that using a library client with version lesser
|
||||
than 9.0 to talk with a server 9.0 or later you may have problems
|
||||
receiving :sql:`bytea` data. To work around this problem you can set the
|
||||
`bytea_output`__ parameter to ``escape``, either in the server
|
||||
configuration or in the client session using a query such as ``SET
|
||||
bytea_output TO escape;`` before trying to receive binary data.
|
||||
|
||||
Starting from Psycopg 2.4 this condition is detected and signaled with a
|
||||
`~psycopg2.InterfaceError`.
|
||||
Since version 9.0 PostgreSQL uses by default `a new "hex" format`__ to
|
||||
emit :sql:`bytea` fields. Starting from Psycopg 2.4.1 the format is
|
||||
correctly supported. If you use a previous version you will need some
|
||||
extra care when receiving bytea from PostgreSQL: you must have at least
|
||||
the libpq 9.0 installed on the client or alternatively you can set the
|
||||
`bytea_output`__ configuration parameter to ``escape``, either in the
|
||||
server configuration file or in the client session (using a query such as
|
||||
``SET bytea_output TO escape;``) before receiving binary data.
|
||||
|
||||
.. __: http://www.postgresql.org/docs/9.0/static/datatype-binary.html
|
||||
.. __: http://www.postgresql.org/docs/9.0/static/runtime-config-client.html#GUC-BYTEA-OUTPUT
|
||||
|
|
|
@ -40,7 +40,7 @@ chunk_dealloc(chunkObject *self)
|
|||
FORMAT_CODE_PY_SSIZE_T,
|
||||
self->base, self->len
|
||||
);
|
||||
PQfreemem(self->base);
|
||||
PyMem_Free(self->base);
|
||||
Py_TYPE(self)->tp_free((PyObject *)self);
|
||||
}
|
||||
|
||||
|
@ -127,95 +127,185 @@ PyTypeObject chunkType = {
|
|||
chunk_doc /* tp_doc */
|
||||
};
|
||||
|
||||
static PyObject *
|
||||
|
||||
static char *psycopg_parse_hex(
|
||||
const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout);
|
||||
static char *psycopg_parse_escape(
|
||||
const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout);
|
||||
|
||||
/* The function is not static and not hidden as we use ctypes to test it. */
|
||||
PyObject *
|
||||
typecast_BINARY_cast(const char *s, Py_ssize_t l, PyObject *curs)
|
||||
{
|
||||
chunkObject *chunk = NULL;
|
||||
PyObject *res = NULL;
|
||||
char *str = NULL, *buffer = NULL;
|
||||
size_t len;
|
||||
char *buffer = NULL;
|
||||
Py_ssize_t len;
|
||||
|
||||
if (s == NULL) {Py_INCREF(Py_None); return Py_None;}
|
||||
|
||||
/* PQunescapeBytea absolutely wants a 0-terminated string and we don't
|
||||
want to copy the whole buffer, right? Wrong, but there isn't any other
|
||||
way <g> */
|
||||
if (s[l] != '\0') {
|
||||
if ((buffer = PyMem_Malloc(l+1)) == NULL) {
|
||||
PyErr_NoMemory();
|
||||
goto fail;
|
||||
if (s[0] == '\\' && s[1] == 'x') {
|
||||
/* This is a buffer escaped in hex format: libpq before 9.0 can't
|
||||
* parse it and we can't detect reliably the libpq version at runtime.
|
||||
* So the only robust option is to parse it ourselves - luckily it's
|
||||
* an easy format.
|
||||
*/
|
||||
if (NULL == (buffer = psycopg_parse_hex(s, l, &len))) {
|
||||
goto exit;
|
||||
}
|
||||
/* Py_ssize_t->size_t cast is safe, as long as the Py_ssize_t is
|
||||
* >= 0: */
|
||||
assert (l >= 0);
|
||||
strncpy(buffer, s, (size_t) l);
|
||||
|
||||
buffer[l] = '\0';
|
||||
s = buffer;
|
||||
}
|
||||
str = (char*)PQunescapeBytea((unsigned char*)s, &len);
|
||||
Dprintf("typecast_BINARY_cast: unescaped " FORMAT_CODE_SIZE_T " bytes",
|
||||
len);
|
||||
|
||||
/* The type of the second parameter to PQunescapeBytea is size_t *, so it's
|
||||
* possible (especially with Python < 2.5) to get a return value too large
|
||||
* to fit into a Python container. */
|
||||
if (len > (size_t) PY_SSIZE_T_MAX) {
|
||||
PyErr_SetString(PyExc_IndexError, "PG buffer too large to fit in Python"
|
||||
" buffer.");
|
||||
goto fail;
|
||||
else {
|
||||
/* This is a buffer in the classic bytea format. So we can hand it
|
||||
* to the PQunescapeBytea to have it parsed, right? ...Wrong. We
|
||||
* could, but then we'd have to record whether buffer was allocated by
|
||||
* Python or by the libpq to dispose it properly. Furthermore the
|
||||
* PQunescapeBytea interface is not the most brilliant as it wants a
|
||||
* null-terminated string even if we have known its length thus
|
||||
* requiring a useless memcpy and strlen.
|
||||
* So we'll just have our better integrated parser, let's finish this
|
||||
* story.
|
||||
*/
|
||||
if (NULL == (buffer = psycopg_parse_escape(s, l, &len))) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* Check the escaping was successful */
|
||||
if (s[0] == '\\' && s[1] == 'x' /* input encoded in hex format */
|
||||
&& str[0] == 'x' /* output resulted in an 'x' */
|
||||
&& s[2] != '7' && s[3] != '8') /* input wasn't really an x (0x78) */
|
||||
{
|
||||
PyErr_SetString(InterfaceError,
|
||||
"can't receive bytea data from server >= 9.0 with the current "
|
||||
"libpq client library: please update the libpq to at least 9.0 "
|
||||
"or set bytea_output to 'escape' in the server config "
|
||||
"or with a query");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
chunk = (chunkObject *) PyObject_New(chunkObject, &chunkType);
|
||||
if (chunk == NULL) goto fail;
|
||||
if (chunk == NULL) goto exit;
|
||||
|
||||
/* **Transfer** ownership of str's memory to the chunkObject: */
|
||||
chunk->base = str;
|
||||
str = NULL;
|
||||
/* **Transfer** ownership of buffer's memory to the chunkObject: */
|
||||
chunk->base = buffer;
|
||||
buffer = NULL;
|
||||
chunk->len = (Py_ssize_t)len;
|
||||
|
||||
/* size_t->Py_ssize_t cast was validated above: */
|
||||
chunk->len = (Py_ssize_t) len;
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
if ((res = PyBuffer_FromObject((PyObject *)chunk, 0, chunk->len)) == NULL)
|
||||
goto fail;
|
||||
goto exit;
|
||||
#else
|
||||
if ((res = PyMemoryView_FromObject((PyObject*)chunk)) == NULL)
|
||||
goto fail;
|
||||
goto exit;
|
||||
#endif
|
||||
/* PyBuffer_FromObject() created a new reference. We'll release our
|
||||
* reference held in 'chunk' in the 'cleanup' clause. */
|
||||
|
||||
goto cleanup;
|
||||
fail:
|
||||
assert (PyErr_Occurred());
|
||||
if (res != NULL) {
|
||||
Py_DECREF(res);
|
||||
res = NULL;
|
||||
}
|
||||
/* Fall through to cleanup: */
|
||||
cleanup:
|
||||
if (chunk != NULL) {
|
||||
Py_DECREF((PyObject *) chunk);
|
||||
}
|
||||
if (str != NULL) {
|
||||
/* str's mem was allocated by PQunescapeBytea; must use PQfreemem: */
|
||||
PQfreemem(str);
|
||||
}
|
||||
/* We allocated buffer with PyMem_Malloc; must use PyMem_Free: */
|
||||
exit:
|
||||
Py_XDECREF((PyObject *)chunk);
|
||||
PyMem_Free(buffer);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
static const char hex_lut[128] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
|
||||
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
/* Parse a bytea output buffer encoded in 'hex' format.
|
||||
*
|
||||
* the format is described in
|
||||
* http://www.postgresql.org/docs/9.0/static/datatype-binary.html
|
||||
*
|
||||
* Parse the buffer in 'bufin', whose length is 'sizein'.
|
||||
* Return a new buffer allocated by PyMem_Malloc and set 'sizeout' to its size.
|
||||
* In case of error set an exception and return NULL.
|
||||
*/
|
||||
static char *
|
||||
psycopg_parse_hex(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout)
|
||||
{
|
||||
char *ret = NULL;
|
||||
const char *bufend = bufin + sizein;
|
||||
const char *pi = bufin + 2; /* past the \x */
|
||||
char *bufout;
|
||||
char *po;
|
||||
|
||||
po = bufout = PyMem_Malloc((sizein - 2) >> 1); /* output size upper bound */
|
||||
if (NULL == bufout) {
|
||||
PyErr_NoMemory();
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* Implementation note: we call this function upon database response, not
|
||||
* user input (because we are parsing the output format of a buffer) so we
|
||||
* don't expect errors. On bad input we reserve the right to return a bad
|
||||
* output, not an error.
|
||||
*/
|
||||
while (pi < bufend) {
|
||||
char c;
|
||||
while (-1 == (c = hex_lut[*pi++ & '\x7f'])) {
|
||||
if (pi >= bufend) { goto endloop; }
|
||||
}
|
||||
*po = c << 4;
|
||||
|
||||
while (-1 == (c = hex_lut[*pi++ & '\x7f'])) {
|
||||
if (pi >= bufend) { goto endloop; }
|
||||
}
|
||||
*po++ |= c;
|
||||
}
|
||||
endloop:
|
||||
|
||||
ret = bufout;
|
||||
*sizeout = po - bufout;
|
||||
|
||||
exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Parse a bytea output buffer encoded in 'escape' format.
|
||||
*
|
||||
* the format is described in
|
||||
* http://www.postgresql.org/docs/9.0/static/datatype-binary.html
|
||||
*
|
||||
* Parse the buffer in 'bufin', whose length is 'sizein'.
|
||||
* Return a new buffer allocated by PyMem_Malloc and set 'sizeout' to its size.
|
||||
* In case of error set an exception and return NULL.
|
||||
*/
|
||||
static char *
|
||||
psycopg_parse_escape(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout)
|
||||
{
|
||||
char *ret = NULL;
|
||||
const char *bufend = bufin + sizein;
|
||||
const char *pi = bufin;
|
||||
char *bufout;
|
||||
char *po;
|
||||
|
||||
po = bufout = PyMem_Malloc(sizein); /* output size upper bound */
|
||||
if (NULL == bufout) {
|
||||
PyErr_NoMemory();
|
||||
goto exit;
|
||||
}
|
||||
|
||||
while (pi < bufend) {
|
||||
if (*pi != '\\') {
|
||||
/* Unescaped char */
|
||||
*po++ = *pi++;
|
||||
continue;
|
||||
}
|
||||
if ((pi[1] >= '0' && pi[1] <= '3') &&
|
||||
(pi[2] >= '0' && pi[2] <= '7') &&
|
||||
(pi[3] >= '0' && pi[3] <= '7'))
|
||||
{
|
||||
/* Escaped octal value */
|
||||
*po++ = ((pi[1] - '0') << 6) |
|
||||
((pi[2] - '0') << 3) |
|
||||
((pi[3] - '0'));
|
||||
pi += 4;
|
||||
}
|
||||
else {
|
||||
/* Escaped char */
|
||||
*po++ = pi[1];
|
||||
pi += 2;
|
||||
}
|
||||
}
|
||||
|
||||
ret = bufout;
|
||||
*sizeout = po - bufout;
|
||||
|
||||
exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -140,24 +140,6 @@ def skip_if_no_namedtuple(f):
|
|||
return skip_if_no_namedtuple_
|
||||
|
||||
|
||||
def skip_if_broken_hex_binary(f):
|
||||
"""Decorator to detect libpq < 9.0 unable to parse bytea in hex format"""
|
||||
def cope_with_hex_binary_(self):
|
||||
from psycopg2 import InterfaceError
|
||||
try:
|
||||
return f(self)
|
||||
except InterfaceError, e:
|
||||
if '9.0' in str(e) and self.conn.server_version >= 90000:
|
||||
return self.skipTest(
|
||||
# FIXME: we are only assuming the libpq is older here,
|
||||
# but we don't have a reliable way to detect the libpq
|
||||
# version, not pre-9 at least.
|
||||
"bytea broken with server >= 9.0, libpq < 9")
|
||||
else:
|
||||
raise
|
||||
|
||||
return cope_with_hex_binary_
|
||||
|
||||
def skip_if_no_iobase(f):
|
||||
"""Skip a test if io.TextIOBase is not available."""
|
||||
def skip_if_no_iobase_(self):
|
||||
|
|
|
@ -28,7 +28,7 @@ except:
|
|||
pass
|
||||
import sys
|
||||
import testutils
|
||||
from testutils import unittest, skip_if_broken_hex_binary
|
||||
from testutils import unittest
|
||||
from testconfig import dsn
|
||||
|
||||
import psycopg2
|
||||
|
@ -116,7 +116,6 @@ class TypesBasicTests(unittest.TestCase):
|
|||
s = self.execute("SELECT %s AS foo", (float("-inf"),))
|
||||
self.failUnless(str(s) == "-inf", "wrong float quoting: " + str(s))
|
||||
|
||||
@skip_if_broken_hex_binary
|
||||
def testBinary(self):
|
||||
if sys.version_info[0] < 3:
|
||||
s = ''.join([chr(x) for x in range(256)])
|
||||
|
@ -143,7 +142,6 @@ class TypesBasicTests(unittest.TestCase):
|
|||
b = psycopg2.Binary(bytes([]))
|
||||
self.assertEqual(str(b), "''::bytea")
|
||||
|
||||
@skip_if_broken_hex_binary
|
||||
def testBinaryRoundTrip(self):
|
||||
# test to make sure buffers returned by psycopg2 are
|
||||
# understood by execute:
|
||||
|
@ -191,7 +189,6 @@ class TypesBasicTests(unittest.TestCase):
|
|||
s = self.execute("SELECT '{}'::text AS foo")
|
||||
self.failUnlessEqual(s, "{}")
|
||||
|
||||
@skip_if_broken_hex_binary
|
||||
@testutils.skip_from_python(3)
|
||||
def testTypeRoundtripBuffer(self):
|
||||
o1 = buffer("".join(map(chr, range(256))))
|
||||
|
@ -204,7 +201,6 @@ class TypesBasicTests(unittest.TestCase):
|
|||
self.assertEqual(type(o1), type(o2))
|
||||
self.assertEqual(str(o1), str(o2))
|
||||
|
||||
@skip_if_broken_hex_binary
|
||||
@testutils.skip_from_python(3)
|
||||
def testTypeRoundtripBufferArray(self):
|
||||
o1 = buffer("".join(map(chr, range(256))))
|
||||
|
@ -213,7 +209,6 @@ class TypesBasicTests(unittest.TestCase):
|
|||
self.assertEqual(type(o1[0]), type(o2[0]))
|
||||
self.assertEqual(str(o1[0]), str(o2[0]))
|
||||
|
||||
@skip_if_broken_hex_binary
|
||||
@testutils.skip_before_python(3)
|
||||
def testTypeRoundtripBytes(self):
|
||||
o1 = bytes(range(256))
|
||||
|
@ -225,7 +220,6 @@ class TypesBasicTests(unittest.TestCase):
|
|||
o2 = self.execute("select %s;", (o1,))
|
||||
self.assertEqual(memoryview, type(o2))
|
||||
|
||||
@skip_if_broken_hex_binary
|
||||
@testutils.skip_before_python(3)
|
||||
def testTypeRoundtripBytesArray(self):
|
||||
o1 = bytes(range(256))
|
||||
|
@ -233,7 +227,6 @@ class TypesBasicTests(unittest.TestCase):
|
|||
o2 = self.execute("select %s;", (o1,))
|
||||
self.assertEqual(memoryview, type(o2[0]))
|
||||
|
||||
@skip_if_broken_hex_binary
|
||||
@testutils.skip_before_python(2, 6)
|
||||
def testAdaptBytearray(self):
|
||||
o1 = bytearray(range(256))
|
||||
|
@ -258,7 +251,6 @@ class TypesBasicTests(unittest.TestCase):
|
|||
else:
|
||||
self.assertEqual(memoryview, type(o2))
|
||||
|
||||
@skip_if_broken_hex_binary
|
||||
@testutils.skip_before_python(2, 7)
|
||||
def testAdaptMemoryview(self):
|
||||
o1 = memoryview(bytearray(range(256)))
|
||||
|
@ -335,6 +327,92 @@ class AdaptSubclassTest(unittest.TestCase):
|
|||
del psycopg2.extensions.adapters[A, psycopg2.extensions.ISQLQuote]
|
||||
|
||||
|
||||
class ByteaParserTest(unittest.TestCase):
|
||||
"""Unit test for our bytea format parser."""
|
||||
def setUp(self):
|
||||
try:
|
||||
self._cast = self._import_cast()
|
||||
except Exception, e:
|
||||
return self.skipTest("can't test bytea parser: %s - %s"
|
||||
% (e.__class__.__name__, e))
|
||||
|
||||
def _import_cast(self):
|
||||
"""Use ctypes to access the C function.
|
||||
|
||||
Raise any sort of error: we just support this where ctypes works as
|
||||
expected.
|
||||
"""
|
||||
import ctypes
|
||||
lib = ctypes.cdll.LoadLibrary(psycopg2._psycopg.__file__)
|
||||
cast = lib.typecast_BINARY_cast
|
||||
cast.argtypes = [ctypes.c_char_p, ctypes.c_size_t, ctypes.py_object]
|
||||
cast.restype = ctypes.py_object
|
||||
return cast
|
||||
|
||||
def cast(self, buffer):
|
||||
"""Cast a buffer from the output format"""
|
||||
l = buffer and len(buffer) or 0
|
||||
rv = self._cast(buffer, l, None)
|
||||
|
||||
if rv is None:
|
||||
return None
|
||||
|
||||
if sys.version_info[0] < 3:
|
||||
return str(rv)
|
||||
else:
|
||||
return rv.tobytes()
|
||||
|
||||
def test_null(self):
|
||||
rv = self.cast(None)
|
||||
self.assertEqual(rv, None)
|
||||
|
||||
def test_blank(self):
|
||||
rv = self.cast(b(''))
|
||||
self.assertEqual(rv, b(''))
|
||||
|
||||
def test_blank_hex(self):
|
||||
# Reported as problematic in ticket #48
|
||||
rv = self.cast(b('\\x'))
|
||||
self.assertEqual(rv, b(''))
|
||||
|
||||
def test_full_hex(self, upper=False):
|
||||
buf = ''.join(("%02x" % i) for i in range(256))
|
||||
if upper: buf = buf.upper()
|
||||
buf = '\\x' + buf
|
||||
rv = self.cast(b(buf))
|
||||
if sys.version_info[0] < 3:
|
||||
self.assertEqual(rv, ''.join(map(chr, range(256))))
|
||||
else:
|
||||
self.assertEqual(rv, bytes(range(256)))
|
||||
|
||||
def test_full_hex_upper(self):
|
||||
return self.test_full_hex(upper=True)
|
||||
|
||||
def test_full_escaped_octal(self):
|
||||
buf = ''.join(("\\%03o" % i) for i in range(256))
|
||||
rv = self.cast(b(buf))
|
||||
if sys.version_info[0] < 3:
|
||||
self.assertEqual(rv, ''.join(map(chr, range(256))))
|
||||
else:
|
||||
self.assertEqual(rv, bytes(range(256)))
|
||||
|
||||
def test_escaped_mixed(self):
|
||||
import string
|
||||
buf = ''.join(("\\%03o" % i) for i in range(32))
|
||||
buf += string.ascii_letters
|
||||
buf += ''.join('\\' + c for c in string.ascii_letters)
|
||||
buf += '\\\\'
|
||||
rv = self.cast(b(buf))
|
||||
if sys.version_info[0] < 3:
|
||||
tgt = ''.join(map(chr, range(32))) \
|
||||
+ string.ascii_letters * 2 + '\\'
|
||||
else:
|
||||
tgt = bytes(range(32)) + \
|
||||
(string.ascii_letters * 2 + '\\').encode('ascii')
|
||||
|
||||
self.assertEqual(rv, tgt)
|
||||
|
||||
|
||||
def test_suite():
    """Return a suite with all the tests found in this module."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromName(__name__)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user