Merge remote-tracking branch 'origin/register-bytes'

Close #835
This commit is contained in:
Daniele Varrazzo 2019-01-19 15:32:46 +00:00
commit 5b08dc45b1
10 changed files with 100 additions and 18 deletions

2
NEWS
View File

@ -9,6 +9,8 @@ New features:
- Added `~psycopg2.errors` module. Every PostgreSQL error is converted into
a specific exception class (:ticket:`#682`).
- Added `~psycopg2.extensions.encrypt_password()` function (:ticket:`#576`).
- Added `~psycopg2.extensions.BYTES` typecaster to manage databases with mixed
encodings on Python 3 (:ticket:`#835`).
- Added `~psycopg2.extensions.Column.table_oid` and
`~psycopg2.extensions.Column.table_column` attributes on `cursor.description`
items (:ticket:`#661`).

View File

@ -947,6 +947,7 @@ Python objects. All the typecasters are automatically registered, except
from the database. See :ref:`unicode-handling` for details.
.. data:: BOOLEAN
BYTES
DATE
DECIMAL
FLOAT
@ -963,6 +964,7 @@ from the database. See :ref:`unicode-handling` for details.
.. data:: BINARYARRAY
BOOLEANARRAY
BYTESARRAY
DATEARRAY
DATETIMEARRAY
DECIMALARRAY
@ -1011,5 +1013,8 @@ from the database. See :ref:`unicode-handling` for details.
module. In older versions they can be imported from the implementation
module `!psycopg2._psycopg`.
.. versionchanged:: 2.7.2
added `!*DATETIMETZ*` objects.
.. versionadded:: 2.7.2
the `!*DATETIMETZ*` objects.
.. versionadded:: 2.8
the `!BYTES` and `!BYTESARRAY` objects.

View File

@ -108,6 +108,19 @@ My database is Unicode, but I receive all the strings as UTF-8 `!str`. Can I rec
See :ref:`unicode-handling` for the gory details.
.. _faq-bytes:
.. cssclass:: faq
My database is in mixed encoding. My program was working on Python 2 but Python 3 fails decoding the strings. How do I avoid decoding?
From psycopg 2.8 you can register the following typecasters to always receive
text data as `!bytes`, without any decoding::
psycopg2.extensions.register_type(psycopg2.extensions.BYTES)
psycopg2.extensions.register_type(psycopg2.extensions.BYTESARRAY)
See :ref:`unicode-handling` for an example.
.. _faq-float:
.. cssclass:: faq

View File

@ -457,13 +457,29 @@ the connection or globally: see the function
Unicode, you can register the related typecasters globally as soon as
Psycopg is imported::
import psycopg2
import psycopg2.extensions
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
and forget about this story.
.. note::
In some cases, on Python 3, you may want to receive `!bytes` instead of
`!str`, without undergoing any decoding. This is especially the case if
the data in the database is in mixed encoding. The
`~psycopg2.extensions.BYTES` caster is what you need::
import psycopg2.extensions
psycopg2.extensions.register_type(psycopg2.extensions.BYTES, conn)
psycopg2.extensions.register_type(psycopg2.extensions.BYTESARRAY, conn)
cur = conn.cursor()
cur.execute("select %s::text", (u"€",))
cur.fetchone()[0]
b'\xe2\x82\xac'
.. versionadded:: 2.8
.. index::
single: Buffer; Adaptation

View File

@ -35,10 +35,10 @@ This module holds all the extensions to the DBAPI-2.0 provided by psycopg.
import re as _re
from psycopg2._psycopg import ( # noqa
BINARYARRAY, BOOLEAN, BOOLEANARRAY, DATE, DATEARRAY, DATETIMEARRAY,
DECIMAL, DECIMALARRAY, FLOAT, FLOATARRAY, INTEGER, INTEGERARRAY,
INTERVAL, INTERVALARRAY, LONGINTEGER, LONGINTEGERARRAY, ROWIDARRAY,
STRINGARRAY, TIME, TIMEARRAY, UNICODE, UNICODEARRAY,
BINARYARRAY, BOOLEAN, BOOLEANARRAY, BYTES, BYTESARRAY, DATE, DATEARRAY,
DATETIMEARRAY, DECIMAL, DECIMALARRAY, FLOAT, FLOATARRAY, INTEGER,
INTEGERARRAY, INTERVAL, INTERVALARRAY, LONGINTEGER, LONGINTEGERARRAY,
ROWIDARRAY, STRINGARRAY, TIME, TIMEARRAY, UNICODE, UNICODEARRAY,
AsIs, Binary, Boolean, Float, Int, QuotedString, )
try:

View File

@ -286,6 +286,7 @@ typecast_GENERIC_ARRAY_cast(const char *str, Py_ssize_t len, PyObject *curs)
#define typecast_DECIMALARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_STRINGARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_UNICODEARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_BYTESARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_BOOLEANARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_DATETIMEARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_DATETIMETZARRAY_cast typecast_GENERIC_ARRAY_cast

View File

@ -75,18 +75,16 @@ typecast_FLOAT_cast(const char *s, Py_ssize_t len, PyObject *curs)
return flo;
}
/** STRING - cast strings of any type to python string **/
#if PY_MAJOR_VERSION < 3
/** BYTES - cast strings of any type to python bytes **/
static PyObject *
typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs)
typecast_BYTES_cast(const char *s, Py_ssize_t len, PyObject *curs)
{
if (s == NULL) { Py_RETURN_NONE; }
return PyString_FromStringAndSize(s, len);
return Bytes_FromStringAndSize(s, len);
}
#else
#define typecast_STRING_cast typecast_UNICODE_cast
#endif
/** UNICODE - cast strings of any type to a python unicode object **/
@ -101,6 +99,16 @@ typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs)
return conn_decode(conn, s, len);
}
/** STRING - cast strings of any type to python string **/
#if PY_MAJOR_VERSION < 3
#define typecast_STRING_cast typecast_BYTES_cast
#else
#define typecast_STRING_cast typecast_UNICODE_cast
#endif
/** BOOLEAN - cast boolean value into right python object **/
static PyObject *

View File

@ -3,7 +3,6 @@ static long int typecast_LONGINTEGER_types[] = {20, 0};
static long int typecast_INTEGER_types[] = {23, 21, 0};
static long int typecast_FLOAT_types[] = {701, 700, 0};
static long int typecast_DECIMAL_types[] = {1700, 0};
static long int typecast_UNICODE_types[] = {19, 18, 25, 1042, 1043, 0};
static long int typecast_STRING_types[] = {19, 18, 25, 1042, 1043, 0};
static long int typecast_BOOLEAN_types[] = {16, 0};
static long int typecast_DATETIME_types[] = {1114, 0};
@ -17,7 +16,6 @@ static long int typecast_LONGINTEGERARRAY_types[] = {1016, 0};
static long int typecast_INTEGERARRAY_types[] = {1005, 1006, 1007, 0};
static long int typecast_FLOATARRAY_types[] = {1021, 1022, 0};
static long int typecast_DECIMALARRAY_types[] = {1231, 0};
static long int typecast_UNICODEARRAY_types[] = {1002, 1003, 1009, 1014, 1015, 0};
static long int typecast_STRINGARRAY_types[] = {1002, 1003, 1009, 1014, 1015, 0};
static long int typecast_BOOLEANARRAY_types[] = {1000, 0};
static long int typecast_DATETIMEARRAY_types[] = {1115, 0};
@ -39,7 +37,8 @@ static typecastObject_initlist typecast_builtins[] = {
{"INTEGER", typecast_INTEGER_types, typecast_INTEGER_cast, NULL},
{"FLOAT", typecast_FLOAT_types, typecast_FLOAT_cast, NULL},
{"DECIMAL", typecast_DECIMAL_types, typecast_DECIMAL_cast, NULL},
{"UNICODE", typecast_UNICODE_types, typecast_UNICODE_cast, NULL},
{"UNICODE", typecast_STRING_types, typecast_UNICODE_cast, NULL},
{"BYTES", typecast_STRING_types, typecast_BYTES_cast, NULL},
{"STRING", typecast_STRING_types, typecast_STRING_cast, NULL},
{"BOOLEAN", typecast_BOOLEAN_types, typecast_BOOLEAN_cast, NULL},
{"DATETIME", typecast_DATETIME_types, typecast_DATETIME_cast, NULL},
@ -53,7 +52,8 @@ static typecastObject_initlist typecast_builtins[] = {
{"INTEGERARRAY", typecast_INTEGERARRAY_types, typecast_INTEGERARRAY_cast, "INTEGER"},
{"FLOATARRAY", typecast_FLOATARRAY_types, typecast_FLOATARRAY_cast, "FLOAT"},
{"DECIMALARRAY", typecast_DECIMALARRAY_types, typecast_DECIMALARRAY_cast, "DECIMAL"},
{"UNICODEARRAY", typecast_UNICODEARRAY_types, typecast_UNICODEARRAY_cast, "UNICODE"},
{"UNICODEARRAY", typecast_STRINGARRAY_types, typecast_UNICODEARRAY_cast, "UNICODE"},
{"BYTESARRAY", typecast_STRINGARRAY_types, typecast_BYTESARRAY_cast, "BYTES"},
{"STRINGARRAY", typecast_STRINGARRAY_types, typecast_STRINGARRAY_cast, "STRING"},
{"BOOLEANARRAY", typecast_BOOLEANARRAY_types, typecast_BOOLEANARRAY_cast, "BOOLEAN"},
{"DATETIMEARRAY", typecast_DATETIMEARRAY_types, typecast_DATETIMEARRAY_cast, "DATETIME"},

View File

@ -170,6 +170,17 @@ class QuotingTestCase(ConnectingTestCase):
self.assertEqual(res, data)
self.assert_(not self.conn.notices)
def test_bytes(self):
    """Text fetched through the BYTES typecaster comes back as raw bytes.

    A snowman character is sent as a query parameter on a connection that
    has BYTES registered; the fetched value must be a `bytes` object equal
    to the UTF-8 encoding of the original string.
    """
    snowman = u"\u2603"
    conn = self.connect()
    conn.set_client_encoding('UNICODE')
    # Register on this connection only, so other tests are unaffected.
    psycopg2.extensions.register_type(psycopg2.extensions.BYTES, conn)
    curs = conn.cursor()
    curs.execute("select %s::text", (snowman,))
    fetched = curs.fetchone()[0]
    # assertIsInstance replaces the deprecated assert_ alias and reports
    # the actual type on failure.
    self.assertIsInstance(fetched, bytes)
    self.assertEqual(fetched, snowman.encode('utf8'))
class TestQuotedString(ConnectingTestCase):
def test_encoding_from_conn(self):

View File

@ -32,6 +32,7 @@ import unittest
from .testutils import ConnectingTestCase, long
import psycopg2
from psycopg2.compat import text_type
class TypesBasicTests(ConnectingTestCase):
@ -208,6 +209,31 @@ class TypesBasicTests(ConnectingTestCase):
self.assertRaises(psycopg2.DataError,
psycopg2.extensions.STRINGARRAY, s.encode('utf8'), curs)
def testTextArray(self):
    """A text[] value is returned as a list of `str` by default."""
    curs = self.conn.cursor()
    curs.execute("select '{a,b,c}'::text[]")
    items = curs.fetchone()[0]
    # assertIsInstance replaces the deprecated assert_ alias and reports
    # the actual type on failure.
    self.assertIsInstance(items[0], str)
    self.assertEqual(items, ['a', 'b', 'c'])
def testUnicodeArray(self):
    """With UNICODEARRAY registered, text[] items are `text_type` objects."""
    # Register on this connection only, so other tests are unaffected.
    psycopg2.extensions.register_type(
        psycopg2.extensions.UNICODEARRAY, self.conn)
    curs = self.conn.cursor()
    curs.execute("select '{a,b,c}'::text[]")
    items = curs.fetchone()[0]
    # assertIsInstance replaces the deprecated assert_ alias and reports
    # the actual type on failure.
    self.assertIsInstance(items[0], text_type)
    self.assertEqual(items, [u'a', u'b', u'c'])
def testBytesArray(self):
    """With BYTESARRAY registered, text[] items are `bytes` objects."""
    # Register on this connection only, so other tests are unaffected.
    psycopg2.extensions.register_type(
        psycopg2.extensions.BYTESARRAY, self.conn)
    curs = self.conn.cursor()
    curs.execute("select '{a,b,c}'::text[]")
    items = curs.fetchone()[0]
    # assertIsInstance replaces the deprecated assert_ alias and reports
    # the actual type on failure.
    self.assertIsInstance(items[0], bytes)
    self.assertEqual(items, [b'a', b'b', b'c'])
@testutils.skip_before_postgres(8, 2)
def testArrayOfNulls(self):
curs = self.conn.cursor()