Merge branch 'fast-codecs'

This commit is contained in:
Daniele Varrazzo 2016-12-29 22:15:46 +01:00
commit 449bd4485f
18 changed files with 361 additions and 179 deletions

2
NEWS
View File

@ -25,6 +25,8 @@ New features:
- Added `~psycopg2.extensions.quote_ident()` function (:ticket:`#359`). - Added `~psycopg2.extensions.quote_ident()` function (:ticket:`#359`).
- Added `~connection.get_dsn_parameters()` connection method (:ticket:`#364`). - Added `~connection.get_dsn_parameters()` connection method (:ticket:`#364`).
- `~cursor.callproc()` now accepts a dictionary of parameters (:ticket:`#381`). - `~cursor.callproc()` now accepts a dictionary of parameters (:ticket:`#381`).
- Using Python C API decoding functions and codecs caching for faster
unicode encoding/decoding (:ticket:`#473`).
Other changes: Other changes:

View File

@ -417,7 +417,7 @@ details.
.. data:: encodings .. data:: encodings
Mapping from `PostgreSQL encoding`__ names to `Python codec`__ names. Mapping from `PostgreSQL encoding`__ to `Python encoding`__ names.
Used by Psycopg when adapting or casting unicode strings. See Used by Psycopg when adapting or casting unicode strings. See
:ref:`unicode-handling`. :ref:`unicode-handling`.

View File

@ -355,7 +355,7 @@ Unicode handling
Psycopg can exchange Unicode data with a PostgreSQL database. Python Psycopg can exchange Unicode data with a PostgreSQL database. Python
`!unicode` objects are automatically *encoded* in the client encoding `!unicode` objects are automatically *encoded* in the client encoding
defined on the database connection (the `PostgreSQL encoding`__, available in defined on the database connection (the `PostgreSQL encoding`__, available in
`connection.encoding`, is translated into a `Python codec`__ using the `connection.encoding`, is translated into a `Python encoding`__ using the
`~psycopg2.extensions.encodings` mapping):: `~psycopg2.extensions.encodings` mapping)::
>>> print u, type(u) >>> print u, type(u)

View File

@ -36,20 +36,6 @@ static const char *default_encoding = "latin1";
/* qstring_quote - do the quote process on plain and unicode strings */ /* qstring_quote - do the quote process on plain and unicode strings */
const char *
_qstring_get_encoding(qstringObject *self)
{
/* if the wrapped object is an unicode object we can encode it to match
conn->encoding but if the encoding is not specified we don't know what
to do and we raise an exception */
if (self->conn) {
return self->conn->codec;
}
else {
return self->encoding ? self->encoding : default_encoding;
}
}
static PyObject * static PyObject *
qstring_quote(qstringObject *self) qstring_quote(qstringObject *self)
{ {
@ -59,19 +45,15 @@ qstring_quote(qstringObject *self)
const char *encoding; const char *encoding;
PyObject *rv = NULL; PyObject *rv = NULL;
encoding = _qstring_get_encoding(self);
Dprintf("qstring_quote: encoding to %s", encoding);
if (PyUnicode_Check(self->wrapped)) { if (PyUnicode_Check(self->wrapped)) {
if (encoding) { if (self->conn) {
str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL); if (!(str = conn_encode(self->conn, self->wrapped))) { goto exit; }
Dprintf("qstring_quote: got encoded object at %p", str);
if (str == NULL) goto exit;
} }
else { else {
PyErr_SetString(PyExc_TypeError, encoding = self->encoding ? self->encoding : default_encoding;
"missing encoding to encode unicode object"); if(!(str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL))) {
goto exit; goto exit;
}
} }
} }
@ -162,9 +144,12 @@ qstring_conform(qstringObject *self, PyObject *args)
static PyObject * static PyObject *
qstring_get_encoding(qstringObject *self) qstring_get_encoding(qstringObject *self)
{ {
const char *encoding; if (self->conn) {
encoding = _qstring_get_encoding(self); return conn_pgenc_to_pyenc(self->conn->encoding, NULL);
return Text_FromUTF8(encoding); }
else {
return Text_FromUTF8(self->encoding ? self->encoding : default_encoding);
}
} }
static int static int
@ -178,7 +163,7 @@ qstring_set_encoding(qstringObject *self, PyObject *pyenc)
Py_INCREF(pyenc); Py_INCREF(pyenc);
if (!(pyenc = psycopg_ensure_bytes(pyenc))) { goto exit; } if (!(pyenc = psycopg_ensure_bytes(pyenc))) { goto exit; }
if (!(tmp = Bytes_AsString(pyenc))) { goto exit; } if (!(tmp = Bytes_AsString(pyenc))) { goto exit; }
if (0 > psycopg_strdup(&cenc, tmp, 0)) { goto exit; } if (0 > psycopg_strdup(&cenc, tmp, -1)) { goto exit; }
Dprintf("qstring_set_encoding: encoding set to %s", cenc); Dprintf("qstring_set_encoding: encoding set to %s", cenc);
PyMem_Free((void *)self->encoding); PyMem_Free((void *)self->encoding);

View File

@ -83,7 +83,6 @@ struct connectionObject {
char *dsn; /* data source name */ char *dsn; /* data source name */
char *critical; /* critical error on this connection */ char *critical; /* critical error on this connection */
char *encoding; /* current backend encoding */ char *encoding; /* current backend encoding */
char *codec; /* python codec name for encoding */
long int closed; /* 1 means connection has been closed; long int closed; /* 1 means connection has been closed;
2 that something horrible happened */ 2 that something horrible happened */
@ -122,6 +121,14 @@ struct connectionObject {
int autocommit; int autocommit;
PyObject *cursor_factory; /* default cursor factory from cursor() */ PyObject *cursor_factory; /* default cursor factory from cursor() */
/* Optional pointer to a decoding C function, e.g. PyUnicode_DecodeUTF8 */
PyObject *(*cdecoder)(const char *, Py_ssize_t, const char *);
/* Pointers to python encoding/decoding functions, e.g.
* codecs.getdecoder('utf8') */
PyObject *pyencoder; /* python codec encoding function */
PyObject *pydecoder; /* python codec decoding function */
}; };
/* map isolation level values into a numeric const */ /* map isolation level values into a numeric const */
@ -132,7 +139,10 @@ typedef struct {
/* C-callable functions in connection_int.c and connection_ext.c */ /* C-callable functions in connection_int.c and connection_ext.c */
HIDDEN PyObject *conn_text_from_chars(connectionObject *pgconn, const char *str); HIDDEN PyObject *conn_text_from_chars(connectionObject *pgconn, const char *str);
HIDDEN PyObject *conn_encode(connectionObject *self, PyObject *b);
HIDDEN PyObject *conn_decode(connectionObject *self, const char *str, Py_ssize_t len);
HIDDEN int conn_get_standard_conforming_strings(PGconn *pgconn); HIDDEN int conn_get_standard_conforming_strings(PGconn *pgconn);
HIDDEN PyObject *conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding);
RAISES_NEG HIDDEN int conn_get_isolation_level(connectionObject *self); RAISES_NEG HIDDEN int conn_get_isolation_level(connectionObject *self);
HIDDEN int conn_get_protocol_version(PGconn *pgconn); HIDDEN int conn_get_protocol_version(PGconn *pgconn);
HIDDEN int conn_get_server_version(PGconn *pgconn); HIDDEN int conn_get_server_version(PGconn *pgconn);

View File

@ -58,12 +58,75 @@ const IsolationLevel conn_isolevels[] = {
PyObject * PyObject *
conn_text_from_chars(connectionObject *self, const char *str) conn_text_from_chars(connectionObject *self, const char *str)
{ {
#if PY_MAJOR_VERSION < 3 return psycopg_text_from_chars_safe(str, -1, self ? self->pydecoder : NULL);
return PyString_FromString(str); }
#else
const char *codec = self ? self->codec : "ascii";
return PyUnicode_Decode(str, strlen(str), codec, "replace"); /* Encode an unicode object into a bytes object in the connection encoding.
#endif *
* If no connection or encoding is available, default to utf8
*/
PyObject *
conn_encode(connectionObject *self, PyObject *u)
{
    PyObject *encoded = NULL;
    PyObject *pair;

    /* Without a connection, or without an encoder stored on it, encode
     * the object as UTF-8 and hand the result (or the error) back as is. */
    if (self == NULL || self->pyencoder == NULL) {
        return PyUnicode_AsUTF8String(u);
    }

    /* Call the stored encoder with the unicode object as only argument. */
    pair = PyObject_CallFunctionObjArgs(self->pyencoder, u, NULL);
    if (pair == NULL) {
        return NULL;
    }

    /* The result is read as a tuple: item 0 is what we return.
     * PyTuple_GetItem gives a borrowed reference, so take our own before
     * releasing the tuple. On failure it returns NULL with the exception
     * set and we propagate that. */
    encoded = PyTuple_GetItem(pair, 0);
    Py_XINCREF(encoded);

    Py_DECREF(pair);
    return encoded;
}
/* Decode a C string into a Python unicode object in the connection encoding.
 *
 * self: the connection whose decoders are used; may be NULL.
 * str:  the bytes to decode. It is passed to strlen() when len < 0, so it
 *       must be a valid NUL-terminated string in that case.
 * len:  number of bytes to decode; if < 0 it is calculated with strlen().
 *
 * Decoder selection, in order:
 *   1. self->cdecoder, a C decoding function, called as (str, len, NULL);
 *   2. self->pydecoder, a Python callable, called with a bytes object:
 *      item 0 of the tuple it returns is the result;
 *   3. if there is no connection, or the connection has no decoder set,
 *      default to utf8 (PyUnicode_FromStringAndSize).
 *
 * Return a new reference, or NULL with a Python exception set.
 */
PyObject *
conn_decode(connectionObject *self, const char *str, Py_ssize_t len)
{
    PyObject *b = NULL;
    PyObject *t = NULL;
    PyObject *rv = NULL;

    if (len < 0) { len = strlen(str); }

    /* Fast path: a C-level decoder is available. */
    if (self && self->cdecoder) {
        return self->cdecoder(str, len, NULL);
    }

    /* No connection, or a connection with no decoder at all: default to
     * utf8, as documented above and as conn_encode() does for encoding.
     * Without this branch a connection with neither decoder set would make
     * the function return NULL with no exception set. */
    if (!(self && self->pydecoder)) {
        return PyUnicode_FromStringAndSize(str, len);
    }

    /* Slow path: wrap the data in a bytes object and call the Python
     * decoder on it. */
    if (!(b = Bytes_FromStringAndSize(str, len))) { goto exit; }
    if (!(t = PyObject_CallFunctionObjArgs(self->pydecoder, b, NULL))) {
        goto exit;
    }

    /* PyTuple_GetItem returns a borrowed reference: own it before the
     * tuple is released below. */
    if (!(rv = PyTuple_GetItem(t, 0))) { goto exit; }
    Py_INCREF(rv);

exit:
    Py_XDECREF(t);
    Py_XDECREF(b);
    return rv;
}
/* conn_notice_callback - process notices */ /* conn_notice_callback - process notices */
@ -321,92 +384,168 @@ exit:
return rv; return rv;
} }
/* Convert a PostgreSQL encoding to a Python codec. /* set fast access functions according to the currently selected encoding
*
* Set 'codec' to a new copy of the codec name allocated on the Python heap.
* Return 0 in case of success, else -1 and set an exception.
*
* 'enc' should be already normalized (uppercase, no - or _).
*/ */
RAISES_NEG static int static void
conn_encoding_to_codec(const char *enc, char **codec) conn_set_fast_codec(connectionObject *self)
{ {
char *tmp; Dprintf("conn_set_fast_codec: encoding=%s", self->encoding);
Py_ssize_t size;
PyObject *pyenc = NULL;
int rv = -1;
/* Find the Py codec name from the PG encoding */ if (0 == strcmp(self->encoding, "UTF8")) {
if (!(pyenc = PyDict_GetItemString(psycoEncodings, enc))) { Dprintf("conn_set_fast_codec: PyUnicode_DecodeUTF8");
self->cdecoder = PyUnicode_DecodeUTF8;
return;
}
if (0 == strcmp(self->encoding, "LATIN1")) {
Dprintf("conn_set_fast_codec: PyUnicode_DecodeLatin1");
self->cdecoder = PyUnicode_DecodeLatin1;
return;
}
Dprintf("conn_set_fast_codec: no fast codec");
self->cdecoder = NULL;
}
/* Return the Python encoding from a PostgreSQL encoding.
*
* Optionally return the clean version of the postgres encoding too
*/
PyObject *
conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding)
{
char *pgenc = NULL;
PyObject *rv = NULL;
if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; }
if (!(rv = PyDict_GetItemString(psycoEncodings, pgenc))) {
PyErr_Format(OperationalError, PyErr_Format(OperationalError,
"no Python codec for client encoding '%s'", enc); "no Python encoding for PostgreSQL encoding '%s'", pgenc);
goto exit; goto exit;
} }
Py_INCREF(rv);
/* Convert the codec in a bytes string to extract the c string. */ if (clean_encoding) {
Py_INCREF(pyenc); *clean_encoding = pgenc;
if (!(pyenc = psycopg_ensure_bytes(pyenc))) {
goto exit;
} }
else {
if (-1 == Bytes_AsStringAndSize(pyenc, &tmp, &size)) { PyMem_Free(pgenc);
goto exit;
} }
/* have our own copy of the python codec name */
rv = psycopg_strdup(codec, tmp, size);
exit: exit:
Py_XDECREF(pyenc);
return rv; return rv;
} }
/* Read the client encoding from the connection. /* Convert a Postgres encoding into Python encoding and decoding functions.
* *
* Store the encoding in the pgconn->encoding field and the name of the * Set clean_encoding to a clean version of the Postgres encoding name
* matching python codec in codec. The buffers are allocated on the Python * and pyenc and pydec to python codec functions.
* heap.
* *
* Return 0 on success, else nonzero. * Return 0 on success, else -1 and set an exception.
*/
RAISES_NEG static int
conn_get_python_codec(const char *encoding,
    char **clean_encoding, PyObject **pyenc, PyObject **pydec)
{
    char *cleaned = NULL;        /* cleaned-up PostgreSQL encoding name */
    PyObject *pyname = NULL;     /* Python encoding name */
    PyObject *encoder = NULL;
    PyObject *decoder = NULL;
    int status = -1;

    /* Map the PostgreSQL encoding to the Python encoding name; this also
     * hands back the cleaned PostgreSQL name. */
    pyname = conn_pgenc_to_pyenc(encoding, &cleaned);
    if (pyname == NULL) { goto cleanup; }

    /* We need the name as a C string: convert it to bytes first.
     * psycopg_ensure_bytes is declared STEALS(1), so the reference we held
     * is consumed here whether the call succeeds or not. */
    pyname = psycopg_ensure_bytes(pyname);
    if (pyname == NULL) { goto cleanup; }

    /* Look up the codec functions for that name. */
    encoder = PyCodec_Encoder(Bytes_AS_STRING(pyname));
    if (encoder == NULL) { goto cleanup; }

    decoder = PyCodec_Decoder(Bytes_AS_STRING(pyname));
    if (decoder == NULL) { goto cleanup; }

    /* Success: transfer ownership of everything to the caller and forget
     * our copies so that the cleanup below leaves them alone. */
    *pyenc = encoder;
    encoder = NULL;
    *pydec = decoder;
    decoder = NULL;
    *clean_encoding = cleaned;
    cleaned = NULL;
    status = 0;

cleanup:
    Py_XDECREF(encoder);
    Py_XDECREF(decoder);
    Py_XDECREF(pyname);
    PyMem_Free(cleaned);

    return status;
}
/* Record a new client encoding on the connection.
 *
 * Resolve `encoding` through conn_get_python_codec(), then replace
 * self->encoding, self->pyencoder and self->pydecoder with the values it
 * produced and refresh the fast C decoder via conn_set_fast_codec().
 * The connection is only modified once the lookup has succeeded.
 *
 * Return 0 on success, else -1 and set an exception.
 */
RAISES_NEG static int
conn_store_encoding(connectionObject *self, const char *encoding)
{
    char *new_pgenc = NULL;
    char *old_pgenc;
    PyObject *encoder = NULL;
    PyObject *decoder = NULL;

    if (conn_get_python_codec(encoding, &new_pgenc, &encoder, &decoder) < 0) {
        /* Release whatever may have been handed out before giving up. */
        Py_XDECREF(encoder);
        Py_XDECREF(decoder);
        PyMem_Free(new_pgenc);
        return -1;
    }

    /* Install the new encoding name before freeing the previous buffer,
     * so self->encoding never points to freed memory. */
    old_pgenc = self->encoding;
    self->encoding = new_pgenc;
    PyMem_Free(old_pgenc);

    /* Drop the previous codec functions and take over the new ones: the
     * references we own are moved into the connection. */
    Py_CLEAR(self->pyencoder);
    self->pyencoder = encoder;

    Py_CLEAR(self->pydecoder);
    self->pydecoder = decoder;

    /* self->encoding changed: pick the matching C decoder, if any. */
    conn_set_fast_codec(self);

    return 0;
}
/* Read the client encoding from the backend and store it in the connection.
*
* Return 0 on success, else -1.
*/ */
RAISES_NEG static int RAISES_NEG static int
conn_read_encoding(connectionObject *self, PGconn *pgconn) conn_read_encoding(connectionObject *self, PGconn *pgconn)
{ {
char *enc = NULL, *codec = NULL; const char *encoding;
const char *tmp;
int rv = -1; int rv = -1;
tmp = PQparameterStatus(pgconn, "client_encoding"); encoding = PQparameterStatus(pgconn, "client_encoding");
Dprintf("conn_connect: client encoding: %s", tmp ? tmp : "(none)"); Dprintf("conn_connect: client encoding: %s", encoding ? encoding : "(none)");
if (!tmp) { if (!encoding) {
PyErr_SetString(OperationalError, PyErr_SetString(OperationalError,
"server didn't return client encoding"); "server didn't return client encoding");
goto exit; goto exit;
} }
if (0 > clear_encoding_name(tmp, &enc)) { if (0 > conn_store_encoding(self, encoding)) {
goto exit; goto exit;
} }
/* Look for this encoding in Python codecs. */
if (0 > conn_encoding_to_codec(enc, &codec)) {
goto exit;
}
/* Good, success: store the encoding/codec in the connection. */
PyMem_Free(self->encoding);
self->encoding = enc;
enc = NULL;
PyMem_Free(self->codec);
self->codec = codec;
codec = NULL;
rv = 0; rv = 0;
exit: exit:
PyMem_Free(enc);
PyMem_Free(codec);
return rv; return rv;
} }
@ -1225,21 +1364,19 @@ endlock:
/* conn_set_client_encoding - switch client encoding on connection */ /* conn_set_client_encoding - switch client encoding on connection */
RAISES_NEG int RAISES_NEG int
conn_set_client_encoding(connectionObject *self, const char *enc) conn_set_client_encoding(connectionObject *self, const char *pgenc)
{ {
PGresult *pgres = NULL; PGresult *pgres = NULL;
char *error = NULL; char *error = NULL;
int res = -1; int res = -1;
char *codec = NULL;
char *clean_enc = NULL; char *clean_enc = NULL;
/* We must know what python encoding this encoding is. */
if (0 > clear_encoding_name(pgenc, &clean_enc)) { goto exit; }
/* If the current encoding is equal to the requested one we don't /* If the current encoding is equal to the requested one we don't
issue any query to the backend */ issue any query to the backend */
if (strcmp(self->encoding, enc) == 0) return 0; if (strcmp(self->encoding, clean_enc) == 0) return 0;
/* We must know what python codec this encoding is. */
if (0 > clear_encoding_name(enc, &clean_enc)) { goto exit; }
if (0 > conn_encoding_to_codec(clean_enc, &codec)) { goto exit; }
Py_BEGIN_ALLOW_THREADS; Py_BEGIN_ALLOW_THREADS;
pthread_mutex_lock(&self->lock); pthread_mutex_lock(&self->lock);
@ -1255,35 +1392,21 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
goto endlock; goto endlock;
} }
/* no error, we can proceed and store the new encoding */
{
char *tmp = self->encoding;
self->encoding = clean_enc;
PyMem_Free(tmp);
clean_enc = NULL;
}
/* Store the python codec too. */
{
char *tmp = self->codec;
self->codec = codec;
PyMem_Free(tmp);
codec = NULL;
}
Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)",
self->encoding, self->codec);
endlock: endlock:
pthread_mutex_unlock(&self->lock); pthread_mutex_unlock(&self->lock);
Py_END_ALLOW_THREADS; Py_END_ALLOW_THREADS;
if (res < 0) if (res < 0) {
pq_complete_error(self, &pgres, &error); pq_complete_error(self, &pgres, &error);
goto exit;
}
res = conn_store_encoding(self, pgenc);
Dprintf("conn_set_client_encoding: encoding set to %s", self->encoding);
exit: exit:
PyMem_Free(clean_enc); PyMem_Free(clean_enc);
PyMem_Free(codec);
return res; return res;
} }

View File

@ -1097,7 +1097,7 @@ connection_setup(connectionObject *self, const char *dsn, long int async)
self, async, Py_REFCNT(self) self, async, Py_REFCNT(self)
); );
if (0 > psycopg_strdup(&self->dsn, dsn, 0)) { goto exit; } if (0 > psycopg_strdup(&self->dsn, dsn, -1)) { goto exit; }
if (!(self->notice_list = PyList_New(0))) { goto exit; } if (!(self->notice_list = PyList_New(0))) { goto exit; }
if (!(self->notifies = PyList_New(0))) { goto exit; } if (!(self->notifies = PyList_New(0))) { goto exit; }
self->async = async; self->async = async;
@ -1141,6 +1141,9 @@ connection_clear(connectionObject *self)
Py_CLEAR(self->notifies); Py_CLEAR(self->notifies);
Py_CLEAR(self->string_types); Py_CLEAR(self->string_types);
Py_CLEAR(self->binary_types); Py_CLEAR(self->binary_types);
Py_CLEAR(self->cursor_factory);
Py_CLEAR(self->pyencoder);
Py_CLEAR(self->pydecoder);
return 0; return 0;
} }
@ -1164,7 +1167,6 @@ connection_dealloc(PyObject* obj)
PyMem_Free(self->dsn); PyMem_Free(self->dsn);
PyMem_Free(self->encoding); PyMem_Free(self->encoding);
PyMem_Free(self->codec);
if (self->critical) free(self->critical); if (self->critical) free(self->critical);
if (self->cancel) PQfreeCancel(self->cancel); if (self->cancel) PQfreeCancel(self->cancel);
@ -1216,6 +1218,9 @@ connection_traverse(connectionObject *self, visitproc visit, void *arg)
Py_VISIT(self->notifies); Py_VISIT(self->notifies);
Py_VISIT(self->string_types); Py_VISIT(self->string_types);
Py_VISIT(self->binary_types); Py_VISIT(self->binary_types);
Py_VISIT(self->cursor_factory);
Py_VISIT(self->pyencoder);
Py_VISIT(self->pydecoder);
return 0; return 0;
} }

View File

@ -286,11 +286,7 @@ static PyObject *_psyco_curs_validate_sql_basic(
Py_INCREF(sql); Py_INCREF(sql);
} }
else if (PyUnicode_Check(sql)) { else if (PyUnicode_Check(sql)) {
char *enc = self->conn->codec; if (!(sql = conn_encode(self->conn, sql))) { goto fail; }
sql = PyUnicode_AsEncodedString(sql, enc, NULL);
/* if there was an error during the encoding from unicode to the
target encoding, we just let the exception propagate */
if (sql == NULL) { goto fail; }
} }
else { else {
/* the is not unicode or string, raise an error */ /* the is not unicode or string, raise an error */
@ -1079,7 +1075,7 @@ psyco_curs_callproc(cursorObject *self, PyObject *args)
if (!(cpname = Bytes_AsString(pname))) { goto exit; } if (!(cpname = Bytes_AsString(pname))) { goto exit; }
if (!(scpnames[i] = psycopg_escape_identifier( if (!(scpnames[i] = psycopg_escape_identifier(
self->conn, cpname, 0))) { self->conn, cpname, -1))) {
Py_CLEAR(pname); Py_CLEAR(pname);
goto exit; goto exit;
} }
@ -1457,12 +1453,12 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs)
goto exit; goto exit;
if (!(quoted_delimiter = psycopg_escape_string( if (!(quoted_delimiter = psycopg_escape_string(
self->conn, sep, 0, NULL, NULL))) { self->conn, sep, -1, NULL, NULL))) {
goto exit; goto exit;
} }
if (!(quoted_null = psycopg_escape_string( if (!(quoted_null = psycopg_escape_string(
self->conn, null, 0, NULL, NULL))) { self->conn, null, -1, NULL, NULL))) {
goto exit; goto exit;
} }
@ -1551,12 +1547,12 @@ psyco_curs_copy_to(cursorObject *self, PyObject *args, PyObject *kwargs)
goto exit; goto exit;
if (!(quoted_delimiter = psycopg_escape_string( if (!(quoted_delimiter = psycopg_escape_string(
self->conn, sep, 0, NULL, NULL))) { self->conn, sep, -1, NULL, NULL))) {
goto exit; goto exit;
} }
if (!(quoted_null = psycopg_escape_string( if (!(quoted_null = psycopg_escape_string(
self->conn, null, 0, NULL, NULL))) { self->conn, null, -1, NULL, NULL))) {
goto exit; goto exit;
} }
@ -1899,10 +1895,10 @@ cursor_setup(cursorObject *self, connectionObject *conn, const char *name)
Dprintf("cursor_setup: parameters: name = %s, conn = %p", name, conn); Dprintf("cursor_setup: parameters: name = %s, conn = %p", name, conn);
if (name) { if (name) {
if (0 > psycopg_strdup(&self->name, name, 0)) { if (0 > psycopg_strdup(&self->name, name, -1)) {
return -1; return -1;
} }
if (!(self->qname = psycopg_escape_identifier(conn, name, 0))) { if (!(self->qname = psycopg_escape_identifier(conn, name, -1))) {
return -1; return -1;
} }
} }

View File

@ -34,7 +34,7 @@ typedef struct {
PyObject *pgerror; PyObject *pgerror;
PyObject *pgcode; PyObject *pgcode;
cursorObject *cursor; cursorObject *cursor;
char *codec; PyObject *pydecoder;
PGresult *pgres; PGresult *pgres;
} errorObject; } errorObject;

View File

@ -34,17 +34,7 @@
PyObject * PyObject *
error_text_from_chars(errorObject *self, const char *str) error_text_from_chars(errorObject *self, const char *str)
{ {
if (str == NULL) { return psycopg_text_from_chars_safe(str, -1, self->pydecoder);
Py_INCREF(Py_None);
return (Py_None);
}
#if PY_MAJOR_VERSION < 3
return PyString_FromString(str);
#else
return PyUnicode_Decode(str, strlen(str),
self->codec ? self->codec : "ascii", "replace");
#endif
} }
@ -93,6 +83,7 @@ error_traverse(errorObject *self, visitproc visit, void *arg)
Py_VISIT(self->pgerror); Py_VISIT(self->pgerror);
Py_VISIT(self->pgcode); Py_VISIT(self->pgcode);
Py_VISIT(self->cursor); Py_VISIT(self->cursor);
Py_VISIT(self->pydecoder);
return ((PyTypeObject *)PyExc_StandardError)->tp_traverse( return ((PyTypeObject *)PyExc_StandardError)->tp_traverse(
(PyObject *)self, visit, arg); (PyObject *)self, visit, arg);
@ -104,6 +95,7 @@ error_clear(errorObject *self)
Py_CLEAR(self->pgerror); Py_CLEAR(self->pgerror);
Py_CLEAR(self->pgcode); Py_CLEAR(self->pgcode);
Py_CLEAR(self->cursor); Py_CLEAR(self->cursor);
Py_CLEAR(self->pydecoder);
return ((PyTypeObject *)PyExc_StandardError)->tp_clear((PyObject *)self); return ((PyTypeObject *)PyExc_StandardError)->tp_clear((PyObject *)self);
} }
@ -113,7 +105,6 @@ error_dealloc(errorObject *self)
{ {
PyObject_GC_UnTrack((PyObject *)self); PyObject_GC_UnTrack((PyObject *)self);
error_clear(self); error_clear(self);
PyMem_Free(self->codec);
CLEARPGRES(self->pgres); CLEARPGRES(self->pgres);
Py_TYPE(self)->tp_free((PyObject *)self); Py_TYPE(self)->tp_free((PyObject *)self);

View File

@ -86,9 +86,7 @@ psyco_lobj_write(lobjectObject *self, PyObject *args)
data = obj; data = obj;
} }
else if (PyUnicode_Check(obj)) { else if (PyUnicode_Check(obj)) {
if (!(data = PyUnicode_AsEncodedString(obj, self->conn->codec, NULL))) { if (!(data = conn_encode(self->conn, obj))) { goto exit; }
goto exit;
}
} }
else { else {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
@ -150,7 +148,7 @@ psyco_lobj_read(lobjectObject *self, PyObject *args)
if (self->mode & LOBJECT_BINARY) { if (self->mode & LOBJECT_BINARY) {
res = Bytes_FromStringAndSize(buffer, size); res = Bytes_FromStringAndSize(buffer, size);
} else { } else {
res = PyUnicode_Decode(buffer, size, self->conn->codec, NULL); res = conn_decode(self->conn, buffer, size);
} }
PyMem_Free(buffer); PyMem_Free(buffer);

View File

@ -251,9 +251,7 @@ microprotocol_getquoted(PyObject *obj, connectionObject *conn)
/* Convert to bytes. */ /* Convert to bytes. */
if (res && PyUnicode_CheckExact(res)) { if (res && PyUnicode_CheckExact(res)) {
PyObject *b; PyObject *b;
const char *codec; b = conn_encode(conn, res);
codec = (conn && conn->codec) ? conn->codec : "utf8";
b = PyUnicode_AsEncodedString(res, codec, NULL);
Py_DECREF(res); Py_DECREF(res);
res = b; res = b;
} }

View File

@ -167,6 +167,7 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
const char *err2 = NULL; const char *err2 = NULL;
const char *code = NULL; const char *code = NULL;
PyObject *pyerr = NULL; PyObject *pyerr = NULL;
PyObject *pgerror = NULL, *pgcode = NULL;
if (conn == NULL) { if (conn == NULL) {
PyErr_SetString(DatabaseError, PyErr_SetString(DatabaseError,
@ -221,19 +222,37 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
err2 = strip_severity(err); err2 = strip_severity(err);
Dprintf("pq_raise: err2=%s", err2); Dprintf("pq_raise: err2=%s", err2);
/* decode now the details of the error, because after psyco_set_error
* decoding will fail.
*/
if (!(pgerror = conn_text_from_chars(conn, err))) {
/* we can't really handle an exception while handling this error
* so just print it. */
PyErr_Print();
PyErr_Clear();
}
if (!(pgcode = conn_text_from_chars(conn, code))) {
PyErr_Print();
PyErr_Clear();
}
pyerr = psyco_set_error(exc, curs, err2); pyerr = psyco_set_error(exc, curs, err2);
if (pyerr && PyObject_TypeCheck(pyerr, &errorType)) { if (pyerr && PyObject_TypeCheck(pyerr, &errorType)) {
errorObject *perr = (errorObject *)pyerr; errorObject *perr = (errorObject *)pyerr;
PyMem_Free(perr->codec); Py_CLEAR(perr->pydecoder);
psycopg_strdup(&perr->codec, conn->codec, 0); Py_XINCREF(conn->pydecoder);
perr->pydecoder = conn->pydecoder;
Py_CLEAR(perr->pgerror); Py_CLEAR(perr->pgerror);
perr->pgerror = error_text_from_chars(perr, err); perr->pgerror = pgerror;
pgerror = NULL;
Py_CLEAR(perr->pgcode); Py_CLEAR(perr->pgcode);
perr->pgcode = error_text_from_chars(perr, code); perr->pgcode = pgcode;
pgcode = NULL;
CLEARPGRES(perr->pgres); CLEARPGRES(perr->pgres);
if (pgres && *pgres) { if (pgres && *pgres) {
@ -241,6 +260,9 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
*pgres = NULL; *pgres = NULL;
} }
} }
Py_XDECREF(pgerror);
Py_XDECREF(pgcode);
} }
/* pq_set_critical, pq_resolve_critical - manage critical errors /* pq_set_critical, pq_resolve_critical - manage critical errors
@ -765,7 +787,7 @@ pq_tpc_command_locked(connectionObject *conn, const char *cmd, const char *tid,
PyEval_RestoreThread(*tstate); PyEval_RestoreThread(*tstate);
/* convert the xid into the postgres transaction_id and quote it. */ /* convert the xid into the postgres transaction_id and quote it. */
if (!(etid = psycopg_escape_string(conn, tid, 0, NULL, NULL))) if (!(etid = psycopg_escape_string(conn, tid, -1, NULL, NULL)))
{ goto exit; } { goto exit; }
/* prepare the command to the server */ /* prepare the command to the server */
@ -1332,8 +1354,7 @@ _pq_copy_in_v3(cursorObject *curs)
/* a file may return unicode if implements io.TextIOBase */ /* a file may return unicode if implements io.TextIOBase */
if (PyUnicode_Check(o)) { if (PyUnicode_Check(o)) {
PyObject *tmp; PyObject *tmp;
Dprintf("_pq_copy_in_v3: encoding in %s", curs->conn->codec); if (!(tmp = conn_encode(curs->conn, o))) {
if (!(tmp = PyUnicode_AsEncodedString(o, curs->conn->codec, NULL))) {
Dprintf("_pq_copy_in_v3: encoding() failed"); Dprintf("_pq_copy_in_v3: encoding() failed");
error = 1; error = 1;
break; break;
@ -1488,7 +1509,7 @@ _pq_copy_out_v3(cursorObject *curs)
if (len > 0 && buffer) { if (len > 0 && buffer) {
if (is_text) { if (is_text) {
obj = PyUnicode_Decode(buffer, len, curs->conn->codec, NULL); obj = conn_decode(curs->conn, buffer, len);
} else { } else {
obj = Bytes_FromStringAndSize(buffer, len); obj = Bytes_FromStringAndSize(buffer, len);
} }
@ -1638,7 +1659,7 @@ retry:
Dprintf("pq_read_replication_message: >>%.*s<<", data_size, buffer + hdr); Dprintf("pq_read_replication_message: >>%.*s<<", data_size, buffer + hdr);
if (repl->decode) { if (repl->decode) {
str = PyUnicode_Decode(buffer + hdr, data_size, conn->codec, NULL); str = conn_decode(conn, buffer + hdr, data_size);
} else { } else {
str = Bytes_FromStringAndSize(buffer + hdr, data_size); str = Bytes_FromStringAndSize(buffer + hdr, data_size);
} }

View File

@ -129,9 +129,11 @@ RAISES HIDDEN PyObject *psyco_set_error(PyObject *exc, cursorObject *curs, const
HIDDEN char *psycopg_escape_string(connectionObject *conn, HIDDEN char *psycopg_escape_string(connectionObject *conn,
const char *from, Py_ssize_t len, char *to, Py_ssize_t *tolen); const char *from, Py_ssize_t len, char *to, Py_ssize_t *tolen);
HIDDEN char *psycopg_escape_identifier(connectionObject *conn, HIDDEN char *psycopg_escape_identifier(connectionObject *conn,
const char *str, size_t len); const char *str, Py_ssize_t len);
HIDDEN int psycopg_strdup(char **to, const char *from, Py_ssize_t len); HIDDEN int psycopg_strdup(char **to, const char *from, Py_ssize_t len);
HIDDEN int psycopg_is_text_file(PyObject *f); HIDDEN int psycopg_is_text_file(PyObject *f);
HIDDEN PyObject *psycopg_text_from_chars_safe(
const char *str, Py_ssize_t len, PyObject *decoder);
STEALS(1) HIDDEN PyObject * psycopg_ensure_bytes(PyObject *obj); STEALS(1) HIDDEN PyObject * psycopg_ensure_bytes(PyObject *obj);

View File

@ -165,7 +165,6 @@ psyco_quote_ident(PyObject *self, PyObject *args, PyObject *kwargs)
{ {
PyObject *ident = NULL, *obj = NULL, *result = NULL; PyObject *ident = NULL, *obj = NULL, *result = NULL;
connectionObject *conn; connectionObject *conn;
const char *str;
char *quoted = NULL; char *quoted = NULL;
static char *kwlist[] = {"ident", "scope", NULL}; static char *kwlist[] = {"ident", "scope", NULL};
@ -188,12 +187,9 @@ psyco_quote_ident(PyObject *self, PyObject *args, PyObject *kwargs)
Py_INCREF(ident); /* for ensure_bytes */ Py_INCREF(ident); /* for ensure_bytes */
if (!(ident = psycopg_ensure_bytes(ident))) { goto exit; } if (!(ident = psycopg_ensure_bytes(ident))) { goto exit; }
str = Bytes_AS_STRING(ident); if (!(quoted = psycopg_escape_identifier(conn,
Bytes_AS_STRING(ident), Bytes_GET_SIZE(ident)))) { goto exit; }
quoted = psycopg_escape_identifier(conn, str, strlen(str));
if (!quoted) {
goto exit;
}
result = conn_text_from_chars(conn, quoted); result = conn_text_from_chars(conn, quoted);
exit: exit:

View File

@ -675,8 +675,7 @@ typecast_cast(PyObject *obj, const char *str, Py_ssize_t len, PyObject *curs)
#if PY_MAJOR_VERSION < 3 #if PY_MAJOR_VERSION < 3
s = PyString_FromStringAndSize(str, len); s = PyString_FromStringAndSize(str, len);
#else #else
s = PyUnicode_Decode(str, len, s = conn_decode(((cursorObject *)curs)->conn, str, len);
((cursorObject *)curs)->conn->codec, NULL);
#endif #endif
} }
else { else {

View File

@ -93,12 +93,12 @@ typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs)
static PyObject * static PyObject *
typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs) typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs)
{ {
char *enc; connectionObject *conn;
if (s == NULL) { Py_RETURN_NONE; } if (s == NULL) { Py_RETURN_NONE; }
enc = ((cursorObject*)curs)->conn->codec; conn = ((cursorObject*)curs)->conn;
return PyUnicode_Decode(s, len, enc, NULL); return conn_decode(conn, s, len);
} }
/** BOOLEAN - cast boolean value into right python object **/ /** BOOLEAN - cast boolean value into right python object **/

View File

@ -40,6 +40,8 @@
* and set an exception. The returned string includes quotes and leading E if * and set an exception. The returned string includes quotes and leading E if
* needed. * needed.
* *
* `len` is optional: if < 0 it will be calculated.
*
* If tolen is set, it will contain the length of the escaped string, * If tolen is set, it will contain the length of the escaped string,
* including quotes. * including quotes.
*/ */
@ -50,7 +52,7 @@ psycopg_escape_string(connectionObject *conn, const char *from, Py_ssize_t len,
Py_ssize_t ql; Py_ssize_t ql;
int eq = (conn && (conn->equote)) ? 1 : 0; int eq = (conn && (conn->equote)) ? 1 : 0;
if (len == 0) { if (len < 0) {
len = strlen(from); len = strlen(from);
} else if (strchr(from, '\0') != from + len) { } else if (strchr(from, '\0') != from + len) {
PyErr_Format(PyExc_ValueError, "A string literal cannot contain NUL (0x00) characters."); PyErr_Format(PyExc_ValueError, "A string literal cannot contain NUL (0x00) characters.");
@ -92,13 +94,13 @@ psycopg_escape_string(connectionObject *conn, const char *from, Py_ssize_t len,
/* Escape a string for inclusion in a query as identifier. /* Escape a string for inclusion in a query as identifier.
* *
* 'len' is optional: if 0 the length is calculated. * 'len' is optional: if < 0 it will be calculated.
* *
* Return a string allocated by Postgres: free it using PQfreemem * Return a string allocated by Postgres: free it using PQfreemem
* In case of error set a Python exception. * In case of error set a Python exception.
*/ */
char * char *
psycopg_escape_identifier(connectionObject *conn, const char *str, size_t len) psycopg_escape_identifier(connectionObject *conn, const char *str, Py_ssize_t len)
{ {
char *rv = NULL; char *rv = NULL;
@ -107,7 +109,7 @@ psycopg_escape_identifier(connectionObject *conn, const char *str, size_t len)
goto exit; goto exit;
} }
if (!len) { len = strlen(str); } if (len < 0) { len = strlen(str); }
rv = PQescapeIdentifier(conn->pgconn, str, len); rv = PQescapeIdentifier(conn->pgconn, str, len);
if (!rv) { if (!rv) {
@ -127,7 +129,7 @@ exit:
/* Duplicate a string. /* Duplicate a string.
* *
* Allocate a new buffer on the Python heap containing the new string. * Allocate a new buffer on the Python heap containing the new string.
* 'len' is optional: if 0 the length is calculated. * 'len' is optional: if < 0 the length is calculated.
* *
* Store the return in 'to' and return 0 in case of success, else return -1 * Store the return in 'to' and return 0 in case of success, else return -1
* and raise an exception. * and raise an exception.
@ -141,7 +143,7 @@ psycopg_strdup(char **to, const char *from, Py_ssize_t len)
*to = NULL; *to = NULL;
return 0; return 0;
} }
if (!len) { len = strlen(from); } if (len < 0) { len = strlen(from); }
if (!(*to = PyMem_Malloc(len + 1))) { if (!(*to = PyMem_Malloc(len + 1))) {
PyErr_NoMemory(); PyErr_NoMemory();
return -1; return -1;
@ -276,3 +278,57 @@ exit:
return res; return res;
} }
/* Build a Python text object from a C string, never failing on bad bytes.
 *
 * str:     the data to convert; if NULL the function returns None.
 * len:     number of bytes to use; pass a negative value (e.g. -1) to have
 *          it calculated with strlen().
 * decoder: a Python decoding function such as codecs.getdecoder(enc), or
 *          NULL. It is only used on Python 3.
 *
 * On Python 2 the result is a plain string built from the bytes.
 * On Python 3 the result is unicode: with a decoder, the decoder is called
 * with the bytes and the "replace" error handler and item 0 of its result
 * is returned; without a decoder the bytes are decoded as ASCII, again with
 * "replace".
 *
 * Return a new reference, or NULL with an exception set.
 */
PyObject *
psycopg_text_from_chars_safe(const char *str, Py_ssize_t len, PyObject *decoder)
{
#if PY_MAJOR_VERSION >= 3
    /* the "replace" error handler name, created once and kept around */
    static PyObject *errors_replace = NULL;
    PyObject *raw = NULL;
    PyObject *pair = NULL;
    PyObject *text = NULL;
#endif

    if (str == NULL) { Py_RETURN_NONE; }
    if (len < 0) { len = strlen(str); }

#if PY_MAJOR_VERSION < 3
    return PyString_FromStringAndSize(str, len);
#else
    if (decoder == NULL) {
        return PyUnicode_DecodeASCII(str, len, "replace");
    }

    if (errors_replace == NULL) {
        errors_replace = PyUnicode_FromString("replace");
        if (errors_replace == NULL) { return NULL; }
    }

    /* Each step runs only if the previous one succeeded; on any failure
     * `text` stays NULL and the exception already set is propagated. */
    raw = PyBytes_FromStringAndSize(str, len);
    if (raw != NULL) {
        pair = PyObject_CallFunctionObjArgs(
            decoder, raw, errors_replace, NULL);
    }
    if (pair != NULL) {
        /* borrowed reference: own it before the tuple goes away */
        text = PyTuple_GetItem(pair, 0);
        Py_XINCREF(text);
    }

    Py_XDECREF(pair);
    Py_XDECREF(raw);
    return text;
#endif
}