Don't look up the Python encoding

Store the encode/decode functions for the right codec in the connection.
The Python encoding name has been dropped from the connection to avoid the
temptation to use it...
This commit is contained in:
Daniele Varrazzo 2016-12-26 19:47:48 +01:00
parent dfe547856e
commit 3295beb777
14 changed files with 231 additions and 160 deletions

View File

@ -36,20 +36,6 @@ static const char *default_encoding = "latin1";
/* qstring_quote - do the quote process on plain and unicode strings */
const char *
_qstring_get_encoding(qstringObject *self)
{
/* if the wrapped object is an unicode object we can encode it to match
conn->encoding but if the encoding is not specified we don't know what
to do and we raise an exception */
if (self->conn) {
return self->conn->pyenc;
}
else {
return self->encoding ? self->encoding : default_encoding;
}
}
static PyObject *
qstring_quote(qstringObject *self)
{
@ -59,19 +45,15 @@ qstring_quote(qstringObject *self)
const char *encoding;
PyObject *rv = NULL;
encoding = _qstring_get_encoding(self);
Dprintf("qstring_quote: encoding to %s", encoding);
if (PyUnicode_Check(self->wrapped)) {
if (encoding) {
str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL);
Dprintf("qstring_quote: got encoded object at %p", str);
if (str == NULL) goto exit;
if (self->conn) {
if (!(str = conn_encode(self->conn, self->wrapped))) { goto exit; }
}
else {
PyErr_SetString(PyExc_TypeError,
"missing encoding to encode unicode object");
goto exit;
encoding = self->encoding ? self->encoding : default_encoding;
if(!(str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL))) {
goto exit;
}
}
}
@ -162,9 +144,12 @@ qstring_conform(qstringObject *self, PyObject *args)
static PyObject *
qstring_get_encoding(qstringObject *self)
{
const char *encoding;
encoding = _qstring_get_encoding(self);
return Text_FromUTF8(encoding);
if (self->conn) {
return conn_pgenc_to_pyenc(self->conn->encoding, NULL);
}
else {
return Text_FromUTF8(self->encoding ? self->encoding : default_encoding);
}
}
static int

View File

@ -83,8 +83,6 @@ struct connectionObject {
char *dsn; /* data source name */
char *critical; /* critical error on this connection */
char *encoding; /* current backend encoding */
/* TODO: drop */
char *pyenc; /* connection encoding python name */
long int closed; /* 1 means connection has been closed;
2 that something horrible happened */
@ -139,7 +137,10 @@ typedef struct {
/* C-callable functions in connection_int.c and connection_ext.c */
HIDDEN PyObject *conn_text_from_chars(connectionObject *pgconn, const char *str);
HIDDEN PyObject *conn_encode(connectionObject *self, PyObject *b);
HIDDEN PyObject *conn_decode(connectionObject *self, const char *str, Py_ssize_t len);
HIDDEN int conn_get_standard_conforming_strings(PGconn *pgconn);
HIDDEN PyObject *conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding);
RAISES_NEG HIDDEN int conn_get_isolation_level(connectionObject *self);
HIDDEN int conn_get_protocol_version(PGconn *pgconn);
HIDDEN int conn_get_server_version(PGconn *pgconn);

View File

@ -58,12 +58,75 @@ const IsolationLevel conn_isolevels[] = {
PyObject *
conn_text_from_chars(connectionObject *self, const char *str)
{
#if PY_MAJOR_VERSION < 3
return PyString_FromString(str);
#else
const char *pyenc = self ? self->pyenc : "ascii";
return PyUnicode_Decode(str, strlen(str), pyenc, "replace");
#endif
return psycopg_text_from_chars_safe(str, -1, self ? self->pydecoder : NULL);
}
/* Encode a unicode object into bytes using the codec stored in the connection.
 *
 * When there is no connection, or the connection has no Python encoder
 * stored, the object is encoded as UTF-8 instead.
 *
 * Return a new reference, or NULL on failure.
 */
PyObject *
conn_encode(connectionObject *self, PyObject *u)
{
    PyObject *res;
    PyObject *encoded;

    if (self == NULL || self->pyencoder == NULL) {
        return PyUnicode_AsUTF8String(u);
    }

    res = PyObject_CallFunctionObjArgs(self->pyencoder, u, NULL);
    if (res == NULL) {
        return NULL;
    }

    /* The encoder result is read as a tuple whose first item is the encoded
     * object: the item is borrowed, so own it before releasing the tuple. */
    encoded = PyTuple_GetItem(res, 0);
    if (encoded != NULL) {
        Py_INCREF(encoded);
    }

    Py_DECREF(res);
    return encoded;
}
/* Decode a C string into a Python unicode object in the connection encoding.
 *
 * len can be < 0: in this case it is calculated with strlen().
 *
 * The connection's C decoder is used when set, otherwise its Python decoder.
 * If no connection is given, or the connection has no decoder stored, the
 * string is decoded as UTF-8.
 *
 * Return a new reference, or NULL on failure.
 */
PyObject *
conn_decode(connectionObject *self, const char *str, Py_ssize_t len)
{
    PyObject *b = NULL;
    PyObject *t = NULL;
    PyObject *rv = NULL;

    if (len < 0) { len = (Py_ssize_t)strlen(str); }

    /* Fast path: a C-level decoder needs no intermediate bytes object. */
    if (self && self->cdecoder) {
        return self->cdecoder(str, len, NULL);
    }

    /* No codec available. This also covers a connection whose decoders
     * are not set: that case used to return NULL without an exception. */
    if (!(self && self->pydecoder)) {
        return PyUnicode_FromStringAndSize(str, len);
    }

    if (!(b = Bytes_FromStringAndSize(str, len))) { goto exit; }
    if (!(t = PyObject_CallFunctionObjArgs(self->pydecoder, b, NULL))) {
        goto exit;
    }

    /* The decoder result is read as a tuple whose first item is the text. */
    if (!(rv = PyTuple_GetItem(t, 0))) { goto exit; }
    Py_INCREF(rv);  /* PyTuple_GetItem returns a borrowed reference */

exit:
    Py_XDECREF(t);
    Py_XDECREF(b);
    return rv;
}
/* conn_notice_callback - process notices */
@ -321,61 +384,20 @@ exit:
return rv;
}
/* Convert a PostgreSQL encoding name to a Python encoding name.
*
* Set 'pyenc' to a new copy of the encoding name allocated on the Python heap.
* Return 0 in case of success, else -1 and set an exception.
*
* 'pgenc' should be already normalized (uppercase, no - or _).
*/
RAISES_NEG static int
conn_pgenc_to_pyenc(const char *pgenc, char **pyenc)
{
char *tmp;
Py_ssize_t size;
PyObject *opyenc = NULL;
int rv = -1;
/* Find the Py encoding name from the PG encoding */
if (!(opyenc = PyDict_GetItemString(psycoEncodings, pgenc))) {
PyErr_Format(OperationalError,
"no Python encoding for PostgreSQL encoding '%s'", pgenc);
goto exit;
}
/* Convert the encoding in a bytes string to extract the c string. */
Py_INCREF(opyenc);
if (!(opyenc = psycopg_ensure_bytes(opyenc))) {
goto exit;
}
if (-1 == Bytes_AsStringAndSize(opyenc, &tmp, &size)) {
goto exit;
}
/* have our own copy of the python encoding name */
rv = psycopg_strdup(pyenc, tmp, size);
exit:
Py_XDECREF(opyenc);
return rv;
}
/* set fast access functions according to the currently selected encoding
*/
static void
conn_set_fast_codec(connectionObject *self)
{
Dprintf("conn_set_fast_codec: encoding=%s", self->pyenc);
Dprintf("conn_set_fast_codec: encoding=%s", self->encoding);
if (0 == strcmp(self->pyenc, "utf_8")) {
if (0 == strcmp(self->encoding, "UTF8")) {
Dprintf("conn_set_fast_codec: PyUnicode_DecodeUTF8");
self->cdecoder = PyUnicode_DecodeUTF8;
return;
}
if (0 == strcmp(self->pyenc, "iso8859_1")) {
if (0 == strcmp(self->encoding, "LATIN1")) {
Dprintf("conn_set_fast_codec: PyUnicode_DecodeLatin1");
self->cdecoder = PyUnicode_DecodeLatin1;
return;
@ -386,12 +408,45 @@ conn_set_fast_codec(connectionObject *self)
}
/* Return the Python encoding from a PostgreSQL encoding.
 *
 * The PostgreSQL name is first normalized by clear_encoding_name(), then
 * looked up in psycoEncodings.
 *
 * Optionally return the clean version of the postgres encoding too: if
 * clean_encoding is not NULL, on success *clean_encoding receives the
 * normalized name and the caller must release it with PyMem_Free(). On
 * failure *clean_encoding is left untouched.
 *
 * Return a new reference to the Python encoding name. If the encoding is
 * unknown return NULL and set OperationalError.
 */
PyObject *
conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding)
{
    char *pgenc = NULL;
    PyObject *rv = NULL;

    if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; }
    if (!(rv = PyDict_GetItemString(psycoEncodings, pgenc))) {
        PyErr_Format(OperationalError,
            "no Python encoding for PostgreSQL encoding '%s'", pgenc);
        goto exit;
    }
    Py_INCREF(rv);  /* PyDict_GetItemString returns a borrowed reference */

    if (clean_encoding) {
        /* hand the normalized name over to the caller */
        *clean_encoding = pgenc;
        pgenc = NULL;
    }

exit:
    /* Frees the normalized name whenever it was not handed over, including
     * the failed-lookup path where it used to leak. No-op on NULL. */
    PyMem_Free(pgenc);
    return rv;
}
/* Convert a Postgres encoding into Python encoding and decoding functions.
*
* Set clean_encoding to a clean version of the Postgres encoding name
* and pyenc and pydec to python codec functions.
*
* Return 0 on success, else -1 and set an exception.
*/
RAISES_NEG static int
conn_get_python_codec(const char *encoding, PyObject **pyenc, PyObject **pydec)
conn_get_python_codec(const char *encoding,
char **clean_encoding, PyObject **pyenc, PyObject **pydec)
{
int rv = -1;
char *pgenc = NULL;
@ -399,15 +454,7 @@ conn_get_python_codec(const char *encoding, PyObject **pyenc, PyObject **pydec)
PyObject *m = NULL, *f = NULL, *codec = NULL;
PyObject *enc_tmp = NULL, *dec_tmp = NULL;
if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; }
/* Find the Py encoding name from the PG encoding */
if (!(encname = PyDict_GetItemString(psycoEncodings, pgenc))) {
PyErr_Format(OperationalError,
"no Python encoding for PostgreSQL encoding '%s'", pgenc);
goto exit;
}
Py_INCREF(encname);
if (!(encname = conn_pgenc_to_pyenc(encoding, &pgenc))) { goto exit; }
/* Look up the python codec */
if (!(m = PyImport_ImportModule("codecs"))) { goto exit; }
@ -419,6 +466,7 @@ conn_get_python_codec(const char *encoding, PyObject **pyenc, PyObject **pydec)
/* success */
*pyenc = enc_tmp; enc_tmp = NULL;
*pydec = dec_tmp; dec_tmp = NULL;
*clean_encoding = pgenc; pgenc = NULL;
rv = 0;
exit:
@ -440,20 +488,17 @@ exit:
* Return 0 on success, else -1 and set an exception.
*/
RAISES_NEG static int
conn_set_encoding(connectionObject *self, const char *encoding)
conn_store_encoding(connectionObject *self, const char *encoding)
{
int rv = -1;
char *pgenc = NULL, *pyenc = NULL;
char *pgenc = NULL;
PyObject *enc_tmp = NULL, *dec_tmp = NULL;
if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; } /* TODO: drop */
if (0 > conn_get_python_codec(encoding, &pgenc, &enc_tmp, &dec_tmp)) {
goto exit;
}
/* Look for this encoding in Python codecs. */
if (0 > conn_pgenc_to_pyenc(pgenc, &pyenc)) { goto exit; } /* TODO: drop */
if (0 > conn_get_python_codec(encoding, &enc_tmp, &dec_tmp)) { goto exit; }
/* Good, success: store the encoding/pyenc in the connection. */
/* Good, success: store the encoding/codec in the connection. */
{
char *tmp = self->encoding;
self->encoding = pgenc;
@ -461,14 +506,6 @@ conn_set_encoding(connectionObject *self, const char *encoding)
pgenc = NULL;
}
{
/* TODO: drop */
char *tmp = self->pyenc;
self->pyenc = pyenc;
PyMem_Free(tmp);
pyenc = NULL;
}
Py_CLEAR(self->pyencoder);
self->pyencoder = enc_tmp;
enc_tmp = NULL;
@ -485,7 +522,6 @@ exit:
Py_XDECREF(enc_tmp);
Py_XDECREF(dec_tmp);
PyMem_Free(pgenc);
PyMem_Free(pyenc);
return rv;
}
@ -508,7 +544,7 @@ conn_read_encoding(connectionObject *self, PGconn *pgconn)
goto exit;
}
if (0 > conn_set_encoding(self, encoding)) {
if (0 > conn_store_encoding(self, encoding)) {
goto exit;
}
@ -1338,16 +1374,14 @@ conn_set_client_encoding(connectionObject *self, const char *pgenc)
PGresult *pgres = NULL;
char *error = NULL;
int res = -1;
char *pyenc = NULL;
char *clean_enc = NULL;
/* If the current encoding is equal to the requested one we don't
issue any query to the backend */
if (strcmp(self->encoding, pgenc) == 0) return 0;
/* We must know what python encoding this encoding is. */
if (0 > clear_encoding_name(pgenc, &clean_enc)) { goto exit; }
if (0 > conn_pgenc_to_pyenc(clean_enc, &pyenc)) { goto exit; }
/* If the current encoding is equal to the requested one we don't
issue any query to the backend */
if (strcmp(self->encoding, clean_enc) == 0) return 0;
Py_BEGIN_ALLOW_THREADS;
pthread_mutex_lock(&self->lock);
@ -1372,14 +1406,12 @@ endlock:
goto exit;
}
res = conn_set_encoding(self, pgenc);
res = conn_store_encoding(self, pgenc);
Dprintf("conn_set_client_encoding: set encoding to %s (Python: %s)",
self->encoding, self->pyenc);
Dprintf("conn_set_client_encoding: encoding set to %s", self->encoding);
exit:
PyMem_Free(clean_enc);
PyMem_Free(pyenc);
return res;
}

View File

@ -1167,7 +1167,6 @@ connection_dealloc(PyObject* obj)
PyMem_Free(self->dsn);
PyMem_Free(self->encoding);
PyMem_Free(self->pyenc);
if (self->critical) free(self->critical);
if (self->cancel) PQfreeCancel(self->cancel);

View File

@ -286,11 +286,7 @@ static PyObject *_psyco_curs_validate_sql_basic(
Py_INCREF(sql);
}
else if (PyUnicode_Check(sql)) {
char *enc = self->conn->pyenc;
sql = PyUnicode_AsEncodedString(sql, enc, NULL);
/* if there was an error during the encoding from unicode to the
target encoding, we just let the exception propagate */
if (sql == NULL) { goto fail; }
if (!(sql = conn_encode(self->conn, sql))) { goto fail; }
}
else {
/* the is not unicode or string, raise an error */

View File

@ -34,7 +34,7 @@ typedef struct {
PyObject *pgerror;
PyObject *pgcode;
cursorObject *cursor;
char *pyenc;
PyObject *pydecoder;
PGresult *pgres;
} errorObject;

View File

@ -34,17 +34,7 @@
PyObject *
error_text_from_chars(errorObject *self, const char *str)
{
if (str == NULL) {
Py_INCREF(Py_None);
return (Py_None);
}
#if PY_MAJOR_VERSION < 3
return PyString_FromString(str);
#else
return PyUnicode_Decode(str, strlen(str),
self->pyenc ? self->pyenc : "ascii", "replace");
#endif
return psycopg_text_from_chars_safe(str, -1, self->pydecoder);
}
@ -93,6 +83,7 @@ error_traverse(errorObject *self, visitproc visit, void *arg)
Py_VISIT(self->pgerror);
Py_VISIT(self->pgcode);
Py_VISIT(self->cursor);
Py_VISIT(self->pydecoder);
return ((PyTypeObject *)PyExc_StandardError)->tp_traverse(
(PyObject *)self, visit, arg);
@ -104,6 +95,7 @@ error_clear(errorObject *self)
Py_CLEAR(self->pgerror);
Py_CLEAR(self->pgcode);
Py_CLEAR(self->cursor);
Py_CLEAR(self->pydecoder);
return ((PyTypeObject *)PyExc_StandardError)->tp_clear((PyObject *)self);
}
@ -113,7 +105,6 @@ error_dealloc(errorObject *self)
{
PyObject_GC_UnTrack((PyObject *)self);
error_clear(self);
PyMem_Free(self->pyenc);
CLEARPGRES(self->pgres);
Py_TYPE(self)->tp_free((PyObject *)self);

View File

@ -86,9 +86,7 @@ psyco_lobj_write(lobjectObject *self, PyObject *args)
data = obj;
}
else if (PyUnicode_Check(obj)) {
if (!(data = PyUnicode_AsEncodedString(obj, self->conn->pyenc, NULL))) {
goto exit;
}
if (!(data = conn_encode(self->conn, obj))) { goto exit; }
}
else {
PyErr_Format(PyExc_TypeError,
@ -150,7 +148,7 @@ psyco_lobj_read(lobjectObject *self, PyObject *args)
if (self->mode & LOBJECT_BINARY) {
res = Bytes_FromStringAndSize(buffer, size);
} else {
res = PyUnicode_Decode(buffer, size, self->conn->pyenc, NULL);
res = conn_decode(self->conn, buffer, size);
}
PyMem_Free(buffer);

View File

@ -251,9 +251,7 @@ microprotocol_getquoted(PyObject *obj, connectionObject *conn)
/* Convert to bytes. */
if (res && PyUnicode_CheckExact(res)) {
PyObject *b;
const char *pyenc;
pyenc = (conn && conn->pyenc) ? conn->pyenc : "utf8";
b = PyUnicode_AsEncodedString(res, pyenc, NULL);
b = conn_encode(conn, res);
Py_DECREF(res);
res = b;
}

View File

@ -167,6 +167,7 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
const char *err2 = NULL;
const char *code = NULL;
PyObject *pyerr = NULL;
PyObject *pgerror = NULL, *pgcode = NULL;
if (conn == NULL) {
PyErr_SetString(DatabaseError,
@ -221,19 +222,37 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
err2 = strip_severity(err);
Dprintf("pq_raise: err2=%s", err2);
/* decode now the details of the error, because after psyco_set_error
* decoding will fail.
*/
if (!(pgerror = conn_text_from_chars(conn, err))) {
/* we can't really handle an exception while handling this error
* so just print it. */
PyErr_Print();
PyErr_Clear();
}
if (!(pgcode = conn_text_from_chars(conn, code))) {
PyErr_Print();
PyErr_Clear();
}
pyerr = psyco_set_error(exc, curs, err2);
if (pyerr && PyObject_TypeCheck(pyerr, &errorType)) {
errorObject *perr = (errorObject *)pyerr;
PyMem_Free(perr->pyenc);
psycopg_strdup(&perr->pyenc, conn->pyenc, -1);
Py_CLEAR(perr->pydecoder);
Py_XINCREF(conn->pydecoder);
perr->pydecoder = conn->pydecoder;
Py_CLEAR(perr->pgerror);
perr->pgerror = error_text_from_chars(perr, err);
perr->pgerror = pgerror;
pgerror = NULL;
Py_CLEAR(perr->pgcode);
perr->pgcode = error_text_from_chars(perr, code);
perr->pgcode = pgcode;
pgcode = NULL;
CLEARPGRES(perr->pgres);
if (pgres && *pgres) {
@ -241,6 +260,9 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
*pgres = NULL;
}
}
Py_XDECREF(pgerror);
Py_XDECREF(pgcode);
}
/* pq_set_critical, pq_resolve_critical - manage critical errors
@ -1332,8 +1354,7 @@ _pq_copy_in_v3(cursorObject *curs)
/* a file may return unicode if implements io.TextIOBase */
if (PyUnicode_Check(o)) {
PyObject *tmp;
Dprintf("_pq_copy_in_v3: encoding in %s", curs->conn->pyenc);
if (!(tmp = PyUnicode_AsEncodedString(o, curs->conn->pyenc, NULL))) {
if (!(tmp = conn_encode(curs->conn, o))) {
Dprintf("_pq_copy_in_v3: encoding() failed");
error = 1;
break;
@ -1488,7 +1509,7 @@ _pq_copy_out_v3(cursorObject *curs)
if (len > 0 && buffer) {
if (is_text) {
obj = PyUnicode_Decode(buffer, len, curs->conn->pyenc, NULL);
obj = conn_decode(curs->conn, buffer, len);
} else {
obj = Bytes_FromStringAndSize(buffer, len);
}
@ -1638,7 +1659,7 @@ retry:
Dprintf("pq_read_replication_message: >>%.*s<<", data_size, buffer + hdr);
if (repl->decode) {
str = PyUnicode_Decode(buffer + hdr, data_size, conn->pyenc, NULL);
str = conn_decode(conn, buffer + hdr, data_size);
} else {
str = Bytes_FromStringAndSize(buffer + hdr, data_size);
}

View File

@ -132,6 +132,8 @@ HIDDEN char *psycopg_escape_identifier(connectionObject *conn,
const char *str, Py_ssize_t len);
HIDDEN int psycopg_strdup(char **to, const char *from, Py_ssize_t len);
HIDDEN int psycopg_is_text_file(PyObject *f);
HIDDEN PyObject *psycopg_text_from_chars_safe(
const char *str, Py_ssize_t len, PyObject *decoder);
STEALS(1) HIDDEN PyObject * psycopg_ensure_bytes(PyObject *obj);

View File

@ -671,8 +671,7 @@ typecast_cast(PyObject *obj, const char *str, Py_ssize_t len, PyObject *curs)
#if PY_MAJOR_VERSION < 3
s = PyString_FromStringAndSize(str, len);
#else
s = PyUnicode_Decode(str, len,
((cursorObject *)curs)->conn->pyenc, NULL);
s = conn_decode(((cursorObject *)curs)->conn, str, len);
#endif
}
else {

View File

@ -98,12 +98,7 @@ typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs)
if (s == NULL) { Py_RETURN_NONE; }
conn = ((cursorObject*)curs)->conn;
if (conn->cdecoder) {
return conn->cdecoder(s, len, NULL);
}
else {
return PyUnicode_Decode(s, len, conn->pyenc, NULL);
}
return conn_decode(conn, s, len);
}
/** BOOLEAN - cast boolean value into right python object **/

View File

@ -278,3 +278,57 @@ exit:
return res;
}
/* Build a Python text object from a C string using the given codec.
 *
 * decoder is the Python function codecs.getdecoder(enc). It is only used on
 * Python 3, where it is called with the "replace" error handler and unicode
 * is returned; on Python 2 the function returns a str and ignores it.
 * On Python 3, a NULL decoder means the string is decoded as ASCII, again
 * with "replace".
 *
 * A NULL str gives None. len is optional: pass -1 to have it calculated.
 */
PyObject *
psycopg_text_from_chars_safe(const char *str, Py_ssize_t len, PyObject *decoder)
{
#if PY_MAJOR_VERSION < 3

    if (str == NULL) { Py_RETURN_NONE; }

    if (len < 0) { len = strlen(str); }

    return PyString_FromStringAndSize(str, len);

#else

    /* "replace" as a Python string, created once on first use */
    static PyObject *replace = NULL;
    PyObject *bytes = NULL;
    PyObject *res = NULL;
    PyObject *text = NULL;

    if (str == NULL) { Py_RETURN_NONE; }

    if (len < 0) { len = strlen(str); }

    if (decoder == NULL) {
        return PyUnicode_DecodeASCII(str, len, "replace");
    }

    if (replace == NULL) {
        replace = PyUnicode_FromString("replace");
        if (replace == NULL) { return NULL; }
    }

    bytes = PyBytes_FromStringAndSize(str, len);
    if (bytes == NULL) { goto cleanup; }

    res = PyObject_CallFunctionObjArgs(decoder, bytes, replace, NULL);
    if (res == NULL) { goto cleanup; }

    /* first item of the result tuple is the text: borrowed, so own it */
    text = PyTuple_GetItem(res, 0);
    if (text == NULL) { goto cleanup; }
    Py_INCREF(text);

cleanup:
    Py_XDECREF(res);
    Py_XDECREF(bytes);
    return text;

#endif
}