From 3295beb7774098659a40649d65e84f7ae9a4838e Mon Sep 17 00:00:00 2001 From: Daniele Varrazzo Date: Mon, 26 Dec 2016 19:47:48 +0100 Subject: [PATCH] Don't look up for Python encoding Store the encode/decode functions for the right codec in the connection. The Python encoding name has been dropped from the connection to avoid the temptation to use it... --- psycopg/adapter_qstring.c | 39 +++---- psycopg/connection.h | 5 +- psycopg/connection_int.c | 210 ++++++++++++++++++++++---------------- psycopg/connection_type.c | 1 - psycopg/cursor_type.c | 6 +- psycopg/error.h | 2 +- psycopg/error_type.c | 15 +-- psycopg/lobject_type.c | 6 +- psycopg/microprotocols.c | 4 +- psycopg/pqpath.c | 37 +++++-- psycopg/psycopg.h | 2 + psycopg/typecast.c | 3 +- psycopg/typecast_basic.c | 7 +- psycopg/utils.c | 54 ++++++++++ 14 files changed, 231 insertions(+), 160 deletions(-) diff --git a/psycopg/adapter_qstring.c b/psycopg/adapter_qstring.c index febb49ac..73579c57 100644 --- a/psycopg/adapter_qstring.c +++ b/psycopg/adapter_qstring.c @@ -36,20 +36,6 @@ static const char *default_encoding = "latin1"; /* qstring_quote - do the quote process on plain and unicode strings */ -const char * -_qstring_get_encoding(qstringObject *self) -{ - /* if the wrapped object is an unicode object we can encode it to match - conn->encoding but if the encoding is not specified we don't know what - to do and we raise an exception */ - if (self->conn) { - return self->conn->pyenc; - } - else { - return self->encoding ? 
self->encoding : default_encoding; - } -} - static PyObject * qstring_quote(qstringObject *self) { @@ -59,19 +45,15 @@ qstring_quote(qstringObject *self) const char *encoding; PyObject *rv = NULL; - encoding = _qstring_get_encoding(self); - Dprintf("qstring_quote: encoding to %s", encoding); - if (PyUnicode_Check(self->wrapped)) { - if (encoding) { - str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL); - Dprintf("qstring_quote: got encoded object at %p", str); - if (str == NULL) goto exit; + if (self->conn) { + if (!(str = conn_encode(self->conn, self->wrapped))) { goto exit; } } else { - PyErr_SetString(PyExc_TypeError, - "missing encoding to encode unicode object"); - goto exit; + encoding = self->encoding ? self->encoding : default_encoding; + if(!(str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL))) { + goto exit; + } } } @@ -162,9 +144,12 @@ qstring_conform(qstringObject *self, PyObject *args) static PyObject * qstring_get_encoding(qstringObject *self) { - const char *encoding; - encoding = _qstring_get_encoding(self); - return Text_FromUTF8(encoding); + if (self->conn) { + return conn_pgenc_to_pyenc(self->conn->encoding, NULL); + } + else { + return Text_FromUTF8(self->encoding ? 
self->encoding : default_encoding); + } } static int diff --git a/psycopg/connection.h b/psycopg/connection.h index 32b34fa2..6c5a5f6d 100644 --- a/psycopg/connection.h +++ b/psycopg/connection.h @@ -83,8 +83,6 @@ struct connectionObject { char *dsn; /* data source name */ char *critical; /* critical error on this connection */ char *encoding; /* current backend encoding */ - /* TODO: drop */ - char *pyenc; /* connection encoding python name */ long int closed; /* 1 means connection has been closed; 2 that something horrible happened */ @@ -139,7 +137,10 @@ typedef struct { /* C-callable functions in connection_int.c and connection_ext.c */ HIDDEN PyObject *conn_text_from_chars(connectionObject *pgconn, const char *str); +HIDDEN PyObject *conn_encode(connectionObject *self, PyObject *b); +HIDDEN PyObject *conn_decode(connectionObject *self, const char *str, Py_ssize_t len); HIDDEN int conn_get_standard_conforming_strings(PGconn *pgconn); +HIDDEN PyObject *conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding); RAISES_NEG HIDDEN int conn_get_isolation_level(connectionObject *self); HIDDEN int conn_get_protocol_version(PGconn *pgconn); HIDDEN int conn_get_server_version(PGconn *pgconn); diff --git a/psycopg/connection_int.c b/psycopg/connection_int.c index 83b706ba..38688d30 100644 --- a/psycopg/connection_int.c +++ b/psycopg/connection_int.c @@ -58,12 +58,75 @@ const IsolationLevel conn_isolevels[] = { PyObject * conn_text_from_chars(connectionObject *self, const char *str) { -#if PY_MAJOR_VERSION < 3 - return PyString_FromString(str); -#else - const char *pyenc = self ? self->pyenc : "ascii"; - return PyUnicode_Decode(str, strlen(str), pyenc, "replace"); -#endif + return psycopg_text_from_chars_safe(str, -1, self ? self->pydecoder : NULL); +} + + +/* Encode an unicode object into a bytes object in the connection encoding. 
+ * + * If no connection or encoding is available, default to utf8 + */ +PyObject * +conn_encode(connectionObject *self, PyObject *u) +{ + PyObject *t = NULL; + PyObject *rv = NULL; + + if (!(self && self->pyencoder)) { + rv = PyUnicode_AsUTF8String(u); + goto exit; + } + + if (!(t = PyObject_CallFunctionObjArgs(self->pyencoder, u, NULL))) { + goto exit; + } + + if (!(rv = PyTuple_GetItem(t, 0))) { goto exit; } + Py_INCREF(rv); + +exit: + Py_XDECREF(t); + + return rv; +} + + +/* decode a c string into a Python unicode in the connection encoding + * + * len can be < 0: in this case it will be calculated + * + * If no connection or encoding is available, default to utf8 + */ +PyObject * +conn_decode(connectionObject *self, const char *str, Py_ssize_t len) +{ + PyObject *b = NULL; + PyObject *t = NULL; + PyObject *rv = NULL; + + if (len < 0) { len = strlen(str); } + + if (self) { + if (self->cdecoder) { + return self->cdecoder(str, len, NULL); + } + else if (self->pydecoder) { + if (!(b = Bytes_FromStringAndSize(str, len))) { goto exit; } + if (!(t = PyObject_CallFunctionObjArgs(self->pydecoder, b, NULL))) { + goto exit; + } + rv = PyTuple_GetItem(t, 0); + Py_XINCREF(rv); + } + } + else { + return PyUnicode_FromStringAndSize(str, len); + } + +exit: + Py_XDECREF(t); + Py_XDECREF(b); + return rv; } /* conn_notice_callback - process notices */ @@ -321,61 +384,20 @@ exit: return rv; } -/* Convert a PostgreSQL encoding name to a Python encoding name. - * - * Set 'pyenc' to a new copy of the encoding name allocated on the Python heap. - * Return 0 in case of success, else -1 and set an exception. - * - * 'pgenc' should be already normalized (uppercase, no - or _). 
- */ -RAISES_NEG static int -conn_pgenc_to_pyenc(const char *pgenc, char **pyenc) -{ - char *tmp; - Py_ssize_t size; - PyObject *opyenc = NULL; - int rv = -1; - - /* Find the Py encoding name from the PG encoding */ - if (!(opyenc = PyDict_GetItemString(psycoEncodings, pgenc))) { - PyErr_Format(OperationalError, - "no Python encoding for PostgreSQL encoding '%s'", pgenc); - goto exit; - } - - /* Convert the encoding in a bytes string to extract the c string. */ - Py_INCREF(opyenc); - if (!(opyenc = psycopg_ensure_bytes(opyenc))) { - goto exit; - } - - if (-1 == Bytes_AsStringAndSize(opyenc, &tmp, &size)) { - goto exit; - } - - /* have our own copy of the python encoding name */ - rv = psycopg_strdup(pyenc, tmp, size); - -exit: - Py_XDECREF(opyenc); - return rv; -} - - /* set fast access functions according to the currently selected encoding */ static void conn_set_fast_codec(connectionObject *self) { - Dprintf("conn_set_fast_codec: encoding=%s", self->pyenc); + Dprintf("conn_set_fast_codec: encoding=%s", self->encoding); - if (0 == strcmp(self->pyenc, "utf_8")) { + if (0 == strcmp(self->encoding, "UTF8")) { Dprintf("conn_set_fast_codec: PyUnicode_DecodeUTF8"); self->cdecoder = PyUnicode_DecodeUTF8; return; } - if (0 == strcmp(self->pyenc, "iso8859_1")) { + if (0 == strcmp(self->encoding, "LATIN1")) { Dprintf("conn_set_fast_codec: PyUnicode_DecodeLatin1"); self->cdecoder = PyUnicode_DecodeLatin1; return; @@ -386,12 +408,45 @@ conn_set_fast_codec(connectionObject *self) } +/* Return the Python encoding from a PostgreSQL encoding. 
+ * + * Optionally return the clean version of the postgres encoding too + */ +PyObject * +conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding) +{ + char *pgenc = NULL; + PyObject *rv = NULL; + + if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; } + if (!(rv = PyDict_GetItemString(psycoEncodings, pgenc))) { + PyErr_Format(OperationalError, + "no Python encoding for PostgreSQL encoding '%s'", pgenc); + goto exit; + } + Py_INCREF(rv); + + if (clean_encoding) { + *clean_encoding = pgenc; + } + else { + PyMem_Free(pgenc); + } + +exit: + return rv; +} + /* Convert a Postgres encoding into Python encoding and decoding functions. + * + * Set clean_encoding to a clean version of the Postgres encoding name + * and pyenc and pydec to python codec functions. * * Return 0 on success, else -1 and set an exception. */ RAISES_NEG static int -conn_get_python_codec(const char *encoding, PyObject **pyenc, PyObject **pydec) +conn_get_python_codec(const char *encoding, + char **clean_encoding, PyObject **pyenc, PyObject **pydec) { int rv = -1; char *pgenc = NULL; @@ -399,15 +454,7 @@ conn_get_python_codec(const char *encoding, PyObject **pyenc, PyObject **pydec) PyObject *m = NULL, *f = NULL, *codec = NULL; PyObject *enc_tmp = NULL, *dec_tmp = NULL; - if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; } - - /* Find the Py encoding name from the PG encoding */ - if (!(encname = PyDict_GetItemString(psycoEncodings, pgenc))) { - PyErr_Format(OperationalError, - "no Python encoding for PostgreSQL encoding '%s'", pgenc); - goto exit; - } - Py_INCREF(encname); + if (!(encname = conn_pgenc_to_pyenc(encoding, &pgenc))) { goto exit; } /* Look up the python codec */ if (!(m = PyImport_ImportModule("codecs"))) { goto exit; } @@ -419,6 +466,7 @@ conn_get_python_codec(const char *encoding, PyObject **pyenc, PyObject **pydec) /* success */ *pyenc = enc_tmp; enc_tmp = NULL; *pydec = dec_tmp; dec_tmp = NULL; + *clean_encoding = pgenc; pgenc = NULL; rv = 0; exit: @@ 
-440,20 +488,17 @@ exit: * Return 0 on success, else -1 and set an exception. */ RAISES_NEG static int -conn_set_encoding(connectionObject *self, const char *encoding) +conn_store_encoding(connectionObject *self, const char *encoding) { int rv = -1; - char *pgenc = NULL, *pyenc = NULL; + char *pgenc = NULL; PyObject *enc_tmp = NULL, *dec_tmp = NULL; - if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; } /* TODO: drop */ + if (0 > conn_get_python_codec(encoding, &pgenc, &enc_tmp, &dec_tmp)) { + goto exit; + } - /* Look for this encoding in Python codecs. */ - if (0 > conn_pgenc_to_pyenc(pgenc, &pyenc)) { goto exit; } /* TODO: drop */ - - if (0 > conn_get_python_codec(encoding, &enc_tmp, &dec_tmp)) { goto exit; } - - /* Good, success: store the encoding/pyenc in the connection. */ + /* Good, success: store the encoding/codec in the connection. */ { char *tmp = self->encoding; self->encoding = pgenc; @@ -461,14 +506,6 @@ conn_set_encoding(connectionObject *self, const char *encoding) pgenc = NULL; } - { - /* TODO: drop */ - char *tmp = self->pyenc; - self->pyenc = pyenc; - PyMem_Free(tmp); - pyenc = NULL; - } - Py_CLEAR(self->pyencoder); self->pyencoder = enc_tmp; enc_tmp = NULL; @@ -485,7 +522,6 @@ exit: Py_XDECREF(enc_tmp); Py_XDECREF(dec_tmp); PyMem_Free(pgenc); - PyMem_Free(pyenc); return rv; } @@ -508,7 +544,7 @@ conn_read_encoding(connectionObject *self, PGconn *pgconn) goto exit; } - if (0 > conn_set_encoding(self, encoding)) { + if (0 > conn_store_encoding(self, encoding)) { goto exit; } @@ -1338,16 +1374,14 @@ conn_set_client_encoding(connectionObject *self, const char *pgenc) PGresult *pgres = NULL; char *error = NULL; int res = -1; - char *pyenc = NULL; char *clean_enc = NULL; - /* If the current encoding is equal to the requested one we don't - issue any query to the backend */ - if (strcmp(self->encoding, pgenc) == 0) return 0; - /* We must know what python encoding this encoding is. 
*/ if (0 > clear_encoding_name(pgenc, &clean_enc)) { goto exit; } - if (0 > conn_pgenc_to_pyenc(clean_enc, &pyenc)) { goto exit; } + + /* If the current encoding is equal to the requested one we don't + issue any query to the backend */ + if (strcmp(self->encoding, clean_enc) == 0) return 0; Py_BEGIN_ALLOW_THREADS; pthread_mutex_lock(&self->lock); @@ -1372,14 +1406,12 @@ endlock: goto exit; } - res = conn_set_encoding(self, pgenc); + res = conn_store_encoding(self, pgenc); - Dprintf("conn_set_client_encoding: set encoding to %s (Python: %s)", - self->encoding, self->pyenc); + Dprintf("conn_set_client_encoding: encoding set to %s", self->encoding); exit: PyMem_Free(clean_enc); - PyMem_Free(pyenc); return res; } diff --git a/psycopg/connection_type.c b/psycopg/connection_type.c index ba4e4335..7401bc14 100644 --- a/psycopg/connection_type.c +++ b/psycopg/connection_type.c @@ -1167,7 +1167,6 @@ connection_dealloc(PyObject* obj) PyMem_Free(self->dsn); PyMem_Free(self->encoding); - PyMem_Free(self->pyenc); if (self->critical) free(self->critical); if (self->cancel) PQfreeCancel(self->cancel); diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index c580daa2..a7303c68 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -286,11 +286,7 @@ static PyObject *_psyco_curs_validate_sql_basic( Py_INCREF(sql); } else if (PyUnicode_Check(sql)) { - char *enc = self->conn->pyenc; - sql = PyUnicode_AsEncodedString(sql, enc, NULL); - /* if there was an error during the encoding from unicode to the - target encoding, we just let the exception propagate */ - if (sql == NULL) { goto fail; } + if (!(sql = conn_encode(self->conn, sql))) { goto fail; } } else { /* the is not unicode or string, raise an error */ diff --git a/psycopg/error.h b/psycopg/error.h index 8bc4df5e..275a7ce7 100644 --- a/psycopg/error.h +++ b/psycopg/error.h @@ -34,7 +34,7 @@ typedef struct { PyObject *pgerror; PyObject *pgcode; cursorObject *cursor; - char *pyenc; + PyObject *pydecoder; PGresult 
*pgres; } errorObject; diff --git a/psycopg/error_type.c b/psycopg/error_type.c index 40b71aa6..4ab21915 100644 --- a/psycopg/error_type.c +++ b/psycopg/error_type.c @@ -34,17 +34,7 @@ PyObject * error_text_from_chars(errorObject *self, const char *str) { - if (str == NULL) { - Py_INCREF(Py_None); - return (Py_None); - } - -#if PY_MAJOR_VERSION < 3 - return PyString_FromString(str); -#else - return PyUnicode_Decode(str, strlen(str), - self->pyenc ? self->pyenc : "ascii", "replace"); -#endif + return psycopg_text_from_chars_safe(str, -1, self->pydecoder); } @@ -93,6 +83,7 @@ error_traverse(errorObject *self, visitproc visit, void *arg) Py_VISIT(self->pgerror); Py_VISIT(self->pgcode); Py_VISIT(self->cursor); + Py_VISIT(self->pydecoder); return ((PyTypeObject *)PyExc_StandardError)->tp_traverse( (PyObject *)self, visit, arg); @@ -104,6 +95,7 @@ error_clear(errorObject *self) Py_CLEAR(self->pgerror); Py_CLEAR(self->pgcode); Py_CLEAR(self->cursor); + Py_CLEAR(self->pydecoder); return ((PyTypeObject *)PyExc_StandardError)->tp_clear((PyObject *)self); } @@ -113,7 +105,6 @@ error_dealloc(errorObject *self) { PyObject_GC_UnTrack((PyObject *)self); error_clear(self); - PyMem_Free(self->pyenc); CLEARPGRES(self->pgres); Py_TYPE(self)->tp_free((PyObject *)self); diff --git a/psycopg/lobject_type.c b/psycopg/lobject_type.c index 61c92324..54f3a4be 100644 --- a/psycopg/lobject_type.c +++ b/psycopg/lobject_type.c @@ -86,9 +86,7 @@ psyco_lobj_write(lobjectObject *self, PyObject *args) data = obj; } else if (PyUnicode_Check(obj)) { - if (!(data = PyUnicode_AsEncodedString(obj, self->conn->pyenc, NULL))) { - goto exit; - } + if (!(data = conn_encode(self->conn, obj))) { goto exit; } } else { PyErr_Format(PyExc_TypeError, @@ -150,7 +148,7 @@ psyco_lobj_read(lobjectObject *self, PyObject *args) if (self->mode & LOBJECT_BINARY) { res = Bytes_FromStringAndSize(buffer, size); } else { - res = PyUnicode_Decode(buffer, size, self->conn->pyenc, NULL); + res = conn_decode(self->conn, buffer, 
size); } PyMem_Free(buffer); diff --git a/psycopg/microprotocols.c b/psycopg/microprotocols.c index 7bd33745..3ddcc485 100644 --- a/psycopg/microprotocols.c +++ b/psycopg/microprotocols.c @@ -251,9 +251,7 @@ microprotocol_getquoted(PyObject *obj, connectionObject *conn) /* Convert to bytes. */ if (res && PyUnicode_CheckExact(res)) { PyObject *b; - const char *pyenc; - pyenc = (conn && conn->pyenc) ? conn->pyenc : "utf8"; - b = PyUnicode_AsEncodedString(res, pyenc, NULL); + b = conn_encode(conn, res); Py_DECREF(res); res = b; } diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index c8d9c46b..328a2b26 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -167,6 +167,7 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres) const char *err2 = NULL; const char *code = NULL; PyObject *pyerr = NULL; + PyObject *pgerror = NULL, *pgcode = NULL; if (conn == NULL) { PyErr_SetString(DatabaseError, @@ -221,19 +222,37 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres) err2 = strip_severity(err); Dprintf("pq_raise: err2=%s", err2); + /* decode now the details of the error, because after psyco_set_error + * decoding will fail. + */ + if (!(pgerror = conn_text_from_chars(conn, err))) { + /* we can't really handle an exception while handling this error + * so just print it. 
*/ + PyErr_Print(); + PyErr_Clear(); + } + + if (!(pgcode = conn_text_from_chars(conn, code))) { + PyErr_Print(); + PyErr_Clear(); + } + pyerr = psyco_set_error(exc, curs, err2); if (pyerr && PyObject_TypeCheck(pyerr, &errorType)) { errorObject *perr = (errorObject *)pyerr; - PyMem_Free(perr->pyenc); - psycopg_strdup(&perr->pyenc, conn->pyenc, -1); + Py_CLEAR(perr->pydecoder); + Py_XINCREF(conn->pydecoder); + perr->pydecoder = conn->pydecoder; Py_CLEAR(perr->pgerror); - perr->pgerror = error_text_from_chars(perr, err); + perr->pgerror = pgerror; + pgerror = NULL; Py_CLEAR(perr->pgcode); - perr->pgcode = error_text_from_chars(perr, code); + perr->pgcode = pgcode; + pgcode = NULL; CLEARPGRES(perr->pgres); if (pgres && *pgres) { @@ -241,6 +260,9 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres) *pgres = NULL; } } + + Py_XDECREF(pgerror); + Py_XDECREF(pgcode); } /* pq_set_critical, pq_resolve_critical - manage critical errors @@ -1332,8 +1354,7 @@ _pq_copy_in_v3(cursorObject *curs) /* a file may return unicode if implements io.TextIOBase */ if (PyUnicode_Check(o)) { PyObject *tmp; - Dprintf("_pq_copy_in_v3: encoding in %s", curs->conn->pyenc); - if (!(tmp = PyUnicode_AsEncodedString(o, curs->conn->pyenc, NULL))) { + if (!(tmp = conn_encode(curs->conn, o))) { Dprintf("_pq_copy_in_v3: encoding() failed"); error = 1; break; @@ -1488,7 +1509,7 @@ _pq_copy_out_v3(cursorObject *curs) if (len > 0 && buffer) { if (is_text) { - obj = PyUnicode_Decode(buffer, len, curs->conn->pyenc, NULL); + obj = conn_decode(curs->conn, buffer, len); } else { obj = Bytes_FromStringAndSize(buffer, len); } @@ -1638,7 +1659,7 @@ retry: Dprintf("pq_read_replication_message: >>%.*s<<", data_size, buffer + hdr); if (repl->decode) { - str = PyUnicode_Decode(buffer + hdr, data_size, conn->pyenc, NULL); + str = conn_decode(conn, buffer + hdr, data_size); } else { str = Bytes_FromStringAndSize(buffer + hdr, data_size); } diff --git a/psycopg/psycopg.h b/psycopg/psycopg.h index 
fc5b533e..13673540 100644 --- a/psycopg/psycopg.h +++ b/psycopg/psycopg.h @@ -132,6 +132,8 @@ HIDDEN char *psycopg_escape_identifier(connectionObject *conn, const char *str, Py_ssize_t len); HIDDEN int psycopg_strdup(char **to, const char *from, Py_ssize_t len); HIDDEN int psycopg_is_text_file(PyObject *f); +HIDDEN PyObject *psycopg_text_from_chars_safe( + const char *str, Py_ssize_t len, PyObject *decoder); STEALS(1) HIDDEN PyObject * psycopg_ensure_bytes(PyObject *obj); diff --git a/psycopg/typecast.c b/psycopg/typecast.c index d83c390b..214d3f04 100644 --- a/psycopg/typecast.c +++ b/psycopg/typecast.c @@ -671,8 +671,7 @@ typecast_cast(PyObject *obj, const char *str, Py_ssize_t len, PyObject *curs) #if PY_MAJOR_VERSION < 3 s = PyString_FromStringAndSize(str, len); #else - s = PyUnicode_Decode(str, len, - ((cursorObject *)curs)->conn->pyenc, NULL); + s = conn_decode(((cursorObject *)curs)->conn, str, len); #endif } else { diff --git a/psycopg/typecast_basic.c b/psycopg/typecast_basic.c index d55820c2..db6c5a93 100644 --- a/psycopg/typecast_basic.c +++ b/psycopg/typecast_basic.c @@ -98,12 +98,7 @@ typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs) if (s == NULL) { Py_RETURN_NONE; } conn = ((cursorObject*)curs)->conn; - if (conn->cdecoder) { - return conn->cdecoder(s, len, NULL); - } - else { - return PyUnicode_Decode(s, len, conn->pyenc, NULL); - } + return conn_decode(conn, s, len); } /** BOOLEAN - cast boolean value into right python object **/ diff --git a/psycopg/utils.c b/psycopg/utils.c index 85ca9d6c..7f6b6e6e 100644 --- a/psycopg/utils.c +++ b/psycopg/utils.c @@ -278,3 +278,57 @@ exit: return res; } + + +/* Convert a C string into Python Text using a specified codec. + * + * The codec is the python function codec.getdecoder(enc). It is only used on + * Python 3 to return unicode: in Py2 the function returns a string. + * + * len is optional: use -1 to have it calculated by the function. 
+ */ +PyObject * +psycopg_text_from_chars_safe(const char *str, Py_ssize_t len, PyObject *decoder) +{ +#if PY_MAJOR_VERSION < 3 + + if (!str) { Py_RETURN_NONE; } + + if (len < 0) { len = strlen(str); } + + return PyString_FromStringAndSize(str, len); + +#else + + static PyObject *replace = NULL; + PyObject *rv = NULL; + PyObject *b = NULL; + PyObject *t = NULL; + + if (!str) { Py_RETURN_NONE; } + + if (len < 0) { len = strlen(str); } + + if (decoder) { + if (!replace) { + if (!(replace = PyUnicode_FromString("replace"))) { goto exit; } + } + if (!(b = PyBytes_FromStringAndSize(str, len))) { goto exit; } + if (!(t = PyObject_CallFunctionObjArgs(decoder, b, replace, NULL))) { + goto exit; + } + + if (!(rv = PyTuple_GetItem(t, 0))) { goto exit; } + Py_INCREF(rv); + } + else { + rv = PyUnicode_DecodeASCII(str, len, "replace"); + } + +exit: + Py_XDECREF(t); + Py_XDECREF(b); + return rv; + +#endif +}