Merge branch 'fast-codecs'

2026-01-10 18:00:48 +03:00 · 2016-12-29 22:15:46 +01:00 · 2016-12-29 22:15:46 +01:00 · 449bd4485f
commit 449bd4485f
parent a81f12f9bd f3e47a72ed
18 changed files with 361 additions and 179 deletions
--- a/2
+++ b/2
@ -25,6 +25,8 @@ New features:
 - Added `~psycopg2.extensions.quote_ident()` function (:ticket:`#359`).
 - Added `~connection.get_dsn_parameters()` connection method (:ticket:`#364`).
 - `~cursor.callproc()` now accepts a dictionary of parameters (:ticket:`#381`).
+- Using Python C API decoding functions and codecs caching for faster
+  unicode encoding/decoding (:ticket:`#473`).

 Other changes:

--- a/doc/src/extensions.rst
+++ b/doc/src/extensions.rst
@ -417,7 +417,7 @@ details.

 .. data:: encodings

-    Mapping from `PostgreSQL encoding`__ names to `Python codec`__ names.
+    Mapping from `PostgreSQL encoding`__ to `Python encoding`__ names.
    Used by Psycopg when adapting or casting unicode strings. See
    :ref:`unicode-handling`.

--- a/doc/src/usage.rst
+++ b/doc/src/usage.rst
@ -355,7 +355,7 @@ Unicode handling
 Psycopg can exchange Unicode data with a PostgreSQL database.  Python
 `!unicode` objects are automatically *encoded* in the client encoding
 defined on the database connection (the `PostgreSQL encoding`__, available in
-`connection.encoding`, is translated into a `Python codec`__ using the
+`connection.encoding`, is translated into a `Python encoding`__ using the
 `~psycopg2.extensions.encodings` mapping)::

    >>> print u, type(u)
--- a/psycopg/adapter_qstring.c
+++ b/psycopg/adapter_qstring.c
@ -36,20 +36,6 @@ static const char *default_encoding = "latin1";

 /* qstring_quote - do the quote process on plain and unicode strings */

-const char *
-_qstring_get_encoding(qstringObject *self)
-{
-    /* if the wrapped object is an unicode object we can encode it to match
-       conn->encoding but if the encoding is not specified we don't know what
-       to do and we raise an exception */
-    if (self->conn) {
-        return self->conn->codec;
-    }
-    else {
-        return self->encoding ? self->encoding : default_encoding;
-    }
-}
-
 static PyObject *
 qstring_quote(qstringObject *self)
 {
@ -59,19 +45,15 @@ qstring_quote(qstringObject *self)
    const char *encoding;
    PyObject *rv = NULL;

-    encoding = _qstring_get_encoding(self);
-    Dprintf("qstring_quote: encoding to %s", encoding);
-
    if (PyUnicode_Check(self->wrapped)) {
-        if (encoding) {
-            str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL);
-            Dprintf("qstring_quote: got encoded object at %p", str);
-            if (str == NULL) goto exit;
+        if (self->conn) {
+            if (!(str = conn_encode(self->conn, self->wrapped))) { goto exit; }
        }
        else {
-            PyErr_SetString(PyExc_TypeError,
-                "missing encoding to encode unicode object");
-            goto exit;
+            encoding = self->encoding ? self->encoding : default_encoding;
+            if(!(str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL))) {
+                goto exit;
+            }
        }
    }

@ -162,9 +144,12 @@ qstring_conform(qstringObject *self, PyObject *args)
 static PyObject *
 qstring_get_encoding(qstringObject *self)
 {
-    const char *encoding;
-    encoding = _qstring_get_encoding(self);
-    return Text_FromUTF8(encoding);
+    if (self->conn) {
+        return conn_pgenc_to_pyenc(self->conn->encoding, NULL);
+    }
+    else {
+        return Text_FromUTF8(self->encoding ? self->encoding : default_encoding);
+    }
 }

 static int
@ -178,7 +163,7 @@ qstring_set_encoding(qstringObject *self, PyObject *pyenc)
    Py_INCREF(pyenc);
    if (!(pyenc = psycopg_ensure_bytes(pyenc))) { goto exit; }
    if (!(tmp = Bytes_AsString(pyenc))) { goto exit; }
-    if (0 > psycopg_strdup(&cenc, tmp, 0)) { goto exit; }
+    if (0 > psycopg_strdup(&cenc, tmp, -1)) { goto exit; }

    Dprintf("qstring_set_encoding: encoding set to %s", cenc);
    PyMem_Free((void *)self->encoding);
--- a/psycopg/connection.h
+++ b/psycopg/connection.h
@ -83,7 +83,6 @@ struct connectionObject {
    char *dsn;              /* data source name */
    char *critical;         /* critical error on this connection */
    char *encoding;         /* current backend encoding */
-    char *codec;            /* python codec name for encoding */

    long int closed;          /* 1 means connection has been closed;
                                 2 that something horrible happened */
@ -122,6 +121,14 @@ struct connectionObject {
    int autocommit;

    PyObject *cursor_factory;    /* default cursor factory from cursor() */
+
+    /* Optional pointer to a decoding C function, e.g. PyUnicode_DecodeUTF8 */
+    PyObject *(*cdecoder)(const char *, Py_ssize_t, const char *);
+
+    /* Pointers to python encoding/decoding functions, e.g.
+     * codecs.getdecoder('utf8') */
+    PyObject *pyencoder;        /* python codec encoding function */
+    PyObject *pydecoder;        /* python codec decoding function */
 };

 /* map isolation level values into a numeric const */
@ -132,7 +139,10 @@ typedef struct {

 /* C-callable functions in connection_int.c and connection_ext.c */
 HIDDEN PyObject *conn_text_from_chars(connectionObject *pgconn, const char *str);
+HIDDEN PyObject *conn_encode(connectionObject *self, PyObject *b);
+HIDDEN PyObject *conn_decode(connectionObject *self, const char *str, Py_ssize_t len);
 HIDDEN int  conn_get_standard_conforming_strings(PGconn *pgconn);
+HIDDEN PyObject *conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding);
 RAISES_NEG HIDDEN int  conn_get_isolation_level(connectionObject *self);
 HIDDEN int  conn_get_protocol_version(PGconn *pgconn);
 HIDDEN int  conn_get_server_version(PGconn *pgconn);
--- a/psycopg/connection_int.c
+++ b/psycopg/connection_int.c
@ -58,12 +58,75 @@ const IsolationLevel conn_isolevels[] = {
 PyObject *
 conn_text_from_chars(connectionObject *self, const char *str)
 {
-#if PY_MAJOR_VERSION < 3
-        return PyString_FromString(str);
-#else
-        const char *codec = self ? self->codec : "ascii";
-        return PyUnicode_Decode(str, strlen(str), codec, "replace");
-#endif
+    return psycopg_text_from_chars_safe(str, -1, self ? self->pydecoder : NULL);
+}
+
+
+/* Encode a unicode object into a bytes object in the connection encoding.
+ *
+ * If no connection or encoder is available, default to utf8.
+ * Return a new reference, or NULL on error.
+ */
+PyObject *
+conn_encode(connectionObject *self, PyObject *u)
+{
+    PyObject *encoded = NULL;
+    PyObject *res = NULL;
+
+    /* no cached python encoder to call: encode straight to utf8 */
+    if (!self || !self->pyencoder) {
+        return PyUnicode_AsUTF8String(u);
+    }
+
+    res = PyObject_CallFunctionObjArgs(self->pyencoder, u, NULL);
+    if (!res) { return NULL; }
+
+    /* the first item of the result is the encoded data: the reference is
+     * borrowed from the tuple, so own it before releasing the tuple */
+    encoded = PyTuple_GetItem(res, 0);
+    Py_XINCREF(encoded);
+    Py_DECREF(res);
+
+    return encoded;
+}
+
+
+/* decode a c string into a Python unicode in the connection encoding
+ *
+ * len can be < 0: in this case it will be calculated
+ *
+ * If no connection or decoder is available, default to utf8
+ */
+PyObject *
+conn_decode(connectionObject *self, const char *str, Py_ssize_t len)
+{
+    PyObject *b = NULL;
+    PyObject *t = NULL;
+    PyObject *rv = NULL;
+
+    if (len < 0) { len = strlen(str); }
+
+    /* fast path: a C function decodes the connection encoding directly */
+    if (self && self->cdecoder) {
+        return self->cdecoder(str, len, NULL);
+    }
+
+    /* no connection, or no codec set on it yet: decode as utf8 instead of
+     * returning NULL with no exception set */
+    if (!(self && self->pydecoder)) {
+        return PyUnicode_FromStringAndSize(str, len);
+    }
+
+    if (!(b = Bytes_FromStringAndSize(str, len))) { goto exit; }
+    if (!(t = PyObject_CallFunctionObjArgs(self->pydecoder, b, NULL))) {
+        goto exit;
+    }
+    if ((rv = PyTuple_GetItem(t, 0))) { Py_INCREF(rv); }   /* borrowed */
+
+exit:
+    Py_XDECREF(t);
+    Py_XDECREF(b);
+    return rv;
 }

 /* conn_notice_callback - process notices */
@ -321,92 +384,168 @@ exit:
    return rv;
 }

-/* Convert a PostgreSQL encoding to a Python codec.
- *
- * Set 'codec' to a new copy of the codec name allocated on the Python heap.
- * Return 0 in case of success, else -1 and set an exception.
- *
- * 'enc' should be already normalized (uppercase, no - or _).
+/* set fast access functions according to the currently selected encoding
 */
-RAISES_NEG static int
-conn_encoding_to_codec(const char *enc, char **codec)
+static void
+conn_set_fast_codec(connectionObject *self)
 {
-    char *tmp;
-    Py_ssize_t size;
-    PyObject *pyenc = NULL;
-    int rv = -1;
+    Dprintf("conn_set_fast_codec: encoding=%s", self->encoding);

-    /* Find the Py codec name from the PG encoding */
-    if (!(pyenc = PyDict_GetItemString(psycoEncodings, enc))) {
+    if (0 == strcmp(self->encoding, "UTF8")) {
+        Dprintf("conn_set_fast_codec: PyUnicode_DecodeUTF8");
+        self->cdecoder = PyUnicode_DecodeUTF8;
+        return;
+    }
+
+    if (0 == strcmp(self->encoding, "LATIN1")) {
+        Dprintf("conn_set_fast_codec: PyUnicode_DecodeLatin1");
+        self->cdecoder = PyUnicode_DecodeLatin1;
+        return;
+    }
+
+    Dprintf("conn_set_fast_codec: no fast codec");
+    self->cdecoder = NULL;
+}
+
+
+/* Return the Python encoding from a PostgreSQL encoding.
+ *
+ * Optionally return the clean version of the postgres encoding too
+ */
+PyObject *
+conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding)
+{
+    char *pgenc = NULL;
+    PyObject *rv = NULL;
+
+    if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; }
+    if (!(rv = PyDict_GetItemString(psycoEncodings, pgenc))) {
        PyErr_Format(OperationalError,
-            "no Python codec for client encoding '%s'", enc);
+            "no Python encoding for PostgreSQL encoding '%s'", pgenc);
        goto exit;
    }
+    Py_INCREF(rv);

-    /* Convert the codec in a bytes string to extract the c string. */
-    Py_INCREF(pyenc);
-    if (!(pyenc = psycopg_ensure_bytes(pyenc))) {
-        goto exit;
+    if (clean_encoding) {
+        *clean_encoding = pgenc;
    }
-
-    if (-1 == Bytes_AsStringAndSize(pyenc, &tmp, &size)) {
-        goto exit;
+    else {
+        PyMem_Free(pgenc);
    }

-    /* have our own copy of the python codec name */
-    rv = psycopg_strdup(codec, tmp, size);
-
 exit:
-    Py_XDECREF(pyenc);
    return rv;
 }

-/* Read the client encoding from the connection.
+/* Convert a Postgres encoding into Python encoding and decoding functions.
 *
- * Store the encoding in the pgconn->encoding field and the name of the
- * matching python codec in codec. The buffers are allocated on the Python
- * heap.
+ * Set clean_encoding to a clean version of the Postgres encoding name
+ * and pyenc and pydec to python codec functions.
 *
- * Return 0 on success, else nonzero.
+ * Return 0 on success, else -1 and set an exception.
+ */
+RAISES_NEG static int
+conn_get_python_codec(const char *encoding,
+    char **clean_encoding, PyObject **pyenc, PyObject **pydec)
+{
+    int status = -1;
+    const char *pyname;
+    char *cleaned = NULL;
+    PyObject *name = NULL, *encoder = NULL, *decoder = NULL;
+
+    /* python name of the encoding, as bytes (ensure_bytes steals a ref) */
+    if (!(name = conn_pgenc_to_pyenc(encoding, &cleaned))) { goto exit; }
+    if (!(name = psycopg_ensure_bytes(name))) { goto exit; }
+    pyname = Bytes_AS_STRING(name);
+
+    /* fetch both codec functions before writing any output argument */
+    if (!(encoder = PyCodec_Encoder(pyname))) { goto exit; }
+    if (!(decoder = PyCodec_Decoder(pyname))) { goto exit; }
+
+    /* all good: hand ownership of everything over to the caller */
+    *clean_encoding = cleaned; cleaned = NULL;
+    *pyenc = encoder; encoder = NULL;
+    *pydec = decoder; decoder = NULL;
+    status = 0;
+
+exit:
+    Py_XDECREF(name);
+    Py_XDECREF(encoder);
+    Py_XDECREF(decoder);
+    PyMem_Free(cleaned);
+    return status;
+}
+
+
+/* Store the encoding in the pgconn->encoding field and set the other related
+ * encoding fields in the connection structure.
+ *
+ * Return 0 on success, else -1 and set an exception.
+ */
+RAISES_NEG static int
+conn_store_encoding(connectionObject *self, const char *encoding)
+{
+    int status = -1;
+    char *name = NULL;
+    char *old_name;
+    PyObject *encoder = NULL, *decoder = NULL;
+
+    /* resolve name and codecs first: a failure leaves self untouched */
+    if (0 > conn_get_python_codec(encoding, &name, &encoder, &decoder)) {
+        goto exit;
+    }
+
+    /* install the clean postgres name, then release the previous one */
+    old_name = self->encoding;
+    self->encoding = name;
+    name = NULL;
+    PyMem_Free(old_name);
+
+    /* the connection takes over our references to the codec functions */
+    Py_CLEAR(self->pyencoder);
+    self->pyencoder = encoder;
+    encoder = NULL;
+    Py_CLEAR(self->pydecoder);
+    self->pydecoder = decoder;
+    decoder = NULL;
+
+    /* the C fast path depends on self->encoding, so refresh it last */
+    conn_set_fast_codec(self);
+    status = 0;
+
+exit:
+    Py_XDECREF(encoder);
+    Py_XDECREF(decoder);
+    PyMem_Free(name);
+    return status;
+}
+
+
+/* Read the client encoding from the backend and store it in the connection.
+ *
+ * Return 0 on success, else -1.
 */
 RAISES_NEG static int
 conn_read_encoding(connectionObject *self, PGconn *pgconn)
 {
-    char *enc = NULL, *codec = NULL;
-    const char *tmp;
+    const char *encoding;
    int rv = -1;

-    tmp = PQparameterStatus(pgconn, "client_encoding");
-    Dprintf("conn_connect: client encoding: %s", tmp ? tmp : "(none)");
-    if (!tmp) {
+    encoding = PQparameterStatus(pgconn, "client_encoding");
+    Dprintf("conn_connect: client encoding: %s", encoding ? encoding : "(none)");
+    if (!encoding) {
        PyErr_SetString(OperationalError,
            "server didn't return client encoding");
        goto exit;
    }

-    if (0 > clear_encoding_name(tmp, &enc)) {
+    if (0 > conn_store_encoding(self, encoding)) {
        goto exit;
    }

-    /* Look for this encoding in Python codecs. */
-    if (0 > conn_encoding_to_codec(enc, &codec)) {
-        goto exit;
-    }
-
-    /* Good, success: store the encoding/codec in the connection. */
-    PyMem_Free(self->encoding);
-    self->encoding = enc;
-    enc = NULL;
-
-    PyMem_Free(self->codec);
-    self->codec = codec;
-    codec = NULL;
-
    rv = 0;

 exit:
-    PyMem_Free(enc);
-    PyMem_Free(codec);
    return rv;
 }

@ -1225,21 +1364,19 @@ endlock:
 /* conn_set_client_encoding - switch client encoding on connection */

 RAISES_NEG int
-conn_set_client_encoding(connectionObject *self, const char *enc)
+conn_set_client_encoding(connectionObject *self, const char *pgenc)
 {
    PGresult *pgres = NULL;
    char *error = NULL;
    int res = -1;
-    char *codec = NULL;
    char *clean_enc = NULL;

+    /* Normalize the requested name: self->encoding is stored normalized. */
+    if (0 > clear_encoding_name(pgenc, &clean_enc)) { goto exit; }
+
    /* If the current encoding is equal to the requested one we don't
       issue any query to the backend */
-    if (strcmp(self->encoding, enc) == 0) return 0;
-
-    /* We must know what python codec this encoding is. */
-    if (0 > clear_encoding_name(enc, &clean_enc)) { goto exit; }
-    if (0 > conn_encoding_to_codec(clean_enc, &codec)) { goto exit; }
+    if (strcmp(self->encoding, clean_enc) == 0) { res = 0; goto exit; }

    Py_BEGIN_ALLOW_THREADS;
    pthread_mutex_lock(&self->lock);
@ -1255,35 +1392,21 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
        goto endlock;
    }

-    /* no error, we can proceed and store the new encoding */
-    {
-        char *tmp = self->encoding;
-        self->encoding = clean_enc;
-        PyMem_Free(tmp);
-        clean_enc = NULL;
-    }
-
-    /* Store the python codec too. */
-    {
-        char *tmp = self->codec;
-        self->codec = codec;
-        PyMem_Free(tmp);
-        codec = NULL;
-    }
-
-    Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)",
-            self->encoding, self->codec);
-
 endlock:
    pthread_mutex_unlock(&self->lock);
    Py_END_ALLOW_THREADS;

-    if (res < 0)
+    if (res < 0) {
        pq_complete_error(self, &pgres, &error);
+        goto exit;
+    }
+
+    res = conn_store_encoding(self, pgenc);
+
+    Dprintf("conn_set_client_encoding: encoding set to %s", self->encoding);

 exit:
    PyMem_Free(clean_enc);
-    PyMem_Free(codec);

    return res;
 }
--- a/psycopg/connection_type.c
+++ b/psycopg/connection_type.c
@ -1097,7 +1097,7 @@ connection_setup(connectionObject *self, const char *dsn, long int async)
            self, async, Py_REFCNT(self)
      );

-    if (0 > psycopg_strdup(&self->dsn, dsn, 0)) { goto exit; }
+    if (0 > psycopg_strdup(&self->dsn, dsn, -1)) { goto exit; }
    if (!(self->notice_list = PyList_New(0))) { goto exit; }
    if (!(self->notifies = PyList_New(0))) { goto exit; }
    self->async = async;
@ -1141,6 +1141,9 @@ connection_clear(connectionObject *self)
    Py_CLEAR(self->notifies);
    Py_CLEAR(self->string_types);
    Py_CLEAR(self->binary_types);
+    Py_CLEAR(self->cursor_factory);
+    Py_CLEAR(self->pyencoder);
+    Py_CLEAR(self->pydecoder);
    return 0;
 }

@ -1164,7 +1167,6 @@ connection_dealloc(PyObject* obj)

    PyMem_Free(self->dsn);
    PyMem_Free(self->encoding);
-    PyMem_Free(self->codec);
    if (self->critical) free(self->critical);
    if (self->cancel) PQfreeCancel(self->cancel);

@ -1216,6 +1218,9 @@ connection_traverse(connectionObject *self, visitproc visit, void *arg)
    Py_VISIT(self->notifies);
    Py_VISIT(self->string_types);
    Py_VISIT(self->binary_types);
+    Py_VISIT(self->cursor_factory);
+    Py_VISIT(self->pyencoder);
+    Py_VISIT(self->pydecoder);
    return 0;
 }

--- a/psycopg/cursor_type.c
+++ b/psycopg/cursor_type.c
@ -286,11 +286,7 @@ static PyObject *_psyco_curs_validate_sql_basic(
        Py_INCREF(sql);
    }
    else if (PyUnicode_Check(sql)) {
-        char *enc = self->conn->codec;
-        sql = PyUnicode_AsEncodedString(sql, enc, NULL);
-        /* if there was an error during the encoding from unicode to the
-           target encoding, we just let the exception propagate */
-        if (sql == NULL) { goto fail; }
+        if (!(sql = conn_encode(self->conn, sql))) { goto fail; }
    }
    else {
        /* the  is not unicode or string, raise an error */
@ -1079,7 +1075,7 @@ psyco_curs_callproc(cursorObject *self, PyObject *args)
            if (!(cpname = Bytes_AsString(pname))) { goto exit; }

            if (!(scpnames[i] = psycopg_escape_identifier(
-                    self->conn, cpname, 0))) {
+                    self->conn, cpname, -1))) {
                Py_CLEAR(pname);
                goto exit;
            }
@ -1457,12 +1453,12 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs)
        goto exit;

    if (!(quoted_delimiter = psycopg_escape_string(
-            self->conn, sep, 0, NULL, NULL))) {
+            self->conn, sep, -1, NULL, NULL))) {
        goto exit;
    }

    if (!(quoted_null = psycopg_escape_string(
-            self->conn, null, 0, NULL, NULL))) {
+            self->conn, null, -1, NULL, NULL))) {
        goto exit;
    }

@ -1551,12 +1547,12 @@ psyco_curs_copy_to(cursorObject *self, PyObject *args, PyObject *kwargs)
        goto exit;

    if (!(quoted_delimiter = psycopg_escape_string(
-            self->conn, sep, 0, NULL, NULL))) {
+            self->conn, sep, -1, NULL, NULL))) {
        goto exit;
    }

    if (!(quoted_null = psycopg_escape_string(
-            self->conn, null, 0, NULL, NULL))) {
+            self->conn, null, -1, NULL, NULL))) {
        goto exit;
    }

@ -1899,10 +1895,10 @@ cursor_setup(cursorObject *self, connectionObject *conn, const char *name)
    Dprintf("cursor_setup: parameters: name = %s, conn = %p", name, conn);

    if (name) {
-        if (0 > psycopg_strdup(&self->name, name, 0)) {
+        if (0 > psycopg_strdup(&self->name, name, -1)) {
            return -1;
        }
-        if (!(self->qname = psycopg_escape_identifier(conn, name, 0))) {
+        if (!(self->qname = psycopg_escape_identifier(conn, name, -1))) {
            return -1;
        }
    }
--- a/psycopg/error.h
+++ b/psycopg/error.h
@ -34,7 +34,7 @@ typedef struct {
    PyObject *pgerror;
    PyObject *pgcode;
    cursorObject *cursor;
-    char *codec;
+    PyObject *pydecoder;
    PGresult *pgres;
 } errorObject;

--- a/psycopg/error_type.c
+++ b/psycopg/error_type.c
@ -34,17 +34,7 @@
 PyObject *
 error_text_from_chars(errorObject *self, const char *str)
 {
-    if (str == NULL) {
-        Py_INCREF(Py_None);
-        return (Py_None);
-    }
-
-#if PY_MAJOR_VERSION < 3
-        return PyString_FromString(str);
-#else
-        return PyUnicode_Decode(str, strlen(str),
-            self->codec ? self->codec : "ascii", "replace");
-#endif
+    return psycopg_text_from_chars_safe(str, -1, self->pydecoder);
 }


@ -93,6 +83,7 @@ error_traverse(errorObject *self, visitproc visit, void *arg)
    Py_VISIT(self->pgerror);
    Py_VISIT(self->pgcode);
    Py_VISIT(self->cursor);
+    Py_VISIT(self->pydecoder);

    return ((PyTypeObject *)PyExc_StandardError)->tp_traverse(
        (PyObject *)self, visit, arg);
@ -104,6 +95,7 @@ error_clear(errorObject *self)
    Py_CLEAR(self->pgerror);
    Py_CLEAR(self->pgcode);
    Py_CLEAR(self->cursor);
+    Py_CLEAR(self->pydecoder);

    return ((PyTypeObject *)PyExc_StandardError)->tp_clear((PyObject *)self);
 }
@ -113,7 +105,6 @@ error_dealloc(errorObject *self)
 {
    PyObject_GC_UnTrack((PyObject *)self);
    error_clear(self);
-    PyMem_Free(self->codec);
    CLEARPGRES(self->pgres);

    Py_TYPE(self)->tp_free((PyObject *)self);
--- a/psycopg/lobject_type.c
+++ b/psycopg/lobject_type.c
@ -86,9 +86,7 @@ psyco_lobj_write(lobjectObject *self, PyObject *args)
        data = obj;
    }
    else if (PyUnicode_Check(obj)) {
-        if (!(data = PyUnicode_AsEncodedString(obj, self->conn->codec, NULL))) {
-            goto exit;
-        }
+        if (!(data = conn_encode(self->conn, obj))) { goto exit; }
    }
    else {
        PyErr_Format(PyExc_TypeError,
@ -150,7 +148,7 @@ psyco_lobj_read(lobjectObject *self, PyObject *args)
    if (self->mode & LOBJECT_BINARY) {
        res = Bytes_FromStringAndSize(buffer, size);
    } else {
-        res = PyUnicode_Decode(buffer, size, self->conn->codec, NULL);
+        res = conn_decode(self->conn, buffer, size);
    }
    PyMem_Free(buffer);

--- a/psycopg/microprotocols.c
+++ b/psycopg/microprotocols.c
@ -251,9 +251,7 @@ microprotocol_getquoted(PyObject *obj, connectionObject *conn)
    /* Convert to bytes. */
    if (res && PyUnicode_CheckExact(res)) {
        PyObject *b;
-        const char *codec;
-        codec = (conn && conn->codec) ? conn->codec : "utf8";
-        b = PyUnicode_AsEncodedString(res, codec, NULL);
+        b = conn_encode(conn, res);
        Py_DECREF(res);
        res = b;
    }
--- a/psycopg/pqpath.c
+++ b/psycopg/pqpath.c
@ -167,6 +167,7 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
    const char *err2 = NULL;
    const char *code = NULL;
    PyObject *pyerr = NULL;
+    PyObject *pgerror = NULL, *pgcode = NULL;

    if (conn == NULL) {
        PyErr_SetString(DatabaseError,
@ -221,19 +222,37 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
    err2 = strip_severity(err);
    Dprintf("pq_raise: err2=%s", err2);

+    /* decode now the details of the error, because after psyco_set_error
+     * decoding will fail.
+     */
+    if (!(pgerror = conn_text_from_chars(conn, err))) {
+        /* we can't really handle an exception while handling this error
+         * so just print it. */
+        PyErr_Print();
+        PyErr_Clear();
+    }
+
+    if (!(pgcode = conn_text_from_chars(conn, code))) {
+        PyErr_Print();
+        PyErr_Clear();
+    }
+
    pyerr = psyco_set_error(exc, curs, err2);

    if (pyerr && PyObject_TypeCheck(pyerr, &errorType)) {
        errorObject *perr = (errorObject *)pyerr;

-        PyMem_Free(perr->codec);
-        psycopg_strdup(&perr->codec, conn->codec, 0);
+        Py_CLEAR(perr->pydecoder);
+        Py_XINCREF(conn->pydecoder);
+        perr->pydecoder = conn->pydecoder;

        Py_CLEAR(perr->pgerror);
-        perr->pgerror = error_text_from_chars(perr, err);
+        perr->pgerror = pgerror;
+        pgerror = NULL;

        Py_CLEAR(perr->pgcode);
-        perr->pgcode = error_text_from_chars(perr, code);
+        perr->pgcode = pgcode;
+        pgcode = NULL;

        CLEARPGRES(perr->pgres);
        if (pgres && *pgres) {
@ -241,6 +260,9 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres)
            *pgres = NULL;
        }
    }
+
+    Py_XDECREF(pgerror);
+    Py_XDECREF(pgcode);
 }

 /* pq_set_critical, pq_resolve_critical - manage critical errors
@ -765,7 +787,7 @@ pq_tpc_command_locked(connectionObject *conn, const char *cmd, const char *tid,
    PyEval_RestoreThread(*tstate);

    /* convert the xid into the postgres transaction_id and quote it. */
-    if (!(etid = psycopg_escape_string(conn, tid, 0, NULL, NULL)))
+    if (!(etid = psycopg_escape_string(conn, tid, -1, NULL, NULL)))
    { goto exit; }

    /* prepare the command to the server */
@ -1332,8 +1354,7 @@ _pq_copy_in_v3(cursorObject *curs)
        /* a file may return unicode if implements io.TextIOBase */
        if (PyUnicode_Check(o)) {
            PyObject *tmp;
-            Dprintf("_pq_copy_in_v3: encoding in %s", curs->conn->codec);
-            if (!(tmp = PyUnicode_AsEncodedString(o, curs->conn->codec, NULL))) {
+            if (!(tmp = conn_encode(curs->conn, o))) {
                Dprintf("_pq_copy_in_v3: encoding() failed");
                error = 1;
                break;
@ -1488,7 +1509,7 @@ _pq_copy_out_v3(cursorObject *curs)

        if (len > 0 && buffer) {
            if (is_text) {
-                obj = PyUnicode_Decode(buffer, len, curs->conn->codec, NULL);
+                obj = conn_decode(curs->conn, buffer, len);
            } else {
                obj = Bytes_FromStringAndSize(buffer, len);
            }
@ -1638,7 +1659,7 @@ retry:
        Dprintf("pq_read_replication_message: >>%.*s<<", data_size, buffer + hdr);

        if (repl->decode) {
-            str = PyUnicode_Decode(buffer + hdr, data_size, conn->codec, NULL);
+            str = conn_decode(conn, buffer + hdr, data_size);
        } else {
            str = Bytes_FromStringAndSize(buffer + hdr, data_size);
        }
--- a/psycopg/psycopg.h
+++ b/psycopg/psycopg.h
@ -129,9 +129,11 @@ RAISES HIDDEN PyObject *psyco_set_error(PyObject *exc, cursorObject *curs, const
 HIDDEN char *psycopg_escape_string(connectionObject *conn,
              const char *from, Py_ssize_t len, char *to, Py_ssize_t *tolen);
 HIDDEN char *psycopg_escape_identifier(connectionObject *conn,
-              const char *str, size_t len);
+              const char *str, Py_ssize_t len);
 HIDDEN int psycopg_strdup(char **to, const char *from, Py_ssize_t len);
 HIDDEN int psycopg_is_text_file(PyObject *f);
+HIDDEN PyObject *psycopg_text_from_chars_safe(
+        const char *str, Py_ssize_t len, PyObject *decoder);

 STEALS(1) HIDDEN PyObject * psycopg_ensure_bytes(PyObject *obj);

--- a/psycopg/psycopgmodule.c
+++ b/psycopg/psycopgmodule.c
@ -165,7 +165,6 @@ psyco_quote_ident(PyObject *self, PyObject *args, PyObject *kwargs)
 {
    PyObject *ident = NULL, *obj = NULL, *result = NULL;
    connectionObject *conn;
-    const char *str;
    char *quoted = NULL;

    static char *kwlist[] = {"ident", "scope", NULL};
@ -188,12 +187,9 @@ psyco_quote_ident(PyObject *self, PyObject *args, PyObject *kwargs)
    Py_INCREF(ident); /* for ensure_bytes */
    if (!(ident = psycopg_ensure_bytes(ident))) { goto exit; }

-    str = Bytes_AS_STRING(ident);
+    if (!(quoted = psycopg_escape_identifier(conn,
+        Bytes_AS_STRING(ident), Bytes_GET_SIZE(ident)))) { goto exit; }

-    quoted = psycopg_escape_identifier(conn, str, strlen(str));
-    if (!quoted) {
-        goto exit;
-    }
    result = conn_text_from_chars(conn, quoted);

 exit:
--- a/psycopg/typecast.c
+++ b/psycopg/typecast.c
@ -675,8 +675,7 @@ typecast_cast(PyObject *obj, const char *str, Py_ssize_t len, PyObject *curs)
 #if PY_MAJOR_VERSION < 3
            s = PyString_FromStringAndSize(str, len);
 #else
-            s = PyUnicode_Decode(str, len,
-                ((cursorObject *)curs)->conn->codec, NULL);
+            s = conn_decode(((cursorObject *)curs)->conn, str, len);
 #endif
        }
        else {
--- a/psycopg/typecast_basic.c
+++ b/psycopg/typecast_basic.c
@ -93,12 +93,12 @@ typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs)
 static PyObject *
 typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs)
 {
-    char *enc;
+    connectionObject *conn;

    if (s == NULL) { Py_RETURN_NONE; }

-    enc = ((cursorObject*)curs)->conn->codec;
-    return PyUnicode_Decode(s, len, enc, NULL);
+    conn = ((cursorObject*)curs)->conn;
+    return conn_decode(conn, s, len);
 }

 /** BOOLEAN - cast boolean value into right python object **/
--- a/psycopg/utils.c
+++ b/psycopg/utils.c
@ -40,6 +40,8 @@
 * and set an exception. The returned string includes quotes and leading E if
 * needed.
 *
+ * `len` is optional: if < 0 it will be calculated.
+ *
 * If tolen is set, it will contain the length of the escaped string,
 * including quotes.
 */
@ -50,7 +52,7 @@ psycopg_escape_string(connectionObject *conn, const char *from, Py_ssize_t len,
    Py_ssize_t ql;
    int eq = (conn && (conn->equote)) ? 1 : 0;

-    if (len == 0) {
+    if (len < 0) {
        len = strlen(from);
    } else if (strchr(from, '\0') != from + len) {
        PyErr_Format(PyExc_ValueError, "A string literal cannot contain NUL (0x00) characters.");
@ -92,13 +94,13 @@ psycopg_escape_string(connectionObject *conn, const char *from, Py_ssize_t len,

 /* Escape a string for inclusion in a query as identifier.
 *
- * 'len' is optional: if 0 the length is calculated.
+ * 'len' is optional: if < 0 it will be calculated.
 *
 * Return a string allocated by Postgres: free it using PQfreemem
 * In case of error set a Python exception.
 */
 char *
-psycopg_escape_identifier(connectionObject *conn, const char *str, size_t len)
+psycopg_escape_identifier(connectionObject *conn, const char *str, Py_ssize_t len)
 {
    char *rv = NULL;

@ -107,7 +109,7 @@ psycopg_escape_identifier(connectionObject *conn, const char *str, size_t len)
        goto exit;
    }

-    if (!len) { len = strlen(str); }
+    if (len < 0) { len = strlen(str); }

    rv = PQescapeIdentifier(conn->pgconn, str, len);
    if (!rv) {
@ -127,7 +129,7 @@ exit:
 /* Duplicate a string.
 *
 * Allocate a new buffer on the Python heap containing the new string.
- * 'len' is optional: if 0 the length is calculated.
+ * 'len' is optional: if < 0 the length is calculated.
 *
 * Store the return in 'to' and return 0 in case of success, else return -1
 * and raise an exception.
@ -141,7 +143,7 @@ psycopg_strdup(char **to, const char *from, Py_ssize_t len)
        *to = NULL;
        return 0;
    }
-    if (!len) { len = strlen(from); }
+    if (len < 0) { len = strlen(from); }
    if (!(*to = PyMem_Malloc(len + 1))) {
        PyErr_NoMemory();
        return -1;
@ -276,3 +278,57 @@ exit:

    return res;
 }
+
+
+/* Convert a C string into Python Text; a NULL 'str' is converted to None.
+ *
+ * 'decoder' is the python function codecs.getdecoder(enc). It is only used
+ * on Python 3 to return unicode (decoding as ASCII if it is NULL): in Py2
+ * the function returns a string.
+ *
+ * 'len' is optional: use -1 to have it calculated by the function.
+ */
+PyObject *
+psycopg_text_from_chars_safe(const char *str, Py_ssize_t len, PyObject *decoder)
+{
+#if PY_MAJOR_VERSION < 3
+
+    if (NULL == str) { Py_RETURN_NONE; }
+    if (len < 0) { len = strlen(str); }
+
+    return PyString_FromStringAndSize(str, len);
+
+#else
+
+    static PyObject *errors = NULL;     /* the "replace" handler name */
+    PyObject *bytes = NULL;
+    PyObject *res = NULL;
+    PyObject *text = NULL;
+
+    if (NULL == str) { Py_RETURN_NONE; }
+    if (len < 0) { len = strlen(str); }
+
+    /* no codec function to call: decode leniently as ASCII */
+    if (!decoder) {
+        return PyUnicode_DecodeASCII(str, len, "replace");
+    }
+
+    /* build the handler name on first use and keep it for later calls */
+    if (!errors && !(errors = PyUnicode_FromString("replace"))) {
+        goto exit;
+    }
+
+    if (!(bytes = PyBytes_FromStringAndSize(str, len))) { goto exit; }
+    res = PyObject_CallFunctionObjArgs(decoder, bytes, errors, NULL);
+    if (!res) { goto exit; }
+
+    /* the text is the first item of the result: the reference is borrowed */
+    if ((text = PyTuple_GetItem(res, 0))) { Py_INCREF(text); }
+
+exit:
+    Py_XDECREF(res);
+    Py_XDECREF(bytes);
+    return text;
+
+#endif
+}