diff --git a/NEWS b/NEWS index 6ffa66a9..ecde78d3 100644 --- a/NEWS +++ b/NEWS @@ -25,6 +25,8 @@ New features: - Added `~psycopg2.extensions.quote_ident()` function (:ticket:`#359`). - Added `~connection.get_dsn_parameters()` connection method (:ticket:`#364`). - `~cursor.callproc()` now accepts a dictionary of parameters (:ticket:`#381`). +- Using Python C API decoding functions and codecs caching for faster + unicode encoding/decoding (:ticket:`#473`). Other changes: diff --git a/doc/src/extensions.rst b/doc/src/extensions.rst index b661895d..9c5a8538 100644 --- a/doc/src/extensions.rst +++ b/doc/src/extensions.rst @@ -417,7 +417,7 @@ details. .. data:: encodings - Mapping from `PostgreSQL encoding`__ names to `Python codec`__ names. + Mapping from `PostgreSQL encoding`__ to `Python encoding`__ names. Used by Psycopg when adapting or casting unicode strings. See :ref:`unicode-handling`. diff --git a/doc/src/usage.rst b/doc/src/usage.rst index e768f372..d9fea755 100644 --- a/doc/src/usage.rst +++ b/doc/src/usage.rst @@ -355,7 +355,7 @@ Unicode handling Psycopg can exchange Unicode data with a PostgreSQL database. 
Python `!unicode` objects are automatically *encoded* in the client encoding defined on the database connection (the `PostgreSQL encoding`__, available in -`connection.encoding`, is translated into a `Python codec`__ using the +`connection.encoding`, is translated into a `Python encoding`__ using the `~psycopg2.extensions.encodings` mapping):: >>> print u, type(u) diff --git a/psycopg/adapter_qstring.c b/psycopg/adapter_qstring.c index 8c5a8f10..73579c57 100644 --- a/psycopg/adapter_qstring.c +++ b/psycopg/adapter_qstring.c @@ -36,20 +36,6 @@ static const char *default_encoding = "latin1"; /* qstring_quote - do the quote process on plain and unicode strings */ -const char * -_qstring_get_encoding(qstringObject *self) -{ - /* if the wrapped object is an unicode object we can encode it to match - conn->encoding but if the encoding is not specified we don't know what - to do and we raise an exception */ - if (self->conn) { - return self->conn->codec; - } - else { - return self->encoding ? self->encoding : default_encoding; - } -} - static PyObject * qstring_quote(qstringObject *self) { @@ -59,19 +45,15 @@ qstring_quote(qstringObject *self) const char *encoding; PyObject *rv = NULL; - encoding = _qstring_get_encoding(self); - Dprintf("qstring_quote: encoding to %s", encoding); - if (PyUnicode_Check(self->wrapped)) { - if (encoding) { - str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL); - Dprintf("qstring_quote: got encoded object at %p", str); - if (str == NULL) goto exit; + if (self->conn) { + if (!(str = conn_encode(self->conn, self->wrapped))) { goto exit; } } else { - PyErr_SetString(PyExc_TypeError, - "missing encoding to encode unicode object"); - goto exit; + encoding = self->encoding ? 
self->encoding : default_encoding; + if(!(str = PyUnicode_AsEncodedString(self->wrapped, encoding, NULL))) { + goto exit; + } } } @@ -162,9 +144,12 @@ qstring_conform(qstringObject *self, PyObject *args) static PyObject * qstring_get_encoding(qstringObject *self) { - const char *encoding; - encoding = _qstring_get_encoding(self); - return Text_FromUTF8(encoding); + if (self->conn) { + return conn_pgenc_to_pyenc(self->conn->encoding, NULL); + } + else { + return Text_FromUTF8(self->encoding ? self->encoding : default_encoding); + } } static int @@ -178,7 +163,7 @@ qstring_set_encoding(qstringObject *self, PyObject *pyenc) Py_INCREF(pyenc); if (!(pyenc = psycopg_ensure_bytes(pyenc))) { goto exit; } if (!(tmp = Bytes_AsString(pyenc))) { goto exit; } - if (0 > psycopg_strdup(&cenc, tmp, 0)) { goto exit; } + if (0 > psycopg_strdup(&cenc, tmp, -1)) { goto exit; } Dprintf("qstring_set_encoding: encoding set to %s", cenc); PyMem_Free((void *)self->encoding); diff --git a/psycopg/connection.h b/psycopg/connection.h index ec107429..2e2d51de 100644 --- a/psycopg/connection.h +++ b/psycopg/connection.h @@ -83,7 +83,6 @@ struct connectionObject { char *dsn; /* data source name */ char *critical; /* critical error on this connection */ char *encoding; /* current backend encoding */ - char *codec; /* python codec name for encoding */ long int closed; /* 1 means connection has been closed; 2 that something horrible happened */ @@ -122,6 +121,14 @@ struct connectionObject { int autocommit; PyObject *cursor_factory; /* default cursor factory from cursor() */ + + /* Optional pointer to a decoding C function, e.g. PyUnicode_DecodeUTF8 */ + PyObject *(*cdecoder)(const char *, Py_ssize_t, const char *); + + /* Pointers to python encoding/decoding functions, e.g. 
+ * codecs.getdecoder('utf8') */ + PyObject *pyencoder; /* python codec encoding function */ + PyObject *pydecoder; /* python codec decoding function */ }; /* map isolation level values into a numeric const */ @@ -132,7 +139,10 @@ typedef struct { /* C-callable functions in connection_int.c and connection_ext.c */ HIDDEN PyObject *conn_text_from_chars(connectionObject *pgconn, const char *str); +HIDDEN PyObject *conn_encode(connectionObject *self, PyObject *b); +HIDDEN PyObject *conn_decode(connectionObject *self, const char *str, Py_ssize_t len); HIDDEN int conn_get_standard_conforming_strings(PGconn *pgconn); +HIDDEN PyObject *conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding); RAISES_NEG HIDDEN int conn_get_isolation_level(connectionObject *self); HIDDEN int conn_get_protocol_version(PGconn *pgconn); HIDDEN int conn_get_server_version(PGconn *pgconn); diff --git a/psycopg/connection_int.c b/psycopg/connection_int.c index a34e5ef9..f92a658e 100644 --- a/psycopg/connection_int.c +++ b/psycopg/connection_int.c @@ -58,12 +58,75 @@ const IsolationLevel conn_isolevels[] = { PyObject * conn_text_from_chars(connectionObject *self, const char *str) { -#if PY_MAJOR_VERSION < 3 - return PyString_FromString(str); -#else - const char *codec = self ? self->codec : "ascii"; - return PyUnicode_Decode(str, strlen(str), codec, "replace"); -#endif + return psycopg_text_from_chars_safe(str, -1, self ? self->pydecoder : NULL); +} + + +/* Encode an unicode object into a bytes object in the connection encoding. 
+ * + * If no connection or encoding is available, default to utf8 + */ +PyObject * +conn_encode(connectionObject *self, PyObject *u) +{ + PyObject *t = NULL; + PyObject *rv = NULL; + + if (!(self && self->pyencoder)) { + rv = PyUnicode_AsUTF8String(u); + goto exit; + } + + if (!(t = PyObject_CallFunctionObjArgs(self->pyencoder, u, NULL))) { + goto exit; + } + + if (!(rv = PyTuple_GetItem(t, 0))) { goto exit; } + Py_INCREF(rv); + +exit: + Py_XDECREF(t); + + return rv; +} + + +/* decode a c string into a Python unicode in the connection encoding + * + * len can be < 0: in this case it will be calculated + * + * If no connection or encoding is available, default to utf8 + */ +PyObject * +conn_decode(connectionObject *self, const char *str, Py_ssize_t len) +{ + PyObject *b = NULL; + PyObject *t = NULL; + PyObject *rv = NULL; + + if (len < 0) { len = strlen(str); } + + if (self && self->cdecoder) { + return self->cdecoder(str, len, NULL); + } + + if (self && self->pydecoder) { + if (!(b = Bytes_FromStringAndSize(str, len))) { goto exit; } + if (!(t = PyObject_CallFunctionObjArgs(self->pydecoder, b, NULL))) { + goto exit; + } + rv = PyTuple_GetItem(t, 0); + Py_XINCREF(rv); + } + else { + /* no connection, or no codec stored on it yet: default to utf8 */ + rv = PyUnicode_FromStringAndSize(str, len); + } + +exit: + Py_XDECREF(t); + Py_XDECREF(b); + return rv; } /* conn_notice_callback - process notices */ @@ -321,92 +384,168 @@ exit: return rv; } -/* Convert a PostgreSQL encoding to a Python codec. - * - * Set 'codec' to a new copy of the codec name allocated on the Python heap. - * Return 0 in case of success, else -1 and set an exception. - * - * 'enc' should be already normalized (uppercase, no - or _). 
+/* set fast access functions according to the currently selected encoding */ -RAISES_NEG static int -conn_encoding_to_codec(const char *enc, char **codec) +static void +conn_set_fast_codec(connectionObject *self) { - char *tmp; - Py_ssize_t size; - PyObject *pyenc = NULL; - int rv = -1; + Dprintf("conn_set_fast_codec: encoding=%s", self->encoding); - /* Find the Py codec name from the PG encoding */ - if (!(pyenc = PyDict_GetItemString(psycoEncodings, enc))) { + if (0 == strcmp(self->encoding, "UTF8")) { + Dprintf("conn_set_fast_codec: PyUnicode_DecodeUTF8"); + self->cdecoder = PyUnicode_DecodeUTF8; + return; + } + + if (0 == strcmp(self->encoding, "LATIN1")) { + Dprintf("conn_set_fast_codec: PyUnicode_DecodeLatin1"); + self->cdecoder = PyUnicode_DecodeLatin1; + return; + } + + Dprintf("conn_set_fast_codec: no fast codec"); + self->cdecoder = NULL; +} + + +/* Return the Python encoding from a PostgreSQL encoding. + * + * Optionally return the clean version of the postgres encoding too + */ +PyObject * +conn_pgenc_to_pyenc(const char *encoding, char **clean_encoding) +{ + char *pgenc = NULL; + PyObject *rv = NULL; + + if (0 > clear_encoding_name(encoding, &pgenc)) { goto exit; } + if (!(rv = PyDict_GetItemString(psycoEncodings, pgenc))) { PyErr_Format(OperationalError, - "no Python codec for client encoding '%s'", enc); + "no Python encoding for PostgreSQL encoding '%s'", pgenc); goto exit; } + Py_INCREF(rv); - /* Convert the codec in a bytes string to extract the c string. */ - Py_INCREF(pyenc); - if (!(pyenc = psycopg_ensure_bytes(pyenc))) { - goto exit; + if (clean_encoding) { + /* hand the clean name over to the caller */ + *clean_encoding = pgenc; + pgenc = NULL; } - - if (-1 == Bytes_AsStringAndSize(pyenc, &tmp, &size)) { - goto exit; - } - /* have our own copy of the python codec name */ - rv = psycopg_strdup(codec, tmp, size); - exit: - Py_XDECREF(pyenc); + PyMem_Free(pgenc); return rv; } -/* Read the client encoding from the connection. 
+/* Convert a Postgres encoding into Python encoding and decoding functions. * - * Store the encoding in the pgconn->encoding field and the name of the - * matching python codec in codec. The buffers are allocated on the Python - * heap. + * Set clean_encoding to a clean version of the Postgres encoding name + * and pyenc and pydec to python codec functions. * - * Return 0 on success, else nonzero. + * Return 0 on success, else -1 and set an exception. + */ +RAISES_NEG static int +conn_get_python_codec(const char *encoding, + char **clean_encoding, PyObject **pyenc, PyObject **pydec) +{ + int rv = -1; + char *pgenc = NULL; + PyObject *encname = NULL; + PyObject *enc_tmp = NULL, *dec_tmp = NULL; + + /* get the Python name of the encoding as a C string */ + if (!(encname = conn_pgenc_to_pyenc(encoding, &pgenc))) { goto exit; } + if (!(encname = psycopg_ensure_bytes(encname))) { goto exit; } + + /* Look up the codec functions */ + if (!(enc_tmp = PyCodec_Encoder(Bytes_AS_STRING(encname)))) { goto exit; } + if (!(dec_tmp = PyCodec_Decoder(Bytes_AS_STRING(encname)))) { goto exit; } + + /* success */ + *pyenc = enc_tmp; enc_tmp = NULL; + *pydec = dec_tmp; dec_tmp = NULL; + *clean_encoding = pgenc; pgenc = NULL; + rv = 0; + +exit: + Py_XDECREF(enc_tmp); + Py_XDECREF(dec_tmp); + Py_XDECREF(encname); + PyMem_Free(pgenc); + + return rv; +} + + +/* Store the encoding in the pgconn->encoding field and set the other related + * encoding fields in the connection structure. + * + * Return 0 on success, else -1 and set an exception. + */ +RAISES_NEG static int +conn_store_encoding(connectionObject *self, const char *encoding) +{ + int rv = -1; + char *pgenc = NULL; + PyObject *enc_tmp = NULL, *dec_tmp = NULL; + + if (0 > conn_get_python_codec(encoding, &pgenc, &enc_tmp, &dec_tmp)) { + goto exit; + } + + /* Good, success: store the encoding/codec in the connection. 
*/ + { + char *tmp = self->encoding; + self->encoding = pgenc; + PyMem_Free(tmp); + pgenc = NULL; + } + + Py_CLEAR(self->pyencoder); + self->pyencoder = enc_tmp; + enc_tmp = NULL; + + Py_CLEAR(self->pydecoder); + self->pydecoder = dec_tmp; + dec_tmp = NULL; + + conn_set_fast_codec(self); + + rv = 0; + +exit: + Py_XDECREF(enc_tmp); + Py_XDECREF(dec_tmp); + PyMem_Free(pgenc); + return rv; +} + + +/* Read the client encoding from the backend and store it in the connection. + * + * Return 0 on success, else -1. */ RAISES_NEG static int conn_read_encoding(connectionObject *self, PGconn *pgconn) { - char *enc = NULL, *codec = NULL; - const char *tmp; + const char *encoding; int rv = -1; - tmp = PQparameterStatus(pgconn, "client_encoding"); - Dprintf("conn_connect: client encoding: %s", tmp ? tmp : "(none)"); - if (!tmp) { + encoding = PQparameterStatus(pgconn, "client_encoding"); + Dprintf("conn_connect: client encoding: %s", encoding ? encoding : "(none)"); + if (!encoding) { PyErr_SetString(OperationalError, "server didn't return client encoding"); goto exit; } - if (0 > clear_encoding_name(tmp, &enc)) { + if (0 > conn_store_encoding(self, encoding)) { goto exit; } - /* Look for this encoding in Python codecs. */ - if (0 > conn_encoding_to_codec(enc, &codec)) { - goto exit; - } - - /* Good, success: store the encoding/codec in the connection. */ - PyMem_Free(self->encoding); - self->encoding = enc; - enc = NULL; - - PyMem_Free(self->codec); - self->codec = codec; - codec = NULL; - rv = 0; exit: - PyMem_Free(enc); - PyMem_Free(codec); return rv; } @@ -1225,21 +1364,19 @@ endlock: /* conn_set_client_encoding - switch client encoding on connection */ RAISES_NEG int -conn_set_client_encoding(connectionObject *self, const char *enc) +conn_set_client_encoding(connectionObject *self, const char *pgenc) { PGresult *pgres = NULL; char *error = NULL; int res = -1; - char *codec = NULL; char *clean_enc = NULL; + /* We must know what python encoding this encoding is. 
*/ + if (0 > clear_encoding_name(pgenc, &clean_enc)) { goto exit; } + /* If the current encoding is equal to the requested one we don't issue any query to the backend */ - if (strcmp(self->encoding, enc) == 0) return 0; - - /* We must know what python codec this encoding is. */ - if (0 > clear_encoding_name(enc, &clean_enc)) { goto exit; } - if (0 > conn_encoding_to_codec(clean_enc, &codec)) { goto exit; } + if (strcmp(self->encoding, clean_enc) == 0) { res = 0; goto exit; } Py_BEGIN_ALLOW_THREADS; pthread_mutex_lock(&self->lock); @@ -1255,35 +1392,21 @@ conn_set_client_encoding(connectionObject *self, const char *enc) goto endlock; } - /* no error, we can proceed and store the new encoding */ - { - char *tmp = self->encoding; - self->encoding = clean_enc; - PyMem_Free(tmp); - clean_enc = NULL; - } - - /* Store the python codec too. */ - { - char *tmp = self->codec; - self->codec = codec; - PyMem_Free(tmp); - codec = NULL; - } - - Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)", - self->encoding, self->codec); - endlock: pthread_mutex_unlock(&self->lock); Py_END_ALLOW_THREADS; - if (res < 0) + if (res < 0) { pq_complete_error(self, &pgres, &error); + goto exit; + } + + res = conn_store_encoding(self, pgenc); + + Dprintf("conn_set_client_encoding: encoding set to %s", self->encoding); exit: PyMem_Free(clean_enc); - PyMem_Free(codec); return res; } diff --git a/psycopg/connection_type.c b/psycopg/connection_type.c index 485a92b7..7401bc14 100644 --- a/psycopg/connection_type.c +++ b/psycopg/connection_type.c @@ -1097,7 +1097,7 @@ connection_setup(connectionObject *self, const char *dsn, long int async) self, async, Py_REFCNT(self) ); - if (0 > psycopg_strdup(&self->dsn, dsn, 0)) { goto exit; } + if (0 > psycopg_strdup(&self->dsn, dsn, -1)) { goto exit; } if (!(self->notice_list = PyList_New(0))) { goto exit; } if (!(self->notifies = PyList_New(0))) { goto exit; } self->async = async; @@ -1141,6 +1141,9 @@ connection_clear(connectionObject *self) 
Py_CLEAR(self->notifies); Py_CLEAR(self->string_types); Py_CLEAR(self->binary_types); + Py_CLEAR(self->cursor_factory); + Py_CLEAR(self->pyencoder); + Py_CLEAR(self->pydecoder); return 0; } @@ -1164,7 +1167,6 @@ connection_dealloc(PyObject* obj) PyMem_Free(self->dsn); PyMem_Free(self->encoding); - PyMem_Free(self->codec); if (self->critical) free(self->critical); if (self->cancel) PQfreeCancel(self->cancel); @@ -1216,6 +1218,9 @@ connection_traverse(connectionObject *self, visitproc visit, void *arg) Py_VISIT(self->notifies); Py_VISIT(self->string_types); Py_VISIT(self->binary_types); + Py_VISIT(self->cursor_factory); + Py_VISIT(self->pyencoder); + Py_VISIT(self->pydecoder); return 0; } diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index baa5b8f7..a7303c68 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -286,11 +286,7 @@ static PyObject *_psyco_curs_validate_sql_basic( Py_INCREF(sql); } else if (PyUnicode_Check(sql)) { - char *enc = self->conn->codec; - sql = PyUnicode_AsEncodedString(sql, enc, NULL); - /* if there was an error during the encoding from unicode to the - target encoding, we just let the exception propagate */ - if (sql == NULL) { goto fail; } + if (!(sql = conn_encode(self->conn, sql))) { goto fail; } } else { /* the is not unicode or string, raise an error */ @@ -1079,7 +1075,7 @@ psyco_curs_callproc(cursorObject *self, PyObject *args) if (!(cpname = Bytes_AsString(pname))) { goto exit; } if (!(scpnames[i] = psycopg_escape_identifier( - self->conn, cpname, 0))) { + self->conn, cpname, -1))) { Py_CLEAR(pname); goto exit; } @@ -1457,12 +1453,12 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) goto exit; if (!(quoted_delimiter = psycopg_escape_string( - self->conn, sep, 0, NULL, NULL))) { + self->conn, sep, -1, NULL, NULL))) { goto exit; } if (!(quoted_null = psycopg_escape_string( - self->conn, null, 0, NULL, NULL))) { + self->conn, null, -1, NULL, NULL))) { goto exit; } @@ -1551,12 +1547,12 
@@ psyco_curs_copy_to(cursorObject *self, PyObject *args, PyObject *kwargs) goto exit; if (!(quoted_delimiter = psycopg_escape_string( - self->conn, sep, 0, NULL, NULL))) { + self->conn, sep, -1, NULL, NULL))) { goto exit; } if (!(quoted_null = psycopg_escape_string( - self->conn, null, 0, NULL, NULL))) { + self->conn, null, -1, NULL, NULL))) { goto exit; } @@ -1899,10 +1895,10 @@ cursor_setup(cursorObject *self, connectionObject *conn, const char *name) Dprintf("cursor_setup: parameters: name = %s, conn = %p", name, conn); if (name) { - if (0 > psycopg_strdup(&self->name, name, 0)) { + if (0 > psycopg_strdup(&self->name, name, -1)) { return -1; } - if (!(self->qname = psycopg_escape_identifier(conn, name, 0))) { + if (!(self->qname = psycopg_escape_identifier(conn, name, -1))) { return -1; } } diff --git a/psycopg/error.h b/psycopg/error.h index 9ae6dbd3..275a7ce7 100644 --- a/psycopg/error.h +++ b/psycopg/error.h @@ -34,7 +34,7 @@ typedef struct { PyObject *pgerror; PyObject *pgcode; cursorObject *cursor; - char *codec; + PyObject *pydecoder; PGresult *pgres; } errorObject; diff --git a/psycopg/error_type.c b/psycopg/error_type.c index 75761e81..4ab21915 100644 --- a/psycopg/error_type.c +++ b/psycopg/error_type.c @@ -34,17 +34,7 @@ PyObject * error_text_from_chars(errorObject *self, const char *str) { - if (str == NULL) { - Py_INCREF(Py_None); - return (Py_None); - } - -#if PY_MAJOR_VERSION < 3 - return PyString_FromString(str); -#else - return PyUnicode_Decode(str, strlen(str), - self->codec ? 
self->codec : "ascii", "replace"); -#endif + return psycopg_text_from_chars_safe(str, -1, self->pydecoder); } @@ -93,6 +83,7 @@ error_traverse(errorObject *self, visitproc visit, void *arg) Py_VISIT(self->pgerror); Py_VISIT(self->pgcode); Py_VISIT(self->cursor); + Py_VISIT(self->pydecoder); return ((PyTypeObject *)PyExc_StandardError)->tp_traverse( (PyObject *)self, visit, arg); @@ -104,6 +95,7 @@ error_clear(errorObject *self) Py_CLEAR(self->pgerror); Py_CLEAR(self->pgcode); Py_CLEAR(self->cursor); + Py_CLEAR(self->pydecoder); return ((PyTypeObject *)PyExc_StandardError)->tp_clear((PyObject *)self); } @@ -113,7 +105,6 @@ error_dealloc(errorObject *self) { PyObject_GC_UnTrack((PyObject *)self); error_clear(self); - PyMem_Free(self->codec); CLEARPGRES(self->pgres); Py_TYPE(self)->tp_free((PyObject *)self); diff --git a/psycopg/lobject_type.c b/psycopg/lobject_type.c index ddda0daf..54f3a4be 100644 --- a/psycopg/lobject_type.c +++ b/psycopg/lobject_type.c @@ -86,9 +86,7 @@ psyco_lobj_write(lobjectObject *self, PyObject *args) data = obj; } else if (PyUnicode_Check(obj)) { - if (!(data = PyUnicode_AsEncodedString(obj, self->conn->codec, NULL))) { - goto exit; - } + if (!(data = conn_encode(self->conn, obj))) { goto exit; } } else { PyErr_Format(PyExc_TypeError, @@ -150,7 +148,7 @@ psyco_lobj_read(lobjectObject *self, PyObject *args) if (self->mode & LOBJECT_BINARY) { res = Bytes_FromStringAndSize(buffer, size); } else { - res = PyUnicode_Decode(buffer, size, self->conn->codec, NULL); + res = conn_decode(self->conn, buffer, size); } PyMem_Free(buffer); diff --git a/psycopg/microprotocols.c b/psycopg/microprotocols.c index 1687bc26..3ddcc485 100644 --- a/psycopg/microprotocols.c +++ b/psycopg/microprotocols.c @@ -251,9 +251,7 @@ microprotocol_getquoted(PyObject *obj, connectionObject *conn) /* Convert to bytes. */ if (res && PyUnicode_CheckExact(res)) { PyObject *b; - const char *codec; - codec = (conn && conn->codec) ? 
conn->codec : "utf8"; - b = PyUnicode_AsEncodedString(res, codec, NULL); + b = conn_encode(conn, res); Py_DECREF(res); res = b; } diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index d02cb708..328a2b26 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -167,6 +167,7 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres) const char *err2 = NULL; const char *code = NULL; PyObject *pyerr = NULL; + PyObject *pgerror = NULL, *pgcode = NULL; if (conn == NULL) { PyErr_SetString(DatabaseError, @@ -221,19 +222,37 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres) err2 = strip_severity(err); Dprintf("pq_raise: err2=%s", err2); + /* decode now the details of the error, because after psyco_set_error + * decoding will fail. + */ + if (!(pgerror = conn_text_from_chars(conn, err))) { + /* we can't really handle an exception while handling this error + * so just print it. */ + PyErr_Print(); + PyErr_Clear(); + } + + if (!(pgcode = conn_text_from_chars(conn, code))) { + PyErr_Print(); + PyErr_Clear(); + } + pyerr = psyco_set_error(exc, curs, err2); if (pyerr && PyObject_TypeCheck(pyerr, &errorType)) { errorObject *perr = (errorObject *)pyerr; - PyMem_Free(perr->codec); - psycopg_strdup(&perr->codec, conn->codec, 0); + Py_CLEAR(perr->pydecoder); + Py_XINCREF(conn->pydecoder); + perr->pydecoder = conn->pydecoder; Py_CLEAR(perr->pgerror); - perr->pgerror = error_text_from_chars(perr, err); + perr->pgerror = pgerror; + pgerror = NULL; Py_CLEAR(perr->pgcode); - perr->pgcode = error_text_from_chars(perr, code); + perr->pgcode = pgcode; + pgcode = NULL; CLEARPGRES(perr->pgres); if (pgres && *pgres) { @@ -241,6 +260,9 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult **pgres) *pgres = NULL; } } + + Py_XDECREF(pgerror); + Py_XDECREF(pgcode); } /* pq_set_critical, pq_resolve_critical - manage critical errors @@ -765,7 +787,7 @@ pq_tpc_command_locked(connectionObject *conn, const char *cmd, const char *tid, 
PyEval_RestoreThread(*tstate); /* convert the xid into the postgres transaction_id and quote it. */ - if (!(etid = psycopg_escape_string(conn, tid, 0, NULL, NULL))) + if (!(etid = psycopg_escape_string(conn, tid, -1, NULL, NULL))) { goto exit; } /* prepare the command to the server */ @@ -1332,8 +1354,7 @@ _pq_copy_in_v3(cursorObject *curs) /* a file may return unicode if implements io.TextIOBase */ if (PyUnicode_Check(o)) { PyObject *tmp; - Dprintf("_pq_copy_in_v3: encoding in %s", curs->conn->codec); - if (!(tmp = PyUnicode_AsEncodedString(o, curs->conn->codec, NULL))) { + if (!(tmp = conn_encode(curs->conn, o))) { Dprintf("_pq_copy_in_v3: encoding() failed"); error = 1; break; @@ -1488,7 +1509,7 @@ _pq_copy_out_v3(cursorObject *curs) if (len > 0 && buffer) { if (is_text) { - obj = PyUnicode_Decode(buffer, len, curs->conn->codec, NULL); + obj = conn_decode(curs->conn, buffer, len); } else { obj = Bytes_FromStringAndSize(buffer, len); } @@ -1638,7 +1659,7 @@ retry: Dprintf("pq_read_replication_message: >>%.*s<<", data_size, buffer + hdr); if (repl->decode) { - str = PyUnicode_Decode(buffer + hdr, data_size, conn->codec, NULL); + str = conn_decode(conn, buffer + hdr, data_size); } else { str = Bytes_FromStringAndSize(buffer + hdr, data_size); } diff --git a/psycopg/psycopg.h b/psycopg/psycopg.h index 438d7636..13673540 100644 --- a/psycopg/psycopg.h +++ b/psycopg/psycopg.h @@ -129,9 +129,11 @@ RAISES HIDDEN PyObject *psyco_set_error(PyObject *exc, cursorObject *curs, const HIDDEN char *psycopg_escape_string(connectionObject *conn, const char *from, Py_ssize_t len, char *to, Py_ssize_t *tolen); HIDDEN char *psycopg_escape_identifier(connectionObject *conn, - const char *str, size_t len); + const char *str, Py_ssize_t len); HIDDEN int psycopg_strdup(char **to, const char *from, Py_ssize_t len); HIDDEN int psycopg_is_text_file(PyObject *f); +HIDDEN PyObject *psycopg_text_from_chars_safe( + const char *str, Py_ssize_t len, PyObject *decoder); STEALS(1) HIDDEN PyObject 
* psycopg_ensure_bytes(PyObject *obj); diff --git a/psycopg/psycopgmodule.c b/psycopg/psycopgmodule.c index bf7d908a..c4d1517a 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -165,7 +165,6 @@ psyco_quote_ident(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *ident = NULL, *obj = NULL, *result = NULL; connectionObject *conn; - const char *str; char *quoted = NULL; static char *kwlist[] = {"ident", "scope", NULL}; @@ -188,12 +187,9 @@ psyco_quote_ident(PyObject *self, PyObject *args, PyObject *kwargs) Py_INCREF(ident); /* for ensure_bytes */ if (!(ident = psycopg_ensure_bytes(ident))) { goto exit; } - str = Bytes_AS_STRING(ident); + if (!(quoted = psycopg_escape_identifier(conn, + Bytes_AS_STRING(ident), Bytes_GET_SIZE(ident)))) { goto exit; } - quoted = psycopg_escape_identifier(conn, str, strlen(str)); - if (!quoted) { - goto exit; - } result = conn_text_from_chars(conn, quoted); exit: diff --git a/psycopg/typecast.c b/psycopg/typecast.c index e6e2faa9..a4f123c3 100644 --- a/psycopg/typecast.c +++ b/psycopg/typecast.c @@ -675,8 +675,7 @@ typecast_cast(PyObject *obj, const char *str, Py_ssize_t len, PyObject *curs) #if PY_MAJOR_VERSION < 3 s = PyString_FromStringAndSize(str, len); #else - s = PyUnicode_Decode(str, len, - ((cursorObject *)curs)->conn->codec, NULL); + s = conn_decode(((cursorObject *)curs)->conn, str, len); #endif } else { diff --git a/psycopg/typecast_basic.c b/psycopg/typecast_basic.c index a31047f3..db6c5a93 100644 --- a/psycopg/typecast_basic.c +++ b/psycopg/typecast_basic.c @@ -93,12 +93,12 @@ typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs) static PyObject * typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs) { - char *enc; + connectionObject *conn; if (s == NULL) { Py_RETURN_NONE; } - enc = ((cursorObject*)curs)->conn->codec; - return PyUnicode_Decode(s, len, enc, NULL); + conn = ((cursorObject*)curs)->conn; + return conn_decode(conn, s, len); } /** BOOLEAN - cast boolean 
value into right python object **/ diff --git a/psycopg/utils.c b/psycopg/utils.c index bc6f7bec..7f6b6e6e 100644 --- a/psycopg/utils.c +++ b/psycopg/utils.c @@ -40,6 +40,8 @@ * and set an exception. The returned string includes quotes and leading E if * needed. * + * `len` is optional: if < 0 it will be calculated. + * * If tolen is set, it will contain the length of the escaped string, * including quotes. */ @@ -50,7 +52,7 @@ psycopg_escape_string(connectionObject *conn, const char *from, Py_ssize_t len, Py_ssize_t ql; int eq = (conn && (conn->equote)) ? 1 : 0; - if (len == 0) { + if (len < 0) { len = strlen(from); } else if (strchr(from, '\0') != from + len) { PyErr_Format(PyExc_ValueError, "A string literal cannot contain NUL (0x00) characters."); @@ -92,13 +94,13 @@ psycopg_escape_string(connectionObject *conn, const char *from, Py_ssize_t len, /* Escape a string for inclusion in a query as identifier. * - * 'len' is optional: if 0 the length is calculated. + * 'len' is optional: if < 0 it will be calculated. * * Return a string allocated by Postgres: free it using PQfreemem * In case of error set a Python exception. */ char * -psycopg_escape_identifier(connectionObject *conn, const char *str, size_t len) +psycopg_escape_identifier(connectionObject *conn, const char *str, Py_ssize_t len) { char *rv = NULL; @@ -107,7 +109,7 @@ psycopg_escape_identifier(connectionObject *conn, const char *str, size_t len) goto exit; } - if (!len) { len = strlen(str); } + if (len < 0) { len = strlen(str); } rv = PQescapeIdentifier(conn->pgconn, str, len); if (!rv) { @@ -127,7 +129,7 @@ exit: /* Duplicate a string. * * Allocate a new buffer on the Python heap containing the new string. - * 'len' is optional: if 0 the length is calculated. + * 'len' is optional: if < 0 the length is calculated. * * Store the return in 'to' and return 0 in case of success, else return -1 * and raise an exception. 
@@ -141,7 +143,7 @@ psycopg_strdup(char **to, const char *from, Py_ssize_t len) *to = NULL; return 0; } - if (!len) { len = strlen(from); } + if (len < 0) { len = strlen(from); } if (!(*to = PyMem_Malloc(len + 1))) { PyErr_NoMemory(); return -1; @@ -276,3 +278,57 @@ exit: return res; } + + +/* Convert a C string into Python Text using a specified codec. + * + * The codec is the python function codec.getdecoder(enc). It is only used on + * Python 3 to return unicode: in Py2 the function returns a string. + * + * len is optional: use -1 to have it calculated by the function. + */ +PyObject * +psycopg_text_from_chars_safe(const char *str, Py_ssize_t len, PyObject *decoder) +{ +#if PY_MAJOR_VERSION < 3 + + if (!str) { Py_RETURN_NONE; } + + if (len < 0) { len = strlen(str); } + + return PyString_FromStringAndSize(str, len); + +#else + + static PyObject *replace = NULL; + PyObject *rv = NULL; + PyObject *b = NULL; + PyObject *t = NULL; + + if (!str) { Py_RETURN_NONE; } + + if (len < 0) { len = strlen(str); } + + if (decoder) { + if (!replace) { + if (!(replace = PyUnicode_FromString("replace"))) { goto exit; } + } + if (!(b = PyBytes_FromStringAndSize(str, len))) { goto exit; } + if (!(t = PyObject_CallFunctionObjArgs(decoder, b, replace, NULL))) { + goto exit; + } + + if (!(rv = PyTuple_GetItem(t, 0))) { goto exit; } + Py_INCREF(rv); + } + else { + rv = PyUnicode_DecodeASCII(str, len, "replace"); + } + +exit: + Py_XDECREF(t); + Py_XDECREF(b); + return rv; + +#endif +}