From 121cf3b8f8426765d983579d3a4b2e932429cd9f Mon Sep 17 00:00:00 2001
From: Daniele Varrazzo
Date: Wed, 12 Oct 2016 01:10:31 +0100
Subject: [PATCH] Optimize UTF8 and Latin1 decoding

Cache a pointer to a fast decoding function when the connection encoding
is set, so as to skip a repeated codec lookup for every string.
---
 psycopg/connection.h     |  3 +++
 psycopg/connection_int.c | 29 +++++++++++++++++++++++++++++
 psycopg/typecast_basic.c | 11 ++++++++---
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/psycopg/connection.h b/psycopg/connection.h
index ec107429..b925bd47 100644
--- a/psycopg/connection.h
+++ b/psycopg/connection.h
@@ -122,6 +122,9 @@ struct connectionObject {
     int autocommit;
 
     PyObject *cursor_factory; /* default cursor factory from cursor() */
+
+    /* Pointer to a decoding function, e.g. PyUnicode_DecodeUTF8 */
+    PyObject *(*cdecoder)(const char *, Py_ssize_t, const char *);
 };
 
 /* map isolation level values into a numeric const */
diff --git a/psycopg/connection_int.c b/psycopg/connection_int.c
index 43d0fdae..62976d48 100644
--- a/psycopg/connection_int.c
+++ b/psycopg/connection_int.c
@@ -361,6 +361,31 @@ exit:
     return rv;
 }
 
+
+/* set fast access functions according to the currently selected codec
+ */
+void
+conn_set_fast_codec(connectionObject *self)
+{
+    Dprintf("conn_set_fast_codec: codec=%s", self->codec);
+
+    if (0 == strcmp(self->codec, "utf_8")) {
+        Dprintf("conn_set_fast_codec: PyUnicode_DecodeUTF8");
+        self->cdecoder = PyUnicode_DecodeUTF8;
+        return;
+    }
+
+    if (0 == strcmp(self->codec, "iso8859_1")) {
+        Dprintf("conn_set_fast_codec: PyUnicode_DecodeLatin1");
+        self->cdecoder = PyUnicode_DecodeLatin1;
+        return;
+    }
+
+    Dprintf("conn_set_fast_codec: no fast codec");
+    self->cdecoder = NULL;
+}
+
+
 /* Read the client encoding from the connection.
  *
  * Store the encoding in the pgconn->encoding field and the name of the
@@ -402,6 +427,8 @@ conn_read_encoding(connectionObject *self, PGconn *pgconn)
     self->codec = codec;
     codec = NULL;
 
+    conn_set_fast_codec(self);
+
     rv = 0;
 
 exit:
@@ -1243,6 +1270,8 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
         codec = NULL;
     }
 
+    conn_set_fast_codec(self);
+
     Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)",
             self->encoding, self->codec);
 
diff --git a/psycopg/typecast_basic.c b/psycopg/typecast_basic.c
index a31047f3..760555ef 100644
--- a/psycopg/typecast_basic.c
+++ b/psycopg/typecast_basic.c
@@ -93,12 +93,17 @@ typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs)
 static PyObject *
 typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs)
 {
-    char *enc;
+    connectionObject *conn;
 
     if (s == NULL) { Py_RETURN_NONE; }
 
-    enc = ((cursorObject*)curs)->conn->codec;
-    return PyUnicode_Decode(s, len, enc, NULL);
+    conn = ((cursorObject*)curs)->conn;
+    if (conn->cdecoder) {
+        return conn->cdecoder(s, len, NULL);
+    }
+    else {
+        return PyUnicode_Decode(s, len, conn->codec, NULL);
+    }
 }
 
 /** BOOLEAN - cast boolean value into right python object **/