Merge branch 'fix-encoding' into devel

2025-07-14 18:22:21 +03:00 · 2011-04-09 14:47:07 +01:00 · 2011-04-09 14:47:07 +01:00 · e316d7bcd4
commit e316d7bcd4
parent bf48706868 e3605b33c1
4 changed files with 69 additions and 39 deletions
--- a/2
+++ b/2
@ -3,6 +3,8 @@ What's new in psycopg 2.4.1
  - Use our own parser for bytea output, no longer requiring libpq 9.0
    to parse the hex format.
  - Don't fail connection if the client encoding is a non-normalized
    variant. Issue reported by Peter Eisentraut.
  - Correctly detect an empty query sent to the backend (ticket #46).
  - Allow specifying --static-libpq on the setup.py command line instead
    of just in 'setup.cfg'. Patch provided by Matthew Ryan (ticket #48).
--- a/psycopg/connection_int.c
+++ b/psycopg/connection_int.c
@ -236,10 +236,45 @@ conn_get_standard_conforming_strings(PGconn *pgconn)
    return equote;
 }
 /* Normalize an encoding name: keep only alphanumeric chars, uppercased.
 *
 * Every non-alphanumeric character (e.g. '-' and '_') is dropped, so that
 * spellings such as "utf-8" and "UTF8" produce the same string.
 *
 * Return a new buffer allocated on the Python heap (to be released with
 * PyMem_Free), or NULL with a Python exception set on error.
 */
 static char *
 clean_encoding_name(const char *enc)
 {
    const char *i = enc;
    char *rv, *j;

    /* the cleaned name is never longer than the input */
    if (!(j = rv = PyMem_Malloc(strlen(enc) + 1))) {
        PyErr_NoMemory();
        return NULL;
    }

    while (*i) {
        /* <ctype.h> functions require a value representable as unsigned
         * char: passing a plain, possibly negative, char is undefined
         * behaviour. */
        unsigned char c = (unsigned char)*i++;
        if (isalnum(c)) {
            *j++ = (char)toupper(c);
        }
    }
    *j = '\0';

    Dprintf("clean_encoding_name: %s -> %s", enc, rv);

    return rv;
 }
 /* Convert a PostgreSQL encoding to a Python codec.
 *
 * Return a new copy of the codec name allocated on the Python heap,
 * NULL with exception in case of error.
 *
 * 'enc' should be already normalized (uppercase, no - or _).
 */
 static char *
 conn_encoding_to_codec(const char *enc)
@ -285,7 +320,7 @@ exit:
 static int
 conn_read_encoding(connectionObject *self, PGconn *pgconn)
 {
-    char *enc = NULL, *codec = NULL, *j;
+    char *enc = NULL, *codec = NULL;
    const char *tmp;
    int rv = -1;
@ -297,16 +332,10 @@ conn_read_encoding(connectionObject *self, PGconn *pgconn)
        goto exit;
    }
-    if (!(enc = PyMem_Malloc(strlen(tmp)+1))) {
+    if (!(enc = clean_encoding_name(tmp))) {
        PyErr_NoMemory();
        goto exit;
    }
    /* turn encoding in uppercase */
    j = enc;
    while (*tmp) { *j++ = toupper(*tmp++); }
    *j = '\0';
    /* Look for this encoding in Python codecs. */
    if (!(codec = conn_encoding_to_codec(enc))) {
        goto exit;
@ -965,21 +994,23 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
    PGresult *pgres = NULL;
    char *error = NULL;
    char query[48];
-    int res = 0;
+    int res = 1;
-    char *codec;
+    char *codec = NULL;
    char *clean_enc = NULL;
    /* If the current encoding is equal to the requested one we don't
       issue any query to the backend */
    if (strcmp(self->encoding, enc) == 0) return 0;
    /* We must know what python codec this encoding is. */
-    if (!(codec = conn_encoding_to_codec(enc))) { return -1; }
+    if (!(clean_enc = clean_encoding_name(enc))) { goto exit; }
    if (!(codec = conn_encoding_to_codec(clean_enc))) { goto exit; }
    Py_BEGIN_ALLOW_THREADS;
    pthread_mutex_lock(&self->lock);
    /* set encoding, no encoding string is longer than 24 bytes */
-    PyOS_snprintf(query, 47, "SET client_encoding = '%s'", enc);
+    PyOS_snprintf(query, 47, "SET client_encoding = '%s'", clean_enc);
    /* abort the current transaction, to set the encoding outside of
       transactions */
@ -994,21 +1025,18 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
    /* no error, we can proceed and store the new encoding */
    {
        char *tmp = self->encoding;
-        self->encoding = NULL;
+        self->encoding = clean_enc;
        PyMem_Free(tmp);
-    }
+        clean_enc = NULL;
    if (!(self->encoding = psycopg_strdup(enc, 0))) {
        res = 1;  /* don't call pq_complete_error below */
        goto endlock;
    }
    /* Store the python codec too. */
    {
        char *tmp = self->codec;
        self->codec = NULL;
        PyMem_Free(tmp);
    }
        self->codec = codec;
        PyMem_Free(tmp);
        codec = NULL;
    }
    Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)",
            self->encoding, self->codec);
@ -1021,6 +1049,10 @@ endlock:
    if (res < 0)
        pq_complete_error(self, &pgres, &error);
 exit:
    PyMem_Free(clean_enc);
    PyMem_Free(codec);
    return res;
 }
--- a/psycopg/connection_type.c
+++ b/psycopg/connection_type.c
@ -423,36 +423,18 @@ static PyObject *
 psyco_conn_set_client_encoding(connectionObject *self, PyObject *args)
 {
    const char *enc;
    char *buffer, *dest;
    PyObject *rv = NULL;
    Py_ssize_t len;
    EXC_IF_CONN_CLOSED(self);
    EXC_IF_CONN_ASYNC(self, set_client_encoding);
    EXC_IF_TPC_PREPARED(self, set_client_encoding);
-    if (!PyArg_ParseTuple(args, "s#", &enc, &len)) return NULL;
+    if (!PyArg_ParseTuple(args, "s", &enc)) return NULL;
-    /* convert to upper case and remove '-' and '_' from string */
+    if (conn_set_client_encoding(self, enc) == 0) {
    if (!(dest = buffer = PyMem_Malloc(len+1))) {
        return PyErr_NoMemory();
    }
    while (*enc) {
        if (*enc == '_' || *enc == '-') {
            ++enc;
        }
        else {
            *dest++ = toupper(*enc++);
        }
    }
    *dest = '\0';
    if (conn_set_client_encoding(self, buffer) == 0) {
        Py_INCREF(Py_None);
        rv = Py_None;
    }
    PyMem_Free(buffer);
    return rv;
 }
--- a/tests/test_connection.py
+++ b/tests/test_connection.py
@ -22,6 +22,7 @@
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 # License for more details.
 import os
 import time
 import threading
 from testutils import unittest, decorate_all_tests, skip_before_postgres
@ -141,6 +142,19 @@ class ConnectionTests(unittest.TestCase):
        cur.execute("select 'foo'::text;")
        self.assertEqual(cur.fetchone()[0], u'foo')
    def test_connect_nonnormal_envvar(self):
        # Encoding normalization has to happen at connection time: connect
        # with a non-normalized client encoding taken from the environment.
        self.conn.close()
        saved = os.environ.get('PGCLIENTENCODING')
        os.environ['PGCLIENTENCODING'] = 'utf-8'    # malformed spelling
        try:
            self.conn = psycopg2.connect(dsn)
        finally:
            # Put the environment back exactly as it was found.
            if saved is None:
                del os.environ['PGCLIENTENCODING']
            else:
                os.environ['PGCLIENTENCODING'] = saved
    def test_weakref(self):
        from weakref import ref
        conn = psycopg2.connect(dsn)