Merge branch 'fix-encoding' into devel

This commit is contained in:
Daniele Varrazzo 2011-04-09 14:47:07 +01:00
commit e316d7bcd4
4 changed files with 69 additions and 39 deletions

2
NEWS
View File

@ -3,6 +3,8 @@ What's new in psycopg 2.4.1
- Use own parser for bytea output, not requiring anymore the libpq 9.0 - Use own parser for bytea output, not requiring anymore the libpq 9.0
to parse the hex format. to parse the hex format.
- Don't fail connection if the client encoding is a non-normalized
variant. Issue reported by Peter Eisentraut.
- Correctly detect an empty query sent to the backend (ticket #46). - Correctly detect an empty query sent to the backend (ticket #46).
- Allow to specify --static-libpq on setup.py command line instead of - Allow to specify --static-libpq on setup.py command line instead of
just in 'setup.cfg'. Patch provided by Matthew Ryan (ticket #48). just in 'setup.cfg'. Patch provided by Matthew Ryan (ticket #48).

View File

@ -236,10 +236,45 @@ conn_get_standard_conforming_strings(PGconn *pgconn)
return equote; return equote;
} }
/* Normalize an encoding name: drop every character that is not
 * alphanumeric (e.g. '-', '_', spaces) and turn the rest uppercase.
 *
 * 'enc' must be a valid NUL-terminated string.
 *
 * Return a new NUL-terminated buffer allocated on the Python heap
 * (the caller releases it with PyMem_Free),
 * NULL and set an exception on error.
 */
static char *
clean_encoding_name(const char *enc)
{
    const char *i = enc;
    char *rv, *j;

    /* the cleaned name can never be longer than the original one */
    if (!(j = rv = PyMem_Malloc(strlen(enc) + 1))) {
        PyErr_NoMemory();
        return NULL;
    }

    while (*i) {
        /* The <ctype.h> functions are only defined for values representable
         * as unsigned char (or EOF): convert before classifying so that
         * bytes with the high bit set don't trigger undefined behaviour
         * where plain char is signed. */
        unsigned char c = (unsigned char)*i++;

        if (isalnum(c)) {
            *j++ = (char)toupper(c);
        }
    }
    *j = '\0';

    Dprintf("clean_encoding_name: %s -> %s", enc, rv);

    return rv;
}
/* Convert a PostgreSQL encoding to a Python codec. /* Convert a PostgreSQL encoding to a Python codec.
* *
* Return a new copy of the codec name allocated on the Python heap, * Return a new copy of the codec name allocated on the Python heap,
* NULL with exception in case of error. * NULL with exception in case of error.
*
* 'enc' should be already normalized (uppercase, no - or _).
*/ */
static char * static char *
conn_encoding_to_codec(const char *enc) conn_encoding_to_codec(const char *enc)
@ -285,7 +320,7 @@ exit:
static int static int
conn_read_encoding(connectionObject *self, PGconn *pgconn) conn_read_encoding(connectionObject *self, PGconn *pgconn)
{ {
char *enc = NULL, *codec = NULL, *j; char *enc = NULL, *codec = NULL;
const char *tmp; const char *tmp;
int rv = -1; int rv = -1;
@ -297,16 +332,10 @@ conn_read_encoding(connectionObject *self, PGconn *pgconn)
goto exit; goto exit;
} }
if (!(enc = PyMem_Malloc(strlen(tmp)+1))) { if (!(enc = clean_encoding_name(tmp))) {
PyErr_NoMemory();
goto exit; goto exit;
} }
/* turn encoding in uppercase */
j = enc;
while (*tmp) { *j++ = toupper(*tmp++); }
*j = '\0';
/* Look for this encoding in Python codecs. */ /* Look for this encoding in Python codecs. */
if (!(codec = conn_encoding_to_codec(enc))) { if (!(codec = conn_encoding_to_codec(enc))) {
goto exit; goto exit;
@ -965,21 +994,23 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
PGresult *pgres = NULL; PGresult *pgres = NULL;
char *error = NULL; char *error = NULL;
char query[48]; char query[48];
int res = 0; int res = 1;
char *codec; char *codec = NULL;
char *clean_enc = NULL;
/* If the current encoding is equal to the requested one we don't /* If the current encoding is equal to the requested one we don't
issue any query to the backend */ issue any query to the backend */
if (strcmp(self->encoding, enc) == 0) return 0; if (strcmp(self->encoding, enc) == 0) return 0;
/* We must know what python codec this encoding is. */ /* We must know what python codec this encoding is. */
if (!(codec = conn_encoding_to_codec(enc))) { return -1; } if (!(clean_enc = clean_encoding_name(enc))) { goto exit; }
if (!(codec = conn_encoding_to_codec(clean_enc))) { goto exit; }
Py_BEGIN_ALLOW_THREADS; Py_BEGIN_ALLOW_THREADS;
pthread_mutex_lock(&self->lock); pthread_mutex_lock(&self->lock);
/* set encoding, no encoding string is longer than 24 bytes */ /* set encoding, no encoding string is longer than 24 bytes */
PyOS_snprintf(query, 47, "SET client_encoding = '%s'", enc); PyOS_snprintf(query, 47, "SET client_encoding = '%s'", clean_enc);
/* abort the current transaction, to set the encoding ouside of /* abort the current transaction, to set the encoding ouside of
transactions */ transactions */
@ -994,21 +1025,18 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
/* no error, we can proceeed and store the new encoding */ /* no error, we can proceeed and store the new encoding */
{ {
char *tmp = self->encoding; char *tmp = self->encoding;
self->encoding = NULL; self->encoding = clean_enc;
PyMem_Free(tmp); PyMem_Free(tmp);
} clean_enc = NULL;
if (!(self->encoding = psycopg_strdup(enc, 0))) {
res = 1; /* don't call pq_complete_error below */
goto endlock;
} }
/* Store the python codec too. */ /* Store the python codec too. */
{ {
char *tmp = self->codec; char *tmp = self->codec;
self->codec = NULL;
PyMem_Free(tmp);
}
self->codec = codec; self->codec = codec;
PyMem_Free(tmp);
codec = NULL;
}
Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)", Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)",
self->encoding, self->codec); self->encoding, self->codec);
@ -1021,6 +1049,10 @@ endlock:
if (res < 0) if (res < 0)
pq_complete_error(self, &pgres, &error); pq_complete_error(self, &pgres, &error);
exit:
PyMem_Free(clean_enc);
PyMem_Free(codec);
return res; return res;
} }

View File

@ -423,36 +423,18 @@ static PyObject *
psyco_conn_set_client_encoding(connectionObject *self, PyObject *args) psyco_conn_set_client_encoding(connectionObject *self, PyObject *args)
{ {
const char *enc; const char *enc;
char *buffer, *dest;
PyObject *rv = NULL; PyObject *rv = NULL;
Py_ssize_t len;
EXC_IF_CONN_CLOSED(self); EXC_IF_CONN_CLOSED(self);
EXC_IF_CONN_ASYNC(self, set_client_encoding); EXC_IF_CONN_ASYNC(self, set_client_encoding);
EXC_IF_TPC_PREPARED(self, set_client_encoding); EXC_IF_TPC_PREPARED(self, set_client_encoding);
if (!PyArg_ParseTuple(args, "s#", &enc, &len)) return NULL; if (!PyArg_ParseTuple(args, "s", &enc)) return NULL;
/* convert to upper case and remove '-' and '_' from string */ if (conn_set_client_encoding(self, enc) == 0) {
if (!(dest = buffer = PyMem_Malloc(len+1))) {
return PyErr_NoMemory();
}
while (*enc) {
if (*enc == '_' || *enc == '-') {
++enc;
}
else {
*dest++ = toupper(*enc++);
}
}
*dest = '\0';
if (conn_set_client_encoding(self, buffer) == 0) {
Py_INCREF(Py_None); Py_INCREF(Py_None);
rv = Py_None; rv = Py_None;
} }
PyMem_Free(buffer);
return rv; return rv;
} }

View File

@ -22,6 +22,7 @@
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details. # License for more details.
import os
import time import time
import threading import threading
from testutils import unittest, decorate_all_tests, skip_before_postgres from testutils import unittest, decorate_all_tests, skip_before_postgres
@ -141,6 +142,19 @@ class ConnectionTests(unittest.TestCase):
cur.execute("select 'foo'::text;") cur.execute("select 'foo'::text;")
self.assertEqual(cur.fetchone()[0], u'foo') self.assertEqual(cur.fetchone()[0], u'foo')
def test_connect_nonnormal_envvar(self):
    # Connecting must succeed even when PGCLIENTENCODING carries a
    # non-normalized spelling of the encoding name.
    self.conn.close()

    env_key = 'PGCLIENTENCODING'
    previous = os.environ.get(env_key)
    os.environ[env_key] = 'utf-8' # malformed spelling
    try:
        self.conn = psycopg2.connect(dsn)
    finally:
        # Put the environment back exactly as it was found.
        if previous is None:
            del os.environ[env_key]
        else:
            os.environ[env_key] = previous
def test_weakref(self): def test_weakref(self):
from weakref import ref from weakref import ref
conn = psycopg2.connect(dsn) conn = psycopg2.connect(dsn)