mirror of
https://github.com/psycopg/psycopg2.git
synced 2024-11-26 10:53:44 +03:00
Normalize the encoding name at connection
The encoding can be set by PGCLIENTENCODING, which may use an alternative spelling. Bug reported by Peter Eisentraut. At this point the idea of considering one of the random spellings such as EUC_CN as somewhat "blessed" is debunked, so just store the cleaned-up version of the encoding in the mapping table. Note that the cleaned-up version was needed by the unicode adapter: this requirement has been superseded, as the connection now contains a copy of the Python codec name, set whenever the client encoding is set.
This commit is contained in:
parent
bf48706868
commit
88803695ac
|
@ -236,6 +236,39 @@ conn_get_standard_conforming_strings(PGconn *pgconn)
|
|||
return equote;
|
||||
}
|
||||
|
||||
|
||||
/* Normalize an encoding name: drop every non-alphanumeric character and
 * turn the remaining ones uppercase (e.g. "utf-8" -> "UTF8").
 *
 * Return a buffer allocated on Python heap (release it with PyMem_Free),
 * NULL and set an exception on error.
 */
static char *
clean_encoding_name(const char *enc)
{
    const char *i;
    char *rv, *j;

    /* the cleaned-up name is never longer than the original one */
    if (!(j = rv = PyMem_Malloc(strlen(enc) + 1))) {
        PyErr_NoMemory();
        return NULL;
    }

    for (i = enc; *i; ++i) {
        /* The <ctype.h> functions are only defined for values representable
         * as unsigned char (or EOF): a plain char may be negative for
         * non-ASCII bytes, so convert before classifying. */
        unsigned char c = (unsigned char)*i;

        if (isalnum(c)) {
            *j++ = (char)toupper(c);
        }
    }
    *j = '\0';

    Dprintf("clean_encoding_name: %s -> %s", enc, rv);

    return rv;
}
|
||||
|
||||
/* Convert a PostgreSQL encoding to a Python codec.
|
||||
*
|
||||
* Return a new copy of the codec name allocated on the Python heap,
|
||||
|
@ -246,11 +279,16 @@ conn_encoding_to_codec(const char *enc)
|
|||
{
|
||||
char *tmp;
|
||||
Py_ssize_t size;
|
||||
char *norm_enc = NULL;
|
||||
PyObject *pyenc = NULL;
|
||||
char *rv = NULL;
|
||||
|
||||
if (!(norm_enc = clean_encoding_name(enc))) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* Find the Py codec name from the PG encoding */
|
||||
if (!(pyenc = PyDict_GetItemString(psycoEncodings, enc))) {
|
||||
if (!(pyenc = PyDict_GetItemString(psycoEncodings, norm_enc))) {
|
||||
PyErr_Format(OperationalError,
|
||||
"no Python codec for client encoding '%s'", enc);
|
||||
goto exit;
|
||||
|
@ -270,6 +308,7 @@ conn_encoding_to_codec(const char *enc)
|
|||
rv = psycopg_strdup(tmp, size);
|
||||
|
||||
exit:
|
||||
PyMem_Free(norm_enc);
|
||||
Py_XDECREF(pyenc);
|
||||
return rv;
|
||||
}
|
||||
|
@ -285,7 +324,7 @@ exit:
|
|||
static int
|
||||
conn_read_encoding(connectionObject *self, PGconn *pgconn)
|
||||
{
|
||||
char *enc = NULL, *codec = NULL, *j;
|
||||
char *enc = NULL, *codec = NULL;
|
||||
const char *tmp;
|
||||
int rv = -1;
|
||||
|
||||
|
@ -297,16 +336,10 @@ conn_read_encoding(connectionObject *self, PGconn *pgconn)
|
|||
goto exit;
|
||||
}
|
||||
|
||||
if (!(enc = PyMem_Malloc(strlen(tmp)+1))) {
|
||||
PyErr_NoMemory();
|
||||
if (!(enc = psycopg_strdup(tmp, 0))) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* turn encoding in uppercase */
|
||||
j = enc;
|
||||
while (*tmp) { *j++ = toupper(*tmp++); }
|
||||
*j = '\0';
|
||||
|
||||
/* Look for this encoding in Python codecs. */
|
||||
if (!(codec = conn_encoding_to_codec(enc))) {
|
||||
goto exit;
|
||||
|
|
|
@ -423,36 +423,18 @@ static PyObject *
|
|||
psyco_conn_set_client_encoding(connectionObject *self, PyObject *args)
|
||||
{
|
||||
const char *enc;
|
||||
char *buffer, *dest;
|
||||
PyObject *rv = NULL;
|
||||
Py_ssize_t len;
|
||||
|
||||
EXC_IF_CONN_CLOSED(self);
|
||||
EXC_IF_CONN_ASYNC(self, set_client_encoding);
|
||||
EXC_IF_TPC_PREPARED(self, set_client_encoding);
|
||||
|
||||
if (!PyArg_ParseTuple(args, "s#", &enc, &len)) return NULL;
|
||||
if (!PyArg_ParseTuple(args, "s", &enc)) return NULL;
|
||||
|
||||
/* convert to upper case and remove '-' and '_' from string */
|
||||
if (!(dest = buffer = PyMem_Malloc(len+1))) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
|
||||
while (*enc) {
|
||||
if (*enc == '_' || *enc == '-') {
|
||||
++enc;
|
||||
}
|
||||
else {
|
||||
*dest++ = toupper(*enc++);
|
||||
}
|
||||
}
|
||||
*dest = '\0';
|
||||
|
||||
if (conn_set_client_encoding(self, buffer) == 0) {
|
||||
if (conn_set_client_encoding(self, enc) == 0) {
|
||||
Py_INCREF(Py_None);
|
||||
rv = Py_None;
|
||||
}
|
||||
PyMem_Free(buffer);
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
# License for more details.
|
||||
|
||||
import os
|
||||
import time
|
||||
import threading
|
||||
from testutils import unittest, decorate_all_tests, skip_before_postgres
|
||||
|
@ -141,6 +142,19 @@ class ConnectionTests(unittest.TestCase):
|
|||
cur.execute("select 'foo'::text;")
|
||||
self.assertEqual(cur.fetchone()[0], u'foo')
|
||||
|
||||
def test_connect_nonnormal_envvar(self):
    # An encoding name coming from the environment must be normalized
    # when the connection is opened: connecting must not fail.
    self.conn.close()
    saved = os.environ.get('PGCLIENTENCODING')
    os.environ['PGCLIENTENCODING'] = 'utf-8'    # non-normalized spelling
    try:
        self.conn = psycopg2.connect(dsn)
    finally:
        # Put the environment back exactly as it was found.
        if saved is None:
            del os.environ['PGCLIENTENCODING']
        else:
            os.environ['PGCLIENTENCODING'] = saved
|
||||
|
||||
def test_weakref(self):
|
||||
from weakref import ref
|
||||
conn = psycopg2.connect(dsn)
|
||||
|
|
Loading…
Reference in New Issue
Block a user