diff --git a/NEWS-2.3 b/NEWS-2.3 index 8353861c..405c1e14 100644 --- a/NEWS-2.3 +++ b/NEWS-2.3 @@ -6,6 +6,9 @@ What's new in psycopg 2.3.3 - Added `register_composite()` function to cast PostgreSQL composite types into Python tuples/namedtuples. - Connections and cursors are weakly referenceable. + - Added 'b' and 't' mode to large objects: write can deal with both bytes + strings and unicode; read can return either bytes strings or decoded + unicode. - The build script refuses to guess values if pg_config is not found. - Improved PostgreSQL-Python encodings mapping. Added a few missing encodings: EUC_CN, EUC_JIS_2004, ISO885910, ISO885916, diff --git a/doc/src/connection.rst b/doc/src/connection.rst index bf6a6f26..d0f5e1a9 100644 --- a/doc/src/connection.rst +++ b/doc/src/connection.rst @@ -490,13 +490,14 @@ The ``connection`` class .. method:: lobject([oid [, mode [, new_oid [, new_file [, lobject_factory]]]]]) - Return a new database large object. See :ref:`large-objects` for an - overview. + Return a new database large object as a `~psycopg2.extensions.lobject` + instance. + + See :ref:`large-objects` for an overview. :param oid: The OID of the object to read or write. 0 to create a new large object and and have its OID assigned automatically. - :param mode: Access mode to the object: can be ``r``, ``w``, - ``rw`` or ``n`` (meaning don't open it). + :param mode: Access mode to the object, see below. :param new_oid: Create a new object using the specified OID. The function raises `OperationalError` if the OID is already in use. Default is 0, meaning assign a new one automatically. @@ -504,13 +505,31 @@ The ``connection`` class (using the |lo_import|_ function) :param lobject_factory: Subclass of `~psycopg2.extensions.lobject` to be instantiated. - :rtype: `~psycopg2.extensions.lobject` .. |lo_import| replace:: `!lo_import()` .. 
_lo_import: http://www.postgresql.org/docs/9.0/static/lo-interfaces.html#LO-IMPORT + Available values for *mode* are: + + ======= ========= + *mode* meaning + ======= ========= + ``r`` Open for read only + ``w`` Open for write only + ``rw`` Open for read/write + ``n`` Don't open the file + ``b`` Don't decode read data (return data as `str` in Python 2 or `bytes` in Python 3) + ``t`` Decode read data according to `connection.encoding` (return data as `unicode` in Python 2 or `str` in Python 3) + ======= ========= + + ``b`` and ``t`` can be specified together with a read/write mode. If + neither ``b`` nor ``t`` is specified, the default is ``b`` in Python 2 + and ``t`` in Python 3. + .. versionadded:: 2.0.8 + .. versionchanged:: 2.3.3 added ``b`` and ``t`` modes and Unicode + support. .. rubric:: Methods related to asynchronous support. diff --git a/doc/src/extensions.rst b/doc/src/extensions.rst index 73b05db1..8fee890e 100644 --- a/doc/src/extensions.rst +++ b/doc/src/extensions.rst @@ -51,17 +51,29 @@ functionalities defined by the |DBAPI|_. .. attribute:: mode - The mode the database was open (``r``, ``w``, ``rw`` or ``n``). + The mode the large object was opened with. See `connection.lobject()` for a + description of the available modes. .. method:: read(bytes=-1) Read a chunk of data from the current file position. If -1 (default) read all the remaining data. + The result is a Unicode string (decoded according to + `connection.encoding`) if the file was opened in ``t`` mode, a bytes + string for ``b`` mode. + + .. versionchanged:: 2.3.3 + added Unicode support. + .. method:: write(str) Write a string to the large object. Return the number of bytes - written. + written. Unicode strings are encoded using `connection.encoding` + before writing. + + .. versionchanged:: 2.3.3 + added Unicode support. .. 
method:: export(file_name) diff --git a/doc/src/usage.rst b/doc/src/usage.rst index a5efaa45..36bd36be 100644 --- a/doc/src/usage.rst +++ b/doc/src/usage.rst @@ -574,7 +574,8 @@ whole. Psycopg allows access to the large object using the `~psycopg2.extensions.lobject` class. Objects are generated using the -`connection.lobject()` factory method. +`connection.lobject()` factory method. Data can be retrieved either as bytes +or as Unicode strings. Psycopg large object support efficient import/export with file system files using the |lo_import|_ and |lo_export|_ libpq functions. diff --git a/psycopg/connection_type.c b/psycopg/connection_type.c index c947850a..ae92d68b 100644 --- a/psycopg/connection_type.c +++ b/psycopg/connection_type.c @@ -514,15 +514,16 @@ static PyObject * psyco_conn_lobject(connectionObject *self, PyObject *args, PyObject *keywds) { Oid oid=InvalidOid, new_oid=InvalidOid; - char *smode = NULL, *new_file = NULL; - int mode=0; - PyObject *obj, *factory = NULL; + char *new_file = NULL; + const char *smode = ""; + PyObject *factory = (PyObject *)&lobjectType; + PyObject *obj; static char *kwlist[] = {"oid", "mode", "new_oid", "new_file", "cursor_factory", NULL}; - + if (!PyArg_ParseTupleAndKeywords(args, keywds, "|izizO", kwlist, - &oid, &smode, &new_oid, &new_file, + &oid, &smode, &new_oid, &new_file, &factory)) { return NULL; } @@ -537,33 +538,13 @@ psyco_conn_lobject(connectionObject *self, PyObject *args, PyObject *keywds) oid, smode); Dprintf("psyco_conn_lobject: parameters: new_oid = %d, new_file = %s", new_oid, new_file); - - /* build a mode number out of the mode string: right now we only accept - 'r', 'w' and 'rw' (but note that 'w' implies 'rw' because PostgreSQL - backend does that. 
*/ - if (smode) { - if (strncmp("rw", smode, 2) == 0) - mode = INV_READ+INV_WRITE; - else if (smode[0] == 'r') - mode = INV_READ; - else if (smode[0] == 'w') - mode = INV_WRITE; - else if (smode[0] == 'n') - mode = -1; - else { - PyErr_SetString(PyExc_TypeError, - "mode should be one of 'r', 'w' or 'rw'"); - return NULL; - } - } - if (factory == NULL) factory = (PyObject *)&lobjectType; if (new_file) - obj = PyObject_CallFunction(factory, "Oiiis", - self, oid, mode, new_oid, new_file); + obj = PyObject_CallFunction(factory, "Oisis", + self, oid, smode, new_oid, new_file); else - obj = PyObject_CallFunction(factory, "Oiii", - self, oid, mode, new_oid); + obj = PyObject_CallFunction(factory, "Oisi", + self, oid, smode, new_oid); if (obj == NULL) return NULL; if (PyObject_IsInstance(obj, (PyObject *)&lobjectType) == 0) { @@ -572,7 +553,7 @@ psyco_conn_lobject(connectionObject *self, PyObject *args, PyObject *keywds) Py_DECREF(obj); return NULL; } - + Dprintf("psyco_conn_lobject: new lobject at %p: refcnt = " FORMAT_CODE_PY_SSIZE_T, obj, Py_REFCNT(obj)); diff --git a/psycopg/lobject.h b/psycopg/lobject.h index cddfa6e9..293f608f 100644 --- a/psycopg/lobject.h +++ b/psycopg/lobject.h @@ -42,7 +42,8 @@ typedef struct { connectionObject *conn; /* connection owning the lobject */ long int mark; /* copied from conn->mark */ - const char *smode; /* string mode if lobject was opened */ + char *smode; /* string mode if lobject was opened */ + int mode; /* numeric version of smode */ int fd; /* the file descriptor for file-like ops */ Oid oid; /* the oid for this lobject */ @@ -51,7 +52,7 @@ typedef struct { /* functions exported from lobject_int.c */ HIDDEN int lobject_open(lobjectObject *self, connectionObject *conn, - Oid oid, int mode, Oid new_oid, + Oid oid, const char *smode, Oid new_oid, const char *new_file); HIDDEN int lobject_unlink(lobjectObject *self); HIDDEN int lobject_export(lobjectObject *self, const char *filename); @@ -87,6 +88,12 @@ if (self->conn->mark != 
self->mark) { \ return NULL; \ } +/* Values for the lobject mode */ +#define LOBJECT_READ 1 +#define LOBJECT_WRITE 2 +#define LOBJECT_BINARY 4 +#define LOBJECT_TEXT 8 + #ifdef __cplusplus } #endif diff --git a/psycopg/lobject_int.c b/psycopg/lobject_int.c index 6ee3ecf0..252d1c93 100644 --- a/psycopg/lobject_int.c +++ b/psycopg/lobject_int.c @@ -43,15 +43,127 @@ collect_error(connectionObject *conn, char **error) *error = strdup(msg); } + +/* Check if the mode passed to the large object is valid. + * In case of success return a value >= 0 + * On error return a value < 0 and set an exception. + * + * Valid mode are [r|w|rw|n][t|b]. A NULL mode is handled as an empty one. + */ +static int +_lobject_parse_mode(const char *mode) +{ + int rv = 0; + size_t pos = 0; + + /* the callers parse the mode with the "z" format unit, so None + * gets here as NULL: handle it as the default (empty) mode */ + if (NULL == mode) { mode = ""; } + + if (0 == strncmp("rw", mode, 2)) { + rv |= LOBJECT_READ | LOBJECT_WRITE; + pos += 2; + } + else { + switch (mode[0]) { + case 'r': + rv |= LOBJECT_READ; + pos += 1; + break; + case 'w': + rv |= LOBJECT_WRITE; + pos += 1; + break; + case 'n': + pos += 1; + break; + default: + rv |= LOBJECT_READ; + break; + } + } + + switch (mode[pos]) { + case 't': + rv |= LOBJECT_TEXT; + pos += 1; + break; + case 'b': + rv |= LOBJECT_BINARY; + pos += 1; + break; + default: +#if PY_MAJOR_VERSION < 3 + rv |= LOBJECT_BINARY; +#else + rv |= LOBJECT_TEXT; +#endif + break; + } + + if (pos != strlen(mode)) { + PyErr_Format(PyExc_ValueError, + "bad mode for lobject: '%s'", mode); + rv = -1; + } + + return rv; +} + + +/* Write the string representing the lobject mode into buf. 
+ * + * buf must have room for 4 bytes (the longest mode is 'rwt' plus the + * terminator). Nothing is allocated and no Python API is called, so the + * function is safe to use while the GIL is released. + */ +static void +_lobject_unparse_mode(int mode, char *buf) +{ + char *c = buf; + + if (mode & LOBJECT_READ) { *c++ = 'r'; } + if (mode & LOBJECT_WRITE) { *c++ = 'w'; } + + if (buf == c) { + /* neither read nor write */ + *c++ = 'n'; + } + else { + if (mode & LOBJECT_TEXT) { + *c++ = 't'; + } + else { + *c++ = 'b'; + } + } + *c = '\0'; +} + /* lobject_open - create a new/open an existing lo */ int lobject_open(lobjectObject *self, connectionObject *conn, - Oid oid, int mode, Oid new_oid, const char *new_file) + Oid oid, const char *smode, Oid new_oid, const char *new_file) { int retvalue = -1; PGresult *pgres = NULL; char *error = NULL; + int pgmode = 0; + int mode; + + if (0 > (mode = _lobject_parse_mode(smode))) { + return -1; + } + + /* Allocate the mode string before releasing the GIL: PyMem_Malloc + * and PyErr_NoMemory must not be called without it. The buffer is + * owned by self and is filled in once the final mode is known. */ + if (!(self->smode = PyMem_Malloc(4))) { + PyErr_NoMemory(); + return -1; + } + self->smode[0] = '\0'; Py_BEGIN_ALLOW_THREADS; pthread_mutex_lock(&(self->conn->lock)); @@ -78,19 +190,19 @@ lobject_open(lobjectObject *self, connectionObject *conn, goto end; } - mode = INV_WRITE; + mode = (mode & ~LOBJECT_READ) | LOBJECT_WRITE; } else { self->oid = oid; - if (mode == 0) mode = INV_READ; } - /* if the oid is a real one we try to open with the given mode, - unless the mode is -1, meaning "don't open!" 
*/ - if (mode != -1) { - self->fd = lo_open(self->conn->pgconn, self->oid, mode); - Dprintf("lobject_open: large object opened with fd = %d", - self->fd); + /* if the oid is a real one we try to open with the given mode */ + if (mode & LOBJECT_READ) { pgmode |= INV_READ; } + if (mode & LOBJECT_WRITE) { pgmode |= INV_WRITE; } + if (pgmode) { + self->fd = lo_open(self->conn->pgconn, self->oid, pgmode); + Dprintf("lobject_open: large object opened with mode = %i fd = %d", + pgmode, self->fd); if (self->fd == -1) { collect_error(self->conn, &error); @@ -98,17 +210,11 @@ lobject_open(lobjectObject *self, connectionObject *conn, goto end; } } + /* set the mode for future reference */ - switch (mode) { - case -1: - self->smode = "n"; break; - case INV_READ: - self->smode = "r"; break; - case INV_WRITE: - self->smode = "w"; break; - case INV_READ+INV_WRITE: - self->smode = "rw"; break; - } + self->mode = mode; + /* fill the buffer allocated above: nothing to allocate without the GIL */ + _lobject_unparse_mode(mode, self->smode); retvalue = 0; end: diff --git a/psycopg/lobject_type.c b/psycopg/lobject_type.c index 9a5fb0b8..49f64bab 100644 --- a/psycopg/lobject_type.c +++ b/psycopg/lobject_type.c @@ -71,19 +71,48 @@ psyco_lobj_close(lobjectObject *self, PyObject *args) static PyObject * psyco_lobj_write(lobjectObject *self, PyObject *args) { - int res = 0; + char *buffer; Py_ssize_t len; - const char *buffer; + Py_ssize_t res; + PyObject *obj; + PyObject *data = NULL; + PyObject *rv = NULL; - if (!PyArg_ParseTuple(args, "s#", &buffer, &len)) return NULL; + if (!PyArg_ParseTuple(args, "O", &obj)) return NULL; EXC_IF_LOBJ_CLOSED(self); EXC_IF_LOBJ_LEVEL0(self); EXC_IF_LOBJ_UNMARKED(self); - if ((res = lobject_write(self, buffer, (size_t)len)) < 0) return NULL; + if (Bytes_Check(obj)) { + Py_INCREF(obj); + data = obj; + } + else if (PyUnicode_Check(obj)) { + if (!(data = PyUnicode_AsEncodedString(obj, self->conn->codec, NULL))) { + goto exit; + } + } + else { + PyErr_Format(PyExc_TypeError, + 
"lobject.write requires a string; got %s instead", 
Py_TYPE(obj)->tp_name); + goto exit; + } - return PyInt_FromLong((long)res); + if (-1 == Bytes_AsStringAndSize(data, &buffer, &len)) { + goto exit; + } + + if (0 > (res = lobject_write(self, buffer, (size_t)len))) { + goto exit; + } + + rv = PyInt_FromLong((long)res); + +exit: + Py_XDECREF(data); + return rv; } /* read method - read data from the lobject */ @@ -120,9 +149,13 @@ psyco_lobj_read(lobjectObject *self, PyObject *args) return NULL; } - res = Bytes_FromStringAndSize(buffer, size); + if (self->mode & LOBJECT_BINARY) { + res = Bytes_FromStringAndSize(buffer, size); + } else { + res = PyUnicode_Decode(buffer, size, self->conn->codec, NULL); + } PyMem_Free(buffer); - + return res; } @@ -277,7 +310,7 @@ static struct PyMemberDef lobjectObject_members[] = { {"oid", T_UINT, offsetof(lobjectObject, oid), READONLY, "The backend OID associated to this lobject."}, {"mode", T_STRING, offsetof(lobjectObject, smode), READONLY, - "Open mode ('r', 'w', 'rw' or 'n')."}, + "Open mode."}, {NULL} }; @@ -293,7 +326,7 @@ static struct PyGetSetDef lobjectObject_getsets[] = { static int lobject_setup(lobjectObject *self, connectionObject *conn, - Oid oid, int mode, Oid new_oid, const char *new_file) + Oid oid, const char *smode, Oid new_oid, const char *new_file) { Dprintf("lobject_setup: init lobject object at %p", self); @@ -311,7 +344,7 @@ lobject_setup(lobjectObject *self, connectionObject *conn, self->fd = -1; self->oid = InvalidOid; - if (lobject_open(self, conn, oid, mode, new_oid, new_file) == -1) + if (lobject_open(self, conn, oid, smode, new_oid, new_file) == -1) return -1; Dprintf("lobject_setup: good lobject object at %p, refcnt = " @@ -328,6 +361,7 @@ lobject_dealloc(PyObject* obj) if (lobject_close(self) < 0) PyErr_Print(); Py_XDECREF((PyObject*)self->conn); + PyMem_Free(self->smode); Dprintf("lobject_dealloc: deleted lobject object at %p, refcnt = " FORMAT_CODE_PY_SSIZE_T, obj, Py_REFCNT(obj)); @@ -339,16 +373,16 @@ static int lobject_init(PyObject *obj, PyObject 
*args, PyObject *kwds) { Oid oid=InvalidOid, new_oid=InvalidOid; - int mode=0; + const char *smode = ""; const char *new_file = NULL; PyObject *conn; - if (!PyArg_ParseTuple(args, "O|iiis", - &conn, &oid, &mode, &new_oid, &new_file)) + if (!PyArg_ParseTuple(args, "O|iziz", + &conn, &oid, &smode, &new_oid, &new_file)) return -1; return lobject_setup((lobjectObject *)obj, - (connectionObject *)conn, oid, mode, new_oid, new_file); + (connectionObject *)conn, oid, smode, new_oid, new_file); } static PyObject * diff --git a/tests/test_lobject.py b/tests/test_lobject.py index b5d5d63e..3e99e868 100755 --- a/tests/test_lobject.py +++ b/tests/test_lobject.py @@ -84,7 +84,7 @@ class LargeObjectTests(LargeObjectMixin, unittest.TestCase): def test_create(self): lo = self.conn.lobject() self.assertNotEqual(lo, None) - self.assertEqual(lo.mode, "w") + self.assertEqual(lo.mode[0], "w") def test_open_non_existent(self): # By creating then removing a large object, we get an Oid that @@ -98,12 +98,12 @@ class LargeObjectTests(LargeObjectMixin, unittest.TestCase): lo2 = self.conn.lobject(lo.oid) self.assertNotEqual(lo2, None) self.assertEqual(lo2.oid, lo.oid) - self.assertEqual(lo2.mode, "r") + self.assertEqual(lo2.mode[0], "r") def test_open_for_write(self): lo = self.conn.lobject() lo2 = self.conn.lobject(lo.oid, "w") - self.assertEqual(lo2.mode, "w") + self.assertEqual(lo2.mode[0], "w") lo2.write(b("some data")) def test_open_mode_n(self): @@ -167,9 +167,34 @@ class LargeObjectTests(LargeObjectMixin, unittest.TestCase): lo.close() lo = self.conn.lobject(lo.oid) - self.assertEqual(lo.read(4), b("some")) + x = lo.read(4) + self.assertEqual(type(x), type('')) + self.assertEqual(x, "some") + self.assertEqual(lo.read(), " data") + + def test_read_binary(self): + lo = self.conn.lobject() + length = lo.write(b("some data")) + lo.close() + + lo = self.conn.lobject(lo.oid, "rb") + x = lo.read(4) + self.assertEqual(type(x), type(b(''))) + self.assertEqual(x, b("some")) 
self.assertEqual(lo.read(), b(" data")) + def test_read_text(self): + lo = self.conn.lobject() + snowman = u"\u2603" + length = lo.write(u"some data " + snowman) + lo.close() + + lo = self.conn.lobject(lo.oid, "rt") + x = lo.read(4) + self.assertEqual(type(x), type(u'')) + self.assertEqual(x, u"some") + self.assertEqual(lo.read(), u" data " + snowman) + def test_read_large(self): lo = self.conn.lobject() data = b("data") * 1000000