Merge branch 'iter-named-cursor' into python2

2025-07-21 21:50:01 +03:00 · 2011-02-05 15:28:30 +01:00 · 2011-02-05 15:28:30 +01:00 · c97fa1c476
commit c97fa1c476
parent 9433a6879f fab31e9441
4 changed files with 114 additions and 6 deletions
--- a/NEWS-2.3
+++ b/NEWS-2.3
@ -5,6 +5,7 @@ What's new in psycopg 2.3.3

  - Added `register_composite()` function to cast PostgreSQL composite types
    into Python tuples/namedtuples.
+  - More efficient iteration on named cursors.
  - The build script refuses to guess values if pg_config is not found.
  - Connections and cursors are weakly referenceable.

--- a/doc/src/cursor.rst
+++ b/doc/src/cursor.rst
@ -208,6 +208,11 @@ The ``cursor`` class
            (2, None, 'dada')
            (3, 42, 'bar')

+        .. versionchanged:: 2.3.3
+            iterating over a :ref:`named cursor <server-side-cursors>`
+            fetches `~cursor.arraysize` records at a time from the backend.
+            Previously only one record was fetched per roundtrip, resulting
+            in inefficient iteration.

    .. method:: fetchone()

@ -300,6 +305,18 @@ The ``cursor`` class
        This read/write attribute specifies the number of rows to fetch at a
        time with `~cursor.fetchmany()`. It defaults to 1 meaning to fetch
        a single row at a time.
+
+        The attribute is also used when iterating a :ref:`named cursor
+        <server-side-cursors>`: when syntax such as ``for i in cursor:`` is
+        used, in order to avoid an excessive number of network roundtrips, the
+        cursor will actually fetch `!arraysize` records at a time from the
+        backend. The default value of 1 is a poor choice for this task: if
+        `!arraysize` is 1, a default value of 2000 will be used instead. If
+        you really want to retrieve one record at a time from the backend, use
+        `fetchone()` in a loop.
+
+        .. versionchanged:: 2.3.3
+            `!arraysize` is also used in named cursor iteration.
        

    .. attribute:: rowcount 
--- a/psycopg/cursor_type.c
+++ b/psycopg/cursor_type.c
@ -799,6 +799,61 @@ psyco_curs_fetchone(cursorObject *self, PyObject *args)
    return res;
 }

+/* Efficient cursor.next() implementation for named cursors: rows are fetched
+ * from the backend 'arraysize' at a time (2000 if arraysize is 1) and handed
+ * out one per call. Return NULL when the cursor is exhausted or a fetch fails.
+ */
+static PyObject *
+psyco_curs_next_named(cursorObject *self)
+{
+    PyObject *res;
+
+    Dprintf("psyco_curs_next_named");
+    EXC_IF_CURS_CLOSED(self);  /* EXC_IF_*: presumably bail out with NULL (confirm) */
+    EXC_IF_ASYNC_IN_PROGRESS(self, next);
+    if (_psyco_curs_prefetch(self) < 0) return NULL;
+    EXC_IF_NO_TUPLES(self);
+
+    EXC_IF_NO_MARK(self);
+    EXC_IF_TPC_PREPARED(self->conn, next);
+
+    Dprintf("psyco_curs_next_named: row %ld", self->row);
+    Dprintf("psyco_curs_next_named: rowcount = %ld", self->rowcount);
+    if (self->row >= self->rowcount) {  /* current batch fully consumed */
+        char buffer[128];  /* NOTE(review): a very long self->name would truncate the command */
+
+        /* batch size is 'arraysize', but replace the default value of 1 */
+        long int size = self->arraysize;
+        if (size == 1) { size = 2000L; }
+
+        PyOS_snprintf(buffer, 127, "FETCH FORWARD %ld FROM %s",
+            size, self->name);  /* the cursor name is interpolated unquoted */
+        if (pq_execute(self, buffer, 0) == -1) return NULL;
+        if (_psyco_curs_prefetch(self) < 0) return NULL;
+    }
+
+    /* Still nothing to read after the fetch: return NULL to stop iteration. */
+    if (self->row >= self->rowcount) {
+        return NULL;
+    }
+
+    if (self->tuple_factory == Py_None)
+        res = _psyco_curs_buildrow(self, self->row);
+    else
+        res = _psyco_curs_buildrow_with_factory(self, self->row);
+
+    self->row++; /* move the counter to the next row */
+
+    /* if the query was async aggressively free pgres, to allow
+       successive requests to reallocate it */
+    if (self->row >= self->rowcount
+        && self->conn->async_cursor
+        && PyWeakref_GetObject(self->conn->async_cursor) == (PyObject*)self)
+        IFCLEARPGRES(self->pgres);
+
+    return res;
+}
+

 /* fetch many - fetch some results */

@ -1510,14 +1565,20 @@ cursor_next(PyObject *self)
 {
    PyObject *res;

-    /* we don't parse arguments: psyco_curs_fetchone will do that for us */
-    res = psyco_curs_fetchone((cursorObject*)self, NULL);
+    if (NULL == ((cursorObject*)self)->name) {
+        /* we don't parse arguments: psyco_curs_fetchone will do that for us */
+        res = psyco_curs_fetchone((cursorObject*)self, NULL);

-    /* convert a None to NULL to signal the end of iteration */
-    if (res && res == Py_None) {
-        Py_DECREF(res);
-        res = NULL;
+        /* convert a None to NULL to signal the end of iteration */
+        if (res && res == Py_None) {
+            Py_DECREF(res);
+            res = NULL;
+        }
    }
+    else {
+        res = psyco_curs_next_named((cursorObject*)self);
+    }
+
    return res;
 }

--- a/tests/test_cursor.py
+++ b/tests/test_cursor.py
@ -22,6 +22,7 @@
 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 # License for more details.

+import time
 import unittest
 import psycopg2
 import psycopg2.extensions
@ -128,6 +129,34 @@ class CursorTests(unittest.TestCase):
        del curs
        self.assert_(w() is None)

+    def test_iter_named_cursor_efficient(self):
+        curs = self.conn.cursor('tmp')
+        # if both records are fetched in the same roundtrip, their timestamps
+        # will not be influenced by the pause on the Python side.
+        curs.execute("""select clock_timestamp() from generate_series(1,2)""")
+        i = iter(curs)
+        t1 = i.next()[0]
+        time.sleep(0.2)  # pause between the two next() calls
+        t2 = i.next()[0]
+        self.assert_((t2 - t1).microseconds * 1e-6 < 0.1,  # NOTE: sub-second part of the delta only
+            "named cursor records fetched in 2 roundtrips (delta: %s)"
+            % (t2 - t1))
+
+    def test_iter_named_cursor_default_arraysize(self):
+        curs = self.conn.cursor('tmp')
+        curs.execute('select generate_series(1,50)')
+        rv = [ (r[0], curs.rownumber) for r in curs ]
+        # all 50 records are expected in a single fetch: rownumber counts 1..50
+        self.assertEqual(rv, [(i,i) for i in range(1,51)])
+
+    def test_iter_named_cursor_arraysize(self):
+        curs = self.conn.cursor('tmp')
+        curs.arraysize = 30  # batch size used when iterating the named cursor
+        curs.execute('select generate_series(1,50)')
+        rv = [ (r[0], curs.rownumber) for r in curs ]
+        # two fetches expected (30 + 20): rownumber restarts from 1 at record 31
+        self.assertEqual(rv, [(i,((i - 1) % 30) + 1) for i in range(1,51)])
+

 def test_suite():
    return unittest.TestLoader().loadTestsFromName(__name__)