Merge remote-tracking branch 'origin/fast-namedtuple'

Close #838
Daniele Varrazzo 2019-02-06 02:42:10 +00:00
commit f1e73507d0
5 changed files with 171 additions and 11 deletions

NEWS

@@ -30,6 +30,7 @@ New features:
   maintain columns order (:ticket:`#177`).
 - Added `~psycopg2.extensions.Diagnostics.severity_nonlocalized` attribute on
   the `~psycopg2.extensions.Diagnostics` object (:ticket:`#783`).
+- More efficient `~psycopg2.extras.NamedTupleCursor` (:ticket:`#838`).
 
 Other changes:

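For context on the NEWS entry above, this is roughly how `~psycopg2.extras.NamedTupleCursor` is used; the connection string and query are made up for illustration. The changes below speed up the step that builds the namedtuple class for each result set.

import psycopg2
from psycopg2.extras import NamedTupleCursor

conn = psycopg2.connect("dbname=test")  # hypothetical DSN
curs = conn.cursor(cursor_factory=NamedTupleCursor)
curs.execute("select 1 as id, 'foo' as label")
rec = curs.fetchone()
print(rec.id, rec.label)   # fields are named after the result columns
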
lib/_lru_cache.py (new file)

@@ -0,0 +1,104 @@
"""
LRU cache implementation for Python 2.7
Ported from http://code.activestate.com/recipes/578078/ and simplified for our
use (only support maxsize > 0 and positional arguments).
"""
from collections import namedtuple
from functools import update_wrapper
from threading import RLock
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
def lru_cache(maxsize=100):
"""Least-recently-used cache decorator.
Arguments to the cached function must be hashable.
See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
"""
def decorating_function(user_function):
cache = dict()
stats = [0, 0] # make statistics updateable non-locally
HITS, MISSES = 0, 1 # names for the stats fields
cache_get = cache.get # bound method to lookup key or return None
_len = len # localize the global len() function
lock = RLock() # linkedlist updates aren't threadsafe
root = [] # root of the circular doubly linked list
root[:] = [root, root, None, None] # initialize by pointing to self
nonlocal_root = [root] # make updateable non-locally
PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields
assert maxsize and maxsize > 0, "maxsize %s not supported" % maxsize
def wrapper(*args):
# size limited caching that tracks accesses by recency
key = args
with lock:
link = cache_get(key)
if link is not None:
# record recent use of the key by moving it to the
# front of the list
root, = nonlocal_root
link_prev, link_next, key, result = link
link_prev[NEXT] = link_next
link_next[PREV] = link_prev
last = root[PREV]
last[NEXT] = root[PREV] = link
link[PREV] = last
link[NEXT] = root
stats[HITS] += 1
return result
result = user_function(*args)
with lock:
root, = nonlocal_root
if key in cache:
# getting here means that this same key was added to the
# cache while the lock was released. since the link
# update is already done, we need only return the
# computed result and update the count of misses.
pass
elif _len(cache) >= maxsize:
# use the old root to store the new key and result
oldroot = root
oldroot[KEY] = key
oldroot[RESULT] = result
# empty the oldest link and make it the new root
root = nonlocal_root[0] = oldroot[NEXT]
oldkey = root[KEY]
# oldvalue = root[RESULT]
root[KEY] = root[RESULT] = None
# now update the cache dictionary for the new links
del cache[oldkey]
cache[key] = oldroot
else:
# put result in a new link at the front of the list
last = root[PREV]
link = [last, root, key, result]
last[NEXT] = root[PREV] = cache[key] = link
stats[MISSES] += 1
return result
def cache_info():
"""Report cache statistics"""
with lock:
return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
def cache_clear():
"""Clear the cache and cache statistics"""
with lock:
cache.clear()
root = nonlocal_root[0]
root[:] = [root, root, None, None]
stats[:] = [0, 0]
wrapper.__wrapped__ = user_function
wrapper.cache_info = cache_info
wrapper.cache_clear = cache_clear
return update_wrapper(wrapper, user_function)
return decorating_function

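A quick usage sketch of the decorator above; the function name `square` and the maxsize of 2 are made up for illustration. It is imported here through the `psycopg2.compat` shim changed below, which resolves to this backport on Python 2 and to `functools.lru_cache` on Python 3.

from psycopg2.compat import lru_cache

@lru_cache(maxsize=2)
def square(n):
    return n * n

square(2)                   # miss
square(3)                   # miss
square(2)                   # hit: 2 is still cached
square(4)                   # miss: evicts the least recently used key, 3
print(square.cache_info())  # CacheInfo(hits=1, misses=3, maxsize=2, currsize=2)
square.cache_clear()        # reset the cache and the statistics
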
lib/compat.py

@@ -1,10 +1,15 @@
 import sys
 
+__all__ = ['string_types', 'text_type', 'lru_cache']
+
 if sys.version_info[0] == 2:
     # Python 2
     string_types = basestring,
     text_type = unicode
+    from ._lru_cache import lru_cache
 else:
     # Python 3
     string_types = str,
     text_type = str
+    from functools import lru_cache

lib/extras.py

@@ -35,9 +35,10 @@ import logging as _logging
 import psycopg2
 from psycopg2 import extensions as _ext
-from psycopg2.extensions import cursor as _cursor
-from psycopg2.extensions import connection as _connection
-from psycopg2.extensions import adapt as _A, quote_ident
+from .extensions import cursor as _cursor
+from .extensions import connection as _connection
+from .extensions import adapt as _A, quote_ident
+from .compat import lru_cache
 
 from psycopg2._psycopg import (  # noqa
     REPLICATION_PHYSICAL, REPLICATION_LOGICAL,
@@ -330,6 +331,7 @@ class NamedTupleCursor(_cursor):
         "abc'def"
     """
     Record = None
+    MAX_CACHE = 1024
 
     def execute(self, query, vars=None):
         self.Record = None
@@ -381,21 +383,30 @@
         except StopIteration:
             return
 
-    def _make_nt(self):
-        # ascii except alnum and underscore
-        nochars = ' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~'
-        re_clean = _re.compile('[' + _re.escape(nochars) + ']')
-
-        def f(s):
-            s = re_clean.sub('_', s)
-            # Python identifier cannot start with numbers, namedtuple fields
-            # cannot start with underscore. So...
-            if s[0] == '_' or '0' <= s[0] <= '9':
-                s = 'f' + s
-            return s
-
-        return namedtuple("Record", [f(d[0]) for d in self.description or ()])
+    # ascii except alnum and underscore
+    _re_clean = _re.compile(
+        '[' + _re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']')
+
+    def _make_nt(self):
+        key = tuple(d[0] for d in self.description) if self.description else ()
+        return self._cached_make_nt(key)
+
+    def _do_make_nt(self, key):
+        fields = []
+        for s in key:
+            s = self._re_clean.sub('_', s)
+            # Python identifier cannot start with numbers, namedtuple fields
+            # cannot start with underscore. So...
+            if s[0] == '_' or '0' <= s[0] <= '9':
+                s = 'f' + s
+            fields.append(s)
+        nt = namedtuple("Record", fields)
+        return nt
+
+    # Exposed for testability, and if someone wants to monkeypatch to tweak
+    # the cache size.
+    _cached_make_nt = lru_cache(512)(_do_make_nt)
 
 
 class LoggingConnection(_connection):

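A sketch of the behaviour implemented above; the DSN and column aliases are invented for the example. Column names that are not valid Python identifiers are cleaned before building the namedtuple, and result sets with the same column names reuse the same cached Record class.

import psycopg2
from psycopg2.extras import NamedTupleCursor

conn = psycopg2.connect("dbname=test")  # hypothetical DSN
curs = conn.cursor(cursor_factory=NamedTupleCursor)

curs.execute('''select 1 as "user-id", 2 as "2nd"''')
r1 = curs.fetchone()
print(r1.user_id, r1.f2nd)      # "user-id" -> user_id, "2nd" -> f2nd

curs.execute('''select 3 as "user-id", 4 as "2nd"''')
r2 = curs.fetchone()
print(type(r1) is type(r2))     # True: same column names, Record comes from the cache
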
tests/test_extras_dictcursor.py

@@ -578,6 +578,45 @@ class NamedTupleCursorTest(ConnectingTestCase):
         for i, t in enumerate(curs):
             self.assertEqual(i + 1, curs.rownumber)
 
+    def test_cache(self):
+        curs = self.conn.cursor()
+
+        curs.execute("select 10 as a, 20 as b")
+        r1 = curs.fetchone()
+        curs.execute("select 10 as a, 20 as c")
+        r2 = curs.fetchone()
+        curs.execute("select 10 as a, 30 as b")
+        r3 = curs.fetchone()
+
+        self.assert_(type(r1) is type(r3))
+        self.assert_(type(r1) is not type(r2))
+
+    def test_max_cache(self):
+        from psycopg2.extras import NamedTupleCursor
+        from psycopg2.compat import lru_cache
+
+        old_func = NamedTupleCursor._cached_make_nt
+        NamedTupleCursor._cached_make_nt = \
+            lru_cache(8)(NamedTupleCursor._do_make_nt)
+        try:
+            recs = []
+            curs = self.conn.cursor()
+            for i in range(10):
+                curs.execute("select 1 as f%s" % i)
+                recs.append(curs.fetchone())
+
+            # Still in cache
+            curs.execute("select 1 as f9")
+            rec = curs.fetchone()
+            self.assert_(any(type(r) is type(rec) for r in recs))
+
+            # Gone from cache
+            curs.execute("select 1 as f0")
+            rec = curs.fetchone()
+            self.assert_(all(type(r) is not type(rec) for r in recs))
+        finally:
+            NamedTupleCursor._cached_make_nt = old_func
 
 
 def test_suite():
     return unittest.TestLoader().loadTestsFromName(__name__)