From 35ec7ad9c1ecb165eaeb2b8d4dae84ed8bad182b Mon Sep 17 00:00:00 2001 From: Daniele Varrazzo Date: Sat, 2 Feb 2019 19:21:39 +0000 Subject: [PATCH] Use a proper LRU cache for namedtuples Previous one didn't refresh by last use. Use the stdlib version for py3 and one of our own for py2. Max size set to 512, which should be fine for everyone (tweaking is still possible by monkeypatching, as the tests do, but I don't want to make an interface of it). --- lib/_lru_cache.py | 104 ++++++++++++++++++++++++++++++++ lib/compat.py | 5 ++ lib/extras.py | 26 ++++---- tests/test_extras_dictcursor.py | 29 +++++---- 4 files changed, 138 insertions(+), 26 deletions(-) create mode 100644 lib/_lru_cache.py diff --git a/lib/_lru_cache.py b/lib/_lru_cache.py new file mode 100644 index 00000000..1e2c52d0 --- /dev/null +++ b/lib/_lru_cache.py @@ -0,0 +1,104 @@ +""" +LRU cache implementation for Python 2.7 + +Ported from http://code.activestate.com/recipes/578078/ and simplified for our +use (only support maxsize > 0 and positional arguments). +""" + +from collections import namedtuple +from functools import update_wrapper +from threading import RLock + +_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) + + +def lru_cache(maxsize=100): + """Least-recently-used cache decorator. + + Arguments to the cached function must be hashable. + + See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used + + """ + def decorating_function(user_function): + + cache = dict() + stats = [0, 0] # make statistics updateable non-locally + HITS, MISSES = 0, 1 # names for the stats fields + cache_get = cache.get # bound method to lookup key or return None + _len = len # localize the global len() function + lock = RLock() # linkedlist updates aren't threadsafe + root = [] # root of the circular doubly linked list + root[:] = [root, root, None, None] # initialize by pointing to self + nonlocal_root = [root] # make updateable non-locally + PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields + + assert maxsize and maxsize > 0, "maxsize %s not supported" % maxsize + + def wrapper(*args): + # size limited caching that tracks accesses by recency + key = args + with lock: + link = cache_get(key) + if link is not None: + # record recent use of the key by moving it to the + # front of the list + root, = nonlocal_root + link_prev, link_next, key, result = link + link_prev[NEXT] = link_next + link_next[PREV] = link_prev + last = root[PREV] + last[NEXT] = root[PREV] = link + link[PREV] = last + link[NEXT] = root + stats[HITS] += 1 + return result + result = user_function(*args) + with lock: + root, = nonlocal_root + if key in cache: + # getting here means that this same key was added to the + # cache while the lock was released. since the link + # update is already done, we need only return the + # computed result and update the count of misses. + pass + elif _len(cache) >= maxsize: + # use the old root to store the new key and result + oldroot = root + oldroot[KEY] = key + oldroot[RESULT] = result + # empty the oldest link and make it the new root + root = nonlocal_root[0] = oldroot[NEXT] + oldkey = root[KEY] + # oldvalue = root[RESULT] + root[KEY] = root[RESULT] = None + # now update the cache dictionary for the new links + del cache[oldkey] + cache[key] = oldroot + else: + # put result in a new link at the front of the list + last = root[PREV] + link = [last, root, key, result] + last[NEXT] = root[PREV] = cache[key] = link + stats[MISSES] += 1 + return result + + def cache_info(): + """Report cache statistics""" + with lock: + return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache)) + + def cache_clear(): + """Clear the cache and cache statistics""" + with lock: + cache.clear() + root = nonlocal_root[0] + root[:] = [root, root, None, None] + stats[:] = [0, 0] + + wrapper.__wrapped__ = user_function + wrapper.cache_info = cache_info + wrapper.cache_clear = cache_clear + return update_wrapper(wrapper, user_function) + + return decorating_function diff --git a/lib/compat.py b/lib/compat.py index cfd5a88f..185b8f64 100644 --- a/lib/compat.py +++ b/lib/compat.py @@ -1,10 +1,15 @@ import sys +__all__ = ['string_types', 'text_type', 'lru_cache'] + if sys.version_info[0] == 2: # Python 2 string_types = basestring, text_type = unicode + from ._lru_cache import lru_cache + else: # Python 3 string_types = str, text_type = str + from functools import lru_cache diff --git a/lib/extras.py b/lib/extras.py index a4012e28..d73c5b24 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -35,9 +35,10 @@ import logging as _logging import psycopg2 from psycopg2 import extensions as _ext -from psycopg2.extensions import cursor as _cursor -from psycopg2.extensions import connection as _connection -from psycopg2.extensions import adapt as _A, quote_ident +from .extensions import cursor as _cursor +from .extensions import connection as _connection +from .extensions import adapt as _A, quote_ident +from .compat import lru_cache from psycopg2._psycopg import ( # noqa REPLICATION_PHYSICAL, REPLICATION_LOGICAL, @@ -386,28 +387,27 @@ class NamedTupleCursor(_cursor): _re_clean = _re.compile( '[' + _re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']') - _nt_cache = OrderedDict() - def _make_nt(self): - key = tuple(d[0] for d in (self.description or ())) - nt = self._nt_cache.get(key) - if nt is not None: - return nt + key = tuple(d[0] for d in self.description) if self.description else () + return self._cached_make_nt(key) + def _do_make_nt(self, key): fields = [] for s in key: s = self._re_clean.sub('_', s) + # Python identifier cannot start with numbers, namedtuple fields + # cannot start with underscore. So... if s[0] == '_' or '0' <= s[0] <= '9': s = 'f' + s fields.append(s) nt = namedtuple("Record", fields) - self._nt_cache[key] = nt - while len(self._nt_cache) > self.MAX_CACHE: - self._nt_cache.popitem(last=False) - return nt + # Exposed for testability, and if someone wants to monkeypatch to tweak + # the cache size. + _cached_make_nt = lru_cache(512)(_do_make_nt) + class LoggingConnection(_connection): """A connection that logs all queries to a file or logger__ object. diff --git a/tests/test_extras_dictcursor.py b/tests/test_extras_dictcursor.py index e08da5f1..92a25b67 100755 --- a/tests/test_extras_dictcursor.py +++ b/tests/test_extras_dictcursor.py @@ -592,27 +592,30 @@ class NamedTupleCursorTest(ConnectingTestCase): def test_max_cache(self): from psycopg2.extras import NamedTupleCursor - old_max_cache = NamedTupleCursor.MAX_CACHE - NamedTupleCursor.MAX_CACHE = 10 + from psycopg2.compat import lru_cache + + old_func = NamedTupleCursor._cached_make_nt + NamedTupleCursor._cached_make_nt = \ + lru_cache(8)(NamedTupleCursor._do_make_nt) try: - NamedTupleCursor._nt_cache.clear() + recs = [] curs = self.conn.cursor() for i in range(10): curs.execute("select 1 as f%s" % i) - curs.fetchone() + recs.append(curs.fetchone()) - self.assertEqual(len(NamedTupleCursor._nt_cache), 10) - for i in range(10): - self.assert_(('f%s' % i,) in NamedTupleCursor._nt_cache) + # Still in cache + curs.execute("select 1 as f9") + rec = curs.fetchone() + self.assert_(any(type(r) is type(rec) for r in recs)) - curs.execute("select 1 as f10") - curs.fetchone() - self.assertEqual(len(NamedTupleCursor._nt_cache), 10) - self.assert_(('f10',) in NamedTupleCursor._nt_cache) - self.assert_(('f0',) not in NamedTupleCursor._nt_cache) + # Gone from cache + curs.execute("select 1 as f0") + rec = curs.fetchone() + self.assert_(all(type(r) is not type(rec) for r in recs)) finally: - NamedTupleCursor.MAX_CACHE = old_max_cache + NamedTupleCursor._cached_make_nt = old_func def test_suite():