Use a proper LRU cache for namedtuples

The previous implementation didn't refresh entries when they were used, so
eviction wasn't truly least-recently-used. Use the stdlib version for Python 3
and our own implementation for Python 2.

Max size set to 512, which should be fine for everyone (tweaking is
still possible by monkeypatching, as the tests do, but I don't want to
make an interface of it).
This commit is contained in:
Daniele Varrazzo 2019-02-02 19:21:39 +00:00
parent 805527fcd6
commit 35ec7ad9c1
4 changed files with 138 additions and 26 deletions

104
lib/_lru_cache.py Normal file
View File

@ -0,0 +1,104 @@
"""
LRU cache implementation for Python 2.7
Ported from http://code.activestate.com/recipes/578078/ and simplified for our
use (only support maxsize > 0 and positional arguments).
"""
from collections import namedtuple
from functools import update_wrapper
from threading import RLock
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])


def lru_cache(maxsize=100):
    """Least-recently-used cache decorator.

    Arguments to the cached function must be hashable (and positional only).
    The decorated function gains ``cache_info()`` and ``cache_clear()``
    helpers mirroring the Python 3 `functools.lru_cache` interface.

    See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used

    :param maxsize: maximum number of cached entries; must be > 0.
    :raises ValueError: at decoration time if *maxsize* is zero, negative,
        or otherwise falsy (only bounded caches are supported here).
    """
    def decorating_function(user_function):
        # An 'assert' disappears when running under python -O/-OO, which
        # would silently disable this check: validate explicitly instead.
        if not maxsize or maxsize <= 0:
            raise ValueError("maxsize %s not supported" % maxsize)

        cache = dict()
        stats = [0, 0]              # make statistics updateable non-locally
        HITS, MISSES = 0, 1         # names for the stats fields
        cache_get = cache.get       # bound method to lookup key or return None
        _len = len                  # localize the global len() function
        lock = RLock()              # linkedlist updates aren't threadsafe
        root = []                   # root of the circular doubly linked list
        root[:] = [root, root, None, None]  # initialize by pointing to self
        nonlocal_root = [root]      # one-item list: poor man's 'nonlocal'
                                    # (the keyword doesn't exist in Python 2)
        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields

        def wrapper(*args):
            # size limited caching that tracks accesses by recency
            key = args
            with lock:
                link = cache_get(key)
                if link is not None:
                    # record recent use of the key by moving it to the
                    # front of the list
                    root, = nonlocal_root
                    link_prev, link_next, key, result = link
                    link_prev[NEXT] = link_next
                    link_next[PREV] = link_prev
                    last = root[PREV]
                    last[NEXT] = root[PREV] = link
                    link[PREV] = last
                    link[NEXT] = root
                    stats[HITS] += 1
                    return result

            # Call the user function outside the lock so a slow function
            # does not serialize unrelated cached calls.
            result = user_function(*args)

            with lock:
                root, = nonlocal_root
                if key in cache:
                    # getting here means that this same key was added to the
                    # cache while the lock was released.  since the link
                    # update is already done, we need only return the
                    # computed result and update the count of misses.
                    pass
                elif _len(cache) >= maxsize:
                    # use the old root to store the new key and result
                    oldroot = root
                    oldroot[KEY] = key
                    oldroot[RESULT] = result
                    # empty the oldest link and make it the new root
                    root = nonlocal_root[0] = oldroot[NEXT]
                    oldkey = root[KEY]
                    root[KEY] = root[RESULT] = None
                    # now update the cache dictionary for the new links
                    del cache[oldkey]
                    cache[key] = oldroot
                else:
                    # put result in a new link at the front of the list
                    last = root[PREV]
                    link = [last, root, key, result]
                    last[NEXT] = root[PREV] = cache[key] = link
                stats[MISSES] += 1
            return result

        def cache_info():
            """Report cache statistics."""
            with lock:
                return _CacheInfo(
                    stats[HITS], stats[MISSES], maxsize, len(cache))

        def cache_clear():
            """Clear the cache and cache statistics."""
            with lock:
                cache.clear()
                root = nonlocal_root[0]
                root[:] = [root, root, None, None]
                stats[:] = [0, 0]

        wrapper.__wrapped__ = user_function
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return update_wrapper(wrapper, user_function)

    return decorating_function

View File

@ -1,10 +1,15 @@
import sys

__all__ = ['string_types', 'text_type', 'lru_cache']

# Paper over the Python 2/3 differences this package cares about:
# the string types, and where a usable lru_cache comes from.
if sys.version_info[0] >= 3:
    # Python 3
    string_types = (str,)
    text_type = str
    from functools import lru_cache
else:
    # Python 2
    string_types = (basestring,)    # noqa: F821
    text_type = unicode             # noqa: F821
    from ._lru_cache import lru_cache

View File

@ -35,9 +35,10 @@ import logging as _logging
import psycopg2
from psycopg2 import extensions as _ext
from psycopg2.extensions import cursor as _cursor
from psycopg2.extensions import connection as _connection
from psycopg2.extensions import adapt as _A, quote_ident
from .extensions import cursor as _cursor
from .extensions import connection as _connection
from .extensions import adapt as _A, quote_ident
from .compat import lru_cache
from psycopg2._psycopg import ( # noqa
REPLICATION_PHYSICAL, REPLICATION_LOGICAL,
@ -386,28 +387,27 @@ class NamedTupleCursor(_cursor):
# Matches the characters that are not allowed in a Python identifier;
# they are replaced with '_' when deriving namedtuple field names.
_re_clean = _re.compile(
    '[' + _re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']')
# NOTE(review): this diff view still shows the pre-change OrderedDict
# cache; the new code path caches via the lru_cache-wrapped
# `_cached_make_nt` instead -- confirm against the committed file.
_nt_cache = OrderedDict()
def _make_nt(self):
key = tuple(d[0] for d in (self.description or ()))
nt = self._nt_cache.get(key)
if nt is not None:
return nt
key = tuple(d[0] for d in self.description) if self.description else ()
return self._cached_make_nt(key)
def _do_make_nt(self, key):
fields = []
for s in key:
s = self._re_clean.sub('_', s)
# Python identifier cannot start with numbers, namedtuple fields
# cannot start with underscore. So...
if s[0] == '_' or '0' <= s[0] <= '9':
s = 'f' + s
fields.append(s)
nt = namedtuple("Record", fields)
self._nt_cache[key] = nt
while len(self._nt_cache) > self.MAX_CACHE:
self._nt_cache.popitem(last=False)
return nt
# Exposed for testability, and if someone wants to monkeypatch to tweak
# the cache size.
_cached_make_nt = lru_cache(512)(_do_make_nt)
class LoggingConnection(_connection):
"""A connection that logs all queries to a file or logger__ object.

View File

@ -592,27 +592,30 @@ class NamedTupleCursorTest(ConnectingTestCase):
def test_max_cache(self):
from psycopg2.extras import NamedTupleCursor
old_max_cache = NamedTupleCursor.MAX_CACHE
NamedTupleCursor.MAX_CACHE = 10
from psycopg2.compat import lru_cache
old_func = NamedTupleCursor._cached_make_nt
NamedTupleCursor._cached_make_nt = \
lru_cache(8)(NamedTupleCursor._do_make_nt)
try:
NamedTupleCursor._nt_cache.clear()
recs = []
curs = self.conn.cursor()
for i in range(10):
curs.execute("select 1 as f%s" % i)
curs.fetchone()
recs.append(curs.fetchone())
self.assertEqual(len(NamedTupleCursor._nt_cache), 10)
for i in range(10):
self.assert_(('f%s' % i,) in NamedTupleCursor._nt_cache)
# Still in cache
curs.execute("select 1 as f9")
rec = curs.fetchone()
self.assert_(any(type(r) is type(rec) for r in recs))
curs.execute("select 1 as f10")
curs.fetchone()
self.assertEqual(len(NamedTupleCursor._nt_cache), 10)
self.assert_(('f10',) in NamedTupleCursor._nt_cache)
self.assert_(('f0',) not in NamedTupleCursor._nt_cache)
# Gone from cache
curs.execute("select 1 as f0")
rec = curs.fetchone()
self.assert_(all(type(r) is not type(rec) for r in recs))
finally:
NamedTupleCursor.MAX_CACHE = old_max_cache
NamedTupleCursor._cached_make_nt = old_func
def test_suite():