Merge remote-tracking branch 'origin/fast-namedtuple'

Close #838
Daniele Varrazzo 2019-02-06 02:42:10 +00:00
commit f1e73507d0
5 changed files with 171 additions and 11 deletions

NEWS

@@ -30,6 +30,7 @@ New features:
   maintain columns order (:ticket:`#177`).
 - Added `~psycopg2.extensions.Diagnostics.severity_nonlocalized` attribute on
   the `~psycopg2.extensions.Diagnostics` object (:ticket:`#783`).
+- More efficient `~psycopg2.extras.NamedTupleCursor` (:ticket:`#838`).
 
 Other changes:

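For context on the NEWS entry above, this is roughly how `~psycopg2.extras.NamedTupleCursor` is used; the connection string and query are made up for illustration. The changes below speed up the step that builds the namedtuple class for each result set.

import psycopg2
from psycopg2.extras import NamedTupleCursor

conn = psycopg2.connect("dbname=test")  # hypothetical DSN
curs = conn.cursor(cursor_factory=NamedTupleCursor)
curs.execute("select 1 as id, 'foo' as label")
rec = curs.fetchone()
print(rec.id, rec.label)   # fields are named after the result columns
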
lib/_lru_cache.py (new file)

@@ -0,0 +1,104 @@
"""
LRU cache implementation for Python 2.7
Ported from http://code.activestate.com/recipes/578078/ and simplified for our
use (only support maxsize > 0 and positional arguments).
"""
from collections import namedtuple
from functools import update_wrapper
from threading import RLock
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
def lru_cache(maxsize=100):
"""Least-recently-used cache decorator.
Arguments to the cached function must be hashable.
See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
"""
def decorating_function(user_function):
cache = dict()
stats = [0, 0] # make statistics updateable non-locally
HITS, MISSES = 0, 1 # names for the stats fields
cache_get = cache.get # bound method to lookup key or return None
_len = len # localize the global len() function
lock = RLock() # linkedlist updates aren't threadsafe
root = [] # root of the circular doubly linked list
root[:] = [root, root, None, None] # initialize by pointing to self
nonlocal_root = [root] # make updateable non-locally
PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields
assert maxsize and maxsize > 0, "maxsize %s not supported" % maxsize
def wrapper(*args):
# size limited caching that tracks accesses by recency
key = args
with lock:
link = cache_get(key)
if link is not None:
# record recent use of the key by moving it to the
# front of the list
root, = nonlocal_root
link_prev, link_next, key, result = link
link_prev[NEXT] = link_next
link_next[PREV] = link_prev
last = root[PREV]
last[NEXT] = root[PREV] = link
link[PREV] = last
link[NEXT] = root
stats[HITS] += 1
return result
result = user_function(*args)
with lock:
root, = nonlocal_root
if key in cache:
# getting here means that this same key was added to the
# cache while the lock was released. since the link
# update is already done, we need only return the
# computed result and update the count of misses.
pass
elif _len(cache) >= maxsize:
# use the old root to store the new key and result
oldroot = root
oldroot[KEY] = key
oldroot[RESULT] = result
# empty the oldest link and make it the new root
root = nonlocal_root[0] = oldroot[NEXT]
oldkey = root[KEY]
# oldvalue = root[RESULT]
root[KEY] = root[RESULT] = None
# now update the cache dictionary for the new links
del cache[oldkey]
cache[key] = oldroot
else:
# put result in a new link at the front of the list
last = root[PREV]
link = [last, root, key, result]
last[NEXT] = root[PREV] = cache[key] = link
stats[MISSES] += 1
return result
def cache_info():
"""Report cache statistics"""
with lock:
return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
def cache_clear():
"""Clear the cache and cache statistics"""
with lock:
cache.clear()
root = nonlocal_root[0]
root[:] = [root, root, None, None]
stats[:] = [0, 0]
wrapper.__wrapped__ = user_function
wrapper.cache_info = cache_info
wrapper.cache_clear = cache_clear
return update_wrapper(wrapper, user_function)
return decorating_function

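A quick usage sketch of the decorator above; the function name `square` and the maxsize of 2 are made up for illustration. It is imported here through the `psycopg2.compat` shim changed below, which resolves to this backport on Python 2 and to `functools.lru_cache` on Python 3.

from psycopg2.compat import lru_cache

@lru_cache(maxsize=2)
def square(n):
    return n * n

square(2)                   # miss
square(3)                   # miss
square(2)                   # hit: 2 is still cached
square(4)                   # miss: evicts the least recently used key, 3
print(square.cache_info())  # CacheInfo(hits=1, misses=3, maxsize=2, currsize=2)
square.cache_clear()        # reset the cache and the statistics
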
lib/compat.py

@@ -1,10 +1,15 @@
 import sys
 
+__all__ = ['string_types', 'text_type', 'lru_cache']
+
 if sys.version_info[0] == 2:
     # Python 2
     string_types = basestring,
     text_type = unicode
+    from ._lru_cache import lru_cache
 else:
     # Python 3
     string_types = str,
     text_type = str
+    from functools import lru_cache

lib/extras.py

@@ -35,9 +35,10 @@ import logging as _logging
 import psycopg2
 from psycopg2 import extensions as _ext
-from psycopg2.extensions import cursor as _cursor
-from psycopg2.extensions import connection as _connection
-from psycopg2.extensions import adapt as _A, quote_ident
+from .extensions import cursor as _cursor
+from .extensions import connection as _connection
+from .extensions import adapt as _A, quote_ident
+from .compat import lru_cache
 
 from psycopg2._psycopg import (  # noqa
     REPLICATION_PHYSICAL, REPLICATION_LOGICAL,
@@ -330,6 +331,7 @@ class NamedTupleCursor(_cursor):
         "abc'def"
     """
     Record = None
+    MAX_CACHE = 1024
 
     def execute(self, query, vars=None):
         self.Record = None
@@ -381,21 +383,30 @@
         except StopIteration:
             return
 
-    def _make_nt(self):
-        # ascii except alnum and underscore
-        nochars = ' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~'
-        re_clean = _re.compile('[' + _re.escape(nochars) + ']')
-
-        def f(s):
-            s = re_clean.sub('_', s)
-            # Python identifier cannot start with numbers, namedtuple fields
-            # cannot start with underscore. So...
-            if s[0] == '_' or '0' <= s[0] <= '9':
-                s = 'f' + s
-            return s
-
-        return namedtuple("Record", [f(d[0]) for d in self.description or ()])
+    # ascii except alnum and underscore
+    _re_clean = _re.compile(
+        '[' + _re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']')
+
+    def _make_nt(self):
+        key = tuple(d[0] for d in self.description) if self.description else ()
+        return self._cached_make_nt(key)
+
+    def _do_make_nt(self, key):
+        fields = []
+        for s in key:
+            s = self._re_clean.sub('_', s)
+            # Python identifier cannot start with numbers, namedtuple fields
+            # cannot start with underscore. So...
+            if s[0] == '_' or '0' <= s[0] <= '9':
+                s = 'f' + s
+            fields.append(s)
+        nt = namedtuple("Record", fields)
+        return nt
+
+    # Exposed for testability, and if someone wants to monkeypatch to tweak
+    # the cache size.
+    _cached_make_nt = lru_cache(512)(_do_make_nt)
 
 
 class LoggingConnection(_connection):

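A sketch of the behaviour implemented above; the DSN and column aliases are invented for the example. Column names that are not valid Python identifiers are cleaned before building the namedtuple, and result sets with the same column names reuse the same cached Record class.

import psycopg2
from psycopg2.extras import NamedTupleCursor

conn = psycopg2.connect("dbname=test")  # hypothetical DSN
curs = conn.cursor(cursor_factory=NamedTupleCursor)

curs.execute('''select 1 as "user-id", 2 as "2nd"''')
r1 = curs.fetchone()
print(r1.user_id, r1.f2nd)      # "user-id" -> user_id, "2nd" -> f2nd

curs.execute('''select 3 as "user-id", 4 as "2nd"''')
r2 = curs.fetchone()
print(type(r1) is type(r2))     # True: same column names, Record comes from the cache
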
tests/test_extras_dictcursor.py

@@ -578,6 +578,45 @@ class NamedTupleCursorTest(ConnectingTestCase):
         for i, t in enumerate(curs):
             self.assertEqual(i + 1, curs.rownumber)
 
+    def test_cache(self):
+        curs = self.conn.cursor()
+
+        curs.execute("select 10 as a, 20 as b")
+        r1 = curs.fetchone()
+        curs.execute("select 10 as a, 20 as c")
+        r2 = curs.fetchone()
+        curs.execute("select 10 as a, 30 as b")
+        r3 = curs.fetchone()
+
+        self.assert_(type(r1) is type(r3))
+        self.assert_(type(r1) is not type(r2))
+
+    def test_max_cache(self):
+        from psycopg2.extras import NamedTupleCursor
+        from psycopg2.compat import lru_cache
+
+        old_func = NamedTupleCursor._cached_make_nt
+        NamedTupleCursor._cached_make_nt = \
+            lru_cache(8)(NamedTupleCursor._do_make_nt)
+        try:
+            recs = []
+            curs = self.conn.cursor()
+            for i in range(10):
+                curs.execute("select 1 as f%s" % i)
+                recs.append(curs.fetchone())
+
+            # Still in cache
+            curs.execute("select 1 as f9")
+            rec = curs.fetchone()
+            self.assert_(any(type(r) is type(rec) for r in recs))
+
+            # Gone from cache
+            curs.execute("select 1 as f0")
+            rec = curs.fetchone()
+            self.assert_(all(type(r) is not type(rec) for r in recs))
+        finally:
+            NamedTupleCursor._cached_make_nt = old_func
 
 
 def test_suite():
     return unittest.TestLoader().loadTestsFromName(__name__)