Functions WIP

This commit is contained in:
Itai Shirav 2020-04-19 07:17:52 +03:00
parent 2e586fa61c
commit 3c38c8ec40
9 changed files with 67 additions and 27 deletions

1
.gitignore vendored
View File

@ -59,6 +59,7 @@ src/infi/clickhouse_orm/__version__.py
bootstrap.py
htmldocs/
cover/
# tox
.tox/

View File

@ -152,6 +152,12 @@ When running a query, specifying a model class is not required. In case you do n
This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax.
It is also possible to generate a model class on the fly for an existing table in the database using `get_model_for_table`. This is particularly useful for querying system tables, for example:
QueryLog = db.get_model_for_table('query_log', system_table=True)
for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000):
print(row.query)
SQL Placeholders
----------------

View File

@ -42,6 +42,9 @@ class Field(FunctionOperatorsMixin):
self.readonly = bool(self.alias or self.materialized or readonly)
self.codec = codec
def __str__(self):
return self.name
def to_python(self, value, timezone_in_use):
'''
Converts the input value into the expected Python data type, raising ValueError if the

View File

@ -4,7 +4,7 @@ from inspect import signature, Parameter
from types import FunctionType
from .utils import is_iterable, comma_join, NO_VALUE
from .query import Cond
from .query import Cond, QuerySet
def binary_operator(func):
@ -276,7 +276,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
if isinstance(arg, F):
return arg.to_sql()
if isinstance(arg, Field):
return "`%s`" % arg.name
return "`%s`" % arg
if isinstance(arg, str):
return StringField().to_db_string(arg)
if isinstance(arg, datetime):
@ -291,6 +291,8 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
return StringField().to_db_string(arg.tzname(None))
if arg is None:
return 'NULL'
if isinstance(arg, QuerySet):
return "(%s)" % arg
if is_iterable(arg):
return '[' + comma_join(F._arg_to_sql(x) for x in arg) + ']'
return str(arg)
@ -340,7 +342,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
@staticmethod
def gcd(a, b):
return F('gcd',a, b)
return F('gcd', a, b)
@staticmethod
def lcm(a, b):

View File

@ -94,6 +94,12 @@ class ModelBase(type):
if db_type.startswith('Array'):
inner_field = cls.create_ad_hoc_field(db_type[6 : -1])
return orm_fields.ArrayField(inner_field)
# Tuples (poor man's version - convert to array)
if db_type.startswith('Tuple'):
types = [s.strip() for s in db_type[6 : -1].split(',')]
assert len(set(types)) == 1, 'No support for mixed types in tuples - ' + db_type
inner_field = cls.create_ad_hoc_field(types[0])
return orm_fields.ArrayField(inner_field)
# FixedString
if db_type.startswith('FixedString'):
length = int(db_type[12 : -1])

View File

@ -410,7 +410,7 @@ class QuerySet(object):
Returns the contents of the query's `ORDER BY` clause as a string.
"""
return comma_join([
'%s DESC' % field[1:] if field[0] == '-' else field
'%s DESC' % field[1:] if isinstance(field, str) and field[0] == '-' else str(field)
for field in self._order_by
])
@ -624,7 +624,7 @@ class AggregateQuerySet(QuerySet):
"""
Returns the selected fields or expressions as a SQL string.
"""
return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
return comma_join([str(f) for f in self._fields] + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
def __iter__(self):
return self._database.select(self.as_sql()) # using an ad-hoc model

View File

@ -52,19 +52,19 @@ def parse_tsv(line):
def parse_array(array_string):
"""
Parse an array string as returned by clickhouse. For example:
Parse an array or tuple string as returned by clickhouse. For example:
"['hello', 'world']" ==> ["hello", "world"]
"[1,2,3]" ==> [1, 2, 3]
"(1,2,3)" ==> [1, 2, 3]
"""
# Sanity check
if len(array_string) < 2 or array_string[0] != '[' or array_string[-1] != ']':
if len(array_string) < 2 or array_string[0] not in '[(' or array_string[-1] not in '])':
raise ValueError('Invalid array string: "%s"' % array_string)
# Drop opening brace
array_string = array_string[1:]
# Go over the string, lopping off each value at the beginning until nothing is left
values = []
while True:
if array_string == ']':
if array_string in '])':
# End of array
return values
elif array_string[0] in ', ':

View File

@ -110,6 +110,9 @@ class FuncsTestCase(TestCaseWithData):
self._test_qs(qs.filter(~Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
self._test_qs(qs.filter(Person.first_name.isNotIn(['Ciaran', 'Elton'])), 96)
self._test_qs(qs.exclude(Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
# In subquery
subquery = qs.filter(F.startsWith(Person.last_name, 'M')).only(Person.first_name)
self._test_qs(qs.filter(Person.first_name.isIn(subquery)), 4)
def test_comparison_operators(self):
one = F.plus(1, 0)
@ -174,8 +177,8 @@ class FuncsTestCase(TestCaseWithData):
self._test_func(0 | one, 1)
# ^
self._test_func(one ^ one, 0)
#############self._test_func(one ^ 0, 1)
#############self._test_func(0 ^ one, 1)
self._test_func(one ^ 0, 1)
self._test_func(0 ^ one, 1)
# ~
self._test_func(~one, 0)
self._test_func(~~one, 1)
@ -416,6 +419,10 @@ class FuncsTestCase(TestCaseWithData):
self._test_func(F.power(x, y))
self._test_func(F.intExp10(x))
self._test_func(F.intExp2(x))
self._test_func(F.intDivOrZero(x, y))
self._test_func(F.abs(x))
self._test_func(F.gcd(x, y))
self._test_func(F.lcm(x, y))
def test_rounding_functions(self):
x = 22.22222
@ -578,9 +585,10 @@ class FuncsTestCase(TestCaseWithData):
self._test_func(F.IPv6NumToString(F.IPv6StringToNum('2a02:6b8::11')), '2a02:6b8::11')
self._test_func(F.toIPv4('10.20.30.40'), IPv4Address('10.20.30.40'))
self._test_func(F.toIPv6('2001:438:ffff::407d:1bc1'), IPv6Address('2001:438:ffff::407d:1bc1'))
# These require support for tuples:
# self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16), ['192.168.0.0','192.168.255.255'])
# self._test_func(F.IPv6CIDRToRange(x, y))
self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16),
[IPv4Address('192.168.0.0'), IPv4Address('192.168.255.255')])
self._test_func(F.IPv6CIDRToRange(F.toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32),
[IPv6Address('2001:db8::'), IPv6Address('2001:db8:ffff:ffff:ffff:ffff:ffff:ffff')])
def test_aggregate_funcs(self):
self._test_aggr(F.any(Person.first_name))
@ -632,25 +640,39 @@ class FuncsTestCase(TestCaseWithData):
self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None)
def test_quantile_funcs(self):
cond = Person.last_name > 'H'
weight_expr = F.toUInt32(F.round(Person.height))
# Quantile
self._test_aggr(F.quantile(0.9)(Person.height))
self._test_aggr(F.quantileOrDefault(0.9)(Person.height))
self._test_aggr(F.quantileOrNull(0.9)(Person.height))
self._test_aggr(F.quantileIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileDeterministic(0.9)(Person.height, 17))
self._test_aggr(F.quantileExact(0.9)(Person.height))
self._test_aggr(F.quantileExactOrDefault(0.9)(Person.height))
weight_expr = F.toUInt32(F.round(Person.height))
# Quantile weighted
self._test_aggr(F.quantileExactWeighted(0.9)(Person.height, weight_expr))
self._test_aggr(F.quantileExactWeightedOrNull(0.9)(Person.height, weight_expr))
self._test_aggr(F.quantileTimingIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, Person.last_name > 'H'))
self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileTiming(0.9)(Person.height))
self._test_aggr(F.quantileTimingIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileTimingWeighted(0.9)(Person.height, weight_expr))
self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, cond))
self._test_aggr(F.quantileTDigest(0.9)(Person.height))
self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileTDigestWeighted(0.9)(Person.height, weight_expr))
# Quantiles
self._test_aggr(F.quantiles(0.9, 0.95, 0.99)(Person.height))
self._test_aggr(F.quantilesDeterministic(0.9, 0.95, 0.99)(Person.height, 17))
self._test_aggr(F.quantilesExact(0.9, 0.95, 0.99)(Person.height))
self._test_aggr(F.quantilesExactWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, Person.last_name > 'H'))
self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantilesTiming(0.9, 0.95, 0.99)(Person.height))
self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, cond))
self._test_aggr(F.quantilesTimingWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, cond))
self._test_aggr(F.quantilesTDigest(0.9, 0.95, 0.99)(Person.height))
self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, cond))
self._test_aggr(F.quantilesTDigestWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
def test_top_k_funcs(self):

View File

@ -469,9 +469,9 @@ class AggregateTestCase(TestCaseWithData):
order_by('first_name', '-height').limit_by(1, 'first_name')
self.assertEqual(qs.count(), 94)
self.assertEqual(list(qs)[89].last_name, 'Bowen')
# Test with funcs
qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n=F.count()).\
order_by('first_name', '-height').limit_by(1, F.upper(Person.first_name))
# Test with funcs and fields
qs = Person.objects_in(self.database).aggregate(Person.first_name, Person.last_name, Person.height, n=F.count()).\
order_by(Person.first_name, '-height').limit_by(1, F.upper(Person.first_name))
self.assertEqual(qs.count(), 94)
self.assertEqual(list(qs)[89].last_name, 'Bowen')
# Test with limit and offset, also mixing LIMIT with LIMIT BY