mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2024-11-24 18:03:42 +03:00
Functions WIP
This commit is contained in:
parent
2e586fa61c
commit
3c38c8ec40
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -59,6 +59,7 @@ src/infi/clickhouse_orm/__version__.py
|
||||||
bootstrap.py
|
bootstrap.py
|
||||||
|
|
||||||
htmldocs/
|
htmldocs/
|
||||||
|
cover/
|
||||||
|
|
||||||
# tox
|
# tox
|
||||||
.tox/
|
.tox/
|
||||||
|
|
|
@ -152,6 +152,12 @@ When running a query, specifying a model class is not required. In case you do n
|
||||||
|
|
||||||
This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax.
|
This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax.
|
||||||
|
|
||||||
|
It is also possible to generate a model class on the fly for an existing table in the database using `get_model_for_table`. This is particularly useful for querying system tables, for example:
|
||||||
|
|
||||||
|
QueryLog = db.get_model_for_table('query_log', system_table=True)
|
||||||
|
for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000):
|
||||||
|
print(row.query)
|
||||||
|
|
||||||
SQL Placeholders
|
SQL Placeholders
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,9 @@ class Field(FunctionOperatorsMixin):
|
||||||
self.readonly = bool(self.alias or self.materialized or readonly)
|
self.readonly = bool(self.alias or self.materialized or readonly)
|
||||||
self.codec = codec
|
self.codec = codec
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.name
|
||||||
|
|
||||||
def to_python(self, value, timezone_in_use):
|
def to_python(self, value, timezone_in_use):
|
||||||
'''
|
'''
|
||||||
Converts the input value into the expected Python data type, raising ValueError if the
|
Converts the input value into the expected Python data type, raising ValueError if the
|
||||||
|
|
|
@ -4,7 +4,7 @@ from inspect import signature, Parameter
|
||||||
from types import FunctionType
|
from types import FunctionType
|
||||||
|
|
||||||
from .utils import is_iterable, comma_join, NO_VALUE
|
from .utils import is_iterable, comma_join, NO_VALUE
|
||||||
from .query import Cond
|
from .query import Cond, QuerySet
|
||||||
|
|
||||||
|
|
||||||
def binary_operator(func):
|
def binary_operator(func):
|
||||||
|
@ -276,7 +276,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
|
||||||
if isinstance(arg, F):
|
if isinstance(arg, F):
|
||||||
return arg.to_sql()
|
return arg.to_sql()
|
||||||
if isinstance(arg, Field):
|
if isinstance(arg, Field):
|
||||||
return "`%s`" % arg.name
|
return "`%s`" % arg
|
||||||
if isinstance(arg, str):
|
if isinstance(arg, str):
|
||||||
return StringField().to_db_string(arg)
|
return StringField().to_db_string(arg)
|
||||||
if isinstance(arg, datetime):
|
if isinstance(arg, datetime):
|
||||||
|
@ -291,6 +291,8 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
|
||||||
return StringField().to_db_string(arg.tzname(None))
|
return StringField().to_db_string(arg.tzname(None))
|
||||||
if arg is None:
|
if arg is None:
|
||||||
return 'NULL'
|
return 'NULL'
|
||||||
|
if isinstance(arg, QuerySet):
|
||||||
|
return "(%s)" % arg
|
||||||
if is_iterable(arg):
|
if is_iterable(arg):
|
||||||
return '[' + comma_join(F._arg_to_sql(x) for x in arg) + ']'
|
return '[' + comma_join(F._arg_to_sql(x) for x in arg) + ']'
|
||||||
return str(arg)
|
return str(arg)
|
||||||
|
|
|
@ -94,6 +94,12 @@ class ModelBase(type):
|
||||||
if db_type.startswith('Array'):
|
if db_type.startswith('Array'):
|
||||||
inner_field = cls.create_ad_hoc_field(db_type[6 : -1])
|
inner_field = cls.create_ad_hoc_field(db_type[6 : -1])
|
||||||
return orm_fields.ArrayField(inner_field)
|
return orm_fields.ArrayField(inner_field)
|
||||||
|
# Tuples (poor man's version - convert to array)
|
||||||
|
if db_type.startswith('Tuple'):
|
||||||
|
types = [s.strip() for s in db_type[6 : -1].split(',')]
|
||||||
|
assert len(set(types)) == 1, 'No support for mixed types in tuples - ' + db_type
|
||||||
|
inner_field = cls.create_ad_hoc_field(types[0])
|
||||||
|
return orm_fields.ArrayField(inner_field)
|
||||||
# FixedString
|
# FixedString
|
||||||
if db_type.startswith('FixedString'):
|
if db_type.startswith('FixedString'):
|
||||||
length = int(db_type[12 : -1])
|
length = int(db_type[12 : -1])
|
||||||
|
|
|
@ -410,7 +410,7 @@ class QuerySet(object):
|
||||||
Returns the contents of the query's `ORDER BY` clause as a string.
|
Returns the contents of the query's `ORDER BY` clause as a string.
|
||||||
"""
|
"""
|
||||||
return comma_join([
|
return comma_join([
|
||||||
'%s DESC' % field[1:] if field[0] == '-' else field
|
'%s DESC' % field[1:] if isinstance(field, str) and field[0] == '-' else str(field)
|
||||||
for field in self._order_by
|
for field in self._order_by
|
||||||
])
|
])
|
||||||
|
|
||||||
|
@ -624,7 +624,7 @@ class AggregateQuerySet(QuerySet):
|
||||||
"""
|
"""
|
||||||
Returns the selected fields or expressions as a SQL string.
|
Returns the selected fields or expressions as a SQL string.
|
||||||
"""
|
"""
|
||||||
return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
|
return comma_join([str(f) for f in self._fields] + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
return self._database.select(self.as_sql()) # using an ad-hoc model
|
return self._database.select(self.as_sql()) # using an ad-hoc model
|
||||||
|
|
|
@ -52,19 +52,19 @@ def parse_tsv(line):
|
||||||
|
|
||||||
def parse_array(array_string):
|
def parse_array(array_string):
|
||||||
"""
|
"""
|
||||||
Parse an array string as returned by clickhouse. For example:
|
Parse an array or tuple string as returned by clickhouse. For example:
|
||||||
"['hello', 'world']" ==> ["hello", "world"]
|
"['hello', 'world']" ==> ["hello", "world"]
|
||||||
"[1,2,3]" ==> [1, 2, 3]
|
"(1,2,3)" ==> [1, 2, 3]
|
||||||
"""
|
"""
|
||||||
# Sanity check
|
# Sanity check
|
||||||
if len(array_string) < 2 or array_string[0] != '[' or array_string[-1] != ']':
|
if len(array_string) < 2 or array_string[0] not in '[(' or array_string[-1] not in '])':
|
||||||
raise ValueError('Invalid array string: "%s"' % array_string)
|
raise ValueError('Invalid array string: "%s"' % array_string)
|
||||||
# Drop opening brace
|
# Drop opening brace
|
||||||
array_string = array_string[1:]
|
array_string = array_string[1:]
|
||||||
# Go over the string, lopping off each value at the beginning until nothing is left
|
# Go over the string, lopping off each value at the beginning until nothing is left
|
||||||
values = []
|
values = []
|
||||||
while True:
|
while True:
|
||||||
if array_string == ']':
|
if array_string in '])':
|
||||||
# End of array
|
# End of array
|
||||||
return values
|
return values
|
||||||
elif array_string[0] in ', ':
|
elif array_string[0] in ', ':
|
||||||
|
|
|
@ -110,6 +110,9 @@ class FuncsTestCase(TestCaseWithData):
|
||||||
self._test_qs(qs.filter(~Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
|
self._test_qs(qs.filter(~Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
|
||||||
self._test_qs(qs.filter(Person.first_name.isNotIn(['Ciaran', 'Elton'])), 96)
|
self._test_qs(qs.filter(Person.first_name.isNotIn(['Ciaran', 'Elton'])), 96)
|
||||||
self._test_qs(qs.exclude(Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
|
self._test_qs(qs.exclude(Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
|
||||||
|
# In subquery
|
||||||
|
subquery = qs.filter(F.startsWith(Person.last_name, 'M')).only(Person.first_name)
|
||||||
|
self._test_qs(qs.filter(Person.first_name.isIn(subquery)), 4)
|
||||||
|
|
||||||
def test_comparison_operators(self):
|
def test_comparison_operators(self):
|
||||||
one = F.plus(1, 0)
|
one = F.plus(1, 0)
|
||||||
|
@ -174,8 +177,8 @@ class FuncsTestCase(TestCaseWithData):
|
||||||
self._test_func(0 | one, 1)
|
self._test_func(0 | one, 1)
|
||||||
# ^
|
# ^
|
||||||
self._test_func(one ^ one, 0)
|
self._test_func(one ^ one, 0)
|
||||||
#############self._test_func(one ^ 0, 1)
|
self._test_func(one ^ 0, 1)
|
||||||
#############self._test_func(0 ^ one, 1)
|
self._test_func(0 ^ one, 1)
|
||||||
# ~
|
# ~
|
||||||
self._test_func(~one, 0)
|
self._test_func(~one, 0)
|
||||||
self._test_func(~~one, 1)
|
self._test_func(~~one, 1)
|
||||||
|
@ -416,6 +419,10 @@ class FuncsTestCase(TestCaseWithData):
|
||||||
self._test_func(F.power(x, y))
|
self._test_func(F.power(x, y))
|
||||||
self._test_func(F.intExp10(x))
|
self._test_func(F.intExp10(x))
|
||||||
self._test_func(F.intExp2(x))
|
self._test_func(F.intExp2(x))
|
||||||
|
self._test_func(F.intDivOrZero(x, y))
|
||||||
|
self._test_func(F.abs(x))
|
||||||
|
self._test_func(F.gcd(x, y))
|
||||||
|
self._test_func(F.lcm(x, y))
|
||||||
|
|
||||||
def test_rounding_functions(self):
|
def test_rounding_functions(self):
|
||||||
x = 22.22222
|
x = 22.22222
|
||||||
|
@ -578,9 +585,10 @@ class FuncsTestCase(TestCaseWithData):
|
||||||
self._test_func(F.IPv6NumToString(F.IPv6StringToNum('2a02:6b8::11')), '2a02:6b8::11')
|
self._test_func(F.IPv6NumToString(F.IPv6StringToNum('2a02:6b8::11')), '2a02:6b8::11')
|
||||||
self._test_func(F.toIPv4('10.20.30.40'), IPv4Address('10.20.30.40'))
|
self._test_func(F.toIPv4('10.20.30.40'), IPv4Address('10.20.30.40'))
|
||||||
self._test_func(F.toIPv6('2001:438:ffff::407d:1bc1'), IPv6Address('2001:438:ffff::407d:1bc1'))
|
self._test_func(F.toIPv6('2001:438:ffff::407d:1bc1'), IPv6Address('2001:438:ffff::407d:1bc1'))
|
||||||
# These require support for tuples:
|
self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16),
|
||||||
# self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16), ['192.168.0.0','192.168.255.255'])
|
[IPv4Address('192.168.0.0'), IPv4Address('192.168.255.255')])
|
||||||
# self._test_func(F.IPv6CIDRToRange(x, y))
|
self._test_func(F.IPv6CIDRToRange(F.toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32),
|
||||||
|
[IPv6Address('2001:db8::'), IPv6Address('2001:db8:ffff:ffff:ffff:ffff:ffff:ffff')])
|
||||||
|
|
||||||
def test_aggregate_funcs(self):
|
def test_aggregate_funcs(self):
|
||||||
self._test_aggr(F.any(Person.first_name))
|
self._test_aggr(F.any(Person.first_name))
|
||||||
|
@ -632,25 +640,39 @@ class FuncsTestCase(TestCaseWithData):
|
||||||
self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None)
|
self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None)
|
||||||
|
|
||||||
def test_quantile_funcs(self):
|
def test_quantile_funcs(self):
|
||||||
|
cond = Person.last_name > 'H'
|
||||||
|
weight_expr = F.toUInt32(F.round(Person.height))
|
||||||
|
# Quantile
|
||||||
self._test_aggr(F.quantile(0.9)(Person.height))
|
self._test_aggr(F.quantile(0.9)(Person.height))
|
||||||
self._test_aggr(F.quantileOrDefault(0.9)(Person.height))
|
self._test_aggr(F.quantileOrDefault(0.9)(Person.height))
|
||||||
self._test_aggr(F.quantileOrNull(0.9)(Person.height))
|
self._test_aggr(F.quantileOrNull(0.9)(Person.height))
|
||||||
self._test_aggr(F.quantileIf(0.9)(Person.height, Person.last_name > 'H'))
|
self._test_aggr(F.quantileIf(0.9)(Person.height, cond))
|
||||||
self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, Person.last_name > 'H'))
|
self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, cond))
|
||||||
self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, Person.last_name > 'H'))
|
self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, cond))
|
||||||
self._test_aggr(F.quantileDeterministic(0.9)(Person.height, 17))
|
self._test_aggr(F.quantileDeterministic(0.9)(Person.height, 17))
|
||||||
|
self._test_aggr(F.quantileExact(0.9)(Person.height))
|
||||||
self._test_aggr(F.quantileExactOrDefault(0.9)(Person.height))
|
self._test_aggr(F.quantileExactOrDefault(0.9)(Person.height))
|
||||||
weight_expr = F.toUInt32(F.round(Person.height))
|
# Quantile weighted
|
||||||
|
self._test_aggr(F.quantileExactWeighted(0.9)(Person.height, weight_expr))
|
||||||
self._test_aggr(F.quantileExactWeightedOrNull(0.9)(Person.height, weight_expr))
|
self._test_aggr(F.quantileExactWeightedOrNull(0.9)(Person.height, weight_expr))
|
||||||
self._test_aggr(F.quantileTimingIf(0.9)(Person.height, Person.last_name > 'H'))
|
self._test_aggr(F.quantileTiming(0.9)(Person.height))
|
||||||
self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, Person.last_name > 'H'))
|
self._test_aggr(F.quantileTimingIf(0.9)(Person.height, cond))
|
||||||
self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, Person.last_name > 'H'))
|
self._test_aggr(F.quantileTimingWeighted(0.9)(Person.height, weight_expr))
|
||||||
|
self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, cond))
|
||||||
|
self._test_aggr(F.quantileTDigest(0.9)(Person.height))
|
||||||
|
self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, cond))
|
||||||
self._test_aggr(F.quantileTDigestWeighted(0.9)(Person.height, weight_expr))
|
self._test_aggr(F.quantileTDigestWeighted(0.9)(Person.height, weight_expr))
|
||||||
|
# Quantiles
|
||||||
self._test_aggr(F.quantiles(0.9, 0.95, 0.99)(Person.height))
|
self._test_aggr(F.quantiles(0.9, 0.95, 0.99)(Person.height))
|
||||||
|
self._test_aggr(F.quantilesDeterministic(0.9, 0.95, 0.99)(Person.height, 17))
|
||||||
|
self._test_aggr(F.quantilesExact(0.9, 0.95, 0.99)(Person.height))
|
||||||
self._test_aggr(F.quantilesExactWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
|
self._test_aggr(F.quantilesExactWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
|
||||||
self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H'))
|
self._test_aggr(F.quantilesTiming(0.9, 0.95, 0.99)(Person.height))
|
||||||
self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, Person.last_name > 'H'))
|
self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, cond))
|
||||||
self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H'))
|
self._test_aggr(F.quantilesTimingWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
|
||||||
|
self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, cond))
|
||||||
|
self._test_aggr(F.quantilesTDigest(0.9, 0.95, 0.99)(Person.height))
|
||||||
|
self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, cond))
|
||||||
self._test_aggr(F.quantilesTDigestWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
|
self._test_aggr(F.quantilesTDigestWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
|
||||||
|
|
||||||
def test_top_k_funcs(self):
|
def test_top_k_funcs(self):
|
||||||
|
|
|
@ -469,9 +469,9 @@ class AggregateTestCase(TestCaseWithData):
|
||||||
order_by('first_name', '-height').limit_by(1, 'first_name')
|
order_by('first_name', '-height').limit_by(1, 'first_name')
|
||||||
self.assertEqual(qs.count(), 94)
|
self.assertEqual(qs.count(), 94)
|
||||||
self.assertEqual(list(qs)[89].last_name, 'Bowen')
|
self.assertEqual(list(qs)[89].last_name, 'Bowen')
|
||||||
# Test with funcs
|
# Test with funcs and fields
|
||||||
qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n=F.count()).\
|
qs = Person.objects_in(self.database).aggregate(Person.first_name, Person.last_name, Person.height, n=F.count()).\
|
||||||
order_by('first_name', '-height').limit_by(1, F.upper(Person.first_name))
|
order_by(Person.first_name, '-height').limit_by(1, F.upper(Person.first_name))
|
||||||
self.assertEqual(qs.count(), 94)
|
self.assertEqual(qs.count(), 94)
|
||||||
self.assertEqual(list(qs)[89].last_name, 'Bowen')
|
self.assertEqual(list(qs)[89].last_name, 'Bowen')
|
||||||
# Test with limit and offset, also mixing LIMIT with LIMIT BY
|
# Test with limit and offset, also mixing LIMIT with LIMIT BY
|
||||||
|
|
Loading…
Reference in New Issue
Block a user