diff --git a/.gitignore b/.gitignore
index 0e9fa7b..60e5afe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -59,6 +59,7 @@ src/infi/clickhouse_orm/__version__.py
 bootstrap.py
 
 htmldocs/
+cover/
 
 # tox
 .tox/
diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md
index c6ce1ca..74ded9f 100644
--- a/docs/models_and_databases.md
+++ b/docs/models_and_databases.md
@@ -152,6 +152,12 @@ When running a query, specifying a model class is not required. In case you do n
 
 This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax.
 
+It is also possible to generate a model class on the fly for an existing table in the database using `get_model_for_table`. This is particularly useful for querying system tables, for example:
+
+    QueryLog = db.get_model_for_table('query_log', system_table=True)
+    for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000):
+        print(row.query)
+
 SQL Placeholders
 ----------------
 
diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py
index dbb5aee..bf370ea 100644
--- a/src/infi/clickhouse_orm/fields.py
+++ b/src/infi/clickhouse_orm/fields.py
@@ -42,6 +42,9 @@ class Field(FunctionOperatorsMixin):
         self.readonly = bool(self.alias or self.materialized or readonly)
         self.codec = codec
 
+    def __str__(self):
+        return self.name
+
     def to_python(self, value, timezone_in_use):
         '''
         Converts the input value into the expected Python data type, raising ValueError if the
diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py
index a5bda56..8af621f 100644
--- a/src/infi/clickhouse_orm/funcs.py
+++ b/src/infi/clickhouse_orm/funcs.py
@@ -4,7 +4,7 @@ from inspect import signature, Parameter
 from types import FunctionType
 
 from .utils import is_iterable, comma_join, NO_VALUE
-from .query import Cond
+from .query import Cond, QuerySet
 
 
 def binary_operator(func):
@@ -276,7 +276,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
         if isinstance(arg, F):
             return arg.to_sql()
         if isinstance(arg, Field):
-            return "`%s`" % arg.name
+            return "`%s`" % arg
         if isinstance(arg, str):
             return StringField().to_db_string(arg)
         if isinstance(arg, datetime):
@@ -291,6 +291,8 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
             return StringField().to_db_string(arg.tzname(None))
         if arg is None:
             return 'NULL'
+        if isinstance(arg, QuerySet):
+            return "(%s)" % arg
         if is_iterable(arg):
             return '[' + comma_join(F._arg_to_sql(x) for x in arg) + ']'
         return str(arg)
@@ -340,7 +342,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
 
     @staticmethod
     def gcd(a, b):
-        return F('gcd',a, b)
+        return F('gcd', a, b)
 
     @staticmethod
     def lcm(a, b):
diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py
index c2d830b..a70ad38 100644
--- a/src/infi/clickhouse_orm/models.py
+++ b/src/infi/clickhouse_orm/models.py
@@ -94,6 +94,12 @@ class ModelBase(type):
         if db_type.startswith('Array'):
             inner_field = cls.create_ad_hoc_field(db_type[6 : -1])
             return orm_fields.ArrayField(inner_field)
+        # Tuples (poor man's version - convert to array)
+        if db_type.startswith('Tuple'):
+            types = [s.strip() for s in db_type[6 : -1].split(',')]
+            assert len(set(types)) == 1, 'No support for mixed types in tuples - ' + db_type
+            inner_field = cls.create_ad_hoc_field(types[0])
+            return orm_fields.ArrayField(inner_field)
         # FixedString
         if db_type.startswith('FixedString'):
             length = int(db_type[12 : -1])
diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py
index 66212e8..d768fab 100644
--- a/src/infi/clickhouse_orm/query.py
+++ b/src/infi/clickhouse_orm/query.py
@@ -410,7 +410,7 @@ class QuerySet(object):
         Returns the contents of the query's `ORDER BY` clause as a string.
         """
         return comma_join([
-            '%s DESC' % field[1:] if field[0] == '-' else field
+            '%s DESC' % field[1:] if isinstance(field, str) and field[0] == '-' else str(field)
             for field in self._order_by
         ])
 
@@ -624,7 +624,7 @@ class AggregateQuerySet(QuerySet):
         """
         Returns the selected fields or expressions as a SQL string.
         """
-        return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
+        return comma_join([str(f) for f in self._fields] + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
 
     def __iter__(self):
         return self._database.select(self.as_sql()) # using an ad-hoc model
diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py
index 9e678fb..2f60a40 100644
--- a/src/infi/clickhouse_orm/utils.py
+++ b/src/infi/clickhouse_orm/utils.py
@@ -52,19 +52,19 @@ def parse_tsv(line):
 
 def parse_array(array_string):
     """
-    Parse an array string as returned by clickhouse. For example:
+    Parse an array or tuple string as returned by clickhouse. For example:
         "['hello', 'world']" ==> ["hello", "world"]
-        "[1,2,3]" ==> [1, 2, 3]
+        "(1,2,3)" ==> [1, 2, 3]
    """
     # Sanity check
-    if len(array_string) < 2 or array_string[0] != '[' or array_string[-1] != ']':
+    if len(array_string) < 2 or array_string[0] not in '[(' or array_string[-1] not in '])':
         raise ValueError('Invalid array string: "%s"' % array_string)
     # Drop opening brace
     array_string = array_string[1:]
     # Go over the string, lopping off each value at the beginning until nothing is left
     values = []
     while True:
-        if array_string == ']':
+        if array_string in '])':
             # End of array
             return values
         elif array_string[0] in ', ':
diff --git a/tests/test_funcs.py b/tests/test_funcs.py
index 8ec8b27..4b15f48 100644
--- a/tests/test_funcs.py
+++ b/tests/test_funcs.py
@@ -110,6 +110,9 @@ class FuncsTestCase(TestCaseWithData):
         self._test_qs(qs.filter(~Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
         self._test_qs(qs.filter(Person.first_name.isNotIn(['Ciaran', 'Elton'])), 96)
         self._test_qs(qs.exclude(Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
+        # In subquery
+        subquery = qs.filter(F.startsWith(Person.last_name, 'M')).only(Person.first_name)
+        self._test_qs(qs.filter(Person.first_name.isIn(subquery)), 4)
 
     def test_comparison_operators(self):
         one = F.plus(1, 0)
@@ -174,8 +177,8 @@ class FuncsTestCase(TestCaseWithData):
         self._test_func(0 | one, 1)
         # ^
         self._test_func(one ^ one, 0)
-        #############self._test_func(one ^ 0, 1)
-        #############self._test_func(0 ^ one, 1)
+        self._test_func(one ^ 0, 1)
+        self._test_func(0 ^ one, 1)
         # ~
         self._test_func(~one, 0)
         self._test_func(~~one, 1)
@@ -416,6 +419,10 @@ class FuncsTestCase(TestCaseWithData):
         self._test_func(F.power(x, y))
         self._test_func(F.intExp10(x))
         self._test_func(F.intExp2(x))
+        self._test_func(F.intDivOrZero(x, y))
+        self._test_func(F.abs(x))
+        self._test_func(F.gcd(x, y))
+        self._test_func(F.lcm(x, y))
 
     def test_rounding_functions(self):
         x = 22.22222
@@ -578,9 +585,10 @@ class FuncsTestCase(TestCaseWithData):
         self._test_func(F.IPv6NumToString(F.IPv6StringToNum('2a02:6b8::11')), '2a02:6b8::11')
         self._test_func(F.toIPv4('10.20.30.40'), IPv4Address('10.20.30.40'))
         self._test_func(F.toIPv6('2001:438:ffff::407d:1bc1'), IPv6Address('2001:438:ffff::407d:1bc1'))
-        # These require support for tuples:
-        # self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16), ['192.168.0.0','192.168.255.255'])
-        # self._test_func(F.IPv6CIDRToRange(x, y))
+        self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16),
+                        [IPv4Address('192.168.0.0'), IPv4Address('192.168.255.255')])
+        self._test_func(F.IPv6CIDRToRange(F.toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32),
+                        [IPv6Address('2001:db8::'), IPv6Address('2001:db8:ffff:ffff:ffff:ffff:ffff:ffff')])
 
     def test_aggregate_funcs(self):
         self._test_aggr(F.any(Person.first_name))
@@ -632,25 +640,39 @@ class FuncsTestCase(TestCaseWithData):
         self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None)
 
     def test_quantile_funcs(self):
+        cond = Person.last_name > 'H'
+        weight_expr = F.toUInt32(F.round(Person.height))
+        # Quantile
         self._test_aggr(F.quantile(0.9)(Person.height))
         self._test_aggr(F.quantileOrDefault(0.9)(Person.height))
         self._test_aggr(F.quantileOrNull(0.9)(Person.height))
-        self._test_aggr(F.quantileIf(0.9)(Person.height, Person.last_name > 'H'))
-        self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, Person.last_name > 'H'))
-        self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, Person.last_name > 'H'))
+        self._test_aggr(F.quantileIf(0.9)(Person.height, cond))
+        self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, cond))
+        self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, cond))
         self._test_aggr(F.quantileDeterministic(0.9)(Person.height, 17))
+        self._test_aggr(F.quantileExact(0.9)(Person.height))
         self._test_aggr(F.quantileExactOrDefault(0.9)(Person.height))
-        weight_expr = F.toUInt32(F.round(Person.height))
+        # Quantile weighted
+        self._test_aggr(F.quantileExactWeighted(0.9)(Person.height, weight_expr))
         self._test_aggr(F.quantileExactWeightedOrNull(0.9)(Person.height, weight_expr))
-        self._test_aggr(F.quantileTimingIf(0.9)(Person.height, Person.last_name > 'H'))
-        self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, Person.last_name > 'H'))
-        self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, Person.last_name > 'H'))
+        self._test_aggr(F.quantileTiming(0.9)(Person.height))
+        self._test_aggr(F.quantileTimingIf(0.9)(Person.height, cond))
+        self._test_aggr(F.quantileTimingWeighted(0.9)(Person.height, weight_expr))
+        self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, cond))
+        self._test_aggr(F.quantileTDigest(0.9)(Person.height))
+        self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, cond))
         self._test_aggr(F.quantileTDigestWeighted(0.9)(Person.height, weight_expr))
+        # Quantiles
         self._test_aggr(F.quantiles(0.9, 0.95, 0.99)(Person.height))
+        self._test_aggr(F.quantilesDeterministic(0.9, 0.95, 0.99)(Person.height, 17))
+        self._test_aggr(F.quantilesExact(0.9, 0.95, 0.99)(Person.height))
         self._test_aggr(F.quantilesExactWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
-        self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H'))
-        self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, Person.last_name > 'H'))
-        self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H'))
+        self._test_aggr(F.quantilesTiming(0.9, 0.95, 0.99)(Person.height))
+        self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, cond))
+        self._test_aggr(F.quantilesTimingWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
+        self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, cond))
+        self._test_aggr(F.quantilesTDigest(0.9, 0.95, 0.99)(Person.height))
+        self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, cond))
+        self._test_aggr(F.quantilesTDigestWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
 
     def test_top_k_funcs(self):
diff --git a/tests/test_querysets.py b/tests/test_querysets.py
index 2134765..2144e55 100644
--- a/tests/test_querysets.py
+++ b/tests/test_querysets.py
@@ -469,9 +469,9 @@ class AggregateTestCase(TestCaseWithData):
             order_by('first_name', '-height').limit_by(1, 'first_name')
         self.assertEqual(qs.count(), 94)
         self.assertEqual(list(qs)[89].last_name, 'Bowen')
-        # Test with funcs
-        qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n=F.count()).\
-            order_by('first_name', '-height').limit_by(1, F.upper(Person.first_name))
+        # Test with funcs and fields
+        qs = Person.objects_in(self.database).aggregate(Person.first_name, Person.last_name, Person.height, n=F.count()).\
+            order_by(Person.first_name, '-height').limit_by(1, F.upper(Person.first_name))
         self.assertEqual(qs.count(), 94)
         self.assertEqual(list(qs)[89].last_name, 'Bowen')
         # Test with limit and offset, also mixing LIMIT with LIMIT BY
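
A minimal usage sketch of the subquery support introduced above (a `QuerySet` passed to `isIn` is rendered as a parenthesized subquery by `F._arg_to_sql`). The `Person` model definition and the `Database('default')` connection below are illustrative assumptions that loosely mirror the project's test model; they are not part of this change:

    from infi.clickhouse_orm.database import Database
    from infi.clickhouse_orm.engines import MergeTree
    from infi.clickhouse_orm.fields import DateField, StringField
    from infi.clickhouse_orm.funcs import F
    from infi.clickhouse_orm.models import Model

    # Hypothetical model, similar to the Person model used in the tests
    class Person(Model):
        first_name = StringField()
        last_name = StringField()
        birthday = DateField()
        engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))

    db = Database('default')  # assumes a ClickHouse server on localhost
    db.create_table(Person)
    qs = Person.objects_in(db)

    # The inner queryset is rendered as a parenthesized subquery in the generated SQL
    subquery = qs.filter(F.startsWith(Person.last_name, 'M')).only(Person.first_name)
    for person in qs.filter(Person.first_name.isIn(subquery)):
        print(person.first_name, person.last_name)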