From 962a1673f9627dd24df17d7991ae927ebbe3bddd Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 16 Oct 2018 16:42:30 +0300 Subject: [PATCH 01/41] Functions WIP --- src/infi/clickhouse_orm/fields.py | 29 ++- src/infi/clickhouse_orm/models.py | 9 +- src/infi/clickhouse_orm/query.py | 353 ++++++++++++++++++++++++++++-- src/infi/clickhouse_orm/utils.py | 11 + tests/base_test_with_data.py | 2 +- tests/test_querysets.py | 151 ++++++++++++- 6 files changed, 523 insertions(+), 32 deletions(-) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 8c11805..83b738e 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -8,15 +8,18 @@ from calendar import timegm from decimal import Decimal, localcontext from .utils import escape, parse_array, comma_join +from .query import F class Field(object): ''' Abstract base class for all field types. ''' - creation_counter = 0 - class_default = 0 - db_type = None + name = None # this is set by the parent model + parent = None # this is set by the parent model + creation_counter = 0 # used for keeping the model fields ordered + class_default = 0 # should be overridden by concrete subclasses + db_type = None # should be overridden by concrete subclasses def __init__(self, default=None, alias=None, materialized=None, readonly=None): assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \ @@ -96,6 +99,26 @@ class Field(object): inner_field = getattr(inner_field, 'inner_field', None) return False + # Support comparison operators (for use in querysets) + + def __lt__(self, other): + return F.less(self, other) + + def __le__(self, other): + return F.lessOrEquals(self, other) + + def __eq__(self, other): + return F.equals(self, other) + + def __ne__(self, other): + return F.notEquals(self, other) + + def __gt__(self, other): + return F.greater(self, other) + + def __ge__(self, other): + return F.greaterOrEquals(self, other) + class 
StringField(Field): diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index d008513..8a69949 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -43,7 +43,14 @@ class ModelBase(type): _writable_fields=OrderedDict([f for f in fields if not f[1].readonly]), _defaults=defaults ) - return super(ModelBase, cls).__new__(cls, str(name), bases, attrs) + model = super(ModelBase, cls).__new__(cls, str(name), bases, attrs) + + # Let each field know its parent and its own name + for n, f in fields: + setattr(f, 'parent', model) + setattr(f, 'name', n) + + return model @classmethod def create_ad_hoc_model(cls, fields, model_name='AdHocModel'): diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 1035dd4..156329c 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -3,7 +3,8 @@ import six import pytz from copy import copy from math import ceil -from .utils import comma_join +from datetime import date, datetime +from .utils import comma_join, is_iterable # TODO @@ -22,6 +23,11 @@ class Operator(object): """ raise NotImplementedError # pragma: no cover + def _value_to_sql(self, field, value, quote=True): + if isinstance(value, F): + return value.to_sql() + return field.to_db_string(field.to_python(value, pytz.utc), quote) + class SimpleOperator(Operator): """ @@ -34,7 +40,7 @@ class SimpleOperator(Operator): def to_sql(self, model_cls, field_name, value): field = getattr(model_cls, field_name) - value = field.to_db_string(field.to_python(value, pytz.utc)) + value = self._value_to_sql(field, value) if value == '\\N' and self._sql_for_null is not None: return ' '.join([field_name, self._sql_for_null]) return ' '.join([field_name, self._sql_operator, value]) @@ -56,7 +62,7 @@ class InOperator(Operator): elif isinstance(value, six.string_types): pass else: - value = comma_join([field.to_db_string(field.to_python(v, pytz.utc)) for v in value]) + 
value = comma_join([self._value_to_sql(field, v) for v in value]) return '%s IN (%s)' % (field_name, value) @@ -72,7 +78,7 @@ class LikeOperator(Operator): def to_sql(self, model_cls, field_name, value): field = getattr(model_cls, field_name) - value = field.to_db_string(field.to_python(value, pytz.utc), quote=False) + value = self._value_to_sql(field, value, quote=False) value = value.replace('\\', '\\\\').replace('%', '\\\\%').replace('_', '\\\\_') pattern = self._pattern.format(value) if self._case_sensitive: @@ -88,7 +94,7 @@ class IExactOperator(Operator): def to_sql(self, model_cls, field_name, value): field = getattr(model_cls, field_name) - value = field.to_db_string(field.to_python(value, pytz.utc)) + value = self._value_to_sql(field, value) return 'lowerUTF8(%s) = lowerUTF8(%s)' % (field_name, value) @@ -117,10 +123,8 @@ class BetweenOperator(Operator): def to_sql(self, model_cls, field_name, value): field = getattr(model_cls, field_name) - value0 = field.to_db_string( - field.to_python(value[0], pytz.utc)) if value[0] is not None or len(str(value[0])) > 0 else None - value1 = field.to_db_string( - field.to_python(value[1], pytz.utc)) if value[1] is not None or len(str(value[1])) > 0 else None + value0 = self._value_to_sql(field, value[0]) if value[0] is not None or len(str(value[0])) > 0 else None + value1 = self._value_to_sql(field, value[1]) if value[1] is not None or len(str(value[1])) > 0 else None if value0 and value1: return '%s BETWEEN %s AND %s' % (field_name, value0, value1) if value0 and not value1: @@ -153,11 +157,19 @@ register_operator('iendswith', LikeOperator('%{}', False)) register_operator('iexact', IExactOperator()) -class FOV(object): +class Cond(object): """ - An object for storing Field + Operator + Value. + An abstract object for storing a single query condition Field + Operator + Value. 
""" + def to_sql(self, model_cls): + raise NotImplementedError + + +class FieldCond(Cond): + """ + A single query condition made up of Field + Operator + Value. + """ def __init__(self, field_name, operator, value): self._field_name = field_name self._operator = _operators.get(operator) @@ -171,13 +183,300 @@ class FOV(object): return self._operator.to_sql(model_cls, self._field_name, self._value) +class F(Cond): + """ + Represents a database function call and its arguments. + It doubles as a query condition when the function returns a boolean result. + """ + + def __init__(self, name, *args): + self.name = name + self.args = args + + def to_sql(self, *args): + args_sql = comma_join(self.arg_to_sql(arg) for arg in self.args) + return self.name + '(' + args_sql + ')' + + def arg_to_sql(self, arg): + from .fields import Field, StringField, DateTimeField, DateField + if isinstance(arg, F): + return arg.to_sql() + if isinstance(arg, Field): + return "`%s`" % arg.name + if isinstance(arg, six.string_types): + return StringField().to_db_string(arg) + if isinstance(arg, datetime): + return DateTimeField().to_db_string(arg) + if isinstance(arg, date): + return DateField().to_db_string(arg) + if isinstance(arg, bool): + return six.text_type(int(arg)) + if arg is None: + return 'NULL' + if is_iterable(arg): + return '[' + comma_join(self.arg_to_sql(x) for x in arg) + ']' + return six.text_type(arg) + + # Support comparison operators with F objects + + def __lt__(self, other): + return F.less(self, other) + + def __le__(self, other): + return F.lessOrEquals(self, other) + + def __eq__(self, other): + return F.equals(self, other) + + def __ne__(self, other): + return F.notEquals(self, other) + + def __gt__(self, other): + return F.greater(self, other) + + def __ge__(self, other): + return F.greaterOrEquals(self, other) + + # Support arithmetic operations on F objects + + def __add__(self, other): + return F.plus(self, other) + + def __radd__(self, other): + return 
F.plus(other, self) + + def __sub__(self, other): + return F.minus(self, other) + + def __rsub__(self, other): + return F.minus(other, self) + + def __mul__(self, other): + return F.multiply(self, other) + + def __rmul__(self, other): + return F.multiply(other, self) + + def __div__(self, other): + return F.divide(self, other) + + def __rdiv__(self, other): + return F.divide(other, self) + + def __mod__(self, other): + return F.modulo(self, other) + + def __rmod__(self, other): + return F.modulo(other, self) + + def __neg__(self): + return F.negate(self) + + def __pos__(self): + return self + + # Arithmetic functions + + @staticmethod + def plus(a, b): + return F('plus', a, b) + + @staticmethod + def minus(a, b): + return F('minus', a, b) + + @staticmethod + def multiply(a, b): + return F('multiply', a, b) + + @staticmethod + def divide(a, b): + return F('divide', a, b) + + @staticmethod + def intDiv(a, b): + return F('intDiv', a, b) + + @staticmethod + def intDivOrZero(a, b): + return F('intDivOrZero', a, b) + + @staticmethod + def modulo(a, b): + return F('modulo', a, b) + + @staticmethod + def negate(a): + return F('negate', a) + + @staticmethod + def abs(a): + return F('abs', a) + + @staticmethod + def gcd(a, b): + return F('gcd',a, b) + + @staticmethod + def lcm(a, b): + return F('lcm', a, b) + + # Comparison functions + + @staticmethod + def equals(a, b): + return F('equals', a, b) + + @staticmethod + def notEquals(a, b): + return F('notEquals', a, b) + + @staticmethod + def less(a, b): + return F('less', a, b) + + @staticmethod + def greater(a, b): + return F('greater', a, b) + + @staticmethod + def lessOrEquals(a, b): + return F('lessOrEquals', a, b) + + @staticmethod + def greaterOrEquals(a, b): + return F('greaterOrEquals', a, b) + + # Functions for working with dates and times + + @staticmethod + def toYear(d): + return F('toYear', d) + + @staticmethod + def toMonth(d): + return F('toMonth', d) + + @staticmethod + def toDayOfMonth(d): + return 
F('toDayOfMonth', d) + + @staticmethod + def toDayOfWeek(d): + return F('toDayOfWeek', d) + + @staticmethod + def toHour(d): + return F('toHour', d) + + @staticmethod + def toMinute(d): + return F('toMinute', d) + + @staticmethod + def toSecond(d): + return F('toSecond', d) + + @staticmethod + def toMonday(d): + return F('toMonday', d) + + @staticmethod + def toStartOfMonth(d): + return F('toStartOfMonth', d) + + @staticmethod + def toStartOfQuarter(d): + return F('toStartOfQuarter', d) + + @staticmethod + def toStartOfYear(d): + return F('toStartOfYear', d) + + @staticmethod + def toStartOfMinute(d): + return F('toStartOfMinute', d) + + @staticmethod + def toStartOfFiveMinute(d): + return F('toStartOfFiveMinute', d) + + @staticmethod + def toStartOfFifteenMinutes(d): + return F('toStartOfFifteenMinutes', d) + + @staticmethod + def toStartOfHour(d): + return F('toStartOfHour', d) + + @staticmethod + def toStartOfDay(d): + return F('toStartOfDay', d) + + @staticmethod + def toTime(d): + return F('toTime', d) + + @staticmethod + def toRelativeYearNum(d, timezone=''): + return F('toRelativeYearNum', d, timezone) + + @staticmethod + def toRelativeMonthNum(d, timezone=''): + return F('toRelativeMonthNum', d, timezone) + + @staticmethod + def toRelativeWeekNum(d, timezone=''): + return F('toRelativeWeekNum', d, timezone) + + @staticmethod + def toRelativeDayNum(d, timezone=''): + return F('toRelativeDayNum', d, timezone) + + @staticmethod + def toRelativeHourNum(d, timezone=''): + return F('toRelativeHourNum', d, timezone) + + @staticmethod + def toRelativeMinuteNum(d, timezone=''): + return F('toRelativeMinuteNum', d, timezone) + + @staticmethod + def toRelativeSecondNum(d, timezone=''): + return F('toRelativeSecondNum', d, timezone) + + @staticmethod + def now(): + return F('now') + + @staticmethod + def today(): + return F('today') + + @staticmethod + def yesterday(d): + return F('yesterday') + + @staticmethod + def timeSlot(d): + return F('timeSlot', d) + + 
@staticmethod + def timeSlots(start_time, duration): + return F('timeSlots', start_time, duration) + + @staticmethod + def formatDateTime(d, format, timezone=''): + return F('formatDateTime', d, format, timezone) + + class Q(object): - AND_MODE = 'AND' - OR_MODE = 'OR' + AND_MODE = ' AND ' + OR_MODE = ' OR ' - def __init__(self, **filter_fields): - self._fovs = [self._build_fov(k, v) for k, v in six.iteritems(filter_fields)] + def __init__(self, *filter_funcs, **filter_fields): + self._conds = list(filter_funcs) + [self._build_cond(k, v) for k, v in six.iteritems(filter_fields)] self._l_child = None self._r_child = None self._negate = False @@ -191,16 +490,16 @@ class Q(object): q._mode = mode # AND/OR return q - def _build_fov(self, key, value): + def _build_cond(self, key, value): if '__' in key: field_name, operator = key.rsplit('__', 1) else: field_name, operator = key, 'eq' - return FOV(field_name, operator, value) + return FieldCond(field_name, operator, value) def to_sql(self, model_cls): - if self._fovs: - sql = ' {} '.format(self._mode).join(fov.to_sql(model_cls) for fov in self._fovs) + if self._conds: + sql = self._mode.join(cond.to_sql(model_cls) for cond in self._conds) else: if self._l_child and self._r_child: sql = '({} {} {})'.format( @@ -348,10 +647,16 @@ class QuerySet(object): Add q object to query if it specified. 
""" qs = copy(self) - if q: - qs._q = list(self._q) + list(q) - else: - qs._q = list(self._q) + [Q(**filter_fields)] + qs._q = list(self._q) + for arg in q: + if isinstance(arg, Q): + qs._q.append(arg) + elif isinstance(arg, F): + qs._q.append(Q(arg)) + else: + raise TypeError('Invalid argument "%r" to queryset filter' % arg) + if filter_fields: + qs._q += [Q(**filter_fields)] return qs def exclude(self, **filter_fields): @@ -502,3 +807,5 @@ class AggregateQuerySet(QuerySet): sql = u'SELECT count() FROM (%s)' % self.as_sql() raw = self._database.raw(sql) return int(raw) if raw else 0 + + diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index e3eb4bb..8d4f7ee 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -98,3 +98,14 @@ def comma_join(items): Joins an iterable of strings with commas. """ return ', '.join(items) + + +def is_iterable(obj): + """ + Checks if the given object is iterable. + """ + try: + iter(obj) + return True + except TypeError: + return False diff --git a/tests/base_test_with_data.py b/tests/base_test_with_data.py index 9f183cf..f4edd90 100644 --- a/tests/base_test_with_data.py +++ b/tests/base_test_with_data.py @@ -46,7 +46,7 @@ class Person(Model): data = [ {"first_name": "Abdul", "last_name": "Hester", "birthday": "1970-12-02", "height": "1.63", "passport": 35052255}, - + {"first_name": "Adam", "last_name": "Goodman", "birthday": "1986-01-07", "height": "1.74", "passport": 36052255}, diff --git a/tests/test_querysets.py b/tests/test_querysets.py index a4fef14..08d086a 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -3,11 +3,13 @@ from __future__ import unicode_literals, print_function import unittest from infi.clickhouse_orm.database import Database -from infi.clickhouse_orm.query import Q +from infi.clickhouse_orm.query import Q, F from .base_test_with_data import * -import logging from datetime import date, datetime +from logging import getLogger 
+logger = getLogger('tests') + try: Enum # exists in Python 3.4+ except NameError: @@ -21,11 +23,11 @@ class QuerySetTestCase(TestCaseWithData): self.database.insert(self._sample_data()) def _test_qs(self, qs, expected_count): - logging.info(qs.as_sql()) + logger.info(qs.as_sql()) count = 0 for instance in qs: count += 1 - logging.info('\t[%d]\t%s' % (count, instance.to_dict())) + logger.info('\t[%d]\t%s' % (count, instance.to_dict())) self.assertEqual(count, expected_count) self.assertEqual(qs.count(), expected_count) @@ -249,6 +251,17 @@ class QuerySetTestCase(TestCaseWithData): self._test_qs(qs[70:80], 10) self._test_qs(qs[80:], 20) + def test_mixed_filter(self): + qs = Person.objects_in(self.database) + qs = qs.filter(Q(first_name='a'), F('greater', Person.height, 1.7), last_name='b') + self.assertEqual(qs.conditions_as_sql(), + "first_name = 'a' AND greater(`height`, 1.7) AND last_name = 'b'") + + def test_invalid_filter(self): + qs = Person.objects_in(self.database) + with self.assertRaises(TypeError): + qs.filter('foo') + class AggregateTestCase(TestCaseWithData): @@ -378,6 +391,136 @@ class AggregateTestCase(TestCaseWithData): self.assertEqual(qs.conditions_as_sql(), 'the__next__number > 1') +class FuncsTestCase(TestCaseWithData): + + def setUp(self): + super(FuncsTestCase, self).setUp() + self.database.insert(self._sample_data()) + + def _test_qs(self, qs, expected_count): + logger.info(qs.as_sql()) + count = 0 + for instance in qs: + count += 1 + logger.info('\t[%d]\t%s' % (count, instance.to_dict())) + self.assertEqual(count, expected_count) + self.assertEqual(qs.count(), expected_count) + + def _test_func(self, func, expected_value=None): + sql = 'SELECT %s AS value' % func.to_sql() + logger.info(sql) + result = list(self.database.select(sql)) + logger.info('\t==> %s', result[0].value) + if expected_value is not None: + self.assertEqual(result[0].value, expected_value) + + def test_func_to_sql(self): + # No args + self.assertEqual(F('func').to_sql(), 
'func()') + # String args + self.assertEqual(F('func', "Wendy's", u"Wendy's").to_sql(), "func('Wendy\\'s', 'Wendy\\'s')") + # Numeric args + self.assertEqual(F('func', 1, 1.1, Decimal('3.3')).to_sql(), "func(1, 1.1, 3.3)") + # Date args + self.assertEqual(F('func', date(2018, 12, 31)).to_sql(), "func('2018-12-31')") + # Datetime args + self.assertEqual(F('func', datetime(2018, 12, 31)).to_sql(), "func('1546214400')") + # Boolean args + self.assertEqual(F('func', True, False).to_sql(), "func(1, 0)") + # Null args + self.assertEqual(F('func', None).to_sql(), "func(NULL)") + # Fields as args + self.assertEqual(F('func', SampleModel.color).to_sql(), "func(`color`)") + # Funcs as args + self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))') + # Iterables as args + x = [1, 'z', F('foo', 17)] + for y in [x, tuple(x), iter(x)]: + self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)") + self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([[1, 2], [3, 4]])") + + def test_filter_float_field(self): + qs = Person.objects_in(self.database) + # Height > 2 + self._test_qs(qs.filter(F.greater(Person.height, 2)), 0) + self._test_qs(qs.filter(Person.height > 2), 0) + # Height > 1.61 + self._test_qs(qs.filter(F.greater(Person.height, 1.61)), 96) + self._test_qs(qs.filter(Person.height > 1.61), 96) + # Height < 1.61 + self._test_qs(qs.filter(F.less(Person.height, 1.61)), 4) + self._test_qs(qs.filter(Person.height < 1.61), 4) + + def test_filter_date_field(self): + qs = Person.objects_in(self.database) + # People born on the 30th + self._test_qs(qs.filter(F('equals', F('toDayOfMonth', Person.birthday), 30)), 3) + self._test_qs(qs.filter(F('toDayOfMonth', Person.birthday) == 30), 3) + self._test_qs(qs.filter(F.toDayOfMonth(Person.birthday) == 30), 3) + # People born on Sunday + self._test_qs(qs.filter(F('equals', F('toDayOfWeek', Person.birthday), 7)), 18) + self._test_qs(qs.filter(F('toDayOfWeek', Person.birthday) == 7), 18) + 
self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18)
+        # People born on 1976-10-01
+        self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1)
+        self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1)
+        self._test_qs(qs.filter(Person.birthday == date(1976, 10, 1)), 1)
+
+    def test_func_as_field_value(self):
+        qs = Person.objects_in(self.database)
+        self._test_qs(qs.filter(height__gt=F.plus(1, 0.61)), 96)
+        self._test_qs(qs.exclude(birthday=F.today()), 100)
+        self._test_qs(qs.filter(birthday__between=['1970-01-01', F.today()]), 100)
+
+    def test_comparison_operators(self):
+        one = F.plus(1, 0)
+        two = F.plus(1, 1)
+        self._test_func(one > one, 0)
+        self._test_func(two > one, 1)
+        self._test_func(one >= two, 0)
+        self._test_func(one >= one, 1)
+        self._test_func(one < one, 0)
+        self._test_func(one < two, 1)
+        self._test_func(two <= one, 0)
+        self._test_func(one <= one, 1)
+        self._test_func(one == two, 0)
+        self._test_func(one == one, 1)
+        self._test_func(one != one, 0)
+        self._test_func(one != two, 1)
+
+    def test_arithmetic_operators(self):
+        one = F.plus(1, 0)
+        two = F.plus(1, 1)
+        # +
+        self._test_func(one + two, 3)
+        self._test_func(one + 2, 3)
+        self._test_func(2 + one, 3)
+        # -
+        self._test_func(one - two, -1)
+        self._test_func(one - 2, -1)
+        self._test_func(1 - two, -1)
+        # *
+        self._test_func(one * two, 2)
+        self._test_func(one * 2, 2)
+        self._test_func(1 * two, 2)
+        # /
+        self._test_func(one / two, 0.5)
+        self._test_func(one / 2, 0.5)
+        self._test_func(1 / two, 0.5)
+        # %
+        self._test_func(one % two, 1)
+        self._test_func(one % 2, 1)
+        self._test_func(1 % two, 1)
+        # sign
+        self._test_func(-one, -1)
+        self._test_func(--one, 1)
+        self._test_func(+one, 1)
+
+
+
+
+
 Color = Enum('Color', u'red blue green yellow brown white black')

From 602d0671f196041131ff72889bfeca7910937c4a Mon Sep 17 00:00:00 2001
From: Itai Shirav
Date: Tue, 16 Oct 2018 16:42:30 +0300
Subject: [PATCH 02/41] Functions WIP

---
src/infi/clickhouse_orm/fields.py | 29 ++- src/infi/clickhouse_orm/models.py | 9 +- src/infi/clickhouse_orm/query.py | 354 ++++++++++++++++++++++++++++-- src/infi/clickhouse_orm/utils.py | 11 + tests/base_test_with_data.py | 2 +- tests/test_querysets.py | 151 ++++++++++++- 6 files changed, 523 insertions(+), 33 deletions(-) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 8c11805..83b738e 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -8,15 +8,18 @@ from calendar import timegm from decimal import Decimal, localcontext from .utils import escape, parse_array, comma_join +from .query import F class Field(object): ''' Abstract base class for all field types. ''' - creation_counter = 0 - class_default = 0 - db_type = None + name = None # this is set by the parent model + parent = None # this is set by the parent model + creation_counter = 0 # used for keeping the model fields ordered + class_default = 0 # should be overridden by concrete subclasses + db_type = None # should be overridden by concrete subclasses def __init__(self, default=None, alias=None, materialized=None, readonly=None): assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \ @@ -96,6 +99,26 @@ class Field(object): inner_field = getattr(inner_field, 'inner_field', None) return False + # Support comparison operators (for use in querysets) + + def __lt__(self, other): + return F.less(self, other) + + def __le__(self, other): + return F.lessOrEquals(self, other) + + def __eq__(self, other): + return F.equals(self, other) + + def __ne__(self, other): + return F.notEquals(self, other) + + def __gt__(self, other): + return F.greater(self, other) + + def __ge__(self, other): + return F.greaterOrEquals(self, other) + class StringField(Field): diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index d008513..8a69949 100644 --- a/src/infi/clickhouse_orm/models.py 
+++ b/src/infi/clickhouse_orm/models.py @@ -43,7 +43,14 @@ class ModelBase(type): _writable_fields=OrderedDict([f for f in fields if not f[1].readonly]), _defaults=defaults ) - return super(ModelBase, cls).__new__(cls, str(name), bases, attrs) + model = super(ModelBase, cls).__new__(cls, str(name), bases, attrs) + + # Let each field know its parent and its own name + for n, f in fields: + setattr(f, 'parent', model) + setattr(f, 'name', n) + + return model @classmethod def create_ad_hoc_model(cls, fields, model_name='AdHocModel'): diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 47bb3bf..73a45d1 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -4,9 +4,9 @@ import six import pytz from copy import copy from math import ceil - from .engines import CollapsingMergeTree -from .utils import comma_join +from datetime import date, datetime +from .utils import comma_join, is_iterable # TODO @@ -25,6 +25,11 @@ class Operator(object): """ raise NotImplementedError # pragma: no cover + def _value_to_sql(self, field, value, quote=True): + if isinstance(value, F): + return value.to_sql() + return field.to_db_string(field.to_python(value, pytz.utc), quote) + class SimpleOperator(Operator): """ @@ -37,7 +42,7 @@ class SimpleOperator(Operator): def to_sql(self, model_cls, field_name, value): field = getattr(model_cls, field_name) - value = field.to_db_string(field.to_python(value, pytz.utc)) + value = self._value_to_sql(field, value) if value == '\\N' and self._sql_for_null is not None: return ' '.join([field_name, self._sql_for_null]) return ' '.join([field_name, self._sql_operator, value]) @@ -59,7 +64,7 @@ class InOperator(Operator): elif isinstance(value, six.string_types): pass else: - value = comma_join([field.to_db_string(field.to_python(v, pytz.utc)) for v in value]) + value = comma_join([self._value_to_sql(field, v) for v in value]) return '%s IN (%s)' % (field_name, value) @@ -75,7 +80,7 @@ class 
LikeOperator(Operator): def to_sql(self, model_cls, field_name, value): field = getattr(model_cls, field_name) - value = field.to_db_string(field.to_python(value, pytz.utc), quote=False) + value = self._value_to_sql(field, value, quote=False) value = value.replace('\\', '\\\\').replace('%', '\\\\%').replace('_', '\\\\_') pattern = self._pattern.format(value) if self._case_sensitive: @@ -91,7 +96,7 @@ class IExactOperator(Operator): def to_sql(self, model_cls, field_name, value): field = getattr(model_cls, field_name) - value = field.to_db_string(field.to_python(value, pytz.utc)) + value = self._value_to_sql(field, value) return 'lowerUTF8(%s) = lowerUTF8(%s)' % (field_name, value) @@ -120,10 +125,8 @@ class BetweenOperator(Operator): def to_sql(self, model_cls, field_name, value): field = getattr(model_cls, field_name) - value0 = field.to_db_string( - field.to_python(value[0], pytz.utc)) if value[0] is not None or len(str(value[0])) > 0 else None - value1 = field.to_db_string( - field.to_python(value[1], pytz.utc)) if value[1] is not None or len(str(value[1])) > 0 else None + value0 = self._value_to_sql(field, value[0]) if value[0] is not None or len(str(value[0])) > 0 else None + value1 = self._value_to_sql(field, value[1]) if value[1] is not None or len(str(value[1])) > 0 else None if value0 and value1: return '%s BETWEEN %s AND %s' % (field_name, value0, value1) if value0 and not value1: @@ -156,11 +159,19 @@ register_operator('iendswith', LikeOperator('%{}', False)) register_operator('iexact', IExactOperator()) -class FOV(object): +class Cond(object): """ - An object for storing Field + Operator + Value. + An abstract object for storing a single query condition Field + Operator + Value. """ + def to_sql(self, model_cls): + raise NotImplementedError + + +class FieldCond(Cond): + """ + A single query condition made up of Field + Operator + Value. 
+ """ def __init__(self, field_name, operator, value): self._field_name = field_name self._operator = _operators.get(operator) @@ -174,13 +185,300 @@ class FOV(object): return self._operator.to_sql(model_cls, self._field_name, self._value) +class F(Cond): + """ + Represents a database function call and its arguments. + It doubles as a query condition when the function returns a boolean result. + """ + + def __init__(self, name, *args): + self.name = name + self.args = args + + def to_sql(self, *args): + args_sql = comma_join(self.arg_to_sql(arg) for arg in self.args) + return self.name + '(' + args_sql + ')' + + def arg_to_sql(self, arg): + from .fields import Field, StringField, DateTimeField, DateField + if isinstance(arg, F): + return arg.to_sql() + if isinstance(arg, Field): + return "`%s`" % arg.name + if isinstance(arg, six.string_types): + return StringField().to_db_string(arg) + if isinstance(arg, datetime): + return DateTimeField().to_db_string(arg) + if isinstance(arg, date): + return DateField().to_db_string(arg) + if isinstance(arg, bool): + return six.text_type(int(arg)) + if arg is None: + return 'NULL' + if is_iterable(arg): + return '[' + comma_join(self.arg_to_sql(x) for x in arg) + ']' + return six.text_type(arg) + + # Support comparison operators with F objects + + def __lt__(self, other): + return F.less(self, other) + + def __le__(self, other): + return F.lessOrEquals(self, other) + + def __eq__(self, other): + return F.equals(self, other) + + def __ne__(self, other): + return F.notEquals(self, other) + + def __gt__(self, other): + return F.greater(self, other) + + def __ge__(self, other): + return F.greaterOrEquals(self, other) + + # Support arithmetic operations on F objects + + def __add__(self, other): + return F.plus(self, other) + + def __radd__(self, other): + return F.plus(other, self) + + def __sub__(self, other): + return F.minus(self, other) + + def __rsub__(self, other): + return F.minus(other, self) + + def __mul__(self, other): + 
return F.multiply(self, other) + + def __rmul__(self, other): + return F.multiply(other, self) + + def __div__(self, other): + return F.divide(self, other) + + def __rdiv__(self, other): + return F.divide(other, self) + + def __mod__(self, other): + return F.modulo(self, other) + + def __rmod__(self, other): + return F.modulo(other, self) + + def __neg__(self): + return F.negate(self) + + def __pos__(self): + return self + + # Arithmetic functions + + @staticmethod + def plus(a, b): + return F('plus', a, b) + + @staticmethod + def minus(a, b): + return F('minus', a, b) + + @staticmethod + def multiply(a, b): + return F('multiply', a, b) + + @staticmethod + def divide(a, b): + return F('divide', a, b) + + @staticmethod + def intDiv(a, b): + return F('intDiv', a, b) + + @staticmethod + def intDivOrZero(a, b): + return F('intDivOrZero', a, b) + + @staticmethod + def modulo(a, b): + return F('modulo', a, b) + + @staticmethod + def negate(a): + return F('negate', a) + + @staticmethod + def abs(a): + return F('abs', a) + + @staticmethod + def gcd(a, b): + return F('gcd',a, b) + + @staticmethod + def lcm(a, b): + return F('lcm', a, b) + + # Comparison functions + + @staticmethod + def equals(a, b): + return F('equals', a, b) + + @staticmethod + def notEquals(a, b): + return F('notEquals', a, b) + + @staticmethod + def less(a, b): + return F('less', a, b) + + @staticmethod + def greater(a, b): + return F('greater', a, b) + + @staticmethod + def lessOrEquals(a, b): + return F('lessOrEquals', a, b) + + @staticmethod + def greaterOrEquals(a, b): + return F('greaterOrEquals', a, b) + + # Functions for working with dates and times + + @staticmethod + def toYear(d): + return F('toYear', d) + + @staticmethod + def toMonth(d): + return F('toMonth', d) + + @staticmethod + def toDayOfMonth(d): + return F('toDayOfMonth', d) + + @staticmethod + def toDayOfWeek(d): + return F('toDayOfWeek', d) + + @staticmethod + def toHour(d): + return F('toHour', d) + + @staticmethod + def 
toMinute(d): + return F('toMinute', d) + + @staticmethod + def toSecond(d): + return F('toSecond', d) + + @staticmethod + def toMonday(d): + return F('toMonday', d) + + @staticmethod + def toStartOfMonth(d): + return F('toStartOfMonth', d) + + @staticmethod + def toStartOfQuarter(d): + return F('toStartOfQuarter', d) + + @staticmethod + def toStartOfYear(d): + return F('toStartOfYear', d) + + @staticmethod + def toStartOfMinute(d): + return F('toStartOfMinute', d) + + @staticmethod + def toStartOfFiveMinute(d): + return F('toStartOfFiveMinute', d) + + @staticmethod + def toStartOfFifteenMinutes(d): + return F('toStartOfFifteenMinutes', d) + + @staticmethod + def toStartOfHour(d): + return F('toStartOfHour', d) + + @staticmethod + def toStartOfDay(d): + return F('toStartOfDay', d) + + @staticmethod + def toTime(d): + return F('toTime', d) + + @staticmethod + def toRelativeYearNum(d, timezone=''): + return F('toRelativeYearNum', d, timezone) + + @staticmethod + def toRelativeMonthNum(d, timezone=''): + return F('toRelativeMonthNum', d, timezone) + + @staticmethod + def toRelativeWeekNum(d, timezone=''): + return F('toRelativeWeekNum', d, timezone) + + @staticmethod + def toRelativeDayNum(d, timezone=''): + return F('toRelativeDayNum', d, timezone) + + @staticmethod + def toRelativeHourNum(d, timezone=''): + return F('toRelativeHourNum', d, timezone) + + @staticmethod + def toRelativeMinuteNum(d, timezone=''): + return F('toRelativeMinuteNum', d, timezone) + + @staticmethod + def toRelativeSecondNum(d, timezone=''): + return F('toRelativeSecondNum', d, timezone) + + @staticmethod + def now(): + return F('now') + + @staticmethod + def today(): + return F('today') + + @staticmethod + def yesterday(d): + return F('yesterday') + + @staticmethod + def timeSlot(d): + return F('timeSlot', d) + + @staticmethod + def timeSlots(start_time, duration): + return F('timeSlots', start_time, duration) + + @staticmethod + def formatDateTime(d, format, timezone=''): + return 
F('formatDateTime', d, format, timezone) + + class Q(object): - AND_MODE = 'AND' - OR_MODE = 'OR' + AND_MODE = ' AND ' + OR_MODE = ' OR ' - def __init__(self, **filter_fields): - self._fovs = [self._build_fov(k, v) for k, v in six.iteritems(filter_fields)] + def __init__(self, *filter_funcs, **filter_fields): + self._conds = list(filter_funcs) + [self._build_cond(k, v) for k, v in six.iteritems(filter_fields)] self._l_child = None self._r_child = None self._negate = False @@ -194,16 +492,16 @@ class Q(object): q._mode = mode # AND/OR return q - def _build_fov(self, key, value): + def _build_cond(self, key, value): if '__' in key: field_name, operator = key.rsplit('__', 1) else: field_name, operator = key, 'eq' - return FOV(field_name, operator, value) + return FieldCond(field_name, operator, value) def to_sql(self, model_cls): - if self._fovs: - sql = ' {} '.format(self._mode).join(fov.to_sql(model_cls) for fov in self._fovs) + if self._conds: + sql = self._mode.join(cond.to_sql(model_cls) for cond in self._conds) else: if self._l_child and self._r_child: sql = '({} {} {})'.format( @@ -353,10 +651,16 @@ class QuerySet(object): Add q object to query if it specified. 
""" qs = copy(self) - if q: - qs._q = list(self._q) + list(q) - else: - qs._q = list(self._q) + [Q(**filter_fields)] + qs._q = list(self._q) + for arg in q: + if isinstance(arg, Q): + qs._q.append(arg) + elif isinstance(arg, F): + qs._q.append(Q(arg)) + else: + raise TypeError('Invalid argument "%r" to queryset filter' % arg) + if filter_fields: + qs._q += [Q(**filter_fields)] return qs def exclude(self, **filter_fields): @@ -519,3 +823,5 @@ class AggregateQuerySet(QuerySet): sql = u'SELECT count() FROM (%s)' % self.as_sql() raw = self._database.raw(sql) return int(raw) if raw else 0 + + diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index e3eb4bb..8d4f7ee 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -98,3 +98,14 @@ def comma_join(items): Joins an iterable of strings with commas. """ return ', '.join(items) + + +def is_iterable(obj): + """ + Checks if the given object is iterable. + """ + try: + iter(obj) + return True + except TypeError: + return False diff --git a/tests/base_test_with_data.py b/tests/base_test_with_data.py index c3fc376..eaacd7d 100644 --- a/tests/base_test_with_data.py +++ b/tests/base_test_with_data.py @@ -46,7 +46,7 @@ class Person(Model): data = [ {"first_name": "Abdul", "last_name": "Hester", "birthday": "1970-12-02", "height": "1.63", "passport": 35052255}, - + {"first_name": "Adam", "last_name": "Goodman", "birthday": "1986-01-07", "height": "1.74", "passport": 36052255}, diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 4c76b68..4858e03 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -3,11 +3,13 @@ from __future__ import unicode_literals, print_function import unittest from infi.clickhouse_orm.database import Database -from infi.clickhouse_orm.query import Q +from infi.clickhouse_orm.query import Q, F from .base_test_with_data import * -import logging from datetime import date, datetime +from logging import getLogger 
+logger = getLogger('tests') + try: Enum # exists in Python 3.4+ except NameError: @@ -21,11 +23,11 @@ class QuerySetTestCase(TestCaseWithData): self.database.insert(self._sample_data()) def _test_qs(self, qs, expected_count): - logging.info(qs.as_sql()) + logger.info(qs.as_sql()) count = 0 for instance in qs: count += 1 - logging.info('\t[%d]\t%s' % (count, instance.to_dict())) + logger.info('\t[%d]\t%s' % (count, instance.to_dict())) self.assertEqual(count, expected_count) self.assertEqual(qs.count(), expected_count) @@ -290,6 +292,17 @@ class QuerySetTestCase(TestCaseWithData): for item, exp_color in zip(res, (Color.red, Color.green, Color.white, Color.blue)): self.assertEqual(exp_color, item.color) + def test_mixed_filter(self): + qs = Person.objects_in(self.database) + qs = qs.filter(Q(first_name='a'), F('greater', Person.height, 1.7), last_name='b') + self.assertEqual(qs.conditions_as_sql(), + "first_name = 'a' AND greater(`height`, 1.7) AND last_name = 'b'") + + def test_invalid_filter(self): + qs = Person.objects_in(self.database) + with self.assertRaises(TypeError): + qs.filter('foo') + class AggregateTestCase(TestCaseWithData): @@ -419,6 +432,136 @@ class AggregateTestCase(TestCaseWithData): self.assertEqual(qs.conditions_as_sql(), 'the__next__number > 1') +class FuncsTestCase(TestCaseWithData): + + def setUp(self): + super(FuncsTestCase, self).setUp() + self.database.insert(self._sample_data()) + + def _test_qs(self, qs, expected_count): + logger.info(qs.as_sql()) + count = 0 + for instance in qs: + count += 1 + logger.info('\t[%d]\t%s' % (count, instance.to_dict())) + self.assertEqual(count, expected_count) + self.assertEqual(qs.count(), expected_count) + + def _test_func(self, func, expected_value=None): + sql = 'SELECT %s AS value' % func.to_sql() + logger.info(sql) + result = list(self.database.select(sql)) + logger.info('\t==> %s', result[0].value) + if expected_value is not None: + self.assertEqual(result[0].value, expected_value) + + def 
test_func_to_sql(self): + # No args + self.assertEqual(F('func').to_sql(), 'func()') + # String args + self.assertEqual(F('func', "Wendy's", u"Wendy's").to_sql(), "func('Wendy\\'s', 'Wendy\\'s')") + # Numeric args + self.assertEqual(F('func', 1, 1.1, Decimal('3.3')).to_sql(), "func(1, 1.1, 3.3)") + # Date args + self.assertEqual(F('func', date(2018, 12, 31)).to_sql(), "func('2018-12-31')") + # Datetime args + self.assertEqual(F('func', datetime(2018, 12, 31)).to_sql(), "func('1546214400')") + # Boolean args + self.assertEqual(F('func', True, False).to_sql(), "func(1, 0)") + # Null args + self.assertEqual(F('func', None).to_sql(), "func(NULL)") + # Fields as args + self.assertEqual(F('func', SampleModel.color).to_sql(), "func(`color`)") + # Funcs as args + self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))') + # Iterables as args + x = [1, 'z', F('foo', 17)] + for y in [x, tuple(x), iter(x)]: + self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)") + self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([[1, 2], [3, 4]])") + + def test_filter_float_field(self): + qs = Person.objects_in(self.database) + # Height > 2 + self._test_qs(qs.filter(F.greater(Person.height, 2)), 0) + self._test_qs(qs.filter(Person.height > 2), 0) + # Height > 1.61 + self._test_qs(qs.filter(F.greater(Person.height, 1.61)), 96) + self._test_qs(qs.filter(Person.height > 1.61), 96) + # Height < 1.61 + self._test_qs(qs.filter(F.less(Person.height, 1.61)), 4) + self._test_qs(qs.filter(Person.height < 1.61), 4) + + def test_filter_date_field(self): + qs = Person.objects_in(self.database) + # People born on the 30th + self._test_qs(qs.filter(F('equals', F('toDayOfMonth', Person.birthday), 30)), 3) + self._test_qs(qs.filter(F('toDayOfMonth', Person.birthday) == 30), 3) + self._test_qs(qs.filter(F.toDayOfMonth(Person.birthday) == 30), 3) + # People born on Sunday + self._test_qs(qs.filter(F('equals', F('toDayOfWeek', Person.birthday), 7)), 18) + 
self._test_qs(qs.filter(F('toDayOfWeek', Person.birthday) == 7), 18) + self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18) + # People born on 1976-10-01 + self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1) + self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1) + self._test_qs(qs.filter(Person.birthday == date(1976, 10, 1)), 1) + + def test_func_as_field_value(self): + qs = Person.objects_in(self.database) + self._test_qs(qs.filter(height__gt=F.plus(1, 0.61)), 96) + self._test_qs(qs.exclude(birthday=F.today()), 100) + self._test_qs(qs.filter(birthday__between=['1970-01-01', F.today()]), 100) + + def test_comparison_operators(self): + one = F.plus(1, 0) + two = F.plus(1, 1) + self._test_func(one > one, 0) + self._test_func(two > one, 1) + self._test_func(one >= two, 0) + self._test_func(one >= one, 1) + self._test_func(one < one, 0) + self._test_func(one < two, 1) + self._test_func(two <= one, 0) + self._test_func(one <= one, 1) + self._test_func(one == two, 0) + self._test_func(one == one, 1) + self._test_func(one != one, 0) + self._test_func(one != two, 1) + + def test_arithmetic_operators(self): + one = F.plus(1, 0) + two = F.plus(1, 1) + # + + self._test_func(one + two, 3) + self._test_func(one + 2, 3) + self._test_func(2 + one, 3) + # - + self._test_func(one - two, -1) + self._test_func(one - 2, -1) + self._test_func(1 - two, -1) + # * + self._test_func(one * two, 2) + self._test_func(one * 2, 2) + self._test_func(1 * two, 2) + # / + self._test_func(one / two, 0.5) + self._test_func(one / 2, 0.5) + self._test_func(1 / two, 0.5) + # % + self._test_func(one % two, 1) + self._test_func(one % 2, 1) + self._test_func(1 % two, 1) + # sign + self._test_func(-one, -1) + self._test_func(--one, 1) + self._test_func(+one, 1) + + + + + + Color = Enum('Color', u'red blue green yellow brown white black') From f96bd22c38658149c1b7f9f2a78d905a1a548913 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Mon, 28 Jan 2019
09:51:53 +0200 Subject: [PATCH 03/41] Functions WIP --- src/infi/clickhouse_orm/fields.py | 24 +- src/infi/clickhouse_orm/funcs.py | 578 ++++++++++++++++++++++++++++++ src/infi/clickhouse_orm/query.py | 301 +--------------- tests/test_decimal_fields.py | 8 +- tests/test_funcs.py | 261 ++++++++++++++ tests/test_querysets.py | 135 +------ 6 files changed, 858 insertions(+), 449 deletions(-) create mode 100644 src/infi/clickhouse_orm/funcs.py create mode 100644 tests/test_funcs.py diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 83b738e..c768c0c 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -8,10 +8,10 @@ from calendar import timegm from decimal import Decimal, localcontext from .utils import escape, parse_array, comma_join -from .query import F +from .funcs import F, FunctionOperatorsMixin -class Field(object): +class Field(FunctionOperatorsMixin): ''' Abstract base class for all field types. ''' @@ -99,26 +99,6 @@ class Field(object): inner_field = getattr(inner_field, 'inner_field', None) return False - # Support comparison operators (for use in querysets) - - def __lt__(self, other): - return F.less(self, other) - - def __le__(self, other): - return F.lessOrEquals(self, other) - - def __eq__(self, other): - return F.equals(self, other) - - def __ne__(self, other): - return F.notEquals(self, other) - - def __gt__(self, other): - return F.greater(self, other) - - def __ge__(self, other): - return F.greaterOrEquals(self, other) - class StringField(Field): diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py new file mode 100644 index 0000000..fadda4a --- /dev/null +++ b/src/infi/clickhouse_orm/funcs.py @@ -0,0 +1,578 @@ +import six +from datetime import date, datetime, tzinfo +import functools + +from .utils import is_iterable, comma_join +from .query import Cond + + +def binary_operator(func): + """ + Decorates a function to mark it as a binary operator. 
+ """ + @functools.wraps(func) + def wrapper(*args, **kwargs): + ret = func(*args, **kwargs) + ret.is_binary_operator = True + return ret + return wrapper + + +class FunctionOperatorsMixin(object): + """ + A mixin for implementing Python operators using F objects. + """ + + # Comparison operators + + def __lt__(self, other): + return F.less(self, other) + + def __le__(self, other): + return F.lessOrEquals(self, other) + + def __eq__(self, other): + return F.equals(self, other) + + def __ne__(self, other): + return F.notEquals(self, other) + + def __gt__(self, other): + return F.greater(self, other) + + def __ge__(self, other): + return F.greaterOrEquals(self, other) + + # Arithmetic operators + + def __add__(self, other): + return F.plus(self, other) + + def __radd__(self, other): + return F.plus(other, self) + + def __sub__(self, other): + return F.minus(self, other) + + def __rsub__(self, other): + return F.minus(other, self) + + def __mul__(self, other): + return F.multiply(self, other) + + def __rmul__(self, other): + return F.multiply(other, self) + + def __div__(self, other): + return F.divide(self, other) + + def __rdiv__(self, other): + return F.divide(other, self) + + def __mod__(self, other): + return F.modulo(self, other) + + def __rmod__(self, other): + return F.modulo(other, self) + + def __neg__(self): + return F.negate(self) + + def __pos__(self): + return self + + # Logical operators + + def __and__(self, other): + return F._and(self, other) + + def __rand__(self, other): + return F._and(other, self) + + def __or__(self, other): + return F._or(self, other) + + def __ror__(self, other): + return F._or(other, self) + + def __xor__(self, other): + return F._xor(self, other) + + def __rxor__(self, other): + return F._xor(other, self) + + def __invert__(self): + return F._not(self) + + +class F(Cond, FunctionOperatorsMixin): + """ + Represents a database function call and its arguments. 
+ It doubles as a query condition when the function returns a boolean result. + """ + def __init__(self, name, *args): + self.name = name + self.args = args + self.is_binary_operator = False + + def to_sql(self, *args): + """ + Generates an SQL string for this function and its arguments. + For example if the function name is a symbol of a binary operator: + (2.54 * `height`) + For other functions: + gcd(12, 300) + """ + if self.is_binary_operator: + prefix = '' + sep = ' ' + self.name + ' ' + else: + prefix = self.name + sep = ', ' + arg_strs = (self.arg_to_sql(arg) for arg in self.args) + return prefix + '(' + sep.join(arg_strs) + ')' + + def arg_to_sql(self, arg): + """ + Converts a function argument to SQL string according to its type. + Supports functions, model fields, strings, dates, datetimes, booleans, + None, numbers, timezones, arrays/iterables. + """ + from .fields import Field, StringField, DateTimeField, DateField + if isinstance(arg, F): + return arg.to_sql() + if isinstance(arg, Field): + return "`%s`" % arg.name + if isinstance(arg, six.string_types): + return StringField().to_db_string(arg) + if isinstance(arg, datetime): + return "toDateTime(%s)" % DateTimeField().to_db_string(arg) + if isinstance(arg, date): + return "toDate('%s')" % arg.isoformat() + if isinstance(arg, bool): + return six.text_type(int(arg)) + if isinstance(arg, tzinfo): + return StringField().to_db_string(arg.tzname(None)) + if arg is None: + return 'NULL' + if is_iterable(arg): + return '[' + comma_join(self.arg_to_sql(x) for x in arg) + ']' + return six.text_type(arg) + + # Arithmetic functions + + @staticmethod + @binary_operator + def plus(a, b): + return F('+', a, b) + + @staticmethod + @binary_operator + def minus(a, b): + return F('-', a, b) + + @staticmethod + @binary_operator + def multiply(a, b): + return F('*', a, b) + + @staticmethod + @binary_operator + def divide(a, b): + return F('/', a, b) + + @staticmethod + def intDiv(a, b): + return F('intDiv', a, b) + + 
@staticmethod + def intDivOrZero(a, b): + return F('intDivOrZero', a, b) + + @staticmethod + @binary_operator + def modulo(a, b): + return F('%', a, b) + + @staticmethod + def negate(a): + return F('negate', a) + + @staticmethod + def abs(a): + return F('abs', a) + + @staticmethod + def gcd(a, b): + return F('gcd',a, b) + + @staticmethod + def lcm(a, b): + return F('lcm', a, b) + + # Comparison functions + + @staticmethod + @binary_operator + def equals(a, b): + return F('=', a, b) + + @staticmethod + @binary_operator + def notEquals(a, b): + return F('!=', a, b) + + @staticmethod + @binary_operator + def less(a, b): + return F('<', a, b) + + @staticmethod + @binary_operator + def greater(a, b): + return F('>', a, b) + + @staticmethod + @binary_operator + def lessOrEquals(a, b): + return F('<=', a, b) + + @staticmethod + @binary_operator + def greaterOrEquals(a, b): + return F('>=', a, b) + + # Logical functions (should be used as python operators: & | ^ ~) + + @staticmethod + @binary_operator + def _and(a, b): + return F('AND', a, b) + + @staticmethod + @binary_operator + def _or(a, b): + return F('OR', a, b) + + @staticmethod + def _xor(a, b): + return F('xor', a, b) + + @staticmethod + def _not(a): + return F('not', a) + + # Functions for working with dates and times + + @staticmethod + def toYear(d): + return F('toYear', d) + + @staticmethod + def toMonth(d): + return F('toMonth', d) + + @staticmethod + def toDayOfMonth(d): + return F('toDayOfMonth', d) + + @staticmethod + def toDayOfWeek(d): + return F('toDayOfWeek', d) + + @staticmethod + def toHour(d): + return F('toHour', d) + + @staticmethod + def toMinute(d): + return F('toMinute', d) + + @staticmethod + def toSecond(d): + return F('toSecond', d) + + @staticmethod + def toMonday(d): + return F('toMonday', d) + + @staticmethod + def toStartOfMonth(d): + return F('toStartOfMonth', d) + + @staticmethod + def toStartOfQuarter(d): + return F('toStartOfQuarter', d) + + @staticmethod + def toStartOfYear(d): + 
return F('toStartOfYear', d) + + @staticmethod + def toStartOfMinute(d): + return F('toStartOfMinute', d) + + @staticmethod + def toStartOfFiveMinute(d): + return F('toStartOfFiveMinute', d) + + @staticmethod + def toStartOfFifteenMinutes(d): + return F('toStartOfFifteenMinutes', d) + + @staticmethod + def toStartOfHour(d): + return F('toStartOfHour', d) + + @staticmethod + def toStartOfDay(d): + return F('toStartOfDay', d) + + @staticmethod + def toTime(d, timezone=''): + return F('toTime', d, timezone) + + @staticmethod + def toRelativeYearNum(d, timezone=''): + return F('toRelativeYearNum', d, timezone) + + @staticmethod + def toRelativeMonthNum(d, timezone=''): + return F('toRelativeMonthNum', d, timezone) + + @staticmethod + def toRelativeWeekNum(d, timezone=''): + return F('toRelativeWeekNum', d, timezone) + + @staticmethod + def toRelativeDayNum(d, timezone=''): + return F('toRelativeDayNum', d, timezone) + + @staticmethod + def toRelativeHourNum(d, timezone=''): + return F('toRelativeHourNum', d, timezone) + + @staticmethod + def toRelativeMinuteNum(d, timezone=''): + return F('toRelativeMinuteNum', d, timezone) + + @staticmethod + def toRelativeSecondNum(d, timezone=''): + return F('toRelativeSecondNum', d, timezone) + + @staticmethod + def now(): + return F('now') + + @staticmethod + def today(): + return F('today') + + @staticmethod + def yesterday(): + return F('yesterday') + + @staticmethod + def timeSlot(d): + return F('timeSlot', d) + + @staticmethod + def timeSlots(start_time, duration): + return F('timeSlots', start_time, F.toUInt32(duration)) + + @staticmethod + def formatDateTime(d, format, timezone=''): + return F('formatDateTime', d, format, timezone) + + # Type conversion functions + + @staticmethod + def toUInt8(x): + return F('toUInt8', x) + + @staticmethod + def toUInt16(x): + return F('toUInt16', x) + + @staticmethod + def toUInt32(x): + return F('toUInt32', x) + + @staticmethod + def toUInt64(x): + return F('toUInt64', x) + + 
@staticmethod + def toInt8(x): + return F('toInt8', x) + + @staticmethod + def toInt16(x): + return F('toInt16', x) + + @staticmethod + def toInt32(x): + return F('toInt32', x) + + @staticmethod + def toInt64(x): + return F('toInt64', x) + + @staticmethod + def toFloat32(x): + return F('toFloat32', x) + + @staticmethod + def toFloat64(x): + return F('toFloat64', x) + + @staticmethod + def toUInt8OrZero(x): + return F('toUInt8OrZero', x) + + @staticmethod + def toUInt16OrZero(x): + return F('toUInt16OrZero', x) + + @staticmethod + def toUInt32OrZero(x): + return F('toUInt32OrZero', x) + + @staticmethod + def toUInt64OrZero(x): + return F('toUInt64OrZero', x) + + @staticmethod + def toInt8OrZero(x): + return F('toInt8OrZero', x) + + @staticmethod + def toInt16OrZero(x): + return F('toInt16OrZero', x) + + @staticmethod + def toInt32OrZero(x): + return F('toInt32OrZero', x) + + @staticmethod + def toInt64OrZero(x): + return F('toInt64OrZero', x) + + @staticmethod + def toFloat32OrZero(x): + return F('toFloat32OrZero', x) + + @staticmethod + def toFloat64OrZero(x): + return F('toFloat64OrZero', x) + + @staticmethod + def toDecimal32(x, scale): + return F('toDecimal32', x, scale) + + @staticmethod + def toDecimal64(x, scale): + return F('toDecimal64', x, scale) + + @staticmethod + def toDecimal128(x, scale): + return F('toDecimal128', x, scale) + + @staticmethod + def toDate(x): + return F('toDate', x) + + @staticmethod + def toDateTime(x): + return F('toDateTime', x) + + @staticmethod + def toString(x): + return F('toString', x) + + @staticmethod + def toFixedString(s, length): + return F('toFixedString', s, length) + + @staticmethod + def toStringCutToZero(s): + return F('toStringCutToZero', s) + + @staticmethod + def CAST(x, type): + return F('CAST', x, type) + + # Functions for working with strings + + @staticmethod + def empty(s): + return F('empty', s) + + @staticmethod + def notEmpty(s): + return F('notEmpty', s) + + @staticmethod + def length(s): + return 
F('length', s) + + @staticmethod + def lengthUTF8(s): + return F('lengthUTF8', s) + + @staticmethod + def lower(s): + return F('lower', s) + + @staticmethod + def upper(s): + return F('upper', s) + + @staticmethod + def lowerUTF8(s): + return F('lowerUTF8', s) + + @staticmethod + def upperUTF8(s): + return F('upperUTF8', s) + + @staticmethod + def reverse(s): + return F('reverse', s) + + @staticmethod + def reverseUTF8(s): + return F('reverseUTF8', s) + + @staticmethod + def concat(*args): + return F('concat', *args) + + @staticmethod + def substring(s, offset, length): + return F('substring', s, offset, length) + + @staticmethod + def substringUTF8(s, offset, length): + return F('substringUTF8', s, offset, length) + + @staticmethod + def appendTrailingCharIfAbsent(s, c): + return F('appendTrailingCharIfAbsent', s, c) + + @staticmethod + def convertCharset(s, from_charset, to_charset): + return F('convertCharset', s, from_charset, to_charset) + + @staticmethod + def base64Encode(s): + return F('base64Encode', s) + + @staticmethod + def base64Decode(s): + return F('base64Decode', s) + + @staticmethod + def tryBase64Decode(s): + return F('tryBase64Decode', s) + diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 73a45d1..6e2c82f 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -5,8 +5,8 @@ import pytz from copy import copy from math import ceil from .engines import CollapsingMergeTree -from datetime import date, datetime -from .utils import comma_join, is_iterable +from datetime import date, datetime, tzinfo +from .utils import comma_join # TODO @@ -26,6 +26,7 @@ class Operator(object): raise NotImplementedError # pragma: no cover def _value_to_sql(self, field, value, quote=True): + from infi.clickhouse_orm.funcs import F if isinstance(value, F): return value.to_sql() return field.to_db_string(field.to_python(value, pytz.utc), quote) @@ -185,293 +186,6 @@ class FieldCond(Cond): return 
self._operator.to_sql(model_cls, self._field_name, self._value) -class F(Cond): - """ - Represents a database function call and its arguments. - It doubles as a query condition when the function returns a boolean result. - """ - - def __init__(self, name, *args): - self.name = name - self.args = args - - def to_sql(self, *args): - args_sql = comma_join(self.arg_to_sql(arg) for arg in self.args) - return self.name + '(' + args_sql + ')' - - def arg_to_sql(self, arg): - from .fields import Field, StringField, DateTimeField, DateField - if isinstance(arg, F): - return arg.to_sql() - if isinstance(arg, Field): - return "`%s`" % arg.name - if isinstance(arg, six.string_types): - return StringField().to_db_string(arg) - if isinstance(arg, datetime): - return DateTimeField().to_db_string(arg) - if isinstance(arg, date): - return DateField().to_db_string(arg) - if isinstance(arg, bool): - return six.text_type(int(arg)) - if arg is None: - return 'NULL' - if is_iterable(arg): - return '[' + comma_join(self.arg_to_sql(x) for x in arg) + ']' - return six.text_type(arg) - - # Support comparison operators with F objects - - def __lt__(self, other): - return F.less(self, other) - - def __le__(self, other): - return F.lessOrEquals(self, other) - - def __eq__(self, other): - return F.equals(self, other) - - def __ne__(self, other): - return F.notEquals(self, other) - - def __gt__(self, other): - return F.greater(self, other) - - def __ge__(self, other): - return F.greaterOrEquals(self, other) - - # Support arithmetic operations on F objects - - def __add__(self, other): - return F.plus(self, other) - - def __radd__(self, other): - return F.plus(other, self) - - def __sub__(self, other): - return F.minus(self, other) - - def __rsub__(self, other): - return F.minus(other, self) - - def __mul__(self, other): - return F.multiply(self, other) - - def __rmul__(self, other): - return F.multiply(other, self) - - def __div__(self, other): - return F.divide(self, other) - - def 
__rdiv__(self, other): - return F.divide(other, self) - - def __mod__(self, other): - return F.modulo(self, other) - - def __rmod__(self, other): - return F.modulo(other, self) - - def __neg__(self): - return F.negate(self) - - def __pos__(self): - return self - - # Arithmetic functions - - @staticmethod - def plus(a, b): - return F('plus', a, b) - - @staticmethod - def minus(a, b): - return F('minus', a, b) - - @staticmethod - def multiply(a, b): - return F('multiply', a, b) - - @staticmethod - def divide(a, b): - return F('divide', a, b) - - @staticmethod - def intDiv(a, b): - return F('intDiv', a, b) - - @staticmethod - def intDivOrZero(a, b): - return F('intDivOrZero', a, b) - - @staticmethod - def modulo(a, b): - return F('modulo', a, b) - - @staticmethod - def negate(a): - return F('negate', a) - - @staticmethod - def abs(a): - return F('abs', a) - - @staticmethod - def gcd(a, b): - return F('gcd',a, b) - - @staticmethod - def lcm(a, b): - return F('lcm', a, b) - - # Comparison functions - - @staticmethod - def equals(a, b): - return F('equals', a, b) - - @staticmethod - def notEquals(a, b): - return F('notEquals', a, b) - - @staticmethod - def less(a, b): - return F('less', a, b) - - @staticmethod - def greater(a, b): - return F('greater', a, b) - - @staticmethod - def lessOrEquals(a, b): - return F('lessOrEquals', a, b) - - @staticmethod - def greaterOrEquals(a, b): - return F('greaterOrEquals', a, b) - - # Functions for working with dates and times - - @staticmethod - def toYear(d): - return F('toYear', d) - - @staticmethod - def toMonth(d): - return F('toMonth', d) - - @staticmethod - def toDayOfMonth(d): - return F('toDayOfMonth', d) - - @staticmethod - def toDayOfWeek(d): - return F('toDayOfWeek', d) - - @staticmethod - def toHour(d): - return F('toHour', d) - - @staticmethod - def toMinute(d): - return F('toMinute', d) - - @staticmethod - def toSecond(d): - return F('toSecond', d) - - @staticmethod - def toMonday(d): - return F('toMonday', d) - - 
@staticmethod - def toStartOfMonth(d): - return F('toStartOfMonth', d) - - @staticmethod - def toStartOfQuarter(d): - return F('toStartOfQuarter', d) - - @staticmethod - def toStartOfYear(d): - return F('toStartOfYear', d) - - @staticmethod - def toStartOfMinute(d): - return F('toStartOfMinute', d) - - @staticmethod - def toStartOfFiveMinute(d): - return F('toStartOfFiveMinute', d) - - @staticmethod - def toStartOfFifteenMinutes(d): - return F('toStartOfFifteenMinutes', d) - - @staticmethod - def toStartOfHour(d): - return F('toStartOfHour', d) - - @staticmethod - def toStartOfDay(d): - return F('toStartOfDay', d) - - @staticmethod - def toTime(d): - return F('toTime', d) - - @staticmethod - def toRelativeYearNum(d, timezone=''): - return F('toRelativeYearNum', d, timezone) - - @staticmethod - def toRelativeMonthNum(d, timezone=''): - return F('toRelativeMonthNum', d, timezone) - - @staticmethod - def toRelativeWeekNum(d, timezone=''): - return F('toRelativeWeekNum', d, timezone) - - @staticmethod - def toRelativeDayNum(d, timezone=''): - return F('toRelativeDayNum', d, timezone) - - @staticmethod - def toRelativeHourNum(d, timezone=''): - return F('toRelativeHourNum', d, timezone) - - @staticmethod - def toRelativeMinuteNum(d, timezone=''): - return F('toRelativeMinuteNum', d, timezone) - - @staticmethod - def toRelativeSecondNum(d, timezone=''): - return F('toRelativeSecondNum', d, timezone) - - @staticmethod - def now(): - return F('now') - - @staticmethod - def today(): - return F('today') - - @staticmethod - def yesterday(d): - return F('yesterday') - - @staticmethod - def timeSlot(d): - return F('timeSlot', d) - - @staticmethod - def timeSlots(start_time, duration): - return F('timeSlots', start_time, duration) - - @staticmethod - def formatDateTime(d, format, timezone=''): - return F('formatDateTime', d, format, timezone) - - class Q(object): AND_MODE = ' AND ' @@ -542,6 +256,7 @@ class QuerySet(object): self._order_by = [] self._q = [] self._fields = 
model_cls.fields().keys() + self._extra = {} self._limits = None self._distinct = False self._final = False @@ -590,6 +305,8 @@ class QuerySet(object): fields = '*' if self._fields: fields = comma_join('`%s`' % field for field in self._fields) + for name, func in self._extra.items(): + fields += ', %s AS %s' % (func.to_sql(), name) ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else '' limit = '\nLIMIT %d, %d' % self._limits if self._limits else '' final = ' FINAL' if self._final else '' @@ -645,11 +362,17 @@ class QuerySet(object): qs._fields = field_names return qs + def extra(self, **kwargs): + qs = copy(self) + qs._extra = kwargs + return qs + def filter(self, *q, **filter_fields): """ Returns a copy of this queryset that includes only rows matching the conditions. Add q object to query if it specified. """ + from infi.clickhouse_orm.funcs import F qs = copy(self) qs._q = list(self._q) for arg in q: diff --git a/tests/test_decimal_fields.py b/tests/test_decimal_fields.py index db87d62..e285f1c 100644 --- a/tests/test_decimal_fields.py +++ b/tests/test_decimal_fields.py @@ -13,15 +13,11 @@ class DecimalFieldsTest(unittest.TestCase): def setUp(self): self.database = Database('test-db') - self.database.add_setting('allow_experimental_decimal_type', 1) try: self.database.create_table(DecimalModel) except ServerError as e: - if 'Unknown setting' in e.message: - # This ClickHouse version does not support decimals yet - raise unittest.SkipTest(e.message) - else: - raise + # This ClickHouse version does not support decimals yet + raise unittest.SkipTest(e.message) def tearDown(self): self.database.drop_database() diff --git a/tests/test_funcs.py b/tests/test_funcs.py new file mode 100644 index 0000000..d3c1ab7 --- /dev/null +++ b/tests/test_funcs.py @@ -0,0 +1,261 @@ +import unittest +from .base_test_with_data import * +from .test_querysets import SampleModel +from datetime import date, datetime, tzinfo, timedelta +from infi.clickhouse_orm.database 
import ServerError + + +class FuncsTestCase(TestCaseWithData): + + def setUp(self): + super(FuncsTestCase, self).setUp() + self.database.insert(self._sample_data()) + + def _test_qs(self, qs, expected_count): + logger.info(qs.as_sql()) + count = 0 + for instance in qs: + count += 1 + logger.info('\t[%d]\t%s' % (count, instance.to_dict())) + self.assertEqual(count, expected_count) + self.assertEqual(qs.count(), expected_count) + + def _test_func(self, func, expected_value=None): + sql = 'SELECT %s AS value' % func.to_sql() + logger.info(sql) + result = list(self.database.select(sql)) + logger.info('\t==> %s', result[0].value) + if expected_value is not None: + self.assertEqual(result[0].value, expected_value) + + def test_func_to_sql(self): + # No args + self.assertEqual(F('func').to_sql(), 'func()') + # String args + self.assertEqual(F('func', "Wendy's", u"Wendy's").to_sql(), "func('Wendy\\'s', 'Wendy\\'s')") + # Numeric args + self.assertEqual(F('func', 1, 1.1, Decimal('3.3')).to_sql(), "func(1, 1.1, 3.3)") + # Date args + self.assertEqual(F('func', date(2018, 12, 31)).to_sql(), "func(toDate('2018-12-31'))") + # Datetime args + self.assertEqual(F('func', datetime(2018, 12, 31)).to_sql(), "func(toDateTime('1546214400'))") + # Boolean args + self.assertEqual(F('func', True, False).to_sql(), "func(1, 0)") + # Timezone args + self.assertEqual(F('func', pytz.utc).to_sql(), "func('UTC')") + self.assertEqual(F('func', pytz.timezone('Europe/Athens')).to_sql(), "func('Europe/Athens')") + # Null args + self.assertEqual(F('func', None).to_sql(), "func(NULL)") + # Fields as args + self.assertEqual(F('func', SampleModel.color).to_sql(), "func(`color`)") + # Funcs as args + self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))') + # Iterables as args + x = [1, 'z', F('foo', 17)] + for y in [x, tuple(x), iter(x)]: + self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)") + self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([[1, 2], [3, 
4]])")
+        # Binary operator functions
+        self.assertEqual(F.plus(1, 2).to_sql(), "(1 + 2)")
+        self.assertEqual(F.lessOrEquals(1, 2).to_sql(), "(1 <= 2)")
+
+    def test_filter_float_field(self):
+        qs = Person.objects_in(self.database)
+        # Height > 2
+        self._test_qs(qs.filter(F.greater(Person.height, 2)), 0)
+        self._test_qs(qs.filter(Person.height > 2), 0)
+        # Height > 1.61
+        self._test_qs(qs.filter(F.greater(Person.height, 1.61)), 96)
+        self._test_qs(qs.filter(Person.height > 1.61), 96)
+        # Height < 1.61
+        self._test_qs(qs.filter(F.less(Person.height, 1.61)), 4)
+        self._test_qs(qs.filter(Person.height < 1.61), 4)
+
+    def test_filter_date_field(self):
+        qs = Person.objects_in(self.database)
+        # People born on the 30th
+        self._test_qs(qs.filter(F('equals', F('toDayOfMonth', Person.birthday), 30)), 3)
+        self._test_qs(qs.filter(F('toDayOfMonth', Person.birthday) == 30), 3)
+        self._test_qs(qs.filter(F.toDayOfMonth(Person.birthday) == 30), 3)
+        # People born on Sunday
+        self._test_qs(qs.filter(F('equals', F('toDayOfWeek', Person.birthday), 7)), 18)
+        self._test_qs(qs.filter(F('toDayOfWeek', Person.birthday) == 7), 18)
+        self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18)
+        # People born on 1976-10-01
+        self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1)
+        self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1)
+        self._test_qs(qs.filter(Person.birthday == date(1976, 10, 1)), 1)
+
+    def test_func_as_field_value(self):
+        qs = Person.objects_in(self.database)
+        self._test_qs(qs.filter(height__gt=F.plus(1, 0.61)), 96)
+        self._test_qs(qs.exclude(birthday=F.today()), 100)
+        self._test_qs(qs.filter(birthday__between=['1970-01-01', F.today()]), 100)
+
+    def test_comparison_operators(self):
+        one = F.plus(1, 0)
+        two = F.plus(1, 1)
+        self._test_func(one > one, 0)
+        self._test_func(two > one, 1)
+        self._test_func(one >= two, 0)
+        self._test_func(one >= one, 1)
+        self._test_func(one < one, 0)
+        self._test_func(one < two, 1)
+        
self._test_func(two <= one, 0) + self._test_func(one <= one, 1) + self._test_func(one == two, 0) + self._test_func(one == one, 1) + self._test_func(one != one, 0) + self._test_func(one != two, 1) + + def test_arithmetic_operators(self): + one = F.plus(1, 0) + two = F.plus(1, 1) + # + + self._test_func(one + two, 3) + self._test_func(one + 2, 3) + self._test_func(2 + one, 3) + # - + self._test_func(one - two, -1) + self._test_func(one - 2, -1) + self._test_func(1 - two, -1) + # * + self._test_func(one * two, 2) + self._test_func(one * 2, 2) + self._test_func(1 * two, 2) + # / + self._test_func(one / two, 0.5) + self._test_func(one / 2, 0.5) + self._test_func(1 / two, 0.5) + # % + self._test_func(one % two, 1) + self._test_func(one % 2, 1) + self._test_func(1 % two, 1) + # sign + self._test_func(-one, -1) + self._test_func(--one, 1) + self._test_func(+one, 1) + + def test_logical_operators(self): + one = F.plus(1, 0) + two = F.plus(1, 1) + # & + self._test_func(one & two, 1) + self._test_func(one & two, 1) + self._test_func(one & 0, 0) + self._test_func(0 & one, 0) + # | + self._test_func(one | two, 1) + self._test_func(one | 0, 1) + self._test_func(0 | one, 1) + # ^ + self._test_func(one ^ one, 0) + self._test_func(one ^ 0, 1) + self._test_func(0 ^ one, 1) + # ~ + self._test_func(~one, 0) + self._test_func(~~one, 1) + # compound + self._test_func(one & 0 | two, 1) + self._test_func(one & 0 & two, 0) + self._test_func(one & 0 | 0, 0) + self._test_func((one | 0) & two, 1) + + def test_date_functions(self): + d = date(2018, 12, 31) + dt = datetime(2018, 12, 31, 11, 22, 33) + self._test_func(F.toYear(d), 2018) + self._test_func(F.toYear(dt), 2018) + self._test_func(F.toMonth(d), 12) + self._test_func(F.toMonth(dt), 12) + self._test_func(F.toDayOfMonth(d), 31) + self._test_func(F.toDayOfMonth(dt), 31) + self._test_func(F.toDayOfWeek(d), 1) + self._test_func(F.toDayOfWeek(dt), 1) + self._test_func(F.toHour(dt), 11) + self._test_func(F.toMinute(dt), 22) + 
self._test_func(F.toSecond(dt), 33) + self._test_func(F.toMonday(d), d) + self._test_func(F.toMonday(dt), d) + self._test_func(F.toStartOfMonth(d), date(2018, 12, 1)) + self._test_func(F.toStartOfMonth(dt), date(2018, 12, 1)) + self._test_func(F.toStartOfQuarter(d), date(2018, 10, 1)) + self._test_func(F.toStartOfQuarter(dt), date(2018, 10, 1)) + self._test_func(F.toStartOfYear(d), date(2018, 1, 1)) + self._test_func(F.toStartOfYear(dt), date(2018, 1, 1)) + self._test_func(F.toStartOfMinute(dt), datetime(2018, 12, 31, 11, 22, 0, tzinfo=pytz.utc)) + self._test_func(F.toStartOfFiveMinute(dt), datetime(2018, 12, 31, 11, 20, 0, tzinfo=pytz.utc)) + self._test_func(F.toStartOfFifteenMinutes(dt), datetime(2018, 12, 31, 11, 15, 0, tzinfo=pytz.utc)) + self._test_func(F.toStartOfHour(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)) + self._test_func(F.toStartOfDay(dt), datetime(2018, 12, 31, 0, 0, 0, tzinfo=pytz.utc)) + self._test_func(F.toTime(dt), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toTime(dt, pytz.utc), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toTime(dt, 'Europe/Athens'), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toTime(dt, pytz.timezone('Europe/Athens')), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toRelativeYearNum(dt), 2018) + self._test_func(F.toRelativeYearNum(dt, 'Europe/Athens'), 2018) + self._test_func(F.toRelativeMonthNum(dt), 2018 * 12 + 12) + self._test_func(F.toRelativeMonthNum(dt, 'Europe/Athens'), 2018 * 12 + 12) + self._test_func(F.toRelativeWeekNum(dt), 2557) + self._test_func(F.toRelativeWeekNum(dt, 'Europe/Athens'), 2557) + self._test_func(F.toRelativeDayNum(dt), 17896) + self._test_func(F.toRelativeDayNum(dt, 'Europe/Athens'), 17896) + self._test_func(F.toRelativeHourNum(dt), 429515) + self._test_func(F.toRelativeHourNum(dt, 'Europe/Athens'), 429515) + self._test_func(F.toRelativeMinuteNum(dt), 25770922) + 
self._test_func(F.toRelativeMinuteNum(dt, 'Europe/Athens'), 25770922) + self._test_func(F.toRelativeSecondNum(dt), 1546255353) + self._test_func(F.toRelativeSecondNum(dt, 'Europe/Athens'), 1546255353) + self._test_func(F.now(), datetime.utcnow().replace(tzinfo=pytz.utc, microsecond=0)) + self._test_func(F.today(), date.today()) + self._test_func(F.yesterday(), date.today() - timedelta(days=1)) + self._test_func(F.timeSlot(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)) + self._test_func(F.timeSlots(dt, 300), [datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)]) + self._test_func(F.formatDateTime(dt, '%D %T'), '12/31/18 11:22:33') + self._test_func(F.formatDateTime(dt, '%D %T', 'Europe/Athens'), '12/31/18 13:22:33') + + def test_type_conversion_functions(self): + for f in (F.toUInt8, F.toUInt16, F.toUInt32, F.toUInt64, F.toInt8, F.toInt16, F.toInt32, F.toInt64, F.toFloat32, F.toFloat64): + self._test_func(f(17), 17) + self._test_func(f('17'), 17) + for f in (F.toUInt8OrZero, F.toUInt16OrZero, F.toUInt32OrZero, F.toUInt64OrZero, F.toInt8OrZero, F.toInt16OrZero, F.toInt32OrZero, F.toInt64OrZero, F.toFloat32OrZero, F.toFloat64OrZero): + self._test_func(f('17'), 17) + self._test_func(f('a'), 0) + for f in (F.toDecimal32, F.toDecimal64, F.toDecimal128): + self._test_func(f(17.17, 2), Decimal('17.17')) + self._test_func(f('17.17', 2), Decimal('17.17')) + self._test_func(F.toDate('2018-12-31'), date(2018, 12, 31)) + self._test_func(F.toDateTime('2018-12-31 11:22:33'), datetime(2018, 12, 31, 11, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toString(123), '123') + self._test_func(F.toFixedString('123', 5), '123') + self._test_func(F.toStringCutToZero('123\0'), '123') + self._test_func(F.CAST(17, 'String'), '17') + + def test_string_functions(self): + self._test_func(F.empty(''), 1) + self._test_func(F.empty('x'), 0) + self._test_func(F.notEmpty(''), 0) + self._test_func(F.notEmpty('x'), 1) + self._test_func(F.length('x'), 1) + self._test_func(F.lengthUTF8('x'), 1) + 
self._test_func(F.lower('Ab'), 'ab') + self._test_func(F.upper('Ab'), 'AB') + self._test_func(F.lowerUTF8('Ab'), 'ab') + self._test_func(F.upperUTF8('Ab'), 'AB') + self._test_func(F.reverse('Ab'), 'bA') + self._test_func(F.reverseUTF8('Ab'), 'bA') + self._test_func(F.concat('Ab', 'Cd', 'Ef'), 'AbCdEf') + self._test_func(F.substring('123456', 3, 2), '34') + self._test_func(F.substringUTF8('123456', 3, 2), '34') + self._test_func(F.appendTrailingCharIfAbsent('Hello', '!'), 'Hello!') + self._test_func(F.appendTrailingCharIfAbsent('Hello!', '!'), 'Hello!') + self._test_func(F.convertCharset(F.convertCharset('Hello', 'latin1', 'utf16'), 'utf16', 'latin1'), 'Hello') + + def test_base64_functions(self): + try: + self._test_func(F.base64Decode(F.base64Encode('Hello')), 'Hello') + self._test_func(F.tryBase64Decode(F.base64Encode('Hello')), 'Hello') + self._test_func(F.tryBase64Decode('zzz'), '') + except ServerError as e: + # ClickHouse version that doesn't support these functions + raise unittest.SkipTest(e.message) diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 4858e03..de31c0f 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals, print_function import unittest from infi.clickhouse_orm.database import Database -from infi.clickhouse_orm.query import Q, F +from infi.clickhouse_orm.query import Q +from infi.clickhouse_orm.funcs import F from .base_test_with_data import * -from datetime import date, datetime +from datetime import date, datetime, timedelta from logging import getLogger logger = getLogger('tests') @@ -432,136 +433,6 @@ class AggregateTestCase(TestCaseWithData): self.assertEqual(qs.conditions_as_sql(), 'the__next__number > 1') -class FuncsTestCase(TestCaseWithData): - - def setUp(self): - super(FuncsTestCase, self).setUp() - self.database.insert(self._sample_data()) - - def _test_qs(self, qs, expected_count): - logger.info(qs.as_sql()) - count = 0 - for instance in qs: - 
count += 1 - logger.info('\t[%d]\t%s' % (count, instance.to_dict())) - self.assertEqual(count, expected_count) - self.assertEqual(qs.count(), expected_count) - - def _test_func(self, func, expected_value=None): - sql = 'SELECT %s AS value' % func.to_sql() - logger.info(sql) - result = list(self.database.select(sql)) - logger.info('\t==> %s', result[0].value) - if expected_value is not None: - self.assertEqual(result[0].value, expected_value) - - def test_func_to_sql(self): - # No args - self.assertEqual(F('func').to_sql(), 'func()') - # String args - self.assertEqual(F('func', "Wendy's", u"Wendy's").to_sql(), "func('Wendy\\'s', 'Wendy\\'s')") - # Numeric args - self.assertEqual(F('func', 1, 1.1, Decimal('3.3')).to_sql(), "func(1, 1.1, 3.3)") - # Date args - self.assertEqual(F('func', date(2018, 12, 31)).to_sql(), "func('2018-12-31')") - # Datetime args - self.assertEqual(F('func', datetime(2018, 12, 31)).to_sql(), "func('1546214400')") - # Boolean args - self.assertEqual(F('func', True, False).to_sql(), "func(1, 0)") - # Null args - self.assertEqual(F('func', None).to_sql(), "func(NULL)") - # Fields as args - self.assertEqual(F('func', SampleModel.color).to_sql(), "func(`color`)") - # Funcs as args - self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))') - # Iterables as args - x = [1, 'z', F('foo', 17)] - for y in [x, tuple(x), iter(x)]: - self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)") - self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([[1, 2], [3, 4]])") - - def test_filter_float_field(self): - qs = Person.objects_in(self.database) - # Height > 2 - self._test_qs(qs.filter(F.greater(Person.height, 2)), 0) - self._test_qs(qs.filter(Person.height > 2), 0) - # Height > 1.61 - self._test_qs(qs.filter(F.greater(Person.height, 1.61)), 96) - self._test_qs(qs.filter(Person.height > 1.61), 96) - # Height < 1.61 - self._test_qs(qs.filter(F.less(Person.height, 1.61)), 4) - self._test_qs(qs.filter(Person.height < 1.61), 4) 
- - def test_filter_date_field(self): - qs = Person.objects_in(self.database) - # People born on the 30th - self._test_qs(qs.filter(F('equals', F('toDayOfMonth', Person.birthday), 30)), 3) - self._test_qs(qs.filter(F('toDayOfMonth', Person.birthday) == 30), 3) - self._test_qs(qs.filter(F.toDayOfMonth(Person.birthday) == 30), 3) - # People born on Sunday - self._test_qs(qs.filter(F('equals', F('toDayOfWeek', Person.birthday), 7)), 18) - self._test_qs(qs.filter(F('toDayOfWeek', Person.birthday) == 7), 18) - self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18) - # People born on 1976-10-01 - self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1) - self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 01))), 1) - self._test_qs(qs.filter(Person.birthday == date(1976, 10, 01)), 1) - - def test_func_as_field_value(self): - qs = Person.objects_in(self.database) - self._test_qs(qs.filter(height__gt=F.plus(1, 0.61)), 96) - self._test_qs(qs.exclude(birthday=F.today()), 100) - self._test_qs(qs.filter(birthday__between=['1970-01-01', F.today()]), 100) - - def test_comparison_operators(self): - one = F.plus(1, 0) - two = F.plus(1, 1) - self._test_func(one > one, 0) - self._test_func(two > one, 1) - self._test_func(one >= two, 0) - self._test_func(one >= one, 1) - self._test_func(one < one, 0) - self._test_func(one < two, 1) - self._test_func(two <= one, 0) - self._test_func(one <= one, 1) - self._test_func(one == two, 0) - self._test_func(one == one, 1) - self._test_func(one != one, 0) - self._test_func(one != two, 1) - - def test_arithmetic_operators(self): - one = F.plus(1, 0) - two = F.plus(1, 1) - # + - self._test_func(one + two, 3) - self._test_func(one + 2, 3) - self._test_func(2 + one, 3) - # - - self._test_func(one - two, -1) - self._test_func(one - 2, -1) - self._test_func(1 - two, -1) - # * - self._test_func(one * two, 2) - self._test_func(one * 2, 2) - self._test_func(1 * two, 2) - # / - self._test_func(one / two, 0.5) - 
self._test_func(one / 2, 0.5) - self._test_func(1 / two, 0.5) - # % - self._test_func(one % two, 1) - self._test_func(one % 2, 1) - self._test_func(1 % two, 1) - # sign - self._test_func(-one, -1) - self._test_func(--one, 1) - self._test_func(+one, 1) - - - - - - Color = Enum('Color', u'red blue green yellow brown white black') From 8a21e028628fc9e323339088c69c00e4ea3f58bf Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 13 Jul 2019 22:54:16 +0300 Subject: [PATCH 04/41] Functions WIP --- src/infi/clickhouse_orm/funcs.py | 623 +++++++++++++++++++++++++++++++ tests/test_funcs.py | 187 ++++++++++ 2 files changed, 810 insertions(+) diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index fadda4a..29c3728 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -576,3 +576,626 @@ class F(Cond, FunctionOperatorsMixin): def tryBase64Decode(s): return F('tryBase64Decode', s) + # Functions for searching and replacing in strings + + @staticmethod + def replace(haystack, pattern, replacement): + return F('replace', haystack, pattern, replacement) + replaceAll = replace + + @staticmethod + def replaceAll(haystack, pattern, replacement): + return F('replaceAll', haystack, pattern, replacement) + + @staticmethod + def replaceOne(haystack, pattern, replacement): + return F('replaceOne', haystack, pattern, replacement) + + @staticmethod + def replaceRegexpAll(haystack, pattern, replacement): + return F('replaceRegexpAll', haystack, pattern, replacement) + + @staticmethod + def replaceRegexpOne(haystack, pattern, replacement): + return F('replaceRegexpOne', haystack, pattern, replacement) + + @staticmethod + def regexpQuoteMeta(x): + return F('regexpQuoteMeta', x) + + # Mathematical functions + + @staticmethod + def e(): + return F('e') + + @staticmethod + def pi(): + return F('pi') + + @staticmethod + def exp(x): + return F('exp', x) + + @staticmethod + def log(x): + return F('log', x) + ln = log + + @staticmethod + 
def exp2(x): + return F('exp2', x) + + @staticmethod + def log2(x): + return F('log2', x) + + @staticmethod + def exp10(x): + return F('exp10', x) + + @staticmethod + def log10(x): + return F('log10', x) + + @staticmethod + def sqrt(x): + return F('sqrt', x) + + @staticmethod + def cbrt(x): + return F('cbrt', x) + + @staticmethod + def erf(x): + return F('erf', x) + + @staticmethod + def erfc(x): + return F('erfc', x) + + @staticmethod + def lgamma(x): + return F('lgamma', x) + + @staticmethod + def tgamma(x): + return F('tgamma', x) + + @staticmethod + def sin(x): + return F('sin', x) + + @staticmethod + def cos(x): + return F('cos', x) + + @staticmethod + def tan(x): + return F('tan', x) + + @staticmethod + def asin(x): + return F('asin', x) + + @staticmethod + def acos(x): + return F('acos', x) + + @staticmethod + def atan(x): + return F('atan', x) + + @staticmethod + def power(x, y): + return F('power', x, y) + pow = power + + @staticmethod + def intExp10(x): + return F('intExp10', x) + + @staticmethod + def intExp2(x): + return F('intExp2', x) + + # Rounding functions + + @staticmethod + def floor(x, n=None): + return F('floor', x, n) if n else F('floor', x) + + @staticmethod + def ceiling(x, n=None): + return F('ceiling', x, n) if n else F('ceiling', x) + ceil = ceiling + + @staticmethod + def round(x, n=None): + return F('round', x, n) if n else F('round', x) + + @staticmethod + def roundAge(x): + return F('roundAge', x) + + @staticmethod + def roundDown(x, y): + return F('roundDown', x, y) + + @staticmethod + def roundDuration(x): + return F('roundDuration', x) + + @staticmethod + def roundToExp2(x): + return F('roundToExp2', x) + + # Functions for working with arrays + + @staticmethod + def emptyArrayDate(): + return F('emptyArrayDate') + + @staticmethod + def emptyArrayDateTime(): + return F('emptyArrayDateTime') + + @staticmethod + def emptyArrayFloat32(): + return F('emptyArrayFloat32') + + @staticmethod + def emptyArrayFloat64(): + return 
F('emptyArrayFloat64') + + @staticmethod + def emptyArrayInt16(): + return F('emptyArrayInt16') + + @staticmethod + def emptyArrayInt32(): + return F('emptyArrayInt32') + + @staticmethod + def emptyArrayInt64(): + return F('emptyArrayInt64') + + @staticmethod + def emptyArrayInt8(): + return F('emptyArrayInt8') + + @staticmethod + def emptyArrayString(): + return F('emptyArrayString') + + @staticmethod + def emptyArrayUInt16(): + return F('emptyArrayUInt16') + + @staticmethod + def emptyArrayUInt32(): + return F('emptyArrayUInt32') + + @staticmethod + def emptyArrayUInt64(): + return F('emptyArrayUInt64') + + @staticmethod + def emptyArrayUInt8(): + return F('emptyArrayUInt8') + + @staticmethod + def emptyArrayToSingle(x): + return F('emptyArrayToSingle', x) + + @staticmethod + def range(n): + return F('range', n) + + @staticmethod + def array(*args): + return F('array', *args) + + @staticmethod + def arrayConcat(*args): + return F('arrayConcat', *args) + + @staticmethod + def arrayElement(arr, n): + return F('arrayElement', arr, n) + + @staticmethod + def has(arr, x): + return F('has', arr, x) + + @staticmethod + def hasAll(arr, x): + return F('hasAll', arr, x) + + @staticmethod + def hasAny(arr, x): + return F('hasAny', arr, x) + + @staticmethod + def indexOf(arr, x): + return F('indexOf', arr, x) + + @staticmethod + def countEqual(arr, x): + return F('countEqual', arr, x) + + @staticmethod + def arrayEnumerate(arr): + return F('arrayEnumerate', arr) + + @staticmethod + def arrayEnumerateDense(*args): + return F('arrayEnumerateDense', *args) + + @staticmethod + def arrayEnumerateDenseRanked(*args): + return F('arrayEnumerateDenseRanked', *args) + + @staticmethod + def arrayEnumerateUniq(*args): + return F('arrayEnumerateUniq', *args) + + @staticmethod + def arrayEnumerateUniqRanked(*args): + return F('arrayEnumerateUniqRanked', *args) + + @staticmethod + def arrayPopBack(arr): + return F('arrayPopBack', arr) + + @staticmethod + def arrayPopFront(arr): + return 
F('arrayPopFront', arr) + + @staticmethod + def arrayPushBack(arr, x): + return F('arrayPushBack', arr, x) + + @staticmethod + def arrayPushFront(arr, x): + return F('arrayPushFront', arr, x) + + @staticmethod + def arrayResize(array, size, extender=None): + return F('arrayResize',array, size, extender) if extender is not None else F('arrayResize', array, size) + + @staticmethod + def arraySlice(array, offset, length=None): + return F('arraySlice',array, offset, length) if length is not None else F('arraySlice', array, offset) + + @staticmethod + def arrayUniq(*args): + return F('arrayUniq', *args) + + @staticmethod + def arrayJoin(arr): + return F('arrayJoin', arr) + + @staticmethod + def arrayDifference(arr): + return F('arrayDifference', arr) + + @staticmethod + def arrayDistinct(x): + return F('arrayDistinct', x) + + @staticmethod + def arrayIntersect(*args): + return F('arrayIntersect', *args) + + @staticmethod + def arrayReduce(agg_func_name, *args): + return F('arrayReduce', agg_func_name, *args) + + @staticmethod + def arrayReverse(arr): + return F('arrayReverse', arr) + + # Functions for splitting and merging strings and arrays + + @staticmethod + def splitByChar(sep, s): + return F('splitByChar', sep, s) + + @staticmethod + def splitByString(sep, s): + return F('splitByString', sep, s) + + @staticmethod + def arrayStringConcat(arr, sep=None): + return F('arrayStringConcat', arr, sep) if sep else F('arrayStringConcat', arr) + + @staticmethod + def alphaTokens(s): + return F('alphaTokens', s) + + # Bit functions + + @staticmethod + def bitAnd(x, y): + return F('bitAnd', x, y) + + @staticmethod + def bitNot(x): + return F('bitNot', x) + + @staticmethod + def bitOr(x, y): + return F('bitOr', x, y) + + @staticmethod + def bitRotateLeft(x, y): + return F('bitRotateLeft', x, y) + + @staticmethod + def bitRotateRight(x, y): + return F('bitRotateRight', x, y) + + @staticmethod + def bitShiftLeft(x, y): + return F('bitShiftLeft', x, y) + + @staticmethod + def 
bitShiftRight(x, y): + return F('bitShiftRight', x, y) + + @staticmethod + def bitTest(x, y): + return F('bitTest', x, y) + + @staticmethod + def bitTestAll(x, *args): + return F('bitTestAll', x, *args) + + @staticmethod + def bitTestAny(x, *args): + return F('bitTestAny', x, *args) + + @staticmethod + def bitXor(x, y): + return F('bitXor', x, y) + + # Bitmap functions + + @staticmethod + def bitmapAnd(x, y): + return F('bitmapAnd', x, y) + + @staticmethod + def bitmapAndCardinality(x, y): + return F('bitmapAndCardinality', x, y) + + @staticmethod + def bitmapAndnot(x, y): + return F('bitmapAndnot', x, y) + + @staticmethod + def bitmapAndnotCardinality(x, y): + return F('bitmapAndnotCardinality', x, y) + + @staticmethod + def bitmapBuild(x): + return F('bitmapBuild', x) + + @staticmethod + def bitmapCardinality(x): + return F('bitmapCardinality', x) + + @staticmethod + def bitmapContains(haystack, needle): + return F('bitmapContains', haystack, needle) + + @staticmethod + def bitmapHasAll(x, y): + return F('bitmapHasAll', x, y) + + @staticmethod + def bitmapHasAny(x, y): + return F('bitmapHasAny', x, y) + + @staticmethod + def bitmapOr(x, y): + return F('bitmapOr', x, y) + + @staticmethod + def bitmapOrCardinality(x, y): + return F('bitmapOrCardinality', x, y) + + @staticmethod + def bitmapToArray(x): + return F('bitmapToArray', x) + + @staticmethod + def bitmapXor(x, y): + return F('bitmapXor', x, y) + + @staticmethod + def bitmapXorCardinality(x, y): + return F('bitmapXorCardinality', x, y) + + # Hash functions + + @staticmethod + def halfMD5(*args): + return F('halfMD5', *args) + + @staticmethod + def MD5(s): + return F('MD5', s) + + @staticmethod + def sipHash128(*args): + return F('sipHash128', *args) + + @staticmethod + def sipHash64(*args): + return F('sipHash64', *args) + + @staticmethod + def cityHash64(*args): + return F('cityHash64', *args) + + @staticmethod + def intHash32(x): + return F('intHash32', x) + + @staticmethod + def intHash64(x): + return 
F('intHash64', x) + + @staticmethod + def SHA1(s): + return F('SHA1', s) + + @staticmethod + def SHA224(s): + return F('SHA224', s) + + @staticmethod + def SHA256(s): + return F('SHA256', s) + + @staticmethod + def URLHash(url, n=None): + return F('URLHash', url, n) if n is not None else F('URLHash', url) + + @staticmethod + def farmHash64(*args): + return F('farmHash64',*args) + + @staticmethod + def javaHash(s): + return F('javaHash', s) + + @staticmethod + def hiveHash(s): + return F('hiveHash', s) + + @staticmethod + def metroHash64(*args): + return F('metroHash64', *args) + + @staticmethod + def jumpConsistentHash(x, buckets): + return F('jumpConsistentHash', x, buckets) + + @staticmethod + def murmurHash2_32(*args): + return F('murmurHash2_32', *args) + + @staticmethod + def murmurHash2_64(*args): + return F('murmurHash2_64', *args) + + @staticmethod + def murmurHash3_32(*args): + return F('murmurHash3_32', *args) + + @staticmethod + def murmurHash3_64(*args): + return F('murmurHash3_64', *args) + + @staticmethod + def murmurHash3_128(s): + return F('murmurHash3_128', s) + + @staticmethod + def xxHash32(*args): + return F('xxHash32', *args) + + @staticmethod + def xxHash64(*args): + return F('xxHash64', *args) + + # Functions for generating pseudo-random numbers + + @staticmethod + def rand(dummy=None): + return F('rand') if dummy is None else F('rand', dummy) + + @staticmethod + def rand64(dummy=None): + return F('rand64') if dummy is None else F('rand64', dummy) + + @staticmethod + def randConstant(dummy=None): + return F('randConstant') if dummy is None else F('randConstant', dummy) + + # Encoding functions + + @staticmethod + def hex(x): + return F('hex', x) + + @staticmethod + def unhex(x): + return F('unhex', x) + + @staticmethod + def UUIDNumToString(s): + return F('UUIDNumToString', s) + + @staticmethod + def UUIDStringToNum(s): + return F('UUIDStringToNum', s) + + @staticmethod + def bitmaskToArray(x): + return F('bitmaskToArray', x) + + 
@staticmethod + def bitmaskToList(x): + return F('bitmaskToList', x) + + + + + + + + # Higher-order functions + + # arrayMap: Function arrayMap needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42) + + @staticmethod + def arrayCount(*args): + return F('arrayCount', *args) + + @staticmethod + def arraySum(*args): + return F('arraySum', *args) + + @staticmethod + def arrayExists(*args): + return F('arrayExists', *args) + + @staticmethod + def arrayAll(*args): + return F('arrayAll', *args) + + # arrayFilter: Function arrayFilter needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42) + + # arrayFirst: Function arrayFirst needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42) + + # arrayFirstIndex: Function arrayFirstIndex needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42) + + @staticmethod + def arrayCumSum(*args): + return F('arrayCumSum', *args) + + @staticmethod + def arrayCumSumNonNegative(*args): + return F('arrayCumSumNonNegative', *args) + + @staticmethod + def arraySort(*args): + return F('arraySort', *args) + + @staticmethod + def arrayReverseSort(*args): + return F('arrayReverseSort', *args) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 573c6d6..32170ce 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -27,6 +27,7 @@ class FuncsTestCase(TestCaseWithData): logger.info('\t==> %s', result[0].value if result else '') if expected_value is not None: self.assertEqual(result[0].value, expected_value) + return result[0].value if result else None def test_func_to_sql(self): # No args @@ -259,3 +260,189 @@ class FuncsTestCase(TestCaseWithData): except ServerError as e: # ClickHouse version that doesn't support these functions raise unittest.SkipTest(e.message) + + def test_replace_functions(self): + haystack = 'hello' + self._test_func(F.replace(haystack, 'l', 'L'), 'heLLo') + self._test_func(F.replaceAll(haystack, 'l', 'L'), 'heLLo') + 
self._test_func(F.replaceOne(haystack, 'l', 'L'), 'heLlo') + self._test_func(F.replaceRegexpAll(haystack, '[eo]', 'X'), 'hXllX') + self._test_func(F.replaceRegexpOne(haystack, '[eo]', 'X'), 'hXllo') + self._test_func(F.regexpQuoteMeta('[eo]'), '\\[eo\\]') + + def test_math_functions(self): + x = 17 + y = 3 + self._test_func(F.e()) + self._test_func(F.pi()) + self._test_func(F.exp(x)) + self._test_func(F.exp10(x)) + self._test_func(F.exp2(x)) + self._test_func(F.log(x)) + self._test_func(F.log10(x)) + self._test_func(F.log2(x)) + self._test_func(F.ln(x)) + self._test_func(F.sqrt(x)) + self._test_func(F.cbrt(x)) + self._test_func(F.erf(x)) + self._test_func(F.erfc(x)) + self._test_func(F.lgamma(x)) + self._test_func(F.tgamma(x)) + self._test_func(F.sin(x)) + self._test_func(F.cos(x)) + self._test_func(F.tan(x)) + self._test_func(F.asin(x)) + self._test_func(F.acos(x)) + self._test_func(F.atan(x)) + self._test_func(F.pow(x, y)) + self._test_func(F.power(x, y)) + self._test_func(F.intExp10(x)) + self._test_func(F.intExp2(x)) + + def test_rounding_functions(self): + x = 22.22222 + n = 3 + self._test_func(F.floor(x), 22) + self._test_func(F.floor(x, n), 22.222) + self._test_func(F.ceil(x), 23) + self._test_func(F.ceil(x, n), 22.223) + self._test_func(F.ceiling(x), 23) + self._test_func(F.ceiling(x, n), 22.223) + self._test_func(F.round(x), 22) + self._test_func(F.round(x, n), 22.222) + self._test_func(F.roundAge(x), 18) + self._test_func(F.roundDown(x, [10, 20, 30]), 20) + self._test_func(F.roundDuration(x), 10) + self._test_func(F.roundToExp2(x), 16) + + def test_array_functions(self): + arr = [1, 2, 3] + self._test_func(F.emptyArrayDate()) + self._test_func(F.emptyArrayDateTime()) + self._test_func(F.emptyArrayFloat32()) + self._test_func(F.emptyArrayFloat64()) + self._test_func(F.emptyArrayInt16()) + self._test_func(F.emptyArrayInt32()) + self._test_func(F.emptyArrayInt64()) + self._test_func(F.emptyArrayInt8()) + self._test_func(F.emptyArrayString()) + 
self._test_func(F.emptyArrayToSingle(F.emptyArrayInt16()), [0]) + self._test_func(F.emptyArrayUInt16()) + self._test_func(F.emptyArrayUInt32()) + self._test_func(F.emptyArrayUInt64()) + self._test_func(F.emptyArrayUInt8()) + self._test_func(F.range(7), list(range(7))) + self._test_func(F.array(*arr), arr) + self._test_func(F.arrayConcat([1, 2], [3]), arr) + self._test_func(F.arrayElement([10, 20, 30], 2), 20) + self._test_func(F.has(arr, 2), 1) + self._test_func(F.hasAll(arr, [1, 7]), 0) + self._test_func(F.hasAny(arr, [1, 7]), 1) + self._test_func(F.indexOf(arr, 3), 3) + self._test_func(F.countEqual(arr, 2), 1) + self._test_func(F.arrayEnumerate(arr)) + self._test_func(F.arrayEnumerateDense(arr)) + self._test_func(F.arrayEnumerateDenseRanked(arr)) + self._test_func(F.arrayEnumerateUniq(arr)) + self._test_func(F.arrayEnumerateUniqRanked(arr)) + self._test_func(F.arrayPopBack(arr), [1, 2]) + self._test_func(F.arrayPopFront(arr), [2, 3]) + self._test_func(F.arrayPushBack(arr, 7), arr + [7]) + self._test_func(F.arrayPushFront(arr, 7), [7] + arr) + self._test_func(F.arrayResize(arr, 5), [1, 2, 3, 0, 0]) + self._test_func(F.arrayResize(arr, 5, 9), [1, 2, 3, 9, 9]) + self._test_func(F.arraySlice(arr, 2), [2, 3]) + self._test_func(F.arraySlice(arr, 2, 1), [2]) + self._test_func(F.arrayUniq(arr + arr), 3) + self._test_func(F.arrayJoin(arr)) + self._test_func(F.arrayDifference(arr), [0, 1, 1]) + self._test_func(F.arrayDistinct(arr + arr), arr) + self._test_func(F.arrayIntersect(arr, [3, 4]), [3]) + self._test_func(F.arrayReduce('min', arr), 1) + self._test_func(F.arrayReverse(arr), [3, 2, 1]) + + def test_split_and_merge_functions(self): + self._test_func(F.splitByChar('_', 'a_b_c'), ['a', 'b', 'c']) + self._test_func(F.splitByString('__', 'a__b__c'), ['a', 'b', 'c']) + self._test_func(F.arrayStringConcat(['a', 'b', 'c']), 'abc') + self._test_func(F.arrayStringConcat(['a', 'b', 'c'], '_'), 'a_b_c') + self._test_func(F.alphaTokens('aaa.bbb.111'), ['aaa', 'bbb']) + + def 
test_bit_functions(self): + x = 17 + y = 4 + z = 5 + self._test_func(F.bitAnd(x, y)) + self._test_func(F.bitNot(x)) + self._test_func(F.bitOr(x, y)) + self._test_func(F.bitRotateLeft(x, y)) + self._test_func(F.bitRotateRight(x, y)) + self._test_func(F.bitShiftLeft(x, y)) + self._test_func(F.bitShiftRight(x, y)) + self._test_func(F.bitTest(x, y)) + self._test_func(F.bitTestAll(x, y)) + self._test_func(F.bitTestAll(x, y, z)) + self._test_func(F.bitTestAny(x, y)) + self._test_func(F.bitTestAny(x, y, z)) + self._test_func(F.bitXor(x, y)) + + def test_bitmap_functions(self): + self._test_func(F.bitmapToArray(F.bitmapBuild([1, 2, 3])), [1, 2, 3]) + self._test_func(F.bitmapContains(F.bitmapBuild([1, 5, 7, 9]), F.toUInt32(9)), 1) + self._test_func(F.bitmapHasAny(F.bitmapBuild([1,2,3]), F.bitmapBuild([3,4,5])), 1) + self._test_func(F.bitmapHasAll(F.bitmapBuild([1,2,3]), F.bitmapBuild([3,4,5])), 0) + self._test_func(F.bitmapToArray(F.bitmapAnd(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [3]) + self._test_func(F.bitmapToArray(F.bitmapOr(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [1, 2, 3, 4, 5]) + self._test_func(F.bitmapToArray(F.bitmapXor(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [1, 2, 4, 5]) + self._test_func(F.bitmapToArray(F.bitmapAndnot(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [1, 2]) + self._test_func(F.bitmapCardinality(F.bitmapBuild([1, 2, 3, 4, 5])), 5) + self._test_func(F.bitmapAndCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 1) + self._test_func(F.bitmapOrCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 5) + self._test_func(F.bitmapXorCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 4) + self._test_func(F.bitmapAndnotCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 2) + + def test_hash_functions(self): + args = ['x', 'y', 'z'] + x = 17 + s = 'hello' + url = 'http://example.com/a/b/c/d' + self._test_func(F.hex(F.halfMD5(*args))) + 
self._test_func(F.hex(F.MD5(s))) + self._test_func(F.hex(F.sipHash64(*args))) + self._test_func(F.hex(F.sipHash128(s))) + self._test_func(F.hex(F.cityHash64(*args))) + self._test_func(F.hex(F.intHash32(x))) + self._test_func(F.hex(F.intHash64(x))) + self._test_func(F.hex(F.SHA1(s))) + self._test_func(F.hex(F.SHA224(s))) + self._test_func(F.hex(F.SHA256(s))) + self._test_func(F.hex(F.URLHash(url))) + self._test_func(F.hex(F.URLHash(url, 3))) + self._test_func(F.hex(F.farmHash64(*args))) + self._test_func(F.javaHash(s)) + self._test_func(F.hiveHash(s)) + self._test_func(F.hex(F.metroHash64(*args))) + self._test_func(F.jumpConsistentHash(x, 3)) + self._test_func(F.hex(F.murmurHash2_32(*args))) + self._test_func(F.hex(F.murmurHash2_64(*args))) + self._test_func(F.hex(F.murmurHash3_32(*args))) + self._test_func(F.hex(F.murmurHash3_64(*args))) + self._test_func(F.hex(F.murmurHash3_128(s))) + self._test_func(F.hex(F.xxHash32(*args))) + self._test_func(F.hex(F.xxHash64(*args))) + + def test_rand_functions(self): + self._test_func(F.rand()) + self._test_func(F.rand(17)) + self._test_func(F.rand64()) + self._test_func(F.rand64(17)) + self._test_func(F.randConstant()) + self._test_func(F.randConstant(17)) + + def test_encoding_functions(self): + uuid = '123e4567-e89b-12d3-a456-426655440000' + self._test_func(F.hex(F.unhex('0FA1')), '0FA1') + self._test_func(F.UUIDNumToString(F.UUIDStringToNum(uuid)), uuid) + self._test_func(F.bitmaskToArray(17)) + self._test_func(F.bitmaskToList(18)) + From cc0f2c4e91cfb9bfac86815eed78a6ff5f49f1ad Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 13 Jul 2019 23:01:37 +0300 Subject: [PATCH 05/41] Functions WIP --- src/infi/clickhouse_orm/funcs.py | 24 ++++++++++++++++-------- tests/test_funcs.py | 9 +++++++-- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 29c3728..bac17ba 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ 
b/src/infi/clickhouse_orm/funcs.py @@ -1136,14 +1136,6 @@ class F(Cond, FunctionOperatorsMixin): def unhex(x): return F('unhex', x) - @staticmethod - def UUIDNumToString(s): - return F('UUIDNumToString', s) - - @staticmethod - def UUIDStringToNum(s): - return F('UUIDStringToNum', s) - @staticmethod def bitmaskToArray(x): return F('bitmaskToArray', x) @@ -1152,7 +1144,23 @@ class F(Cond, FunctionOperatorsMixin): def bitmaskToList(x): return F('bitmaskToList', x) + # Functions for working with UUID + @staticmethod + def generateUUIDv4(): + return F('generateUUIDv4') + + @staticmethod + def toUUID(s): + return F('toUUID', s) + + @staticmethod + def UUIDNumToString(s): + return F('UUIDNumToString', s) + + @staticmethod + def UUIDStringToNum(s): + return F('UUIDStringToNum', s) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 32170ce..37844ed 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -440,9 +440,14 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.randConstant(17)) def test_encoding_functions(self): - uuid = '123e4567-e89b-12d3-a456-426655440000' self._test_func(F.hex(F.unhex('0FA1')), '0FA1') - self._test_func(F.UUIDNumToString(F.UUIDStringToNum(uuid)), uuid) self._test_func(F.bitmaskToArray(17)) self._test_func(F.bitmaskToList(18)) + def test_uuid_functions(self): + from uuid import UUID + uuid = self._test_func(F.generateUUIDv4()) + self.assertEqual(type(uuid), UUID) + s = str(uuid) + self._test_func(F.toUUID(s), uuid) + self._test_func(F.UUIDNumToString(F.UUIDStringToNum(s)), s) From 969070f1ae77c9a7b00ff361a850f24d41d4703a Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 27 Oct 2019 19:47:59 +0200 Subject: [PATCH 06/41] - Drop py2.7 support - Add ipv4/6 fields and funcs - Support funcs as alias/materialized expressions --- buildout.cfg | 2 +- docs/field_types.md | 54 ++++++------- docs/index.md | 2 +- src/infi/clickhouse_orm/fields.py | 51 ++++++++++-- src/infi/clickhouse_orm/funcs.py | 130 
++++++++++++++++++++++++++++-- src/infi/clickhouse_orm/utils.py | 4 + tests/test_alias_fields.py | 6 +- tests/test_funcs.py | 63 ++++++++++++++- tests/test_ip_fields.py | 65 +++++++++++++++ tests/test_materialized_fields.py | 4 +- tests/test_querysets.py | 4 +- 11 files changed, 334 insertions(+), 51 deletions(-) create mode 100644 tests/test_ip_fields.py diff --git a/buildout.cfg b/buildout.cfg index 67f93bb..39503b3 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -31,7 +31,7 @@ homepage = https://github.com/Infinidat/infi.clickhouse_orm [isolated-python] recipe = infi.recipe.python -version = v2.7.12.4 +version = v3.7.0.4 [setup.py] recipe = infi.recipe.template.version diff --git a/docs/field_types.md b/docs/field_types.md index c1ceb40..eae9b46 100644 --- a/docs/field_types.md +++ b/docs/field_types.md @@ -5,31 +5,33 @@ See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/data_types/) Currently the following field types are supported: -| Class | DB Type | Pythonic Type | Comments -| ------------------ | ---------- | ------------------- | ----------------------------------------------------- -| StringField | String | unicode | Encoded as UTF-8 when written to ClickHouse -| FixedStringField | String | unicode | Encoded as UTF-8 when written to ClickHouse -| DateField | Date | datetime.date | Range 1970-01-01 to 2105-12-31 -| DateTimeField | DateTime | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Always in UTC -| Int8Field | Int8 | int | Range -128 to 127 -| Int16Field | Int16 | int | Range -32768 to 32767 -| Int32Field | Int32 | int | Range -2147483648 to 2147483647 -| Int64Field | Int64 | int/long | Range -9223372036854775808 to 9223372036854775807 -| UInt8Field | UInt8 | int | Range 0 to 255 -| UInt16Field | UInt16 | int | Range 0 to 65535 -| UInt32Field | UInt32 | int | Range 0 to 4294967295 -| UInt64Field | UInt64 | int/long | Range 0 to 18446744073709551615 -| Float32Field | Float32 | float | -| Float64Field | Float64 | float | -| 
DecimalField | Decimal | Decimal | Pythonic values are rounded to fit the scale of the database field -| Decimal32Field | Decimal32 | Decimal | Ditto -| Decimal64Field | Decimal64 | Decimal | Ditto -| Decimal128Field | Decimal128 | Decimal | Ditto -| UUIDField | UUID | Decimal | -| Enum8Field | Enum8 | Enum | See below -| Enum16Field | Enum16 | Enum | See below -| ArrayField | Array | list | See below -| NullableField | Nullable | See below | See below +| Class | DB Type | Pythonic Type | Comments +| ------------------ | ---------- | --------------------- | ----------------------------------------------------- +| StringField | String | unicode | Encoded as UTF-8 when written to ClickHouse +| FixedStringField | String | unicode | Encoded as UTF-8 when written to ClickHouse +| DateField | Date | datetime.date | Range 1970-01-01 to 2105-12-31 +| DateTimeField | DateTime | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Always in UTC +| Int8Field | Int8 | int | Range -128 to 127 +| Int16Field | Int16 | int | Range -32768 to 32767 +| Int32Field | Int32 | int | Range -2147483648 to 2147483647 +| Int64Field | Int64 | int/long | Range -9223372036854775808 to 9223372036854775807 +| UInt8Field | UInt8 | int | Range 0 to 255 +| UInt16Field | UInt16 | int | Range 0 to 65535 +| UInt32Field | UInt32 | int | Range 0 to 4294967295 +| UInt64Field | UInt64 | int/long | Range 0 to 18446744073709551615 +| Float32Field | Float32 | float | +| Float64Field | Float64 | float | +| DecimalField | Decimal | Decimal | Pythonic values are rounded to fit the scale of the database field +| Decimal32Field | Decimal32 | Decimal | Ditto +| Decimal64Field | Decimal64 | Decimal | Ditto +| Decimal128Field | Decimal128 | Decimal | Ditto +| UUIDField | UUID | uuid.UUID | +| IPv4Field | IPv4 | ipaddress.IPv4Address | +| IPv6Field | IPv6 | ipaddress.IPv6Address | +| Enum8Field | Enum8 | Enum | See below +| Enum16Field | Enum16 | Enum | See below +| ArrayField | Array | list | See below +| 
NullableField | Nullable | See below | See below DateTimeField and Time Zones ---------------------------- @@ -51,8 +53,6 @@ Working with enum fields `Enum8Field` and `Enum16Field` provide support for working with ClickHouse enum columns. They accept strings or integers as values, and convert them to the matching Pythonic Enum member. -Python 3.4 and higher supports Enums natively. When using previous Python versions you need to install the enum34 library. - Example of a model with an enum field: ```python diff --git a/docs/index.md b/docs/index.md index d6df2c1..1c7ead1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -3,7 +3,7 @@ Overview This project is simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/). It allows you to define model classes whose instances can be written to the database and read from it. -It was tested on Python 2.7 and 3.5. +Version 1.x supports Python 2.7 and 3.5+. Version 2.x dropped support for Python 2.7, and works only with Python 3.5+. Installation ------------ diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index ee35945..7df6504 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -7,11 +7,13 @@ from calendar import timegm from decimal import Decimal, localcontext from uuid import UUID from logging import getLogger -from .utils import escape, parse_array, comma_join +from .utils import escape, parse_array, comma_join, string_or_func from .funcs import F, FunctionOperatorsMixin +from ipaddress import IPv4Address, IPv6Address logger = getLogger('clickhouse_orm') + class Field(FunctionOperatorsMixin): ''' Abstract base class for all field types. 
@@ -25,10 +27,10 @@ class Field(FunctionOperatorsMixin): def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None): assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \ "Only one of default, alias and materialized parameters can be given" - assert alias is None or isinstance(alias, string_types) and alias != "",\ - "Alias field must be a string, if given" - assert materialized is None or isinstance(materialized, string_types) and materialized != "",\ - "Materialized field must be string, if given" + assert alias is None or isinstance(alias, F) or isinstance(alias, string_types) and alias != "",\ + "Alias parameter must be a string or function object, if given" + assert materialized is None or isinstance(materialized, F) or isinstance(materialized, string_types) and materialized != "",\ + "Materialized parameter must be a string or function object, if given" assert readonly is None or type(readonly) is bool, "readonly parameter must be bool if given" assert codec is None or isinstance(codec, string_types) and codec != "", \ "Codec field must be string, if given" @@ -85,9 +87,9 @@ class Field(FunctionOperatorsMixin): def _extra_params(self, db): sql = '' if self.alias: - sql += ' ALIAS %s' % self.alias + sql += ' ALIAS %s' % string_or_func(self.alias) elif self.materialized: - sql += ' MATERIALIZED %s' % self.materialized + sql += ' MATERIALIZED %s' % string_or_func(self.materialized) elif self.default: default = self.to_db_string(self.default) sql += ' DEFAULT %s' % default @@ -511,12 +513,47 @@ class UUIDField(Field): return escape(str(value), quote) +class IPv4Field(Field): + + class_default = 0 + db_type = 'IPv4' + + def to_python(self, value, timezone_in_use): + if isinstance(value, IPv4Address): + return value + elif isinstance(value, (binary_type,) + string_types + integer_types): + return IPv4Address(value) + else: + raise ValueError('Invalid value for IPv4Address: %r' % value) + + def 
to_db_string(self, value, quote=True): + return escape(str(value), quote) + + +class IPv6Field(Field): + + class_default = 0 + db_type = 'IPv6' + + def to_python(self, value, timezone_in_use): + if isinstance(value, IPv6Address): + return value + elif isinstance(value, (binary_type,) + string_types + integer_types): + return IPv6Address(value) + else: + raise ValueError('Invalid value for IPv6Address: %r' % value) + + def to_db_string(self, value, quote=True): + return escape(str(value), quote) + + class NullableField(Field): class_default = None def __init__(self, inner_field, default=None, alias=None, materialized=None, extra_null_values=None, codec=None): + assert isinstance(inner_field, Field), "The first argument of NullableField must be a Field instance. Not: {}".format(inner_field) self.inner_field = inner_field self._null_values = [None] if extra_null_values: diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index bac17ba..3de6a56 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -63,10 +63,10 @@ class FunctionOperatorsMixin(object): def __rmul__(self, other): return F.multiply(other, self) - def __div__(self, other): + def __truediv__(self, other): return F.divide(self, other) - def __rdiv__(self, other): + def __rtruediv__(self, other): return F.divide(other, self) def __mod__(self, other): @@ -115,7 +115,7 @@ class F(Cond, FunctionOperatorsMixin): self.args = args self.is_binary_operator = False - def to_sql(self, *args): + def to_sql(self, *args): # FIXME why *args ? """ Generates an SQL string for this function and its arguments. 
For example if the function name is a symbol of a binary operator: @@ -129,10 +129,11 @@ class F(Cond, FunctionOperatorsMixin): else: prefix = self.name sep = ', ' - arg_strs = (self.arg_to_sql(arg) for arg in self.args) + arg_strs = (F.arg_to_sql(arg) for arg in self.args) return prefix + '(' + sep.join(arg_strs) + ')' - def arg_to_sql(self, arg): + @staticmethod + def arg_to_sql(arg): """ Converts a function argument to SQL string according to its type. Supports functions, model fields, strings, dates, datetimes, booleans, @@ -156,7 +157,7 @@ class F(Cond, FunctionOperatorsMixin): if arg is None: return 'NULL' if is_iterable(arg): - return '[' + comma_join(self.arg_to_sql(x) for x in arg) + ']' + return '[' + comma_join(F.arg_to_sql(x) for x in arg) + ']' return six.text_type(arg) # Arithmetic functions @@ -384,6 +385,70 @@ class F(Cond, FunctionOperatorsMixin): def formatDateTime(d, format, timezone=''): return F('formatDateTime', d, format, timezone) + @staticmethod + def addDays(d, n, timezone=None): + return F('addDays', d, n, timezone) if timezone else F('addDays', d, n) + + @staticmethod + def addHours(d, n, timezone=None): + return F('addHours', d, n, timezone) if timezone else F('addHours', d, n) + + @staticmethod + def addMinutes(d, n, timezone=None): + return F('addMinutes', d, n, timezone) if timezone else F('addMinutes', d, n) + + @staticmethod + def addMonths(d, n, timezone=None): + return F('addMonths', d, n, timezone) if timezone else F('addMonths', d, n) + + @staticmethod + def addQuarters(d, n, timezone=None): + return F('addQuarters', d, n, timezone) if timezone else F('addQuarters', d, n) + + @staticmethod + def addSeconds(d, n, timezone=None): + return F('addSeconds', d, n, timezone) if timezone else F('addSeconds', d, n) + + @staticmethod + def addWeeks(d, n, timezone=None): + return F('addWeeks', d, n, timezone) if timezone else F('addWeeks', d, n) + + @staticmethod + def addYears(d, n, timezone=None): + return F('addYears', d, n, timezone) 
if timezone else F('addYears', d, n) + + @staticmethod + def subtractDays(d, n, timezone=None): + return F('subtractDays', d, n, timezone) if timezone else F('subtractDays', d, n) + + @staticmethod + def subtractHours(d, n, timezone=None): + return F('subtractHours', d, n, timezone) if timezone else F('subtractHours', d, n) + + @staticmethod + def subtractMinutes(d, n, timezone=None): + return F('subtractMinutes', d, n, timezone) if timezone else F('subtractMinutes', d, n) + + @staticmethod + def subtractMonths(d, n, timezone=None): + return F('subtractMonths', d, n, timezone) if timezone else F('subtractMonths', d, n) + + @staticmethod + def subtractQuarters(d, n, timezone=None): + return F('subtractQuarters', d, n, timezone) if timezone else F('subtractQuarters', d, n) + + @staticmethod + def subtractSeconds(d, n, timezone=None): + return F('subtractSeconds', d, n, timezone) if timezone else F('subtractSeconds', d, n) + + @staticmethod + def subtractWeeks(d, n, timezone=None): + return F('subtractWeeks', d, n, timezone) if timezone else F('subtractWeeks', d, n) + + @staticmethod + def subtractYears(d, n, timezone=None): + return F('subtractYears', d, n, timezone) if timezone else F('subtractYears', d, n) + # Type conversion functions @staticmethod @@ -502,6 +567,18 @@ class F(Cond, FunctionOperatorsMixin): def CAST(x, type): return F('CAST', x, type) + @staticmethod + def parseDateTimeBestEffort(d, timezone=None): + return F('parseDateTimeBestEffort', d, timezone) if timezone else F('parseDateTimeBestEffort', d) + + @staticmethod + def parseDateTimeBestEffortOrNull(d, timezone=None): + return F('parseDateTimeBestEffortOrNull', d, timezone) if timezone else F('parseDateTimeBestEffortOrNull', d) + + @staticmethod + def parseDateTimeBestEffortOrZero(d, timezone=None): + return F('parseDateTimeBestEffortOrZero', d, timezone) if timezone else F('parseDateTimeBestEffortOrZero', d) + # Functions for working with strings @staticmethod @@ -1162,6 +1239,47 @@ class F(Cond, 
FunctionOperatorsMixin): def UUIDStringToNum(s): return F('UUIDStringToNum', s) + # Functions for working with IP addresses + + @staticmethod + def IPv4CIDRToRange(ipv4, cidr): + return F('IPv4CIDRToRange', ipv4, cidr) + + @staticmethod + def IPv4NumToString(num): + return F('IPv4NumToString', num) + + @staticmethod + def IPv4NumToStringClassC(num): + return F('IPv4NumToStringClassC', num) + + @staticmethod + def IPv4StringToNum(s): + return F('IPv4StringToNum', s) + + @staticmethod + def IPv4ToIPv6(ipv4): + return F('IPv4ToIPv6', ipv4) + + @staticmethod + def IPv6CIDRToRange(ipv6, cidr): + return F('IPv6CIDRToRange', ipv6, cidr) + + @staticmethod + def IPv6NumToString(num): + return F('IPv6NumToString', num) + + @staticmethod + def IPv6StringToNum(s): + return F('IPv6StringToNum', s) + + @staticmethod + def toIPv4(ipv4): + return F('toIPv4', ipv4) + + @staticmethod + def toIPv6(ipv6): + return F('toIPv6', ipv6) diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index 8d4f7ee..dcc8492 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -39,6 +39,10 @@ def unescape(value): return codecs.escape_decode(value)[0].decode('utf-8') +def string_or_func(obj): + return obj.to_sql() if hasattr(obj, 'to_sql') else obj + + def parse_tsv(line): if PY3 and isinstance(line, binary_type): line = line.decode() diff --git a/tests/test_alias_fields.py b/tests/test_alias_fields.py index de33993..92a2c41 100644 --- a/tests/test_alias_fields.py +++ b/tests/test_alias_fields.py @@ -8,7 +8,7 @@ from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * -class MaterializedFieldsTest(unittest.TestCase): +class AliasFieldsTest(unittest.TestCase): def setUp(self): self.database = Database('test-db', log_statements=True) @@ -25,7 +25,7 @@ class MaterializedFieldsTest(unittest.TestCase): ) self.database.insert([instance]) # We can't select * from table, as it doesn't select materialized and alias fields - 
query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str' \ + query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str, alias_func' \ ' FROM $db.%s ORDER BY alias_date' % ModelWithAliasFields.table_name() for model_cls in (ModelWithAliasFields, None): results = list(self.database.select(query, model_cls)) @@ -36,6 +36,7 @@ class MaterializedFieldsTest(unittest.TestCase): self.assertEqual(results[0].alias_int, instance.int_field) self.assertEqual(results[0].alias_str, instance.str_field) self.assertEqual(results[0].alias_date, instance.date_field) + self.assertEqual(results[0].alias_func, '08/30/16') def test_assignment_error(self): # I can't prevent assigning at all, in case db.select statements with model provided sets model fields. @@ -64,5 +65,6 @@ class ModelWithAliasFields(Model): alias_str = StringField(alias=u'str_field') alias_int = Int32Field(alias='int_field') alias_date = DateField(alias='date_field') + alias_func = StringField(alias=F.formatDateTime(date_field, '%D')) engine = MergeTree('date_field', ('date_field',)) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 37844ed..52a5c58 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -2,6 +2,7 @@ import unittest from .base_test_with_data import * from .test_querysets import SampleModel from datetime import date, datetime, tzinfo, timedelta +from ipaddress import IPv4Address, IPv6Address from infi.clickhouse_orm.database import ServerError @@ -84,8 +85,8 @@ class FuncsTestCase(TestCaseWithData): self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18) # People born on 1976-10-01 self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1) - self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 01))), 1) - self._test_qs(qs.filter(Person.birthday == date(1976, 10, 01)), 1) + self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1) + self._test_qs(qs.filter(Person.birthday == 
date(1976, 10, 1)), 1) def test_func_as_field_value(self): qs = Person.objects_in(self.database) @@ -151,8 +152,8 @@ class FuncsTestCase(TestCaseWithData): self._test_func(0 | one, 1) # ^ self._test_func(one ^ one, 0) - self._test_func(one ^ 0, 1) - self._test_func(0 ^ one, 1) + #############self._test_func(one ^ 0, 1) + #############self._test_func(0 ^ one, 1) # ~ self._test_func(~one, 0) self._test_func(~~one, 1) @@ -214,6 +215,38 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.timeSlots(dt, 300), [datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)]) self._test_func(F.formatDateTime(dt, '%D %T'), '12/31/18 11:22:33') self._test_func(F.formatDateTime(dt, '%D %T', 'Europe/Athens'), '12/31/18 13:22:33') + self._test_func(F.addDays(d, 7), date(2019, 1, 7)) + self._test_func(F.addDays(dt, 7, 'Europe/Athens')) + self._test_func(F.addHours(d, 7), datetime(2018, 12, 31, 7, 0, 0, tzinfo=pytz.utc)) + self._test_func(F.addHours(dt, 7, 'Europe/Athens')) + self._test_func(F.addMinutes(d, 7), datetime(2018, 12, 31, 0, 7, 0, tzinfo=pytz.utc)) + self._test_func(F.addMinutes(dt, 7, 'Europe/Athens')) + self._test_func(F.addMonths(d, 7), date(2019, 7, 31)) + self._test_func(F.addMonths(dt, 7, 'Europe/Athens')) + self._test_func(F.addQuarters(d, 7)) + self._test_func(F.addQuarters(dt, 7, 'Europe/Athens')) + self._test_func(F.addSeconds(d, 7)) + self._test_func(F.addSeconds(dt, 7, 'Europe/Athens')) + self._test_func(F.addWeeks(d, 7)) + self._test_func(F.addWeeks(dt, 7, 'Europe/Athens')) + self._test_func(F.addYears(d, 7)) + self._test_func(F.addYears(dt, 7, 'Europe/Athens')) + self._test_func(F.subtractDays(d, 3)) + self._test_func(F.subtractDays(dt, 3, 'Europe/Athens')) + self._test_func(F.subtractHours(d, 3)) + self._test_func(F.subtractHours(dt, 3, 'Europe/Athens')) + self._test_func(F.subtractMinutes(d, 3)) + self._test_func(F.subtractMinutes(dt, 3, 'Europe/Athens')) + self._test_func(F.subtractMonths(d, 3)) + self._test_func(F.subtractMonths(dt, 3, 'Europe/Athens')) + 
self._test_func(F.subtractQuarters(d, 3)) + self._test_func(F.subtractQuarters(dt, 3, 'Europe/Athens')) + self._test_func(F.subtractSeconds(d, 3)) + self._test_func(F.subtractSeconds(dt, 3, 'Europe/Athens')) + self._test_func(F.subtractWeeks(d, 3)) + self._test_func(F.subtractWeeks(dt, 3, 'Europe/Athens')) + self._test_func(F.subtractYears(d, 3)) + self._test_func(F.subtractYears(dt, 3, 'Europe/Athens')) def test_type_conversion_functions(self): for f in (F.toUInt8, F.toUInt16, F.toUInt32, F.toUInt64, F.toInt8, F.toInt16, F.toInt32, F.toInt64, F.toFloat32, F.toFloat64): @@ -231,6 +264,16 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toFixedString('123', 5), '123') self._test_func(F.toStringCutToZero('123\0'), '123') self._test_func(F.CAST(17, 'String'), '17') + self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) + self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM', 'Europe/Athens')) + with self.assertRaises(ServerError): + self._test_func(F.parseDateTimeBestEffort('foo')) + self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) + self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM', 'Europe/Athens')) + self._test_func(F.parseDateTimeBestEffortOrNull('foo'), None) + self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) + self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM', 'Europe/Athens')) + self._test_func(F.parseDateTimeBestEffortOrZero('foo'), DateTimeField.class_default) def test_string_functions(self): self._test_func(F.empty(''), 1) @@ -451,3 +494,15 @@ class FuncsTestCase(TestCaseWithData): s = str(uuid) self._test_func(F.toUUID(s), uuid) self._test_func(F.UUIDNumToString(F.UUIDStringToNum(s)), s) + + def test_ip_funcs(self): + self._test_func(F.IPv4NumToString(F.toUInt32(1)), '0.0.0.1') + 
self._test_func(F.IPv4NumToStringClassC(F.toUInt32(1)), '0.0.0.xxx') + self._test_func(F.IPv4StringToNum('0.0.0.17'), 17) + self._test_func(F.IPv6NumToString(F.IPv4ToIPv6(F.IPv4StringToNum('192.168.0.1'))), '::ffff:192.168.0.1') + self._test_func(F.IPv6NumToString(F.IPv6StringToNum('2a02:6b8::11')), '2a02:6b8::11') + self._test_func(F.toIPv4('10.20.30.40'), IPv4Address('10.20.30.40')) + self._test_func(F.toIPv6('2001:438:ffff::407d:1bc1'), IPv6Address('2001:438:ffff::407d:1bc1')) + # These require support for tuples: + # self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16), ['192.168.0.0','192.168.255.255']) + # self._test_func(F.IPv6CIDRToRange(x, y)) diff --git a/tests/test_ip_fields.py b/tests/test_ip_fields.py new file mode 100644 index 0000000..448afc8 --- /dev/null +++ b/tests/test_ip_fields.py @@ -0,0 +1,65 @@ +from __future__ import unicode_literals +import unittest +from ipaddress import IPv4Address, IPv6Address +from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.fields import Int16Field, IPv4Field, IPv6Field +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.engines import Memory + + +class IPFieldsTest(unittest.TestCase): + + def setUp(self): + self.database = Database('test-db', log_statements=True) + + def tearDown(self): + self.database.drop_database() + + def test_ipv4_field(self): + # Create a model + class TestModel(Model): + i = Int16Field() + f = IPv4Field() + engine = Memory() + self.database.create_table(TestModel) + # Check valid values (all values are the same ip) + values = [ + '1.2.3.4', + b'\x01\x02\x03\x04', + 16909060, + IPv4Address('1.2.3.4') + ] + for index, value in enumerate(values): + rec = TestModel(i=index, f=value) + self.database.insert([rec]) + for rec in TestModel.objects_in(self.database): + self.assertEqual(rec.f, IPv4Address(values[0])) + # Check invalid values + for value in [None, 'zzz', -1, '123']: + with self.assertRaises(ValueError): + TestModel(i=1, f=value) + + 
def test_ipv6_field(self): + # Create a model + class TestModel(Model): + i = Int16Field() + f = IPv6Field() + engine = Memory() + self.database.create_table(TestModel) + # Check valid values (all values are the same ip) + values = [ + '2a02:e980:1e::1', + b'*\x02\xe9\x80\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01', + 55842696359362256756849388082849382401, + IPv6Address('2a02:e980:1e::1') + ] + for index, value in enumerate(values): + rec = TestModel(i=index, f=value) + self.database.insert([rec]) + for rec in TestModel.objects_in(self.database): + self.assertEqual(rec.f, IPv6Address(values[0])) + # Check invalid values + for value in [None, 'zzz', -1, '123']: + with self.assertRaises(ValueError): + TestModel(i=1, f=value) + diff --git a/tests/test_materialized_fields.py b/tests/test_materialized_fields.py index 3bfadcd..ee81234 100644 --- a/tests/test_materialized_fields.py +++ b/tests/test_materialized_fields.py @@ -25,7 +25,7 @@ class MaterializedFieldsTest(unittest.TestCase): ) self.database.insert([instance]) # We can't select * from table, as it doesn't select materialized and alias fields - query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str' \ + query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str, mat_func' \ ' FROM $db.%s ORDER BY mat_date' % ModelWithMaterializedFields.table_name() for model_cls in (ModelWithMaterializedFields, None): results = list(self.database.select(query, model_cls)) @@ -36,6 +36,7 @@ class MaterializedFieldsTest(unittest.TestCase): self.assertEqual(results[0].mat_int, abs(instance.int_field)) self.assertEqual(results[0].mat_str, instance.str_field.lower()) self.assertEqual(results[0].mat_date, instance.date_time_field.date()) + self.assertEqual(results[0].mat_func, instance.str_field.lower()) def test_assignment_error(self): # I can't prevent assigning at all, in case db.select statements with model provided sets model fields. 
@@ -64,5 +65,6 @@ class ModelWithMaterializedFields(Model): mat_str = StringField(materialized='lower(str_field)') mat_int = Int32Field(materialized='abs(int_field)') mat_date = DateField(materialized=u'toDate(date_time_field)') + mat_func = StringField(materialized=F.lower(str_field)) engine = MergeTree('mat_date', ('mat_date',)) diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 7fe0382..6589c10 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -520,8 +520,8 @@ class FuncsTestCase(TestCaseWithData): self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18) # People born on 1976-10-01 self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1) - self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 01))), 1) - self._test_qs(qs.filter(Person.birthday == date(1976, 10, 01)), 1) + self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1) + self._test_qs(qs.filter(Person.birthday == date(1976, 10, 1)), 1) def test_func_as_field_value(self): qs = Person.objects_in(self.database) From 39f34b7c85c668865e44987818fe142e5c66948e Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 27 Oct 2019 20:20:26 +0200 Subject: [PATCH 07/41] Functions WIP --- src/infi/clickhouse_orm/funcs.py | 26 +++++++++++++++++++++++++- tests/test_funcs.py | 10 +++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 3de6a56..60520da 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -653,7 +653,31 @@ class F(Cond, FunctionOperatorsMixin): def tryBase64Decode(s): return F('tryBase64Decode', s) - # Functions for searching and replacing in strings + @staticmethod + def endsWith(s, suffix): + return F('endsWith', s, suffix) + + @staticmethod + def startsWith(s, prefix): + return F('startsWith', s, prefix) + + @staticmethod + def trimLeft(s): + return F('trimLeft', s) + + @staticmethod + 
def trimRight(s): + return F('trimRight', s) + + @staticmethod + def trimBoth(s): + return F('trimBoth', s) + + @staticmethod + def CRC32(s): + return F('CRC32', s) + + # Functions for replacing in strings @staticmethod def replace(haystack, pattern, replacement): diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 52a5c58..5068409 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -208,7 +208,7 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toRelativeMinuteNum(dt, 'Europe/Athens'), 25770922) self._test_func(F.toRelativeSecondNum(dt), 1546255353) self._test_func(F.toRelativeSecondNum(dt, 'Europe/Athens'), 1546255353) - self._test_func(F.now(), datetime.utcnow().replace(tzinfo=pytz.utc, microsecond=0)) + self._test_func(F.now(), datetime.utcnow().replace(tzinfo=pytz.utc, microsecond=0)) # FIXME this may fail if the timing is just right self._test_func(F.today(), date.today()) self._test_func(F.yesterday(), date.today() - timedelta(days=1)) self._test_func(F.timeSlot(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)) @@ -294,6 +294,14 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.appendTrailingCharIfAbsent('Hello', '!'), 'Hello!') self._test_func(F.appendTrailingCharIfAbsent('Hello!', '!'), 'Hello!') self._test_func(F.convertCharset(F.convertCharset('Hello', 'latin1', 'utf16'), 'utf16', 'latin1'), 'Hello') + self._test_func(F.startsWith('aaa', 'aa'), True) + self._test_func(F.startsWith('aaa', 'bb'), False) + self._test_func(F.endsWith('aaa', 'aa'), True) + self._test_func(F.endsWith('aaa', 'bb'), False) + self._test_func(F.trimLeft(' abc '), 'abc ') + self._test_func(F.trimRight(' abc '), ' abc') + self._test_func(F.trimBoth(' abc '), 'abc') + self._test_func(F.CRC32('whoops'), 3361378926) def test_base64_functions(self): try: From ef30f1d1bdc99bd4399b15c7370b43e5b30f02f2 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 15 Dec 2019 19:14:16 +0200 Subject: [PATCH 08/41] Remove usage of six --- buildout.cfg | 3 
+- src/infi/clickhouse_orm/database.py | 6 ++-- src/infi/clickhouse_orm/engines.py | 5 ++- src/infi/clickhouse_orm/fields.py | 45 ++++++++++++------------ src/infi/clickhouse_orm/funcs.py | 7 ++-- src/infi/clickhouse_orm/migrations.py | 11 ++---- src/infi/clickhouse_orm/models.py | 19 +++++----- src/infi/clickhouse_orm/query.py | 10 +++--- src/infi/clickhouse_orm/system_models.py | 3 +- src/infi/clickhouse_orm/utils.py | 7 ++-- tests/test_models.py | 4 +-- 11 files changed, 52 insertions(+), 68 deletions(-) diff --git a/buildout.cfg b/buildout.cfg index 39503b3..dd7cc35 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -14,8 +14,7 @@ install_requires = [ 'iso8601 >= 0.1.12', 'pytz', 'requests', - 'setuptools', - 'six' + 'setuptools' ] version_file = src/infi/clickhouse_orm/__version__.py description = A Python library for working with the ClickHouse database diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index ae47bc0..6313082 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -8,7 +8,6 @@ from .utils import escape, parse_tsv, import_submodules from math import ceil import datetime from string import Template -from six import PY3, string_types import pytz import logging @@ -174,7 +173,7 @@ class Database(object): The name must be string, and the value is converted to string in case it isn't. To remove a setting, pass `None` as the value. ''' - assert isinstance(name, string_types), 'Setting name must be a string' + assert isinstance(name, str), 'Setting name must be a string' if value is None: self.settings.pop(name, None) else: @@ -187,7 +186,6 @@ class Database(object): - `model_instances`: any iterable containing instances of a single model class. - `batch_size`: number of records to send per chunk (use a lower number if your records are very large). 
''' - from six import next from io import BytesIO i = iter(model_instances) try: @@ -338,7 +336,7 @@ class Database(object): return set(obj.module_name for obj in self.select(query)) def _send(self, data, settings=None, stream=False): - if isinstance(data, string_types): + if isinstance(data, str): data = data.encode('utf-8') if self.log_statements: logger.info(data) diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index e38bfcf..d01e389 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import logging -import six from .utils import comma_join @@ -37,7 +36,7 @@ class MergeTree(Engine): def __init__(self, date_col=None, order_by=(), sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None): assert type(order_by) in (list, tuple), 'order_by must be a list or tuple' - assert date_col is None or isinstance(date_col, six.string_types), 'date_col must be string if present' + assert date_col is None or isinstance(date_col, str), 'date_col must be string if present' assert partition_key is None or type(partition_key) in (list, tuple),\ 'partition_key must be tuple or list if present' assert (replica_table_path is None) == (replica_name is None), \ @@ -198,7 +197,7 @@ class Merge(Engine): """ def __init__(self, table_regex): - assert isinstance(table_regex, six.string_types), "'table_regex' parameter must be string" + assert isinstance(table_regex, str), "'table_regex' parameter must be string" self.table_regex = table_regex def create_table_sql(self, db): diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 7df6504..f649440 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -1,5 +1,4 @@ from __future__ import unicode_literals -from six import string_types, text_type, binary_type, integer_types import datetime import 
iso8601 import pytz @@ -25,14 +24,14 @@ class Field(FunctionOperatorsMixin): db_type = None # should be overridden by concrete subclasses def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None): - assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \ + assert [default, alias, materialized].count(None) >= 2, \ "Only one of default, alias and materialized parameters can be given" - assert alias is None or isinstance(alias, F) or isinstance(alias, string_types) and alias != "",\ + assert alias is None or isinstance(alias, F) or isinstance(alias, str) and alias != "",\ "Alias parameter must be a string or function object, if given" - assert materialized is None or isinstance(materialized, F) or isinstance(materialized, string_types) and materialized != "",\ + assert materialized is None or isinstance(materialized, F) or isinstance(materialized, str) and materialized != "",\ "Materialized parameter must be a string or function object, if given" assert readonly is None or type(readonly) is bool, "readonly parameter must be bool if given" - assert codec is None or isinstance(codec, string_types) and codec != "", \ + assert codec is None or isinstance(codec, str) and codec != "", \ "Codec field must be string, if given" self.creation_counter = Field.creation_counter @@ -140,9 +139,9 @@ class StringField(Field): db_type = 'String' def to_python(self, value, timezone_in_use): - if isinstance(value, text_type): + if isinstance(value, str): return value - if isinstance(value, binary_type): + if isinstance(value, bytes): return value.decode('UTF-8') raise ValueError('Invalid value for %s: %r' % (self.__class__.__name__, value)) @@ -159,7 +158,7 @@ class FixedStringField(StringField): return value.rstrip('\0') def validate(self, value): - if isinstance(value, text_type): + if isinstance(value, str): value = value.encode('UTF-8') if len(value) > self._length: raise ValueError('Value of %d bytes is too long 
for FixedStringField(%d)' % (len(value), self._length)) @@ -179,7 +178,7 @@ class DateField(Field): return value if isinstance(value, int): return DateField.class_default + datetime.timedelta(days=value) - if isinstance(value, string_types): + if isinstance(value, str): if value == '0000-00-00': return DateField.min_value return datetime.datetime.strptime(value, '%Y-%m-%d').date() @@ -204,7 +203,7 @@ class DateTimeField(Field): return datetime.datetime(value.year, value.month, value.day, tzinfo=pytz.utc) if isinstance(value, int): return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc) - if isinstance(value, string_types): + if isinstance(value, str): if value == '0000-00-00 00:00:00': return self.class_default if len(value) == 10: @@ -217,7 +216,7 @@ class DateTimeField(Field): # left the date naive in case of no tzinfo set dt = iso8601.parse_date(value, default_timezone=None) except iso8601.ParseError as e: - raise ValueError(text_type(e)) + raise ValueError(str(e)) # convert naive to aware if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None: @@ -242,7 +241,7 @@ class BaseIntField(Field): def to_db_string(self, value, quote=True): # There's no need to call escape since numbers do not contain # special characters, and never need quoting - return text_type(value) + return str(value) def validate(self, value): self._range_check(value, self.min_value, self.max_value) @@ -318,7 +317,7 @@ class BaseFloatField(Field): def to_db_string(self, value, quote=True): # There's no need to call escape since numbers do not contain # special characters, and never need quoting - return text_type(value) + return str(value) class Float32Field(BaseFloatField): @@ -362,7 +361,7 @@ class DecimalField(Field): def to_db_string(self, value, quote=True): # There's no need to call escape since numbers do not contain # special characters, and never need quoting - return text_type(value) + return str(value) def _round(self, value): return value.quantize(self.exp) @@ 
-407,9 +406,9 @@ class BaseEnumField(Field): if isinstance(value, self.enum_cls): return value try: - if isinstance(value, text_type): + if isinstance(value, str): return self.enum_cls[value] - if isinstance(value, binary_type): + if isinstance(value, bytes): return self.enum_cls[value.decode('UTF-8')] if isinstance(value, int): return self.enum_cls(value) @@ -467,9 +466,9 @@ class ArrayField(Field): super(ArrayField, self).__init__(default, alias, materialized, readonly, codec) def to_python(self, value, timezone_in_use): - if isinstance(value, text_type): + if isinstance(value, str): value = parse_array(value) - elif isinstance(value, binary_type): + elif isinstance(value, bytes): value = parse_array(value.decode('UTF-8')) elif not isinstance(value, (list, tuple)): raise ValueError('ArrayField expects list or tuple, not %s' % type(value)) @@ -498,11 +497,11 @@ class UUIDField(Field): def to_python(self, value, timezone_in_use): if isinstance(value, UUID): return value - elif isinstance(value, binary_type): + elif isinstance(value, bytes): return UUID(bytes=value) - elif isinstance(value, string_types): + elif isinstance(value, str): return UUID(value) - elif isinstance(value, integer_types): + elif isinstance(value, int): return UUID(int=value) elif isinstance(value, tuple): return UUID(fields=value) @@ -521,7 +520,7 @@ class IPv4Field(Field): def to_python(self, value, timezone_in_use): if isinstance(value, IPv4Address): return value - elif isinstance(value, (binary_type,) + string_types + integer_types): + elif isinstance(value, (bytes, str, int)): return IPv4Address(value) else: raise ValueError('Invalid value for IPv4Address: %r' % value) @@ -538,7 +537,7 @@ class IPv6Field(Field): def to_python(self, value, timezone_in_use): if isinstance(value, IPv6Address): return value - elif isinstance(value, (binary_type,) + string_types + integer_types): + elif isinstance(value, (bytes, str, int)): return IPv6Address(value) else: raise ValueError('Invalid value for 
IPv6Address: %r' % value) diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 60520da..31febe3 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -1,4 +1,3 @@ -import six from datetime import date, datetime, tzinfo import functools @@ -144,21 +143,21 @@ class F(Cond, FunctionOperatorsMixin): return arg.to_sql() if isinstance(arg, Field): return "`%s`" % arg.name - if isinstance(arg, six.string_types): + if isinstance(arg, str): return StringField().to_db_string(arg) if isinstance(arg, datetime): return "toDateTime(%s)" % DateTimeField().to_db_string(arg) if isinstance(arg, date): return "toDate('%s')" % arg.isoformat() if isinstance(arg, bool): - return six.text_type(int(arg)) + return str(int(arg)) if isinstance(arg, tzinfo): return StringField().to_db_string(arg.tzname(None)) if arg is None: return 'NULL' if is_iterable(arg): return '[' + comma_join(F.arg_to_sql(x) for x in arg) + ']' - return six.text_type(arg) + return str(arg) # Arithmetic functions diff --git a/src/infi/clickhouse_orm/migrations.py b/src/infi/clickhouse_orm/migrations.py index 55622a9..3d8e146 100644 --- a/src/infi/clickhouse_orm/migrations.py +++ b/src/infi/clickhouse_orm/migrations.py @@ -1,13 +1,8 @@ -import six - from .models import Model, BufferModel from .fields import DateField, StringField from .engines import MergeTree from .utils import escape -from six.moves import zip -from six import iteritems - import logging logger = logging.getLogger('migrations') @@ -74,7 +69,7 @@ class AlterTable(Operation): # Identify fields that were added to the model prev_name = None - for name, field in iteritems(self.model_class.fields()): + for name, field in self.model_class.fields().items(): is_regular_field = not (field.materialized or field.alias) if name not in table_fields: logger.info(' Add column %s', name) @@ -94,7 +89,7 @@ class AlterTable(Operation): # Secondly, MATERIALIZED and ALIAS fields are always at the end of the 
DESC, so we can't expect them to save # attribute position. Watch https://github.com/Infinidat/infi.clickhouse_orm/issues/47 model_fields = {name: field.get_sql(with_default_expression=False, db=database) - for name, field in iteritems(self.model_class.fields())} + for name, field in self.model_class.fields().items()} for field_name, field_sql in self._get_table_fields(database): # All fields must have been created and dropped by this moment assert field_name in model_fields, 'Model fields and table columns in disagreement' @@ -156,7 +151,7 @@ class RunSQL(Operation): ''' def __init__(self, sql): - if isinstance(sql, six.string_types): + if isinstance(sql, str): sql = [sql] assert isinstance(sql, list), "'sql' parameter must be string or list of strings" diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 833cca3..f926dc0 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -3,7 +3,7 @@ import sys from collections import OrderedDict from logging import getLogger -from six import with_metaclass, reraise, iteritems +from six import reraise import pytz from .fields import Field, StringField @@ -31,8 +31,8 @@ class ModelBase(type): fields = base_fields # Build a list of fields, in the order they were listed in the class - fields.update({n: f for n, f in iteritems(attrs) if isinstance(f, Field)}) - fields = sorted(iteritems(fields), key=lambda item: item[1].creation_counter) + fields.update({n: f for n, f in attrs.items() if isinstance(f, Field)}) + fields = sorted(fields.items(), key=lambda item: item[1].creation_counter) # Build a dictionary of default values defaults = {n: f.to_python(f.default, pytz.UTC) for n, f in fields} @@ -102,7 +102,7 @@ class ModelBase(type): return getattr(orm_fields, name)() -class Model(with_metaclass(ModelBase)): +class Model(metaclass=ModelBase): ''' A base class for ORM models. Each model class represent a ClickHouse table. 
For example: @@ -134,7 +134,7 @@ class Model(with_metaclass(ModelBase)): # Assign default values self.__dict__.update(self._defaults) # Assign field values from keyword arguments - for name, value in iteritems(kwargs): + for name, value in kwargs.items(): field = self.get_field(name) if field: setattr(self, name, value) @@ -154,7 +154,7 @@ class Model(with_metaclass(ModelBase)): except ValueError: tp, v, tb = sys.exc_info() new_msg = "{} (field '{}')".format(v, name) - reraise(tp, tp(new_msg), tb) + raise tp.with_traceback(tp(new_msg), tb) super(Model, self).__setattr__(name, value) def set_database(self, db): @@ -196,7 +196,7 @@ class Model(with_metaclass(ModelBase)): ''' parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db.db_name, cls.table_name())] cols = [] - for name, field in iteritems(cls.fields()): + for name, field in cls.fields().items(): cols.append(' %s %s' % (name, field.get_sql(db=db))) parts.append(',\n'.join(cols)) parts.append(')') @@ -221,7 +221,6 @@ class Model(with_metaclass(ModelBase)): - `timezone_in_use`: the timezone to use when parsing dates and datetimes. - `database`: if given, sets the database that this instance belongs to. 
''' - from six import next values = iter(parse_tsv(line)) kwargs = {} for name in field_names: @@ -242,7 +241,7 @@ class Model(with_metaclass(ModelBase)): ''' data = self.__dict__ fields = self.fields(writable=not include_readonly) - return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in iteritems(fields)) + return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields.items()) def to_dict(self, include_readonly=True, field_names=None): ''' @@ -321,7 +320,7 @@ class MergeModel(Model): assert isinstance(cls.engine, Merge), "engine must be an instance of engines.Merge" parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db.db_name, cls.table_name())] cols = [] - for name, field in iteritems(cls.fields()): + for name, field in cls.fields().items(): if name != '_table': cols.append(' %s %s' % (name, field.get_sql(db=db))) parts.append(',\n'.join(cols)) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index e396312..8bd0494 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import six import pytz from copy import copy, deepcopy from math import ceil @@ -62,7 +61,7 @@ class InOperator(Operator): field = getattr(model_cls, field_name) if isinstance(value, QuerySet): value = value.as_sql() - elif isinstance(value, six.string_types): + elif isinstance(value, str): pass else: value = comma_join([self._value_to_sql(field, v) for v in value]) @@ -197,7 +196,7 @@ class Q(object): OR_MODE = 'OR' def __init__(self, *filter_funcs, **filter_fields): - self._conds = list(filter_funcs) + [self._build_cond(k, v) for k, v in six.iteritems(filter_fields)] + self._conds = list(filter_funcs) + [self._build_cond(k, v) for k, v in filter_fields.items()] self._children = [] self._negate = False self._mode = self.AND_MODE @@ -283,7 +282,6 @@ class Q(object): return q -@six.python_2_unicode_compatible class 
QuerySet(object): """ A queryset is an object that represents a database query using a specific `Model`. @@ -328,12 +326,12 @@ class QuerySet(object): return self.as_sql() def __getitem__(self, s): - if isinstance(s, six.integer_types): + if isinstance(s, int): # Single index assert s >= 0, 'negative indexes are not supported' qs = copy(self) qs._limits = (s, 1) - return six.next(iter(qs)) + return next(iter(qs)) else: # Slice assert s.step in (None, 1), 'step is not supported in slices' diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index d51ec3b..cb0cca6 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -3,7 +3,6 @@ This file contains system readonly models that can be got from the database https://clickhouse.yandex/docs/en/system_tables/ """ from __future__ import unicode_literals -from six import string_types from .database import Database from .fields import * @@ -124,7 +123,7 @@ class SystemPart(Model): :return: A list of SystemPart objects """ assert isinstance(database, Database), "database must be database.Database class instance" - assert isinstance(conditions, string_types), "conditions must be a string" + assert isinstance(conditions, str), "conditions must be a string" if conditions: conditions += " AND" field_names = ','.join(cls.fields()) diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index dcc8492..eb895a4 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -1,5 +1,4 @@ from __future__ import unicode_literals -from six import string_types, binary_type, text_type, PY3 import codecs import re @@ -28,11 +27,11 @@ def escape(value, quote=True): def escape_one(match): return SPECIAL_CHARS[match.group(0)] - if isinstance(value, string_types): + if isinstance(value, str): value = SPECIAL_CHARS_REGEX.sub(escape_one, value) if quote: value = "'" + value + "'" - return text_type(value) + 
return str(value) def unescape(value): @@ -44,7 +43,7 @@ def string_or_func(obj): def parse_tsv(line): - if PY3 and isinstance(line, binary_type): + if isinstance(line, bytes): line = line.decode() if line and line[-1] == '\n': line = line[:-1] diff --git a/tests/test_models.py b/tests/test_models.py index 1017d9f..2b5ad70 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -86,7 +86,7 @@ class ModelTestCase(unittest.TestCase): self.assertEqual( "Invalid value for StringField: {} (field 'str_field')".format(repr(bad_value)), - text_type(cm.exception) + str(cm.exception) ) def test_field_name_in_error_message_for_invalid_value_in_assignment(self): @@ -97,7 +97,7 @@ class ModelTestCase(unittest.TestCase): self.assertEqual( "Invalid value for Float32Field - {} (field 'float_field')".format(repr(bad_value)), - text_type(cm.exception) + str(cm.exception) ) From 0a94ac98a33546c93d9bb5f4e629824f3f086349 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 7 Feb 2020 15:30:15 +0200 Subject: [PATCH 09/41] Minor fixes --- src/infi/clickhouse_orm/models.py | 8 +++++--- src/infi/clickhouse_orm/query.py | 2 +- tests/test_database.py | 2 -- tests/test_funcs.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 7b4d603..9f32987 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -87,10 +87,12 @@ class ModelBase(type): if db_type.startswith('FixedString'): length = int(db_type[12 : -1]) return orm_fields.FixedStringField(length) - # Decimal + # Decimal / Decimal32 / Decimal64 / Decimal128 if db_type.startswith('Decimal'): - precision, scale = [int(n.strip()) for n in db_type[8 : -1].split(',')] - return orm_fields.DecimalField(precision, scale) + p = db_type.index('(') + args = [int(n.strip()) for n in db_type[p + 1 : -1].split(',')] + field_class = getattr(orm_fields, db_type[:p] + 'Field') + return field_class(*args) # Nullable if 
db_type.startswith('Nullable'): inner_field = cls.create_ad_hoc_field(db_type[9 : -1]) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 00a2904..c28462d 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -351,7 +351,7 @@ class QuerySet(object): - `offset_limit`: either an integer specifying the limit, or a tuple of integers (offset, limit). - `fields`: the field names to use in the clause. """ - if isinstance(offset_limit, six.integer_types): + if isinstance(offset_limit, int): # Single limit offset_limit = (0, offset_limit) offset = offset_limit[0] diff --git a/tests/test_database.py b/tests/test_database.py index 0433bff..e6aa435 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -215,6 +215,4 @@ class DatabaseTestCase(TestCaseWithData): from infi.clickhouse_orm.models import ModelBase query = "SELECT DISTINCT type FROM system.columns" for row in self.database.select(query): - if row.type in ('IPv4', 'IPv6'): - continue # unsupported yet ModelBase.create_ad_hoc_field(row.type) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 5068409..0cd5a24 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -307,7 +307,7 @@ class FuncsTestCase(TestCaseWithData): try: self._test_func(F.base64Decode(F.base64Encode('Hello')), 'Hello') self._test_func(F.tryBase64Decode(F.base64Encode('Hello')), 'Hello') - self._test_func(F.tryBase64Decode('zzz'), None) + self._test_func(F.tryBase64Decode(':-)'), None) except ServerError as e: # ClickHouse version that doesn't support these functions raise unittest.SkipTest(e.message) From ffeed4a6a4bce9c1d77a90b2e5712e87f365f809 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 8 Feb 2020 12:05:48 +0200 Subject: [PATCH 10/41] Support for function-based DEFAULT values, not only literals #36 --- docs/field_types.md | 190 +++++++++++++++++----------- src/infi/clickhouse_orm/database.py | 9 +- src/infi/clickhouse_orm/fields.py | 22 +--- 
src/infi/clickhouse_orm/models.py | 59 ++++++++- tests/test_alias_fields.py | 6 +- tests/test_database.py | 17 +++ tests/test_materialized_fields.py | 6 +- tests/test_models.py | 11 +- 8 files changed, 211 insertions(+), 109 deletions(-) diff --git a/docs/field_types.md b/docs/field_types.md index eae9b46..3c2d2bf 100644 --- a/docs/field_types.md +++ b/docs/field_types.md @@ -3,22 +3,22 @@ Field Types See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/data_types/) -Currently the following field types are supported: +The following field types are supported: | Class | DB Type | Pythonic Type | Comments | ------------------ | ---------- | --------------------- | ----------------------------------------------------- -| StringField | String | unicode | Encoded as UTF-8 when written to ClickHouse -| FixedStringField | String | unicode | Encoded as UTF-8 when written to ClickHouse +| StringField | String | str | Encoded as UTF-8 when written to ClickHouse +| FixedStringField | FixedString| str | Encoded as UTF-8 when written to ClickHouse | DateField | Date | datetime.date | Range 1970-01-01 to 2105-12-31 | DateTimeField | DateTime | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Always in UTC | Int8Field | Int8 | int | Range -128 to 127 | Int16Field | Int16 | int | Range -32768 to 32767 | Int32Field | Int32 | int | Range -2147483648 to 2147483647 -| Int64Field | Int64 | int/long | Range -9223372036854775808 to 9223372036854775807 +| Int64Field | Int64 | int | Range -9223372036854775808 to 9223372036854775807 | UInt8Field | UInt8 | int | Range 0 to 255 | UInt16Field | UInt16 | int | Range 0 to 65535 | UInt32Field | UInt32 | int | Range 0 to 4294967295 -| UInt64Field | UInt64 | int/long | Range 0 to 18446744073709551615 +| UInt64Field | UInt64 | int | Range 0 to 18446744073709551615 | Float32Field | Float32 | float | | Float64Field | Float64 | float | | DecimalField | Decimal | Decimal | Pythonic values are rounded to fit the scale of the database 
field
@@ -33,6 +33,113 @@ Currently the following field types are supported:
 | ArrayField         | Array      | list                  | See below
 | NullableField      | Nullable   | See below             | See below
 
+Field Options
+----------------
+All field types accept the following arguments:
+
+ - default
+ - alias
+ - materialized
+ - readonly
+ - codec
+
+Note that `default`, `alias` and `materialized` are mutually exclusive - you cannot use more than one of them in a single field.
+
+### default
+
+Specifies a default value to use for the field. If not given, the field will have a default value based on its type: empty string for string fields, zero for numeric fields, etc.
+The default value can be a Python value suitable for the field type, or an expression. For example:
+```python
+class Event(models.Model):
+
+    name = fields.StringField(default="EVENT")
+    repeated = fields.UInt32Field(default=1)
+    created = fields.DateTimeField(default=F.now())
+
+    engine = engines.Memory()
+    ...
+```
+When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.models.NO_VALUE` instead. For example:
+```python
+>>> event = Event()
+>>> print(event.to_dict())
+{'name': 'EVENT', 'repeated': 1, 'created': <NO_VALUE>}
+```
+:warning: Due to a bug in ClickHouse versions prior to 20.1.2.4, insertion of records with expressions for default values may fail.
+
+### alias / materialized
+
+The `alias` and `materialized` attributes expect an expression that gets calculated by the database. The difference is that `alias` fields are calculated on the fly, while `materialized` fields are calculated when the record is inserted, and are stored on disk.
+You can use any expression, and can refer to other model fields.
For example: +```python +class Event(models.Model): + + created = fields.DateTimeField() + created_date = fields.DateTimeField(materialized=F.toDate(created)) + name = fields.StringField() + normalized_name = fields.StringField(alias=F.upper(F.trim(name))) + + engine = engines.Memory() +``` +For backwards compatibility with older versions of the ORM, you can pass the expression as an SQL string: +```python + created_date = fields.DateTimeField(materialized="toDate(created)") +``` +Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query. + +Usage: +```python +obj = Event(created=datetime.now(), name='MyEvent') +db = Database('my_test_db') +db.insert([obj]) +# All values will be retrieved from database +db.select('SELECT created, created_date, normalized_name, name FROM $db.event', model_class=Event) +# created_date and normalized_name will contain a default value +db.select('SELECT * FROM $db.event', model_class=Event) +``` +When creating a model instance, any alias or materialized fields are assigned a sentinel value of `infi.clickhouse_orm.models.NO_VALUE` since their real values can only be known after insertion to the database. + +### readonly + +This attribute is set automatically for fields with `alias` or `materialized` attributes; you do not need to pass it yourself. + +### codec +This attribute specifies the compression algorithm to use for the field (instead of the default data compression algorithm defined in server settings). + +Supported compression algorithms: + +| Codec | Argument | Comment +| -------------------- | -------------------------------------------| ---------------------------------------------------- +| NONE | None | No compression. +| LZ4 | None | LZ4 compression. +| LZ4HC(`level`) | Possible `level` range: [3, 12]. | Default value: 9. 
Greater values stand for better compression and higher CPU usage. Recommended value range: [4,9]. +| ZSTD(`level`) | Possible `level` range: [1, 22]. | Default value: 1. Greater values stand for better compression and higher CPU usage. Levels >= 20 should be used with caution, as they require more memory. +| Delta(`delta_bytes`) | Possible `delta_bytes` range: 1, 2, 4, 8. | Default value for `delta_bytes` is `sizeof(type)` if it is equal to 1, 2, 4 or 8 and equals to 1 otherwise. + +Codecs can be combined by separating their names with commas. The default database codec is not included in the pipeline (if it should be applied to a field, you have to specify it explicitly in the pipeline). + +Recommended usage for codecs: +- When values for a particular metric do not differ significantly from point to point, delta-encoding allows reducing disk space usage significantly. +- DateTime works great with a pipeline of Delta and ZSTD, and the column size can be compressed to 2-3% of its original size (given smooth datetime data) +- Numeric types usually enjoy the best compression rates with ZSTD +- String types enjoy good compression rates with LZ4HC + +Example: +```python +class Stats(models.Model): + + id = fields.UInt64Field(codec='ZSTD(10)') + timestamp = fields.DateTimeField(codec='Delta,ZSTD') + timestamp_date = fields.DateField(codec='Delta(4),ZSTD(22)') + metadata_id = fields.Int64Field(codec='LZ4') + status = fields.StringField(codec='LZ4HC(10)') + calculation = fields.NullableField(fields.Float32Field(), codec='ZSTD') + alerts = fields.ArrayField(fields.FixedStringField(length=15), codec='Delta(2),LZ4HC') + + engine = MergeTree('timestamp_date', ('id', 'timestamp')) +``` +Note: This feature is supported on ClickHouse version 19.1.16 and above. Codec arguments will be ignored by the ORM for older versions of ClickHouse. 
+ DateTimeField and Time Zones ---------------------------- @@ -45,8 +152,7 @@ A `DateTimeField` can be assigned values from one of the following types: The assigned value always gets converted to a timezone-aware `datetime` in UTC. If the assigned value is a timezone-aware `datetime` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC. -DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse -version which is new enough to support the `timezone()` function, otherwise it is assumed to be using UTC. In any case, we recommend settings the server timezone to UTC in order to prevent confusion. +DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse version which is new enough to support the `timezone()` function, otherwise it is assumed to be using UTC. In any case, we recommend setting the server timezone to UTC in order to prevent confusion. Working with enum fields ------------------------ @@ -89,36 +195,6 @@ data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_l Note that multidimensional arrays are not supported yet by the ORM. -Working with materialized and alias fields ------------------------------------------- - -ClickHouse provides an opportunity to create MATERIALIZED and ALIAS Fields. - -See documentation [here](https://clickhouse.yandex/docs/en/query_language/queries/#default-values). - -Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query. 
- -Usage: - -```python -class Event(models.Model): - - created = fields.DateTimeField() - created_date = fields.DateTimeField(materialized='toDate(created)') - name = fields.StringField() - username = fields.StringField(alias='name') - - engine = engines.MergeTree('created_date', ('created_date', 'created')) - -obj = Event(created=datetime.now(), name='MyEvent') -db = Database('my_test_db') -db.insert([obj]) -# All values will be retrieved from database -db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event) -# created_date and username will contain a default value -db.select('SELECT * FROM $db.event', model_class=Event) -``` - Working with nullable fields ---------------------------- [ClickHouse provides a NULL value support](https://clickhouse.yandex/docs/en/data_types/nullable). @@ -149,46 +225,6 @@ NOTE: `ArrayField` of `NullableField` is not supported. Also `EnumField` cannot NOTE: Using `Nullable` almost always negatively affects performance, keep this in mind when designing your databases. -Working with field compression codecs -------------------------------------- -Besides default data compression, defined in server settings, per-field specification is also available. - -Supported compression algorithms: - -| Codec | Argument | Comment -| -------------------- | -------------------------------------------| ---------------------------------------------------- -| NONE | None | No compression. -| LZ4 | None | LZ4 compression. -| LZ4HC(`level`) | Possible `level` range: [3, 12]. | Default value: 9. Greater values stands for better compression and higher CPU usage. Recommended value range: [4,9]. -| ZSTD(`level`) | Possible `level`range: [1, 22]. | Default value: 1. Greater values stands for better compression and higher CPU usage. Levels >= 20, should be used with caution, as they require more memory. -| Delta(`delta_bytes`) | Possible `delta_bytes` range: 1, 2, 4 , 8. 
| Default value for `delta_bytes` is `sizeof(type)` if it is equal to 1, 2,4 or 8 and equals to 1 otherwise. - -Codecs can be combined in a pipeline. Default table codec is not included into pipeline (if it should be applied to a field, you have to specify it explicitly in pipeline). - -Recommended usage for codecs: -- Usually, values for particular metric, stored in path does not differ significantly from point to point. Using delta-encoding allows to reduce disk space usage significantly. -- DateTime works great with pipeline of Delta, ZSTD and the column size can be compressed to 2-3% of its original size (given a smooth datetime data) -- Numeric types usually enjoy best compression rates with ZSTD -- String types enjoy good compression rates with LZ4HC - -Usage: -```python -class Stats(models.Model): - - id = fields.UInt64Field(codec='ZSTD(10)') - timestamp = fields.DateTimeField(codec='Delta,ZSTD') - timestamp_date = fields.DateField(codec='Delta(4),ZSTD(22)') - metadata_id = fields.Int64Field(codec='LZ4') - status = fields.StringField(codec='LZ4HC(10)') - calculation = fields.NullableField(fields.Float32Field(), codec='ZSTD') - alerts = fields.ArrayField(fields.FixedStringField(length=15), codec='Delta(2),LZ4HC') - - engine = MergeTree('timestamp_date', ('id', 'timestamp')) - -``` - -Note: This feature is supported on ClickHouse version 19.1.16 and above. Codec arguments will be ignored by the ORM for older versions of ClickHouse. 
- Working with LowCardinality fields ---------------------------------- Starting with version 19.0 ClickHouse offers a new type of field to improve the performance of queries diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 6313082..9487109 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -199,20 +199,19 @@ class Database(object): fields_list = ','.join( ['`%s`' % name for name in first_instance.fields(writable=True)]) + fmt = 'TSKV' if model_class.has_funcs_as_defaults() else 'TabSeparated' + query = 'INSERT INTO $table (%s) FORMAT %s\n' % (fields_list, fmt) def gen(): buf = BytesIO() - query = 'INSERT INTO $table (%s) FORMAT TabSeparated\n' % fields_list buf.write(self._substitute(query, model_class).encode('utf-8')) first_instance.set_database(self) - buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8')) - buf.write('\n'.encode('utf-8')) + buf.write(first_instance.to_db_string()) # Collect lines in batches of batch_size lines = 2 for instance in i: instance.set_database(self) - buf.write(instance.to_tsv(include_readonly=False).encode('utf-8')) - buf.write('\n'.encode('utf-8')) + buf.write(instance.to_db_string()) lines += 1 if lines >= batch_size: # Return the current batch of lines diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index f649440..96127a7 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -89,6 +89,8 @@ class Field(FunctionOperatorsMixin): sql += ' ALIAS %s' % string_or_func(self.alias) elif self.materialized: sql += ' MATERIALIZED %s' % string_or_func(self.materialized) + elif isinstance(self.default, F): + sql += ' DEFAULT %s' % self.default.to_sql() elif self.default: default = self.to_db_string(self.default) sql += ' DEFAULT %s' % default @@ -112,26 +114,6 @@ class Field(FunctionOperatorsMixin): inner_field = getattr(inner_field, 'inner_field', None) return False - 
# Support comparison operators (for use in querysets) - - def __lt__(self, other): - return F.less(self, other) - - def __le__(self, other): - return F.lessOrEquals(self, other) - - def __eq__(self, other): - return F.equals(self, other) - - def __ne__(self, other): - return F.notEquals(self, other) - - def __gt__(self, other): - return F.greater(self, other) - - def __ge__(self, other): - return F.greaterOrEquals(self, other) - class StringField(Field): diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 9f32987..939d240 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -9,11 +9,23 @@ import pytz from .fields import Field, StringField from .utils import parse_tsv from .query import QuerySet +from .funcs import F from .engines import Merge, Distributed logger = getLogger('clickhouse_orm') +class NoValue: + ''' + A sentinel for fields with an expression for a default value, + that were not assigned a value yet. + ''' + def __repr__(self): + return '' + +NO_VALUE = NoValue() + + class ModelBase(type): ''' A metaclass for ORM models. It adds the _fields list to model classes. 
@@ -35,13 +47,23 @@ class ModelBase(type): fields = sorted(fields.items(), key=lambda item: item[1].creation_counter) # Build a dictionary of default values - defaults = {n: f.to_python(f.default, pytz.UTC) for n, f in fields} + defaults = {} + has_funcs_as_defaults = False + for n, f in fields: + if f.alias or f.materialized: + defaults[n] = NO_VALUE + elif isinstance(f.default, F): + defaults[n] = NO_VALUE + has_funcs_as_defaults = True + else: + defaults[n] = f.to_python(f.default, pytz.UTC) attrs = dict( attrs, _fields=OrderedDict(fields), _writable_fields=OrderedDict([f for f in fields if not f[1].readonly]), - _defaults=defaults + _defaults=defaults, + _has_funcs_as_defaults=has_funcs_as_defaults ) model = super(ModelBase, cls).__new__(cls, str(name), bases, attrs) @@ -195,6 +217,14 @@ class Model(metaclass=ModelBase): ''' return cls.__name__.lower() + @classmethod + def has_funcs_as_defaults(cls): + ''' + Return True if some of the model's fields use a function expression + as a default value. This requires special handling when inserting instances. + ''' + return cls._has_funcs_as_defaults + @classmethod def create_table_sql(cls, db): ''' @@ -249,6 +279,29 @@ class Model(metaclass=ModelBase): fields = self.fields(writable=not include_readonly) return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields.items()) + def to_tskv(self, include_readonly=True): + ''' + Returns the instance's column keys and values as a tab-separated line. A newline is not included. + Fields that were not assigned a value are omitted. + + - `include_readonly`: if false, returns only fields that can be inserted into database. 
+ ''' + data = self.__dict__ + fields = self.fields(writable=not include_readonly) + parts = [] + for name, field in fields.items(): + if data[name] != NO_VALUE: + parts.append(name + '=' + field.to_db_string(data[name], quote=False)) + return '\t'.join(parts) + + def to_db_string(self): + ''' + Returns the instance as a bytestring ready to be inserted into the database. + ''' + s = self.to_tskv(False) if self._has_funcs_as_defaults else self.to_tsv(False) + s += '\n' + return s.encode('utf-8') + def to_dict(self, include_readonly=True, field_names=None): ''' Returns the instance's column values as a dict. @@ -409,3 +462,5 @@ class DistributedModel(Model): db.db_name, cls.table_name(), cls.engine.table_name), 'ENGINE = ' + cls.engine.create_table_sql(db)] return '\n'.join(parts) + + diff --git a/tests/test_alias_fields.py b/tests/test_alias_fields.py index 92a2c41..a190d02 100644 --- a/tests/test_alias_fields.py +++ b/tests/test_alias_fields.py @@ -3,7 +3,7 @@ import unittest from datetime import date from infi.clickhouse_orm.database import Database -from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.models import Model, NO_VALUE from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * @@ -56,6 +56,10 @@ class AliasFieldsTest(unittest.TestCase): with self.assertRaises(AssertionError): StringField(alias='str_field', materialized='str_field') + def test_default_value(self): + instance = ModelWithAliasFields() + self.assertEqual(instance.alias_str, NO_VALUE) + class ModelWithAliasFields(Model): int_field = Int32Field() diff --git a/tests/test_database.py b/tests/test_database.py index e6aa435..ab9aa15 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -1,8 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals import unittest +import datetime from infi.clickhouse_orm.database import ServerError, DatabaseException +from infi.clickhouse_orm.models import Model +from 
infi.clickhouse_orm.engines import Memory +from infi.clickhouse_orm.fields import * from .base_test_with_data import * @@ -26,6 +30,19 @@ class DatabaseTestCase(TestCaseWithData): def test_insert__medium_batches(self): self._insert_and_check(self._sample_data(), len(data), batch_size=100) + def test_insert__funcs_as_default_values(self): + class TestModel(Model): + a = DateTimeField(default=datetime.datetime(2020, 1, 1)) + b = DateField(default=F.toDate(a)) + c = Int32Field(default=7) + d = Int32Field(default=c * 5) + engine = Memory() + self.database.create_table(TestModel) + self.database.insert([TestModel()]) + t = TestModel.objects_in(self.database)[0] + self.assertEqual(str(t.b), '2020-01-01') + self.assertEqual(t.d, 35) + def test_count(self): self.database.insert(self._sample_data()) self.assertEqual(self.database.count(Person), 100) diff --git a/tests/test_materialized_fields.py b/tests/test_materialized_fields.py index ee81234..4a8d62f 100644 --- a/tests/test_materialized_fields.py +++ b/tests/test_materialized_fields.py @@ -3,7 +3,7 @@ import unittest from datetime import date from infi.clickhouse_orm.database import Database -from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.models import Model, NO_VALUE from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * @@ -56,6 +56,10 @@ class MaterializedFieldsTest(unittest.TestCase): with self.assertRaises(AssertionError): StringField(materialized='str_field', alias='str_field') + def test_default_value(self): + instance = ModelWithMaterializedFields() + self.assertEqual(instance.mat_str, NO_VALUE) + class ModelWithMaterializedFields(Model): int_field = Int32Field() diff --git a/tests/test_models.py b/tests/test_models.py index 2b5ad70..3e1548a 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -3,9 +3,10 @@ import unittest import datetime import pytz -from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.models import Model, NO_VALUE 
from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * +from infi.clickhouse_orm.funcs import F class ModelTestCase(unittest.TestCase): @@ -18,6 +19,7 @@ class ModelTestCase(unittest.TestCase): self.assertEqual(instance.str_field, 'dozo') self.assertEqual(instance.int_field, 17) self.assertEqual(instance.float_field, 0) + self.assertEqual(instance.default_func, NO_VALUE) def test_assignment(self): # Check that all fields are assigned during construction @@ -64,14 +66,16 @@ class ModelTestCase(unittest.TestCase): "float_field": 7.0, "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), "alias_field": 0.0, - 'str_field': 'dozo' + "str_field": "dozo", + "default_func": NO_VALUE }) self.assertDictEqual(instance.to_dict(include_readonly=False), { "date_field": datetime.date(1973, 12, 6), "int_field": 100, "float_field": 7.0, "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - 'str_field': 'dozo' + "str_field": "dozo", + "default_func": NO_VALUE }) self.assertDictEqual( instance.to_dict(include_readonly=False, field_names=('int_field', 'alias_field', 'datetime_field')), { @@ -109,5 +113,6 @@ class SimpleModel(Model): int_field = Int32Field(default=17) float_field = Float32Field() alias_field = Float32Field(alias='float_field') + default_func = Float32Field(default=F.sqrt(float_field) + 17) engine = MergeTree('date_field', ('int_field', 'date_field')) From 4ffc27100dabeb1551471278b809fe455c33ea00 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 8 Feb 2020 12:12:42 +0200 Subject: [PATCH 11/41] Support for function-based DEFAULT values, not only literals #36 --- tests/test_compressed_fields.py | 4 ++-- tests/test_models.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_compressed_fields.py b/tests/test_compressed_fields.py index 5bb3282..3de5d22 100644 --- a/tests/test_compressed_fields.py +++ b/tests/test_compressed_fields.py @@ -4,7 +4,7 @@ import datetime 
import pytz from infi.clickhouse_orm.database import Database -from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.models import Model, NO_VALUE from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * from infi.clickhouse_orm.utils import parse_tsv @@ -67,7 +67,7 @@ class CompressedFieldsTestCase(unittest.TestCase): "int64_field": 100, "float_field": 7.0, "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - "alias_field": 0.0, + "alias_field": NO_VALUE, 'string_field': 'dozo', 'nullable_field': None, 'uint64_field': 0, diff --git a/tests/test_models.py b/tests/test_models.py index 3e1548a..33fb6a7 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -65,7 +65,7 @@ class ModelTestCase(unittest.TestCase): "int_field": 100, "float_field": 7.0, "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - "alias_field": 0.0, + "alias_field": NO_VALUE, "str_field": "dozo", "default_func": NO_VALUE }) From 93747f77583851af4ce230ab8fa490e97b841bcc Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 8 Feb 2020 12:38:23 +0200 Subject: [PATCH 12/41] Improve docs --- docs/class_reference.md | 1167 +++++++++++++++++++++- docs/expressions.md | 85 ++ docs/field_options.md | 112 +++ docs/field_types.md | 108 +- docs/models_and_databases.md | 26 +- docs/querysets.md | 16 +- docs/table_engines.md | 2 +- docs/toc.md | 14 +- scripts/generate_ref.py | 4 + scripts/generate_toc.sh | 1 + scripts/html_to_markdown_toc.py | 6 +- src/infi/clickhouse_orm/engines.py | 6 +- src/infi/clickhouse_orm/fields.py | 11 +- src/infi/clickhouse_orm/funcs.py | 13 +- src/infi/clickhouse_orm/query.py | 2 +- src/infi/clickhouse_orm/system_models.py | 58 +- 16 files changed, 1464 insertions(+), 167 deletions(-) create mode 100644 docs/expressions.md create mode 100644 docs/field_options.md diff --git a/docs/class_reference.md b/docs/class_reference.md index e98e33b..7f038d0 100644 --- a/docs/class_reference.md 
+++ b/docs/class_reference.md @@ -209,6 +209,13 @@ Returns `None` unless the instance was read from the database or written to it. Gets a `Field` instance given its name, or `None` if not found. +#### Model.has_funcs_as_defaults() + + +Return True if some of the model's fields use a function expression +as a default value. This requires special handling when inserting instances. + + #### Model.is_read_only() @@ -242,6 +249,12 @@ class name converted to lowercase. Override this if you want to use a different table name. +#### to_db_string() + + +Returns the instance as a bytestring ready to be inserted into the database. + + #### to_dict(include_readonly=True, field_names=None) @@ -251,6 +264,15 @@ Returns the instance's column values as a dict. - `field_names`: an iterable of field names to return (optional) +#### to_tskv(include_readonly=True) + + +Returns the instance's column keys and values as a tab-separated line. A newline is not included. +Fields that were not assigned a value are omitted. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + #### to_tsv(include_readonly=True) @@ -317,6 +339,13 @@ Returns `None` unless the instance was read from the database or written to it. Gets a `Field` instance given its name, or `None` if not found. +#### BufferModel.has_funcs_as_defaults() + + +Return True if some of the model's fields use a function expression +as a default value. This requires special handling when inserting instances. + + #### BufferModel.is_read_only() @@ -350,6 +379,12 @@ class name converted to lowercase. Override this if you want to use a different table name. +#### to_db_string() + + +Returns the instance as a bytestring ready to be inserted into the database. + + #### to_dict(include_readonly=True, field_names=None) @@ -359,6 +394,15 @@ Returns the instance's column values as a dict. 
- `field_names`: an iterable of field names to return (optional) +#### to_tskv(include_readonly=True) + + +Returns the instance's column keys and values as a tab-separated line. A newline is not included. +Fields that were not assigned a value are omitted. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + #### to_tsv(include_readonly=True) @@ -458,6 +502,13 @@ Returns `None` unless the instance was read from the database or written to it. Gets a `Field` instance given its name, or `None` if not found. +#### DistributedModel.has_funcs_as_defaults() + + +Return True if some of the model's fields use a function expression +as a default value. This requires special handling when inserting instances. + + #### DistributedModel.is_read_only() @@ -487,6 +538,12 @@ class name converted to lowercase. Override this if you want to use a different table name. +#### to_db_string() + + +Returns the instance as a bytestring ready to be inserted into the database. + + #### to_dict(include_readonly=True, field_names=None) @@ -496,6 +553,15 @@ Returns the instance's column values as a dict. - `field_names`: an iterable of field names to return (optional) +#### to_tskv(include_readonly=True) + + +Returns the instance's column keys and values as a tab-separated line. A newline is not included. +Fields that were not assigned a value are omitted. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + #### to_tsv(include_readonly=True) @@ -605,6 +671,8 @@ Extends BaseEnumField ### Field +Extends FunctionOperatorsMixin + Abstract base class for all field types. 
@@ -632,6 +700,20 @@ Extends BaseFloatField #### Float64Field(default=None, alias=None, materialized=None, readonly=None, codec=None) +### IPv4Field + +Extends Field + +#### IPv4Field(default=None, alias=None, materialized=None, readonly=None, codec=None) + + +### IPv6Field + +Extends Field + +#### IPv6Field(default=None, alias=None, materialized=None, readonly=None, codec=None) + + ### Int16Field Extends BaseIntField @@ -781,11 +863,11 @@ https://clickhouse.yandex/docs/en/table_engines/distributed.html #### Distributed(cluster, table=None, sharding_key=None) -:param cluster: what cluster to access data from -:param table: underlying table that actually stores data. +- `cluster`: what cluster to access data from +- `table`: underlying table that actually stores data. If you are not specifying any table here, ensure that it can be inferred from your model's superclass (see models.DistributedModel.fix_engine_table) -:param sharding_key: how to distribute data among shards when inserting +- `sharding_key`: how to distribute data among shards when inserting straightly into Distributed table, optional @@ -876,6 +958,9 @@ Returns a copy of this queryset that excludes all rows matching the conditions. Pass `prewhere=True` to apply the conditions as PREWHERE instead of WHERE. +#### extra(**kwargs) + + #### filter(*q, **kwargs) @@ -999,6 +1084,9 @@ Returns a copy of this queryset that excludes all rows matching the conditions. Pass `prewhere=True` to apply the conditions as PREWHERE instead of WHERE. +#### extra(**kwargs) + + #### filter(*q, **kwargs) @@ -1075,3 +1163,1076 @@ with aggregate function calculated across all the rows. More information: https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier +infi.clickhouse_orm.funcs +------------------------- + +### F + +Extends Cond, FunctionOperatorsMixin + + +Represents a database function call and its arguments. +It doubles as a query condition when the function returns a boolean result. 
+ +#### CAST(type) + + +#### CRC32() + + +#### IPv4CIDRToRange(cidr) + + +#### IPv4NumToString() + + +#### IPv4NumToStringClassC() + + +#### IPv4StringToNum() + + +#### IPv4ToIPv6() + + +#### IPv6CIDRToRange(cidr) + + +#### IPv6NumToString() + + +#### IPv6StringToNum() + + +#### MD5() + + +#### SHA1() + + +#### SHA224() + + +#### SHA256() + + +#### URLHash(n=None) + + +#### UUIDNumToString() + + +#### UUIDStringToNum() + + +#### F(name, *args) + + +Initializer. + + +#### abs() + + +#### acos() + + +#### addDays(n, timezone=None) + + +#### addHours(n, timezone=None) + + +#### addMinutes(n, timezone=None) + + +#### addMonths(n, timezone=None) + + +#### addQuarters(n, timezone=None) + + +#### addSeconds(n, timezone=None) + + +#### addWeeks(n, timezone=None) + + +#### addYears(n, timezone=None) + + +#### alphaTokens() + + +#### appendTrailingCharIfAbsent(c) + + +#### array() + + +#### arrayAll() + + +#### arrayConcat() + + +#### arrayCount() + + +#### arrayCumSum() + + +#### arrayCumSumNonNegative() + + +#### arrayDifference() + + +#### arrayDistinct() + + +#### arrayElement(n) + + +#### arrayEnumerate() + + +#### arrayEnumerateDense() + + +#### arrayEnumerateDenseRanked() + + +#### arrayEnumerateUniq() + + +#### arrayEnumerateUniqRanked() + + +#### arrayExists() + + +#### arrayIntersect() + + +#### arrayJoin() + + +#### arrayPopBack() + + +#### arrayPopFront() + + +#### arrayPushBack(x) + + +#### arrayPushFront(x) + + +#### arrayReduce(*args) + + +#### arrayResize(size, extender=None) + + +#### arrayReverse() + + +#### arrayReverseSort() + + +#### arraySlice(offset, length=None) + + +#### arraySort() + + +#### arrayStringConcat(sep=None) + + +#### arraySum() + + +#### arrayUniq() + + +#### asin() + + +#### atan() + + +#### base64Decode() + + +#### base64Encode() + + +#### bitAnd(y) + + +#### bitNot() + + +#### bitOr(y) + + +#### bitRotateLeft(y) + + +#### bitRotateRight(y) + + +#### bitShiftLeft(y) + + +#### bitShiftRight(y) + + +#### bitTest(y) + + +#### 
bitTestAll(*args) + + +#### bitTestAny(*args) + + +#### bitXor(y) + + +#### bitmapAnd(y) + + +#### bitmapAndCardinality(y) + + +#### bitmapAndnot(y) + + +#### bitmapAndnotCardinality(y) + + +#### bitmapBuild() + + +#### bitmapCardinality() + + +#### bitmapContains(needle) + + +#### bitmapHasAll(y) + + +#### bitmapHasAny(y) + + +#### bitmapOr(y) + + +#### bitmapOrCardinality(y) + + +#### bitmapToArray() + + +#### bitmapXor(y) + + +#### bitmapXorCardinality(y) + + +#### bitmaskToArray() + + +#### bitmaskToList() + + +#### cbrt() + + +#### ceiling(n=None) + + +#### ceiling(n=None) + + +#### cityHash64() + + +#### concat() + + +#### convertCharset(from_charset, to_charset) + + +#### cos() + + +#### countEqual(x) + + +#### divide(**kwargs) + + +#### e() + + +#### empty() + + +#### emptyArrayDate() + + +#### emptyArrayDateTime() + + +#### emptyArrayFloat32() + + +#### emptyArrayFloat64() + + +#### emptyArrayInt16() + + +#### emptyArrayInt32() + + +#### emptyArrayInt64() + + +#### emptyArrayInt8() + + +#### emptyArrayString() + + +#### emptyArrayToSingle() + + +#### emptyArrayUInt16() + + +#### emptyArrayUInt32() + + +#### emptyArrayUInt64() + + +#### emptyArrayUInt8() + + +#### endsWith(suffix) + + +#### equals(**kwargs) + + +#### erf() + + +#### erfc() + + +#### exp() + + +#### exp10() + + +#### exp2() + + +#### farmHash64() + + +#### floor(n=None) + + +#### formatDateTime(format, timezone="") + + +#### gcd(b) + + +#### generateUUIDv4() + + +#### greater(**kwargs) + + +#### greaterOrEquals(**kwargs) + + +#### halfMD5() + + +#### has(x) + + +#### hasAll(x) + + +#### hasAny(x) + + +#### hex() + + +#### hiveHash() + + +#### indexOf(x) + + +#### intDiv(b) + + +#### intDivOrZero(b) + + +#### intExp10() + + +#### intExp2() + + +#### intHash32() + + +#### intHash64() + + +#### javaHash() + + +#### jumpConsistentHash(buckets) + + +#### lcm(b) + + +#### length() + + +#### lengthUTF8() + + +#### less(**kwargs) + + +#### lessOrEquals(**kwargs) + + +#### lgamma() + + +#### log() + 
+ +#### log() + + +#### log10() + + +#### log2() + + +#### lower() + + +#### lowerUTF8() + + +#### metroHash64() + + +#### minus(**kwargs) + + +#### modulo(**kwargs) + + +#### multiply(**kwargs) + + +#### murmurHash2_32() + + +#### murmurHash2_64() + + +#### murmurHash3_128() + + +#### murmurHash3_32() + + +#### murmurHash3_64() + + +#### negate() + + +#### notEmpty() + + +#### notEquals(**kwargs) + + +#### now() + + +#### parseDateTimeBestEffort(timezone=None) + + +#### parseDateTimeBestEffortOrNull(timezone=None) + + +#### parseDateTimeBestEffortOrZero(timezone=None) + + +#### pi() + + +#### plus(**kwargs) + + +#### power(y) + + +#### power(y) + + +#### rand() + + +#### rand64() + + +#### randConstant() + + +#### range() + + +#### regexpQuoteMeta() + + +#### replace(pattern, replacement) + + +#### replaceAll(pattern, replacement) + + +#### replaceOne(pattern, replacement) + + +#### replaceRegexpAll(pattern, replacement) + + +#### replaceRegexpOne(pattern, replacement) + + +#### reverse() + + +#### reverseUTF8() + + +#### round(n=None) + + +#### roundAge() + + +#### roundDown(y) + + +#### roundDuration() + + +#### roundToExp2() + + +#### sin() + + +#### sipHash128() + + +#### sipHash64() + + +#### splitByChar(s) + + +#### splitByString(s) + + +#### sqrt() + + +#### startsWith(prefix) + + +#### substring(offset, length) + + +#### substringUTF8(offset, length) + + +#### subtractDays(n, timezone=None) + + +#### subtractHours(n, timezone=None) + + +#### subtractMinutes(n, timezone=None) + + +#### subtractMonths(n, timezone=None) + + +#### subtractQuarters(n, timezone=None) + + +#### subtractSeconds(n, timezone=None) + + +#### subtractWeeks(n, timezone=None) + + +#### subtractYears(n, timezone=None) + + +#### tan() + + +#### tgamma() + + +#### timeSlot() + + +#### timeSlots(duration) + + +#### toDate() + + +#### toDateTime() + + +#### toDayOfMonth() + + +#### toDayOfWeek() + + +#### toDecimal128(scale) + + +#### toDecimal32(scale) + + +#### toDecimal64(scale) + + +#### 
toFixedString(length) + + +#### toFloat32() + + +#### toFloat32OrZero() + + +#### toFloat64() + + +#### toFloat64OrZero() + + +#### toHour() + + +#### toIPv4() + + +#### toIPv6() + + +#### toInt16() + + +#### toInt16OrZero() + + +#### toInt32() + + +#### toInt32OrZero() + + +#### toInt64() + + +#### toInt64OrZero() + + +#### toInt8() + + +#### toInt8OrZero() + + +#### toMinute() + + +#### toMonday() + + +#### toMonth() + + +#### toRelativeDayNum(timezone="") + + +#### toRelativeHourNum(timezone="") + + +#### toRelativeMinuteNum(timezone="") + + +#### toRelativeMonthNum(timezone="") + + +#### toRelativeSecondNum(timezone="") + + +#### toRelativeWeekNum(timezone="") + + +#### toRelativeYearNum(timezone="") + + +#### toSecond() + + +#### toStartOfDay() + + +#### toStartOfFifteenMinutes() + + +#### toStartOfFiveMinute() + + +#### toStartOfHour() + + +#### toStartOfMinute() + + +#### toStartOfMonth() + + +#### toStartOfQuarter() + + +#### toStartOfYear() + + +#### toString() + + +#### toStringCutToZero() + + +#### toTime(timezone="") + + +#### toUInt16() + + +#### toUInt16OrZero() + + +#### toUInt32() + + +#### toUInt32OrZero() + + +#### toUInt64() + + +#### toUInt64OrZero() + + +#### toUInt8() + + +#### toUInt8OrZero() + + +#### toUUID() + + +#### toYear() + + +#### to_sql(*args) + + +Generates an SQL string for this function and its arguments. +For example if the function name is a symbol of a binary operator: + (2.54 * `height`) +For other functions: + gcd(12, 300) + + +#### today() + + +#### trimBoth() + + +#### trimLeft() + + +#### trimRight() + + +#### tryBase64Decode() + + +#### unhex() + + +#### upper() + + +#### upperUTF8() + + +#### xxHash32() + + +#### xxHash64() + + +#### yesterday() + + +infi.clickhouse_orm.system_models +--------------------------------- + +### SystemPart + +Extends Model + + +Contains information about parts of a table in the MergeTree family. +This model operates only fields, described in the reference. Other fields are ignored. 
+https://clickhouse.yandex/docs/en/system_tables/system.parts/ + +#### SystemPart(**kwargs) + + +Creates a model instance, using keyword arguments as field values. +Since values are immediately converted to their Pythonic type, +invalid values will cause a `ValueError` to be raised. +Unrecognized field names will cause an `AttributeError`. + + +#### attach(settings=None) + + + Add a new part or partition from the 'detached' directory to the table. + +- `settings`: Settings for executing request to ClickHouse over db.raw() method + +Returns: SQL Query + + +#### SystemPart.create_table_sql(db) + + +Returns the SQL command for creating a table for this model. + + +#### detach(settings=None) + + +Move a partition to the 'detached' directory and forget it. + +- `settings`: Settings for executing request to ClickHouse over db.raw() method + +Returns: SQL Query + + +#### drop(settings=None) + + +Delete a partition + +- `settings`: Settings for executing request to ClickHouse over db.raw() method + +Returns: SQL Query + + +#### SystemPart.drop_table_sql(db) + + +Returns the SQL command for deleting this model's table. + + +#### fetch(zookeeper_path, settings=None) + + +Download a partition from another server. + +- `zookeeper_path`: Path in zookeeper to fetch from +- `settings`: Settings for executing request to ClickHouse over db.raw() method + +Returns: SQL Query + + +#### SystemPart.fields(writable=False) + + +Returns an `OrderedDict` of the model's fields (from name to `Field` instance). +If `writable` is true, only writable fields are included. +Callers should not modify the dictionary. + + +#### freeze(settings=None) + + +Create a backup of a partition. + +- `settings`: Settings for executing request to ClickHouse over db.raw() method + +Returns: SQL Query + + +#### SystemPart.from_tsv(line, field_names, timezone_in_use=UTC, database=None) + + +Create a model instance from a tab-separated line. The line may or may not include a newline. 
+The `field_names` list must match the fields defined in the model, but does not have to include all of them. + +- `line`: the TSV-formatted data. +- `field_names`: names of the model fields in the data. +- `timezone_in_use`: the timezone to use when parsing dates and datetimes. +- `database`: if given, sets the database that this instance belongs to. + + +#### SystemPart.get(database, conditions="") + + +Get all data from system.parts table + +- `database`: A database object to fetch data from. +- `conditions`: WHERE clause conditions. Database condition is added automatically + +Returns: A list of SystemPart objects + + +#### SystemPart.get_active(database, conditions="") + + +Gets active data from system.parts table + +- `database`: A database object to fetch data from. +- `conditions`: WHERE clause conditions. Database and active conditions are added automatically + +Returns: A list of SystemPart objects + + +#### get_database() + + +Gets the `Database` that this model instance belongs to. +Returns `None` unless the instance was read from the database or written to it. + + +#### get_field(name) + + +Gets a `Field` instance given its name, or `None` if not found. + + +#### SystemPart.has_funcs_as_defaults() + + +Return True if some of the model's fields use a function expression +as a default value. This requires special handling when inserting instances. + + +#### SystemPart.is_read_only() + + +Returns true if the model is marked as read only. + + +#### SystemPart.is_system_model() + + +Returns true if the model represents a system table. + + +#### SystemPart.objects_in(database) + + +Returns a `QuerySet` for selecting instances of this model class. + + +#### set_database(db) + + +Sets the `Database` that this model instance belongs to. +This is done automatically when the instance is read from the database or written to it. 
+ + +#### SystemPart.table_name() + + +#### to_db_string() + + +Returns the instance as a bytestring ready to be inserted into the database. + + +#### to_dict(include_readonly=True, field_names=None) + + +Returns the instance's column values as a dict. + +- `include_readonly`: if false, returns only fields that can be inserted into database. +- `field_names`: an iterable of field names to return (optional) + + +#### to_tskv(include_readonly=True) + + +Returns the instance's column keys and values as a tab-separated line. A newline is not included. +Fields that were not assigned a value are omitted. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + +#### to_tsv(include_readonly=True) + + +Returns the instance's column values as a tab-separated line. A newline is not included. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + diff --git a/docs/expressions.md b/docs/expressions.md new file mode 100644 index 0000000..66e16e2 --- /dev/null +++ b/docs/expressions.md @@ -0,0 +1,85 @@ + +Expressions +=========== + +One of the ORM's core concepts is _expressions_, which are composed using functions, operators and model fields. Expressions are used in multiple places in the ORM: + +- When defining [field options](field_options.md) - `default`, `alias` and `materialized`. +- In [table engine](table_engines.md) parameters for engines in the `MergeTree` family. +- In [queryset](querysets.md) methods such as `filter`, `exclude`, `order_by`, `extra`, `aggregate` and `limit_by`. + +Using Expressions +----------------- + +Expressions usually include ClickHouse database functions, which are made available by the `F` class. 
Here's a simple function:
+```python
+from infi.clickhouse_orm.funcs import F
+expr = F.today()
+```
+
+Functions that accept arguments can be composed, just like when using SQL:
+```python
+expr = F.toDayOfWeek(F.today())
+```
+
+You can see the SQL expression that is represented by an ORM expression by calling its `to_sql` or `repr` methods:
+```python
+>>> print(expr.to_sql())
+toDayOfWeek(today())
+```
+
+### Operators
+
+ORM expressions support Python's standard arithmetic operators, so you can compose expressions using `+`, `-`, `*`, `/` and `%`. For example:
+```python
+# A random integer between 1 and 10
+F.rand() % 10 + 1
+```
+
+There is also support for comparison operators (`<`, `<=`, `==`, `>=`, `>`, `!=`) and logical operators (`&`, `|`, `~`, `^`) which are often used for filtering querysets:
+```python
+# Is it Friday the 13th?
+(F.toDayOfWeek(F.today()) == 6) & (F.toDayOfMonth(F.today()) == 13)
+```
+
+### Referring to model fields
+
+To refer to a model field inside an expression, use `<class>.<field>` syntax, for example:
+```python
+# Convert the temperature from Celsius to Fahrenheit
+Sensor.temperature * 1.8 + 32
+```
+
+Inside model class definitions omit the class name:
+```python
+class Person(Model):
+    height_cm = fields.Float32Field()
+    height_inch = fields.Float32Field(alias=height_cm/2.54)
+    ...
+```
+
+### Creating new "functions"
+
+Since expressions are just Python objects until they get converted to SQL, it is possible to invent new "functions" by combining existing ones into useful building blocks.
For example, we can create a reusable expression that takes a string and trims whitespace, converts it to uppercase, and changes blanks to underscores: +```python +def normalize_string(s): + return F.replaceAll(F.upper(F.trimBoth(s)), ' ', '_') +``` + +Then we can use this expression anywhere we need it: +```python +class Event(Model): + code = fields.StringField() + normalized_code = fields.StringField(materialized=normalize_string(code)) +``` + +### Which functions are available? + +ClickHouse has many hundreds of functions, and new ones often get added. If you encounter a function that the database supports but is not available in the `F` class, please report this via a GitHub issue. You can still use the function by providing its name: +```python +expr = F("someFunctionName", arg1, arg2, ...) +``` + +--- + +[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md) \ No newline at end of file diff --git a/docs/field_options.md b/docs/field_options.md new file mode 100644 index 0000000..db3e58f --- /dev/null +++ b/docs/field_options.md @@ -0,0 +1,112 @@ +Field Options +============= + +All field types accept the following arguments: + + - default + - alias + - materialized + - readonly + - codec + +Note that `default`, `alias` and `materialized` are mutually exclusive - you cannot use more than one of them in a single field. + +## default + +Specifies a default value to use for the field. If not given, the field will have a default value based on its type: empty string for string fields, zero for numeric fields, etc. +The default value can be a Python value suitable for the field type, or an expression. For example: +```python +class Event(models.Model): + + name = fields.StringField(default="EVENT") + repeated = fields.UInt32Field(default=1) + created = fields.DateTimeField(default=F.now()) + + engine = engines.Memory() + ... 
+```
+When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.models.NO_VALUE` instead. For example:
+```python
+>>> event = Event()
+>>> print(event.to_dict())
+{'name': 'EVENT', 'repeated': 1, 'created': <NO_VALUE>}
+```
+:warning: Due to a bug in ClickHouse versions prior to 20.1.2.4, insertion of records with expressions for default values may fail.
+
+## alias / materialized
+
+The `alias` and `materialized` attributes expect an expression that gets calculated by the database. The difference is that `alias` fields are calculated on the fly, while `materialized` fields are calculated when the record is inserted, and are stored on disk.
+You can use any expression, and can refer to other model fields. For example:
+```python
+class Event(models.Model):
+
+    created = fields.DateTimeField()
+    created_date = fields.DateTimeField(materialized=F.toDate(created))
+    name = fields.StringField()
+    normalized_name = fields.StringField(alias=F.upper(F.trim(name)))
+
+    engine = engines.Memory()
+```
+For backwards compatibility with older versions of the ORM, you can pass the expression as an SQL string:
+```python
+    created_date = fields.DateTimeField(materialized="toDate(created)")
+```
+Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query.
+ +Usage: +```python +obj = Event(created=datetime.now(), name='MyEvent') +db = Database('my_test_db') +db.insert([obj]) +# All values will be retrieved from database +db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event) +# created_date and username will contain a default value +db.select('SELECT * FROM $db.event', model_class=Event) +``` +When creating a model instance, any alias or materialized fields are assigned a sentinel value of `infi.clickhouse_orm.models.NO_VALUE` since their real values can only be known after insertion to the database. + +## codec + +This attribute specifies the compression algorithm to use for the field (instead of the default data compression algorithm defined in server settings). + +Supported compression algorithms: + +| Codec | Argument | Comment +| -------------------- | -------------------------------------------| ---------------------------------------------------- +| NONE | None | No compression. +| LZ4 | None | LZ4 compression. +| LZ4HC(`level`) | Possible `level` range: [3, 12]. | Default value: 9. Greater values stands for better compression and higher CPU usage. Recommended value range: [4,9]. +| ZSTD(`level`) | Possible `level`range: [1, 22]. | Default value: 1. Greater values stands for better compression and higher CPU usage. Levels >= 20, should be used with caution, as they require more memory. +| Delta(`delta_bytes`) | Possible `delta_bytes` range: 1, 2, 4 , 8. | Default value for `delta_bytes` is `sizeof(type)` if it is equal to 1, 2,4 or 8 and equals to 1 otherwise. + +Codecs can be combined by separating their names with commas. The default database codec is not included into pipeline (if it should be applied to a field, you have to specify it explicitly in pipeline). + +Recommended usage for codecs: +- When values for particular metric do not differ significantly from point to point, delta-encoding allows to reduce disk space usage significantly. 
+- DateTime works great with pipeline of Delta, ZSTD and the column size can be compressed to 2-3% of its original size (given a smooth datetime data) +- Numeric types usually enjoy best compression rates with ZSTD +- String types enjoy good compression rates with LZ4HC + +Example: +```python +class Stats(models.Model): + + id = fields.UInt64Field(codec='ZSTD(10)') + timestamp = fields.DateTimeField(codec='Delta,ZSTD') + timestamp_date = fields.DateField(codec='Delta(4),ZSTD(22)') + metadata_id = fields.Int64Field(codec='LZ4') + status = fields.StringField(codec='LZ4HC(10)') + calculation = fields.NullableField(fields.Float32Field(), codec='ZSTD') + alerts = fields.ArrayField(fields.FixedStringField(length=15), codec='Delta(2),LZ4HC') + + engine = MergeTree('timestamp_date', ('id', 'timestamp')) +``` +Note: This feature is supported on ClickHouse version 19.1.16 and above. Codec arguments will be ignored by the ORM for older versions of ClickHouse. + +## readonly + +This attribute is set automatically for fields with `alias` or `materialized` attributes, you do not need to pass it yourself. + +--- + +[<< Querysets](querysets.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md) \ No newline at end of file diff --git a/docs/field_types.md b/docs/field_types.md index 3c2d2bf..434564a 100644 --- a/docs/field_types.md +++ b/docs/field_types.md @@ -33,112 +33,6 @@ The following field types are supported: | ArrayField | Array | list | See below | NullableField | Nullable | See below | See below -Field Options ----------------- -All field types accept the following arguments: - - - default - - alias - - materialized - - readonly - - codec - -Note that `default`, `alias` and `materialized` are mutually exclusive - you cannot use more than one of them in a single field. - -### default - -Specifies a default value to use for the field. 
If not given, the field will have a default value based on its type: empty string for string fields, zero for numeric fields, etc. -The default value can be a Python value suitable for the field type, or an expression. For example: -```python -class Event(models.Model): - - name = fields.StringField(default="EVENT") - repeated = fields.UInt32Field(default=1) - created = fields.DateTimeField(default=F.now()) - - engine = engines.Memory() - ... -``` -When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.models.NO_VALUE` instead. For example: -```python ->>> event = Event() ->>> print(event.to_dict()) -{'name': 'EVENT', 'repeated': 1, 'created': } -``` -:warning: Due to a bug in ClickHouse versions prior to 20.1.2.4, insertion of records with expressions for default values may fail. - -### alias / materialized - -The `alias` and `materialized` attributes expect an expression that gets calculated by the database. The difference is that `alias` fields are calculated on the fly, while `materialized` fields are calculated when the record is inserted, and are stored on disk. -You can use any expression, and can refer to other model fields. For example: -```python -class Event(models.Model): - - created = fields.DateTimeField() - created_date = fields.DateTimeField(materialized=F.toDate(created)) - name = fields.StringField() - normalized_name = fields.StringField(alias=F.upper(F.trim(name))) - - engine = engines.Memory() -``` -For backwards compatibility with older versions of the ORM, you can pass the expression as an SQL string: -```python - created_date = fields.DateTimeField(materialized="toDate(created)") -``` -Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. 
ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query. - -Usage: -```python -obj = Event(created=datetime.now(), name='MyEvent') -db = Database('my_test_db') -db.insert([obj]) -# All values will be retrieved from database -db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event) -# created_date and username will contain a default value -db.select('SELECT * FROM $db.event', model_class=Event) -``` -When creating a model instance, any alias or materialized fields are assigned a sentinel value of `infi.clickhouse_orm.models.NO_VALUE` since their real values can only be known after insertion to the database. - -### readonly - -This attribute is set automatically for fields with `alias` or `materialized` attributes, you do not need to pass it yourself. - -### codec -This attribute specifies the compression algorithm to use for the field (instead of the default data compression algorithm defined in server settings). - -Supported compression algorithms: - -| Codec | Argument | Comment -| -------------------- | -------------------------------------------| ---------------------------------------------------- -| NONE | None | No compression. -| LZ4 | None | LZ4 compression. -| LZ4HC(`level`) | Possible `level` range: [3, 12]. | Default value: 9. Greater values stands for better compression and higher CPU usage. Recommended value range: [4,9]. -| ZSTD(`level`) | Possible `level`range: [1, 22]. | Default value: 1. Greater values stands for better compression and higher CPU usage. Levels >= 20, should be used with caution, as they require more memory. -| Delta(`delta_bytes`) | Possible `delta_bytes` range: 1, 2, 4 , 8. | Default value for `delta_bytes` is `sizeof(type)` if it is equal to 1, 2,4 or 8 and equals to 1 otherwise. - -Codecs can be combined by separating their names with commas. 
The default database codec is not included into pipeline (if it should be applied to a field, you have to specify it explicitly in pipeline). - -Recommended usage for codecs: -- When values for particular metric do not differ significantly from point to point, delta-encoding allows to reduce disk space usage significantly. -- DateTime works great with pipeline of Delta, ZSTD and the column size can be compressed to 2-3% of its original size (given a smooth datetime data) -- Numeric types usually enjoy best compression rates with ZSTD -- String types enjoy good compression rates with LZ4HC - -Example: -```python -class Stats(models.Model): - - id = fields.UInt64Field(codec='ZSTD(10)') - timestamp = fields.DateTimeField(codec='Delta,ZSTD') - timestamp_date = fields.DateField(codec='Delta(4),ZSTD(22)') - metadata_id = fields.Int64Field(codec='LZ4') - status = fields.StringField(codec='LZ4HC(10)') - calculation = fields.NullableField(fields.Float32Field(), codec='ZSTD') - alerts = fields.ArrayField(fields.FixedStringField(length=15), codec='Delta(2),LZ4HC') - - engine = MergeTree('timestamp_date', ('id', 'timestamp')) -``` -Note: This feature is supported on ClickHouse version 19.1.16 and above. Codec arguments will be ignored by the ORM for older versions of ClickHouse. 
DateTimeField and Time Zones ---------------------------- @@ -294,4 +188,4 @@ class BooleanField(Field): --- -[<< Querysets](querysets.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md) \ No newline at end of file +[<< Field Options](field_options.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md) \ No newline at end of file diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md index 59947db..c6ce1ca 100644 --- a/docs/models_and_databases.md +++ b/docs/models_and_databases.md @@ -31,6 +31,8 @@ Each field has a "natural" default value - empty string for string fields, zero first_name = fields.StringField(default="anonymous") +For additional details see [here](field_options.md). + ### Null values To allow null values in a field, wrap it inside a `NullableField`: @@ -39,25 +41,27 @@ To allow null values in a field, wrap it inside a `NullableField`: In this case, the default value for that field becomes `null` unless otherwise specified. +For more information about `NullableField` see [Field Types](field_types.md). + ### Materialized fields The value of a materialized field is calculated from other fields in the model. For example: - year_born = fields.Int16Field(materialized="toYear(birthday)") + year_born = fields.Int16Field(materialized=F.toYear(birthday)) Materialized fields are read-only, meaning that their values are not sent to the database when inserting records. -It is not possible to specify a default value for a materialized field. +For additional details see [here](field_options.md). ### Alias fields An alias field is a field whose value is calculated by ClickHouse on the fly, as a function of other fields. It is not physically stored by the database. 
For example: - weekday_born = field.UInt8Field(alias="toDayOfWeek(birthday)") + weekday_born = field.UInt8Field(alias=F.toDayOfWeek(birthday)) Alias fields are read-only, meaning that their values are not sent to the database when inserting records. -It is not possible to specify a default value for an alias field. +For additional details see [here](field_options.md). ### Table Names @@ -121,19 +125,19 @@ Reading from the Database Loading model instances from the database is simple: for person in db.select("SELECT * FROM my_test_db.person", model_class=Person): - print person.first_name, person.last_name + print(person.first_name, person.last_name) Do not include a `FORMAT` clause in the query, since the ORM automatically sets the format to `TabSeparatedWithNamesAndTypes`. It is possible to select only a subset of the columns, and the rest will receive their default values: for person in db.select("SELECT first_name FROM my_test_db.person WHERE last_name='Smith'", model_class=Person): - print person.first_name + print(person.first_name) The ORM provides a way to build simple queries without writing SQL by hand. The previous snippet can be written like this: for person in Person.objects_in(db).filter(last_name='Smith').only('first_name'): - print person.first_name + print(person.first_name) See [Querysets](querysets.md) for more information. @@ -144,7 +148,7 @@ Reading without a Model When running a query, specifying a model class is not required. In case you do not provide a model class, an ad-hoc class will be defined based on the column names and types returned by the query: for row in db.select("SELECT max(height) as max_height FROM my_test_db.person"): - print row.max_height + print(row.max_height) This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax. 
@@ -180,9 +184,9 @@ It is possible to paginate through model instances: >>> order_by = 'first_name, last_name' >>> page = db.paginate(Person, order_by, page_num=1, page_size=10) - >>> print page.number_of_objects + >>> print(page.number_of_objects) 2507 - >>> print page.pages_total + >>> print(page.pages_total) 251 >>> for person in page.objects: >>> # do something @@ -204,4 +208,4 @@ Note that `order_by` must be chosen so that the ordering is unique, otherwise th --- -[<< Overview](index.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md) \ No newline at end of file +[<< Overview](index.md) | [Table of Contents](toc.md) | [Expressions >>](expressions.md) \ No newline at end of file diff --git a/docs/querysets.md b/docs/querysets.md index 056e794..d5dfb25 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -8,7 +8,7 @@ A queryset is an object that represents a database query using a specific Model. This queryset matches all Person instances in the database. You can get these instances using iteration: for person in qs: - print person.first_name, person.last_name + print(person.first_name, person.last_name) Filtering --------- @@ -128,7 +128,7 @@ Adds a DISTINCT clause to the query, meaning that any duplicate rows in the resu Final -------- -This method can be used only with CollapsingMergeTree engine. +This method can be used only with `CollapsingMergeTree` engine. Adds a FINAL modifier to the query, meaning data is selected fully "collapsed" by sign field. 
>>> Person.objects_in(database).count() @@ -162,9 +162,9 @@ Similar to `Database.paginate`, you can go over the queryset results one page at >>> qs = Person.objects_in(database).order_by('last_name', 'first_name') >>> page = qs.paginate(page_num=1, page_size=10) - >>> print page.number_of_objects + >>> print(page.number_of_objects) 2507 - >>> print page.pages_total + >>> print(page.pages_total) 251 >>> for person in page.objects: >>> # do something @@ -185,9 +185,9 @@ Aggregation It is possible to use aggregation functions over querysets using the `aggregate` method. The simplest form of aggregation works over all rows in the queryset: >>> qs = Person.objects_in(database).aggregate(average_height='avg(height)') - >>> print qs.count() + >>> print(qs.count()) 1 - >>> for row in qs: print row.average_height + >>> for row in qs: print(row.average_height) 1.71 The returned row or rows are no longer instances of the base model (`Person` in this example), but rather instances of an ad-hoc model that includes only the fields specified in the call to `aggregate`. @@ -215,7 +215,7 @@ To achieve this, you can use `with_totals` method. It will return extra row (las values aggregated for all rows suitable for filters. qs = Person.objects_in(database).aggregate('first_name', num='count()').with_totals().order_by('-count')[:3] - >>> print qs.count() + >>> print(qs.count()) 4 >>> for row in qs: >>> print("'{}': {}".format(row.first_name, row.count)) @@ -225,4 +225,4 @@ values aggregated for all rows suitable for filters. 
--- -[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md) \ No newline at end of file +[<< Expressions](expressions.md) | [Table of Contents](toc.md) | [Field Options >>](field_options.md) \ No newline at end of file diff --git a/docs/table_engines.md b/docs/table_engines.md index d4ba905..eb213ff 100644 --- a/docs/table_engines.md +++ b/docs/table_engines.md @@ -1,7 +1,7 @@ Table Engines ============= -See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/table_engines/) +See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/operations/table_engines/) Each model must have an engine instance, used when creating the table in ClickHouse. diff --git a/docs/toc.md b/docs/toc.md index 0c81cb3..6a9f29a 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -30,13 +30,17 @@ * [Pagination](querysets.md#pagination) * [Aggregation](querysets.md#aggregation) + * [Field Options](field_options.md#field-options) + * [default](field_options.md#default) + * [alias / materialized](field_options.md#alias-/-materialized) + * [codec](field_options.md#codec) + * [readonly](field_options.md#readonly) + * [Field Types](field_types.md#field-types) * [DateTimeField and Time Zones](field_types.md#datetimefield-and-time-zones) * [Working with enum fields](field_types.md#working-with-enum-fields) * [Working with array fields](field_types.md#working-with-array-fields) - * [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields) * [Working with nullable fields](field_types.md#working-with-nullable-fields) - * [Working with field compression codecs](field_types.md#working-with-field-compression-codecs) * [Working with LowCardinality fields](field_types.md#working-with-lowcardinality-fields) * [Creating custom field types](field_types.md#creating-custom-field-types) @@ -84,6 +88,8 @@ * [FixedStringField](class_reference.md#fixedstringfield) * 
[Float32Field](class_reference.md#float32field) * [Float64Field](class_reference.md#float64field) + * [IPv4Field](class_reference.md#ipv4field) + * [IPv6Field](class_reference.md#ipv6field) * [Int16Field](class_reference.md#int16field) * [Int32Field](class_reference.md#int32field) * [Int64Field](class_reference.md#int64field) @@ -111,4 +117,8 @@ * [infi.clickhouse_orm.query](class_reference.md#infi.clickhouse_orm.query) * [QuerySet](class_reference.md#queryset) * [AggregateQuerySet](class_reference.md#aggregatequeryset) + * [infi.clickhouse_orm.funcs](class_reference.md#infi.clickhouse_orm.funcs) + * [F](class_reference.md#f) + * [infi.clickhouse_orm.system_models](class_reference.md#infi.clickhouse_orm.system_models) + * [SystemPart](class_reference.md#systempart) diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py index 0cbdb0d..bb9df4e 100644 --- a/scripts/generate_ref.py +++ b/scripts/generate_ref.py @@ -125,6 +125,8 @@ if __name__ == '__main__': from infi.clickhouse_orm import engines from infi.clickhouse_orm import models from infi.clickhouse_orm import query + from infi.clickhouse_orm import funcs + from infi.clickhouse_orm import system_models print('Class Reference') print('===============') @@ -134,3 +136,5 @@ if __name__ == '__main__': module_doc(sorted([fields.Field] + all_subclasses(fields.Field), key=lambda x: x.__name__), False) module_doc([engines.Engine] + all_subclasses(engines.Engine), False) module_doc([query.QuerySet, query.AggregateQuerySet]) + module_doc([funcs.F]) + module_doc([system_models.SystemPart]) diff --git a/scripts/generate_toc.sh b/scripts/generate_toc.sh index 7ed82ce..a77aaaa 100755 --- a/scripts/generate_toc.sh +++ b/scripts/generate_toc.sh @@ -9,6 +9,7 @@ printf "# Table of Contents\n\n" > toc.md generate_one "index.md" generate_one "models_and_databases.md" generate_one "querysets.md" +generate_one "field_options.md" generate_one "field_types.md" generate_one "table_engines.md" generate_one "schema_migrations.md" 
diff --git a/scripts/html_to_markdown_toc.py b/scripts/html_to_markdown_toc.py index 169e698..9ddd41b 100644 --- a/scripts/html_to_markdown_toc.py +++ b/scripts/html_to_markdown_toc.py @@ -1,4 +1,4 @@ -from HTMLParser import HTMLParser +from html.parser import HTMLParser import sys @@ -18,7 +18,7 @@ class HeadersToMarkdownParser(HTMLParser): if tag.lower() in HEADER_TAGS: indent = ' ' * int(self.inside[1]) fragment = self.text.lower().replace(' ', '-') - print '%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment) + print('%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment)) self.inside = None self.text = '' @@ -28,4 +28,4 @@ class HeadersToMarkdownParser(HTMLParser): HeadersToMarkdownParser().feed(sys.stdin.read()) -print +print('') diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index d01e389..905daf6 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -216,11 +216,11 @@ class Distributed(Engine): """ def __init__(self, cluster, table=None, sharding_key=None): """ - :param cluster: what cluster to access data from - :param table: underlying table that actually stores data. + - `cluster`: what cluster to access data from + - `table`: underlying table that actually stores data. 
If you are not specifying any table here, ensure that it can be inferred from your model's superclass (see models.DistributedModel.fix_engine_table) - :param sharding_key: how to distribute data among shards when inserting + - `sharding_key`: how to distribute data among shards when inserting straightly into Distributed table, optional """ self.cluster = cluster diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 96127a7..4f12691 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -74,9 +74,10 @@ class Field(FunctionOperatorsMixin): def get_sql(self, with_default_expression=True, db=None): ''' Returns an SQL expression describing the field (e.g. for CREATE TABLE). - :param with_default_expression: If True, adds default value to sql. + + - `with_default_expression`: If True, adds default value to sql. It doesn't affect fields with alias and materialized values. - :param db: Database, used for checking supported features. + - `db`: Database, used for checking supported features. ''' sql = self.db_type if with_default_expression: @@ -102,8 +103,10 @@ class Field(FunctionOperatorsMixin): """ Checks if the instance if one of the types provided or if any of the inner_field child is one of the types provided, returns True if field or any inner_field is one of ths provided, False otherwise - :param types: Iterable of types to check inclusion of instance - :return: Boolean + + - `types`: Iterable of types to check inclusion of instance + + Returns: Boolean """ if isinstance(self, types): return True diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 31febe3..bd8e26f 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -110,10 +110,17 @@ class F(Cond, FunctionOperatorsMixin): It doubles as a query condition when the function returns a boolean result. """ def __init__(self, name, *args): + """ + Initializer. 
+ + """ self.name = name self.args = args self.is_binary_operator = False + def __repr__(self): + return self.to_sql() + def to_sql(self, *args): # FIXME why *args ? """ Generates an SQL string for this function and its arguments. @@ -128,11 +135,11 @@ class F(Cond, FunctionOperatorsMixin): else: prefix = self.name sep = ', ' - arg_strs = (F.arg_to_sql(arg) for arg in self.args) + arg_strs = (F._arg_to_sql(arg) for arg in self.args) return prefix + '(' + sep.join(arg_strs) + ')' @staticmethod - def arg_to_sql(arg): + def _arg_to_sql(arg): """ Converts a function argument to SQL string according to its type. Supports functions, model fields, strings, dates, datetimes, booleans, @@ -156,7 +163,7 @@ class F(Cond, FunctionOperatorsMixin): if arg is None: return 'NULL' if is_iterable(arg): - return '[' + comma_join(F.arg_to_sql(x) for x in arg) + ']' + return '[' + comma_join(F._arg_to_sql(x) for x in arg) + ']' return str(arg) # Arithmetic functions diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index c28462d..06a3266 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -205,7 +205,7 @@ class Q(object): def is_empty(self): """ Checks if there are any conditions in Q object - :return: Boolean + Returns: Boolean """ return not bool(self._conds or self._children) diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index cb0cca6..dfbe46b 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -60,10 +60,12 @@ class SystemPart(Model): def _partition_operation_sql(self, operation, settings=None, from_part=None): """ Performs some operation over partition - :param db: Database object to execute operation on - :param operation: Operation to execute from SystemPart.OPERATIONS set - :param settings: Settings for executing request to ClickHouse over db.raw() method - :return: Operation execution result + + - `db`: 
Database object to execute operation on + - `operation`: Operation to execute from SystemPart.OPERATIONS set + - `settings`: Settings for executing request to ClickHouse over db.raw() method + + Returns: Operation execution result """ operation = operation.upper() assert operation in self.OPERATIONS, "operation must be in [%s]" % comma_join(self.OPERATIONS) @@ -76,41 +78,51 @@ class SystemPart(Model): def detach(self, settings=None): """ Move a partition to the 'detached' directory and forget it. - :param settings: Settings for executing request to ClickHouse over db.raw() method - :return: SQL Query + + - `settings`: Settings for executing request to ClickHouse over db.raw() method + + Returns: SQL Query """ return self._partition_operation_sql('DETACH', settings=settings) def drop(self, settings=None): """ Delete a partition - :param settings: Settings for executing request to ClickHouse over db.raw() method - :return: SQL Query + + - `settings`: Settings for executing request to ClickHouse over db.raw() method + + Returns: SQL Query """ return self._partition_operation_sql('DROP', settings=settings) def attach(self, settings=None): """ Add a new part or partition from the 'detached' directory to the table. - :param settings: Settings for executing request to ClickHouse over db.raw() method - :return: SQL Query + + - `settings`: Settings for executing request to ClickHouse over db.raw() method + + Returns: SQL Query """ return self._partition_operation_sql('ATTACH', settings=settings) def freeze(self, settings=None): """ Create a backup of a partition. - :param settings: Settings for executing request to ClickHouse over db.raw() method - :return: SQL Query + + - `settings`: Settings for executing request to ClickHouse over db.raw() method + + Returns: SQL Query """ return self._partition_operation_sql('FREEZE', settings=settings) def fetch(self, zookeeper_path, settings=None): """ Download a partition from another server. 
- :param zookeeper_path: Path in zookeeper to fetch from - :param settings: Settings for executing request to ClickHouse over db.raw() method - :return: SQL Query + + - `zookeeper_path`: Path in zookeeper to fetch from + - `settings`: Settings for executing request to ClickHouse over db.raw() method + + Returns: SQL Query """ return self._partition_operation_sql('FETCH', settings=settings, from_part=zookeeper_path) @@ -118,9 +130,11 @@ class SystemPart(Model): def get(cls, database, conditions=""): """ Get all data from system.parts table - :param database: A database object to fetch data from. - :param conditions: WHERE clause conditions. Database condition is added automatically - :return: A list of SystemPart objects + + - `database`: A database object to fetch data from. + - `conditions`: WHERE clause conditions. Database condition is added automatically + + Returns: A list of SystemPart objects """ assert isinstance(database, Database), "database must be database.Database class instance" assert isinstance(conditions, str), "conditions must be a string" @@ -134,9 +148,11 @@ class SystemPart(Model): def get_active(cls, database, conditions=""): """ Gets active data from system.parts table - :param database: A database object to fetch data from. - :param conditions: WHERE clause conditions. Database and active conditions are added automatically - :return: A list of SystemPart objects + + - `database`: A database object to fetch data from. + - `conditions`: WHERE clause conditions. 
Database and active conditions are added automatically + + Returns: A list of SystemPart objects """ if conditions: conditions += ' AND ' From 25c4a6710e61c9498fc6b419ae32302c83254354 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 9 Feb 2020 19:20:56 +0200 Subject: [PATCH 13/41] - funcs support in limit_by - aggregate functions wip --- docs/expressions.md | 2 +- src/infi/clickhouse_orm/funcs.py | 88 ++++++++++++++++++++++++++++++++ src/infi/clickhouse_orm/query.py | 11 ++-- tests/test_funcs.py | 34 ++++++++++++ tests/test_querysets.py | 21 ++++++++ 5 files changed, 149 insertions(+), 7 deletions(-) diff --git a/docs/expressions.md b/docs/expressions.md index 66e16e2..a9b6838 100644 --- a/docs/expressions.md +++ b/docs/expressions.md @@ -6,7 +6,7 @@ One of the ORM's core concepts is _expressions_, which are composed using functi - When defining [field options](field_options.md) - `default`, `alias` and `materialized`. - In [table engine](table_engines.md) parameters for engines in the `MergeTree` family. -- In [queryset](querysets.md) methods such as `filter`, `exclude`, `order_by`, `extra`, `aggregate` and `limit_by`. +- In [queryset](querysets.md) methods such as `filter`, `exclude`, `order_by`, `aggregate` and `limit_by`. 
Using Expressions ----------------- diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index bd8e26f..065e46e 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -1311,7 +1311,95 @@ class F(Cond, FunctionOperatorsMixin): def toIPv6(ipv6): return F('toIPv6', ipv6) + # Aggregate functions + @staticmethod + def any(x): + return F('any', x) + + @staticmethod + def anyHeavy(x): + return F('anyHeavy', x) + + @staticmethod + def anyLast(x): + return F('anyLast', x) + + @staticmethod + def argMax(x, y): + return F('argMax', x, y) + + @staticmethod + def argMin(x, y): + return F('argMin', x, y) + + @staticmethod + def avg(x): + return F('avg', x) + + @staticmethod + def corr(x, y): + return F('corr', x, y) + + @staticmethod + def count(): + return F('count') + + @staticmethod + def covarPop(x, y): + return F('covarPop', x, y) + + @staticmethod + def covarSamp(x, y): + return F('covarSamp', x, y) + + @staticmethod + def kurtPop(x): + return F('kurtPop', x) + + @staticmethod + def kurtSamp(x): + return F('kurtSamp', x) + + @staticmethod + def min(x): + return F('min', x) + + @staticmethod + def max(x): + return F('max', x) + + @staticmethod + def skewPop(x): + return F('skewPop', x) + + @staticmethod + def skewSamp(x): + return F('skewSamp', x) + + @staticmethod + def sum(x): + return F('sum', x) + + @staticmethod + def uniq(*args): + return F('uniq', *args) + + @staticmethod + def uniqExact(*args): + return F('uniqExact', *args) + + @staticmethod + def uniqHLL12(*args): + return F('uniqHLL12', *args) + + @staticmethod + def varPop(x): + return F('varPop', x) + + @staticmethod + def varSamp(x): + return F('varSamp', x) # Higher-order functions diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 06a3266..b32d9d5 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -5,12 +5,11 @@ from copy import copy, deepcopy from math import ceil from 
.engines import CollapsingMergeTree from datetime import date, datetime -from .utils import comma_join +from .utils import comma_join, string_or_func # TODO # - check that field names are valid -# - operators for arrays: length, has, empty class Operator(object): """ @@ -345,11 +344,11 @@ class QuerySet(object): qs._limits = (start, stop - start) return qs - def limit_by(self, offset_limit, *fields): + def limit_by(self, offset_limit, *fields_or_expr): """ Adds a LIMIT BY clause to the query. - `offset_limit`: either an integer specifying the limit, or a tuple of integers (offset, limit). - - `fields`: the field names to use in the clause. + - `fields_or_expr`: the field names or expressions to use in the clause. """ if isinstance(offset_limit, int): # Single limit @@ -359,7 +358,7 @@ class QuerySet(object): assert offset >= 0 and limit >= 0, 'negative limits are not supported' qs = copy(self) qs._limit_by = (offset, limit) - qs._limit_by_fields = fields + qs._limit_by_fields = fields_or_expr return qs def select_fields_as_sql(self): @@ -403,7 +402,7 @@ class QuerySet(object): if self._limit_by: sql += '\nLIMIT %d, %d' % self._limit_by - sql += ' BY %s' % comma_join('`%s`' % field for field in self._limit_by_fields) + sql += ' BY %s' % comma_join(string_or_func(field) for field in self._limit_by_fields) if self._limits: sql += '\nLIMIT %d, %d' % self._limits diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 0cd5a24..d969836 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -30,6 +30,15 @@ class FuncsTestCase(TestCaseWithData): self.assertEqual(result[0].value, expected_value) return result[0].value if result else None + def _test_aggr(self, func, expected_value=None): + qs = Person.objects_in(self.database).aggregate(value=func) + logger.info(qs.as_sql()) + result = list(qs) + logger.info('\t==> %s', result[0].value if result else '') + if expected_value is not None: + self.assertEqual(result[0].value, expected_value) + return result[0].value 
if result else None + def test_func_to_sql(self): # No args self.assertEqual(F('func').to_sql(), 'func()') @@ -514,3 +523,28 @@ class FuncsTestCase(TestCaseWithData): # These require support for tuples: # self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16), ['192.168.0.0','192.168.255.255']) # self._test_func(F.IPv6CIDRToRange(x, y)) + + def test_aggregate_funcs(self): + self._test_aggr(F.any(Person.first_name)) + self._test_aggr(F.anyHeavy(Person.first_name)) + self._test_aggr(F.anyLast(Person.first_name)) + self._test_aggr(F.argMin(Person.first_name, Person.height)) + self._test_aggr(F.argMax(Person.first_name, Person.height)) + self._test_aggr(F.round(F.avg(Person.height), 4), sum(p.height for p in self._sample_data()) / 100) + self._test_aggr(F.corr(Person.height, Person.height), 1) + self._test_aggr(F.count(), 100) + self._test_aggr(F.round(F.covarPop(Person.height, Person.height), 2), 0) + self._test_aggr(F.round(F.covarSamp(Person.height, Person.height), 2), 0) + self._test_aggr(F.kurtPop(Person.height)) + self._test_aggr(F.kurtSamp(Person.height)) + self._test_aggr(F.min(Person.height), 1.59) + self._test_aggr(F.max(Person.height), 1.80) + self._test_aggr(F.skewPop(Person.height)) + self._test_aggr(F.skewSamp(Person.height)) + self._test_aggr(F.round(F.sum(Person.height), 4), sum(p.height for p in self._sample_data())) + self._test_aggr(F.uniq(Person.first_name, Person.last_name), 100) + self._test_aggr(F.uniqExact(Person.first_name, Person.last_name), 100) + self._test_aggr(F.uniqHLL12(Person.first_name, Person.last_name), 99) + self._test_aggr(F.varPop(Person.height)) + self._test_aggr(F.varSamp(Person.height)) + diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 95f70b6..2134765 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -339,6 +339,22 @@ class AggregateTestCase(TestCaseWithData): self.assertAlmostEqual(row.average_height, 1.675, places=4) self.assertEqual(row.count, 2) + def 
test_aggregate_with_filter__funcs(self): + # When filter comes before aggregate + qs = Person.objects_in(self.database).filter(Person.first_name=='Warren').aggregate(average_height=F.avg(Person.height), count=F.count()) + print(qs.as_sql()) + self.assertEqual(qs.count(), 1) + for row in qs: + self.assertAlmostEqual(row.average_height, 1.675, places=4) + self.assertEqual(row.count, 2) + # When filter comes after aggregate + qs = Person.objects_in(self.database).aggregate(average_height=F.avg(Person.height), count=F.count()).filter(Person.first_name=='Warren') + print(qs.as_sql()) + self.assertEqual(qs.count(), 1) + for row in qs: + self.assertAlmostEqual(row.average_height, 1.675, places=4) + self.assertEqual(row.count, 2) + def test_aggregate_with_implicit_grouping(self): qs = Person.objects_in(self.database).aggregate('first_name', average_height='avg(height)', count='count()') print(qs.as_sql()) @@ -453,6 +469,11 @@ class AggregateTestCase(TestCaseWithData): order_by('first_name', '-height').limit_by(1, 'first_name') self.assertEqual(qs.count(), 94) self.assertEqual(list(qs)[89].last_name, 'Bowen') + # Test with funcs + qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n=F.count()).\ + order_by('first_name', '-height').limit_by(1, F.upper(Person.first_name)) + self.assertEqual(qs.count(), 94) + self.assertEqual(list(qs)[89].last_name, 'Bowen') # Test with limit and offset, also mixing LIMIT with LIMIT BY qs = Person.objects_in(self.database).filter(height__gt=1.67).order_by('height', 'first_name') limited_qs = qs.limit_by((0, 3), 'height') From 9f36b17fee11f222e619278d9e9ccae06d5ee8b3 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Mon, 10 Feb 2020 10:06:21 +0200 Subject: [PATCH 14/41] - move NO_VALUE to utils - dynamic generation of func variants (...OrZero, ...OrNull) --- docs/field_options.md | 4 +- src/infi/clickhouse_orm/funcs.py | 239 +++++++++++++++++++----------- src/infi/clickhouse_orm/models.py | 13 +- 
src/infi/clickhouse_orm/utils.py | 11 ++ tests/test_funcs.py | 35 ++++- 5 files changed, 196 insertions(+), 106 deletions(-) diff --git a/docs/field_options.md b/docs/field_options.md index db3e58f..3905afd 100644 --- a/docs/field_options.md +++ b/docs/field_options.md @@ -25,7 +25,7 @@ class Event(models.Model): engine = engines.Memory() ... ``` -When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.models.NO_VALUE` instead. For example: +When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE` instead. For example: ```python >>> event = Event() >>> print(event.to_dict()) @@ -63,7 +63,7 @@ db.select('SELECT created, created_date, username, name FROM $db.event', model_c # created_date and username will contain a default value db.select('SELECT * FROM $db.event', model_class=Event) ``` -When creating a model instance, any alias or materialized fields are assigned a sentinel value of `infi.clickhouse_orm.models.NO_VALUE` since their real values can only be known after insertion to the database. +When creating a model instance, any alias or materialized fields are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE` since their real values can only be known after insertion to the database. 
## codec diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 065e46e..9e02b0c 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -1,7 +1,9 @@ from datetime import date, datetime, tzinfo -import functools +from functools import wraps +from inspect import signature, Parameter +from types import FunctionType -from .utils import is_iterable, comma_join +from .utils import is_iterable, comma_join, NO_VALUE from .query import Cond @@ -9,7 +11,7 @@ def binary_operator(func): """ Decorates a function to mark it as a binary operator. """ - @functools.wraps(func) + @wraps(func) def wrapper(*args, **kwargs): ret = func(*args, **kwargs) ret.is_binary_operator = True @@ -17,6 +19,29 @@ def binary_operator(func): return wrapper +def type_conversion(func): + """ + Decorates a function to mark it as a type conversion function. + """ + @wraps(func) + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + wrapper.f_type = 'type_conversion' + return wrapper + + +def aggregate(func): + """ + Decorates a function to mark it as an aggregate function. + """ + @wraps(func) + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + wrapper.f_type = 'aggregate' + return wrapper + + + class FunctionOperatorsMixin(object): """ A mixin for implementing Python operators using F objects. 
@@ -104,7 +129,57 @@ class FunctionOperatorsMixin(object): return F._not(self) -class F(Cond, FunctionOperatorsMixin): +class FMeta(type): + + FUNCTION_COMBINATORS = { + 'type_conversion': [ + {'suffix': 'OrZero'}, + {'suffix': 'OrNull'}, + ], + 'aggregate': [ + {'suffix': 'OrDefault'}, + {'suffix': 'OrNull'}, + {'suffix': 'If', 'args': ['cond']}, + {'suffix': 'OrDefaultIf', 'args': ['cond']}, + {'suffix': 'OrNullIf', 'args': ['cond']}, + ] + } + + def __init__(cls, name, bases, dct): + for name, obj in dct.items(): + if hasattr(obj, '__func__'): + f_type = getattr(obj.__func__, 'f_type', '') + for combinator in FMeta.FUNCTION_COMBINATORS.get(f_type, []): + new_name = name + combinator['suffix'] + FMeta._add_func(cls, obj.__func__, new_name, combinator.get('args')) + + @staticmethod + def _add_func(cls, base_func, new_name, extra_args): + """ + Adds a new func to the cls, based on the signature of the given base_func but with a new name. + """ + # Get the function's signature + sig = signature(base_func) + new_sig = str(sig)[1 : -1] # omit the parentheses + args = comma_join(sig.parameters) + # Add extra args + if extra_args: + if args: + args = comma_join([args] + extra_args) + new_sig = comma_join([new_sig] + extra_args) + else: + args = comma_join(extra_args) + new_sig = comma_join(extra_args) + # Get default values for args + argdefs = tuple(p.default for p in sig.parameters.values() if p.default != Parameter.empty) + # Build the new function + new_code = compile(f'def {new_name}({new_sig}): return F("{new_name}", {args})', __file__, 'exec') + new_func = FunctionType(code=new_code.co_consts[0], globals=globals(), name=new_name, argdefs=argdefs) + # Attach to class + setattr(cls, new_name, new_func) + + +class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): """ Represents a database function call and its arguments. It doubles as a query condition when the function returns a boolean result. 
@@ -135,7 +210,7 @@ class F(Cond, FunctionOperatorsMixin): else: prefix = self.name sep = ', ' - arg_strs = (F._arg_to_sql(arg) for arg in self.args) + arg_strs = (F._arg_to_sql(arg) for arg in self.args if arg != NO_VALUE) return prefix + '(' + sep.join(arg_strs) + ')' @staticmethod @@ -392,168 +467,143 @@ class F(Cond, FunctionOperatorsMixin): return F('formatDateTime', d, format, timezone) @staticmethod - def addDays(d, n, timezone=None): - return F('addDays', d, n, timezone) if timezone else F('addDays', d, n) + def addDays(d, n, timezone=NO_VALUE): + return F('addDays', d, n, timezone) @staticmethod - def addHours(d, n, timezone=None): - return F('addHours', d, n, timezone) if timezone else F('addHours', d, n) + def addHours(d, n, timezone=NO_VALUE): + return F('addHours', d, n, timezone) @staticmethod - def addMinutes(d, n, timezone=None): - return F('addMinutes', d, n, timezone) if timezone else F('addMinutes', d, n) + def addMinutes(d, n, timezone=NO_VALUE): + return F('addMinutes', d, n, timezone) @staticmethod - def addMonths(d, n, timezone=None): - return F('addMonths', d, n, timezone) if timezone else F('addMonths', d, n) + def addMonths(d, n, timezone=NO_VALUE): + return F('addMonths', d, n, timezone) @staticmethod - def addQuarters(d, n, timezone=None): - return F('addQuarters', d, n, timezone) if timezone else F('addQuarters', d, n) + def addQuarters(d, n, timezone=NO_VALUE): + return F('addQuarters', d, n, timezone) @staticmethod - def addSeconds(d, n, timezone=None): - return F('addSeconds', d, n, timezone) if timezone else F('addSeconds', d, n) + def addSeconds(d, n, timezone=NO_VALUE): + return F('addSeconds', d, n, timezone) @staticmethod - def addWeeks(d, n, timezone=None): - return F('addWeeks', d, n, timezone) if timezone else F('addWeeks', d, n) + def addWeeks(d, n, timezone=NO_VALUE): + return F('addWeeks', d, n, timezone) @staticmethod - def addYears(d, n, timezone=None): - return F('addYears', d, n, timezone) if timezone else 
F('addYears', d, n) + def addYears(d, n, timezone=NO_VALUE): + return F('addYears', d, n, timezone) @staticmethod - def subtractDays(d, n, timezone=None): - return F('subtractDays', d, n, timezone) if timezone else F('subtractDays', d, n) + def subtractDays(d, n, timezone=NO_VALUE): + return F('subtractDays', d, n, timezone) @staticmethod - def subtractHours(d, n, timezone=None): - return F('subtractHours', d, n, timezone) if timezone else F('subtractHours', d, n) + def subtractHours(d, n, timezone=NO_VALUE): + return F('subtractHours', d, n, timezone) @staticmethod - def subtractMinutes(d, n, timezone=None): - return F('subtractMinutes', d, n, timezone) if timezone else F('subtractMinutes', d, n) + def subtractMinutes(d, n, timezone=NO_VALUE): + return F('subtractMinutes', d, n, timezone) @staticmethod - def subtractMonths(d, n, timezone=None): - return F('subtractMonths', d, n, timezone) if timezone else F('subtractMonths', d, n) + def subtractMonths(d, n, timezone=NO_VALUE): + return F('subtractMonths', d, n, timezone) @staticmethod - def subtractQuarters(d, n, timezone=None): - return F('subtractQuarters', d, n, timezone) if timezone else F('subtractQuarters', d, n) + def subtractQuarters(d, n, timezone=NO_VALUE): + return F('subtractQuarters', d, n, timezone) @staticmethod - def subtractSeconds(d, n, timezone=None): - return F('subtractSeconds', d, n, timezone) if timezone else F('subtractSeconds', d, n) + def subtractSeconds(d, n, timezone=NO_VALUE): + return F('subtractSeconds', d, n, timezone) @staticmethod - def subtractWeeks(d, n, timezone=None): - return F('subtractWeeks', d, n, timezone) if timezone else F('subtractWeeks', d, n) + def subtractWeeks(d, n, timezone=NO_VALUE): + return F('subtractWeeks', d, n, timezone) @staticmethod - def subtractYears(d, n, timezone=None): - return F('subtractYears', d, n, timezone) if timezone else F('subtractYears', d, n) + def subtractYears(d, n, timezone=NO_VALUE): + return F('subtractYears', d, n, timezone) # Type 
conversion functions @staticmethod + @type_conversion def toUInt8(x): return F('toUInt8', x) @staticmethod + @type_conversion def toUInt16(x): return F('toUInt16', x) @staticmethod + @type_conversion def toUInt32(x): return F('toUInt32', x) @staticmethod + @type_conversion def toUInt64(x): return F('toUInt64', x) @staticmethod + @type_conversion def toInt8(x): return F('toInt8', x) @staticmethod + @type_conversion def toInt16(x): return F('toInt16', x) @staticmethod + @type_conversion def toInt32(x): return F('toInt32', x) @staticmethod + @type_conversion def toInt64(x): return F('toInt64', x) @staticmethod + @type_conversion def toFloat32(x): return F('toFloat32', x) @staticmethod + @type_conversion def toFloat64(x): return F('toFloat64', x) @staticmethod - def toUInt8OrZero(x): - return F('toUInt8OrZero', x) - - @staticmethod - def toUInt16OrZero(x): - return F('toUInt16OrZero', x) - - @staticmethod - def toUInt32OrZero(x): - return F('toUInt32OrZero', x) - - @staticmethod - def toUInt64OrZero(x): - return F('toUInt64OrZero', x) - - @staticmethod - def toInt8OrZero(x): - return F('toInt8OrZero', x) - - @staticmethod - def toInt16OrZero(x): - return F('toInt16OrZero', x) - - @staticmethod - def toInt32OrZero(x): - return F('toInt32OrZero', x) - - @staticmethod - def toInt64OrZero(x): - return F('toInt64OrZero', x) - - @staticmethod - def toFloat32OrZero(x): - return F('toFloat32OrZero', x) - - @staticmethod - def toFloat64OrZero(x): - return F('toFloat64OrZero', x) - - @staticmethod + @type_conversion def toDecimal32(x, scale): return F('toDecimal32', x, scale) @staticmethod + @type_conversion def toDecimal64(x, scale): return F('toDecimal64', x, scale) @staticmethod + @type_conversion def toDecimal128(x, scale): return F('toDecimal128', x, scale) @staticmethod + @type_conversion def toDate(x): return F('toDate', x) @staticmethod + @type_conversion def toDateTime(x): return F('toDateTime', x) @@ -574,16 +624,9 @@ class F(Cond, FunctionOperatorsMixin): return 
F('CAST', x, type) @staticmethod - def parseDateTimeBestEffort(d, timezone=None): - return F('parseDateTimeBestEffort', d, timezone) if timezone else F('parseDateTimeBestEffort', d) - - @staticmethod - def parseDateTimeBestEffortOrNull(d, timezone=None): - return F('parseDateTimeBestEffortOrNull', d, timezone) if timezone else F('parseDateTimeBestEffortOrNull', d) - - @staticmethod - def parseDateTimeBestEffortOrZero(d, timezone=None): - return F('parseDateTimeBestEffortOrZero', d, timezone) if timezone else F('parseDateTimeBestEffortOrZero', d) + @type_conversion + def parseDateTimeBestEffort(d, timezone=NO_VALUE): + return F('parseDateTimeBestEffort', d, timezone) # Functions for working with strings @@ -1314,90 +1357,112 @@ class F(Cond, FunctionOperatorsMixin): # Aggregate functions @staticmethod + @aggregate def any(x): return F('any', x) @staticmethod + @aggregate def anyHeavy(x): return F('anyHeavy', x) @staticmethod + @aggregate def anyLast(x): return F('anyLast', x) @staticmethod + @aggregate def argMax(x, y): return F('argMax', x, y) @staticmethod + @aggregate def argMin(x, y): return F('argMin', x, y) @staticmethod + @aggregate def avg(x): return F('avg', x) @staticmethod + @aggregate def corr(x, y): return F('corr', x, y) @staticmethod + @aggregate def count(): return F('count') @staticmethod + @aggregate def covarPop(x, y): return F('covarPop', x, y) @staticmethod + @aggregate def covarSamp(x, y): return F('covarSamp', x, y) @staticmethod + @aggregate def kurtPop(x): return F('kurtPop', x) @staticmethod + @aggregate def kurtSamp(x): return F('kurtSamp', x) @staticmethod + @aggregate def min(x): return F('min', x) @staticmethod + @aggregate def max(x): return F('max', x) @staticmethod + @aggregate def skewPop(x): return F('skewPop', x) @staticmethod + @aggregate def skewSamp(x): return F('skewSamp', x) @staticmethod + @aggregate def sum(x): return F('sum', x) @staticmethod + @aggregate def uniq(*args): return F('uniq', *args) @staticmethod + @aggregate 
def uniqExact(*args): return F('uniqExact', *args) @staticmethod + @aggregate def uniqHLL12(*args): return F('uniqHLL12', *args) @staticmethod + @aggregate def varPop(x): return F('varPop', x) @staticmethod + @aggregate def varSamp(x): return F('varSamp', x) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 939d240..aca3e95 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -7,7 +7,7 @@ from six import reraise import pytz from .fields import Field, StringField -from .utils import parse_tsv +from .utils import parse_tsv, NO_VALUE from .query import QuerySet from .funcs import F from .engines import Merge, Distributed @@ -15,17 +15,6 @@ from .engines import Merge, Distributed logger = getLogger('clickhouse_orm') -class NoValue: - ''' - A sentinel for fields with an expression for a default value, - that were not assigned a value yet. - ''' - def __repr__(self): - return '' - -NO_VALUE = NoValue() - - class ModelBase(type): ''' A metaclass for ORM models. It adds the _fields list to model classes. diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index eb895a4..9e678fb 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -112,3 +112,14 @@ def is_iterable(obj): return True except TypeError: return False + + +class NoValue: + ''' + A sentinel for fields with an expression for a default value, + that were not assigned a value yet. 
+ ''' + def __repr__(self): + return 'NO_VALUE' + +NO_VALUE = NoValue() diff --git a/tests/test_funcs.py b/tests/test_funcs.py index d969836..77481f5 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -4,6 +4,7 @@ from .test_querysets import SampleModel from datetime import date, datetime, tzinfo, timedelta from ipaddress import IPv4Address, IPv6Address from infi.clickhouse_orm.database import ServerError +from infi.clickhouse_orm.utils import NO_VALUE class FuncsTestCase(TestCaseWithData): @@ -21,21 +22,21 @@ class FuncsTestCase(TestCaseWithData): self.assertEqual(count, expected_count) self.assertEqual(qs.count(), expected_count) - def _test_func(self, func, expected_value=None): + def _test_func(self, func, expected_value=NO_VALUE): sql = 'SELECT %s AS value' % func.to_sql() logger.info(sql) result = list(self.database.select(sql)) logger.info('\t==> %s', result[0].value if result else '') - if expected_value is not None: + if expected_value != NO_VALUE: self.assertEqual(result[0].value, expected_value) return result[0].value if result else None - def _test_aggr(self, func, expected_value=None): + def _test_aggr(self, func, expected_value=NO_VALUE): qs = Person.objects_in(self.database).aggregate(value=func) logger.info(qs.as_sql()) result = list(qs) logger.info('\t==> %s', result[0].value if result else '') - if expected_value is not None: + if expected_value != NO_VALUE: self.assertEqual(result[0].value, expected_value) return result[0].value if result else None @@ -316,7 +317,7 @@ class FuncsTestCase(TestCaseWithData): try: self._test_func(F.base64Decode(F.base64Encode('Hello')), 'Hello') self._test_func(F.tryBase64Decode(F.base64Encode('Hello')), 'Hello') - self._test_func(F.tryBase64Decode(':-)'), None) + self._test_func(F.tryBase64Decode(':-)')) except ServerError as e: # ClickHouse version that doesn't support these functions raise unittest.SkipTest(e.message) @@ -548,3 +549,27 @@ class FuncsTestCase(TestCaseWithData): 
self._test_aggr(F.varPop(Person.height)) self._test_aggr(F.varSamp(Person.height)) + def test_aggregate_funcs__or_default(self): + self.database.raw('TRUNCATE TABLE person') + self._test_aggr(F.countOrDefault(), 0) + self._test_aggr(F.maxOrDefault(Person.height), 0) + + def test_aggregate_funcs__or_null(self): + self.database.raw('TRUNCATE TABLE person') + self._test_aggr(F.countOrNull(), None) + self._test_aggr(F.maxOrNull(Person.height), None) + + def test_aggregate_funcs__if(self): + self._test_aggr(F.argMinIf(Person.first_name, Person.height, Person.last_name > 'H')) + self._test_aggr(F.countIf(Person.last_name > 'H'), 57) + self._test_aggr(F.minIf(Person.height, Person.last_name > 'H'), 1.6) + + def test_aggregate_funcs__or_default_if(self): + self._test_aggr(F.argMinOrDefaultIf(Person.first_name, Person.height, Person.last_name > 'Z')) + self._test_aggr(F.countOrDefaultIf(Person.last_name > 'Z'), 0) + self._test_aggr(F.minOrDefaultIf(Person.height, Person.last_name > 'Z'), 0) + + def test_aggregate_funcs__or_null_if(self): + self._test_aggr(F.argMinOrNullIf(Person.first_name, Person.height, Person.last_name > 'Z')) + self._test_aggr(F.countOrNullIf(Person.last_name > 'Z'), None) + self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None) From db3dc70ebfe700f14e5cb9177eecf20d9ec08027 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 9 Apr 2020 16:40:02 +0300 Subject: [PATCH 15/41] Functions WIP --- src/infi/clickhouse_orm/funcs.py | 349 ++++++++++++++++++++++++++----- src/infi/clickhouse_orm/query.py | 8 - tests/test_funcs.py | 97 +++++++++ 3 files changed, 396 insertions(+), 58 deletions(-) diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 9e02b0c..227f12f 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -22,6 +22,8 @@ def binary_operator(func): def type_conversion(func): """ Decorates a function to mark it as a type conversion function. 
+ The metaclass automatically generates "OrZero" and "OrNull" combinators + for the decorated function. """ @wraps(func) def wrapper(*args, **kwargs): @@ -33,6 +35,8 @@ def type_conversion(func): def aggregate(func): """ Decorates a function to mark it as an aggregate function. + The metaclass automatically generates combinators such as "OrDefault", + "OrNull", "If" etc. for the decorated function. """ @wraps(func) def wrapper(*args, **kwargs): @@ -41,6 +45,36 @@ def aggregate(func): return wrapper +def with_utf8_support(func): + """ + Decorates a function to mark it as a string function that has a UTF8 variant. + The metaclass automatically generates a "UTF8" combinator for the decorated function. + """ + @wraps(func) + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + wrapper.f_type = 'with_utf8_support' + return wrapper + + +def parametric(func): + """ + Decorates a function to convert it to a parametric function, such + as `quantile(level)(expr)`. + """ + @wraps(func) + def wrapper(*parameters): + @wraps(func) + def inner(*args, **kwargs): + f = func(*args, **kwargs) + # Append the parameter to the function name + parameters_str = comma_join([str(p) for p in parameters]) + f.name = '%s(%s)' % (f.name, parameters_str) + return f + return inner + wrapper.f_parametric = True + return wrapper + class FunctionOperatorsMixin(object): """ @@ -93,6 +127,12 @@ class FunctionOperatorsMixin(object): def __rtruediv__(self, other): return F.divide(other, self) + def __floordiv__(self, other): + return F.intDiv(self, other) + + def __rfloordiv__(self, other): + return F.intDiv(other, self) + def __mod__(self, other): return F.modulo(self, other) @@ -139,9 +179,12 @@ class FMeta(type): 'aggregate': [ {'suffix': 'OrDefault'}, {'suffix': 'OrNull'}, - {'suffix': 'If', 'args': ['cond']}, + {'suffix': 'If', 'args': ['cond']}, {'suffix': 'OrDefaultIf', 'args': ['cond']}, - {'suffix': 'OrNullIf', 'args': ['cond']}, + {'suffix': 'OrNullIf', 'args': ['cond']}, + ], + 
'with_utf8_support': [ + {'suffix': 'UTF8'}, ] } @@ -175,6 +218,9 @@ class FMeta(type): # Build the new function new_code = compile(f'def {new_name}({new_sig}): return F("{new_name}", {args})', __file__, 'exec') new_func = FunctionType(code=new_code.co_consts[0], globals=globals(), name=new_name, argdefs=argdefs) + # If base_func was parametric, new_func should be too + if getattr(base_func, 'f_parametric', False): + new_func = parametric(new_func) # Attach to class setattr(cls, new_name, new_func) @@ -350,10 +396,30 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def toYear(d): return F('toYear', d) + @staticmethod + def toISOYear(d, timezone=''): + return F('toISOYear', d, timezone) + + @staticmethod + def toQuarter(d, timezone=''): + return F('toQuarter', d, timezone) + @staticmethod def toMonth(d): return F('toMonth', d) + @staticmethod + def toWeek(d, mode=0, timezone=''): + return F('toWeek', d, mode, timezone) + + @staticmethod + def toISOWeek(d, timezone=''): + return F('toISOWeek', d, timezone) + + @staticmethod + def toDayOfYear(d): + return F('toDayOfYear', d) + @staticmethod def toDayOfMonth(d): return F('toDayOfMonth', d) @@ -390,6 +456,18 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def toStartOfYear(d): return F('toStartOfYear', d) + @staticmethod + def toStartOfISOYear(d): + return F('toStartOfISOYear', d) + + @staticmethod + def toStartOfTenMinutes(d): + return F('toStartOfTenMinutes', d) + + @staticmethod + def toStartOfWeek(d, mode=0): + return F('toStartOfWeek', d) + @staticmethod def toStartOfMinute(d): return F('toStartOfMinute', d) @@ -414,6 +492,26 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def toTime(d, timezone=''): return F('toTime', d, timezone) + @staticmethod + def toTimeZone(dt, timezone): + return F('toTimeZone', dt, timezone) + + @staticmethod + def toUnixTimestamp(dt, timezone=''): + return F('toUnixTimestamp', dt, timezone) + + @staticmethod + def toYYYYMM(dt, timezone=''): + return 
F('toYYYYMM', dt, timezone) + + @staticmethod + def toYYYYMMDD(dt, timezone=''): + return F('toYYYYMMDD', dt, timezone) + + @staticmethod + def toYYYYMMDDhhmmss(dt, timezone=''): + return F('toYYYYMMDDhhmmss', dt, timezone) + @staticmethod def toRelativeYearNum(d, timezone=''): return F('toRelativeYearNum', d, timezone) @@ -639,49 +737,34 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): return F('notEmpty', s) @staticmethod + @with_utf8_support def length(s): return F('length', s) @staticmethod - def lengthUTF8(s): - return F('lengthUTF8', s) - - @staticmethod + @with_utf8_support def lower(s): return F('lower', s) @staticmethod + @with_utf8_support def upper(s): return F('upper', s) @staticmethod - def lowerUTF8(s): - return F('lowerUTF8', s) - - @staticmethod - def upperUTF8(s): - return F('upperUTF8', s) - - @staticmethod + @with_utf8_support def reverse(s): return F('reverse', s) - @staticmethod - def reverseUTF8(s): - return F('reverseUTF8', s) - @staticmethod def concat(*args): return F('concat', *args) @staticmethod + @with_utf8_support def substring(s, offset, length): return F('substring', s, offset, length) - @staticmethod - def substringUTF8(s, offset, length): - return F('substringUTF8', s, offset, length) - @staticmethod def appendTrailingCharIfAbsent(s, c): return F('appendTrailingCharIfAbsent', s, c) @@ -726,6 +809,58 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def CRC32(s): return F('CRC32', s) + # Functions for searching in strings + + @staticmethod + @with_utf8_support + def position(haystack, needle): + return F('position', haystack, needle) + + @staticmethod + @with_utf8_support + def positionCaseInsensitive(haystack, needle): + return F('positionCaseInsensitive', haystack, needle) + + @staticmethod + def like(haystack, pattern): + return F('like', haystack, pattern) + + @staticmethod + def notLike(haystack, pattern): + return F('notLike', haystack, pattern) + + @staticmethod + def match(haystack, pattern): + return 
F('match', haystack, pattern) + + @staticmethod + def extract(haystack, pattern): + return F('extract', haystack, pattern) + + @staticmethod + def extractAll(haystack, pattern): + return F('extractAll', haystack, pattern) + + @staticmethod + @with_utf8_support + def ngramDistance(haystack, needle): + return F('ngramDistance', haystack, needle) + + @staticmethod + @with_utf8_support + def ngramDistanceCaseInsensitive(haystack, needle): + return F('ngramDistanceCaseInsensitive', haystack, needle) + + @staticmethod + @with_utf8_support + def ngramSearch(haystack, needle): + return F('ngramSearch', haystack, needle) + + @staticmethod + @with_utf8_support + def ngramSearchCaseInsensitive(haystack, needle): + return F('ngramSearchCaseInsensitive', haystack, needle) + # Functions for replacing in strings @staticmethod @@ -1012,11 +1147,11 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): @staticmethod def arrayResize(array, size, extender=None): - return F('arrayResize',array, size, extender) if extender is not None else F('arrayResize', array, size) + return F('arrayResize', array, size, extender) if extender is not None else F('arrayResize', array, size) @staticmethod def arraySlice(array, offset, length=None): - return F('arraySlice',array, offset, length) if length is not None else F('arraySlice', array, offset) + return F('arraySlice', array, offset, length) if length is not None else F('arraySlice', array, offset) @staticmethod def arrayUniq(*args): @@ -1466,45 +1601,159 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def varSamp(x): return F('varSamp', x) - - # Higher-order functions - - # arrayMap: Function arrayMap needs at least 2 argument; passed 0. 
(version 19.8.3.8 (official build)) (42) + @staticmethod + @aggregate + @parametric + def quantile(expr): + return F('quantile', expr) @staticmethod - def arrayCount(*args): - return F('arrayCount', *args) + @aggregate + @parametric + def quantileDeterministic(expr, determinator): + return F('quantileDeterministic', expr, determinator) @staticmethod - def arraySum(*args): - return F('arraySum', *args) + @aggregate + @parametric + def quantileExact(expr): + return F('quantileExact', expr) @staticmethod - def arrayExists(*args): - return F('arrayExists', *args) + @aggregate + @parametric + def quantileExactWeighted(expr, weight): + return F('quantileExactWeighted', expr, weight) @staticmethod - def arrayAll(*args): - return F('arrayAll', *args) - - # arrayFilter: Function arrayFilter needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42) - - # arrayFirst: Function arrayFirst needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42) - - # arrayFirstIndex: Function arrayFirstIndex needs at least 2 argument; passed 0. 
(version 19.8.3.8 (official build)) (42) + @aggregate + @parametric + def quantileTiming(expr): + return F('quantileTiming', expr) @staticmethod - def arrayCumSum(*args): - return F('arrayCumSum', *args) + @aggregate + @parametric + def quantileTimingWeighted(expr, weight): + return F('quantileTimingWeighted', expr, weight) @staticmethod - def arrayCumSumNonNegative(*args): - return F('arrayCumSumNonNegative', *args) + @aggregate + @parametric + def quantileTDigest(expr): + return F('quantileTDigest', expr) @staticmethod - def arraySort(*args): - return F('arraySort', *args) + @aggregate + @parametric + def quantileTDigestWeighted(expr, weight): + return F('quantileTDigestWeighted', expr, weight) @staticmethod - def arrayReverseSort(*args): - return F('arrayReverseSort', *args) + @aggregate + @parametric + def quantiles(expr): + return F('quantiles', expr) + + @staticmethod + @aggregate + @parametric + def quantilesDeterministic(expr, determinator): + return F('quantilesDeterministic', expr, determinator) + + @staticmethod + @aggregate + @parametric + def quantilesExact(expr): + return F('quantilesExact', expr) + + @staticmethod + @aggregate + @parametric + def quantilesExactWeighted(expr, weight): + return F('quantilesExactWeighted', expr, weight) + + @staticmethod + @aggregate + @parametric + def quantilesTiming(expr): + return F('quantilesTiming', expr) + + @staticmethod + @aggregate + @parametric + def quantilesTimingWeighted(expr, weight): + return F('quantilesTimingWeighted', expr, weight) + + @staticmethod + @aggregate + @parametric + def quantilesTDigest(expr): + return F('quantilesTDigest', expr) + + @staticmethod + @aggregate + @parametric + def quantilesTDigestWeighted(expr, weight): + return F('quantilesTDigestWeighted', expr, weight) + + @staticmethod + @aggregate + @parametric + def topK(expr): + return F('topK', expr) + + @staticmethod + @aggregate + @parametric + def topKWeighted(expr, weight): + return F('topKWeighted', expr, weight) + + # Null 
handling functions + + @staticmethod + def ifNull(x, y): + return F('ifNull', x, y) + + @staticmethod + def nullIf(x, y): + return F('nullIf', x, y) + + @staticmethod + def isNotNull(x): + return F('isNotNull', x) + + @staticmethod + def isNull(x): + return F('isNull', x) + + @staticmethod + def coalesce(*args): + return F('coalesce', *args) + + # Misc functions + + @staticmethod + def ifNotFinite(x, y): + return F('ifNotFinite', x, y) + + @staticmethod + def isFinite(x): + return F('isFinite', x) + + @staticmethod + def isInfinite(x): + return F('isInfinite', x) + + @staticmethod + def isNaN(x): + return F('isNaN', x) + + @staticmethod + def least(x, y): + return F('least', x, y) + + @staticmethod + def greatest(x, y): + return F('greatest', x, y) + diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index b32d9d5..69a7f06 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -301,7 +301,6 @@ class QuerySet(object): self._grouping_fields = [] self._grouping_with_totals = False self._fields = model_cls.fields().keys() - self._extra = {} self._limits = None self._limit_by = None self._limit_by_fields = None @@ -368,8 +367,6 @@ class QuerySet(object): fields = '*' if self._fields: fields = comma_join('`%s`' % field for field in self._fields) - for name, func in self._extra.items(): - fields += ', %s AS %s' % (func.to_sql(), name) return fields def as_sql(self): @@ -457,11 +454,6 @@ class QuerySet(object): qs._fields = field_names return qs - def extra(self, **kwargs): - qs = copy(self) - qs._extra = kwargs - return qs - def _filter_or_exclude(self, *q, **kwargs): from .funcs import F diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 77481f5..4820368 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -139,6 +139,11 @@ class FuncsTestCase(TestCaseWithData): self._test_func(one / two, 0.5) self._test_func(one / 2, 0.5) self._test_func(1 / two, 0.5) + # // + self._test_func(one // two, 
0) + self._test_func(two // one, 2) + self._test_func(one // 2, 0) + self._test_func(1 // two, 0) # % self._test_func(one % two, 1) self._test_func(one % 2, 1) @@ -178,8 +183,17 @@ class FuncsTestCase(TestCaseWithData): dt = datetime(2018, 12, 31, 11, 22, 33) self._test_func(F.toYear(d), 2018) self._test_func(F.toYear(dt), 2018) + self._test_func(F.toISOYear(dt, 'Europe/Athens'), 2019) # 2018-12-31 is ISO year 2019, week 1, day 1 + self._test_func(F.toQuarter(d), 4) + self._test_func(F.toQuarter(dt), 4) self._test_func(F.toMonth(d), 12) self._test_func(F.toMonth(dt), 12) + self._test_func(F.toWeek(d), 52) + self._test_func(F.toWeek(dt), 52) + self._test_func(F.toISOWeek(d), 1) # 2018-12-31 is ISO year 2019, week 1, day 1 + self._test_func(F.toISOWeek(dt), 1) + self._test_func(F.toDayOfYear(d), 365) + self._test_func(F.toDayOfYear(dt), 365) self._test_func(F.toDayOfMonth(d), 31) self._test_func(F.toDayOfMonth(dt), 31) self._test_func(F.toDayOfWeek(d), 1) @@ -200,10 +214,24 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toStartOfFifteenMinutes(dt), datetime(2018, 12, 31, 11, 15, 0, tzinfo=pytz.utc)) self._test_func(F.toStartOfHour(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)) self._test_func(F.toStartOfDay(dt), datetime(2018, 12, 31, 0, 0, 0, tzinfo=pytz.utc)) + self._test_func(F.toStartOfISOYear(dt), date(2018, 12, 31)) + self._test_func(F.toStartOfTenMinutes(dt), datetime(2018, 12, 31, 11, 20, 0, tzinfo=pytz.utc)) + self._test_func(F.toStartOfWeek(dt), date(2018, 12, 30)) self._test_func(F.toTime(dt), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc)) self._test_func(F.toTime(dt, pytz.utc), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc)) self._test_func(F.toTime(dt, 'Europe/Athens'), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) self._test_func(F.toTime(dt, pytz.timezone('Europe/Athens')), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toTimeZone(dt, 'Europe/Athens'), datetime(2018, 12, 31, 13, 22, 33, 
tzinfo=pytz.utc)) + self._test_func(F.toUnixTimestamp(dt, 'UTC'), int(dt.replace(tzinfo=pytz.utc).timestamp())) + self._test_func(F.toYYYYMM(d), 201812) + self._test_func(F.toYYYYMM(dt), 201812) + self._test_func(F.toYYYYMM(dt, 'Europe/Athens'), 201812) + self._test_func(F.toYYYYMMDD(d), 20181231) + self._test_func(F.toYYYYMMDD(dt), 20181231) + self._test_func(F.toYYYYMMDD(dt, 'Europe/Athens'), 20181231) + self._test_func(F.toYYYYMMDDhhmmss(d), 20181231000000) + self._test_func(F.toYYYYMMDDhhmmss(dt), 20181231112233) + self._test_func(F.toYYYYMMDDhhmmss(dt, 'Europe/Athens'), 20181231132233) self._test_func(F.toRelativeYearNum(dt), 2018) self._test_func(F.toRelativeYearNum(dt, 'Europe/Athens'), 2018) self._test_func(F.toRelativeMonthNum(dt), 2018 * 12 + 12) @@ -313,6 +341,25 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.trimBoth(' abc '), 'abc') self._test_func(F.CRC32('whoops'), 3361378926) + def test_string_search_functions(self): + self._test_func(F.position('Hello, world!', '!'), 13) + self._test_func(F.positionCaseInsensitive('Hello, world!', 'hello'), 1) + self._test_func(F.positionUTF8('Привет, мир!', '!'), 12) + self._test_func(F.positionCaseInsensitiveUTF8('Привет, мир!', 'Мир'), 9) + self._test_func(F.like('Hello, world!', '%ll%'), 1) + self._test_func(F.notLike('Hello, world!', '%ll%'), 0) + self._test_func(F.match('Hello, world!', '[lmnop]{3}'), 1) + self._test_func(F.extract('Hello, world!', '[lmnop]{3}'), 'llo') + self._test_func(F.extractAll('Hello, world!', '[a-z]+'), ['ello', 'world']) + self._test_func(F.ngramDistance('Hello', 'Hello'), 0) + self._test_func(F.ngramDistanceCaseInsensitive('Hello', 'hello'), 0) + self._test_func(F.ngramDistanceUTF8('Hello', 'Hello'), 0) + self._test_func(F.ngramDistanceCaseInsensitiveUTF8('Hello', 'hello'), 0) + self._test_func(F.ngramSearch('Hello', 'Hello'), 1) + self._test_func(F.ngramSearchCaseInsensitive('Hello', 'hello'), 1) + self._test_func(F.ngramSearchUTF8('Hello', 'Hello'), 1) + 
self._test_func(F.ngramSearchCaseInsensitiveUTF8('Hello', 'hello'), 1) + def test_base64_functions(self): try: self._test_func(F.base64Decode(F.base64Encode('Hello')), 'Hello') @@ -573,3 +620,53 @@ class FuncsTestCase(TestCaseWithData): self._test_aggr(F.argMinOrNullIf(Person.first_name, Person.height, Person.last_name > 'Z')) self._test_aggr(F.countOrNullIf(Person.last_name > 'Z'), None) self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None) + + def test_quantile_funcs(self): + self._test_aggr(F.quantile(0.9)(Person.height)) + self._test_aggr(F.quantileOrDefault(0.9)(Person.height)) + self._test_aggr(F.quantileOrNull(0.9)(Person.height)) + self._test_aggr(F.quantileIf(0.9)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.quantileDeterministic(0.9)(Person.height, 17)) + self._test_aggr(F.quantileExactOrDefault(0.9)(Person.height)) + weight_expr = F.toUInt32(F.round(Person.height)) + self._test_aggr(F.quantileExactWeightedOrNull(0.9)(Person.height, weight_expr)) + self._test_aggr(F.quantileTimingIf(0.9)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, Person.last_name > 'H')) + self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.quantileTDigestWeighted(0.9)(Person.height, weight_expr)) + self._test_aggr(F.quantiles(0.9, 0.95, 0.99)(Person.height)) + self._test_aggr(F.quantilesExactWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr)) + self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, Person.last_name > 'H')) + self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 
'H')) + self._test_aggr(F.quantilesTDigestWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr)) + + def test_top_k_funcs(self): + self._test_aggr(F.topK(3)(Person.height)) + self._test_aggr(F.topKOrDefault(3)(Person.height)) + self._test_aggr(F.topKIf(3)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.topKOrDefaultIf(3)(Person.height, Person.last_name > 'H')) + weight_expr = F.toUInt32(F.round(Person.height)) + self._test_aggr(F.topKWeighted(3)(Person.height, weight_expr)) + self._test_aggr(F.topKWeightedOrDefault(3)(Person.height, weight_expr)) + self._test_aggr(F.topKWeightedIf(3)(Person.height, weight_expr, Person.last_name > 'H')) + self._test_aggr(F.topKWeightedOrDefaultIf(3)(Person.height, weight_expr, Person.last_name > 'H')) + + def test_null_funcs(self): + self._test_func(F.ifNull(17, 18), 17) + self._test_func(F.ifNull(None, 18), 18) + self._test_func(F.nullIf(17, 18), 17) + self._test_func(F.nullIf(18, 18), None) + self._test_func(F.isNotNull(17), 1) + self._test_func(F.isNull(17), 0) + self._test_func(F.coalesce(None, None, 17, 18), 17) + + def test_misc_funcs(self): + self._test_func(F.ifNotFinite(17, 18), 17) + self._test_func(F.isFinite(17), 1) + self._test_func(F.isInfinite(17), 0) + self._test_func(F.isNaN(17), 0) + self._test_func(F.least(17, 18), 17) + self._test_func(F.greatest(17, 18), 18) From 19439e45ef9a6dc7b5543c8d9986ea5971504cea Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 14 Apr 2020 06:24:37 +0300 Subject: [PATCH 16/41] Functions WIP --- docs/expressions.md | 5 +-- src/infi/clickhouse_orm/funcs.py | 59 ++++++++++++++++++++++++++++++-- tests/test_funcs.py | 12 ++++++- 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/docs/expressions.md b/docs/expressions.md index a9b6838..bad4cf0 100644 --- a/docs/expressions.md +++ b/docs/expressions.md @@ -30,7 +30,7 @@ toDayOfWeek(today()) ### Operators -ORM expressions support Python's standard arithmetic operators, so you can compose expressions using `+`, `-`, 
`*`, `/` and `%`. For example: +ORM expressions support Python's standard arithmetic operators, so you can compose expressions using `+`, `-`, `*`, `/`, `//` and `%`. For example: ```python # A random integer between 1 and 10 F.rand() % 10 + 1 @@ -75,11 +75,12 @@ class Event(Model): ### Which functions are available? -ClickHouse has many hundreds of functions, and new ones often get added. If you encounter a function that the database supports but is not available in the `F` class, please report this via a GitHub issue. You can still use the function by providing its name: +ClickHouse has many hundreds of functions, and new ones often get added. Many, but not all of them, are already covered by the ORM. If you encounter a function that the database supports but is not available in the `F` class, please report this via a GitHub issue. You can still use the function by providing its name: ```python expr = F("someFunctionName", arg1, arg2, ...) ``` +Note that higher-order database functions (those that use lambda expressions) are not supported. 
--- [<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md) \ No newline at end of file diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 227f12f..a5bda56 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -1,4 +1,4 @@ -from datetime import date, datetime, tzinfo +from datetime import date, datetime, tzinfo, timedelta from functools import wraps from inspect import signature, Parameter from types import FunctionType @@ -168,6 +168,12 @@ class FunctionOperatorsMixin(object): def __invert__(self): return F._not(self) + def isIn(self, others): + return F._in(self, others) + + def isNotIn(self, others): + return F._notIn(self, others) + class FMeta(type): @@ -242,7 +248,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def __repr__(self): return self.to_sql() - def to_sql(self, *args): # FIXME why *args ? + def to_sql(self, *args): """ Generates an SQL string for this function and its arguments. For example if the function name is a symbol of a binary operator: @@ -263,7 +269,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def _arg_to_sql(arg): """ Converts a function argument to SQL string according to its type. - Supports functions, model fields, strings, dates, datetimes, booleans, + Supports functions, model fields, strings, dates, datetimes, timedeltas, booleans, None, numbers, timezones, arrays/iterables. 
""" from .fields import Field, StringField, DateTimeField, DateField @@ -277,6 +283,8 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): return "toDateTime(%s)" % DateTimeField().to_db_string(arg) if isinstance(arg, date): return "toDate('%s')" % arg.isoformat() + if isinstance(arg, timedelta): + return "toIntervalSecond(%d)" % int(arg.total_seconds()) if isinstance(arg, bool): return str(int(arg)) if isinstance(arg, tzinfo): @@ -390,6 +398,18 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def _not(a): return F('not', a) + # in / not in + + @staticmethod + @binary_operator + def _in(a, b): + return F('IN', a, b) + + @staticmethod + @binary_operator + def _notIn(a, b): + return F('NOT IN', a, b) + # Functions for working with dates and times @staticmethod @@ -628,6 +648,39 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def subtractYears(d, n, timezone=NO_VALUE): return F('subtractYears', d, n, timezone) + @staticmethod + def toIntervalSecond(number): + return F('toIntervalSecond', number) + + @staticmethod + def toIntervalMinute(number): + return F('toIntervalMinute', number) + + @staticmethod + def toIntervalHour(number): + return F('toIntervalHour', number) + + @staticmethod + def toIntervalDay(number): + return F('toIntervalDay', number) + + @staticmethod + def toIntervalWeek(number): + return F('toIntervalWeek', number) + + @staticmethod + def toIntervalMonth(number): + return F('toIntervalMonth', number) + + @staticmethod + def toIntervalQuarter(number): + return F('toIntervalQuarter', number) + + @staticmethod + def toIntervalYear(number): + return F('toIntervalYear', number) + + # Type conversion functions @staticmethod diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 4820368..8ec8b27 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -104,6 +104,13 @@ class FuncsTestCase(TestCaseWithData): self._test_qs(qs.exclude(birthday=F.today()), 100) self._test_qs(qs.filter(birthday__between=['1970-01-01', 
F.today()]), 100) + def test_in_and_not_in(self): + qs = Person.objects_in(self.database) + self._test_qs(qs.filter(Person.first_name.isIn(['Ciaran', 'Elton'])), 4) + self._test_qs(qs.filter(~Person.first_name.isIn(['Ciaran', 'Elton'])), 96) + self._test_qs(qs.filter(Person.first_name.isNotIn(['Ciaran', 'Elton'])), 96) + self._test_qs(qs.exclude(Person.first_name.isIn(['Ciaran', 'Elton'])), 96) + def test_comparison_operators(self): one = F.plus(1, 0) two = F.plus(1, 1) @@ -247,7 +254,7 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toRelativeSecondNum(dt), 1546255353) self._test_func(F.toRelativeSecondNum(dt, 'Europe/Athens'), 1546255353) self._test_func(F.now(), datetime.utcnow().replace(tzinfo=pytz.utc, microsecond=0)) # FIXME this may fail if the timing is just right - self._test_func(F.today(), date.today()) + self._test_func(F.today(), date.today()) # FIXME this may fail if the timing is just right self._test_func(F.yesterday(), date.today() - timedelta(days=1)) self._test_func(F.timeSlot(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)) self._test_func(F.timeSlots(dt, 300), [datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)]) @@ -285,6 +292,9 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.subtractWeeks(dt, 3, 'Europe/Athens')) self._test_func(F.subtractYears(d, 3)) self._test_func(F.subtractYears(dt, 3, 'Europe/Athens')) + self._test_func(F.now() + F.toIntervalSecond(3) + F.toIntervalMinute(3) + F.toIntervalHour(3) + F.toIntervalDay(3)) + self._test_func(F.now() + F.toIntervalWeek(3) + F.toIntervalMonth(3) + F.toIntervalQuarter(3) + F.toIntervalYear(3)) + self._test_func(F.now() + F.toIntervalSecond(3000) - F.toIntervalDay(3000) == F.now() + timedelta(seconds=3000, days=-3000)) def test_type_conversion_functions(self): for f in (F.toUInt8, F.toUInt16, F.toUInt32, F.toUInt64, F.toInt8, F.toInt16, F.toInt32, F.toInt64, F.toFloat32, F.toFloat64): From 7b3eb943e2578ee03b35f46fcd80df247666b515 Mon Sep 17 00:00:00 2001 From: Itai 
Shirav Date: Tue, 14 Apr 2020 23:03:11 +0300 Subject: [PATCH 17/41] Functions WIP --- docs/expressions.md | 10 +++++++ src/infi/clickhouse_orm/database.py | 20 +++++++++++++- src/infi/clickhouse_orm/fields.py | 2 +- src/infi/clickhouse_orm/models.py | 2 +- src/infi/clickhouse_orm/query.py | 7 +++-- src/infi/clickhouse_orm/system_models.py | 6 ++--- tests/test_database.py | 33 ++++++++++++++++++++++-- 7 files changed, 68 insertions(+), 12 deletions(-) diff --git a/docs/expressions.md b/docs/expressions.md index bad4cf0..dda04e7 100644 --- a/docs/expressions.md +++ b/docs/expressions.md @@ -58,6 +58,16 @@ class Person(Model): ... ``` +### Parametric functions + +Some of ClickHouse's aggregate functions can accept not only argument columns, but a set of parameters - constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. For example: +```python +# Most common last names +F.topK(5)(Person.last_name) +# Find 90th, 95th and 99th percentile of heights +F.quantiles(0.9, 0.95, 0.99)(Person.height) +``` + ### Creating new "functions" Since expressions are just Python objects until they get converted to SQL, it is possible to invent new "functions" by combining existing ones into useful building blocks. For example, we can create a reusable expression that takes a string and trims whitespace, converts it to uppercase, and changes blanks to underscores: diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 9487109..6679bc9 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -165,6 +165,24 @@ class Database(object): r = self._send(sql % (self.db_name, model_class.table_name())) return r.text.strip() == '1' + def get_model_for_table(self, table_name, system_table=False): + ''' + Generates a model class from an existing table in the database. 
+ This can be used for querying tables which don't have a corresponding model class, + for example system tables. + + - `table_name`: the table to create a model for + - `system_table`: whether the table is a system table, or belongs to the current database + ''' + db_name = 'system' if system_table else self.db_name + sql = "DESCRIBE `%s`.`%s` FORMAT TSV" % (db_name, table_name) + lines = self._send(sql).iter_lines() + fields = [parse_tsv(line)[:2] for line in lines] + model = ModelBase.create_ad_hoc_model(fields, table_name) + if system_table: + model._system = model._readonly = True + return model + def add_setting(self, name, value): ''' Adds a database setting that will be sent with every request. @@ -363,7 +381,7 @@ class Database(object): mapping = dict(db="`%s`" % self.db_name) if model_class: if model_class.is_system_model(): - mapping['table'] = model_class.table_name() + mapping['table'] = "`system`.`%s`" % model_class.table_name() else: mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name()) query = Template(query).safe_substitute(mapping) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 4f12691..dbb5aee 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -423,7 +423,7 @@ class BaseEnumField(Field): import re from enum import Enum members = {} - for match in re.finditer("'(\w+)' = (\d+)", db_type): + for match in re.finditer("'([\w ]+)' = (\d+)", db_type): members[match.group(1)] = int(match.group(2)) enum_cls = Enum('AdHocEnum', members) field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index aca3e95..c2d830b 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -68,7 +68,7 @@ class ModelBase(type): # fields is a list of tuples (name, db_type) # Check if model exists in cache fields = list(fields) - cache_key = 
str(fields) + cache_key = model_name + ' ' + str(fields) if cache_key in cls.ad_hoc_model_cache: return cls.ad_hoc_model_cache[cache_key] # Create an ad hoc model class diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 69a7f06..66212e8 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -375,10 +375,9 @@ class QuerySet(object): """ distinct = 'DISTINCT ' if self._distinct else '' final = ' FINAL' if self._final else '' - table_name = self._model_cls.table_name() - if not self._model_cls.is_system_model(): - table_name = '`%s`' % table_name - + table_name = '`%s`' % self._model_cls.table_name() + if self._model_cls.is_system_model(): + table_name = '`system`.' + table_name params = (distinct, self.select_fields_as_sql(), table_name, final) sql = u'SELECT %s%s\nFROM %s%s' % params diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index dfbe46b..bcb3217 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -51,7 +51,7 @@ class SystemPart(Model): @classmethod def table_name(cls): - return 'system.parts' + return 'parts' """ Next methods return SQL for some operations, which can be done with partitions @@ -141,8 +141,8 @@ class SystemPart(Model): if conditions: conditions += " AND" field_names = ','.join(cls.fields()) - return database.select("SELECT %s FROM %s WHERE %s database='%s'" % - (field_names, cls.table_name(), conditions, database.db_name), model_class=cls) + return database.select("SELECT %s FROM `system`.%s WHERE %s database='%s'" % + (field_names, cls.table_name(), conditions, database.db_name), model_class=cls) @classmethod def get_active(cls, database, conditions=""): diff --git a/tests/test_database.py b/tests/test_database.py index ab9aa15..a2bb8d7 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -166,8 +166,12 @@ class DatabaseTestCase(TestCaseWithData): 
Database(self.database.db_name, username='default', password='wrong') exc = cm.exception - self.assertEqual(exc.code, 193) - self.assertTrue(exc.message.startswith('Wrong password for user default')) + print(exc.code, exc.message) + self.assertIn(exc.code, (193, 516)) + if exc.code == 193: + self.assertTrue('Wrong password for user default' in exc.message) + else: + self.assertTrue('default: Authentication failed: password is incorrect' in exc.message) def test_nonexisting_db(self): db = Database('db_not_here', autocreate=False) @@ -233,3 +237,28 @@ class DatabaseTestCase(TestCaseWithData): query = "SELECT DISTINCT type FROM system.columns" for row in self.database.select(query): ModelBase.create_ad_hoc_field(row.type) + + def test_get_model_for_table(self): + # Tests that get_model_for_table works for a non-system model + model = self.database.get_model_for_table('person') + self.assertFalse(model.is_system_model()) + self.assertFalse(model.is_read_only()) + self.assertEqual(model.table_name(), 'person') + # Read a few records + list(model.objects_in(self.database)[:10]) + # Inserts should work too + self.database.insert([ + model(first_name='aaa', last_name='bbb', height=1.77) + ]) + + def test_get_model_for_table__system(self): + # Tests that get_model_for_table works for all system tables + query = "SELECT name FROM system.tables WHERE database='system'" + for row in self.database.select(query): + print(row.name) + model = self.database.get_model_for_table(row.name, system_table=True) + self.assertTrue(model.is_system_model()) + self.assertTrue(model.is_read_only()) + self.assertEqual(model.table_name(), row.name) + # Read a few records + list(model.objects_in(self.database)[:10]) From 3c38c8ec400267fc4b4035b104e0f3e63a1d5d55 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 19 Apr 2020 07:17:52 +0300 Subject: [PATCH 18/41] Functions WIP --- .gitignore | 1 + docs/models_and_databases.md | 6 ++++ src/infi/clickhouse_orm/fields.py | 3 ++ 
src/infi/clickhouse_orm/funcs.py | 8 +++-- src/infi/clickhouse_orm/models.py | 6 ++++ src/infi/clickhouse_orm/query.py | 4 +-- src/infi/clickhouse_orm/utils.py | 8 ++--- tests/test_funcs.py | 52 ++++++++++++++++++++++--------- tests/test_querysets.py | 6 ++-- 9 files changed, 67 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 0e9fa7b..60e5afe 100644 --- a/.gitignore +++ b/.gitignore @@ -59,6 +59,7 @@ src/infi/clickhouse_orm/__version__.py bootstrap.py htmldocs/ +cover/ # tox .tox/ diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md index c6ce1ca..74ded9f 100644 --- a/docs/models_and_databases.md +++ b/docs/models_and_databases.md @@ -152,6 +152,12 @@ When running a query, specifying a model class is not required. In case you do n This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax. +It is also possible to generate a model class on the fly for an existing table in the database using `get_model_for_table`. 
This is particularly useful for querying system tables, for example: + + QueryLog = db.get_model_for_table('query_log', system_table=True) + for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000): + print(row.query) + SQL Placeholders ---------------- diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index dbb5aee..bf370ea 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -42,6 +42,9 @@ class Field(FunctionOperatorsMixin): self.readonly = bool(self.alias or self.materialized or readonly) self.codec = codec + def __str__(self): + return self.name + def to_python(self, value, timezone_in_use): ''' Converts the input value into the expected Python data type, raising ValueError if the diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index a5bda56..8af621f 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -4,7 +4,7 @@ from inspect import signature, Parameter from types import FunctionType from .utils import is_iterable, comma_join, NO_VALUE -from .query import Cond +from .query import Cond, QuerySet def binary_operator(func): @@ -276,7 +276,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): if isinstance(arg, F): return arg.to_sql() if isinstance(arg, Field): - return "`%s`" % arg.name + return "`%s`" % arg if isinstance(arg, str): return StringField().to_db_string(arg) if isinstance(arg, datetime): @@ -291,6 +291,8 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): return StringField().to_db_string(arg.tzname(None)) if arg is None: return 'NULL' + if isinstance(arg, QuerySet): + return "(%s)" % arg if is_iterable(arg): return '[' + comma_join(F._arg_to_sql(x) for x in arg) + ']' return str(arg) @@ -340,7 +342,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): @staticmethod def gcd(a, b): - return F('gcd',a, b) + return F('gcd', a, b) @staticmethod def lcm(a, b): diff --git 
a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index c2d830b..a70ad38 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -94,6 +94,12 @@ class ModelBase(type): if db_type.startswith('Array'): inner_field = cls.create_ad_hoc_field(db_type[6 : -1]) return orm_fields.ArrayField(inner_field) + # Tuples (poor man's version - convert to array) + if db_type.startswith('Tuple'): + types = [s.strip() for s in db_type[6 : -1].split(',')] + assert len(set(types)) == 1, 'No support for mixed types in tuples - ' + db_type + inner_field = cls.create_ad_hoc_field(types[0]) + return orm_fields.ArrayField(inner_field) # FixedString if db_type.startswith('FixedString'): length = int(db_type[12 : -1]) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 66212e8..d768fab 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -410,7 +410,7 @@ class QuerySet(object): Returns the contents of the query's `ORDER BY` clause as a string. """ return comma_join([ - '%s DESC' % field[1:] if field[0] == '-' else field + '%s DESC' % field[1:] if isinstance(field, str) and field[0] == '-' else str(field) for field in self._order_by ]) @@ -624,7 +624,7 @@ class AggregateQuerySet(QuerySet): """ Returns the selected fields or expressions as a SQL string. 
""" - return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) + return comma_join([str(f) for f in self._fields] + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) def __iter__(self): return self._database.select(self.as_sql()) # using an ad-hoc model diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index 9e678fb..2f60a40 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -52,19 +52,19 @@ def parse_tsv(line): def parse_array(array_string): """ - Parse an array string as returned by clickhouse. For example: + Parse an array or tuple string as returned by clickhouse. For example: "['hello', 'world']" ==> ["hello", "world"] - "[1,2,3]" ==> [1, 2, 3] + "(1,2,3)" ==> [1, 2, 3] """ # Sanity check - if len(array_string) < 2 or array_string[0] != '[' or array_string[-1] != ']': + if len(array_string) < 2 or array_string[0] not in '[(' or array_string[-1] not in '])': raise ValueError('Invalid array string: "%s"' % array_string) # Drop opening brace array_string = array_string[1:] # Go over the string, lopping off each value at the beginning until nothing is left values = [] while True: - if array_string == ']': + if array_string in '])': # End of array return values elif array_string[0] in ', ': diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 8ec8b27..4b15f48 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -110,6 +110,9 @@ class FuncsTestCase(TestCaseWithData): self._test_qs(qs.filter(~Person.first_name.isIn(['Ciaran', 'Elton'])), 96) self._test_qs(qs.filter(Person.first_name.isNotIn(['Ciaran', 'Elton'])), 96) self._test_qs(qs.exclude(Person.first_name.isIn(['Ciaran', 'Elton'])), 96) + # In subquery + subquery = qs.filter(F.startsWith(Person.last_name, 'M')).only(Person.first_name) + self._test_qs(qs.filter(Person.first_name.isIn(subquery)), 4) def test_comparison_operators(self): one = F.plus(1, 0) @@ -174,8 
+177,8 @@ class FuncsTestCase(TestCaseWithData): self._test_func(0 | one, 1) # ^ self._test_func(one ^ one, 0) - #############self._test_func(one ^ 0, 1) - #############self._test_func(0 ^ one, 1) + self._test_func(one ^ 0, 1) + self._test_func(0 ^ one, 1) # ~ self._test_func(~one, 0) self._test_func(~~one, 1) @@ -416,6 +419,10 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.power(x, y)) self._test_func(F.intExp10(x)) self._test_func(F.intExp2(x)) + self._test_func(F.intDivOrZero(x, y)) + self._test_func(F.abs(x)) + self._test_func(F.gcd(x, y)) + self._test_func(F.lcm(x, y)) def test_rounding_functions(self): x = 22.22222 @@ -578,9 +585,10 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.IPv6NumToString(F.IPv6StringToNum('2a02:6b8::11')), '2a02:6b8::11') self._test_func(F.toIPv4('10.20.30.40'), IPv4Address('10.20.30.40')) self._test_func(F.toIPv6('2001:438:ffff::407d:1bc1'), IPv6Address('2001:438:ffff::407d:1bc1')) - # These require support for tuples: - # self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16), ['192.168.0.0','192.168.255.255']) - # self._test_func(F.IPv6CIDRToRange(x, y)) + self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16), + [IPv4Address('192.168.0.0'), IPv4Address('192.168.255.255')]) + self._test_func(F.IPv6CIDRToRange(F.toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32), + [IPv6Address('2001:db8::'), IPv6Address('2001:db8:ffff:ffff:ffff:ffff:ffff:ffff')]) def test_aggregate_funcs(self): self._test_aggr(F.any(Person.first_name)) @@ -632,25 +640,39 @@ class FuncsTestCase(TestCaseWithData): self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None) def test_quantile_funcs(self): + cond = Person.last_name > 'H' + weight_expr = F.toUInt32(F.round(Person.height)) + # Quantile self._test_aggr(F.quantile(0.9)(Person.height)) self._test_aggr(F.quantileOrDefault(0.9)(Person.height)) self._test_aggr(F.quantileOrNull(0.9)(Person.height)) - self._test_aggr(F.quantileIf(0.9)(Person.height, 
Person.last_name > 'H')) - self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, Person.last_name > 'H')) - self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.quantileIf(0.9)(Person.height, cond)) + self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, cond)) + self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, cond)) self._test_aggr(F.quantileDeterministic(0.9)(Person.height, 17)) + self._test_aggr(F.quantileExact(0.9)(Person.height)) self._test_aggr(F.quantileExactOrDefault(0.9)(Person.height)) - weight_expr = F.toUInt32(F.round(Person.height)) + # Quantile weighted + self._test_aggr(F.quantileExactWeighted(0.9)(Person.height, weight_expr)) self._test_aggr(F.quantileExactWeightedOrNull(0.9)(Person.height, weight_expr)) - self._test_aggr(F.quantileTimingIf(0.9)(Person.height, Person.last_name > 'H')) - self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, Person.last_name > 'H')) - self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.quantileTiming(0.9)(Person.height)) + self._test_aggr(F.quantileTimingIf(0.9)(Person.height, cond)) + self._test_aggr(F.quantileTimingWeighted(0.9)(Person.height, weight_expr)) + self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, cond)) + self._test_aggr(F.quantileTDigest(0.9)(Person.height)) + self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, cond)) self._test_aggr(F.quantileTDigestWeighted(0.9)(Person.height, weight_expr)) + # Quantiles self._test_aggr(F.quantiles(0.9, 0.95, 0.99)(Person.height)) + self._test_aggr(F.quantilesDeterministic(0.9, 0.95, 0.99)(Person.height, 17)) + self._test_aggr(F.quantilesExact(0.9, 0.95, 0.99)(Person.height)) self._test_aggr(F.quantilesExactWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr)) - self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H')) - 
self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, Person.last_name > 'H')) - self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H')) + self._test_aggr(F.quantilesTiming(0.9, 0.95, 0.99)(Person.height)) + self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, cond)) + self._test_aggr(F.quantilesTimingWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr)) + self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, cond)) + self._test_aggr(F.quantilesTDigest(0.9, 0.95, 0.99)(Person.height)) + self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, cond)) self._test_aggr(F.quantilesTDigestWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr)) def test_top_k_funcs(self): diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 2134765..2144e55 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -469,9 +469,9 @@ class AggregateTestCase(TestCaseWithData): order_by('first_name', '-height').limit_by(1, 'first_name') self.assertEqual(qs.count(), 94) self.assertEqual(list(qs)[89].last_name, 'Bowen') - # Test with funcs - qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n=F.count()).\ - order_by('first_name', '-height').limit_by(1, F.upper(Person.first_name)) + # Test with funcs and fields + qs = Person.objects_in(self.database).aggregate(Person.first_name, Person.last_name, Person.height, n=F.count()).\ + order_by(Person.first_name, '-height').limit_by(1, F.upper(Person.first_name)) self.assertEqual(qs.count(), 94) self.assertEqual(list(qs)[89].last_name, 'Bowen') # Test with limit and offset, also mixing LIMIT with LIMIT BY From 127824c02630d898fc03792f166ec2257f0cfd6b Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 1 May 2020 16:28:15 +0300 Subject: [PATCH 19/41] TRIVIAL add __repr__ to Field class --- src/infi/clickhouse_orm/fields.py | 3 +++ 1 file 
changed, 3 insertions(+) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index bf370ea..0290e87 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -45,6 +45,9 @@ class Field(FunctionOperatorsMixin): def __str__(self): return self.name + def __repr__(self): + return '<%s>' % self.__class__.__name__ + def to_python(self, value, timezone_in_use): ''' Converts the input value into the expected Python data type, raising ValueError if the From 6dee1015932914e0e3310ca1cc2bd212baf87954 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 1 May 2020 20:11:40 +0300 Subject: [PATCH 20/41] - improve imports - documentation updates --- README.md | 13 +- docs/class_reference.md | 1195 ++++++++++++++++------ docs/expressions.md | 2 +- docs/querysets.md | 2 +- docs/toc.md | 4 +- scripts/generate_ref.py | 2 +- src/infi/clickhouse_orm/__init__.py | 12 + src/infi/clickhouse_orm/database.py | 4 + src/infi/clickhouse_orm/engines.py | 6 +- src/infi/clickhouse_orm/fields.py | 7 +- src/infi/clickhouse_orm/funcs.py | 4 + src/infi/clickhouse_orm/migrations.py | 6 +- src/infi/clickhouse_orm/models.py | 4 +- src/infi/clickhouse_orm/query.py | 4 + src/infi/clickhouse_orm/system_models.py | 4 + src/infi/clickhouse_orm/utils.py | 5 + tests/test_alias_fields.py | 1 + tests/test_database.py | 1 + tests/test_datetime_fields.py | 2 + tests/test_engines.py | 1 + tests/test_funcs.py | 17 +- tests/test_materialized_fields.py | 1 + tests/test_querysets.py | 2 +- 23 files changed, 988 insertions(+), 311 deletions(-) diff --git a/README.md b/README.md index 9820948..9958aba 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ from infi.clickhouse_orm.database import Database from infi.clickhouse_orm.models import Model from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import Memory +from infi.clickhouse_orm.funcs import F class CPUStats(Model): @@ -45,13 +46,13 @@ Querying the table is easy, using either 
the query builder or raw SQL: ```python # Calculate what percentage of the time CPU 1 was over 95% busy -total = CPUStats.objects_in(db).filter(cpu_id=1).count() -busy = CPUStats.objects_in(db).filter(cpu_id=1, cpu_percent__gt=95).count() -print 'CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total) +total = CPUStats.objects_in(db).filter(CPUStats.cpu_id == 1).count() +busy = CPUStats.objects_in(db).filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count() +print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total)) # Calculate the average usage per CPU -for row in CPUStats.objects_in(db).aggregate('cpu_id', average='avg(cpu_percent)'): - print 'CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row) +for row in CPUStats.objects_in(db).aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)): + print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row)) ``` -To learn more please visit the [documentation](docs/toc.md). \ No newline at end of file +To learn more please visit the [documentation](docs/toc.md). diff --git a/docs/class_reference.md b/docs/class_reference.md index 392c9f1..b191051 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -77,6 +77,17 @@ Deletes the database on the ClickHouse server. Drops the database table of the given model class, if it exists. +#### get_model_for_table(table_name, system_table=False) + + +Generates a model class from an existing table in the database. +This can be used for querying tables which don't have a corresponding model class, +for example system tables. + +- `table_name`: the table to create a model for +- `system_table`: whether the table is a system table, or belongs to the current database + + #### insert(model_instances, batch_size=1000) @@ -958,9 +969,6 @@ Returns a copy of this queryset that excludes all rows matching the conditions. Pass `prewhere=True` to apply the conditions as PREWHERE instead of WHERE. 
-#### extra(**kwargs) - - #### filter(*q, **kwargs) @@ -975,12 +983,12 @@ Adds a FINAL modifier to table, meaning data will be collapsed to final version. Can be used with `CollapsingMergeTree` engine only. -#### limit_by(offset_limit, *fields) +#### limit_by(offset_limit, *fields_or_expr) Adds a LIMIT BY clause to the query. - `offset_limit`: either an integer specifying the limit, or a tuple of integers (offset, limit). -- `fields`: the field names to use in the clause. +- `fields_or_expr`: the field names or expressions to use in the clause. #### only(*field_names) @@ -1084,9 +1092,6 @@ Returns a copy of this queryset that excludes all rows matching the conditions. Pass `prewhere=True` to apply the conditions as PREWHERE instead of WHERE. -#### extra(**kwargs) - - #### filter(*q, **kwargs) @@ -1109,12 +1114,12 @@ be names of grouping fields or calculated fields that this queryset was created with. -#### limit_by(offset_limit, *fields) +#### limit_by(offset_limit, *fields_or_expr) Adds a LIMIT BY clause to the query. - `offset_limit`: either an integer specifying the limit, or a tuple of integers (offset, limit). -- `fields`: the field names to use in the clause. +- `fields_or_expr`: the field names or expressions to use in the clause. #### only(*field_names) @@ -1163,6 +1168,14 @@ with aggregate function calculated across all the rows. More information: https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier +### Q + +#### Q(*filter_funcs, **filter_fields) + + +#### to_sql(model_cls) + + infi.clickhouse_orm.funcs ------------------------- @@ -1237,54 +1250,132 @@ Initializer. 
#### acos() -#### addDays(n, timezone=None) +#### addDays(n, timezone=NO_VALUE) -#### addHours(n, timezone=None) +#### addHours(n, timezone=NO_VALUE) -#### addMinutes(n, timezone=None) +#### addMinutes(n, timezone=NO_VALUE) -#### addMonths(n, timezone=None) +#### addMonths(n, timezone=NO_VALUE) -#### addQuarters(n, timezone=None) +#### addQuarters(n, timezone=NO_VALUE) -#### addSeconds(n, timezone=None) +#### addSeconds(n, timezone=NO_VALUE) -#### addWeeks(n, timezone=None) +#### addWeeks(n, timezone=NO_VALUE) -#### addYears(n, timezone=None) +#### addYears(n, timezone=NO_VALUE) #### alphaTokens() +#### any(**kwargs) + + +#### anyHeavy(**kwargs) + + +#### anyHeavyIf(cond) + + +#### anyHeavyOrDefault() + + +#### anyHeavyOrDefaultIf(cond) + + +#### anyHeavyOrNull() + + +#### anyHeavyOrNullIf(cond) + + +#### anyIf(cond) + + +#### anyLast(**kwargs) + + +#### anyLastIf(cond) + + +#### anyLastOrDefault() + + +#### anyLastOrDefaultIf(cond) + + +#### anyLastOrNull() + + +#### anyLastOrNullIf(cond) + + +#### anyOrDefault() + + +#### anyOrDefaultIf(cond) + + +#### anyOrNull() + + +#### anyOrNullIf(cond) + + #### appendTrailingCharIfAbsent(c) +#### argMax(**kwargs) + + +#### argMaxIf(y, cond) + + +#### argMaxOrDefault(y) + + +#### argMaxOrDefaultIf(y, cond) + + +#### argMaxOrNull(y) + + +#### argMaxOrNullIf(y, cond) + + +#### argMin(**kwargs) + + +#### argMinIf(y, cond) + + +#### argMinOrDefault(y) + + +#### argMinOrDefaultIf(y, cond) + + +#### argMinOrNull(y) + + +#### argMinOrNullIf(y, cond) + + #### array() -#### arrayAll() - - #### arrayConcat() -#### arrayCount() - - -#### arrayCumSum() - - -#### arrayCumSumNonNegative() - - #### arrayDifference() @@ -1309,9 +1400,6 @@ Initializer. #### arrayEnumerateUniqRanked() -#### arrayExists() - - #### arrayIntersect() @@ -1339,21 +1427,12 @@ Initializer. 
#### arrayReverse() -#### arrayReverseSort() - - #### arraySlice(offset, length=None) -#### arraySort() - - #### arrayStringConcat(sep=None) -#### arraySum() - - #### arrayUniq() @@ -1363,6 +1442,24 @@ Initializer. #### atan() +#### avg(**kwargs) + + +#### avgIf(cond) + + +#### avgOrDefault() + + +#### avgOrDefaultIf(cond) + + +#### avgOrNull() + + +#### avgOrNullIf(cond) + + #### base64Decode() @@ -1462,18 +1559,93 @@ Initializer. #### cityHash64() +#### coalesce() + + #### concat() #### convertCharset(from_charset, to_charset) +#### corr(**kwargs) + + +#### corrIf(y, cond) + + +#### corrOrDefault(y) + + +#### corrOrDefaultIf(y, cond) + + +#### corrOrNull(y) + + +#### corrOrNullIf(y, cond) + + #### cos() +#### count(**kwargs) + + #### countEqual(x) +#### countIf() + + +#### countOrDefault() + + +#### countOrDefaultIf() + + +#### countOrNull() + + +#### countOrNullIf() + + +#### covarPop(**kwargs) + + +#### covarPopIf(y, cond) + + +#### covarPopOrDefault(y) + + +#### covarPopOrDefaultIf(y, cond) + + +#### covarPopOrNull(y) + + +#### covarPopOrNullIf(y, cond) + + +#### covarSamp(**kwargs) + + +#### covarSampIf(y, cond) + + +#### covarSampOrDefault(y) + + +#### covarSampOrDefaultIf(y, cond) + + +#### covarSampOrNull(y) + + +#### covarSampOrNullIf(y, cond) + + #### divide(**kwargs) @@ -1546,6 +1718,12 @@ Initializer. #### exp2() +#### extract(pattern) + + +#### extractAll(pattern) + + #### farmHash64() @@ -1567,6 +1745,9 @@ Initializer. #### greaterOrEquals(**kwargs) +#### greatest(y) + + #### halfMD5() @@ -1585,6 +1766,12 @@ Initializer. #### hiveHash() +#### ifNotFinite(y) + + +#### ifNull(y) + + #### indexOf(x) @@ -1606,16 +1793,76 @@ Initializer. 
#### intHash64() +#### isFinite() + + +#### isIn(others) + + +#### isInfinite() + + +#### isNaN() + + +#### isNotIn(others) + + +#### isNotNull() + + +#### isNull() + + #### javaHash() #### jumpConsistentHash(buckets) +#### kurtPop(**kwargs) + + +#### kurtPopIf(cond) + + +#### kurtPopOrDefault() + + +#### kurtPopOrDefaultIf(cond) + + +#### kurtPopOrNull() + + +#### kurtPopOrNullIf(cond) + + +#### kurtSamp(**kwargs) + + +#### kurtSampIf(cond) + + +#### kurtSampOrDefault() + + +#### kurtSampOrDefaultIf(cond) + + +#### kurtSampOrNull() + + +#### kurtSampOrNullIf(cond) + + #### lcm(b) -#### length() +#### least(y) + + +#### length(**kwargs) #### lengthUTF8() @@ -1630,6 +1877,9 @@ Initializer. #### lgamma() +#### like(pattern) + + #### log() @@ -1642,15 +1892,54 @@ Initializer. #### log2() -#### lower() +#### lower(**kwargs) #### lowerUTF8() +#### match(pattern) + + +#### max(**kwargs) + + +#### maxIf(cond) + + +#### maxOrDefault() + + +#### maxOrDefaultIf(cond) + + +#### maxOrNull() + + +#### maxOrNullIf(cond) + + #### metroHash64() +#### min(**kwargs) + + +#### minIf(cond) + + +#### minOrDefault() + + +#### minOrDefaultIf(cond) + + +#### minOrNull() + + +#### minOrNullIf(cond) + + #### minus(**kwargs) @@ -1678,22 +1967,52 @@ Initializer. 
#### negate() +#### ngramDistance(**kwargs) + + +#### ngramDistanceCaseInsensitive(**kwargs) + + +#### ngramDistanceCaseInsensitiveUTF8(needle) + + +#### ngramDistanceUTF8(needle) + + +#### ngramSearch(**kwargs) + + +#### ngramSearchCaseInsensitive(**kwargs) + + +#### ngramSearchCaseInsensitiveUTF8(needle) + + +#### ngramSearchUTF8(needle) + + #### notEmpty() #### notEquals(**kwargs) +#### notLike(pattern) + + #### now() -#### parseDateTimeBestEffort(timezone=None) +#### nullIf(y) -#### parseDateTimeBestEffortOrNull(timezone=None) +#### parseDateTimeBestEffort(**kwargs) -#### parseDateTimeBestEffortOrZero(timezone=None) +#### parseDateTimeBestEffortOrNull(timezone=NO_VALUE) + + +#### parseDateTimeBestEffortOrZero(timezone=NO_VALUE) #### pi() @@ -1702,12 +2021,312 @@ Initializer. #### plus(**kwargs) +#### position(**kwargs) + + +#### positionCaseInsensitive(**kwargs) + + +#### positionCaseInsensitiveUTF8(needle) + + +#### positionUTF8(needle) + + #### power(y) #### power(y) +#### quantile(**kwargs) + + +#### quantileDeterministic(**kwargs) + + +#### quantileDeterministicIf() + + +#### quantileDeterministicOrDefault() + + +#### quantileDeterministicOrDefaultIf() + + +#### quantileDeterministicOrNull() + + +#### quantileDeterministicOrNullIf() + + +#### quantileExact(**kwargs) + + +#### quantileExactIf() + + +#### quantileExactOrDefault() + + +#### quantileExactOrDefaultIf() + + +#### quantileExactOrNull() + + +#### quantileExactOrNullIf() + + +#### quantileExactWeighted(**kwargs) + + +#### quantileExactWeightedIf() + + +#### quantileExactWeightedOrDefault() + + +#### quantileExactWeightedOrDefaultIf() + + +#### quantileExactWeightedOrNull() + + +#### quantileExactWeightedOrNullIf() + + +#### quantileIf() + + +#### quantileOrDefault() + + +#### quantileOrDefaultIf() + + +#### quantileOrNull() + + +#### quantileOrNullIf() + + +#### quantileTDigest(**kwargs) + + +#### quantileTDigestIf() + + +#### quantileTDigestOrDefault() + + +#### quantileTDigestOrDefaultIf() + + 
+#### quantileTDigestOrNull() + + +#### quantileTDigestOrNullIf() + + +#### quantileTDigestWeighted(**kwargs) + + +#### quantileTDigestWeightedIf() + + +#### quantileTDigestWeightedOrDefault() + + +#### quantileTDigestWeightedOrDefaultIf() + + +#### quantileTDigestWeightedOrNull() + + +#### quantileTDigestWeightedOrNullIf() + + +#### quantileTiming(**kwargs) + + +#### quantileTimingIf() + + +#### quantileTimingOrDefault() + + +#### quantileTimingOrDefaultIf() + + +#### quantileTimingOrNull() + + +#### quantileTimingOrNullIf() + + +#### quantileTimingWeighted(**kwargs) + + +#### quantileTimingWeightedIf() + + +#### quantileTimingWeightedOrDefault() + + +#### quantileTimingWeightedOrDefaultIf() + + +#### quantileTimingWeightedOrNull() + + +#### quantileTimingWeightedOrNullIf() + + +#### quantiles(**kwargs) + + +#### quantilesDeterministic(**kwargs) + + +#### quantilesDeterministicIf() + + +#### quantilesDeterministicOrDefault() + + +#### quantilesDeterministicOrDefaultIf() + + +#### quantilesDeterministicOrNull() + + +#### quantilesDeterministicOrNullIf() + + +#### quantilesExact(**kwargs) + + +#### quantilesExactIf() + + +#### quantilesExactOrDefault() + + +#### quantilesExactOrDefaultIf() + + +#### quantilesExactOrNull() + + +#### quantilesExactOrNullIf() + + +#### quantilesExactWeighted(**kwargs) + + +#### quantilesExactWeightedIf() + + +#### quantilesExactWeightedOrDefault() + + +#### quantilesExactWeightedOrDefaultIf() + + +#### quantilesExactWeightedOrNull() + + +#### quantilesExactWeightedOrNullIf() + + +#### quantilesIf() + + +#### quantilesOrDefault() + + +#### quantilesOrDefaultIf() + + +#### quantilesOrNull() + + +#### quantilesOrNullIf() + + +#### quantilesTDigest(**kwargs) + + +#### quantilesTDigestIf() + + +#### quantilesTDigestOrDefault() + + +#### quantilesTDigestOrDefaultIf() + + +#### quantilesTDigestOrNull() + + +#### quantilesTDigestOrNullIf() + + +#### quantilesTDigestWeighted(**kwargs) + + +#### quantilesTDigestWeightedIf() + + +#### 
quantilesTDigestWeightedOrDefault() + + +#### quantilesTDigestWeightedOrDefaultIf() + + +#### quantilesTDigestWeightedOrNull() + + +#### quantilesTDigestWeightedOrNullIf() + + +#### quantilesTiming(**kwargs) + + +#### quantilesTimingIf() + + +#### quantilesTimingOrDefault() + + +#### quantilesTimingOrDefaultIf() + + +#### quantilesTimingOrNull() + + +#### quantilesTimingOrNullIf() + + +#### quantilesTimingWeighted(**kwargs) + + +#### quantilesTimingWeightedIf() + + +#### quantilesTimingWeightedOrDefault() + + +#### quantilesTimingWeightedOrDefaultIf() + + +#### quantilesTimingWeightedOrNull() + + +#### quantilesTimingWeightedOrNullIf() + + #### rand() @@ -1738,7 +2357,7 @@ Initializer. #### replaceRegexpOne(pattern, replacement) -#### reverse() +#### reverse(**kwargs) #### reverseUTF8() @@ -1768,6 +2387,42 @@ Initializer. #### sipHash64() +#### skewPop(**kwargs) + + +#### skewPopIf(cond) + + +#### skewPopOrDefault() + + +#### skewPopOrDefaultIf(cond) + + +#### skewPopOrNull() + + +#### skewPopOrNullIf(cond) + + +#### skewSamp(**kwargs) + + +#### skewSampIf(cond) + + +#### skewSampOrDefault() + + +#### skewSampOrDefaultIf(cond) + + +#### skewSampOrNull() + + +#### skewSampOrNullIf(cond) + + #### splitByChar(s) @@ -1780,34 +2435,52 @@ Initializer. 
#### startsWith(prefix) -#### substring(offset, length) +#### substring(**kwargs) #### substringUTF8(offset, length) -#### subtractDays(n, timezone=None) +#### subtractDays(n, timezone=NO_VALUE) -#### subtractHours(n, timezone=None) +#### subtractHours(n, timezone=NO_VALUE) -#### subtractMinutes(n, timezone=None) +#### subtractMinutes(n, timezone=NO_VALUE) -#### subtractMonths(n, timezone=None) +#### subtractMonths(n, timezone=NO_VALUE) -#### subtractQuarters(n, timezone=None) +#### subtractQuarters(n, timezone=NO_VALUE) -#### subtractSeconds(n, timezone=None) +#### subtractSeconds(n, timezone=NO_VALUE) -#### subtractWeeks(n, timezone=None) +#### subtractWeeks(n, timezone=NO_VALUE) -#### subtractYears(n, timezone=None) +#### subtractYears(n, timezone=NO_VALUE) + + +#### sum(**kwargs) + + +#### sumIf(cond) + + +#### sumOrDefault() + + +#### sumOrDefaultIf(cond) + + +#### sumOrNull() + + +#### sumOrNullIf(cond) #### tan() @@ -1822,10 +2495,22 @@ Initializer. #### timeSlots(duration) -#### toDate() +#### toDate(**kwargs) -#### toDateTime() +#### toDateOrNull() + + +#### toDateOrZero() + + +#### toDateTime(**kwargs) + + +#### toDateTimeOrNull() + + +#### toDateTimeOrZero() #### toDayOfMonth() @@ -1834,25 +2519,52 @@ Initializer. #### toDayOfWeek() -#### toDecimal128(scale) +#### toDayOfYear() -#### toDecimal32(scale) +#### toDecimal128(**kwargs) -#### toDecimal64(scale) +#### toDecimal128OrNull(scale) + + +#### toDecimal128OrZero(scale) + + +#### toDecimal32(**kwargs) + + +#### toDecimal32OrNull(scale) + + +#### toDecimal32OrZero(scale) + + +#### toDecimal64(**kwargs) + + +#### toDecimal64OrNull(scale) + + +#### toDecimal64OrZero(scale) #### toFixedString(length) -#### toFloat32() +#### toFloat32(**kwargs) + + +#### toFloat32OrNull() #### toFloat32OrZero() -#### toFloat64() +#### toFloat64(**kwargs) + + +#### toFloat64OrNull() #### toFloat64OrZero() @@ -1867,30 +2579,72 @@ Initializer. 
#### toIPv6() -#### toInt16() +#### toISOWeek(timezone="") + + +#### toISOYear(timezone="") + + +#### toInt16(**kwargs) + + +#### toInt16OrNull() #### toInt16OrZero() -#### toInt32() +#### toInt32(**kwargs) + + +#### toInt32OrNull() #### toInt32OrZero() -#### toInt64() +#### toInt64(**kwargs) + + +#### toInt64OrNull() #### toInt64OrZero() -#### toInt8() +#### toInt8(**kwargs) + + +#### toInt8OrNull() #### toInt8OrZero() +#### toIntervalDay() + + +#### toIntervalHour() + + +#### toIntervalMinute() + + +#### toIntervalMonth() + + +#### toIntervalQuarter() + + +#### toIntervalSecond() + + +#### toIntervalWeek() + + +#### toIntervalYear() + + #### toMinute() @@ -1900,6 +2654,9 @@ Initializer. #### toMonth() +#### toQuarter(timezone="") + + #### toRelativeDayNum(timezone="") @@ -1936,6 +2693,9 @@ Initializer. #### toStartOfHour() +#### toStartOfISOYear() + + #### toStartOfMinute() @@ -1945,6 +2705,12 @@ Initializer. #### toStartOfQuarter() +#### toStartOfTenMinutes() + + +#### toStartOfWeek(mode=0) + + #### toStartOfYear() @@ -1957,25 +2723,40 @@ Initializer. #### toTime(timezone="") -#### toUInt16() +#### toTimeZone(timezone) + + +#### toUInt16(**kwargs) + + +#### toUInt16OrNull() #### toUInt16OrZero() -#### toUInt32() +#### toUInt32(**kwargs) + + +#### toUInt32OrNull() #### toUInt32OrZero() -#### toUInt64() +#### toUInt64(**kwargs) + + +#### toUInt64OrNull() #### toUInt64OrZero() -#### toUInt8() +#### toUInt8(**kwargs) + + +#### toUInt8OrNull() #### toUInt8OrZero() @@ -1984,6 +2765,21 @@ Initializer. 
#### toUUID() +#### toUnixTimestamp(timezone="") + + +#### toWeek(mode=0, timezone="") + + +#### toYYYYMM(timezone="") + + +#### toYYYYMMDD(timezone="") + + +#### toYYYYMMDDhhmmss(timezone="") + + #### toYear() @@ -2000,6 +2796,42 @@ For other functions: #### today() +#### topK(**kwargs) + + +#### topKIf() + + +#### topKOrDefault() + + +#### topKOrDefaultIf() + + +#### topKOrNull() + + +#### topKOrNullIf() + + +#### topKWeighted(**kwargs) + + +#### topKWeightedIf() + + +#### topKWeightedOrDefault() + + +#### topKWeightedOrDefaultIf() + + +#### topKWeightedOrNull() + + +#### topKWeightedOrNullIf() + + #### trimBoth() @@ -2015,224 +2847,9 @@ For other functions: #### unhex() -#### upper() +#### uniq(**kwargs) -#### upperUTF8() - - -#### xxHash32() - - -#### xxHash64() - - -#### yesterday() - - -infi.clickhouse_orm.system_models ---------------------------------- - -### SystemPart - -Extends Model - - -Contains information about parts of a table in the MergeTree family. -This model operates only fields, described in the reference. Other fields are ignored. -https://clickhouse.yandex/docs/en/system_tables/system.parts/ - -#### SystemPart(**kwargs) - - -Creates a model instance, using keyword arguments as field values. -Since values are immediately converted to their Pythonic type, -invalid values will cause a `ValueError` to be raised. -Unrecognized field names will cause an `AttributeError`. - - -#### attach(settings=None) - - - Add a new part or partition from the 'detached' directory to the table. - -- `settings`: Settings for executing request to ClickHouse over db.raw() method - -Returns: SQL Query - - -#### SystemPart.create_table_sql(db) - - -Returns the SQL command for creating a table for this model. - - -#### detach(settings=None) - - -Move a partition to the 'detached' directory and forget it. 
- -- `settings`: Settings for executing request to ClickHouse over db.raw() method - -Returns: SQL Query - - -#### drop(settings=None) - - -Delete a partition - -- `settings`: Settings for executing request to ClickHouse over db.raw() method - -Returns: SQL Query - - -#### SystemPart.drop_table_sql(db) - - -Returns the SQL command for deleting this model's table. - - -#### fetch(zookeeper_path, settings=None) - - -Download a partition from another server. - -- `zookeeper_path`: Path in zookeeper to fetch from -- `settings`: Settings for executing request to ClickHouse over db.raw() method - -Returns: SQL Query - - -#### SystemPart.fields(writable=False) - - -Returns an `OrderedDict` of the model's fields (from name to `Field` instance). -If `writable` is true, only writable fields are included. -Callers should not modify the dictionary. - - -#### freeze(settings=None) - - -Create a backup of a partition. - -- `settings`: Settings for executing request to ClickHouse over db.raw() method - -Returns: SQL Query - - -#### SystemPart.from_tsv(line, field_names, timezone_in_use=UTC, database=None) - - -Create a model instance from a tab-separated line. The line may or may not include a newline. -The `field_names` list must match the fields defined in the model, but does not have to include all of them. - -- `line`: the TSV-formatted data. -- `field_names`: names of the model fields in the data. -- `timezone_in_use`: the timezone to use when parsing dates and datetimes. -- `database`: if given, sets the database that this instance belongs to. - - -#### SystemPart.get(database, conditions="") - - -Get all data from system.parts table - -- `database`: A database object to fetch data from. -- `conditions`: WHERE clause conditions. Database condition is added automatically - -Returns: A list of SystemPart objects - - -#### SystemPart.get_active(database, conditions="") - - -Gets active data from system.parts table - -- `database`: A database object to fetch data from. 
-- `conditions`: WHERE clause conditions. Database and active conditions are added automatically - -Returns: A list of SystemPart objects - - -#### get_database() - - -Gets the `Database` that this model instance belongs to. -Returns `None` unless the instance was read from the database or written to it. - - -#### get_field(name) - - -Gets a `Field` instance given its name, or `None` if not found. - - -#### SystemPart.has_funcs_as_defaults() - - -Return True if some of the model's fields use a function expression -as a default value. This requires special handling when inserting instances. - - -#### SystemPart.is_read_only() - - -Returns true if the model is marked as read only. - - -#### SystemPart.is_system_model() - - -Returns true if the model represents a system table. - - -#### SystemPart.objects_in(database) - - -Returns a `QuerySet` for selecting instances of this model class. - - -#### set_database(db) - - -Sets the `Database` that this model instance belongs to. -This is done automatically when the instance is read from the database or written to it. - - -#### SystemPart.table_name() - - -#### to_db_string() - - -Returns the instance as a bytestring ready to be inserted into the database. - - -#### to_dict(include_readonly=True, field_names=None) - - -Returns the instance's column values as a dict. - -- `include_readonly`: if false, returns only fields that can be inserted into database. -- `field_names`: an iterable of field names to return (optional) - - -#### to_tskv(include_readonly=True) - - -Returns the instance's column keys and values as a tab-separated line. A newline is not included. -Fields that were not assigned a value are omitted. - -- `include_readonly`: if false, returns only fields that can be inserted into database. - - -#### to_tsv(include_readonly=True) - - -Returns the instance's column values as a tab-separated line. A newline is not included. - -- `include_readonly`: if false, returns only fields that can be inserted into database. 
+#### uniqExact(**kwargs) diff --git a/docs/expressions.md b/docs/expressions.md index dda04e7..bda4c84 100644 --- a/docs/expressions.md +++ b/docs/expressions.md @@ -93,4 +93,4 @@ expr = F("someFunctionName", arg1, arg2, ...) Note that higher-order database functions (those that use lambda expressions) are not supported. --- -[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md) \ No newline at end of file +[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Importing ORM Classes >>](importing_orm_classes.md) diff --git a/docs/querysets.md b/docs/querysets.md index d5dfb25..7c09f97 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -225,4 +225,4 @@ values aggregated for all rows suitable for filters. --- -[<< Expressions](expressions.md) | [Table of Contents](toc.md) | [Field Options >>](field_options.md) \ No newline at end of file +[<< Importing ORM Classes](importing_orm_classes.md) | [Table of Contents](toc.md) | [Field Options >>](field_options.md) diff --git a/docs/toc.md b/docs/toc.md index 6a9f29a..8b98153 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -48,6 +48,7 @@ * [Simple Engines](table_engines.md#simple-engines) * [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family) * [Custom partitioning](table_engines.md#custom-partitioning) + * [Primary key](table_engines.md#primary-key) * [Data Replication](table_engines.md#data-replication) * [Buffer Engine](table_engines.md#buffer-engine) * [Merge Engine](table_engines.md#merge-engine) @@ -117,8 +118,7 @@ * [infi.clickhouse_orm.query](class_reference.md#infi.clickhouse_orm.query) * [QuerySet](class_reference.md#queryset) * [AggregateQuerySet](class_reference.md#aggregatequeryset) + * [Q](class_reference.md#q) * [infi.clickhouse_orm.funcs](class_reference.md#infi.clickhouse_orm.funcs) * [F](class_reference.md#f) - * 
[infi.clickhouse_orm.system_models](class_reference.md#infi.clickhouse_orm.system_models) - * [SystemPart](class_reference.md#systempart) diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py index bb9df4e..6e537ec 100644 --- a/scripts/generate_ref.py +++ b/scripts/generate_ref.py @@ -135,6 +135,6 @@ if __name__ == '__main__': module_doc([models.Model, models.BufferModel, models.DistributedModel]) module_doc(sorted([fields.Field] + all_subclasses(fields.Field), key=lambda x: x.__name__), False) module_doc([engines.Engine] + all_subclasses(engines.Engine), False) - module_doc([query.QuerySet, query.AggregateQuerySet]) + module_doc([query.QuerySet, query.AggregateQuerySet, query.Q]) module_doc([funcs.F]) module_doc([system_models.SystemPart]) diff --git a/src/infi/clickhouse_orm/__init__.py b/src/infi/clickhouse_orm/__init__.py index 5284146..c7982cc 100644 --- a/src/infi/clickhouse_orm/__init__.py +++ b/src/infi/clickhouse_orm/__init__.py @@ -1 +1,13 @@ __import__("pkg_resources").declare_namespace(__name__) + +from infi.clickhouse_orm.database import * +from infi.clickhouse_orm.engines import * +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.funcs import * +from infi.clickhouse_orm.migrations import * +from infi.clickhouse_orm.models import * +from infi.clickhouse_orm.query import * +from infi.clickhouse_orm.system_models import * + +from inspect import isclass +__all__ = [c.__name__ for c in locals().values() if isclass(c)] diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 6679bc9..54f153f 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -411,3 +411,7 @@ class Database(object): def _is_connection_readonly(self): r = self._send("SELECT value FROM system.settings WHERE name = 'readonly'") return r.text.strip() != '0' + + +# Expose only relevant classes in import * +__all__ = [c.__name__ for c in [Page, DatabaseException, ServerError, Database]] diff 
--git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index aa8697d..b8a3e36 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals import logging -from .utils import comma_join +from .utils import comma_join, get_subclass_names logger = logging.getLogger('clickhouse_orm') @@ -262,3 +262,7 @@ class Distributed(Engine): if self.sharding_key: params.append(self.sharding_key) return params + + +# Expose only relevant classes in import * +__all__ = get_subclass_names(locals(), Engine) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 267fad1..601b8ad 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -6,7 +6,7 @@ from calendar import timegm from decimal import Decimal, localcontext from uuid import UUID from logging import getLogger -from .utils import escape, parse_array, comma_join, string_or_func +from .utils import escape, parse_array, comma_join, string_or_func, get_subclass_names from .funcs import F, FunctionOperatorsMixin from ipaddress import IPv4Address, IPv6Address @@ -598,3 +598,8 @@ class LowCardinalityField(Field): if with_default_expression: sql += self._extra_params(db) return sql + + +# Expose only relevant classes in import * +__all__ = get_subclass_names(locals(), Field) + diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 8af621f..4391139 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -1812,3 +1812,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def greatest(x, y): return F('greatest', x, y) + +# Expose only relevant classes in import * +__all__ = ['F'] + diff --git a/src/infi/clickhouse_orm/migrations.py b/src/infi/clickhouse_orm/migrations.py index 3d8e146..cf93d9a 100644 --- a/src/infi/clickhouse_orm/migrations.py +++ b/src/infi/clickhouse_orm/migrations.py @@ -1,7 
+1,7 @@ from .models import Model, BufferModel from .fields import DateField, StringField from .engines import MergeTree -from .utils import escape +from .utils import escape, get_subclass_names import logging logger = logging.getLogger('migrations') @@ -177,3 +177,7 @@ class MigrationHistory(Model): @classmethod def table_name(cls): return 'infi_clickhouse_orm_migrations' + + +# Expose only relevant classes in import * +__all__ = get_subclass_names(locals(), Operation) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index a70ad38..21db1ad 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -7,7 +7,7 @@ from six import reraise import pytz from .fields import Field, StringField -from .utils import parse_tsv, NO_VALUE +from .utils import parse_tsv, NO_VALUE, get_subclass_names from .query import QuerySet from .funcs import F from .engines import Merge, Distributed @@ -459,3 +459,5 @@ class DistributedModel(Model): return '\n'.join(parts) +# Expose only relevant classes in import * +__all__ = get_subclass_names(locals(), Model) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index d768fab..c6868ba 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -646,3 +646,7 @@ class AggregateQuerySet(QuerySet): qs = copy(self) qs._grouping_with_totals = True return qs + + +# Expose only relevant classes in import * +__all__ = [c.__name__ for c in [Q, QuerySet, AggregateQuerySet]] diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index bcb3217..c66592e 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -158,3 +158,7 @@ class SystemPart(Model): conditions += ' AND ' conditions += 'active' return SystemPart.get(database, conditions=conditions) + + +# Expose only relevant classes in import * +__all__ = [c.__name__ for c in [SystemPart]] diff 
--git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index 2f60a40..2e29b95 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -114,6 +114,11 @@ def is_iterable(obj): return False +def get_subclass_names(locals, base_class): + from inspect import isclass + return [c.__name__ for c in locals.values() if isclass(c) and issubclass(c, base_class)] + + class NoValue: ''' A sentinel for fields with an expression for a default value, diff --git a/tests/test_alias_fields.py b/tests/test_alias_fields.py index a190d02..1df20de 100644 --- a/tests/test_alias_fields.py +++ b/tests/test_alias_fields.py @@ -6,6 +6,7 @@ from infi.clickhouse_orm.database import Database from infi.clickhouse_orm.models import Model, NO_VALUE from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * +from infi.clickhouse_orm.funcs import F class AliasFieldsTest(unittest.TestCase): diff --git a/tests/test_database.py b/tests/test_database.py index 48688b0..30ab1c3 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -7,6 +7,7 @@ from infi.clickhouse_orm.database import ServerError, DatabaseException from infi.clickhouse_orm.models import Model from infi.clickhouse_orm.engines import Memory from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.funcs import F from .base_test_with_data import * diff --git a/tests/test_datetime_fields.py b/tests/test_datetime_fields.py index abb8c47..3387ee9 100644 --- a/tests/test_datetime_fields.py +++ b/tests/test_datetime_fields.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals import unittest +import datetime +import pytz from infi.clickhouse_orm.database import Database from infi.clickhouse_orm.models import Model diff --git a/tests/test_engines.py b/tests/test_engines.py index 2d874a9..84e46d8 100644 --- a/tests/test_engines.py +++ b/tests/test_engines.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals import unittest +import datetime 
from infi.clickhouse_orm.system_models import SystemPart from infi.clickhouse_orm.database import Database, DatabaseException, ServerError diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 4b15f48..77dfec9 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -2,9 +2,14 @@ import unittest from .base_test_with_data import * from .test_querysets import SampleModel from datetime import date, datetime, tzinfo, timedelta +import pytz from ipaddress import IPv4Address, IPv6Address +import logging +from decimal import Decimal + from infi.clickhouse_orm.database import ServerError from infi.clickhouse_orm.utils import NO_VALUE +from infi.clickhouse_orm.funcs import F class FuncsTestCase(TestCaseWithData): @@ -14,28 +19,28 @@ class FuncsTestCase(TestCaseWithData): self.database.insert(self._sample_data()) def _test_qs(self, qs, expected_count): - logger.info(qs.as_sql()) + logging.info(qs.as_sql()) count = 0 for instance in qs: count += 1 - logger.info('\t[%d]\t%s' % (count, instance.to_dict())) + logging.info('\t[%d]\t%s' % (count, instance.to_dict())) self.assertEqual(count, expected_count) self.assertEqual(qs.count(), expected_count) def _test_func(self, func, expected_value=NO_VALUE): sql = 'SELECT %s AS value' % func.to_sql() - logger.info(sql) + logging.info(sql) result = list(self.database.select(sql)) - logger.info('\t==> %s', result[0].value if result else '') + logging.info('\t==> %s', result[0].value if result else '') if expected_value != NO_VALUE: self.assertEqual(result[0].value, expected_value) return result[0].value if result else None def _test_aggr(self, func, expected_value=NO_VALUE): qs = Person.objects_in(self.database).aggregate(value=func) - logger.info(qs.as_sql()) + logging.info(qs.as_sql()) result = list(qs) - logger.info('\t==> %s', result[0].value if result else '') + logging.info('\t==> %s', result[0].value if result else '') if expected_value != NO_VALUE: self.assertEqual(result[0].value, expected_value) return 
result[0].value if result else None diff --git a/tests/test_materialized_fields.py b/tests/test_materialized_fields.py index 4a8d62f..af469cd 100644 --- a/tests/test_materialized_fields.py +++ b/tests/test_materialized_fields.py @@ -6,6 +6,7 @@ from infi.clickhouse_orm.database import Database from infi.clickhouse_orm.models import Model, NO_VALUE from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * +from infi.clickhouse_orm.funcs import F class MaterializedFieldsTest(unittest.TestCase): diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 2144e55..9b1fa53 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals, print_function import unittest - from infi.clickhouse_orm.database import Database from infi.clickhouse_orm.query import Q from infi.clickhouse_orm.funcs import F from .base_test_with_data import * from datetime import date, datetime from enum import Enum +from decimal import Decimal from logging import getLogger logger = getLogger('tests') From 00bf7eeb755f9df950a149908eade25637ad09e8 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 16 May 2020 09:24:31 +0300 Subject: [PATCH 21/41] count() and paginate() - accept conditions as expressions and Q objects --- src/infi/clickhouse_orm/database.py | 10 ++++++++-- tests/test_database.py | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 54f153f..d42e224 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -249,9 +249,12 @@ class Database(object): - `model_class`: the model to count. - `conditions`: optional SQL conditions (contents of the WHERE clause). 
''' + from infi.clickhouse_orm.query import Q query = 'SELECT count() FROM $table' if conditions: - query += ' WHERE ' + conditions + if isinstance(conditions, Q): + conditions = conditions.to_sql(model_class) + query += ' WHERE ' + str(conditions) query = self._substitute(query, model_class) r = self._send(query) return int(r.text) if r.text else 0 @@ -303,6 +306,7 @@ class Database(object): The result is a namedtuple containing `objects` (list), `number_of_objects`, `pages_total`, `number` (of the current page), and `page_size`. ''' + from infi.clickhouse_orm.query import Q count = self.count(model_class, conditions) pages_total = int(ceil(count / float(page_size))) if page_num == -1: @@ -312,7 +316,9 @@ class Database(object): offset = (page_num - 1) * page_size query = 'SELECT * FROM $table' if conditions: - query += ' WHERE ' + conditions + if isinstance(conditions, Q): + conditions = conditions.to_sql(model_class) + query += ' WHERE ' + str(conditions) query += ' ORDER BY %s' % order_by query += ' LIMIT %d, %d' % (offset, page_size) query = self._substitute(query, model_class) diff --git a/tests/test_database.py b/tests/test_database.py index 30ab1c3..15d928f 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -8,6 +8,7 @@ from infi.clickhouse_orm.models import Model from infi.clickhouse_orm.engines import Memory from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.funcs import F +from infi.clickhouse_orm.query import Q from .base_test_with_data import * @@ -47,9 +48,14 @@ class DatabaseTestCase(TestCaseWithData): def test_count(self): self.database.insert(self._sample_data()) self.assertEqual(self.database.count(Person), 100) + # Conditions as string self.assertEqual(self.database.count(Person, "first_name = 'Courtney'"), 2) self.assertEqual(self.database.count(Person, "birthday > '2000-01-01'"), 22) self.assertEqual(self.database.count(Person, "birthday < '1970-03-01'"), 0) + # Conditions as expression + 
self.assertEqual(self.database.count(Person, Person.birthday > datetime.date(2000, 1, 1)), 22) + # Conditions as Q object + self.assertEqual(self.database.count(Person, Q(birthday__gt=datetime.date(2000, 1, 1))), 22) def test_select(self): self._insert_and_check(self._sample_data(), len(data)) @@ -146,8 +152,15 @@ class DatabaseTestCase(TestCaseWithData): def test_pagination_with_conditions(self): self._insert_and_check(self._sample_data(), len(data)) + # Conditions as string page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions="first_name < 'Ava'") self.assertEqual(page.number_of_objects, 10) + # Conditions as expression + page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions=Person.first_name < 'Ava') + self.assertEqual(page.number_of_objects, 10) + # Conditions as Q object + page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions=Q(first_name__lt='Ava')) + self.assertEqual(page.number_of_objects, 10) def test_special_chars(self): s = u'אבגד \\\'"`,.;éåäöšž\n\t\0\b\r' @@ -263,3 +276,4 @@ class DatabaseTestCase(TestCaseWithData): self.assertEqual(model.table_name(), row.name) # Read a few records list(model.objects_in(self.database)[:10]) + From 613e594fa949e2182e04daf10235168216bf2bf7 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 16 May 2020 09:25:05 +0300 Subject: [PATCH 22/41] docs --- docs/importing_orm_classes.md | 89 +++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 docs/importing_orm_classes.md diff --git a/docs/importing_orm_classes.md b/docs/importing_orm_classes.md new file mode 100644 index 0000000..0f1cd70 --- /dev/null +++ b/docs/importing_orm_classes.md @@ -0,0 +1,89 @@ + +Importing ORM Classes +===================== + +The ORM supports different styles of importing and referring to its classes, so choose what works for you from the options below. 
+ +Importing Everything +-------------------- + +It is safe to use `import *` from `infi.clickhouse_orm` or its submodules. Only classes that are needed by users of the ORM will get imported, and nothing else: +```python +from infi.clickhouse_orm import * +``` +This is exactly equivalent to the following import statements: +```python +from infi.clickhouse_orm.database import * +from infi.clickhouse_orm.engines import * +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.funcs import * +from infi.clickhouse_orm.migrations import * +from infi.clickhouse_orm.models import * +from infi.clickhouse_orm.query import * +from infi.clickhouse_orm.system_models import * +``` +By importing everything, all of the ORM's public classes can be used directly. For example: +```python +from infi.clickhouse_orm import * + +class Event(Model): + + name = StringField(default="EVENT") + repeated = UInt32Field(default=1) + created = DateTimeField(default=F.now()) + + engine = Memory() +``` + +Importing Everything into a Namespace +------------------------------------- + +To prevent potential name clashes and to make the code more readable, you can import the ORM's classes into a namespace of your choosing, e.g. `orm`. For brevity, it is recommended to import the `F` class explicitly: +```python +import infi.clickhouse_orm as orm +from infi.clickhouse_orm.funcs import F + +class Event(orm.Model): + + name = orm.StringField(default="EVENT") + repeated = orm.UInt32Field(default=1) + created = orm.DateTimeField(default=F.now()) + + engine = orm.Memory() +``` + +Importing Specific Submodules +----------------------------- + +It is possible to import only the submodules you need, and use their names to qualify the ORM's class names. This option is more verbose, but makes it clear where each class comes from. 
For example: +```python +from infi.clickhouse_orm import models, fields, engines, F + +class Event(models.Model): + + name = fields.StringField(default="EVENT") + repeated = fields.UInt32Field(default=1) + created = fields.DateTimeField(default=F.now()) + + engine = engines.Memory() +``` + +Importing Specific Classes +-------------------------- + +If you prefer, you can import only the specific ORM classes that you need directly from `infi.clickhouse_orm`: +```python +from infi.clickhouse_orm import Model, StringField, UInt32Field, DateTimeField, F, Memory + +class Event(Model): + + name = StringField(default="EVENT") + repeated = UInt32Field(default=1) + created = DateTimeField(default=F.now()) + + engine = Memory() +``` + +--- + +[<< Expressions](expressions.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md) From 9697157d6bac1eb8571884d75eefc0dfc18e7deb Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 16 May 2020 12:15:01 +0300 Subject: [PATCH 23/41] allow expressions in MergeTree params --- src/infi/clickhouse_orm/engines.py | 9 +++++---- src/infi/clickhouse_orm/funcs.py | 2 +- src/infi/clickhouse_orm/utils.py | 7 +++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index b8a3e36..a361b63 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -77,11 +77,12 @@ class MergeTree(Engine): # https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/ # Let's check version and use new syntax if available if db.server_version >= (1, 1, 54310): - partition_sql = "PARTITION BY %s ORDER BY %s" \ - % ('(%s)' % comma_join(self.partition_key), '(%s)' % comma_join(self.order_by)) + partition_sql = "PARTITION BY (%s) ORDER BY (%s)" \ + % (comma_join(self.partition_key, stringify=True), + comma_join(self.order_by, stringify=True)) if self.primary_key: - partition_sql += " PRIMARY KEY (%s)" % comma_join(self.primary_key) + 
partition_sql += " PRIMARY KEY (%s)" % comma_join(self.primary_key, stringify=True) if self.sampling_expr: partition_sql += " SAMPLE BY %s" % self.sampling_expr @@ -113,7 +114,7 @@ class MergeTree(Engine): params.append(self.date_col) if self.sampling_expr: params.append(self.sampling_expr) - params.append('(%s)' % comma_join(self.order_by)) + params.append('(%s)' % comma_join(self.order_by, stringify=True)) params.append(str(self.index_granularity)) return params diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 4391139..ba05234 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -68,7 +68,7 @@ def parametric(func): def inner(*args, **kwargs): f = func(*args, **kwargs) # Append the parameter to the function name - parameters_str = comma_join([str(p) for p in parameters]) + parameters_str = comma_join(parameters, stringify=True) f.name = '%s(%s)' % (f.name, parameters_str) return f return inner diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index 2e29b95..a487bfe 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -96,11 +96,14 @@ def import_submodules(package_name): } -def comma_join(items): +def comma_join(items, stringify=False): """ Joins an iterable of strings with commas. 
""" - return ', '.join(items) + if stringify: + return ', '.join(str(item) for item in items) + else: + return ', '.join(items) def is_iterable(obj): From 9e119f33e63ba9c2555b1e4e34e2f119e68ec3e3 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 16 May 2020 12:15:14 +0300 Subject: [PATCH 24/41] docs --- CHANGELOG.md | 11 +++++++++++ docs/expressions.md | 8 +++++--- docs/importing_orm_classes.md | 2 +- docs/models_and_databases.md | 2 +- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb498d2..273617e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ Change Log ========== +v2.0.0 +------ +- Dropped support for Python 2.x +- New flexible syntax for database expressions and functions +- Expressions as default values for model fields +- Support for IPv4 and IPv6 fields +- Automatic generation of models by inspecting existing tables +- Convenient ways to import ORM classes + +See [What's new in version 2](docs/whats_new_in_version_2.md) for details. + v1.4.0 ------ - Added primary_key parameter to MergeTree engines (M1hacka) diff --git a/docs/expressions.md b/docs/expressions.md index bda4c84..89eeb8c 100644 --- a/docs/expressions.md +++ b/docs/expressions.md @@ -13,7 +13,7 @@ Using Expressions Expressions usually include ClickHouse database functions, which are made available by the `F` class. 
Here's a simple function: ```python -from infi.clickhouse_orm.models import F +from infi.clickhouse_orm import F expr = F.today() ``` @@ -22,9 +22,9 @@ Functions that accept arguments can be composed, just like when using SQL: expr = F.toDayOfWeek(F.today()) ``` -You can see the SQL expression that is represented by an ORM expression by calling its `to_sql` or `repr` methods: +You can see the SQL expression that is represented by an ORM expression by calling its `to_sql` method or converting it to a string: ```python ->>> print(expr.to_sql()) +>>> print(expr) toDayOfWeek(today()) ``` @@ -42,6 +42,8 @@ There is also support for comparison operators (`<`, `<=`, `==`, `>=`, `>`, `!=` (F.toDayOfWeek(F.today()) == 6) & (F.toDayOfMonth(F.today()) == 13) ``` +Note that Python's bitwise operators (`&`, `|`, `~`, `^`) have higher precedence than comparison operators, so always use parentheses when combining these two types of operators in an expression. Otherwise the resulting SQL might be different than what you would expect. + ### Referring to model fields To refer to a model field inside an expression, use `.` syntax, for example: diff --git a/docs/importing_orm_classes.md b/docs/importing_orm_classes.md index 0f1cd70..77d04e4 100644 --- a/docs/importing_orm_classes.md +++ b/docs/importing_orm_classes.md @@ -41,7 +41,7 @@ Importing Everything into a Namespace To prevent potential name clashes and to make the code more readable, you can import the ORM's classes into a namespace of your choosing, e.g. `orm`. For brevity, it is recommended to import the `F` class explicitly: ```python import infi.clickhouse_orm as orm -from infi.clickhouse_orm.funcs import F +from infi.clickhouse_orm import F class Event(orm.Model): diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md index 74ded9f..cf75081 100644 --- a/docs/models_and_databases.md +++ b/docs/models_and_databases.md @@ -152,7 +152,7 @@ When running a query, specifying a model class is not required. 
In case you do n This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax. -It is also possible to generate a model class on the fly for an existing table in the database using `get_model_for_table`. This is particulary useful for querying system tables, for example: +It is also possible to generate a model class on the fly for an existing table in the database using `get_model_for_table`. This is particularly useful for querying system tables, for example: QueryLog = db.get_model_for_table('query_log', system_table=True) for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000): From f084b6e95fe3cd6dc5136028db86f86cf6cc0bae Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 28 May 2020 18:07:45 +0300 Subject: [PATCH 25/41] docs --- docs/querysets.md | 134 +++++++++++++++++++------------ docs/whats_new_in_version_2.md | 58 ++++++++++++++ tests/test_querysets.py | 139 ++------------------------------- 3 files changed, 147 insertions(+), 184 deletions(-) create mode 100644 docs/whats_new_in_version_2.md diff --git a/docs/querysets.md b/docs/querysets.md index 7c09f97..82d093d 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -1,3 +1,4 @@ + Querysets ========= @@ -16,31 +17,76 @@ Filtering The `filter` and `exclude` methods are used for filtering the matching instances. Calling these methods returns a new queryset instance, with the added conditions. 
For example: >>> qs = Person.objects_in(database) - >>> qs = qs.filter(first_name__startswith='V').exclude(birthday__lt='2000-01-01') + >>> qs = qs.filter(F.like(Person.first_name, 'V%')).exclude(Person.birthday < '2000-01-01') >>> qs.conditions_as_sql() - u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')" + "first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')" -It is possible to specify several fields to filter or exclude by: +It is possible to specify several expressions to filter or exclude by, and they will be ANDed together: - >>> qs = Person.objects_in(database).filter(last_name='Smith', height__gt=1.75) + >>> qs = Person.objects_in(database).filter(Person.last_name == 'Smith', Person.height > 1.75) >>> qs.conditions_as_sql() - u"last_name = 'Smith' AND height > 1.75" + "last_name = 'Smith' AND height > 1.75" -For filters with compound conditions you can use `Q` objects inside `filter` with overloaded operators `&` (AND), `|` (OR) and `~` (NOT): - - >>> qs = Person.objects_in(database).filter((Q(first_name='Ciaran', last_name='Carver') | Q(height_lte=1.8)) & ~Q(first_name='David')) - >>> qs.conditions_as_sql() - u"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))" - -By default conditions from `filter` and `exclude` methods are add to `WHERE` clause. 
-For better aggregation performance you can add them to `PREWHERE` section using `prewhere=True` parameter
+For compound conditions you can use the overloaded operators `&` (AND), `|` (OR) and `~` (NOT):
 
     >>> qs = Person.objects_in(database)
-    >>> qs = qs.filter(first_name__startswith='V', prewhere=True)
-    >>> qs.conditions_as_sql(prewhere=True)
-    u"first_name LIKE 'V%'"
+    >>> qs = qs.filter(((Person.first_name == 'Ciaran') & (Person.last_name == 'Carver')) | (Person.height <= 1.8) & ~(Person.first_name == 'David'))
+    >>> qs.conditions_as_sql()
+    "((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"
 
-There are different operators that can be used, by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators.
+Note that Python's bitwise operators (`&`, `|`, `~`, `^`) have higher precedence than comparison operators, so always use parentheses when combining these two types of operators in an expression. Otherwise the resulting SQL might be different than what you would expect.
+
+### Using `IN` and `NOT IN`
+
+Filtering queries using ClickHouse's `IN` and `NOT IN` operators requires using the `isIn` and `isNotIn` functions (trying to use Python's `in` keyword will not work!).
+For example:
+```python
+# Is it Monday, Tuesday or Wednesday?
+F.isIn(F.toDayOfWeek(F.now()), [1, 2, 3])
+# This will not work:
+F.toDayOfWeek(F.now()) in [1, 2, 3]
+```
+
+In case of model fields, there is a simplified syntax:
+```python
+# Filtering using F.isIn:
+qs.filter(F.isIn(Person.first_name, ['Robert', 'Rob', 'Robbie']))
+# Simpler syntax using isIn directly on the field:
+qs.filter(Person.first_name.isIn(['Robert', 'Rob', 'Robbie']))
+```
+
+The `isIn` and `isNotIn` functions expect either a list/tuple of values, or another queryset (a subquery). 
For example if we want to select only people with Irish last names:
+```python
+# A list of values
+qs = Person.objects_in(database).filter(Person.last_name.isIn(["Murphy", "O'Sullivan"]))
+# A queryset
+subquery = IrishLastName.objects_in(database).only("name")
+qs = Person.objects_in(database).filter(Person.last_name.isIn(subquery))
+```
+
+### Specifying PREWHERE conditions
+
+By default conditions from `filter` and `exclude` methods are added to `WHERE` clause.
+For better aggregation performance you can add them to `PREWHERE` section by adding a `prewhere=True` parameter:
+
+    >>> qs = Person.objects_in(database)
+    >>> qs = qs.filter(F.like(Person.first_name, 'V%'), prewhere=True)
+    >>> qs.conditions_as_sql(prewhere=True)
+    "first_name LIKE 'V%'"
+
+### Old-style filter conditions
+
+Prior to version 2 of the ORM, filtering conditions were limited to a predefined set of operators, and complex expressions were not supported. This old syntax is still supported, so you can use it alongside or even intermixed with new-style functions and expressions.
+
+The old syntax uses keyword arguments to the `filter` and `exclude` methods, that are built as `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. For example:
+```python
+qs = Position.objects_in(database)
+# New style
+qs = qs.filter(Position.x > 100, Position.y < 20, Position.terrain == 'water')
+# Old style
+qs = qs.filter(x__gt=100, y__lt=20, terrain='water')
+```
+Below are all the supported operators. 
| Operator | Equivalent SQL | Comments | | -------- | -------------------------------------------- | ---------------------------------- | @@ -51,8 +97,8 @@ There are different operators that can be used, by passing `____>> Person.objects_in(database).count() 100 @@ -144,8 +169,7 @@ It is possible to get a specific item from the queryset by index: qs = Person.objects_in(database).order_by('last_name', 'first_name') first = qs[0] -It is also possible to get a range a instances using a slice. This returns a queryset, -that you can either iterate over or convert to a list. +It is also possible to get a range a instances using a slice. This returns a queryset, that you can either iterate over or convert to a list. qs = Person.objects_in(database).order_by('last_name', 'first_name') first_ten_people = list(qs[:10]) @@ -153,7 +177,7 @@ that you can either iterate over or convert to a list. You should use `order_by` to ensure a consistent ordering of the results. -Trying to use negative indexes or a slice with a step (e.g. [0:100:2]) is not supported and will raise an `AssertionError`. +Trying to use negative indexes or a slice with a step (e.g. [0 : 100 : 2]) is not supported and will raise an `AssertionError`. Pagination ---------- @@ -184,7 +208,7 @@ Aggregation It is possible to use aggregation functions over querysets using the `aggregate` method. The simplest form of aggregation works over all rows in the queryset: - >>> qs = Person.objects_in(database).aggregate(average_height='avg(height)') + >>> qs = Person.objects_in(database).aggregate(average_height=F.avg(Person.height)) >>> print(qs.count()) 1 >>> for row in qs: print(row.average_height) @@ -192,29 +216,35 @@ It is possible to use aggregation functions over querysets using the `aggregate` The returned row or rows are no longer instances of the base model (`Person` in this example), but rather instances of an ad-hoc model that includes only the fields specified in the call to `aggregate`. 
-You can pass names of fields from the model that will be included in the query. By default, they will be also used in the GROUP BY clause. For example to count the number of people per last name you could do this: +You can pass fields from the model that will be included in the query. By default, they will be also used in the GROUP BY clause. For example to count the number of people per last name you could do this: - qs = Person.objects_in(database).aggregate('last_name', num='count()') + qs = Person.objects_in(database).aggregate(Person.last_name, num=F.count()) The underlying SQL query would be something like this: - SELECT last_name, count() AS num FROM person GROUP BY last_name + SELECT last_name, count() AS num + FROM person + GROUP BY last_name If you would like to control the GROUP BY explicitly, use the `group_by` method. This is useful when you need to group by a calculated field, instead of a field that exists in the model. For example, to count the number of people born on each weekday: - qs = Person.objects_in(database).aggregate(weekday='toDayOfWeek(birthday)', num='count()').group_by('weekday') + qs = Person.objects_in(database).aggregate(weekday=F.toDayOfWeek(Person.birthday), num=F.count()).group_by('weekday') This queryset is translated to: - SELECT toDayOfWeek(birthday) AS weekday, count() AS num FROM person GROUP BY weekday + SELECT toDayOfWeek(birthday) AS weekday, count() AS num + FROM person + GROUP BY weekday -After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model. +After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. 
It is also not possible to filter the aggregated queryset on calculated fields, only on fields that exist in the model.
+
+### Adding totals
 
 If you limit aggregation results, it might be useful to get total aggregation values for all rows.
 To achieve this, you can use `with_totals` method. It will return extra row (last) with values aggregated for all rows suitable for filters.
 
-    qs = Person.objects_in(database).aggregate('first_name', num='count()').with_totals().order_by('-count')[:3]
+    qs = Person.objects_in(database).aggregate(Person.first_name, num=F.count()).with_totals().order_by('-num')[:3]
     >>> print(qs.count())
     4
     >>> for row in qs:
diff --git a/docs/whats_new_in_version_2.md b/docs/whats_new_in_version_2.md
new file mode 100644
index 0000000..378adca
--- /dev/null
+++ b/docs/whats_new_in_version_2.md
@@ -0,0 +1,58 @@
+What's New in Version 2
+=======================
+
+## Python 3.5+ Only
+
+This version of the ORM no longer supports Python 2.
+
+## New flexible syntax for database expressions and functions
+
+Expressions that use model fields, database functions and Python operators are now first-class citizens of the ORM. They provide infinite expressivity and flexibility when defining models and generating queries. 
+ +Example of expressions in model definition: +```python +class Temperature(Model): + + station_id = UInt16Field() + timestamp = DateTimeField(default=F.now()) # function as default value + degrees_celsius = Float32Field() + degrees_fahrenheit = Float32Field(alias=degrees_celsius * 1.8 + 32) # expression as field alias + + # expressions in engine definition + engine = MergeTree(partition_key=[F.toYYYYMM(timestamp)], order_by=[station_id, timestamp]) +``` + +Example of expressions in queries: +```python +db = Database('default') +start = F.toStartOfMonth(F.now()) +expr = (Temperature.timestamp > start) & (Temperature.station_id == 123) & (Temperature.degrees_celsius > 30) +for t in Temperature.objects_in(db).filter(expr): + print(t.timestamp, t.degrees_celsius) +``` + +See [Expressions](expressions.md). + +## Support for IPv4 and IPv6 fields + +Two new fields classes were added: `IPv4Field` and `IPv6Field`. Their values are represented by Python's `ipaddress.IPv4Address` and `ipaddress.IPv6Address`. + +See [Field Types](field_types.md). + +## Automatic generation of models by inspecting existing tables + +It is now easy to generate a model class on the fly for an existing table in the database using `Database.get_model_for_table`. This is particularly useful for querying system tables, for example: +```python +QueryLog = db.get_model_for_table('query_log', system_table=True) +for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000): + print(row.query) +``` + +## Convenient ways to import ORM classes + +You can now import all ORM classes directly from `infi.clickhouse_orm`, without worrying about sub-modules. For example: +```python +from infi.clickhouse_orm import Database, Model, StringField, DateTimeField, MergeTree +``` +See [Importing ORM Classes](importing_orm_classes.md). 
+ diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 9b1fa53..6409ffa 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -322,6 +322,13 @@ class AggregateTestCase(TestCaseWithData): for row in qs: self.assertAlmostEqual(row.average_height, 1.6923, places=4) self.assertEqual(row.count, 100) + # With functions + qs = Person.objects_in(self.database).aggregate(average_height=F.avg(Person.height), count=F.count()) + print(qs.as_sql()) + self.assertEqual(qs.count(), 1) + for row in qs: + self.assertAlmostEqual(row.average_height, 1.6923, places=4) + self.assertEqual(row.count, 100) def test_aggregate_with_filter(self): # When filter comes before aggregate @@ -484,138 +491,6 @@ class AggregateTestCase(TestCaseWithData): self.assertEquals([p.first_name for p in limited_qs[:3]], ['Norman', 'Octavius', 'Oliver']) - - -class FuncsTestCase(TestCaseWithData): - - def setUp(self): - super(FuncsTestCase, self).setUp() - self.database.insert(self._sample_data()) - - def _test_qs(self, qs, expected_count): - logger.info(qs.as_sql()) - count = 0 - for instance in qs: - count += 1 - logger.info('\t[%d]\t%s' % (count, instance.to_dict())) - self.assertEqual(count, expected_count) - self.assertEqual(qs.count(), expected_count) - - def _test_func(self, func, expected_value=None): - sql = 'SELECT %s AS value' % func.to_sql() - logger.info(sql) - result = list(self.database.select(sql)) - logger.info('\t==> %s', result[0].value) - if expected_value is not None: - self.assertEqual(result[0].value, expected_value) - - def test_func_to_sql(self): - # No args - self.assertEqual(F('func').to_sql(), 'func()') - # String args - self.assertEqual(F('func', "Wendy's", u"Wendy's").to_sql(), "func('Wendy\\'s', 'Wendy\\'s')") - # Numeric args - self.assertEqual(F('func', 1, 1.1, Decimal('3.3')).to_sql(), "func(1, 1.1, 3.3)") - # Date args - self.assertEqual(F('func', date(2018, 12, 31)).to_sql(), "func(toDate('2018-12-31'))") - # Datetime args - 
self.assertEqual(F('func', datetime(2018, 12, 31)).to_sql(), "func(toDateTime('1546214400'))") - # Boolean args - self.assertEqual(F('func', True, False).to_sql(), "func(1, 0)") - # Null args - self.assertEqual(F('func', None).to_sql(), "func(NULL)") - # Fields as args - self.assertEqual(F('func', SampleModel.color).to_sql(), "func(`color`)") - # Funcs as args - self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))') - # Iterables as args - x = [1, 'z', F('foo', 17)] - for y in [x, tuple(x), iter(x)]: - self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)") - self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([[1, 2], [3, 4]])") - - def test_filter_float_field(self): - qs = Person.objects_in(self.database) - # Height > 2 - self._test_qs(qs.filter(F.greater(Person.height, 2)), 0) - self._test_qs(qs.filter(Person.height > 2), 0) - # Height > 1.61 - self._test_qs(qs.filter(F.greater(Person.height, 1.61)), 96) - self._test_qs(qs.filter(Person.height > 1.61), 96) - # Height < 1.61 - self._test_qs(qs.filter(F.less(Person.height, 1.61)), 4) - self._test_qs(qs.filter(Person.height < 1.61), 4) - - def test_filter_date_field(self): - qs = Person.objects_in(self.database) - # People born on the 30th - self._test_qs(qs.filter(F('equals', F('toDayOfMonth', Person.birthday), 30)), 3) - self._test_qs(qs.filter(F('toDayOfMonth', Person.birthday) == 30), 3) - self._test_qs(qs.filter(F.toDayOfMonth(Person.birthday) == 30), 3) - # People born on Sunday - self._test_qs(qs.filter(F('equals', F('toDayOfWeek', Person.birthday), 7)), 18) - self._test_qs(qs.filter(F('toDayOfWeek', Person.birthday) == 7), 18) - self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18) - # People born on 1976-10-01 - self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1) - self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1) - self._test_qs(qs.filter(Person.birthday == date(1976, 10, 1)), 1) - - def 
test_func_as_field_value(self): - qs = Person.objects_in(self.database) - self._test_qs(qs.filter(height__gt=F.plus(1, 0.61)), 96) - self._test_qs(qs.exclude(birthday=F.today()), 100) - self._test_qs(qs.filter(birthday__between=['1970-01-01', F.today()]), 100) - - def test_comparison_operators(self): - one = F.plus(1, 0) - two = F.plus(1, 1) - self._test_func(one > one, 0) - self._test_func(two > one, 1) - self._test_func(one >= two, 0) - self._test_func(one >= one, 1) - self._test_func(one < one, 0) - self._test_func(one < two, 1) - self._test_func(two <= one, 0) - self._test_func(one <= one, 1) - self._test_func(one == two, 0) - self._test_func(one == one, 1) - self._test_func(one != one, 0) - self._test_func(one != two, 1) - - def test_arithmetic_operators(self): - one = F.plus(1, 0) - two = F.plus(1, 1) - # + - self._test_func(one + two, 3) - self._test_func(one + 2, 3) - self._test_func(2 + one, 3) - # - - self._test_func(one - two, -1) - self._test_func(one - 2, -1) - self._test_func(1 - two, -1) - # * - self._test_func(one * two, 2) - self._test_func(one * 2, 2) - self._test_func(1 * two, 2) - # / - self._test_func(one / two, 0.5) - self._test_func(one / 2, 0.5) - self._test_func(1 / two, 0.5) - # % - self._test_func(one % two, 1) - self._test_func(one % 2, 1) - self._test_func(1 % two, 1) - # sign - self._test_func(-one, -1) - self._test_func(--one, 1) - self._test_func(+one, 1) - - - - - - Color = Enum('Color', u'red blue green yellow brown white black') From e97e48a695d9ac9e39edb764119aa6c598bf18d9 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 28 May 2020 18:17:52 +0300 Subject: [PATCH 26/41] Fix test in for latest clickhouse version --- tests/test_database.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_database.py b/tests/test_database.py index 15d928f..78c3fdb 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -275,5 +275,10 @@ class DatabaseTestCase(TestCaseWithData): 
self.assertTrue(model.is_read_only()) self.assertEqual(model.table_name(), row.name) # Read a few records - list(model.objects_in(self.database)[:10]) - + try: + list(model.objects_in(self.database)[:10]) + except ServerError as e: + if 'Not enough privileges' in e.message: + pass + else: + raise From 113ac7ad4a78709d05d618cad9e3a429a04310fd Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 28 May 2020 19:18:10 +0300 Subject: [PATCH 27/41] docs --- README.md | 13 ++---- docs/class_reference.md | 8 ++-- docs/expressions.md | 11 +++-- docs/field_options.md | 40 ++++++++-------- docs/field_types.md | 59 +++++++++++------------- docs/index.md | 2 +- docs/models_and_databases.md | 26 +++++------ docs/ref.md | 2 +- docs/system_models.md | 7 ++- docs/table_engines.md | 56 +++++++++++----------- docs/toc.md | 5 +- src/infi/clickhouse_orm/engines.py | 12 ++--- src/infi/clickhouse_orm/models.py | 2 +- src/infi/clickhouse_orm/query.py | 2 +- src/infi/clickhouse_orm/system_models.py | 8 ++-- 15 files changed, 125 insertions(+), 128 deletions(-) diff --git a/README.md b/README.md index 9958aba..8e300bd 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,7 @@ Let's jump right in with a simple example of monitoring CPU usage. 
First we need connect to the database and create a table for the model: ```python -from infi.clickhouse_orm.database import Database -from infi.clickhouse_orm.models import Model -from infi.clickhouse_orm.fields import * -from infi.clickhouse_orm.engines import Memory -from infi.clickhouse_orm.funcs import F +from infi.clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory, F class CPUStats(Model): @@ -46,12 +42,13 @@ Querying the table is easy, using either the query builder or raw SQL: ```python # Calculate what percentage of the time CPU 1 was over 95% busy -total = CPUStats.objects_in(db).filter(CPUStats.cpu_id == 1).count() -busy = CPUStats.objects_in(db).filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count() +queryset = CPUStats.objects_in(db) +total = queryset.filter(CPUStats.cpu_id == 1).count() +busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count() print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total)) # Calculate the average usage per CPU -for row in CPUStats.objects_in(db).aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)): +for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)): print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row)) ``` diff --git a/docs/class_reference.md b/docs/class_reference.md index b191051..285f9b4 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -840,7 +840,7 @@ Extends Engine Buffers the data to write in RAM, periodically flushing it to another table. Must be used in conjuction with a `BufferModel`. -Read more [here](https://clickhouse.yandex/docs/en/table_engines/buffer/). +Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/). 
#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000) @@ -853,7 +853,7 @@ Extends Engine The Merge engine (not to be confused with MergeTree) does not store data itself, but allows reading from any number of other tables simultaneously. Writing to a table is not supported -https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge +https://clickhouse.tech/docs/en/engines/table-engines/special/merge/ #### Merge(table_regex) @@ -869,7 +869,7 @@ Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any. See full documentation here -https://clickhouse.yandex/docs/en/table_engines/distributed.html +https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/ #### Distributed(cluster, table=None, sharding_key=None) @@ -1165,7 +1165,7 @@ Returns the selected fields or expressions as a SQL string. Adds WITH TOTALS modifier ot GROUP BY, making query return extra row with aggregate function calculated across all the rows. More information: -https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier +https://clickhouse.tech/docs/en/query_language/select/#with-totals-modifier ### Q diff --git a/docs/expressions.md b/docs/expressions.md index 89eeb8c..d9237bd 100644 --- a/docs/expressions.md +++ b/docs/expressions.md @@ -55,14 +55,14 @@ Sensor.temperature * 1.8 + 32 Inside model class definitions omit the class name: ```python class Person(Model): - height_cm = fields.Float32Field() - height_inch = fields.Float32Field(alias=height_cm/2.54) + height_cm = Float32Field() + height_inch = Float32Field(alias=height_cm/2.54) ... ``` ### Parametric functions -Some of ClickHouse's aggregate functions can accept not only argument columns, but a set of parameters - constants for initialization. The syntax is two pairs of brackets instead of one. 
The first is for parameters, and the second is for arguments. For example: +Some of ClickHouse's aggregate functions can accept one or more parameters - constants for initialization that affect the way the function works. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. For example: ```python # Most common last names F.topK(5)(Person.last_name) @@ -81,8 +81,8 @@ def normalize_string(s): Then we can use this expression anywhere we need it: ```python class Event(Model): - code = fields.StringField() - normalized_code = fields.StringField(materialized=normalize_string(code)) + code = StringField() + normalized_code = StringField(materialized=normalize_string(code)) ``` ### Which functions are available? @@ -93,6 +93,7 @@ expr = F("someFunctionName", arg1, arg2, ...) ``` Note that higher-order database functions (those that use lambda expressions) are not supported. + --- [<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Importing ORM Classes >>](importing_orm_classes.md) diff --git a/docs/field_options.md b/docs/field_options.md index 3905afd..3019c98 100644 --- a/docs/field_options.md +++ b/docs/field_options.md @@ -16,13 +16,13 @@ Note that `default`, `alias` and `materialized` are mutually exclusive - you can Specifies a default value to use for the field. If not given, the field will have a default value based on its type: empty string for string fields, zero for numeric fields, etc. The default value can be a Python value suitable for the field type, or an expression. For example: ```python -class Event(models.Model): +class Event(Model): - name = fields.StringField(default="EVENT") - repeated = fields.UInt32Field(default=1) - created = fields.DateTimeField(default=F.now()) + name = StringField(default="EVENT") + repeated = UInt32Field(default=1) + created = DateTimeField(default=F.now()) - engine = engines.Memory() + engine = Memory() ... 
``` When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE` instead. For example: @@ -38,18 +38,18 @@ When creating a model instance, any fields you do not specify get their default The `alias` and `materialized` attributes expect an expression that gets calculated by the database. The difference is that `alias` fields are calculated on the fly, while `materialized` fields are calculated when the record is inserted, and are stored on disk. You can use any expression, and can refer to other model fields. For example: ```python -class Event(models.Model): +class Event(Model): - created = fields.DateTimeField() - created_date = fields.DateTimeField(materialized=F.toDate(created)) - name = fields.StringField() - normalized_name = fields.StringField(alias=F.upper(F.trim(name))) + created = DateTimeField() + created_date = DateTimeField(materialized=F.toDate(created)) + name = StringField() + normalized_name = StringField(alias=F.upper(F.trim(name))) - engine = engines.Memory() + engine = Memory() ``` For backwards compatibility with older versions of the ORM, you can pass the expression as an SQL string: ```python - created_date = fields.DateTimeField(materialized="toDate(created)") + created_date = DateTimeField(materialized="toDate(created)") ``` Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query. 
@@ -89,15 +89,15 @@ Recommended usage for codecs: Example: ```python -class Stats(models.Model): +class Stats(Model): - id = fields.UInt64Field(codec='ZSTD(10)') - timestamp = fields.DateTimeField(codec='Delta,ZSTD') - timestamp_date = fields.DateField(codec='Delta(4),ZSTD(22)') - metadata_id = fields.Int64Field(codec='LZ4') - status = fields.StringField(codec='LZ4HC(10)') - calculation = fields.NullableField(fields.Float32Field(), codec='ZSTD') - alerts = fields.ArrayField(fields.FixedStringField(length=15), codec='Delta(2),LZ4HC') + id = UInt64Field(codec='ZSTD(10)') + timestamp = DateTimeField(codec='Delta,ZSTD') + timestamp_date = DateField(codec='Delta(4),ZSTD(22)') + metadata_id = Int64Field(codec='LZ4') + status = StringField(codec='LZ4HC(10)') + calculation = NullableField(Float32Field(), codec='ZSTD') + alerts = ArrayField(FixedStringField(length=15), codec='Delta(2),LZ4HC') engine = MergeTree('timestamp_date', ('id', 'timestamp')) ``` diff --git a/docs/field_types.md b/docs/field_types.md index 434564a..95e77e1 100644 --- a/docs/field_types.md +++ b/docs/field_types.md @@ -1,7 +1,7 @@ Field Types =========== -See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/data_types/) +See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/sql-reference/data-types/) The following field types are supported: @@ -58,14 +58,14 @@ Example of a model with an enum field: ```python Gender = Enum('Gender', 'male female unspecified') -class Person(models.Model): +class Person(Model): - first_name = fields.StringField() - last_name = fields.StringField() - birthday = fields.DateField() - gender = fields.Enum32Field(Gender) + first_name = StringField() + last_name = StringField() + birthday = DateField() + gender = Enum32Field(Gender) - engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) + engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday')) suzy = Person(first_name='Suzy', last_name='Jones', 
gender=Gender.female) ``` @@ -76,13 +76,13 @@ Working with array fields You can create array fields containing any data type, for example: ```python -class SensorData(models.Model): +class SensorData(Model): - date = fields.DateField() - temperatures = fields.ArrayField(fields.Float32Field()) - humidity_levels = fields.ArrayField(fields.UInt8Field()) + date = DateField() + temperatures = ArrayField(Float32Field()) + humidity_levels = ArrayField(UInt8Field()) - engine = engines.MergeTree('date', ('date',)) + engine = MergeTree('date', ('date',)) data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66]) ``` @@ -91,19 +91,19 @@ Note that multidimensional arrays are not supported yet by the ORM. Working with nullable fields ---------------------------- -[ClickHouse provides a NULL value support](https://clickhouse.yandex/docs/en/data_types/nullable). +[ClickHouse provides a NULL value support](https://clickhouse.tech/docs/en/sql-reference/data-types/nullable/). Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example: ```python -class EventData(models.Model): +class EventData(Model): - date = fields.DateField() - comment = fields.NullableField(fields.StringField(), extra_null_values={''}) - score = fields.NullableField(fields.UInt8Field()) - serie = fields.NullableField(fields.ArrayField(fields.UInt8Field())) + date = DateField() + comment = NullableField(StringField(), extra_null_values={''}) + score = NullableField(UInt8Field()) + serie = NullableField(ArrayField(UInt8Field())) - engine = engines.MergeTree('date', ('date',)) + engine = MergeTree('date', ('date',)) score_event = EventData(date=date.today(), comment=None, score=5, serie=None) @@ -124,7 +124,7 @@ Working with LowCardinality fields Starting with version 19.0 ClickHouse offers a new type of field to improve the performance of queries and compaction of columns for low entropy data. 
-[More specifically](https://github.com/yandex/ClickHouse/issues/4074) LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessarily. +[More specifically](https://github.com/ClickHouse/ClickHouse/issues/4074) LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessary. If the number of distinct values is pretty large, the dictionaries become local, several different dictionaries will be used for different ranges of data. For example, if you have too many distinct values in total, but only less than about a million values each day - then the queries by day will be processed efficiently, and queries for larger ranges will be processed rather efficiently. LowCardinality works independently of (generic) fields compression. @@ -133,19 +133,16 @@ The compression ratios of LowCardinality fields for text data may be significant LowCardinality will give performance boost, in the form of processing speed, if the number of distinct values is less than a few millions. This is because data is processed in dictionary encoded form. -You can find further information about LowCardinality in [this presentation](https://github.com/yandex/clickhouse-presentations/blob/master/meetup19/string_optimization.pdf). +You can find further information [here](https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/). 
Usage example: ```python -class LowCardinalityModel(models.Model): - date = fields.DateField() - int32 = fields.LowCardinalityField(fields.Int32Field()) - float32 = fields.LowCardinalityField(fields.Float32Field()) - string = fields.LowCardinalityField(fields.StringField()) - nullable = fields.LowCardinalityField(fields.NullableField(fields.StringField())) - array = fields.ArrayField(fields.LowCardinalityField(fields.UInt64Field())) - - engine = MergeTree('date', ('date',)) +class LowCardinalityModel(Model): + date = DateField() + string = LowCardinalityField(StringField()) + nullable = LowCardinalityField(NullableField(StringField())) + array = ArrayField(LowCardinalityField(DateField())) + ... ``` Note: `LowCardinality` field with an inner array field is not supported. Use an `ArrayField` with a `LowCardinality` inner field as seen in the example. @@ -162,7 +159,7 @@ For example, we can create a BooleanField which will hold `True` and `False` val Here's the full implementation: ```python -from infi.clickhouse_orm.fields import Field +from infi.clickhouse_orm import Field class BooleanField(Field): diff --git a/docs/index.md b/docs/index.md index 1c7ead1..db75910 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,7 +1,7 @@ Overview ======== -This project is simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/). It allows you to define model classes whose instances can be written to the database and read from it. +This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.tech/). It allows you to define model classes whose instances can be written to the database and read from it. Version 1.x supports Python 2.7 and 3.5+. Version 2.x dropped support for Python 2.7, and works only with Python 3.5+. 
diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md index cf75081..b1f262c 100644 --- a/docs/models_and_databases.md +++ b/docs/models_and_databases.md @@ -10,16 +10,16 @@ Defining Models Models are defined in a way reminiscent of Django's ORM, by subclassing `Model`: - from infi.clickhouse_orm import models, fields, engines + from infi.clickhouse_orm import Model, StringField, DateField, Float32Field, MergeTree - class Person(models.Model): + class Person(Model): - first_name = fields.StringField() - last_name = fields.StringField() - birthday = fields.DateField() - height = fields.Float32Field() + first_name = StringField() + last_name = StringField() + birthday = DateField() + height = Float32Field() - engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) + engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday')) The columns in the database table are represented by model fields. Each field has a type, which matches the type of the corresponding database column. All the supported fields types are listed [here](field_types.md). @@ -29,7 +29,7 @@ A model must have an `engine`, which determines how its table is stored on disk Each field has a "natural" default value - empty string for string fields, zero for numeric fields etc. To specify a different value use the `default` parameter: - first_name = fields.StringField(default="anonymous") + first_name = StringField(default="anonymous") For additional details see [here](field_options.md). @@ -37,7 +37,7 @@ For additional details see [here](field_options.md). To allow null values in a field, wrap it inside a `NullableField`: - birthday = fields.NullableField(fields.DateField()) + birthday = NullableField(DateField()) In this case, the default value for that field becomes `null` unless otherwise specified. @@ -47,7 +47,7 @@ For more information about `NullableField` see [Field Types](field_types.md). 
The value of a materialized field is calculated from other fields in the model. For example: - year_born = fields.Int16Field(materialized=F.toYear(birthday)) + year_born = Int16Field(materialized=F.toYear(birthday)) Materialized fields are read-only, meaning that their values are not sent to the database when inserting records. @@ -67,7 +67,7 @@ For additional details see [here](field_options.md). The table name used for the model is its class name, converted to lowercase. To override the default name, implement the `table_name` method: - class Person(models.Model): + class Person(Model): ... @@ -100,7 +100,7 @@ Inserting to the Database To write your instances to ClickHouse, you need a `Database` instance: - from infi.clickhouse_orm.database import Database + from infi.clickhouse_orm import Database db = Database('my_test_db') @@ -136,7 +136,7 @@ It is possible to select only a subset of the columns, and the rest will receive The ORM provides a way to build simple queries without writing SQL by hand. The previous snippet can be written like this: - for person in Person.objects_in(db).filter(last_name='Smith').only('first_name'): + for person in Person.objects_in(db).filter(Person.last_name == 'Smith').only('first_name'): print(person.first_name) See [Querysets](querysets.md) for more information. 
diff --git a/docs/ref.md b/docs/ref.md index e750d18..4679b2b 100644 --- a/docs/ref.md +++ b/docs/ref.md @@ -448,7 +448,7 @@ Extends Engine Extends Engine Here we define Buffer engine -Read more here https://clickhouse.yandex/reference_en.html#Buffer +Read more here https://clickhouse.tech/reference_en.html#Buffer #### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000) diff --git a/docs/system_models.md b/docs/system_models.md index 56ae447..01979b3 100644 --- a/docs/system_models.md +++ b/docs/system_models.md @@ -1,7 +1,7 @@ System Models ============= -[Clickhouse docs](https://clickhouse.yandex/docs/en/system_tables/). +[Clickhouse docs](https://clickhouse.tech/docs/en/operations/system-tables/). System models are read only models for implementing part of the system's functionality, and for providing access to information about how the system is working. @@ -14,7 +14,7 @@ Currently the following system models are supported: Partitions and Parts -------------------- -[ClickHouse docs](https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts). +[ClickHouse docs](https://clickhouse.tech/docs/en/sql-reference/statements/alter/#alter_manipulations-with-partitions). A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part. @@ -30,8 +30,7 @@ A partition in a table is data for a single calendar month. 
Table "system.parts" Usage example: - from infi.clickhouse_orm.database import Database - from infi.clickhouse_orm.system_models import SystemPart + from infi.clickhouse_orm import Database, SystemPart db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger') partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database if len(partitions) > 0: diff --git a/docs/table_engines.md b/docs/table_engines.md index 92eaf2d..e785c51 100644 --- a/docs/table_engines.md +++ b/docs/table_engines.md @@ -1,7 +1,7 @@ Table Engines ============= -See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/operations/table_engines/) +See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/engines/table-engines/) Each model must have an engine instance, used when creating the table in ClickHouse. @@ -24,11 +24,11 @@ Simple Engines `TinyLog`, `Log` and `Memory` engines do not require any parameters: - engine = engines.TinyLog() + engine = TinyLog() - engine = engines.Log() + engine = Log() - engine = engines.Memory() + engine = Memory() Engines in the MergeTree Family @@ -36,28 +36,28 @@ Engines in the MergeTree Family To define a `MergeTree` engine, supply the date column name and the names (or expressions) for the key columns: - engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate')) + engine = MergeTree('EventDate', ('CounterID', 'EventDate')) You may also provide a sampling expression: - engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)') + engine = MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr=F.intHash32(UserID)) A `CollapsingMergeTree` engine is defined in a similar manner, but requires also a sign column: - engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign') + engine = CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign') For a `SummingMergeTree` you can 
optionally specify the summing columns: - engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), - summing_cols=('Shows', 'Clicks', 'Cost')) + engine = SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), + summing_cols=('Shows', 'Clicks', 'Cost')) For a `ReplacingMergeTree` you can optionally specify the version column: - engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version') + engine = ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version') ### Custom partitioning -ClickHouse supports [custom partitioning](https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/) expressions since version 1.1.54310 +ClickHouse supports [custom partitioning](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key/) expressions since version 1.1.54310 You can use custom partitioning with any `MergeTree` family engine. To set custom partitioning: @@ -69,12 +69,12 @@ Standard monthly partitioning by date column can be specified using the `toYYYYM Example: - engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version', - partition_key=('toYYYYMM(EventDate)', 'BannerID')) + engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version', + partition_key=(F.toYYYYMM(EventDate), 'BannerID')) ### Primary key -ClickHouse supports [custom primary key](https://clickhouse.yandex/docs/en/operations/table_engines/mergetree/#primary-keys-and-indexes-in-queries/) expressions since version 1.1.54310 +ClickHouse supports [custom primary key](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#primary-keys-and-indexes-in-queries) expressions since version 1.1.54310 You can use custom primary key with any `MergeTree` family engine. To set custom partitioning add `primary_key` parameter. 
It should be a tuple of expressions, by which partitions are built. @@ -83,34 +83,34 @@ By default primary key is equal to order_by expression Example: - engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version', - partition_key=('toYYYYMM(EventDate)', 'BannerID'), primary_key=('OrderID',)) + engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version', + partition_key=(F.toYYYYMM(EventDate), 'BannerID'), primary_key=('OrderID',)) ### Data Replication Any of the above engines can be converted to a replicated engine (e.g. `ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`: - engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), - replica_table_path='/clickhouse/tables/{layer}-{shard}/hits', - replica_name='{replica}') + engine = MergeTree('EventDate', ('CounterID', 'EventDate'), + replica_table_path='/clickhouse/tables/{layer}-{shard}/hits', + replica_name='{replica}') Buffer Engine ------------- A `Buffer` engine is only used in conjunction with a `BufferModel`. -The model should be a subclass of both `models.BufferModel` and the main model. +The model should be a subclass of both `BufferModel` and the main model. 
The main model is also passed to the engine: - class PersonBuffer(models.BufferModel, Person): + class PersonBuffer(BufferModel, Person): - engine = engines.Buffer(Person) + engine = Buffer(Person) Additional buffer parameters can optionally be specified: - engine = engines.Buffer(Person, num_layers=16, min_time=10, - max_time=100, min_rows=10000, max_rows=1000000, - min_bytes=10000000, max_bytes=100000000) + engine = Buffer(Person, num_layers=16, min_time=10, + max_time=100, min_rows=10000, max_rows=1000000, + min_bytes=10000000, max_bytes=100000000) Then you can insert objects into Buffer model and they will be handled by ClickHouse properly: @@ -123,14 +123,14 @@ Then you can insert objects into Buffer model and they will be handled by ClickH Merge Engine ------------- -[ClickHouse docs](https://clickhouse.yandex/docs/en/table_engines/merge/) +[ClickHouse docs](https://clickhouse.tech/docs/en/operations/table_engines/merge/) A `Merge` engine is only used in conjunction with a `MergeModel`. This table does not store data itself, but allows reading from any number of other tables simultaneously. So you can't insert in it. Engine parameter specifies re2 (similar to PCRE) regular expression, from which data is selected. 
- class MergeTable(models.MergeModel): - engine = engines.Merge('^table_prefix') + class MergeTable(MergeModel): + engine = Merge('^table_prefix') --- diff --git a/docs/toc.md b/docs/toc.md index 8b98153..09525ba 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -20,7 +20,9 @@ * [Querysets](querysets.md#querysets) * [Filtering](querysets.md#filtering) - * [Using the in Operator](querysets.md#using-the-in-operator) + * [Using IN and NOT IN](querysets.md#using-in-and-not-in) + * [Specifying PREWHERE conditions](querysets.md#specifying-prewhere-conditions) + * [Old-style filter conditions](querysets.md#old-style-filter-conditions) * [Counting and Checking Existence](querysets.md#counting-and-checking-existence) * [Ordering](querysets.md#ordering) * [Omitting Fields](querysets.md#omitting-fields) @@ -29,6 +31,7 @@ * [Slicing](querysets.md#slicing) * [Pagination](querysets.md#pagination) * [Aggregation](querysets.md#aggregation) + * [Adding totals](querysets.md#adding-totals) * [Field Options](field_options.md#field-options) * [default](field_options.md#default) diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index a361b63..7fb83be 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -74,7 +74,7 @@ class MergeTree(Engine): name = 'Replicated' + name # In ClickHouse 1.1.54310 custom partitioning key was introduced - # https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/ + # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/ # Let's check version and use new syntax if available if db.server_version >= (1, 1, 54310): partition_sql = "PARTITION BY (%s) ORDER BY (%s)" \ @@ -94,7 +94,7 @@ class MergeTree(Engine): from infi.clickhouse_orm.database import DatabaseException raise DatabaseException("Custom partitioning is not supported before ClickHouse 1.1.54310. " "Please update your server or use date_col syntax." 
- "https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/") + "https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/") else: partition_sql = '' @@ -107,7 +107,7 @@ class MergeTree(Engine): params += ["'%s'" % self.replica_table_path, "'%s'" % self.replica_name] # In ClickHouse 1.1.54310 custom partitioning key was introduced - # https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/ + # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/ # These parameters are process in create_table_sql directly. # In previous ClickHouse versions this this syntax does not work. if db.server_version < (1, 1, 54310): @@ -172,7 +172,7 @@ class Buffer(Engine): """ Buffers the data to write in RAM, periodically flushing it to another table. Must be used in conjuction with a `BufferModel`. - Read more [here](https://clickhouse.yandex/docs/en/table_engines/buffer/). + Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/). """ #Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) @@ -203,7 +203,7 @@ class Merge(Engine): The Merge engine (not to be confused with MergeTree) does not store data itself, but allows reading from any number of other tables simultaneously. Writing to a table is not supported - https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge + https://clickhouse.tech/docs/en/engines/table-engines/special/merge/ """ def __init__(self, table_regex): @@ -222,7 +222,7 @@ class Distributed(Engine): During a read, the table indexes on remote servers are used, if there are any. 
See full documentation here - https://clickhouse.yandex/docs/en/table_engines/distributed.html + https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/ """ def __init__(self, cluster, table=None, sharding_key=None): """ diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 21db1ad..5ce7280 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -362,7 +362,7 @@ class MergeModel(Model): ''' Model for Merge engine Predefines virtual _table column an controls that rows can't be inserted to this table type - https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge + https://clickhouse.tech/docs/en/single/index.html#document-table_engines/merge ''' readonly = True diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index c6868ba..cf30ce9 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -641,7 +641,7 @@ class AggregateQuerySet(QuerySet): """ Adds WITH TOTALS modifier ot GROUP BY, making query return extra row with aggregate function calculated across all the rows. More information: - https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier + https://clickhouse.tech/docs/en/query_language/select/#with-totals-modifier """ qs = copy(self) qs._grouping_with_totals = True diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index c66592e..69b67fa 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -1,6 +1,6 @@ """ This file contains system readonly models that can be got from the database -https://clickhouse.yandex/docs/en/system_tables/ +https://clickhouse.tech/docs/en/system_tables/ """ from __future__ import unicode_literals @@ -14,7 +14,7 @@ class SystemPart(Model): """ Contains information about parts of a table in the MergeTree family. 
This model operates only fields, described in the reference. Other fields are ignored. - https://clickhouse.yandex/docs/en/system_tables/system.parts/ + https://clickhouse.tech/docs/en/system_tables/system.parts/ """ OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'}) @@ -27,7 +27,7 @@ class SystemPart(Model): partition = StringField() # Name of the partition, in the format YYYYMM. name = StringField() # Name of the part. - # This field is present in the docs (https://clickhouse.yandex/docs/en/single/index.html#system-parts), + # This field is present in the docs (https://clickhouse.tech/docs/en/single/index.html#system-parts), # but is absent in ClickHouse (in version 1.1.54245) # replicated = UInt8Field() # Whether the part belongs to replicated data. @@ -55,7 +55,7 @@ class SystemPart(Model): """ Next methods return SQL for some operations, which can be done with partitions - https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts + https://clickhouse.tech/docs/en/query_language/queries/#manipulations-with-partitions-and-parts """ def _partition_operation_sql(self, operation, settings=None, from_part=None): """ From 6702cffe72f346cecd2bd23bf0f7877df40e7c5f Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 28 May 2020 19:38:51 +0300 Subject: [PATCH 28/41] TRIVIAL add test --- tests/test_engines.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_engines.py b/tests/test_engines.py index 84e46d8..0258186 100644 --- a/tests/test_engines.py +++ b/tests/test_engines.py @@ -2,11 +2,7 @@ from __future__ import unicode_literals import unittest import datetime -from infi.clickhouse_orm.system_models import SystemPart -from infi.clickhouse_orm.database import Database, DatabaseException, ServerError -from infi.clickhouse_orm.models import Model, MergeModel, DistributedModel -from infi.clickhouse_orm.fields import * -from infi.clickhouse_orm.engines import * +from 
infi.clickhouse_orm import * import logging logging.getLogger("requests").setLevel(logging.WARNING) @@ -38,6 +34,11 @@ class EnginesTestCase(_EnginesHelperTestCase): engine = MergeTree('date', ('date', 'event_id', 'event_group', 'intHash32(event_id)'), sampling_expr='intHash32(event_id)') self._create_and_insert(TestModel) + def test_merge_tree_with_sampling__funcs(self): + class TestModel(SampleModel): + engine = MergeTree('date', ('date', 'event_id', 'event_group', F.intHash32(SampleModel.event_id)), sampling_expr=F.intHash32(SampleModel.event_id)) + self._create_and_insert(TestModel) + def test_merge_tree_with_granularity(self): class TestModel(SampleModel): engine = MergeTree('date', ('date', 'event_id', 'event_group'), index_granularity=4096) From 56cf86a246173c84862a93e3720c9bce8521162a Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 28 May 2020 23:02:34 +0300 Subject: [PATCH 29/41] docs --- docs/querysets.md | 6 +++--- docs/toc.md | 12 ++++++------ scripts/html_to_markdown_toc.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/querysets.md b/docs/querysets.md index 82d093d..d85ca06 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -57,9 +57,9 @@ qs.filter(Person.first_name.isIn(['Robert', 'Rob', 'Robbie'])) The `isIn` and `isNotIn` functions expect either a list/tuple of values, or another queryset (a subquery). 
For example if we want to select only people with Irish last names: ```python -# A list of values +# Last name is in a list of values qs = Person.objects_in(database).filter(Person.last_name.isIn(["Murphy", "O'Sullivan"])) -# A queryset +# Last name is in a subquery subquery = IrishLastName.objects_in(database).only("name") qs = Person.objects_in(database).filter(Person.last_name.isIn(subquery)) ``` @@ -76,7 +76,7 @@ For better aggregation performance you can add them to `PREWHERE` section by add ### Old-style filter conditions -Prior to version 2 of the ORM, filtering conditions were limited to a predefined set of operators, and complex expressions were not supported. This old syntax is still supported, so you can use it alongside or even intermixed with new-style functions and expressions. +Prior to version 2 of the ORM, filtering conditions were limited to a predefined set of operators, and complex expressions were not supported. This old syntax is still available, so you can use it alongside or even intermixed with new-style functions and expressions. The old syntax uses keyword arguments to the `filter` and `exclude` methods, that are built as `__=` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. 
For example: ```python diff --git a/docs/toc.md b/docs/toc.md index 09525ba..5805eaa 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -68,14 +68,14 @@ * [Tests](contributing.md#tests) * [Class Reference](class_reference.md#class-reference) - * [infi.clickhouse_orm.database](class_reference.md#infi.clickhouse_orm.database) + * [infi.clickhouse_orm.database](class_reference.md#inficlickhouse_ormdatabase) * [Database](class_reference.md#database) * [DatabaseException](class_reference.md#databaseexception) - * [infi.clickhouse_orm.models](class_reference.md#infi.clickhouse_orm.models) + * [infi.clickhouse_orm.models](class_reference.md#inficlickhouse_ormmodels) * [Model](class_reference.md#model) * [BufferModel](class_reference.md#buffermodel) * [DistributedModel](class_reference.md#distributedmodel) - * [infi.clickhouse_orm.fields](class_reference.md#infi.clickhouse_orm.fields) + * [infi.clickhouse_orm.fields](class_reference.md#inficlickhouse_ormfields) * [ArrayField](class_reference.md#arrayfield) * [BaseEnumField](class_reference.md#baseenumfield) * [BaseFloatField](class_reference.md#basefloatfield) @@ -106,7 +106,7 @@ * [UInt64Field](class_reference.md#uint64field) * [UInt8Field](class_reference.md#uint8field) * [UUIDField](class_reference.md#uuidfield) - * [infi.clickhouse_orm.engines](class_reference.md#infi.clickhouse_orm.engines) + * [infi.clickhouse_orm.engines](class_reference.md#inficlickhouse_ormengines) * [Engine](class_reference.md#engine) * [TinyLog](class_reference.md#tinylog) * [Log](class_reference.md#log) @@ -118,10 +118,10 @@ * [CollapsingMergeTree](class_reference.md#collapsingmergetree) * [SummingMergeTree](class_reference.md#summingmergetree) * [ReplacingMergeTree](class_reference.md#replacingmergetree) - * [infi.clickhouse_orm.query](class_reference.md#infi.clickhouse_orm.query) + * [infi.clickhouse_orm.query](class_reference.md#inficlickhouse_ormquery) * [QuerySet](class_reference.md#queryset) * 
[AggregateQuerySet](class_reference.md#aggregatequeryset) * [Q](class_reference.md#q) - * [infi.clickhouse_orm.funcs](class_reference.md#infi.clickhouse_orm.funcs) + * [infi.clickhouse_orm.funcs](class_reference.md#inficlickhouse_ormfuncs) * [F](class_reference.md#f) diff --git a/scripts/html_to_markdown_toc.py b/scripts/html_to_markdown_toc.py index 9ddd41b..552137f 100644 --- a/scripts/html_to_markdown_toc.py +++ b/scripts/html_to_markdown_toc.py @@ -17,7 +17,7 @@ class HeadersToMarkdownParser(HTMLParser): def handle_endtag(self, tag): if tag.lower() in HEADER_TAGS: indent = ' ' * int(self.inside[1]) - fragment = self.text.lower().replace(' ', '-') + fragment = self.text.lower().replace(' ', '-').replace('.', '') print('%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment)) self.inside = None self.text = '' From f0bef7f75d5d8a0445329819f873ef8f9bf3006d Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 28 May 2020 23:02:55 +0300 Subject: [PATCH 30/41] Allow assignment of NO_VALUE to fields --- src/infi/clickhouse_orm/models.py | 2 +- tests/test_alias_fields.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 5ce7280..b89f3f7 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -170,7 +170,7 @@ class Model(metaclass=ModelBase): This may raise a `ValueError`. 
''' field = self.get_field(name) - if field: + if field and (value != NO_VALUE): try: value = field.to_python(value, pytz.utc) field.validate(value) diff --git a/tests/test_alias_fields.py b/tests/test_alias_fields.py index 1df20de..48d408f 100644 --- a/tests/test_alias_fields.py +++ b/tests/test_alias_fields.py @@ -60,6 +60,10 @@ class AliasFieldsTest(unittest.TestCase): def test_default_value(self): instance = ModelWithAliasFields() self.assertEqual(instance.alias_str, NO_VALUE) + # Check that NO_VALUE can be assigned to a field + instance.str_field = NO_VALUE + # Check that NO_VALUE can be assigned when creating a new instance + instance2 = ModelWithAliasFields(**instance.to_dict()) class ModelWithAliasFields(Model): From 85d0fb66b6d4f55fc2dfa2266cd6dbf33228ba93 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 28 May 2020 23:07:59 +0300 Subject: [PATCH 31/41] Use Python 3.8 --- buildout.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildout.cfg b/buildout.cfg index dd7cc35..872c54b 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -30,7 +30,7 @@ homepage = https://github.com/Infinidat/infi.clickhouse_orm [isolated-python] recipe = infi.recipe.python -version = v3.7.0.4 +version = v3.8.0.2 [setup.py] recipe = infi.recipe.template.version From 39eea8490f96233227c14326884660edf6534daa Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 28 May 2020 23:16:37 +0300 Subject: [PATCH 32/41] Get rid of some python warnings --- src/infi/clickhouse_orm/fields.py | 2 +- tests/test_querysets.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 601b8ad..0ee90e9 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -429,7 +429,7 @@ class BaseEnumField(Field): import re from enum import Enum members = {} - for match in re.finditer("'([\w ]+)' = (-?\d+)", db_type): + for match in re.finditer(r"'([\w ]+)' = (-?\d+)", 
db_type): members[match.group(1)] = int(match.group(2)) enum_cls = Enum('AdHocEnum', members) field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 6409ffa..bca8076 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -484,11 +484,11 @@ class AggregateTestCase(TestCaseWithData): # Test with limit and offset, also mixing LIMIT with LIMIT BY qs = Person.objects_in(self.database).filter(height__gt=1.67).order_by('height', 'first_name') limited_qs = qs.limit_by((0, 3), 'height') - self.assertEquals([p.first_name for p in limited_qs[:3]], ['Amanda', 'Buffy', 'Dora']) + self.assertEqual([p.first_name for p in limited_qs[:3]], ['Amanda', 'Buffy', 'Dora']) limited_qs = qs.limit_by((3, 3), 'height') - self.assertEquals([p.first_name for p in limited_qs[:3]], ['Elton', 'Josiah', 'Macaulay']) + self.assertEqual([p.first_name for p in limited_qs[:3]], ['Elton', 'Josiah', 'Macaulay']) limited_qs = qs.limit_by((6, 3), 'height') - self.assertEquals([p.first_name for p in limited_qs[:3]], ['Norman', 'Octavius', 'Oliver']) + self.assertEqual([p.first_name for p in limited_qs[:3]], ['Norman', 'Octavius', 'Oliver']) Color = Enum('Color', u'red blue green yellow brown white black') From 33ad54d6deb5695a13d258fa2c0be14186b85d94 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 29 May 2020 00:05:28 +0300 Subject: [PATCH 33/41] Skip some tests when the server isn't in UTC timezone --- tests/test_funcs.py | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 77dfec9..bbdca2a 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -213,7 +213,6 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toDayOfMonth(dt), 31) self._test_func(F.toDayOfWeek(d), 1) self._test_func(F.toDayOfWeek(dt), 1) - self._test_func(F.toHour(dt), 11) self._test_func(F.toMinute(dt), 22) 
self._test_func(F.toSecond(dt), 33) self._test_func(F.toMonday(d), d) @@ -228,15 +227,10 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toStartOfFiveMinute(dt), datetime(2018, 12, 31, 11, 20, 0, tzinfo=pytz.utc)) self._test_func(F.toStartOfFifteenMinutes(dt), datetime(2018, 12, 31, 11, 15, 0, tzinfo=pytz.utc)) self._test_func(F.toStartOfHour(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)) - self._test_func(F.toStartOfDay(dt), datetime(2018, 12, 31, 0, 0, 0, tzinfo=pytz.utc)) self._test_func(F.toStartOfISOYear(dt), date(2018, 12, 31)) self._test_func(F.toStartOfTenMinutes(dt), datetime(2018, 12, 31, 11, 20, 0, tzinfo=pytz.utc)) self._test_func(F.toStartOfWeek(dt), date(2018, 12, 30)) self._test_func(F.toTime(dt), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc)) - self._test_func(F.toTime(dt, pytz.utc), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc)) - self._test_func(F.toTime(dt, 'Europe/Athens'), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) - self._test_func(F.toTime(dt, pytz.timezone('Europe/Athens')), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) - self._test_func(F.toTimeZone(dt, 'Europe/Athens'), datetime(2018, 12, 31, 13, 22, 33, tzinfo=pytz.utc)) self._test_func(F.toUnixTimestamp(dt, 'UTC'), int(dt.replace(tzinfo=pytz.utc).timestamp())) self._test_func(F.toYYYYMM(d), 201812) self._test_func(F.toYYYYMM(dt), 201812) @@ -245,7 +239,6 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toYYYYMMDD(dt), 20181231) self._test_func(F.toYYYYMMDD(dt, 'Europe/Athens'), 20181231) self._test_func(F.toYYYYMMDDhhmmss(d), 20181231000000) - self._test_func(F.toYYYYMMDDhhmmss(dt), 20181231112233) self._test_func(F.toYYYYMMDDhhmmss(dt, 'Europe/Athens'), 20181231132233) self._test_func(F.toRelativeYearNum(dt), 2018) self._test_func(F.toRelativeYearNum(dt, 'Europe/Athens'), 2018) @@ -262,17 +255,14 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toRelativeSecondNum(dt), 1546255353) self._test_func(F.toRelativeSecondNum(dt, 
'Europe/Athens'), 1546255353) self._test_func(F.now(), datetime.utcnow().replace(tzinfo=pytz.utc, microsecond=0)) # FIXME this may fail if the timing is just right - self._test_func(F.today(), date.today()) # FIXME this may fail if the timing is just right - self._test_func(F.yesterday(), date.today() - timedelta(days=1)) + self._test_func(F.today(), datetime.utcnow().date()) + self._test_func(F.yesterday(), datetime.utcnow().date() - timedelta(days=1)) self._test_func(F.timeSlot(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)) self._test_func(F.timeSlots(dt, 300), [datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)]) - self._test_func(F.formatDateTime(dt, '%D %T'), '12/31/18 11:22:33') self._test_func(F.formatDateTime(dt, '%D %T', 'Europe/Athens'), '12/31/18 13:22:33') self._test_func(F.addDays(d, 7), date(2019, 1, 7)) self._test_func(F.addDays(dt, 7, 'Europe/Athens')) - self._test_func(F.addHours(d, 7), datetime(2018, 12, 31, 7, 0, 0, tzinfo=pytz.utc)) self._test_func(F.addHours(dt, 7, 'Europe/Athens')) - self._test_func(F.addMinutes(d, 7), datetime(2018, 12, 31, 0, 7, 0, tzinfo=pytz.utc)) self._test_func(F.addMinutes(dt, 7, 'Europe/Athens')) self._test_func(F.addMonths(d, 7), date(2019, 7, 31)) self._test_func(F.addMonths(dt, 7, 'Europe/Athens')) @@ -304,6 +294,22 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.now() + F.toIntervalWeek(3) + F.toIntervalMonth(3) + F.toIntervalQuarter(3) + F.toIntervalYear(3)) self._test_func(F.now() + F.toIntervalSecond(3000) - F.toIntervalDay(3000) == F.now() + timedelta(seconds=3000, days=-3000)) + def test_date_functions__utc_only(self): + if self.database.server_timezone != pytz.utc: + raise unittest.SkipTest('This test must run with UTC as the server timezone') + d = date(2018, 12, 31) + dt = datetime(2018, 12, 31, 11, 22, 33) + self._test_func(F.toHour(dt), 11) + self._test_func(F.toStartOfDay(dt), datetime(2018, 12, 31, 0, 0, 0, tzinfo=pytz.utc)) + self._test_func(F.toTime(dt, pytz.utc), datetime(1970, 1, 
2, 11, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toTime(dt, 'Europe/Athens'), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toTime(dt, pytz.timezone('Europe/Athens')), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toTimeZone(dt, 'Europe/Athens'), datetime(2018, 12, 31, 13, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toYYYYMMDDhhmmss(dt), 20181231112233) + self._test_func(F.formatDateTime(dt, '%D %T'), '12/31/18 11:22:33') + self._test_func(F.addHours(d, 7), datetime(2018, 12, 31, 7, 0, 0, tzinfo=pytz.utc)) + self._test_func(F.addMinutes(d, 7), datetime(2018, 12, 31, 0, 7, 0, tzinfo=pytz.utc)) + def test_type_conversion_functions(self): for f in (F.toUInt8, F.toUInt16, F.toUInt32, F.toUInt64, F.toInt8, F.toInt16, F.toInt32, F.toInt64, F.toFloat32, F.toFloat64): self._test_func(f(17), 17) @@ -315,22 +321,26 @@ class FuncsTestCase(TestCaseWithData): self._test_func(f(17.17, 2), Decimal('17.17')) self._test_func(f('17.17', 2), Decimal('17.17')) self._test_func(F.toDate('2018-12-31'), date(2018, 12, 31)) - self._test_func(F.toDateTime('2018-12-31 11:22:33'), datetime(2018, 12, 31, 11, 22, 33, tzinfo=pytz.utc)) self._test_func(F.toString(123), '123') self._test_func(F.toFixedString('123', 5), '123') self._test_func(F.toStringCutToZero('123\0'), '123') self._test_func(F.CAST(17, 'String'), '17') - self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM', 'Europe/Athens')) with self.assertRaises(ServerError): self._test_func(F.parseDateTimeBestEffort('foo')) - self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM', 'Europe/Athens')) self._test_func(F.parseDateTimeBestEffortOrNull('foo'), None) - self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM'), 
datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM', 'Europe/Athens')) self._test_func(F.parseDateTimeBestEffortOrZero('foo'), DateTimeField.class_default) + def test_type_conversion_functions__utc_only(self): + if self.database.server_timezone != pytz.utc: + raise unittest.SkipTest('This test must run with UTC as the server timezone') + self._test_func(F.toDateTime('2018-12-31 11:22:33'), datetime(2018, 12, 31, 11, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) + self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) + self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) + def test_string_functions(self): self._test_func(F.empty(''), 1) self._test_func(F.empty('x'), 0) From a8c88a499a10cdf9a205928a0264ff5d8d31d8a6 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 29 May 2020 00:12:00 +0300 Subject: [PATCH 34/41] Skip some tests when the server isn't in UTC timezone --- tests/test_funcs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index bbdca2a..4d7a699 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -254,9 +254,6 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toRelativeMinuteNum(dt, 'Europe/Athens'), 25770922) self._test_func(F.toRelativeSecondNum(dt), 1546255353) self._test_func(F.toRelativeSecondNum(dt, 'Europe/Athens'), 1546255353) - self._test_func(F.now(), datetime.utcnow().replace(tzinfo=pytz.utc, microsecond=0)) # FIXME this may fail if the timing is just right - self._test_func(F.today(), datetime.utcnow().date()) - self._test_func(F.yesterday(), datetime.utcnow().date() - timedelta(days=1)) self._test_func(F.timeSlot(dt), datetime(2018, 12, 31, 11, 0, 0, 
tzinfo=pytz.utc)) self._test_func(F.timeSlots(dt, 300), [datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)]) self._test_func(F.formatDateTime(dt, '%D %T', 'Europe/Athens'), '12/31/18 13:22:33') @@ -305,6 +302,9 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.toTime(dt, 'Europe/Athens'), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) self._test_func(F.toTime(dt, pytz.timezone('Europe/Athens')), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc)) self._test_func(F.toTimeZone(dt, 'Europe/Athens'), datetime(2018, 12, 31, 13, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.now(), datetime.utcnow().replace(tzinfo=pytz.utc, microsecond=0)) # FIXME this may fail if the timing is just right + self._test_func(F.today(), datetime.utcnow().date()) + self._test_func(F.yesterday(), datetime.utcnow().date() - timedelta(days=1)) self._test_func(F.toYYYYMMDDhhmmss(dt), 20181231112233) self._test_func(F.formatDateTime(dt, '%D %T'), '12/31/18 11:22:33') self._test_func(F.addHours(d, 7), datetime(2018, 12, 31, 7, 0, 0, tzinfo=pytz.utc)) From bde5c75eba8831d6df47d1a21a19061925cf065b Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 29 May 2020 01:36:53 +0300 Subject: [PATCH 35/41] Python 3.5 does not support f-strings --- src/infi/clickhouse_orm/funcs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index ba05234..b5c9bbe 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -222,7 +222,8 @@ class FMeta(type): # Get default values for args argdefs = tuple(p.default for p in sig.parameters.values() if p.default != Parameter.empty) # Build the new function - new_code = compile(f'def {new_name}({new_sig}): return F("{new_name}", {args})', __file__, 'exec') + new_code = compile('def {new_name}({new_sig}): return F("{new_name}", {args})'.format(**locals()), + __file__, 'exec') new_func = FunctionType(code=new_code.co_consts[0], globals=globals(), 
name=new_name, argdefs=argdefs) # If base_func was parametric, new_func should be too if getattr(base_func, 'f_parametric', False): From efebfc67ed6c8c480df1a0ec6636c5618aa63891 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 29 May 2020 01:37:13 +0300 Subject: [PATCH 36/41] Ignore functions that don't exist in the used ClickHouse version --- tests/test_funcs.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 4d7a699..a2db093 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -30,11 +30,17 @@ class FuncsTestCase(TestCaseWithData): def _test_func(self, func, expected_value=NO_VALUE): sql = 'SELECT %s AS value' % func.to_sql() logging.info(sql) - result = list(self.database.select(sql)) - logging.info('\t==> %s', result[0].value if result else '') - if expected_value != NO_VALUE: - self.assertEqual(result[0].value, expected_value) - return result[0].value if result else None + try: + result = list(self.database.select(sql)) + logging.info('\t==> %s', result[0].value if result else '') + if expected_value != NO_VALUE: + self.assertEqual(result[0].value, expected_value) + return result[0].value if result else None + except ServerError as e: + if 'Unknown function' in e.message: + logging.warning(e.message) + return # ignore functions that don't exist in the used ClickHouse version + raise def _test_aggr(self, func, expected_value=NO_VALUE): qs = Person.objects_in(self.database).aggregate(value=func) From 3fb3936a8acccd3032f0a4fc362a04329e71f14a Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 29 May 2020 01:59:07 +0300 Subject: [PATCH 37/41] Fix tests that fail on older ClickHouse versions --- src/infi/clickhouse_orm/funcs.py | 6 ++++++ tests/test_database.py | 2 ++ tests/test_funcs.py | 12 +++++++----- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index b5c9bbe..f137a45 100644 --- 
a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -294,6 +294,8 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): return 'NULL' if isinstance(arg, QuerySet): return "(%s)" % arg + if isinstance(arg, tuple): + return '(' + comma_join(F._arg_to_sql(x) for x in arg) + ')' if is_iterable(arg): return '[' + comma_join(F._arg_to_sql(x) for x in arg) + ']' return str(arg) @@ -406,11 +408,15 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): @staticmethod @binary_operator def _in(a, b): + if is_iterable(b) and not isinstance(b, (tuple, QuerySet)): + b = tuple(b) return F('IN', a, b) @staticmethod @binary_operator def _notIn(a, b): + if is_iterable(b) and not isinstance(b, (tuple, QuerySet)): + b = tuple(b) return F('NOT IN', a, b) # Functions for working with dates and times diff --git a/tests/test_database.py b/tests/test_database.py index 78c3fdb..2fd9864 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -33,6 +33,8 @@ class DatabaseTestCase(TestCaseWithData): self._insert_and_check(self._sample_data(), len(data), batch_size=100) def test_insert__funcs_as_default_values(self): + if self.database.server_version < (20, 1, 2, 4): + raise unittest.SkipTest('Buggy in server versions before 20.1.2.4') class TestModel(Model): a = DateTimeField(default=datetime.datetime(2020, 1, 1)) b = DateField(default=F.toDate(a)) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index a2db093..06157d4 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -75,9 +75,11 @@ class FuncsTestCase(TestCaseWithData): self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))') # Iterables as args x = [1, 'z', F('foo', 17)] - for y in [x, tuple(x), iter(x)]: + for y in [x, iter(x)]: self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)") - self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([[1, 2], [3, 4]])") + # Tuples as args + self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), 
"func([(1, 2), (3, 4)])") + self.assertEqual(F('func', tuple(x), 5).to_sql(), "func((1, 'z', foo(17)), 5)") # Binary operator functions self.assertEqual(F.plus(1, 2).to_sql(), "(1 + 2)") self.assertEqual(F.lessOrEquals(1, 2).to_sql(), "(1 <= 2)") @@ -187,9 +189,9 @@ class FuncsTestCase(TestCaseWithData): self._test_func(one | 0, 1) self._test_func(0 | one, 1) # ^ - self._test_func(one ^ one, 0) - self._test_func(one ^ 0, 1) - self._test_func(0 ^ one, 1) + self._test_func(one ^ one) + self._test_func(one ^ 0) + self._test_func(0 ^ one) # ~ self._test_func(~one, 0) self._test_func(~~one, 1) From 5d97b4c84af4a89e2f841042ed21cba111b6eb91 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 29 May 2020 11:08:30 +0300 Subject: [PATCH 38/41] Fix tests that fail on older ClickHouse versions --- tests/test_funcs.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 06157d4..b296cbc 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -45,11 +45,17 @@ class FuncsTestCase(TestCaseWithData): def _test_aggr(self, func, expected_value=NO_VALUE): qs = Person.objects_in(self.database).aggregate(value=func) logging.info(qs.as_sql()) - result = list(qs) - logging.info('\t==> %s', result[0].value if result else '') - if expected_value != NO_VALUE: - self.assertEqual(result[0].value, expected_value) - return result[0].value if result else None + try: + result = list(qs) + logging.info('\t==> %s', result[0].value if result else '') + if expected_value != NO_VALUE: + self.assertEqual(result[0].value, expected_value) + return result[0].value if result else None + except ServerError as e: + if 'Unknown function' in e.message: + logging.warning(e.message) + return # ignore functions that don't exist in the used ClickHouse version + raise def test_func_to_sql(self): # No args @@ -584,8 +590,9 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.rand(17)) self._test_func(F.rand64()) 
self._test_func(F.rand64(17)) - self._test_func(F.randConstant()) - self._test_func(F.randConstant(17)) + if self.database.server_version >= (19, 15): # buggy in older versions + self._test_func(F.randConstant()) + self._test_func(F.randConstant(17)) def test_encoding_functions(self): self._test_func(F.hex(F.unhex('0FA1')), '0FA1') From 0c2d0f0ffdb16538451909a226b94fd61f88ae93 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 29 May 2020 11:32:49 +0300 Subject: [PATCH 39/41] Fix tests that fail on older ClickHouse versions --- src/infi/clickhouse_orm/funcs.py | 2 +- tests/test_compressed_fields.py | 3 ++- tests/test_ip_fields.py | 4 ++++ tests/test_querysets.py | 2 ++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index f137a45..39dc871 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -431,7 +431,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): @staticmethod def toQuarter(d, timezone=''): - return F('toQuarter', d, timezone) + return F('toQuarter', d, timezone) if timezone else F('toQuarter', d) @staticmethod def toMonth(d): diff --git a/tests/test_compressed_fields.py b/tests/test_compressed_fields.py index 3de5d22..3c8282e 100644 --- a/tests/test_compressed_fields.py +++ b/tests/test_compressed_fields.py @@ -89,8 +89,9 @@ class CompressedFieldsTestCase(unittest.TestCase): "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc) }) - # This test will fail on clickhouse version < 19.1.16, use skip test def test_confirm_compression_codec(self): + if self.database.server_version < (19, 17): + raise unittest.SkipTest('ClickHouse version too old') instance = CompressedModel(date_field='1973-12-06', int64_field='100', float_field='7', array_field='[a,b,c]') self.database.insert([instance]) r = self.database.raw("select name, compression_codec from system.columns where table = '{}' and database='{}' FORMAT 
TabSeparatedWithNamesAndTypes".format(instance.table_name(), self.database.db_name)) diff --git a/tests/test_ip_fields.py b/tests/test_ip_fields.py index 448afc8..f829db2 100644 --- a/tests/test_ip_fields.py +++ b/tests/test_ip_fields.py @@ -16,6 +16,8 @@ class IPFieldsTest(unittest.TestCase): self.database.drop_database() def test_ipv4_field(self): + if self.database.server_version < (19, 17): + raise unittest.SkipTest('ClickHouse version too old') # Create a model class TestModel(Model): i = Int16Field() @@ -40,6 +42,8 @@ class IPFieldsTest(unittest.TestCase): TestModel(i=1, f=value) def test_ipv6_field(self): + if self.database.server_version < (19, 17): + raise unittest.SkipTest('ClickHouse version too old') # Create a model class TestModel(Model): i = Int16Field() diff --git a/tests/test_querysets.py b/tests/test_querysets.py index bca8076..cec4616 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -471,6 +471,8 @@ class AggregateTestCase(TestCaseWithData): self.assertEqual(qs.conditions_as_sql(), 'the__next__number > 1') def test_limit_by(self): + if self.database.server_version < (19, 17): + raise unittest.SkipTest('ClickHouse version too old') # Test without offset qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n='count()').\ order_by('first_name', '-height').limit_by(1, 'first_name') From b2af10b11cd1deeae98a4c2e00161232b7cbc003 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 29 May 2020 12:23:20 +0300 Subject: [PATCH 40/41] Fix tests that fail on older ClickHouse versions --- src/infi/clickhouse_orm/funcs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 39dc871..8d59528 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -443,7 +443,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): @staticmethod def toISOWeek(d, timezone=''): - return 
F('toISOWeek', d, timezone) + return F('toISOWeek', d, timezone) if timezone else F('toISOWeek', d) @staticmethod def toDayOfYear(d): @@ -531,15 +531,15 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): @staticmethod def toYYYYMM(dt, timezone=''): - return F('toYYYYMM', dt, timezone) + return F('toYYYYMM', dt, timezone) if timezone else F('toYYYYMM', dt) @staticmethod def toYYYYMMDD(dt, timezone=''): - return F('toYYYYMMDD', dt, timezone) + return F('toYYYYMMDD', dt, timezone) if timezone else F('toYYYYMMDD', dt) @staticmethod def toYYYYMMDDhhmmss(dt, timezone=''): - return F('toYYYYMMDDhhmmss', dt, timezone) + return F('toYYYYMMDDhhmmss', dt, timezone) if timezone else F('toYYYYMMDDhhmmss', dt) @staticmethod def toRelativeYearNum(d, timezone=''): From 3ec54e510c981975e3bdae55d216d97f4b559df8 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 29 May 2020 12:53:14 +0300 Subject: [PATCH 41/41] Fix tests that fail on older ClickHouse versions --- tests/test_alias_fields.py | 4 ++-- tests/test_engines.py | 3 +++ tests/test_funcs.py | 5 +++-- tests/test_uuid_fields.py | 2 ++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/test_alias_fields.py b/tests/test_alias_fields.py index 48d408f..6f8f1f2 100644 --- a/tests/test_alias_fields.py +++ b/tests/test_alias_fields.py @@ -37,7 +37,7 @@ class AliasFieldsTest(unittest.TestCase): self.assertEqual(results[0].alias_int, instance.int_field) self.assertEqual(results[0].alias_str, instance.str_field) self.assertEqual(results[0].alias_date, instance.date_field) - self.assertEqual(results[0].alias_func, '08/30/16') + self.assertEqual(results[0].alias_func, 201608) def test_assignment_error(self): # I can't prevent assigning at all, in case db.select statements with model provided sets model fields. 
@@ -74,6 +74,6 @@ class ModelWithAliasFields(Model): alias_str = StringField(alias=u'str_field') alias_int = Int32Field(alias='int_field') alias_date = DateField(alias='date_field') - alias_func = StringField(alias=F.formatDateTime(date_field, '%D')) + alias_func = Int32Field(alias=F.toYYYYMM(date_field)) engine = MergeTree('date_field', ('date_field',)) diff --git a/tests/test_engines.py b/tests/test_engines.py index 0258186..06c9f02 100644 --- a/tests/test_engines.py +++ b/tests/test_engines.py @@ -168,6 +168,9 @@ class EnginesTestCase(_EnginesHelperTestCase): self.assertEqual('(201701, 13)'.replace(' ', ''), parts[1].partition.replace(' ', '')) def test_custom_primary_key(self): + if self.database.server_version < (18, 1): + raise unittest.SkipTest('ClickHouse version too old') + class TestModel(SampleModel): engine = MergeTree( order_by=('date', 'event_id', 'event_group'), diff --git a/tests/test_funcs.py b/tests/test_funcs.py index b296cbc..fa352d8 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -560,9 +560,7 @@ class FuncsTestCase(TestCaseWithData): x = 17 s = 'hello' url = 'http://example.com/a/b/c/d' - self._test_func(F.hex(F.halfMD5(*args))) self._test_func(F.hex(F.MD5(s))) - self._test_func(F.hex(F.sipHash64(*args))) self._test_func(F.hex(F.sipHash128(s))) self._test_func(F.hex(F.cityHash64(*args))) self._test_func(F.hex(F.intHash32(x))) @@ -584,6 +582,9 @@ class FuncsTestCase(TestCaseWithData): self._test_func(F.hex(F.murmurHash3_128(s))) self._test_func(F.hex(F.xxHash32(*args))) self._test_func(F.hex(F.xxHash64(*args))) + if self.database.server_version >= (18, 1): + self._test_func(F.hex(F.halfMD5(*args))) + self._test_func(F.hex(F.sipHash64(*args))) def test_rand_functions(self): self._test_func(F.rand()) diff --git a/tests/test_uuid_fields.py b/tests/test_uuid_fields.py index 37bee2c..d81e8eb 100644 --- a/tests/test_uuid_fields.py +++ b/tests/test_uuid_fields.py @@ -16,6 +16,8 @@ class UUIDFieldsTest(unittest.TestCase): 
self.database.drop_database() def test_uuid_field(self): + if self.database.server_version < (18, 1): + raise unittest.SkipTest('ClickHouse version too old') # Create a model class TestModel(Model): i = Int16Field()