From 4848c7f813732768ccf1c214e7be7904c2d8391c Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 12 Dec 2019 22:09:27 +0200 Subject: [PATCH 1/5] Support LowCardinality columns in ad-hoc queries --- CHANGELOG.md | 4 ++++ src/infi/clickhouse_orm/models.py | 4 ++++ tests/base_test_with_data.py | 2 +- tests/test_database.py | 9 +++++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97042a9..642e3f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ Change Log ========== +Unreleased +---------- +- Support LowCardinality columns in ad-hoc queries + v1.2.0 ------ - Add support for per-field compression codecs (rbelio, Chocorean) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 5f8b085..7965d02 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -88,6 +88,10 @@ class ModelBase(type): if db_type.startswith('Nullable'): inner_field = cls.create_ad_hoc_field(db_type[9 : -1]) return orm_fields.NullableField(inner_field) + # LowCardinality + if db_type.startswith('LowCardinality'): + inner_field = cls.create_ad_hoc_field(db_type[15 : -1]) + return orm_fields.LowCardinalityField(inner_field) # Simple fields name = db_type + 'Field' if not hasattr(orm_fields, name): diff --git a/tests/base_test_with_data.py b/tests/base_test_with_data.py index f080f85..8cbea48 100644 --- a/tests/base_test_with_data.py +++ b/tests/base_test_with_data.py @@ -35,7 +35,7 @@ class TestCaseWithData(unittest.TestCase): class Person(Model): first_name = StringField() - last_name = StringField() + last_name = LowCardinalityField(StringField()) birthday = DateField() height = Float32Field() passport = NullableField(UInt32Field()) diff --git a/tests/test_database.py b/tests/test_database.py index 2a50590..0433bff 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -209,3 +209,12 @@ class DatabaseTestCase(TestCaseWithData): # Remove the setting and see that now it works self.database.add_setting('max_columns_to_read', None) list(self.database.select('SELECT * from system.tables')) + + def test_create_ad_hoc_field(self): + # Tests that create_ad_hoc_field works for all column types in the database + from infi.clickhouse_orm.models import ModelBase + query = "SELECT DISTINCT type FROM system.columns" + for row in self.database.select(query): + if row.type in ('IPv4', 'IPv6'): + continue # unsupported yet + ModelBase.create_ad_hoc_field(row.type) From 6d7b6250c55c1850a623cf47bd0d97738cf2e1a1 Mon Sep 17 00:00:00 2001 From: utapyngo Date: Mon, 20 Jan 2020 19:53:38 +0700 Subject: [PATCH 2/5] Support for using LIMIT N BY feature See https://clickhouse.yandex/docs/en/query_language/select/#limit-by-clause --- src/infi/clickhouse_orm/query.py | 24 ++++++++++++++++++++++++ tests/test_querysets.py | 6 ++++++ 2 files changed, 30 insertions(+) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 16de5ba..9298deb 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -293,6 +293,8 @@ class QuerySet(object): self._grouping_with_totals = False self._fields = model_cls.fields().keys() self._limits = None + self._limit_by = None + self._limit_by_fields = None self._distinct = False self._final = False @@ -332,6 +334,24 @@ class QuerySet(object): qs._limits = (start, stop - start) return qs + def limit_by(self, offset_limit, *fields): + if isinstance(offset_limit, six.integer_types): + # Single limit + assert offset_limit >= 0, 'negative limits are not supported' + qs = copy(self) + qs._limit_by = (0, offset_limit) + qs._limit_by_fields = fields + return qs + else: + # Offset, limit + offset = offset_limit[0] + limit = offset_limit[1] + assert offset >= 0 and limit >= 0, 'negative limits are not supported' + qs = copy(self) + qs._limit_by = (offset, limit) + qs._limit_by_fields = fields + return qs + def select_fields_as_sql(self): """ Returns the selected fields or expressions as a SQL string. @@ -369,6 +389,10 @@ class QuerySet(object): if self._limits: sql += '\nLIMIT %d, %d' % self._limits + if self._limit_by: + sql += '\nLIMIT %d, %d' % self._limit_by + sql += ' BY %s' % comma_join('`%s`' % field for field in self._limit_by_fields) + return sql def order_by_as_sql(self): diff --git a/tests/test_querysets.py b/tests/test_querysets.py index b17933b..e1a7f08 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -432,6 +432,12 @@ class AggregateTestCase(TestCaseWithData): qs = Mdl.objects_in(self.database).filter(the__next__number__gt=1) self.assertEqual(qs.conditions_as_sql(), 'the__next__number > 1') + def test_limit_by(self): + qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n='count()').\ + order_by('first_name', '-height').limit_by(1, 'first_name') + self.assertEqual(qs.count(), 94) + self.assertEqual(list(qs)[89].last_name, 'Bowen') + Color = Enum('Color', u'red blue green yellow brown white black') From acccfbcaad05ae685373bb542a706ba02e5ae8be Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 7 Feb 2020 13:36:55 +0200 Subject: [PATCH 3/5] Support for using LIMIT N BY feature (simplify, additional testing, documentation) --- CHANGELOG.md | 1 + docs/class_reference.md | 16 ++++++++++++++++ src/infi/clickhouse_orm/query.py | 33 ++++++++++++++++---------------- tests/test_querysets.py | 11 +++++++++++ 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 642e3f7..61c6538 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Change Log Unreleased ---------- - Support LowCardinality columns in ad-hoc queries +- Support for LIMIT BY in querysets (utapyngo) v1.2.0 ------ diff --git a/docs/class_reference.md b/docs/class_reference.md index bcace93..e98e33b 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -890,6 +890,14 @@ Adds a FINAL modifier to table, meaning data will be collapsed to final version. Can be used with `CollapsingMergeTree` engine only. +#### limit_by(offset_limit, *fields) + + +Adds a LIMIT BY clause to the query. +- `offset_limit`: either an integer specifying the limit, or a tuple of integers (offset, limit). +- `fields`: the field names to use in the clause. + + #### only(*field_names) @@ -1013,6 +1021,14 @@ be names of grouping fields or calculated fields that this queryset was created with. +#### limit_by(offset_limit, *fields) + + +Adds a LIMIT BY clause to the query. +- `offset_limit`: either an integer specifying the limit, or a tuple of integers (offset, limit). +- `fields`: the field names to use in the clause. + + #### only(*field_names) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 9298deb..f1fb119 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -335,22 +335,21 @@ class QuerySet(object): return qs def limit_by(self, offset_limit, *fields): + """ + Adds a LIMIT BY clause to the query. + - `offset_limit`: either an integer specifying the limit, or a tuple of integers (offset, limit). + - `fields`: the field names to use in the clause. + """ if isinstance(offset_limit, six.integer_types): # Single limit - assert offset_limit >= 0, 'negative limits are not supported' - qs = copy(self) - qs._limit_by = (0, offset_limit) - qs._limit_by_fields = fields - return qs - else: - # Offset, limit - offset = offset_limit[0] - limit = offset_limit[1] - assert offset >= 0 and limit >= 0, 'negative limits are not supported' - qs = copy(self) - qs._limit_by = (offset, limit) - qs._limit_by_fields = fields - return qs + offset_limit = (0, offset_limit) + offset = offset_limit[0] + limit = offset_limit[1] + assert offset >= 0 and limit >= 0, 'negative limits are not supported' + qs = copy(self) + qs._limit_by = (offset, limit) + qs._limit_by_fields = fields + return qs def select_fields_as_sql(self): """ @@ -386,13 +385,13 @@ class QuerySet(object): if self._order_by: sql += '\nORDER BY ' + self.order_by_as_sql() - if self._limits: - sql += '\nLIMIT %d, %d' % self._limits - if self._limit_by: sql += '\nLIMIT %d, %d' % self._limit_by sql += ' BY %s' % comma_join('`%s`' % field for field in self._limit_by_fields) + if self._limits: + sql += '\nLIMIT %d, %d' % self._limits + return sql def order_by_as_sql(self): diff --git a/tests/test_querysets.py b/tests/test_querysets.py index e1a7f08..4a6a17b 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -433,10 +433,21 @@ class AggregateTestCase(TestCaseWithData): self.assertEqual(qs.conditions_as_sql(), 'the__next__number > 1') def test_limit_by(self): + # Test without offset qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n='count()').\ order_by('first_name', '-height').limit_by(1, 'first_name') self.assertEqual(qs.count(), 94) self.assertEqual(list(qs)[89].last_name, 'Bowen') + # Test with limit and offset, also mixing LIMIT with LIMIT BY + qs = Person.objects_in(self.database).filter(height__gt=1.67).order_by('height', 'first_name') + limited_qs = qs.limit_by((0, 3), 'height') + self.assertEquals([p.first_name for p in limited_qs[:3]], ['Amanda', 'Buffy', 'Dora']) + limited_qs = qs.limit_by((3, 3), 'height') + self.assertEquals([p.first_name for p in limited_qs[:3]], ['Elton', 'Josiah', 'Macaulay']) + limited_qs = qs.limit_by((6, 3), 'height') + self.assertEquals([p.first_name for p in limited_qs[:3]], ['Norman', 'Octavius', 'Oliver']) + + Color = Enum('Color', u'red blue green yellow brown white black') From 17b5c629ac7a724ef9ec1927aa796757c58f8711 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 7 Feb 2020 13:37:11 +0200 Subject: [PATCH 4/5] Convert to python 3 --- scripts/generate_ref.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py index c7d31b1..0cbdb0d 100644 --- a/scripts/generate_ref.py +++ b/scripts/generate_ref.py @@ -52,8 +52,6 @@ def get_method_sig(method): default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index) if default_arg.has_default: val = default_arg.default_value - if isinstance(val, basestring): - val = '"' + val + '"' args.append("%s=%s" % (arg, val)) else: args.append(arg) @@ -73,45 +71,45 @@ def docstring(obj): indentation = min(len(line) - len(line.lstrip()) for line in lines if line.strip()) # Output the lines without the indentation for line in lines: - print line[indentation:] - print + print(line[indentation:]) + print() def class_doc(cls, list_methods=True): bases = ', '.join([b.__name__ for b in cls.__bases__]) - print '###', cls.__name__ - print + print('###', cls.__name__) + print() if bases != 'object': - print 'Extends', bases - print + print('Extends', bases) + print() docstring(cls) - for name, method in inspect.getmembers(cls, inspect.ismethod): + for name, method in inspect.getmembers(cls, lambda m: inspect.ismethod(m) or inspect.isfunction(m)): if name == '__init__': # Initializer - print '####', get_method_sig(method).replace(name, cls.__name__) + print('####', get_method_sig(method).replace(name, cls.__name__)) elif name[0] == '_': # Private method continue - elif method.__self__ == cls: + elif hasattr(method, '__self__') and method.__self__ == cls: # Class method if not list_methods: continue - print '#### %s.%s' % (cls.__name__, get_method_sig(method)) + print('#### %s.%s' % (cls.__name__, get_method_sig(method))) else: # Regular method if not list_methods: continue - print '####', get_method_sig(method) - print + print('####', get_method_sig(method)) + print() docstring(method) - print + print() def module_doc(classes, list_methods=True): mdl = classes[0].__module__ - print mdl - print '-' * len(mdl) - print + print(mdl) + print('-' * len(mdl)) + print() for cls in classes: class_doc(cls, list_methods) @@ -128,9 +126,9 @@ if __name__ == '__main__': from infi.clickhouse_orm import models from infi.clickhouse_orm import query - print 'Class Reference' - print '===============' - print + print('Class Reference') + print('===============') + print() module_doc([database.Database, database.DatabaseException]) module_doc([models.Model, models.BufferModel, models.DistributedModel]) module_doc(sorted([fields.Field] + all_subclasses(fields.Field), key=lambda x: x.__name__), False) From 40e26d68b684e698a92279a7a914707da8021ddd Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 7 Feb 2020 14:58:42 +0200 Subject: [PATCH 5/5] Releasing v1.3.0 --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61c6538..bcef883 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ Change Log ========== -Unreleased ----------- +v1.3.0 +------ - Support LowCardinality columns in ad-hoc queries - Support for LIMIT BY in querysets (utapyngo)