From 5f4023f120c2cb7a908b4aa0fae477b54bc867c0 Mon Sep 17 00:00:00 2001 From: M1ha Date: Wed, 12 Dec 2018 15:33:35 +0500 Subject: [PATCH 1/3] Added with_totals method --- docs/querysets.md | 13 +++++++++++++ src/infi/clickhouse_orm/query.py | 17 +++++++++++++++++ tests/test_querysets.py | 11 +++++++++++ 3 files changed, 41 insertions(+) diff --git a/docs/querysets.md b/docs/querysets.md index c4e84ea..b42fe1b 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -199,6 +199,19 @@ This queryset is translated to: After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model. +If you limit aggregation results, it might be useful to get total aggregation values for all rows. +To achieve this, you can use `with_totals` method. It will return extra row (last) with +values aggregated for all rows suitable for filters. 
+ + qs = Person.objects_in(database).aggregate('first_name' num='count()').with_totals().order_by('-count')[:3] + >>> print qs.count() + 4 + >>> for row in qs: + >>> print(row.first_name, row.count) + 'Cassandra' 2 + 'Alexandra' 2 + '' 100 + --- [<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md) \ No newline at end of file diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 0052a0c..50ffd68 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -287,6 +287,7 @@ class QuerySet(object): self._where_q = Q() self._prewhere_q = Q() self._grouping_fields = [] + self._grouping_with_totals = False self._fields = model_cls.fields().keys() self._limits = None self._distinct = False @@ -348,6 +349,9 @@ class QuerySet(object): if self._grouping_fields: sql += '\nGROUP BY %s' % comma_join('`%s`' % field for field in self._grouping_fields) + if self._grouping_with_totals: + sql += ' WITH TOTALS' + if self._order_by: sql += '\nORDER BY ' + self.order_by_as_sql() @@ -551,6 +555,9 @@ class AggregateQuerySet(QuerySet): def select_fields_as_sql(self): return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) + def group_by_as_sql(self): + return 'GROUP BY' + def __iter__(self): return self._database.select(self.as_sql()) # using an ad-hoc model @@ -561,3 +568,13 @@ class AggregateQuerySet(QuerySet): sql = u'SELECT count() FROM (%s)' % self.as_sql() raw = self._database.raw(sql) return int(raw) if raw else 0 + + def with_totals(self): + """ + Adds WITH TOTALS modifier to GROUP BY, making query return extra row + with aggregate function calculated across all the rows. 
More information: + https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier + """ + qs = copy(self) + qs._grouping_with_totals = True + return qs diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 14db99c..d0baaad 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -370,6 +370,17 @@ class AggregateTestCase(TestCaseWithData): print(qs.as_sql()) self.assertEqual(qs.count(), 1) + def test_aggregate_with_totals(self): + qs = Person.objects_in(self.database).aggregate('first_name', count='count()').\ + with_totals().order_by('-count')[:5] + print(qs.as_sql()) + result = list(qs) + self.assertEqual(len(result), 6) + for row in result[:-1]: + self.assertEqual(2, row.count) + + self.assertEqual(100, result[-1].count) + def test_double_underscore_field(self): class Mdl(Model): the__number = Int32Field() From 191eac44243db25b6e8bf108454956136a4c7527 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Wed, 27 Feb 2019 08:56:04 +0200 Subject: [PATCH 2/3] Remove unused method --- src/infi/clickhouse_orm/query.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 083c595..728b977 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -576,9 +576,6 @@ class AggregateQuerySet(QuerySet): def select_fields_as_sql(self): return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) - def group_by_as_sql(self): - return 'GROUP BY' - def __iter__(self): return self._database.select(self.as_sql()) # using an ad-hoc model From 7946a2a2725ff33706534cdcac1e1eacc13e2ec3 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Wed, 27 Feb 2019 08:58:41 +0200 Subject: [PATCH 3/3] Update docs --- CHANGELOG.md | 1 + docs/class_reference.md | 14 ++++++++++++++ docs/querysets.md | 20 ++++++++++---------- src/infi/clickhouse_orm/query.py | 6 ++++++ 4 files changed, 31 insertions(+), 10 deletions(-) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 158e22d..400cb6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Change Log Unreleased ---------- - Add PREWHERE support to querysets (M1hacka) +- Add WITH TOTALS support to querysets (M1hacka) - Extend date field range (trthhrtz) - Fix parsing of server errors in ClickHouse v19.3.3+ - Fix pagination when asking for the last page on a query that matches no records diff --git a/docs/class_reference.md b/docs/class_reference.md index 4876817..49e71f3 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -912,6 +912,9 @@ The result is a namedtuple containing `objects` (list), `number_of_objects`, #### select_fields_as_sql() +Returns the selected fields or expressions as a SQL string. + + ### AggregateQuerySet Extends QuerySet @@ -1030,3 +1033,14 @@ The result is a namedtuple containing `objects` (list), `number_of_objects`, #### select_fields_as_sql() +Returns the selected fields or expressions as a SQL string. + + +#### with_totals() + + +Adds WITH TOTALS modifier to GROUP BY, making query return extra row +with aggregate function calculated across all the rows. More information: +https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier + + diff --git a/docs/querysets.md b/docs/querysets.md index 09465df..056e794 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -32,14 +32,14 @@ For filters with compound conditions you can use `Q` objects inside `filter` wit >>> qs.conditions_as_sql() u"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))" -By default conditions from `filter` and `exclude` methods are add to `WHERE` clause. +By default conditions from `filter` and `exclude` methods are added to `WHERE` clause. 
For better aggregation performance you can add them to `PREWHERE` section using `prewhere=True` parameter >>> qs = Person.objects_in(database) >>> qs = qs.filter(first_name__startswith='V', prewhere=True) >>> qs.conditions_as_sql(prewhere=True) u"first_name LIKE 'V%'" - + There are different operators that can be used, by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators. | Operator | Equivalent SQL | Comments | @@ -128,14 +128,14 @@ Adds a DISTINCT clause to the query, meaning that any duplicate rows in the resu Final -------- -This method can be used only with CollapsingMergeTree engine. +This method can be used only with CollapsingMergeTree engine. Adds a FINAL modifier to the query, meaning data is selected fully "collapsed" by sign field. >>> Person.objects_in(database).count() 100 >>> Person.objects_in(database).final().count() 94 - + Slicing ------- @@ -214,14 +214,14 @@ If you limit aggregation results, it might be useful to get total aggregation va To achieve this, you can use `with_totals` method. It will return extra row (last) with values aggregated for all rows suitable for filters. 
- qs = Person.objects_in(database).aggregate('first_name' num='count()').with_totals().order_by('-count')[:3] + qs = Person.objects_in(database).aggregate('first_name', count='count()').with_totals().order_by('-count')[:3] >>> print qs.count() 4 - >>> for row in qs: - >>> print(row.first_name, row.count) - 'Cassandra' 2 - 'Alexandra' 2 - '' 100 + >>> for row in qs: + >>> print("'{}': {}".format(row.first_name, row.count)) + 'Cassandra': 2 + 'Alexandra': 2 + '': 100 --- diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 728b977..ab7a705 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -333,6 +333,9 @@ class QuerySet(object): return qs def select_fields_as_sql(self): + """ + Returns the selected fields or expressions as a SQL string. + """ return comma_join('`%s`' % field for field in self._fields) if self._fields else '*' def as_sql(self): @@ -574,6 +577,9 @@ class AggregateQuerySet(QuerySet): raise NotImplementedError('Cannot re-aggregate an AggregateQuerySet') def select_fields_as_sql(self): + """ + Returns the selected fields or expressions as a SQL string. + """ return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) def __iter__(self):