diff --git a/CHANGELOG.md b/CHANGELOG.md index 158e22d..400cb6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Change Log Unreleased ---------- - Add PREWHERE support to querysets (M1hacka) +- Add WITH TOTALS support to querysets (M1hacka) - Extend date field range (trthhrtz) - Fix parsing of server errors in ClickHouse v19.3.3+ - Fix pagination when asking for the last page on a query that matches no records diff --git a/docs/class_reference.md b/docs/class_reference.md index 4876817..49e71f3 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -912,6 +912,9 @@ The result is a namedtuple containing `objects` (list), `number_of_objects`, #### select_fields_as_sql() +Returns the selected fields or expressions as a SQL string. + + ### AggregateQuerySet Extends QuerySet @@ -1030,3 +1033,14 @@ The result is a namedtuple containing `objects` (list), `number_of_objects`, #### select_fields_as_sql() +Returns the selected fields or expressions as a SQL string. + + +#### with_totals() + + +Adds WITH TOTALS modifier ot GROUP BY, making query return extra row +with aggregate function calculated across all the rows. More information: +https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier + + diff --git a/docs/querysets.md b/docs/querysets.md index 2329000..056e794 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -32,14 +32,14 @@ For filters with compound conditions you can use `Q` objects inside `filter` wit >>> qs.conditions_as_sql() u"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))" -By default conditions from `filter` and `exclude` methods are add to `WHERE` clause. +By default conditions from `filter` and `exclude` methods are add to `WHERE` clause. For better aggregation performance you can add them to `PREWHERE` section using `prewhere=True` parameter >>> qs = Person.objects_in(database) >>> qs = qs.filter(first_name__startswith='V', prewhere=True) >>> qs.conditions_as_sql(prewhere=True) u"first_name LIKE 'V%'" - + There are different operators that can be used, by passing `__=` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators. | Operator | Equivalent SQL | Comments | @@ -128,14 +128,14 @@ Adds a DISTINCT clause to the query, meaning that any duplicate rows in the resu Final -------- -This method can be used only with CollapsingMergeTree engine. +This method can be used only with CollapsingMergeTree engine. Adds a FINAL modifier to the query, meaning data is selected fully "collapsed" by sign field. >>> Person.objects_in(database).count() 100 >>> Person.objects_in(database).final().count() 94 - + Slicing ------- @@ -210,6 +210,19 @@ This queryset is translated to: After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model. +If you limit aggregation results, it might be useful to get total aggregation values for all rows. +To achieve this, you can use `with_totals` method. It will return extra row (last) with +values aggregated for all rows suitable for filters. + + qs = Person.objects_in(database).aggregate('first_name', num='count()').with_totals().order_by('-count')[:3] + >>> print qs.count() + 4 + >>> for row in qs: + >>> print("'{}': {}".format(row.first_name, row.count)) + 'Cassandra': 2 + 'Alexandra': 2 + '': 100 + --- [<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md) \ No newline at end of file diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 10dfe97..ab7a705 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -290,6 +290,7 @@ class QuerySet(object): self._where_q = Q() self._prewhere_q = Q() self._grouping_fields = [] + self._grouping_with_totals = False self._fields = model_cls.fields().keys() self._limits = None self._distinct = False @@ -332,6 +333,9 @@ class QuerySet(object): return qs def select_fields_as_sql(self): + """ + Returns the selected fields or expressions as a SQL string. + """ return comma_join('`%s`' % field for field in self._fields) if self._fields else '*' def as_sql(self): @@ -353,6 +357,9 @@ class QuerySet(object): if self._grouping_fields: sql += '\nGROUP BY %s' % comma_join('`%s`' % field for field in self._grouping_fields) + if self._grouping_with_totals: + sql += ' WITH TOTALS' + if self._order_by: sql += '\nORDER BY ' + self.order_by_as_sql() @@ -570,6 +577,9 @@ class AggregateQuerySet(QuerySet): raise NotImplementedError('Cannot re-aggregate an AggregateQuerySet') def select_fields_as_sql(self): + """ + Returns the selected fields or expressions as a SQL string. + """ return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) def __iter__(self): @@ -582,3 +592,13 @@ class AggregateQuerySet(QuerySet): sql = u'SELECT count() FROM (%s)' % self.as_sql() raw = self._database.raw(sql) return int(raw) if raw else 0 + + def with_totals(self): + """ + Adds WITH TOTALS modifier ot GROUP BY, making query return extra row + with aggregate function calculated across all the rows. More information: + https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier + """ + qs = copy(self) + qs._grouping_with_totals = True + return qs diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 298d859..ea3d73c 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -411,6 +411,17 @@ class AggregateTestCase(TestCaseWithData): print(qs.as_sql()) self.assertEqual(qs.count(), 1) + def test_aggregate_with_totals(self): + qs = Person.objects_in(self.database).aggregate('first_name', count='count()').\ + with_totals().order_by('-count')[:5] + print(qs.as_sql()) + result = list(qs) + self.assertEqual(len(result), 6) + for row in result[:-1]: + self.assertEqual(2, row.count) + + self.assertEqual(100, result[-1].count) + def test_double_underscore_field(self): class Mdl(Model): the__number = Int32Field()