Merge branch 'carrotquest-qs-with-totals' into develop

This commit is contained in:
Itai Shirav 2019-02-27 08:59:02 +02:00
commit 595f2023e3
5 changed files with 63 additions and 4 deletions

View File

@ -4,6 +4,7 @@ Change Log
Unreleased Unreleased
---------- ----------
- Add PREWHERE support to querysets (M1hacka) - Add PREWHERE support to querysets (M1hacka)
- Add WITH TOTALS support to querysets (M1hacka)
- Extend date field range (trthhrtz) - Extend date field range (trthhrtz)
- Fix parsing of server errors in ClickHouse v19.3.3+ - Fix parsing of server errors in ClickHouse v19.3.3+
- Fix pagination when asking for the last page on a query that matches no records - Fix pagination when asking for the last page on a query that matches no records

View File

@ -912,6 +912,9 @@ The result is a namedtuple containing `objects` (list), `number_of_objects`,
#### select_fields_as_sql() #### select_fields_as_sql()
Returns the selected fields or expressions as a SQL string.
### AggregateQuerySet ### AggregateQuerySet
Extends QuerySet Extends QuerySet
@ -1030,3 +1033,14 @@ The result is a namedtuple containing `objects` (list), `number_of_objects`,
#### select_fields_as_sql() #### select_fields_as_sql()
Returns the selected fields or expressions as a SQL string.
#### with_totals()
Adds WITH TOTALS modifier ot GROUP BY, making query return extra row
with aggregate function calculated across all the rows. More information:
https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier

View File

@ -32,14 +32,14 @@ For filters with compound conditions you can use `Q` objects inside `filter` wit
>>> qs.conditions_as_sql() >>> qs.conditions_as_sql()
u"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))" u"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"
By default conditions from `filter` and `exclude` methods are add to `WHERE` clause. By default conditions from `filter` and `exclude` methods are add to `WHERE` clause.
For better aggregation performance you can add them to `PREWHERE` section using `prewhere=True` parameter For better aggregation performance you can add them to `PREWHERE` section using `prewhere=True` parameter
>>> qs = Person.objects_in(database) >>> qs = Person.objects_in(database)
>>> qs = qs.filter(first_name__startswith='V', prewhere=True) >>> qs = qs.filter(first_name__startswith='V', prewhere=True)
>>> qs.conditions_as_sql(prewhere=True) >>> qs.conditions_as_sql(prewhere=True)
u"first_name LIKE 'V%'" u"first_name LIKE 'V%'"
There are different operators that can be used, by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators. There are different operators that can be used, by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators.
| Operator | Equivalent SQL | Comments | | Operator | Equivalent SQL | Comments |
@ -128,14 +128,14 @@ Adds a DISTINCT clause to the query, meaning that any duplicate rows in the resu
Final Final
-------- --------
This method can be used only with CollapsingMergeTree engine. This method can be used only with CollapsingMergeTree engine.
Adds a FINAL modifier to the query, meaning data is selected fully "collapsed" by sign field. Adds a FINAL modifier to the query, meaning data is selected fully "collapsed" by sign field.
>>> Person.objects_in(database).count() >>> Person.objects_in(database).count()
100 100
>>> Person.objects_in(database).final().count() >>> Person.objects_in(database).final().count()
94 94
Slicing Slicing
------- -------
@ -210,6 +210,19 @@ This queryset is translated to:
After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model. After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model.
If you limit aggregation results, it might be useful to get total aggregation values for all rows.
To achieve this, you can use `with_totals` method. It will return extra row (last) with
values aggregated for all rows suitable for filters.
qs = Person.objects_in(database).aggregate('first_name', num='count()').with_totals().order_by('-count')[:3]
>>> print qs.count()
4
>>> for row in qs:
>>> print("'{}': {}".format(row.first_name, row.count))
'Cassandra': 2
'Alexandra': 2
'': 100
--- ---
[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md) [<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md)

View File

@ -290,6 +290,7 @@ class QuerySet(object):
self._where_q = Q() self._where_q = Q()
self._prewhere_q = Q() self._prewhere_q = Q()
self._grouping_fields = [] self._grouping_fields = []
self._grouping_with_totals = False
self._fields = model_cls.fields().keys() self._fields = model_cls.fields().keys()
self._limits = None self._limits = None
self._distinct = False self._distinct = False
@ -332,6 +333,9 @@ class QuerySet(object):
return qs return qs
def select_fields_as_sql(self): def select_fields_as_sql(self):
"""
Returns the selected fields or expressions as a SQL string.
"""
return comma_join('`%s`' % field for field in self._fields) if self._fields else '*' return comma_join('`%s`' % field for field in self._fields) if self._fields else '*'
def as_sql(self): def as_sql(self):
@ -353,6 +357,9 @@ class QuerySet(object):
if self._grouping_fields: if self._grouping_fields:
sql += '\nGROUP BY %s' % comma_join('`%s`' % field for field in self._grouping_fields) sql += '\nGROUP BY %s' % comma_join('`%s`' % field for field in self._grouping_fields)
if self._grouping_with_totals:
sql += ' WITH TOTALS'
if self._order_by: if self._order_by:
sql += '\nORDER BY ' + self.order_by_as_sql() sql += '\nORDER BY ' + self.order_by_as_sql()
@ -570,6 +577,9 @@ class AggregateQuerySet(QuerySet):
raise NotImplementedError('Cannot re-aggregate an AggregateQuerySet') raise NotImplementedError('Cannot re-aggregate an AggregateQuerySet')
def select_fields_as_sql(self): def select_fields_as_sql(self):
"""
Returns the selected fields or expressions as a SQL string.
"""
return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
def __iter__(self): def __iter__(self):
@ -582,3 +592,13 @@ class AggregateQuerySet(QuerySet):
sql = u'SELECT count() FROM (%s)' % self.as_sql() sql = u'SELECT count() FROM (%s)' % self.as_sql()
raw = self._database.raw(sql) raw = self._database.raw(sql)
return int(raw) if raw else 0 return int(raw) if raw else 0
def with_totals(self):
"""
Adds WITH TOTALS modifier ot GROUP BY, making query return extra row
with aggregate function calculated across all the rows. More information:
https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier
"""
qs = copy(self)
qs._grouping_with_totals = True
return qs

View File

@ -411,6 +411,17 @@ class AggregateTestCase(TestCaseWithData):
print(qs.as_sql()) print(qs.as_sql())
self.assertEqual(qs.count(), 1) self.assertEqual(qs.count(), 1)
def test_aggregate_with_totals(self):
qs = Person.objects_in(self.database).aggregate('first_name', count='count()').\
with_totals().order_by('-count')[:5]
print(qs.as_sql())
result = list(qs)
self.assertEqual(len(result), 6)
for row in result[:-1]:
self.assertEqual(2, row.count)
self.assertEqual(100, result[-1].count)
def test_double_underscore_field(self): def test_double_underscore_field(self):
class Mdl(Model): class Mdl(Model):
the__number = Int32Field() the__number = Int32Field()