Add distinct method to querysets

This commit is contained in:
Itai Shirav 2017-09-10 17:17:04 +03:00
parent 7bbcae574a
commit 59564f8c70
7 changed files with 79 additions and 17 deletions

View File

@ -3,6 +3,7 @@ Change Log
Unreleased
----------
- Add `distinct` method to querysets
- Add `AlterTableWithBuffer` migration operation
v0.9.6

View File

@ -7,7 +7,7 @@ infi.clickhouse_orm.database
### Database
Database instances connect to a specific ClickHouse database for running queries,
Database instances connect to a specific ClickHouse database for running queries,
inserting data and other operations.
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True)
@ -71,7 +71,7 @@ Insert records into the database.
Executes schema migrations.
- `migrations_package_name` - fully qualified name of the Python package
- `migrations_package_name` - fully qualified name of the Python package
containing the migrations.
- `up_to` - number of the last migration to apply.
@ -89,7 +89,7 @@ Selects records and returns a single page of model instances.
- `conditions`: optional SQL conditions (contents of the WHERE clause).
- `settings`: query settings to send as HTTP GET parameters
The result is a namedtuple containing `objects` (list), `number_of_objects`,
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
@ -128,7 +128,7 @@ infi.clickhouse_orm.models
A base class for ORM models. Each model class represents a ClickHouse table. For example:
class CPUStats(Model):
timestamp = DateTimeField()
cpu_id = UInt16Field()
@ -172,7 +172,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o
#### get_database()
Gets the `Database` that this model instance belongs to.
Gets the `Database` that this model instance belongs to.
Returns `None` unless the instance was read from the database or written to it.
@ -191,7 +191,7 @@ Returns a `QuerySet` for selecting instances of this model class.
#### set_database(db)
Sets the `Database` that this model instance belongs to.
Sets the `Database` that this model instance belongs to.
This is done automatically when the instance is read from the database or written to it.
@ -261,7 +261,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o
#### get_database()
Gets the `Database` that this model instance belongs to.
Gets the `Database` that this model instance belongs to.
Returns `None` unless the instance was read from the database or written to it.
@ -280,7 +280,7 @@ Returns a `QuerySet` for selecting instances of this model class.
#### set_database(db)
Sets the `Database` that this model instance belongs to.
Sets the `Database` that this model instance belongs to.
This is done automatically when the instance is read from the database or written to it.
@ -585,6 +585,13 @@ Returns the contents of the query's `WHERE` clause as a string.
Returns the number of matching model instances.
#### distinct()
Adds a DISTINCT clause to the query, meaning that any duplicate rows
in the results will be omitted.
#### exclude(**kwargs)
@ -678,6 +685,13 @@ Returns the contents of the query's `WHERE` clause as a string.
Returns the number of rows after aggregation.
#### distinct()
Adds a DISTINCT clause to the query, meaning that any duplicate rows
in the results will be omitted.
#### exclude(**kwargs)

View File

@ -99,6 +99,16 @@ When some of the model fields aren't needed, it is more efficient to omit them f
qs = Person.objects_in(database).only('first_name', 'birthday')
Distinct
--------
Adds a DISTINCT clause to the query, meaning that any duplicate rows in the results will be omitted.
>>> Person.objects_in(database).only('first_name').count()
100
>>> Person.objects_in(database).only('first_name').distinct().count()
94
Slicing
-------

View File

@ -20,6 +20,7 @@
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
* [Ordering](querysets.md#ordering)
* [Omitting Fields](querysets.md#omitting-fields)
* [Distinct](querysets.md#distinct)
* [Slicing](querysets.md#slicing)
* [Pagination](querysets.md#pagination)
* [Aggregation](querysets.md#aggregation)

View File

@ -51,7 +51,10 @@ def get_method_sig(method):
for arg in argspec.args:
default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)
if default_arg.has_default:
args.append("%s=%s" % (arg, default_arg.default_value))
val = default_arg.default_value
if isinstance(val, basestring):
val = '"' + val + '"'
args.append("%s=%s" % (arg, val))
else:
args.append(arg)
arg_index += 1

View File

@ -187,6 +187,7 @@ class QuerySet(object):
self._q = []
self._fields = []
self._limits = None
self._distinct = False
def __iter__(self):
"""
@ -228,14 +229,15 @@ class QuerySet(object):
"""
Returns the whole query as a SQL string.
"""
distinct = 'DISTINCT ' if self._distinct else ''
fields = '*'
if self._fields:
fields = comma_join('`%s`' % field for field in self._fields)
ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
limit = '\nLIMIT %d, %d' % self._limits if self._limits else ''
params = (fields, self._model_cls.table_name(),
params = (distinct, fields, self._model_cls.table_name(),
self.conditions_as_sql(), ordering, limit)
return u'SELECT %s\nFROM `%s`\nWHERE %s%s%s' % params
return u'SELECT %s%s\nFROM `%s`\nWHERE %s%s%s' % params
def order_by_as_sql(self):
"""
@ -259,6 +261,11 @@ class QuerySet(object):
"""
Returns the number of matching model instances.
"""
if self._distinct:
# Use a subquery, since a simple count won't be accurate
sql = u'SELECT count() FROM (%s)' % self.as_sql()
raw = self._database.raw(sql)
return int(raw) if raw else 0
return self._database.count(self._model_cls, self.conditions_as_sql())
def order_by(self, *field_names):
@ -296,7 +303,7 @@ class QuerySet(object):
return qs
def paginate(self, page_num=1, page_size=100):
'''
"""
Returns a single page of model instances that match the queryset.
Note that `order_by` should be used first, to ensure a correct
partitioning of records into pages.
@ -306,7 +313,7 @@ class QuerySet(object):
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
'''
"""
from .database import Page
count = self.count()
pages_total = int(ceil(count / float(page_size)))
@ -323,8 +330,17 @@ class QuerySet(object):
page_size=page_size
)
def distinct(self):
    """
    Returns a copy of this queryset whose query includes a DISTINCT
    clause, so that duplicate rows are omitted from the results.
    The queryset this method is called on is not modified.
    """
    # Work on a shallow copy so that setting the flag does not affect `self`.
    clone = copy(self)
    clone._distinct = True
    return clone
def aggregate(self, *args, **kwargs):
'''
"""
Returns an `AggregateQuerySet` over this query, with `args` serving as
grouping fields and `kwargs` serving as calculated fields. At least one
calculated field is required. For example:
@ -337,7 +353,7 @@ class QuerySet(object):
WHERE data > '2017-08-01'
GROUP BY event_type
```
'''
"""
return AggregateQuerySet(self, args, kwargs)
@ -368,6 +384,7 @@ class AggregateQuerySet(QuerySet):
self._order_by = list(base_qs._order_by)
self._q = list(base_qs._q)
self._limits = base_qs._limits
self._distinct = base_qs._distinct
def group_by(self, *args):
"""
@ -398,15 +415,17 @@ class AggregateQuerySet(QuerySet):
"""
Returns the whole query as a SQL string.
"""
distinct = 'DISTINCT ' if self._distinct else ''
grouping = comma_join('`%s`' % field for field in self._grouping_fields)
fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
params = dict(
distinct=distinct,
grouping=grouping or "''",
fields=fields,
table=self._model_cls.table_name(),
conds=self.conditions_as_sql()
)
sql = u'SELECT %(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
sql = u'SELECT %(distinct)s%(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
if self._order_by:
sql += '\nORDER BY ' + self.order_by_as_sql()
if self._limits:

View File

@ -21,8 +21,11 @@ class QuerySetTestCase(TestCaseWithData):
def _test_qs(self, qs, expected_count):
logging.info(qs.as_sql())
count = 0
for instance in qs:
logging.info('\t%s' % instance.to_dict())
count += 1
logging.info('\t[%d]\t%s' % (count, instance.to_dict()))
self.assertEquals(count, expected_count)
self.assertEquals(qs.count(), expected_count)
def test_no_filtering(self):
@ -202,6 +205,11 @@ class QuerySetTestCase(TestCaseWithData):
page = qs.paginate(1, 100)
self.assertEquals(page.number_of_objects, 10)
def test_distinct(self):
    # With all fields selected, the test expects DISTINCT to keep all 100 rows.
    distinct_people = Person.objects_in(self.database).distinct()
    self._test_qs(distinct_people, 100)
    # Restricted to first names only, the test expects 94 distinct rows.
    distinct_first_names = distinct_people.only('first_name')
    self._test_qs(distinct_first_names, 94)
class AggregateTestCase(TestCaseWithData):
@ -310,6 +318,12 @@ class AggregateTestCase(TestCaseWithData):
qs = qs.filter(weekday=1)
self.assertEquals(qs.count(), 1)
def test_aggregate_with_distinct(self):
    # In this case distinct has no effect: the aggregate query is expected
    # to return a single row with or without the DISTINCT clause.
    qs = Person.objects_in(self.database).aggregate(average_height='avg(height)').distinct()
    # Log the generated SQL instead of printing it, as the other queryset
    # tests in this file do, so that test output stays clean.
    logging.info(qs.as_sql())
    # assertEqual is the supported name; the assertEquals alias is deprecated.
    self.assertEqual(qs.count(), 1)
Color = Enum('Color', u'red blue green yellow brown white black')