mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2024-11-13 04:46:33 +03:00
Add `distinct` method to querysets
This commit is contained in:
parent
7bbcae574a
commit
59564f8c70
|
@ -3,6 +3,7 @@ Change Log
|
||||||
|
|
||||||
Unreleased
|
Unreleased
|
||||||
----------
|
----------
|
||||||
|
- Add `distinct` method to querysets
|
||||||
- Add `AlterTableWithBuffer` migration operation
|
- Add `AlterTableWithBuffer` migration operation
|
||||||
|
|
||||||
v0.9.6
|
v0.9.6
|
||||||
|
|
|
@ -7,7 +7,7 @@ infi.clickhouse_orm.database
|
||||||
### Database
|
### Database
|
||||||
|
|
||||||
|
|
||||||
Database instances connect to a specific ClickHouse database for running queries,
|
Database instances connect to a specific ClickHouse database for running queries,
|
||||||
inserting data and other operations.
|
inserting data and other operations.
|
||||||
|
|
||||||
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True)
|
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True)
|
||||||
|
@ -71,7 +71,7 @@ Insert records into the database.
|
||||||
|
|
||||||
Executes schema migrations.
|
Executes schema migrations.
|
||||||
|
|
||||||
- `migrations_package_name` - fully qualified name of the Python package
|
- `migrations_package_name` - fully qualified name of the Python package
|
||||||
containing the migrations.
|
containing the migrations.
|
||||||
- `up_to` - number of the last migration to apply.
|
- `up_to` - number of the last migration to apply.
|
||||||
|
|
||||||
|
@ -89,7 +89,7 @@ Selects records and returns a single page of model instances.
|
||||||
- `conditions`: optional SQL conditions (contents of the WHERE clause).
|
- `conditions`: optional SQL conditions (contents of the WHERE clause).
|
||||||
- `settings`: query settings to send as HTTP GET parameters
|
- `settings`: query settings to send as HTTP GET parameters
|
||||||
|
|
||||||
The result is a namedtuple containing `objects` (list), `number_of_objects`,
|
The result is a namedtuple containing `objects` (list), `number_of_objects`,
|
||||||
`pages_total`, `number` (of the current page), and `page_size`.
|
`pages_total`, `number` (of the current page), and `page_size`.
|
||||||
|
|
||||||
|
|
||||||
|
@ -128,7 +128,7 @@ infi.clickhouse_orm.models
|
||||||
|
|
||||||
|
|
||||||
A base class for ORM models. Each model class represents a ClickHouse table. For example:
|
A base class for ORM models. Each model class represents a ClickHouse table. For example:
|
||||||
|
|
||||||
class CPUStats(Model):
|
class CPUStats(Model):
|
||||||
timestamp = DateTimeField()
|
timestamp = DateTimeField()
|
||||||
cpu_id = UInt16Field()
|
cpu_id = UInt16Field()
|
||||||
|
@ -172,7 +172,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o
|
||||||
#### get_database()
|
#### get_database()
|
||||||
|
|
||||||
|
|
||||||
Gets the `Database` that this model instance belongs to.
|
Gets the `Database` that this model instance belongs to.
|
||||||
Returns `None` unless the instance was read from the database or written to it.
|
Returns `None` unless the instance was read from the database or written to it.
|
||||||
|
|
||||||
|
|
||||||
|
@ -191,7 +191,7 @@ Returns a `QuerySet` for selecting instances of this model class.
|
||||||
#### set_database(db)
|
#### set_database(db)
|
||||||
|
|
||||||
|
|
||||||
Sets the `Database` that this model instance belongs to.
|
Sets the `Database` that this model instance belongs to.
|
||||||
This is done automatically when the instance is read from the database or written to it.
|
This is done automatically when the instance is read from the database or written to it.
|
||||||
|
|
||||||
|
|
||||||
|
@ -261,7 +261,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o
|
||||||
#### get_database()
|
#### get_database()
|
||||||
|
|
||||||
|
|
||||||
Gets the `Database` that this model instance belongs to.
|
Gets the `Database` that this model instance belongs to.
|
||||||
Returns `None` unless the instance was read from the database or written to it.
|
Returns `None` unless the instance was read from the database or written to it.
|
||||||
|
|
||||||
|
|
||||||
|
@ -280,7 +280,7 @@ Returns a `QuerySet` for selecting instances of this model class.
|
||||||
#### set_database(db)
|
#### set_database(db)
|
||||||
|
|
||||||
|
|
||||||
Sets the `Database` that this model instance belongs to.
|
Sets the `Database` that this model instance belongs to.
|
||||||
This is done automatically when the instance is read from the database or written to it.
|
This is done automatically when the instance is read from the database or written to it.
|
||||||
|
|
||||||
|
|
||||||
|
@ -585,6 +585,13 @@ Returns the contents of the query's `WHERE` clause as a string.
|
||||||
Returns the number of matching model instances.
|
Returns the number of matching model instances.
|
||||||
|
|
||||||
|
|
||||||
|
#### distinct()
|
||||||
|
|
||||||
|
|
||||||
|
Adds a DISTINCT clause to the query, meaning that any duplicate rows
|
||||||
|
in the results will be omitted.
|
||||||
|
|
||||||
|
|
||||||
#### exclude(**kwargs)
|
#### exclude(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@ -678,6 +685,13 @@ Returns the contents of the query's `WHERE` clause as a string.
|
||||||
Returns the number of rows after aggregation.
|
Returns the number of rows after aggregation.
|
||||||
|
|
||||||
|
|
||||||
|
#### distinct()
|
||||||
|
|
||||||
|
|
||||||
|
Adds a DISTINCT clause to the query, meaning that any duplicate rows
|
||||||
|
in the results will be omitted.
|
||||||
|
|
||||||
|
|
||||||
#### exclude(**kwargs)
|
#### exclude(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -99,6 +99,16 @@ When some of the model fields aren't needed, it is more efficient to omit them f
|
||||||
|
|
||||||
qs = Person.objects_in(database).only('first_name', 'birthday')
|
qs = Person.objects_in(database).only('first_name', 'birthday')
|
||||||
|
|
||||||
|
Distinct
|
||||||
|
--------
|
||||||
|
|
||||||
|
Adds a DISTINCT clause to the query, meaning that any duplicate rows in the results will be omitted.
|
||||||
|
|
||||||
|
>>> Person.objects_in(database).only('first_name').count()
|
||||||
|
100
|
||||||
|
>>> Person.objects_in(database).only('first_name').distinct().count()
|
||||||
|
94
|
||||||
|
|
||||||
Slicing
|
Slicing
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
|
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
|
||||||
* [Ordering](querysets.md#ordering)
|
* [Ordering](querysets.md#ordering)
|
||||||
* [Omitting Fields](querysets.md#omitting-fields)
|
* [Omitting Fields](querysets.md#omitting-fields)
|
||||||
|
* [Distinct](querysets.md#distinct)
|
||||||
* [Slicing](querysets.md#slicing)
|
* [Slicing](querysets.md#slicing)
|
||||||
* [Pagination](querysets.md#pagination)
|
* [Pagination](querysets.md#pagination)
|
||||||
* [Aggregation](querysets.md#aggregation)
|
* [Aggregation](querysets.md#aggregation)
|
||||||
|
|
|
@ -51,7 +51,10 @@ def get_method_sig(method):
|
||||||
for arg in argspec.args:
|
for arg in argspec.args:
|
||||||
default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)
|
default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)
|
||||||
if default_arg.has_default:
|
if default_arg.has_default:
|
||||||
args.append("%s=%s" % (arg, default_arg.default_value))
|
val = default_arg.default_value
|
||||||
|
if isinstance(val, basestring):
|
||||||
|
val = '"' + val + '"'
|
||||||
|
args.append("%s=%s" % (arg, val))
|
||||||
else:
|
else:
|
||||||
args.append(arg)
|
args.append(arg)
|
||||||
arg_index += 1
|
arg_index += 1
|
||||||
|
|
|
@ -187,6 +187,7 @@ class QuerySet(object):
|
||||||
self._q = []
|
self._q = []
|
||||||
self._fields = []
|
self._fields = []
|
||||||
self._limits = None
|
self._limits = None
|
||||||
|
self._distinct = False
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
"""
|
"""
|
||||||
|
@ -228,14 +229,15 @@ class QuerySet(object):
|
||||||
"""
|
"""
|
||||||
Returns the whole query as a SQL string.
|
Returns the whole query as a SQL string.
|
||||||
"""
|
"""
|
||||||
|
distinct = 'DISTINCT ' if self._distinct else ''
|
||||||
fields = '*'
|
fields = '*'
|
||||||
if self._fields:
|
if self._fields:
|
||||||
fields = comma_join('`%s`' % field for field in self._fields)
|
fields = comma_join('`%s`' % field for field in self._fields)
|
||||||
ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
|
ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
|
||||||
limit = '\nLIMIT %d, %d' % self._limits if self._limits else ''
|
limit = '\nLIMIT %d, %d' % self._limits if self._limits else ''
|
||||||
params = (fields, self._model_cls.table_name(),
|
params = (distinct, fields, self._model_cls.table_name(),
|
||||||
self.conditions_as_sql(), ordering, limit)
|
self.conditions_as_sql(), ordering, limit)
|
||||||
return u'SELECT %s\nFROM `%s`\nWHERE %s%s%s' % params
|
return u'SELECT %s%s\nFROM `%s`\nWHERE %s%s%s' % params
|
||||||
|
|
||||||
def order_by_as_sql(self):
|
def order_by_as_sql(self):
|
||||||
"""
|
"""
|
||||||
|
@ -259,6 +261,11 @@ class QuerySet(object):
|
||||||
"""
|
"""
|
||||||
Returns the number of matching model instances.
|
Returns the number of matching model instances.
|
||||||
"""
|
"""
|
||||||
|
if self._distinct:
|
||||||
|
# Use a subquery, since a simple count won't be accurate
|
||||||
|
sql = u'SELECT count() FROM (%s)' % self.as_sql()
|
||||||
|
raw = self._database.raw(sql)
|
||||||
|
return int(raw) if raw else 0
|
||||||
return self._database.count(self._model_cls, self.conditions_as_sql())
|
return self._database.count(self._model_cls, self.conditions_as_sql())
|
||||||
|
|
||||||
def order_by(self, *field_names):
|
def order_by(self, *field_names):
|
||||||
|
@ -296,7 +303,7 @@ class QuerySet(object):
|
||||||
return qs
|
return qs
|
||||||
|
|
||||||
def paginate(self, page_num=1, page_size=100):
|
def paginate(self, page_num=1, page_size=100):
|
||||||
'''
|
"""
|
||||||
Returns a single page of model instances that match the queryset.
|
Returns a single page of model instances that match the queryset.
|
||||||
Note that `order_by` should be used first, to ensure a correct
|
Note that `order_by` should be used first, to ensure a correct
|
||||||
partitioning of records into pages.
|
partitioning of records into pages.
|
||||||
|
@ -306,7 +313,7 @@ class QuerySet(object):
|
||||||
|
|
||||||
The result is a namedtuple containing `objects` (list), `number_of_objects`,
|
The result is a namedtuple containing `objects` (list), `number_of_objects`,
|
||||||
`pages_total`, `number` (of the current page), and `page_size`.
|
`pages_total`, `number` (of the current page), and `page_size`.
|
||||||
'''
|
"""
|
||||||
from .database import Page
|
from .database import Page
|
||||||
count = self.count()
|
count = self.count()
|
||||||
pages_total = int(ceil(count / float(page_size)))
|
pages_total = int(ceil(count / float(page_size)))
|
||||||
|
@ -323,8 +330,17 @@ class QuerySet(object):
|
||||||
page_size=page_size
|
page_size=page_size
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def distinct(self):
|
||||||
|
"""
|
||||||
|
Adds a DISTINCT clause to the query, meaning that any duplicate rows
|
||||||
|
in the results will be omitted.
|
||||||
|
"""
|
||||||
|
qs = copy(self)
|
||||||
|
qs._distinct = True
|
||||||
|
return qs
|
||||||
|
|
||||||
def aggregate(self, *args, **kwargs):
|
def aggregate(self, *args, **kwargs):
|
||||||
'''
|
"""
|
||||||
Returns an `AggregateQuerySet` over this query, with `args` serving as
|
Returns an `AggregateQuerySet` over this query, with `args` serving as
|
||||||
grouping fields and `kwargs` serving as calculated fields. At least one
|
grouping fields and `kwargs` serving as calculated fields. At least one
|
||||||
calculated field is required. For example:
|
calculated field is required. For example:
|
||||||
|
@ -337,7 +353,7 @@ class QuerySet(object):
|
||||||
WHERE data > '2017-08-01'
|
WHERE data > '2017-08-01'
|
||||||
GROUP BY event_type
|
GROUP BY event_type
|
||||||
```
|
```
|
||||||
'''
|
"""
|
||||||
return AggregateQuerySet(self, args, kwargs)
|
return AggregateQuerySet(self, args, kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@ -368,6 +384,7 @@ class AggregateQuerySet(QuerySet):
|
||||||
self._order_by = list(base_qs._order_by)
|
self._order_by = list(base_qs._order_by)
|
||||||
self._q = list(base_qs._q)
|
self._q = list(base_qs._q)
|
||||||
self._limits = base_qs._limits
|
self._limits = base_qs._limits
|
||||||
|
self._distinct = base_qs._distinct
|
||||||
|
|
||||||
def group_by(self, *args):
|
def group_by(self, *args):
|
||||||
"""
|
"""
|
||||||
|
@ -398,15 +415,17 @@ class AggregateQuerySet(QuerySet):
|
||||||
"""
|
"""
|
||||||
Returns the whole query as a SQL string.
|
Returns the whole query as a SQL string.
|
||||||
"""
|
"""
|
||||||
|
distinct = 'DISTINCT ' if self._distinct else ''
|
||||||
grouping = comma_join('`%s`' % field for field in self._grouping_fields)
|
grouping = comma_join('`%s`' % field for field in self._grouping_fields)
|
||||||
fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
|
fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
|
||||||
params = dict(
|
params = dict(
|
||||||
|
distinct=distinct,
|
||||||
grouping=grouping or "''",
|
grouping=grouping or "''",
|
||||||
fields=fields,
|
fields=fields,
|
||||||
table=self._model_cls.table_name(),
|
table=self._model_cls.table_name(),
|
||||||
conds=self.conditions_as_sql()
|
conds=self.conditions_as_sql()
|
||||||
)
|
)
|
||||||
sql = u'SELECT %(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
|
sql = u'SELECT %(distinct)s%(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
|
||||||
if self._order_by:
|
if self._order_by:
|
||||||
sql += '\nORDER BY ' + self.order_by_as_sql()
|
sql += '\nORDER BY ' + self.order_by_as_sql()
|
||||||
if self._limits:
|
if self._limits:
|
||||||
|
|
|
@ -21,8 +21,11 @@ class QuerySetTestCase(TestCaseWithData):
|
||||||
|
|
||||||
def _test_qs(self, qs, expected_count):
|
def _test_qs(self, qs, expected_count):
|
||||||
logging.info(qs.as_sql())
|
logging.info(qs.as_sql())
|
||||||
|
count = 0
|
||||||
for instance in qs:
|
for instance in qs:
|
||||||
logging.info('\t%s' % instance.to_dict())
|
count += 1
|
||||||
|
logging.info('\t[%d]\t%s' % (count, instance.to_dict()))
|
||||||
|
self.assertEquals(count, expected_count)
|
||||||
self.assertEquals(qs.count(), expected_count)
|
self.assertEquals(qs.count(), expected_count)
|
||||||
|
|
||||||
def test_no_filtering(self):
|
def test_no_filtering(self):
|
||||||
|
@ -202,6 +205,11 @@ class QuerySetTestCase(TestCaseWithData):
|
||||||
page = qs.paginate(1, 100)
|
page = qs.paginate(1, 100)
|
||||||
self.assertEquals(page.number_of_objects, 10)
|
self.assertEquals(page.number_of_objects, 10)
|
||||||
|
|
||||||
|
def test_distinct(self):
|
||||||
|
qs = Person.objects_in(self.database).distinct()
|
||||||
|
self._test_qs(qs, 100)
|
||||||
|
self._test_qs(qs.only('first_name'), 94)
|
||||||
|
|
||||||
|
|
||||||
class AggregateTestCase(TestCaseWithData):
|
class AggregateTestCase(TestCaseWithData):
|
||||||
|
|
||||||
|
@ -310,6 +318,12 @@ class AggregateTestCase(TestCaseWithData):
|
||||||
qs = qs.filter(weekday=1)
|
qs = qs.filter(weekday=1)
|
||||||
self.assertEquals(qs.count(), 1)
|
self.assertEquals(qs.count(), 1)
|
||||||
|
|
||||||
|
def test_aggregate_with_distinct(self):
|
||||||
|
# In this case distinct has no effect
|
||||||
|
qs = Person.objects_in(self.database).aggregate(average_height='avg(height)').distinct()
|
||||||
|
print(qs.as_sql())
|
||||||
|
self.assertEquals(qs.count(), 1)
|
||||||
|
|
||||||
|
|
||||||
Color = Enum('Color', u'red blue green yellow brown white black')
|
Color = Enum('Color', u'red blue green yellow brown white black')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user