mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2024-11-22 09:06:41 +03:00
Add distinct
method to querysets
This commit is contained in:
parent
7bbcae574a
commit
59564f8c70
|
@ -3,6 +3,7 @@ Change Log
|
|||
|
||||
Unreleased
|
||||
----------
|
||||
- Add `distinct` method to querysets
|
||||
- Add `AlterTableWithBuffer` migration operation
|
||||
|
||||
v0.9.6
|
||||
|
|
|
@ -7,7 +7,7 @@ infi.clickhouse_orm.database
|
|||
### Database
|
||||
|
||||
|
||||
Database instances connect to a specific ClickHouse database for running queries,
|
||||
Database instances connect to a specific ClickHouse database for running queries,
|
||||
inserting data and other operations.
|
||||
|
||||
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True)
|
||||
|
@ -71,7 +71,7 @@ Insert records into the database.
|
|||
|
||||
Executes schema migrations.
|
||||
|
||||
- `migrations_package_name` - fully qualified name of the Python package
|
||||
- `migrations_package_name` - fully qualified name of the Python package
|
||||
containing the migrations.
|
||||
- `up_to` - number of the last migration to apply.
|
||||
|
||||
|
@ -89,7 +89,7 @@ Selects records and returns a single page of model instances.
|
|||
- `conditions`: optional SQL conditions (contents of the WHERE clause).
|
||||
- `settings`: query settings to send as HTTP GET parameters.
|
||||
|
||||
The result is a namedtuple containing `objects` (list), `number_of_objects`,
|
||||
The result is a namedtuple containing `objects` (list), `number_of_objects`,
|
||||
`pages_total`, `number` (of the current page), and `page_size`.
|
||||
|
||||
|
||||
|
@ -128,7 +128,7 @@ infi.clickhouse_orm.models
|
|||
|
||||
|
||||
A base class for ORM models. Each model class represents a ClickHouse table. For example:
|
||||
|
||||
|
||||
class CPUStats(Model):
|
||||
timestamp = DateTimeField()
|
||||
cpu_id = UInt16Field()
|
||||
|
@ -172,7 +172,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o
|
|||
#### get_database()
|
||||
|
||||
|
||||
Gets the `Database` that this model instance belongs to.
|
||||
Gets the `Database` that this model instance belongs to.
|
||||
Returns `None` unless the instance was read from the database or written to it.
|
||||
|
||||
|
||||
|
@ -191,7 +191,7 @@ Returns a `QuerySet` for selecting instances of this model class.
|
|||
#### set_database(db)
|
||||
|
||||
|
||||
Sets the `Database` that this model instance belongs to.
|
||||
Sets the `Database` that this model instance belongs to.
|
||||
This is done automatically when the instance is read from the database or written to it.
|
||||
|
||||
|
||||
|
@ -261,7 +261,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o
|
|||
#### get_database()
|
||||
|
||||
|
||||
Gets the `Database` that this model instance belongs to.
|
||||
Gets the `Database` that this model instance belongs to.
|
||||
Returns `None` unless the instance was read from the database or written to it.
|
||||
|
||||
|
||||
|
@ -280,7 +280,7 @@ Returns a `QuerySet` for selecting instances of this model class.
|
|||
#### set_database(db)
|
||||
|
||||
|
||||
Sets the `Database` that this model instance belongs to.
|
||||
Sets the `Database` that this model instance belongs to.
|
||||
This is done automatically when the instance is read from the database or written to it.
|
||||
|
||||
|
||||
|
@ -585,6 +585,13 @@ Returns the contents of the query's `WHERE` clause as a string.
|
|||
Returns the number of matching model instances.
|
||||
|
||||
|
||||
#### distinct()
|
||||
|
||||
|
||||
Adds a DISTINCT clause to the query, meaning that any duplicate rows
|
||||
in the results will be omitted.
|
||||
|
||||
|
||||
#### exclude(**kwargs)
|
||||
|
||||
|
||||
|
@ -678,6 +685,13 @@ Returns the contents of the query's `WHERE` clause as a string.
|
|||
Returns the number of rows after aggregation.
|
||||
|
||||
|
||||
#### distinct()
|
||||
|
||||
|
||||
Adds a DISTINCT clause to the query, meaning that any duplicate rows
|
||||
in the results will be omitted.
|
||||
|
||||
|
||||
#### exclude(**kwargs)
|
||||
|
||||
|
||||
|
|
|
@ -99,6 +99,16 @@ When some of the model fields aren't needed, it is more efficient to omit them f
|
|||
|
||||
qs = Person.objects_in(database).only('first_name', 'birthday')
|
||||
|
||||
Distinct
|
||||
--------
|
||||
|
||||
Adds a DISTINCT clause to the query, meaning that any duplicate rows in the results will be omitted.
|
||||
|
||||
>>> Person.objects_in(database).only('first_name').count()
|
||||
100
|
||||
>>> Person.objects_in(database).only('first_name').distinct().count()
|
||||
94
|
||||
|
||||
Slicing
|
||||
-------
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
|
||||
* [Ordering](querysets.md#ordering)
|
||||
* [Omitting Fields](querysets.md#omitting-fields)
|
||||
* [Distinct](querysets.md#distinct)
|
||||
* [Slicing](querysets.md#slicing)
|
||||
* [Pagination](querysets.md#pagination)
|
||||
* [Aggregation](querysets.md#aggregation)
|
||||
|
|
|
@ -51,7 +51,10 @@ def get_method_sig(method):
|
|||
for arg in argspec.args:
|
||||
default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)
|
||||
if default_arg.has_default:
|
||||
args.append("%s=%s" % (arg, default_arg.default_value))
|
||||
val = default_arg.default_value
|
||||
if isinstance(val, basestring):
|
||||
val = '"' + val + '"'
|
||||
args.append("%s=%s" % (arg, val))
|
||||
else:
|
||||
args.append(arg)
|
||||
arg_index += 1
|
||||
|
|
|
@ -187,6 +187,7 @@ class QuerySet(object):
|
|||
self._q = []
|
||||
self._fields = []
|
||||
self._limits = None
|
||||
self._distinct = False
|
||||
|
||||
def __iter__(self):
|
||||
"""
|
||||
|
@ -228,14 +229,15 @@ class QuerySet(object):
|
|||
"""
|
||||
Returns the whole query as a SQL string.
|
||||
"""
|
||||
distinct = 'DISTINCT ' if self._distinct else ''
|
||||
fields = '*'
|
||||
if self._fields:
|
||||
fields = comma_join('`%s`' % field for field in self._fields)
|
||||
ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
|
||||
limit = '\nLIMIT %d, %d' % self._limits if self._limits else ''
|
||||
params = (fields, self._model_cls.table_name(),
|
||||
params = (distinct, fields, self._model_cls.table_name(),
|
||||
self.conditions_as_sql(), ordering, limit)
|
||||
return u'SELECT %s\nFROM `%s`\nWHERE %s%s%s' % params
|
||||
return u'SELECT %s%s\nFROM `%s`\nWHERE %s%s%s' % params
|
||||
|
||||
def order_by_as_sql(self):
|
||||
"""
|
||||
|
@ -259,6 +261,11 @@ class QuerySet(object):
|
|||
"""
|
||||
Returns the number of matching model instances.
|
||||
"""
|
||||
if self._distinct:
|
||||
# Use a subquery, since a simple count won't be accurate
|
||||
sql = u'SELECT count() FROM (%s)' % self.as_sql()
|
||||
raw = self._database.raw(sql)
|
||||
return int(raw) if raw else 0
|
||||
return self._database.count(self._model_cls, self.conditions_as_sql())
|
||||
|
||||
def order_by(self, *field_names):
|
||||
|
@ -296,7 +303,7 @@ class QuerySet(object):
|
|||
return qs
|
||||
|
||||
def paginate(self, page_num=1, page_size=100):
|
||||
'''
|
||||
"""
|
||||
Returns a single page of model instances that match the queryset.
|
||||
Note that `order_by` should be used first, to ensure a correct
|
||||
partitioning of records into pages.
|
||||
|
@ -306,7 +313,7 @@ class QuerySet(object):
|
|||
|
||||
The result is a namedtuple containing `objects` (list), `number_of_objects`,
|
||||
`pages_total`, `number` (of the current page), and `page_size`.
|
||||
'''
|
||||
"""
|
||||
from .database import Page
|
||||
count = self.count()
|
||||
pages_total = int(ceil(count / float(page_size)))
|
||||
|
@ -323,8 +330,17 @@ class QuerySet(object):
|
|||
page_size=page_size
|
||||
)
|
||||
|
||||
def distinct(self):
|
||||
"""
|
||||
Adds a DISTINCT clause to the query, meaning that any duplicate rows
|
||||
in the results will be omitted.
|
||||
"""
|
||||
qs = copy(self)
|
||||
qs._distinct = True
|
||||
return qs
|
||||
|
||||
def aggregate(self, *args, **kwargs):
|
||||
'''
|
||||
"""
|
||||
Returns an `AggregateQuerySet` over this query, with `args` serving as
|
||||
grouping fields and `kwargs` serving as calculated fields. At least one
|
||||
calculated field is required. For example:
|
||||
|
@ -337,7 +353,7 @@ class QuerySet(object):
|
|||
WHERE data > '2017-08-01'
|
||||
GROUP BY event_type
|
||||
```
|
||||
'''
|
||||
"""
|
||||
return AggregateQuerySet(self, args, kwargs)
|
||||
|
||||
|
||||
|
@ -368,6 +384,7 @@ class AggregateQuerySet(QuerySet):
|
|||
self._order_by = list(base_qs._order_by)
|
||||
self._q = list(base_qs._q)
|
||||
self._limits = base_qs._limits
|
||||
self._distinct = base_qs._distinct
|
||||
|
||||
def group_by(self, *args):
|
||||
"""
|
||||
|
@ -398,15 +415,17 @@ class AggregateQuerySet(QuerySet):
|
|||
"""
|
||||
Returns the whole query as a SQL string.
|
||||
"""
|
||||
distinct = 'DISTINCT ' if self._distinct else ''
|
||||
grouping = comma_join('`%s`' % field for field in self._grouping_fields)
|
||||
fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
|
||||
params = dict(
|
||||
distinct=distinct,
|
||||
grouping=grouping or "''",
|
||||
fields=fields,
|
||||
table=self._model_cls.table_name(),
|
||||
conds=self.conditions_as_sql()
|
||||
)
|
||||
sql = u'SELECT %(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
|
||||
sql = u'SELECT %(distinct)s%(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
|
||||
if self._order_by:
|
||||
sql += '\nORDER BY ' + self.order_by_as_sql()
|
||||
if self._limits:
|
||||
|
|
|
@ -21,8 +21,11 @@ class QuerySetTestCase(TestCaseWithData):
|
|||
|
||||
def _test_qs(self, qs, expected_count):
|
||||
logging.info(qs.as_sql())
|
||||
count = 0
|
||||
for instance in qs:
|
||||
logging.info('\t%s' % instance.to_dict())
|
||||
count += 1
|
||||
logging.info('\t[%d]\t%s' % (count, instance.to_dict()))
|
||||
self.assertEquals(count, expected_count)
|
||||
self.assertEquals(qs.count(), expected_count)
|
||||
|
||||
def test_no_filtering(self):
|
||||
|
@ -202,6 +205,11 @@ class QuerySetTestCase(TestCaseWithData):
|
|||
page = qs.paginate(1, 100)
|
||||
self.assertEquals(page.number_of_objects, 10)
|
||||
|
||||
def test_distinct(self):
|
||||
qs = Person.objects_in(self.database).distinct()
|
||||
self._test_qs(qs, 100)
|
||||
self._test_qs(qs.only('first_name'), 94)
|
||||
|
||||
|
||||
class AggregateTestCase(TestCaseWithData):
|
||||
|
||||
|
@ -310,6 +318,12 @@ class AggregateTestCase(TestCaseWithData):
|
|||
qs = qs.filter(weekday=1)
|
||||
self.assertEquals(qs.count(), 1)
|
||||
|
||||
def test_aggregate_with_distinct(self):
|
||||
# In this case distinct has no effect
|
||||
qs = Person.objects_in(self.database).aggregate(average_height='avg(height)').distinct()
|
||||
print(qs.as_sql())
|
||||
self.assertEquals(qs.count(), 1)
|
||||
|
||||
|
||||
Color = Enum('Color', u'red blue green yellow brown white black')
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user