Add distinct method to querysets

This commit is contained in:
Itai Shirav 2017-09-10 17:17:04 +03:00
parent 7bbcae574a
commit 59564f8c70
7 changed files with 79 additions and 17 deletions

View File

@ -3,6 +3,7 @@ Change Log
Unreleased Unreleased
---------- ----------
- Add `distinct` method to querysets
- Add `AlterTableWithBuffer` migration operation - Add `AlterTableWithBuffer` migration operation
v0.9.6 v0.9.6

View File

@ -585,6 +585,13 @@ Returns the contents of the query's `WHERE` clause as a string.
Returns the number of matching model instances. Returns the number of matching model instances.
#### distinct()
Adds a DISTINCT clause to the query, meaning that any duplicate rows
in the results will be omitted.
#### exclude(**kwargs) #### exclude(**kwargs)
@ -678,6 +685,13 @@ Returns the contents of the query's `WHERE` clause as a string.
Returns the number of rows after aggregation. Returns the number of rows after aggregation.
#### distinct()
Adds a DISTINCT clause to the query, meaning that any duplicate rows
in the results will be omitted.
#### exclude(**kwargs) #### exclude(**kwargs)

View File

@ -99,6 +99,16 @@ When some of the model fields aren't needed, it is more efficient to omit them f
qs = Person.objects_in(database).only('first_name', 'birthday') qs = Person.objects_in(database).only('first_name', 'birthday')
Distinct
--------
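Adds a DISTINCT clause to the query, meaning that any duplicate rows in the results will be omitted. Rows are compared using only the selected fields, so `distinct` is typically combined with `only`, as in the example below.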
>>> Person.objects_in(database).only('first_name').count()
100
>>> Person.objects_in(database).only('first_name').distinct().count()
94
Slicing Slicing
------- -------

View File

@ -20,6 +20,7 @@
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence) * [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
* [Ordering](querysets.md#ordering) * [Ordering](querysets.md#ordering)
* [Omitting Fields](querysets.md#omitting-fields) * [Omitting Fields](querysets.md#omitting-fields)
* [Distinct](querysets.md#distinct)
* [Slicing](querysets.md#slicing) * [Slicing](querysets.md#slicing)
* [Pagination](querysets.md#pagination) * [Pagination](querysets.md#pagination)
* [Aggregation](querysets.md#aggregation) * [Aggregation](querysets.md#aggregation)

View File

@ -51,7 +51,10 @@ def get_method_sig(method):
for arg in argspec.args: for arg in argspec.args:
default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index) default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)
if default_arg.has_default: if default_arg.has_default:
args.append("%s=%s" % (arg, default_arg.default_value)) val = default_arg.default_value
if isinstance(val, basestring):
val = '"' + val + '"'
args.append("%s=%s" % (arg, val))
else: else:
args.append(arg) args.append(arg)
arg_index += 1 arg_index += 1

View File

@ -187,6 +187,7 @@ class QuerySet(object):
self._q = [] self._q = []
self._fields = [] self._fields = []
self._limits = None self._limits = None
self._distinct = False
def __iter__(self): def __iter__(self):
""" """
@ -228,14 +229,15 @@ class QuerySet(object):
""" """
Returns the whole query as a SQL string. Returns the whole query as a SQL string.
""" """
distinct = 'DISTINCT ' if self._distinct else ''
fields = '*' fields = '*'
if self._fields: if self._fields:
fields = comma_join('`%s`' % field for field in self._fields) fields = comma_join('`%s`' % field for field in self._fields)
ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else '' ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
limit = '\nLIMIT %d, %d' % self._limits if self._limits else '' limit = '\nLIMIT %d, %d' % self._limits if self._limits else ''
params = (fields, self._model_cls.table_name(), params = (distinct, fields, self._model_cls.table_name(),
self.conditions_as_sql(), ordering, limit) self.conditions_as_sql(), ordering, limit)
return u'SELECT %s\nFROM `%s`\nWHERE %s%s%s' % params return u'SELECT %s%s\nFROM `%s`\nWHERE %s%s%s' % params
def order_by_as_sql(self): def order_by_as_sql(self):
""" """
@ -259,6 +261,11 @@ class QuerySet(object):
""" """
Returns the number of matching model instances. Returns the number of matching model instances.
""" """
if self._distinct:
# Use a subquery, since a simple count won't be accurate
sql = u'SELECT count() FROM (%s)' % self.as_sql()
raw = self._database.raw(sql)
return int(raw) if raw else 0
return self._database.count(self._model_cls, self.conditions_as_sql()) return self._database.count(self._model_cls, self.conditions_as_sql())
def order_by(self, *field_names): def order_by(self, *field_names):
@ -296,7 +303,7 @@ class QuerySet(object):
return qs return qs
def paginate(self, page_num=1, page_size=100): def paginate(self, page_num=1, page_size=100):
''' """
Returns a single page of model instances that match the queryset. Returns a single page of model instances that match the queryset.
Note that `order_by` should be used first, to ensure a correct Note that `order_by` should be used first, to ensure a correct
partitioning of records into pages. partitioning of records into pages.
@ -306,7 +313,7 @@ class QuerySet(object):
The result is a namedtuple containing `objects` (list), `number_of_objects`, The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`. `pages_total`, `number` (of the current page), and `page_size`.
''' """
from .database import Page from .database import Page
count = self.count() count = self.count()
pages_total = int(ceil(count / float(page_size))) pages_total = int(ceil(count / float(page_size)))
@ -323,8 +330,17 @@ class QuerySet(object):
page_size=page_size page_size=page_size
) )
def distinct(self):
    """
    Returns a copy of this queryset with its DISTINCT flag switched on,
    so that duplicate rows are left out of the results. The flag is set
    on the copy only, not on the queryset this method is called on.
    """
    clone = copy(self)
    clone._distinct = True
    return clone
def aggregate(self, *args, **kwargs): def aggregate(self, *args, **kwargs):
''' """
Returns an `AggregateQuerySet` over this query, with `args` serving as Returns an `AggregateQuerySet` over this query, with `args` serving as
grouping fields and `kwargs` serving as calculated fields. At least one grouping fields and `kwargs` serving as calculated fields. At least one
calculated field is required. For example: calculated field is required. For example:
@ -337,7 +353,7 @@ class QuerySet(object):
WHERE data > '2017-08-01' WHERE data > '2017-08-01'
GROUP BY event_type GROUP BY event_type
``` ```
''' """
return AggregateQuerySet(self, args, kwargs) return AggregateQuerySet(self, args, kwargs)
@ -368,6 +384,7 @@ class AggregateQuerySet(QuerySet):
self._order_by = list(base_qs._order_by) self._order_by = list(base_qs._order_by)
self._q = list(base_qs._q) self._q = list(base_qs._q)
self._limits = base_qs._limits self._limits = base_qs._limits
self._distinct = base_qs._distinct
def group_by(self, *args): def group_by(self, *args):
""" """
@ -398,15 +415,17 @@ class AggregateQuerySet(QuerySet):
""" """
Returns the whole query as a SQL string. Returns the whole query as a SQL string.
""" """
distinct = 'DISTINCT ' if self._distinct else ''
grouping = comma_join('`%s`' % field for field in self._grouping_fields) grouping = comma_join('`%s`' % field for field in self._grouping_fields)
fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
params = dict( params = dict(
distinct=distinct,
grouping=grouping or "''", grouping=grouping or "''",
fields=fields, fields=fields,
table=self._model_cls.table_name(), table=self._model_cls.table_name(),
conds=self.conditions_as_sql() conds=self.conditions_as_sql()
) )
sql = u'SELECT %(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params sql = u'SELECT %(distinct)s%(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
if self._order_by: if self._order_by:
sql += '\nORDER BY ' + self.order_by_as_sql() sql += '\nORDER BY ' + self.order_by_as_sql()
if self._limits: if self._limits:

View File

@ -21,8 +21,11 @@ class QuerySetTestCase(TestCaseWithData):
def _test_qs(self, qs, expected_count): def _test_qs(self, qs, expected_count):
logging.info(qs.as_sql()) logging.info(qs.as_sql())
count = 0
for instance in qs: for instance in qs:
logging.info('\t%s' % instance.to_dict()) count += 1
logging.info('\t[%d]\t%s' % (count, instance.to_dict()))
self.assertEquals(count, expected_count)
self.assertEquals(qs.count(), expected_count) self.assertEquals(qs.count(), expected_count)
def test_no_filtering(self): def test_no_filtering(self):
@ -202,6 +205,11 @@ class QuerySetTestCase(TestCaseWithData):
page = qs.paginate(1, 100) page = qs.paginate(1, 100)
self.assertEquals(page.number_of_objects, 10) self.assertEquals(page.number_of_objects, 10)
def test_distinct(self):
    """
    Checks the row counts produced by `distinct()`, both over all fields
    and when restricted to a single field.
    """
    distinct_people = Person.objects_in(self.database).distinct()
    # With every field selected, all 100 rows are expected to remain
    self._test_qs(distinct_people, 100)
    # Restricted to first names, 94 different values are expected
    distinct_first_names = distinct_people.only('first_name')
    self._test_qs(distinct_first_names, 94)
class AggregateTestCase(TestCaseWithData): class AggregateTestCase(TestCaseWithData):
@ -310,6 +318,12 @@ class AggregateTestCase(TestCaseWithData):
qs = qs.filter(weekday=1) qs = qs.filter(weekday=1)
self.assertEquals(qs.count(), 1) self.assertEquals(qs.count(), 1)
def test_aggregate_with_distinct(self):
    """
    Checks that `distinct()` can be chained after `aggregate()` and that
    the resulting query still yields a single row.
    """
    # In this case distinct has no effect: the aggregation has no grouping
    # fields, and a single result row is expected either way
    qs = Person.objects_in(self.database).aggregate(average_height='avg(height)').distinct()
    # Log the SQL (as _test_qs does) instead of printing to stdout
    logging.info(qs.as_sql())
    self.assertEqual(qs.count(), 1)
Color = Enum('Color', u'red blue green yellow brown white black') Color = Enum('Color', u'red blue green yellow brown white black')