This commit is contained in:
Itai Shirav 2020-05-28 18:07:45 +03:00
parent 9e119f33e6
commit f084b6e95f
3 changed files with 147 additions and 184 deletions

View File

@ -1,3 +1,4 @@
Querysets
=========
@ -16,31 +17,76 @@ Filtering
The `filter` and `exclude` methods are used for filtering the matching instances. Calling these methods returns a new queryset instance, with the added conditions. For example:
>>> qs = Person.objects_in(database)
>>> qs = qs.filter(first_name__startswith='V').exclude(birthday__lt='2000-01-01')
>>> qs = qs.filter(F.like(Person.first_name, 'V%')).exclude(Person.birthday < '2000-01-01')
>>> qs.conditions_as_sql()
u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')"
"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')"
It is possible to specify several fields to filter or exclude by:
It is possible to specify several expressions to filter or exclude by, and they will be ANDed together:
>>> qs = Person.objects_in(database).filter(last_name='Smith', height__gt=1.75)
>>> qs = Person.objects_in(database).filter(Person.last_name == 'Smith', Person.height > 1.75)
>>> qs.conditions_as_sql()
u"last_name = 'Smith' AND height > 1.75"
"last_name = 'Smith' AND height > 1.75"
For filters with compound conditions you can use `Q` objects inside `filter` with overloaded operators `&` (AND), `|` (OR) and `~` (NOT):
>>> qs = Person.objects_in(database).filter((Q(first_name='Ciaran', last_name='Carver') | Q(height_lte=1.8)) & ~Q(first_name='David'))
>>> qs.conditions_as_sql()
u"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"
By default conditions from `filter` and `exclude` methods are add to `WHERE` clause.
For better aggregation performance you can add them to `PREWHERE` section using `prewhere=True` parameter
For compound conditions you can use the overloaded operators `&` (AND), `|` (OR) and `~` (NOT):
>>> qs = Person.objects_in(database)
>>> qs = qs.filter(first_name__startswith='V', prewhere=True)
>>> qs.conditions_as_sql(prewhere=True)
u"first_name LIKE 'V%'"
>>> qs = qs.filter(((Person.first_name == 'Ciaran') & (Person.last_name == 'Carver')) | (Person.height <= 1.8) & ~(Person.first_name = 'David'))
>>> qs.conditions_as_sql()
"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"
There are different operators that can be used, by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators.
Note that Python's bitwise operators (`&`, `|`, `~`, `^`) have higher precedence than comparison operators, so always use parentheses when combining these two types of operators in an expression. Otherwise the resulting SQL might be different than what you would expect.
### Using `IN` and `NOT IN`
Filtering queries using ClickHouse's `IN` and `NOT IN` operators requires using the `isIn` and `isNotIn` functions (trying to use Python's `in` keyword will not work!).
For example:
```python
# Is it Monday, Tuesday or Wednesday?
F.isIn(F.toDayOfWeek(F.now()), [1, 2, 3])
# This will not work:
F.toDayOfWeek(F.now()) in [1, 2, 3]
```
In case of model fields, there is a simplified syntax:
```python
# Filtering using F.isIn:
qs.filter(F.isIn(Person.first_name, ['Robert', 'Rob', 'Robbie']))
# Simpler syntax using isIn directly on the field:
qs.filter(Person.first_name.isIn(['Robert', 'Rob', 'Robbie']))
```
The `isIn` and `isNotIn` functions expect either a list/tuple of values, or another queryset (a subquery). For example if we want to select only people with Irish last names:
```python
# A list of values
qs = Person.objects_in(database).filter(Person.last_name.isIn(["Murphy", "O'Sullivan"]))
# A queryset
subquery = IrishLastName.objects_in(database).only("name")
qs = Person.objects_in(database).filter(Person.last_name.isIn(subquery))
```
### Specifying PREWHERE conditions
By default conditions from `filter` and `exclude` methods are add to `WHERE` clause.
For better aggregation performance you can add them to `PREWHERE` section by adding a `prewhere=True` parameter:
>>> qs = Person.objects_in(database)
>>> qs = qs.filter(F.like(Person.first_name, 'V%'), prewhere=True)
>>> qs.conditions_as_sql(prewhere=True)
"first_name LIKE 'V%'"
### Old-style filter conditions
Prior to version 2 of the ORM, filtering conditions were limited to a predefined set of operators, and complex expressions were not supported. This old syntax is still supported, so you can use it alongside or even intermixed with new-style functions and expressions.
The old syntax uses keyword arguments to the `filter` and `exclude` methods, that are built as `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. For example:
```python
qs = Position.objects.in(database)
# New style
qs = qs.filter(Position.x > 100, Position.y < 20, Position.terrain == 'water')
# Old style
qs = qs.filter(x__gt=100, y__lt=20, terrain='water')
```
Below are all the supported operators.
| Operator | Equivalent SQL | Comments |
| -------- | -------------------------------------------- | ---------------------------------- |
@ -51,8 +97,8 @@ There are different operators that can be used, by passing `<fieldname>__<operat
| `lt` | `field < value` | |
| `lte` | `field <= value` | |
| `between` | `field BETWEEN value1 AND value2` | |
| `in` | `field IN (values)` | See below |
| `not_in` | `field NOT IN (values)` | See below |
| `in` | `field IN (values)` | |
| `not_in` | `field NOT IN (values)` | |
| `contains` | `field LIKE '%value%'` | For string fields only |
| `startswith` | `field LIKE 'value%'` | For string fields only |
| `endswith` | `field LIKE '%value'` | For string fields only |
@ -61,27 +107,6 @@ There are different operators that can be used, by passing `<fieldname>__<operat
| `iendswith` | `lowerUTF8(field) LIKE lowerUTF8('%value')` | For string fields only |
| `iexact` | `lowerUTF8(field) = lowerUTF8(value)` | For string fields only |
### Using the `in` Operator
The `in` and `not_in` operators expect one of three types of values:
* A list or tuple of simple values
* A string, which is used verbatim as the contents of the parentheses
* Another queryset (subquery)
For example if we want to select only people with Irish last names:
# A list of simple values
qs = Person.objects_in(database).filter(last_name__in=["Murphy", "O'Sullivan"])
# A string
subquery = "SELECT name from $db.irishlastname"
qs = Person.objects_in(database).filter(last_name__in=subquery)
# A queryset
subquery = IrishLastName.objects_in(database).only("name")
qs = Person.objects_in(database).filter(last_name__in=subquery)
Counting and Checking Existence
-------------------------------
@ -129,7 +154,7 @@ Final
--------
This method can be used only with `CollapsingMergeTree` engine.
Adds a FINAL modifier to the query, meaning data is selected fully "collapsed" by sign field.
Adds a FINAL modifier to the query, meaning that the selected data is fully "collapsed" by the engine's sign field.
>>> Person.objects_in(database).count()
100
@ -144,8 +169,7 @@ It is possible to get a specific item from the queryset by index:
qs = Person.objects_in(database).order_by('last_name', 'first_name')
first = qs[0]
It is also possible to get a range a instances using a slice. This returns a queryset,
that you can either iterate over or convert to a list.
It is also possible to get a range a instances using a slice. This returns a queryset, that you can either iterate over or convert to a list.
qs = Person.objects_in(database).order_by('last_name', 'first_name')
first_ten_people = list(qs[:10])
@ -184,7 +208,7 @@ Aggregation
It is possible to use aggregation functions over querysets using the `aggregate` method. The simplest form of aggregation works over all rows in the queryset:
>>> qs = Person.objects_in(database).aggregate(average_height='avg(height)')
>>> qs = Person.objects_in(database).aggregate(average_height=F.avg(Person.height))
>>> print(qs.count())
1
>>> for row in qs: print(row.average_height)
@ -192,29 +216,35 @@ It is possible to use aggregation functions over querysets using the `aggregate`
The returned row or rows are no longer instances of the base model (`Person` in this example), but rather instances of an ad-hoc model that includes only the fields specified in the call to `aggregate`.
You can pass names of fields from the model that will be included in the query. By default, they will be also used in the GROUP BY clause. For example to count the number of people per last name you could do this:
You can pass fields from the model that will be included in the query. By default, they will be also used in the GROUP BY clause. For example to count the number of people per last name you could do this:
qs = Person.objects_in(database).aggregate('last_name', num='count()')
qs = Person.objects_in(database).aggregate(Person.last_name, num=F.count())
The underlying SQL query would be something like this:
SELECT last_name, count() AS num FROM person GROUP BY last_name
SELECT last_name, count() AS num
FROM person
GROUP BY last_name
If you would like to control the GROUP BY explicitly, use the `group_by` method. This is useful when you need to group by a calculated field, instead of a field that exists in the model. For example, to count the number of people born on each weekday:
qs = Person.objects_in(database).aggregate(weekday='toDayOfWeek(birthday)', num='count()').group_by('weekday')
qs = Person.objects_in(database).aggregate(weekday=F.toDayOfWeek(Person.birthday), num=F.count()).group_by('weekday')
This queryset is translated to:
SELECT toDayOfWeek(birthday) AS weekday, count() AS num FROM person GROUP BY weekday
SELECT toDayOfWeek(birthday) AS weekday, count() AS num
FROM person
GROUP BY weekday
After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model.
After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the aggregated queryset on calculated fields, only on fields that exist in the model.
### Adding totals
If you limit aggregation results, it might be useful to get total aggregation values for all rows.
To achieve this, you can use `with_totals` method. It will return extra row (last) with
values aggregated for all rows suitable for filters.
qs = Person.objects_in(database).aggregate('first_name', num='count()').with_totals().order_by('-count')[:3]
qs = Person.objects_in(database).aggregate(Person.first_name, num=F.count()).with_totals().order_by('-count')[:3]
>>> print(qs.count())
4
>>> for row in qs:

View File

@ -0,0 +1,58 @@
What's New in Version 2
=======================
## Python 3.5+ Only
This version of the ORM no longer support Python 2.
## New flexible syntax for database expressions and functions
Expressions that use model fields, database functions and Python operators are now first-class citizens of the ORM. They provide infinite expressivity and flexibility when defining models and generating queries.
Example of expressions in model definition:
```python
class Temperature(Model):
station_id = UInt16Field()
timestamp = DateTimeField(default=F.now()) # function as default value
degrees_celsius = Float32Field()
degrees_fahrenheit = Float32Field(alias=degrees_celsius * 1.8 + 32) # expression as field alias
# expressions in engine definition
engine = MergeTree(partition_key=[F.toYYYYMM(timestamp)], order_by=[station_id, timestamp])
```
Example of expressions in queries:
```python
db = Database('default')
start = F.toStartOfMonth(F.now())
expr = (Temperature.timestamp > start) & (Temperature.station_id == 123) & (Temperature.degrees_celsius > 30)
for t in Temperature.objects_in(db).filter(expr):
print(t.timestamp, t.degrees_celsius)
```
See [Expressions](expressions.md).
## Support for IPv4 and IPv6 fields
Two new fields classes were added: `IPv4Field` and `IPv6Field`. Their values are represented by Python's `ipaddress.IPv4Address` and `ipaddress.IPv6Address`.
See [Field Types](field_types.md).
## Automatic generation of models by inspecting existing tables
It is now easy to generate a model class on the fly for an existing table in the database using `Database.get_model_for_table`. This is particularly useful for querying system tables, for example:
```python
QueryLog = db.get_model_for_table('query_log', system_table=True)
for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000):
print(row.query)
```
## Convenient ways to import ORM classes
You can now import all ORM classes directly from `infi.clickhouse_orm`, without worrying about sub-modules. For example:
```python
from infi.clickhouse_orm import Database, Model, StringField, DateTimeField, MergeTree
```
See [Importing ORM Classes](importing_orm_classes.md).

View File

@ -322,6 +322,13 @@ class AggregateTestCase(TestCaseWithData):
for row in qs:
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
self.assertEqual(row.count, 100)
# With functions
qs = Person.objects_in(self.database).aggregate(average_height=F.avg(Person.height), count=F.count())
print(qs.as_sql())
self.assertEqual(qs.count(), 1)
for row in qs:
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
self.assertEqual(row.count, 100)
def test_aggregate_with_filter(self):
# When filter comes before aggregate
@ -484,138 +491,6 @@ class AggregateTestCase(TestCaseWithData):
self.assertEquals([p.first_name for p in limited_qs[:3]], ['Norman', 'Octavius', 'Oliver'])
class FuncsTestCase(TestCaseWithData):
def setUp(self):
super(FuncsTestCase, self).setUp()
self.database.insert(self._sample_data())
def _test_qs(self, qs, expected_count):
logger.info(qs.as_sql())
count = 0
for instance in qs:
count += 1
logger.info('\t[%d]\t%s' % (count, instance.to_dict()))
self.assertEqual(count, expected_count)
self.assertEqual(qs.count(), expected_count)
def _test_func(self, func, expected_value=None):
sql = 'SELECT %s AS value' % func.to_sql()
logger.info(sql)
result = list(self.database.select(sql))
logger.info('\t==> %s', result[0].value)
if expected_value is not None:
self.assertEqual(result[0].value, expected_value)
def test_func_to_sql(self):
# No args
self.assertEqual(F('func').to_sql(), 'func()')
# String args
self.assertEqual(F('func', "Wendy's", u"Wendy's").to_sql(), "func('Wendy\\'s', 'Wendy\\'s')")
# Numeric args
self.assertEqual(F('func', 1, 1.1, Decimal('3.3')).to_sql(), "func(1, 1.1, 3.3)")
# Date args
self.assertEqual(F('func', date(2018, 12, 31)).to_sql(), "func(toDate('2018-12-31'))")
# Datetime args
self.assertEqual(F('func', datetime(2018, 12, 31)).to_sql(), "func(toDateTime('1546214400'))")
# Boolean args
self.assertEqual(F('func', True, False).to_sql(), "func(1, 0)")
# Null args
self.assertEqual(F('func', None).to_sql(), "func(NULL)")
# Fields as args
self.assertEqual(F('func', SampleModel.color).to_sql(), "func(`color`)")
# Funcs as args
self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))')
# Iterables as args
x = [1, 'z', F('foo', 17)]
for y in [x, tuple(x), iter(x)]:
self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)")
self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([[1, 2], [3, 4]])")
def test_filter_float_field(self):
qs = Person.objects_in(self.database)
# Height > 2
self._test_qs(qs.filter(F.greater(Person.height, 2)), 0)
self._test_qs(qs.filter(Person.height > 2), 0)
# Height > 1.61
self._test_qs(qs.filter(F.greater(Person.height, 1.61)), 96)
self._test_qs(qs.filter(Person.height > 1.61), 96)
# Height < 1.61
self._test_qs(qs.filter(F.less(Person.height, 1.61)), 4)
self._test_qs(qs.filter(Person.height < 1.61), 4)
def test_filter_date_field(self):
qs = Person.objects_in(self.database)
# People born on the 30th
self._test_qs(qs.filter(F('equals', F('toDayOfMonth', Person.birthday), 30)), 3)
self._test_qs(qs.filter(F('toDayOfMonth', Person.birthday) == 30), 3)
self._test_qs(qs.filter(F.toDayOfMonth(Person.birthday) == 30), 3)
# People born on Sunday
self._test_qs(qs.filter(F('equals', F('toDayOfWeek', Person.birthday), 7)), 18)
self._test_qs(qs.filter(F('toDayOfWeek', Person.birthday) == 7), 18)
self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18)
# People born on 1976-10-01
self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1)
self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1)
self._test_qs(qs.filter(Person.birthday == date(1976, 10, 1)), 1)
def test_func_as_field_value(self):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(height__gt=F.plus(1, 0.61)), 96)
self._test_qs(qs.exclude(birthday=F.today()), 100)
self._test_qs(qs.filter(birthday__between=['1970-01-01', F.today()]), 100)
def test_comparison_operators(self):
one = F.plus(1, 0)
two = F.plus(1, 1)
self._test_func(one > one, 0)
self._test_func(two > one, 1)
self._test_func(one >= two, 0)
self._test_func(one >= one, 1)
self._test_func(one < one, 0)
self._test_func(one < two, 1)
self._test_func(two <= one, 0)
self._test_func(one <= one, 1)
self._test_func(one == two, 0)
self._test_func(one == one, 1)
self._test_func(one != one, 0)
self._test_func(one != two, 1)
def test_arithmetic_operators(self):
one = F.plus(1, 0)
two = F.plus(1, 1)
# +
self._test_func(one + two, 3)
self._test_func(one + 2, 3)
self._test_func(2 + one, 3)
# -
self._test_func(one - two, -1)
self._test_func(one - 2, -1)
self._test_func(1 - two, -1)
# *
self._test_func(one * two, 2)
self._test_func(one * 2, 2)
self._test_func(1 * two, 2)
# /
self._test_func(one / two, 0.5)
self._test_func(one / 2, 0.5)
self._test_func(1 / two, 0.5)
# %
self._test_func(one % two, 1)
self._test_func(one % 2, 1)
self._test_func(1 % two, 1)
# sign
self._test_func(-one, -1)
self._test_func(--one, 1)
self._test_func(+one, 1)
Color = Enum('Color', u'red blue green yellow brown white black')