mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2024-11-24 18:03:42 +03:00
docs
This commit is contained in:
parent
9e119f33e6
commit
f084b6e95f
|
@ -1,3 +1,4 @@
|
|||
|
||||
Querysets
|
||||
=========
|
||||
|
||||
|
@ -16,31 +17,76 @@ Filtering
|
|||
The `filter` and `exclude` methods are used for filtering the matching instances. Calling these methods returns a new queryset instance, with the added conditions. For example:
|
||||
|
||||
>>> qs = Person.objects_in(database)
|
||||
>>> qs = qs.filter(first_name__startswith='V').exclude(birthday__lt='2000-01-01')
|
||||
>>> qs = qs.filter(F.like(Person.first_name, 'V%')).exclude(Person.birthday < '2000-01-01')
|
||||
>>> qs.conditions_as_sql()
|
||||
u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')"
|
||||
"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')"
|
||||
|
||||
It is possible to specify several fields to filter or exclude by:
|
||||
It is possible to specify several expressions to filter or exclude by, and they will be ANDed together:
|
||||
|
||||
>>> qs = Person.objects_in(database).filter(last_name='Smith', height__gt=1.75)
|
||||
>>> qs = Person.objects_in(database).filter(Person.last_name == 'Smith', Person.height > 1.75)
|
||||
>>> qs.conditions_as_sql()
|
||||
u"last_name = 'Smith' AND height > 1.75"
|
||||
"last_name = 'Smith' AND height > 1.75"
|
||||
|
||||
For filters with compound conditions you can use `Q` objects inside `filter` with overloaded operators `&` (AND), `|` (OR) and `~` (NOT):
|
||||
|
||||
>>> qs = Person.objects_in(database).filter((Q(first_name='Ciaran', last_name='Carver') | Q(height_lte=1.8)) & ~Q(first_name='David'))
|
||||
>>> qs.conditions_as_sql()
|
||||
u"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"
|
||||
|
||||
By default conditions from `filter` and `exclude` methods are add to `WHERE` clause.
|
||||
For better aggregation performance you can add them to `PREWHERE` section using `prewhere=True` parameter
|
||||
For compound conditions you can use the overloaded operators `&` (AND), `|` (OR) and `~` (NOT):
|
||||
|
||||
>>> qs = Person.objects_in(database)
|
||||
>>> qs = qs.filter(first_name__startswith='V', prewhere=True)
|
||||
>>> qs.conditions_as_sql(prewhere=True)
|
||||
u"first_name LIKE 'V%'"
|
||||
>>> qs = qs.filter(((Person.first_name == 'Ciaran') & (Person.last_name == 'Carver')) | (Person.height <= 1.8) & ~(Person.first_name = 'David'))
|
||||
>>> qs.conditions_as_sql()
|
||||
"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"
|
||||
|
||||
There are different operators that can be used, by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators.
|
||||
Note that Python's bitwise operators (`&`, `|`, `~`, `^`) have higher precedence than comparison operators, so always use parentheses when combining these two types of operators in an expression. Otherwise the resulting SQL might be different than what you would expect.
|
||||
|
||||
### Using `IN` and `NOT IN`
|
||||
|
||||
Filtering queries using ClickHouse's `IN` and `NOT IN` operators requires using the `isIn` and `isNotIn` functions (trying to use Python's `in` keyword will not work!).
|
||||
For example:
|
||||
```python
|
||||
# Is it Monday, Tuesday or Wednesday?
|
||||
F.isIn(F.toDayOfWeek(F.now()), [1, 2, 3])
|
||||
# This will not work:
|
||||
F.toDayOfWeek(F.now()) in [1, 2, 3]
|
||||
```
|
||||
|
||||
In case of model fields, there is a simplified syntax:
|
||||
```python
|
||||
# Filtering using F.isIn:
|
||||
qs.filter(F.isIn(Person.first_name, ['Robert', 'Rob', 'Robbie']))
|
||||
# Simpler syntax using isIn directly on the field:
|
||||
qs.filter(Person.first_name.isIn(['Robert', 'Rob', 'Robbie']))
|
||||
```
|
||||
|
||||
The `isIn` and `isNotIn` functions expect either a list/tuple of values, or another queryset (a subquery). For example if we want to select only people with Irish last names:
|
||||
```python
|
||||
# A list of values
|
||||
qs = Person.objects_in(database).filter(Person.last_name.isIn(["Murphy", "O'Sullivan"]))
|
||||
# A queryset
|
||||
subquery = IrishLastName.objects_in(database).only("name")
|
||||
qs = Person.objects_in(database).filter(Person.last_name.isIn(subquery))
|
||||
```
|
||||
|
||||
### Specifying PREWHERE conditions
|
||||
|
||||
By default conditions from `filter` and `exclude` methods are add to `WHERE` clause.
|
||||
For better aggregation performance you can add them to `PREWHERE` section by adding a `prewhere=True` parameter:
|
||||
|
||||
>>> qs = Person.objects_in(database)
|
||||
>>> qs = qs.filter(F.like(Person.first_name, 'V%'), prewhere=True)
|
||||
>>> qs.conditions_as_sql(prewhere=True)
|
||||
"first_name LIKE 'V%'"
|
||||
|
||||
### Old-style filter conditions
|
||||
|
||||
Prior to version 2 of the ORM, filtering conditions were limited to a predefined set of operators, and complex expressions were not supported. This old syntax is still supported, so you can use it alongside or even intermixed with new-style functions and expressions.
|
||||
|
||||
The old syntax uses keyword arguments to the `filter` and `exclude` methods, that are built as `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. For example:
|
||||
```python
|
||||
qs = Position.objects.in(database)
|
||||
# New style
|
||||
qs = qs.filter(Position.x > 100, Position.y < 20, Position.terrain == 'water')
|
||||
# Old style
|
||||
qs = qs.filter(x__gt=100, y__lt=20, terrain='water')
|
||||
```
|
||||
Below are all the supported operators.
|
||||
|
||||
| Operator | Equivalent SQL | Comments |
|
||||
| -------- | -------------------------------------------- | ---------------------------------- |
|
||||
|
@ -51,8 +97,8 @@ There are different operators that can be used, by passing `<fieldname>__<operat
|
|||
| `lt` | `field < value` | |
|
||||
| `lte` | `field <= value` | |
|
||||
| `between` | `field BETWEEN value1 AND value2` | |
|
||||
| `in` | `field IN (values)` | See below |
|
||||
| `not_in` | `field NOT IN (values)` | See below |
|
||||
| `in` | `field IN (values)` | |
|
||||
| `not_in` | `field NOT IN (values)` | |
|
||||
| `contains` | `field LIKE '%value%'` | For string fields only |
|
||||
| `startswith` | `field LIKE 'value%'` | For string fields only |
|
||||
| `endswith` | `field LIKE '%value'` | For string fields only |
|
||||
|
@ -61,27 +107,6 @@ There are different operators that can be used, by passing `<fieldname>__<operat
|
|||
| `iendswith` | `lowerUTF8(field) LIKE lowerUTF8('%value')` | For string fields only |
|
||||
| `iexact` | `lowerUTF8(field) = lowerUTF8(value)` | For string fields only |
|
||||
|
||||
### Using the `in` Operator
|
||||
|
||||
The `in` and `not_in` operators expect one of three types of values:
|
||||
|
||||
* A list or tuple of simple values
|
||||
* A string, which is used verbatim as the contents of the parentheses
|
||||
* Another queryset (subquery)
|
||||
|
||||
For example if we want to select only people with Irish last names:
|
||||
|
||||
# A list of simple values
|
||||
qs = Person.objects_in(database).filter(last_name__in=["Murphy", "O'Sullivan"])
|
||||
|
||||
# A string
|
||||
subquery = "SELECT name from $db.irishlastname"
|
||||
qs = Person.objects_in(database).filter(last_name__in=subquery)
|
||||
|
||||
# A queryset
|
||||
subquery = IrishLastName.objects_in(database).only("name")
|
||||
qs = Person.objects_in(database).filter(last_name__in=subquery)
|
||||
|
||||
Counting and Checking Existence
|
||||
-------------------------------
|
||||
|
||||
|
@ -129,7 +154,7 @@ Final
|
|||
--------
|
||||
|
||||
This method can be used only with `CollapsingMergeTree` engine.
|
||||
Adds a FINAL modifier to the query, meaning data is selected fully "collapsed" by sign field.
|
||||
Adds a FINAL modifier to the query, meaning that the selected data is fully "collapsed" by the engine's sign field.
|
||||
|
||||
>>> Person.objects_in(database).count()
|
||||
100
|
||||
|
@ -144,8 +169,7 @@ It is possible to get a specific item from the queryset by index:
|
|||
qs = Person.objects_in(database).order_by('last_name', 'first_name')
|
||||
first = qs[0]
|
||||
|
||||
It is also possible to get a range a instances using a slice. This returns a queryset,
|
||||
that you can either iterate over or convert to a list.
|
||||
It is also possible to get a range a instances using a slice. This returns a queryset, that you can either iterate over or convert to a list.
|
||||
|
||||
qs = Person.objects_in(database).order_by('last_name', 'first_name')
|
||||
first_ten_people = list(qs[:10])
|
||||
|
@ -184,7 +208,7 @@ Aggregation
|
|||
|
||||
It is possible to use aggregation functions over querysets using the `aggregate` method. The simplest form of aggregation works over all rows in the queryset:
|
||||
|
||||
>>> qs = Person.objects_in(database).aggregate(average_height='avg(height)')
|
||||
>>> qs = Person.objects_in(database).aggregate(average_height=F.avg(Person.height))
|
||||
>>> print(qs.count())
|
||||
1
|
||||
>>> for row in qs: print(row.average_height)
|
||||
|
@ -192,29 +216,35 @@ It is possible to use aggregation functions over querysets using the `aggregate`
|
|||
|
||||
The returned row or rows are no longer instances of the base model (`Person` in this example), but rather instances of an ad-hoc model that includes only the fields specified in the call to `aggregate`.
|
||||
|
||||
You can pass names of fields from the model that will be included in the query. By default, they will be also used in the GROUP BY clause. For example to count the number of people per last name you could do this:
|
||||
You can pass fields from the model that will be included in the query. By default, they will be also used in the GROUP BY clause. For example to count the number of people per last name you could do this:
|
||||
|
||||
qs = Person.objects_in(database).aggregate('last_name', num='count()')
|
||||
qs = Person.objects_in(database).aggregate(Person.last_name, num=F.count())
|
||||
|
||||
The underlying SQL query would be something like this:
|
||||
|
||||
SELECT last_name, count() AS num FROM person GROUP BY last_name
|
||||
SELECT last_name, count() AS num
|
||||
FROM person
|
||||
GROUP BY last_name
|
||||
|
||||
If you would like to control the GROUP BY explicitly, use the `group_by` method. This is useful when you need to group by a calculated field, instead of a field that exists in the model. For example, to count the number of people born on each weekday:
|
||||
|
||||
qs = Person.objects_in(database).aggregate(weekday='toDayOfWeek(birthday)', num='count()').group_by('weekday')
|
||||
qs = Person.objects_in(database).aggregate(weekday=F.toDayOfWeek(Person.birthday), num=F.count()).group_by('weekday')
|
||||
|
||||
This queryset is translated to:
|
||||
|
||||
SELECT toDayOfWeek(birthday) AS weekday, count() AS num FROM person GROUP BY weekday
|
||||
SELECT toDayOfWeek(birthday) AS weekday, count() AS num
|
||||
FROM person
|
||||
GROUP BY weekday
|
||||
|
||||
After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model.
|
||||
After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the aggregated queryset on calculated fields, only on fields that exist in the model.
|
||||
|
||||
### Adding totals
|
||||
|
||||
If you limit aggregation results, it might be useful to get total aggregation values for all rows.
|
||||
To achieve this, you can use `with_totals` method. It will return extra row (last) with
|
||||
values aggregated for all rows suitable for filters.
|
||||
|
||||
qs = Person.objects_in(database).aggregate('first_name', num='count()').with_totals().order_by('-count')[:3]
|
||||
qs = Person.objects_in(database).aggregate(Person.first_name, num=F.count()).with_totals().order_by('-count')[:3]
|
||||
>>> print(qs.count())
|
||||
4
|
||||
>>> for row in qs:
|
||||
|
|
58
docs/whats_new_in_version_2.md
Normal file
58
docs/whats_new_in_version_2.md
Normal file
|
@ -0,0 +1,58 @@
|
|||
What's New in Version 2
|
||||
=======================
|
||||
|
||||
## Python 3.5+ Only
|
||||
|
||||
This version of the ORM no longer support Python 2.
|
||||
|
||||
## New flexible syntax for database expressions and functions
|
||||
|
||||
Expressions that use model fields, database functions and Python operators are now first-class citizens of the ORM. They provide infinite expressivity and flexibility when defining models and generating queries.
|
||||
|
||||
Example of expressions in model definition:
|
||||
```python
|
||||
class Temperature(Model):
|
||||
|
||||
station_id = UInt16Field()
|
||||
timestamp = DateTimeField(default=F.now()) # function as default value
|
||||
degrees_celsius = Float32Field()
|
||||
degrees_fahrenheit = Float32Field(alias=degrees_celsius * 1.8 + 32) # expression as field alias
|
||||
|
||||
# expressions in engine definition
|
||||
engine = MergeTree(partition_key=[F.toYYYYMM(timestamp)], order_by=[station_id, timestamp])
|
||||
```
|
||||
|
||||
Example of expressions in queries:
|
||||
```python
|
||||
db = Database('default')
|
||||
start = F.toStartOfMonth(F.now())
|
||||
expr = (Temperature.timestamp > start) & (Temperature.station_id == 123) & (Temperature.degrees_celsius > 30)
|
||||
for t in Temperature.objects_in(db).filter(expr):
|
||||
print(t.timestamp, t.degrees_celsius)
|
||||
```
|
||||
|
||||
See [Expressions](expressions.md).
|
||||
|
||||
## Support for IPv4 and IPv6 fields
|
||||
|
||||
Two new fields classes were added: `IPv4Field` and `IPv6Field`. Their values are represented by Python's `ipaddress.IPv4Address` and `ipaddress.IPv6Address`.
|
||||
|
||||
See [Field Types](field_types.md).
|
||||
|
||||
## Automatic generation of models by inspecting existing tables
|
||||
|
||||
It is now easy to generate a model class on the fly for an existing table in the database using `Database.get_model_for_table`. This is particularly useful for querying system tables, for example:
|
||||
```python
|
||||
QueryLog = db.get_model_for_table('query_log', system_table=True)
|
||||
for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000):
|
||||
print(row.query)
|
||||
```
|
||||
|
||||
## Convenient ways to import ORM classes
|
||||
|
||||
You can now import all ORM classes directly from `infi.clickhouse_orm`, without worrying about sub-modules. For example:
|
||||
```python
|
||||
from infi.clickhouse_orm import Database, Model, StringField, DateTimeField, MergeTree
|
||||
```
|
||||
See [Importing ORM Classes](importing_orm_classes.md).
|
||||
|
|
@ -322,6 +322,13 @@ class AggregateTestCase(TestCaseWithData):
|
|||
for row in qs:
|
||||
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
|
||||
self.assertEqual(row.count, 100)
|
||||
# With functions
|
||||
qs = Person.objects_in(self.database).aggregate(average_height=F.avg(Person.height), count=F.count())
|
||||
print(qs.as_sql())
|
||||
self.assertEqual(qs.count(), 1)
|
||||
for row in qs:
|
||||
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
|
||||
self.assertEqual(row.count, 100)
|
||||
|
||||
def test_aggregate_with_filter(self):
|
||||
# When filter comes before aggregate
|
||||
|
@ -484,138 +491,6 @@ class AggregateTestCase(TestCaseWithData):
|
|||
self.assertEquals([p.first_name for p in limited_qs[:3]], ['Norman', 'Octavius', 'Oliver'])
|
||||
|
||||
|
||||
|
||||
|
||||
class FuncsTestCase(TestCaseWithData):
|
||||
|
||||
def setUp(self):
|
||||
super(FuncsTestCase, self).setUp()
|
||||
self.database.insert(self._sample_data())
|
||||
|
||||
def _test_qs(self, qs, expected_count):
|
||||
logger.info(qs.as_sql())
|
||||
count = 0
|
||||
for instance in qs:
|
||||
count += 1
|
||||
logger.info('\t[%d]\t%s' % (count, instance.to_dict()))
|
||||
self.assertEqual(count, expected_count)
|
||||
self.assertEqual(qs.count(), expected_count)
|
||||
|
||||
def _test_func(self, func, expected_value=None):
|
||||
sql = 'SELECT %s AS value' % func.to_sql()
|
||||
logger.info(sql)
|
||||
result = list(self.database.select(sql))
|
||||
logger.info('\t==> %s', result[0].value)
|
||||
if expected_value is not None:
|
||||
self.assertEqual(result[0].value, expected_value)
|
||||
|
||||
def test_func_to_sql(self):
|
||||
# No args
|
||||
self.assertEqual(F('func').to_sql(), 'func()')
|
||||
# String args
|
||||
self.assertEqual(F('func', "Wendy's", u"Wendy's").to_sql(), "func('Wendy\\'s', 'Wendy\\'s')")
|
||||
# Numeric args
|
||||
self.assertEqual(F('func', 1, 1.1, Decimal('3.3')).to_sql(), "func(1, 1.1, 3.3)")
|
||||
# Date args
|
||||
self.assertEqual(F('func', date(2018, 12, 31)).to_sql(), "func(toDate('2018-12-31'))")
|
||||
# Datetime args
|
||||
self.assertEqual(F('func', datetime(2018, 12, 31)).to_sql(), "func(toDateTime('1546214400'))")
|
||||
# Boolean args
|
||||
self.assertEqual(F('func', True, False).to_sql(), "func(1, 0)")
|
||||
# Null args
|
||||
self.assertEqual(F('func', None).to_sql(), "func(NULL)")
|
||||
# Fields as args
|
||||
self.assertEqual(F('func', SampleModel.color).to_sql(), "func(`color`)")
|
||||
# Funcs as args
|
||||
self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))')
|
||||
# Iterables as args
|
||||
x = [1, 'z', F('foo', 17)]
|
||||
for y in [x, tuple(x), iter(x)]:
|
||||
self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)")
|
||||
self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([[1, 2], [3, 4]])")
|
||||
|
||||
def test_filter_float_field(self):
|
||||
qs = Person.objects_in(self.database)
|
||||
# Height > 2
|
||||
self._test_qs(qs.filter(F.greater(Person.height, 2)), 0)
|
||||
self._test_qs(qs.filter(Person.height > 2), 0)
|
||||
# Height > 1.61
|
||||
self._test_qs(qs.filter(F.greater(Person.height, 1.61)), 96)
|
||||
self._test_qs(qs.filter(Person.height > 1.61), 96)
|
||||
# Height < 1.61
|
||||
self._test_qs(qs.filter(F.less(Person.height, 1.61)), 4)
|
||||
self._test_qs(qs.filter(Person.height < 1.61), 4)
|
||||
|
||||
def test_filter_date_field(self):
|
||||
qs = Person.objects_in(self.database)
|
||||
# People born on the 30th
|
||||
self._test_qs(qs.filter(F('equals', F('toDayOfMonth', Person.birthday), 30)), 3)
|
||||
self._test_qs(qs.filter(F('toDayOfMonth', Person.birthday) == 30), 3)
|
||||
self._test_qs(qs.filter(F.toDayOfMonth(Person.birthday) == 30), 3)
|
||||
# People born on Sunday
|
||||
self._test_qs(qs.filter(F('equals', F('toDayOfWeek', Person.birthday), 7)), 18)
|
||||
self._test_qs(qs.filter(F('toDayOfWeek', Person.birthday) == 7), 18)
|
||||
self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18)
|
||||
# People born on 1976-10-01
|
||||
self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1)
|
||||
self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1)
|
||||
self._test_qs(qs.filter(Person.birthday == date(1976, 10, 1)), 1)
|
||||
|
||||
def test_func_as_field_value(self):
|
||||
qs = Person.objects_in(self.database)
|
||||
self._test_qs(qs.filter(height__gt=F.plus(1, 0.61)), 96)
|
||||
self._test_qs(qs.exclude(birthday=F.today()), 100)
|
||||
self._test_qs(qs.filter(birthday__between=['1970-01-01', F.today()]), 100)
|
||||
|
||||
def test_comparison_operators(self):
|
||||
one = F.plus(1, 0)
|
||||
two = F.plus(1, 1)
|
||||
self._test_func(one > one, 0)
|
||||
self._test_func(two > one, 1)
|
||||
self._test_func(one >= two, 0)
|
||||
self._test_func(one >= one, 1)
|
||||
self._test_func(one < one, 0)
|
||||
self._test_func(one < two, 1)
|
||||
self._test_func(two <= one, 0)
|
||||
self._test_func(one <= one, 1)
|
||||
self._test_func(one == two, 0)
|
||||
self._test_func(one == one, 1)
|
||||
self._test_func(one != one, 0)
|
||||
self._test_func(one != two, 1)
|
||||
|
||||
def test_arithmetic_operators(self):
|
||||
one = F.plus(1, 0)
|
||||
two = F.plus(1, 1)
|
||||
# +
|
||||
self._test_func(one + two, 3)
|
||||
self._test_func(one + 2, 3)
|
||||
self._test_func(2 + one, 3)
|
||||
# -
|
||||
self._test_func(one - two, -1)
|
||||
self._test_func(one - 2, -1)
|
||||
self._test_func(1 - two, -1)
|
||||
# *
|
||||
self._test_func(one * two, 2)
|
||||
self._test_func(one * 2, 2)
|
||||
self._test_func(1 * two, 2)
|
||||
# /
|
||||
self._test_func(one / two, 0.5)
|
||||
self._test_func(one / 2, 0.5)
|
||||
self._test_func(1 / two, 0.5)
|
||||
# %
|
||||
self._test_func(one % two, 1)
|
||||
self._test_func(one % 2, 1)
|
||||
self._test_func(1 % two, 1)
|
||||
# sign
|
||||
self._test_func(-one, -1)
|
||||
self._test_func(--one, 1)
|
||||
self._test_func(+one, 1)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Color = Enum('Color', u'red blue green yellow brown white black')
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user