Mirror of https://github.com/Infinidat/infi.clickhouse_orm.git (synced 2025-08-10 22:54:48 +03:00)

Compare commits (145 commits)
.gitignore (vendored): 1 line changed

```diff
@@ -59,6 +59,7 @@ src/infi/clickhouse_orm/__version__.py
 bootstrap.py
 
 htmldocs/
+cover/
 
 # tox
 .tox/
```
CHANGELOG.md: 60 lines changed

```diff
@@ -1,6 +1,66 @@
 Change Log
 ==========
 
+v2.1.3
+------
+- Fix pagination for models with alias columns
+
+v2.1.2
+------
+- Add `QuerySet.model` to support django-rest-framework 3
+
+v2.1.1
+------
+- Improve support of ClickHouse v21.9 (mangototango)
+- Ignore non-numeric parts in ClickHouse version (mangototango)
+- Fix precedence of ~ operator in Q objects (mangototango)
+- Support for adding a column to the beginning of a table (meanmail)
+- Add stddevPop and stddevSamp functions (k.peskov)
+
+v2.1.0
+------
+- Support for model constraints
+- Support for data skipping indexes
+- Support for mutations: `QuerySet.update` and `QuerySet.delete`
+- Added functions for working with external dictionaries
+- Support FINAL for `ReplacingMergeTree` (chripede)
+- Added `DateTime64Field` (NiyazNz)
+- Make `DateTimeField` and `DateTime64Field` timezone-aware (NiyazNz)
+
+**Backwards incompatible changes**
+
+Previously, `DateTimeField` always converted its value from the database timezone to UTC. This is no longer the case: the field's value now preserves the timezone it was defined with, or if not specified - the database's global timezone. This change has no effect if your database timezone is set to UTC.
+
+v2.0.1
+------
+- Remove unnecessary import of `six`
+
+v2.0.0
+------
+- Dropped support for Python 2.x
+- New flexible syntax for database expressions and functions
+- Expressions as default values for model fields
+- Support for IPv4 and IPv6 fields
+- Automatic generation of models by inspecting existing tables
+- Convenient ways to import ORM classes
+
+See [What's new in version 2](docs/whats_new_in_version_2.md) for details.
+
+v1.4.0
+------
+- Added primary_key parameter to MergeTree engines (M1hacka)
+- Support negative enum values (Romamo)
+
+v1.3.0
+------
+- Support LowCardinality columns in ad-hoc queries
+- Support for LIMIT BY in querysets (utapyngo)
+
+v1.2.0
+------
+- Add support for per-field compression codecs (rbelio, Chocorean)
+- Add support for low cardinality fields (rbelio)
+
 v1.1.0
 ------
 - Add PREWHERE support to querysets (M1hacka)
```
README.md: 20 lines changed

````diff
@@ -8,10 +8,7 @@ Let's jump right in with a simple example of monitoring CPU usage. First we need to
 connect to the database and create a table for the model:
 
 ```python
-from infi.clickhouse_orm.database import Database
-from infi.clickhouse_orm.models import Model
-from infi.clickhouse_orm.fields import *
-from infi.clickhouse_orm.engines import Memory
+from infi.clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory, F
 
 class CPUStats(Model):
````
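The model definition itself is elided from this hunk. Based on the imports above and the fields used later in the README (`cpu_id`, `cpu_percent`), the model presumably looks something like the sketch below; the `timestamp` field name and the sample database name are assumptions, not part of the diff:

```python
from infi.clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory

class CPUStats(Model):

    timestamp = DateTimeField()   # when the sample was taken (assumed name)
    cpu_id = UInt16Field()        # which CPU core the sample refers to
    cpu_percent = Float32Field()  # how busy the core was, in percent

    engine = Memory()

db = Database('demo_db')  # 'demo_db' is a placeholder name
db.create_table(CPUStats)
```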
````diff
@@ -45,13 +42,16 @@ Querying the table is easy, using either the query builder or raw SQL:
 
 ```python
 # Calculate what percentage of the time CPU 1 was over 95% busy
-total = CPUStats.objects_in(db).filter(cpu_id=1).count()
-busy = CPUStats.objects_in(db).filter(cpu_id=1, cpu_percent__gt=95).count()
-print 'CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total)
+queryset = CPUStats.objects_in(db)
+total = queryset.filter(CPUStats.cpu_id == 1).count()
+busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
+print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
 
 # Calculate the average usage per CPU
-for row in CPUStats.objects_in(db).aggregate('cpu_id', average='avg(cpu_percent)'):
-    print 'CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row)
+for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
+    print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
 ```
 
-To learn more please visit the [documentation](docs/toc.md).
+This and other examples can be found in the `examples` folder.
+
+To learn more please visit the [documentation](docs/toc.md).
````

Build configuration (file name not captured in this view):

```diff
@@ -14,8 +14,7 @@ install_requires = [
 'iso8601 >= 0.1.12',
 'pytz',
 'requests',
-'setuptools',
-'six'
+'setuptools'
 ]
 version_file = src/infi/clickhouse_orm/__version__.py
 description = A Python library for working with the ClickHouse database
@@ -31,7 +30,7 @@ homepage = https://github.com/Infinidat/infi.clickhouse_orm
 
 [isolated-python]
 recipe = infi.recipe.python
-version = v2.7.12.4
+version = v3.8.12
 
 [setup.py]
 recipe = infi.recipe.template.version
```
(One file diff suppressed because it is too large.)
docs/expressions.md (new file, 99 lines):

Expressions
===========

One of the ORM's core concepts is _expressions_, which are composed using functions, operators and model fields. Expressions are used in multiple places in the ORM:

- When defining [field options](field_options.md) - `default`, `alias` and `materialized`.
- In [table engine](table_engines.md) parameters for engines in the `MergeTree` family.
- In [queryset](querysets.md) methods such as `filter`, `exclude`, `order_by`, `aggregate` and `limit_by`.

Using Expressions
-----------------

Expressions usually include ClickHouse database functions, which are made available by the `F` class. Here's a simple function:
```python
from infi.clickhouse_orm import F

expr = F.today()
```

Functions that accept arguments can be composed, just like when using SQL:
```python
expr = F.toDayOfWeek(F.today())
```

You can see the SQL expression that is represented by an ORM expression by calling its `to_sql` method or converting it to a string:
```python
>>> print(expr)
toDayOfWeek(today())
```
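The same string comes back from calling `to_sql` directly (a small sketch; this assumes `to_sql` needs no arguments here, which is what the string conversion above implies):

```python
>>> expr.to_sql()
'toDayOfWeek(today())'
```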
### Operators

ORM expressions support Python's standard arithmetic operators, so you can compose expressions using `+`, `-`, `*`, `/`, `//` and `%`. For example:
```python
# A random integer between 1 and 10
F.rand() % 10 + 1
```

There is also support for comparison operators (`<`, `<=`, `==`, `>=`, `>`, `!=`) and logical operators (`&`, `|`, `~`, `^`) which are often used for filtering querysets:
```python
# Is it Friday the 13th?
(F.toDayOfWeek(F.today()) == 6) & (F.toDayOfMonth(F.today()) == 13)
```

Note that Python's bitwise operators (`&`, `|`, `~`, `^`) have higher precedence than comparison operators, so always use parentheses when combining these two types of operators in an expression. Otherwise the resulting SQL might be different than what you would expect.
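A minimal sketch of what the warning means in practice (the variable name is illustrative only):

```python
# Correct: each comparison is parenthesized before combining with &
ok = (F.toDayOfWeek(F.today()) == 6) & (F.toDayOfMonth(F.today()) == 13)

# Incorrect: without parentheses, & binds tighter than ==, so
#   F.toDayOfWeek(F.today()) == 6 & F.toDayOfMonth(F.today()) == 13
# is parsed as the chained comparison
#   F.toDayOfWeek(F.today()) == (6 & F.toDayOfMonth(F.today())) == 13
# and produces SQL different from what was intended.
```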
### Referring to model fields

To refer to a model field inside an expression, use `<class>.<field>` syntax, for example:
```python
# Convert the temperature from Celsius to Fahrenheit
Sensor.temperature * 1.8 + 32
```

Inside model class definitions omit the class name:
```python
class Person(Model):
    height_cm = Float32Field()
    height_inch = Float32Field(alias=height_cm/2.54)
    ...
```

### Parametric functions

Some of ClickHouse's aggregate functions can accept one or more parameters - constants for initialization that affect the way the function works. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. For example:
```python
# Most common last names
F.topK(5)(Person.last_name)

# Find 90th, 95th and 99th percentile of heights
F.quantiles(0.9, 0.95, 0.99)(Person.height)
```

### Creating new "functions"

Since expressions are just Python objects until they get converted to SQL, it is possible to invent new "functions" by combining existing ones into useful building blocks. For example, we can create a reusable expression that takes a string and trims whitespace, converts it to uppercase, and changes blanks to underscores:
```python
def normalize_string(s):
    return F.replaceAll(F.upper(F.trimBoth(s)), ' ', '_')
```

Then we can use this expression anywhere we need it:
```python
class Event(Model):
    code = StringField()
    normalized_code = StringField(materialized=normalize_string(code))
```

### Which functions are available?

ClickHouse has many hundreds of functions, and new ones often get added. Many, but not all of them, are already covered by the ORM. If you encounter a function that the database supports but is not available in the `F` class, please report this via a GitHub issue. You can still use the function by providing its name:
```python
expr = F("someFunctionName", arg1, arg2, ...)
```

Note that higher-order database functions (those that use lambda expressions) are not supported.

---

[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Importing ORM Classes >>](importing_orm_classes.md)
docs/field_options.md (new file, 112 lines):

Field Options
=============

All field types accept the following arguments:

- default
- alias
- materialized
- readonly
- codec

Note that `default`, `alias` and `materialized` are mutually exclusive - you cannot use more than one of them in a single field.

## default

Specifies a default value to use for the field. If not given, the field will have a default value based on its type: empty string for string fields, zero for numeric fields, etc.
The default value can be a Python value suitable for the field type, or an expression. For example:
```python
class Event(Model):

    name = StringField(default="EVENT")
    repeated = UInt32Field(default=1)
    created = DateTimeField(default=F.now())

    engine = Memory()
    ...
```
When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE` instead. For example:
```python
>>> event = Event()
>>> print(event.to_dict())
{'name': 'EVENT', 'repeated': 1, 'created': <NO_VALUE>}
```
:warning: Due to a bug in ClickHouse versions prior to 20.1.2.4, insertion of records with expressions for default values may fail.

## alias / materialized

The `alias` and `materialized` attributes expect an expression that gets calculated by the database. The difference is that `alias` fields are calculated on the fly, while `materialized` fields are calculated when the record is inserted, and are stored on disk.
You can use any expression, and can refer to other model fields. For example:
```python
class Event(Model):

    created = DateTimeField()
    created_date = DateTimeField(materialized=F.toDate(created))
    name = StringField()
    normalized_name = StringField(alias=F.upper(F.trim(name)))

    engine = Memory()
```
For backwards compatibility with older versions of the ORM, you can pass the expression as an SQL string:
```python
created_date = DateTimeField(materialized="toDate(created)")
```
Neither field type can be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query.

Usage:
```python
obj = Event(created=datetime.now(), name='MyEvent')
db = Database('my_test_db')
db.insert([obj])
# All values will be retrieved from the database
db.select('SELECT created, created_date, name, normalized_name FROM $db.event', model_class=Event)
# created_date and normalized_name will contain a default value
db.select('SELECT * FROM $db.event', model_class=Event)
```
When creating a model instance, any alias or materialized fields are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE`, since their real values can only be known after insertion into the database.

## codec

This attribute specifies the compression algorithm to use for the field (instead of the default data compression algorithm defined in server settings).

Supported compression algorithms:

| Codec                | Argument                                   | Comment
| -------------------- | ------------------------------------------ | ----------------------------------------------------
| NONE                 | None                                       | No compression.
| LZ4                  | None                                       | LZ4 compression.
| LZ4HC(`level`)       | Possible `level` range: [3, 12].           | Default value: 9. Greater values stand for better compression and higher CPU usage. Recommended value range: [4, 9].
| ZSTD(`level`)        | Possible `level` range: [1, 22].           | Default value: 1. Greater values stand for better compression and higher CPU usage. Levels >= 20 should be used with caution, as they require more memory.
| Delta(`delta_bytes`) | Possible `delta_bytes` range: 1, 2, 4, 8.  | Default value for `delta_bytes` is `sizeof(type)` if it is equal to 1, 2, 4 or 8, and 1 otherwise.

Codecs can be combined by separating their names with commas. The default database codec is not included in the pipeline (if it should be applied to a field, you have to specify it explicitly in the pipeline).

Recommended usage for codecs:

- When values for a particular metric do not differ significantly from point to point, delta encoding can reduce disk space usage significantly.
- DateTime works great with a pipeline of Delta and ZSTD; the column size can be compressed to 2-3% of its original size (given smooth datetime data).
- Numeric types usually enjoy the best compression rates with ZSTD.
- String types enjoy good compression rates with LZ4HC.

Example:
```python
class Stats(Model):

    id = UInt64Field(codec='ZSTD(10)')
    timestamp = DateTimeField(codec='Delta,ZSTD')
    timestamp_date = DateField(codec='Delta(4),ZSTD(22)')
    metadata_id = Int64Field(codec='LZ4')
    status = StringField(codec='LZ4HC(10)')
    calculation = NullableField(Float32Field(), codec='ZSTD')
    alerts = ArrayField(FixedStringField(length=15), codec='Delta(2),LZ4HC')

    engine = MergeTree('timestamp_date', ('id', 'timestamp'))
```
Note: This feature is supported on ClickHouse version 19.1.16 and above. Codec arguments will be ignored by the ORM for older versions of ClickHouse.

## readonly

This attribute is set automatically for fields with `alias` or `materialized` attributes; you do not need to pass it yourself.

---

[<< Querysets](querysets.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md)
docs/field_types.md: changed

```diff
@@ -1,71 +1,78 @@
 Field Types
 ===========
 
-See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/data_types/)
+See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/sql-reference/data-types/)
 
-Currently the following field types are supported:
+The following field types are supported:
 
-| Class              | DB Type    | Pythonic Type       | Comments
-| ------------------ | ---------- | ------------------- | -----------------------------------------------------
-| StringField        | String     | unicode             | Encoded as UTF-8 when written to ClickHouse
-| FixedStringField   | String     | unicode             | Encoded as UTF-8 when written to ClickHouse
-| DateField          | Date       | datetime.date       | Range 1970-01-01 to 2105-12-31
-| DateTimeField      | DateTime   | datetime.datetime   | Minimal value is 1970-01-01 00:00:00; Always in UTC
-| Int8Field          | Int8       | int                 | Range -128 to 127
-| Int16Field         | Int16      | int                 | Range -32768 to 32767
-| Int32Field         | Int32      | int                 | Range -2147483648 to 2147483647
-| Int64Field         | Int64      | int/long            | Range -9223372036854775808 to 9223372036854775807
-| UInt8Field         | UInt8      | int                 | Range 0 to 255
-| UInt16Field        | UInt16     | int                 | Range 0 to 65535
-| UInt32Field        | UInt32     | int                 | Range 0 to 4294967295
-| UInt64Field        | UInt64     | int/long            | Range 0 to 18446744073709551615
-| Float32Field       | Float32    | float               |
-| Float64Field       | Float64    | float               |
-| DecimalField       | Decimal    | Decimal             | Pythonic values are rounded to fit the scale of the database field
-| Decimal32Field     | Decimal32  | Decimal             | Ditto
-| Decimal64Field     | Decimal64  | Decimal             | Ditto
-| Decimal128Field    | Decimal128 | Decimal             | Ditto
-| UUIDField          | UUID       | Decimal             |
-| Enum8Field         | Enum8      | Enum                | See below
-| Enum16Field        | Enum16     | Enum                | See below
-| ArrayField         | Array      | list                | See below
-| NullableField      | Nullable   | See below           | See below
+| Class              | DB Type    | Pythonic Type         | Comments
+| ------------------ | ---------- | --------------------- | -----------------------------------------------------
+| StringField        | String     | str                   | Encoded as UTF-8 when written to ClickHouse
+| FixedStringField   | FixedString| str                   | Encoded as UTF-8 when written to ClickHouse
+| DateField          | Date       | datetime.date         | Range 1970-01-01 to 2105-12-31
+| DateTimeField      | DateTime   | datetime.datetime     | Minimal value is 1970-01-01 00:00:00; Timezone aware
+| DateTime64Field    | DateTime64 | datetime.datetime     | Minimal value is 1970-01-01 00:00:00; Timezone aware
+| Int8Field          | Int8       | int                   | Range -128 to 127
+| Int16Field         | Int16      | int                   | Range -32768 to 32767
+| Int32Field         | Int32      | int                   | Range -2147483648 to 2147483647
+| Int64Field         | Int64      | int                   | Range -9223372036854775808 to 9223372036854775807
+| UInt8Field         | UInt8      | int                   | Range 0 to 255
+| UInt16Field        | UInt16     | int                   | Range 0 to 65535
+| UInt32Field        | UInt32     | int                   | Range 0 to 4294967295
+| UInt64Field        | UInt64     | int                   | Range 0 to 18446744073709551615
+| Float32Field       | Float32    | float                 |
+| Float64Field       | Float64    | float                 |
+| DecimalField       | Decimal    | Decimal               | Pythonic values are rounded to fit the scale of the database field
+| Decimal32Field     | Decimal32  | Decimal               | Ditto
+| Decimal64Field     | Decimal64  | Decimal               | Ditto
+| Decimal128Field    | Decimal128 | Decimal               | Ditto
+| UUIDField          | UUID       | uuid.UUID             |
+| IPv4Field          | IPv4       | ipaddress.IPv4Address |
+| IPv6Field          | IPv6       | ipaddress.IPv6Address |
+| Enum8Field         | Enum8      | Enum                  | See below
+| Enum16Field        | Enum16     | Enum                  | See below
+| ArrayField         | Array      | list                  | See below
+| NullableField      | Nullable   | See below             | See below
 
 DateTimeField and Time Zones
 ----------------------------
 
-A `DateTimeField` can be assigned values from one of the following types:
+`DateTimeField` and `DateTime64Field` can accept a `timezone` parameter (either the timezone name or a `pytz` timezone instance). This timezone will be used as the column timezone in ClickHouse. If not provided, the fields will use the timezone defined in the database configuration.
+
+A `DateTimeField` and `DateTime64Field` can be assigned values from one of the following types:
 
 - datetime
 - date
 - integer - number of seconds since the Unix epoch
+- float (DateTime64Field only) - number of seconds and microseconds since the Unix epoch
 - string in `YYYY-MM-DD HH:MM:SS` format or [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601)-compatible format
 
-The assigned value always gets converted to a timezone-aware `datetime` in UTC. If the assigned value is a timezone-aware `datetime` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC.
+The assigned value always gets converted to a timezone-aware `datetime` in UTC. The only exception is when the assigned value is a timezone-aware `datetime`, in which case it will not be changed.
 
-DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse
-version which is new enough to support the `timezone()` function, otherwise it is assumed to be using UTC. In any case, we recommend settings the server timezone to UTC in order to prevent confusion.
+DateTime values that are read from the database are kept in the database-defined timezone - either the one defined for the field, or the global timezone defined in the database configuration.
+
+It is strongly recommended to set the server timezone to UTC and to store all datetime values in that timezone, in order to prevent confusion and subtle bugs. Conversion to a different timezone should only be performed when the value needs to be displayed.
```
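To illustrate the timezone-aware fields introduced above (a sketch only; the model, its field names and the `precision` argument are assumptions, not part of this diff):

```python
import pytz
from infi.clickhouse_orm import Model, DateTimeField, DateTime64Field, Memory

class Event(Model):

    # Column timezone given by name; values read back stay in this timezone
    created = DateTimeField(timezone='Europe/Madrid')
    # A pytz timezone instance also works; DateTime64 keeps sub-second precision
    updated = DateTime64Field(precision=6, timezone=pytz.utc)

    engine = Memory()
```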
````diff
 Working with enum fields
 ------------------------
 
 `Enum8Field` and `Enum16Field` provide support for working with ClickHouse enum columns. They accept strings or integers as values, and convert them to the matching Pythonic Enum member.
 
-Python 3.4 and higher supports Enums natively. When using previous Python versions you need to install the enum34 library.
-
 Example of a model with an enum field:
 
 ```python
 Gender = Enum('Gender', 'male female unspecified')
 
-class Person(models.Model):
+class Person(Model):
 
-    first_name = fields.StringField()
-    last_name = fields.StringField()
-    birthday = fields.DateField()
-    gender = fields.Enum8Field(Gender)
+    first_name = StringField()
+    last_name = StringField()
+    birthday = DateField()
+    gender = Enum8Field(Gender)
 
-    engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
+    engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
 
 suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female)
 ```
@@ -76,65 +83,34 @@ Working with array fields
 You can create array fields containing any data type, for example:
 
 ```python
-class SensorData(models.Model):
+class SensorData(Model):
 
-    date = fields.DateField()
-    temperatures = fields.ArrayField(fields.Float32Field())
-    humidity_levels = fields.ArrayField(fields.UInt8Field())
+    date = DateField()
+    temperatures = ArrayField(Float32Field())
+    humidity_levels = ArrayField(UInt8Field())
 
-    engine = engines.MergeTree('date', ('date',))
+    engine = MergeTree('date', ('date',))
 
 data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
 ```
 
 Note that multidimensional arrays are not supported yet by the ORM.
 
-Working with materialized and alias fields
-------------------------------------------
-
-ClickHouse provides an opportunity to create MATERIALIZED and ALIAS Fields.
-
-See documentation [here](https://clickhouse.yandex/docs/en/query_language/queries/#default-values).
-
-Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query.
-
-Usage:
-
-```python
-class Event(models.Model):
-
-    created = fields.DateTimeField()
-    created_date = fields.DateTimeField(materialized='toDate(created)')
-    name = fields.StringField()
-    username = fields.StringField(alias='name')
-
-    engine = engines.MergeTree('created_date', ('created_date', 'created'))
-
-obj = Event(created=datetime.now(), name='MyEvent')
-db = Database('my_test_db')
-db.insert([obj])
-# All values will be retrieved from database
-db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
-# created_date and username will contain a default value
-db.select('SELECT * FROM $db.event', model_class=Event)
-```
-
 Working with nullable fields
 ----------------------------
-From [some time](https://github.com/yandex/ClickHouse/pull/70) ClickHouse provides a NULL value support.
-Also see some information [here](https://github.com/yandex/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00395_nullable.sql).
+[ClickHouse provides a NULL value support](https://clickhouse.tech/docs/en/sql-reference/data-types/nullable/).
 
 Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example:
 
 ```python
-class EventData(models.Model):
+class EventData(Model):
 
-    date = fields.DateField()
-    comment = fields.NullableField(fields.StringField(), extra_null_values={''})
-    score = fields.NullableField(fields.UInt8Field())
-    serie = fields.NullableField(fields.ArrayField(fields.UInt8Field()))
+    date = DateField()
+    comment = NullableField(StringField(), extra_null_values={''})
+    score = NullableField(UInt8Field())
+    serie = NullableField(ArrayField(UInt8Field()))
 
-    engine = engines.MergeTree('date', ('date',))
+    engine = MergeTree('date', ('date',))
 
 score_event = EventData(date=date.today(), comment=None, score=5, serie=None)
@@ -148,6 +124,36 @@ to `None`.
 
 NOTE: `ArrayField` of `NullableField` is not supported. Also `EnumField` cannot be nullable.
 
+NOTE: Using `Nullable` almost always negatively affects performance, keep this in mind when designing your databases.
+
+Working with LowCardinality fields
+----------------------------------
+Starting with version 19.0 ClickHouse offers a new type of field to improve the performance of queries
+and compaction of columns for low entropy data.
+
+[More specifically](https://github.com/ClickHouse/ClickHouse/issues/4074) the LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessary.
+If the number of distinct values is pretty large, the dictionaries become local, and several different dictionaries will be used for different ranges of data. For example, if you have too many distinct values in total, but only less than about a million values each day - then the queries by day will be processed efficiently, and queries for larger ranges will be processed rather efficiently.
+
+LowCardinality works independently of (generic) fields compression.
+LowCardinality fields are subsequently compressed as usual.
+The compression ratios of LowCardinality fields for text data may be significantly better than without LowCardinality.
+
+LowCardinality will give a performance boost, in the form of processing speed, if the number of distinct values is less than a few millions. This is because data is processed in dictionary-encoded form.
+
+You can find further information [here](https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/).
+
+Usage example:
+```python
+class LowCardinalityModel(Model):
+    date = DateField()
+    string = LowCardinalityField(StringField())
+    nullable = LowCardinalityField(NullableField(StringField()))
+    array = ArrayField(LowCardinalityField(DateField()))
+    ...
+```
+
+Note: a `LowCardinality` field with an inner array field is not supported. Use an `ArrayField` with a `LowCardinality` inner field as seen in the example.
+
 Creating custom field types
 ---------------------------
 Sometimes it is convenient to use data types that are supported in Python, but have no corresponding column type in ClickHouse. In these cases it is possible to define a custom field class that knows how to convert the Pythonic object to a suitable representation in the database, and vice versa.
@@ -160,7 +166,7 @@ For example, we can create a BooleanField which will hold `True` and `False` values
 Here's the full implementation:
 
 ```python
-from infi.clickhouse_orm.fields import Field
+from infi.clickhouse_orm import Field
 
 class BooleanField(Field):
````
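The body of `BooleanField` is unchanged by this commit and is elided from the diff. For context, a minimal sketch of what such a custom field can look like, following the pattern described above (the attribute and method names shown here are assumptions, not part of this diff):

```python
from infi.clickhouse_orm import Field

class BooleanField(Field):

    # The ClickHouse column type to use
    db_type = 'UInt8'

    # The default value if none is given
    class_default = False

    def to_python(self, value, timezone_in_use):
        # Convert valid raw values to True/False
        if value in (1, '1', True):
            return True
        elif value in (0, '0', False):
            return False
        raise ValueError('Invalid value for BooleanField: %r' % value)

    def to_db_string(self, value, quote=True):
        # Represent the value as '1' or '0' when writing to the database
        return '1' if value else '0'
```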
```diff
@@ -186,4 +192,4 @@ class BooleanField(Field):
 
 ---
 
-[<< Querysets](querysets.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md)
+[<< Field Options](field_options.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md)
```
docs/importing_orm_classes.md (new file, 89 lines):

Importing ORM Classes
=====================

The ORM supports different styles of importing and referring to its classes, so choose what works for you from the options below.

Importing Everything
--------------------

It is safe to use `import *` from `infi.clickhouse_orm` or its submodules. Only classes that are needed by users of the ORM will get imported, and nothing else:
```python
from infi.clickhouse_orm import *
```
This is exactly equivalent to the following import statements:
```python
from infi.clickhouse_orm.database import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.funcs import *
from infi.clickhouse_orm.migrations import *
from infi.clickhouse_orm.models import *
from infi.clickhouse_orm.query import *
from infi.clickhouse_orm.system_models import *
```
By importing everything, all of the ORM's public classes can be used directly. For example:
```python
from infi.clickhouse_orm import *

class Event(Model):

    name = StringField(default="EVENT")
    repeated = UInt32Field(default=1)
    created = DateTimeField(default=F.now())

    engine = Memory()
```

Importing Everything into a Namespace
-------------------------------------

To prevent potential name clashes and to make the code more readable, you can import the ORM's classes into a namespace of your choosing, e.g. `orm`. For brevity, it is recommended to import the `F` class explicitly:
```python
import infi.clickhouse_orm as orm
from infi.clickhouse_orm import F

class Event(orm.Model):

    name = orm.StringField(default="EVENT")
    repeated = orm.UInt32Field(default=1)
    created = orm.DateTimeField(default=F.now())

    engine = orm.Memory()
```

Importing Specific Submodules
-----------------------------

It is possible to import only the submodules you need, and use their names to qualify the ORM's class names. This option is more verbose, but makes it clear where each class comes from. For example:
```python
from infi.clickhouse_orm import models, fields, engines, F

class Event(models.Model):

    name = fields.StringField(default="EVENT")
    repeated = fields.UInt32Field(default=1)
    created = fields.DateTimeField(default=F.now())

    engine = engines.Memory()
```

Importing Specific Classes
--------------------------

If you prefer, you can import only the specific ORM classes that you need directly from `infi.clickhouse_orm`:
```python
from infi.clickhouse_orm import Model, StringField, UInt32Field, DateTimeField, F, Memory

class Event(Model):

    name = StringField(default="EVENT")
    repeated = UInt32Field(default=1)
    created = DateTimeField(default=F.now())

    engine = Memory()
```

---

[<< Expressions](expressions.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md)
docs/index.md: changed

```diff
@@ -1,9 +1,9 @@
 Overview
 ========
 
-This project is simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/). It allows you to define model classes whose instances can be written to the database and read from it.
+This project is simple ORM for working with the [ClickHouse database](https://clickhouse.tech/). It allows you to define model classes whose instances can be written to the database and read from it.
 
-It was tested on Python 2.7 and 3.5.
+Version 1.x supports Python 2.7 and 3.5+. Version 2.x dropped support for Python 2.7, and works only with Python 3.5+.
 
 Installation
 ------------
```
docs/models_and_databases.md: changed

````diff
@@ -9,17 +9,18 @@ Defining Models
 ---------------
 
 Models are defined in a way reminiscent of Django's ORM, by subclassing `Model`:
-
-    from infi.clickhouse_orm import models, fields, engines
-
-    class Person(models.Model):
-
-        first_name = fields.StringField()
-        last_name = fields.StringField()
-        birthday = fields.DateField()
-        height = fields.Float32Field()
-
-        engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
+```python
+from infi.clickhouse_orm import Model, StringField, DateField, Float32Field, MergeTree
+
+class Person(Model):
+
+    first_name = StringField()
+    last_name = StringField()
+    birthday = DateField()
+    height = Float32Field()
+
+    engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
+```
 
 The columns in the database table are represented by model fields. Each field has a type, which matches the type of the corresponding database column. All the supported fields types are listed [here](field_types.md).
@@ -29,47 +30,83 @@ A model must have an `engine`, which determines how its table is stored on disk
 
 Each field has a "natural" default value - empty string for string fields, zero for numeric fields etc. To specify a different value use the `default` parameter:
 
-    first_name = fields.StringField(default="anonymous")
+    first_name = StringField(default="anonymous")
+
+For additional details see [here](field_options.md).
 
 ### Null values
 
 To allow null values in a field, wrap it inside a `NullableField`:
 
-    birthday = fields.NullableField(fields.DateField())
+    birthday = NullableField(DateField())
 
 In this case, the default value for that field becomes `null` unless otherwise specified.
 
+For more information about `NullableField` see [Field Types](field_types.md).
+
 ### Materialized fields
 
 The value of a materialized field is calculated from other fields in the model. For example:
 
-    year_born = fields.Int16Field(materialized="toYear(birthday)")
+    year_born = Int16Field(materialized=F.toYear(birthday))
 
 Materialized fields are read-only, meaning that their values are not sent to the database when inserting records.
 
-It is not possible to specify a default value for a materialized field.
+For additional details see [here](field_options.md).
 
 ### Alias fields
 
 An alias field is a field whose value is calculated by ClickHouse on the fly, as a function of other fields. It is not physically stored by the database. For example:
 
-    weekday_born = fields.UInt8Field(alias="toDayOfWeek(birthday)")
+    weekday_born = UInt8Field(alias=F.toDayOfWeek(birthday))
 
 Alias fields are read-only, meaning that their values are not sent to the database when inserting records.
 
-It is not possible to specify a default value for an alias field.
+For additional details see [here](field_options.md).
 
 ### Table Names
 
 The table name used for the model is its class name, converted to lowercase. To override the default name, implement the `table_name` method:
 
-    class Person(models.Model):
-
-        ...
-
-        @classmethod
-        def table_name(cls):
-            return 'people'
+```python
+class Person(Model):
+
+    ...
+
+    @classmethod
+    def table_name(cls):
+        return 'people'
+```
+
+### Model Constraints
+
+It is possible to define constraints which ClickHouse verifies when data is inserted. Trying to insert invalid records will raise a `ServerError`. Each constraint has a name and an expression to validate. For example:
+```python
+class Person(Model):
+
+    ...
+
+    # Ensure that the birthday is not a future date
+    birthday_is_in_the_past = Constraint(birthday <= F.today())
+```
+
+### Data Skipping Indexes
+
+Models that use an engine from the `MergeTree` family can define additional indexes over one or more columns or expressions. These indexes are used in SELECT queries for reducing the amount of data to read from the disk by skipping big blocks of data that do not satisfy the query's conditions.
+
+For example:
+```python
+class Person(Model):
+
+    ...
+
+    # A minmax index that can help find people taller or shorter than some height
+    height_index = Index(height, type=Index.minmax(), granularity=2)
+
+    # A trigram index that can help find substrings inside people names
+    names_index = Index((F.lower(first_name), F.lower(last_name)),
+                        type=Index.ngrambf_v1(3, 256, 2, 0), granularity=1)
+```
 
 Using Models
 ------------
@@ -96,7 +133,7 @@ Inserting to the Database
 
 To write your instances to ClickHouse, you need a `Database` instance:
 
-    from infi.clickhouse_orm.database import Database
+    from infi.clickhouse_orm import Database
 
     db = Database('my_test_db')
````
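The rest of this section is unchanged and elided from the diff. For context, a minimal sketch of how inserting typically proceeds from here (the sample values are assumptions):

```python
from infi.clickhouse_orm import Database

db = Database('my_test_db')  # connects to http://localhost:8123 by default
db.create_table(Person)      # create the table if it does not exist yet
db.insert([
    Person(first_name='Suzy', last_name='Jones', birthday='1990-02-13', height=1.65)
])
```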
```diff
@@ -121,19 +158,19 @@ Reading from the Database
 Loading model instances from the database is simple:
 
     for person in db.select("SELECT * FROM my_test_db.person", model_class=Person):
-        print person.first_name, person.last_name
+        print(person.first_name, person.last_name)
 
 Do not include a `FORMAT` clause in the query, since the ORM automatically sets the format to `TabSeparatedWithNamesAndTypes`.
 
 It is possible to select only a subset of the columns, and the rest will receive their default values:
 
     for person in db.select("SELECT first_name FROM my_test_db.person WHERE last_name='Smith'", model_class=Person):
-        print person.first_name
+        print(person.first_name)
 
 The ORM provides a way to build simple queries without writing SQL by hand. The previous snippet can be written like this:
 
-    for person in Person.objects_in(db).filter(last_name='Smith').only('first_name'):
-        print person.first_name
+    for person in Person.objects_in(db).filter(Person.last_name == 'Smith').only('first_name'):
+        print(person.first_name)
 
 See [Querysets](querysets.md) for more information.
@@ -144,10 +181,16 @@ Reading without a Model
 When running a query, specifying a model class is not required. In case you do not provide a model class, an ad-hoc class will be defined based on the column names and types returned by the query:
 
     for row in db.select("SELECT max(height) as max_height FROM my_test_db.person"):
-        print row.max_height
+        print(row.max_height)
 
 This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax.
 
+It is also possible to generate a model class on the fly for an existing table in the database using `get_model_for_table`. This is particularly useful for querying system tables, for example:
+
+    QueryLog = db.get_model_for_table('query_log', system_table=True)
+    for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000):
+        print(row.query)
+
 SQL Placeholders
 ----------------
@@ -180,9 +223,9 @@ It is possible to paginate through model instances:
 
     >>> order_by = 'first_name, last_name'
     >>> page = db.paginate(Person, order_by, page_num=1, page_size=10)
-    >>> print page.number_of_objects
+    >>> print(page.number_of_objects)
     2507
-    >>> print page.pages_total
+    >>> print(page.pages_total)
     251
     >>> for person in page.objects:
     >>>     # do something
@@ -204,4 +247,4 @@ Note that `order_by` must be chosen so that the ordering is unique
 
 ---
 
-[<< Overview](index.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md)
+[<< Overview](index.md) | [Table of Contents](toc.md) | [Expressions >>](expressions.md)
```
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
Querysets
|
Querysets
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
@@ -8,7 +9,7 @@ A queryset is an object that represents a database query using a specific Model.

 This queryset matches all Person instances in the database. You can get these instances using iteration:

     for person in qs:
-        print person.first_name, person.last_name
+        print(person.first_name, person.last_name)

 Filtering
 ---------
@@ -16,31 +17,76 @@ Filtering

 The `filter` and `exclude` methods are used for filtering the matching instances. Calling these methods returns a new queryset instance, with the added conditions. For example:

     >>> qs = Person.objects_in(database)
-    >>> qs = qs.filter(first_name__startswith='V').exclude(birthday__lt='2000-01-01')
+    >>> qs = qs.filter(F.like(Person.first_name, 'V%')).exclude(Person.birthday < '2000-01-01')
     >>> qs.conditions_as_sql()
-    u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')"
+    "first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')"

-It is possible to specify several fields to filter or exclude by:
+It is possible to specify several expressions to filter or exclude by, and they will be ANDed together:

-    >>> qs = Person.objects_in(database).filter(last_name='Smith', height__gt=1.75)
+    >>> qs = Person.objects_in(database).filter(Person.last_name == 'Smith', Person.height > 1.75)
     >>> qs.conditions_as_sql()
-    u"last_name = 'Smith' AND height > 1.75"
+    "last_name = 'Smith' AND height > 1.75"

-For filters with compound conditions you can use `Q` objects inside `filter` with overloaded operators `&` (AND), `|` (OR) and `~` (NOT):
+For compound conditions you can use the overloaded operators `&` (AND), `|` (OR) and `~` (NOT):

-    >>> qs = Person.objects_in(database).filter((Q(first_name='Ciaran', last_name='Carver') | Q(height_lte=1.8)) & ~Q(first_name='David'))
-    >>> qs.conditions_as_sql()
-    u"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"
-
-By default conditions from `filter` and `exclude` methods are add to `WHERE` clause.
-For better aggregation performance you can add them to `PREWHERE` section using `prewhere=True` parameter
-
     >>> qs = Person.objects_in(database)
-    >>> qs = qs.filter(first_name__startswith='V', prewhere=True)
-    >>> qs.conditions_as_sql(prewhere=True)
-    u"first_name LIKE 'V%'"
+    >>> qs = qs.filter((((Person.first_name == 'Ciaran') & (Person.last_name == 'Carver')) | (Person.height <= 1.8)) & ~(Person.first_name == 'David'))
+    >>> qs.conditions_as_sql()
+    "((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"

-There are different operators that can be used, by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators.
+Note that Python's bitwise operators (`&`, `|`, `~`, `^`) have higher precedence than comparison operators, so always use parentheses when combining these two types of operators in an expression. Otherwise the resulting SQL might be different than what you would expect.
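
To make the precedence pitfall concrete, here is a small illustrative sketch (an editor's addition, not from the original page; it assumes the same `Person` model used throughout):

```python
# Because & binds tighter than >, an unparenthesized expression such as
#     qs.filter(Person.height > 1.7 & Person.height < 2.0)
# would be parsed as Person.height > (1.7 & Person.height) < 2.0 -- not what we meant.
# With explicit parentheses each comparison is built first, then ANDed:
qs = Person.objects_in(database).filter((Person.height > 1.7) & (Person.height < 2.0))
print(qs.conditions_as_sql())  # should come out roughly as: height > 1.7 AND height < 2.0
```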
+
+### Using `IN` and `NOT IN`
+
+Filtering queries using ClickHouse's `IN` and `NOT IN` operators requires using the `isIn` and `isNotIn` functions (trying to use Python's `in` keyword will not work!).
+For example:
+```python
+# Is it Monday, Tuesday or Wednesday?
+F.isIn(F.toDayOfWeek(F.now()), [1, 2, 3])
+# This will not work:
+F.toDayOfWeek(F.now()) in [1, 2, 3]
+```
+
+In case of model fields, there is a simplified syntax:
+```python
+# Filtering using F.isIn:
+qs.filter(F.isIn(Person.first_name, ['Robert', 'Rob', 'Robbie']))
+# Simpler syntax using isIn directly on the field:
+qs.filter(Person.first_name.isIn(['Robert', 'Rob', 'Robbie']))
+```
+
+The `isIn` and `isNotIn` functions expect either a list/tuple of values, or another queryset (a subquery). For example, if we want to select only people with Irish last names:
+```python
+# Last name is in a list of values
+qs = Person.objects_in(database).filter(Person.last_name.isIn(["Murphy", "O'Sullivan"]))
+# Last name is in a subquery
+subquery = IrishLastName.objects_in(database).only("name")
+qs = Person.objects_in(database).filter(Person.last_name.isIn(subquery))
+```
+
+### Specifying PREWHERE conditions
+
+By default, conditions from the `filter` and `exclude` methods are added to the `WHERE` clause.
+For better aggregation performance you can add them to the `PREWHERE` section by adding a `prewhere=True` parameter:
+
+    >>> qs = Person.objects_in(database)
+    >>> qs = qs.filter(F.like(Person.first_name, 'V%'), prewhere=True)
+    >>> qs.conditions_as_sql(prewhere=True)
+    "first_name LIKE 'V%'"
+### Old-style filter conditions
+
+Prior to version 2 of the ORM, filtering conditions were limited to a predefined set of operators, and complex expressions were not supported. This old syntax is still available, so you can use it alongside or even intermixed with new-style functions and expressions.
+
+The old syntax uses keyword arguments to the `filter` and `exclude` methods, built as `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. For example:
+```python
+qs = Position.objects_in(database)
+# New style
+qs = qs.filter(Position.x > 100, Position.y < 20, Position.terrain == 'water')
+# Old style
+qs = qs.filter(x__gt=100, y__lt=20, terrain='water')
+```
+
+Below are all the supported operators.

 | Operator | Equivalent SQL | Comments |
 | -------- | -------------------------------------------- | ---------------------------------- |
@@ -51,8 +97,8 @@ There are different operators that can be used, by passing `<fieldname>__<operator>=<value>`
 | `lt` | `field < value` | |
 | `lte` | `field <= value` | |
 | `between` | `field BETWEEN value1 AND value2` | |
-| `in` | `field IN (values)` | See below |
-| `not_in` | `field NOT IN (values)` | See below |
+| `in` | `field IN (values)` | |
+| `not_in` | `field NOT IN (values)` | |
 | `contains` | `field LIKE '%value%'` | For string fields only |
 | `startswith` | `field LIKE 'value%'` | For string fields only |
 | `endswith` | `field LIKE '%value'` | For string fields only |
@@ -61,27 +107,6 @@ There are different operators that can be used, by passing `<fieldname>__<operator>=<value>`
 | `iendswith` | `lowerUTF8(field) LIKE lowerUTF8('%value')` | For string fields only |
 | `iexact` | `lowerUTF8(field) = lowerUTF8(value)` | For string fields only |
-
-### Using the `in` Operator
-
-The `in` and `not_in` operators expect one of three types of values:
-
-* A list or tuple of simple values
-* A string, which is used verbatim as the contents of the parentheses
-* Another queryset (subquery)
-
-For example if we want to select only people with Irish last names:
-
-    # A list of simple values
-    qs = Person.objects_in(database).filter(last_name__in=["Murphy", "O'Sullivan"])
-
-    # A string
-    subquery = "SELECT name from $db.irishlastname"
-    qs = Person.objects_in(database).filter(last_name__in=subquery)
-
-    # A queryset
-    subquery = IrishLastName.objects_in(database).only("name")
-    qs = Person.objects_in(database).filter(last_name__in=subquery)
-
 Counting and Checking Existence
 -------------------------------
@@ -126,10 +151,10 @@ Adds a DISTINCT clause to the query, meaning that any duplicate rows in the results will be omitted.

     94

 Final
---------
+-----

-This method can be used only with CollapsingMergeTree engine.
-Adds a FINAL modifier to the query, meaning data is selected fully "collapsed" by sign field.
+This method can be used only with the `CollapsingMergeTree` engine.
+Adds a FINAL modifier to the query, meaning that the selected data is fully "collapsed" by the engine's sign field.

     >>> Person.objects_in(database).count()
     100
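
As a concrete illustration of `distinct` and `final` (an editor's sketch, not from the original page; it assumes a hypothetical `Event` model whose engine is `CollapsingMergeTree` with a sign column):

```python
qs = Event.objects_in(database)
print(qs.distinct().count())  # number of unique rows
print(qs.final().count())     # number of rows after collapsing by the sign field
```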
@@ -144,8 +169,7 @@ It is possible to get a specific item from the queryset by index:

     qs = Person.objects_in(database).order_by('last_name', 'first_name')
     first = qs[0]

-It is also possible to get a range a instances using a slice. This returns a queryset,
-that you can either iterate over or convert to a list.
+It is also possible to get a range of instances using a slice. This returns a queryset that you can either iterate over or convert to a list.

     qs = Person.objects_in(database).order_by('last_name', 'first_name')
     first_ten_people = list(qs[:10])
@@ -153,7 +177,7 @@ that you can either iterate over or convert to a list.

 You should use `order_by` to ensure a consistent ordering of the results.

-Trying to use negative indexes or a slice with a step (e.g. [0:100:2]) is not supported and will raise an `AssertionError`.
+Trying to use negative indexes or a slice with a step (e.g. [0 : 100 : 2]) is not supported and will raise an `AssertionError`.

 Pagination
 ----------
@@ -162,9 +186,9 @@ Similar to `Database.paginate`, you can go over the queryset results one page at a time:

     >>> qs = Person.objects_in(database).order_by('last_name', 'first_name')
     >>> page = qs.paginate(page_num=1, page_size=10)
-    >>> print page.number_of_objects
+    >>> print(page.number_of_objects)
     2507
-    >>> print page.pages_total
+    >>> print(page.pages_total)
     251
     >>> for person in page.objects:
     >>>     # do something
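
For completeness, here is a small editor's sketch (not from the original page) of walking through all pages, using the `pages_total` and `objects` fields shown above:

```python
page_num = 1
while True:
    page = qs.paginate(page_num=page_num, page_size=100)
    for person in page.objects:
        print(person.first_name, person.last_name)
    if page_num >= page.pages_total:
        break  # no more pages
    page_num += 1
```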
@@ -179,43 +203,66 @@ The `paginate` method returns a `namedtuple` containing the following fields:

 Note that you should use `QuerySet.order_by` so that the ordering is unique, otherwise there might be inconsistencies in the pagination (such as an instance that appears on two different pages).

+Mutations
+---------
+
+To delete all records that match a queryset's conditions use the `delete` method:
+
+    Person.objects_in(database).filter(first_name='Max').delete()
+
+To update records that match a queryset's conditions call the `update` method and provide the field names to update and the expressions to use (as keyword arguments):
+
+    Person.objects_in(database).filter(first_name='Max').update(first_name='Maximilian')
+
+Note a few caveats:
+
+- ClickHouse cannot update columns that are used in the calculation of the primary or the partition key.
+- Mutations happen in the background, so they are not immediate.
+- Only tables in the `MergeTree` family support mutations.
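
Because mutations run in the background, it can be handy to check whether they have finished. This is an editor's sketch, not part of the original page; it queries ClickHouse's standard `system.mutations` table through `Database.raw`:

```python
# Count mutations that are still running (is_done = 0 marks pending mutations).
pending = database.raw("SELECT count() FROM system.mutations WHERE is_done = 0")
print(pending)
```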

 Aggregation
 -----------

 It is possible to use aggregation functions over querysets using the `aggregate` method. The simplest form of aggregation works over all rows in the queryset:

-    >>> qs = Person.objects_in(database).aggregate(average_height='avg(height)')
+    >>> qs = Person.objects_in(database).aggregate(average_height=F.avg(Person.height))
-    >>> print qs.count()
+    >>> print(qs.count())
     1
-    >>> for row in qs: print row.average_height
+    >>> for row in qs: print(row.average_height)
     1.71

 The returned row or rows are no longer instances of the base model (`Person` in this example), but rather instances of an ad-hoc model that includes only the fields specified in the call to `aggregate`.

-You can pass names of fields from the model that will be included in the query. By default, they will be also used in the GROUP BY clause. For example to count the number of people per last name you could do this:
+You can pass fields from the model that will be included in the query. By default, they will also be used in the GROUP BY clause. For example, to count the number of people per last name you could do this:

-    qs = Person.objects_in(database).aggregate('last_name', num='count()')
+    qs = Person.objects_in(database).aggregate(Person.last_name, num=F.count())

 The underlying SQL query would be something like this:

-    SELECT last_name, count() AS num FROM person GROUP BY last_name
+    SELECT last_name, count() AS num
+    FROM person
+    GROUP BY last_name
 If you would like to control the GROUP BY explicitly, use the `group_by` method. This is useful when you need to group by a calculated field, instead of a field that exists in the model. For example, to count the number of people born on each weekday:

-    qs = Person.objects_in(database).aggregate(weekday='toDayOfWeek(birthday)', num='count()').group_by('weekday')
+    qs = Person.objects_in(database).aggregate(weekday=F.toDayOfWeek(Person.birthday), num=F.count()).group_by('weekday')

 This queryset is translated to:

-    SELECT toDayOfWeek(birthday) AS weekday, count() AS num FROM person GROUP BY weekday
+    SELECT toDayOfWeek(birthday) AS weekday, count() AS num
+    FROM person
+    GROUP BY weekday

-After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model.
+After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate` again. It is also not possible to filter the aggregated queryset on calculated fields, only on fields that exist in the model.
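
A brief sketch of what that means in practice (an editor's illustration, reusing the weekday aggregation above):

```python
qs = Person.objects_in(database).aggregate(weekday=F.toDayOfWeek(Person.birthday), num=F.count()).group_by('weekday')
qs = qs.order_by('weekday')  # ordering by the calculated field is fine
page = qs.paginate(page_num=1, page_size=5)  # pagination still works
for row in page.objects:
    print(row.weekday, row.num)
# Filtering must use model fields, e.g. qs.filter(Person.height > 1.75) -- not `weekday`.
```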
+### Adding totals
+
 If you limit aggregation results, it might be useful to get total aggregation values for all rows.
 To achieve this, you can use the `with_totals` method. It will return an extra (last) row with
 values aggregated for all rows suitable for the filters.

-    qs = Person.objects_in(database).aggregate('first_name', num='count()').with_totals().order_by('-count')[:3]
+    qs = Person.objects_in(database).aggregate(Person.first_name, num=F.count()).with_totals().order_by('-num')[:3]
-    >>> print qs.count()
+    >>> print(qs.count())
     4
     >>> for row in qs:
     >>>     print("'{}': {}".format(row.first_name, row.num))
@@ -225,4 +272,4 @@ values aggregated for all rows suitable for filters.

 ---

-[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md)
+[<< Importing ORM Classes](importing_orm_classes.md) | [Table of Contents](toc.md) | [Field Options >>](field_options.md)
@@ -448,7 +448,7 @@ Extends Engine

 Here we define the Buffer engine.
-Read more here https://clickhouse.yandex/reference_en.html#Buffer
+Read more here https://clickhouse.tech/reference_en.html#Buffer

 #### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000)
@@ -33,19 +33,19 @@ Each migration file is expected to contain a list of `operations`, for example:

 The following operations are supported:

-**CreateTable**
+### CreateTable

 A migration operation that creates a table for a given model class. If the table already exists, the operation does nothing.

 In case the model class is a `BufferModel`, the operation first creates the underlying on-disk table, and then creates the buffer table.

-**DropTable**
+### DropTable

 A migration operation that drops the table of a given model class. If the table does not exist, the operation does nothing.

-**AlterTable**
+### AlterTable

 A migration operation that compares the table of a given model class to the model’s fields, and alters the table to match the model. The operation can:
@@ -56,14 +56,19 @@ A migration operation that compares the table of a given model class to the model’s fields

 Default values are not altered by this operation.

-**AlterTableWithBuffer**
+### AlterTableWithBuffer

 A compound migration operation for altering a buffer table and its underlying on-disk table. The buffer table is dropped, the on-disk table is altered, and then the buffer table is re-created. This is the procedure recommended in the ClickHouse documentation for handling scenarios in which the underlying table needs to be modified.

 Applying this migration operation to a regular table has the same effect as an `AlterTable` operation.

-**RunPython**
+### AlterConstraints
+
+A migration operation that adds new constraints from the model to the database table, and drops obsolete ones. Constraints are identified by their names, so a change in an existing constraint will not be detected unless its name was changed too. ClickHouse does not check that the constraints hold for existing data in the table.
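
For illustration, a migration module using this operation might look as follows (an editor's sketch; `Person` stands in for any model that declares constraints, and the import path is hypothetical):

```python
from infi.clickhouse_orm import migrations
from ..models import Person  # hypothetical import path

operations = [
    migrations.AlterConstraints(Person)  # sync the table's constraints with the model
]
```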
+
+### RunPython

 A migration operation that runs a Python function. The function receives the `Database` instance to operate on.
@@ -77,9 +82,9 @@ A migration operation that runs a Python function. The function receives the `Database` instance to operate on.

     ]

-**RunSQL**
+### RunSQL

-A migration operation that runs raw SQL queries. It expects a string containing an SQL query, or an array of SQL-query strings.
+A migration operation that runs raw SQL statements. It expects a string containing an SQL statement, or a list of statements.

 Example:
@@ -1,7 +1,7 @@
 System Models
 =============

-[Clickhouse docs](https://clickhouse.yandex/docs/en/system_tables/).
+[Clickhouse docs](https://clickhouse.tech/docs/en/operations/system-tables/).

 System models are read-only models for implementing part of the system's functionality, and for providing access to information about how the system is working.

@@ -14,7 +14,7 @@ Currently the following system models are supported:
 Partitions and Parts
 --------------------

-[ClickHouse docs](https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts).
+[ClickHouse docs](https://clickhouse.tech/docs/en/sql-reference/statements/alter/#alter_manipulations-with-partitions).

 A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part.

@@ -30,8 +30,7 @@ A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part.

 Usage example:

-    from infi.clickhouse_orm.database import Database
-    from infi.clickhouse_orm.system_models import SystemPart
+    from infi.clickhouse_orm import Database, SystemPart
     db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
     partitions = SystemPart.get_active(db, conditions='')  # Getting all active partitions of the database
     if len(partitions) > 0:
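
The continuation of that example is not shown in this diff. A minimal sketch of where it usually leads (an editor's illustration; it relies on the partition-manipulation methods that `SystemPart` instances expose, such as `detach`, `drop`, `freeze` and `fetch`):

```python
    partitions = SystemPart.get_active(db, conditions='')
    if len(partitions) > 0:
        oldest = partitions[0]
        oldest.freeze()  # create a local backup of the oldest active partition
```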
@@ -1,7 +1,7 @@
 Table Engines
 =============

-See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/table_engines/)
+See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/engines/table-engines/)

 Each model must have an engine instance, used when creating the table in ClickHouse.

@@ -24,11 +24,11 @@ Simple Engines

 `TinyLog`, `Log` and `Memory` engines do not require any parameters:

-    engine = engines.TinyLog()
+    engine = TinyLog()

-    engine = engines.Log()
+    engine = Log()

-    engine = engines.Memory()
+    engine = Memory()

 Engines in the MergeTree Family
@@ -36,28 +36,28 @@ Engines in the MergeTree Family

 To define a `MergeTree` engine, supply the date column name and the names (or expressions) for the key columns:

-    engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'))
+    engine = MergeTree('EventDate', ('CounterID', 'EventDate'))

 You may also provide a sampling expression:

-    engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)')
+    engine = MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr=F.intHash32(UserID))

 A `CollapsingMergeTree` engine is defined in a similar manner, but requires also a sign column:

-    engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')
+    engine = CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')

 For a `SummingMergeTree` you can optionally specify the summing columns:

-    engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
-                                      summing_cols=('Shows', 'Clicks', 'Cost'))
+    engine = SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
+                              summing_cols=('Shows', 'Clicks', 'Cost'))

 For a `ReplacingMergeTree` you can optionally specify the version column:

-    engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')
+    engine = ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')

 ### Custom partitioning

-ClickHouse supports [custom partitioning](https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/) expressions since version 1.1.54310
+ClickHouse supports [custom partitioning](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key/) expressions since version 1.1.54310

 You can use custom partitioning with any `MergeTree` family engine.
 To set custom partitioning:
@@ -69,35 +69,48 @@ Standard monthly partitioning by date column can be specified using the `toYYYYMM(date)` function

 Example:

-    engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
-                                        partition_key=('toYYYYMM(EventDate)', 'BannerID'))
+    engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
+                                partition_key=(F.toYYYYMM(EventDate), 'BannerID'))

+### Primary key
+
+ClickHouse supports [custom primary key](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#primary-keys-and-indexes-in-queries) expressions since version 1.1.54310
+
+You can use a custom primary key with any `MergeTree` family engine.
+To set a custom primary key, add the `primary_key` parameter. It should be a tuple of expressions from which the primary key is built.
+
+By default, the primary key is equal to the `order_by` expression.
+
+Example:
+
+    engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
+                                partition_key=(F.toYYYYMM(EventDate), 'BannerID'), primary_key=('OrderID',))
 ### Data Replication

 Any of the above engines can be converted to a replicated engine (e.g. `ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`:

-    engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'),
-                               replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
-                               replica_name='{replica}')
+    engine = MergeTree('EventDate', ('CounterID', 'EventDate'),
+                       replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
+                       replica_name='{replica}')

 Buffer Engine
 -------------

 A `Buffer` engine is only used in conjunction with a `BufferModel`.
-The model should be a subclass of both `models.BufferModel` and the main model.
+The model should be a subclass of both `BufferModel` and the main model.
 The main model is also passed to the engine:

-    class PersonBuffer(models.BufferModel, Person):
+    class PersonBuffer(BufferModel, Person):

-        engine = engines.Buffer(Person)
+        engine = Buffer(Person)

 Additional buffer parameters can optionally be specified:

-        engine = engines.Buffer(Person, num_layers=16, min_time=10,
-                                max_time=100, min_rows=10000, max_rows=1000000,
-                                min_bytes=10000000, max_bytes=100000000)
+        engine = Buffer(Person, num_layers=16, min_time=10,
+                        max_time=100, min_rows=10000, max_rows=1000000,
+                        min_bytes=10000000, max_bytes=100000000)

 Then you can insert objects into the Buffer model and they will be handled by ClickHouse properly:
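
The insert example itself is not shown in this diff; a minimal sketch of such an insert (an editor's illustration, assuming the `Person` fields used on earlier pages) would be:

```python
suzy = PersonBuffer(first_name='Suzy', last_name='Jones')
dan = PersonBuffer(first_name='Dan', last_name='Schwartz')
db.insert([dan, suzy])  # rows are buffered and flushed to the Person table by ClickHouse
```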
@@ -110,14 +123,14 @@ Then you can insert objects into the Buffer model and they will be handled by ClickHouse properly:

 Merge Engine
 -------------

-[ClickHouse docs](https://clickhouse.yandex/docs/en/table_engines/merge/)
+[ClickHouse docs](https://clickhouse.tech/docs/en/operations/table_engines/merge/)

 A `Merge` engine is only used in conjunction with a `MergeModel`.
 This table does not store data itself, but allows reading from any number of other tables simultaneously, so you cannot insert into it.
 The engine parameter specifies a re2 (PCRE-like) regular expression; data is selected from the tables whose names match it.

-    class MergeTable(models.MergeModel):
-        engine = engines.Merge('^table_prefix')
+    class MergeTable(MergeModel):
+        engine = Merge('^table_prefix')

 ---
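
As a usage sketch (an editor's addition; it relies on `MergeModel`'s read-only `_table` field, which reports the underlying table each row came from):

```python
for row in MergeTable.objects_in(db):
    print(row._table)  # name of the source table that provided this row
```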
docs/toc.md (44 changed lines)

@@ -10,6 +10,8 @@
     * [Materialized fields](models_and_databases.md#materialized-fields)
     * [Alias fields](models_and_databases.md#alias-fields)
     * [Table Names](models_and_databases.md#table-names)
+    * [Model Constraints](models_and_databases.md#model-constraints)
+    * [Data Skipping Indexes](models_and_databases.md#data-skipping-indexes)
 * [Using Models](models_and_databases.md#using-models)
     * [Inserting to the Database](models_and_databases.md#inserting-to-the-database)
     * [Reading from the Database](models_and_databases.md#reading-from-the-database)
@@ -20,7 +22,9 @@

 * [Querysets](querysets.md#querysets)
     * [Filtering](querysets.md#filtering)
-    * [Using the in Operator](querysets.md#using-the-in-operator)
+    * [Using IN and NOT IN](querysets.md#using-in-and-not-in)
+    * [Specifying PREWHERE conditions](querysets.md#specifying-prewhere-conditions)
+    * [Old-style filter conditions](querysets.md#old-style-filter-conditions)
     * [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
     * [Ordering](querysets.md#ordering)
     * [Omitting Fields](querysets.md#omitting-fields)
@@ -28,26 +32,42 @@
     * [Final](querysets.md#final)
     * [Slicing](querysets.md#slicing)
     * [Pagination](querysets.md#pagination)
+    * [Mutations](querysets.md#mutations)
     * [Aggregation](querysets.md#aggregation)
+        * [Adding totals](querysets.md#adding-totals)

+* [Field Options](field_options.md#field-options)
+    * [default](field_options.md#default)
+    * [alias / materialized](field_options.md#alias-/-materialized)
+    * [codec](field_options.md#codec)
+    * [readonly](field_options.md#readonly)
+
 * [Field Types](field_types.md#field-types)
     * [DateTimeField and Time Zones](field_types.md#datetimefield-and-time-zones)
     * [Working with enum fields](field_types.md#working-with-enum-fields)
     * [Working with array fields](field_types.md#working-with-array-fields)
-    * [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields)
     * [Working with nullable fields](field_types.md#working-with-nullable-fields)
+    * [Working with LowCardinality fields](field_types.md#working-with-lowcardinality-fields)
     * [Creating custom field types](field_types.md#creating-custom-field-types)

 * [Table Engines](table_engines.md#table-engines)
     * [Simple Engines](table_engines.md#simple-engines)
     * [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family)
     * [Custom partitioning](table_engines.md#custom-partitioning)
+    * [Primary key](table_engines.md#primary-key)
     * [Data Replication](table_engines.md#data-replication)
     * [Buffer Engine](table_engines.md#buffer-engine)
     * [Merge Engine](table_engines.md#merge-engine)

 * [Schema Migrations](schema_migrations.md#schema-migrations)
     * [Writing Migrations](schema_migrations.md#writing-migrations)
+        * [CreateTable](schema_migrations.md#createtable)
+        * [DropTable](schema_migrations.md#droptable)
+        * [AlterTable](schema_migrations.md#altertable)
+        * [AlterTableWithBuffer](schema_migrations.md#altertablewithbuffer)
+        * [AlterConstraints](schema_migrations.md#alterconstraints)
+        * [RunPython](schema_migrations.md#runpython)
+        * [RunSQL](schema_migrations.md#runsql)
     * [Running Migrations](schema_migrations.md#running-migrations)

 * [System Models](system_models.md#system-models)
@@ -58,19 +78,23 @@
     * [Tests](contributing.md#tests)

 * [Class Reference](class_reference.md#class-reference)
-    * [infi.clickhouse_orm.database](class_reference.md#infi.clickhouse_orm.database)
+    * [infi.clickhouse_orm.database](class_reference.md#inficlickhouse_ormdatabase)
         * [Database](class_reference.md#database)
         * [DatabaseException](class_reference.md#databaseexception)
-    * [infi.clickhouse_orm.models](class_reference.md#infi.clickhouse_orm.models)
+    * [infi.clickhouse_orm.models](class_reference.md#inficlickhouse_ormmodels)
         * [Model](class_reference.md#model)
         * [BufferModel](class_reference.md#buffermodel)
+        * [MergeModel](class_reference.md#mergemodel)
         * [DistributedModel](class_reference.md#distributedmodel)
-    * [infi.clickhouse_orm.fields](class_reference.md#infi.clickhouse_orm.fields)
+        * [Constraint](class_reference.md#constraint)
+        * [Index](class_reference.md#index)
+    * [infi.clickhouse_orm.fields](class_reference.md#inficlickhouse_ormfields)
         * [ArrayField](class_reference.md#arrayfield)
         * [BaseEnumField](class_reference.md#baseenumfield)
         * [BaseFloatField](class_reference.md#basefloatfield)
         * [BaseIntField](class_reference.md#baseintfield)
         * [DateField](class_reference.md#datefield)
+        * [DateTime64Field](class_reference.md#datetime64field)
         * [DateTimeField](class_reference.md#datetimefield)
         * [Decimal128Field](class_reference.md#decimal128field)
         * [Decimal32Field](class_reference.md#decimal32field)
@@ -82,10 +106,13 @@
         * [FixedStringField](class_reference.md#fixedstringfield)
         * [Float32Field](class_reference.md#float32field)
         * [Float64Field](class_reference.md#float64field)
+        * [IPv4Field](class_reference.md#ipv4field)
+        * [IPv6Field](class_reference.md#ipv6field)
         * [Int16Field](class_reference.md#int16field)
         * [Int32Field](class_reference.md#int32field)
         * [Int64Field](class_reference.md#int64field)
         * [Int8Field](class_reference.md#int8field)
+        * [LowCardinalityField](class_reference.md#lowcardinalityfield)
         * [NullableField](class_reference.md#nullablefield)
         * [StringField](class_reference.md#stringfield)
         * [UInt16Field](class_reference.md#uint16field)
@@ -93,7 +120,7 @@
         * [UInt64Field](class_reference.md#uint64field)
         * [UInt8Field](class_reference.md#uint8field)
         * [UUIDField](class_reference.md#uuidfield)
-    * [infi.clickhouse_orm.engines](class_reference.md#infi.clickhouse_orm.engines)
+    * [infi.clickhouse_orm.engines](class_reference.md#inficlickhouse_ormengines)
         * [Engine](class_reference.md#engine)
         * [TinyLog](class_reference.md#tinylog)
         * [Log](class_reference.md#log)
@@ -105,7 +132,10 @@
         * [CollapsingMergeTree](class_reference.md#collapsingmergetree)
         * [SummingMergeTree](class_reference.md#summingmergetree)
         * [ReplacingMergeTree](class_reference.md#replacingmergetree)
-    * [infi.clickhouse_orm.query](class_reference.md#infi.clickhouse_orm.query)
+    * [infi.clickhouse_orm.query](class_reference.md#inficlickhouse_ormquery)
         * [QuerySet](class_reference.md#queryset)
         * [AggregateQuerySet](class_reference.md#aggregatequeryset)
+        * [Q](class_reference.md#q)
+    * [infi.clickhouse_orm.funcs](class_reference.md#inficlickhouse_ormfuncs)
+        * [F](class_reference.md#f)

docs/whats_new_in_version_2.md (new file, 58 lines)

What's New in Version 2
=======================

## Python 3.5+ Only

This version of the ORM no longer supports Python 2.

## New flexible syntax for database expressions and functions

Expressions that use model fields, database functions and Python operators are now first-class citizens of the ORM. They provide infinite expressivity and flexibility when defining models and generating queries.

Example of expressions in model definition:
```python
class Temperature(Model):

    station_id = UInt16Field()
    timestamp = DateTimeField(default=F.now())  # function as default value
    degrees_celsius = Float32Field()
    degrees_fahrenheit = Float32Field(alias=degrees_celsius * 1.8 + 32)  # expression as field alias

    # expressions in engine definition
    engine = MergeTree(partition_key=[F.toYYYYMM(timestamp)], order_by=[station_id, timestamp])
```

Example of expressions in queries:
```python
db = Database('default')
start = F.toStartOfMonth(F.now())
expr = (Temperature.timestamp > start) & (Temperature.station_id == 123) & (Temperature.degrees_celsius > 30)
for t in Temperature.objects_in(db).filter(expr):
    print(t.timestamp, t.degrees_celsius)
```

See [Expressions](expressions.md).

## Support for IPv4 and IPv6 fields

Two new field classes were added: `IPv4Field` and `IPv6Field`. Their values are represented by Python's `ipaddress.IPv4Address` and `ipaddress.IPv6Address`.

See [Field Types](field_types.md).

## Automatic generation of models by inspecting existing tables

It is now easy to generate a model class on the fly for an existing table in the database using `Database.get_model_for_table`. This is particularly useful for querying system tables, for example:
```python
QueryLog = db.get_model_for_table('query_log', system_table=True)
for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000):
    print(row.query)
```

## Convenient ways to import ORM classes

You can now import all ORM classes directly from `infi.clickhouse_orm`, without worrying about sub-modules. For example:
```python
from infi.clickhouse_orm import Database, Model, StringField, DateTimeField, MergeTree
```
See [Importing ORM Classes](importing_orm_classes.md).
examples/cpu_usage/.gitignore (new file, 1 line)

/env/

examples/cpu_usage/README.md (new file, 22 lines)

# CPU Usage

This basic example uses `psutil` to collect a simple time-series of per-CPU usage percent. It then prints out some aggregate statistics based on the collected data.

## Running the code

Create a virtualenv and install the required libraries:
```
virtualenv -p python3.6 env
source env/bin/activate
pip install -r requirements.txt
```

Run the `collect` script to populate the database with the CPU statistics. Let it run for a bit before pressing CTRL+C.
```
python collect.py
```

Run the `results` script to display the CPU statistics:
```
python results.py
```
examples/cpu_usage/collect.py (new file, 20 lines)

```python
import psutil, time, datetime
from infi.clickhouse_orm import Database
from models import CPUStats


db = Database('demo')
db.create_table(CPUStats)


psutil.cpu_percent(percpu=True)  # first sample should be discarded

while True:
    time.sleep(1)
    stats = psutil.cpu_percent(percpu=True)
    timestamp = datetime.datetime.now()
    print(timestamp)
    db.insert([
        CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
        for cpu_id, cpu_percent in enumerate(stats)
    ])
```
examples/cpu_usage/models.py (new file, 11 lines)

```python
from infi.clickhouse_orm import Model, DateTimeField, UInt16Field, Float32Field, Memory


class CPUStats(Model):

    timestamp = DateTimeField()
    cpu_id = UInt16Field()
    cpu_percent = Float32Field()

    engine = Memory()
```
examples/cpu_usage/requirements.txt (new file, 2 lines)

```
infi.clickhouse_orm
psutil
```
examples/cpu_usage/results.py (new file, 13 lines)

```python
from infi.clickhouse_orm import Database, F
from models import CPUStats


db = Database('demo')
queryset = CPUStats.objects_in(db)
total = queryset.filter(CPUStats.cpu_id == 1).count()
busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))

# Calculate the average usage per CPU
for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
    print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
```
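
A possible extension of `results.py` (an editor's sketch, not part of the example; `F.max` is assumed to map to ClickHouse's `max` aggregate function):

```python
# Report the peak usage per CPU alongside the average:
for row in queryset.aggregate(CPUStats.cpu_id,
                              average=F.avg(CPUStats.cpu_percent),
                              peak=F.max(CPUStats.cpu_percent)):
    print('CPU {row.cpu_id}: avg {row.average:.2f}%, peak {row.peak:.2f}%'.format(row=row))
```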
examples/db_explorer/.gitignore (new file, 1 line)

/env/

examples/db_explorer/README.md (new file, 36 lines)

# DB Explorer

This is a simple Flask web application that connects to ClickHouse and displays the list of existing databases. Clicking on a database name drills down into it, showing its list of tables. Clicking on a table drills down further, showing details about the table and its columns.

For each table or column, the application displays the compressed size on disk, the uncompressed size, and the ratio between them. Additionally, several pie charts are shown - top tables by size, top tables by rows, and top columns by size (in a table).

The pie charts are generated using the `pygal` charting library.

ORM concepts that are demonstrated by this example:

- Creating ORM models from existing tables using `Database.get_model_for_table`
- Queryset filtering
- Queryset aggregation

## Running the code

Create a virtualenv and install the required libraries:
```
virtualenv -p python3.6 env
source env/bin/activate
pip install -r requirements.txt
```

Run the server and open http://127.0.0.1:5000/ in your browser:
```
python server.py
```

By default the server connects to ClickHouse running on http://localhost:8123/ without a username or password, but you can change this using command line arguments:
```
python server.py http://myclickhouse:8123/
```
or:
```
python server.py http://myclickhouse:8123/ admin secret123
```
examples/db_explorer/charts.py (new file, 62 lines)

```python
import pygal
from pygal.style import RotateStyle
from jinja2.filters import do_filesizeformat


# Formatting functions
number_formatter = lambda v: '{:,}'.format(v)
bytes_formatter = lambda v: do_filesizeformat(v, True)


def tables_piechart(db, by_field, value_formatter):
    '''
    Generate a pie chart of the top n tables in the database.
    `db` - the database instance
    `by_field` - the field name to sort by
    `value_formatter` - a function to use for formatting the numeric values
    '''
    Tables = db.get_model_for_table('tables', system_table=True)
    qs = Tables.objects_in(db).filter(database=db.db_name, is_temporary=False).exclude(engine='Buffer')
    tuples = [(getattr(table, by_field), table.name) for table in qs]
    return _generate_piechart(tuples, value_formatter)


def columns_piechart(db, tbl_name, by_field, value_formatter):
    '''
    Generate a pie chart of the top n columns in the table.
    `db` - the database instance
    `tbl_name` - the table name
    `by_field` - the field name to sort by
    `value_formatter` - a function to use for formatting the numeric values
    '''
    ColumnsTable = db.get_model_for_table('columns', system_table=True)
    qs = ColumnsTable.objects_in(db).filter(database=db.db_name, table=tbl_name)
    tuples = [(getattr(col, by_field), col.name) for col in qs]
    return _generate_piechart(tuples, value_formatter)


def _get_top_tuples(tuples, n=15):
    '''
    Given a list of tuples (value, name), this function sorts
    the list and returns only the top n results. All other tuples
    are aggregated to a single "others" tuple.
    '''
    non_zero_tuples = [t for t in tuples if t[0]]
    sorted_tuples = sorted(non_zero_tuples, reverse=True)
    if len(sorted_tuples) > n:
        others = (sum(t[0] for t in sorted_tuples[n:]), 'others')
        sorted_tuples = sorted_tuples[:n] + [others]
    return sorted_tuples


def _generate_piechart(tuples, value_formatter):
    '''
    Generates a pie chart.
    `tuples` - a list of (value, name) tuples to include in the chart
    `value_formatter` - a function to use for formatting the values
    '''
    style = RotateStyle('#9e6ffe', background='white', legend_font_family='Roboto', legend_font_size=18, tooltip_font_family='Roboto', tooltip_font_size=24)
    chart = pygal.Pie(style=style, margin=0, title=' ', value_formatter=value_formatter, truncate_legend=-1)
    for t in _get_top_tuples(tuples):
        chart.add(t[1], t[0])
    return chart.render(is_unicode=True, disable_xml_declaration=True)
```
examples/db_explorer/requirements.txt (new file, 15 lines)

```
certifi==2020.4.5.2
chardet==3.0.4
click==7.1.2
Flask==1.1.2
idna==2.9
infi.clickhouse-orm==2.0.1
iso8601==0.1.12
itsdangerous==1.1.0
Jinja2==2.11.3
MarkupSafe==1.1.1
pygal==2.4.0
pytz==2020.1
requests==2.23.0
urllib3==1.26.5
Werkzeug==1.0.1
```
87
examples/db_explorer/server.py
Normal file
87
examples/db_explorer/server.py
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
from infi.clickhouse_orm import Database, F
|
||||||
|
from charts import tables_piechart, columns_piechart, number_formatter, bytes_formatter
|
||||||
|
from flask import Flask
|
||||||
|
from flask import render_template
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/')
|
||||||
|
def homepage_view():
|
||||||
|
'''
|
||||||
|
Root view that lists all databases.
|
||||||
|
'''
|
||||||
|
db = _get_db('system')
|
||||||
|
# Get all databases in the system.databases table
|
||||||
|
DatabasesTable = db.get_model_for_table('databases', system_table=True)
|
||||||
|
databases = DatabasesTable.objects_in(db).exclude(name='system')
|
||||||
|
databases = databases.order_by(F.lower(DatabasesTable.name))
|
||||||
|
# Generate the page
|
||||||
|
return render_template('homepage.html', db=db, databases=databases)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/<db_name>/')
|
||||||
|
def database_view(db_name):
|
||||||
|
'''
|
||||||
|
A view that displays information about a single database.
|
||||||
|
'''
|
||||||
|
db = _get_db(db_name)
|
||||||
|
# Get all the tables in the database, by aggregating information from system.columns
|
||||||
|
ColumnsTable = db.get_model_for_table('columns', system_table=True)
|
||||||
|
tables = ColumnsTable.objects_in(db).filter(database=db_name).aggregate(
|
||||||
|
ColumnsTable.table,
|
||||||
|
compressed_size=F.sum(ColumnsTable.data_compressed_bytes),
|
||||||
|
uncompressed_size=F.sum(ColumnsTable.data_uncompressed_bytes),
|
||||||
|
ratio=F.sum(ColumnsTable.data_uncompressed_bytes) / F.sum(ColumnsTable.data_compressed_bytes)
|
||||||
|
)
|
||||||
|
tables = tables.order_by(F.lower(ColumnsTable.table))
|
||||||
|
# Generate the page
|
||||||
|
return render_template('database.html',
|
||||||
|
db=db,
|
||||||
|
tables=tables,
|
||||||
|
tables_piechart_by_rows=tables_piechart(db, 'total_rows', value_formatter=number_formatter),
|
||||||
|
tables_piechart_by_size=tables_piechart(db, 'total_bytes', value_formatter=bytes_formatter),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/<db_name>/<tbl_name>/')
|
||||||
|
def table_view(db_name, tbl_name):
|
||||||
|
'''
|
||||||
|
A view that displays information about a single table.
|
||||||
|
'''
|
||||||
|
db = _get_db(db_name)
|
||||||
|
# Get table information from system.tables
|
||||||
|
TablesTable = db.get_model_for_table('tables', system_table=True)
|
||||||
|
tbl_info = TablesTable.objects_in(db).filter(database=db_name, name=tbl_name)[0]
|
||||||
|
# Get the SQL used for creating the table
|
||||||
|
create_table_sql = db.raw('SHOW CREATE TABLE %s FORMAT TabSeparatedRaw' % tbl_name)
|
||||||
|
# Get all columns in the table from system.columns
|
||||||
|
ColumnsTable = db.get_model_for_table('columns', system_table=True)
|
||||||
|
columns = ColumnsTable.objects_in(db).filter(database=db_name, table=tbl_name)
|
||||||
|
# Generate the page
|
||||||
|
return render_template('table.html',
|
||||||
|
db=db,
|
||||||
|
tbl_name=tbl_name,
|
||||||
|
tbl_info=tbl_info,
|
||||||
|
create_table_sql=create_table_sql,
|
||||||
|
columns=columns,
|
||||||
|
piechart=columns_piechart(db, tbl_name, 'data_compressed_bytes', value_formatter=bytes_formatter),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_db(db_name):
|
||||||
|
'''
|
||||||
|
Returns a Database instance using connection information
|
||||||
|
from the command line arguments (optional).
|
||||||
|
'''
|
||||||
|
db_url = sys.argv[1] if len(sys.argv) > 1 else 'http://localhost:8123/'
|
||||||
|
username = sys.argv[2] if len(sys.argv) > 2 else None
|
||||||
|
password = sys.argv[3] if len(sys.argv) > 3 else None
|
||||||
|
return Database(db_name, db_url, username, password, readonly=True)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
_get_db('system') # fail early on db connection problems
|
||||||
|
app.run(debug=True)
examples/db_explorer/templates/base.html (Normal file, 22 lines)
@@ -0,0 +1,22 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>ClickHouse Explorer</title>
    <link rel="icon" href="data:,">
    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300italic,700,700italic">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/milligram/1.4.0/milligram.css">
    <script type="text/javascript" src="http://kozea.github.com/pygal.js/latest/pygal-tooltips.min.js"></script>
</head>
<body>

    <div class="container">

        {% block contents %}
        {% endblock %}

    </div>

</body>
</html>
examples/db_explorer/templates/database.html (Normal file, 54 lines)
@@ -0,0 +1,54 @@
{% extends "base.html" %}

{% block contents %}

<h1>{{ db.db_name }}</h1>

<p>
    <a href="..">Home</a>
    »
    {{ db.db_name }}
</p>

<div class="row">

    <div class="column">
        <h2>Top Tables by Size</h2>
        {% autoescape false %}
        {{ tables_piechart_by_size }}
        {% endautoescape %}
    </div>

    <div class="column">
        <h2>Top Tables by Rows</h2>
        {% autoescape false %}
        {{ tables_piechart_by_rows }}
        {% endautoescape %}
    </div>

</div>

<h2>Tables ({{ tables.count() }})</h2>

<table>
    <thead>
        <tr>
            <th>Name</th>
            <th>Uncompressed Size</th>
            <th>Compressed Size</th>
            <th>Compression Ratio</th>
        </tr>
    </thead>
    <tbody>
        {% for table in tables %}
        <tr>
            <td><a href="{{ table.table|urlencode }}/">{{ table.table }}</a></td>
            <td>{{ table.uncompressed_size|filesizeformat(true) }}</td>
            <td>{{ table.compressed_size|filesizeformat(true) }}</td>
            <td>{% if table.uncompressed_size %} {{ "%.2f" % table.ratio }} {% else %} 1 {% endif %} : 1</td>
        </tr>
        {% endfor %}
    </tbody>
</table>

{% endblock %}
examples/db_explorer/templates/homepage.html (Normal file, 41 lines)
@@ -0,0 +1,41 @@
{% extends "base.html" %}

{% block contents %}

<div class="row">

    <div class="column-50">

        <h1>ClickHouse Explorer</h1>

        <table>
            <tr>
                <th>URL</th>
                <td>{{ db.db_url }}</td>
            </tr>
            <tr>
                <th>Version</th>
                <td>{{ db.server_version|join('.') }}</td>
            </tr>
            <tr>
                <th>Timezone</th>
                <td>{{ db.server_timezone }}</td>
            </tr>
        </table>

        <h2>Databases ({{ databases.count() }})</h2>
        <ul>
            {% for d in databases %}
            <li>
                <a href="{{ d.name|urlencode }}/">{{ d.name }}</a>
            </li>
            {% endfor %}
        </ul>

    </div>

</div>

{% endblock %}
examples/db_explorer/templates/table.html (Normal file, 79 lines)
@@ -0,0 +1,79 @@
{% extends "base.html" %}

{% block contents %}

<p>
    <a href="../..">Home</a>
    »
    <a href="..">{{ db.db_name }}</a>
    »
    {{ tbl_name }}
</p>

<h1>{{ tbl_name }}</h1>

<div class="row">

    <div class="column">
        <h2>Details</h2>
        <table>
            <tr>
                <th>Total rows</th>
                <td>{{ "{:,}".format(tbl_info.total_rows) }}</td>
            </tr>
            <tr>
                <th>Total size</th>
                <td>{{ tbl_info.total_bytes|filesizeformat(true) }}</td>
            </tr>
            {% if tbl_info.total_rows %}
            <tr>
                <th>Average row size</th>
                <td>{{ (tbl_info.total_bytes / tbl_info.total_rows)|filesizeformat(true) }}</td>
            </tr>
            {% endif %}
            <tr>
                <th>Engine</th>
                <td>{{ tbl_info.engine }}</td>
            </tr>
        </table>
    </div>

    <div class="column">
        <h2>Top Columns by Size</h2>
        {% autoescape false %}
        {{ piechart }}
        {% endautoescape %}
    </div>

</div>

<h2>Columns ({{ columns.count() }})</h2>

<table>
    <thead>
        <tr>
            <th>Name</th>
            <th>Type</th>
            <th>Uncompressed Size</th>
            <th>Compressed Size</th>
            <th>Compression Ratio</th>
        </tr>
    </thead>
    <tbody>
        {% for col in columns %}
        <tr>
            <td>{{ col.name }}</td>
            <td>{{ col.type }}</td>
            <td>{{ col.data_uncompressed_bytes|filesizeformat(true) }}</td>
            <td>{{ col.data_compressed_bytes|filesizeformat(true) }}</td>
            <td>{% if col.data_compressed_bytes %} {{ "%.2f" % (col.data_uncompressed_bytes / col.data_compressed_bytes) }} {% else %} 1 {% endif %} : 1</td>
        </tr>
        {% endfor %}
    </tbody>
</table>

<h2>Table Definition</h2>
<pre><code>{{ create_table_sql }}</code></pre>

{% endblock %}
examples/full_text_search/.gitignore (Normal file, vendored, 2 lines)
@@ -0,0 +1,2 @@
/ebooks/
/env/
examples/full_text_search/README.md (Normal file, 80 lines)
@@ -0,0 +1,80 @@
# Full Text Search

This example shows how ClickHouse might be used for searching for word sequences in texts. It's a nice proof of concept, but for production use there are probably better solutions, such as Elasticsearch.

## Running the code

Create a virtualenv and install the required libraries:
```
virtualenv -p python3.6 env
source env/bin/activate
pip install -r requirements.txt
```
Run the `download_ebooks` script to download a dozen classical books from [The Gutenberg Project](http://www.gutenberg.org/):
```
python download_ebooks.py
```
Run the `load` script to populate the database with the downloaded texts:
```
python load.py
```
And finally, run the full text search:
```
python search.py "cheshire cat"
```
Asterisks can be used as wildcards (each asterisk stands for one word):
```
python search.py "much * than"
```

## How it works

The `models.py` file defines an ORM model for storing each word in the indexed texts:
```python
class Fragment(Model):

    language = LowCardinalityField(StringField(), default='EN')
    document = LowCardinalityField(StringField())
    idx = UInt64Field()
    word = StringField()
    stem = StringField()

    # An index for faster search by document and fragment idx
    index = Index((document, idx), type=Index.minmax(), granularity=1)

    # The primary key allows efficient lookup of stems
    engine = MergeTree(order_by=(stem, document, idx), partition_key=('language',))
```
The `document` (name) and `idx` (running number of the word inside the document) fields identify a specific word. The `word` field stores the original word as it appears in the text, while `stem` contains the word after normalization, and that is the field used for matching the search terms. Stemming the words makes the matching less strict, so that searching for "swallowed" will also find documents that mention "swallow" or "swallowing".
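As a quick illustration (a standalone sketch, not part of the example code), NLTK's `PorterStemmer`, the stemmer the `load` script uses, maps all three variants to the same stem:
```python
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()
for word in ['swallow', 'swallowed', 'swallowing']:
    # All three variants normalize to the stem 'swallow'
    print(word, '->', stemmer.stem(word))
```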
Here's what some records in the fragment table might look like:

| language | document                | idx  | word     | stem    |
|----------|-------------------------|------|----------|---------|
| EN       | Moby Dick; or The Whale | 4510 | whenever | whenev  |
| EN       | Moby Dick; or The Whale | 4511 | it       | it      |
| EN       | Moby Dick; or The Whale | 4512 | is       | is      |
| EN       | Moby Dick; or The Whale | 4513 | a        | a       |
| EN       | Moby Dick; or The Whale | 4514 | damp,    | damp    |
| EN       | Moby Dick; or The Whale | 4515 | drizzly  | drizzli |
| EN       | Moby Dick; or The Whale | 4516 | November | novemb  |
| EN       | Moby Dick; or The Whale | 4517 | in       | in      |
| EN       | Moby Dick; or The Whale | 4518 | my       | my      |
| EN       | Moby Dick; or The Whale | 4519 | soul;    | soul    |

Let's say we're looking for the terms "drizzly November". Finding the first word in the sequence (after stemming it) is fast and easy:
```python
query = Fragment.objects_in(db).filter(stem='drizzli').only(Fragment.document, Fragment.idx)
```
We're interested only in the `document` and `idx` fields, since together they identify a specific word.

To find the next word in the search terms, we need a subquery similar to the first one, with an additional condition: its index must be one greater than the index of the first word:
```python
subquery = Fragment.objects_in(db).filter(stem='novemb').only(Fragment.document, Fragment.idx)
query = query.filter(F.isIn((Fragment.document, Fragment.idx + 1), subquery))
```
And so on: by adding another subquery for each additional search term, we can construct the whole sequence of words.

As for wildcard support, when we encounter a wildcard in the search terms we simply skip it - it needs no subquery of its own, since it can match any word. It only increments the index offset, so that the query conditions "skip" one word in the sequence.

The algorithm for building this compound query can be found in the `build_query` function; a small illustration of the offset bookkeeping follows below.
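Here is a minimal standalone sketch mirroring the loop in `build_query`: for the search `"much * than"`, the wildcard consumes position 1 without a subquery, so the stem of `than` is required at `idx + 2`:
```python
stems = ['much', '*', 'than']   # already-stemmed search terms
for i, stem in enumerate(stems):
    if i == 0 or stem == '*':
        continue  # the first word is the base query; wildcards need no subquery
    print('subquery: stem=%r required at idx + %d' % (stem, i))
# Output: subquery: stem='than' required at idx + 2
```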
examples/full_text_search/download_ebooks.py (Normal file, 27 lines)
@@ -0,0 +1,27 @@
import requests
import os


def download_ebook(id):
    print(id, end=' ')
    # Download the ebook's text
    r = requests.get('https://www.gutenberg.org/files/{id}/{id}-0.txt'.format(id=id))
    if r.status_code == 404:
        print('NOT FOUND, SKIPPING')
        return
    r.raise_for_status()
    # Find the ebook's title
    text = r.content.decode('utf-8')
    for line in text.splitlines():
        if line.startswith('Title:'):
            title = line[6:].strip()
            print(title)
    # Save the ebook
    with open('ebooks/{}.txt'.format(title), 'wb') as f:
        f.write(r.content)


if __name__ == "__main__":
    os.makedirs('ebooks', exist_ok=True)
    for i in [1342, 11, 84, 2701, 25525, 1661, 98, 74, 43, 215, 1400, 76]:
        download_ebook(i)
examples/full_text_search/load.py (Normal file, 61 lines)
@@ -0,0 +1,61 @@
import sys
import nltk
from nltk.stem.porter import PorterStemmer
from glob import glob
from infi.clickhouse_orm import Database
from models import Fragment


def trim_punctuation(word):
    '''
    Trim punctuation characters from the beginning and end of the word
    '''
    start = end = len(word)
    for i in range(len(word)):
        if word[i].isalnum():
            start = min(start, i)
            end = i + 1
    return word[start : end]


def parse_file(filename):
    '''
    Parses a text file at the given path.
    Returns a generator of tuples (original_word, stemmed_word)
    The original_word may include punctuation characters.
    '''
    stemmer = PorterStemmer()
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            for word in line.split():
                yield (word, stemmer.stem(trim_punctuation(word)))


def get_fragments(filename):
    '''
    Converts a text file at the given path to a generator
    of Fragment instances.
    '''
    from os import path
    document = path.splitext(path.basename(filename))[0]
    idx = 0
    for word, stem in parse_file(filename):
        idx += 1
        yield Fragment(document=document, idx=idx, word=word, stem=stem)
    print('{} - {} words'.format(filename, idx))


if __name__ == '__main__':

    # Load NLTK data if necessary
    nltk.download('punkt')
    nltk.download('wordnet')

    # Initialize database
    db = Database('default')
    db.create_table(Fragment)

    # Load files from the command line or everything under ebooks/
    filenames = sys.argv[1:] or glob('ebooks/*.txt')
    for filename in filenames:
        db.insert(get_fragments(filename), batch_size=100000)
examples/full_text_search/models.py (Normal file, 16 lines)
@@ -0,0 +1,16 @@
from infi.clickhouse_orm import *


class Fragment(Model):

    language = LowCardinalityField(StringField(), default='EN')
    document = LowCardinalityField(StringField())
    idx = UInt64Field()
    word = StringField()
    stem = StringField()

    # An index for faster search by document and fragment idx
    index = Index((document, idx), type=Index.minmax(), granularity=1)

    # The primary key allows efficient lookup of stems
    engine = MergeTree(order_by=(stem, document, idx), partition_key=('language',))
examples/full_text_search/requirements.txt (Normal file, 4 lines)
@@ -0,0 +1,4 @@
infi.clickhouse_orm
nltk
requests
colorama
examples/full_text_search/search.py (Normal file, 90 lines)
@@ -0,0 +1,90 @@
import sys
from colorama import init, Fore, Back, Style
from nltk.stem.porter import PorterStemmer
from infi.clickhouse_orm import Database, F
from models import Fragment
from load import trim_punctuation


# The wildcard character
WILDCARD = '*'


def prepare_search_terms(text):
    '''
    Convert the text to search into a list of stemmed words.
    '''
    stemmer = PorterStemmer()
    stems = []
    for word in text.split():
        if word == WILDCARD:
            stems.append(WILDCARD)
        else:
            stems.append(stemmer.stem(trim_punctuation(word)))
    return stems


def build_query(db, stems):
    '''
    Returns a queryset instance for finding sequences of Fragment instances
    that match the list of stemmed words.
    '''
    # Start by searching for the first stemmed word
    all_fragments = Fragment.objects_in(db)
    query = all_fragments.filter(stem=stems[0]).only(Fragment.document, Fragment.idx)
    # Add the following words to the queryset
    for i, stem in enumerate(stems):
        # Skip the first word (it's already in the query), and wildcards
        if i == 0 or stem == WILDCARD:
            continue
        # Create a subquery that finds instances of the i'th word
        subquery = all_fragments.filter(stem=stem).only(Fragment.document, Fragment.idx)
        # Add it to the query, requiring that it will appear i places away from the first word
        query = query.filter(F.isIn((Fragment.document, Fragment.idx + i), subquery))
    # Sort the results
    query = query.order_by(Fragment.document, Fragment.idx)
    return query


def get_matching_text(db, document, from_idx, to_idx, extra=5):
    '''
    Reconstructs the document text between the given indexes (inclusive),
    plus `extra` words before and after the match. The words that are
    included in the given range are highlighted in green.
    '''
    text = []
    conds = (Fragment.document == document) & (Fragment.idx >= from_idx - extra) & (Fragment.idx <= to_idx + extra)
    for fragment in Fragment.objects_in(db).filter(conds).order_by('document', 'idx'):
        word = fragment.word
        if fragment.idx == from_idx:
            word = Fore.GREEN + word
        if fragment.idx == to_idx:
            word = word + Style.RESET_ALL
        text.append(word)
    return ' '.join(text)


def find(db, text):
    '''
    Performs the search for the given text, and prints out the matches.
    '''
    stems = prepare_search_terms(text)
    query = build_query(db, stems)
    print('\n' + Fore.MAGENTA + str(query) + Style.RESET_ALL + '\n')
    for match in query:
        text = get_matching_text(db, match.document, match.idx, match.idx + len(stems) - 1)
        print(Fore.CYAN + match.document + ':' + Style.RESET_ALL, text)


if __name__ == '__main__':

    # Initialize colored output
    init()

    # Initialize database
    db = Database('default')

    # Search
    text = ' '.join(sys.argv[1:])
    if text:
        find(db, text)
@@ -52,8 +52,6 @@ def get_method_sig(method):
         default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)
         if default_arg.has_default:
             val = default_arg.default_value
-            if isinstance(val, basestring):
-                val = '"' + val + '"'
             args.append("%s=%s" % (arg, val))
         else:
             args.append(arg)
@@ -73,45 +71,45 @@ def docstring(obj):
     indentation = min(len(line) - len(line.lstrip()) for line in lines if line.strip())
     # Output the lines without the indentation
     for line in lines:
-        print line[indentation:]
+        print(line[indentation:])
-    print
+    print()


 def class_doc(cls, list_methods=True):
     bases = ', '.join([b.__name__ for b in cls.__bases__])
-    print '###', cls.__name__
-    print
+    print('###', cls.__name__)
+    print()
     if bases != 'object':
-        print 'Extends', bases
-        print
+        print('Extends', bases)
+        print()
     docstring(cls)
-    for name, method in inspect.getmembers(cls, inspect.ismethod):
+    for name, method in inspect.getmembers(cls, lambda m: inspect.ismethod(m) or inspect.isfunction(m)):
         if name == '__init__':
             # Initializer
-            print '####', get_method_sig(method).replace(name, cls.__name__)
+            print('####', get_method_sig(method).replace(name, cls.__name__))
         elif name[0] == '_':
             # Private method
             continue
-        elif method.__self__ == cls:
+        elif hasattr(method, '__self__') and method.__self__ == cls:
             # Class method
             if not list_methods:
                 continue
-            print '#### %s.%s' % (cls.__name__, get_method_sig(method))
+            print('#### %s.%s' % (cls.__name__, get_method_sig(method)))
         else:
             # Regular method
             if not list_methods:
                 continue
-            print '####', get_method_sig(method)
+            print('####', get_method_sig(method))
-        print
+        print()
         docstring(method)
-        print
+        print()


 def module_doc(classes, list_methods=True):
     mdl = classes[0].__module__
-    print mdl
-    print '-' * len(mdl)
-    print
+    print(mdl)
+    print('-' * len(mdl))
+    print()
     for cls in classes:
         class_doc(cls, list_methods)
@@ -127,12 +125,16 @@ if __name__ == '__main__':
     from infi.clickhouse_orm import engines
     from infi.clickhouse_orm import models
     from infi.clickhouse_orm import query
+    from infi.clickhouse_orm import funcs
+    from infi.clickhouse_orm import system_models

-    print 'Class Reference'
-    print '==============='
-    print
+    print('Class Reference')
+    print('===============')
+    print()
     module_doc([database.Database, database.DatabaseException])
-    module_doc([models.Model, models.BufferModel, models.DistributedModel])
+    module_doc([models.Model, models.BufferModel, models.MergeModel, models.DistributedModel, models.Constraint, models.Index])
     module_doc(sorted([fields.Field] + all_subclasses(fields.Field), key=lambda x: x.__name__), False)
     module_doc([engines.Engine] + all_subclasses(engines.Engine), False)
-    module_doc([query.QuerySet, query.AggregateQuerySet])
+    module_doc([query.QuerySet, query.AggregateQuerySet, query.Q])
+    module_doc([funcs.F])
+    module_doc([system_models.SystemPart])
@@ -9,6 +9,7 @@ printf "# Table of Contents\n\n" > toc.md
 generate_one "index.md"
 generate_one "models_and_databases.md"
 generate_one "querysets.md"
+generate_one "field_options.md"
 generate_one "field_types.md"
 generate_one "table_engines.md"
 generate_one "schema_migrations.md"
@@ -1,4 +1,4 @@
-from HTMLParser import HTMLParser
+from html.parser import HTMLParser
 import sys


@@ -17,8 +17,8 @@ class HeadersToMarkdownParser(HTMLParser):
     def handle_endtag(self, tag):
         if tag.lower() in HEADER_TAGS:
             indent = '   ' * int(self.inside[1])
-            fragment = self.text.lower().replace(' ', '-')
-            print '%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment)
+            fragment = self.text.lower().replace(' ', '-').replace('.', '')
+            print('%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment))
             self.inside = None
             self.text = ''
@@ -28,4 +28,4 @@ class HeadersToMarkdownParser(HTMLParser):
 HeadersToMarkdownParser().feed(sys.stdin.read())
-print
+print('')
@@ -1 +1,13 @@
 __import__("pkg_resources").declare_namespace(__name__)
+
+from infi.clickhouse_orm.database import *
+from infi.clickhouse_orm.engines import *
+from infi.clickhouse_orm.fields import *
+from infi.clickhouse_orm.funcs import *
+from infi.clickhouse_orm.migrations import *
+from infi.clickhouse_orm.models import *
+from infi.clickhouse_orm.query import *
+from infi.clickhouse_orm.system_models import *
+
+from inspect import isclass
+__all__ = [c.__name__ for c in locals().values() if isclass(c)]
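With these re-exports in place, user code can import everything from the package root, which is exactly what the db_explorer example's `server.py` above does:
```python
from infi.clickhouse_orm import Database, F
```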
@@ -8,7 +8,6 @@ from .utils import escape, parse_tsv, import_submodules
 from math import ceil
 import datetime
 from string import Template
-from six import PY3, string_types
 import pytz

 import logging
@@ -52,6 +51,11 @@ class ServerError(DatabaseException):
         Code:\ (?P<code>\d+),
         \ e\.displayText\(\)\ =\ (?P<type1>[^ \n]+):\ (?P<msg>.+)
         ''', re.VERBOSE | re.DOTALL),
+        # ClickHouse v21+
+        re.compile(r'''
+            Code:\ (?P<code>\d+).
+            \ (?P<type1>[^ \n]+):\ (?P<msg>.+)
+            ''', re.VERBOSE | re.DOTALL),
     )

     @classmethod
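A quick sanity check of the new v21+ pattern - a standalone sketch, using a hypothetical error message shaped like the ones newer servers return:
```python
import re

pattern = re.compile(r'''
    Code:\ (?P<code>\d+).
    \ (?P<type1>[^ \n]+):\ (?P<msg>.+)
''', re.VERBOSE | re.DOTALL)

m = pattern.match("Code: 60. DB::Exception: Table default.foo doesn't exist.")
print(m.group('code'), m.group('type1'))  # -> 60 DB::Exception
```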
@@ -120,6 +124,10 @@ class Database(object):
         self.server_version = self._get_server_version()
         # Versions 1.1.53981 and below don't have timezone function
         self.server_timezone = self._get_server_timezone() if self.server_version > (1, 1, 53981) else pytz.utc
+        # Versions 19.1.16 and above support codec compression
+        self.has_codec_support = self.server_version >= (19, 1, 16)
+        # Version 19.0 and above support LowCardinality
+        self.has_low_cardinality_support = self.server_version >= (19, 0)

     def create_database(self):
         '''
@@ -162,6 +170,24 @@ class Database(object):
         r = self._send(sql % (self.db_name, model_class.table_name()))
         return r.text.strip() == '1'

+    def get_model_for_table(self, table_name, system_table=False):
+        '''
+        Generates a model class from an existing table in the database.
+        This can be used for querying tables which don't have a corresponding model class,
+        for example system tables.
+
+        - `table_name`: the table to create a model for
+        - `system_table`: whether the table is a system table, or belongs to the current database
+        '''
+        db_name = 'system' if system_table else self.db_name
+        sql = "DESCRIBE `%s`.`%s` FORMAT TSV" % (db_name, table_name)
+        lines = self._send(sql).iter_lines()
+        fields = [parse_tsv(line)[:2] for line in lines]
+        model = ModelBase.create_ad_hoc_model(fields, table_name)
+        if system_table:
+            model._system = model._readonly = True
+        return model
+
     def add_setting(self, name, value):
         '''
         Adds a database setting that will be sent with every request.
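The new `get_model_for_table` is what the db_explorer example above builds on. A usage sketch, assuming a server is reachable at the default URL:
```python
db = Database('system', 'http://localhost:8123/', readonly=True)
DatabasesTable = db.get_model_for_table('databases', system_table=True)
for row in DatabasesTable.objects_in(db).exclude(name='system'):
    print(row.name)
```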
@@ -170,7 +196,7 @@ class Database(object):
         The name must be string, and the value is converted to string in case
         it isn't. To remove a setting, pass `None` as the value.
         '''
-        assert isinstance(name, string_types), 'Setting name must be a string'
+        assert isinstance(name, str), 'Setting name must be a string'
         if value is None:
             self.settings.pop(name, None)
         else:
@@ -183,7 +209,6 @@ class Database(object):
         - `model_instances`: any iterable containing instances of a single model class.
         - `batch_size`: number of records to send per chunk (use a lower number if your records are very large).
         '''
-        from six import next
         from io import BytesIO
         i = iter(model_instances)
         try:
@@ -197,20 +222,19 @@ class Database(object):
         fields_list = ','.join(
             ['`%s`' % name for name in first_instance.fields(writable=True)])
+        fmt = 'TSKV' if model_class.has_funcs_as_defaults() else 'TabSeparated'
+        query = 'INSERT INTO $table (%s) FORMAT %s\n' % (fields_list, fmt)

         def gen():
             buf = BytesIO()
-            query = 'INSERT INTO $table (%s) FORMAT TabSeparated\n' % fields_list
             buf.write(self._substitute(query, model_class).encode('utf-8'))
             first_instance.set_database(self)
-            buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8'))
-            buf.write('\n'.encode('utf-8'))
+            buf.write(first_instance.to_db_string())
             # Collect lines in batches of batch_size
             lines = 2
             for instance in i:
                 instance.set_database(self)
-                buf.write(instance.to_tsv(include_readonly=False).encode('utf-8'))
-                buf.write('\n'.encode('utf-8'))
+                buf.write(instance.to_db_string())
                 lines += 1
                 if lines >= batch_size:
                     # Return the current batch of lines
@@ -230,9 +254,12 @@ class Database(object):
         - `model_class`: the model to count.
         - `conditions`: optional SQL conditions (contents of the WHERE clause).
         '''
+        from infi.clickhouse_orm.query import Q
         query = 'SELECT count() FROM $table'
         if conditions:
-            query += ' WHERE ' + conditions
+            if isinstance(conditions, Q):
+                conditions = conditions.to_sql(model_class)
+            query += ' WHERE ' + str(conditions)
         query = self._substitute(query, model_class)
         r = self._send(query)
         return int(r.text) if r.text else 0
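With this change, `count` accepts a `Q` object as well as a raw SQL string. A sketch using the `Fragment` model from the full text search example:
```python
from infi.clickhouse_orm import Database
from infi.clickhouse_orm.query import Q
from models import Fragment

db = Database('default')
# Both forms are now equivalent:
db.count(Fragment, "stem = 'drizzli'")
db.count(Fragment, Q(stem='drizzli'))
```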
@@ -284,6 +311,7 @@ class Database(object):
         The result is a namedtuple containing `objects` (list), `number_of_objects`,
         `pages_total`, `number` (of the current page), and `page_size`.
         '''
+        from infi.clickhouse_orm.query import Q
         count = self.count(model_class, conditions)
         pages_total = int(ceil(count / float(page_size)))
         if page_num == -1:
@@ -291,9 +319,12 @@ class Database(object):
         elif page_num < 1:
             raise ValueError('Invalid page number: %d' % page_num)
         offset = (page_num - 1) * page_size
-        query = 'SELECT * FROM $table'
+        query = 'SELECT {} FROM $table'.format(", ".join(model_class.fields().keys()))
+
         if conditions:
-            query += ' WHERE ' + conditions
+            if isinstance(conditions, Q):
+                conditions = conditions.to_sql(model_class)
+            query += ' WHERE ' + str(conditions)
         query += ' ORDER BY %s' % order_by
         query += ' LIMIT %d, %d' % (offset, page_size)
         query = self._substitute(query, model_class)
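Enumerating the model's fields rather than using `SELECT *` means alias and materialized columns, which ClickHouse omits from `SELECT *`, are now requested explicitly in paginated queries. A minimal sketch of the new SELECT-list construction, with hypothetical field names:
```python
# Stands in for model_class.fields(), which maps field names to Field objects
fields = {'id': None, 'color': None}
query = 'SELECT {} FROM $table'.format(", ".join(fields.keys()))
print(query)  # -> SELECT id, color FROM $table
```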
@@ -334,7 +365,7 @@ class Database(object):
         return set(obj.module_name for obj in self.select(query))

     def _send(self, data, settings=None, stream=False):
-        if isinstance(data, string_types):
+        if isinstance(data, str):
             data = data.encode('utf-8')
         if self.log_statements:
             logger.info(data)
@@ -362,7 +393,7 @@ class Database(object):
         mapping = dict(db="`%s`" % self.db_name)
         if model_class:
             if model_class.is_system_model():
-                mapping['table'] = model_class.table_name()
+                mapping['table'] = "`system`.`%s`" % model_class.table_name()
             else:
                 mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name())
         query = Template(query).safe_substitute(mapping)
@@ -383,7 +414,7 @@ class Database(object):
         except ServerError as e:
             logger.exception('Cannot determine server version (%s), assuming 1.1.0', e)
             ver = '1.1.0'
-        return tuple(int(n) for n in ver.split('.')) if as_tuple else ver
+        return tuple(int(n) for n in ver.split('.') if n.isdigit()) if as_tuple else ver

     def _is_existing_database(self):
         r = self._send("SELECT count() FROM system.databases WHERE name = '%s'" % self.db_name)
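Version parsing is now tolerant of non-numeric components. A standalone sketch with a hypothetical version string:
```python
ver = '21.12.3.32-stable'  # hypothetical server version string
# Non-numeric parts such as '32-stable' are simply skipped:
print(tuple(int(n) for n in ver.split('.') if n.isdigit()))  # -> (21, 12, 3)
```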
@@ -392,3 +423,7 @@ class Database(object):
     def _is_connection_readonly(self):
         r = self._send("SELECT value FROM system.settings WHERE name = 'readonly'")
         return r.text.strip() != '0'
+
+
+# Expose only relevant classes in import *
+__all__ = [c.__name__ for c in [Page, DatabaseException, ServerError, Database]]
@@ -1,9 +1,8 @@
 from __future__ import unicode_literals

 import logging
-import six

-from .utils import comma_join
+from .utils import comma_join, get_subclass_names

 logger = logging.getLogger('clickhouse_orm')
@@ -35,12 +34,14 @@ class Memory(Engine):
 class MergeTree(Engine):

     def __init__(self, date_col=None, order_by=(), sampling_expr=None,
-                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None):
+                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None,
+                 primary_key=None):
         assert type(order_by) in (list, tuple), 'order_by must be a list or tuple'
-        assert date_col is None or isinstance(date_col, six.string_types), 'date_col must be string if present'
+        assert date_col is None or isinstance(date_col, str), 'date_col must be string if present'
+        assert primary_key is None or type(primary_key) in (list, tuple), 'primary_key must be a list or tuple'
         assert partition_key is None or type(partition_key) in (list, tuple),\
             'partition_key must be tuple or list if present'
-        assert (replica_table_path is None) == (replica_name == None), \
+        assert (replica_table_path is None) == (replica_name is None), \
             'both replica_table_path and replica_name must be specified'

         # These values conflict with each other (old and new syntax of table engines).
@@ -48,6 +49,7 @@ class MergeTree(Engine):
         assert date_col or partition_key, "You must set either date_col or partition_key"
         self.date_col = date_col
         self.partition_key = partition_key if partition_key else ('toYYYYMM(`%s`)' % date_col,)
+        self.primary_key = primary_key

         self.order_by = order_by
         self.sampling_expr = sampling_expr
@@ -72,11 +74,15 @@ class MergeTree(Engine):
             name = 'Replicated' + name

         # In ClickHouse 1.1.54310 custom partitioning key was introduced
-        # https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
+        # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
         # Let's check version and use new syntax if available
         if db.server_version >= (1, 1, 54310):
-            partition_sql = "PARTITION BY %s ORDER BY %s" \
-                            % ('(%s)' % comma_join(self.partition_key), '(%s)' % comma_join(self.order_by))
+            partition_sql = "PARTITION BY (%s) ORDER BY (%s)" \
+                            % (comma_join(self.partition_key, stringify=True),
+                               comma_join(self.order_by, stringify=True))
+
+            if self.primary_key:
+                partition_sql += " PRIMARY KEY (%s)" % comma_join(self.primary_key, stringify=True)

             if self.sampling_expr:
                 partition_sql += " SAMPLE BY %s" % self.sampling_expr
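The new `primary_key` parameter allows a primary key that differs from (typically, is a prefix of) the sort key. A usage sketch with hypothetical column names:
```python
from infi.clickhouse_orm import MergeTree

# Sort rows by (timestamp, event_id), but keep only timestamp in the primary index:
engine = MergeTree(
    order_by=('timestamp', 'event_id'),
    partition_key=('toYYYYMM(timestamp)',),
    primary_key=('timestamp',),
)
```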
@@ -88,7 +94,7 @@ class MergeTree(Engine):
                 from infi.clickhouse_orm.database import DatabaseException
                 raise DatabaseException("Custom partitioning is not supported before ClickHouse 1.1.54310. "
                                         "Please update your server or use date_col syntax."
-                                        "https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/")
+                                        "https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/")
             else:
                 partition_sql = ''
@@ -101,14 +107,14 @@ class MergeTree(Engine):
             params += ["'%s'" % self.replica_table_path, "'%s'" % self.replica_name]

         # In ClickHouse 1.1.54310 custom partitioning key was introduced
-        # https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
+        # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
         # These parameters are processed in create_table_sql directly.
         # In previous ClickHouse versions this syntax does not work.
         if db.server_version < (1, 1, 54310):
             params.append(self.date_col)
             if self.sampling_expr:
                 params.append(self.sampling_expr)
-            params.append('(%s)' % comma_join(self.order_by))
+            params.append('(%s)' % comma_join(self.order_by, stringify=True))
             params.append(str(self.index_granularity))

         return params
@@ -117,9 +123,10 @@ class MergeTree(Engine):
 class CollapsingMergeTree(MergeTree):

     def __init__(self, date_col=None, order_by=(), sign_col='sign', sampling_expr=None,
-                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None):
+                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None,
+                 primary_key=None):
         super(CollapsingMergeTree, self).__init__(date_col, order_by, sampling_expr, index_granularity,
-                                                  replica_table_path, replica_name, partition_key)
+                                                  replica_table_path, replica_name, partition_key, primary_key)
         self.sign_col = sign_col

     def _build_sql_params(self, db):
@@ -131,9 +138,10 @@ class CollapsingMergeTree(MergeTree):
 class SummingMergeTree(MergeTree):

     def __init__(self, date_col=None, order_by=(), summing_cols=None, sampling_expr=None,
-                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None):
+                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None,
+                 primary_key=None):
         super(SummingMergeTree, self).__init__(date_col, order_by, sampling_expr, index_granularity, replica_table_path,
-                                               replica_name, partition_key)
+                                               replica_name, partition_key, primary_key)
         assert type is None or type(summing_cols) in (list, tuple), 'summing_cols must be a list or tuple'
         self.summing_cols = summing_cols

@@ -147,9 +155,10 @@ class SummingMergeTree(MergeTree):
 class ReplacingMergeTree(MergeTree):

     def __init__(self, date_col=None, order_by=(), ver_col=None, sampling_expr=None,
-                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None):
+                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None,
+                 primary_key=None):
         super(ReplacingMergeTree, self).__init__(date_col, order_by, sampling_expr, index_granularity,
-                                                 replica_table_path, replica_name, partition_key)
+                                                 replica_table_path, replica_name, partition_key, primary_key)
         self.ver_col = ver_col

     def _build_sql_params(self, db):
@@ -163,7 +172,7 @@ class Buffer(Engine):
     """
     Buffers the data to write in RAM, periodically flushing it to another table.
     Must be used in conjunction with a `BufferModel`.
-    Read more [here](https://clickhouse.yandex/docs/en/table_engines/buffer/).
+    Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/).
     """

     #Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
@@ -194,11 +203,11 @@ class Merge(Engine):
     The Merge engine (not to be confused with MergeTree) does not store data itself,
     but allows reading from any number of other tables simultaneously.
     Writing to a table is not supported.
-    https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
+    https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
     """

     def __init__(self, table_regex):
-        assert isinstance(table_regex, six.string_types), "'table_regex' parameter must be string"
+        assert isinstance(table_regex, str), "'table_regex' parameter must be string"
         self.table_regex = table_regex

     def create_table_sql(self, db):
@@ -213,15 +222,15 @@ class Distributed(Engine):
     During a read, the table indexes on remote servers are used, if there are any.

     See full documentation here
-    https://clickhouse.yandex/docs/en/table_engines/distributed.html
+    https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
     """
     def __init__(self, cluster, table=None, sharding_key=None):
         """
-        :param cluster: what cluster to access data from
+        - `cluster`: what cluster to access data from
-        :param table: underlying table that actually stores data.
+        - `table`: underlying table that actually stores data.
           If you are not specifying any table here, ensure that it can be inferred
           from your model's superclass (see models.DistributedModel.fix_engine_table)
-        :param sharding_key: how to distribute data among shards when inserting
+        - `sharding_key`: how to distribute data among shards when inserting
           directly into the Distributed table, optional
         """
         self.cluster = cluster
@@ -254,3 +263,7 @@ class Distributed(Engine):
         if self.sharding_key:
             params.append(self.sharding_key)
         return params
+
+
+# Expose only relevant classes in import *
+__all__ = get_subclass_names(locals(), Engine)
@@ -1,32 +1,39 @@
 from __future__ import unicode_literals
-from six import string_types, text_type, binary_type, integer_types
 import datetime
 import iso8601
 import pytz
-import time
 from calendar import timegm
 from decimal import Decimal, localcontext
 from uuid import UUID
+from logging import getLogger
+from pytz import BaseTzInfo
+from .utils import escape, parse_array, comma_join, string_or_func, get_subclass_names
+from .funcs import F, FunctionOperatorsMixin
+from ipaddress import IPv4Address, IPv6Address

-from .utils import escape, parse_array, comma_join
+logger = getLogger('clickhouse_orm')


-class Field(object):
+class Field(FunctionOperatorsMixin):
     '''
     Abstract base class for all field types.
     '''
-    creation_counter = 0
-    class_default = 0
-    db_type = None
+    name = None             # this is set by the parent model
+    parent = None           # this is set by the parent model
+    creation_counter = 0    # used for keeping the model fields ordered
+    class_default = 0       # should be overridden by concrete subclasses
+    db_type = None          # should be overridden by concrete subclasses

-    def __init__(self, default=None, alias=None, materialized=None, readonly=None):
-        assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \
+    def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None):
+        assert [default, alias, materialized].count(None) >= 2, \
             "Only one of default, alias and materialized parameters can be given"
-        assert alias is None or isinstance(alias, string_types) and alias != "",\
-            "Alias field must be string field name, if given"
+        assert alias is None or isinstance(alias, F) or isinstance(alias, str) and alias != "",\
+            "Alias parameter must be a string or function object, if given"
-        assert materialized is None or isinstance(materialized, string_types) and alias != "",\
-            "Materialized field must be string, if given"
+        assert materialized is None or isinstance(materialized, F) or isinstance(materialized, str) and materialized != "",\
+            "Materialized parameter must be a string or function object, if given"
         assert readonly is None or type(readonly) is bool, "readonly parameter must be bool if given"
+        assert codec is None or isinstance(codec, str) and codec != "", \
+            "Codec field must be string, if given"

         self.creation_counter = Field.creation_counter
         Field.creation_counter += 1
@@ -34,6 +41,13 @@ class Field(object):
         self.alias = alias
         self.materialized = materialized
         self.readonly = bool(self.alias or self.materialized or readonly)
+        self.codec = codec
+
+    def __str__(self):
+        return self.name
+
+    def __repr__(self):
+        return '<%s>' % self.__class__.__name__

     def to_python(self, value, timezone_in_use):
         '''
@@ -64,29 +78,49 @@ class Field(object):
         '''
         return escape(value, quote)

-    def get_sql(self, with_default_expression=True):
+    def get_sql(self, with_default_expression=True, db=None):
         '''
         Returns an SQL expression describing the field (e.g. for CREATE TABLE).
-        :param with_default_expression: If True, adds default value to sql.
-          It doesn't affect fields with alias and materialized values.
+
+        - `with_default_expression`: If True, adds default value to sql.
+          It doesn't affect fields with alias and materialized values.
+        - `db`: Database, used for checking supported features.
         '''
-        if with_default_expression:
-            if self.alias:
-                return '%s ALIAS %s' % (self.db_type, self.alias)
-            elif self.materialized:
-                return '%s MATERIALIZED %s' % (self.db_type, self.materialized)
-            else:
-                default = self.to_db_string(self.default)
-                return '%s DEFAULT %s' % (self.db_type, default)
-        else:
-            return self.db_type
+        sql = self.db_type
+        args = self.get_db_type_args()
+        if args:
+            sql += '(%s)' % comma_join(args)
+        if with_default_expression:
+            sql += self._extra_params(db)
+        return sql
+
+    def get_db_type_args(self):
+        """Returns field type arguments"""
+        return []
+
+    def _extra_params(self, db):
+        sql = ''
+        if self.alias:
+            sql += ' ALIAS %s' % string_or_func(self.alias)
+        elif self.materialized:
+            sql += ' MATERIALIZED %s' % string_or_func(self.materialized)
+        elif isinstance(self.default, F):
+            sql += ' DEFAULT %s' % self.default.to_sql()
+        elif self.default:
+            default = self.to_db_string(self.default)
+            sql += ' DEFAULT %s' % default
+        if self.codec and db and db.has_codec_support and not self.alias:
+            sql += ' CODEC(%s)' % self.codec
+        return sql

     def isinstance(self, types):
         """
         Checks if the instance is one of the types provided, or if any of the inner_field children is one of the
         types provided; returns True if the field or any inner_field is one of the types provided, False otherwise.
-        :param types: Iterable of types to check inclusion of instance
-        :return: Boolean
+
+        - `types`: Iterable of types to check inclusion of instance
+
+        Returns: Boolean
         """
         if isinstance(self, types):
             return True
|
||||||
|
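With the new `_extra_params` helper, a column's ALIAS/MATERIALIZED/DEFAULT clause and its CODEC are emitted from one place, and alias/materialized/default may now be `F` function objects rather than raw SQL strings. A minimal sketch of how these options combine in a model definition (the model, column names and codec string are illustrative, not taken from this diff):

```python
from infi.clickhouse_orm import models, fields, engines
from infi.clickhouse_orm.funcs import F

class PageView(models.Model):
    # codec is passed through to CODEC(...) when the server supports it
    ts = fields.DateTimeField(codec='Delta,ZSTD(10)')
    url = fields.StringField()
    # a function object instead of a raw SQL string
    date = fields.DateField(materialized=F.toDate(ts))

    engine = engines.MergeTree('date', ('date', 'url'))
```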
@@ -104,9 +138,9 @@ class StringField(Field):
     db_type = 'String'

     def to_python(self, value, timezone_in_use):
-        if isinstance(value, text_type):
+        if isinstance(value, str):
             return value
-        if isinstance(value, binary_type):
+        if isinstance(value, bytes):
             return value.decode('UTF-8')
         raise ValueError('Invalid value for %s: %r' % (self.__class__.__name__, value))

@@ -123,7 +157,7 @@ class FixedStringField(StringField):
         return value.rstrip('\0')

     def validate(self, value):
-        if isinstance(value, text_type):
+        if isinstance(value, str):
             value = value.encode('UTF-8')
         if len(value) > self._length:
             raise ValueError('Value of %d bytes is too long for FixedStringField(%d)' % (len(value), self._length))

@@ -143,7 +177,7 @@ class DateField(Field):
             return value
         if isinstance(value, int):
             return DateField.class_default + datetime.timedelta(days=value)
-        if isinstance(value, string_types):
+        if isinstance(value, str):
             if value == '0000-00-00':
                 return DateField.min_value
             return datetime.datetime.strptime(value, '%Y-%m-%d').date()

@@ -161,14 +195,28 @@ class DateTimeField(Field):
     class_default = datetime.datetime.fromtimestamp(0, pytz.utc)
     db_type = 'DateTime'

+    def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None,
+                 timezone=None):
+        super().__init__(default, alias, materialized, readonly, codec)
+        # assert not timezone, 'Temporarily field timezone is not supported'
+        if timezone:
+            timezone = timezone if isinstance(timezone, BaseTzInfo) else pytz.timezone(timezone)
+        self.timezone = timezone
+
+    def get_db_type_args(self):
+        args = []
+        if self.timezone:
+            args.append(escape(self.timezone.zone))
+        return args
+
     def to_python(self, value, timezone_in_use):
         if isinstance(value, datetime.datetime):
-            return value.astimezone(pytz.utc) if value.tzinfo else value.replace(tzinfo=pytz.utc)
+            return value if value.tzinfo else value.replace(tzinfo=pytz.utc)
         if isinstance(value, datetime.date):
             return datetime.datetime(value.year, value.month, value.day, tzinfo=pytz.utc)
         if isinstance(value, int):
             return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc)
-        if isinstance(value, string_types):
+        if isinstance(value, str):
             if value == '0000-00-00 00:00:00':
                 return self.class_default
             if len(value) == 10:
@@ -181,18 +229,66 @@ class DateTimeField(Field):
                 # left the date naive in case of no tzinfo set
                 dt = iso8601.parse_date(value, default_timezone=None)
             except iso8601.ParseError as e:
-                raise ValueError(text_type(e))
+                raise ValueError(str(e))

             # convert naive to aware
             if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None:
                 dt = timezone_in_use.localize(dt)
-            return dt.astimezone(pytz.utc)
+            return dt
         raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value))

     def to_db_string(self, value, quote=True):
         return escape('%010d' % timegm(value.utctimetuple()), quote)
+
+
+class DateTime64Field(DateTimeField):
+    db_type = 'DateTime64'
+
+    def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None,
+                 timezone=None, precision=6):
+        super().__init__(default, alias, materialized, readonly, codec, timezone)
+        assert precision is None or isinstance(precision, int), 'Precision must be int type'
+        self.precision = precision
+
+    def get_db_type_args(self):
+        args = [str(self.precision)]
+        if self.timezone:
+            args.append(escape(self.timezone.zone))
+        return args
+
+    def to_db_string(self, value, quote=True):
+        """
+        Returns the field's value prepared for writing to the database
+
+        Returns string in 0000000000.000000 format, where remainder digits count is equal to precision
+        """
+        return escape(
+            '{timestamp:0{width}.{precision}f}'.format(
+                timestamp=value.timestamp(),
+                width=11 + self.precision,
+                precision=self.precision),
+            quote
+        )
+
+    def to_python(self, value, timezone_in_use):
+        try:
+            return super().to_python(value, timezone_in_use)
+        except ValueError:
+            if isinstance(value, (int, float)):
+                return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc)
+            if isinstance(value, str):
+                left_part = value.split('.')[0]
+                if left_part == '0000-00-00 00:00:00':
+                    return self.class_default
+                if len(left_part) == 10:
+                    try:
+                        value = float(value)
+                        return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc)
+                    except ValueError:
+                        pass
+            raise


 class BaseIntField(Field):
     '''
     Abstract base class for all integer-type fields.
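A quick sketch of what the new `DateTime64Field` produces, based on the `get_db_type_args` and `to_db_string` implementations above (the precision, timezone and timestamp values are illustrative):

```python
import datetime
import pytz
from infi.clickhouse_orm.fields import DateTime64Field

f = DateTime64Field(precision=3, timezone='UTC')
print(f.get_sql(with_default_expression=False))   # DateTime64(3, 'UTC')

dt = datetime.datetime(2020, 1, 1, tzinfo=pytz.utc)
# zero-padded epoch seconds with `precision` fractional digits
print(f.to_db_string(dt, quote=False))            # 1577836800.000
```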
@@ -206,7 +302,7 @@ class BaseIntField(Field):
     def to_db_string(self, value, quote=True):
         # There's no need to call escape since numbers do not contain
         # special characters, and never need quoting
-        return text_type(value)
+        return str(value)

     def validate(self, value):
         self._range_check(value, self.min_value, self.max_value)

@@ -282,7 +378,7 @@ class BaseFloatField(Field):
     def to_db_string(self, value, quote=True):
         # There's no need to call escape since numbers do not contain
         # special characters, and never need quoting
-        return text_type(value)
+        return str(value)


 class Float32Field(BaseFloatField):

@@ -326,7 +422,7 @@ class DecimalField(Field):
     def to_db_string(self, value, quote=True):
         # There's no need to call escape since numbers do not contain
         # special characters, and never need quoting
-        return text_type(value)
+        return str(value)

     def _round(self, value):
         return value.quantize(self.exp)
@@ -361,20 +457,27 @@ class BaseEnumField(Field):
     Abstract base class for all enum-type fields.
     '''

-    def __init__(self, enum_cls, default=None, alias=None, materialized=None, readonly=None):
+    def __init__(self, enum_cls, default=None, alias=None, materialized=None, readonly=None, codec=None):
         self.enum_cls = enum_cls
         if default is None:
             default = list(enum_cls)[0]
-        super(BaseEnumField, self).__init__(default, alias, materialized, readonly)
+        super(BaseEnumField, self).__init__(default, alias, materialized, readonly, codec)

     def to_python(self, value, timezone_in_use):
         if isinstance(value, self.enum_cls):
             return value
         try:
-            if isinstance(value, text_type):
-                return self.enum_cls[value]
-            if isinstance(value, binary_type):
-                return self.enum_cls[value.decode('UTF-8')]
+            if isinstance(value, str):
+                try:
+                    return self.enum_cls[value]
+                except Exception:
+                    return self.enum_cls(value)
+            if isinstance(value, bytes):
+                decoded = value.decode('UTF-8')
+                try:
+                    return self.enum_cls[decoded]
+                except Exception:
+                    return self.enum_cls(decoded)
             if isinstance(value, int):
                 return self.enum_cls(value)
         except (KeyError, ValueError):

@@ -384,13 +487,8 @@ class BaseEnumField(Field):
     def to_db_string(self, value, quote=True):
         return escape(value.name, quote)

-    def get_sql(self, with_default_expression=True):
-        values = ['%s = %d' % (escape(item.name), item.value) for item in self.enum_cls]
-        sql = '%s(%s)' % (self.db_type, ' ,'.join(values))
-        if with_default_expression:
-            default = self.to_db_string(self.default)
-            sql = '%s DEFAULT %s' % (sql, default)
-        return sql
+    def get_db_type_args(self):
+        return ['%s = %d' % (escape(item.name), item.value) for item in self.enum_cls]

     @classmethod
     def create_ad_hoc_field(cls, db_type):

@@ -399,12 +497,9 @@ class BaseEnumField(Field):
         this method returns a matching enum field.
         '''
         import re
-        try:
-            Enum # exists in Python 3.4+
-        except NameError:
-            from enum import Enum # use the enum34 library instead
+        from enum import Enum
         members = {}
-        for match in re.finditer("'(\w+)' = (\d+)", db_type):
+        for match in re.finditer(r"'([\w ]+)' = (-?\d+)", db_type):
             members[match.group(1)] = int(match.group(2))
         enum_cls = Enum('AdHocEnum', members)
         field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field
@@ -425,16 +520,16 @@ class ArrayField(Field):

     class_default = []

-    def __init__(self, inner_field, default=None, alias=None, materialized=None, readonly=None):
+    def __init__(self, inner_field, default=None, alias=None, materialized=None, readonly=None, codec=None):
         assert isinstance(inner_field, Field), "The first argument of ArrayField must be a Field instance"
         assert not isinstance(inner_field, ArrayField), "Multidimensional array fields are not supported by the ORM"
         self.inner_field = inner_field
-        super(ArrayField, self).__init__(default, alias, materialized, readonly)
+        super(ArrayField, self).__init__(default, alias, materialized, readonly, codec)

     def to_python(self, value, timezone_in_use):
-        if isinstance(value, text_type):
+        if isinstance(value, str):
             value = parse_array(value)
-        elif isinstance(value, binary_type):
+        elif isinstance(value, bytes):
             value = parse_array(value.decode('UTF-8'))
         elif not isinstance(value, (list, tuple)):
             raise ValueError('ArrayField expects list or tuple, not %s' % type(value))

@@ -448,9 +543,11 @@ class ArrayField(Field):
         array = [self.inner_field.to_db_string(v, quote=True) for v in value]
         return '[' + comma_join(array) + ']'

-    def get_sql(self, with_default_expression=True):
-        from .utils import escape
-        return 'Array(%s)' % self.inner_field.get_sql(with_default_expression=False)
+    def get_sql(self, with_default_expression=True, db=None):
+        sql = 'Array(%s)' % self.inner_field.get_sql(with_default_expression=False, db=db)
+        if with_default_expression and self.codec and db and db.has_codec_support:
+            sql+= ' CODEC(%s)' % self.codec
+        return sql


 class UUIDField(Field):

@@ -461,11 +558,11 @@ class UUIDField(Field):
     def to_python(self, value, timezone_in_use):
         if isinstance(value, UUID):
             return value
-        elif isinstance(value, binary_type):
+        elif isinstance(value, bytes):
             return UUID(bytes=value)
-        elif isinstance(value, string_types):
+        elif isinstance(value, str):
             return UUID(value)
-        elif isinstance(value, integer_types):
+        elif isinstance(value, int):
             return UUID(int=value)
         elif isinstance(value, tuple):
             return UUID(fields=value)
@@ -476,17 +573,52 @@ class UUIDField(Field):
         return escape(str(value), quote)


+class IPv4Field(Field):
+
+    class_default = 0
+    db_type = 'IPv4'
+
+    def to_python(self, value, timezone_in_use):
+        if isinstance(value, IPv4Address):
+            return value
+        elif isinstance(value, (bytes, str, int)):
+            return IPv4Address(value)
+        else:
+            raise ValueError('Invalid value for IPv4Address: %r' % value)
+
+    def to_db_string(self, value, quote=True):
+        return escape(str(value), quote)
+
+
+class IPv6Field(Field):
+
+    class_default = 0
+    db_type = 'IPv6'
+
+    def to_python(self, value, timezone_in_use):
+        if isinstance(value, IPv6Address):
+            return value
+        elif isinstance(value, (bytes, str, int)):
+            return IPv6Address(value)
+        else:
+            raise ValueError('Invalid value for IPv6Address: %r' % value)
+
+    def to_db_string(self, value, quote=True):
+        return escape(str(value), quote)
+
+
 class NullableField(Field):

     class_default = None

     def __init__(self, inner_field, default=None, alias=None, materialized=None,
-                 extra_null_values=None):
+                 extra_null_values=None, codec=None):
+        assert isinstance(inner_field, Field), "The first argument of NullableField must be a Field instance. Not: {}".format(inner_field)
         self.inner_field = inner_field
         self._null_values = [None]
         if extra_null_values:
             self._null_values.extend(extra_null_values)
-        super(NullableField, self).__init__(default, alias, materialized, readonly=None)
+        super(NullableField, self).__init__(default, alias, materialized, readonly=None, codec=codec)

     def to_python(self, value, timezone_in_use):
         if value == '\\N' or value in self._null_values:

@@ -501,14 +633,42 @@ class NullableField(Field):
             return '\\N'
         return self.inner_field.to_db_string(value, quote=quote)

-    def get_sql(self, with_default_expression=True):
-        s = 'Nullable(%s)' % self.inner_field.get_sql(with_default_expression=False)
+    def get_sql(self, with_default_expression=True, db=None):
+        sql = 'Nullable(%s)' % self.inner_field.get_sql(with_default_expression=False, db=db)
         if with_default_expression:
-            if self.alias:
-                s = '%s ALIAS %s' % (s, self.alias)
-            elif self.materialized:
-                s = '%s MATERIALIZED %s' % (s, self.materialized)
-            elif self.default:
-                default = self.to_db_string(self.default)
-                s = '%s DEFAULT %s' % (s, default)
-        return s
+            sql += self._extra_params(db)
+        return sql
+
+
+class LowCardinalityField(Field):
+
+    def __init__(self, inner_field, default=None, alias=None, materialized=None, readonly=None, codec=None):
+        assert isinstance(inner_field, Field), "The first argument of LowCardinalityField must be a Field instance. Not: {}".format(inner_field)
+        assert not isinstance(inner_field, LowCardinalityField), "LowCardinality inner fields are not supported by the ORM"
+        assert not isinstance(inner_field, ArrayField), "Array field inside LowCardinality are not supported by the ORM. Use Array(LowCardinality) instead"
+        self.inner_field = inner_field
+        self.class_default = self.inner_field.class_default
+        super(LowCardinalityField, self).__init__(default, alias, materialized, readonly, codec)
+
+    def to_python(self, value, timezone_in_use):
+        return self.inner_field.to_python(value, timezone_in_use)
+
+    def validate(self, value):
+        self.inner_field.validate(value)
+
+    def to_db_string(self, value, quote=True):
+        return self.inner_field.to_db_string(value, quote=quote)
+
+    def get_sql(self, with_default_expression=True, db=None):
+        if db and db.has_low_cardinality_support:
+            sql = 'LowCardinality(%s)' % self.inner_field.get_sql(with_default_expression=False)
+        else:
+            sql = self.inner_field.get_sql(with_default_expression=False)
+            logger.warning('LowCardinalityField not supported on clickhouse-server version < 19.0 using {} as fallback'.format(self.inner_field.__class__.__name__))
+        if with_default_expression:
+            sql += self._extra_params(db)
+        return sql
+
+
+# Expose only relevant classes in import *
+__all__ = get_subclass_names(locals(), Field)
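Taken together, the new field classes in this file can be used like this (a sketch; the model and column names are hypothetical). Note that `LowCardinalityField.get_sql` silently falls back to the inner type on servers without LowCardinality support:

```python
from infi.clickhouse_orm import models, fields, engines

class AccessLog(models.Model):
    event_date = fields.DateField()
    client_v4 = fields.IPv4Field()
    client_v6 = fields.IPv6Field()
    # stored as LowCardinality(String) on ClickHouse >= 19.0, plain String otherwise
    browser = fields.LowCardinalityField(fields.StringField())

    engine = engines.MergeTree('event_date', ('event_date',))
```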
1827  src/infi/clickhouse_orm/funcs.py  (new file)
File diff suppressed because it is too large
src/infi/clickhouse_orm/migrations.py

@@ -1,18 +1,13 @@
-import six
-
 from .models import Model, BufferModel
 from .fields import DateField, StringField
 from .engines import MergeTree
-from .utils import escape
+from .utils import escape, get_subclass_names

-from six.moves import zip
-from six import iteritems

 import logging
 logger = logging.getLogger('migrations')


-class Operation(object):
+class Operation():
     '''
     Base class for migration operations.
     '''

@@ -21,22 +16,40 @@ class Operation(object):
         raise NotImplementedError()   # pragma: no cover


-class CreateTable(Operation):
+class ModelOperation(Operation):
+    '''
+    Base class for migration operations that work on a specific model.
+    '''
+
+    def __init__(self, model_class):
+        '''
+        Initializer.
+        '''
+        self.model_class = model_class
+        self.table_name = model_class.table_name()
+
+    def _alter_table(self, database, cmd):
+        '''
+        Utility for running ALTER TABLE commands.
+        '''
+        cmd = "ALTER TABLE $db.`%s` %s" % (self.table_name, cmd)
+        logger.debug(cmd)
+        database.raw(cmd)
+
+
+class CreateTable(ModelOperation):
     '''
     A migration operation that creates a table for a given model class.
     '''

-    def __init__(self, model_class):
-        self.model_class = model_class
-
     def apply(self, database):
-        logger.info(' Create table %s', self.model_class.table_name())
+        logger.info(' Create table %s', self.table_name)
         if issubclass(self.model_class, BufferModel):
             database.create_table(self.model_class.engine.main_model)
         database.create_table(self.model_class)


-class AlterTable(Operation):
+class AlterTable(ModelOperation):
     '''
     A migration operation that compares the table of a given model class to
     the model's fields, and alters the table to match the model. The operation can:
@@ -46,20 +59,12 @@ class AlterTable(Operation):
     Default values are not altered by this operation.
     '''

-    def __init__(self, model_class):
-        self.model_class = model_class
-
     def _get_table_fields(self, database):
-        query = "DESC `%s`.`%s`" % (database.db_name, self.model_class.table_name())
+        query = "DESC `%s`.`%s`" % (database.db_name, self.table_name)
         return [(row.name, row.type) for row in database.select(query)]

-    def _alter_table(self, database, cmd):
-        cmd = "ALTER TABLE `%s`.`%s` %s" % (database.db_name, self.model_class.table_name(), cmd)
-        logger.debug(cmd)
-        database._send(cmd)
-
     def apply(self, database):
-        logger.info(' Alter table %s', self.model_class.table_name())
+        logger.info(' Alter table %s', self.table_name)

         # Note that MATERIALIZED and ALIAS fields are always at the end of the DESC,
         # ADD COLUMN ... AFTER doesn't affect it

@@ -74,14 +79,16 @@ class AlterTable(Operation):

         # Identify fields that were added to the model
         prev_name = None
-        for name, field in iteritems(self.model_class.fields()):
+        for name, field in self.model_class.fields().items():
             is_regular_field = not (field.materialized or field.alias)
             if name not in table_fields:
                 logger.info(' Add column %s', name)
-                assert prev_name, 'Cannot add a column to the beginning of the table'
-                cmd = 'ADD COLUMN %s %s' % (name, field.get_sql())
+                cmd = 'ADD COLUMN %s %s' % (name, field.get_sql(db=database))
                 if is_regular_field:
-                    cmd += ' AFTER %s' % prev_name
+                    if prev_name:
+                        cmd += ' AFTER %s' % prev_name
+                    else:
+                        cmd += ' FIRST'
                 self._alter_table(database, cmd)

             if is_regular_field:

@@ -93,8 +100,8 @@ class AlterTable(Operation):
         # The order of class attributes can be changed any time, so we can't count on it
         # Secondly, MATERIALIZED and ALIAS fields are always at the end of the DESC, so we can't expect them to save
         # attribute position. Watch https://github.com/Infinidat/infi.clickhouse_orm/issues/47
-        model_fields = {name: field.get_sql(with_default_expression=False)
-                        for name, field in iteritems(self.model_class.fields())}
+        model_fields = {name: field.get_sql(with_default_expression=False, db=database)
+                        for name, field in self.model_class.fields().items()}
         for field_name, field_sql in self._get_table_fields(database):
             # All fields must have been created and dropped by this moment
             assert field_name in model_fields, 'Model fields and table columns in disagreement'

@@ -105,16 +112,13 @@ class AlterTable(Operation):
                 self._alter_table(database, 'MODIFY COLUMN %s %s' % (field_name, model_fields[field_name]))


-class AlterTableWithBuffer(Operation):
+class AlterTableWithBuffer(ModelOperation):
     '''
     A migration operation for altering a buffer table and its underlying on-disk table.
     The buffer table is dropped, the on-disk table is altered, and then the buffer table
     is re-created.
     '''

-    def __init__(self, model_class):
-        self.model_class = model_class
-
     def apply(self, database):
         if issubclass(self.model_class, BufferModel):
             DropTable(self.model_class).apply(database)
@@ -124,25 +128,108 @@ class AlterTableWithBuffer(Operation):
             AlterTable(self.model_class).apply(database)


-class DropTable(Operation):
+class DropTable(ModelOperation):
     '''
     A migration operation that drops the table of a given model class.
     '''

-    def __init__(self, model_class):
-        self.model_class = model_class
-
     def apply(self, database):
-        logger.info(' Drop table %s', self.model_class.table_name())
+        logger.info(' Drop table %s', self.table_name)
         database.drop_table(self.model_class)


+class AlterConstraints(ModelOperation):
+    '''
+    A migration operation that adds new constraints from the model to the database
+    table, and drops obsolete ones. Constraints are identified by their names, so
+    a change in an existing constraint will not be detected unless its name was changed too.
+    ClickHouse does not check that the constraints hold for existing data in the table.
+    '''
+
+    def apply(self, database):
+        logger.info(' Alter constraints for %s', self.table_name)
+        existing = self._get_constraint_names(database)
+        # Go over constraints in the model
+        for constraint in self.model_class._constraints.values():
+            # Check if it's a new constraint
+            if constraint.name not in existing:
+                logger.info(' Add constraint %s', constraint.name)
+                self._alter_table(database, 'ADD %s' % constraint.create_table_sql())
+            else:
+                existing.remove(constraint.name)
+        # Remaining constraints in `existing` are obsolete
+        for name in existing:
+            logger.info(' Drop constraint %s', name)
+            self._alter_table(database, 'DROP CONSTRAINT `%s`' % name)
+
+    def _get_constraint_names(self, database):
+        '''
+        Returns a set containing the names of existing constraints in the table.
+        '''
+        import re
+        table_def = database.raw('SHOW CREATE TABLE $db.`%s`' % self.table_name)
+        matches = re.findall(r'\sCONSTRAINT\s+`?(.+?)`?\s+CHECK\s', table_def)
+        return set(matches)
+
+
+class AlterIndexes(ModelOperation):
+    '''
+    A migration operation that adds new indexes from the model to the database
+    table, and drops obsolete ones. Indexes are identified by their names, so
+    a change in an existing index will not be detected unless its name was changed too.
+    '''
+
+    def __init__(self, model_class, reindex=False):
+        '''
+        Initializer.
+        By default ClickHouse does not build indexes over existing data, only for
+        new data. Passing `reindex=True` will run `OPTIMIZE TABLE` in order to build
+        the indexes over the existing data.
+        '''
+        super().__init__(model_class)
+        self.reindex = reindex
+
+    def apply(self, database):
+        logger.info(' Alter indexes for %s', self.table_name)
+        existing = self._get_index_names(database)
+        logger.info(existing)
+        # Go over indexes in the model
+        for index in self.model_class._indexes.values():
+            # Check if it's a new index
+            if index.name not in existing:
+                logger.info(' Add index %s', index.name)
+                self._alter_table(database, 'ADD %s' % index.create_table_sql())
+            else:
+                existing.remove(index.name)
+        # Remaining indexes in `existing` are obsolete
+        for name in existing:
+            logger.info(' Drop index %s', name)
+            self._alter_table(database, 'DROP INDEX `%s`' % name)
+        # Reindex
+        if self.reindex:
+            logger.info(' Build indexes on table')
+            database.raw('OPTIMIZE TABLE $db.`%s` FINAL' % self.table_name)
+
+    def _get_index_names(self, database):
+        '''
+        Returns a set containing the names of existing indexes in the table.
+        '''
+        import re
+        table_def = database.raw('SHOW CREATE TABLE $db.`%s`' % self.table_name)
+        matches = re.findall(r'\sINDEX\s+`?(.+?)`?\s+', table_def)
+        return set(matches)
+
+
 class RunPython(Operation):
     '''
-    A migration operation that executes given python function on database
+    A migration operation that executes a Python function.
     '''
     def __init__(self, func):
-        assert callable(func), "'func' parameter must be function"
+        '''
+        Initializer. The given Python function will be called with a single
+        argument - the Database instance to apply the migration to.
+        '''
+        assert callable(func), "'func' argument must be function"
         self._func = func

     def apply(self, database):
@@ -152,14 +239,17 @@ class RunPython(Operation):

 class RunSQL(Operation):
     '''
-    A migration operation that executes given SQL on database
+    A migration operation that executes arbitrary SQL statements.
     '''

     def __init__(self, sql):
+        '''
+        Initializer. The given sql argument must be a valid SQL statement or
+        list of statements.
+        '''
-        if isinstance(sql, six.string_types):
+        if isinstance(sql, str):
             sql = [sql]
-
-        assert isinstance(sql, list), "'sql' parameter must be string or list of strings"
+        assert isinstance(sql, list), "'sql' argument must be string or list of strings"
         self._sql = sql

     def apply(self, database):

@@ -182,3 +272,7 @@ class MigrationHistory(Model):
     @classmethod
     def table_name(cls):
         return 'infi_clickhouse_orm_migrations'
+
+
+# Expose only relevant classes in import *
+__all__ = get_subclass_names(locals(), Operation)
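The reworked docstrings spell out the calling conventions: `RunPython` receives the `Database` instance being migrated, and `RunSQL` accepts a statement or list of statements, run through `database.raw` (which substitutes `$db`, as `_alter_table` above relies on). A sketch with hypothetical table and function names:

```python
from infi.clickhouse_orm import migrations

def backfill(database):
    # called with the Database instance the migration is applied to
    database.raw("INSERT INTO $db.`accesslog` (event_date) VALUES ('2020-01-01')")

operations = [
    migrations.RunPython(backfill),
    migrations.RunSQL(["OPTIMIZE TABLE $db.`accesslog` FINAL"]),
]
```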
src/infi/clickhouse_orm/models.py

@@ -1,19 +1,124 @@
 from __future__ import unicode_literals
 import sys
 from collections import OrderedDict
+from itertools import chain
 from logging import getLogger

-from six import with_metaclass, reraise, iteritems
 import pytz

 from .fields import Field, StringField
-from .utils import parse_tsv
+from .utils import parse_tsv, NO_VALUE, get_subclass_names, arg_to_sql, unescape
 from .query import QuerySet
+from .funcs import F
 from .engines import Merge, Distributed

 logger = getLogger('clickhouse_orm')


+class Constraint:
+    '''
+    Defines a model constraint.
+    '''
+
+    name = None    # this is set by the parent model
+    parent = None  # this is set by the parent model
+
+    def __init__(self, expr):
+        '''
+        Initializer. Expects an expression that ClickHouse will verify when inserting data.
+        '''
+        self.expr = expr
+
+    def create_table_sql(self):
+        '''
+        Returns the SQL statement for defining this constraint during table creation.
+        '''
+        return 'CONSTRAINT `%s` CHECK %s' % (self.name, arg_to_sql(self.expr))
+
+
+class Index:
+    '''
+    Defines a data-skipping index.
+    '''
+
+    name = None    # this is set by the parent model
+    parent = None  # this is set by the parent model
+
+    def __init__(self, expr, type, granularity):
+        '''
+        Initializer.
+
+        - `expr` - a column, expression, or tuple of columns and expressions to index.
+        - `type` - the index type. Use one of the following methods to specify the type:
+          `Index.minmax`, `Index.set`, `Index.ngrambf_v1`, `Index.tokenbf_v1` or `Index.bloom_filter`.
+        - `granularity` - index block size (number of multiples of the `index_granularity` defined by the engine).
+        '''
+        self.expr = expr
+        self.type = type
+        self.granularity = granularity
+
+    def create_table_sql(self):
+        '''
+        Returns the SQL statement for defining this index during table creation.
+        '''
+        return 'INDEX `%s` %s TYPE %s GRANULARITY %d' % (self.name, arg_to_sql(self.expr), self.type, self.granularity)
+
+    @staticmethod
+    def minmax():
+        '''
+        An index that stores extremes of the specified expression (if the expression is tuple, then it stores
+        extremes for each element of tuple). The stored info is used for skipping blocks of data like the primary key.
+        '''
+        return 'minmax'
+
+    @staticmethod
+    def set(max_rows):
+        '''
+        An index that stores unique values of the specified expression (no more than max_rows rows,
+        or unlimited if max_rows=0). Uses the values to check if the WHERE expression is not satisfiable
+        on a block of data.
+        '''
+        return 'set(%d)' % max_rows
+
+    @staticmethod
+    def ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed):
+        '''
+        An index that stores a Bloom filter containing all ngrams from a block of data.
+        Works only with strings. Can be used for optimization of equals, like and in expressions.
+
+        - `n` — ngram size
+        - `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here,
+          for example 256 or 512, because it can be compressed well).
+        - `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
+        - `random_seed` — The seed for Bloom filter hash functions.
+        '''
+        return 'ngrambf_v1(%d, %d, %d, %d)' % (n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)
+
+    @staticmethod
+    def tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed):
+        '''
+        An index that stores a Bloom filter containing string tokens. Tokens are sequences
+        separated by non-alphanumeric characters.
+
+        - `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here,
+          for example 256 or 512, because it can be compressed well).
+        - `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
+        - `random_seed` — The seed for Bloom filter hash functions.
+        '''
+        return 'tokenbf_v1(%d, %d, %d)' % (size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)
+
+    @staticmethod
+    def bloom_filter(false_positive=0.025):
+        '''
+        An index that stores a Bloom filter containing values of the index expression.
+
+        - `false_positive` - the probability (between 0 and 1) of receiving a false positive
+          response from the filter
+        '''
+        return 'bloom_filter(%f)' % false_positive
+
+
 class ModelBase(type):
     '''
     A metaclass for ORM models. It adds the _fields list to model classes.
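A sketch of a model using the new `Constraint` and `Index` classes; the field expressions rely on the ORM's `F`-based operator support, and the model itself is illustrative rather than part of this diff:

```python
from infi.clickhouse_orm import models, fields, engines
from infi.clickhouse_orm.models import Constraint, Index
from infi.clickhouse_orm.funcs import F

class Person(models.Model):
    first_name = fields.StringField()
    birthday = fields.DateField()

    # rejected by ClickHouse at INSERT time if violated
    birthday_in_past = Constraint(birthday <= F.today())
    # data-skipping index over the birthday month
    birthday_index = Index(F.toYYYYMM(birthday), type=Index.minmax(), granularity=2)

    engine = engines.MergeTree('birthday', ('first_name', 'birthday'))
```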
@@ -22,35 +127,66 @@ class ModelBase(type):
     ad_hoc_model_cache = {}

     def __new__(cls, name, bases, attrs):
-        # Collect fields from parent classes
-        base_fields = dict()
+
+        # Collect fields, constraints and indexes from parent classes
+        fields = {}
+        constraints = {}
+        indexes = {}
         for base in bases:
             if isinstance(base, ModelBase):
-                base_fields.update(base._fields)
+                fields.update(base._fields)
+                constraints.update(base._constraints)
+                indexes.update(base._indexes)

-        fields = base_fields
+        # Add fields, constraints and indexes from this class
+        for n, obj in attrs.items():
+            if isinstance(obj, Field):
+                fields[n] = obj
+            elif isinstance(obj, Constraint):
+                constraints[n] = obj
+            elif isinstance(obj, Index):
+                indexes[n] = obj

-        # Build a list of fields, in the order they were listed in the class
-        fields.update({n: f for n, f in iteritems(attrs) if isinstance(f, Field)})
-        fields = sorted(iteritems(fields), key=lambda item: item[1].creation_counter)
+        # Convert fields to a list of (name, field) tuples, in the order they were listed in the class
+        fields = sorted(fields.items(), key=lambda item: item[1].creation_counter)

         # Build a dictionary of default values
-        defaults = {n: f.to_python(f.default, pytz.UTC) for n, f in fields}
+        defaults = {}
+        has_funcs_as_defaults = False
+        for n, f in fields:
+            if f.alias or f.materialized:
+                defaults[n] = NO_VALUE
+            elif isinstance(f.default, F):
+                defaults[n] = NO_VALUE
+                has_funcs_as_defaults = True
+            else:
+                defaults[n] = f.to_python(f.default, pytz.UTC)

+        # Create the model class
         attrs = dict(
             attrs,
             _fields=OrderedDict(fields),
+            _constraints=constraints,
+            _indexes=indexes,
             _writable_fields=OrderedDict([f for f in fields if not f[1].readonly]),
-            _defaults=defaults
+            _defaults=defaults,
+            _has_funcs_as_defaults=has_funcs_as_defaults
         )
-        return super(ModelBase, cls).__new__(cls, str(name), bases, attrs)
+        model = super(ModelBase, cls).__new__(cls, str(name), bases, attrs)
+
+        # Let each field, constraint and index know its parent and its own name
+        for n, obj in chain(fields, constraints.items(), indexes.items()):
+            setattr(obj, 'parent', model)
+            setattr(obj, 'name', n)
+
+        return model

     @classmethod
     def create_ad_hoc_model(cls, fields, model_name='AdHocModel'):
         # fields is a list of tuples (name, db_type)
         # Check if model exists in cache
         fields = list(fields)
-        cache_key = str(fields)
+        cache_key = model_name + ' ' + str(fields)
         if cache_key in cls.ad_hoc_model_cache:
             return cls.ad_hoc_model_cache[cache_key]
         # Create an ad hoc model class

@@ -70,24 +206,45 @@ class ModelBase(type):
             return orm_fields.BaseEnumField.create_ad_hoc_field(db_type)
         # DateTime with timezone
         if db_type.startswith('DateTime('):
-            # Some functions return DateTimeField with timezone in brackets
-            return orm_fields.DateTimeField()
+            timezone = db_type[9:-1]
+            return orm_fields.DateTimeField(
+                timezone=timezone[1:-1] if timezone else None
+            )
+        # DateTime64
+        if db_type.startswith('DateTime64('):
+            precision, *timezone = [s.strip() for s in db_type[11:-1].split(',')]
+            return orm_fields.DateTime64Field(
+                precision=int(precision),
+                timezone=timezone[0][1:-1] if timezone else None
+            )
         # Arrays
         if db_type.startswith('Array'):
             inner_field = cls.create_ad_hoc_field(db_type[6 : -1])
             return orm_fields.ArrayField(inner_field)
+        # Tuples (poor man's version - convert to array)
+        if db_type.startswith('Tuple'):
+            types = [s.strip() for s in db_type[6 : -1].split(',')]
+            assert len(set(types)) == 1, 'No support for mixed types in tuples - ' + db_type
+            inner_field = cls.create_ad_hoc_field(types[0])
+            return orm_fields.ArrayField(inner_field)
         # FixedString
         if db_type.startswith('FixedString'):
             length = int(db_type[12 : -1])
             return orm_fields.FixedStringField(length)
-        # Decimal
+        # Decimal / Decimal32 / Decimal64 / Decimal128
         if db_type.startswith('Decimal'):
-            precision, scale = [int(n.strip()) for n in db_type[8 : -1].split(',')]
-            return orm_fields.DecimalField(precision, scale)
+            p = db_type.index('(')
+            args = [int(n.strip()) for n in db_type[p + 1 : -1].split(',')]
+            field_class = getattr(orm_fields, db_type[:p] + 'Field')
+            return field_class(*args)
         # Nullable
         if db_type.startswith('Nullable'):
             inner_field = cls.create_ad_hoc_field(db_type[9 : -1])
             return orm_fields.NullableField(inner_field)
+        # LowCardinality
+        if db_type.startswith('LowCardinality'):
+            inner_field = cls.create_ad_hoc_field(db_type[15 : -1])
+            return orm_fields.LowCardinalityField(inner_field)
         # Simple fields
         name = db_type + 'Field'
         if not hasattr(orm_fields, name):
@@ -95,7 +252,7 @@ class ModelBase(type):
         return getattr(orm_fields, name)()


-class Model(with_metaclass(ModelBase)):
+class Model(metaclass=ModelBase):
     '''
     A base class for ORM models. Each model class represent a ClickHouse table. For example:

@@ -127,7 +284,7 @@ class Model(with_metaclass(ModelBase)):
         # Assign default values
         self.__dict__.update(self._defaults)
         # Assign field values from keyword arguments
-        for name, value in iteritems(kwargs):
+        for name, value in kwargs.items():
             field = self.get_field(name)
             if field:
                 setattr(self, name, value)

@@ -140,14 +297,14 @@ class Model(with_metaclass(ModelBase)):
         This may raise a `ValueError`.
         '''
         field = self.get_field(name)
-        if field:
+        if field and (value != NO_VALUE):
             try:
                 value = field.to_python(value, pytz.utc)
                 field.validate(value)
             except ValueError:
                 tp, v, tb = sys.exc_info()
                 new_msg = "{} (field '{}')".format(v, name)
-                reraise(tp, tp(new_msg), tb)
+                raise tp.with_traceback(tp(new_msg), tb)
         super(Model, self).__setattr__(name, value)

     def set_database(self, db):

@@ -182,16 +339,32 @@ class Model(with_metaclass(ModelBase)):
         '''
         return cls.__name__.lower()

+    @classmethod
+    def has_funcs_as_defaults(cls):
+        '''
+        Return True if some of the model's fields use a function expression
+        as a default value. This requires special handling when inserting instances.
+        '''
+        return cls._has_funcs_as_defaults
+
     @classmethod
     def create_table_sql(cls, db):
         '''
-        Returns the SQL command for creating a table for this model.
+        Returns the SQL statement for creating a table for this model.
         '''
         parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db.db_name, cls.table_name())]
-        cols = []
-        for name, field in iteritems(cls.fields()):
-            cols.append(' %s %s' % (name, field.get_sql()))
-        parts.append(',\n'.join(cols))
+        # Fields
+        items = []
+        for name, field in cls.fields().items():
+            items.append(' %s %s' % (name, field.get_sql(db=db)))
+        # Constraints
+        for c in cls._constraints.values():
+            items.append(' %s' % c.create_table_sql())
+        # Indexes
+        for i in cls._indexes.values():
+            items.append(' %s' % i.create_table_sql())
+        parts.append(',\n'.join(items))
+        # Engine
         parts.append(')')
         parts.append('ENGINE = ' + cls.engine.create_table_sql(db))
         return '\n'.join(parts)
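With these changes a field's default may be an `F` expression; such columns get `NO_VALUE` as their in-memory default and the class is flagged via `_has_funcs_as_defaults`. A sketch, assuming a hypothetical `Event` model:

```python
from infi.clickhouse_orm import models, fields, engines
from infi.clickhouse_orm.funcs import F

class Event(models.Model):
    name = fields.StringField()
    created = fields.DateTimeField(default=F.now())  # evaluated server-side

    engine = engines.MergeTree(order_by=('name',), partition_key=('toYYYYMM(created)',))

assert Event.has_funcs_as_defaults()
e = Event(name='click')  # e.created stays NO_VALUE until ClickHouse fills it in
```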
@@ -211,15 +384,15 @@ class Model(with_metaclass(ModelBase)):

         - `line`: the TSV-formatted data.
         - `field_names`: names of the model fields in the data.
-        - `timezone_in_use`: the timezone to use when parsing dates and datetimes.
+        - `timezone_in_use`: the timezone to use when parsing dates and datetimes. Some fields use their own timezones.
         - `database`: if given, sets the database that this instance belongs to.
         '''
-        from six import next
         values = iter(parse_tsv(line))
         kwargs = {}
         for name in field_names:
             field = getattr(cls, name)
-            kwargs[name] = field.to_python(next(values), timezone_in_use)
+            field_timezone = getattr(field, 'timezone', None) or timezone_in_use
+            kwargs[name] = field.to_python(next(values), field_timezone)

         obj = cls(**kwargs)
         if database is not None:

@@ -235,7 +408,30 @@ class Model(with_metaclass(ModelBase)):
         '''
         data = self.__dict__
         fields = self.fields(writable=not include_readonly)
-        return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in iteritems(fields))
+        return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields.items())
+
+    def to_tskv(self, include_readonly=True):
+        '''
+        Returns the instance's column keys and values as a tab-separated line. A newline is not included.
+        Fields that were not assigned a value are omitted.
+
+        - `include_readonly`: if false, returns only fields that can be inserted into database.
+        '''
+        data = self.__dict__
+        fields = self.fields(writable=not include_readonly)
+        parts = []
+        for name, field in fields.items():
+            if data[name] != NO_VALUE:
+                parts.append(name + '=' + field.to_db_string(data[name], quote=False))
+        return '\t'.join(parts)
+
+    def to_db_string(self):
+        '''
+        Returns the instance as a bytestring ready to be inserted into the database.
+        '''
+        s = self.to_tskv(False) if self._has_funcs_as_defaults else self.to_tsv(False)
+        s += '\n'
+        return s.encode('utf-8')

     def to_dict(self, include_readonly=True, field_names=None):
         '''
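The serializers fit together as follows: `to_db_string` picks the TSKV form exactly when the model has function defaults, so unassigned columns are omitted and left for the server to fill. Continuing the hypothetical `Event` model from the sketch above:

```python
e = Event(name='click')
e.to_tskv(include_readonly=False)  # 'name=click' — NO_VALUE columns omitted
e.to_db_string()                   # b'name=click\n' — tskv form, since Event has F defaults
# models without function defaults serialize via to_tsv(False) instead
```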
@@ -289,7 +485,7 @@ class BufferModel(Model):
     @classmethod
     def create_table_sql(cls, db):
         '''
-        Returns the SQL command for creating a table for this model.
+        Returns the SQL statement for creating a table for this model.
         '''
         parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` AS `%s`.`%s`' % (db.db_name, cls.table_name(), db.db_name,
                                                                         cls.engine.main_model.table_name())]

@@ -302,7 +498,7 @@ class MergeModel(Model):
     '''
     Model for Merge engine
     Predefines virtual _table column an controls that rows can't be inserted to this table type
-    https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
+    https://clickhouse.tech/docs/en/single/index.html#document-table_engines/merge
     '''
     readonly = True

@@ -311,12 +507,15 @@ class MergeModel(Model):

     @classmethod
     def create_table_sql(cls, db):
+        '''
+        Returns the SQL statement for creating a table for this model.
+        '''
         assert isinstance(cls.engine, Merge), "engine must be an instance of engines.Merge"
         parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db.db_name, cls.table_name())]
         cols = []
-        for name, field in iteritems(cls.fields()):
+        for name, field in cls.fields().items():
             if name != '_table':
-                cols.append(' %s %s' % (name, field.get_sql()))
+                cols.append(' %s %s' % (name, field.get_sql(db=db)))
         parts.append(',\n'.join(cols))
         parts.append(')')
         parts.append('ENGINE = ' + cls.engine.create_table_sql(db))

@@ -327,10 +526,14 @@ class MergeModel(Model):

 class DistributedModel(Model):
     """
-    Model for Distributed engine
+    Model class for use with a `Distributed` engine.
     """

     def set_database(self, db):
+        '''
+        Sets the `Database` that this model instance belongs to.
+        This is done automatically when the instance is read from the database or written to it.
+        '''
         assert isinstance(self.engine, Distributed), "engine must be an instance of engines.Distributed"
         res = super(DistributedModel, self).set_database(db)
         return res

@@ -388,6 +591,9 @@ class DistributedModel(Model):

     @classmethod
     def create_table_sql(cls, db):
+        '''
+        Returns the SQL statement for creating a table for this model.
+        '''
         assert isinstance(cls.engine, Distributed), "engine must be engines.Distributed instance"

         cls.fix_engine_table()

@@ -397,3 +603,7 @@ class DistributedModel(Model):
             db.db_name, cls.table_name(), cls.engine.table_name),
|
||||||
'ENGINE = ' + cls.engine.create_table_sql(db)]
|
'ENGINE = ' + cls.engine.create_table_sql(db)]
|
||||||
return '\n'.join(parts)
|
return '\n'.join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
# Expose only relevant classes in import *
|
||||||
|
__all__ = get_subclass_names(locals(), (Model, Constraint, Index))
|
||||||
|
|
|
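Note: the new `to_tskv` path only matters for models where some field's default is an expression (the `_has_funcs_as_defaults` case); unassigned fields are omitted so the server can apply the column's DEFAULT clause. A minimal sketch of the expected behaviour -- the `Event` model here is illustrative, not part of the patch:

    from infi.clickhouse_orm import Model, DateTimeField, Int32Field, F
    from infi.clickhouse_orm.engines import Memory

    class Event(Model):
        created = DateTimeField(default=F.now())  # expression default
        weight = Int32Field(default=1)            # constant default
        engine = Memory()

    e = Event(weight=5)
    # `created` was never assigned, so it stays NO_VALUE and is skipped in the
    # key=value line; ClickHouse fills it in from the DEFAULT expression on insert.
    print(e.to_tskv(include_readonly=False))  # expected output: weight=5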
@@ -1,17 +1,14 @@
 from __future__ import unicode_literals

-import six
 import pytz
 from copy import copy, deepcopy
 from math import ceil
+from datetime import date, datetime

-from .engines import CollapsingMergeTree
-from .utils import comma_join
+from .utils import comma_join, string_or_func, arg_to_sql


 # TODO
 # - check that field names are valid
-# - operators for arrays: length, has, empty


 class Operator(object):
     """

@@ -25,6 +22,12 @@ class Operator(object):
         """
         raise NotImplementedError   # pragma: no cover

+    def _value_to_sql(self, field, value, quote=True):
+        from infi.clickhouse_orm.funcs import F
+        if isinstance(value, F):
+            return value.to_sql()
+        return field.to_db_string(field.to_python(value, pytz.utc), quote)
+

 class SimpleOperator(Operator):
     """

@@ -37,7 +40,7 @@ class SimpleOperator(Operator):

     def to_sql(self, model_cls, field_name, value):
         field = getattr(model_cls, field_name)
-        value = field.to_db_string(field.to_python(value, pytz.utc))
+        value = self._value_to_sql(field, value)
         if value == '\\N' and self._sql_for_null is not None:
             return ' '.join([field_name, self._sql_for_null])
         return ' '.join([field_name, self._sql_operator, value])

@@ -56,10 +59,10 @@ class InOperator(Operator):
         field = getattr(model_cls, field_name)
         if isinstance(value, QuerySet):
             value = value.as_sql()
-        elif isinstance(value, six.string_types):
+        elif isinstance(value, str):
             pass
         else:
-            value = comma_join([field.to_db_string(field.to_python(v, pytz.utc)) for v in value])
+            value = comma_join([self._value_to_sql(field, v) for v in value])
         return '%s IN (%s)' % (field_name, value)

@@ -75,7 +78,7 @@ class LikeOperator(Operator):

     def to_sql(self, model_cls, field_name, value):
         field = getattr(model_cls, field_name)
-        value = field.to_db_string(field.to_python(value, pytz.utc), quote=False)
+        value = self._value_to_sql(field, value, quote=False)
         value = value.replace('\\', '\\\\').replace('%', '\\\\%').replace('_', '\\\\_')
         pattern = self._pattern.format(value)
         if self._case_sensitive:

@@ -91,7 +94,7 @@ class IExactOperator(Operator):

     def to_sql(self, model_cls, field_name, value):
         field = getattr(model_cls, field_name)
-        value = field.to_db_string(field.to_python(value, pytz.utc))
+        value = self._value_to_sql(field, value)
         return 'lowerUTF8(%s) = lowerUTF8(%s)' % (field_name, value)

@@ -120,10 +123,8 @@ class BetweenOperator(Operator):

     def to_sql(self, model_cls, field_name, value):
         field = getattr(model_cls, field_name)
-        value0 = field.to_db_string(
-            field.to_python(value[0], pytz.utc)) if value[0] is not None or len(str(value[0])) > 0 else None
-        value1 = field.to_db_string(
-            field.to_python(value[1], pytz.utc)) if value[1] is not None or len(str(value[1])) > 0 else None
+        value0 = self._value_to_sql(field, value[0]) if value[0] is not None or len(str(value[0])) > 0 else None
+        value1 = self._value_to_sql(field, value[1]) if value[1] is not None or len(str(value[1])) > 0 else None
         if value0 and value1:
             return '%s BETWEEN %s AND %s' % (field_name, value0, value1)
         if value0 and not value1:
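Note: because every operator now routes values through `_value_to_sql`, the right-hand side of a filter lookup may be an `F` expression instead of a literal, and it is rendered as SQL rather than escaped as a value. An illustrative sketch, using the `Person` test model that appears later in this diff:

    from infi.clickhouse_orm.funcs import F

    # Renders roughly to: WHERE birthday > toStartOfYear(toDate('2000-06-30'))
    qs = Person.objects_in(db).filter(birthday__gt=F.toStartOfYear(F.toDate('2000-06-30')))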
@@ -156,11 +157,19 @@ register_operator('iendswith', LikeOperator('%{}', False))
 register_operator('iexact', IExactOperator())


-class FOV(object):
+class Cond(object):
     """
-    An object for storing Field + Operator + Value.
+    An abstract object for storing a single query condition Field + Operator + Value.
     """
+
+    def to_sql(self, model_cls):
+        raise NotImplementedError
+
+
+class FieldCond(Cond):
+    """
+    A single query condition made up of Field + Operator + Value.
+    """
     def __init__(self, field_name, operator, value):
         self._field_name = field_name
         self._operator = _operators.get(operator)

@@ -184,8 +193,8 @@ class Q(object):
     AND_MODE = 'AND'
     OR_MODE = 'OR'

-    def __init__(self, **filter_fields):
-        self._fovs = [self._build_fov(k, v) for k, v in six.iteritems(filter_fields)]
+    def __init__(self, *filter_funcs, **filter_fields):
+        self._conds = list(filter_funcs) + [self._build_cond(k, v) for k, v in filter_fields.items()]
         self._children = []
         self._negate = False
         self._mode = self.AND_MODE

@@ -194,16 +203,16 @@ class Q(object):
     def is_empty(self):
         """
         Checks if there are any conditions in Q object
-        :return: Boolean
+        Returns: Boolean
         """
-        return not bool(self._fovs or self._children)
+        return not bool(self._conds or self._children)

     @classmethod
     def _construct_from(cls, l_child, r_child, mode):
-        if mode == l_child._mode:
+        if mode == l_child._mode and not l_child._negate:
             q = deepcopy(l_child)
             q._children.append(deepcopy(r_child))
-        elif mode == r_child._mode:
+        elif mode == r_child._mode and not r_child._negate:
             q = deepcopy(r_child)
             q._children.append(deepcopy(l_child))
         else:

@@ -214,18 +223,18 @@ class Q(object):

         return q

-    def _build_fov(self, key, value):
+    def _build_cond(self, key, value):
         if '__' in key:
             field_name, operator = key.rsplit('__', 1)
         else:
             field_name, operator = key, 'eq'
-        return FOV(field_name, operator, value)
+        return FieldCond(field_name, operator, value)

     def to_sql(self, model_cls):
         condition_sql = []

-        if self._fovs:
-            condition_sql.extend([fov.to_sql(model_cls) for fov in self._fovs])
+        if self._conds:
+            condition_sql.extend([cond.to_sql(model_cls) for cond in self._conds])

         if self._children:
             condition_sql.extend([child.to_sql(model_cls) for child in self._children if child])

@@ -261,7 +270,7 @@ class Q(object):

     def __deepcopy__(self, memodict={}):
         q = Q()
-        q._fovs = [deepcopy(fov) for fov in self._fovs]
+        q._conds = [deepcopy(cond) for cond in self._conds]
         q._negate = self._negate
         q._mode = self._mode

@@ -271,7 +280,6 @@ class Q(object):
         return q


-@six.python_2_unicode_compatible
 class QuerySet(object):
     """
     A queryset is an object that represents a database query using a specific `Model`.

@@ -284,6 +292,7 @@ class QuerySet(object):
         Initializer. It is possible to create a queryset like this, but the standard
         way is to use `MyModel.objects_in(database)`.
         """
+        self.model = model_cls
         self._model_cls = model_cls
         self._database = database
         self._order_by = []

@@ -293,6 +302,8 @@ class QuerySet(object):
         self._grouping_with_totals = False
         self._fields = model_cls.fields().keys()
         self._limits = None
+        self._limit_by = None
+        self._limit_by_fields = None
         self._distinct = False
         self._final = False

@@ -315,12 +326,12 @@ class QuerySet(object):
         return self.as_sql()

     def __getitem__(self, s):
-        if isinstance(s, six.integer_types):
+        if isinstance(s, int):
             # Single index
             assert s >= 0, 'negative indexes are not supported'
             qs = copy(self)
             qs._limits = (s, 1)
-            return six.next(iter(qs))
+            return next(iter(qs))
         else:
             # Slice
             assert s.step in (None, 1), 'step is not supported in slices'

@@ -332,11 +343,31 @@ class QuerySet(object):
             qs._limits = (start, stop - start)
             return qs

+    def limit_by(self, offset_limit, *fields_or_expr):
+        """
+        Adds a LIMIT BY clause to the query.
+        - `offset_limit`: either an integer specifying the limit, or a tuple of integers (offset, limit).
+        - `fields_or_expr`: the field names or expressions to use in the clause.
+        """
+        if isinstance(offset_limit, int):
+            # Single limit
+            offset_limit = (0, offset_limit)
+        offset = offset_limit[0]
+        limit = offset_limit[1]
+        assert offset >= 0 and limit >= 0, 'negative limits are not supported'
+        qs = copy(self)
+        qs._limit_by = (offset, limit)
+        qs._limit_by_fields = fields_or_expr
+        return qs
+
     def select_fields_as_sql(self):
         """
         Returns the selected fields or expressions as a SQL string.
         """
-        return comma_join('`%s`' % field for field in self._fields) if self._fields else '*'
+        fields = '*'
+        if self._fields:
+            fields = comma_join('`%s`' % field for field in self._fields)
+        return fields

     def as_sql(self):
         """

@@ -344,10 +375,9 @@ class QuerySet(object):
         """
         distinct = 'DISTINCT ' if self._distinct else ''
         final = ' FINAL' if self._final else ''
-        table_name = self._model_cls.table_name()
-        if not self._model_cls.is_system_model():
-            table_name = '`%s`' % table_name
+        table_name = '`%s`' % self._model_cls.table_name()
+        if self._model_cls.is_system_model():
+            table_name = '`system`.' + table_name
         params = (distinct, self.select_fields_as_sql(), table_name, final)
         sql = u'SELECT %s%s\nFROM %s%s' % params

@@ -366,6 +396,10 @@ class QuerySet(object):
         if self._order_by:
             sql += '\nORDER BY ' + self.order_by_as_sql()

+        if self._limit_by:
+            sql += '\nLIMIT %d, %d' % self._limit_by
+            sql += ' BY %s' % comma_join(string_or_func(field) for field in self._limit_by_fields)
+
         if self._limits:
             sql += '\nLIMIT %d, %d' % self._limits
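Note: together with the `limit_by` method above, this emits ClickHouse's `LIMIT n BY expr` form. A short usage sketch -- the `Event` model and its fields are illustrative:

    # Two most recent rows per user: ... ORDER BY timestamp DESC LIMIT 0, 2 BY user_id
    qs = Event.objects_in(db).order_by('-timestamp').limit_by(2, 'user_id')
    # Offset form, skipping the newest row per user first: LIMIT 1, 2 BY user_id
    qs = Event.objects_in(db).order_by('-timestamp').limit_by((1, 2), 'user_id')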
@@ -376,7 +410,7 @@ class QuerySet(object):
         Returns the contents of the query's `ORDER BY` clause as a string.
         """
         return comma_join([
-            '%s DESC' % field[1:] if field[0] == '-' else field
+            '%s DESC' % field[1:] if isinstance(field, str) and field[0] == '-' else str(field)
             for field in self._order_by
         ])

@@ -420,14 +454,21 @@ class QuerySet(object):
         return qs

     def _filter_or_exclude(self, *q, **kwargs):
+        from .funcs import F
+
         inverse = kwargs.pop('_inverse', False)
         prewhere = kwargs.pop('prewhere', False)

         qs = copy(self)

         condition = Q()
-        for q_obj in q:
-            condition &= q_obj
+        for arg in q:
+            if isinstance(arg, Q):
+                condition &= arg
+            elif isinstance(arg, F):
+                condition &= Q(arg)
+            else:
+                raise TypeError('Invalid argument "%r" to queryset filter' % arg)

         if kwargs:
             condition &= Q(**kwargs)
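Note: with this change, `filter()` and `exclude()` accept a mix of `Q` objects, bare `F` expressions and keyword lookups, all ANDed together; anything else raises `TypeError`. An illustrative combination (values are made up):

    qs = Person.objects_in(db).filter(
        Q(first_name='Whitney') | Q(first_name='Scott'),  # a Q tree
        Person.height > 1.7,                              # a bare F expression
        birthday__lte='2000-01-01'                        # a keyword lookup
    )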
@@ -497,15 +538,50 @@ class QuerySet(object):
     def final(self):
         """
         Adds a FINAL modifier to table, meaning data will be collapsed to final version.
-        Can be used with `CollapsingMergeTree` engine only.
+        Can be used with the `CollapsingMergeTree` and `ReplacingMergeTree` engines only.
         """
-        if not isinstance(self._model_cls.engine, CollapsingMergeTree):
-            raise TypeError('final() method can be used only with CollapsingMergeTree engine')
+        from .engines import CollapsingMergeTree, ReplacingMergeTree
+        if not isinstance(self._model_cls.engine, (CollapsingMergeTree, ReplacingMergeTree)):
+            raise TypeError('final() method can be used only with the CollapsingMergeTree and ReplacingMergeTree engines')

         qs = copy(self)
         qs._final = True
         return qs

+    def delete(self):
+        """
+        Deletes all records matched by this queryset's conditions.
+        Note that ClickHouse performs deletions in the background, so they are not immediate.
+        """
+        self._verify_mutation_allowed()
+        conditions = (self._where_q & self._prewhere_q).to_sql(self._model_cls)
+        sql = 'ALTER TABLE $db.`%s` DELETE WHERE %s' % (self._model_cls.table_name(), conditions)
+        self._database.raw(sql)
+        return self
+
+    def update(self, **kwargs):
+        """
+        Updates all records matched by this queryset's conditions.
+        Keyword arguments specify the field names and expressions to use for the update.
+        Note that ClickHouse performs updates in the background, so they are not immediate.
+        """
+        assert kwargs, 'No fields specified for update'
+        self._verify_mutation_allowed()
+        fields = comma_join('`%s` = %s' % (name, arg_to_sql(expr)) for name, expr in kwargs.items())
+        conditions = (self._where_q & self._prewhere_q).to_sql(self._model_cls)
+        sql = 'ALTER TABLE $db.`%s` UPDATE %s WHERE %s' % (self._model_cls.table_name(), fields, conditions)
+        self._database.raw(sql)
+        return self
+
+    def _verify_mutation_allowed(self):
+        '''
+        Checks that the queryset's state allows mutations. Raises an AssertionError if not.
+        '''
+        assert not self._limits, 'Mutations are not allowed after slicing the queryset'
+        assert not self._limit_by, 'Mutations are not allowed after calling limit_by(...)'
+        assert not self._distinct, 'Mutations are not allowed after calling distinct()'
+        assert not self._final, 'Mutations are not allowed after calling final()'
+
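Note: both mutations compile to `ALTER TABLE ... DELETE/UPDATE ... WHERE ...` statements and return the queryset for chaining. Since ClickHouse applies mutations asynchronously, a read issued right afterwards may still see the old rows. Illustrative calls against the `Person` test model:

    Person.objects_in(db).filter(first_name='Cassady').delete()
    Person.objects_in(db).filter(last_name='Gates').update(height=1.80, passport=None)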
     def aggregate(self, *args, **kwargs):
         """
         Returns an `AggregateQuerySet` over this query, with `args` serving as

@@ -583,7 +659,7 @@ class AggregateQuerySet(QuerySet):
         """
         Returns the selected fields or expressions as a SQL string.
         """
-        return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
+        return comma_join([str(f) for f in self._fields] + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])

     def __iter__(self):
         return self._database.select(self.as_sql())  # using an ad-hoc model

@@ -600,8 +676,15 @@ class AggregateQuerySet(QuerySet):
         """
         Adds WITH TOTALS modifier ot GROUP BY, making query return extra row
         with aggregate function calculated across all the rows. More information:
-        https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier
+        https://clickhouse.tech/docs/en/query_language/select/#with-totals-modifier
         """
         qs = copy(self)
         qs._grouping_with_totals = True
         return qs

+    def _verify_mutation_allowed(self):
+        raise AssertionError('Cannot mutate an AggregateQuerySet')
+
+
+# Expose only relevant classes in import *
+__all__ = [c.__name__ for c in [Q, QuerySet, AggregateQuerySet]]
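Note: `with_totals()` appends `WITH TOTALS` to the GROUP BY, so iterating the queryset yields one extra row holding aggregates over all selected rows. A hedged sketch (field and aggregate names are illustrative):

    qs = Person.objects_in(db).aggregate('first_name', count=F.count()).with_totals()
    for row in qs:
        print(row.first_name, row.count)  # the totals row has an empty first_name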
@@ -1,9 +1,8 @@
 """
 This file contains system readonly models that can be got from the database
-https://clickhouse.yandex/docs/en/system_tables/
+https://clickhouse.tech/docs/en/system_tables/
 """
 from __future__ import unicode_literals
-from six import string_types

 from .database import Database
 from .fields import *

@@ -15,7 +14,7 @@ class SystemPart(Model):
     """
     Contains information about parts of a table in the MergeTree family.
     This model operates only fields, described in the reference. Other fields are ignored.
-    https://clickhouse.yandex/docs/en/system_tables/system.parts/
+    https://clickhouse.tech/docs/en/system_tables/system.parts/
     """
     OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'})

@@ -28,7 +27,7 @@ class SystemPart(Model):
     partition = StringField()  # Name of the partition, in the format YYYYMM.
     name = StringField()  # Name of the part.

-    # This field is present in the docs (https://clickhouse.yandex/docs/en/single/index.html#system-parts),
+    # This field is present in the docs (https://clickhouse.tech/docs/en/single/index.html#system-parts),
     # but is absent in ClickHouse (in version 1.1.54245)
     # replicated = UInt8Field()  # Whether the part belongs to replicated data.

@@ -52,19 +51,21 @@ class SystemPart(Model):

     @classmethod
     def table_name(cls):
-        return 'system.parts'
+        return 'parts'

     """
     Next methods return SQL for some operations, which can be done with partitions
-    https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts
+    https://clickhouse.tech/docs/en/query_language/queries/#manipulations-with-partitions-and-parts
     """
     def _partition_operation_sql(self, operation, settings=None, from_part=None):
         """
         Performs some operation over partition
-        :param db: Database object to execute operation on
-        :param operation: Operation to execute from SystemPart.OPERATIONS set
-        :param settings: Settings for executing request to ClickHouse over db.raw() method
-        :return: Operation execution result
+        - `db`: Database object to execute operation on
+        - `operation`: Operation to execute from SystemPart.OPERATIONS set
+        - `settings`: Settings for executing request to ClickHouse over db.raw() method
+
+        Returns: Operation execution result
         """
         operation = operation.upper()
         assert operation in self.OPERATIONS, "operation must be in [%s]" % comma_join(self.OPERATIONS)

@@ -77,41 +78,51 @@ class SystemPart(Model):
     def detach(self, settings=None):
         """
         Move a partition to the 'detached' directory and forget it.
-        :param settings: Settings for executing request to ClickHouse over db.raw() method
-        :return: SQL Query
+        - `settings`: Settings for executing request to ClickHouse over db.raw() method
+
+        Returns: SQL Query
         """
         return self._partition_operation_sql('DETACH', settings=settings)

     def drop(self, settings=None):
         """
         Delete a partition
-        :param settings: Settings for executing request to ClickHouse over db.raw() method
-        :return: SQL Query
+        - `settings`: Settings for executing request to ClickHouse over db.raw() method
+
+        Returns: SQL Query
         """
         return self._partition_operation_sql('DROP', settings=settings)

     def attach(self, settings=None):
         """
         Add a new part or partition from the 'detached' directory to the table.
-        :param settings: Settings for executing request to ClickHouse over db.raw() method
-        :return: SQL Query
+        - `settings`: Settings for executing request to ClickHouse over db.raw() method
+
+        Returns: SQL Query
         """
         return self._partition_operation_sql('ATTACH', settings=settings)

     def freeze(self, settings=None):
         """
         Create a backup of a partition.
-        :param settings: Settings for executing request to ClickHouse over db.raw() method
-        :return: SQL Query
+        - `settings`: Settings for executing request to ClickHouse over db.raw() method
+
+        Returns: SQL Query
         """
         return self._partition_operation_sql('FREEZE', settings=settings)

     def fetch(self, zookeeper_path, settings=None):
         """
         Download a partition from another server.
-        :param zookeeper_path: Path in zookeeper to fetch from
-        :param settings: Settings for executing request to ClickHouse over db.raw() method
-        :return: SQL Query
+        - `zookeeper_path`: Path in zookeeper to fetch from
+        - `settings`: Settings for executing request to ClickHouse over db.raw() method
+
+        Returns: SQL Query
         """
         return self._partition_operation_sql('FETCH', settings=settings, from_part=zookeeper_path)

@@ -119,27 +130,35 @@ class SystemPart(Model):
     def get(cls, database, conditions=""):
         """
         Get all data from system.parts table
-        :param database: A database object to fetch data from.
-        :param conditions: WHERE clause conditions. Database condition is added automatically
-        :return: A list of SystemPart objects
+        - `database`: A database object to fetch data from.
+        - `conditions`: WHERE clause conditions. Database condition is added automatically
+
+        Returns: A list of SystemPart objects
         """
         assert isinstance(database, Database), "database must be database.Database class instance"
-        assert isinstance(conditions, string_types), "conditions must be a string"
+        assert isinstance(conditions, str), "conditions must be a string"
         if conditions:
             conditions += " AND"
         field_names = ','.join(cls.fields())
-        return database.select("SELECT %s FROM %s WHERE %s database='%s'" %
+        return database.select("SELECT %s FROM `system`.%s WHERE %s database='%s'" %
                                (field_names, cls.table_name(), conditions, database.db_name), model_class=cls)

     @classmethod
     def get_active(cls, database, conditions=""):
         """
         Gets active data from system.parts table
-        :param database: A database object to fetch data from.
-        :param conditions: WHERE clause conditions. Database and active conditions are added automatically
-        :return: A list of SystemPart objects
+        - `database`: A database object to fetch data from.
+        - `conditions`: WHERE clause conditions. Database and active conditions are added automatically
+
+        Returns: A list of SystemPart objects
         """
         if conditions:
             conditions += ' AND '
         conditions += 'active'
         return SystemPart.get(database, conditions=conditions)
+
+
+# Expose only relevant classes in import *
+__all__ = [c.__name__ for c in [SystemPart]]
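Note: a short usage sketch for the partition helpers above (the database and partition names are illustrative):

    db = Database('my_db')
    for part in SystemPart.get_active(db):
        if part.partition == '201509':
            part.detach()  # issues ALTER TABLE ... DETACH PARTITION via db.raw()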
@@ -1,7 +1,6 @@
-from __future__ import unicode_literals
-from six import string_types, binary_type, text_type, PY3
 import codecs
 import re
+from datetime import date, datetime, tzinfo, timedelta


 SPECIAL_CHARS = {

@@ -28,19 +27,57 @@ def escape(value, quote=True):
     def escape_one(match):
         return SPECIAL_CHARS[match.group(0)]

-    if isinstance(value, string_types):
+    if isinstance(value, str):
         value = SPECIAL_CHARS_REGEX.sub(escape_one, value)
         if quote:
             value = "'" + value + "'"
-    return text_type(value)
+    return str(value)


 def unescape(value):
     return codecs.escape_decode(value)[0].decode('utf-8')


+def string_or_func(obj):
+    return obj.to_sql() if hasattr(obj, 'to_sql') else obj
+
+
+def arg_to_sql(arg):
+    """
+    Converts a function argument to SQL string according to its type.
+    Supports functions, model fields, strings, dates, datetimes, timedeltas, booleans,
+    None, numbers, timezones, arrays/iterables.
+    """
+    from infi.clickhouse_orm import Field, StringField, DateTimeField, DateField, F, QuerySet
+    if isinstance(arg, F):
+        return arg.to_sql()
+    if isinstance(arg, Field):
+        return "`%s`" % arg
+    if isinstance(arg, str):
+        return StringField().to_db_string(arg)
+    if isinstance(arg, datetime):
+        return "toDateTime(%s)" % DateTimeField().to_db_string(arg)
+    if isinstance(arg, date):
+        return "toDate('%s')" % arg.isoformat()
+    if isinstance(arg, timedelta):
+        return "toIntervalSecond(%d)" % int(arg.total_seconds())
+    if isinstance(arg, bool):
+        return str(int(arg))
+    if isinstance(arg, tzinfo):
+        return StringField().to_db_string(arg.tzname(None))
+    if arg is None:
+        return 'NULL'
+    if isinstance(arg, QuerySet):
+        return "(%s)" % arg
+    if isinstance(arg, tuple):
+        return '(' + comma_join(arg_to_sql(x) for x in arg) + ')'
+    if is_iterable(arg):
+        return '[' + comma_join(arg_to_sql(x) for x in arg) + ']'
+    return str(arg)
+
+
 def parse_tsv(line):
-    if PY3 and isinstance(line, binary_type):
+    if isinstance(line, bytes):
         line = line.decode()
     if line and line[-1] == '\n':
         line = line[:-1]

@@ -49,19 +86,19 @@ def parse_tsv(line):

 def parse_array(array_string):
     """
-    Parse an array string as returned by clickhouse. For example:
+    Parse an array or tuple string as returned by clickhouse. For example:
         "['hello', 'world']" ==> ["hello", "world"]
-        "[1,2,3]" ==> [1, 2, 3]
+        "(1,2,3)" ==> [1, 2, 3]
     """
     # Sanity check
-    if len(array_string) < 2 or array_string[0] != '[' or array_string[-1] != ']':
+    if len(array_string) < 2 or array_string[0] not in '[(' or array_string[-1] not in '])':
         raise ValueError('Invalid array string: "%s"' % array_string)
     # Drop opening brace
     array_string = array_string[1:]
     # Go over the string, lopping off each value at the beginning until nothing is left
     values = []
     while True:
-        if array_string == ']':
+        if array_string in '])':
             # End of array
             return values
         elif array_string[0] in ', ':

@@ -93,8 +130,38 @@ def import_submodules(package_name):
     }


-def comma_join(items):
+def comma_join(items, stringify=False):
     """
     Joins an iterable of strings with commas.
     """
-    return ', '.join(items)
+    if stringify:
+        return ', '.join(str(item) for item in items)
+    else:
+        return ', '.join(items)
+
+
+def is_iterable(obj):
+    """
+    Checks if the given object is iterable.
+    """
+    try:
+        iter(obj)
+        return True
+    except TypeError:
+        return False
+
+
+def get_subclass_names(locals, base_class):
+    from inspect import isclass
+    return [c.__name__ for c in locals.values() if isclass(c) and issubclass(c, base_class)]
+
+
+class NoValue:
+    '''
+    A sentinel for fields with an expression for a default value,
+    that were not assigned a value yet.
+    '''
+    def __repr__(self):
+        return 'NO_VALUE'
+
+NO_VALUE = NoValue()
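Note: a few conversions that `arg_to_sql` is expected to produce, derived from the branches above (outputs shown as comments):

    from datetime import date, timedelta
    from infi.clickhouse_orm.utils import arg_to_sql

    arg_to_sql(date(2020, 1, 1))      # "toDate('2020-01-01')"
    arg_to_sql(timedelta(minutes=5))  # "toIntervalSecond(300)"
    arg_to_sql(True)                  # "1"
    arg_to_sql(None)                  # "NULL"
    arg_to_sql([1, 2, 3])             # "[1, 2, 3]"
    arg_to_sql((1, 2, 3))             # "(1, 2, 3)"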
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 import unittest

 from infi.clickhouse_orm.database import Database

@@ -21,6 +20,10 @@ class TestCaseWithData(unittest.TestCase):
         self.database.drop_table(Person)
         self.database.drop_database()

+    def _insert_all(self):
+        self.database.insert(self._sample_data())
+        self.assertTrue(self.database.count(Person))
+
     def _insert_and_check(self, data, count, batch_size=1000):
         self.database.insert(data, batch_size=batch_size)
         self.assertEqual(count, self.database.count(Person))

@@ -32,10 +35,11 @@ class TestCaseWithData(unittest.TestCase):
         yield Person(**entry)



 class Person(Model):

     first_name = StringField()
-    last_name = StringField()
+    last_name = LowCardinalityField(StringField())
     birthday = DateField()
     height = Float32Field()
     passport = NullableField(UInt32Field())
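Note: `LowCardinalityField` is a storage-level wrapper, so the test data keeps treating `last_name` as an ordinary string; only the column's SQL type changes (to `LowCardinality(String)`). For instance:

    # The wrapped field still accepts and returns plain Python strings:
    p = Person(first_name='Ava', last_name='Gardner', birthday='1922-12-24', height=1.66)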
tests/sample_migrations/0015.py (new file, 7 lines)
@@ -0,0 +1,7 @@
+from infi.clickhouse_orm import migrations
+from ..test_migrations import *
+
+operations = [
+    migrations.AlterTable(Model4_compressed),
+    migrations.AlterTable(Model2LowCardinality)
+]

tests/sample_migrations/0016.py (new file, 6 lines)
@@ -0,0 +1,6 @@
+from infi.clickhouse_orm import migrations
+from ..test_migrations import *
+
+operations = [
+    migrations.CreateTable(ModelWithConstraints)
+]

tests/sample_migrations/0017.py (new file, 6 lines)
@@ -0,0 +1,6 @@
+from infi.clickhouse_orm import migrations
+from ..test_migrations import *
+
+operations = [
+    migrations.AlterConstraints(ModelWithConstraints2)
+]

tests/sample_migrations/0018.py (new file, 6 lines)
@@ -0,0 +1,6 @@
+from infi.clickhouse_orm import migrations
+from ..test_migrations import *
+
+operations = [
+    migrations.CreateTable(ModelWithIndex)
+]

tests/sample_migrations/0019.py (new file, 6 lines)
@@ -0,0 +1,6 @@
+from infi.clickhouse_orm import migrations
+from ..test_migrations import *
+
+operations = [
+    migrations.AlterIndexes(ModelWithIndex2, reindex=True)
+]
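Note: these numbered modules are discovered and applied in order by the migration runner; running them might look like this (assuming the standard `Database.migrate` API):

    from infi.clickhouse_orm.database import Database

    db = Database('my_db')
    db.migrate('tests.sample_migrations')  # applies any of 0001..0019 not yet applied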
@@ -1,14 +1,14 @@
-from __future__ import unicode_literals
 import unittest
 from datetime import date

 from infi.clickhouse_orm.database import Database
-from infi.clickhouse_orm.models import Model
+from infi.clickhouse_orm.models import Model, NO_VALUE
 from infi.clickhouse_orm.fields import *
 from infi.clickhouse_orm.engines import *
+from infi.clickhouse_orm.funcs import F


-class MaterializedFieldsTest(unittest.TestCase):
+class AliasFieldsTest(unittest.TestCase):

     def setUp(self):
         self.database = Database('test-db', log_statements=True)

@@ -25,7 +25,7 @@ class AliasFieldsTest(unittest.TestCase):
         )
         self.database.insert([instance])
         # We can't select * from table, as it doesn't select materialized and alias fields
-        query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str' \
+        query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str, alias_func' \
                 ' FROM $db.%s ORDER BY alias_date' % ModelWithAliasFields.table_name()
         for model_cls in (ModelWithAliasFields, None):
             results = list(self.database.select(query, model_cls))

@@ -36,6 +36,7 @@ class AliasFieldsTest(unittest.TestCase):
         self.assertEqual(results[0].alias_int, instance.int_field)
         self.assertEqual(results[0].alias_str, instance.str_field)
         self.assertEqual(results[0].alias_date, instance.date_field)
+        self.assertEqual(results[0].alias_func, 201608)

     def test_assignment_error(self):
         # I can't prevent assigning at all, in case db.select statements with model provided sets model fields.

@@ -55,6 +56,14 @@ class AliasFieldsTest(unittest.TestCase):
         with self.assertRaises(AssertionError):
             StringField(alias='str_field', materialized='str_field')

+    def test_default_value(self):
+        instance = ModelWithAliasFields()
+        self.assertEqual(instance.alias_str, NO_VALUE)
+        # Check that NO_VALUE can be assigned to a field
+        instance.str_field = NO_VALUE
+        # Check that NO_VALUE can be assigned when creating a new instance
+        instance2 = ModelWithAliasFields(**instance.to_dict())
+

 class ModelWithAliasFields(Model):
     int_field = Int32Field()

@@ -64,5 +73,6 @@ class ModelWithAliasFields(Model):
     alias_str = StringField(alias=u'str_field')
     alias_int = Int32Field(alias='int_field')
     alias_date = DateField(alias='date_field')
+    alias_func = Int32Field(alias=F.toYYYYMM(date_field))

     engine = MergeTree('date_field', ('date_field',))
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import unittest
 from datetime import date

@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 import unittest

 from infi.clickhouse_orm.models import BufferModel

tests/test_compressed_fields.py (new file, 123 lines)
@@ -0,0 +1,123 @@
+import unittest
+import datetime
+import pytz
+
+from infi.clickhouse_orm.database import Database
+from infi.clickhouse_orm.models import Model, NO_VALUE
+from infi.clickhouse_orm.fields import *
+from infi.clickhouse_orm.engines import *
+from infi.clickhouse_orm.utils import parse_tsv
+
+
+class CompressedFieldsTestCase(unittest.TestCase):
+
+    def setUp(self):
+        self.database = Database('test-db', log_statements=True)
+        self.database.create_table(CompressedModel)
+
+    def tearDown(self):
+        self.database.drop_database()
+
+    def test_defaults(self):
+        # Check that all fields have their explicit or implicit defaults
+        instance = CompressedModel()
+        self.database.insert([instance])
+        self.assertEqual(instance.date_field, datetime.date(1970, 1, 1))
+        self.assertEqual(instance.datetime_field, datetime.datetime(1970, 1, 1, tzinfo=pytz.utc))
+        self.assertEqual(instance.string_field, 'dozo')
+        self.assertEqual(instance.int64_field, 42)
+        self.assertEqual(instance.float_field, 0)
+        self.assertEqual(instance.nullable_field, None)
+        self.assertEqual(instance.array_field, [])
+
+    def test_assignment(self):
+        # Check that all fields are assigned during construction
+        kwargs = dict(
+            uint64_field=217,
+            date_field=datetime.date(1973, 12, 6),
+            datetime_field=datetime.datetime(2000, 5, 24, 10, 22, tzinfo=pytz.utc),
+            string_field='aloha',
+            int64_field=-50,
+            float_field=3.14,
+            nullable_field=-2.718281,
+            array_field=['123456789123456','','a']
+        )
+        instance = CompressedModel(**kwargs)
+        self.database.insert([instance])
+        for name, value in kwargs.items():
+            self.assertEqual(kwargs[name], getattr(instance, name))
+
+    def test_string_conversion(self):
+        # Check field conversion from string during construction
+        instance = CompressedModel(date_field='1973-12-06', int64_field='100', float_field='7', nullable_field=None, array_field='[a,b,c]')
+        self.assertEqual(instance.date_field, datetime.date(1973, 12, 6))
+        self.assertEqual(instance.int64_field, 100)
+        self.assertEqual(instance.float_field, 7)
+        self.assertEqual(instance.nullable_field, None)
+        self.assertEqual(instance.array_field, ['a', 'b', 'c'])
+        # Check field conversion from string during assignment
+        instance.int64_field = '99'
+        self.assertEqual(instance.int64_field, 99)
+
+    def test_to_dict(self):
+        instance = CompressedModel(date_field='1973-12-06', int64_field='100', float_field='7', array_field='[a,b,c]')
+        self.assertDictEqual(instance.to_dict(), {
+            "date_field": datetime.date(1973, 12, 6),
+            "int64_field": 100,
+            "float_field": 7.0,
+            "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
+            "alias_field": NO_VALUE,
+            'string_field': 'dozo',
+            'nullable_field': None,
+            'uint64_field': 0,
+            'array_field': ['a','b','c']
+        })
+        self.assertDictEqual(instance.to_dict(include_readonly=False), {
+            "date_field": datetime.date(1973, 12, 6),
+            "int64_field": 100,
+            "float_field": 7.0,
+            "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
+            'string_field': 'dozo',
+            'nullable_field': None,
+            'uint64_field': 0,
+            'array_field': ['a', 'b', 'c']
+        })
+        self.assertDictEqual(
+            instance.to_dict(include_readonly=False, field_names=('int64_field', 'alias_field', 'datetime_field')), {
+            "int64_field": 100,
+            "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc)
+        })
+
+    def test_confirm_compression_codec(self):
+        if self.database.server_version < (19, 17):
+            raise unittest.SkipTest('ClickHouse version too old')
+        instance = CompressedModel(date_field='1973-12-06', int64_field='100', float_field='7', array_field='[a,b,c]')
+        self.database.insert([instance])
+        r = self.database.raw("select name, compression_codec from system.columns where table = '{}' and database='{}' FORMAT TabSeparatedWithNamesAndTypes".format(instance.table_name(), self.database.db_name))
+        lines = r.splitlines()
+        field_names = parse_tsv(lines[0])
+        field_types = parse_tsv(lines[1])
+        data = [tuple(parse_tsv(line)) for line in lines[2:]]
+        self.assertListEqual(data, [('uint64_field', 'CODEC(ZSTD(10))'),
+                                    ('datetime_field', 'CODEC(Delta(4), ZSTD(1))'),
+                                    ('date_field', 'CODEC(Delta(4), ZSTD(22))'),
+                                    ('int64_field', 'CODEC(LZ4)'),
+                                    ('string_field', 'CODEC(LZ4HC(10))'),
+                                    ('nullable_field', 'CODEC(ZSTD(1))'),
+                                    ('array_field', 'CODEC(Delta(2), LZ4HC(0))'),
+                                    ('float_field', 'CODEC(NONE)'),
+                                    ('alias_field', '')])
+
+
+class CompressedModel(Model):
+    uint64_field = UInt64Field(codec='ZSTD(10)')
+    datetime_field = DateTimeField(codec='Delta,ZSTD')
+    date_field = DateField(codec='Delta(4),ZSTD(22)')
+    int64_field = Int64Field(default=42, codec='LZ4')
+    string_field = StringField(default='dozo', codec='LZ4HC(10)')
+    nullable_field = NullableField(Float32Field(), codec='ZSTD')
+    array_field = ArrayField(FixedStringField(length=15), codec='Delta(2),LZ4HC')
+    float_field = Float32Field(codec='NONE')
+    alias_field = Float32Field(alias='float_field', codec='ZSTD(4)')
+
+    engine = MergeTree('datetime_field', ('uint64_field', 'datetime_field'))
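Note: the `codec` argument is passed through into the column's `CODEC(...)` clause; as the assertions above show, the server normalizes shorthand like `'Delta,ZSTD'` into `CODEC(Delta(4), ZSTD(1))` based on the column type. A minimal illustrative model:

    class Visit(Model):
        ts = DateTimeField(codec='Delta,ZSTD')  # stored as CODEC(Delta(4), ZSTD(1))
        url = StringField(codec='LZ4HC(10)')    # stored as CODEC(LZ4HC(10))
        engine = MergeTree('ts', ('ts',))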
tests/test_constraints.py (new file, 44 lines)
@@ -0,0 +1,44 @@
+import unittest
+
+from infi.clickhouse_orm import *
+from .base_test_with_data import Person
+
+
+class ConstraintsTest(unittest.TestCase):
+
+    def setUp(self):
+        self.database = Database('test-db', log_statements=True)
+        if self.database.server_version < (19, 14, 3, 3):
+            raise unittest.SkipTest('ClickHouse version too old')
+        self.database.create_table(PersonWithConstraints)
+
+    def tearDown(self):
+        self.database.drop_database()
+
+    def test_insert_valid_values(self):
+        self.database.insert([
+            PersonWithConstraints(first_name="Mike", last_name="Caruzo", birthday="2000-01-01", height=1.66)
+        ])
+
+    def test_insert_invalid_values(self):
+        with self.assertRaises(ServerError) as e:
+            self.database.insert([
+                PersonWithConstraints(first_name="Mike", last_name="Caruzo", birthday="2100-01-01", height=1.66)
+            ])
+        self.assertEqual(e.code, 469)
+        self.assertTrue('Constraint `birthday_in_the_past`' in e.message)
+
+        with self.assertRaises(ServerError) as e:
+            self.database.insert([
+                PersonWithConstraints(first_name="Mike", last_name="Caruzo", birthday="1970-01-01", height=3)
+            ])
+        self.assertEqual(e.code, 469)
+        self.assertTrue('Constraint `max_height`' in e.message)
+
+
+class PersonWithConstraints(Person):
+
+    birthday_in_the_past = Constraint(Person.birthday <= F.today())
+    max_height = Constraint(Person.height <= 2.75)
+
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import unittest
 from infi.clickhouse_orm.database import Database
 from infi.clickhouse_orm.fields import Field, Int16Field

@@ -1,8 +1,13 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 import unittest
+import datetime
+
 from infi.clickhouse_orm.database import ServerError, DatabaseException
+from infi.clickhouse_orm.models import Model
+from infi.clickhouse_orm.engines import Memory
+from infi.clickhouse_orm.fields import *
+from infi.clickhouse_orm.funcs import F
+from infi.clickhouse_orm.query import Q
 from .base_test_with_data import *

@@ -26,12 +31,32 @@ class DatabaseTestCase(TestCaseWithData):
     def test_insert__medium_batches(self):
         self._insert_and_check(self._sample_data(), len(data), batch_size=100)

+    def test_insert__funcs_as_default_values(self):
+        if self.database.server_version < (20, 1, 2, 4):
+            raise unittest.SkipTest('Buggy in server versions before 20.1.2.4')
+        class TestModel(Model):
+            a = DateTimeField(default=datetime.datetime(2020, 1, 1))
+            b = DateField(default=F.toDate(a))
+            c = Int32Field(default=7)
+            d = Int32Field(default=c * 5)
+            engine = Memory()
+        self.database.create_table(TestModel)
+        self.database.insert([TestModel()])
+        t = TestModel.objects_in(self.database)[0]
+        self.assertEqual(str(t.b), '2020-01-01')
+        self.assertEqual(t.d, 35)
+
     def test_count(self):
         self.database.insert(self._sample_data())
         self.assertEqual(self.database.count(Person), 100)
+        # Conditions as string
         self.assertEqual(self.database.count(Person, "first_name = 'Courtney'"), 2)
         self.assertEqual(self.database.count(Person, "birthday > '2000-01-01'"), 22)
         self.assertEqual(self.database.count(Person, "birthday < '1970-03-01'"), 0)
+        # Conditions as expression
+        self.assertEqual(self.database.count(Person, Person.birthday > datetime.date(2000, 1, 1)), 22)
+        # Conditions as Q object
+        self.assertEqual(self.database.count(Person, Q(birthday__gt=datetime.date(2000, 1, 1))), 22)

     def test_select(self):
         self._insert_and_check(self._sample_data(), len(data))

@@ -128,8 +153,15 @@ class DatabaseTestCase(TestCaseWithData):

     def test_pagination_with_conditions(self):
         self._insert_and_check(self._sample_data(), len(data))
+        # Conditions as string
         page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions="first_name < 'Ava'")
         self.assertEqual(page.number_of_objects, 10)
+        # Conditions as expression
+        page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions=Person.first_name < 'Ava')
+        self.assertEqual(page.number_of_objects, 10)
+        # Conditions as Q object
+        page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions=Q(first_name__lt='Ava'))
+        self.assertEqual(page.number_of_objects, 10)

     def test_special_chars(self):
         s = u'אבגד \\\'"`,.;éåäöšž\n\t\0\b\r'

@@ -149,8 +181,13 @@ class DatabaseTestCase(TestCaseWithData):
             Database(self.database.db_name, username='default', password='wrong')

         exc = cm.exception
-        self.assertEqual(exc.code, 193)
-        self.assertTrue(exc.message.startswith('Wrong password for user default'))
+        print(exc.code, exc.message)
+        if exc.code == 193:  # ClickHouse version < 20.3
+            self.assertTrue(exc.message.startswith('Wrong password for user default'))
+        elif exc.code == 516:  # ClickHouse version >= 20.3
+            self.assertTrue(exc.message.startswith('default: Authentication failed'))
+        else:
+            raise Exception('Unexpected error code - %s %s' % (exc.code, exc.message))

     def test_nonexisting_db(self):
         db = Database('db_not_here', autocreate=False)

@@ -209,3 +246,48 @@ class DatabaseTestCase(TestCaseWithData):
         # Remove the setting and see that now it works
         self.database.add_setting('max_columns_to_read', None)
         list(self.database.select('SELECT * from system.tables'))

+    def test_create_ad_hoc_field(self):
+        # Tests that create_ad_hoc_field works for all column types in the database
+        from infi.clickhouse_orm.models import ModelBase
+        query = "SELECT DISTINCT type FROM system.columns"
+        for row in self.database.select(query):
+            if row.type.startswith('Map'):
+                continue  # Not supported yet
+            ModelBase.create_ad_hoc_field(row.type)
+
+    def test_get_model_for_table(self):
+        # Tests that get_model_for_table works for a non-system model
+        model = self.database.get_model_for_table('person')
+        self.assertFalse(model.is_system_model())
+        self.assertFalse(model.is_read_only())
+        self.assertEqual(model.table_name(), 'person')
+        # Read a few records
+        list(model.objects_in(self.database)[:10])
+        # Inserts should work too
+        self.database.insert([
+            model(first_name='aaa', last_name='bbb', height=1.77)
+        ])
+
+    def test_get_model_for_table__system(self):
+        # Tests that get_model_for_table works for all system tables
+        query = "SELECT name FROM system.tables WHERE database='system'"
+        for row in self.database.select(query):
+            print(row.name)
+            if row.name in ('distributed_ddl_queue',):
+                continue  # Not supported
+            try:
+                model = self.database.get_model_for_table(row.name, system_table=True)
+            except NotImplementedError:
+                continue  # Table contains an unsupported field type
+            self.assertTrue(model.is_system_model())
+            self.assertTrue(model.is_read_only())
+            self.assertEqual(model.table_name(), row.name)
+            # Read a few records
+            try:
+                list(model.objects_in(self.database)[:10])
+            except ServerError as e:
+                if 'Not enough privileges' in e.message:
+                    pass
+                else:
+                    raise
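Taken together, the hunks above extend `count` and `paginate` to accept three interchangeable condition styles. A minimal sketch, assuming the fixtures these tests use (`db` and `Person` stand in for a real `Database` instance and model):

    import datetime
    from infi.clickhouse_orm.query import Q

    n1 = db.count(Person, "birthday > '2000-01-01'")                   # SQL string
    n2 = db.count(Person, Person.birthday > datetime.date(2000, 1, 1))  # field expression
    n3 = db.count(Person, Q(birthday__gt=datetime.date(2000, 1, 1)))    # Q object
    assert n1 == n2 == n3  # all three spell the same condition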
@@ -1,5 +1,6 @@
-from __future__ import unicode_literals
 import unittest
+import datetime
+import pytz

 from infi.clickhouse_orm.database import Database
 from infi.clickhouse_orm.models import Model

@@ -11,6 +12,8 @@ class DateFieldsTest(unittest.TestCase):

     def setUp(self):
         self.database = Database('test-db', log_statements=True)
+        if self.database.server_version < (20, 1, 2, 4):
+            raise unittest.SkipTest('ClickHouse version too old')
         self.database.create_table(ModelWithDate)

     def tearDown(self):

@@ -18,8 +21,17 @@

     def test_ad_hoc_model(self):
         self.database.insert([
-            ModelWithDate(date_field='2016-08-30', datetime_field='2016-08-30 03:50:00'),
-            ModelWithDate(date_field='2016-08-31', datetime_field='2016-08-31 01:30:00')
+            ModelWithDate(
+                date_field='2016-08-30',
+                datetime_field='2016-08-30 03:50:00',
+                datetime64_field='2016-08-30 03:50:00.123456',
+                datetime64_3_field='2016-08-30 03:50:00.123456'
+            ),
+            ModelWithDate(
+                date_field='2016-08-31',
+                datetime_field='2016-08-31 01:30:00',
+                datetime64_field='2016-08-31 01:30:00.123456',
+                datetime64_3_field='2016-08-31 01:30:00.123456')
         ])

         # toStartOfHour returns DateTime('Asia/Yekaterinburg') in my case, so I test it here to

@@ -33,10 +45,75 @@
         self.assertEqual(results[1].datetime_field, datetime.datetime(2016, 8, 31, 1, 30, 0, tzinfo=pytz.UTC))
         self.assertEqual(results[1].hour_start, datetime.datetime(2016, 8, 31, 1, 0, 0, tzinfo=pytz.UTC))
+
+        self.assertEqual(results[0].datetime64_field, datetime.datetime(2016, 8, 30, 3, 50, 0, 123456, tzinfo=pytz.UTC))
+        self.assertEqual(results[0].datetime64_3_field, datetime.datetime(2016, 8, 30, 3, 50, 0, 123000,
+                                                                          tzinfo=pytz.UTC))
+        self.assertEqual(results[1].datetime64_field, datetime.datetime(2016, 8, 31, 1, 30, 0, 123456, tzinfo=pytz.UTC))
+        self.assertEqual(results[1].datetime64_3_field, datetime.datetime(2016, 8, 31, 1, 30, 0, 123000,
+                                                                          tzinfo=pytz.UTC))


 class ModelWithDate(Model):

     date_field = DateField()
     datetime_field = DateTimeField()
+    datetime64_field = DateTime64Field()
+    datetime64_3_field = DateTime64Field(precision=3)

     engine = MergeTree('date_field', ('date_field',))
+
+
+class ModelWithTz(Model):
+    datetime_no_tz_field = DateTimeField()  # server tz
+    datetime_tz_field = DateTimeField(timezone='Europe/Madrid')
+    datetime64_tz_field = DateTime64Field(timezone='Europe/Madrid')
+    datetime_utc_field = DateTimeField(timezone=pytz.UTC)
+
+    engine = MergeTree('datetime_no_tz_field', ('datetime_no_tz_field',))
+
+
+class DateTimeFieldWithTzTest(unittest.TestCase):
+
+    def setUp(self):
+        self.database = Database('test-db', log_statements=True)
+        if self.database.server_version < (20, 1, 2, 4):
+            raise unittest.SkipTest('ClickHouse version too old')
+        self.database.create_table(ModelWithTz)
+
+    def tearDown(self):
+        self.database.drop_database()
+
+    def test_ad_hoc_model(self):
+        self.database.insert([
+            ModelWithTz(
+                datetime_no_tz_field='2020-06-11 04:00:00',
+                datetime_tz_field='2020-06-11 04:00:00',
+                datetime64_tz_field='2020-06-11 04:00:00',
+                datetime_utc_field='2020-06-11 04:00:00',
+            ),
+            ModelWithTz(
+                datetime_no_tz_field='2020-06-11 07:00:00+0300',
+                datetime_tz_field='2020-06-11 07:00:00+0300',
+                datetime64_tz_field='2020-06-11 07:00:00+0300',
+                datetime_utc_field='2020-06-11 07:00:00+0300',
+            ),
+        ])
+        query = 'SELECT * from $db.modelwithtz ORDER BY datetime_no_tz_field'
+        results = list(self.database.select(query))
+
+        self.assertEqual(results[0].datetime_no_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
+        self.assertEqual(results[0].datetime_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
+        self.assertEqual(results[0].datetime64_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
+        self.assertEqual(results[0].datetime_utc_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
+        self.assertEqual(results[1].datetime_no_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
+        self.assertEqual(results[1].datetime_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
+        self.assertEqual(results[1].datetime64_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
+        self.assertEqual(results[1].datetime_utc_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
+
+        self.assertEqual(results[0].datetime_no_tz_field.tzinfo.zone, self.database.server_timezone.zone)
+        self.assertEqual(results[0].datetime_tz_field.tzinfo.zone, pytz.timezone('Europe/Madrid').zone)
+        self.assertEqual(results[0].datetime64_tz_field.tzinfo.zone, pytz.timezone('Europe/Madrid').zone)
+        self.assertEqual(results[0].datetime_utc_field.tzinfo.zone, pytz.timezone('UTC').zone)
+        self.assertEqual(results[1].datetime_no_tz_field.tzinfo.zone, self.database.server_timezone.zone)
+        self.assertEqual(results[1].datetime_tz_field.tzinfo.zone, pytz.timezone('Europe/Madrid').zone)
+        self.assertEqual(results[1].datetime64_tz_field.tzinfo.zone, pytz.timezone('Europe/Madrid').zone)
+        self.assertEqual(results[1].datetime_utc_field.tzinfo.zone, pytz.timezone('UTC').zone)
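`DateTime64Field` is the sub-second companion to `DateTimeField`. A minimal sketch, assuming the field API exercised above (the `Reading` model is illustrative; the assertions in these tests suggest the default precision preserves microseconds, while `precision=3` truncates to milliseconds):

    import pytz
    from infi.clickhouse_orm import Model, DateTime64Field, MergeTree

    class Reading(Model):
        # precision=3 keeps milliseconds: '...03:50:00.123456' comes back as '...03:50:00.123000'
        taken_at = DateTime64Field(precision=3, timezone=pytz.UTC)

        engine = MergeTree('taken_at', ('taken_at',))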
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 import unittest
 from decimal import Decimal

@@ -13,15 +12,11 @@ class DecimalFieldsTest(unittest.TestCase):

     def setUp(self):
         self.database = Database('test-db', log_statements=True)
-        self.database.add_setting('allow_experimental_decimal_type', 1)
         try:
             self.database.create_table(DecimalModel)
         except ServerError as e:
-            if 'Unknown setting' in e.message:
-                # This ClickHouse version does not support decimals yet
-                raise unittest.SkipTest(e.message)
-            else:
-                raise
+            # This ClickHouse version does not support decimals yet
+            raise unittest.SkipTest(e.message)

     def tearDown(self):
         self.database.drop_database()
131  tests/test_dictionaries.py  (new file)

@@ -0,0 +1,131 @@
import unittest
import logging

from infi.clickhouse_orm import *


class DictionaryTestMixin:

    def setUp(self):
        self.database = Database('test-db', log_statements=True)
        if self.database.server_version < (20, 1, 11, 73):
            raise unittest.SkipTest('ClickHouse version too old')
        self._create_dictionary()

    def tearDown(self):
        self.database.drop_database()

    def _test_func(self, func, expected_value):
        sql = 'SELECT %s AS value' % func.to_sql()
        logging.info(sql)
        result = list(self.database.select(sql))
        logging.info('\t==> %s', result[0].value if result else '<empty>')
        print('Comparing %s to %s' % (result[0].value, expected_value))
        self.assertEqual(result[0].value, expected_value)
        return result[0].value if result else None


class SimpleDictionaryTest(DictionaryTestMixin, unittest.TestCase):

    def _create_dictionary(self):
        # Create a table to be used as source for the dictionary
        self.database.create_table(NumberName)
        self.database.insert(
            NumberName(number=i, name=name)
            for i, name in enumerate('Zero One Two Three Four Five Six Seven Eight Nine Ten'.split())
        )
        # Create the dictionary
        self.database.raw("""
            CREATE DICTIONARY numbers_dict(
                number UInt64,
                name String DEFAULT '?'
            )
            PRIMARY KEY number
            SOURCE(CLICKHOUSE(
                HOST 'localhost' PORT 9000 USER 'default' PASSWORD '' DB 'test-db' TABLE 'numbername'
            ))
            LIFETIME(100)
            LAYOUT(HASHED());
        """)
        self.dict_name = 'test-db.numbers_dict'

    def test_dictget(self):
        self._test_func(F.dictGet(self.dict_name, 'name', F.toUInt64(3)), 'Three')
        self._test_func(F.dictGet(self.dict_name, 'name', F.toUInt64(99)), '?')

    def test_dictgetordefault(self):
        self._test_func(F.dictGetOrDefault(self.dict_name, 'name', F.toUInt64(3), 'n/a'), 'Three')
        self._test_func(F.dictGetOrDefault(self.dict_name, 'name', F.toUInt64(99), 'n/a'), 'n/a')

    def test_dicthas(self):
        self._test_func(F.dictHas(self.dict_name, F.toUInt64(3)), 1)
        self._test_func(F.dictHas(self.dict_name, F.toUInt64(99)), 0)


class HierarchicalDictionaryTest(DictionaryTestMixin, unittest.TestCase):

    def _create_dictionary(self):
        # Create a table to be used as source for the dictionary
        self.database.create_table(Region)
        self.database.insert([
            Region(region_id=1, parent_region=0, region_name='Russia'),
            Region(region_id=2, parent_region=1, region_name='Moscow'),
            Region(region_id=3, parent_region=2, region_name='Center'),
            Region(region_id=4, parent_region=0, region_name='Great Britain'),
            Region(region_id=5, parent_region=4, region_name='London'),
        ])
        # Create the dictionary
        self.database.raw("""
            CREATE DICTIONARY regions_dict(
                region_id UInt64,
                parent_region UInt64 HIERARCHICAL,
                region_name String DEFAULT '?'
            )
            PRIMARY KEY region_id
            SOURCE(CLICKHOUSE(
                HOST 'localhost' PORT 9000 USER 'default' PASSWORD '' DB 'test-db' TABLE 'region'
            ))
            LIFETIME(100)
            LAYOUT(HASHED());
        """)
        self.dict_name = 'test-db.regions_dict'

    def test_dictget(self):
        self._test_func(F.dictGet(self.dict_name, 'region_name', F.toUInt64(3)), 'Center')
        self._test_func(F.dictGet(self.dict_name, 'parent_region', F.toUInt64(3)), 2)
        self._test_func(F.dictGet(self.dict_name, 'region_name', F.toUInt64(99)), '?')

    def test_dictgetordefault(self):
        self._test_func(F.dictGetOrDefault(self.dict_name, 'region_name', F.toUInt64(3), 'n/a'), 'Center')
        self._test_func(F.dictGetOrDefault(self.dict_name, 'region_name', F.toUInt64(99), 'n/a'), 'n/a')

    def test_dicthas(self):
        self._test_func(F.dictHas(self.dict_name, F.toUInt64(3)), 1)
        self._test_func(F.dictHas(self.dict_name, F.toUInt64(99)), 0)

    def test_dictgethierarchy(self):
        self._test_func(F.dictGetHierarchy(self.dict_name, F.toUInt64(3)), [3, 2, 1])
        self._test_func(F.dictGetHierarchy(self.dict_name, F.toUInt64(99)), [])

    def test_dictisin(self):
        self._test_func(F.dictIsIn(self.dict_name, F.toUInt64(3), F.toUInt64(1)), 1)
        self._test_func(F.dictIsIn(self.dict_name, F.toUInt64(3), F.toUInt64(4)), 0)
        self._test_func(F.dictIsIn(self.dict_name, F.toUInt64(99), F.toUInt64(4)), 0)


class NumberName(Model):
    ''' A table to act as a source for the dictionary '''

    number = UInt64Field()
    name = StringField()

    engine = Memory()


class Region(Model):

    region_id = UInt64Field()
    parent_region = UInt64Field()
    region_name = StringField()

    engine = Memory()
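Once a dictionary is registered, it can be queried from any SELECT through the `F` wrappers used above. A minimal sketch, assuming the `numbers_dict` dictionary and database these tests create:

    from infi.clickhouse_orm import Database, F

    db = Database('test-db')
    expr = F.dictGetOrDefault('test-db.numbers_dict', 'name', F.toUInt64(5), 'unknown')
    for row in db.select('SELECT %s AS value' % expr.to_sql()):
        print(row.value)  # 'Five' if the key exists, 'unknown' otherwise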
@@ -1,11 +1,7 @@
-from __future__ import unicode_literals
 import unittest
+import datetime

-from infi.clickhouse_orm.system_models import SystemPart
-from infi.clickhouse_orm.database import Database, DatabaseException, ServerError
-from infi.clickhouse_orm.models import Model, MergeModel, DistributedModel
-from infi.clickhouse_orm.fields import *
-from infi.clickhouse_orm.engines import *
+from infi.clickhouse_orm import *

 import logging
 logging.getLogger("requests").setLevel(logging.WARNING)

@@ -21,10 +17,10 @@ class _EnginesHelperTestCase(unittest.TestCase):


 class EnginesTestCase(_EnginesHelperTestCase):
-    def _create_and_insert(self, model_class):
+    def _create_and_insert(self, model_class, **kwargs):
         self.database.create_table(model_class)
         self.database.insert([
-            model_class(date='2017-01-01', event_id=23423, event_group=13, event_count=7, event_version=1)
+            model_class(date='2017-01-01', event_id=23423, event_group=13, event_count=7, event_version=1, **kwargs)
         ])

     def test_merge_tree(self):

@@ -37,6 +33,11 @@ class EnginesTestCase(_EnginesHelperTestCase):
             engine = MergeTree('date', ('date', 'event_id', 'event_group', 'intHash32(event_id)'), sampling_expr='intHash32(event_id)')
         self._create_and_insert(TestModel)

+    def test_merge_tree_with_sampling__funcs(self):
+        class TestModel(SampleModel):
+            engine = MergeTree('date', ('date', 'event_id', 'event_group', F.intHash32(SampleModel.event_id)), sampling_expr=F.intHash32(SampleModel.event_id))
+        self._create_and_insert(TestModel)
+
     def test_merge_tree_with_granularity(self):
         class TestModel(SampleModel):
             engine = MergeTree('date', ('date', 'event_id', 'event_group'), index_granularity=4096)

@@ -154,7 +155,7 @@ class EnginesTestCase(_EnginesHelperTestCase):
         )

         self._create_and_insert(TestModel)
-        self._create_and_insert(TestCollapseModel)
+        self._create_and_insert(TestCollapseModel, sign=1)

         # Result order may be different, lets sort manually
         parts = sorted(list(SystemPart.get(self.database)), key=lambda x: x.table)

@@ -165,6 +166,32 @@ class EnginesTestCase(_EnginesHelperTestCase):
         self.assertEqual('testmodel', parts[1].table)
         self.assertEqual('(201701, 13)'.replace(' ', ''), parts[1].partition.replace(' ', ''))

+    def test_custom_primary_key(self):
+        if self.database.server_version < (18, 1):
+            raise unittest.SkipTest('ClickHouse version too old')
+
+        class TestModel(SampleModel):
+            engine = MergeTree(
+                order_by=('date', 'event_id', 'event_group'),
+                partition_key=('toYYYYMM(date)',),
+                primary_key=('date', 'event_id')
+            )
+
+        class TestCollapseModel(SampleModel):
+            sign = Int8Field()
+
+            engine = CollapsingMergeTree(
+                sign_col='sign',
+                order_by=('date', 'event_id', 'event_group'),
+                partition_key=('toYYYYMM(date)',),
+                primary_key=('date', 'event_id')
+            )
+
+        self._create_and_insert(TestModel)
+        self._create_and_insert(TestCollapseModel, sign=1)
+
+        self.assertEqual(2, len(list(SystemPart.get(self.database))))


 class SampleModel(Model):
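The new `sign=1` arguments exist because `_create_and_insert` now forwards `**kwargs`, and a CollapsingMergeTree row must carry its sign column. A hedged sketch of the collapse semantics (the model and values are illustrative; the sign mechanics are ClickHouse's documented behaviour):

    # A 'state' row is written with sign=1; writing an identical row with
    # sign=-1 later cancels it, and the pair disappears during background merges.
    self.database.insert([
        TestCollapseModel(date='2017-01-01', event_id=23423, event_group=13, event_count=7, event_version=1, sign=1),
        TestCollapseModel(date='2017-01-01', event_id=23423, event_group=13, event_count=7, event_version=1, sign=-1),
    ])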
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import unittest

 from infi.clickhouse_orm.database import Database

@@ -6,10 +5,7 @@ from infi.clickhouse_orm.models import Model
 from infi.clickhouse_orm.fields import *
 from infi.clickhouse_orm.engines import *

-try:
-    Enum  # exists in Python 3.4+
-except NameError:
-    from enum import Enum  # use the enum34 library instead
+from enum import Enum


 class EnumFieldsTest(unittest.TestCase):

@@ -25,29 +21,35 @@ class EnumFieldsTest(unittest.TestCase):
     def test_insert_and_select(self):
         self.database.insert([
             ModelWithEnum(date_field='2016-08-30', enum_field=Fruit.apple),
-            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.orange)
+            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.orange),
+            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.cherry)
         ])
         query = 'SELECT * from $table ORDER BY date_field'
         results = list(self.database.select(query, ModelWithEnum))
-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 3)
         self.assertEqual(results[0].enum_field, Fruit.apple)
         self.assertEqual(results[1].enum_field, Fruit.orange)
+        self.assertEqual(results[2].enum_field, Fruit.cherry)

     def test_ad_hoc_model(self):
         self.database.insert([
             ModelWithEnum(date_field='2016-08-30', enum_field=Fruit.apple),
-            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.orange)
+            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.orange),
+            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.cherry)
         ])
         query = 'SELECT * from $db.modelwithenum ORDER BY date_field'
         results = list(self.database.select(query))
-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 3)
         self.assertEqual(results[0].enum_field.name, Fruit.apple.name)
         self.assertEqual(results[0].enum_field.value, Fruit.apple.value)
         self.assertEqual(results[1].enum_field.name, Fruit.orange.name)
         self.assertEqual(results[1].enum_field.value, Fruit.orange.value)
+        self.assertEqual(results[2].enum_field.name, Fruit.cherry.name)
+        self.assertEqual(results[2].enum_field.value, Fruit.cherry.value)

     def test_conversion(self):
         self.assertEqual(ModelWithEnum(enum_field=3).enum_field, Fruit.orange)
+        self.assertEqual(ModelWithEnum(enum_field=-7).enum_field, Fruit.cherry)
         self.assertEqual(ModelWithEnum(enum_field='apple').enum_field, Fruit.apple)
         self.assertEqual(ModelWithEnum(enum_field=Fruit.banana).enum_field, Fruit.banana)

@@ -69,7 +71,7 @@ class EnumFieldsTest(unittest.TestCase):
         self.assertEqual(results[0].enum_array, instance.enum_array)


-Fruit = Enum('Fruit', u'apple banana orange')
+Fruit = Enum('Fruit', [('apple', 1), ('banana', 2), ('orange', 3), ('cherry', -7)])


 class ModelWithEnum(Model):

@@ -86,3 +88,4 @@ class ModelWithEnumArray(Model):
     enum_array = ArrayField(Enum16Field(Fruit))

     engine = MergeTree('date_field', ('date_field',))
+
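The Fruit enum switches from auto-numbered members to an explicit (name, value) list because auto-numbering starts at 1 and cannot express the new negative member. A quick sketch of the difference (plain Python, nothing repo-specific):

    from enum import Enum

    # Auto-numbered: apple=1, banana=2, orange=3 - no way to get cherry=-7
    AutoFruit = Enum('Fruit', 'apple banana orange')

    # Explicit values allow negatives, well inside Enum16's -32768..32767 range
    Fruit = Enum('Fruit', [('apple', 1), ('banana', 2), ('orange', 3), ('cherry', -7)])
    assert Fruit(-7) is Fruit.cherry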
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 import unittest

 from infi.clickhouse_orm.database import Database
738  tests/test_funcs.py  (new file)

@@ -0,0 +1,738 @@
import unittest
from .base_test_with_data import *
from .test_querysets import SampleModel
from datetime import date, datetime, tzinfo, timedelta
import pytz
from ipaddress import IPv4Address, IPv6Address
import logging
from decimal import Decimal

from infi.clickhouse_orm.database import ServerError
from infi.clickhouse_orm.utils import NO_VALUE
from infi.clickhouse_orm.funcs import F


class FuncsTestCase(TestCaseWithData):

    def setUp(self):
        super(FuncsTestCase, self).setUp()
        self.database.insert(self._sample_data())

    def _test_qs(self, qs, expected_count):
        logging.info(qs.as_sql())
        count = 0
        for instance in qs:
            count += 1
            logging.info('\t[%d]\t%s' % (count, instance.to_dict()))
        self.assertEqual(count, expected_count)
        self.assertEqual(qs.count(), expected_count)

    def _test_func(self, func, expected_value=NO_VALUE):
        sql = 'SELECT %s AS value' % func.to_sql()
        logging.info(sql)
        try:
            result = list(self.database.select(sql))
            logging.info('\t==> %s', result[0].value if result else '<empty>')
            if expected_value != NO_VALUE:
                print('Comparing %s to %s' % (result[0].value, expected_value))
                self.assertEqual(result[0].value, expected_value)
            return result[0].value if result else None
        except ServerError as e:
            if 'Unknown function' in e.message:
                logging.warning(e.message)
                return  # ignore functions that don't exist in the used ClickHouse version
            raise

    def _test_aggr(self, func, expected_value=NO_VALUE):
        qs = Person.objects_in(self.database).aggregate(value=func)
        logging.info(qs.as_sql())
        try:
            result = list(qs)
            logging.info('\t==> %s', result[0].value if result else '<empty>')
            if expected_value != NO_VALUE:
                self.assertEqual(result[0].value, expected_value)
            return result[0].value if result else None
        except ServerError as e:
            if 'Unknown function' in e.message:
                logging.warning(e.message)
                return  # ignore functions that don't exist in the used ClickHouse version
            raise

    def test_func_to_sql(self):
        # No args
        self.assertEqual(F('func').to_sql(), 'func()')
        # String args
        self.assertEqual(F('func', "Wendy's", u"Wendy's").to_sql(), "func('Wendy\\'s', 'Wendy\\'s')")
        # Numeric args
        self.assertEqual(F('func', 1, 1.1, Decimal('3.3')).to_sql(), "func(1, 1.1, 3.3)")
        # Date args
        self.assertEqual(F('func', date(2018, 12, 31)).to_sql(), "func(toDate('2018-12-31'))")
        # Datetime args
        self.assertEqual(F('func', datetime(2018, 12, 31)).to_sql(), "func(toDateTime('1546214400'))")
        # Boolean args
        self.assertEqual(F('func', True, False).to_sql(), "func(1, 0)")
        # Timezone args
        self.assertEqual(F('func', pytz.utc).to_sql(), "func('UTC')")
        self.assertEqual(F('func', pytz.timezone('Europe/Athens')).to_sql(), "func('Europe/Athens')")
        # Null args
        self.assertEqual(F('func', None).to_sql(), "func(NULL)")
        # Fields as args
        self.assertEqual(F('func', SampleModel.color).to_sql(), "func(`color`)")
        # Funcs as args
        self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))')
        # Iterables as args
        x = [1, 'z', F('foo', 17)]
        for y in [x, iter(x)]:
            self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)")
        # Tuples as args
        self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([(1, 2), (3, 4)])")
        self.assertEqual(F('func', tuple(x), 5).to_sql(), "func((1, 'z', foo(17)), 5)")
        # Binary operator functions
        self.assertEqual(F.plus(1, 2).to_sql(), "(1 + 2)")
        self.assertEqual(F.lessOrEquals(1, 2).to_sql(), "(1 <= 2)")

    def test_filter_float_field(self):
        qs = Person.objects_in(self.database)
        # Height > 2
        self._test_qs(qs.filter(F.greater(Person.height, 2)), 0)
        self._test_qs(qs.filter(Person.height > 2), 0)
        # Height > 1.61
        self._test_qs(qs.filter(F.greater(Person.height, 1.61)), 96)
        self._test_qs(qs.filter(Person.height > 1.61), 96)
        # Height < 1.61
        self._test_qs(qs.filter(F.less(Person.height, 1.61)), 4)
        self._test_qs(qs.filter(Person.height < 1.61), 4)

    def test_filter_date_field(self):
        qs = Person.objects_in(self.database)
        # People born on the 30th
        self._test_qs(qs.filter(F('equals', F('toDayOfMonth', Person.birthday), 30)), 3)
        self._test_qs(qs.filter(F('toDayOfMonth', Person.birthday) == 30), 3)
        self._test_qs(qs.filter(F.toDayOfMonth(Person.birthday) == 30), 3)
        # People born on Sunday
        self._test_qs(qs.filter(F('equals', F('toDayOfWeek', Person.birthday), 7)), 18)
        self._test_qs(qs.filter(F('toDayOfWeek', Person.birthday) == 7), 18)
        self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18)
        # People born on 1976-10-01
        self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1)
        self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1)
        self._test_qs(qs.filter(Person.birthday == date(1976, 10, 1)), 1)

    def test_func_as_field_value(self):
        qs = Person.objects_in(self.database)
        self._test_qs(qs.filter(height__gt=F.plus(1, 0.61)), 96)
        self._test_qs(qs.exclude(birthday=F.today()), 100)
        self._test_qs(qs.filter(birthday__between=['1970-01-01', F.today()]), 100)

    def test_in_and_not_in(self):
        qs = Person.objects_in(self.database)
        self._test_qs(qs.filter(Person.first_name.isIn(['Ciaran', 'Elton'])), 4)
        self._test_qs(qs.filter(~Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
        self._test_qs(qs.filter(Person.first_name.isNotIn(['Ciaran', 'Elton'])), 96)
        self._test_qs(qs.exclude(Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
        # In subquery
        subquery = qs.filter(F.startsWith(Person.last_name, 'M')).only(Person.first_name)
        self._test_qs(qs.filter(Person.first_name.isIn(subquery)), 4)

    def test_comparison_operators(self):
        one = F.plus(1, 0)
        two = F.plus(1, 1)
        self._test_func(one > one, 0)
        self._test_func(two > one, 1)
        self._test_func(one >= two, 0)
        self._test_func(one >= one, 1)
        self._test_func(one < one, 0)
        self._test_func(one < two, 1)
        self._test_func(two <= one, 0)
        self._test_func(one <= one, 1)
        self._test_func(one == two, 0)
        self._test_func(one == one, 1)
        self._test_func(one != one, 0)
        self._test_func(one != two, 1)

    def test_arithmetic_operators(self):
        one = F.plus(1, 0)
        two = F.plus(1, 1)
        # +
        self._test_func(one + two, 3)
        self._test_func(one + 2, 3)
        self._test_func(2 + one, 3)
        # -
        self._test_func(one - two, -1)
        self._test_func(one - 2, -1)
        self._test_func(1 - two, -1)
        # *
        self._test_func(one * two, 2)
        self._test_func(one * 2, 2)
        self._test_func(1 * two, 2)
        # /
        self._test_func(one / two, 0.5)
        self._test_func(one / 2, 0.5)
        self._test_func(1 / two, 0.5)
        # //
        self._test_func(one // two, 0)
        self._test_func(two // one, 2)
        self._test_func(one // 2, 0)
        self._test_func(1 // two, 0)
        # %
        self._test_func(one % two, 1)
        self._test_func(one % 2, 1)
        self._test_func(1 % two, 1)
        # sign
        self._test_func(-one, -1)
        self._test_func(--one, 1)
        self._test_func(+one, 1)

    def test_logical_operators(self):
        one = F.plus(1, 0)
        two = F.plus(1, 1)
        # &
        self._test_func(one & two, 1)
        self._test_func(one & two, 1)
        self._test_func(one & 0, 0)
        self._test_func(0 & one, 0)
        # |
        self._test_func(one | two, 1)
        self._test_func(one | 0, 1)
        self._test_func(0 | one, 1)
        # ^
        self._test_func(one ^ one)
        self._test_func(one ^ 0)
        self._test_func(0 ^ one)
        # ~
        self._test_func(~one, 0)
        self._test_func(~~one, 1)
        # compound
        self._test_func(one & 0 | two, 1)
        self._test_func(one & 0 & two, 0)
        self._test_func(one & 0 | 0, 0)
        self._test_func((one | 0) & two, 1)

    def test_date_functions(self):
        d = date(2018, 12, 31)
        dt = datetime(2018, 12, 31, 11, 22, 33)
        self._test_func(F.toYear(d), 2018)
        self._test_func(F.toYear(dt), 2018)
        self._test_func(F.toISOYear(dt, 'Europe/Athens'), 2019)  # 2018-12-31 is ISO year 2019, week 1, day 1
        self._test_func(F.toQuarter(d), 4)
        self._test_func(F.toQuarter(dt), 4)
        self._test_func(F.toMonth(d), 12)
        self._test_func(F.toMonth(dt), 12)
        self._test_func(F.toWeek(d), 52)
        self._test_func(F.toWeek(dt), 52)
        self._test_func(F.toISOWeek(d), 1)  # 2018-12-31 is ISO year 2019, week 1, day 1
        self._test_func(F.toISOWeek(dt), 1)
        self._test_func(F.toDayOfYear(d), 365)
        self._test_func(F.toDayOfYear(dt), 365)
        self._test_func(F.toDayOfMonth(d), 31)
        self._test_func(F.toDayOfMonth(dt), 31)
        self._test_func(F.toDayOfWeek(d), 1)
        self._test_func(F.toDayOfWeek(dt), 1)
        self._test_func(F.toMinute(dt), 22)
        self._test_func(F.toSecond(dt), 33)
        self._test_func(F.toMonday(d), d)
        self._test_func(F.toMonday(dt), d)
        self._test_func(F.toStartOfMonth(d), date(2018, 12, 1))
        self._test_func(F.toStartOfMonth(dt), date(2018, 12, 1))
        self._test_func(F.toStartOfQuarter(d), date(2018, 10, 1))
        self._test_func(F.toStartOfQuarter(dt), date(2018, 10, 1))
        self._test_func(F.toStartOfYear(d), date(2018, 1, 1))
        self._test_func(F.toStartOfYear(dt), date(2018, 1, 1))
        self._test_func(F.toStartOfMinute(dt), datetime(2018, 12, 31, 11, 22, 0, tzinfo=pytz.utc))
        self._test_func(F.toStartOfFiveMinute(dt), datetime(2018, 12, 31, 11, 20, 0, tzinfo=pytz.utc))
        self._test_func(F.toStartOfFifteenMinutes(dt), datetime(2018, 12, 31, 11, 15, 0, tzinfo=pytz.utc))
        self._test_func(F.toStartOfHour(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc))
        self._test_func(F.toStartOfISOYear(dt), date(2018, 12, 31))
        self._test_func(F.toStartOfTenMinutes(dt), datetime(2018, 12, 31, 11, 20, 0, tzinfo=pytz.utc))
        self._test_func(F.toStartOfWeek(dt), date(2018, 12, 30))
        self._test_func(F.toTime(dt), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc))
        self._test_func(F.toUnixTimestamp(dt, 'UTC'), int(dt.replace(tzinfo=pytz.utc).timestamp()))
        self._test_func(F.toYYYYMM(d), 201812)
        self._test_func(F.toYYYYMM(dt), 201812)
        self._test_func(F.toYYYYMM(dt, 'Europe/Athens'), 201812)
        self._test_func(F.toYYYYMMDD(d), 20181231)
        self._test_func(F.toYYYYMMDD(dt), 20181231)
        self._test_func(F.toYYYYMMDD(dt, 'Europe/Athens'), 20181231)
        self._test_func(F.toYYYYMMDDhhmmss(d), 20181231000000)
        self._test_func(F.toYYYYMMDDhhmmss(dt, 'Europe/Athens'), 20181231132233)
        self._test_func(F.toRelativeYearNum(dt), 2018)
        self._test_func(F.toRelativeYearNum(dt, 'Europe/Athens'), 2018)
        self._test_func(F.toRelativeMonthNum(dt), 2018 * 12 + 12)
        self._test_func(F.toRelativeMonthNum(dt, 'Europe/Athens'), 2018 * 12 + 12)
        self._test_func(F.toRelativeWeekNum(dt), 2557)
        self._test_func(F.toRelativeWeekNum(dt, 'Europe/Athens'), 2557)
        self._test_func(F.toRelativeDayNum(dt), 17896)
        self._test_func(F.toRelativeDayNum(dt, 'Europe/Athens'), 17896)
        self._test_func(F.toRelativeHourNum(dt), 429515)
        self._test_func(F.toRelativeHourNum(dt, 'Europe/Athens'), 429515)
        self._test_func(F.toRelativeMinuteNum(dt), 25770922)
        self._test_func(F.toRelativeMinuteNum(dt, 'Europe/Athens'), 25770922)
        self._test_func(F.toRelativeSecondNum(dt), 1546255353)
        self._test_func(F.toRelativeSecondNum(dt, 'Europe/Athens'), 1546255353)
        self._test_func(F.timeSlot(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc))
        self._test_func(F.timeSlots(dt, 300), [datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)])
        self._test_func(F.formatDateTime(dt, '%D %T', 'Europe/Athens'), '12/31/18 13:22:33')
        self._test_func(F.addDays(d, 7), date(2019, 1, 7))
        self._test_func(F.addDays(dt, 7, 'Europe/Athens'))
        self._test_func(F.addHours(dt, 7, 'Europe/Athens'))
        self._test_func(F.addMinutes(dt, 7, 'Europe/Athens'))
        self._test_func(F.addMonths(d, 7), date(2019, 7, 31))
        self._test_func(F.addMonths(dt, 7, 'Europe/Athens'))
        self._test_func(F.addQuarters(d, 7))
        self._test_func(F.addQuarters(dt, 7, 'Europe/Athens'))
        self._test_func(F.addSeconds(d, 7))
        self._test_func(F.addSeconds(dt, 7, 'Europe/Athens'))
        self._test_func(F.addWeeks(d, 7))
        self._test_func(F.addWeeks(dt, 7, 'Europe/Athens'))
        self._test_func(F.addYears(d, 7))
        self._test_func(F.addYears(dt, 7, 'Europe/Athens'))
        self._test_func(F.subtractDays(d, 3))
        self._test_func(F.subtractDays(dt, 3, 'Europe/Athens'))
        self._test_func(F.subtractHours(d, 3))
        self._test_func(F.subtractHours(dt, 3, 'Europe/Athens'))
        self._test_func(F.subtractMinutes(d, 3))
        self._test_func(F.subtractMinutes(dt, 3, 'Europe/Athens'))
        self._test_func(F.subtractMonths(d, 3))
        self._test_func(F.subtractMonths(dt, 3, 'Europe/Athens'))
        self._test_func(F.subtractQuarters(d, 3))
        self._test_func(F.subtractQuarters(dt, 3, 'Europe/Athens'))
        self._test_func(F.subtractSeconds(d, 3))
        self._test_func(F.subtractSeconds(dt, 3, 'Europe/Athens'))
        self._test_func(F.subtractWeeks(d, 3))
        self._test_func(F.subtractWeeks(dt, 3, 'Europe/Athens'))
        self._test_func(F.subtractYears(d, 3))
        self._test_func(F.subtractYears(dt, 3, 'Europe/Athens'))
        self._test_func(F.now() + F.toIntervalSecond(3) + F.toIntervalMinute(3) + F.toIntervalHour(3) + F.toIntervalDay(3))
        self._test_func(F.now() + F.toIntervalWeek(3) + F.toIntervalMonth(3) + F.toIntervalQuarter(3) + F.toIntervalYear(3))
        self._test_func(F.now() + F.toIntervalSecond(3000) - F.toIntervalDay(3000) == F.now() + timedelta(seconds=3000, days=-3000))

    def test_date_functions__utc_only(self):
        if self.database.server_timezone != pytz.utc:
            raise unittest.SkipTest('This test must run with UTC as the server timezone')
        d = date(2018, 12, 31)
        dt = datetime(2018, 12, 31, 11, 22, 33)
        athens_tz = pytz.timezone('Europe/Athens')
        self._test_func(F.toHour(dt), 11)
        self._test_func(F.toStartOfDay(dt), datetime(2018, 12, 31, 0, 0, 0, tzinfo=pytz.utc))
        self._test_func(F.toTime(dt, pytz.utc), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc))
        self._test_func(F.toTime(dt, 'Europe/Athens'), athens_tz.localize(datetime(1970, 1, 2, 13, 22, 33)))
        self._test_func(F.toTime(dt, athens_tz), athens_tz.localize(datetime(1970, 1, 2, 13, 22, 33)))
        self._test_func(F.toTimeZone(dt, 'Europe/Athens'), athens_tz.localize(datetime(2018, 12, 31, 13, 22, 33)))
        self._test_func(F.now(), datetime.utcnow().replace(tzinfo=pytz.utc, microsecond=0))  # FIXME this may fail if the timing is just right
        self._test_func(F.today(), datetime.utcnow().date())
        self._test_func(F.yesterday(), datetime.utcnow().date() - timedelta(days=1))
        self._test_func(F.toYYYYMMDDhhmmss(dt), 20181231112233)
        self._test_func(F.formatDateTime(dt, '%D %T'), '12/31/18 11:22:33')
        self._test_func(F.addHours(d, 7), datetime(2018, 12, 31, 7, 0, 0, tzinfo=pytz.utc))
        self._test_func(F.addMinutes(d, 7), datetime(2018, 12, 31, 0, 7, 0, tzinfo=pytz.utc))

    def test_type_conversion_functions(self):
        for f in (F.toUInt8, F.toUInt16, F.toUInt32, F.toUInt64, F.toInt8, F.toInt16, F.toInt32, F.toInt64, F.toFloat32, F.toFloat64):
            self._test_func(f(17), 17)
            self._test_func(f('17'), 17)
        for f in (F.toUInt8OrZero, F.toUInt16OrZero, F.toUInt32OrZero, F.toUInt64OrZero, F.toInt8OrZero, F.toInt16OrZero, F.toInt32OrZero, F.toInt64OrZero, F.toFloat32OrZero, F.toFloat64OrZero):
            self._test_func(f('17'), 17)
            self._test_func(f('a'), 0)
        for f in (F.toDecimal32, F.toDecimal64, F.toDecimal128):
            self._test_func(f(17.17, 2), Decimal('17.17'))
            self._test_func(f('17.17', 2), Decimal('17.17'))
        self._test_func(F.toDate('2018-12-31'), date(2018, 12, 31))
        self._test_func(F.toString(123), '123')
        self._test_func(F.toFixedString('123', 5), '123')
        self._test_func(F.toStringCutToZero('123\0'), '123')
        self._test_func(F.CAST(17, 'String'), '17')
        self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM', 'Europe/Athens'))
        with self.assertRaises(ServerError):
            self._test_func(F.parseDateTimeBestEffort('foo'))
        self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM', 'Europe/Athens'))
        self._test_func(F.parseDateTimeBestEffortOrNull('foo'), None)
        self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM', 'Europe/Athens'))
        self._test_func(F.parseDateTimeBestEffortOrZero('foo'), DateTimeField.class_default)

    def test_type_conversion_functions__utc_only(self):
        if self.database.server_timezone != pytz.utc:
            raise unittest.SkipTest('This test must run with UTC as the server timezone')
        self._test_func(F.toDateTime('2018-12-31 11:22:33'), datetime(2018, 12, 31, 11, 22, 33, tzinfo=pytz.utc))
        self._test_func(F.toDateTime64('2018-12-31 11:22:33.001', 6), datetime(2018, 12, 31, 11, 22, 33, 1000, tzinfo=pytz.utc))
        self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))
        self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))
        self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))

    def test_string_functions(self):
        self._test_func(F.empty(''), 1)
        self._test_func(F.empty('x'), 0)
        self._test_func(F.notEmpty(''), 0)
        self._test_func(F.notEmpty('x'), 1)
        self._test_func(F.length('x'), 1)
        self._test_func(F.lengthUTF8('x'), 1)
        self._test_func(F.lower('Ab'), 'ab')
        self._test_func(F.upper('Ab'), 'AB')
        self._test_func(F.lowerUTF8('Ab'), 'ab')
        self._test_func(F.upperUTF8('Ab'), 'AB')
        self._test_func(F.reverse('Ab'), 'bA')
        self._test_func(F.reverseUTF8('Ab'), 'bA')
        self._test_func(F.concat('Ab', 'Cd', 'Ef'), 'AbCdEf')
        self._test_func(F.substring('123456', 3, 2), '34')
        self._test_func(F.substringUTF8('123456', 3, 2), '34')
        self._test_func(F.appendTrailingCharIfAbsent('Hello', '!'), 'Hello!')
        self._test_func(F.appendTrailingCharIfAbsent('Hello!', '!'), 'Hello!')
        self._test_func(F.convertCharset(F.convertCharset('Hello', 'latin1', 'utf16'), 'utf16', 'latin1'), 'Hello')
        self._test_func(F.startsWith('aaa', 'aa'), True)
        self._test_func(F.startsWith('aaa', 'bb'), False)
        self._test_func(F.endsWith('aaa', 'aa'), True)
        self._test_func(F.endsWith('aaa', 'bb'), False)
        self._test_func(F.trimLeft(' abc '), 'abc ')
        self._test_func(F.trimRight(' abc '), ' abc')
        self._test_func(F.trimBoth(' abc '), 'abc')
        self._test_func(F.CRC32('whoops'), 3361378926)

    def test_string_search_functions(self):
        self._test_func(F.position('Hello, world!', '!'), 13)
        self._test_func(F.positionCaseInsensitive('Hello, world!', 'hello'), 1)
        self._test_func(F.positionUTF8('Привет, мир!', '!'), 12)
        self._test_func(F.positionCaseInsensitiveUTF8('Привет, мир!', 'Мир'), 9)
        self._test_func(F.like('Hello, world!', '%ll%'), 1)
        self._test_func(F.notLike('Hello, world!', '%ll%'), 0)
        self._test_func(F.match('Hello, world!', '[lmnop]{3}'), 1)
        self._test_func(F.extract('Hello, world!', '[lmnop]{3}'), 'llo')
        self._test_func(F.extractAll('Hello, world!', '[a-z]+'), ['ello', 'world'])
        self._test_func(F.ngramDistance('Hello', 'Hello'), 0)
        self._test_func(F.ngramDistanceCaseInsensitive('Hello', 'hello'), 0)
        self._test_func(F.ngramDistanceUTF8('Hello', 'Hello'), 0)
        self._test_func(F.ngramDistanceCaseInsensitiveUTF8('Hello', 'hello'), 0)
        self._test_func(F.ngramSearch('Hello', 'Hello'), 1)
        self._test_func(F.ngramSearchCaseInsensitive('Hello', 'hello'), 1)
        self._test_func(F.ngramSearchUTF8('Hello', 'Hello'), 1)
        self._test_func(F.ngramSearchCaseInsensitiveUTF8('Hello', 'hello'), 1)

    def test_base64_functions(self):
        try:
            self._test_func(F.base64Decode(F.base64Encode('Hello')), 'Hello')
            self._test_func(F.tryBase64Decode(F.base64Encode('Hello')), 'Hello')
            self._test_func(F.tryBase64Decode(':-)'))
        except ServerError as e:
            # ClickHouse version that doesn't support these functions
            raise unittest.SkipTest(e.message)

    def test_replace_functions(self):
        haystack = 'hello'
        self._test_func(F.replace(haystack, 'l', 'L'), 'heLLo')
        self._test_func(F.replaceAll(haystack, 'l', 'L'), 'heLLo')
        self._test_func(F.replaceOne(haystack, 'l', 'L'), 'heLlo')
        self._test_func(F.replaceRegexpAll(haystack, '[eo]', 'X'), 'hXllX')
        self._test_func(F.replaceRegexpOne(haystack, '[eo]', 'X'), 'hXllo')
        self._test_func(F.regexpQuoteMeta('[eo]'), '\\[eo\\]')

    def test_math_functions(self):
        x = 17
        y = 3
        self._test_func(F.e())
        self._test_func(F.pi())
        self._test_func(F.exp(x))
        self._test_func(F.exp10(x))
        self._test_func(F.exp2(x))
        self._test_func(F.log(x))
        self._test_func(F.log10(x))
        self._test_func(F.log2(x))
        self._test_func(F.ln(x))
        self._test_func(F.sqrt(x))
        self._test_func(F.cbrt(x))
        self._test_func(F.erf(x))
        self._test_func(F.erfc(x))
        self._test_func(F.lgamma(x))
        self._test_func(F.tgamma(x))
        self._test_func(F.sin(x))
        self._test_func(F.cos(x))
        self._test_func(F.tan(x))
        self._test_func(F.asin(x))
        self._test_func(F.acos(x))
        self._test_func(F.atan(x))
        self._test_func(F.pow(x, y))
        self._test_func(F.power(x, y))
        self._test_func(F.intExp10(x))
        self._test_func(F.intExp2(x))
        self._test_func(F.intDivOrZero(x, y))
        self._test_func(F.abs(x))
        self._test_func(F.gcd(x, y))
        self._test_func(F.lcm(x, y))

    def test_rounding_functions(self):
        x = 22.22222
        n = 3
        self._test_func(F.floor(x), 22)
        self._test_func(F.floor(x, n), 22.222)
        self._test_func(F.ceil(x), 23)
        self._test_func(F.ceil(x, n), 22.223)
        self._test_func(F.ceiling(x), 23)
        self._test_func(F.ceiling(x, n), 22.223)
        self._test_func(F.round(x), 22)
        self._test_func(F.round(x, n), 22.222)
        self._test_func(F.roundAge(x), 18)
        self._test_func(F.roundDown(x, [10, 20, 30]), 20)
        self._test_func(F.roundDuration(x), 10)
        self._test_func(F.roundToExp2(x), 16)

    def test_array_functions(self):
        arr = [1, 2, 3]
        self._test_func(F.emptyArrayDate())
        self._test_func(F.emptyArrayDateTime())
        self._test_func(F.emptyArrayFloat32())
        self._test_func(F.emptyArrayFloat64())
        self._test_func(F.emptyArrayInt16())
        self._test_func(F.emptyArrayInt32())
        self._test_func(F.emptyArrayInt64())
        self._test_func(F.emptyArrayInt8())
        self._test_func(F.emptyArrayString())
        self._test_func(F.emptyArrayToSingle(F.emptyArrayInt16()), [0])
        self._test_func(F.emptyArrayUInt16())
        self._test_func(F.emptyArrayUInt32())
        self._test_func(F.emptyArrayUInt64())
        self._test_func(F.emptyArrayUInt8())
        self._test_func(F.range(7), list(range(7)))
        self._test_func(F.array(*arr), arr)
        self._test_func(F.arrayConcat([1, 2], [3]), arr)
        self._test_func(F.arrayElement([10, 20, 30], 2), 20)
        self._test_func(F.has(arr, 2), 1)
        self._test_func(F.hasAll(arr, [1, 7]), 0)
        self._test_func(F.hasAny(arr, [1, 7]), 1)
        self._test_func(F.indexOf(arr, 3), 3)
        self._test_func(F.countEqual(arr, 2), 1)
        self._test_func(F.arrayEnumerate(arr))
        self._test_func(F.arrayEnumerateDense(arr))
        self._test_func(F.arrayEnumerateDenseRanked(arr))
        self._test_func(F.arrayEnumerateUniq(arr))
        self._test_func(F.arrayEnumerateUniqRanked(arr))
        self._test_func(F.arrayPopBack(arr), [1, 2])
        self._test_func(F.arrayPopFront(arr), [2, 3])
        self._test_func(F.arrayPushBack(arr, 7), arr + [7])
        self._test_func(F.arrayPushFront(arr, 7), [7] + arr)
        self._test_func(F.arrayResize(arr, 5), [1, 2, 3, 0, 0])
        self._test_func(F.arrayResize(arr, 5, 9), [1, 2, 3, 9, 9])
        self._test_func(F.arraySlice(arr, 2), [2, 3])
        self._test_func(F.arraySlice(arr, 2, 1), [2])
        self._test_func(F.arrayUniq(arr + arr), 3)
        self._test_func(F.arrayJoin(arr))
        self._test_func(F.arrayDifference(arr), [0, 1, 1])
        self._test_func(F.arrayDistinct(arr + arr), arr)
        self._test_func(F.arrayIntersect(arr, [3, 4]), [3])
        self._test_func(F.arrayReduce('min', arr), 1)
        self._test_func(F.arrayReverse(arr), [3, 2, 1])

    def test_split_and_merge_functions(self):
        self._test_func(F.splitByChar('_', 'a_b_c'), ['a', 'b', 'c'])
        self._test_func(F.splitByString('__', 'a__b__c'), ['a', 'b', 'c'])
        self._test_func(F.arrayStringConcat(['a', 'b', 'c']), 'abc')
        self._test_func(F.arrayStringConcat(['a', 'b', 'c'], '_'), 'a_b_c')
        self._test_func(F.alphaTokens('aaa.bbb.111'), ['aaa', 'bbb'])

    def test_bit_functions(self):
        x = 17
        y = 4
        z = 5
        self._test_func(F.bitAnd(x, y))
        self._test_func(F.bitNot(x))
        self._test_func(F.bitOr(x, y))
        self._test_func(F.bitRotateLeft(x, y))
        self._test_func(F.bitRotateRight(x, y))
        self._test_func(F.bitShiftLeft(x, y))
        self._test_func(F.bitShiftRight(x, y))
        self._test_func(F.bitTest(x, y))
        self._test_func(F.bitTestAll(x, y))
        self._test_func(F.bitTestAll(x, y, z))
        self._test_func(F.bitTestAny(x, y))
        self._test_func(F.bitTestAny(x, y, z))
        self._test_func(F.bitXor(x, y))

    def test_bitmap_functions(self):
        self._test_func(F.bitmapToArray(F.bitmapBuild([1, 2, 3])), [1, 2, 3])
        self._test_func(F.bitmapContains(F.bitmapBuild([1, 5, 7, 9]), F.toUInt32(9)), 1)
        self._test_func(F.bitmapHasAny(F.bitmapBuild([1,2,3]), F.bitmapBuild([3,4,5])), 1)
        self._test_func(F.bitmapHasAll(F.bitmapBuild([1,2,3]), F.bitmapBuild([3,4,5])), 0)
        self._test_func(F.bitmapToArray(F.bitmapAnd(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [3])
        self._test_func(F.bitmapToArray(F.bitmapOr(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [1, 2, 3, 4, 5])
|
||||||
|
self._test_func(F.bitmapToArray(F.bitmapXor(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [1, 2, 4, 5])
|
||||||
|
self._test_func(F.bitmapToArray(F.bitmapAndnot(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [1, 2])
|
||||||
|
self._test_func(F.bitmapCardinality(F.bitmapBuild([1, 2, 3, 4, 5])), 5)
|
||||||
|
self._test_func(F.bitmapAndCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 1)
|
||||||
|
self._test_func(F.bitmapOrCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 5)
|
||||||
|
self._test_func(F.bitmapXorCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 4)
|
||||||
|
self._test_func(F.bitmapAndnotCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 2)
|
||||||
|
|
||||||
|
def test_hash_functions(self):
|
||||||
|
args = ['x', 'y', 'z']
|
||||||
|
x = 17
|
||||||
|
s = 'hello'
|
||||||
|
url = 'http://example.com/a/b/c/d'
|
||||||
|
self._test_func(F.hex(F.MD5(s)))
|
||||||
|
self._test_func(F.hex(F.sipHash128(s)))
|
||||||
|
self._test_func(F.hex(F.cityHash64(*args)))
|
||||||
|
self._test_func(F.hex(F.intHash32(x)))
|
||||||
|
self._test_func(F.hex(F.intHash64(x)))
|
||||||
|
self._test_func(F.hex(F.SHA1(s)))
|
||||||
|
self._test_func(F.hex(F.SHA224(s)))
|
||||||
|
self._test_func(F.hex(F.SHA256(s)))
|
||||||
|
self._test_func(F.hex(F.URLHash(url)))
|
||||||
|
self._test_func(F.hex(F.URLHash(url, 3)))
|
||||||
|
self._test_func(F.hex(F.farmHash64(*args)))
|
||||||
|
self._test_func(F.javaHash(s))
|
||||||
|
self._test_func(F.hiveHash(s))
|
||||||
|
self._test_func(F.hex(F.metroHash64(*args)))
|
||||||
|
self._test_func(F.jumpConsistentHash(x, 3))
|
||||||
|
self._test_func(F.hex(F.murmurHash2_32(*args)))
|
||||||
|
self._test_func(F.hex(F.murmurHash2_64(*args)))
|
||||||
|
self._test_func(F.hex(F.murmurHash3_32(*args)))
|
||||||
|
self._test_func(F.hex(F.murmurHash3_64(*args)))
|
||||||
|
self._test_func(F.hex(F.murmurHash3_128(s)))
|
||||||
|
self._test_func(F.hex(F.xxHash32(*args)))
|
||||||
|
self._test_func(F.hex(F.xxHash64(*args)))
|
||||||
|
if self.database.server_version >= (18, 1):
|
||||||
|
self._test_func(F.hex(F.halfMD5(*args)))
|
||||||
|
self._test_func(F.hex(F.sipHash64(*args)))
|
||||||
|
|
||||||
|
def test_rand_functions(self):
|
||||||
|
self._test_func(F.rand())
|
||||||
|
self._test_func(F.rand(17))
|
||||||
|
self._test_func(F.rand64())
|
||||||
|
self._test_func(F.rand64(17))
|
||||||
|
if self.database.server_version >= (19, 15): # buggy in older versions
|
||||||
|
self._test_func(F.randConstant())
|
||||||
|
self._test_func(F.randConstant(17))
|
||||||
|
|
||||||
|
def test_encoding_functions(self):
|
||||||
|
self._test_func(F.hex(F.unhex('0FA1')), '0FA1')
|
||||||
|
self._test_func(F.bitmaskToArray(17))
|
||||||
|
self._test_func(F.bitmaskToList(18))
|
||||||
|
|
||||||
|
def test_uuid_functions(self):
|
||||||
|
from uuid import UUID
|
||||||
|
uuid = self._test_func(F.generateUUIDv4())
|
||||||
|
self.assertEqual(type(uuid), UUID)
|
||||||
|
s = str(uuid)
|
||||||
|
self._test_func(F.toUUID(s), uuid)
|
||||||
|
self._test_func(F.UUIDNumToString(F.UUIDStringToNum(s)), s)
|
||||||
|
|
||||||
|
def test_ip_funcs(self):
|
||||||
|
self._test_func(F.IPv4NumToString(F.toUInt32(1)), '0.0.0.1')
|
||||||
|
self._test_func(F.IPv4NumToStringClassC(F.toUInt32(1)), '0.0.0.xxx')
|
||||||
|
self._test_func(F.IPv4StringToNum('0.0.0.17'), 17)
|
||||||
|
self._test_func(F.IPv6NumToString(F.IPv4ToIPv6(F.IPv4StringToNum('192.168.0.1'))), '::ffff:192.168.0.1')
|
||||||
|
self._test_func(F.IPv6NumToString(F.IPv6StringToNum('2a02:6b8::11')), '2a02:6b8::11')
|
||||||
|
self._test_func(F.toIPv4('10.20.30.40'), IPv4Address('10.20.30.40'))
|
||||||
|
self._test_func(F.toIPv6('2001:438:ffff::407d:1bc1'), IPv6Address('2001:438:ffff::407d:1bc1'))
|
||||||
|
self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16),
|
||||||
|
[IPv4Address('192.168.0.0'), IPv4Address('192.168.255.255')])
|
||||||
|
self._test_func(F.IPv6CIDRToRange(F.toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32),
|
||||||
|
[IPv6Address('2001:db8::'), IPv6Address('2001:db8:ffff:ffff:ffff:ffff:ffff:ffff')])
|
||||||
|
|
||||||
|
def test_aggregate_funcs(self):
|
||||||
|
self._test_aggr(F.any(Person.first_name))
|
||||||
|
self._test_aggr(F.anyHeavy(Person.first_name))
|
||||||
|
self._test_aggr(F.anyLast(Person.first_name))
|
||||||
|
self._test_aggr(F.argMin(Person.first_name, Person.height))
|
||||||
|
self._test_aggr(F.argMax(Person.first_name, Person.height))
|
||||||
|
self._test_aggr(F.round(F.avg(Person.height), 4), sum(p.height for p in self._sample_data()) / 100)
|
||||||
|
self._test_aggr(F.corr(Person.height, Person.height), 1)
|
||||||
|
self._test_aggr(F.count(), 100)
|
||||||
|
self._test_aggr(F.round(F.covarPop(Person.height, Person.height), 2), 0)
|
||||||
|
self._test_aggr(F.round(F.covarSamp(Person.height, Person.height), 2), 0)
|
||||||
|
self._test_aggr(F.kurtPop(Person.height))
|
||||||
|
self._test_aggr(F.kurtSamp(Person.height))
|
||||||
|
self._test_aggr(F.min(Person.height), 1.59)
|
||||||
|
self._test_aggr(F.max(Person.height), 1.80)
|
||||||
|
self._test_aggr(F.skewPop(Person.height))
|
||||||
|
self._test_aggr(F.skewSamp(Person.height))
|
||||||
|
self._test_aggr(F.round(F.sum(Person.height), 4), sum(p.height for p in self._sample_data()))
|
||||||
|
self._test_aggr(F.uniq(Person.first_name, Person.last_name), 100)
|
||||||
|
self._test_aggr(F.uniqExact(Person.first_name, Person.last_name), 100)
|
||||||
|
self._test_aggr(F.uniqHLL12(Person.first_name, Person.last_name), 99)
|
||||||
|
self._test_aggr(F.varPop(Person.height))
|
||||||
|
self._test_aggr(F.varSamp(Person.height))
|
||||||
|
|
||||||
|
def test_aggregate_funcs__or_default(self):
|
||||||
|
self.database.raw('TRUNCATE TABLE person')
|
||||||
|
self._test_aggr(F.countOrDefault(), 0)
|
||||||
|
self._test_aggr(F.maxOrDefault(Person.height), 0)
|
||||||
|
|
||||||
|
def test_aggregate_funcs__or_null(self):
|
||||||
|
self.database.raw('TRUNCATE TABLE person')
|
||||||
|
self._test_aggr(F.countOrNull(), None)
|
||||||
|
self._test_aggr(F.maxOrNull(Person.height), None)
|
||||||
|
|
||||||
|
def test_aggregate_funcs__if(self):
|
||||||
|
self._test_aggr(F.argMinIf(Person.first_name, Person.height, Person.last_name > 'H'))
|
||||||
|
self._test_aggr(F.countIf(Person.last_name > 'H'), 57)
|
||||||
|
self._test_aggr(F.minIf(Person.height, Person.last_name > 'H'), 1.6)
|
||||||
|
|
||||||
|
def test_aggregate_funcs__or_default_if(self):
|
||||||
|
self._test_aggr(F.argMinOrDefaultIf(Person.first_name, Person.height, Person.last_name > 'Z'))
|
||||||
|
self._test_aggr(F.countOrDefaultIf(Person.last_name > 'Z'), 0)
|
||||||
|
self._test_aggr(F.minOrDefaultIf(Person.height, Person.last_name > 'Z'), 0)
|
||||||
|
|
||||||
|
def test_aggregate_funcs__or_null_if(self):
|
||||||
|
self._test_aggr(F.argMinOrNullIf(Person.first_name, Person.height, Person.last_name > 'Z'))
|
||||||
|
self._test_aggr(F.countOrNullIf(Person.last_name > 'Z'), None)
|
||||||
|
self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None)
|
||||||
|
|
||||||
|
def test_quantile_funcs(self):
|
||||||
|
cond = Person.last_name > 'H'
|
||||||
|
weight_expr = F.toUInt32(F.round(Person.height))
|
||||||
|
# Quantile
|
||||||
|
self._test_aggr(F.quantile(0.9)(Person.height))
|
||||||
|
self._test_aggr(F.quantileOrDefault(0.9)(Person.height))
|
||||||
|
self._test_aggr(F.quantileOrNull(0.9)(Person.height))
|
||||||
|
self._test_aggr(F.quantileIf(0.9)(Person.height, cond))
|
||||||
|
self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, cond))
|
||||||
|
self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, cond))
|
||||||
|
self._test_aggr(F.quantileDeterministic(0.9)(Person.height, 17))
|
||||||
|
self._test_aggr(F.quantileExact(0.9)(Person.height))
|
||||||
|
self._test_aggr(F.quantileExactOrDefault(0.9)(Person.height))
|
||||||
|
# Quantile weighted
|
||||||
|
self._test_aggr(F.quantileExactWeighted(0.9)(Person.height, weight_expr))
|
||||||
|
self._test_aggr(F.quantileExactWeightedOrNull(0.9)(Person.height, weight_expr))
|
||||||
|
self._test_aggr(F.quantileTiming(0.9)(Person.height))
|
||||||
|
self._test_aggr(F.quantileTimingIf(0.9)(Person.height, cond))
|
||||||
|
self._test_aggr(F.quantileTimingWeighted(0.9)(Person.height, weight_expr))
|
||||||
|
self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, cond))
|
||||||
|
self._test_aggr(F.quantileTDigest(0.9)(Person.height))
|
||||||
|
self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, cond))
|
||||||
|
self._test_aggr(F.quantileTDigestWeighted(0.9)(Person.height, weight_expr))
|
||||||
|
# Quantiles
|
||||||
|
self._test_aggr(F.quantiles(0.9, 0.95, 0.99)(Person.height))
|
||||||
|
self._test_aggr(F.quantilesDeterministic(0.9, 0.95, 0.99)(Person.height, 17))
|
||||||
|
self._test_aggr(F.quantilesExact(0.9, 0.95, 0.99)(Person.height))
|
||||||
|
self._test_aggr(F.quantilesExactWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
|
||||||
|
self._test_aggr(F.quantilesTiming(0.9, 0.95, 0.99)(Person.height))
|
||||||
|
self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, cond))
|
||||||
|
self._test_aggr(F.quantilesTimingWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
|
||||||
|
self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, cond))
|
||||||
|
self._test_aggr(F.quantilesTDigest(0.9, 0.95, 0.99)(Person.height))
|
||||||
|
self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, cond))
|
||||||
|
self._test_aggr(F.quantilesTDigestWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
|
||||||
|
|
||||||
|
def test_top_k_funcs(self):
|
||||||
|
self._test_aggr(F.topK(3)(Person.height))
|
||||||
|
self._test_aggr(F.topKOrDefault(3)(Person.height))
|
||||||
|
self._test_aggr(F.topKIf(3)(Person.height, Person.last_name > 'H'))
|
||||||
|
self._test_aggr(F.topKOrDefaultIf(3)(Person.height, Person.last_name > 'H'))
|
||||||
|
weight_expr = F.toUInt32(F.round(Person.height))
|
||||||
|
self._test_aggr(F.topKWeighted(3)(Person.height, weight_expr))
|
||||||
|
self._test_aggr(F.topKWeightedOrDefault(3)(Person.height, weight_expr))
|
||||||
|
self._test_aggr(F.topKWeightedIf(3)(Person.height, weight_expr, Person.last_name > 'H'))
|
||||||
|
self._test_aggr(F.topKWeightedOrDefaultIf(3)(Person.height, weight_expr, Person.last_name > 'H'))
|
||||||
|
|
||||||
|
def test_null_funcs(self):
|
||||||
|
self._test_func(F.ifNull(17, 18), 17)
|
||||||
|
self._test_func(F.ifNull(None, 18), 18)
|
||||||
|
self._test_func(F.nullIf(17, 18), 17)
|
||||||
|
self._test_func(F.nullIf(18, 18), None)
|
||||||
|
self._test_func(F.isNotNull(17), 1)
|
||||||
|
self._test_func(F.isNull(17), 0)
|
||||||
|
self._test_func(F.coalesce(None, None, 17, 18), 17)
|
||||||
|
|
||||||
|
def test_misc_funcs(self):
|
||||||
|
self._test_func(F.ifNotFinite(17, 18), 17)
|
||||||
|
self._test_func(F.isFinite(17), 1)
|
||||||
|
self._test_func(F.isInfinite(17), 0)
|
||||||
|
self._test_func(F.isNaN(17), 0)
|
||||||
|
self._test_func(F.least(17, 18), 17)
|
||||||
|
self._test_func(F.greatest(17, 18), 18)
|
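The `_test_func` and `_test_aggr` helpers, presumably defined earlier in this file, evaluate each expression server-side and compare the result. As a minimal sketch (not part of this changeset) of what they exercise, assuming only that F expressions expose `to_sql()` as they do elsewhere in the ORM:

    # Sketch: an F call composes into a ClickHouse expression string.
    from infi.clickhouse_orm.funcs import F
    expr = F.arraySlice(F.range(7), 2, 3)
    print(expr.to_sql())  # expected output, roughly: arraySlice(range(7), 2, 3)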
tests/test_indexes.py (new file, 32 lines)
@@ -0,0 +1,32 @@
import unittest

from infi.clickhouse_orm import *


class IndexesTest(unittest.TestCase):

    def setUp(self):
        self.database = Database('test-db', log_statements=True)
        if self.database.server_version < (20, 1, 2, 4):
            raise unittest.SkipTest('ClickHouse version too old')

    def tearDown(self):
        self.database.drop_database()

    def test_all_index_types(self):
        self.database.create_table(ModelWithIndexes)


class ModelWithIndexes(Model):

    date = DateField()
    f1 = Int32Field()
    f2 = StringField()

    i1 = Index(f1, type=Index.minmax(), granularity=1)
    i2 = Index(f1, type=Index.set(1000), granularity=2)
    i3 = Index(f2, type=Index.ngrambf_v1(3, 256, 2, 0), granularity=1)
    i4 = Index(F.lower(f2), type=Index.tokenbf_v1(256, 2, 0), granularity=2)
    i5 = Index((F.toQuarter(date), f2), type=Index.bloom_filter(), granularity=3)

    engine = MergeTree('date', ('date',))
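For reference, a sketch (inferred, not quoted from this changeset) of how such a declaration should surface in the generated DDL — a data-skipping index like `i1` is expected to appear roughly as `INDEX i1 f1 TYPE minmax GRANULARITY 1`:

    # Sketch: inspect the generated table definition, mirroring setUp above.
    db = Database('test-db')               # hypothetical session
    db.create_table(ModelWithIndexes)
    print(db.raw('SHOW CREATE TABLE $db.`%s`' % ModelWithIndexes.table_name()))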
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import unittest
 import datetime
 import pytz
tests/test_ip_fields.py (new file, 68 lines)
@@ -0,0 +1,68 @@
import unittest
from ipaddress import IPv4Address, IPv6Address
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.fields import Int16Field, IPv4Field, IPv6Field
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.engines import Memory


class IPFieldsTest(unittest.TestCase):

    def setUp(self):
        self.database = Database('test-db', log_statements=True)

    def tearDown(self):
        self.database.drop_database()

    def test_ipv4_field(self):
        if self.database.server_version < (19, 17):
            raise unittest.SkipTest('ClickHouse version too old')
        # Create a model
        class TestModel(Model):
            i = Int16Field()
            f = IPv4Field()
            engine = Memory()
        self.database.create_table(TestModel)
        # Check valid values (all values are the same ip)
        values = [
            '1.2.3.4',
            b'\x01\x02\x03\x04',
            16909060,
            IPv4Address('1.2.3.4')
        ]
        for index, value in enumerate(values):
            rec = TestModel(i=index, f=value)
            self.database.insert([rec])
        for rec in TestModel.objects_in(self.database):
            self.assertEqual(rec.f, IPv4Address(values[0]))
        # Check invalid values
        for value in [None, 'zzz', -1, '123']:
            with self.assertRaises(ValueError):
                TestModel(i=1, f=value)

    def test_ipv6_field(self):
        if self.database.server_version < (19, 17):
            raise unittest.SkipTest('ClickHouse version too old')
        # Create a model
        class TestModel(Model):
            i = Int16Field()
            f = IPv6Field()
            engine = Memory()
        self.database.create_table(TestModel)
        # Check valid values (all values are the same ip)
        values = [
            '2a02:e980:1e::1',
            b'*\x02\xe9\x80\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01',
            55842696359362256756849388082849382401,
            IPv6Address('2a02:e980:1e::1')
        ]
        for index, value in enumerate(values):
            rec = TestModel(i=index, f=value)
            self.database.insert([rec])
        for rec in TestModel.objects_in(self.database):
            self.assertEqual(rec.f, IPv6Address(values[0]))
        # Check invalid values
        for value in [None, 'zzz', -1, '123']:
            with self.assertRaises(ValueError):
                TestModel(i=1, f=value)
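The four "valid values" in each test are one address in four encodings; a sketch of the standard-library equivalence the assertions rely on (not from this changeset):

    from ipaddress import IPv4Address
    # string, packed-bytes and integer forms all construct the same address
    assert IPv4Address('1.2.3.4') == IPv4Address(b'\x01\x02\x03\x04') == IPv4Address(16909060)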
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals, print_function
 import unittest
 import json
@@ -30,8 +29,8 @@ class JoinTest(unittest.TestCase):
         self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN $db.{} USING id".format(Foo.table_name(), Bar.table_name()))

     def test_with_subquery(self):
-        self.print_res("SELECT b FROM {} ALL LEFT JOIN (SELECT * from {}) USING id".format(Foo.table_name(), Bar.table_name()))
-        self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN (SELECT * from $db.{}) USING id".format(Foo.table_name(), Bar.table_name()))
+        self.print_res("SELECT b FROM {} ALL LEFT JOIN (SELECT * from {}) subquery USING id".format(Foo.table_name(), Bar.table_name()))
+        self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN (SELECT * from $db.{}) subquery USING id".format(Foo.table_name(), Bar.table_name()))


 class Foo(models.Model):
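The only functional change here is naming the joined subquery; presumably newer ClickHouse releases expect a joined subquery to carry an alias, so `subquery` keeps the statement parseable. A sketch of the resulting SQL shape (table names are placeholders, not from this changeset):

    sql = "SELECT b FROM foo ALL LEFT JOIN (SELECT * from bar) subquery USING id"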
@@ -1,11 +1,11 @@
-from __future__ import unicode_literals
 import unittest
 from datetime import date

 from infi.clickhouse_orm.database import Database
-from infi.clickhouse_orm.models import Model
+from infi.clickhouse_orm.models import Model, NO_VALUE
 from infi.clickhouse_orm.fields import *
 from infi.clickhouse_orm.engines import *
+from infi.clickhouse_orm.funcs import F


 class MaterializedFieldsTest(unittest.TestCase):
@@ -25,7 +25,7 @@ class MaterializedFieldsTest(unittest.TestCase):
         )
         self.database.insert([instance])
         # We can't select * from table, as it doesn't select materialized and alias fields
-        query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str' \
+        query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str, mat_func' \
                 ' FROM $db.%s ORDER BY mat_date' % ModelWithMaterializedFields.table_name()
         for model_cls in (ModelWithMaterializedFields, None):
             results = list(self.database.select(query, model_cls))
@@ -36,6 +36,7 @@ class MaterializedFieldsTest(unittest.TestCase):
         self.assertEqual(results[0].mat_int, abs(instance.int_field))
         self.assertEqual(results[0].mat_str, instance.str_field.lower())
         self.assertEqual(results[0].mat_date, instance.date_time_field.date())
+        self.assertEqual(results[0].mat_func, instance.str_field.lower())

     def test_assignment_error(self):
         # I can't prevent assigning at all, in case db.select statements with model provided sets model fields.
@@ -55,6 +56,10 @@ class MaterializedFieldsTest(unittest.TestCase):
         with self.assertRaises(AssertionError):
             StringField(materialized='str_field', alias='str_field')

+    def test_default_value(self):
+        instance = ModelWithMaterializedFields()
+        self.assertEqual(instance.mat_str, NO_VALUE)
+

 class ModelWithMaterializedFields(Model):
     int_field = Int32Field()
@@ -64,5 +69,6 @@ class ModelWithMaterializedFields(Model):
     mat_str = StringField(materialized='lower(str_field)')
     mat_int = Int32Field(materialized='abs(int_field)')
     mat_date = DateField(materialized=u'toDate(date_time_field)')
+    mat_func = StringField(materialized=F.lower(str_field))

     engine = MergeTree('mat_date', ('mat_date',))
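`mat_func` shows that a materialized expression can now be an F object rather than a raw SQL string; a sketch of the equivalence (inferred from the assertions above, not quoted from the changeset):

    # Both spellings below should yield the same column DDL, roughly:
    #   `mat_str`  String MATERIALIZED lower(str_field)
    #   `mat_func` String MATERIALIZED lower(str_field)
    class Example(Model):                                         # hypothetical model; engine omitted
        str_field = StringField()
        mat_str = StringField(materialized='lower(str_field)')   # raw SQL string
        mat_func = StringField(materialized=F.lower(str_field))  # equivalent F expression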
@@ -1,20 +1,16 @@
-from __future__ import unicode_literals
 import unittest

-from infi.clickhouse_orm.database import Database
-from infi.clickhouse_orm.models import Model, BufferModel
+from infi.clickhouse_orm.database import Database, ServerError
+from infi.clickhouse_orm.models import Model, BufferModel, Constraint, Index
 from infi.clickhouse_orm.fields import *
 from infi.clickhouse_orm.engines import *
 from infi.clickhouse_orm.migrations import MigrationHistory

+from enum import Enum
 # Add tests to path so that migrations will be importable
 import sys, os
 sys.path.append(os.path.dirname(__file__))

-try:
-    Enum # exists in Python 3.4+
-except NameError:
-    from enum import Enum # use the enum34 library instead

 import logging
 logging.basicConfig(level=logging.DEBUG, format='%(message)s')
@@ -30,55 +26,58 @@ class MigrationsTestCase(unittest.TestCase):
     def tearDown(self):
         self.database.drop_database()

-    def tableExists(self, model_class):
+    def table_exists(self, model_class):
         query = "EXISTS TABLE $db.`%s`" % model_class.table_name()
         return next(self.database.select(query)).result == 1

-    def getTableFields(self, model_class):
+    def get_table_fields(self, model_class):
         query = "DESC `%s`.`%s`" % (self.database.db_name, model_class.table_name())
         return [(row.name, row.type) for row in self.database.select(query)]

+    def get_table_def(self, model_class):
+        return self.database.raw('SHOW CREATE TABLE $db.`%s`' % model_class.table_name())
+
     def test_migrations(self):
         # Creation and deletion of table
         self.database.migrate('tests.sample_migrations', 1)
-        self.assertTrue(self.tableExists(Model1))
+        self.assertTrue(self.table_exists(Model1))
         self.database.migrate('tests.sample_migrations', 2)
-        self.assertFalse(self.tableExists(Model1))
+        self.assertFalse(self.table_exists(Model1))
         self.database.migrate('tests.sample_migrations', 3)
-        self.assertTrue(self.tableExists(Model1))
+        self.assertTrue(self.table_exists(Model1))
         # Adding, removing and altering simple fields
-        self.assertEqual(self.getTableFields(Model1), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
+        self.assertEqual(self.get_table_fields(Model1), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
         self.database.migrate('tests.sample_migrations', 4)
-        self.assertEqual(self.getTableFields(Model2), [('date', 'Date'), ('f1', 'Int32'), ('f3', 'Float32'), ('f2', 'String'), ('f4', 'String'), ('f5', 'Array(UInt64)')])
+        self.assertEqual(self.get_table_fields(Model2), [('date', 'Date'), ('f1', 'Int32'), ('f3', 'Float32'), ('f2', 'String'), ('f4', 'String'), ('f5', 'Array(UInt64)')])
         self.database.migrate('tests.sample_migrations', 5)
-        self.assertEqual(self.getTableFields(Model3), [('date', 'Date'), ('f1', 'Int64'), ('f3', 'Float64'), ('f4', 'String')])
+        self.assertEqual(self.get_table_fields(Model3), [('date', 'Date'), ('f1', 'Int64'), ('f3', 'Float64'), ('f4', 'String')])
         # Altering enum fields
         self.database.migrate('tests.sample_migrations', 6)
-        self.assertTrue(self.tableExists(EnumModel1))
-        self.assertEqual(self.getTableFields(EnumModel1),
+        self.assertTrue(self.table_exists(EnumModel1))
+        self.assertEqual(self.get_table_fields(EnumModel1),
                          [('date', 'Date'), ('f1', "Enum8('dog' = 1, 'cat' = 2, 'cow' = 3)")])
         self.database.migrate('tests.sample_migrations', 7)
-        self.assertTrue(self.tableExists(EnumModel1))
-        self.assertEqual(self.getTableFields(EnumModel2),
+        self.assertTrue(self.table_exists(EnumModel1))
+        self.assertEqual(self.get_table_fields(EnumModel2),
                          [('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")])
         # Materialized fields and alias fields
         self.database.migrate('tests.sample_migrations', 8)
-        self.assertTrue(self.tableExists(MaterializedModel))
-        self.assertEqual(self.getTableFields(MaterializedModel),
+        self.assertTrue(self.table_exists(MaterializedModel))
+        self.assertEqual(self.get_table_fields(MaterializedModel),
                          [('date_time', "DateTime"), ('date', 'Date')])
         self.database.migrate('tests.sample_migrations', 9)
-        self.assertTrue(self.tableExists(AliasModel))
-        self.assertEqual(self.getTableFields(AliasModel),
+        self.assertTrue(self.table_exists(AliasModel))
+        self.assertEqual(self.get_table_fields(AliasModel),
                          [('date', 'Date'), ('date_alias', "Date")])
         # Buffer models creation and alteration
         self.database.migrate('tests.sample_migrations', 10)
-        self.assertTrue(self.tableExists(Model4))
-        self.assertTrue(self.tableExists(Model4Buffer))
-        self.assertEqual(self.getTableFields(Model4), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
-        self.assertEqual(self.getTableFields(Model4Buffer), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
+        self.assertTrue(self.table_exists(Model4))
+        self.assertTrue(self.table_exists(Model4Buffer))
+        self.assertEqual(self.get_table_fields(Model4), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
+        self.assertEqual(self.get_table_fields(Model4Buffer), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
         self.database.migrate('tests.sample_migrations', 11)
-        self.assertEqual(self.getTableFields(Model4), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])
-        self.assertEqual(self.getTableFields(Model4Buffer), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])
+        self.assertEqual(self.get_table_fields(Model4), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])
+        self.assertEqual(self.get_table_fields(Model4Buffer), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])

         self.database.migrate('tests.sample_migrations', 12)
         self.assertEqual(self.database.count(Model3), 3)
@@ -91,12 +90,53 @@ class MigrationsTestCase(unittest.TestCase):
         self.assertListEqual(data, [1, 2, 3, 4])

         self.database.migrate('tests.sample_migrations', 14)
-        self.assertTrue(self.tableExists(MaterializedModel1))
-        self.assertEqual(self.getTableFields(MaterializedModel1),
+        self.assertTrue(self.table_exists(MaterializedModel1))
+        self.assertEqual(self.get_table_fields(MaterializedModel1),
                          [('date_time', 'DateTime'), ('int_field', 'Int8'), ('date', 'Date'), ('int_field_plus_one', 'Int8')])
-        self.assertTrue(self.tableExists(AliasModel1))
-        self.assertEqual(self.getTableFields(AliasModel1),
+        self.assertTrue(self.table_exists(AliasModel1))
+        self.assertEqual(self.get_table_fields(AliasModel1),
                          [('date', 'Date'), ('int_field', 'Int8'), ('date_alias', 'Date'), ('int_field_plus_one', 'Int8')])
+        # Codecs and low cardinality
+        self.database.migrate('tests.sample_migrations', 15)
+        self.assertTrue(self.table_exists(Model4_compressed))
+        if self.database.has_low_cardinality_support:
+            self.assertEqual(self.get_table_fields(Model2LowCardinality),
+                             [('date', 'Date'), ('f1', 'LowCardinality(Int32)'), ('f3', 'LowCardinality(Float32)'),
+                              ('f2', 'LowCardinality(String)'), ('f4', 'LowCardinality(Nullable(String))'), ('f5', 'Array(LowCardinality(UInt64))')])
+        else:
+            logging.warning('No support for low cardinality')
+            self.assertEqual(self.get_table_fields(Model2),
+                             [('date', 'Date'), ('f1', 'Int32'), ('f3', 'Float32'), ('f2', 'String'), ('f4', 'Nullable(String)'),
+                              ('f5', 'Array(UInt64)')])
+
+        if self.database.server_version >= (19, 14, 3, 3):
+            # Creating constraints
+            self.database.migrate('tests.sample_migrations', 16)
+            self.assertTrue(self.table_exists(ModelWithConstraints))
+            self.database.insert([ModelWithConstraints(f1=101, f2='a')])
+            with self.assertRaises(ServerError):
+                self.database.insert([ModelWithConstraints(f1=99, f2='a')])
+            with self.assertRaises(ServerError):
+                self.database.insert([ModelWithConstraints(f1=101, f2='x')])
+            # Modifying constraints
+            self.database.migrate('tests.sample_migrations', 17)
+            self.database.insert([ModelWithConstraints(f1=99, f2='a')])
+            with self.assertRaises(ServerError):
+                self.database.insert([ModelWithConstraints(f1=101, f2='a')])
+            with self.assertRaises(ServerError):
+                self.database.insert([ModelWithConstraints(f1=99, f2='x')])
+
+        if self.database.server_version >= (20, 1, 2, 4):
+            # Creating indexes
+            self.database.migrate('tests.sample_migrations', 18)
+            self.assertTrue(self.table_exists(ModelWithIndex))
+            self.assertIn('INDEX index ', self.get_table_def(ModelWithIndex))
+            self.assertIn('INDEX another_index ', self.get_table_def(ModelWithIndex))
+            # Modifying indexes
+            self.database.migrate('tests.sample_migrations', 19)
+            self.assertNotIn('INDEX index ', self.get_table_def(ModelWithIndex))
+            self.assertIn('INDEX index2 ', self.get_table_def(ModelWithIndex))
+            self.assertIn('INDEX another_index ', self.get_table_def(ModelWithIndex))


 # Several different models with the same table name, to simulate a table that changes over time
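Migrations 16-19 exercise the new Constraint and Index declarations (the model classes follow below). In ClickHouse terms, `f1_constraint = Constraint(f1 > 100)` should translate to a clause of roughly the form `CONSTRAINT f1_constraint CHECK f1 > 100`, and a violating insert fails server-side — a sketch of what the assertRaises blocks amount to (assuming `db` is a Database session, not from this changeset):

    db.insert([ModelWithConstraints(f1=101, f2='a')])    # passes: f1 > 100 and f2 in ('a', 'b', 'c')
    # db.insert([ModelWithConstraints(f1=99, f2='a')])   # would raise ServerError: CHECK f1 > 100 fails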
@@ -258,3 +298,96 @@ class Model4Buffer_changed(BufferModel, Model4_changed):
     @classmethod
     def table_name(cls):
         return 'model4buffer'
+
+
+class Model4_compressed(Model):
+
+    date = DateField()
+    f3 = DateTimeField(codec='Delta,ZSTD(10)')
+    f2 = StringField(codec='LZ4HC')
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'model4'
+
+
+class Model2LowCardinality(Model):
+    date = DateField()
+    f1 = LowCardinalityField(Int32Field())
+    f3 = LowCardinalityField(Float32Field())
+    f2 = LowCardinalityField(StringField())
+    f4 = LowCardinalityField(NullableField(StringField()))
+    f5 = ArrayField(LowCardinalityField(UInt64Field()))
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'mig'
+
+
+class ModelWithConstraints(Model):
+
+    date = DateField()
+    f1 = Int32Field()
+    f2 = StringField()
+
+    constraint = Constraint(f2.isIn(['a', 'b', 'c']))  # check reserved keyword as constraint name
+    f1_constraint = Constraint(f1 > 100)
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'modelwithconstraints'
+
+
+class ModelWithConstraints2(Model):
+
+    date = DateField()
+    f1 = Int32Field()
+    f2 = StringField()
+
+    constraint = Constraint(f2.isIn(['a', 'b', 'c']))
+    f1_constraint_new = Constraint(f1 < 100)
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'modelwithconstraints'
+
+
+class ModelWithIndex(Model):
+
+    date = DateField()
+    f1 = Int32Field()
+    f2 = StringField()
+
+    index = Index(f1, type=Index.minmax(), granularity=1)
+    another_index = Index(f2, type=Index.set(0), granularity=1)
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'modelwithindex'
+
+
+class ModelWithIndex2(Model):
+
+    date = DateField()
+    f1 = Int32Field()
+    f2 = StringField()
+
+    index2 = Index(f1, type=Index.bloom_filter(), granularity=2)
+    another_index = Index(f2, type=Index.set(0), granularity=1)
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'modelwithindex'
@@ -1,11 +1,11 @@
-from __future__ import unicode_literals
 import unittest
 import datetime
 import pytz

-from infi.clickhouse_orm.models import Model
+from infi.clickhouse_orm.models import Model, NO_VALUE
 from infi.clickhouse_orm.fields import *
 from infi.clickhouse_orm.engines import *
+from infi.clickhouse_orm.funcs import F


 class ModelTestCase(unittest.TestCase):
@@ -18,6 +18,7 @@ class ModelTestCase(unittest.TestCase):
         self.assertEqual(instance.str_field, 'dozo')
         self.assertEqual(instance.int_field, 17)
         self.assertEqual(instance.float_field, 0)
+        self.assertEqual(instance.default_func, NO_VALUE)

     def test_assignment(self):
         # Check that all fields are assigned during construction
@@ -63,15 +64,17 @@ class ModelTestCase(unittest.TestCase):
             "int_field": 100,
             "float_field": 7.0,
             "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
-            "alias_field": 0.0,
-            'str_field': 'dozo'
+            "alias_field": NO_VALUE,
+            "str_field": "dozo",
+            "default_func": NO_VALUE
         })
         self.assertDictEqual(instance.to_dict(include_readonly=False), {
             "date_field": datetime.date(1973, 12, 6),
             "int_field": 100,
             "float_field": 7.0,
             "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
-            'str_field': 'dozo'
+            "str_field": "dozo",
+            "default_func": NO_VALUE
         })
         self.assertDictEqual(
             instance.to_dict(include_readonly=False, field_names=('int_field', 'alias_field', 'datetime_field')), {
@@ -86,7 +89,7 @@ class ModelTestCase(unittest.TestCase):

         self.assertEqual(
             "Invalid value for StringField: {} (field 'str_field')".format(repr(bad_value)),
-            text_type(cm.exception)
+            str(cm.exception)
         )

     def test_field_name_in_error_message_for_invalid_value_in_assignment(self):
@@ -97,7 +100,7 @@ class ModelTestCase(unittest.TestCase):

         self.assertEqual(
             "Invalid value for Float32Field - {} (field 'float_field')".format(repr(bad_value)),
-            text_type(cm.exception)
+            str(cm.exception)
         )


@@ -109,5 +112,6 @@ class SimpleModel(Model):
     int_field = Int32Field(default=17)
     float_field = Float32Field()
     alias_field = Float32Field(alias='float_field')
+    default_func = Float32Field(default=F.sqrt(float_field) + 17)

     engine = MergeTree('date_field', ('int_field', 'date_field'))
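`default_func` gives SimpleModel a default computed by the server, which is what the new NO_VALUE assertions pin down: Python cannot evaluate `F.sqrt(float_field) + 17` locally, so the attribute holds the NO_VALUE sentinel until the row is read back. A compact restatement of the behaviour (derived from the tests above):

    instance = SimpleModel()
    assert instance.default_func == NO_VALUE
    assert instance.to_dict()['default_func'] == NO_VALUE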
tests/test_mutations.py (new file, 87 lines)
@@ -0,0 +1,87 @@
import unittest
from infi.clickhouse_orm import F
from .base_test_with_data import *
from time import sleep


class MutationsTestCase(TestCaseWithData):

    def setUp(self):
        super().setUp()
        if self.database.server_version < (18,):
            raise unittest.SkipTest('ClickHouse version too old')
        self._insert_all()

    def _wait_for_mutations(self):
        sql = 'SELECT * FROM system.mutations WHERE is_done = 0'
        while list(self.database.raw(sql)):
            sleep(0.25)

    def test_delete_all(self):
        Person.objects_in(self.database).delete()
        self._wait_for_mutations()
        self.assertFalse(Person.objects_in(self.database))

    def test_delete_with_where_cond(self):
        cond = Person.first_name == 'Cassady'
        self.assertTrue(Person.objects_in(self.database).filter(cond))
        Person.objects_in(self.database).filter(cond).delete()
        self._wait_for_mutations()
        self.assertFalse(Person.objects_in(self.database).filter(cond))
        self.assertTrue(Person.objects_in(self.database).exclude(cond))

    def test_delete_with_prewhere_cond(self):
        cond = F.toYear(Person.birthday) == 1977
        self.assertTrue(Person.objects_in(self.database).filter(cond))
        Person.objects_in(self.database).filter(cond, prewhere=True).delete()
        self._wait_for_mutations()
        self.assertFalse(Person.objects_in(self.database).filter(cond))
        self.assertTrue(Person.objects_in(self.database).exclude(cond))

    def test_update_all(self):
        Person.objects_in(self.database).update(height=0)
        self._wait_for_mutations()
        for p in Person.objects_in(self.database): print(p.height)
        self.assertFalse(Person.objects_in(self.database).exclude(height=0))

    def test_update_with_where_cond(self):
        cond = Person.first_name == 'Cassady'
        Person.objects_in(self.database).filter(cond).update(height=0)
        self._wait_for_mutations()
        self.assertFalse(Person.objects_in(self.database).filter(cond).exclude(height=0))

    def test_update_with_prewhere_cond(self):
        cond = F.toYear(Person.birthday) == 1977
        Person.objects_in(self.database).filter(cond, prewhere=True).update(height=0)
        self._wait_for_mutations()
        self.assertFalse(Person.objects_in(self.database).filter(cond).exclude(height=0))

    def test_update_multiple_fields(self):
        Person.objects_in(self.database).update(height=0, passport=None)
        self._wait_for_mutations()
        self.assertFalse(Person.objects_in(self.database).exclude(height=0))
        self.assertFalse(Person.objects_in(self.database).exclude(passport=None))

    def test_chained_update(self):
        Person.objects_in(self.database).update(height=F.rand()).update(passport=99999)
        self._wait_for_mutations()
        self.assertFalse(Person.objects_in(self.database).exclude(passport=99999))

    def test_invalid_state_for_mutations(self):
        base_query = Person.objects_in(self.database)
        queries = [
            base_query[0:1],
            base_query.limit_by(5, 'first_name'),
            base_query.distinct(),
            base_query.aggregate('first_name', count=F.count())
        ]
        for query in queries:
            print(query)
            with self.assertRaises(AssertionError):
                query.delete()
            with self.assertRaises(AssertionError):
                query.update(height=1.8)

    def test_missing_fields_for_update(self):
        with self.assertRaises(AssertionError):
            Person.objects_in(self.database).update()
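`delete()` and `update()` sit on top of ClickHouse mutations, which run asynchronously — hence the polling of system.mutations before each assertion. A sketch of the statements involved (approximate ClickHouse mutation syntax, not quoted from this changeset):

    # Person.objects_in(db).filter(Person.first_name == 'Cassady').delete()
    #   -> ALTER TABLE $db.person DELETE WHERE first_name = 'Cassady'
    # Person.objects_in(db).update(height=0)
    #   -> ALTER TABLE $db.person UPDATE height = 0 WHERE 1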
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import unittest
 import pytz

@@ -38,7 +37,7 @@ class NullableFieldsTest(unittest.TestCase):
             if value == '\\N':
                 self.assertIsNone(dt)
             else:
-                self.assertEqual(dt.tzinfo, pytz.utc)
+                self.assertTrue(dt.tzinfo)
             # Verify that conversion to and from db string does not change value
             dt2 = f.to_python(f.to_db_string(dt, quote=False), pytz.utc)
             self.assertEqual(dt, dt2)
@@ -1,17 +1,16 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals, print_function
 import unittest

 from infi.clickhouse_orm.database import Database
 from infi.clickhouse_orm.query import Q
+from infi.clickhouse_orm.funcs import F
 from .base_test_with_data import *
-import logging
 from datetime import date, datetime
+from enum import Enum
+from decimal import Decimal
+
+from logging import getLogger
+logger = getLogger('tests')

-try:
-    Enum # exists in Python 3.4+
-except NameError:
-    from enum import Enum # use the enum34 library instead


 class QuerySetTestCase(TestCaseWithData):
@@ -21,11 +20,11 @@ class QuerySetTestCase(TestCaseWithData):
         self.database.insert(self._sample_data())

     def _test_qs(self, qs, expected_count):
-        logging.info(qs.as_sql())
+        logger.info(qs.as_sql())
         count = 0
         for instance in qs:
             count += 1
-            logging.info('\t[%d]\t%s' % (count, instance.to_dict()))
+            logger.info('\t[%d]\t%s' % (count, instance.to_dict()))
         self.assertEqual(count, expected_count)
         self.assertEqual(qs.count(), expected_count)
@@ -227,7 +226,7 @@ class QuerySetTestCase(TestCaseWithData):
         qs = Person.objects_in(self.database).order_by('first_name', 'last_name')
         # Try different page sizes
         for page_size in (1, 2, 7, 10, 30, 100, 150):
-            # Iterate over pages and collect all intances
+            # Iterate over pages and collect all instances
             page_num = 1
             instances = set()
             while True:
@@ -287,7 +286,7 @@ class QuerySetTestCase(TestCaseWithData):
         self._test_qs(qs[80:], 20)

     def test_final(self):
-        # Final can be used with CollapsingMergeTree engine only
+        # Final can be used with CollapsingMergeTree/ReplacingMergeTree engines only
         with self.assertRaises(TypeError):
             Person.objects_in(self.database).final()
@@ -297,6 +296,29 @@ class QuerySetTestCase(TestCaseWithData):
         for item, exp_color in zip(res, (Color.red, Color.green, Color.white, Color.blue)):
             self.assertEqual(exp_color, item.color)

+    def test_mixed_filter(self):
+        qs = Person.objects_in(self.database)
+        qs = qs.filter(Q(first_name='a'), F('greater', Person.height, 1.7), last_name='b')
+        self.assertEqual(qs.conditions_as_sql(),
+                         "(first_name = 'a') AND (greater(`height`, 1.7)) AND (last_name = 'b')")
+
+    def test_precedence_of_negation(self):
+        p = ~Q(first_name='a')
+        q = Q(last_name='b')
+        r = p & q
+        self.assertEqual(r.to_sql(Person), "(last_name = 'b') AND (NOT (first_name = 'a'))")
+        r = q & p
+        self.assertEqual(r.to_sql(Person), "(last_name = 'b') AND (NOT (first_name = 'a'))")
+        r = q | p
+        self.assertEqual(r.to_sql(Person), "(last_name = 'b') OR (NOT (first_name = 'a'))")
+        r = ~q & p
+        self.assertEqual(r.to_sql(Person), "(NOT (last_name = 'b')) AND (NOT (first_name = 'a'))")
+
+    def test_invalid_filter(self):
+        qs = Person.objects_in(self.database)
+        with self.assertRaises(TypeError):
+            qs.filter('foo')
+
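The negation tests document an invariant worth noting: `~` applies to a single Q object and its parenthesization survives `&`/`|` reordering. A compact restatement (derived from the assertions above):

    q1 = Q(last_name='b') & ~Q(first_name='a')
    q2 = ~Q(first_name='a') & Q(last_name='b')
    # both render as: (last_name = 'b') AND (NOT (first_name = 'a'))
    assert q1.to_sql(Person) == q2.to_sql(Person)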
class AggregateTestCase(TestCaseWithData):
|
class AggregateTestCase(TestCaseWithData):
|
||||||
|
|
||||||
|
@ -311,6 +333,13 @@ class AggregateTestCase(TestCaseWithData):
|
||||||
for row in qs:
|
for row in qs:
|
||||||
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
|
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
|
||||||
self.assertEqual(row.count, 100)
|
self.assertEqual(row.count, 100)
|
||||||
|
# With functions
|
||||||
|
qs = Person.objects_in(self.database).aggregate(average_height=F.avg(Person.height), count=F.count())
|
||||||
|
print(qs.as_sql())
|
||||||
|
self.assertEqual(qs.count(), 1)
|
||||||
|
for row in qs:
|
||||||
|
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
|
||||||
|
self.assertEqual(row.count, 100)
|
||||||
|
|
||||||
def test_aggregate_with_filter(self):
|
def test_aggregate_with_filter(self):
|
||||||
# When filter comes before aggregate
|
# When filter comes before aggregate
|
||||||
|
@ -328,6 +357,22 @@ class AggregateTestCase(TestCaseWithData):
|
||||||
self.assertAlmostEqual(row.average_height, 1.675, places=4)
|
self.assertAlmostEqual(row.average_height, 1.675, places=4)
|
||||||
self.assertEqual(row.count, 2)
|
self.assertEqual(row.count, 2)
|
||||||
|
|
||||||
|
def test_aggregate_with_filter__funcs(self):
|
||||||
|
# When filter comes before aggregate
|
||||||
|
qs = Person.objects_in(self.database).filter(Person.first_name=='Warren').aggregate(average_height=F.avg(Person.height), count=F.count())
|
||||||
|
print(qs.as_sql())
|
||||||
|
self.assertEqual(qs.count(), 1)
|
||||||
|
for row in qs:
|
||||||
|
self.assertAlmostEqual(row.average_height, 1.675, places=4)
|
||||||
|
self.assertEqual(row.count, 2)
|
||||||
|
# When filter comes after aggregate
|
||||||
|
qs = Person.objects_in(self.database).aggregate(average_height=F.avg(Person.height), count=F.count()).filter(Person.first_name=='Warren')
|
||||||
|
print(qs.as_sql())
|
||||||
|
self.assertEqual(qs.count(), 1)
|
||||||
|
for row in qs:
|
||||||
|
self.assertAlmostEqual(row.average_height, 1.675, places=4)
|
||||||
|
self.assertEqual(row.count, 2)
|
||||||
|
|
||||||
def test_aggregate_with_implicit_grouping(self):
|
def test_aggregate_with_implicit_grouping(self):
|
||||||
qs = Person.objects_in(self.database).aggregate('first_name', average_height='avg(height)', count='count()')
|
qs = Person.objects_in(self.database).aggregate('first_name', average_height='avg(height)', count='count()')
|
||||||
print(qs.as_sql())
|
print(qs.as_sql())
|
||||||
|
@@ -436,6 +481,28 @@ class AggregateTestCase(TestCaseWithData):
         qs = Mdl.objects_in(self.database).filter(the__next__number__gt=1)
         self.assertEqual(qs.conditions_as_sql(), 'the__next__number > 1')
 
+    def test_limit_by(self):
+        if self.database.server_version < (19, 17):
+            raise unittest.SkipTest('ClickHouse version too old')
+        # Test without offset
+        qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n='count()').\
+            order_by('first_name', '-height').limit_by(1, 'first_name')
+        self.assertEqual(qs.count(), 94)
+        self.assertEqual(list(qs)[89].last_name, 'Bowen')
+        # Test with funcs and fields
+        qs = Person.objects_in(self.database).aggregate(Person.first_name, Person.last_name, Person.height, n=F.count()).\
+            order_by(Person.first_name, '-height').limit_by(1, F.upper(Person.first_name))
+        self.assertEqual(qs.count(), 94)
+        self.assertEqual(list(qs)[89].last_name, 'Bowen')
+        # Test with limit and offset, also mixing LIMIT with LIMIT BY
+        qs = Person.objects_in(self.database).filter(height__gt=1.67).order_by('height', 'first_name')
+        limited_qs = qs.limit_by((0, 3), 'height')
+        self.assertEqual([p.first_name for p in limited_qs[:3]], ['Amanda', 'Buffy', 'Dora'])
+        limited_qs = qs.limit_by((3, 3), 'height')
+        self.assertEqual([p.first_name for p in limited_qs[:3]], ['Elton', 'Josiah', 'Macaulay'])
+        limited_qs = qs.limit_by((6, 3), 'height')
+        self.assertEqual([p.first_name for p in limited_qs[:3]], ['Norman', 'Octavius', 'Oliver'])
+
 
 Color = Enum('Color', u'red blue green yellow brown white black')
 
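The `limit_by` call added here maps onto ClickHouse's LIMIT ... BY clause, which keeps at most n rows per distinct value of the given expression; an `(offset, n)` tuple skips `offset` rows per group first. Roughly (a sketch; the exact SQL spelling may differ):

from infi.clickhouse_orm.database import Database

db = Database('test_db')  # assumed to hold the Person fixture data
qs = Person.objects_in(db).order_by('first_name', '-height').limit_by(1, 'first_name')
# ... ORDER BY first_name, height DESC LIMIT 1 BY first_name
# i.e. one row (the tallest person) per distinct first name
qs = Person.objects_in(db).order_by('height', 'first_name').limit_by((3, 3), 'height')
# the tuple form emits something like LIMIT 3 OFFSET 3 BY height,
# keeping rows 4-6 within each group of equal heights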
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 
 from infi.clickhouse_orm.database import DatabaseException, ServerError
 from .base_test_with_data import *
@@ -26,7 +25,9 @@ class ReadonlyTestCase(TestCaseWithData):
                 self.database.drop_database()
             self._check_db_readonly_err(cm.exception, drop_table=True)
         except ServerError as e:
-            if e.code == 192 and e.message.startswith('Unknown user'):
+            if e.code == 192 and e.message.startswith('Unknown user'): # ClickHouse version < 20.3
+                raise unittest.SkipTest('Database user "%s" is not defined' % username)
+            elif e.code == 516 and e.message.startswith('readonly: Authentication failed'): # ClickHouse version >= 20.3
                 raise unittest.SkipTest('Database user "%s" is not defined' % username)
             else:
                 raise
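The added branch tracks a server-side change: before ClickHouse 20.3 an undefined user surfaced as error code 192 ('Unknown user'), afterwards as 516 ('readonly: Authentication failed'). Code probing for a missing user therefore has to accept either shape; a hypothetical helper, for illustration only:

from infi.clickhouse_orm.database import Database, ServerError

def user_is_defined(db_name, username):
    # Returns False when the server rejects the user, under either
    # the pre-20.3 (192) or post-20.3 (516) error code.
    try:
        Database(db_name, username=username, readonly=True)
        return True
    except ServerError as e:
        if e.code in (192, 516):
            return False
        raise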
@@ -35,7 +36,10 @@ class ReadonlyTestCase(TestCaseWithData):
 
     def _check_db_readonly_err(self, exc, drop_table=None):
         self.assertEqual(exc.code, 164)
-        if drop_table:
+        print(exc.message)
+        if self.database.server_version >= (20, 3):
+            self.assertTrue('Cannot execute query in readonly mode' in exc.message)
+        elif drop_table:
             self.assertTrue(exc.message.startswith('Cannot drop table in readonly mode'))
         else:
             self.assertTrue(exc.message.startswith('Cannot insert into table in readonly mode'))
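Both readonly hunks, like the `limit_by` test above, rely on `Database.server_version`, which the ORM exposes as a tuple of integers, so plain tuple comparison doubles as a version check:

from infi.clickhouse_orm.database import Database

db = Database('default')
print(db.server_version)  # e.g. (21, 9, 4)
if db.server_version >= (20, 3):
    pass  # newer servers use the generic readonly error message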
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import unittest
 
 from infi.clickhouse_orm.database import ServerError
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import unittest
 from infi.clickhouse_orm.fields import *
 from datetime import date, datetime
@@ -7,17 +6,21 @@ import pytz
 
 class SimpleFieldsTest(unittest.TestCase):
 
+    epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
+    # Valid values
+    dates = [
+        date(1970, 1, 1), datetime(1970, 1, 1), epoch,
+        epoch.astimezone(pytz.timezone('US/Eastern')), epoch.astimezone(pytz.timezone('Asia/Jerusalem')),
+        '1970-01-01 00:00:00', '1970-01-17 00:00:17', '0000-00-00 00:00:00', 0,
+        '2017-07-26T08:31:05', '2017-07-26T08:31:05Z', '2017-07-26 08:31',
+        '2017-07-26T13:31:05+05', '2017-07-26 13:31:05+0500'
+    ]
+
     def test_datetime_field(self):
         f = DateTimeField()
-        epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
-        # Valid values
-        for value in (date(1970, 1, 1), datetime(1970, 1, 1), epoch,
-                      epoch.astimezone(pytz.timezone('US/Eastern')), epoch.astimezone(pytz.timezone('Asia/Jerusalem')),
-                      '1970-01-01 00:00:00', '1970-01-17 00:00:17', '0000-00-00 00:00:00', 0,
-                      '2017-07-26T08:31:05', '2017-07-26T08:31:05Z', '2017-07-26 08:31',
-                      '2017-07-26T13:31:05+05', '2017-07-26 13:31:05+0500'):
+        for value in self.dates:
             dt = f.to_python(value, pytz.utc)
-            self.assertEqual(dt.tzinfo, pytz.utc)
+            self.assertTrue(dt.tzinfo)
             # Verify that conversion to and from db string does not change value
             dt2 = f.to_python(f.to_db_string(dt, quote=False), pytz.utc)
             self.assertEqual(dt, dt2)
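This refactor hoists the accepted input formats into a shared `dates` class attribute so the DateTime64 test below can reuse them. The round-trip property being asserted looks like this in isolation (a sketch):

import pytz
from infi.clickhouse_orm.fields import DateTimeField

f = DateTimeField()
dt = f.to_python('2017-07-26T08:31:05Z', pytz.utc)  # parse into an aware datetime
s = f.to_db_string(dt, quote=False)                 # serialize for ClickHouse
assert f.to_python(s, pytz.utc) == dt               # round-trip is lossless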
@@ -27,6 +30,35 @@ class SimpleFieldsTest(unittest.TestCase):
             with self.assertRaises(ValueError):
                 f.to_python(value, pytz.utc)
 
+    def test_datetime64_field(self):
+        f = DateTime64Field()
+        # Valid values
+        for value in self.dates + [
+            datetime(1970, 1, 1, microsecond=100000),
+            pytz.timezone('US/Eastern').localize(datetime(1970, 1, 1, microsecond=100000)),
+            '1970-01-01 00:00:00.1', '1970-01-17 00:00:17.1', '0000-00-00 00:00:00.1', 0.1,
+            '2017-07-26T08:31:05.1', '2017-07-26T08:31:05.1Z', '2017-07-26 08:31.1',
+            '2017-07-26T13:31:05.1+05', '2017-07-26 13:31:05.1+0500'
+        ]:
+            dt = f.to_python(value, pytz.utc)
+            self.assertTrue(dt.tzinfo)
+            # Verify that conversion to and from db string does not change value
+            dt2 = f.to_python(f.to_db_string(dt, quote=False), pytz.utc)
+            self.assertEqual(dt, dt2)
+        # Invalid values
+        for value in ('nope', '21/7/1999',
+                      '2017-01 15:06:00', '2017-01-01X15:06:00', '2017-13-01T15:06:00'):
+            with self.assertRaises(ValueError):
+                f.to_python(value, pytz.utc)
+
+    def test_datetime64_field_precision(self):
+        for precision in range(1, 7):
+            f = DateTime64Field(precision=precision, timezone=pytz.utc)
+            dt = f.to_python(datetime(2000, 1, 1, microsecond=123456), pytz.utc)
+            dt2 = f.to_python(f.to_db_string(dt, quote=False), pytz.utc)
+            m = round(123456, precision - 6) # round rightmost microsecond digits according to precision
+            self.assertEqual(dt2, dt.replace(microsecond=m))
+
     def test_date_field(self):
         f = DateField()
         epoch = date(1970, 1, 1)
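The precision test leans on Python's `round` with a negative second argument: `round(123456, precision - 6)` zeroes out the microsecond digits that a DateTime64 column of that precision cannot store. Concretely:

for precision in range(1, 7):
    print(precision, round(123456, precision - 6))
# 1 100000
# 2 120000
# 3 123000
# 4 123500
# 5 123460
# 6 123456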
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import unittest
 from datetime import date
 
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import unittest
 from uuid import UUID
 from infi.clickhouse_orm.database import Database
@@ -16,6 +15,8 @@ class UUIDFieldsTest(unittest.TestCase):
         self.database.drop_database()
 
     def test_uuid_field(self):
+        if self.database.server_version < (18, 1):
+            raise unittest.SkipTest('ClickHouse version too old')
         # Create a model
         class TestModel(Model):
             i = Int16Field()
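The version gate reflects that this test treats the UUID type as unsupported on servers older than ClickHouse 18.1. On newer servers a UUID column is declared like any other field (a sketch, assuming a reachable server and the v2 module layout):

from uuid import UUID
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.engines import Memory
from infi.clickhouse_orm.fields import Int16Field, UUIDField
from infi.clickhouse_orm.models import Model

class TestModel(Model):
    i = Int16Field()
    f = UUIDField()
    engine = Memory()

db = Database('test_db')
if db.server_version >= (18, 1):
    db.create_table(TestModel)
    db.insert([TestModel(i=1, f=UUID('12345678-1234-5678-1234-567812345678'))])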