Finished Release v2.0.0

This commit is contained in:
Itai Shirav 2020-05-29 15:50:11 +03:00
commit 4f0624d35c
43 changed files with 6692 additions and 498 deletions

1
.gitignore vendored

@ -59,6 +59,7 @@ src/infi/clickhouse_orm/__version__.py
bootstrap.py
htmldocs/
cover/
# tox
.tox/

1073
.noseids Normal file

File diff suppressed because it is too large


@ -1,6 +1,17 @@
Change Log
==========
v2.0.0
------
- Dropped support for Python 2.x
- New flexible syntax for database expressions and functions
- Expressions as default values for model fields
- Support for IPv4 and IPv6 fields
- Automatic generation of models by inspecting existing tables
- Convenient ways to import ORM classes
See [What's new in version 2](docs/whats_new_in_version_2.md) for details.
v1.4.0
------
- Added primary_key parameter to MergeTree engines (M1hacka)


@ -8,10 +8,7 @@ Let's jump right in with a simple example of monitoring CPU usage. First we need
connect to the database and create a table for the model:
```python
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import Memory
from infi.clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory, F
class CPUStats(Model):
@ -45,13 +42,14 @@ Querying the table is easy, using either the query builder or raw SQL:
```python
# Calculate what percentage of the time CPU 1 was over 95% busy
total = CPUStats.objects_in(db).filter(cpu_id=1).count()
busy = CPUStats.objects_in(db).filter(cpu_id=1, cpu_percent__gt=95).count()
print 'CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total)
queryset = CPUStats.objects_in(db)
total = queryset.filter(CPUStats.cpu_id == 1).count()
busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
# Calculate the average usage per CPU
for row in CPUStats.objects_in(db).aggregate('cpu_id', average='avg(cpu_percent)'):
print 'CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row)
for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
```
To learn more please visit the [documentation](docs/toc.md).
To learn more please visit the [documentation](docs/toc.md).


@ -14,8 +14,7 @@ install_requires = [
'iso8601 >= 0.1.12',
'pytz',
'requests',
'setuptools',
'six'
'setuptools'
]
version_file = src/infi/clickhouse_orm/__version__.py
description = A Python library for working with the ClickHouse database
@ -31,7 +30,7 @@ homepage = https://github.com/Infinidat/infi.clickhouse_orm
[isolated-python]
recipe = infi.recipe.python
version = v2.7.12.4
version = v3.8.0.2
[setup.py]
recipe = infi.recipe.template.version

File diff suppressed because it is too large

99
docs/expressions.md Normal file

@ -0,0 +1,99 @@
Expressions
===========
One of the ORM's core concepts is _expressions_, which are composed using functions, operators and model fields. Expressions are used in multiple places in the ORM:
- When defining [field options](field_options.md) - `default`, `alias` and `materialized`.
- In [table engine](table_engines.md) parameters for engines in the `MergeTree` family.
- In [queryset](querysets.md) methods such as `filter`, `exclude`, `order_by`, `aggregate` and `limit_by`.
Using Expressions
-----------------
Expressions usually include ClickHouse database functions, which are made available by the `F` class. Here's a simple function:
```python
from infi.clickhouse_orm import F
expr = F.today()
```
Functions that accept arguments can be composed, just like when using SQL:
```python
expr = F.toDayOfWeek(F.today())
```
You can see the SQL expression that is represented by an ORM expression by calling its `to_sql` method or converting it to a string:
```python
>>> print(expr)
toDayOfWeek(today())
```
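The `to_sql` method returns the same string, which is handy for debugging (a minimal sketch):
```python
>>> expr.to_sql()
'toDayOfWeek(today())'
```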
### Operators
ORM expressions support Python's standard arithmetic operators, so you can compose expressions using `+`, `-`, `*`, `/`, `//` and `%`. For example:
```python
# A random integer between 1 and 10
F.rand() % 10 + 1
```
There is also support for comparison operators (`<`, `<=`, `==`, `>=`, `>`, `!=`) and logical operators (`&`, `|`, `~`, `^`) which are often used for filtering querysets:
```python
# Is it Friday the 13th?
(F.toDayOfWeek(F.today()) == 6) & (F.toDayOfMonth(F.today()) == 13)
```
Note that Python's bitwise operators (`&`, `|`, `~`, `^`) have higher precedence than comparison operators, so always use parentheses when combining these two types of operators in an expression. Otherwise the resulting SQL might be different than what you would expect.
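For example, here is the intended expression next to its unparenthesized form (a sketch):
```python
# Parenthesized - produces the intended SQL:
(F.toDayOfWeek(F.today()) == 6) & (F.toDayOfMonth(F.today()) == 13)

# Without parentheses, & binds tighter than ==, so Python would first evaluate
# 6 & F.toDayOfMonth(F.today()) - not the intended pair of comparisons:
# F.toDayOfWeek(F.today()) == 6 & F.toDayOfMonth(F.today()) == 13
```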
### Referring to model fields
To refer to a model field inside an expression, use `<class>.<field>` syntax, for example:
```python
# Convert the temperature from Celsius to Fahrenheit
Sensor.temperature * 1.8 + 32
```
Inside model class definitions omit the class name:
```python
class Person(Model):
height_cm = Float32Field()
height_inch = Float32Field(alias=height_cm/2.54)
...
```
### Parametric functions
Some of ClickHouse's aggregate functions can accept one or more parameters - constants for initialization that affect the way the function works. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. For example:
```python
# Most common last names
F.topK(5)(Person.last_name)
# Find 90th, 95th and 99th percentile of heights
F.quantiles(0.9, 0.95, 0.99)(Person.height)
```
### Creating new "functions"
Since expressions are just Python objects until they get converted to SQL, it is possible to invent new "functions" by combining existing ones into useful building blocks. For example, we can create a reusable expression that takes a string and trims whitespace, converts it to uppercase, and changes blanks to underscores:
```python
def normalize_string(s):
return F.replaceAll(F.upper(F.trimBoth(s)), ' ', '_')
```
Then we can use this expression anywhere we need it:
```python
class Event(Model):
code = StringField()
normalized_code = StringField(materialized=normalize_string(code))
```
### Which functions are available?
ClickHouse has many hundreds of functions, and new ones often get added. Many, but not all of them, are already covered by the ORM. If you encounter a function that the database supports but is not available in the `F` class, please report this via a GitHub issue. You can still use the function by providing its name:
```python
expr = F("someFunctionName", arg1, arg2, ...)
```
Note that higher-order database functions (those that use lambda expressions) are not supported.
---
[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Importing ORM Classes >>](importing_orm_classes.md)

112
docs/field_options.md Normal file

@ -0,0 +1,112 @@
Field Options
=============
All field types accept the following arguments:
- default
- alias
- materialized
- readonly
- codec
Note that `default`, `alias` and `materialized` are mutually exclusive - you cannot use more than one of them in a single field.
## default
Specifies a default value to use for the field. If not given, the field will have a default value based on its type: empty string for string fields, zero for numeric fields, etc.
The default value can be a Python value suitable for the field type, or an expression. For example:
```python
class Event(Model):
name = StringField(default="EVENT")
repeated = UInt32Field(default=1)
created = DateTimeField(default=F.now())
engine = Memory()
...
```
When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE` instead. For example:
```python
>>> event = Event()
>>> print(event.to_dict())
{'name': 'EVENT', 'repeated': 1, 'created': <NO_VALUE>}
```
:warning: Due to a bug in ClickHouse versions prior to 20.1.2.4, insertion of records with expressions for default values may fail.
## alias / materialized
The `alias` and `materialized` attributes expect an expression that gets calculated by the database. The difference is that `alias` fields are calculated on the fly, while `materialized` fields are calculated when the record is inserted, and are stored on disk.
You can use any expression, and can refer to other model fields. For example:
```python
class Event(Model):
created = DateTimeField()
created_date = DateTimeField(materialized=F.toDate(created))
name = StringField()
normalized_name = StringField(alias=F.upper(F.trim(name)))
engine = Memory()
```
For backwards compatibility with older versions of the ORM, you can pass the expression as an SQL string:
```python
created_date = DateTimeField(materialized="toDate(created)")
```
Neither field type can be inserted into the database directly, so both are ignored when using the `Database.insert()` method. ClickHouse also does not return their values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query.
Usage:
```python
obj = Event(created=datetime.now(), name='MyEvent')
db = Database('my_test_db')
db.insert([obj])
# All values will be retrieved from database
db.select('SELECT created, created_date, name, normalized_name FROM $db.event', model_class=Event)
# created_date and normalized_name will contain a default value
db.select('SELECT * FROM $db.event', model_class=Event)
```
When creating a model instance, any alias or materialized fields are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE` since their real values can only be known after insertion to the database.
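For example, continuing the `Event` model above (a minimal sketch; the printed form mirrors the `to_dict` example in the `default` section):
```python
>>> event = Event(created=datetime.now(), name='MyEvent')
>>> print(event.normalized_name)  # an alias field - known only after insertion
<NO_VALUE>
```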
## codec
This attribute specifies the compression algorithm to use for the field (instead of the default data compression algorithm defined in server settings).
Supported compression algorithms:
| Codec | Argument | Comment
| -------------------- | -------------------------------------------| ----------------------------------------------------
| NONE | None | No compression.
| LZ4 | None | LZ4 compression.
| LZ4HC(`level`) | Possible `level` range: [3, 12]. | Default value: 9. Greater values stand for better compression and higher CPU usage. Recommended value range: [4, 9].
| ZSTD(`level`) | Possible `level` range: [1, 22]. | Default value: 1. Greater values stand for better compression and higher CPU usage. Levels >= 20 should be used with caution, as they require more memory.
| Delta(`delta_bytes`) | Possible `delta_bytes` range: 1, 2, 4, 8. | Default value for `delta_bytes` is `sizeof(type)` if it equals 1, 2, 4 or 8, and 1 otherwise.
Codecs can be combined by separating their names with commas. The default database codec is not included in the pipeline (if it should be applied to a field, you have to specify it explicitly in the pipeline).
Recommended usage for codecs:
- When values of a particular metric do not differ much from point to point, delta encoding significantly reduces disk space usage.
- DateTime works well with a pipeline of Delta and ZSTD; the column can often be compressed to 2-3% of its original size (given smooth datetime data)
- Numeric types usually achieve the best compression rates with ZSTD
- String types achieve good compression rates with LZ4HC
Example:
```python
class Stats(Model):
id = UInt64Field(codec='ZSTD(10)')
timestamp = DateTimeField(codec='Delta,ZSTD')
timestamp_date = DateField(codec='Delta(4),ZSTD(22)')
metadata_id = Int64Field(codec='LZ4')
status = StringField(codec='LZ4HC(10)')
calculation = NullableField(Float32Field(), codec='ZSTD')
alerts = ArrayField(FixedStringField(length=15), codec='Delta(2),LZ4HC')
engine = MergeTree('timestamp_date', ('id', 'timestamp'))
```
Note: This feature is supported on ClickHouse version 19.1.16 and above. Codec arguments will be ignored by the ORM for older versions of ClickHouse.
## readonly
This attribute is set automatically for fields with the `alias` or `materialized` attributes; you do not need to pass it yourself.
---
[<< Querysets](querysets.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md)


@ -1,35 +1,38 @@
Field Types
===========
See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/data_types/)
See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/sql-reference/data-types/)
Currently the following field types are supported:
The following field types are supported:
| Class | DB Type | Pythonic Type | Comments
| ------------------ | ---------- | --------------------- | -----------------------------------------------------
| StringField | String | str | Encoded as UTF-8 when written to ClickHouse
| FixedStringField | FixedString| str | Encoded as UTF-8 when written to ClickHouse
| DateField | Date | datetime.date | Range 1970-01-01 to 2105-12-31
| DateTimeField | DateTime | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Always in UTC
| Int8Field | Int8 | int | Range -128 to 127
| Int16Field | Int16 | int | Range -32768 to 32767
| Int32Field | Int32 | int | Range -2147483648 to 2147483647
| Int64Field | Int64 | int | Range -9223372036854775808 to 9223372036854775807
| UInt8Field | UInt8 | int | Range 0 to 255
| UInt16Field | UInt16 | int | Range 0 to 65535
| UInt32Field | UInt32 | int | Range 0 to 4294967295
| UInt64Field | UInt64 | int | Range 0 to 18446744073709551615
| Float32Field | Float32 | float |
| Float64Field | Float64 | float |
| DecimalField | Decimal | Decimal | Pythonic values are rounded to fit the scale of the database field
| Decimal32Field | Decimal32 | Decimal | Ditto
| Decimal64Field | Decimal64 | Decimal | Ditto
| Decimal128Field | Decimal128 | Decimal | Ditto
| UUIDField | UUID | uuid.UUID |
| IPv4Field | IPv4 | ipaddress.IPv4Address |
| IPv6Field | IPv6 | ipaddress.IPv6Address |
| Enum8Field | Enum8 | Enum | See below
| Enum16Field | Enum16 | Enum | See below
| ArrayField | Array | list | See below
| NullableField | Nullable | See below | See below
| Class | DB Type | Pythonic Type | Comments
| ------------------ | ---------- | ------------------- | -----------------------------------------------------
| StringField | String | unicode | Encoded as UTF-8 when written to ClickHouse
| FixedStringField | String | unicode | Encoded as UTF-8 when written to ClickHouse
| DateField | Date | datetime.date | Range 1970-01-01 to 2105-12-31
| DateTimeField | DateTime | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Always in UTC
| Int8Field | Int8 | int | Range -128 to 127
| Int16Field | Int16 | int | Range -32768 to 32767
| Int32Field | Int32 | int | Range -2147483648 to 2147483647
| Int64Field | Int64 | int/long | Range -9223372036854775808 to 9223372036854775807
| UInt8Field | UInt8 | int | Range 0 to 255
| UInt16Field | UInt16 | int | Range 0 to 65535
| UInt32Field | UInt32 | int | Range 0 to 4294967295
| UInt64Field | UInt64 | int/long | Range 0 to 18446744073709551615
| Float32Field | Float32 | float |
| Float64Field | Float64 | float |
| DecimalField | Decimal | Decimal | Pythonic values are rounded to fit the scale of the database field
| Decimal32Field | Decimal32 | Decimal | Ditto
| Decimal64Field | Decimal64 | Decimal | Ditto
| Decimal128Field | Decimal128 | Decimal | Ditto
| UUIDField | UUID | Decimal |
| Enum8Field | Enum8 | Enum | See below
| Enum16Field | Enum16 | Enum | See below
| ArrayField | Array | list | See below
| NullableField | Nullable | See below | See below
DateTimeField and Time Zones
----------------------------
@ -43,29 +46,26 @@ A `DateTimeField` can be assigned values from one of the following types:
The assigned value always gets converted to a timezone-aware `datetime` in UTC. If the assigned value is a timezone-aware `datetime` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC.
DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse
version which is new enough to support the `timezone()` function, otherwise it is assumed to be using UTC. In any case, we recommend settings the server timezone to UTC in order to prevent confusion.
DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse version which is new enough to support the `timezone()` function, otherwise it is assumed to be using UTC. In any case, we recommend setting the server timezone to UTC in order to prevent confusion.
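For example (a minimal sketch, assuming a model `Event` with `created = DateTimeField()`; `pytz` is already a dependency of the ORM):
```python
from datetime import datetime
import pytz

Event(created=datetime(2020, 5, 29, 12, 0))  # naive - assumed to already be in UTC
Event(created=pytz.timezone('Asia/Jerusalem').localize(datetime(2020, 5, 29, 12, 0)))  # converted to UTC
```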
Working with enum fields
------------------------
`Enum8Field` and `Enum16Field` provide support for working with ClickHouse enum columns. They accept strings or integers as values, and convert them to the matching Pythonic Enum member.
Python 3.4 and higher supports Enums natively. When using previous Python versions you need to install the enum34 library.
Example of a model with an enum field:
```python
Gender = Enum('Gender', 'male female unspecified')
class Person(models.Model):
class Person(Model):
first_name = fields.StringField()
last_name = fields.StringField()
birthday = fields.DateField()
gender = fields.Enum32Field(Gender)
first_name = StringField()
last_name = StringField()
birthday = DateField()
gender = Enum8Field(Gender)
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female)
```
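The conversion described above works in both directions (a sketch; `2` is the value auto-assigned to `Gender.female` by `Enum`):
```python
>>> Person(gender='female').gender   # a string value
<Gender.female: 2>
>>> Person(gender=2).gender          # an integer value
<Gender.female: 2>
```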
@ -76,64 +76,34 @@ Working with array fields
You can create array fields containing any data type, for example:
```python
class SensorData(models.Model):
class SensorData(Model):
date = fields.DateField()
temperatures = fields.ArrayField(fields.Float32Field())
humidity_levels = fields.ArrayField(fields.UInt8Field())
date = DateField()
temperatures = ArrayField(Float32Field())
humidity_levels = ArrayField(UInt8Field())
engine = engines.MergeTree('date', ('date',))
engine = MergeTree('date', ('date',))
data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
```
Note that multidimensional arrays are not supported yet by the ORM.
Working with materialized and alias fields
------------------------------------------
ClickHouse provides an opportunity to create MATERIALIZED and ALIAS Fields.
See documentation [here](https://clickhouse.yandex/docs/en/query_language/queries/#default-values).
Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query.
Usage:
```python
class Event(models.Model):
created = fields.DateTimeField()
created_date = fields.DateTimeField(materialized='toDate(created)')
name = fields.StringField()
username = fields.StringField(alias='name')
engine = engines.MergeTree('created_date', ('created_date', 'created'))
obj = Event(created=datetime.now(), name='MyEvent')
db = Database('my_test_db')
db.insert([obj])
# All values will be retrieved from database
db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
# created_date and username will contain a default value
db.select('SELECT * FROM $db.event', model_class=Event)
```
Working with nullable fields
----------------------------
[ClickHouse provides a NULL value support](https://clickhouse.yandex/docs/en/data_types/nullable).
[ClickHouse provides a NULL value support](https://clickhouse.tech/docs/en/sql-reference/data-types/nullable/).
Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example:
```python
class EventData(models.Model):
class EventData(Model):
date = fields.DateField()
comment = fields.NullableField(fields.StringField(), extra_null_values={''})
score = fields.NullableField(fields.UInt8Field())
serie = fields.NullableField(fields.ArrayField(fields.UInt8Field()))
date = DateField()
comment = NullableField(StringField(), extra_null_values={''})
score = NullableField(UInt8Field())
serie = NullableField(ArrayField(UInt8Field()))
engine = engines.MergeTree('date', ('date',))
engine = MergeTree('date', ('date',))
score_event = EventData(date=date.today(), comment=None, score=5, serie=None)
@ -149,52 +119,12 @@ NOTE: `ArrayField` of `NullableField` is not supported. Also `EnumField` cannot
NOTE: Using `Nullable` almost always negatively affects performance, keep this in mind when designing your databases.
Working with field compression codecs
-------------------------------------
Besides default data compression, defined in server settings, per-field specification is also available.
Supported compression algorithms:
| Codec | Argument | Comment
| -------------------- | -------------------------------------------| ----------------------------------------------------
| NONE | None | No compression.
| LZ4 | None | LZ4 compression.
| LZ4HC(`level`) | Possible `level` range: [3, 12]. | Default value: 9. Greater values stands for better compression and higher CPU usage. Recommended value range: [4,9].
| ZSTD(`level`) | Possible `level`range: [1, 22]. | Default value: 1. Greater values stands for better compression and higher CPU usage. Levels >= 20, should be used with caution, as they require more memory.
| Delta(`delta_bytes`) | Possible `delta_bytes` range: 1, 2, 4 , 8. | Default value for `delta_bytes` is `sizeof(type)` if it is equal to 1, 2,4 or 8 and equals to 1 otherwise.
Codecs can be combined in a pipeline. Default table codec is not included into pipeline (if it should be applied to a field, you have to specify it explicitly in pipeline).
Recommended usage for codecs:
- Usually, values for particular metric, stored in path does not differ significantly from point to point. Using delta-encoding allows to reduce disk space usage significantly.
- DateTime works great with pipeline of Delta, ZSTD and the column size can be compressed to 2-3% of its original size (given a smooth datetime data)
- Numeric types usually enjoy best compression rates with ZSTD
- String types enjoy good compression rates with LZ4HC
Usage:
```python
class Stats(models.Model):
id = fields.UInt64Field(codec='ZSTD(10)')
timestamp = fields.DateTimeField(codec='Delta,ZSTD')
timestamp_date = fields.DateField(codec='Delta(4),ZSTD(22)')
metadata_id = fields.Int64Field(codec='LZ4')
status = fields.StringField(codec='LZ4HC(10)')
calculation = fields.NullableField(fields.Float32Field(), codec='ZSTD')
alerts = fields.ArrayField(fields.FixedStringField(length=15), codec='Delta(2),LZ4HC')
engine = MergeTree('timestamp_date', ('id', 'timestamp'))
```
Note: This feature is supported on ClickHouse version 19.1.16 and above. Codec arguments will be ignored by the ORM for older versions of ClickHouse.
Working with LowCardinality fields
----------------------------------
Starting with version 19.0 ClickHouse offers a new type of field to improve the performance of queries
and compaction of columns for low entropy data.
[More specifically](https://github.com/yandex/ClickHouse/issues/4074) LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessarily.
[More specifically](https://github.com/ClickHouse/ClickHouse/issues/4074) the LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessary.
If the total number of distinct values is large, but there are fewer than about a million distinct values each day, then queries by day will be processed efficiently, and queries for larger ranges will be processed fairly efficiently.
LowCardinality works independently of (generic) field compression.
@ -203,19 +133,16 @@ The compression ratios of LowCardinality fields for text data may be significant
LowCardinality gives a performance boost, in the form of processing speed, if the number of distinct values is less than a few million. This is because data is processed in dictionary-encoded form.
You can find further information about LowCardinality in [this presentation](https://github.com/yandex/clickhouse-presentations/blob/master/meetup19/string_optimization.pdf).
You can find further information [here](https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/).
Usage example:
```python
class LowCardinalityModel(models.Model):
date = fields.DateField()
int32 = fields.LowCardinalityField(fields.Int32Field())
float32 = fields.LowCardinalityField(fields.Float32Field())
string = fields.LowCardinalityField(fields.StringField())
nullable = fields.LowCardinalityField(fields.NullableField(fields.StringField()))
array = fields.ArrayField(fields.LowCardinalityField(fields.UInt64Field()))
engine = MergeTree('date', ('date',))
class LowCardinalityModel(Model):
date = DateField()
string = LowCardinalityField(StringField())
nullable = LowCardinalityField(NullableField(StringField()))
array = ArrayField(LowCardinalityField(DateField()))
...
```
Note: `LowCardinality` field with an inner array field is not supported. Use an `ArrayField` with a `LowCardinality` inner field as seen in the example.
@ -232,7 +159,7 @@ For example, we can create a BooleanField which will hold `True` and `False` val
Here's the full implementation:
```python
from infi.clickhouse_orm.fields import Field
from infi.clickhouse_orm import Field
class BooleanField(Field):
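    # A sketch of the elided body - the attribute and method names follow the
    # ORM's Field API as used elsewhere in these docs; treat details as assumptions.

    # The ClickHouse column type to use
    db_type = 'UInt8'

    # The default value if none is given
    class_default = False

    def to_python(self, value, timezone_in_use):
        # Normalize accepted representations to bool
        if value in (1, '1', True):
            return True
        elif value in (0, '0', False):
            return False
        raise ValueError('Invalid value for BooleanField: %r' % value)

    def to_db_string(self, value, quote=True):
        # Store as UInt8: 1 for True, 0 for False
        return '1' if value else '0'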
@ -258,4 +185,4 @@ class BooleanField(Field):
---
[<< Querysets](querysets.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md)
[<< Field Options](field_options.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md)


@ -0,0 +1,89 @@
Importing ORM Classes
=====================
The ORM supports different styles of importing and referring to its classes, so choose what works for you from the options below.
Importing Everything
--------------------
It is safe to use `import *` from `infi.clickhouse_orm` or its submodules. Only classes that are needed by users of the ORM will get imported, and nothing else:
```python
from infi.clickhouse_orm import *
```
This is exactly equivalent to the following import statements:
```python
from infi.clickhouse_orm.database import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.funcs import *
from infi.clickhouse_orm.migrations import *
from infi.clickhouse_orm.models import *
from infi.clickhouse_orm.query import *
from infi.clickhouse_orm.system_models import *
```
By importing everything, all of the ORM's public classes can be used directly. For example:
```python
from infi.clickhouse_orm import *
class Event(Model):
name = StringField(default="EVENT")
repeated = UInt32Field(default=1)
created = DateTimeField(default=F.now())
engine = Memory()
```
Importing Everything into a Namespace
-------------------------------------
To prevent potential name clashes and to make the code more readable, you can import the ORM's classes into a namespace of your choosing, e.g. `orm`. For brevity, it is recommended to import the `F` class explicitly:
```python
import infi.clickhouse_orm as orm
from infi.clickhouse_orm import F
class Event(orm.Model):
name = orm.StringField(default="EVENT")
repeated = orm.UInt32Field(default=1)
created = orm.DateTimeField(default=F.now())
engine = orm.Memory()
```
Importing Specific Submodules
-----------------------------
It is possible to import only the submodules you need, and use their names to qualify the ORM's class names. This option is more verbose, but makes it clear where each class comes from. For example:
```python
from infi.clickhouse_orm import models, fields, engines, F
class Event(models.Model):
name = fields.StringField(default="EVENT")
repeated = fields.UInt32Field(default=1)
created = fields.DateTimeField(default=F.now())
engine = engines.Memory()
```
Importing Specific Classes
--------------------------
If you prefer, you can import only the specific ORM classes that you need directly from `infi.clickhouse_orm`:
```python
from infi.clickhouse_orm import Model, StringField, UInt32Field, DateTimeField, F, Memory
class Event(Model):
name = StringField(default="EVENT")
repeated = UInt32Field(default=1)
created = DateTimeField(default=F.now())
engine = Memory()
```
---
[<< Expressions](expressions.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md)


@ -1,9 +1,9 @@
Overview
========
This project is simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/). It allows you to define model classes whose instances can be written to the database and read from it.
This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.tech/). It allows you to define model classes whose instances can be written to the database and read from it.
It was tested on Python 2.7 and 3.5.
Version 1.x supports Python 2.7 and 3.5+. Version 2.x dropped support for Python 2.7, and works only with Python 3.5+.
Installation
------------
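The package can be installed from PyPI (a minimal sketch, assuming the standard package name):

    pip install infi.clickhouse_orm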


@ -10,16 +10,16 @@ Defining Models
Models are defined in a way reminiscent of Django's ORM, by subclassing `Model`:
from infi.clickhouse_orm import models, fields, engines
from infi.clickhouse_orm import Model, StringField, DateField, Float32Field, MergeTree
class Person(models.Model):
class Person(Model):
first_name = fields.StringField()
last_name = fields.StringField()
birthday = fields.DateField()
height = fields.Float32Field()
first_name = StringField()
last_name = StringField()
birthday = DateField()
height = Float32Field()
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
The columns in the database table are represented by model fields. Each field has a type, which matches the type of the corresponding database column. All the supported field types are listed [here](field_types.md).
@ -29,41 +29,45 @@ A model must have an `engine`, which determines how its table is stored on disk
Each field has a "natural" default value - empty string for string fields, zero for numeric fields etc. To specify a different value use the `default` parameter:
first_name = fields.StringField(default="anonymous")
first_name = StringField(default="anonymous")
For additional details see [here](field_options.md).
### Null values
To allow null values in a field, wrap it inside a `NullableField`:
birthday = fields.NullableField(fields.DateField())
birthday = NullableField(DateField())
In this case, the default value for that field becomes `null` unless otherwise specified.
For more information about `NullableField` see [Field Types](field_types.md).
### Materialized fields
The value of a materialized field is calculated from other fields in the model. For example:
year_born = fields.Int16Field(materialized="toYear(birthday)")
year_born = Int16Field(materialized=F.toYear(birthday))
Materialized fields are read-only, meaning that their values are not sent to the database when inserting records.
It is not possible to specify a default value for a materialized field.
For additional details see [here](field_options.md).
### Alias fields
An alias field is a field whose value is calculated by ClickHouse on the fly, as a function of other fields. It is not physically stored by the database. For example:
weekday_born = field.UInt8Field(alias="toDayOfWeek(birthday)")
weekday_born = UInt8Field(alias=F.toDayOfWeek(birthday))
Alias fields are read-only, meaning that their values are not sent to the database when inserting records.
It is not possible to specify a default value for an alias field.
For additional details see [here](field_options.md).
### Table Names
The table name used for the model is its class name, converted to lowercase. To override the default name, implement the `table_name` method:
class Person(models.Model):
class Person(Model):
...
@ -96,7 +100,7 @@ Inserting to the Database
To write your instances to ClickHouse, you need a `Database` instance:
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm import Database
db = Database('my_test_db')
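The table can then be created and populated (a minimal sketch using `Database.create_table` and `Database.insert`; the field values are illustrative):

    db.create_table(Person)
    db.insert([
        Person(first_name='Suzy', last_name='Jones', birthday='1990-01-01', height=1.70)
    ])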
@ -121,19 +125,19 @@ Reading from the Database
Loading model instances from the database is simple:
for person in db.select("SELECT * FROM my_test_db.person", model_class=Person):
print person.first_name, person.last_name
print(person.first_name, person.last_name)
Do not include a `FORMAT` clause in the query, since the ORM automatically sets the format to `TabSeparatedWithNamesAndTypes`.
It is possible to select only a subset of the columns, and the rest will receive their default values:
for person in db.select("SELECT first_name FROM my_test_db.person WHERE last_name='Smith'", model_class=Person):
print person.first_name
print(person.first_name)
The ORM provides a way to build simple queries without writing SQL by hand. The previous snippet can be written like this:
for person in Person.objects_in(db).filter(last_name='Smith').only('first_name'):
print person.first_name
for person in Person.objects_in(db).filter(Person.last_name == 'Smith').only('first_name'):
print(person.first_name)
See [Querysets](querysets.md) for more information.
@ -144,10 +148,16 @@ Reading without a Model
When running a query, specifying a model class is not required. In case you do not provide a model class, an ad-hoc class will be defined based on the column names and types returned by the query:
for row in db.select("SELECT max(height) as max_height FROM my_test_db.person"):
print row.max_height
print(row.max_height)
This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax.
It is also possible to generate a model class on the fly for an existing table in the database using `get_model_for_table`. This is particularly useful for querying system tables, for example:
QueryLog = db.get_model_for_table('query_log', system_table=True)
for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000):
print(row.query)
SQL Placeholders
----------------
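For example, the `$db` placeholder is replaced with the database name, as seen elsewhere on this page (a minimal sketch):

    # $db expands to 'my_test_db' when the query is run
    for person in db.select("SELECT * FROM $db.person", model_class=Person):
        print(person.first_name)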
@ -180,9 +190,9 @@ It is possible to paginate through model instances:
>>> order_by = 'first_name, last_name'
>>> page = db.paginate(Person, order_by, page_num=1, page_size=10)
>>> print page.number_of_objects
>>> print(page.number_of_objects)
2507
>>> print page.pages_total
>>> print(page.pages_total)
251
>>> for person in page.objects:
>>> # do something
@ -204,4 +214,4 @@ Note that `order_by` must be chosen so that the ordering is unique, otherwise th
---
[<< Overview](index.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md)
[<< Overview](index.md) | [Table of Contents](toc.md) | [Expressions >>](expressions.md)


@ -1,3 +1,4 @@
Querysets
=========
@ -8,7 +9,7 @@ A queryset is an object that represents a database query using a specific Model.
This queryset matches all Person instances in the database. You can get these instances using iteration:
for person in qs:
print person.first_name, person.last_name
print(person.first_name, person.last_name)
Filtering
---------
@ -16,31 +17,76 @@ Filtering
The `filter` and `exclude` methods are used for filtering the matching instances. Calling these methods returns a new queryset instance, with the added conditions. For example:
>>> qs = Person.objects_in(database)
>>> qs = qs.filter(first_name__startswith='V').exclude(birthday__lt='2000-01-01')
>>> qs = qs.filter(F.like(Person.first_name, 'V%')).exclude(Person.birthday < '2000-01-01')
>>> qs.conditions_as_sql()
u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')"
"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')"
It is possible to specify several fields to filter or exclude by:
It is possible to specify several expressions to filter or exclude by, and they will be ANDed together:
>>> qs = Person.objects_in(database).filter(last_name='Smith', height__gt=1.75)
>>> qs = Person.objects_in(database).filter(Person.last_name == 'Smith', Person.height > 1.75)
>>> qs.conditions_as_sql()
u"last_name = 'Smith' AND height > 1.75"
"last_name = 'Smith' AND height > 1.75"
For filters with compound conditions you can use `Q` objects inside `filter` with overloaded operators `&` (AND), `|` (OR) and `~` (NOT):
>>> qs = Person.objects_in(database).filter((Q(first_name='Ciaran', last_name='Carver') | Q(height_lte=1.8)) & ~Q(first_name='David'))
>>> qs.conditions_as_sql()
u"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"
By default conditions from `filter` and `exclude` methods are add to `WHERE` clause.
For better aggregation performance you can add them to `PREWHERE` section using `prewhere=True` parameter
For compound conditions you can use the overloaded operators `&` (AND), `|` (OR) and `~` (NOT):
>>> qs = Person.objects_in(database)
>>> qs = qs.filter(first_name__startswith='V', prewhere=True)
>>> qs.conditions_as_sql(prewhere=True)
u"first_name LIKE 'V%'"
>>> qs = qs.filter((((Person.first_name == 'Ciaran') & (Person.last_name == 'Carver')) | (Person.height <= 1.8)) & ~(Person.first_name == 'David'))
>>> qs.conditions_as_sql()
"((first_name = 'Ciaran' AND last_name = 'Carver') OR height <= 1.8) AND (NOT (first_name = 'David'))"
There are different operators that can be used, by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators.
Note that Python's bitwise operators (`&`, `|`, `~`, `^`) have higher precedence than comparison operators, so always use parentheses when combining these two types of operators in an expression. Otherwise the resulting SQL might be different than what you would expect.
### Using `IN` and `NOT IN`
Filtering queries using ClickHouse's `IN` and `NOT IN` operators requires using the `isIn` and `isNotIn` functions (trying to use Python's `in` keyword will not work!).
For example:
```python
# Is it Monday, Tuesday or Wednesday?
F.isIn(F.toDayOfWeek(F.now()), [1, 2, 3])
# This will not work:
F.toDayOfWeek(F.now()) in [1, 2, 3]
```
In case of model fields, there is a simplified syntax:
```python
# Filtering using F.isIn:
qs.filter(F.isIn(Person.first_name, ['Robert', 'Rob', 'Robbie']))
# Simpler syntax using isIn directly on the field:
qs.filter(Person.first_name.isIn(['Robert', 'Rob', 'Robbie']))
```
The `isIn` and `isNotIn` functions expect either a list/tuple of values, or another queryset (a subquery). For example if we want to select only people with Irish last names:
```python
# Last name is in a list of values
qs = Person.objects_in(database).filter(Person.last_name.isIn(["Murphy", "O'Sullivan"]))
# Last name is in a subquery
subquery = IrishLastName.objects_in(database).only("name")
qs = Person.objects_in(database).filter(Person.last_name.isIn(subquery))
```
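`isNotIn` is used the same way (a sketch):
```python
# Exclude Irish last names
qs = Person.objects_in(database).filter(F.isNotIn(Person.last_name, ["Murphy", "O'Sullivan"]))
```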
### Specifying PREWHERE conditions
By default, conditions from the `filter` and `exclude` methods are added to the `WHERE` clause.
For better aggregation performance you can add them to the `PREWHERE` section by passing a `prewhere=True` parameter:
>>> qs = Person.objects_in(database)
>>> qs = qs.filter(F.like(Person.first_name, 'V%'), prewhere=True)
>>> qs.conditions_as_sql(prewhere=True)
"first_name LIKE 'V%'"
### Old-style filter conditions
Prior to version 2 of the ORM, filtering conditions were limited to a predefined set of operators, and complex expressions were not supported. This old syntax is still available, so you can use it alongside or even intermixed with new-style functions and expressions.
The old syntax uses keyword arguments to the `filter` and `exclude` methods, built as `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). If no operator is given, `eq` is used by default. For example:
```python
qs = Position.objects_in(database)
# New style
qs = qs.filter(Position.x > 100, Position.y < 20, Position.terrain == 'water')
# Old style
qs = qs.filter(x__gt=100, y__lt=20, terrain='water')
```
Below are all the supported operators.
| Operator | Equivalent SQL | Comments |
| -------- | -------------------------------------------- | ---------------------------------- |
@ -51,8 +97,8 @@ There are different operators that can be used, by passing `<fieldname>__<operat
| `lt` | `field < value` | |
| `lte` | `field <= value` | |
| `between` | `field BETWEEN value1 AND value2` | |
| `in` | `field IN (values)` | See below |
| `not_in` | `field NOT IN (values)` | See below |
| `in` | `field IN (values)` | |
| `not_in` | `field NOT IN (values)` | |
| `contains` | `field LIKE '%value%'` | For string fields only |
| `startswith` | `field LIKE 'value%'` | For string fields only |
| `endswith` | `field LIKE '%value'` | For string fields only |
@ -61,27 +107,6 @@ There are different operators that can be used, by passing `<fieldname>__<operat
| `iendswith` | `lowerUTF8(field) LIKE lowerUTF8('%value')` | For string fields only |
| `iexact` | `lowerUTF8(field) = lowerUTF8(value)` | For string fields only |
### Using the `in` Operator
The `in` and `not_in` operators expect one of three types of values:
* A list or tuple of simple values
* A string, which is used verbatim as the contents of the parentheses
* Another queryset (subquery)
For example if we want to select only people with Irish last names:
# A list of simple values
qs = Person.objects_in(database).filter(last_name__in=["Murphy", "O'Sullivan"])
# A string
subquery = "SELECT name from $db.irishlastname"
qs = Person.objects_in(database).filter(last_name__in=subquery)
# A queryset
subquery = IrishLastName.objects_in(database).only("name")
qs = Person.objects_in(database).filter(last_name__in=subquery)
Counting and Checking Existence
-------------------------------
@ -128,8 +153,8 @@ Adds a DISTINCT clause to the query, meaning that any duplicate rows in the resu
Final
--------
This method can be used only with CollapsingMergeTree engine.
Adds a FINAL modifier to the query, meaning data is selected fully "collapsed" by sign field.
This method can be used only with `CollapsingMergeTree` engine.
Adds a FINAL modifier to the query, meaning that the selected data is fully "collapsed" by the engine's sign field.
>>> Person.objects_in(database).count()
100
@ -144,8 +169,7 @@ It is possible to get a specific item from the queryset by index:
qs = Person.objects_in(database).order_by('last_name', 'first_name')
first = qs[0]
It is also possible to get a range a instances using a slice. This returns a queryset,
that you can either iterate over or convert to a list.
It is also possible to get a range of instances using a slice. This returns a queryset that you can either iterate over or convert to a list.
qs = Person.objects_in(database).order_by('last_name', 'first_name')
first_ten_people = list(qs[:10])
@ -153,7 +177,7 @@ that you can either iterate over or convert to a list.
You should use `order_by` to ensure a consistent ordering of the results.
Trying to use negative indexes or a slice with a step (e.g. [0:100:2]) is not supported and will raise an `AssertionError`.
Trying to use negative indexes or a slice with a step (e.g. [0 : 100 : 2]) is not supported and will raise an `AssertionError`.
Pagination
----------
@ -162,9 +186,9 @@ Similar to `Database.paginate`, you can go over the queryset results one page at
>>> qs = Person.objects_in(database).order_by('last_name', 'first_name')
>>> page = qs.paginate(page_num=1, page_size=10)
>>> print page.number_of_objects
>>> print(page.number_of_objects)
2507
>>> print page.pages_total
>>> print(page.pages_total)
251
>>> for person in page.objects:
>>> # do something
@ -184,38 +208,44 @@ Aggregation
It is possible to use aggregation functions over querysets using the `aggregate` method. The simplest form of aggregation works over all rows in the queryset:
>>> qs = Person.objects_in(database).aggregate(average_height='avg(height)')
>>> print qs.count()
>>> qs = Person.objects_in(database).aggregate(average_height=F.avg(Person.height))
>>> print(qs.count())
1
>>> for row in qs: print row.average_height
>>> for row in qs: print(row.average_height)
1.71
The returned row or rows are no longer instances of the base model (`Person` in this example), but rather instances of an ad-hoc model that includes only the fields specified in the call to `aggregate`.
You can pass names of fields from the model that will be included in the query. By default, they will be also used in the GROUP BY clause. For example to count the number of people per last name you could do this:
You can pass fields from the model that will be included in the query. By default, they will also be used in the GROUP BY clause. For example, to count the number of people per last name you could do this:
qs = Person.objects_in(database).aggregate('last_name', num='count()')
qs = Person.objects_in(database).aggregate(Person.last_name, num=F.count())
The underlying SQL query would be something like this:
SELECT last_name, count() AS num FROM person GROUP BY last_name
SELECT last_name, count() AS num
FROM person
GROUP BY last_name
If you would like to control the GROUP BY explicitly, use the `group_by` method. This is useful when you need to group by a calculated field, instead of a field that exists in the model. For example, to count the number of people born on each weekday:
qs = Person.objects_in(database).aggregate(weekday='toDayOfWeek(birthday)', num='count()').group_by('weekday')
qs = Person.objects_in(database).aggregate(weekday=F.toDayOfWeek(Person.birthday), num=F.count()).group_by('weekday')
This queryset is translated to:
SELECT toDayOfWeek(birthday) AS weekday, count() AS num FROM person GROUP BY weekday
SELECT toDayOfWeek(birthday) AS weekday, count() AS num
FROM person
GROUP BY weekday
After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model.
After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the aggregated queryset on calculated fields, only on fields that exist in the model.
### Adding totals
If you limit aggregation results, it might be useful to get total aggregation values for all rows.
To achieve this, you can use the `with_totals` method. It will return an extra (last) row with values aggregated for all rows suitable for the filters.
qs = Person.objects_in(database).aggregate('first_name', num='count()').with_totals().order_by('-count')[:3]
>>> print qs.count()
qs = Person.objects_in(database).aggregate(Person.first_name, num=F.count()).with_totals().order_by('-num')[:3]
>>> print(qs.count())
4
>>> for row in qs:
>>> print("'{}': {}".format(row.first_name, row.count))
@ -225,4 +255,4 @@ values aggregated for all rows suitable for filters.
---
[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md)
[<< Importing ORM Classes](importing_orm_classes.md) | [Table of Contents](toc.md) | [Field Options >>](field_options.md)


@ -448,7 +448,7 @@ Extends Engine
Extends Engine
Here we define Buffer engine
Read more here https://clickhouse.yandex/reference_en.html#Buffer
Read more here https://clickhouse.tech/reference_en.html#Buffer
#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000)


@ -1,7 +1,7 @@
System Models
=============
[Clickhouse docs](https://clickhouse.yandex/docs/en/system_tables/).
[Clickhouse docs](https://clickhouse.tech/docs/en/operations/system-tables/).
System models are read only models for implementing part of the system's functionality, and for providing access to information about how the system is working.
@ -14,7 +14,7 @@ Currently the following system models are supported:
Partitions and Parts
--------------------
[ClickHouse docs](https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts).
[ClickHouse docs](https://clickhouse.tech/docs/en/sql-reference/statements/alter/#alter_manipulations-with-partitions).
A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part.
@ -30,8 +30,7 @@ A partition in a table is data for a single calendar month. Table "system.parts"
Usage example:
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.system_models import SystemPart
from infi.clickhouse_orm import Database, SystemPart
db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database
if len(partitions) > 0:


@ -1,7 +1,7 @@
Table Engines
=============
See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/table_engines/)
See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/engines/table-engines/)
Each model must have an engine instance, used when creating the table in ClickHouse.
@ -24,11 +24,11 @@ Simple Engines
`TinyLog`, `Log` and `Memory` engines do not require any parameters:
engine = engines.TinyLog()
engine = TinyLog()
engine = engines.Log()
engine = Log()
engine = engines.Memory()
engine = Memory()
Engines in the MergeTree Family
@ -36,28 +36,28 @@ Engines in the MergeTree Family
To define a `MergeTree` engine, supply the date column name and the names (or expressions) for the key columns:
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'))
engine = MergeTree('EventDate', ('CounterID', 'EventDate'))
You may also provide a sampling expression:
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)')
engine = MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr=F.intHash32(UserID))
A `CollapsingMergeTree` engine is defined in a similar manner, but requires also a sign column:
engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')
engine = CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')
For a `SummingMergeTree` you can optionally specify the summing columns:
engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
summing_cols=('Shows', 'Clicks', 'Cost'))
engine = SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
summing_cols=('Shows', 'Clicks', 'Cost'))
For a `ReplacingMergeTree` you can optionally specify the version column:
engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')
engine = ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')
### Custom partitioning
ClickHouse supports [custom partitioning](https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/) expressions since version 1.1.54310
ClickHouse supports [custom partitioning](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key/) expressions since version 1.1.54310
You can use custom partitioning with any `MergeTree` family engine.
To set custom partitioning:
@ -69,12 +69,12 @@ Standard monthly partitioning by date column can be specified using the `toYYYYM
Example:
engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
partition_key=('toYYYYMM(EventDate)', 'BannerID'))
engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
partition_key=(F.toYYYYMM(EventDate), 'BannerID'))
### Primary key
ClickHouse supports [custom primary key](https://clickhouse.yandex/docs/en/operations/table_engines/mergetree/#primary-keys-and-indexes-in-queries/) expressions since version 1.1.54310
ClickHouse supports [custom primary key](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#primary-keys-and-indexes-in-queries) expressions since version 1.1.54310
You can use a custom primary key with any `MergeTree` family engine.
To set a custom primary key, add the `primary_key` parameter. It should be a tuple of the expressions that the primary key is built from.
@ -83,34 +83,34 @@ By default primary key is equal to order_by expression
Example:
engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
partition_key=('toYYYYMM(EventDate)', 'BannerID'), primary_key=('OrderID',))
engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
partition_key=(F.toYYYYMM(EventDate), 'BannerID'), primary_key=('OrderID',))
### Data Replication
Any of the above engines can be converted to a replicated engine (e.g. `ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`:
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'),
replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
replica_name='{replica}')
engine = MergeTree('EventDate', ('CounterID', 'EventDate'),
replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
replica_name='{replica}')
Buffer Engine
-------------
A `Buffer` engine is only used in conjunction with a `BufferModel`.
The model should be a subclass of both `models.BufferModel` and the main model.
The model should be a subclass of both `BufferModel` and the main model.
The main model is also passed to the engine:
class PersonBuffer(models.BufferModel, Person):
class PersonBuffer(BufferModel, Person):
engine = engines.Buffer(Person)
engine = Buffer(Person)
Additional buffer parameters can optionally be specified:
engine = engines.Buffer(Person, num_layers=16, min_time=10,
max_time=100, min_rows=10000, max_rows=1000000,
min_bytes=10000000, max_bytes=100000000)
engine = Buffer(Person, num_layers=16, min_time=10,
max_time=100, min_rows=10000, max_rows=1000000,
min_bytes=10000000, max_bytes=100000000)
Then you can insert objects into the Buffer model and they will be handled by ClickHouse properly:
@ -123,14 +123,14 @@ Then you can insert objects into Buffer model and they will be handled by ClickH
Merge Engine
-------------
[ClickHouse docs](https://clickhouse.yandex/docs/en/table_engines/merge/)
[ClickHouse docs](https://clickhouse.tech/docs/en/operations/table_engines/merge/)
A `Merge` engine is only used in conjunction with a `MergeModel`.
This table does not store data itself, but allows reading from any number of other tables simultaneously, so you cannot insert into it.
The engine parameter specifies a re2 (similar to PCRE) regular expression; data is selected from the tables whose names match it.
class MergeTable(models.MergeModel):
engine = engines.Merge('^table_prefix')
class MergeTable(MergeModel):
engine = Merge('^table_prefix')
---


@ -20,7 +20,9 @@
* [Querysets](querysets.md#querysets)
* [Filtering](querysets.md#filtering)
* [Using the in Operator](querysets.md#using-the-in-operator)
* [Using IN and NOT IN](querysets.md#using-in-and-not-in)
* [Specifying PREWHERE conditions](querysets.md#specifying-prewhere-conditions)
* [Old-style filter conditions](querysets.md#old-style-filter-conditions)
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
* [Ordering](querysets.md#ordering)
* [Omitting Fields](querysets.md#omitting-fields)
@ -29,14 +31,19 @@
* [Slicing](querysets.md#slicing)
* [Pagination](querysets.md#pagination)
* [Aggregation](querysets.md#aggregation)
* [Adding totals](querysets.md#adding-totals)
* [Field Options](field_options.md#field-options)
* [default](field_options.md#default)
* [alias / materialized](field_options.md#alias-/-materialized)
* [codec](field_options.md#codec)
* [readonly](field_options.md#readonly)
* [Field Types](field_types.md#field-types)
* [DateTimeField and Time Zones](field_types.md#datetimefield-and-time-zones)
* [Working with enum fields](field_types.md#working-with-enum-fields)
* [Working with array fields](field_types.md#working-with-array-fields)
* [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields)
* [Working with nullable fields](field_types.md#working-with-nullable-fields)
* [Working with field compression codecs](field_types.md#working-with-field-compression-codecs)
* [Working with LowCardinality fields](field_types.md#working-with-lowcardinality-fields)
* [Creating custom field types](field_types.md#creating-custom-field-types)
@ -44,6 +51,7 @@
* [Simple Engines](table_engines.md#simple-engines)
* [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family)
* [Custom partitioning](table_engines.md#custom-partitioning)
* [Primary key](table_engines.md#primary-key)
* [Data Replication](table_engines.md#data-replication)
* [Buffer Engine](table_engines.md#buffer-engine)
* [Merge Engine](table_engines.md#merge-engine)
@ -60,14 +68,14 @@
* [Tests](contributing.md#tests)
* [Class Reference](class_reference.md#class-reference)
* [infi.clickhouse_orm.database](class_reference.md#infi.clickhouse_orm.database)
* [infi.clickhouse_orm.database](class_reference.md#inficlickhouse_ormdatabase)
* [Database](class_reference.md#database)
* [DatabaseException](class_reference.md#databaseexception)
* [infi.clickhouse_orm.models](class_reference.md#infi.clickhouse_orm.models)
* [infi.clickhouse_orm.models](class_reference.md#inficlickhouse_ormmodels)
* [Model](class_reference.md#model)
* [BufferModel](class_reference.md#buffermodel)
* [DistributedModel](class_reference.md#distributedmodel)
* [infi.clickhouse_orm.fields](class_reference.md#infi.clickhouse_orm.fields)
* [infi.clickhouse_orm.fields](class_reference.md#inficlickhouse_ormfields)
* [ArrayField](class_reference.md#arrayfield)
* [BaseEnumField](class_reference.md#baseenumfield)
* [BaseFloatField](class_reference.md#basefloatfield)
@ -84,6 +92,8 @@
* [FixedStringField](class_reference.md#fixedstringfield)
* [Float32Field](class_reference.md#float32field)
* [Float64Field](class_reference.md#float64field)
* [IPv4Field](class_reference.md#ipv4field)
* [IPv6Field](class_reference.md#ipv6field)
* [Int16Field](class_reference.md#int16field)
* [Int32Field](class_reference.md#int32field)
* [Int64Field](class_reference.md#int64field)
@ -96,7 +106,7 @@
* [UInt64Field](class_reference.md#uint64field)
* [UInt8Field](class_reference.md#uint8field)
* [UUIDField](class_reference.md#uuidfield)
* [infi.clickhouse_orm.engines](class_reference.md#infi.clickhouse_orm.engines)
* [infi.clickhouse_orm.engines](class_reference.md#inficlickhouse_ormengines)
* [Engine](class_reference.md#engine)
* [TinyLog](class_reference.md#tinylog)
* [Log](class_reference.md#log)
@ -108,7 +118,10 @@
* [CollapsingMergeTree](class_reference.md#collapsingmergetree)
* [SummingMergeTree](class_reference.md#summingmergetree)
* [ReplacingMergeTree](class_reference.md#replacingmergetree)
* [infi.clickhouse_orm.query](class_reference.md#infi.clickhouse_orm.query)
* [infi.clickhouse_orm.query](class_reference.md#inficlickhouse_ormquery)
* [QuerySet](class_reference.md#queryset)
* [AggregateQuerySet](class_reference.md#aggregatequeryset)
* [Q](class_reference.md#q)
* [infi.clickhouse_orm.funcs](class_reference.md#inficlickhouse_ormfuncs)
* [F](class_reference.md#f)
View File
@ -0,0 +1,58 @@
What's New in Version 2
=======================
## Python 3.5+ Only
This version of the ORM no longer supports Python 2.
## New flexible syntax for database expressions and functions
Expressions that use model fields, database functions and Python operators are now first-class citizens of the ORM. They provide infinite expressivity and flexibility when defining models and generating queries.
Example of expressions in model definition:
```python
class Temperature(Model):
station_id = UInt16Field()
timestamp = DateTimeField(default=F.now()) # function as default value
degrees_celsius = Float32Field()
degrees_fahrenheit = Float32Field(alias=degrees_celsius * 1.8 + 32) # expression as field alias
# expressions in engine definition
engine = MergeTree(partition_key=[F.toYYYYMM(timestamp)], order_by=[station_id, timestamp])
```
Example of expressions in queries:
```python
db = Database('default')
start = F.toStartOfMonth(F.now())
expr = (Temperature.timestamp > start) & (Temperature.station_id == 123) & (Temperature.degrees_celsius > 30)
for t in Temperature.objects_in(db).filter(expr):
print(t.timestamp, t.degrees_celsius)
```
See [Expressions](expressions.md).
## Support for IPv4 and IPv6 fields
Two new field classes were added: `IPv4Field` and `IPv6Field`. Their values are represented by Python's `ipaddress.IPv4Address` and `ipaddress.IPv6Address`.
See [Field Types](field_types.md).
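For example, a minimal sketch (the model and values are illustrative):
```python
class Visit(Model):
    visitor_ip = IPv4Field()
    engine = Memory()

db.create_table(Visit)
db.insert([Visit(visitor_ip='10.20.30.40')])
print(Visit.objects_in(db)[0].visitor_ip)  # IPv4Address('10.20.30.40')
```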
## Automatic generation of models by inspecting existing tables
It is now easy to generate a model class on the fly for an existing table in the database using `Database.get_model_for_table`. This is particularly useful for querying system tables, for example:
```python
QueryLog = db.get_model_for_table('query_log', system_table=True)
for row in QueryLog.objects_in(db).filter(QueryLog.query_duration_ms > 10000):
print(row.query)
```
## Convenient ways to import ORM classes
You can now import all ORM classes directly from `infi.clickhouse_orm`, without worrying about sub-modules. For example:
```python
from infi.clickhouse_orm import Database, Model, StringField, DateTimeField, MergeTree
```
See [Importing ORM Classes](importing_orm_classes.md).
View File
@ -125,6 +125,8 @@ if __name__ == '__main__':
from infi.clickhouse_orm import engines
from infi.clickhouse_orm import models
from infi.clickhouse_orm import query
from infi.clickhouse_orm import funcs
from infi.clickhouse_orm import system_models
print('Class Reference')
print('===============')
@ -133,4 +135,6 @@ if __name__ == '__main__':
module_doc([models.Model, models.BufferModel, models.DistributedModel])
module_doc(sorted([fields.Field] + all_subclasses(fields.Field), key=lambda x: x.__name__), False)
module_doc([engines.Engine] + all_subclasses(engines.Engine), False)
module_doc([query.QuerySet, query.AggregateQuerySet])
module_doc([query.QuerySet, query.AggregateQuerySet, query.Q])
module_doc([funcs.F])
module_doc([system_models.SystemPart])
View File
@ -9,6 +9,7 @@ printf "# Table of Contents\n\n" > toc.md
generate_one "index.md"
generate_one "models_and_databases.md"
generate_one "querysets.md"
generate_one "field_options.md"
generate_one "field_types.md"
generate_one "table_engines.md"
generate_one "schema_migrations.md"
View File
@ -1,4 +1,4 @@
from HTMLParser import HTMLParser
from html.parser import HTMLParser
import sys
@ -17,8 +17,8 @@ class HeadersToMarkdownParser(HTMLParser):
def handle_endtag(self, tag):
if tag.lower() in HEADER_TAGS:
indent = ' ' * int(self.inside[1])
fragment = self.text.lower().replace(' ', '-')
print '%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment)
fragment = self.text.lower().replace(' ', '-').replace('.', '')
print('%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment))
self.inside = None
self.text = ''
@ -28,4 +28,4 @@ class HeadersToMarkdownParser(HTMLParser):
HeadersToMarkdownParser().feed(sys.stdin.read())
print
print('')
View File
@ -1 +1,13 @@
__import__("pkg_resources").declare_namespace(__name__)
from infi.clickhouse_orm.database import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.funcs import *
from infi.clickhouse_orm.migrations import *
from infi.clickhouse_orm.models import *
from infi.clickhouse_orm.query import *
from infi.clickhouse_orm.system_models import *
from inspect import isclass
__all__ = [c.__name__ for c in locals().values() if isclass(c)]
View File
@ -8,7 +8,6 @@ from .utils import escape, parse_tsv, import_submodules
from math import ceil
import datetime
from string import Template
from six import PY3, string_types
import pytz
import logging
@ -166,6 +165,24 @@ class Database(object):
r = self._send(sql % (self.db_name, model_class.table_name()))
return r.text.strip() == '1'
def get_model_for_table(self, table_name, system_table=False):
'''
Generates a model class from an existing table in the database.
This can be used for querying tables which don't have a corresponding model class,
for example system tables.
- `table_name`: the table to create a model for
- `system_table`: whether the table is a system table, or belongs to the current database
'''
db_name = 'system' if system_table else self.db_name
sql = "DESCRIBE `%s`.`%s` FORMAT TSV" % (db_name, table_name)
lines = self._send(sql).iter_lines()
fields = [parse_tsv(line)[:2] for line in lines]
model = ModelBase.create_ad_hoc_model(fields, table_name)
if system_table:
model._system = model._readonly = True
return model
def add_setting(self, name, value):
'''
Adds a database setting that will be sent with every request.
@ -174,7 +191,7 @@ class Database(object):
The name must be a string, and the value is converted to a string in case
it isn't. To remove a setting, pass `None` as the value.
'''
assert isinstance(name, string_types), 'Setting name must be a string'
assert isinstance(name, str), 'Setting name must be a string'
if value is None:
self.settings.pop(name, None)
else:
@ -187,7 +204,6 @@ class Database(object):
- `model_instances`: any iterable containing instances of a single model class.
- `batch_size`: number of records to send per chunk (use a lower number if your records are very large).
'''
from six import next
from io import BytesIO
i = iter(model_instances)
try:
@ -201,20 +217,19 @@ class Database(object):
fields_list = ','.join(
['`%s`' % name for name in first_instance.fields(writable=True)])
fmt = 'TSKV' if model_class.has_funcs_as_defaults() else 'TabSeparated'
query = 'INSERT INTO $table (%s) FORMAT %s\n' % (fields_list, fmt)
def gen():
buf = BytesIO()
query = 'INSERT INTO $table (%s) FORMAT TabSeparated\n' % fields_list
buf.write(self._substitute(query, model_class).encode('utf-8'))
first_instance.set_database(self)
buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8'))
buf.write('\n'.encode('utf-8'))
buf.write(first_instance.to_db_string())
# Collect lines in batches of batch_size
lines = 2
for instance in i:
instance.set_database(self)
buf.write(instance.to_tsv(include_readonly=False).encode('utf-8'))
buf.write('\n'.encode('utf-8'))
buf.write(instance.to_db_string())
lines += 1
if lines >= batch_size:
# Return the current batch of lines
@ -234,9 +249,12 @@ class Database(object):
- `model_class`: the model to count.
- `conditions`: optional SQL conditions (contents of the WHERE clause).
'''
from infi.clickhouse_orm.query import Q
query = 'SELECT count() FROM $table'
if conditions:
query += ' WHERE ' + conditions
if isinstance(conditions, Q):
conditions = conditions.to_sql(model_class)
query += ' WHERE ' + str(conditions)
query = self._substitute(query, model_class)
r = self._send(query)
return int(r.text) if r.text else 0
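# Usage sketch (Person is a hypothetical model class):
#   db.count(Person)                             # all rows
#   db.count(Person, "first_name = 'Courtney'")  # conditions as a SQL string
#   db.count(Person, Q(first_name='Courtney'))   # conditions as a Q object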
@ -288,6 +306,7 @@ class Database(object):
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
'''
from infi.clickhouse_orm.query import Q
count = self.count(model_class, conditions)
pages_total = int(ceil(count / float(page_size)))
if page_num == -1:
@ -297,7 +316,9 @@ class Database(object):
offset = (page_num - 1) * page_size
query = 'SELECT * FROM $table'
if conditions:
query += ' WHERE ' + conditions
if isinstance(conditions, Q):
conditions = conditions.to_sql(model_class)
query += ' WHERE ' + str(conditions)
query += ' ORDER BY %s' % order_by
query += ' LIMIT %d, %d' % (offset, page_size)
query = self._substitute(query, model_class)
@ -338,7 +359,7 @@ class Database(object):
return set(obj.module_name for obj in self.select(query))
def _send(self, data, settings=None, stream=False):
if isinstance(data, string_types):
if isinstance(data, str):
data = data.encode('utf-8')
if self.log_statements:
logger.info(data)
@ -366,7 +387,7 @@ class Database(object):
mapping = dict(db="`%s`" % self.db_name)
if model_class:
if model_class.is_system_model():
mapping['table'] = model_class.table_name()
mapping['table'] = "`system`.`%s`" % model_class.table_name()
else:
mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name())
query = Template(query).safe_substitute(mapping)
@ -396,3 +417,7 @@ class Database(object):
def _is_connection_readonly(self):
r = self._send("SELECT value FROM system.settings WHERE name = 'readonly'")
return r.text.strip() != '0'
# Expose only relevant classes in import *
__all__ = [c.__name__ for c in [Page, DatabaseException, ServerError, Database]]
View File
@ -1,9 +1,8 @@
from __future__ import unicode_literals
import logging
import six
from .utils import comma_join
from .utils import comma_join, get_subclass_names
logger = logging.getLogger('clickhouse_orm')
@ -38,8 +37,8 @@ class MergeTree(Engine):
index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None,
primary_key=None):
assert type(order_by) in (list, tuple), 'order_by must be a list or tuple'
assert date_col is None or isinstance(date_col, str), 'date_col must be string if present'
assert primary_key is None or type(primary_key) in (list, tuple), 'primary_key must be a list or tuple'
assert date_col is None or isinstance(date_col, six.string_types), 'date_col must be string if present'
assert partition_key is None or type(partition_key) in (list, tuple),\
'partition_key must be tuple or list if present'
assert (replica_table_path is None) == (replica_name is None), \
@ -75,14 +74,15 @@ class MergeTree(Engine):
name = 'Replicated' + name
# In ClickHouse 1.1.54310 custom partitioning key was introduced
# https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
# https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
# Let's check version and use new syntax if available
if db.server_version >= (1, 1, 54310):
partition_sql = "PARTITION BY %s ORDER BY %s" \
% ('(%s)' % comma_join(self.partition_key), '(%s)' % comma_join(self.order_by))
partition_sql = "PARTITION BY (%s) ORDER BY (%s)" \
% (comma_join(self.partition_key, stringify=True),
comma_join(self.order_by, stringify=True))
if self.primary_key:
partition_sql += " PRIMARY KEY (%s)" % comma_join(self.primary_key)
partition_sql += " PRIMARY KEY (%s)" % comma_join(self.primary_key, stringify=True)
if self.sampling_expr:
partition_sql += " SAMPLE BY %s" % self.sampling_expr
@ -94,7 +94,7 @@ class MergeTree(Engine):
from infi.clickhouse_orm.database import DatabaseException
raise DatabaseException("Custom partitioning is not supported before ClickHouse 1.1.54310. "
"Please update your server or use date_col syntax."
"https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/")
"https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/")
else:
partition_sql = ''
@ -107,14 +107,14 @@ class MergeTree(Engine):
params += ["'%s'" % self.replica_table_path, "'%s'" % self.replica_name]
# In ClickHouse 1.1.54310 custom partitioning key was introduced
# https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
# https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
# These parameters are processed in create_table_sql directly.
# In previous ClickHouse versions this syntax does not work.
if db.server_version < (1, 1, 54310):
params.append(self.date_col)
if self.sampling_expr:
params.append(self.sampling_expr)
params.append('(%s)' % comma_join(self.order_by))
params.append('(%s)' % comma_join(self.order_by, stringify=True))
params.append(str(self.index_granularity))
return params
@ -172,7 +172,7 @@ class Buffer(Engine):
"""
Buffers the data to write in RAM, periodically flushing it to another table.
Must be used in conjuction with a `BufferModel`.
Read more [here](https://clickhouse.yandex/docs/en/table_engines/buffer/).
Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/).
"""
#Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
@ -203,11 +203,11 @@ class Merge(Engine):
The Merge engine (not to be confused with MergeTree) does not store data itself,
but allows reading from any number of other tables simultaneously.
Writing to a table is not supported
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
"""
def __init__(self, table_regex):
assert isinstance(table_regex, six.string_types), "'table_regex' parameter must be string"
assert isinstance(table_regex, str), "'table_regex' parameter must be string"
self.table_regex = table_regex
def create_table_sql(self, db):
@ -222,15 +222,15 @@ class Distributed(Engine):
During a read, the table indexes on remote servers are used, if there are any.
See full documentation here
https://clickhouse.yandex/docs/en/table_engines/distributed.html
https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
"""
def __init__(self, cluster, table=None, sharding_key=None):
"""
:param cluster: what cluster to access data from
:param table: underlying table that actually stores data.
- `cluster`: what cluster to access data from
- `table`: underlying table that actually stores data.
If you are not specifying any table here, ensure that it can be inferred
from your model's superclass (see models.DistributedModel.fix_engine_table)
:param sharding_key: how to distribute data among shards when inserting
- `sharding_key`: how to distribute data among shards when inserting
straightly into Distributed table, optional
"""
self.cluster = cluster
@ -263,3 +263,7 @@ class Distributed(Engine):
if self.sharding_key:
params.append(self.sharding_key)
return params
# Expose only relevant classes in import *
__all__ = get_subclass_names(locals(), Engine)
View File
@ -1,5 +1,4 @@
from __future__ import unicode_literals
from six import string_types, text_type, binary_type, integer_types
import datetime
import iso8601
import pytz
@ -7,27 +6,32 @@ from calendar import timegm
from decimal import Decimal, localcontext
from uuid import UUID
from logging import getLogger
from .utils import escape, parse_array, comma_join
from .utils import escape, parse_array, comma_join, string_or_func, get_subclass_names
from .funcs import F, FunctionOperatorsMixin
from ipaddress import IPv4Address, IPv6Address
logger = getLogger('clickhouse_orm')
class Field(object):
class Field(FunctionOperatorsMixin):
'''
Abstract base class for all field types.
'''
creation_counter = 0
class_default = 0
db_type = None
name = None # this is set by the parent model
parent = None # this is set by the parent model
creation_counter = 0 # used for keeping the model fields ordered
class_default = 0 # should be overridden by concrete subclasses
db_type = None # should be overridden by concrete subclasses
def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None):
assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \
assert [default, alias, materialized].count(None) >= 2, \
"Only one of default, alias and materialized parameters can be given"
assert alias is None or isinstance(alias, string_types) and alias != "",\
"Alias field must be a string, if given"
assert materialized is None or isinstance(materialized, string_types) and materialized != "",\
"Materialized field must be string, if given"
assert alias is None or isinstance(alias, F) or isinstance(alias, str) and alias != "",\
"Alias parameter must be a string or function object, if given"
assert materialized is None or isinstance(materialized, F) or isinstance(materialized, str) and materialized != "",\
"Materialized parameter must be a string or function object, if given"
assert readonly is None or type(readonly) is bool, "readonly parameter must be bool if given"
assert codec is None or isinstance(codec, string_types) and codec != "", \
assert codec is None or isinstance(codec, str) and codec != "", \
"Codec field must be string, if given"
self.creation_counter = Field.creation_counter
@ -38,6 +42,12 @@ class Field(object):
self.readonly = bool(self.alias or self.materialized or readonly)
self.codec = codec
def __str__(self):
return self.name
def __repr__(self):
return '<%s>' % self.__class__.__name__
def to_python(self, value, timezone_in_use):
'''
Converts the input value into the expected Python data type, raising ValueError if the
@ -70,9 +80,10 @@ class Field(object):
def get_sql(self, with_default_expression=True, db=None):
'''
Returns an SQL expression describing the field (e.g. for CREATE TABLE).
:param with_default_expression: If True, adds default value to sql.
- `with_default_expression`: If True, adds default value to sql.
It doesn't affect fields with alias and materialized values.
:param db: Database, used for checking supported features.
- `db`: Database, used for checking supported features.
'''
sql = self.db_type
if with_default_expression:
@ -82,9 +93,11 @@ class Field(object):
def _extra_params(self, db):
sql = ''
if self.alias:
sql += ' ALIAS %s' % self.alias
sql += ' ALIAS %s' % string_or_func(self.alias)
elif self.materialized:
sql += ' MATERIALIZED %s' % self.materialized
sql += ' MATERIALIZED %s' % string_or_func(self.materialized)
elif isinstance(self.default, F):
sql += ' DEFAULT %s' % self.default.to_sql()
elif self.default:
default = self.to_db_string(self.default)
sql += ' DEFAULT %s' % default
@ -96,8 +109,10 @@ class Field(object):
"""
Checks whether this field, or any of its inner fields (recursively),
is an instance of one of the provided types.
:param types: Iterable of types to check inclusion of instance
:return: Boolean
- `types`: Iterable of types to check inclusion of instance
Returns: Boolean
"""
if isinstance(self, types):
return True
@ -115,9 +130,9 @@ class StringField(Field):
db_type = 'String'
def to_python(self, value, timezone_in_use):
if isinstance(value, text_type):
if isinstance(value, str):
return value
if isinstance(value, binary_type):
if isinstance(value, bytes):
return value.decode('UTF-8')
raise ValueError('Invalid value for %s: %r' % (self.__class__.__name__, value))
@ -134,7 +149,7 @@ class FixedStringField(StringField):
return value.rstrip('\0')
def validate(self, value):
if isinstance(value, text_type):
if isinstance(value, str):
value = value.encode('UTF-8')
if len(value) > self._length:
raise ValueError('Value of %d bytes is too long for FixedStringField(%d)' % (len(value), self._length))
@ -154,7 +169,7 @@ class DateField(Field):
return value
if isinstance(value, int):
return DateField.class_default + datetime.timedelta(days=value)
if isinstance(value, string_types):
if isinstance(value, str):
if value == '0000-00-00':
return DateField.min_value
return datetime.datetime.strptime(value, '%Y-%m-%d').date()
@ -179,7 +194,7 @@ class DateTimeField(Field):
return datetime.datetime(value.year, value.month, value.day, tzinfo=pytz.utc)
if isinstance(value, int):
return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc)
if isinstance(value, string_types):
if isinstance(value, str):
if value == '0000-00-00 00:00:00':
return self.class_default
if len(value) == 10:
@ -192,7 +207,7 @@ class DateTimeField(Field):
# left the date naive in case of no tzinfo set
dt = iso8601.parse_date(value, default_timezone=None)
except iso8601.ParseError as e:
raise ValueError(text_type(e))
raise ValueError(str(e))
# convert naive to aware
if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None:
@ -217,7 +232,7 @@ class BaseIntField(Field):
def to_db_string(self, value, quote=True):
# There's no need to call escape since numbers do not contain
# special characters, and never need quoting
return text_type(value)
return str(value)
def validate(self, value):
self._range_check(value, self.min_value, self.max_value)
@ -293,7 +308,7 @@ class BaseFloatField(Field):
def to_db_string(self, value, quote=True):
# There's no need to call escape since numbers do not contain
# special characters, and never need quoting
return text_type(value)
return str(value)
class Float32Field(BaseFloatField):
@ -337,7 +352,7 @@ class DecimalField(Field):
def to_db_string(self, value, quote=True):
# There's no need to call escape since numbers do not contain
# special characters, and never need quoting
return text_type(value)
return str(value)
def _round(self, value):
return value.quantize(self.exp)
@ -382,9 +397,9 @@ class BaseEnumField(Field):
if isinstance(value, self.enum_cls):
return value
try:
if isinstance(value, text_type):
if isinstance(value, str):
return self.enum_cls[value]
if isinstance(value, binary_type):
if isinstance(value, bytes):
return self.enum_cls[value.decode('UTF-8')]
if isinstance(value, int):
return self.enum_cls(value)
@ -414,7 +429,7 @@ class BaseEnumField(Field):
import re
from enum import Enum
members = {}
for match in re.finditer("'(\w+)' = (-?\d+)", db_type):
for match in re.finditer(r"'([\w ]+)' = (-?\d+)", db_type):
members[match.group(1)] = int(match.group(2))
enum_cls = Enum('AdHocEnum', members)
field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field
@ -442,9 +457,9 @@ class ArrayField(Field):
super(ArrayField, self).__init__(default, alias, materialized, readonly, codec)
def to_python(self, value, timezone_in_use):
if isinstance(value, text_type):
if isinstance(value, str):
value = parse_array(value)
elif isinstance(value, binary_type):
elif isinstance(value, bytes):
value = parse_array(value.decode('UTF-8'))
elif not isinstance(value, (list, tuple)):
raise ValueError('ArrayField expects list or tuple, not %s' % type(value))
@ -473,11 +488,11 @@ class UUIDField(Field):
def to_python(self, value, timezone_in_use):
if isinstance(value, UUID):
return value
elif isinstance(value, binary_type):
elif isinstance(value, bytes):
return UUID(bytes=value)
elif isinstance(value, string_types):
elif isinstance(value, str):
return UUID(value)
elif isinstance(value, integer_types):
elif isinstance(value, int):
return UUID(int=value)
elif isinstance(value, tuple):
return UUID(fields=value)
@ -488,12 +503,47 @@ class UUIDField(Field):
return escape(str(value), quote)
class IPv4Field(Field):
class_default = 0
db_type = 'IPv4'
def to_python(self, value, timezone_in_use):
if isinstance(value, IPv4Address):
return value
elif isinstance(value, (bytes, str, int)):
return IPv4Address(value)
else:
raise ValueError('Invalid value for IPv4Address: %r' % value)
def to_db_string(self, value, quote=True):
return escape(str(value), quote)
class IPv6Field(Field):
class_default = 0
db_type = 'IPv6'
def to_python(self, value, timezone_in_use):
if isinstance(value, IPv6Address):
return value
elif isinstance(value, (bytes, str, int)):
return IPv6Address(value)
else:
raise ValueError('Invalid value for IPv6Address: %r' % value)
def to_db_string(self, value, quote=True):
return escape(str(value), quote)
class NullableField(Field):
class_default = None
def __init__(self, inner_field, default=None, alias=None, materialized=None,
extra_null_values=None, codec=None):
assert isinstance(inner_field, Field), "The first argument of NullableField must be a Field instance. Not: {}".format(inner_field)
self.inner_field = inner_field
self._null_values = [None]
if extra_null_values:
@ -548,3 +598,8 @@ class LowCardinalityField(Field):
if with_default_expression:
sql += self._extra_params(db)
return sql
# Expose only relevant classes in import *
__all__ = get_subclass_names(locals(), Field)
File diff suppressed because it is too large
View File
@ -1,12 +1,7 @@
import six
from .models import Model, BufferModel
from .fields import DateField, StringField
from .engines import MergeTree
from .utils import escape
from six.moves import zip
from six import iteritems
from .utils import escape, get_subclass_names
import logging
logger = logging.getLogger('migrations')
@ -74,7 +69,7 @@ class AlterTable(Operation):
# Identify fields that were added to the model
prev_name = None
for name, field in iteritems(self.model_class.fields()):
for name, field in self.model_class.fields().items():
is_regular_field = not (field.materialized or field.alias)
if name not in table_fields:
logger.info(' Add column %s', name)
@ -94,7 +89,7 @@ class AlterTable(Operation):
# Secondly, MATERIALIZED and ALIAS fields are always at the end of the DESC, so we can't expect them to save
# attribute position. Watch https://github.com/Infinidat/infi.clickhouse_orm/issues/47
model_fields = {name: field.get_sql(with_default_expression=False, db=database)
for name, field in iteritems(self.model_class.fields())}
for name, field in self.model_class.fields().items()}
for field_name, field_sql in self._get_table_fields(database):
# All fields must have been created and dropped by this moment
assert field_name in model_fields, 'Model fields and table columns in disagreement'
@ -156,7 +151,7 @@ class RunSQL(Operation):
'''
def __init__(self, sql):
if isinstance(sql, six.string_types):
if isinstance(sql, str):
sql = [sql]
assert isinstance(sql, list), "'sql' parameter must be string or list of strings"
@ -182,3 +177,7 @@ class MigrationHistory(Model):
@classmethod
def table_name(cls):
return 'infi_clickhouse_orm_migrations'
# Expose only relevant classes in import *
__all__ = get_subclass_names(locals(), Operation)
View File
@ -3,12 +3,13 @@ import sys
from collections import OrderedDict
from logging import getLogger
from six import with_metaclass, reraise, iteritems
from six import reraise
import pytz
from .fields import Field, StringField
from .utils import parse_tsv
from .utils import parse_tsv, NO_VALUE, get_subclass_names
from .query import QuerySet
from .funcs import F
from .engines import Merge, Distributed
logger = getLogger('clickhouse_orm')
@ -31,26 +32,43 @@ class ModelBase(type):
fields = base_fields
# Build a list of fields, in the order they were listed in the class
fields.update({n: f for n, f in iteritems(attrs) if isinstance(f, Field)})
fields = sorted(iteritems(fields), key=lambda item: item[1].creation_counter)
fields.update({n: f for n, f in attrs.items() if isinstance(f, Field)})
fields = sorted(fields.items(), key=lambda item: item[1].creation_counter)
# Build a dictionary of default values
defaults = {n: f.to_python(f.default, pytz.UTC) for n, f in fields}
defaults = {}
has_funcs_as_defaults = False
for n, f in fields:
if f.alias or f.materialized:
defaults[n] = NO_VALUE
elif isinstance(f.default, F):
defaults[n] = NO_VALUE
has_funcs_as_defaults = True
else:
defaults[n] = f.to_python(f.default, pytz.UTC)
attrs = dict(
attrs,
_fields=OrderedDict(fields),
_writable_fields=OrderedDict([f for f in fields if not f[1].readonly]),
_defaults=defaults
_defaults=defaults,
_has_funcs_as_defaults=has_funcs_as_defaults
)
return super(ModelBase, cls).__new__(cls, str(name), bases, attrs)
model = super(ModelBase, cls).__new__(cls, str(name), bases, attrs)
# Let each field know its parent and its own name
for n, f in fields:
setattr(f, 'parent', model)
setattr(f, 'name', n)
return model
@classmethod
def create_ad_hoc_model(cls, fields, model_name='AdHocModel'):
# fields is a list of tuples (name, db_type)
# Check if model exists in cache
fields = list(fields)
cache_key = str(fields)
cache_key = model_name + ' ' + str(fields)
if cache_key in cls.ad_hoc_model_cache:
return cls.ad_hoc_model_cache[cache_key]
# Create an ad hoc model class
@ -76,14 +94,22 @@ class ModelBase(type):
if db_type.startswith('Array'):
inner_field = cls.create_ad_hoc_field(db_type[6 : -1])
return orm_fields.ArrayField(inner_field)
# Tuples (poor man's version - convert to array)
if db_type.startswith('Tuple'):
types = [s.strip() for s in db_type[6 : -1].split(',')]
assert len(set(types)) == 1, 'No support for mixed types in tuples - ' + db_type
inner_field = cls.create_ad_hoc_field(types[0])
return orm_fields.ArrayField(inner_field)
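# e.g. a 'Tuple(String, String)' column is modeled as ArrayField(StringField())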
# FixedString
if db_type.startswith('FixedString'):
length = int(db_type[12 : -1])
return orm_fields.FixedStringField(length)
# Decimal
# Decimal / Decimal32 / Decimal64 / Decimal128
if db_type.startswith('Decimal'):
precision, scale = [int(n.strip()) for n in db_type[8 : -1].split(',')]
return orm_fields.DecimalField(precision, scale)
p = db_type.index('(')
args = [int(n.strip()) for n in db_type[p + 1 : -1].split(',')]
field_class = getattr(orm_fields, db_type[:p] + 'Field')
return field_class(*args)
# Nullable
if db_type.startswith('Nullable'):
inner_field = cls.create_ad_hoc_field(db_type[9 : -1])
@ -99,7 +125,7 @@ class ModelBase(type):
return getattr(orm_fields, name)()
class Model(with_metaclass(ModelBase)):
class Model(metaclass=ModelBase):
'''
A base class for ORM models. Each model class represent a ClickHouse table. For example:
@ -131,7 +157,7 @@ class Model(with_metaclass(ModelBase)):
# Assign default values
self.__dict__.update(self._defaults)
# Assign field values from keyword arguments
for name, value in iteritems(kwargs):
for name, value in kwargs.items():
field = self.get_field(name)
if field:
setattr(self, name, value)
@ -144,14 +170,14 @@ class Model(with_metaclass(ModelBase)):
This may raise a `ValueError`.
'''
field = self.get_field(name)
if field:
if field and (value != NO_VALUE):
try:
value = field.to_python(value, pytz.utc)
field.validate(value)
except ValueError:
tp, v, tb = sys.exc_info()
new_msg = "{} (field '{}')".format(v, name)
reraise(tp, tp(new_msg), tb)
raise tp(new_msg).with_traceback(tb)
super(Model, self).__setattr__(name, value)
def set_database(self, db):
@ -186,6 +212,14 @@ class Model(with_metaclass(ModelBase)):
'''
return cls.__name__.lower()
@classmethod
def has_funcs_as_defaults(cls):
'''
Return True if some of the model's fields use a function expression
as a default value. This requires special handling when inserting instances.
'''
return cls._has_funcs_as_defaults
@classmethod
def create_table_sql(cls, db):
'''
@ -193,7 +227,7 @@ class Model(with_metaclass(ModelBase)):
'''
parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db.db_name, cls.table_name())]
cols = []
for name, field in iteritems(cls.fields()):
for name, field in cls.fields().items():
cols.append(' %s %s' % (name, field.get_sql(db=db)))
parts.append(',\n'.join(cols))
parts.append(')')
@ -218,7 +252,6 @@ class Model(with_metaclass(ModelBase)):
- `timezone_in_use`: the timezone to use when parsing dates and datetimes.
- `database`: if given, sets the database that this instance belongs to.
'''
from six import next
values = iter(parse_tsv(line))
kwargs = {}
for name in field_names:
@ -239,7 +272,30 @@ class Model(with_metaclass(ModelBase)):
'''
data = self.__dict__
fields = self.fields(writable=not include_readonly)
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in iteritems(fields))
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields.items())
def to_tskv(self, include_readonly=True):
'''
Returns the instance's column keys and values as a tab-separated line. A newline is not included.
Fields that were not assigned a value are omitted.
- `include_readonly`: if false, returns only fields that can be inserted into database.
'''
data = self.__dict__
fields = self.fields(writable=not include_readonly)
parts = []
for name, field in fields.items():
if data[name] != NO_VALUE:
parts.append(name + '=' + field.to_db_string(data[name], quote=False))
return '\t'.join(parts)
def to_db_string(self):
'''
Returns the instance as a bytestring ready to be inserted into the database.
'''
s = self.to_tskv(False) if self._has_funcs_as_defaults else self.to_tsv(False)
s += '\n'
return s.encode('utf-8')
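# Output sketch: a plain model produces TSV such as b'2020-01-01 00:00:00\t7\n' (every
# writable field, in order); a model with expression defaults produces TSKV such as
# b'a=2020-01-01 00:00:00\tc=7\n' (hypothetical fields), omitting NO_VALUE fields so
# the server computes their default expressions.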
def to_dict(self, include_readonly=True, field_names=None):
'''
@ -306,7 +362,7 @@ class MergeModel(Model):
'''
Model for Merge engine
Predefines the virtual _table column and ensures that rows cannot be inserted into this table type
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
https://clickhouse.tech/docs/en/single/index.html#document-table_engines/merge
'''
readonly = True
@ -318,7 +374,7 @@ class MergeModel(Model):
assert isinstance(cls.engine, Merge), "engine must be an instance of engines.Merge"
parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db.db_name, cls.table_name())]
cols = []
for name, field in iteritems(cls.fields()):
for name, field in cls.fields().items():
if name != '_table':
cols.append(' %s %s' % (name, field.get_sql(db=db)))
parts.append(',\n'.join(cols))
@ -401,3 +457,7 @@ class DistributedModel(Model):
db.db_name, cls.table_name(), cls.engine.table_name),
'ENGINE = ' + cls.engine.create_table_sql(db)]
return '\n'.join(parts)
# Expose only relevant classes in import *
__all__ = get_subclass_names(locals(), Model)
View File
@ -1,17 +1,15 @@
from __future__ import unicode_literals
import six
import pytz
from copy import copy, deepcopy
from math import ceil
from .engines import CollapsingMergeTree
from .utils import comma_join
from datetime import date, datetime
from .utils import comma_join, string_or_func
# TODO
# - check that field names are valid
# - operators for arrays: length, has, empty
class Operator(object):
"""
@ -25,6 +23,12 @@ class Operator(object):
"""
raise NotImplementedError # pragma: no cover
def _value_to_sql(self, field, value, quote=True):
from infi.clickhouse_orm.funcs import F
if isinstance(value, F):
return value.to_sql()
return field.to_db_string(field.to_python(value, pytz.utc), quote)
class SimpleOperator(Operator):
"""
@ -37,7 +41,7 @@ class SimpleOperator(Operator):
def to_sql(self, model_cls, field_name, value):
field = getattr(model_cls, field_name)
value = field.to_db_string(field.to_python(value, pytz.utc))
value = self._value_to_sql(field, value)
if value == '\\N' and self._sql_for_null is not None:
return ' '.join([field_name, self._sql_for_null])
return ' '.join([field_name, self._sql_operator, value])
@ -56,10 +60,10 @@ class InOperator(Operator):
field = getattr(model_cls, field_name)
if isinstance(value, QuerySet):
value = value.as_sql()
elif isinstance(value, six.string_types):
elif isinstance(value, str):
pass
else:
value = comma_join([field.to_db_string(field.to_python(v, pytz.utc)) for v in value])
value = comma_join([self._value_to_sql(field, v) for v in value])
return '%s IN (%s)' % (field_name, value)
@ -75,7 +79,7 @@ class LikeOperator(Operator):
def to_sql(self, model_cls, field_name, value):
field = getattr(model_cls, field_name)
value = field.to_db_string(field.to_python(value, pytz.utc), quote=False)
value = self._value_to_sql(field, value, quote=False)
value = value.replace('\\', '\\\\').replace('%', '\\\\%').replace('_', '\\\\_')
pattern = self._pattern.format(value)
if self._case_sensitive:
@ -91,7 +95,7 @@ class IExactOperator(Operator):
def to_sql(self, model_cls, field_name, value):
field = getattr(model_cls, field_name)
value = field.to_db_string(field.to_python(value, pytz.utc))
value = self._value_to_sql(field, value)
return 'lowerUTF8(%s) = lowerUTF8(%s)' % (field_name, value)
@ -120,10 +124,8 @@ class BetweenOperator(Operator):
def to_sql(self, model_cls, field_name, value):
field = getattr(model_cls, field_name)
value0 = field.to_db_string(
field.to_python(value[0], pytz.utc)) if value[0] is not None or len(str(value[0])) > 0 else None
value1 = field.to_db_string(
field.to_python(value[1], pytz.utc)) if value[1] is not None or len(str(value[1])) > 0 else None
value0 = self._value_to_sql(field, value[0]) if value[0] is not None or len(str(value[0])) > 0 else None
value1 = self._value_to_sql(field, value[1]) if value[1] is not None or len(str(value[1])) > 0 else None
if value0 and value1:
return '%s BETWEEN %s AND %s' % (field_name, value0, value1)
if value0 and not value1:
@ -156,11 +158,19 @@ register_operator('iendswith', LikeOperator('%{}', False))
register_operator('iexact', IExactOperator())
class FOV(object):
class Cond(object):
"""
An object for storing Field + Operator + Value.
An abstract object for storing a single query condition.
"""
def to_sql(self, model_cls):
raise NotImplementedError
class FieldCond(Cond):
"""
A single query condition made up of Field + Operator + Value.
"""
def __init__(self, field_name, operator, value):
self._field_name = field_name
self._operator = _operators.get(operator)
@ -184,8 +194,8 @@ class Q(object):
AND_MODE = 'AND'
OR_MODE = 'OR'
def __init__(self, **filter_fields):
self._fovs = [self._build_fov(k, v) for k, v in six.iteritems(filter_fields)]
def __init__(self, *filter_funcs, **filter_fields):
self._conds = list(filter_funcs) + [self._build_cond(k, v) for k, v in filter_fields.items()]
self._children = []
self._negate = False
self._mode = self.AND_MODE
@ -194,9 +204,9 @@ class Q(object):
def is_empty(self):
"""
Checks if there are any conditions in Q object
:return: Boolean
Returns: Boolean
"""
return not bool(self._fovs or self._children)
return not bool(self._conds or self._children)
@classmethod
def _construct_from(cls, l_child, r_child, mode):
@ -214,18 +224,18 @@ class Q(object):
return q
def _build_fov(self, key, value):
def _build_cond(self, key, value):
if '__' in key:
field_name, operator = key.rsplit('__', 1)
else:
field_name, operator = key, 'eq'
return FOV(field_name, operator, value)
return FieldCond(field_name, operator, value)
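# e.g. 'first_name__startswith' becomes FieldCond('first_name', 'startswith', value),
# while a key without '__', such as 'first_name', uses the implicit 'eq' operator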
def to_sql(self, model_cls):
condition_sql = []
if self._fovs:
condition_sql.extend([fov.to_sql(model_cls) for fov in self._fovs])
if self._conds:
condition_sql.extend([cond.to_sql(model_cls) for cond in self._conds])
if self._children:
condition_sql.extend([child.to_sql(model_cls) for child in self._children if child])
@ -261,7 +271,7 @@ class Q(object):
def __deepcopy__(self, memodict={}):
q = Q()
q._fovs = [deepcopy(fov) for fov in self._fovs]
q._conds = [deepcopy(cond) for cond in self._conds]
q._negate = self._negate
q._mode = self._mode
@ -271,7 +281,6 @@ class Q(object):
return q
@six.python_2_unicode_compatible
class QuerySet(object):
"""
A queryset is an object that represents a database query using a specific `Model`.
@ -317,12 +326,12 @@ class QuerySet(object):
return self.as_sql()
def __getitem__(self, s):
if isinstance(s, six.integer_types):
if isinstance(s, int):
# Single index
assert s >= 0, 'negative indexes are not supported'
qs = copy(self)
qs._limits = (s, 1)
return six.next(iter(qs))
return next(iter(qs))
else:
# Slice
assert s.step in (None, 1), 'step is not supported in slices'
@ -334,13 +343,13 @@ class QuerySet(object):
qs._limits = (start, stop - start)
return qs
def limit_by(self, offset_limit, *fields):
def limit_by(self, offset_limit, *fields_or_expr):
"""
Adds a LIMIT BY clause to the query.
- `offset_limit`: either an integer specifying the limit, or a tuple of integers (offset, limit).
- `fields`: the field names to use in the clause.
- `fields_or_expr`: the field names or expressions to use in the clause.
"""
if isinstance(offset_limit, six.integer_types):
if isinstance(offset_limit, int):
# Single limit
offset_limit = (0, offset_limit)
offset = offset_limit[0]
@ -348,14 +357,17 @@ class QuerySet(object):
assert offset >= 0 and limit >= 0, 'negative limits are not supported'
qs = copy(self)
qs._limit_by = (offset, limit)
qs._limit_by_fields = fields
qs._limit_by_fields = fields_or_expr
return qs
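# Usage sketch (first_name is a hypothetical field):
#   qs.limit_by(2, 'first_name')        # top 2 rows per first_name
#   qs.limit_by((2, 10), 'first_name')  # rows 3..12 per first_name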
def select_fields_as_sql(self):
"""
Returns the selected fields or expressions as a SQL string.
"""
return comma_join('`%s`' % field for field in self._fields) if self._fields else '*'
fields = '*'
if self._fields:
fields = comma_join('`%s`' % field for field in self._fields)
return fields
def as_sql(self):
"""
@ -363,10 +375,9 @@ class QuerySet(object):
"""
distinct = 'DISTINCT ' if self._distinct else ''
final = ' FINAL' if self._final else ''
table_name = self._model_cls.table_name()
if not self._model_cls.is_system_model():
table_name = '`%s`' % table_name
table_name = '`%s`' % self._model_cls.table_name()
if self._model_cls.is_system_model():
table_name = '`system`.' + table_name
params = (distinct, self.select_fields_as_sql(), table_name, final)
sql = u'SELECT %s%s\nFROM %s%s' % params
@ -387,7 +398,7 @@ class QuerySet(object):
if self._limit_by:
sql += '\nLIMIT %d, %d' % self._limit_by
sql += ' BY %s' % comma_join('`%s`' % field for field in self._limit_by_fields)
sql += ' BY %s' % comma_join(string_or_func(field) for field in self._limit_by_fields)
if self._limits:
sql += '\nLIMIT %d, %d' % self._limits
@ -399,7 +410,7 @@ class QuerySet(object):
Returns the contents of the query's `ORDER BY` clause as a string.
"""
return comma_join([
'%s DESC' % field[1:] if field[0] == '-' else field
'%s DESC' % field[1:] if isinstance(field, str) and field[0] == '-' else str(field)
for field in self._order_by
])
@ -443,14 +454,21 @@ class QuerySet(object):
return qs
def _filter_or_exclude(self, *q, **kwargs):
from .funcs import F
inverse = kwargs.pop('_inverse', False)
prewhere = kwargs.pop('prewhere', False)
qs = copy(self)
condition = Q()
for q_obj in q:
condition &= q_obj
for arg in q:
if isinstance(arg, Q):
condition &= arg
elif isinstance(arg, F):
condition &= Q(arg)
else:
raise TypeError('Invalid argument "%r" to queryset filter' % arg)
if kwargs:
condition &= Q(**kwargs)
@ -606,7 +624,7 @@ class AggregateQuerySet(QuerySet):
"""
Returns the selected fields or expressions as a SQL string.
"""
return comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
return comma_join([str(f) for f in self._fields] + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
def __iter__(self):
return self._database.select(self.as_sql()) # using an ad-hoc model
@ -623,8 +641,12 @@ class AggregateQuerySet(QuerySet):
"""
Adds the WITH TOTALS modifier to GROUP BY, making the query return an extra row
with the aggregate functions calculated across all rows. More information:
https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier
https://clickhouse.tech/docs/en/query_language/select/#with-totals-modifier
"""
qs = copy(self)
qs._grouping_with_totals = True
return qs
# Expose only relevant classes in import *
__all__ = [c.__name__ for c in [Q, QuerySet, AggregateQuerySet]]
View File
@ -1,9 +1,8 @@
"""
This file contains system readonly models that can be got from the database
https://clickhouse.yandex/docs/en/system_tables/
https://clickhouse.tech/docs/en/system_tables/
"""
from __future__ import unicode_literals
from six import string_types
from .database import Database
from .fields import *
@ -15,7 +14,7 @@ class SystemPart(Model):
"""
Contains information about parts of a table in the MergeTree family.
This model operates only fields, described in the reference. Other fields are ignored.
https://clickhouse.yandex/docs/en/system_tables/system.parts/
https://clickhouse.tech/docs/en/system_tables/system.parts/
"""
OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'})
@ -28,7 +27,7 @@ class SystemPart(Model):
partition = StringField() # Name of the partition, in the format YYYYMM.
name = StringField() # Name of the part.
# This field is present in the docs (https://clickhouse.yandex/docs/en/single/index.html#system-parts),
# This field is present in the docs (https://clickhouse.tech/docs/en/single/index.html#system-parts),
# but is absent in ClickHouse (in version 1.1.54245)
# replicated = UInt8Field() # Whether the part belongs to replicated data.
@ -52,19 +51,21 @@ class SystemPart(Model):
@classmethod
def table_name(cls):
return 'system.parts'
return 'parts'
"""
The following methods return SQL for operations that can be performed on partitions
https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts
https://clickhouse.tech/docs/en/query_language/queries/#manipulations-with-partitions-and-parts
"""
def _partition_operation_sql(self, operation, settings=None, from_part=None):
"""
Performs an operation on the partition
:param db: Database object to execute operation on
:param operation: Operation to execute from SystemPart.OPERATIONS set
:param settings: Settings for executing request to ClickHouse over db.raw() method
:return: Operation execution result
- `db`: Database object to execute operation on
- `operation`: Operation to execute from SystemPart.OPERATIONS set
- `settings`: Settings for executing request to ClickHouse over db.raw() method
Returns: Operation execution result
"""
operation = operation.upper()
assert operation in self.OPERATIONS, "operation must be in [%s]" % comma_join(self.OPERATIONS)
@ -77,41 +78,51 @@ class SystemPart(Model):
def detach(self, settings=None):
"""
Move a partition to the 'detached' directory and forget it.
:param settings: Settings for executing request to ClickHouse over db.raw() method
:return: SQL Query
- `settings`: Settings for executing request to ClickHouse over db.raw() method
Returns: SQL Query
"""
return self._partition_operation_sql('DETACH', settings=settings)
def drop(self, settings=None):
"""
Delete a partition
:param settings: Settings for executing request to ClickHouse over db.raw() method
:return: SQL Query
- `settings`: Settings for executing request to ClickHouse over db.raw() method
Returns: SQL Query
"""
return self._partition_operation_sql('DROP', settings=settings)
def attach(self, settings=None):
"""
Add a new part or partition from the 'detached' directory to the table.
:param settings: Settings for executing request to ClickHouse over db.raw() method
:return: SQL Query
- `settings`: Settings for executing request to ClickHouse over db.raw() method
Returns: SQL Query
"""
return self._partition_operation_sql('ATTACH', settings=settings)
def freeze(self, settings=None):
"""
Create a backup of a partition.
:param settings: Settings for executing request to ClickHouse over db.raw() method
:return: SQL Query
- `settings`: Settings for executing request to ClickHouse over db.raw() method
Returns: SQL Query
"""
return self._partition_operation_sql('FREEZE', settings=settings)
def fetch(self, zookeeper_path, settings=None):
"""
Download a partition from another server.
:param zookeeper_path: Path in zookeeper to fetch from
:param settings: Settings for executing request to ClickHouse over db.raw() method
:return: SQL Query
- `zookeeper_path`: Path in zookeeper to fetch from
- `settings`: Settings for executing request to ClickHouse over db.raw() method
Returns: SQL Query
"""
return self._partition_operation_sql('FETCH', settings=settings, from_part=zookeeper_path)
@ -119,27 +130,35 @@ class SystemPart(Model):
def get(cls, database, conditions=""):
"""
Get all data from system.parts table
:param database: A database object to fetch data from.
:param conditions: WHERE clause conditions. Database condition is added automatically
:return: A list of SystemPart objects
- `database`: A database object to fetch data from.
- `conditions`: WHERE clause conditions. Database condition is added automatically
Returns: A list of SystemPart objects
"""
assert isinstance(database, Database), "database must be database.Database class instance"
assert isinstance(conditions, string_types), "conditions must be a string"
assert isinstance(conditions, str), "conditions must be a string"
if conditions:
conditions += " AND"
field_names = ','.join(cls.fields())
return database.select("SELECT %s FROM %s WHERE %s database='%s'" %
(field_names, cls.table_name(), conditions, database.db_name), model_class=cls)
return database.select("SELECT %s FROM `system`.%s WHERE %s database='%s'" %
(field_names, cls.table_name(), conditions, database.db_name), model_class=cls)
@classmethod
def get_active(cls, database, conditions=""):
"""
Gets active data from system.parts table
:param database: A database object to fetch data from.
:param conditions: WHERE clause conditions. Database and active conditions are added automatically
:return: A list of SystemPart objects
- `database`: A database object to fetch data from.
- `conditions`: WHERE clause conditions. Database and active conditions are added automatically
Returns: A list of SystemPart objects
"""
if conditions:
conditions += ' AND '
conditions += 'active'
return SystemPart.get(database, conditions=conditions)
# Expose only relevant classes in import *
__all__ = [c.__name__ for c in [SystemPart]]
View File
@ -1,5 +1,4 @@
from __future__ import unicode_literals
from six import string_types, binary_type, text_type, PY3
import codecs
import re
@ -28,19 +27,23 @@ def escape(value, quote=True):
def escape_one(match):
return SPECIAL_CHARS[match.group(0)]
if isinstance(value, string_types):
if isinstance(value, str):
value = SPECIAL_CHARS_REGEX.sub(escape_one, value)
if quote:
value = "'" + value + "'"
return text_type(value)
return str(value)
def unescape(value):
return codecs.escape_decode(value)[0].decode('utf-8')
def string_or_func(obj):
return obj.to_sql() if hasattr(obj, 'to_sql') else obj
def parse_tsv(line):
if PY3 and isinstance(line, binary_type):
if isinstance(line, bytes):
line = line.decode()
if line and line[-1] == '\n':
line = line[:-1]
@ -49,19 +52,19 @@ def parse_tsv(line):
def parse_array(array_string):
"""
Parse an array string as returned by clickhouse. For example:
Parse an array or tuple string as returned by clickhouse. For example:
"['hello', 'world']" ==> ["hello", "world"]
"[1,2,3]" ==> [1, 2, 3]
"(1,2,3)" ==> [1, 2, 3]
"""
# Sanity check
if len(array_string) < 2 or array_string[0] != '[' or array_string[-1] != ']':
if len(array_string) < 2 or array_string[0] not in '[(' or array_string[-1] not in '])':
raise ValueError('Invalid array string: "%s"' % array_string)
# Drop opening brace
array_string = array_string[1:]
# Go over the string, lopping off each value at the beginning until nothing is left
values = []
while True:
if array_string == ']':
if array_string in '])':
# End of array
return values
elif array_string[0] in ', ':
@ -93,8 +96,38 @@ def import_submodules(package_name):
}
def comma_join(items):
def comma_join(items, stringify=False):
"""
Joins an iterable of strings with commas. If `stringify` is true, the items are first converted to strings.
"""
return ', '.join(items)
if stringify:
return ', '.join(str(item) for item in items)
else:
return ', '.join(items)
def is_iterable(obj):
"""
Checks if the given object is iterable.
"""
try:
iter(obj)
return True
except TypeError:
return False
def get_subclass_names(locals, base_class):
from inspect import isclass
return [c.__name__ for c in locals.values() if isclass(c) and issubclass(c, base_class)]
class NoValue:
'''
A sentinel for fields with an expression for a default value,
that were not assigned a value yet.
'''
def __repr__(self):
return 'NO_VALUE'
NO_VALUE = NoValue()
View File
@ -3,12 +3,13 @@ import unittest
from datetime import date
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.models import Model, NO_VALUE
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.funcs import F
class MaterializedFieldsTest(unittest.TestCase):
class AliasFieldsTest(unittest.TestCase):
def setUp(self):
self.database = Database('test-db', log_statements=True)
@ -25,7 +26,7 @@ class MaterializedFieldsTest(unittest.TestCase):
)
self.database.insert([instance])
# We can't select * from table, as it doesn't select materialized and alias fields
query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str' \
query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str, alias_func' \
' FROM $db.%s ORDER BY alias_date' % ModelWithAliasFields.table_name()
for model_cls in (ModelWithAliasFields, None):
results = list(self.database.select(query, model_cls))
@ -36,6 +37,7 @@ class MaterializedFieldsTest(unittest.TestCase):
self.assertEqual(results[0].alias_int, instance.int_field)
self.assertEqual(results[0].alias_str, instance.str_field)
self.assertEqual(results[0].alias_date, instance.date_field)
self.assertEqual(results[0].alias_func, 201608)
def test_assignment_error(self):
# I can't prevent assigning at all, because db.select statements with a model provided set model fields.
@ -55,6 +57,14 @@ class MaterializedFieldsTest(unittest.TestCase):
with self.assertRaises(AssertionError):
StringField(alias='str_field', materialized='str_field')
def test_default_value(self):
instance = ModelWithAliasFields()
self.assertEqual(instance.alias_str, NO_VALUE)
# Check that NO_VALUE can be assigned to a field
instance.str_field = NO_VALUE
# Check that NO_VALUE can be assigned when creating a new instance
instance2 = ModelWithAliasFields(**instance.to_dict())
class ModelWithAliasFields(Model):
int_field = Int32Field()
@ -64,5 +74,6 @@ class ModelWithAliasFields(Model):
alias_str = StringField(alias=u'str_field')
alias_int = Int32Field(alias='int_field')
alias_date = DateField(alias='date_field')
alias_func = Int32Field(alias=F.toYYYYMM(date_field))
engine = MergeTree('date_field', ('date_field',))
View File
@ -4,7 +4,7 @@ import datetime
import pytz
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.models import Model, NO_VALUE
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.utils import parse_tsv
@ -67,7 +67,7 @@ class CompressedFieldsTestCase(unittest.TestCase):
"int64_field": 100,
"float_field": 7.0,
"datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
"alias_field": 0.0,
"alias_field": NO_VALUE,
'string_field': 'dozo',
'nullable_field': None,
'uint64_field': 0,
@ -89,8 +89,9 @@ class CompressedFieldsTestCase(unittest.TestCase):
"datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc)
})
# This test fails on ClickHouse versions < 19.1.16, so skip it on older servers
def test_confirm_compression_codec(self):
if self.database.server_version < (19, 17):
raise unittest.SkipTest('ClickHouse version too old')
instance = CompressedModel(date_field='1973-12-06', int64_field='100', float_field='7', array_field='[a,b,c]')
self.database.insert([instance])
r = self.database.raw("select name, compression_codec from system.columns where table = '{}' and database='{}' FORMAT TabSeparatedWithNamesAndTypes".format(instance.table_name(), self.database.db_name))

View File

@ -1,8 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import unittest
import datetime
from infi.clickhouse_orm.database import ServerError, DatabaseException
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.engines import Memory
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.funcs import F
from infi.clickhouse_orm.query import Q
from .base_test_with_data import *
@ -26,12 +32,32 @@ class DatabaseTestCase(TestCaseWithData):
def test_insert__medium_batches(self):
self._insert_and_check(self._sample_data(), len(data), batch_size=100)
def test_insert__funcs_as_default_values(self):
if self.database.server_version < (20, 1, 2, 4):
raise unittest.SkipTest('Buggy in server versions before 20.1.2.4')
class TestModel(Model):
a = DateTimeField(default=datetime.datetime(2020, 1, 1))
b = DateField(default=F.toDate(a))
c = Int32Field(default=7)
d = Int32Field(default=c * 5)
engine = Memory()
self.database.create_table(TestModel)
self.database.insert([TestModel()])
t = TestModel.objects_in(self.database)[0]
self.assertEqual(str(t.b), '2020-01-01')
self.assertEqual(t.d, 35)
def test_count(self):
self.database.insert(self._sample_data())
self.assertEqual(self.database.count(Person), 100)
# Conditions as string
self.assertEqual(self.database.count(Person, "first_name = 'Courtney'"), 2)
self.assertEqual(self.database.count(Person, "birthday > '2000-01-01'"), 22)
self.assertEqual(self.database.count(Person, "birthday < '1970-03-01'"), 0)
# Conditions as expression
self.assertEqual(self.database.count(Person, Person.birthday > datetime.date(2000, 1, 1)), 22)
# Conditions as Q object
self.assertEqual(self.database.count(Person, Q(birthday__gt=datetime.date(2000, 1, 1))), 22)
def test_select(self):
self._insert_and_check(self._sample_data(), len(data))
@ -128,8 +154,15 @@ class DatabaseTestCase(TestCaseWithData):
def test_pagination_with_conditions(self):
self._insert_and_check(self._sample_data(), len(data))
# Conditions as string
page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions="first_name < 'Ava'")
self.assertEqual(page.number_of_objects, 10)
# Conditions as expression
page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions=Person.first_name < 'Ava')
self.assertEqual(page.number_of_objects, 10)
# Conditions as Q object
page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions=Q(first_name__lt='Ava'))
self.assertEqual(page.number_of_objects, 10)
def test_special_chars(self):
s = u'אבגד \\\'"`,.;éåäöšž\n\t\0\b\r'
@ -219,6 +252,35 @@ class DatabaseTestCase(TestCaseWithData):
from infi.clickhouse_orm.models import ModelBase
query = "SELECT DISTINCT type FROM system.columns"
for row in self.database.select(query):
if row.type in ('IPv4', 'IPv6'):
continue # not supported yet
ModelBase.create_ad_hoc_field(row.type)
def test_get_model_for_table(self):
# Tests that get_model_for_table works for a non-system model
model = self.database.get_model_for_table('person')
self.assertFalse(model.is_system_model())
self.assertFalse(model.is_read_only())
self.assertEqual(model.table_name(), 'person')
# Read a few records
list(model.objects_in(self.database)[:10])
# Inserts should work too
self.database.insert([
model(first_name='aaa', last_name='bbb', height=1.77)
])
def test_get_model_for_table__system(self):
# Tests that get_model_for_table works for all system tables
query = "SELECT name FROM system.tables WHERE database='system'"
for row in self.database.select(query):
print(row.name)
model = self.database.get_model_for_table(row.name, system_table=True)
self.assertTrue(model.is_system_model())
self.assertTrue(model.is_read_only())
self.assertEqual(model.table_name(), row.name)
# Read a few records
try:
list(model.objects_in(self.database)[:10])
except ServerError as e:
if 'Not enough privileges' in e.message:
pass
else:
raise
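Two of the hunks above document new `Database` capabilities: `count` and `paginate` accept conditions as a raw SQL string, a field expression, or a `Q` object, and `get_model_for_table` builds a model class by inspecting an existing table. A short sketch of the three equivalent condition styles, assuming an existing `Database` instance `db` and the `Person` model from the test suite:

```python
import datetime
from infi.clickhouse_orm.query import Q

cutoff = datetime.date(2000, 1, 1)
n1 = db.count(Person, "birthday > '2000-01-01'")  # raw SQL string
n2 = db.count(Person, Person.birthday > cutoff)   # field expression
n3 = db.count(Person, Q(birthday__gt=cutoff))     # Q object
assert n1 == n2 == n3
```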

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals
import unittest
import datetime
import pytz
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model

View File

@ -13,15 +13,11 @@ class DecimalFieldsTest(unittest.TestCase):
def setUp(self):
self.database = Database('test-db', log_statements=True)
self.database.add_setting('allow_experimental_decimal_type', 1)
try:
self.database.create_table(DecimalModel)
except ServerError as e:
if 'Unknown setting' in e.message:
# This ClickHouse version does not support decimals yet
raise unittest.SkipTest(e.message)
else:
raise
# This ClickHouse version does not support decimals yet
raise unittest.SkipTest(e.message)
def tearDown(self):
self.database.drop_database()

View File

@ -1,11 +1,8 @@
from __future__ import unicode_literals
import unittest
import datetime
from infi.clickhouse_orm.system_models import SystemPart
from infi.clickhouse_orm.database import Database, DatabaseException, ServerError
from infi.clickhouse_orm.models import Model, MergeModel, DistributedModel
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm import *
import logging
logging.getLogger("requests").setLevel(logging.WARNING)
@ -37,6 +34,11 @@ class EnginesTestCase(_EnginesHelperTestCase):
engine = MergeTree('date', ('date', 'event_id', 'event_group', 'intHash32(event_id)'), sampling_expr='intHash32(event_id)')
self._create_and_insert(TestModel)
def test_merge_tree_with_sampling__funcs(self):
class TestModel(SampleModel):
engine = MergeTree('date', ('date', 'event_id', 'event_group', F.intHash32(SampleModel.event_id)), sampling_expr=F.intHash32(SampleModel.event_id))
self._create_and_insert(TestModel)
def test_merge_tree_with_granularity(self):
class TestModel(SampleModel):
engine = MergeTree('date', ('date', 'event_id', 'event_group'), index_granularity=4096)
@ -166,6 +168,9 @@ class EnginesTestCase(_EnginesHelperTestCase):
self.assertEqual('(201701, 13)'.replace(' ', ''), parts[1].partition.replace(' ', ''))
def test_custom_primary_key(self):
if self.database.server_version < (18, 1):
raise unittest.SkipTest('ClickHouse version too old')
class TestModel(SampleModel):
engine = MergeTree(
order_by=('date', 'event_id', 'event_group'),

735
tests/test_funcs.py Normal file
View File

@ -0,0 +1,735 @@
import unittest
from .base_test_with_data import *
from .test_querysets import SampleModel
from datetime import date, datetime, tzinfo, timedelta
import pytz
from ipaddress import IPv4Address, IPv6Address
import logging
from decimal import Decimal
from infi.clickhouse_orm.database import ServerError
from infi.clickhouse_orm.utils import NO_VALUE
from infi.clickhouse_orm.funcs import F
class FuncsTestCase(TestCaseWithData):
def setUp(self):
super(FuncsTestCase, self).setUp()
self.database.insert(self._sample_data())
def _test_qs(self, qs, expected_count):
logging.info(qs.as_sql())
count = 0
for instance in qs:
count += 1
logging.info('\t[%d]\t%s' % (count, instance.to_dict()))
self.assertEqual(count, expected_count)
self.assertEqual(qs.count(), expected_count)
def _test_func(self, func, expected_value=NO_VALUE):
sql = 'SELECT %s AS value' % func.to_sql()
logging.info(sql)
try:
result = list(self.database.select(sql))
logging.info('\t==> %s', result[0].value if result else '<empty>')
if expected_value != NO_VALUE:
self.assertEqual(result[0].value, expected_value)
return result[0].value if result else None
except ServerError as e:
if 'Unknown function' in e.message:
logging.warning(e.message)
return # ignore functions that don't exist in the used ClickHouse version
raise
def _test_aggr(self, func, expected_value=NO_VALUE):
qs = Person.objects_in(self.database).aggregate(value=func)
logging.info(qs.as_sql())
try:
result = list(qs)
logging.info('\t==> %s', result[0].value if result else '<empty>')
if expected_value != NO_VALUE:
self.assertEqual(result[0].value, expected_value)
return result[0].value if result else None
except ServerError as e:
if 'Unknown function' in e.message:
logging.warning(e.message)
return # ignore functions that don't exist in the used ClickHouse version
raise
def test_func_to_sql(self):
# No args
self.assertEqual(F('func').to_sql(), 'func()')
# String args
self.assertEqual(F('func', "Wendy's", u"Wendy's").to_sql(), "func('Wendy\\'s', 'Wendy\\'s')")
# Numeric args
self.assertEqual(F('func', 1, 1.1, Decimal('3.3')).to_sql(), "func(1, 1.1, 3.3)")
# Date args
self.assertEqual(F('func', date(2018, 12, 31)).to_sql(), "func(toDate('2018-12-31'))")
# Datetime args
self.assertEqual(F('func', datetime(2018, 12, 31)).to_sql(), "func(toDateTime('1546214400'))")
# Boolean args
self.assertEqual(F('func', True, False).to_sql(), "func(1, 0)")
# Timezone args
self.assertEqual(F('func', pytz.utc).to_sql(), "func('UTC')")
self.assertEqual(F('func', pytz.timezone('Europe/Athens')).to_sql(), "func('Europe/Athens')")
# Null args
self.assertEqual(F('func', None).to_sql(), "func(NULL)")
# Fields as args
self.assertEqual(F('func', SampleModel.color).to_sql(), "func(`color`)")
# Funcs as args
self.assertEqual(F('func', F('sqrt', 25)).to_sql(), 'func(sqrt(25))')
# Iterables as args
x = [1, 'z', F('foo', 17)]
for y in [x, iter(x)]:
self.assertEqual(F('func', y, 5).to_sql(), "func([1, 'z', foo(17)], 5)")
# Tuples as args
self.assertEqual(F('func', [(1, 2), (3, 4)]).to_sql(), "func([(1, 2), (3, 4)])")
self.assertEqual(F('func', tuple(x), 5).to_sql(), "func((1, 'z', foo(17)), 5)")
# Binary operator functions
self.assertEqual(F.plus(1, 2).to_sql(), "(1 + 2)")
self.assertEqual(F.lessOrEquals(1, 2).to_sql(), "(1 <= 2)")
def test_filter_float_field(self):
qs = Person.objects_in(self.database)
# Height > 2
self._test_qs(qs.filter(F.greater(Person.height, 2)), 0)
self._test_qs(qs.filter(Person.height > 2), 0)
# Height > 1.61
self._test_qs(qs.filter(F.greater(Person.height, 1.61)), 96)
self._test_qs(qs.filter(Person.height > 1.61), 96)
# Height < 1.61
self._test_qs(qs.filter(F.less(Person.height, 1.61)), 4)
self._test_qs(qs.filter(Person.height < 1.61), 4)
def test_filter_date_field(self):
qs = Person.objects_in(self.database)
# People born on the 30th
self._test_qs(qs.filter(F('equals', F('toDayOfMonth', Person.birthday), 30)), 3)
self._test_qs(qs.filter(F('toDayOfMonth', Person.birthday) == 30), 3)
self._test_qs(qs.filter(F.toDayOfMonth(Person.birthday) == 30), 3)
# People born on Sunday
self._test_qs(qs.filter(F('equals', F('toDayOfWeek', Person.birthday), 7)), 18)
self._test_qs(qs.filter(F('toDayOfWeek', Person.birthday) == 7), 18)
self._test_qs(qs.filter(F.toDayOfWeek(Person.birthday) == 7), 18)
# People born on 1976-10-01
self._test_qs(qs.filter(F('equals', Person.birthday, '1976-10-01')), 1)
self._test_qs(qs.filter(F('equals', Person.birthday, date(1976, 10, 1))), 1)
self._test_qs(qs.filter(Person.birthday == date(1976, 10, 1)), 1)
def test_func_as_field_value(self):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(height__gt=F.plus(1, 0.61)), 96)
self._test_qs(qs.exclude(birthday=F.today()), 100)
self._test_qs(qs.filter(birthday__between=['1970-01-01', F.today()]), 100)
def test_in_and_not_in(self):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(Person.first_name.isIn(['Ciaran', 'Elton'])), 4)
self._test_qs(qs.filter(~Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
self._test_qs(qs.filter(Person.first_name.isNotIn(['Ciaran', 'Elton'])), 96)
self._test_qs(qs.exclude(Person.first_name.isIn(['Ciaran', 'Elton'])), 96)
# In subquery
subquery = qs.filter(F.startsWith(Person.last_name, 'M')).only(Person.first_name)
self._test_qs(qs.filter(Person.first_name.isIn(subquery)), 4)
def test_comparison_operators(self):
one = F.plus(1, 0)
two = F.plus(1, 1)
self._test_func(one > one, 0)
self._test_func(two > one, 1)
self._test_func(one >= two, 0)
self._test_func(one >= one, 1)
self._test_func(one < one, 0)
self._test_func(one < two, 1)
self._test_func(two <= one, 0)
self._test_func(one <= one, 1)
self._test_func(one == two, 0)
self._test_func(one == one, 1)
self._test_func(one != one, 0)
self._test_func(one != two, 1)
def test_arithmetic_operators(self):
one = F.plus(1, 0)
two = F.plus(1, 1)
# +
self._test_func(one + two, 3)
self._test_func(one + 2, 3)
self._test_func(2 + one, 3)
# -
self._test_func(one - two, -1)
self._test_func(one - 2, -1)
self._test_func(1 - two, -1)
# *
self._test_func(one * two, 2)
self._test_func(one * 2, 2)
self._test_func(1 * two, 2)
# /
self._test_func(one / two, 0.5)
self._test_func(one / 2, 0.5)
self._test_func(1 / two, 0.5)
# //
self._test_func(one // two, 0)
self._test_func(two // one, 2)
self._test_func(one // 2, 0)
self._test_func(1 // two, 0)
# %
self._test_func(one % two, 1)
self._test_func(one % 2, 1)
self._test_func(1 % two, 1)
# sign
self._test_func(-one, -1)
self._test_func(--one, 1)
self._test_func(+one, 1)
def test_logical_operators(self):
one = F.plus(1, 0)
two = F.plus(1, 1)
# &
self._test_func(one & two, 1)
self._test_func(one & two, 1)
self._test_func(one & 0, 0)
self._test_func(0 & one, 0)
# |
self._test_func(one | two, 1)
self._test_func(one | 0, 1)
self._test_func(0 | one, 1)
# ^
self._test_func(one ^ one)
self._test_func(one ^ 0)
self._test_func(0 ^ one)
# ~
self._test_func(~one, 0)
self._test_func(~~one, 1)
# compound
self._test_func(one & 0 | two, 1)
self._test_func(one & 0 & two, 0)
self._test_func(one & 0 | 0, 0)
self._test_func((one | 0) & two, 1)
def test_date_functions(self):
d = date(2018, 12, 31)
dt = datetime(2018, 12, 31, 11, 22, 33)
self._test_func(F.toYear(d), 2018)
self._test_func(F.toYear(dt), 2018)
self._test_func(F.toISOYear(dt, 'Europe/Athens'), 2019) # 2018-12-31 is ISO year 2019, week 1, day 1
self._test_func(F.toQuarter(d), 4)
self._test_func(F.toQuarter(dt), 4)
self._test_func(F.toMonth(d), 12)
self._test_func(F.toMonth(dt), 12)
self._test_func(F.toWeek(d), 52)
self._test_func(F.toWeek(dt), 52)
self._test_func(F.toISOWeek(d), 1) # 2018-12-31 is ISO year 2019, week 1, day 1
self._test_func(F.toISOWeek(dt), 1)
self._test_func(F.toDayOfYear(d), 365)
self._test_func(F.toDayOfYear(dt), 365)
self._test_func(F.toDayOfMonth(d), 31)
self._test_func(F.toDayOfMonth(dt), 31)
self._test_func(F.toDayOfWeek(d), 1)
self._test_func(F.toDayOfWeek(dt), 1)
self._test_func(F.toMinute(dt), 22)
self._test_func(F.toSecond(dt), 33)
self._test_func(F.toMonday(d), d)
self._test_func(F.toMonday(dt), d)
self._test_func(F.toStartOfMonth(d), date(2018, 12, 1))
self._test_func(F.toStartOfMonth(dt), date(2018, 12, 1))
self._test_func(F.toStartOfQuarter(d), date(2018, 10, 1))
self._test_func(F.toStartOfQuarter(dt), date(2018, 10, 1))
self._test_func(F.toStartOfYear(d), date(2018, 1, 1))
self._test_func(F.toStartOfYear(dt), date(2018, 1, 1))
self._test_func(F.toStartOfMinute(dt), datetime(2018, 12, 31, 11, 22, 0, tzinfo=pytz.utc))
self._test_func(F.toStartOfFiveMinute(dt), datetime(2018, 12, 31, 11, 20, 0, tzinfo=pytz.utc))
self._test_func(F.toStartOfFifteenMinutes(dt), datetime(2018, 12, 31, 11, 15, 0, tzinfo=pytz.utc))
self._test_func(F.toStartOfHour(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc))
self._test_func(F.toStartOfISOYear(dt), date(2018, 12, 31))
self._test_func(F.toStartOfTenMinutes(dt), datetime(2018, 12, 31, 11, 20, 0, tzinfo=pytz.utc))
self._test_func(F.toStartOfWeek(dt), date(2018, 12, 30))
self._test_func(F.toTime(dt), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toUnixTimestamp(dt, 'UTC'), int(dt.replace(tzinfo=pytz.utc).timestamp()))
self._test_func(F.toYYYYMM(d), 201812)
self._test_func(F.toYYYYMM(dt), 201812)
self._test_func(F.toYYYYMM(dt, 'Europe/Athens'), 201812)
self._test_func(F.toYYYYMMDD(d), 20181231)
self._test_func(F.toYYYYMMDD(dt), 20181231)
self._test_func(F.toYYYYMMDD(dt, 'Europe/Athens'), 20181231)
self._test_func(F.toYYYYMMDDhhmmss(d), 20181231000000)
self._test_func(F.toYYYYMMDDhhmmss(dt, 'Europe/Athens'), 20181231132233)
self._test_func(F.toRelativeYearNum(dt), 2018)
self._test_func(F.toRelativeYearNum(dt, 'Europe/Athens'), 2018)
self._test_func(F.toRelativeMonthNum(dt), 2018 * 12 + 12)
self._test_func(F.toRelativeMonthNum(dt, 'Europe/Athens'), 2018 * 12 + 12)
self._test_func(F.toRelativeWeekNum(dt), 2557)
self._test_func(F.toRelativeWeekNum(dt, 'Europe/Athens'), 2557)
self._test_func(F.toRelativeDayNum(dt), 17896)
self._test_func(F.toRelativeDayNum(dt, 'Europe/Athens'), 17896)
self._test_func(F.toRelativeHourNum(dt), 429515)
self._test_func(F.toRelativeHourNum(dt, 'Europe/Athens'), 429515)
self._test_func(F.toRelativeMinuteNum(dt), 25770922)
self._test_func(F.toRelativeMinuteNum(dt, 'Europe/Athens'), 25770922)
self._test_func(F.toRelativeSecondNum(dt), 1546255353)
self._test_func(F.toRelativeSecondNum(dt, 'Europe/Athens'), 1546255353)
self._test_func(F.timeSlot(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc))
self._test_func(F.timeSlots(dt, 300), [datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc)])
self._test_func(F.formatDateTime(dt, '%D %T', 'Europe/Athens'), '12/31/18 13:22:33')
self._test_func(F.addDays(d, 7), date(2019, 1, 7))
self._test_func(F.addDays(dt, 7, 'Europe/Athens'))
self._test_func(F.addHours(dt, 7, 'Europe/Athens'))
self._test_func(F.addMinutes(dt, 7, 'Europe/Athens'))
self._test_func(F.addMonths(d, 7), date(2019, 7, 31))
self._test_func(F.addMonths(dt, 7, 'Europe/Athens'))
self._test_func(F.addQuarters(d, 7))
self._test_func(F.addQuarters(dt, 7, 'Europe/Athens'))
self._test_func(F.addSeconds(d, 7))
self._test_func(F.addSeconds(dt, 7, 'Europe/Athens'))
self._test_func(F.addWeeks(d, 7))
self._test_func(F.addWeeks(dt, 7, 'Europe/Athens'))
self._test_func(F.addYears(d, 7))
self._test_func(F.addYears(dt, 7, 'Europe/Athens'))
self._test_func(F.subtractDays(d, 3))
self._test_func(F.subtractDays(dt, 3, 'Europe/Athens'))
self._test_func(F.subtractHours(d, 3))
self._test_func(F.subtractHours(dt, 3, 'Europe/Athens'))
self._test_func(F.subtractMinutes(d, 3))
self._test_func(F.subtractMinutes(dt, 3, 'Europe/Athens'))
self._test_func(F.subtractMonths(d, 3))
self._test_func(F.subtractMonths(dt, 3, 'Europe/Athens'))
self._test_func(F.subtractQuarters(d, 3))
self._test_func(F.subtractQuarters(dt, 3, 'Europe/Athens'))
self._test_func(F.subtractSeconds(d, 3))
self._test_func(F.subtractSeconds(dt, 3, 'Europe/Athens'))
self._test_func(F.subtractWeeks(d, 3))
self._test_func(F.subtractWeeks(dt, 3, 'Europe/Athens'))
self._test_func(F.subtractYears(d, 3))
self._test_func(F.subtractYears(dt, 3, 'Europe/Athens'))
self._test_func(F.now() + F.toIntervalSecond(3) + F.toIntervalMinute(3) + F.toIntervalHour(3) + F.toIntervalDay(3))
self._test_func(F.now() + F.toIntervalWeek(3) + F.toIntervalMonth(3) + F.toIntervalQuarter(3) + F.toIntervalYear(3))
self._test_func(F.now() + F.toIntervalSecond(3000) - F.toIntervalDay(3000) == F.now() + timedelta(seconds=3000, days=-3000))
def test_date_functions__utc_only(self):
if self.database.server_timezone != pytz.utc:
raise unittest.SkipTest('This test must run with UTC as the server timezone')
d = date(2018, 12, 31)
dt = datetime(2018, 12, 31, 11, 22, 33)
self._test_func(F.toHour(dt), 11)
self._test_func(F.toStartOfDay(dt), datetime(2018, 12, 31, 0, 0, 0, tzinfo=pytz.utc))
self._test_func(F.toTime(dt, pytz.utc), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toTime(dt, 'Europe/Athens'), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toTime(dt, pytz.timezone('Europe/Athens')), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toTimeZone(dt, 'Europe/Athens'), datetime(2018, 12, 31, 13, 22, 33, tzinfo=pytz.utc))
self._test_func(F.now(), datetime.utcnow().replace(tzinfo=pytz.utc, microsecond=0)) # FIXME this may fail if the timing is just right
self._test_func(F.today(), datetime.utcnow().date())
self._test_func(F.yesterday(), datetime.utcnow().date() - timedelta(days=1))
self._test_func(F.toYYYYMMDDhhmmss(dt), 20181231112233)
self._test_func(F.formatDateTime(dt, '%D %T'), '12/31/18 11:22:33')
self._test_func(F.addHours(d, 7), datetime(2018, 12, 31, 7, 0, 0, tzinfo=pytz.utc))
self._test_func(F.addMinutes(d, 7), datetime(2018, 12, 31, 0, 7, 0, tzinfo=pytz.utc))
def test_type_conversion_functions(self):
for f in (F.toUInt8, F.toUInt16, F.toUInt32, F.toUInt64, F.toInt8, F.toInt16, F.toInt32, F.toInt64, F.toFloat32, F.toFloat64):
self._test_func(f(17), 17)
self._test_func(f('17'), 17)
for f in (F.toUInt8OrZero, F.toUInt16OrZero, F.toUInt32OrZero, F.toUInt64OrZero, F.toInt8OrZero, F.toInt16OrZero, F.toInt32OrZero, F.toInt64OrZero, F.toFloat32OrZero, F.toFloat64OrZero):
self._test_func(f('17'), 17)
self._test_func(f('a'), 0)
for f in (F.toDecimal32, F.toDecimal64, F.toDecimal128):
self._test_func(f(17.17, 2), Decimal('17.17'))
self._test_func(f('17.17', 2), Decimal('17.17'))
self._test_func(F.toDate('2018-12-31'), date(2018, 12, 31))
self._test_func(F.toString(123), '123')
self._test_func(F.toFixedString('123', 5), '123')
self._test_func(F.toStringCutToZero('123\0'), '123')
self._test_func(F.CAST(17, 'String'), '17')
self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM', 'Europe/Athens'))
with self.assertRaises(ServerError):
self._test_func(F.parseDateTimeBestEffort('foo'))
self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM', 'Europe/Athens'))
self._test_func(F.parseDateTimeBestEffortOrNull('foo'), None)
self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM', 'Europe/Athens'))
self._test_func(F.parseDateTimeBestEffortOrZero('foo'), DateTimeField.class_default)
def test_type_conversion_functions__utc_only(self):
if self.database.server_timezone != pytz.utc:
raise unittest.SkipTest('This test must run with UTC as the server timezone')
self._test_func(F.toDateTime('2018-12-31 11:22:33'), datetime(2018, 12, 31, 11, 22, 33, tzinfo=pytz.utc))
self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))
self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))
self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))
def test_string_functions(self):
self._test_func(F.empty(''), 1)
self._test_func(F.empty('x'), 0)
self._test_func(F.notEmpty(''), 0)
self._test_func(F.notEmpty('x'), 1)
self._test_func(F.length('x'), 1)
self._test_func(F.lengthUTF8('x'), 1)
self._test_func(F.lower('Ab'), 'ab')
self._test_func(F.upper('Ab'), 'AB')
self._test_func(F.lowerUTF8('Ab'), 'ab')
self._test_func(F.upperUTF8('Ab'), 'AB')
self._test_func(F.reverse('Ab'), 'bA')
self._test_func(F.reverseUTF8('Ab'), 'bA')
self._test_func(F.concat('Ab', 'Cd', 'Ef'), 'AbCdEf')
self._test_func(F.substring('123456', 3, 2), '34')
self._test_func(F.substringUTF8('123456', 3, 2), '34')
self._test_func(F.appendTrailingCharIfAbsent('Hello', '!'), 'Hello!')
self._test_func(F.appendTrailingCharIfAbsent('Hello!', '!'), 'Hello!')
self._test_func(F.convertCharset(F.convertCharset('Hello', 'latin1', 'utf16'), 'utf16', 'latin1'), 'Hello')
self._test_func(F.startsWith('aaa', 'aa'), True)
self._test_func(F.startsWith('aaa', 'bb'), False)
self._test_func(F.endsWith('aaa', 'aa'), True)
self._test_func(F.endsWith('aaa', 'bb'), False)
self._test_func(F.trimLeft(' abc '), 'abc ')
self._test_func(F.trimRight(' abc '), ' abc')
self._test_func(F.trimBoth(' abc '), 'abc')
self._test_func(F.CRC32('whoops'), 3361378926)
def test_string_search_functions(self):
self._test_func(F.position('Hello, world!', '!'), 13)
self._test_func(F.positionCaseInsensitive('Hello, world!', 'hello'), 1)
self._test_func(F.positionUTF8('Привет, мир!', '!'), 12)
self._test_func(F.positionCaseInsensitiveUTF8('Привет, мир!', 'Мир'), 9)
self._test_func(F.like('Hello, world!', '%ll%'), 1)
self._test_func(F.notLike('Hello, world!', '%ll%'), 0)
self._test_func(F.match('Hello, world!', '[lmnop]{3}'), 1)
self._test_func(F.extract('Hello, world!', '[lmnop]{3}'), 'llo')
self._test_func(F.extractAll('Hello, world!', '[a-z]+'), ['ello', 'world'])
self._test_func(F.ngramDistance('Hello', 'Hello'), 0)
self._test_func(F.ngramDistanceCaseInsensitive('Hello', 'hello'), 0)
self._test_func(F.ngramDistanceUTF8('Hello', 'Hello'), 0)
self._test_func(F.ngramDistanceCaseInsensitiveUTF8('Hello', 'hello'), 0)
self._test_func(F.ngramSearch('Hello', 'Hello'), 1)
self._test_func(F.ngramSearchCaseInsensitive('Hello', 'hello'), 1)
self._test_func(F.ngramSearchUTF8('Hello', 'Hello'), 1)
self._test_func(F.ngramSearchCaseInsensitiveUTF8('Hello', 'hello'), 1)
def test_base64_functions(self):
try:
self._test_func(F.base64Decode(F.base64Encode('Hello')), 'Hello')
self._test_func(F.tryBase64Decode(F.base64Encode('Hello')), 'Hello')
self._test_func(F.tryBase64Decode(':-)'))
except ServerError as e:
# ClickHouse version that doesn't support these functions
raise unittest.SkipTest(e.message)
def test_replace_functions(self):
haystack = 'hello'
self._test_func(F.replace(haystack, 'l', 'L'), 'heLLo')
self._test_func(F.replaceAll(haystack, 'l', 'L'), 'heLLo')
self._test_func(F.replaceOne(haystack, 'l', 'L'), 'heLlo')
self._test_func(F.replaceRegexpAll(haystack, '[eo]', 'X'), 'hXllX')
self._test_func(F.replaceRegexpOne(haystack, '[eo]', 'X'), 'hXllo')
self._test_func(F.regexpQuoteMeta('[eo]'), '\\[eo\\]')
def test_math_functions(self):
x = 17
y = 3
self._test_func(F.e())
self._test_func(F.pi())
self._test_func(F.exp(x))
self._test_func(F.exp10(x))
self._test_func(F.exp2(x))
self._test_func(F.log(x))
self._test_func(F.log10(x))
self._test_func(F.log2(x))
self._test_func(F.ln(x))
self._test_func(F.sqrt(x))
self._test_func(F.cbrt(x))
self._test_func(F.erf(x))
self._test_func(F.erfc(x))
self._test_func(F.lgamma(x))
self._test_func(F.tgamma(x))
self._test_func(F.sin(x))
self._test_func(F.cos(x))
self._test_func(F.tan(x))
self._test_func(F.asin(x))
self._test_func(F.acos(x))
self._test_func(F.atan(x))
self._test_func(F.pow(x, y))
self._test_func(F.power(x, y))
self._test_func(F.intExp10(x))
self._test_func(F.intExp2(x))
self._test_func(F.intDivOrZero(x, y))
self._test_func(F.abs(x))
self._test_func(F.gcd(x, y))
self._test_func(F.lcm(x, y))
def test_rounding_functions(self):
x = 22.22222
n = 3
self._test_func(F.floor(x), 22)
self._test_func(F.floor(x, n), 22.222)
self._test_func(F.ceil(x), 23)
self._test_func(F.ceil(x, n), 22.223)
self._test_func(F.ceiling(x), 23)
self._test_func(F.ceiling(x, n), 22.223)
self._test_func(F.round(x), 22)
self._test_func(F.round(x, n), 22.222)
self._test_func(F.roundAge(x), 18)
self._test_func(F.roundDown(x, [10, 20, 30]), 20)
self._test_func(F.roundDuration(x), 10)
self._test_func(F.roundToExp2(x), 16)
def test_array_functions(self):
arr = [1, 2, 3]
self._test_func(F.emptyArrayDate())
self._test_func(F.emptyArrayDateTime())
self._test_func(F.emptyArrayFloat32())
self._test_func(F.emptyArrayFloat64())
self._test_func(F.emptyArrayInt16())
self._test_func(F.emptyArrayInt32())
self._test_func(F.emptyArrayInt64())
self._test_func(F.emptyArrayInt8())
self._test_func(F.emptyArrayString())
self._test_func(F.emptyArrayToSingle(F.emptyArrayInt16()), [0])
self._test_func(F.emptyArrayUInt16())
self._test_func(F.emptyArrayUInt32())
self._test_func(F.emptyArrayUInt64())
self._test_func(F.emptyArrayUInt8())
self._test_func(F.range(7), list(range(7)))
self._test_func(F.array(*arr), arr)
self._test_func(F.arrayConcat([1, 2], [3]), arr)
self._test_func(F.arrayElement([10, 20, 30], 2), 20)
self._test_func(F.has(arr, 2), 1)
self._test_func(F.hasAll(arr, [1, 7]), 0)
self._test_func(F.hasAny(arr, [1, 7]), 1)
self._test_func(F.indexOf(arr, 3), 3)
self._test_func(F.countEqual(arr, 2), 1)
self._test_func(F.arrayEnumerate(arr))
self._test_func(F.arrayEnumerateDense(arr))
self._test_func(F.arrayEnumerateDenseRanked(arr))
self._test_func(F.arrayEnumerateUniq(arr))
self._test_func(F.arrayEnumerateUniqRanked(arr))
self._test_func(F.arrayPopBack(arr), [1, 2])
self._test_func(F.arrayPopFront(arr), [2, 3])
self._test_func(F.arrayPushBack(arr, 7), arr + [7])
self._test_func(F.arrayPushFront(arr, 7), [7] + arr)
self._test_func(F.arrayResize(arr, 5), [1, 2, 3, 0, 0])
self._test_func(F.arrayResize(arr, 5, 9), [1, 2, 3, 9, 9])
self._test_func(F.arraySlice(arr, 2), [2, 3])
self._test_func(F.arraySlice(arr, 2, 1), [2])
self._test_func(F.arrayUniq(arr + arr), 3)
self._test_func(F.arrayJoin(arr))
self._test_func(F.arrayDifference(arr), [0, 1, 1])
self._test_func(F.arrayDistinct(arr + arr), arr)
self._test_func(F.arrayIntersect(arr, [3, 4]), [3])
self._test_func(F.arrayReduce('min', arr), 1)
self._test_func(F.arrayReverse(arr), [3, 2, 1])
def test_split_and_merge_functions(self):
self._test_func(F.splitByChar('_', 'a_b_c'), ['a', 'b', 'c'])
self._test_func(F.splitByString('__', 'a__b__c'), ['a', 'b', 'c'])
self._test_func(F.arrayStringConcat(['a', 'b', 'c']), 'abc')
self._test_func(F.arrayStringConcat(['a', 'b', 'c'], '_'), 'a_b_c')
self._test_func(F.alphaTokens('aaa.bbb.111'), ['aaa', 'bbb'])
def test_bit_functions(self):
x = 17
y = 4
z = 5
self._test_func(F.bitAnd(x, y))
self._test_func(F.bitNot(x))
self._test_func(F.bitOr(x, y))
self._test_func(F.bitRotateLeft(x, y))
self._test_func(F.bitRotateRight(x, y))
self._test_func(F.bitShiftLeft(x, y))
self._test_func(F.bitShiftRight(x, y))
self._test_func(F.bitTest(x, y))
self._test_func(F.bitTestAll(x, y))
self._test_func(F.bitTestAll(x, y, z))
self._test_func(F.bitTestAny(x, y))
self._test_func(F.bitTestAny(x, y, z))
self._test_func(F.bitXor(x, y))
def test_bitmap_functions(self):
self._test_func(F.bitmapToArray(F.bitmapBuild([1, 2, 3])), [1, 2, 3])
self._test_func(F.bitmapContains(F.bitmapBuild([1, 5, 7, 9]), F.toUInt32(9)), 1)
self._test_func(F.bitmapHasAny(F.bitmapBuild([1,2,3]), F.bitmapBuild([3,4,5])), 1)
self._test_func(F.bitmapHasAll(F.bitmapBuild([1,2,3]), F.bitmapBuild([3,4,5])), 0)
self._test_func(F.bitmapToArray(F.bitmapAnd(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [3])
self._test_func(F.bitmapToArray(F.bitmapOr(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [1, 2, 3, 4, 5])
self._test_func(F.bitmapToArray(F.bitmapXor(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [1, 2, 4, 5])
self._test_func(F.bitmapToArray(F.bitmapAndnot(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5]))), [1, 2])
self._test_func(F.bitmapCardinality(F.bitmapBuild([1, 2, 3, 4, 5])), 5)
self._test_func(F.bitmapAndCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 1)
self._test_func(F.bitmapOrCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 5)
self._test_func(F.bitmapXorCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 4)
self._test_func(F.bitmapAndnotCardinality(F.bitmapBuild([1, 2, 3]), F.bitmapBuild([3, 4, 5])), 2)
def test_hash_functions(self):
args = ['x', 'y', 'z']
x = 17
s = 'hello'
url = 'http://example.com/a/b/c/d'
self._test_func(F.hex(F.MD5(s)))
self._test_func(F.hex(F.sipHash128(s)))
self._test_func(F.hex(F.cityHash64(*args)))
self._test_func(F.hex(F.intHash32(x)))
self._test_func(F.hex(F.intHash64(x)))
self._test_func(F.hex(F.SHA1(s)))
self._test_func(F.hex(F.SHA224(s)))
self._test_func(F.hex(F.SHA256(s)))
self._test_func(F.hex(F.URLHash(url)))
self._test_func(F.hex(F.URLHash(url, 3)))
self._test_func(F.hex(F.farmHash64(*args)))
self._test_func(F.javaHash(s))
self._test_func(F.hiveHash(s))
self._test_func(F.hex(F.metroHash64(*args)))
self._test_func(F.jumpConsistentHash(x, 3))
self._test_func(F.hex(F.murmurHash2_32(*args)))
self._test_func(F.hex(F.murmurHash2_64(*args)))
self._test_func(F.hex(F.murmurHash3_32(*args)))
self._test_func(F.hex(F.murmurHash3_64(*args)))
self._test_func(F.hex(F.murmurHash3_128(s)))
self._test_func(F.hex(F.xxHash32(*args)))
self._test_func(F.hex(F.xxHash64(*args)))
if self.database.server_version >= (18, 1):
self._test_func(F.hex(F.halfMD5(*args)))
self._test_func(F.hex(F.sipHash64(*args)))
def test_rand_functions(self):
self._test_func(F.rand())
self._test_func(F.rand(17))
self._test_func(F.rand64())
self._test_func(F.rand64(17))
if self.database.server_version >= (19, 15): # buggy in older versions
self._test_func(F.randConstant())
self._test_func(F.randConstant(17))
def test_encoding_functions(self):
self._test_func(F.hex(F.unhex('0FA1')), '0FA1')
self._test_func(F.bitmaskToArray(17))
self._test_func(F.bitmaskToList(18))
def test_uuid_functions(self):
from uuid import UUID
uuid = self._test_func(F.generateUUIDv4())
self.assertEqual(type(uuid), UUID)
s = str(uuid)
self._test_func(F.toUUID(s), uuid)
self._test_func(F.UUIDNumToString(F.UUIDStringToNum(s)), s)
def test_ip_funcs(self):
self._test_func(F.IPv4NumToString(F.toUInt32(1)), '0.0.0.1')
self._test_func(F.IPv4NumToStringClassC(F.toUInt32(1)), '0.0.0.xxx')
self._test_func(F.IPv4StringToNum('0.0.0.17'), 17)
self._test_func(F.IPv6NumToString(F.IPv4ToIPv6(F.IPv4StringToNum('192.168.0.1'))), '::ffff:192.168.0.1')
self._test_func(F.IPv6NumToString(F.IPv6StringToNum('2a02:6b8::11')), '2a02:6b8::11')
self._test_func(F.toIPv4('10.20.30.40'), IPv4Address('10.20.30.40'))
self._test_func(F.toIPv6('2001:438:ffff::407d:1bc1'), IPv6Address('2001:438:ffff::407d:1bc1'))
self._test_func(F.IPv4CIDRToRange(F.toIPv4('192.168.5.2'), 16),
[IPv4Address('192.168.0.0'), IPv4Address('192.168.255.255')])
self._test_func(F.IPv6CIDRToRange(F.toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32),
[IPv6Address('2001:db8::'), IPv6Address('2001:db8:ffff:ffff:ffff:ffff:ffff:ffff')])
def test_aggregate_funcs(self):
self._test_aggr(F.any(Person.first_name))
self._test_aggr(F.anyHeavy(Person.first_name))
self._test_aggr(F.anyLast(Person.first_name))
self._test_aggr(F.argMin(Person.first_name, Person.height))
self._test_aggr(F.argMax(Person.first_name, Person.height))
self._test_aggr(F.round(F.avg(Person.height), 4), sum(p.height for p in self._sample_data()) / 100)
self._test_aggr(F.corr(Person.height, Person.height), 1)
self._test_aggr(F.count(), 100)
self._test_aggr(F.round(F.covarPop(Person.height, Person.height), 2), 0)
self._test_aggr(F.round(F.covarSamp(Person.height, Person.height), 2), 0)
self._test_aggr(F.kurtPop(Person.height))
self._test_aggr(F.kurtSamp(Person.height))
self._test_aggr(F.min(Person.height), 1.59)
self._test_aggr(F.max(Person.height), 1.80)
self._test_aggr(F.skewPop(Person.height))
self._test_aggr(F.skewSamp(Person.height))
self._test_aggr(F.round(F.sum(Person.height), 4), sum(p.height for p in self._sample_data()))
self._test_aggr(F.uniq(Person.first_name, Person.last_name), 100)
self._test_aggr(F.uniqExact(Person.first_name, Person.last_name), 100)
self._test_aggr(F.uniqHLL12(Person.first_name, Person.last_name), 99)
self._test_aggr(F.varPop(Person.height))
self._test_aggr(F.varSamp(Person.height))
def test_aggregate_funcs__or_default(self):
self.database.raw('TRUNCATE TABLE person')
self._test_aggr(F.countOrDefault(), 0)
self._test_aggr(F.maxOrDefault(Person.height), 0)
def test_aggregate_funcs__or_null(self):
self.database.raw('TRUNCATE TABLE person')
self._test_aggr(F.countOrNull(), None)
self._test_aggr(F.maxOrNull(Person.height), None)
def test_aggregate_funcs__if(self):
self._test_aggr(F.argMinIf(Person.first_name, Person.height, Person.last_name > 'H'))
self._test_aggr(F.countIf(Person.last_name > 'H'), 57)
self._test_aggr(F.minIf(Person.height, Person.last_name > 'H'), 1.6)
def test_aggregate_funcs__or_default_if(self):
self._test_aggr(F.argMinOrDefaultIf(Person.first_name, Person.height, Person.last_name > 'Z'))
self._test_aggr(F.countOrDefaultIf(Person.last_name > 'Z'), 0)
self._test_aggr(F.minOrDefaultIf(Person.height, Person.last_name > 'Z'), 0)
def test_aggregate_funcs__or_null_if(self):
self._test_aggr(F.argMinOrNullIf(Person.first_name, Person.height, Person.last_name > 'Z'))
self._test_aggr(F.countOrNullIf(Person.last_name > 'Z'), None)
self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None)
def test_quantile_funcs(self):
cond = Person.last_name > 'H'
weight_expr = F.toUInt32(F.round(Person.height))
# Quantile
self._test_aggr(F.quantile(0.9)(Person.height))
self._test_aggr(F.quantileOrDefault(0.9)(Person.height))
self._test_aggr(F.quantileOrNull(0.9)(Person.height))
self._test_aggr(F.quantileIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileDeterministic(0.9)(Person.height, 17))
self._test_aggr(F.quantileExact(0.9)(Person.height))
self._test_aggr(F.quantileExactOrDefault(0.9)(Person.height))
# Quantile weighted
self._test_aggr(F.quantileExactWeighted(0.9)(Person.height, weight_expr))
self._test_aggr(F.quantileExactWeightedOrNull(0.9)(Person.height, weight_expr))
self._test_aggr(F.quantileTiming(0.9)(Person.height))
self._test_aggr(F.quantileTimingIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileTimingWeighted(0.9)(Person.height, weight_expr))
self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, cond))
self._test_aggr(F.quantileTDigest(0.9)(Person.height))
self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, cond))
self._test_aggr(F.quantileTDigestWeighted(0.9)(Person.height, weight_expr))
# Quantiles
self._test_aggr(F.quantiles(0.9, 0.95, 0.99)(Person.height))
self._test_aggr(F.quantilesDeterministic(0.9, 0.95, 0.99)(Person.height, 17))
self._test_aggr(F.quantilesExact(0.9, 0.95, 0.99)(Person.height))
self._test_aggr(F.quantilesExactWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
self._test_aggr(F.quantilesTiming(0.9, 0.95, 0.99)(Person.height))
self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, cond))
self._test_aggr(F.quantilesTimingWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, cond))
self._test_aggr(F.quantilesTDigest(0.9, 0.95, 0.99)(Person.height))
self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, cond))
self._test_aggr(F.quantilesTDigestWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
def test_top_k_funcs(self):
self._test_aggr(F.topK(3)(Person.height))
self._test_aggr(F.topKOrDefault(3)(Person.height))
self._test_aggr(F.topKIf(3)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.topKOrDefaultIf(3)(Person.height, Person.last_name > 'H'))
weight_expr = F.toUInt32(F.round(Person.height))
self._test_aggr(F.topKWeighted(3)(Person.height, weight_expr))
self._test_aggr(F.topKWeightedOrDefault(3)(Person.height, weight_expr))
self._test_aggr(F.topKWeightedIf(3)(Person.height, weight_expr, Person.last_name > 'H'))
self._test_aggr(F.topKWeightedOrDefaultIf(3)(Person.height, weight_expr, Person.last_name > 'H'))
def test_null_funcs(self):
self._test_func(F.ifNull(17, 18), 17)
self._test_func(F.ifNull(None, 18), 18)
self._test_func(F.nullIf(17, 18), 17)
self._test_func(F.nullIf(18, 18), None)
self._test_func(F.isNotNull(17), 1)
self._test_func(F.isNull(17), 0)
self._test_func(F.coalesce(None, None, 17, 18), 17)
def test_misc_funcs(self):
self._test_func(F.ifNotFinite(17, 18), 17)
self._test_func(F.isFinite(17), 1)
self._test_func(F.isInfinite(17), 0)
self._test_func(F.isNaN(17), 0)
self._test_func(F.least(17, 18), 17)
self._test_func(F.greatest(17, 18), 18)

69
tests/test_ip_fields.py Normal file
View File

@ -0,0 +1,69 @@
from __future__ import unicode_literals
import unittest
from ipaddress import IPv4Address, IPv6Address
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.fields import Int16Field, IPv4Field, IPv6Field
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.engines import Memory
class IPFieldsTest(unittest.TestCase):
def setUp(self):
self.database = Database('test-db', log_statements=True)
def tearDown(self):
self.database.drop_database()
def test_ipv4_field(self):
if self.database.server_version < (19, 17):
raise unittest.SkipTest('ClickHouse version too old')
# Create a model
class TestModel(Model):
i = Int16Field()
f = IPv4Field()
engine = Memory()
self.database.create_table(TestModel)
# Check valid values (all of them represent the same IP address)
values = [
'1.2.3.4',
b'\x01\x02\x03\x04',
16909060,
IPv4Address('1.2.3.4')
]
for index, value in enumerate(values):
rec = TestModel(i=index, f=value)
self.database.insert([rec])
for rec in TestModel.objects_in(self.database):
self.assertEqual(rec.f, IPv4Address(values[0]))
# Check invalid values
for value in [None, 'zzz', -1, '123']:
with self.assertRaises(ValueError):
TestModel(i=1, f=value)
def test_ipv6_field(self):
if self.database.server_version < (19, 17):
raise unittest.SkipTest('ClickHouse version too old')
# Create a model
class TestModel(Model):
i = Int16Field()
f = IPv6Field()
engine = Memory()
self.database.create_table(TestModel)
# Check valid values (all of them represent the same IP address)
values = [
'2a02:e980:1e::1',
b'*\x02\xe9\x80\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01',
55842696359362256756849388082849382401,
IPv6Address('2a02:e980:1e::1')
]
for index, value in enumerate(values):
rec = TestModel(i=index, f=value)
self.database.insert([rec])
for rec in TestModel.objects_in(self.database):
self.assertEqual(rec.f, IPv6Address(values[0]))
# Check invalid values
for value in [None, 'zzz', -1, '123']:
with self.assertRaises(ValueError):
TestModel(i=1, f=value)
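As both tests show, the new IP fields normalize several input forms - string, packed bytes, integer, and `ipaddress` objects - and reject anything else with a `ValueError`. A condensed sketch (model name is illustrative):

```python
from ipaddress import IPv4Address
from infi.clickhouse_orm import Model, Int16Field, IPv4Field, Memory

class Client(Model):
    i = Int16Field()
    addr = IPv4Field()
    engine = Memory()

rec = Client(i=1, addr=16909060)  # 0x01020304, i.e. the same address as '1.2.3.4'
assert rec.addr == IPv4Address('1.2.3.4')
```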

View File

@ -3,9 +3,10 @@ import unittest
from datetime import date
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.models import Model, NO_VALUE
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.funcs import F
class MaterializedFieldsTest(unittest.TestCase):
@ -25,7 +26,7 @@ class MaterializedFieldsTest(unittest.TestCase):
)
self.database.insert([instance])
# We can't select * from the table, since that wouldn't return materialized and alias fields
query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str' \
query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str, mat_func' \
' FROM $db.%s ORDER BY mat_date' % ModelWithMaterializedFields.table_name()
for model_cls in (ModelWithMaterializedFields, None):
results = list(self.database.select(query, model_cls))
@ -36,6 +37,7 @@ class MaterializedFieldsTest(unittest.TestCase):
self.assertEqual(results[0].mat_int, abs(instance.int_field))
self.assertEqual(results[0].mat_str, instance.str_field.lower())
self.assertEqual(results[0].mat_date, instance.date_time_field.date())
self.assertEqual(results[0].mat_func, instance.str_field.lower())
def test_assignment_error(self):
# Assignment can't be prevented entirely, since db.select statements with a model provided set the model's fields.
@ -55,6 +57,10 @@ class MaterializedFieldsTest(unittest.TestCase):
with self.assertRaises(AssertionError):
StringField(materialized='str_field', alias='str_field')
def test_default_value(self):
instance = ModelWithMaterializedFields()
self.assertEqual(instance.mat_str, NO_VALUE)
class ModelWithMaterializedFields(Model):
int_field = Int32Field()
@ -64,5 +70,6 @@ class ModelWithMaterializedFields(Model):
mat_str = StringField(materialized='lower(str_field)')
mat_int = Int32Field(materialized='abs(int_field)')
mat_date = DateField(materialized=u'toDate(date_time_field)')
mat_func = StringField(materialized=F.lower(str_field))
engine = MergeTree('mat_date', ('mat_date',))
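Mirroring the alias test, `mat_func` shows that MATERIALIZED columns can likewise be declared with an expression instead of a SQL string; both spellings below should produce the same column definition (a sketch, with illustrative names):

```python
from infi.clickhouse_orm import Model, DateField, DateTimeField, StringField, MergeTree, F

class LogEntry(Model):
    date_time_field = DateTimeField()
    str_field = StringField()
    mat_str = StringField(materialized='lower(str_field)')   # SQL-string form
    mat_func = StringField(materialized=F.lower(str_field))  # expression form
    mat_date = DateField(materialized=F.toDate(date_time_field))
    engine = MergeTree('mat_date', ('mat_date',))
```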

View File

@ -3,9 +3,10 @@ import unittest
import datetime
import pytz
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.models import Model, NO_VALUE
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.funcs import F
class ModelTestCase(unittest.TestCase):
@ -18,6 +19,7 @@ class ModelTestCase(unittest.TestCase):
self.assertEqual(instance.str_field, 'dozo')
self.assertEqual(instance.int_field, 17)
self.assertEqual(instance.float_field, 0)
self.assertEqual(instance.default_func, NO_VALUE)
def test_assignment(self):
# Check that all fields are assigned during construction
@ -63,15 +65,17 @@ class ModelTestCase(unittest.TestCase):
"int_field": 100,
"float_field": 7.0,
"datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
"alias_field": 0.0,
'str_field': 'dozo'
"alias_field": NO_VALUE,
"str_field": "dozo",
"default_func": NO_VALUE
})
self.assertDictEqual(instance.to_dict(include_readonly=False), {
"date_field": datetime.date(1973, 12, 6),
"int_field": 100,
"float_field": 7.0,
"datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
'str_field': 'dozo'
"str_field": "dozo",
"default_func": NO_VALUE
})
self.assertDictEqual(
instance.to_dict(include_readonly=False, field_names=('int_field', 'alias_field', 'datetime_field')), {
@ -86,7 +90,7 @@ class ModelTestCase(unittest.TestCase):
self.assertEqual(
"Invalid value for StringField: {} (field 'str_field')".format(repr(bad_value)),
text_type(cm.exception)
str(cm.exception)
)
def test_field_name_in_error_message_for_invalid_value_in_assignment(self):
@ -97,7 +101,7 @@ class ModelTestCase(unittest.TestCase):
self.assertEqual(
"Invalid value for Float32Field - {} (field 'float_field')".format(repr(bad_value)),
text_type(cm.exception)
str(cm.exception)
)
@ -109,5 +113,6 @@ class SimpleModel(Model):
int_field = Int32Field(default=17)
float_field = Float32Field()
alias_field = Float32Field(alias='float_field')
default_func = Float32Field(default=F.sqrt(float_field) + 17)
engine = MergeTree('date_field', ('int_field', 'date_field'))
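`default_func` exercises the other new field option: a default computed by the server. A locally constructed instance therefore reads `NO_VALUE` until the row has been inserted and read back - roughly:

```python
instance = SimpleModel()      # the test model defined just above
print(instance.default_func)  # NO_VALUE: sqrt(float_field) + 17 is evaluated
                              # by ClickHouse on INSERT, not by the ORM
```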

View File

@ -1,13 +1,17 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import unittest
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.query import Q
from infi.clickhouse_orm.funcs import F
from .base_test_with_data import *
import logging
from datetime import date, datetime
from enum import Enum
from decimal import Decimal
from logging import getLogger
logger = getLogger('tests')
class QuerySetTestCase(TestCaseWithData):
@ -17,11 +21,11 @@ class QuerySetTestCase(TestCaseWithData):
self.database.insert(self._sample_data())
def _test_qs(self, qs, expected_count):
logging.info(qs.as_sql())
logger.info(qs.as_sql())
count = 0
for instance in qs:
count += 1
logging.info('\t[%d]\t%s' % (count, instance.to_dict()))
logger.info('\t[%d]\t%s' % (count, instance.to_dict()))
self.assertEqual(count, expected_count)
self.assertEqual(qs.count(), expected_count)
@ -293,6 +297,17 @@ class QuerySetTestCase(TestCaseWithData):
for item, exp_color in zip(res, (Color.red, Color.green, Color.white, Color.blue)):
self.assertEqual(exp_color, item.color)
def test_mixed_filter(self):
qs = Person.objects_in(self.database)
qs = qs.filter(Q(first_name='a'), F('greater', Person.height, 1.7), last_name='b')
self.assertEqual(qs.conditions_as_sql(),
"(first_name = 'a') AND (greater(`height`, 1.7)) AND (last_name = 'b')")
def test_invalid_filter(self):
qs = Person.objects_in(self.database)
with self.assertRaises(TypeError):
qs.filter('foo')
class AggregateTestCase(TestCaseWithData):
@ -307,6 +322,13 @@ class AggregateTestCase(TestCaseWithData):
for row in qs:
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
self.assertEqual(row.count, 100)
# With functions
qs = Person.objects_in(self.database).aggregate(average_height=F.avg(Person.height), count=F.count())
print(qs.as_sql())
self.assertEqual(qs.count(), 1)
for row in qs:
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
self.assertEqual(row.count, 100)
def test_aggregate_with_filter(self):
# When filter comes before aggregate
@ -324,6 +346,22 @@ class AggregateTestCase(TestCaseWithData):
self.assertAlmostEqual(row.average_height, 1.675, places=4)
self.assertEqual(row.count, 2)
def test_aggregate_with_filter__funcs(self):
# When filter comes before aggregate
qs = Person.objects_in(self.database).filter(Person.first_name=='Warren').aggregate(average_height=F.avg(Person.height), count=F.count())
print(qs.as_sql())
self.assertEqual(qs.count(), 1)
for row in qs:
self.assertAlmostEqual(row.average_height, 1.675, places=4)
self.assertEqual(row.count, 2)
# When filter comes after aggregate
qs = Person.objects_in(self.database).aggregate(average_height=F.avg(Person.height), count=F.count()).filter(Person.first_name=='Warren')
print(qs.as_sql())
self.assertEqual(qs.count(), 1)
for row in qs:
self.assertAlmostEqual(row.average_height, 1.675, places=4)
self.assertEqual(row.count, 2)
def test_aggregate_with_implicit_grouping(self):
qs = Person.objects_in(self.database).aggregate('first_name', average_height='avg(height)', count='count()')
print(qs.as_sql())
@ -433,21 +471,26 @@ class AggregateTestCase(TestCaseWithData):
self.assertEqual(qs.conditions_as_sql(), 'the__next__number > 1')
def test_limit_by(self):
if self.database.server_version < (19, 17):
raise unittest.SkipTest('ClickHouse version too old')
# Test without offset
qs = Person.objects_in(self.database).aggregate('first_name', 'last_name', 'height', n='count()').\
order_by('first_name', '-height').limit_by(1, 'first_name')
self.assertEqual(qs.count(), 94)
self.assertEqual(list(qs)[89].last_name, 'Bowen')
# Test with funcs and fields
qs = Person.objects_in(self.database).aggregate(Person.first_name, Person.last_name, Person.height, n=F.count()).\
order_by(Person.first_name, '-height').limit_by(1, F.upper(Person.first_name))
self.assertEqual(qs.count(), 94)
self.assertEqual(list(qs)[89].last_name, 'Bowen')
# Test with limit and offset, also mixing LIMIT with LIMIT BY
qs = Person.objects_in(self.database).filter(height__gt=1.67).order_by('height', 'first_name')
limited_qs = qs.limit_by((0, 3), 'height')
self.assertEquals([p.first_name for p in limited_qs[:3]], ['Amanda', 'Buffy', 'Dora'])
self.assertEqual([p.first_name for p in limited_qs[:3]], ['Amanda', 'Buffy', 'Dora'])
limited_qs = qs.limit_by((3, 3), 'height')
self.assertEquals([p.first_name for p in limited_qs[:3]], ['Elton', 'Josiah', 'Macaulay'])
self.assertEqual([p.first_name for p in limited_qs[:3]], ['Elton', 'Josiah', 'Macaulay'])
limited_qs = qs.limit_by((6, 3), 'height')
self.assertEquals([p.first_name for p in limited_qs[:3]], ['Norman', 'Octavius', 'Oliver'])
self.assertEqual([p.first_name for p in limited_qs[:3]], ['Norman', 'Octavius', 'Oliver'])
Color = Enum('Color', u'red blue green yellow brown white black')
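Finally, the queryset changes: `filter`, `aggregate`, `order_by` and `limit_by` all accept field references and `F` expressions alongside the old string syntax. A combined sketch using the `Person` model and an existing `db`:

```python
from infi.clickhouse_orm import F

qs = (
    Person.objects_in(db)
    .aggregate(Person.first_name, Person.last_name, Person.height, n=F.count())
    .order_by(Person.first_name, '-height')
    .limit_by(1, F.upper(Person.first_name))  # keep one (tallest) row per name
)
print(qs.as_sql())
```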

View File

@ -16,6 +16,8 @@ class UUIDFieldsTest(unittest.TestCase):
self.database.drop_database()
def test_uuid_field(self):
if self.database.server_version < (18, 1):
raise unittest.SkipTest('ClickHouse version too old')
# Create a model
class TestModel(Model):
i = Int16Field()