This commit is contained in:
Itai Shirav 2020-05-28 19:18:10 +03:00
parent e97e48a695
commit 113ac7ad4a
15 changed files with 125 additions and 128 deletions

View File

@ -8,11 +8,7 @@ Let's jump right in with a simple example of monitoring CPU usage. First we need
connect to the database and create a table for the model: connect to the database and create a table for the model:
```python ```python
from infi.clickhouse_orm.database import Database from infi.clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory, F
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import Memory
from infi.clickhouse_orm.funcs import F
class CPUStats(Model): class CPUStats(Model):
@ -46,12 +42,13 @@ Querying the table is easy, using either the query builder or raw SQL:
```python ```python
# Calculate what percentage of the time CPU 1 was over 95% busy # Calculate what percentage of the time CPU 1 was over 95% busy
total = CPUStats.objects_in(db).filter(CPUStats.cpu_id == 1).count() queryset = CPUStats.objects_in(db)
busy = CPUStats.objects_in(db).filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count() total = queryset.filter(CPUStats.cpu_id == 1).count()
busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total)) print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
# Calculate the average usage per CPU # Calculate the average usage per CPU
for row in CPUStats.objects_in(db).aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)): for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row)) print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
``` ```

View File

@ -840,7 +840,7 @@ Extends Engine
Buffers the data to write in RAM, periodically flushing it to another table. Buffers the data to write in RAM, periodically flushing it to another table.
Must be used in conjunction with a `BufferModel`. Must be used in conjunction with a `BufferModel`.
Read more [here](https://clickhouse.yandex/docs/en/table_engines/buffer/). Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/).
#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000) #### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000)
@ -853,7 +853,7 @@ Extends Engine
The Merge engine (not to be confused with MergeTree) does not store data itself, The Merge engine (not to be confused with MergeTree) does not store data itself,
but allows reading from any number of other tables simultaneously. but allows reading from any number of other tables simultaneously.
Writing to a table is not supported Writing to a table is not supported
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
#### Merge(table_regex) #### Merge(table_regex)
@ -869,7 +869,7 @@ Reading is automatically parallelized.
During a read, the table indexes on remote servers are used, if there are any. During a read, the table indexes on remote servers are used, if there are any.
See full documentation here See full documentation here
https://clickhouse.yandex/docs/en/table_engines/distributed.html https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
#### Distributed(cluster, table=None, sharding_key=None) #### Distributed(cluster, table=None, sharding_key=None)
@ -1165,7 +1165,7 @@ Returns the selected fields or expressions as a SQL string.
Adds WITH TOTALS modifier to GROUP BY, making the query return an extra row Adds WITH TOTALS modifier to GROUP BY, making the query return an extra row
with aggregate function calculated across all the rows. More information: with aggregate function calculated across all the rows. More information:
https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier https://clickhouse.tech/docs/en/sql-reference/statements/select/#with-totals-modifier
### Q ### Q

View File

@ -55,14 +55,14 @@ Sensor.temperature * 1.8 + 32
Inside model class definitions omit the class name: Inside model class definitions omit the class name:
```python ```python
class Person(Model): class Person(Model):
height_cm = fields.Float32Field() height_cm = Float32Field()
height_inch = fields.Float32Field(alias=height_cm/2.54) height_inch = Float32Field(alias=height_cm/2.54)
... ...
``` ```
### Parametric functions ### Parametric functions
Some of ClickHouse's aggregate functions can accept not only argument columns, but a set of parameters - constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. For example: Some of ClickHouse's aggregate functions can accept one or more parameters - constants for initialization that affect the way the function works. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. For example:
```python ```python
# Most common last names # Most common last names
F.topK(5)(Person.last_name) F.topK(5)(Person.last_name)
@ -81,8 +81,8 @@ def normalize_string(s):
Then we can use this expression anywhere we need it: Then we can use this expression anywhere we need it:
```python ```python
class Event(Model): class Event(Model):
code = fields.StringField() code = StringField()
normalized_code = fields.StringField(materialized=normalize_string(code)) normalized_code = StringField(materialized=normalize_string(code))
``` ```
### Which functions are available? ### Which functions are available?
@ -93,6 +93,7 @@ expr = F("someFunctionName", arg1, arg2, ...)
``` ```
Note that higher-order database functions (those that use lambda expressions) are not supported. Note that higher-order database functions (those that use lambda expressions) are not supported.
--- ---
[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Importing ORM Classes >>](importing_orm_classes.md) [<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Importing ORM Classes >>](importing_orm_classes.md)

View File

@ -16,13 +16,13 @@ Note that `default`, `alias` and `materialized` are mutually exclusive - you can
Specifies a default value to use for the field. If not given, the field will have a default value based on its type: empty string for string fields, zero for numeric fields, etc. Specifies a default value to use for the field. If not given, the field will have a default value based on its type: empty string for string fields, zero for numeric fields, etc.
The default value can be a Python value suitable for the field type, or an expression. For example: The default value can be a Python value suitable for the field type, or an expression. For example:
```python ```python
class Event(models.Model): class Event(Model):
name = fields.StringField(default="EVENT") name = StringField(default="EVENT")
repeated = fields.UInt32Field(default=1) repeated = UInt32Field(default=1)
created = fields.DateTimeField(default=F.now()) created = DateTimeField(default=F.now())
engine = engines.Memory() engine = Memory()
... ...
``` ```
When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE` instead. For example: When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE` instead. For example:
@ -38,18 +38,18 @@ When creating a model instance, any fields you do not specify get their default
The `alias` and `materialized` attributes expect an expression that gets calculated by the database. The difference is that `alias` fields are calculated on the fly, while `materialized` fields are calculated when the record is inserted, and are stored on disk. The `alias` and `materialized` attributes expect an expression that gets calculated by the database. The difference is that `alias` fields are calculated on the fly, while `materialized` fields are calculated when the record is inserted, and are stored on disk.
You can use any expression, and can refer to other model fields. For example: You can use any expression, and can refer to other model fields. For example:
```python ```python
class Event(models.Model): class Event(Model):
created = fields.DateTimeField() created = DateTimeField()
created_date = fields.DateTimeField(materialized=F.toDate(created)) created_date = DateTimeField(materialized=F.toDate(created))
name = fields.StringField() name = StringField()
normalized_name = fields.StringField(alias=F.upper(F.trim(name))) normalized_name = StringField(alias=F.upper(F.trim(name)))
engine = engines.Memory() engine = Memory()
``` ```
For backwards compatibility with older versions of the ORM, you can pass the expression as an SQL string: For backwards compatibility with older versions of the ORM, you can pass the expression as an SQL string:
```python ```python
created_date = fields.DateTimeField(materialized="toDate(created)") created_date = DateTimeField(materialized="toDate(created)")
``` ```
Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query. Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query.
@ -89,15 +89,15 @@ Recommended usage for codecs:
Example: Example:
```python ```python
class Stats(models.Model): class Stats(Model):
id = fields.UInt64Field(codec='ZSTD(10)') id = UInt64Field(codec='ZSTD(10)')
timestamp = fields.DateTimeField(codec='Delta,ZSTD') timestamp = DateTimeField(codec='Delta,ZSTD')
timestamp_date = fields.DateField(codec='Delta(4),ZSTD(22)') timestamp_date = DateField(codec='Delta(4),ZSTD(22)')
metadata_id = fields.Int64Field(codec='LZ4') metadata_id = Int64Field(codec='LZ4')
status = fields.StringField(codec='LZ4HC(10)') status = StringField(codec='LZ4HC(10)')
calculation = fields.NullableField(fields.Float32Field(), codec='ZSTD') calculation = NullableField(Float32Field(), codec='ZSTD')
alerts = fields.ArrayField(fields.FixedStringField(length=15), codec='Delta(2),LZ4HC') alerts = ArrayField(FixedStringField(length=15), codec='Delta(2),LZ4HC')
engine = MergeTree('timestamp_date', ('id', 'timestamp')) engine = MergeTree('timestamp_date', ('id', 'timestamp'))
``` ```

View File

@ -1,7 +1,7 @@
Field Types Field Types
=========== ===========
See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/data_types/) See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/sql-reference/data-types/)
The following field types are supported: The following field types are supported:
@ -58,14 +58,14 @@ Example of a model with an enum field:
```python ```python
Gender = Enum('Gender', 'male female unspecified') Gender = Enum('Gender', 'male female unspecified')
class Person(models.Model): class Person(Model):
first_name = fields.StringField() first_name = StringField()
last_name = fields.StringField() last_name = StringField()
birthday = fields.DateField() birthday = DateField()
gender = fields.Enum32Field(Gender) gender = Enum32Field(Gender)
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female) suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female)
``` ```
@ -76,13 +76,13 @@ Working with array fields
You can create array fields containing any data type, for example: You can create array fields containing any data type, for example:
```python ```python
class SensorData(models.Model): class SensorData(Model):
date = fields.DateField() date = DateField()
temperatures = fields.ArrayField(fields.Float32Field()) temperatures = ArrayField(Float32Field())
humidity_levels = fields.ArrayField(fields.UInt8Field()) humidity_levels = ArrayField(UInt8Field())
engine = engines.MergeTree('date', ('date',)) engine = MergeTree('date', ('date',))
data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66]) data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
``` ```
@ -91,19 +91,19 @@ Note that multidimensional arrays are not supported yet by the ORM.
Working with nullable fields Working with nullable fields
---------------------------- ----------------------------
[ClickHouse provides a NULL value support](https://clickhouse.yandex/docs/en/data_types/nullable). [ClickHouse provides a NULL value support](https://clickhouse.tech/docs/en/sql-reference/data-types/nullable/).
Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example: Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example:
```python ```python
class EventData(models.Model): class EventData(Model):
date = fields.DateField() date = DateField()
comment = fields.NullableField(fields.StringField(), extra_null_values={''}) comment = NullableField(StringField(), extra_null_values={''})
score = fields.NullableField(fields.UInt8Field()) score = NullableField(UInt8Field())
serie = fields.NullableField(fields.ArrayField(fields.UInt8Field())) serie = NullableField(ArrayField(UInt8Field()))
engine = engines.MergeTree('date', ('date',)) engine = MergeTree('date', ('date',))
score_event = EventData(date=date.today(), comment=None, score=5, serie=None) score_event = EventData(date=date.today(), comment=None, score=5, serie=None)
@ -124,7 +124,7 @@ Working with LowCardinality fields
Starting with version 19.0 ClickHouse offers a new type of field to improve the performance of queries Starting with version 19.0 ClickHouse offers a new type of field to improve the performance of queries
and compaction of columns for low entropy data. and compaction of columns for low entropy data.
[More specifically](https://github.com/yandex/ClickHouse/issues/4074) LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessary. [More specifically](https://github.com/yandex/ClickHouse/issues/4074) LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessary.
If the number of distinct values is pretty large, the dictionaries become local, several different dictionaries will be used for different ranges of data. For example, if you have too many distinct values in total, but only less than about a million values each day - then the queries by day will be processed efficiently, and queries for larger ranges will be processed rather efficiently. If the number of distinct values is pretty large, the dictionaries become local, several different dictionaries will be used for different ranges of data. For example, if you have too many distinct values in total, but only less than about a million values each day - then the queries by day will be processed efficiently, and queries for larger ranges will be processed rather efficiently.
LowCardinality works independently of (generic) fields compression. LowCardinality works independently of (generic) fields compression.
@ -133,19 +133,16 @@ The compression ratios of LowCardinality fields for text data may be significant
LowCardinality will give a performance boost, in the form of processing speed, if the number of distinct values is less than a few million. This is because data is processed in dictionary encoded form. LowCardinality will give a performance boost, in the form of processing speed, if the number of distinct values is less than a few million. This is because data is processed in dictionary encoded form.
You can find further information about LowCardinality in [this presentation](https://github.com/yandex/clickhouse-presentations/blob/master/meetup19/string_optimization.pdf). You can find further information [here](https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/).
Usage example: Usage example:
```python ```python
class LowCardinalityModel(models.Model): class LowCardinalityModel(Model):
date = fields.DateField() date = DateField()
int32 = fields.LowCardinalityField(fields.Int32Field()) string = LowCardinalityField(StringField())
float32 = fields.LowCardinalityField(fields.Float32Field()) nullable = LowCardinalityField(NullableField(StringField()))
string = fields.LowCardinalityField(fields.StringField()) array = ArrayField(LowCardinalityField(DateField()))
nullable = fields.LowCardinalityField(fields.NullableField(fields.StringField())) ...
array = fields.ArrayField(fields.LowCardinalityField(fields.UInt64Field()))
engine = MergeTree('date', ('date',))
``` ```
Note: `LowCardinality` field with an inner array field is not supported. Use an `ArrayField` with a `LowCardinality` inner field as seen in the example. Note: `LowCardinality` field with an inner array field is not supported. Use an `ArrayField` with a `LowCardinality` inner field as seen in the example.
@ -162,7 +159,7 @@ For example, we can create a BooleanField which will hold `True` and `False` val
Here's the full implementation: Here's the full implementation:
```python ```python
from infi.clickhouse_orm.fields import Field from infi.clickhouse_orm import Field
class BooleanField(Field): class BooleanField(Field):

View File

@ -1,7 +1,7 @@
Overview Overview
======== ========
This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/). It allows you to define model classes whose instances can be written to the database and read from it. This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.tech/). It allows you to define model classes whose instances can be written to the database and read from it.
Version 1.x supports Python 2.7 and 3.5+. Version 2.x dropped support for Python 2.7, and works only with Python 3.5+. Version 1.x supports Python 2.7 and 3.5+. Version 2.x dropped support for Python 2.7, and works only with Python 3.5+.

View File

@ -10,16 +10,16 @@ Defining Models
Models are defined in a way reminiscent of Django's ORM, by subclassing `Model`: Models are defined in a way reminiscent of Django's ORM, by subclassing `Model`:
from infi.clickhouse_orm import models, fields, engines from infi.clickhouse_orm import Model, StringField, DateField, Float32Field, MergeTree
class Person(models.Model): class Person(Model):
first_name = fields.StringField() first_name = StringField()
last_name = fields.StringField() last_name = StringField()
birthday = fields.DateField() birthday = DateField()
height = fields.Float32Field() height = Float32Field()
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
The columns in the database table are represented by model fields. Each field has a type, which matches the type of the corresponding database column. All the supported fields types are listed [here](field_types.md). The columns in the database table are represented by model fields. Each field has a type, which matches the type of the corresponding database column. All the supported fields types are listed [here](field_types.md).
@ -29,7 +29,7 @@ A model must have an `engine`, which determines how its table is stored on disk
Each field has a "natural" default value - empty string for string fields, zero for numeric fields etc. To specify a different value use the `default` parameter: Each field has a "natural" default value - empty string for string fields, zero for numeric fields etc. To specify a different value use the `default` parameter:
first_name = fields.StringField(default="anonymous") first_name = StringField(default="anonymous")
For additional details see [here](field_options.md). For additional details see [here](field_options.md).
@ -37,7 +37,7 @@ For additional details see [here](field_options.md).
To allow null values in a field, wrap it inside a `NullableField`: To allow null values in a field, wrap it inside a `NullableField`:
birthday = fields.NullableField(fields.DateField()) birthday = NullableField(DateField())
In this case, the default value for that field becomes `null` unless otherwise specified. In this case, the default value for that field becomes `null` unless otherwise specified.
@ -47,7 +47,7 @@ For more information about `NullableField` see [Field Types](field_types.md).
The value of a materialized field is calculated from other fields in the model. For example: The value of a materialized field is calculated from other fields in the model. For example:
year_born = fields.Int16Field(materialized=F.toYear(birthday)) year_born = Int16Field(materialized=F.toYear(birthday))
Materialized fields are read-only, meaning that their values are not sent to the database when inserting records. Materialized fields are read-only, meaning that their values are not sent to the database when inserting records.
@ -67,7 +67,7 @@ For additional details see [here](field_options.md).
The table name used for the model is its class name, converted to lowercase. To override the default name, implement the `table_name` method: The table name used for the model is its class name, converted to lowercase. To override the default name, implement the `table_name` method:
class Person(models.Model): class Person(Model):
... ...
@ -100,7 +100,7 @@ Inserting to the Database
To write your instances to ClickHouse, you need a `Database` instance: To write your instances to ClickHouse, you need a `Database` instance:
from infi.clickhouse_orm.database import Database from infi.clickhouse_orm import Database
db = Database('my_test_db') db = Database('my_test_db')
@ -136,7 +136,7 @@ It is possible to select only a subset of the columns, and the rest will receive
The ORM provides a way to build simple queries without writing SQL by hand. The previous snippet can be written like this: The ORM provides a way to build simple queries without writing SQL by hand. The previous snippet can be written like this:
for person in Person.objects_in(db).filter(last_name='Smith').only('first_name'): for person in Person.objects_in(db).filter(Person.last_name == 'Smith').only('first_name'):
print(person.first_name) print(person.first_name)
See [Querysets](querysets.md) for more information. See [Querysets](querysets.md) for more information.

View File

@ -448,7 +448,7 @@ Extends Engine
Extends Engine Extends Engine
Here we define Buffer engine Here we define Buffer engine
Read more here https://clickhouse.yandex/reference_en.html#Buffer Read more here https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/
#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000) #### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000)

View File

@ -1,7 +1,7 @@
System Models System Models
============= =============
[Clickhouse docs](https://clickhouse.yandex/docs/en/system_tables/). [Clickhouse docs](https://clickhouse.tech/docs/en/operations/system-tables/).
System models are read only models for implementing part of the system's functionality, and for providing access to information about how the system is working. System models are read only models for implementing part of the system's functionality, and for providing access to information about how the system is working.
@ -14,7 +14,7 @@ Currently the following system models are supported:
Partitions and Parts Partitions and Parts
-------------------- --------------------
[ClickHouse docs](https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts). [ClickHouse docs](https://clickhouse.tech/docs/en/sql-reference/statements/alter/#alter_manipulations-with-partitions).
A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part. A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part.
@ -30,8 +30,7 @@ A partition in a table is data for a single calendar month. Table "system.parts"
Usage example: Usage example:
from infi.clickhouse_orm.database import Database from infi.clickhouse_orm import Database, SystemPart
from infi.clickhouse_orm.system_models import SystemPart
db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger') db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database
if len(partitions) > 0: if len(partitions) > 0:

View File

@ -1,7 +1,7 @@
Table Engines Table Engines
============= =============
See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/operations/table_engines/) See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/engines/table-engines/)
Each model must have an engine instance, used when creating the table in ClickHouse. Each model must have an engine instance, used when creating the table in ClickHouse.
@ -24,11 +24,11 @@ Simple Engines
`TinyLog`, `Log` and `Memory` engines do not require any parameters: `TinyLog`, `Log` and `Memory` engines do not require any parameters:
engine = engines.TinyLog() engine = TinyLog()
engine = engines.Log() engine = Log()
engine = engines.Memory() engine = Memory()
Engines in the MergeTree Family Engines in the MergeTree Family
@ -36,28 +36,28 @@ Engines in the MergeTree Family
To define a `MergeTree` engine, supply the date column name and the names (or expressions) for the key columns: To define a `MergeTree` engine, supply the date column name and the names (or expressions) for the key columns:
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate')) engine = MergeTree('EventDate', ('CounterID', 'EventDate'))
You may also provide a sampling expression: You may also provide a sampling expression:
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)') engine = MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr=F.intHash32(UserID))
A `CollapsingMergeTree` engine is defined in a similar manner, but requires also a sign column: A `CollapsingMergeTree` engine is defined in a similar manner, but requires also a sign column:
engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign') engine = CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')
For a `SummingMergeTree` you can optionally specify the summing columns: For a `SummingMergeTree` you can optionally specify the summing columns:
engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), engine = SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
summing_cols=('Shows', 'Clicks', 'Cost')) summing_cols=('Shows', 'Clicks', 'Cost'))
For a `ReplacingMergeTree` you can optionally specify the version column: For a `ReplacingMergeTree` you can optionally specify the version column:
engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version') engine = ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')
### Custom partitioning ### Custom partitioning
ClickHouse supports [custom partitioning](https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/) expressions since version 1.1.54310 ClickHouse supports [custom partitioning](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key/) expressions since version 1.1.54310
You can use custom partitioning with any `MergeTree` family engine. You can use custom partitioning with any `MergeTree` family engine.
To set custom partitioning: To set custom partitioning:
@ -69,12 +69,12 @@ Standard monthly partitioning by date column can be specified using the `toYYYYM
Example: Example:
engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version', engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
partition_key=('toYYYYMM(EventDate)', 'BannerID')) partition_key=(F.toYYYYMM(EventDate), 'BannerID'))
### Primary key ### Primary key
ClickHouse supports [custom primary key](https://clickhouse.yandex/docs/en/operations/table_engines/mergetree/#primary-keys-and-indexes-in-queries/) expressions since version 1.1.54310 ClickHouse supports [custom primary key](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#primary-keys-and-indexes-in-queries) expressions since version 1.1.54310
You can use custom primary key with any `MergeTree` family engine. You can use custom primary key with any `MergeTree` family engine.
To set a custom primary key add the `primary_key` parameter. It should be a tuple of expressions that form the primary key. To set a custom primary key add the `primary_key` parameter. It should be a tuple of expressions that form the primary key.
@ -83,14 +83,14 @@ By default primary key is equal to order_by expression
Example: Example:
engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version', engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
partition_key=('toYYYYMM(EventDate)', 'BannerID'), primary_key=('OrderID',)) partition_key=(F.toYYYYMM(EventDate), 'BannerID'), primary_key=('OrderID',))
### Data Replication ### Data Replication
Any of the above engines can be converted to a replicated engine (e.g. `ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`: Any of the above engines can be converted to a replicated engine (e.g. `ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`:
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), engine = MergeTree('EventDate', ('CounterID', 'EventDate'),
replica_table_path='/clickhouse/tables/{layer}-{shard}/hits', replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
replica_name='{replica}') replica_name='{replica}')
@ -99,16 +99,16 @@ Buffer Engine
------------- -------------
A `Buffer` engine is only used in conjunction with a `BufferModel`. A `Buffer` engine is only used in conjunction with a `BufferModel`.
The model should be a subclass of both `models.BufferModel` and the main model. The model should be a subclass of both `BufferModel` and the main model.
The main model is also passed to the engine: The main model is also passed to the engine:
class PersonBuffer(models.BufferModel, Person): class PersonBuffer(BufferModel, Person):
engine = engines.Buffer(Person) engine = Buffer(Person)
Additional buffer parameters can optionally be specified: Additional buffer parameters can optionally be specified:
engine = engines.Buffer(Person, num_layers=16, min_time=10, engine = Buffer(Person, num_layers=16, min_time=10,
max_time=100, min_rows=10000, max_rows=1000000, max_time=100, min_rows=10000, max_rows=1000000,
min_bytes=10000000, max_bytes=100000000) min_bytes=10000000, max_bytes=100000000)
@ -123,14 +123,14 @@ Then you can insert objects into Buffer model and they will be handled by ClickH
Merge Engine Merge Engine
------------- -------------
[ClickHouse docs](https://clickhouse.yandex/docs/en/table_engines/merge/) [ClickHouse docs](https://clickhouse.tech/docs/en/operations/table_engines/merge/)
A `Merge` engine is only used in conjunction with a `MergeModel`. A `Merge` engine is only used in conjunction with a `MergeModel`.
This table does not store data itself, but allows reading from any number of other tables simultaneously. So you can't insert in it. This table does not store data itself, but allows reading from any number of other tables simultaneously. So you can't insert in it.
Engine parameter specifies re2 (similar to PCRE) regular expression, from which data is selected. Engine parameter specifies re2 (similar to PCRE) regular expression, from which data is selected.
class MergeTable(models.MergeModel): class MergeTable(MergeModel):
engine = engines.Merge('^table_prefix') engine = Merge('^table_prefix')
--- ---

View File

@ -20,7 +20,9 @@
* [Querysets](querysets.md#querysets) * [Querysets](querysets.md#querysets)
* [Filtering](querysets.md#filtering) * [Filtering](querysets.md#filtering)
* [Using the in Operator](querysets.md#using-the-in-operator) * [Using IN and NOT IN](querysets.md#using-in-and-not-in)
* [Specifying PREWHERE conditions](querysets.md#specifying-prewhere-conditions)
* [Old-style filter conditions](querysets.md#old-style-filter-conditions)
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence) * [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
* [Ordering](querysets.md#ordering) * [Ordering](querysets.md#ordering)
* [Omitting Fields](querysets.md#omitting-fields) * [Omitting Fields](querysets.md#omitting-fields)
@ -29,6 +31,7 @@
* [Slicing](querysets.md#slicing) * [Slicing](querysets.md#slicing)
* [Pagination](querysets.md#pagination) * [Pagination](querysets.md#pagination)
* [Aggregation](querysets.md#aggregation) * [Aggregation](querysets.md#aggregation)
* [Adding totals](querysets.md#adding-totals)
* [Field Options](field_options.md#field-options) * [Field Options](field_options.md#field-options)
* [default](field_options.md#default) * [default](field_options.md#default)

View File

@ -74,7 +74,7 @@ class MergeTree(Engine):
name = 'Replicated' + name name = 'Replicated' + name
# In ClickHouse 1.1.54310 custom partitioning key was introduced # In ClickHouse 1.1.54310 custom partitioning key was introduced
# https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/ # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
# Let's check version and use new syntax if available # Let's check version and use new syntax if available
if db.server_version >= (1, 1, 54310): if db.server_version >= (1, 1, 54310):
partition_sql = "PARTITION BY (%s) ORDER BY (%s)" \ partition_sql = "PARTITION BY (%s) ORDER BY (%s)" \
@ -94,7 +94,7 @@ class MergeTree(Engine):
from infi.clickhouse_orm.database import DatabaseException from infi.clickhouse_orm.database import DatabaseException
raise DatabaseException("Custom partitioning is not supported before ClickHouse 1.1.54310. " raise DatabaseException("Custom partitioning is not supported before ClickHouse 1.1.54310. "
"Please update your server or use date_col syntax." "Please update your server or use date_col syntax."
"https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/") "https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/")
else: else:
partition_sql = '' partition_sql = ''
@ -107,7 +107,7 @@ class MergeTree(Engine):
params += ["'%s'" % self.replica_table_path, "'%s'" % self.replica_name] params += ["'%s'" % self.replica_table_path, "'%s'" % self.replica_name]
# In ClickHouse 1.1.54310 custom partitioning key was introduced # In ClickHouse 1.1.54310 custom partitioning key was introduced
# https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/ # https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/
# These parameters are processed in create_table_sql directly. # These parameters are processed in create_table_sql directly.
# In previous ClickHouse versions this syntax does not work. # In previous ClickHouse versions this syntax does not work.
if db.server_version < (1, 1, 54310): if db.server_version < (1, 1, 54310):
@ -172,7 +172,7 @@ class Buffer(Engine):
""" """
Buffers the data to write in RAM, periodically flushing it to another table. Buffers the data to write in RAM, periodically flushing it to another table.
Must be used in conjunction with a `BufferModel`. Must be used in conjunction with a `BufferModel`.
Read more [here](https://clickhouse.yandex/docs/en/table_engines/buffer/). Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/).
""" """
#Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) #Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
@ -203,7 +203,7 @@ class Merge(Engine):
The Merge engine (not to be confused with MergeTree) does not store data itself, The Merge engine (not to be confused with MergeTree) does not store data itself,
but allows reading from any number of other tables simultaneously. but allows reading from any number of other tables simultaneously.
Writing to a table is not supported Writing to a table is not supported
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
""" """
def __init__(self, table_regex): def __init__(self, table_regex):
@ -222,7 +222,7 @@ class Distributed(Engine):
During a read, the table indexes on remote servers are used, if there are any. During a read, the table indexes on remote servers are used, if there are any.
See full documentation here See full documentation here
https://clickhouse.yandex/docs/en/table_engines/distributed.html https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
""" """
def __init__(self, cluster, table=None, sharding_key=None): def __init__(self, cluster, table=None, sharding_key=None):
""" """

View File

@ -362,7 +362,7 @@ class MergeModel(Model):
''' '''
Model for Merge engine Model for Merge engine
Predefines the virtual _table column and ensures that rows can't be inserted into this table type Predefines the virtual _table column and ensures that rows can't be inserted into this table type
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge https://clickhouse.tech/docs/en/single/index.html#document-table_engines/merge
''' '''
readonly = True readonly = True

View File

@ -641,7 +641,7 @@ class AggregateQuerySet(QuerySet):
""" """
Adds WITH TOTALS modifier to GROUP BY, making the query return an extra row Adds WITH TOTALS modifier to GROUP BY, making the query return an extra row
with aggregate function calculated across all the rows. More information: with aggregate function calculated across all the rows. More information:
https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier https://clickhouse.tech/docs/en/query_language/select/#with-totals-modifier
""" """
qs = copy(self) qs = copy(self)
qs._grouping_with_totals = True qs._grouping_with_totals = True

View File

@ -1,6 +1,6 @@
""" """
This file contains read-only system models that can be retrieved from the database This file contains read-only system models that can be retrieved from the database
https://clickhouse.yandex/docs/en/system_tables/ https://clickhouse.tech/docs/en/system_tables/
""" """
from __future__ import unicode_literals from __future__ import unicode_literals
@ -14,7 +14,7 @@ class SystemPart(Model):
""" """
Contains information about parts of a table in the MergeTree family. Contains information about parts of a table in the MergeTree family.
This model operates only on the fields described in the reference. Other fields are ignored. This model operates only on the fields described in the reference. Other fields are ignored.
https://clickhouse.yandex/docs/en/system_tables/system.parts/ https://clickhouse.tech/docs/en/system_tables/system.parts/
""" """
OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'}) OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'})
@ -27,7 +27,7 @@ class SystemPart(Model):
partition = StringField() # Name of the partition, in the format YYYYMM. partition = StringField() # Name of the partition, in the format YYYYMM.
name = StringField() # Name of the part. name = StringField() # Name of the part.
# This field is present in the docs (https://clickhouse.yandex/docs/en/single/index.html#system-parts), # This field is present in the docs (https://clickhouse.tech/docs/en/single/index.html#system-parts),
# but is absent in ClickHouse (in version 1.1.54245) # but is absent in ClickHouse (in version 1.1.54245)
# replicated = UInt8Field() # Whether the part belongs to replicated data. # replicated = UInt8Field() # Whether the part belongs to replicated data.
@ -55,7 +55,7 @@ class SystemPart(Model):
""" """
The following methods return SQL for operations that can be performed on partitions The following methods return SQL for operations that can be performed on partitions
https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts https://clickhouse.tech/docs/en/query_language/queries/#manipulations-with-partitions-and-parts
""" """
def _partition_operation_sql(self, operation, settings=None, from_part=None): def _partition_operation_sql(self, operation, settings=None, from_part=None):
""" """