mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2024-11-22 00:56:34 +03:00
docs
This commit is contained in:
parent
e97e48a695
commit
113ac7ad4a
13
README.md
13
README.md
|
@ -8,11 +8,7 @@ Let's jump right in with a simple example of monitoring CPU usage. First we need
|
|||
connect to the database and create a table for the model:
|
||||
|
||||
```python
|
||||
from infi.clickhouse_orm.database import Database
|
||||
from infi.clickhouse_orm.models import Model
|
||||
from infi.clickhouse_orm.fields import *
|
||||
from infi.clickhouse_orm.engines import Memory
|
||||
from infi.clickhouse_orm.funcs import F
|
||||
from infi.clickhouse_orm import Database, Model, DateTimeField, UInt16Field, Float32Field, Memory, F
|
||||
|
||||
class CPUStats(Model):
|
||||
|
||||
|
@ -46,12 +42,13 @@ Querying the table is easy, using either the query builder or raw SQL:
|
|||
|
||||
```python
|
||||
# Calculate what percentage of the time CPU 1 was over 95% busy
|
||||
total = CPUStats.objects_in(db).filter(CPUStats.cpu_id == 1).count()
|
||||
busy = CPUStats.objects_in(db).filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
|
||||
queryset = CPUStats.objects_in(db)
|
||||
total = queryset.filter(CPUStats.cpu_id == 1).count()
|
||||
busy = queryset.filter(CPUStats.cpu_id == 1, CPUStats.cpu_percent > 95).count()
|
||||
print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))
|
||||
|
||||
# Calculate the average usage per CPU
|
||||
for row in CPUStats.objects_in(db).aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
|
||||
for row in queryset.aggregate(CPUStats.cpu_id, average=F.avg(CPUStats.cpu_percent)):
|
||||
print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
|
||||
```
|
||||
|
||||
|
|
|
@ -840,7 +840,7 @@ Extends Engine
|
|||
|
||||
Buffers the data to write in RAM, periodically flushing it to another table.
|
||||
Must be used in conjunction with a `BufferModel`.
|
||||
Read more [here](https://clickhouse.yandex/docs/en/table_engines/buffer/).
|
||||
Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/).
|
||||
|
||||
#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000)
|
||||
|
||||
|
@ -853,7 +853,7 @@ Extends Engine
|
|||
The Merge engine (not to be confused with MergeTree) does not store data itself,
|
||||
but allows reading from any number of other tables simultaneously.
|
||||
Writing to a table is not supported
|
||||
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
|
||||
https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
|
||||
|
||||
#### Merge(table_regex)
|
||||
|
||||
|
@ -869,7 +869,7 @@ Reading is automatically parallelized.
|
|||
During a read, the table indexes on remote servers are used, if there are any.
|
||||
|
||||
See full documentation here
|
||||
https://clickhouse.yandex/docs/en/table_engines/distributed.html
|
||||
https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
|
||||
|
||||
#### Distributed(cluster, table=None, sharding_key=None)
|
||||
|
||||
|
@ -1165,7 +1165,7 @@ Returns the selected fields or expressions as a SQL string.
|
|||
|
||||
Adds the WITH TOTALS modifier to GROUP BY, making the query return an extra row
|
||||
with aggregate function calculated across all the rows. More information:
|
||||
https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier
|
||||
https://clickhouse.tech/docs/en/query_language/select/#with-totals-modifier
|
||||
|
||||
|
||||
### Q
|
||||
|
|
|
@ -55,14 +55,14 @@ Sensor.temperature * 1.8 + 32
|
|||
Inside model class definitions omit the class name:
|
||||
```python
|
||||
class Person(Model):
|
||||
height_cm = fields.Float32Field()
|
||||
height_inch = fields.Float32Field(alias=height_cm/2.54)
|
||||
height_cm = Float32Field()
|
||||
height_inch = Float32Field(alias=height_cm/2.54)
|
||||
...
|
||||
```
|
||||
|
||||
### Parametric functions
|
||||
|
||||
Some of ClickHouse's aggregate functions can accept not only argument columns, but a set of parameters - constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. For example:
|
||||
Some of ClickHouse's aggregate functions can accept one or more parameters - constants for initialization that affect the way the function works. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. For example:
|
||||
```python
|
||||
# Most common last names
|
||||
F.topK(5)(Person.last_name)
|
||||
|
@ -81,8 +81,8 @@ def normalize_string(s):
|
|||
Then we can use this expression anywhere we need it:
|
||||
```python
|
||||
class Event(Model):
|
||||
code = fields.StringField()
|
||||
normalized_code = fields.StringField(materialized=normalize_string(code))
|
||||
code = StringField()
|
||||
normalized_code = StringField(materialized=normalize_string(code))
|
||||
```
|
||||
|
||||
### Which functions are available?
|
||||
|
@ -93,6 +93,7 @@ expr = F("someFunctionName", arg1, arg2, ...)
|
|||
```
|
||||
|
||||
Note that higher-order database functions (those that use lambda expressions) are not supported.
|
||||
|
||||
---
|
||||
|
||||
[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Importing ORM Classes >>](importing_orm_classes.md)
|
||||
|
|
|
@ -16,13 +16,13 @@ Note that `default`, `alias` and `materialized` are mutually exclusive - you can
|
|||
Specifies a default value to use for the field. If not given, the field will have a default value based on its type: empty string for string fields, zero for numeric fields, etc.
|
||||
The default value can be a Python value suitable for the field type, or an expression. For example:
|
||||
```python
|
||||
class Event(models.Model):
|
||||
class Event(Model):
|
||||
|
||||
name = fields.StringField(default="EVENT")
|
||||
repeated = fields.UInt32Field(default=1)
|
||||
created = fields.DateTimeField(default=F.now())
|
||||
name = StringField(default="EVENT")
|
||||
repeated = UInt32Field(default=1)
|
||||
created = DateTimeField(default=F.now())
|
||||
|
||||
engine = engines.Memory()
|
||||
engine = Memory()
|
||||
...
|
||||
```
|
||||
When creating a model instance, any fields you do not specify get their default value. Fields that use a default expression are assigned a sentinel value of `infi.clickhouse_orm.utils.NO_VALUE` instead. For example:
|
||||
|
@ -38,18 +38,18 @@ When creating a model instance, any fields you do not specify get their default
|
|||
The `alias` and `materialized` attributes expect an expression that gets calculated by the database. The difference is that `alias` fields are calculated on the fly, while `materialized` fields are calculated when the record is inserted, and are stored on disk.
|
||||
You can use any expression, and can refer to other model fields. For example:
|
||||
```python
|
||||
class Event(models.Model):
|
||||
class Event(Model):
|
||||
|
||||
created = fields.DateTimeField()
|
||||
created_date = fields.DateTimeField(materialized=F.toDate(created))
|
||||
name = fields.StringField()
|
||||
normalized_name = fields.StringField(alias=F.upper(F.trim(name)))
|
||||
created = DateTimeField()
|
||||
created_date = DateTimeField(materialized=F.toDate(created))
|
||||
name = StringField()
|
||||
normalized_name = StringField(alias=F.upper(F.trim(name)))
|
||||
|
||||
engine = engines.Memory()
|
||||
engine = Memory()
|
||||
```
|
||||
For backwards compatibility with older versions of the ORM, you can pass the expression as an SQL string:
|
||||
```python
|
||||
created_date = fields.DateTimeField(materialized="toDate(created)")
|
||||
created_date = DateTimeField(materialized="toDate(created)")
|
||||
```
|
||||
Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query.
|
||||
|
||||
|
@ -89,15 +89,15 @@ Recommended usage for codecs:
|
|||
|
||||
Example:
|
||||
```python
|
||||
class Stats(models.Model):
|
||||
class Stats(Model):
|
||||
|
||||
id = fields.UInt64Field(codec='ZSTD(10)')
|
||||
timestamp = fields.DateTimeField(codec='Delta,ZSTD')
|
||||
timestamp_date = fields.DateField(codec='Delta(4),ZSTD(22)')
|
||||
metadata_id = fields.Int64Field(codec='LZ4')
|
||||
status = fields.StringField(codec='LZ4HC(10)')
|
||||
calculation = fields.NullableField(fields.Float32Field(), codec='ZSTD')
|
||||
alerts = fields.ArrayField(fields.FixedStringField(length=15), codec='Delta(2),LZ4HC')
|
||||
id = UInt64Field(codec='ZSTD(10)')
|
||||
timestamp = DateTimeField(codec='Delta,ZSTD')
|
||||
timestamp_date = DateField(codec='Delta(4),ZSTD(22)')
|
||||
metadata_id = Int64Field(codec='LZ4')
|
||||
status = StringField(codec='LZ4HC(10)')
|
||||
calculation = NullableField(Float32Field(), codec='ZSTD')
|
||||
alerts = ArrayField(FixedStringField(length=15), codec='Delta(2),LZ4HC')
|
||||
|
||||
engine = MergeTree('timestamp_date', ('id', 'timestamp'))
|
||||
```
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
Field Types
|
||||
===========
|
||||
|
||||
See: [ClickHouse Documentation](https://clickhouse.yandex/docs/en/data_types/)
|
||||
See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/sql-reference/data-types/)
|
||||
|
||||
The following field types are supported:
|
||||
|
||||
|
@ -58,14 +58,14 @@ Example of a model with an enum field:
|
|||
```python
|
||||
Gender = Enum('Gender', 'male female unspecified')
|
||||
|
||||
class Person(models.Model):
|
||||
class Person(Model):
|
||||
|
||||
first_name = fields.StringField()
|
||||
last_name = fields.StringField()
|
||||
birthday = fields.DateField()
|
||||
gender = fields.Enum32Field(Gender)
|
||||
first_name = StringField()
|
||||
last_name = StringField()
|
||||
birthday = DateField()
|
||||
gender = Enum32Field(Gender)
|
||||
|
||||
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
|
||||
engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
|
||||
|
||||
suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female)
|
||||
```
|
||||
|
@ -76,13 +76,13 @@ Working with array fields
|
|||
You can create array fields containing any data type, for example:
|
||||
|
||||
```python
|
||||
class SensorData(models.Model):
|
||||
class SensorData(Model):
|
||||
|
||||
date = fields.DateField()
|
||||
temperatures = fields.ArrayField(fields.Float32Field())
|
||||
humidity_levels = fields.ArrayField(fields.UInt8Field())
|
||||
date = DateField()
|
||||
temperatures = ArrayField(Float32Field())
|
||||
humidity_levels = ArrayField(UInt8Field())
|
||||
|
||||
engine = engines.MergeTree('date', ('date',))
|
||||
engine = MergeTree('date', ('date',))
|
||||
|
||||
data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
|
||||
```
|
||||
|
@ -91,19 +91,19 @@ Note that multidimensional arrays are not supported yet by the ORM.
|
|||
|
||||
Working with nullable fields
|
||||
----------------------------
|
||||
[ClickHouse provides a NULL value support](https://clickhouse.yandex/docs/en/data_types/nullable).
|
||||
[ClickHouse provides a NULL value support](https://clickhouse.tech/docs/en/sql-reference/data-types/nullable/).
|
||||
|
||||
Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example:
|
||||
|
||||
```python
|
||||
class EventData(models.Model):
|
||||
class EventData(Model):
|
||||
|
||||
date = fields.DateField()
|
||||
comment = fields.NullableField(fields.StringField(), extra_null_values={''})
|
||||
score = fields.NullableField(fields.UInt8Field())
|
||||
serie = fields.NullableField(fields.ArrayField(fields.UInt8Field()))
|
||||
date = DateField()
|
||||
comment = NullableField(StringField(), extra_null_values={''})
|
||||
score = NullableField(UInt8Field())
|
||||
serie = NullableField(ArrayField(UInt8Field()))
|
||||
|
||||
engine = engines.MergeTree('date', ('date',))
|
||||
engine = MergeTree('date', ('date',))
|
||||
|
||||
|
||||
score_event = EventData(date=date.today(), comment=None, score=5, serie=None)
|
||||
|
@ -124,7 +124,7 @@ Working with LowCardinality fields
|
|||
Starting with version 19.0 ClickHouse offers a new type of field to improve the performance of queries
|
||||
and compaction of columns for low entropy data.
|
||||
|
||||
[More specifically](https://github.com/yandex/ClickHouse/issues/4074) LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessarily.
|
||||
[More specifically](https://github.com/ClickHouse/ClickHouse/issues/4074) LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessary.
|
||||
If the number of distinct values is pretty large, the dictionaries become local, several different dictionaries will be used for different ranges of data. For example, if you have too many distinct values in total, but only less than about a million values each day - then the queries by day will be processed efficiently, and queries for larger ranges will be processed rather efficiently.
|
||||
|
||||
LowCardinality works independently of (generic) fields compression.
|
||||
|
@ -133,19 +133,16 @@ The compression ratios of LowCardinality fields for text data may be significant
|
|||
|
||||
LowCardinality will give a performance boost, in the form of processing speed, if the number of distinct values is less than a few million. This is because data is processed in dictionary encoded form.
|
||||
|
||||
You can find further information about LowCardinality in [this presentation](https://github.com/yandex/clickhouse-presentations/blob/master/meetup19/string_optimization.pdf).
|
||||
You can find further information [here](https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/).
|
||||
|
||||
Usage example:
|
||||
```python
|
||||
class LowCardinalityModel(models.Model):
|
||||
date = fields.DateField()
|
||||
int32 = fields.LowCardinalityField(fields.Int32Field())
|
||||
float32 = fields.LowCardinalityField(fields.Float32Field())
|
||||
string = fields.LowCardinalityField(fields.StringField())
|
||||
nullable = fields.LowCardinalityField(fields.NullableField(fields.StringField()))
|
||||
array = fields.ArrayField(fields.LowCardinalityField(fields.UInt64Field()))
|
||||
|
||||
engine = MergeTree('date', ('date',))
|
||||
class LowCardinalityModel(Model):
|
||||
date = DateField()
|
||||
string = LowCardinalityField(StringField())
|
||||
nullable = LowCardinalityField(NullableField(StringField()))
|
||||
array = ArrayField(LowCardinalityField(DateField()))
|
||||
...
|
||||
```
|
||||
|
||||
Note: `LowCardinality` field with an inner array field is not supported. Use an `ArrayField` with a `LowCardinality` inner field as seen in the example.
|
||||
|
@ -162,7 +159,7 @@ For example, we can create a BooleanField which will hold `True` and `False` val
|
|||
Here's the full implementation:
|
||||
|
||||
```python
|
||||
from infi.clickhouse_orm.fields import Field
|
||||
from infi.clickhouse_orm import Field
|
||||
|
||||
class BooleanField(Field):
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
Overview
|
||||
========
|
||||
|
||||
This project is simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/). It allows you to define model classes whose instances can be written to the database and read from it.
|
||||
This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.tech/). It allows you to define model classes whose instances can be written to the database and read from it.
|
||||
|
||||
Version 1.x supports Python 2.7 and 3.5+. Version 2.x dropped support for Python 2.7, and works only with Python 3.5+.
|
||||
|
||||
|
|
|
@ -10,16 +10,16 @@ Defining Models
|
|||
|
||||
Models are defined in a way reminiscent of Django's ORM, by subclassing `Model`:
|
||||
|
||||
from infi.clickhouse_orm import models, fields, engines
|
||||
from infi.clickhouse_orm import Model, StringField, DateField, Float32Field, MergeTree
|
||||
|
||||
class Person(models.Model):
|
||||
class Person(Model):
|
||||
|
||||
first_name = fields.StringField()
|
||||
last_name = fields.StringField()
|
||||
birthday = fields.DateField()
|
||||
height = fields.Float32Field()
|
||||
first_name = StringField()
|
||||
last_name = StringField()
|
||||
birthday = DateField()
|
||||
height = Float32Field()
|
||||
|
||||
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
|
||||
engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
|
||||
|
||||
The columns in the database table are represented by model fields. Each field has a type, which matches the type of the corresponding database column. All the supported fields types are listed [here](field_types.md).
|
||||
|
||||
|
@ -29,7 +29,7 @@ A model must have an `engine`, which determines how its table is stored on disk
|
|||
|
||||
Each field has a "natural" default value - empty string for string fields, zero for numeric fields etc. To specify a different value use the `default` parameter:
|
||||
|
||||
first_name = fields.StringField(default="anonymous")
|
||||
first_name = StringField(default="anonymous")
|
||||
|
||||
For additional details see [here](field_options.md).
|
||||
|
||||
|
@ -37,7 +37,7 @@ For additional details see [here](field_options.md).
|
|||
|
||||
To allow null values in a field, wrap it inside a `NullableField`:
|
||||
|
||||
birthday = fields.NullableField(fields.DateField())
|
||||
birthday = NullableField(DateField())
|
||||
|
||||
In this case, the default value for that field becomes `null` unless otherwise specified.
|
||||
|
||||
|
@ -47,7 +47,7 @@ For more information about `NullableField` see [Field Types](field_types.md).
|
|||
|
||||
The value of a materialized field is calculated from other fields in the model. For example:
|
||||
|
||||
year_born = fields.Int16Field(materialized=F.toYear(birthday))
|
||||
year_born = Int16Field(materialized=F.toYear(birthday))
|
||||
|
||||
Materialized fields are read-only, meaning that their values are not sent to the database when inserting records.
|
||||
|
||||
|
@ -67,7 +67,7 @@ For additional details see [here](field_options.md).
|
|||
|
||||
The table name used for the model is its class name, converted to lowercase. To override the default name, implement the `table_name` method:
|
||||
|
||||
class Person(models.Model):
|
||||
class Person(Model):
|
||||
|
||||
...
|
||||
|
||||
|
@ -100,7 +100,7 @@ Inserting to the Database
|
|||
|
||||
To write your instances to ClickHouse, you need a `Database` instance:
|
||||
|
||||
from infi.clickhouse_orm.database import Database
|
||||
from infi.clickhouse_orm import Database
|
||||
|
||||
db = Database('my_test_db')
|
||||
|
||||
|
@ -136,7 +136,7 @@ It is possible to select only a subset of the columns, and the rest will receive
|
|||
|
||||
The ORM provides a way to build simple queries without writing SQL by hand. The previous snippet can be written like this:
|
||||
|
||||
for person in Person.objects_in(db).filter(last_name='Smith').only('first_name'):
|
||||
for person in Person.objects_in(db).filter(Person.last_name == 'Smith').only('first_name'):
|
||||
print(person.first_name)
|
||||
|
||||
See [Querysets](querysets.md) for more information.
|
||||
|
|
|
@ -448,7 +448,7 @@ Extends Engine
|
|||
Extends Engine
|
||||
|
||||
Here we define Buffer engine
|
||||
Read more here https://clickhouse.yandex/reference_en.html#Buffer
|
||||
Read more here https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/
|
||||
|
||||
#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000)
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
System Models
|
||||
=============
|
||||
|
||||
[Clickhouse docs](https://clickhouse.yandex/docs/en/system_tables/).
|
||||
[Clickhouse docs](https://clickhouse.tech/docs/en/operations/system-tables/).
|
||||
|
||||
System models are read only models for implementing part of the system's functionality, and for providing access to information about how the system is working.
|
||||
|
||||
|
@ -14,7 +14,7 @@ Currently the following system models are supported:
|
|||
Partitions and Parts
|
||||
--------------------
|
||||
|
||||
[ClickHouse docs](https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts).
|
||||
[ClickHouse docs](https://clickhouse.tech/docs/en/sql-reference/statements/alter/#alter_manipulations-with-partitions).
|
||||
|
||||
A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part.
|
||||
|
||||
|
@ -30,8 +30,7 @@ A partition in a table is data for a single calendar month. Table "system.parts"
|
|||
|
||||
Usage example:
|
||||
|
||||
from infi.clickhouse_orm.database import Database
|
||||
from infi.clickhouse_orm.system_models import SystemPart
|
||||
from infi.clickhouse_orm import Database, SystemPart
|
||||
db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
|
||||
partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database
|
||||
if len(partitions) > 0:
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
Table Engines
|
||||
=============
|
||||
|
||||
See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/operations/table_engines/)
|
||||
See: [ClickHouse Documentation](https://clickhouse.tech/docs/en/engines/table-engines/)
|
||||
|
||||
Each model must have an engine instance, used when creating the table in ClickHouse.
|
||||
|
||||
|
@ -24,11 +24,11 @@ Simple Engines
|
|||
|
||||
`TinyLog`, `Log` and `Memory` engines do not require any parameters:
|
||||
|
||||
engine = engines.TinyLog()
|
||||
engine = TinyLog()
|
||||
|
||||
engine = engines.Log()
|
||||
engine = Log()
|
||||
|
||||
engine = engines.Memory()
|
||||
engine = Memory()
|
||||
|
||||
|
||||
Engines in the MergeTree Family
|
||||
|
@ -36,28 +36,28 @@ Engines in the MergeTree Family
|
|||
|
||||
To define a `MergeTree` engine, supply the date column name and the names (or expressions) for the key columns:
|
||||
|
||||
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'))
|
||||
engine = MergeTree('EventDate', ('CounterID', 'EventDate'))
|
||||
|
||||
You may also provide a sampling expression:
|
||||
|
||||
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)')
|
||||
engine = MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr=F.intHash32(UserID))
|
||||
|
||||
A `CollapsingMergeTree` engine is defined in a similar manner, but requires also a sign column:
|
||||
|
||||
engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')
|
||||
engine = CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')
|
||||
|
||||
For a `SummingMergeTree` you can optionally specify the summing columns:
|
||||
|
||||
engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
|
||||
summing_cols=('Shows', 'Clicks', 'Cost'))
|
||||
engine = SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
|
||||
summing_cols=('Shows', 'Clicks', 'Cost'))
|
||||
|
||||
For a `ReplacingMergeTree` you can optionally specify the version column:
|
||||
|
||||
engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')
|
||||
engine = ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')
|
||||
|
||||
### Custom partitioning
|
||||
|
||||
ClickHouse supports [custom partitioning](https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/) expressions since version 1.1.54310
|
||||
ClickHouse supports [custom partitioning](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key/) expressions since version 1.1.54310
|
||||
|
||||
You can use custom partitioning with any `MergeTree` family engine.
|
||||
To set custom partitioning:
|
||||
|
@ -69,12 +69,12 @@ Standard monthly partitioning by date column can be specified using the `toYYYYM
|
|||
|
||||
Example:
|
||||
|
||||
engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
|
||||
partition_key=('toYYYYMM(EventDate)', 'BannerID'))
|
||||
engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
|
||||
partition_key=(F.toYYYYMM(EventDate), 'BannerID'))
|
||||
|
||||
|
||||
### Primary key
|
||||
ClickHouse supports [custom primary key](https://clickhouse.yandex/docs/en/operations/table_engines/mergetree/#primary-keys-and-indexes-in-queries/) expressions since version 1.1.54310
|
||||
ClickHouse supports [custom primary key](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#primary-keys-and-indexes-in-queries) expressions since version 1.1.54310
|
||||
|
||||
You can use custom primary key with any `MergeTree` family engine.
|
||||
To set a custom primary key, add the `primary_key` parameter. It should be a tuple of expressions, by which the primary index is built.
|
||||
|
@ -83,34 +83,34 @@ By default primary key is equal to order_by expression
|
|||
|
||||
Example:
|
||||
|
||||
engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
|
||||
partition_key=('toYYYYMM(EventDate)', 'BannerID'), primary_key=('OrderID',))
|
||||
engine = ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
|
||||
partition_key=(F.toYYYYMM(EventDate), 'BannerID'), primary_key=('OrderID',))
|
||||
|
||||
### Data Replication
|
||||
|
||||
Any of the above engines can be converted to a replicated engine (e.g. `ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`:
|
||||
|
||||
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'),
|
||||
replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
|
||||
replica_name='{replica}')
|
||||
engine = MergeTree('EventDate', ('CounterID', 'EventDate'),
|
||||
replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
|
||||
replica_name='{replica}')
|
||||
|
||||
|
||||
Buffer Engine
|
||||
-------------
|
||||
|
||||
A `Buffer` engine is only used in conjunction with a `BufferModel`.
|
||||
The model should be a subclass of both `models.BufferModel` and the main model.
|
||||
The model should be a subclass of both `BufferModel` and the main model.
|
||||
The main model is also passed to the engine:
|
||||
|
||||
class PersonBuffer(models.BufferModel, Person):
|
||||
class PersonBuffer(BufferModel, Person):
|
||||
|
||||
engine = engines.Buffer(Person)
|
||||
engine = Buffer(Person)
|
||||
|
||||
Additional buffer parameters can optionally be specified:
|
||||
|
||||
engine = engines.Buffer(Person, num_layers=16, min_time=10,
|
||||
max_time=100, min_rows=10000, max_rows=1000000,
|
||||
min_bytes=10000000, max_bytes=100000000)
|
||||
engine = Buffer(Person, num_layers=16, min_time=10,
|
||||
max_time=100, min_rows=10000, max_rows=1000000,
|
||||
min_bytes=10000000, max_bytes=100000000)
|
||||
|
||||
Then you can insert objects into Buffer model and they will be handled by ClickHouse properly:
|
||||
|
||||
|
@ -123,14 +123,14 @@ Then you can insert objects into Buffer model and they will be handled by ClickH
|
|||
Merge Engine
|
||||
-------------
|
||||
|
||||
[ClickHouse docs](https://clickhouse.yandex/docs/en/table_engines/merge/)
|
||||
[ClickHouse docs](https://clickhouse.tech/docs/en/engines/table-engines/special/merge/)
|
||||
|
||||
A `Merge` engine is only used in conjunction with a `MergeModel`.
|
||||
This table does not store data itself, but allows reading from any number of other tables simultaneously. So you can't insert in it.
|
||||
Engine parameter specifies re2 (similar to PCRE) regular expression, from which data is selected.
|
||||
|
||||
class MergeTable(models.MergeModel):
|
||||
engine = engines.Merge('^table_prefix')
|
||||
class MergeTable(MergeModel):
|
||||
engine = Merge('^table_prefix')
|
||||
|
||||
|
||||
---
|
||||
|
|
|
@ -20,7 +20,9 @@
|
|||
|
||||
* [Querysets](querysets.md#querysets)
|
||||
* [Filtering](querysets.md#filtering)
|
||||
* [Using the in Operator](querysets.md#using-the-in-operator)
|
||||
* [Using IN and NOT IN](querysets.md#using-in-and-not-in)
|
||||
* [Specifying PREWHERE conditions](querysets.md#specifying-prewhere-conditions)
|
||||
* [Old-style filter conditions](querysets.md#old-style-filter-conditions)
|
||||
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
|
||||
* [Ordering](querysets.md#ordering)
|
||||
* [Omitting Fields](querysets.md#omitting-fields)
|
||||
|
@ -29,6 +31,7 @@
|
|||
* [Slicing](querysets.md#slicing)
|
||||
* [Pagination](querysets.md#pagination)
|
||||
* [Aggregation](querysets.md#aggregation)
|
||||
* [Adding totals](querysets.md#adding-totals)
|
||||
|
||||
* [Field Options](field_options.md#field-options)
|
||||
* [default](field_options.md#default)
|
||||
|
|
|
@ -74,7 +74,7 @@ class MergeTree(Engine):
|
|||
name = 'Replicated' + name
|
||||
|
||||
# In ClickHouse 1.1.54310 custom partitioning key was introduced
|
||||
# https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
|
||||
# https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key/
|
||||
# Let's check version and use new syntax if available
|
||||
if db.server_version >= (1, 1, 54310):
|
||||
partition_sql = "PARTITION BY (%s) ORDER BY (%s)" \
|
||||
|
@ -94,7 +94,7 @@ class MergeTree(Engine):
|
|||
from infi.clickhouse_orm.database import DatabaseException
|
||||
raise DatabaseException("Custom partitioning is not supported before ClickHouse 1.1.54310. "
|
||||
"Please update your server or use date_col syntax."
|
||||
"https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/")
|
||||
"https://clickhouse.tech/docs/en/table_engines/custom_partitioning_key/")
|
||||
else:
|
||||
partition_sql = ''
|
||||
|
||||
|
@ -107,7 +107,7 @@ class MergeTree(Engine):
|
|||
params += ["'%s'" % self.replica_table_path, "'%s'" % self.replica_name]
|
||||
|
||||
# In ClickHouse 1.1.54310 custom partitioning key was introduced
|
||||
# https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
|
||||
# https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key/
|
||||
# These parameters are processed in create_table_sql directly.
|
||||
# In previous ClickHouse versions this syntax does not work.
|
||||
if db.server_version < (1, 1, 54310):
|
||||
|
@ -172,7 +172,7 @@ class Buffer(Engine):
|
|||
"""
|
||||
Buffers the data to write in RAM, periodically flushing it to another table.
|
||||
Must be used in conjunction with a `BufferModel`.
|
||||
Read more [here](https://clickhouse.yandex/docs/en/table_engines/buffer/).
|
||||
Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/buffer/).
|
||||
"""
|
||||
|
||||
#Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
|
||||
|
@ -203,7 +203,7 @@ class Merge(Engine):
|
|||
The Merge engine (not to be confused with MergeTree) does not store data itself,
|
||||
but allows reading from any number of other tables simultaneously.
|
||||
Writing to a table is not supported
|
||||
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
|
||||
https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
|
||||
"""
|
||||
|
||||
def __init__(self, table_regex):
|
||||
|
@ -222,7 +222,7 @@ class Distributed(Engine):
|
|||
During a read, the table indexes on remote servers are used, if there are any.
|
||||
|
||||
See full documentation here
|
||||
https://clickhouse.yandex/docs/en/table_engines/distributed.html
|
||||
https://clickhouse.tech/docs/en/engines/table-engines/special/distributed/
|
||||
"""
|
||||
def __init__(self, cluster, table=None, sharding_key=None):
|
||||
"""
|
||||
|
|
|
@ -362,7 +362,7 @@ class MergeModel(Model):
|
|||
'''
|
||||
Model for Merge engine
|
||||
Predefines virtual _table column and ensures that rows can't be inserted into this table type
|
||||
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
|
||||
https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
|
||||
'''
|
||||
readonly = True
|
||||
|
||||
|
|
|
@ -641,7 +641,7 @@ class AggregateQuerySet(QuerySet):
|
|||
"""
|
||||
Adds the WITH TOTALS modifier to GROUP BY, making the query return an extra row
|
||||
with aggregate function calculated across all the rows. More information:
|
||||
https://clickhouse.yandex/docs/en/query_language/select/#with-totals-modifier
|
||||
https://clickhouse.tech/docs/en/query_language/select/#with-totals-modifier
|
||||
"""
|
||||
qs = copy(self)
|
||||
qs._grouping_with_totals = True
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
"""
|
||||
This file contains system readonly models that can be got from the database
|
||||
https://clickhouse.yandex/docs/en/system_tables/
|
||||
https://clickhouse.tech/docs/en/system_tables/
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
@ -14,7 +14,7 @@ class SystemPart(Model):
|
|||
"""
|
||||
Contains information about parts of a table in the MergeTree family.
|
||||
This model handles only the fields described in the reference. Other fields are ignored.
|
||||
https://clickhouse.yandex/docs/en/system_tables/system.parts/
|
||||
https://clickhouse.tech/docs/en/system_tables/system.parts/
|
||||
"""
|
||||
OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'})
|
||||
|
||||
|
@ -27,7 +27,7 @@ class SystemPart(Model):
|
|||
partition = StringField() # Name of the partition, in the format YYYYMM.
|
||||
name = StringField() # Name of the part.
|
||||
|
||||
# This field is present in the docs (https://clickhouse.yandex/docs/en/single/index.html#system-parts),
|
||||
# This field is present in the docs (https://clickhouse.tech/docs/en/single/index.html#system-parts),
|
||||
# but is absent in ClickHouse (in version 1.1.54245)
|
||||
# replicated = UInt8Field() # Whether the part belongs to replicated data.
|
||||
|
||||
|
@ -55,7 +55,7 @@ class SystemPart(Model):
|
|||
|
||||
"""
|
||||
The following methods return SQL for operations that can be performed on partitions
|
||||
https://clickhouse.yandex/docs/en/query_language/queries/#manipulations-with-partitions-and-parts
|
||||
https://clickhouse.tech/docs/en/query_language/queries/#manipulations-with-partitions-and-parts
|
||||
"""
|
||||
def _partition_operation_sql(self, operation, settings=None, from_part=None):
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue
Block a user