From e791923493f559a5d1b3115e05bc14ac157a854a Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 21 Apr 2018 13:48:00 +0300 Subject: [PATCH] Update docs --- CHANGELOG.md | 3 +++ docs/class_reference.md | 26 ++++++-------------------- docs/table_engines.md | 31 ++++++++++++++++--------------- docs/toc.md | 1 + 4 files changed, 26 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f16c12a..23da704 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ Unreleased - Distributed engine support (tsionyx) - `_fields` and `_writable_fields` are OrderedDicts - note that this might break backwards compatibility (tsionyx) - Improve error messages returned from the database with the `ServerError` class (tsionyx) +- Added support for custom partitioning (M1hacka) +- Added attribute `server_version` to the `Database` class (M1hacka) +- Changed the parameter of `Engine.create_table_sql()`, `Engine.drop_table_sql()`, `Model.create_table_sql()`, `Model.drop_table_sql()` from `db_name` to `db` (M1hacka) v0.9.8 ------ diff --git a/docs/class_reference.md b/docs/class_reference.md index 2b38e0f..b51c89f 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -24,18 +24,6 @@ created on the ClickHouse server if it does not already exist. - `autocreate`: automatically create the database if does not exist (unless in readonly mode). -#### server_timezone - - -Contains [pytz](http://pytz.sourceforge.net/) timezone used on database server - - -#### server_version - - -Contains a version tuple of database server, for example (1, 1, 54310) - - #### count(model_class, conditions=None) @@ -353,10 +341,10 @@ invalid values will cause a `ValueError` to be raised. Unrecognized field names will cause an `AttributeError`. -#### DistributedModel.create_table_sql(db_name) +#### DistributedModel.create_table_sql(db) -#### DistributedModel.drop_table_sql(db_name) +#### DistributedModel.drop_table_sql(db) Returns the SQL command for deleting this model's table. 
@@ -692,15 +680,13 @@ During a read, the table indexes on remote servers are used, if there are any. See full documentation here https://clickhouse.yandex/docs/en/table_engines/distributed.html -#### Distributed(cluster, table=None, db_name=None, sharding_key=None) +#### Distributed(cluster, table=None, sharding_key=None) :param cluster: what cluster to access data from :param table: underlying table that actually stores data. If you are not specifying any table here, ensure that it can be inferred from your model's superclass (see models.DistributedModel.fix_engine_table) -:param db_name: which database to access data from -By default it is 'currentDatabase()' :param sharding_key: how to distribute data among shards when inserting straightly into Distributed table, optional @@ -709,21 +695,21 @@ straightly into Distributed table, optional Extends MergeTree -#### CollapsingMergeTree(date_col, key_cols, sign_col, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) +#### CollapsingMergeTree(date_col, order_by, sign_col, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) ### SummingMergeTree Extends MergeTree -#### SummingMergeTree(date_col, key_cols, summing_cols=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) +#### SummingMergeTree(date_col, order_by, summing_cols=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) ### ReplacingMergeTree Extends MergeTree -#### ReplacingMergeTree(date_col, key_cols, ver_col=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) +#### ReplacingMergeTree(date_col, order_by, ver_col=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) infi.clickhouse_orm.query diff --git a/docs/table_engines.md b/docs/table_engines.md index 7c4f42a..1ad3770 100644 --- a/docs/table_engines.md +++ 
b/docs/table_engines.md @@ -27,7 +27,7 @@ Simple Engines engine = engines.TinyLog() engine = engines.Log() - + engine = engines.Memory() @@ -58,16 +58,17 @@ For a `ReplacingMergeTree` you can optionally specify the version column: ### Custom partitioning ClickHouse supports [custom partitioning](https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/) expressions since version 1.1.54310 -You can use custom partitioning with any MergeTree family engine. -To set custom partitioning: -* skip date_col (first) constructor parameter or fill it with None value -* add name to order_by (second) constructor parameter -* add partition_key parameter. It should be a tuple of expressions, by which partition are built. -Standard partitioning by date column can be added using toYYYYMM(date) function. +You can use custom partitioning with any `MergeTree` family engine. +To set custom partitioning: + +* Instead of specifying the `date_col` (first) constructor parameter, pass a tuple of field names or expressions in the `order_by` (second) constructor parameter. +* Add `partition_key` parameter. It should be a tuple of expressions, by which partitions are built. + +Standard monthly partitioning by date column can be specified using the `toYYYYMM(date)` function. Example: - + engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version', partition_key=('toYYYYMM(EventDate)', 'BannerID')) @@ -85,7 +86,7 @@ Buffer Engine ------------- A `Buffer` engine is only used in conjunction with a `BufferModel`. -The model should be a subclass of both `models.BufferModel` and the main model. +The model should be a subclass of both `models.BufferModel` and the main model. 
The main model is also passed to the engine: class PersonBuffer(models.BufferModel, Person): @@ -94,8 +95,8 @@ The main model is also passed to the engine: Additional buffer parameters can optionally be specified: - engine = engines.Buffer(Person, num_layers=16, min_time=10, - max_time=100, min_rows=10000, max_rows=1000000, + engine = engines.Buffer(Person, num_layers=16, min_time=10, + max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000) Then you can insert objects into Buffer model and they will be handled by ClickHouse properly: @@ -104,13 +105,13 @@ Then you can insert objects into Buffer model and they will be handled by ClickH suzy = PersonBuffer(first_name='Suzy', last_name='Jones') dan = PersonBuffer(first_name='Dan', last_name='Schwartz') db.insert([dan, suzy]) - - + + Merge Engine ------------- -[ClickHouse docs](https://clickhouse.yandex/docs/en/single/index.html#merge) -A `Merge` engine is only used in conjunction with a `MergeModel`. +[ClickHouse docs](https://clickhouse.yandex/docs/en/single/index.html#merge) +A `Merge` engine is only used in conjunction with a `MergeModel`. This table does not store data itself, but allows reading from any number of other tables simultaneously. So you can't insert in it. Engine parameter specifies re2 (similar to PCRE) regular expression, from which data is selected. diff --git a/docs/toc.md b/docs/toc.md index 6b4a1b1..c9c5ee9 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -36,6 +36,7 @@ * [Table Engines](table_engines.md#table-engines) * [Simple Engines](table_engines.md#simple-engines) * [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family) + * [Custom partitioning](table_engines.md#custom-partitioning) * [Data Replication](table_engines.md#data-replication) * [Buffer Engine](table_engines.md#buffer-engine) * [Merge Engine](table_engines.md#merge-engine)