Mirror of https://github.com/Infinidat/infi.clickhouse_orm.git
Finished Release v1.4.0
Commit: 0c89989519

@@ -1,6 +1,11 @@
 Change Log
 ==========
 
+v1.4.0
+------
+- Added primary_key parameter to MergeTree engines (M1hacka)
+- Support negative enum values (Romamo)
+
 v1.3.0
 ------
 - Support LowCardinality columns in ad-hoc queries

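The two changes listed in the v1.4.0 changelog above surface in model definitions roughly as follows. This is a minimal sketch, not code from the commit: the model, the field names and the enum are illustrative, and it assumes the package's usual submodules (`models`, `fields`, `engines`).

    from enum import Enum
    from infi.clickhouse_orm import models, fields, engines

    # Hypothetical enum with a negative member, which Enum fields now accept
    # (second changelog item).
    Status = Enum('Status', [('active', 1), ('archived', 2), ('deleted', -1)])

    class Event(models.Model):
        date = fields.DateField()
        event_id = fields.UInt64Field()
        status = fields.Enum8Field(Status)

        # primary_key is the MergeTree parameter added in this release
        # (first changelog item); it may differ from order_by.
        engine = engines.MergeTree(partition_key=('toYYYYMM(date)',),
                                   order_by=('date', 'event_id'),
                                   primary_key=('date',))
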
@@ -737,7 +737,7 @@ Extends Engine
 
 Extends Engine
 
-#### MergeTree(date_col=None, order_by=(), sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None)
+#### MergeTree(date_col=None, order_by=(), sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None, primary_key=None)
 
 
 ### Buffer

@@ -793,21 +793,21 @@ straightly into Distributed table, optional
 
 Extends MergeTree
 
-#### CollapsingMergeTree(date_col=None, order_by=(), sign_col="sign", sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None)
+#### CollapsingMergeTree(date_col=None, order_by=(), sign_col="sign", sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None, primary_key=None)
 
 
 ### SummingMergeTree
 
 Extends MergeTree
 
-#### SummingMergeTree(date_col=None, order_by=(), summing_cols=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None)
+#### SummingMergeTree(date_col=None, order_by=(), summing_cols=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None, primary_key=None)
 
 
 ### ReplacingMergeTree
 
 Extends MergeTree
 
-#### ReplacingMergeTree(date_col=None, order_by=(), ver_col=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None)
+#### ReplacingMergeTree(date_col=None, order_by=(), ver_col=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None, primary_key=None)
 
 
 infi.clickhouse_orm.query

@@ -73,6 +73,19 @@ Example:
                                partition_key=('toYYYYMM(EventDate)', 'BannerID'))
 
 
+### Primary key
+
+ClickHouse supports [custom primary key](https://clickhouse.yandex/docs/en/operations/table_engines/mergetree/#primary-keys-and-indexes-in-queries/) expressions since version 1.1.54310.
+
+You can use a custom primary key with any `MergeTree` family engine.
+To set a custom primary key, add the `primary_key` parameter. It should be a tuple of expressions from which the primary key is built.
+
+By default, the primary key is equal to the `order_by` expression.
+
+Example:
+
+    engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
+                                        partition_key=('toYYYYMM(EventDate)', 'BannerID'), primary_key=('OrderID',))
+
 ### Data Replication
 
 Any of the above engines can be converted to a replicated engine (e.g. `ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`:

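To put the documentation example above to work end to end, the engine can be attached to a model and the table created through a `Database` instance. This is a sketch under stated assumptions: the field types and the database name are illustrative, and it requires a ClickHouse server reachable at the client's default URL.

    from infi.clickhouse_orm.database import Database
    from infi.clickhouse_orm import models, fields, engines

    class Order(models.Model):
        OrderID = fields.UInt64Field()
        EventDate = fields.DateField()
        BannerID = fields.UInt32Field()
        Version = fields.UInt32Field()

        # Engine taken verbatim from the documentation example above.
        engine = engines.ReplacingMergeTree(order_by=('OrderID', 'EventDate', 'BannerID'), ver_col='Version',
                                            partition_key=('toYYYYMM(EventDate)', 'BannerID'), primary_key=('OrderID',))

    db = Database('demo_db')   # assumes a local ClickHouse server with default credentials
    db.create_table(Order)     # emits CREATE TABLE ... PRIMARY KEY (OrderID) ...
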
@@ -35,8 +35,10 @@ class Memory(Engine):
 class MergeTree(Engine):
 
     def __init__(self, date_col=None, order_by=(), sampling_expr=None,
-                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None):
+                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None,
+                 primary_key=None):
         assert type(order_by) in (list, tuple), 'order_by must be a list or tuple'
+        assert primary_key is None or type(primary_key) in (list, tuple), 'primary_key must be a list or tuple'
         assert date_col is None or isinstance(date_col, six.string_types), 'date_col must be string if present'
         assert partition_key is None or type(partition_key) in (list, tuple),\
             'partition_key must be tuple or list if present'

@@ -48,6 +50,7 @@ class MergeTree(Engine):
         assert date_col or partition_key, "You must set either date_col or partition_key"
         self.date_col = date_col
         self.partition_key = partition_key if partition_key else ('toYYYYMM(`%s`)' % date_col,)
+        self.primary_key = primary_key
         self.order_by = order_by
         self.sampling_expr = sampling_expr

@@ -78,6 +81,9 @@ class MergeTree(Engine):
             partition_sql = "PARTITION BY %s ORDER BY %s" \
                             % ('(%s)' % comma_join(self.partition_key), '(%s)' % comma_join(self.order_by))
 
+            if self.primary_key:
+                partition_sql += " PRIMARY KEY (%s)" % comma_join(self.primary_key)
+
             if self.sampling_expr:
                 partition_sql += " SAMPLE BY %s" % self.sampling_expr
 

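The effect of the added branch is easiest to see in isolation. Below is a minimal sketch of the clause-building logic from the hunk above, with literal values standing in for the engine's attributes and a local `comma_join` standing in for the ORM helper of the same name:

    partition_key = ('toYYYYMM(date)',)
    order_by = ('date', 'event_id', 'event_group')
    primary_key = ('date', 'event_id')

    def comma_join(items):
        # stand-in for the ORM's comma_join utility
        return ', '.join(items)

    partition_sql = "PARTITION BY %s ORDER BY %s" \
                    % ('(%s)' % comma_join(partition_key), '(%s)' % comma_join(order_by))
    if primary_key:
        partition_sql += " PRIMARY KEY (%s)" % comma_join(primary_key)

    print(partition_sql)
    # PARTITION BY (toYYYYMM(date)) ORDER BY (date, event_id, event_group) PRIMARY KEY (date, event_id)
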
@@ -117,9 +123,10 @@ class MergeTree(Engine):
 class CollapsingMergeTree(MergeTree):
 
     def __init__(self, date_col=None, order_by=(), sign_col='sign', sampling_expr=None,
-                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None):
+                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None,
+                 primary_key=None):
         super(CollapsingMergeTree, self).__init__(date_col, order_by, sampling_expr, index_granularity,
-                                                  replica_table_path, replica_name, partition_key)
+                                                  replica_table_path, replica_name, partition_key, primary_key)
         self.sign_col = sign_col
 
     def _build_sql_params(self, db):

@@ -131,9 +138,10 @@ class CollapsingMergeTree(MergeTree):
 class SummingMergeTree(MergeTree):
 
     def __init__(self, date_col=None, order_by=(), summing_cols=None, sampling_expr=None,
-                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None):
+                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None,
+                 primary_key=None):
         super(SummingMergeTree, self).__init__(date_col, order_by, sampling_expr, index_granularity, replica_table_path,
-                                               replica_name, partition_key)
+                                               replica_name, partition_key, primary_key)
         assert type is None or type(summing_cols) in (list, tuple), 'summing_cols must be a list or tuple'
         self.summing_cols = summing_cols
 

@@ -147,9 +155,10 @@ class SummingMergeTree(MergeTree):
 class ReplacingMergeTree(MergeTree):
 
     def __init__(self, date_col=None, order_by=(), ver_col=None, sampling_expr=None,
-                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None):
+                 index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None,
+                 primary_key=None):
         super(ReplacingMergeTree, self).__init__(date_col, order_by, sampling_expr, index_granularity,
-                                                 replica_table_path, replica_name, partition_key)
+                                                 replica_table_path, replica_name, partition_key, primary_key)
         self.ver_col = ver_col
 
     def _build_sql_params(self, db):

@@ -414,7 +414,7 @@ class BaseEnumField(Field):
         import re
         from enum import Enum
         members = {}
-        for match in re.finditer("'(\w+)' = (\d+)", db_type):
+        for match in re.finditer("'(\w+)' = (-?\d+)", db_type):
             members[match.group(1)] = int(match.group(2))
         enum_cls = Enum('AdHocEnum', members)
         field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field

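The only change here is an optional minus sign in the value group of the regex, so ad-hoc models can parse enum types that contain negative values. A self-contained sketch of what the updated pattern matches (the `db_type` string is an example, not taken from the commit):

    import re
    from enum import Enum

    db_type = "Enum8('apple' = 1, 'banana' = 2, 'cherry' = -7)"
    members = {}
    for match in re.finditer(r"'(\w+)' = (-?\d+)", db_type):
        members[match.group(1)] = int(match.group(2))

    Fruit = Enum('AdHocEnum', members)
    assert Fruit['cherry'].value == -7   # with the old pattern, 'cherry' = -7 would not match at all
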
@@ -149,8 +149,12 @@ class DatabaseTestCase(TestCaseWithData):
             Database(self.database.db_name, username='default', password='wrong')
 
         exc = cm.exception
-        self.assertEqual(exc.code, 193)
-        self.assertTrue(exc.message.startswith('Wrong password for user default'))
+        if exc.code == 193:    # ClickHouse version < 20.3
+            self.assertTrue(exc.message.startswith('Wrong password for user default'))
+        elif exc.code == 516:  # ClickHouse version >= 20.3
+            self.assertTrue(exc.message.startswith('default: Authentication failed'))
+        else:
+            raise Exception('Unexpected error code - %s' % exc.code)
 
     def test_nonexisting_db(self):
         db = Database('db_not_here', autocreate=False)

@@ -165,6 +165,29 @@ class EnginesTestCase(_EnginesHelperTestCase):
         self.assertEqual('testmodel', parts[1].table)
         self.assertEqual('(201701, 13)'.replace(' ', ''), parts[1].partition.replace(' ', ''))
 
+    def test_custom_primary_key(self):
+        class TestModel(SampleModel):
+            engine = MergeTree(
+                order_by=('date', 'event_id', 'event_group'),
+                partition_key=('toYYYYMM(date)',),
+                primary_key=('date', 'event_id')
+            )
+
+        class TestCollapseModel(SampleModel):
+            sign = Int8Field()
+
+            engine = CollapsingMergeTree(
+                sign_col='sign',
+                order_by=('date', 'event_id', 'event_group'),
+                partition_key=('toYYYYMM(date)',),
+                primary_key=('date', 'event_id')
+            )
+
+        self._create_and_insert(TestModel)
+        self._create_and_insert(TestCollapseModel)
+
+        self.assertEqual(2, len(list(SystemPart.get(self.database))))
+
 
 class SampleModel(Model):
 

@@ -22,29 +22,35 @@ class EnumFieldsTest(unittest.TestCase):
     def test_insert_and_select(self):
         self.database.insert([
             ModelWithEnum(date_field='2016-08-30', enum_field=Fruit.apple),
-            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.orange)
+            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.orange),
+            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.cherry)
         ])
         query = 'SELECT * from $table ORDER BY date_field'
         results = list(self.database.select(query, ModelWithEnum))
-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 3)
         self.assertEqual(results[0].enum_field, Fruit.apple)
         self.assertEqual(results[1].enum_field, Fruit.orange)
+        self.assertEqual(results[2].enum_field, Fruit.cherry)
 
     def test_ad_hoc_model(self):
         self.database.insert([
             ModelWithEnum(date_field='2016-08-30', enum_field=Fruit.apple),
-            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.orange)
+            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.orange),
+            ModelWithEnum(date_field='2016-08-31', enum_field=Fruit.cherry)
         ])
         query = 'SELECT * from $db.modelwithenum ORDER BY date_field'
         results = list(self.database.select(query))
-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 3)
         self.assertEqual(results[0].enum_field.name, Fruit.apple.name)
         self.assertEqual(results[0].enum_field.value, Fruit.apple.value)
         self.assertEqual(results[1].enum_field.name, Fruit.orange.name)
         self.assertEqual(results[1].enum_field.value, Fruit.orange.value)
+        self.assertEqual(results[2].enum_field.name, Fruit.cherry.name)
+        self.assertEqual(results[2].enum_field.value, Fruit.cherry.value)
 
     def test_conversion(self):
         self.assertEqual(ModelWithEnum(enum_field=3).enum_field, Fruit.orange)
+        self.assertEqual(ModelWithEnum(enum_field=-7).enum_field, Fruit.cherry)
         self.assertEqual(ModelWithEnum(enum_field='apple').enum_field, Fruit.apple)
         self.assertEqual(ModelWithEnum(enum_field=Fruit.banana).enum_field, Fruit.banana)

@@ -66,7 +72,7 @@ class EnumFieldsTest(unittest.TestCase):
         self.assertEqual(results[0].enum_array, instance.enum_array)
 
 
-Fruit = Enum('Fruit', u'apple banana orange')
+Fruit = Enum('Fruit', [('apple', 1), ('banana', 2), ('orange', 3), ('cherry', -7)])
 
 
 class ModelWithEnum(Model):

@@ -83,3 +89,4 @@ class ModelWithEnumArray(Model):
     enum_array = ArrayField(Enum16Field(Fruit))
 
     engine = MergeTree('date_field', ('date_field',))
+

@@ -30,8 +30,8 @@ class JoinTest(unittest.TestCase):
         self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN $db.{} USING id".format(Foo.table_name(), Bar.table_name()))
 
     def test_with_subquery(self):
-        self.print_res("SELECT b FROM {} ALL LEFT JOIN (SELECT * from {}) USING id".format(Foo.table_name(), Bar.table_name()))
-        self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN (SELECT * from $db.{}) USING id".format(Foo.table_name(), Bar.table_name()))
+        self.print_res("SELECT b FROM {} ALL LEFT JOIN (SELECT * from {}) subquery USING id".format(Foo.table_name(), Bar.table_name()))
+        self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN (SELECT * from $db.{}) subquery USING id".format(Foo.table_name(), Bar.table_name()))
 
 
 class Foo(models.Model):

@@ -26,7 +26,9 @@ class ReadonlyTestCase(TestCaseWithData):
                 self.database.drop_database()
             self._check_db_readonly_err(cm.exception, drop_table=True)
         except ServerError as e:
-            if e.code == 192 and e.message.startswith('Unknown user'):
+            if e.code == 192 and e.message.startswith('Unknown user'):  # ClickHouse version < 20.3
+                raise unittest.SkipTest('Database user "%s" is not defined' % username)
+            elif e.code == 516 and e.message.startswith('readonly: Authentication failed'):  # ClickHouse version >= 20.3
                 raise unittest.SkipTest('Database user "%s" is not defined' % username)
             else:
                 raise

@@ -35,7 +37,10 @@ class ReadonlyTestCase(TestCaseWithData):
 
     def _check_db_readonly_err(self, exc, drop_table=None):
         self.assertEqual(exc.code, 164)
-        if drop_table:
+        print(exc.message)
+        if self.database.server_version >= (20, 3):
+            self.assertTrue('Cannot execute query in readonly mode' in exc.message)
+        elif drop_table:
             self.assertTrue(exc.message.startswith('Cannot drop table in readonly mode'))
         else:
             self.assertTrue(exc.message.startswith('Cannot insert into table in readonly mode'))