mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2025-02-21 02:30:33 +03:00
Added Merge engine
1) Separated the readonly and system flags of the Field model. The readonly flag only restricts insert operations, while the system flag also restricts create and drop table operations. 2) Added the Merge engine and tests for it. 3) Added docs for the Merge engine. 4) Added the ability to make a Field readonly. This is useful for "virtual" columns (https://clickhouse.yandex/docs/en/single/index.html#virtual-columns)
This commit is contained in:
parent
351e698971
commit
de9f64cd3a
|
@ -15,6 +15,7 @@ The following engines are supported by the ORM:
|
||||||
- SummingMergeTree / ReplicatedSummingMergeTree
|
- SummingMergeTree / ReplicatedSummingMergeTree
|
||||||
- ReplacingMergeTree / ReplicatedReplacingMergeTree
|
- ReplacingMergeTree / ReplicatedReplacingMergeTree
|
||||||
- Buffer
|
- Buffer
|
||||||
|
- Merge
|
||||||
|
|
||||||
|
|
||||||
Simple Engines
|
Simple Engines
|
||||||
|
@ -85,6 +86,18 @@ Then you can insert objects into Buffer model and they will be handled by ClickH
|
||||||
suzy = PersonBuffer(first_name='Suzy', last_name='Jones')
|
suzy = PersonBuffer(first_name='Suzy', last_name='Jones')
|
||||||
dan = PersonBuffer(first_name='Dan', last_name='Schwartz')
|
dan = PersonBuffer(first_name='Dan', last_name='Schwartz')
|
||||||
db.insert([dan, suzy])
|
db.insert([dan, suzy])
|
||||||
|
|
||||||
|
|
||||||
|
Merge Engine
|
||||||
|
-------------
|
||||||
|
|
||||||
|
[ClickHouse docs](https://clickhouse.yandex/docs/en/single/index.html#merge)
|
||||||
|
A `Merge` engine is only used in conjunction with a `MergeModel`.
|
||||||
|
This table does not store data itself, but allows reading from any number of other tables simultaneously, so you cannot insert into it.
|
||||||
|
The engine parameter specifies a re2 (similar to PCRE) regular expression; data is selected from the tables whose names match it.
|
||||||
|
|
||||||
|
class MergeTable(models.MergeModel):
|
||||||
|
engine = engines.Merge('^table_prefix')
|
||||||
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
* [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family)
|
* [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family)
|
||||||
* [Data Replication](table_engines.md#data-replication)
|
* [Data Replication](table_engines.md#data-replication)
|
||||||
* [Buffer Engine](table_engines.md#buffer-engine)
|
* [Buffer Engine](table_engines.md#buffer-engine)
|
||||||
|
* [Merge Engine](table_engines.md#merge-engine)
|
||||||
|
|
||||||
* [Schema Migrations](schema_migrations.md#schema-migrations)
|
* [Schema Migrations](schema_migrations.md#schema-migrations)
|
||||||
* [Writing Migrations](schema_migrations.md#writing-migrations)
|
* [Writing Migrations](schema_migrations.md#writing-migrations)
|
||||||
|
|
|
@ -75,16 +75,16 @@ class Database(object):
|
||||||
Creates a table for the given model class, if it does not exist already.
|
Creates a table for the given model class, if it does not exist already.
|
||||||
'''
|
'''
|
||||||
# TODO check that model has an engine
|
# TODO check that model has an engine
|
||||||
if model_class.readonly:
|
if model_class.system:
|
||||||
raise DatabaseException("You can't create read only table")
|
raise DatabaseException("You can't create system table")
|
||||||
self._send(model_class.create_table_sql(self.db_name))
|
self._send(model_class.create_table_sql(self.db_name))
|
||||||
|
|
||||||
def drop_table(self, model_class):
|
def drop_table(self, model_class):
|
||||||
'''
|
'''
|
||||||
Drops the database table of the given model class, if it exists.
|
Drops the database table of the given model class, if it exists.
|
||||||
'''
|
'''
|
||||||
if model_class.readonly:
|
if model_class.system:
|
||||||
raise DatabaseException("You can't drop read only table")
|
raise DatabaseException("You can't drop system table")
|
||||||
self._send(model_class.drop_table_sql(self.db_name))
|
self._send(model_class.drop_table_sql(self.db_name))
|
||||||
|
|
||||||
def insert(self, model_instances, batch_size=1000):
|
def insert(self, model_instances, batch_size=1000):
|
||||||
|
@ -103,8 +103,8 @@ class Database(object):
|
||||||
return # model_instances is empty
|
return # model_instances is empty
|
||||||
model_class = first_instance.__class__
|
model_class = first_instance.__class__
|
||||||
|
|
||||||
if first_instance.readonly:
|
if first_instance.readonly or first_instance.system:
|
||||||
raise DatabaseException("You can't insert into read only table")
|
raise DatabaseException("You can't insert into read only and system tables")
|
||||||
|
|
||||||
def gen():
|
def gen():
|
||||||
buf = BytesIO()
|
buf = BytesIO()
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import six
|
||||||
|
|
||||||
from .utils import comma_join
|
from .utils import comma_join
|
||||||
|
|
||||||
|
|
||||||
|
@ -118,7 +121,6 @@ class Buffer(Engine):
|
||||||
self.min_bytes = min_bytes
|
self.min_bytes = min_bytes
|
||||||
self.max_bytes = max_bytes
|
self.max_bytes = max_bytes
|
||||||
|
|
||||||
|
|
||||||
def create_table_sql(self, db_name):
|
def create_table_sql(self, db_name):
|
||||||
# Overriden create_table_sql example:
|
# Overriden create_table_sql example:
|
||||||
#sql = 'ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)'
|
#sql = 'ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)'
|
||||||
|
@ -128,3 +130,27 @@ class Buffer(Engine):
|
||||||
self.max_rows, self.min_bytes, self.max_bytes
|
self.max_rows, self.min_bytes, self.max_bytes
|
||||||
)
|
)
|
||||||
return sql
|
return sql
|
||||||
|
|
||||||
|
|
||||||
|
class Merge(Engine):
|
||||||
|
"""
|
||||||
|
The Merge engine (not to be confused with MergeTree) does not store data itself,
|
||||||
|
but allows reading from any number of other tables simultaneously.
|
||||||
|
Writing to a table is not supported
|
||||||
|
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, table_regex):
|
||||||
|
assert isinstance(table_regex, six.string_types), "'db_name' parameter must be string"
|
||||||
|
|
||||||
|
self.table_regex = table_regex
|
||||||
|
|
||||||
|
# Use current database as default
|
||||||
|
self.db_name = 'currentDatabase()'
|
||||||
|
|
||||||
|
def create_table_sql(self):
|
||||||
|
return "Merge(%s, '%s')" % (self.db_name, self.table_regex)
|
||||||
|
|
||||||
|
def set_db_name(self, db_name):
|
||||||
|
assert isinstance(db_name, six.string_types), "'db_name' parameter must be string"
|
||||||
|
self.db_name = db_name
|
||||||
|
|
|
@ -16,19 +16,21 @@ class Field(object):
|
||||||
class_default = 0
|
class_default = 0
|
||||||
db_type = None
|
db_type = None
|
||||||
|
|
||||||
def __init__(self, default=None, alias=None, materialized=None):
|
def __init__(self, default=None, alias=None, materialized=None, readonly=None):
|
||||||
assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \
|
assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \
|
||||||
"Only one of default, alias and materialized parameters can be given"
|
"Only one of default, alias and materialized parameters can be given"
|
||||||
assert alias is None or isinstance(alias, string_types) and alias != "",\
|
assert alias is None or isinstance(alias, string_types) and alias != "",\
|
||||||
"Alias field must be string field name, if given"
|
"Alias field must be string field name, if given"
|
||||||
assert materialized is None or isinstance(materialized, string_types) and alias != "",\
|
assert materialized is None or isinstance(materialized, string_types) and alias != "",\
|
||||||
"Materialized field must be string, if given"
|
"Materialized field must be string, if given"
|
||||||
|
assert readonly is None or type(readonly) is bool, "readonly parameter must be bool if given"
|
||||||
|
|
||||||
self.creation_counter = Field.creation_counter
|
self.creation_counter = Field.creation_counter
|
||||||
Field.creation_counter += 1
|
Field.creation_counter += 1
|
||||||
self.default = self.class_default if default is None else default
|
self.default = self.class_default if default is None else default
|
||||||
self.alias = alias
|
self.alias = alias
|
||||||
self.materialized = materialized
|
self.materialized = materialized
|
||||||
|
self.readonly = bool(self.alias or self.materialized or readonly)
|
||||||
|
|
||||||
def to_python(self, value, timezone_in_use):
|
def to_python(self, value, timezone_in_use):
|
||||||
'''
|
'''
|
||||||
|
@ -75,10 +77,6 @@ class Field(object):
|
||||||
else:
|
else:
|
||||||
return self.db_type
|
return self.db_type
|
||||||
|
|
||||||
@property
|
|
||||||
def readonly(self):
|
|
||||||
return bool(self.alias or self.materialized)
|
|
||||||
|
|
||||||
|
|
||||||
class StringField(Field):
|
class StringField(Field):
|
||||||
|
|
||||||
|
|
|
@ -4,9 +4,10 @@ from logging import getLogger
|
||||||
from six import with_metaclass
|
from six import with_metaclass
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
from .fields import Field
|
from .fields import Field, StringField
|
||||||
from .utils import parse_tsv
|
from .utils import parse_tsv
|
||||||
from .query import QuerySet
|
from .query import QuerySet
|
||||||
|
from .engines import Merge
|
||||||
|
|
||||||
logger = getLogger('clickhouse_orm')
|
logger = getLogger('clickhouse_orm')
|
||||||
|
|
||||||
|
@ -86,8 +87,13 @@ class Model(with_metaclass(ModelBase)):
|
||||||
'''
|
'''
|
||||||
|
|
||||||
engine = None
|
engine = None
|
||||||
|
|
||||||
|
# Insert operations are restricted for read only models
|
||||||
readonly = False
|
readonly = False
|
||||||
|
|
||||||
|
# Create table, drop table, insert operations are restricted for system models
|
||||||
|
system = False
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
'''
|
'''
|
||||||
Creates a model instance, using keyword arguments as field values.
|
Creates a model instance, using keyword arguments as field values.
|
||||||
|
@ -246,3 +252,25 @@ class BufferModel(Model):
|
||||||
engine_str = cls.engine.create_table_sql(db_name)
|
engine_str = cls.engine.create_table_sql(db_name)
|
||||||
parts.append(engine_str)
|
parts.append(engine_str)
|
||||||
return ' '.join(parts)
|
return ' '.join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
class MergeModel(Model):
|
||||||
|
'''
|
||||||
|
Model for Merge engine
|
||||||
|
Predefines the virtual _table column and ensures that rows can't be inserted into this table type
|
||||||
|
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
|
||||||
|
'''
|
||||||
|
readonly = True
|
||||||
|
|
||||||
|
# Virtual fields can't be inserted into database
|
||||||
|
_table = StringField(readonly=True)
|
||||||
|
|
||||||
|
def set_database(self, db):
|
||||||
|
'''
|
||||||
|
Sets the `Database` that this model instance belongs to, and passes that database's name to the Merge engine.
|
||||||
|
Returns whatever the base class `set_database` returns.
|
||||||
|
'''
|
||||||
|
assert isinstance(self.engine, Merge), "engine must be engines.Merge instance"
|
||||||
|
res = super(MergeModel, self).set_database(db)
|
||||||
|
self.engine.set_db_name(db.db_name)
|
||||||
|
return res
|
||||||
|
|
|
@ -20,6 +20,7 @@ class SystemPart(Model):
|
||||||
OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'})
|
OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'})
|
||||||
|
|
||||||
readonly = True
|
readonly = True
|
||||||
|
system = True
|
||||||
|
|
||||||
database = StringField() # Name of the database where the table that this part belongs to is located.
|
database = StringField() # Name of the database where the table that this part belongs to is located.
|
||||||
table = StringField() # Name of the table that this part belongs to.
|
table = StringField() # Name of the table that this part belongs to.
|
||||||
|
|
|
@ -2,7 +2,7 @@ from __future__ import unicode_literals
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from infi.clickhouse_orm.database import Database, DatabaseException
|
from infi.clickhouse_orm.database import Database, DatabaseException
|
||||||
from infi.clickhouse_orm.models import Model
|
from infi.clickhouse_orm.models import Model, MergeModel
|
||||||
from infi.clickhouse_orm.fields import *
|
from infi.clickhouse_orm.fields import *
|
||||||
from infi.clickhouse_orm.engines import *
|
from infi.clickhouse_orm.engines import *
|
||||||
|
|
||||||
|
@ -74,6 +74,56 @@ class EnginesTestCase(unittest.TestCase):
|
||||||
engine = Memory()
|
engine = Memory()
|
||||||
self._create_and_insert(TestModel)
|
self._create_and_insert(TestModel)
|
||||||
|
|
||||||
|
def test_merge(self):
|
||||||
|
class TestModel1(SampleModel):
|
||||||
|
engine = TinyLog()
|
||||||
|
|
||||||
|
class TestModel2(SampleModel):
|
||||||
|
engine = TinyLog()
|
||||||
|
|
||||||
|
class TestMergeModel(MergeModel, SampleModel):
|
||||||
|
engine = Merge('^testmodel')
|
||||||
|
|
||||||
|
self.database.create_table(TestModel1)
|
||||||
|
self.database.create_table(TestModel2)
|
||||||
|
self.database.create_table(TestMergeModel)
|
||||||
|
|
||||||
|
# Insert operations are restricted for this model type
|
||||||
|
with self.assertRaises(DatabaseException):
|
||||||
|
self.database.insert([
|
||||||
|
TestMergeModel(date='2017-01-01', event_id=23423, event_group=13, event_count=7, event_version=1)
|
||||||
|
])
|
||||||
|
|
||||||
|
# Testing select
|
||||||
|
self.database.insert([
|
||||||
|
TestModel1(date='2017-01-01', event_id=1, event_group=1, event_count=1, event_version=1)
|
||||||
|
])
|
||||||
|
self.database.insert([
|
||||||
|
TestModel2(date='2017-01-02', event_id=2, event_group=2, event_count=2, event_version=2)
|
||||||
|
])
|
||||||
|
# event_uversion is materialized field. So * won't select it and it will be zero
|
||||||
|
res = self.database.select('SELECT *, event_uversion FROM $table ORDER BY event_id', model_class=TestMergeModel)
|
||||||
|
res = [row for row in res]
|
||||||
|
self.assertEqual(2, len(res))
|
||||||
|
self.assertDictEqual({
|
||||||
|
'_table': 'testmodel1',
|
||||||
|
'date': datetime.date(2017, 1, 1),
|
||||||
|
'event_id': 1,
|
||||||
|
'event_group': 1,
|
||||||
|
'event_count': 1,
|
||||||
|
'event_version': 1,
|
||||||
|
'event_uversion': 1
|
||||||
|
}, res[0].to_dict(include_readonly=True))
|
||||||
|
self.assertDictEqual({
|
||||||
|
'_table': 'testmodel2',
|
||||||
|
'date': datetime.date(2017, 1, 2),
|
||||||
|
'event_id': 2,
|
||||||
|
'event_group': 2,
|
||||||
|
'event_count': 2,
|
||||||
|
'event_version': 2,
|
||||||
|
'event_uversion': 2
|
||||||
|
}, res[1].to_dict(include_readonly=True))
|
||||||
|
|
||||||
|
|
||||||
class SampleModel(Model):
|
class SampleModel(Model):
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import unittest
|
|
||||||
|
|
||||||
import six
|
from infi.clickhouse_orm.database import DatabaseException
|
||||||
from infi.clickhouse_orm.database import Database, DatabaseException
|
|
||||||
from infi.clickhouse_orm.models import Model
|
|
||||||
from infi.clickhouse_orm.fields import *
|
|
||||||
from infi.clickhouse_orm.engines import *
|
|
||||||
from .base_test_with_data import *
|
from .base_test_with_data import *
|
||||||
|
|
||||||
|
|
||||||
|
@ -45,15 +40,15 @@ class ReadonlyTestCase(TestCaseWithData):
|
||||||
self.database.insert([m])
|
self.database.insert([m])
|
||||||
|
|
||||||
def test_create_readonly_table(self):
|
def test_create_readonly_table(self):
|
||||||
with self.assertRaises(DatabaseException):
|
self.database.create_table(ReadOnlyModel)
|
||||||
self.database.create_table(ReadOnlyModel)
|
|
||||||
|
|
||||||
def test_drop_readonly_table(self):
|
def test_drop_readonly_table(self):
|
||||||
with self.assertRaises(DatabaseException):
|
self.database.drop_table(ReadOnlyModel)
|
||||||
self.database.drop_table(ReadOnlyModel)
|
|
||||||
|
|
||||||
|
|
||||||
class ReadOnlyModel(Model):
|
class ReadOnlyModel(Model):
|
||||||
readonly = True
|
readonly = True
|
||||||
|
|
||||||
name = StringField()
|
name = StringField()
|
||||||
|
date = DateField()
|
||||||
|
engine = MergeTree('date', ('name',))
|
||||||
|
|
|
@ -2,14 +2,34 @@ from __future__ import unicode_literals
|
||||||
import unittest
|
import unittest
|
||||||
from datetime import date
|
from datetime import date
|
||||||
import os
|
import os
|
||||||
import shutil
|
from infi.clickhouse_orm.database import Database, DatabaseException
|
||||||
from infi.clickhouse_orm.database import Database
|
|
||||||
from infi.clickhouse_orm.engines import *
|
from infi.clickhouse_orm.engines import *
|
||||||
from infi.clickhouse_orm.fields import *
|
from infi.clickhouse_orm.fields import *
|
||||||
from infi.clickhouse_orm.models import Model
|
from infi.clickhouse_orm.models import Model
|
||||||
from infi.clickhouse_orm.system_models import SystemPart
|
from infi.clickhouse_orm.system_models import SystemPart
|
||||||
|
|
||||||
|
|
||||||
|
class SystemTest(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.database = Database('test-db')
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
self.database.drop_database()
|
||||||
|
|
||||||
|
def test_insert_system(self):
|
||||||
|
m = SystemPart()
|
||||||
|
with self.assertRaises(DatabaseException):
|
||||||
|
self.database.insert([m])
|
||||||
|
|
||||||
|
def test_create_readonly_table(self):
|
||||||
|
with self.assertRaises(DatabaseException):
|
||||||
|
self.database.create_table(SystemTestModel)
|
||||||
|
|
||||||
|
def test_drop_readonly_table(self):
|
||||||
|
with self.assertRaises(DatabaseException):
|
||||||
|
self.database.drop_table(SystemTestModel)
|
||||||
|
|
||||||
|
|
||||||
class SystemPartTest(unittest.TestCase):
|
class SystemPartTest(unittest.TestCase):
|
||||||
|
|
||||||
BACKUP_DIRS = ['/var/lib/clickhouse/shadow', '/opt/clickhouse/shadow/']
|
BACKUP_DIRS = ['/var/lib/clickhouse/shadow', '/opt/clickhouse/shadow/']
|
||||||
|
@ -75,3 +95,7 @@ class TestTable(Model):
|
||||||
date_field = DateField()
|
date_field = DateField()
|
||||||
|
|
||||||
engine = MergeTree('date_field', ('date_field',))
|
engine = MergeTree('date_field', ('date_field',))
|
||||||
|
|
||||||
|
|
||||||
|
class SystemTestModel(Model):
|
||||||
|
system = True
|
||||||
|
|
Loading…
Reference in New Issue
Block a user