Added Merge engine

1) Divided readonly and system flags of Field model. Readonly flag only restricts insert operations, while system flag restricts also create and drop table operations

2) Added Merge engine and tests for it
3) Added docs for Merge engine
4) Added opportunity to make Field readonly. This is useful for "virtual" columns (https://clickhouse.yandex/docs/en/single/index.html#virtual-columns)
This commit is contained in:
M1ha 2017-09-07 17:44:27 +05:00
parent 351e698971
commit de9f64cd3a
10 changed files with 162 additions and 26 deletions

View File

@ -15,6 +15,7 @@ The following engines are supported by the ORM:
- SummingMergeTree / ReplicatedSummingMergeTree
- ReplacingMergeTree / ReplicatedReplacingMergeTree
- Buffer
- Merge
Simple Engines
@ -85,6 +86,18 @@ Then you can insert objects into Buffer model and they will be handled by ClickH
suzy = PersonBuffer(first_name='Suzy', last_name='Jones')
dan = PersonBuffer(first_name='Dan', last_name='Schwartz')
db.insert([dan, suzy])
Merge Engine
-------------
[ClickHouse docs](https://clickhouse.yandex/docs/en/single/index.html#merge)
A `Merge` engine is only used in conjunction with a `MergeModel`.
This table does not store data itself, but allows reading from any number of other tables simultaneously, so you can't insert into it.
The engine parameter is an re2 (similar to PCRE) regular expression; data is selected from the tables whose names match it.
class MergeTable(models.MergeModel):
engine = engines.Merge('^table_prefix')
---

View File

@ -36,6 +36,7 @@
* [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family)
* [Data Replication](table_engines.md#data-replication)
* [Buffer Engine](table_engines.md#buffer-engine)
* [Merge Engine](table_engines.md#merge-engine)
* [Schema Migrations](schema_migrations.md#schema-migrations)
* [Writing Migrations](schema_migrations.md#writing-migrations)

View File

@ -75,16 +75,16 @@ class Database(object):
Creates a table for the given model class, if it does not exist already.
'''
# TODO check that model has an engine
if model_class.readonly:
raise DatabaseException("You can't create read only table")
if model_class.system:
raise DatabaseException("You can't create system table")
self._send(model_class.create_table_sql(self.db_name))
def drop_table(self, model_class):
'''
Drops the database table of the given model class, if it exists.
'''
if model_class.readonly:
raise DatabaseException("You can't drop read only table")
if model_class.system:
raise DatabaseException("You can't drop system table")
self._send(model_class.drop_table_sql(self.db_name))
def insert(self, model_instances, batch_size=1000):
@ -103,8 +103,8 @@ class Database(object):
return # model_instances is empty
model_class = first_instance.__class__
if first_instance.readonly:
raise DatabaseException("You can't insert into read only table")
if first_instance.readonly or first_instance.system:
raise DatabaseException("You can't insert into read only and system tables")
def gen():
buf = BytesIO()

View File

@ -1,4 +1,7 @@
from __future__ import unicode_literals
import six
from .utils import comma_join
@ -118,7 +121,6 @@ class Buffer(Engine):
self.min_bytes = min_bytes
self.max_bytes = max_bytes
def create_table_sql(self, db_name):
# Overriden create_table_sql example:
#sql = 'ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)'
@ -128,3 +130,27 @@ class Buffer(Engine):
self.max_rows, self.min_bytes, self.max_bytes
)
return sql
class Merge(Engine):
    """
    The Merge engine (not to be confused with MergeTree) does not store data itself,
    but allows reading from any number of other tables simultaneously.
    Writing to a table is not supported.
    https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
    """

    def __init__(self, table_regex):
        """
        :param table_regex: string holding the regular expression that is
            passed as the second argument of the ``Merge(...)`` engine clause.
        :raises AssertionError: if ``table_regex`` is not a string.
        """
        # The message names the parameter that is actually being validated
        # (it previously said 'db_name', which is not a parameter here).
        assert isinstance(table_regex, six.string_types), "'table_regex' parameter must be string"
        self.table_regex = table_regex

        # Use current database as default
        self.db_name = 'currentDatabase()'

    def create_table_sql(self):
        """
        Returns the engine clause, e.g. ``Merge(currentDatabase(), '^table_prefix')``.
        """
        # NOTE(review): db_name is interpolated unquoted so that the default
        # 'currentDatabase()' expression works; a name set via set_db_name()
        # is therefore also emitted unquoted - confirm this is valid for
        # database names containing characters such as '-'.
        return "Merge(%s, '%s')" % (self.db_name, self.table_regex)

    def set_db_name(self, db_name):
        """
        Replaces the database expression used in the engine clause.

        :param db_name: database name as a string.
        :raises AssertionError: if ``db_name`` is not a string.
        """
        assert isinstance(db_name, six.string_types), "'db_name' parameter must be string"
        self.db_name = db_name

View File

@ -16,19 +16,21 @@ class Field(object):
class_default = 0
db_type = None
def __init__(self, default=None, alias=None, materialized=None):
def __init__(self, default=None, alias=None, materialized=None, readonly=None):
    '''
    Creates a field definition.

    - `default`: default value; when `None`, the class-level `class_default` is used.
    - `alias`: non-empty string expression, if given.
    - `materialized`: non-empty string expression, if given.
    - `readonly`: optional bool; alias and materialized fields are always
      marked readonly regardless of this flag.

    At most one of `default`, `alias` and `materialized` may be given.
    Raises `AssertionError` when any of the above constraints is violated.
    '''
    # Count by identity instead of building a set of tuples: the set-based
    # check raised TypeError for unhashable defaults (e.g. a list).
    assert sum(value is None for value in (default, alias, materialized)) >= 2, \
        "Only one of default, alias and materialized parameters can be given"
    assert alias is None or isinstance(alias, string_types) and alias != "",\
        "Alias field must be string field name, if given"
    # Check `materialized` itself for emptiness; the previous condition tested
    # `alias`, so an empty materialized expression was silently accepted.
    assert materialized is None or isinstance(materialized, string_types) and materialized != "",\
        "Materialized field must be string, if given"
    assert readonly is None or type(readonly) is bool, "readonly parameter must be bool if given"

    # creation_counter records the order in which fields were instantiated
    self.creation_counter = Field.creation_counter
    Field.creation_counter += 1
    self.default = self.class_default if default is None else default
    self.alias = alias
    self.materialized = materialized
    self.readonly = bool(self.alias or self.materialized or readonly)
def to_python(self, value, timezone_in_use):
'''
@ -75,10 +77,6 @@ class Field(object):
else:
return self.db_type
@property
def readonly(self):
return bool(self.alias or self.materialized)
class StringField(Field):

View File

@ -4,9 +4,10 @@ from logging import getLogger
from six import with_metaclass
import pytz
from .fields import Field
from .fields import Field, StringField
from .utils import parse_tsv
from .query import QuerySet
from .engines import Merge
logger = getLogger('clickhouse_orm')
@ -86,8 +87,13 @@ class Model(with_metaclass(ModelBase)):
'''
engine = None
# Insert operations are restricted for read only models
readonly = False
# Create table, drop table, insert operations are restricted for system models
system = False
def __init__(self, **kwargs):
'''
Creates a model instance, using keyword arguments as field values.
@ -246,3 +252,25 @@ class BufferModel(Model):
engine_str = cls.engine.create_table_sql(db_name)
parts.append(engine_str)
return ' '.join(parts)
class MergeModel(Model):
'''
Model for Merge engine.
Predefines the virtual `_table` column and marks the model as readonly,
so that rows can't be inserted into this table type.
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
'''
readonly = True
# Virtual fields can't be inserted into database
_table = StringField(readonly=True)
def set_database(self, db):
'''
Delegates to the parent `set_database` and then passes `db.db_name`
to the model's Merge engine via `set_db_name`.
Raises `AssertionError` if the model's engine is not an `engines.Merge` instance.
Returns the result of the parent `set_database` call.
'''
assert isinstance(self.engine, Merge), "engine must be engines.Merge instance"
res = super(MergeModel, self).set_database(db)
# NOTE(review): `engine` is presumably the class-level attribute, so this
# would change the database name for every instance of the model - confirm.
self.engine.set_db_name(db.db_name)
return res

View File

@ -20,6 +20,7 @@ class SystemPart(Model):
OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'})
readonly = True
system = True
database = StringField() # Name of the database where the table that this part belongs to is located.
table = StringField() # Name of the table that this part belongs to.

View File

@ -2,7 +2,7 @@ from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.database import Database, DatabaseException
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.models import Model, MergeModel
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
@ -74,6 +74,56 @@ class EnginesTestCase(unittest.TestCase):
engine = Memory()
self._create_and_insert(TestModel)
def test_merge(self):
# Two TinyLog tables plus a Merge table whose regex '^testmodel' is meant
# to match both of them (expected `_table` values below are 'testmodel1'
# and 'testmodel2').
class TestModel1(SampleModel):
engine = TinyLog()
class TestModel2(SampleModel):
engine = TinyLog()
class TestMergeModel(MergeModel, SampleModel):
engine = Merge('^testmodel')
self.database.create_table(TestModel1)
self.database.create_table(TestModel2)
self.database.create_table(TestMergeModel)
# Insert operations are restricted for this model type
with self.assertRaises(DatabaseException):
self.database.insert([
TestMergeModel(date='2017-01-01', event_id=23423, event_group=13, event_count=7, event_version=1)
])
# Testing select
self.database.insert([
TestModel1(date='2017-01-01', event_id=1, event_group=1, event_count=1, event_version=1)
])
self.database.insert([
TestModel2(date='2017-01-02', event_id=2, event_group=2, event_count=2, event_version=2)
])
# event_uversion is a materialized field, so `*` does not select it.
# It is requested explicitly here so that it is populated in the results
# (presumably it would otherwise be left at its default of zero).
res = self.database.select('SELECT *, event_uversion FROM $table ORDER BY event_id', model_class=TestMergeModel)
res = [row for row in res]
self.assertEqual(2, len(res))
# Rows are ordered by event_id, so the TestModel1 row comes first.
self.assertDictEqual({
'_table': 'testmodel1',
'date': datetime.date(2017, 1, 1),
'event_id': 1,
'event_group': 1,
'event_count': 1,
'event_version': 1,
'event_uversion': 1
}, res[0].to_dict(include_readonly=True))
self.assertDictEqual({
'_table': 'testmodel2',
'date': datetime.date(2017, 1, 2),
'event_id': 2,
'event_group': 2,
'event_count': 2,
'event_version': 2,
'event_uversion': 2
}, res[1].to_dict(include_readonly=True))
class SampleModel(Model):

View File

@ -1,12 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import unittest
import six
from infi.clickhouse_orm.database import Database, DatabaseException
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.database import DatabaseException
from .base_test_with_data import *
@ -45,15 +40,15 @@ class ReadonlyTestCase(TestCaseWithData):
self.database.insert([m])
def test_create_readonly_table(self):
with self.assertRaises(DatabaseException):
self.database.create_table(ReadOnlyModel)
self.database.create_table(ReadOnlyModel)
def test_drop_readonly_table(self):
with self.assertRaises(DatabaseException):
self.database.drop_table(ReadOnlyModel)
self.database.drop_table(ReadOnlyModel)
class ReadOnlyModel(Model):
readonly = True
name = StringField()
date = DateField()
engine = MergeTree('date', ('name',))

View File

@ -2,14 +2,34 @@ from __future__ import unicode_literals
import unittest
from datetime import date
import os
import shutil
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.database import Database, DatabaseException
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.system_models import SystemPart
class SystemTest(unittest.TestCase):
# Checks that insert, create table and drop table raise DatabaseException
# for models flagged as system.
def setUp(self):
self.database = Database('test-db')
def tearDown(self):
self.database.drop_database()
def test_insert_system(self):
# NOTE(review): SystemPart sets both `readonly` and `system`, so this
# does not isolate the `system` flag on its own.
m = SystemPart()
with self.assertRaises(DatabaseException):
self.database.insert([m])
# Despite the "readonly" in its name, this test exercises the `system` flag
# (SystemTestModel sets `system = True`).
def test_create_readonly_table(self):
with self.assertRaises(DatabaseException):
self.database.create_table(SystemTestModel)
# Despite the "readonly" in its name, this test exercises the `system` flag
# (SystemTestModel sets `system = True`).
def test_drop_readonly_table(self):
with self.assertRaises(DatabaseException):
self.database.drop_table(SystemTestModel)
class SystemPartTest(unittest.TestCase):
BACKUP_DIRS = ['/var/lib/clickhouse/shadow', '/opt/clickhouse/shadow/']
@ -75,3 +95,7 @@ class TestTable(Model):
date_field = DateField()
engine = MergeTree('date_field', ('date_field',))
class SystemTestModel(Model):
# Minimal model flagged as a system table; used to check that create/drop
# table operations are rejected for system models.
system = True