Added MaterializedField and AliasField

This commit is contained in:
M1ha 2017-01-26 15:42:33 +05:00
parent 01cd88a938
commit ca341ea997
8 changed files with 242 additions and 4 deletions

View File

@ -50,11 +50,11 @@ class Database(object):
model_class = first_instance.__class__ model_class = first_instance.__class__
def gen(): def gen():
yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8') yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
yield (first_instance.to_tsv() + '\n').encode('utf-8') yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8')
# Collect lines in batches of batch_size # Collect lines in batches of batch_size
batch = [] batch = []
for instance in i: for instance in i:
batch.append(instance.to_tsv()) batch.append(instance.to_tsv(insertable_only=True))
if len(batch) >= batch_size: if len(batch) >= batch_size:
# Return the current batch of lines # Return the current batch of lines
yield ('\n'.join(batch) + '\n').encode('utf-8') yield ('\n'.join(batch) + '\n').encode('utf-8')

View File

@ -12,6 +12,9 @@ class Field(object):
class_default = 0 class_default = 0
db_type = None db_type = None
# This flag indicates whether this field's value should be included when inserting data
insertable = True
def __init__(self, default=None): def __init__(self, default=None):
self.creation_counter = Field.creation_counter self.creation_counter = Field.creation_counter
Field.creation_counter += 1 Field.creation_counter += 1
@ -295,3 +298,76 @@ class ArrayField(Field):
def get_sql(self, with_default=True): def get_sql(self, with_default=True):
from .utils import escape from .utils import escape
return 'Array(%s)' % self.inner_field.get_sql(with_default=False) return 'Array(%s)' % self.inner_field.get_sql(with_default=False)
class RelativeField(Field):
    """
    Base class for columns whose value is produced by ClickHouse itself
    (MATERIALIZED and ALIAS columns).

    The field wraps an ``inner_field`` and delegates value conversion,
    validation and serialization to it.
    """

    # Derived columns are skipped by Model.to_tsv(insertable_only=True),
    # so their values are never sent in INSERT statements.
    insertable = False

    def __init__(self, inner_field):
        """
        Creates MATERIALIZED or ALIAS field
        :param inner_field: Field instance this field is acting like
        """
        assert isinstance(inner_field, Field), "field must be Field subclass"
        # Set before the base initializer runs, in case it reads class_default.
        self.class_default = inner_field.class_default
        super(RelativeField, self).__init__()
        # Assigned after the base initializer: it is called without a default,
        # so anything it stores in self.default must not win over the inner
        # field's own default.
        self.default = inner_field.default
        self.inner_field = inner_field

    def to_python(self, value):
        """Converts the value using the wrapped field."""
        return self.inner_field.to_python(value)

    def validate(self, value):
        """Validates the value using the wrapped field."""
        return self.inner_field.validate(value)

    def to_db_string(self, value, quote=True):
        """Serializes the value using the wrapped field."""
        return self.inner_field.to_db_string(value, quote=quote)
class MaterializedField(RelativeField):
    """
    Creates ClickHouse MATERIALIZED field. Its value is always calculated by the
    server from the given expression, so it can't be specified in INSERT.
    https://clickhouse.yandex/reference_en.html#Default values
    """

    def __init__(self, inner_field, code):
        """
        Creates MATERIALIZED field
        :param inner_field: Field instance this field is acting like
        :param code: ClickHouse expression used to calculate the column value. See ClickHouse docs.
        """
        super(MaterializedField, self).__init__(inner_field)
        self._code = code

    def get_sql(self, with_default=True):
        """
        Generates SQL for create table command
        :param with_default: This flag is inherited from Field model. Does nothing (MATERIALIZED have no default)
        :return: Creation SQL string
        """
        # Ask the inner field for its full column type instead of reading db_type,
        # so that fields which build their type in get_sql (the way ArrayField does)
        # are rendered correctly.
        column_type = self.inner_field.get_sql(with_default=False)
        return '%s MATERIALIZED %s' % (column_type, self._code)
class AliasField(RelativeField):
    """
    Creates ClickHouse ALIAS field. It doesn't contain real data in database, only refers to other one
    https://clickhouse.yandex/reference_en.html#Default values
    """

    def __init__(self, inner_field, base_field_name):
        """
        Creates ALIAS field
        :param inner_field: Field instance this field is acting like
        :param base_field_name: Name of field, to which alias is built
        """
        super(AliasField, self).__init__(inner_field)
        self.base_field_name = base_field_name

    def get_sql(self, with_default=True):
        """
        Generates SQL for create table command
        :param with_default: This flag is inherited from Field model. Does nothing (ALIAS have no default)
        :return: Creation SQL string
        """
        # Ask the inner field for its full column type instead of reading db_type,
        # so that fields which build their type in get_sql (the way ArrayField does)
        # are rendered correctly.
        column_type = self.inner_field.get_sql(with_default=False)
        return '%s ALIAS %s' % (column_type, self.base_field_name)

View File

@ -150,9 +150,14 @@ class Model(with_metaclass(ModelBase)):
kwargs[name] = next(values) kwargs[name] = next(values)
return cls(**kwargs) return cls(**kwargs)
def to_tsv(self): def to_tsv(self, insertable_only=False):
''' '''
Returns the instance's column values as a tab-separated line. A newline is not included. Returns the instance's column values as a tab-separated line. A newline is not included.
:param bool insertable_only: If True, returns only fields, that can be inserted into database
''' '''
data = self.__dict__ data = self.__dict__
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in self._fields)
fields = [f for f in self._fields if f[1].insertable] if insertable_only else self._fields
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields)

View File

@ -0,0 +1,6 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *
# Single migration step: create the table backing MaterializedModel.
operations = [migrations.CreateTable(MaterializedModel)]

View File

@ -0,0 +1,6 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *
# Single migration step: create the table backing AliasModel.
operations = [migrations.CreateTable(AliasModel)]

View File

@ -0,0 +1,57 @@
import unittest
from datetime import date
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
class MaterializedFieldsTest(unittest.TestCase):
    # NOTE(review): this test case exercises ModelWithAliasFields; the class name
    # looks copied from the materialized-fields tests. It is kept unchanged so
    # existing test selectors keep working.

    def setUp(self):
        self.database = Database('test-db')
        self.database.create_table(ModelWithAliasFields)

    def tearDown(self):
        self.database.drop_database()

    def test_insert_and_select(self):
        # Only the regular columns are given; the derived columns are filled in by ClickHouse.
        instance = ModelWithAliasFields(
            date_field='2016-08-30',
            int_field=-10,
            str_field='TEST'
        )
        self.database.insert([instance])
        # We can't select * from table, as it doesn't select materialized and alias fields
        query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str' \
                ' FROM $db.%s ORDER BY alias_date' % ModelWithAliasFields.table_name()
        # Run the query both with the explicit model class and without one.
        for model_cls in (ModelWithAliasFields, None):
            results = list(self.database.select(query, model_cls))
            self.assertEqual(len(results), 1)
            self.assertEqual(results[0].date_field, instance.date_field)
            self.assertEqual(results[0].int_field, instance.int_field)
            self.assertEqual(results[0].str_field, instance.str_field)
            self.assertEqual(results[0].alias_int, instance.int_field)
            self.assertEqual(results[0].alias_str, instance.str_field)
            self.assertEqual(results[0].alias_date, instance.date_field)

    def test_assignment_error(self):
        # I can't prevent assigning at all, in case db.select statements with model provided sets model fields.
        instance = ModelWithAliasFields()
        # Values that are not valid dates must still be rejected on assignment.
        for value in ('x', [date.today()], ['aaa'], [None]):
            with self.assertRaises(ValueError):
                instance.alias_date = value
class ModelWithAliasFields(Model):
    # Test model: three regular columns, each with an ALIAS column pointing at it.

    int_field = Int32Field()
    date_field = DateField()
    str_field = StringField()

    # All three derived columns are ALIAS columns, so the alias tests really
    # exercise AliasField rather than MaterializedField.
    alias_str = AliasField(StringField(), 'str_field')
    alias_int = AliasField(Int32Field(), 'int_field')
    alias_date = AliasField(DateField(), 'date_field')

    engine = MergeTree('date_field', ('date_field',))

View File

@ -0,0 +1,57 @@
import unittest
from datetime import date
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
class MaterializedFieldsTest(unittest.TestCase):
    # Round-trip tests for a model whose derived columns are MATERIALIZED.

    def setUp(self):
        self.database = Database('test-db')
        self.database.create_table(ModelWithMaterializedFields)

    def tearDown(self):
        self.database.drop_database()

    def test_insert_and_select(self):
        # Only the regular columns are given; the materialized ones are calculated by ClickHouse.
        instance = ModelWithMaterializedFields(
            date_time_field='2016-08-30 11:00:00',
            int_field=-10,
            str_field='TEST'
        )
        self.database.insert([instance])
        # We can't select * from table, as it doesn't select materialized and alias fields
        query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str' \
                ' FROM $db.%s ORDER BY mat_date' % ModelWithMaterializedFields.table_name()
        # Run the query both with the explicit model class and without one.
        for model_cls in (ModelWithMaterializedFields, None):
            results = list(self.database.select(query, model_cls))
            self.assertEqual(len(results), 1)
            self.assertEqual(results[0].date_time_field, instance.date_time_field)
            self.assertEqual(results[0].int_field, instance.int_field)
            self.assertEqual(results[0].str_field, instance.str_field)
            # Expected values mirror the expressions declared on the model: abs / lower / toDate.
            self.assertEqual(results[0].mat_int, abs(instance.int_field))
            self.assertEqual(results[0].mat_str, instance.str_field.lower())
            self.assertEqual(results[0].mat_date, instance.date_time_field.date())

    def test_assignment_error(self):
        # I can't prevent assigning at all, in case db.select statements with model provided sets model fields.
        instance = ModelWithMaterializedFields()
        # Values that are not valid dates must still be rejected on assignment.
        for value in ('x', [date.today()], ['aaa'], [None]):
            with self.assertRaises(ValueError):
                instance.mat_date = value
# Test model with three regular columns and three MATERIALIZED columns, each
# calculated from one regular column by a ClickHouse expression.
class ModelWithMaterializedFields(Model):
# Regular columns.
int_field = Int32Field()
date_time_field = DateTimeField()
str_field = StringField()
# Derived columns: the second argument is the expression string handed to MaterializedField.
mat_str = MaterializedField(StringField(), 'lower(str_field)')
mat_int = MaterializedField(Int32Field(), 'abs(int_field)')
mat_date = MaterializedField(DateField(), 'toDate(date_time_field)')
# Both MergeTree arguments refer to the materialized date column.
engine = MergeTree('mat_date', ('mat_date',))

View File

@ -60,6 +60,15 @@ class MigrationsTestCase(unittest.TestCase):
self.assertTrue(self.tableExists(EnumModel1)) self.assertTrue(self.tableExists(EnumModel1))
self.assertEquals(self.getTableFields(EnumModel2), self.assertEquals(self.getTableFields(EnumModel2),
[('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")]) [('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")])
self.database.migrate('tests.sample_migrations', 8)
self.assertTrue(self.tableExists(MaterializedModel))
self.assertEquals(self.getTableFields(MaterializedModel),
[('date_time', "DateTime"), ('date', 'Date')])
self.database.migrate('tests.sample_migrations', 9)
self.assertTrue(self.tableExists(AliasModel))
self.assertEquals(self.getTableFields(AliasModel),
[('date', 'Date'), ('date_alias', "Date")])
# Several different models with the same table name, to simulate a table that changes over time # Several different models with the same table name, to simulate a table that changes over time
@ -127,3 +136,25 @@ class EnumModel2(Model):
@classmethod @classmethod
def table_name(cls): def table_name(cls):
return 'enum_mig' return 'enum_mig'
# Migration test model: a DateTime column plus a MATERIALIZED Date column derived from it.
class MaterializedModel(Model):
date_time = DateTimeField()
# Calculated with the ClickHouse expression toDate(date_time).
date = MaterializedField(DateField(), 'toDate(date_time)')
# Both MergeTree arguments refer to the materialized column.
engine = MergeTree('date', ('date',))
# Overrides the table name with a fixed literal (spelling kept exactly as written).
@classmethod
def table_name(cls):
return 'materalized_date'
# Migration test model: a Date column plus an ALIAS column referring to it.
class AliasModel(Model):
date = DateField()
# Alias built on the 'date' column.
date_alias = AliasField(DateField(), 'date')
engine = MergeTree('date', ('date',))
# Overrides the table name with a fixed literal.
@classmethod
def table_name(cls):
return 'alias_date'