diff --git a/README.rst b/README.rst index 8e8c263..70ab8af 100644 --- a/README.rst +++ b/README.rst @@ -31,6 +31,8 @@ Models are defined in a way reminiscent of Django's ORM:: engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.). +It is always possible to pass alias or materialized parameters. See below for usage examples. +Only one of the default, alias and materialized parameters can be provided. See below for the supported field types and table engines. @@ -189,26 +191,26 @@ Field Types Currently the following field types are supported: -============= ======== ================= =================================================== -Class DB Type Pythonic Type Comments -============= ======== ================= =================================================== -StringField String unicode Encoded as UTF-8 when written to ClickHouse -DateField Date datetime.date Range 1970-01-01 to 2038-01-19 -DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC -Int8Field Int8 int Range -128 to 127 -Int16Field Int16 int Range -32768 to 32767 -Int32Field Int32 int Range -2147483648 to 2147483647 -Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807 -UInt8Field UInt8 int Range 0 to 255 -UInt16Field UInt16 int Range 0 to 65535 -UInt32Field UInt32 int Range 0 to 4294967295 -UInt64Field UInt64 int/long Range 0 to 18446744073709551615 -Float32Field Float32 float -Float64Field Float64 float -Enum8Field Enum8 Enum See below -Enum16Field Enum16 Enum See below -ArrayField Array list See below -============= ======== ================= =================================================== +=================== ======== ================= =================================================== +Class DB Type Pythonic Type Comments +=================== ======== 
================= =================================================== +StringField String unicode Encoded as UTF-8 when written to ClickHouse +DateField Date datetime.date Range 1970-01-01 to 2038-01-19 +DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC +Int8Field Int8 int Range -128 to 127 +Int16Field Int16 int Range -32768 to 32767 +Int32Field Int32 int Range -2147483648 to 2147483647 +Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807 +UInt8Field UInt8 int Range 0 to 255 +UInt16Field UInt16 int Range 0 to 65535 +UInt32Field UInt32 int Range 0 to 4294967295 +UInt64Field UInt64 int/long Range 0 to 18446744073709551615 +Float32Field Float32 float +Float64Field Float64 float +Enum8Field Enum8 Enum See below +Enum16Field Enum16 Enum See below +ArrayField Array list See below +=================== ======== ================= =================================================== Working with enum fields ************************ @@ -249,6 +251,40 @@ You can create array fields containing any data type, for example:: data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66]) + +Working with materialized and alias fields +****************************************** + +ClickHouse allows you to create MATERIALIZED and ALIAS fields. + +See the ClickHouse documentation on default values (DEFAULT, MATERIALIZED and ALIAS expressions) for details. + +Neither field type can be inserted into the database directly. +The values of these fields are ignored when using the database.insert() method. +These fields are set to default values if you use database.select('SELECT * FROM mymodel', model_class=MyModel), +because ClickHouse doesn't return them. +Nevertheless, attribute values (as well as defaults) can be set on a model object from Python. 
+ +Usage:: + + class Event(models.Model): + + created = fields.DateTimeField() + created_date = fields.DateTimeField(materialized='toDate(created)') + name = fields.StringField() + username = fields.StringField(alias='name') + + engine = engines.MergeTree('created_date', ('created_date', 'created')) + + obj = Event(created=datetime.now(), name='MyEvent') + db = Database('my_test_db') + db.insert([obj]) + # All values will be retrieved from database + db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event) + # created_date, username will contain default value + db.select('SELECT * FROM $db.event', model_class=Event) + + Table Engines ------------- diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 9ce61a0..73bbc13 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -50,11 +50,11 @@ class Database(object): model_class = first_instance.__class__ def gen(): yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8') - yield (first_instance.to_tsv() + '\n').encode('utf-8') + yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8') # Collect lines in batches of batch_size batch = [] for instance in i: - batch.append(instance.to_tsv()) + batch.append(instance.to_tsv(insertable_only=True)) if len(batch) >= batch_size: # Return the current batch of lines yield ('\n'.join(batch) + '\n').encode('utf-8') diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index e4a5615..51b67de 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -12,10 +12,17 @@ class Field(object): class_default = 0 db_type = None - def __init__(self, default=None): + def __init__(self, default=None, alias=None, materialized=None): + assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \ + "Only one of default, alias and materialized 
parameters can be given" + assert alias is None or isinstance(alias, str), "Alias field must be string field name, if given" + assert materialized is None or isinstance(materialized, str), "Materialized field must be string, if given" + self.creation_counter = Field.creation_counter Field.creation_counter += 1 self.default = self.class_default if default is None else default + self.alias = alias + self.materialized = materialized def to_python(self, value): ''' @@ -48,13 +55,22 @@ class Field(object): def get_sql(self, with_default=True): ''' Returns an SQL expression describing the field (e.g. for CREATE TABLE). + :param with_default: If True, adds default value to sql. + It doesn't affect fields with alias and materialized values. ''' - if with_default: + if self.alias: + return '%s ALIAS %s' % (self.db_type, self.alias) + elif self.materialized: + return '%s MATERIALIZED %s' % (self.db_type, self.materialized) + elif with_default: default = self.to_db_string(self.default) return '%s DEFAULT %s' % (self.db_type, default) else: return self.db_type + def is_insertable(self): + return self.alias is None and self.materialized is None + class StringField(Field): @@ -207,11 +223,11 @@ class Float64Field(BaseFloatField): class BaseEnumField(Field): - def __init__(self, enum_cls, default=None): + def __init__(self, enum_cls, default=None, alias=None, materialized=None): self.enum_cls = enum_cls if default is None: default = list(enum_cls)[0] - super(BaseEnumField, self).__init__(default) + super(BaseEnumField, self).__init__(default, alias, materialized) def to_python(self, value): if isinstance(value, self.enum_cls): @@ -271,9 +287,9 @@ class ArrayField(Field): class_default = [] - def __init__(self, inner_field, default=None): + def __init__(self, inner_field, default=None, alias=None, materialized=None): self.inner_field = inner_field - super(ArrayField, self).__init__(default) + super(ArrayField, self).__init__(default, alias, materialized) def to_python(self, 
value): if isinstance(value, text_type): @@ -295,3 +311,4 @@ class ArrayField(Field): def get_sql(self, with_default=True): from .utils import escape return 'Array(%s)' % self.inner_field.get_sql(with_default=False) + diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 6fae876..16f6f77 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -150,9 +150,14 @@ class Model(with_metaclass(ModelBase)): kwargs[name] = next(values) return cls(**kwargs) - def to_tsv(self): + def to_tsv(self, insertable_only=False): ''' Returns the instance's column values as a tab-separated line. A newline is not included. + :param bool insertable_only: If True, returns only fields, that can be inserted into database ''' data = self.__dict__ - return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in self._fields) + + fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields + return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields) + + diff --git a/tests/sample_migrations/0008.py b/tests/sample_migrations/0008.py new file mode 100644 index 0000000..691a762 --- /dev/null +++ b/tests/sample_migrations/0008.py @@ -0,0 +1,6 @@ +from infi.clickhouse_orm import migrations +from ..test_migrations import * + +operations = [ + migrations.CreateTable(MaterializedModel) +] \ No newline at end of file diff --git a/tests/sample_migrations/0009.py b/tests/sample_migrations/0009.py new file mode 100644 index 0000000..7841f17 --- /dev/null +++ b/tests/sample_migrations/0009.py @@ -0,0 +1,6 @@ +from infi.clickhouse_orm import migrations +from ..test_migrations import * + +operations = [ + migrations.CreateTable(AliasModel) +] \ No newline at end of file diff --git a/tests/test_alias_fields.py b/tests/test_alias_fields.py new file mode 100644 index 0000000..af7bbc8 --- /dev/null +++ b/tests/test_alias_fields.py @@ -0,0 +1,69 @@ +import unittest 
+from datetime import date + +from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.engines import * + + +class MaterializedFieldsTest(unittest.TestCase): + + def setUp(self): + self.database = Database('test-db') + self.database.create_table(ModelWithAliasFields) + + def tearDown(self): + self.database.drop_database() + + def test_insert_and_select(self): + instance = ModelWithAliasFields( + date_field='2016-08-30', + int_field=-10, + str_field='TEST' + ) + self.database.insert([instance]) + # We can't select * from table, as it doesn't select materialized and alias fields + query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str' \ + ' FROM $db.%s ORDER BY alias_date' % ModelWithAliasFields.table_name() + for model_cls in (ModelWithAliasFields, None): + results = list(self.database.select(query, model_cls)) + self.assertEquals(len(results), 1) + self.assertEquals(results[0].date_field, instance.date_field) + self.assertEquals(results[0].int_field, instance.int_field) + self.assertEquals(results[0].str_field, instance.str_field) + self.assertEquals(results[0].alias_int, instance.int_field) + self.assertEquals(results[0].alias_str, instance.str_field) + self.assertEquals(results[0].alias_date, instance.date_field) + + def test_assignment_error(self): + # I can't prevent assigning at all, in case db.select statements with model provided sets model fields. 
+ instance = ModelWithAliasFields() + for value in ('x', [date.today()], ['aaa'], [None]): + with self.assertRaises(ValueError): + instance.alias_date = value + + def test_wrong_field(self): + with self.assertRaises(AssertionError): + StringField(alias=123) + + def test_duplicate_default(self): + with self.assertRaises(AssertionError): + StringField(alias='str_field', default='with default') + + with self.assertRaises(AssertionError): + StringField(alias='str_field', materialized='str_field') + + +class ModelWithAliasFields(Model): + int_field = Int32Field() + date_field = DateField() + str_field = StringField() + + alias_str = StringField(alias='str_field') + alias_int = Int32Field(alias='int_field') + alias_date = DateField(alias='date_field') + + engine = MergeTree('date_field', ('date_field',)) + + diff --git a/tests/test_materialized_fields.py b/tests/test_materialized_fields.py new file mode 100644 index 0000000..3151dc3 --- /dev/null +++ b/tests/test_materialized_fields.py @@ -0,0 +1,69 @@ +import unittest +from datetime import date + +from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.engines import * + + +class MaterializedFieldsTest(unittest.TestCase): + + def setUp(self): + self.database = Database('test-db') + self.database.create_table(ModelWithMaterializedFields) + + def tearDown(self): + self.database.drop_database() + + def test_insert_and_select(self): + instance = ModelWithMaterializedFields( + date_time_field='2016-08-30 11:00:00', + int_field=-10, + str_field='TEST' + ) + self.database.insert([instance]) + # We can't select * from table, as it doesn't select materialized and alias fields + query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str' \ + ' FROM $db.%s ORDER BY mat_date' % ModelWithMaterializedFields.table_name() + for model_cls in (ModelWithMaterializedFields, None): + results = 
list(self.database.select(query, model_cls)) + self.assertEquals(len(results), 1) + self.assertEquals(results[0].date_time_field, instance.date_time_field) + self.assertEquals(results[0].int_field, instance.int_field) + self.assertEquals(results[0].str_field, instance.str_field) + self.assertEquals(results[0].mat_int, abs(instance.int_field)) + self.assertEquals(results[0].mat_str, instance.str_field.lower()) + self.assertEquals(results[0].mat_date, instance.date_time_field.date()) + + def test_assignment_error(self): + # I can't prevent assigning at all, in case db.select statements with model provided sets model fields. + instance = ModelWithMaterializedFields() + for value in ('x', [date.today()], ['aaa'], [None]): + with self.assertRaises(ValueError): + instance.mat_date = value + + def test_wrong_field(self): + with self.assertRaises(AssertionError): + StringField(materialized=123) + + def test_duplicate_default(self): + with self.assertRaises(AssertionError): + StringField(materialized='str_field', default='with default') + + with self.assertRaises(AssertionError): + StringField(materialized='str_field', alias='str_field') + + +class ModelWithMaterializedFields(Model): + int_field = Int32Field() + date_time_field = DateTimeField() + str_field = StringField() + + mat_str = StringField(materialized='lower(str_field)') + mat_int = Int32Field(materialized='abs(int_field)') + mat_date = DateField(materialized='toDate(date_time_field)') + + engine = MergeTree('mat_date', ('mat_date',)) + + diff --git a/tests/test_migrations.py b/tests/test_migrations.py index 39bcb55..4541a6b 100644 --- a/tests/test_migrations.py +++ b/tests/test_migrations.py @@ -60,6 +60,15 @@ class MigrationsTestCase(unittest.TestCase): self.assertTrue(self.tableExists(EnumModel1)) self.assertEquals(self.getTableFields(EnumModel2), [('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")]) + self.database.migrate('tests.sample_migrations', 8) + 
self.assertTrue(self.tableExists(MaterializedModel)) + self.assertEquals(self.getTableFields(MaterializedModel), + [('date_time', "DateTime"), ('date', 'Date')]) + self.database.migrate('tests.sample_migrations', 9) + self.assertTrue(self.tableExists(AliasModel)) + self.assertEquals(self.getTableFields(AliasModel), + [('date', 'Date'), ('date_alias', "Date")]) + # Several different models with the same table name, to simulate a table that changes over time @@ -127,3 +136,25 @@ class EnumModel2(Model): @classmethod def table_name(cls): return 'enum_mig' + + +class MaterializedModel(Model): + date_time = DateTimeField() + date = DateField(materialized='toDate(date_time)') + + engine = MergeTree('date', ('date',)) + + @classmethod + def table_name(cls): + return 'materalized_date' + + +class AliasModel(Model): + date = DateField() + date_alias = DateField(alias='date') + + engine = MergeTree('date', ('date',)) + + @classmethod + def table_name(cls): + return 'alias_date' \ No newline at end of file