Merge branch 'carrotquest-develop' into develop

This commit is contained in:
Itai Shirav 2017-02-07 08:21:38 +02:00
commit 4b086d5b21
9 changed files with 269 additions and 30 deletions

View File

@ -31,6 +31,8 @@ Models are defined in a way reminiscent of Django's ORM::
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.). It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.).
Instead of a default value, a field can be given an alias or a materialized expression. See below for usage examples.
Only one of the default, alias and materialized parameters can be provided for a field.
See below for the supported field types and table engines. See below for the supported field types and table engines.
@ -189,26 +191,26 @@ Field Types
Currently the following field types are supported: Currently the following field types are supported:
============= ======== ================= =================================================== =================== ======== ================= ===================================================
Class DB Type Pythonic Type Comments Class DB Type Pythonic Type Comments
============= ======== ================= =================================================== =================== ======== ================= ===================================================
StringField String unicode Encoded as UTF-8 when written to ClickHouse StringField String unicode Encoded as UTF-8 when written to ClickHouse
DateField Date datetime.date Range 1970-01-01 to 2038-01-19 DateField Date datetime.date Range 1970-01-01 to 2038-01-19
DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC
Int8Field Int8 int Range -128 to 127 Int8Field Int8 int Range -128 to 127
Int16Field Int16 int Range -32768 to 32767 Int16Field Int16 int Range -32768 to 32767
Int32Field Int32 int Range -2147483648 to 2147483647 Int32Field Int32 int Range -2147483648 to 2147483647
Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807 Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807
UInt8Field UInt8 int Range 0 to 255 UInt8Field UInt8 int Range 0 to 255
UInt16Field UInt16 int Range 0 to 65535 UInt16Field UInt16 int Range 0 to 65535
UInt32Field UInt32 int Range 0 to 4294967295 UInt32Field UInt32 int Range 0 to 4294967295
UInt64Field UInt64 int/long Range 0 to 18446744073709551615 UInt64Field UInt64 int/long Range 0 to 18446744073709551615
Float32Field Float32 float Float32Field Float32 float
Float64Field Float64 float Float64Field Float64 float
Enum8Field Enum8 Enum See below Enum8Field Enum8 Enum See below
Enum16Field Enum16 Enum See below Enum16Field Enum16 Enum See below
ArrayField Array list See below ArrayField Array list See below
============= ======== ================= =================================================== =================== ========== ================= ===================================================
Working with enum fields Working with enum fields
************************ ************************
@ -249,6 +251,40 @@ You can create array fields containing any data type, for example::
data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66]) data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
Working with materialized and alias fields
******************************************
ClickHouse allows defining MATERIALIZED and ALIAS fields.
See the documentation `here <https://clickhouse.yandex/reference_en.html#Default%20values>`_.
Values of both field types cannot be inserted into the database directly.
Such field values are ignored when using the database.insert() method.
These fields are set to their default values if you use database.select('SELECT * FROM mymodel', model_class=MyModel),
because ClickHouse doesn't return them for ``SELECT *`` queries.
Nevertheless, their attribute values (as well as defaults) can still be set on a model object from Python.
Usage::
class Event(models.Model):

    created = fields.DateTimeField()
    # toDate() yields a Date, and this column is also the MergeTree date column,
    # so it is declared as a DateField rather than a DateTimeField
    created_date = fields.DateField(materialized='toDate(created)')
    name = fields.StringField()
    # ALIAS column: evaluated from `name` when explicitly selected
    username = fields.StringField(alias='name')

    engine = engines.MergeTree('created_date', ('created_date', 'created'))
obj = Event(created=datetime.now(), name='MyEvent')
db = Database('my_test_db')
db.insert([obj])
# All values will be retrieved from the database
db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
# created_date and username will contain default values
db.select('SELECT * FROM $db.event', model_class=Event)
Table Engines Table Engines
------------- -------------

View File

@ -50,11 +50,11 @@ class Database(object):
model_class = first_instance.__class__ model_class = first_instance.__class__
def gen(): def gen():
yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8') yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
yield (first_instance.to_tsv() + '\n').encode('utf-8') yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8')
# Collect lines in batches of batch_size # Collect lines in batches of batch_size
batch = [] batch = []
for instance in i: for instance in i:
batch.append(instance.to_tsv()) batch.append(instance.to_tsv(insertable_only=True))
if len(batch) >= batch_size: if len(batch) >= batch_size:
# Return the current batch of lines # Return the current batch of lines
yield ('\n'.join(batch) + '\n').encode('utf-8') yield ('\n'.join(batch) + '\n').encode('utf-8')

View File

@ -12,10 +12,17 @@ class Field(object):
class_default = 0 class_default = 0
db_type = None db_type = None
def __init__(self, default=None, alias=None, materialized=None):
    '''
    Initializes the field and records its creation order.
    :param default: value to use instead of the class-level default (None means "use class_default").
    :param alias: string expression for an ALIAS column, or None.
    :param materialized: string expression for a MATERIALIZED column, or None.
    At most one of default, alias and materialized may be given.
    '''
    # Count the given parameters with identity checks instead of building a set of tuples:
    # a default may be unhashable (e.g. a list for an array field), and putting it
    # into a set would raise TypeError before the assertion is even evaluated.
    given = sum(1 for value in (default, alias, materialized) if value is not None)
    assert given <= 1, \
        "Only one of default, alias and materialized parameters can be given"
    assert alias is None or isinstance(alias, str), "Alias field must be string field name, if given"
    assert materialized is None or isinstance(materialized, str), "Materialized field must be string, if given"
    self.creation_counter = Field.creation_counter
    Field.creation_counter += 1
    self.default = self.class_default if default is None else default
    self.alias = alias
    self.materialized = materialized
def to_python(self, value): def to_python(self, value):
''' '''
@ -48,13 +55,22 @@ class Field(object):
def get_sql(self, with_default=True):
    '''
    Builds the SQL fragment describing this field's column (e.g. for CREATE TABLE).
    :param with_default: If True, the DEFAULT clause is appended for regular fields.
    Fields with alias or materialized expressions always emit that expression instead.
    '''
    if self.alias:
        return '%s ALIAS %s' % (self.db_type, self.alias)
    if self.materialized:
        return '%s MATERIALIZED %s' % (self.db_type, self.materialized)
    if not with_default:
        return self.db_type
    default_sql = self.to_db_string(self.default)
    return '%s DEFAULT %s' % (self.db_type, default_sql)
def is_insertable(self):
    '''
    Returns True when the field has neither an alias nor a materialized expression.
    '''
    for expression in (self.alias, self.materialized):
        if expression is not None:
            return False
    return True
class StringField(Field): class StringField(Field):
@ -207,11 +223,11 @@ class Float64Field(BaseFloatField):
class BaseEnumField(Field): class BaseEnumField(Field):
def __init__(self, enum_cls, default=None, alias=None, materialized=None):
    '''
    Initializes an enum field.
    :param enum_cls: the Enum class whose members are the allowed values.
    :param default: default member; when None, the first member of enum_cls is used.
    :param alias: string expression for an ALIAS column, or None.
    :param materialized: string expression for a MATERIALIZED column, or None.
    '''
    self.enum_cls = enum_cls
    # Hand the caller's original `default` to the base class. Substituting the first
    # enum member beforehand would make the base class see a default together with
    # alias/materialized and reject every aliased or materialized enum field.
    super(BaseEnumField, self).__init__(default, alias, materialized)
    if default is None:
        # Replace the generic class default with a real member of the enum
        self.default = list(enum_cls)[0]
def to_python(self, value): def to_python(self, value):
if isinstance(value, self.enum_cls): if isinstance(value, self.enum_cls):
@ -271,9 +287,9 @@ class ArrayField(Field):
class_default = [] class_default = []
def __init__(self, inner_field, default=None, alias=None, materialized=None):
    '''
    Initializes an array field.
    :param inner_field: field instance stored as inner_field, describing the array's elements.
    :param default: passed through to the base class unchanged.
    :param alias: passed through to the base class unchanged.
    :param materialized: passed through to the base class unchanged.
    '''
    self.inner_field = inner_field
    base = super(ArrayField, self)
    base.__init__(default, alias, materialized)
def to_python(self, value): def to_python(self, value):
if isinstance(value, text_type): if isinstance(value, text_type):
@ -295,3 +311,4 @@ class ArrayField(Field):
def get_sql(self, with_default=True):
    '''
    Returns an SQL expression describing the array field (e.g. for CREATE TABLE).
    :param with_default: accepted for interface compatibility; no DEFAULT clause is
    emitted for arrays. Alias and materialized expressions are always emitted.
    '''
    # The inner field contributes only its type, never its own default
    sql = 'Array(%s)' % self.inner_field.get_sql(with_default=False)
    # Without these clauses an aliased/materialized array would be created as a regular
    # column, while inserts skip it because the field is not insertable.
    if self.alias:
        return '%s ALIAS %s' % (sql, self.alias)
    if self.materialized:
        return '%s MATERIALIZED %s' % (sql, self.materialized)
    return sql

View File

@ -150,9 +150,14 @@ class Model(with_metaclass(ModelBase)):
kwargs[name] = next(values) kwargs[name] = next(values)
return cls(**kwargs) return cls(**kwargs)
def to_tsv(self, insertable_only=False):
    '''
    Serializes the instance's column values into one tab-separated line (no trailing newline).
    :param bool insertable_only: If True, only fields that can be inserted into the database are included
    '''
    values = self.__dict__
    if insertable_only:
        # Skip fields that report themselves as not insertable
        columns = [pair for pair in self._fields if pair[1].is_insertable()]
    else:
        columns = self._fields
    return '\t'.join(field.to_db_string(values[name], quote=False) for name, field in columns)

View File

@ -0,0 +1,6 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *
# Operations of this migration: create the table for MaterializedModel.
operations = [migrations.CreateTable(MaterializedModel)]

View File

@ -0,0 +1,6 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *
# Operations of this migration: create the table for AliasModel.
operations = [migrations.CreateTable(AliasModel)]

View File

@ -0,0 +1,69 @@
import unittest
from datetime import date
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
class MaterializedFieldsTest(unittest.TestCase):
    '''
    Tests inserting, selecting and validating a model that declares ALIAS fields.
    NOTE(review): the class name matches the materialized-fields test case; it is kept unchanged here.
    '''

    def setUp(self):
        self.database = Database('test-db')
        self.database.create_table(ModelWithAliasFields)

    def tearDown(self):
        self.database.drop_database()

    def test_insert_and_select(self):
        instance = ModelWithAliasFields(
            date_field='2016-08-30',
            int_field=-10,
            str_field='TEST'
        )
        self.database.insert([instance])
        # We can't select * from table, as it doesn't select materialized and alias fields
        query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str' \
                ' FROM $db.%s ORDER BY alias_date' % ModelWithAliasFields.table_name()
        # Run the query both with and without an explicit model class
        for model_cls in (ModelWithAliasFields, None):
            results = list(self.database.select(query, model_cls))
            # assertEqual is used because the assertEquals alias is deprecated
            self.assertEqual(len(results), 1)
            self.assertEqual(results[0].date_field, instance.date_field)
            self.assertEqual(results[0].int_field, instance.int_field)
            self.assertEqual(results[0].str_field, instance.str_field)
            self.assertEqual(results[0].alias_int, instance.int_field)
            self.assertEqual(results[0].alias_str, instance.str_field)
            self.assertEqual(results[0].alias_date, instance.date_field)

    def test_assignment_error(self):
        # I can't prevent assigning at all, in case db.select statements with model provided sets model fields.
        instance = ModelWithAliasFields()
        for value in ('x', [date.today()], ['aaa'], [None]):
            with self.assertRaises(ValueError):
                instance.alias_date = value

    def test_wrong_field(self):
        with self.assertRaises(AssertionError):
            StringField(alias=123)

    def test_duplicate_default(self):
        with self.assertRaises(AssertionError):
            StringField(alias='str_field', default='with default')

        with self.assertRaises(AssertionError):
            StringField(alias='str_field', materialized='str_field')
class ModelWithAliasFields(Model):
    '''
    Test model with three regular fields and one ALIAS field pointing at each of them.
    '''

    # Regular fields
    int_field = Int32Field()
    date_field = DateField()
    str_field = StringField()

    # ALIAS fields, each referring to one of the regular fields above
    alias_str = StringField(alias='str_field')
    alias_int = Int32Field(alias='int_field')
    alias_date = DateField(alias='date_field')

    engine = MergeTree('date_field', ('date_field',))

View File

@ -0,0 +1,69 @@
import unittest
from datetime import date
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
class MaterializedFieldsTest(unittest.TestCase):
    '''
    Tests inserting, selecting and validating a model that declares MATERIALIZED fields.
    '''

    def setUp(self):
        self.database = Database('test-db')
        self.database.create_table(ModelWithMaterializedFields)

    def tearDown(self):
        self.database.drop_database()

    def test_insert_and_select(self):
        instance = ModelWithMaterializedFields(
            date_time_field='2016-08-30 11:00:00',
            int_field=-10,
            str_field='TEST'
        )
        self.database.insert([instance])
        # We can't select * from table, as it doesn't select materialized and alias fields
        query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str' \
                ' FROM $db.%s ORDER BY mat_date' % ModelWithMaterializedFields.table_name()
        # Run the query both with and without an explicit model class
        for model_cls in (ModelWithMaterializedFields, None):
            results = list(self.database.select(query, model_cls))
            # assertEqual is used because the assertEquals alias is deprecated
            self.assertEqual(len(results), 1)
            self.assertEqual(results[0].date_time_field, instance.date_time_field)
            self.assertEqual(results[0].int_field, instance.int_field)
            self.assertEqual(results[0].str_field, instance.str_field)
            self.assertEqual(results[0].mat_int, abs(instance.int_field))
            self.assertEqual(results[0].mat_str, instance.str_field.lower())
            self.assertEqual(results[0].mat_date, instance.date_time_field.date())

    def test_assignment_error(self):
        # I can't prevent assigning at all, in case db.select statements with model provided sets model fields.
        instance = ModelWithMaterializedFields()
        for value in ('x', [date.today()], ['aaa'], [None]):
            with self.assertRaises(ValueError):
                instance.mat_date = value

    def test_wrong_field(self):
        with self.assertRaises(AssertionError):
            StringField(materialized=123)

    def test_duplicate_default(self):
        with self.assertRaises(AssertionError):
            StringField(materialized='str_field', default='with default')

        with self.assertRaises(AssertionError):
            StringField(materialized='str_field', alias='str_field')
class ModelWithMaterializedFields(Model):
    '''
    Test model with three regular fields and one MATERIALIZED field derived from each of them.
    '''

    # Regular fields
    int_field = Int32Field()
    date_time_field = DateTimeField()
    str_field = StringField()

    # MATERIALIZED fields, each given as an expression over a regular field
    mat_str = StringField(materialized='lower(str_field)')
    mat_int = Int32Field(materialized='abs(int_field)')
    mat_date = DateField(materialized='toDate(date_time_field)')

    engine = MergeTree('mat_date', ('mat_date',))

View File

@ -60,6 +60,15 @@ class MigrationsTestCase(unittest.TestCase):
self.assertTrue(self.tableExists(EnumModel1)) self.assertTrue(self.tableExists(EnumModel1))
self.assertEquals(self.getTableFields(EnumModel2), self.assertEquals(self.getTableFields(EnumModel2),
[('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")]) [('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")])
self.database.migrate('tests.sample_migrations', 8)
self.assertTrue(self.tableExists(MaterializedModel))
self.assertEquals(self.getTableFields(MaterializedModel),
[('date_time', "DateTime"), ('date', 'Date')])
self.database.migrate('tests.sample_migrations', 9)
self.assertTrue(self.tableExists(AliasModel))
self.assertEquals(self.getTableFields(AliasModel),
[('date', 'Date'), ('date_alias', "Date")])
# Several different models with the same table name, to simulate a table that changes over time # Several different models with the same table name, to simulate a table that changes over time
@ -127,3 +136,25 @@ class EnumModel2(Model):
@classmethod @classmethod
def table_name(cls): def table_name(cls):
return 'enum_mig' return 'enum_mig'
class MaterializedModel(Model):
    '''
    Migration test model whose date column is materialized from date_time.
    '''

    date_time = DateTimeField()
    date = DateField(materialized='toDate(date_time)')

    engine = MergeTree('date', ('date',))

    @classmethod
    def table_name(cls):
        # Fixed table name used instead of one derived from the class
        name = 'materalized_date'
        return name
class AliasModel(Model):
    '''
    Migration test model with a date column and an alias column referring to it.
    '''

    date = DateField()
    date_alias = DateField(alias='date')

    engine = MergeTree('date', ('date',))

    @classmethod
    def table_name(cls):
        # Fixed table name used instead of one derived from the class
        name = 'alias_date'
        return name