mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2024-11-28 19:53:44 +03:00
Merge branch 'carrotquest-develop' into develop
This commit is contained in:
commit
4b086d5b21
76
README.rst
76
README.rst
|
@ -31,6 +31,8 @@ Models are defined in a way reminiscent of Django's ORM::
|
|||
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
|
||||
|
||||
It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.).
|
||||
Fields also accept alias or materialized parameters; see below for usage examples.
|
||||
Only one of the default, alias and materialized parameters can be provided for a field.
|
||||
|
||||
See below for the supported field types and table engines.
|
||||
|
||||
|
@ -189,26 +191,26 @@ Field Types
|
|||
|
||||
Currently the following field types are supported:
|
||||
|
||||
============= ======== ================= ===================================================
|
||||
Class DB Type Pythonic Type Comments
|
||||
============= ======== ================= ===================================================
|
||||
StringField String unicode Encoded as UTF-8 when written to ClickHouse
|
||||
DateField Date datetime.date Range 1970-01-01 to 2038-01-19
|
||||
DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC
|
||||
Int8Field Int8 int Range -128 to 127
|
||||
Int16Field Int16 int Range -32768 to 32767
|
||||
Int32Field Int32 int Range -2147483648 to 2147483647
|
||||
Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807
|
||||
UInt8Field UInt8 int Range 0 to 255
|
||||
UInt16Field UInt16 int Range 0 to 65535
|
||||
UInt32Field UInt32 int Range 0 to 4294967295
|
||||
UInt64Field UInt64 int/long Range 0 to 18446744073709551615
|
||||
Float32Field Float32 float
|
||||
Float64Field Float64 float
|
||||
Enum8Field Enum8 Enum See below
|
||||
Enum16Field Enum16 Enum See below
|
||||
ArrayField Array list See below
|
||||
============= ======== ================= ===================================================
|
||||
=================== ======== ================= ===================================================
|
||||
Class DB Type Pythonic Type Comments
|
||||
=================== ======== ================= ===================================================
|
||||
StringField String unicode Encoded as UTF-8 when written to ClickHouse
|
||||
DateField Date datetime.date Range 1970-01-01 to 2038-01-19
|
||||
DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC
|
||||
Int8Field Int8 int Range -128 to 127
|
||||
Int16Field Int16 int Range -32768 to 32767
|
||||
Int32Field Int32 int Range -2147483648 to 2147483647
|
||||
Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807
|
||||
UInt8Field UInt8 int Range 0 to 255
|
||||
UInt16Field UInt16 int Range 0 to 65535
|
||||
UInt32Field UInt32 int Range 0 to 4294967295
|
||||
UInt64Field UInt64 int/long Range 0 to 18446744073709551615
|
||||
Float32Field Float32 float
|
||||
Float64Field Float64 float
|
||||
Enum8Field Enum8 Enum See below
|
||||
Enum16Field Enum16 Enum See below
|
||||
ArrayField Array list See below
|
||||
=================== ======== ================= ===================================================
|
||||
|
||||
Working with enum fields
|
||||
************************
|
||||
|
@ -249,6 +251,40 @@ You can create array fields containing any data type, for example::
|
|||
|
||||
data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
|
||||
|
||||
|
||||
Working with materialized and alias fields
|
||||
******************************************
|
||||
|
||||
ClickHouse allows defining MATERIALIZED and ALIAS fields.
|
||||
|
||||
See the documentation `here <https://clickhouse.yandex/reference_en.html#Default%20values>`_.
|
||||
|
||||
Neither field type can be inserted into the database directly.
|
||||
Their values are ignored when using the database.insert() method.
|
||||
These fields are set to their default values if you use database.select('SELECT * FROM mymodel', model_class=MyModel),
|
||||
because ClickHouse doesn't return them.
|
||||
Nevertheless, attribute values (as well as defaults) can be set on a model object from Python.
|
||||
|
||||
Usage::
|
||||
|
||||
class Event(models.Model):
|
||||
|
||||
created = fields.DateTimeField()
|
||||
created_date = fields.DateField(materialized='toDate(created)')
|
||||
name = fields.StringField()
|
||||
username = fields.StringField(alias='name')
|
||||
|
||||
engine = engines.MergeTree('created_date', ('created_date', 'created'))
|
||||
|
||||
obj = Event(created=datetime.now(), name='MyEvent')
|
||||
db = Database('my_test_db')
|
||||
db.insert([obj])
|
||||
# All values will be retrieved from database
|
||||
db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
|
||||
# created_date and username will contain their default values
|
||||
db.select('SELECT * FROM $db.event', model_class=Event)
|
||||
|
||||
|
||||
Table Engines
|
||||
-------------
|
||||
|
||||
|
|
|
@ -50,11 +50,11 @@ class Database(object):
|
|||
model_class = first_instance.__class__
|
||||
def gen():
|
||||
yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
|
||||
yield (first_instance.to_tsv() + '\n').encode('utf-8')
|
||||
yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8')
|
||||
# Collect lines in batches of batch_size
|
||||
batch = []
|
||||
for instance in i:
|
||||
batch.append(instance.to_tsv())
|
||||
batch.append(instance.to_tsv(insertable_only=True))
|
||||
if len(batch) >= batch_size:
|
||||
# Return the current batch of lines
|
||||
yield ('\n'.join(batch) + '\n').encode('utf-8')
|
||||
|
|
|
@ -12,10 +12,17 @@ class Field(object):
|
|||
class_default = 0
|
||||
db_type = None
|
||||
|
||||
def __init__(self, default=None):
|
||||
def __init__(self, default=None, alias=None, materialized=None):
    '''
    Initializes the field and records its creation order.

    :param default: value to use instead of the class-level default.
        None means "use self.class_default".
    :param alias: optional string, stored on self.alias.
    :param materialized: optional string, stored on self.materialized.
    :raises AssertionError: if more than one of default, alias and materialized
        is given, or if alias / materialized is given but is not a string.
    '''
    # Count the supplied parameters with identity checks. Building a set of
    # (default, alias) style tuples would have to hash the default, which
    # raises TypeError for unhashable defaults such as lists.
    given = sum(1 for value in (default, alias, materialized) if value is not None)
    assert given <= 1, \
        "Only one of default, alias and materialized parameters can be given"
    assert alias is None or isinstance(alias, str), "Alias field must be string field name, if given"
    assert materialized is None or isinstance(materialized, str), "Materialized field must be string, if given"

    # The counter lives on Field itself, so it is shared by every subclass.
    self.creation_counter = Field.creation_counter
    Field.creation_counter += 1
    self.default = self.class_default if default is None else default
    self.alias = alias
    self.materialized = materialized
|
||||
|
||||
def to_python(self, value):
|
||||
'''
|
||||
|
@ -48,13 +55,22 @@ class Field(object):
|
|||
def get_sql(self, with_default=True):
|
||||
'''
|
||||
Returns an SQL expression describing the field (e.g. for CREATE TABLE).
|
||||
:param with_default: If True, adds default value to sql.
|
||||
It doesn't affect fields with alias and materialized values.
|
||||
'''
|
||||
if with_default:
|
||||
if self.alias:
|
||||
return '%s ALIAS %s' % (self.db_type, self.alias)
|
||||
elif self.materialized:
|
||||
return '%s MATERIALIZED %s' % (self.db_type, self.materialized)
|
||||
elif with_default:
|
||||
default = self.to_db_string(self.default)
|
||||
return '%s DEFAULT %s' % (self.db_type, default)
|
||||
else:
|
||||
return self.db_type
|
||||
|
||||
def is_insertable(self):
|
||||
return self.alias is None and self.materialized is None
|
||||
|
||||
|
||||
class StringField(Field):
|
||||
|
||||
|
@ -207,11 +223,11 @@ class Float64Field(BaseFloatField):
|
|||
|
||||
class BaseEnumField(Field):
|
||||
|
||||
def __init__(self, enum_cls, default=None):
|
||||
def __init__(self, enum_cls, default=None, alias=None, materialized=None):
|
||||
self.enum_cls = enum_cls
|
||||
if default is None:
|
||||
default = list(enum_cls)[0]
|
||||
super(BaseEnumField, self).__init__(default)
|
||||
super(BaseEnumField, self).__init__(default, alias, materialized)
|
||||
|
||||
def to_python(self, value):
|
||||
if isinstance(value, self.enum_cls):
|
||||
|
@ -271,9 +287,9 @@ class ArrayField(Field):
|
|||
|
||||
class_default = []
|
||||
|
||||
def __init__(self, inner_field, default=None):
|
||||
def __init__(self, inner_field, default=None, alias=None, materialized=None):
|
||||
self.inner_field = inner_field
|
||||
super(ArrayField, self).__init__(default)
|
||||
super(ArrayField, self).__init__(default, alias, materialized)
|
||||
|
||||
def to_python(self, value):
|
||||
if isinstance(value, text_type):
|
||||
|
@ -295,3 +311,4 @@ class ArrayField(Field):
|
|||
def get_sql(self, with_default=True):
|
||||
from .utils import escape
|
||||
return 'Array(%s)' % self.inner_field.get_sql(with_default=False)
|
||||
|
||||
|
|
|
@ -150,9 +150,14 @@ class Model(with_metaclass(ModelBase)):
|
|||
kwargs[name] = next(values)
|
||||
return cls(**kwargs)
|
||||
|
||||
def to_tsv(self):
|
||||
def to_tsv(self, insertable_only=False):
|
||||
'''
|
||||
Returns the instance's column values as a tab-separated line. A newline is not included.
|
||||
:param bool insertable_only: If True, returns only fields, that can be inserted into database
|
||||
'''
|
||||
data = self.__dict__
|
||||
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in self._fields)
|
||||
|
||||
fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields
|
||||
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields)
|
||||
|
||||
|
||||
|
|
6
tests/sample_migrations/0008.py
Normal file
6
tests/sample_migrations/0008.py
Normal file
|
@ -0,0 +1,6 @@
|
|||
from infi.clickhouse_orm import migrations
|
||||
from ..test_migrations import *
|
||||
|
||||
operations = [
|
||||
migrations.CreateTable(MaterializedModel)
|
||||
]
|
6
tests/sample_migrations/0009.py
Normal file
6
tests/sample_migrations/0009.py
Normal file
|
@ -0,0 +1,6 @@
|
|||
from infi.clickhouse_orm import migrations
|
||||
from ..test_migrations import *
|
||||
|
||||
operations = [
|
||||
migrations.CreateTable(AliasModel)
|
||||
]
|
69
tests/test_alias_fields.py
Normal file
69
tests/test_alias_fields.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
import unittest
|
||||
from datetime import date
|
||||
|
||||
from infi.clickhouse_orm.database import Database
|
||||
from infi.clickhouse_orm.models import Model
|
||||
from infi.clickhouse_orm.fields import *
|
||||
from infi.clickhouse_orm.engines import *
|
||||
|
||||
|
||||
class MaterializedFieldsTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.database = Database('test-db')
|
||||
self.database.create_table(ModelWithAliasFields)
|
||||
|
||||
def tearDown(self):
|
||||
self.database.drop_database()
|
||||
|
||||
def test_insert_and_select(self):
    '''
    Inserts one ModelWithAliasFields row and reads it back, once with the
    model class passed to select() and once with None, checking that the
    alias columns mirror the columns they refer to.
    '''
    instance = ModelWithAliasFields(
        date_field='2016-08-30',
        int_field=-10,
        str_field='TEST'
    )
    self.database.insert([instance])
    # We can't select * from table, as it doesn't select materialized and alias fields,
    # so every column is listed explicitly.
    query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str' \
            ' FROM $db.%s ORDER BY alias_date' % ModelWithAliasFields.table_name()
    for model_cls in (ModelWithAliasFields, None):
        results = list(self.database.select(query, model_cls))
        # assertEqual is used instead of the deprecated assertEquals alias,
        # which no longer exists in Python 3.12+.
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].date_field, instance.date_field)
        self.assertEqual(results[0].int_field, instance.int_field)
        self.assertEqual(results[0].str_field, instance.str_field)
        self.assertEqual(results[0].alias_int, instance.int_field)
        self.assertEqual(results[0].alias_str, instance.str_field)
        self.assertEqual(results[0].alias_date, instance.date_field)
|
||||
|
||||
def test_assignment_error(self):
|
||||
# I can't prevent assigning at all, in case db.select statements with model provided sets model fields.
|
||||
instance = ModelWithAliasFields()
|
||||
for value in ('x', [date.today()], ['aaa'], [None]):
|
||||
with self.assertRaises(ValueError):
|
||||
instance.alias_date = value
|
||||
|
||||
def test_wrong_field(self):
|
||||
with self.assertRaises(AssertionError):
|
||||
StringField(alias=123)
|
||||
|
||||
def test_duplicate_default(self):
|
||||
with self.assertRaises(AssertionError):
|
||||
StringField(alias='str_field', default='with default')
|
||||
|
||||
with self.assertRaises(AssertionError):
|
||||
StringField(alias='str_field', materialized='str_field')
|
||||
|
||||
|
||||
class ModelWithAliasFields(Model):
|
||||
int_field = Int32Field()
|
||||
date_field = DateField()
|
||||
str_field = StringField()
|
||||
|
||||
alias_str = StringField(alias='str_field')
|
||||
alias_int = Int32Field(alias='int_field')
|
||||
alias_date = DateField(alias='date_field')
|
||||
|
||||
engine = MergeTree('date_field', ('date_field',))
|
||||
|
||||
|
69
tests/test_materialized_fields.py
Normal file
69
tests/test_materialized_fields.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
import unittest
|
||||
from datetime import date
|
||||
|
||||
from infi.clickhouse_orm.database import Database
|
||||
from infi.clickhouse_orm.models import Model
|
||||
from infi.clickhouse_orm.fields import *
|
||||
from infi.clickhouse_orm.engines import *
|
||||
|
||||
|
||||
class MaterializedFieldsTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.database = Database('test-db')
|
||||
self.database.create_table(ModelWithMaterializedFields)
|
||||
|
||||
def tearDown(self):
|
||||
self.database.drop_database()
|
||||
|
||||
def test_insert_and_select(self):
    '''
    Inserts one ModelWithMaterializedFields row and reads it back, once with
    the model class passed to select() and once with None, checking that the
    materialized columns hold the values computed from their source columns.
    '''
    instance = ModelWithMaterializedFields(
        date_time_field='2016-08-30 11:00:00',
        int_field=-10,
        str_field='TEST'
    )
    self.database.insert([instance])
    # We can't select * from table, as it doesn't select materialized and alias fields,
    # so every column is listed explicitly.
    query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str' \
            ' FROM $db.%s ORDER BY mat_date' % ModelWithMaterializedFields.table_name()
    for model_cls in (ModelWithMaterializedFields, None):
        results = list(self.database.select(query, model_cls))
        # assertEqual is used instead of the deprecated assertEquals alias,
        # which no longer exists in Python 3.12+.
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].date_time_field, instance.date_time_field)
        self.assertEqual(results[0].int_field, instance.int_field)
        self.assertEqual(results[0].str_field, instance.str_field)
        # Expected values mirror the model's expressions: abs(), lower(), toDate().
        self.assertEqual(results[0].mat_int, abs(instance.int_field))
        self.assertEqual(results[0].mat_str, instance.str_field.lower())
        self.assertEqual(results[0].mat_date, instance.date_time_field.date())
|
||||
|
||||
def test_assignment_error(self):
|
||||
# I can't prevent assigning at all, in case db.select statements with model provided sets model fields.
|
||||
instance = ModelWithMaterializedFields()
|
||||
for value in ('x', [date.today()], ['aaa'], [None]):
|
||||
with self.assertRaises(ValueError):
|
||||
instance.mat_date = value
|
||||
|
||||
def test_wrong_field(self):
|
||||
with self.assertRaises(AssertionError):
|
||||
StringField(materialized=123)
|
||||
|
||||
def test_duplicate_default(self):
|
||||
with self.assertRaises(AssertionError):
|
||||
StringField(materialized='str_field', default='with default')
|
||||
|
||||
with self.assertRaises(AssertionError):
|
||||
StringField(materialized='str_field', alias='str_field')
|
||||
|
||||
|
||||
class ModelWithMaterializedFields(Model):
|
||||
int_field = Int32Field()
|
||||
date_time_field = DateTimeField()
|
||||
str_field = StringField()
|
||||
|
||||
mat_str = StringField(materialized='lower(str_field)')
|
||||
mat_int = Int32Field(materialized='abs(int_field)')
|
||||
mat_date = DateField(materialized='toDate(date_time_field)')
|
||||
|
||||
engine = MergeTree('mat_date', ('mat_date',))
|
||||
|
||||
|
|
@ -60,6 +60,15 @@ class MigrationsTestCase(unittest.TestCase):
|
|||
self.assertTrue(self.tableExists(EnumModel1))
|
||||
self.assertEquals(self.getTableFields(EnumModel2),
|
||||
[('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")])
|
||||
self.database.migrate('tests.sample_migrations', 8)
|
||||
self.assertTrue(self.tableExists(MaterializedModel))
|
||||
self.assertEquals(self.getTableFields(MaterializedModel),
|
||||
[('date_time', "DateTime"), ('date', 'Date')])
|
||||
self.database.migrate('tests.sample_migrations', 9)
|
||||
self.assertTrue(self.tableExists(AliasModel))
|
||||
self.assertEquals(self.getTableFields(AliasModel),
|
||||
[('date', 'Date'), ('date_alias', "Date")])
|
||||
|
||||
|
||||
# Several different models with the same table name, to simulate a table that changes over time
|
||||
|
||||
|
@ -127,3 +136,25 @@ class EnumModel2(Model):
|
|||
@classmethod
|
||||
def table_name(cls):
|
||||
return 'enum_mig'
|
||||
|
||||
|
||||
class MaterializedModel(Model):
|
||||
date_time = DateTimeField()
|
||||
date = DateField(materialized='toDate(date_time)')
|
||||
|
||||
engine = MergeTree('date', ('date',))
|
||||
|
||||
@classmethod
|
||||
def table_name(cls):
|
||||
return 'materalized_date'
|
||||
|
||||
|
||||
class AliasModel(Model):
|
||||
date = DateField()
|
||||
date_alias = DateField(alias='date')
|
||||
|
||||
engine = MergeTree('date', ('date',))
|
||||
|
||||
@classmethod
|
||||
def table_name(cls):
|
||||
return 'alias_date'
|
Loading…
Reference in New Issue
Block a user