Merge pull request #2 from Infinidat/develop

Merge of main develop into forked develop
This commit is contained in:
emakarov 2017-02-07 21:15:20 +03:00 committed by GitHub
commit 8d23c18714
12 changed files with 429 additions and 54 deletions

27
CHANGELOG.rst Normal file
View File

@ -0,0 +1,27 @@
Change Log
==========
[Unreleased]
------------
- Always keep datetime fields in UTC internally, and convert server timezone to UTC when parsing query results
- Support for ALIAS and MATERIALIZED fields (M1ha)
- Pagination: passing -1 as the page number now returns the last page
- Accept datetime values for date fields (Zloool)
- Support readonly mode in Database class (tswr)
v0.7.1
------
- Accept '0000-00-00 00:00:00' as a datetime value (tsionyx)
- Bug fix: parse_array fails on int arrays
- Improve performance when inserting many rows
v0.7.0
------
- Support array fields
- Support enum fields
v0.6.3
------
- Python 3 support

View File

@ -31,6 +31,8 @@ Models are defined in a way reminiscent of Django's ORM::
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.). It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.).
Alternatively it is possible to pass alias or materialized parameters (see below for usage examples).
Only one of ``default``, ``alias`` and ``materialized`` parameters can be provided.
See below for the supported field types and table engines. See below for the supported field types and table engines.
@ -90,6 +92,11 @@ Using the ``Database`` instance you can create a table for your model, and inser
The ``insert`` method can take any iterable of model instances, but they all must belong to the same model class. The ``insert`` method can take any iterable of model instances, but they all must belong to the same model class.
Creating a read-only database is also supported. Such a ``Database`` instance can only read data, and cannot
modify data or schemas::
db = Database('my_test_db', readonly=True)
Reading from the Database Reading from the Database
------------------------- -------------------------
@ -152,7 +159,7 @@ Pagination
It is possible to paginate through model instances:: It is possible to paginate through model instances::
>>> order_by = 'first_name, last_name' >>> order_by = 'first_name, last_name'
>>> page = db.paginate(Person, order_by, page_num=1, page_size=100) >>> page = db.paginate(Person, order_by, page_num=1, page_size=10)
>>> print page.number_of_objects >>> print page.number_of_objects
2507 2507
>>> print page.pages_total >>> print page.pages_total
@ -189,26 +196,44 @@ Field Types
Currently the following field types are supported: Currently the following field types are supported:
============= ======== ================= =================================================== =================== ======== ================= ===================================================
Class DB Type Pythonic Type Comments Class DB Type Pythonic Type Comments
============= ======== ================= =================================================== =================== ======== ================= ===================================================
StringField String unicode Encoded as UTF-8 when written to ClickHouse StringField String unicode Encoded as UTF-8 when written to ClickHouse
DateField Date datetime.date Range 1970-01-01 to 2038-01-19 DateField Date datetime.date Range 1970-01-01 to 2038-01-19
DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC
Int8Field Int8 int Range -128 to 127 Int8Field Int8 int Range -128 to 127
Int16Field Int16 int Range -32768 to 32767 Int16Field Int16 int Range -32768 to 32767
Int32Field Int32 int Range -2147483648 to 2147483647 Int32Field Int32 int Range -2147483648 to 2147483647
Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807 Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807
UInt8Field UInt8 int Range 0 to 255 UInt8Field UInt8 int Range 0 to 255
UInt16Field UInt16 int Range 0 to 65535 UInt16Field UInt16 int Range 0 to 65535
UInt32Field UInt32 int Range 0 to 4294967295 UInt32Field UInt32 int Range 0 to 4294967295
UInt64Field UInt64 int/long Range 0 to 18446744073709551615 UInt64Field UInt64 int/long Range 0 to 18446744073709551615
Float32Field Float32 float Float32Field Float32 float
Float64Field Float64 float Float64Field Float64 float
Enum8Field Enum8 Enum See below Enum8Field Enum8 Enum See below
Enum16Field Enum16 Enum See below Enum16Field Enum16 Enum See below
ArrayField Array list See below ArrayField Array list See below
============= ======== ================= =================================================== =================== ======== ================= ===================================================
DateTimeField and Time Zones
****************************
A ``DateTimeField`` can be assigned values from one of the following types:
- datetime
- date
- integer - number of seconds since the Unix epoch
- string in ``YYYY-MM-DD HH:MM:SS`` format
The assigned value always gets converted to a timezone-aware ``datetime`` in UTC. If the assigned
value is a timezone-aware ``datetime`` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC.
DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the
timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse version which is new
enough to support the ``timezone()`` function, otherwise it is assumed to be using UTC. In any case, we recommend
setting the server timezone to UTC in order to prevent confusion.
Working with enum fields Working with enum fields
************************ ************************
@ -249,6 +274,37 @@ You can create array fields containing any data type, for example::
data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66]) data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
Working with materialized and alias fields
******************************************
ClickHouse makes it possible to define MATERIALIZED and ALIAS fields.
See documentation `here <https://clickhouse.yandex/reference_en.html#Default values>`_.
Neither field type can be inserted into the database directly, so both are ignored by the ``Database.insert()`` method.
ClickHouse does not return the field values if you use ``"SELECT * FROM ..."`` - you have to list these field
names explicitly in the query.
Usage::
class Event(models.Model):
created = fields.DateTimeField()
created_date = fields.DateField(materialized='toDate(created)')
name = fields.StringField()
username = fields.StringField(alias='name')
engine = engines.MergeTree('created_date', ('created_date', 'created'))
obj = Event(created=datetime.now(), name='MyEvent')
db = Database('my_test_db')
db.insert([obj])
# All values will be retrieved from database
db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
# created_date and username will contain a default value
db.select('SELECT * FROM $db.event', model_class=Event)
Table Engines Table Engines
------------- -------------
@ -291,4 +347,8 @@ After cloning the project, run the following commands::
To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run:: To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run::
bin/nosetests bin/nosetests
To see test coverage information run::
bin/nosetests --with-coverage --cover-package=infi.clickhouse_orm

View File

@ -4,9 +4,12 @@ from .models import ModelBase
from .utils import escape, parse_tsv, import_submodules from .utils import escape, parse_tsv, import_submodules
from math import ceil from math import ceil
import datetime import datetime
import logging
from string import Template from string import Template
from six import PY3, string_types from six import PY3, string_types
import pytz
import logging
logger = logging.getLogger('clickhouse_orm')
Page = namedtuple('Page', 'objects number_of_objects pages_total number page_size') Page = namedtuple('Page', 'objects number_of_objects pages_total number page_size')
@ -26,6 +29,7 @@ class Database(object):
self.readonly = readonly self.readonly = readonly
if not self.readonly: if not self.readonly:
self.create_database() self.create_database()
self.server_timezone = self._get_server_timezone()
def create_database(self): def create_database(self):
self._send('CREATE DATABASE IF NOT EXISTS `%s`' % self.db_name) self._send('CREATE DATABASE IF NOT EXISTS `%s`' % self.db_name)
@ -50,11 +54,11 @@ class Database(object):
model_class = first_instance.__class__ model_class = first_instance.__class__
def gen(): def gen():
yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8') yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
yield (first_instance.to_tsv() + '\n').encode('utf-8') yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8')
# Collect lines in batches of batch_size # Collect lines in batches of batch_size
batch = [] batch = []
for instance in i: for instance in i:
batch.append(instance.to_tsv()) batch.append(instance.to_tsv(insertable_only=True))
if len(batch) >= batch_size: if len(batch) >= batch_size:
# Return the current batch of lines # Return the current batch of lines
yield ('\n'.join(batch) + '\n').encode('utf-8') yield ('\n'.join(batch) + '\n').encode('utf-8')
@ -82,7 +86,7 @@ class Database(object):
field_types = parse_tsv(next(lines)) field_types = parse_tsv(next(lines))
model_class = model_class or ModelBase.create_ad_hoc_model(zip(field_names, field_types)) model_class = model_class or ModelBase.create_ad_hoc_model(zip(field_names, field_types))
for line in lines: for line in lines:
yield model_class.from_tsv(line, field_names) yield model_class.from_tsv(line, field_names, self.server_timezone)
def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None): def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None):
count = self.count(model_class, conditions) count = self.count(model_class, conditions)
@ -142,6 +146,8 @@ class Database(object):
params['user'] = self.username params['user'] = self.username
if self.password: if self.password:
params['password'] = self.password params['password'] = self.password
if self.readonly:
params['readonly'] = '1'
return params return params
def _substitute(self, query, model_class=None): def _substitute(self, query, model_class=None):
@ -154,3 +160,11 @@ class Database(object):
mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name()) mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name())
query = Template(query).substitute(mapping) query = Template(query).substitute(mapping)
return query return query
def _get_server_timezone(self):
    '''
    Returns the timezone of the ClickHouse server as a pytz timezone object.
    Falls back to UTC (and logs the failure) when the server cannot answer
    the ``timezone()`` query, or when it reports a timezone name that pytz
    does not recognize.
    '''
    try:
        response = self._send('SELECT timezone()')
        return pytz.timezone(response.text.strip())
    except (DatabaseException, pytz.UnknownTimeZoneError):
        # DatabaseException: the query itself failed (e.g. a server version
        # without the timezone() function).
        # UnknownTimeZoneError: the reply is not a name pytz knows about.
        # Either way the database stays usable, with datetimes treated as UTC.
        logger.exception('Cannot determine server timezone, assuming UTC')
        return pytz.utc

View File

@ -2,6 +2,7 @@ from six import string_types, text_type, binary_type
import datetime import datetime
import pytz import pytz
import time import time
from calendar import timegm
from .utils import escape, parse_array from .utils import escape, parse_array
@ -12,15 +13,23 @@ class Field(object):
class_default = 0 class_default = 0
db_type = None db_type = None
def __init__(self, default=None, alias=None, materialized=None):
    '''
    Initializes the field.
    :param default: value to use instead of the class-level default, or None.
    :param alias: expression for an ALIAS field (a string), or None.
    :param materialized: expression for a MATERIALIZED field (a string), or None.
    At most one of the three parameters may be given, otherwise an
    AssertionError is raised.
    '''
    # Count the given parameters with identity checks instead of collecting them
    # into a set, so that unhashable defaults (such as a list default passed on
    # by ArrayField) do not blow up with a TypeError.
    given = [param for param in (default, alias, materialized) if param is not None]
    assert len(given) <= 1, \
        "Only one of default, alias and materialized parameters can be given"
    # string_types also accepts unicode strings on Python 2
    assert alias is None or isinstance(alias, string_types), "Alias field must be string field name, if given"
    assert materialized is None or isinstance(materialized, string_types), "Materialized field must be string, if given"
    # The running counter records the order in which fields were declared
    self.creation_counter = Field.creation_counter
    Field.creation_counter += 1
    self.default = self.class_default if default is None else default
    self.alias = alias
    self.materialized = materialized
def to_python(self, value): def to_python(self, value, timezone_in_use):
''' '''
Converts the input value into the expected Python data type, raising ValueError if the Converts the input value into the expected Python data type, raising ValueError if the
data can't be converted. Returns the converted value. Subclasses should override this. data can't be converted. Returns the converted value. Subclasses should override this.
The timezone_in_use parameter should be consulted when parsing datetime fields.
''' '''
return value return value
@ -48,20 +57,29 @@ class Field(object):
def get_sql(self, with_default=True): def get_sql(self, with_default=True):
''' '''
Returns an SQL expression describing the field (e.g. for CREATE TABLE). Returns an SQL expression describing the field (e.g. for CREATE TABLE).
:param with_default: If True, adds default value to sql.
It doesn't affect fields with alias and materialized values.
''' '''
if with_default: if self.alias:
return '%s ALIAS %s' % (self.db_type, self.alias)
elif self.materialized:
return '%s MATERIALIZED %s' % (self.db_type, self.materialized)
elif with_default:
default = self.to_db_string(self.default) default = self.to_db_string(self.default)
return '%s DEFAULT %s' % (self.db_type, default) return '%s DEFAULT %s' % (self.db_type, default)
else: else:
return self.db_type return self.db_type
def is_insertable(self):
    '''
    Tells whether the field takes part in inserts: a field declared with an
    alias or a materialized expression does not.
    '''
    has_expression = self.alias is not None or self.materialized is not None
    return not has_expression
class StringField(Field): class StringField(Field):
class_default = '' class_default = ''
db_type = 'String' db_type = 'String'
def to_python(self, value): def to_python(self, value, timezone_in_use):
if isinstance(value, text_type): if isinstance(value, text_type):
return value return value
if isinstance(value, binary_type): if isinstance(value, binary_type):
@ -76,11 +94,11 @@ class DateField(Field):
class_default = min_value class_default = min_value
db_type = 'Date' db_type = 'Date'
def to_python(self, value): def to_python(self, value, timezone_in_use):
if isinstance(value, datetime.date):
return value
if isinstance(value, datetime.datetime): if isinstance(value, datetime.datetime):
return value.date() return value.date()
if isinstance(value, datetime.date):
return value
if isinstance(value, int): if isinstance(value, int):
return DateField.class_default + datetime.timedelta(days=value) return DateField.class_default + datetime.timedelta(days=value)
if isinstance(value, string_types): if isinstance(value, string_types):
@ -101,26 +119,27 @@ class DateTimeField(Field):
class_default = datetime.datetime.fromtimestamp(0, pytz.utc) class_default = datetime.datetime.fromtimestamp(0, pytz.utc)
db_type = 'DateTime' db_type = 'DateTime'
def to_python(self, value): def to_python(self, value, timezone_in_use):
if isinstance(value, datetime.datetime): if isinstance(value, datetime.datetime):
return value return value.astimezone(pytz.utc) if value.tzinfo else value.replace(tzinfo=pytz.utc)
if isinstance(value, datetime.date): if isinstance(value, datetime.date):
return datetime.datetime(value.year, value.month, value.day) return datetime.datetime(value.year, value.month, value.day, tzinfo=pytz.utc)
if isinstance(value, int): if isinstance(value, int):
return datetime.datetime.fromtimestamp(value, pytz.utc) return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc)
if isinstance(value, string_types): if isinstance(value, string_types):
if value == '0000-00-00 00:00:00': if value == '0000-00-00 00:00:00':
return self.class_default return self.class_default
return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') dt = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
return timezone_in_use.localize(dt).astimezone(pytz.utc)
raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value))
def to_db_string(self, value, quote=True): def to_db_string(self, value, quote=True):
return escape(int(time.mktime(value.timetuple())), quote) return escape(timegm(value.utctimetuple()), quote)
class BaseIntField(Field): class BaseIntField(Field):
def to_python(self, value): def to_python(self, value, timezone_in_use):
try: try:
return int(value) return int(value)
except: except:
@ -188,7 +207,7 @@ class Int64Field(BaseIntField):
class BaseFloatField(Field): class BaseFloatField(Field):
def to_python(self, value): def to_python(self, value, timezone_in_use):
try: try:
return float(value) return float(value)
except: except:
@ -207,13 +226,13 @@ class Float64Field(BaseFloatField):
class BaseEnumField(Field): class BaseEnumField(Field):
def __init__(self, enum_cls, default=None): def __init__(self, enum_cls, default=None, alias=None, materialized=None):
self.enum_cls = enum_cls self.enum_cls = enum_cls
if default is None: if default is None:
default = list(enum_cls)[0] default = list(enum_cls)[0]
super(BaseEnumField, self).__init__(default) super(BaseEnumField, self).__init__(default, alias, materialized)
def to_python(self, value): def to_python(self, value, timezone_in_use):
if isinstance(value, self.enum_cls): if isinstance(value, self.enum_cls):
return value return value
try: try:
@ -271,18 +290,18 @@ class ArrayField(Field):
class_default = [] class_default = []
def __init__(self, inner_field, default=None): def __init__(self, inner_field, default=None, alias=None, materialized=None):
self.inner_field = inner_field self.inner_field = inner_field
super(ArrayField, self).__init__(default) super(ArrayField, self).__init__(default, alias, materialized)
def to_python(self, value): def to_python(self, value, timezone_in_use):
if isinstance(value, text_type): if isinstance(value, text_type):
value = parse_array(value) value = parse_array(value)
elif isinstance(value, binary_type): elif isinstance(value, binary_type):
value = parse_array(value.decode('UTF-8')) value = parse_array(value.decode('UTF-8'))
elif not isinstance(value, (list, tuple)): elif not isinstance(value, (list, tuple)):
raise ValueError('ArrayField expects list or tuple, not %s' % type(value)) raise ValueError('ArrayField expects list or tuple, not %s' % type(value))
return [self.inner_field.to_python(v) for v in value] return [self.inner_field.to_python(v, timezone_in_use) for v in value]
def validate(self, value): def validate(self, value):
for v in value: for v in value:
@ -295,3 +314,4 @@ class ArrayField(Field):
def get_sql(self, with_default=True): def get_sql(self, with_default=True):
from .utils import escape from .utils import escape
return 'Array(%s)' % self.inner_field.get_sql(with_default=False) return 'Array(%s)' % self.inner_field.get_sql(with_default=False)

View File

@ -3,6 +3,7 @@ from .engines import *
from .fields import Field from .fields import Field
from six import with_metaclass from six import with_metaclass
import pytz
from logging import getLogger from logging import getLogger
logger = getLogger('clickhouse_orm') logger = getLogger('clickhouse_orm')
@ -96,7 +97,7 @@ class Model(with_metaclass(ModelBase)):
''' '''
field = self.get_field(name) field = self.get_field(name)
if field: if field:
value = field.to_python(value) value = field.to_python(value, pytz.utc)
field.validate(value) field.validate(value)
super(Model, self).__setattr__(name, value) super(Model, self).__setattr__(name, value)
@ -136,7 +137,7 @@ class Model(with_metaclass(ModelBase)):
return 'DROP TABLE IF EXISTS `%s`.`%s`' % (db_name, cls.table_name()) return 'DROP TABLE IF EXISTS `%s`.`%s`' % (db_name, cls.table_name())
@classmethod @classmethod
def from_tsv(cls, line, field_names=None): def from_tsv(cls, line, field_names=None, timezone_in_use=pytz.utc):
''' '''
Create a model instance from a tab-separated line. The line may or may not include a newline. Create a model instance from a tab-separated line. The line may or may not include a newline.
The field_names list must match the fields defined in the model, but does not have to include all of them. The field_names list must match the fields defined in the model, but does not have to include all of them.
@ -147,12 +148,19 @@ class Model(with_metaclass(ModelBase)):
values = iter(parse_tsv(line)) values = iter(parse_tsv(line))
kwargs = {} kwargs = {}
for name in field_names: for name in field_names:
kwargs[name] = next(values) field = getattr(cls, name)
kwargs[name] = field.to_python(next(values), timezone_in_use)
return cls(**kwargs) return cls(**kwargs)
def to_tsv(self): def to_tsv(self, insertable_only=False):
''' '''
Returns the instance's column values as a tab-separated line. A newline is not included. Returns the instance's column values as a tab-separated line. A newline is not included.
:param bool insertable_only: If True, returns only fields, that can be inserted into database
''' '''
data = self.__dict__ data = self.__dict__
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in self._fields) fields = self._fields
if insertable_only:
fields = [f for f in fields if f[1].is_insertable()]
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields)

View File

@ -0,0 +1,6 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *
# Operations performed by this migration step: create the table for
# MaterializedModel (made available by the wildcard import of test_migrations).
operations = [
migrations.CreateTable(MaterializedModel)
]

View File

@ -0,0 +1,6 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *
# Operations performed by this migration step: create the table for
# AliasModel (made available by the wildcard import of test_migrations).
operations = [
migrations.CreateTable(AliasModel)
]

View File

@ -0,0 +1,69 @@
import unittest
from datetime import date
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
class MaterializedFieldsTest(unittest.TestCase):
    '''
    Tests for model fields that are declared with the ``alias`` parameter.
    '''
    # NOTE(review): the name says "Materialized" although every test here
    # exercises alias fields - consider renaming to AliasFieldsTest.

    def setUp(self):
        self.database = Database('test-db')
        self.database.create_table(ModelWithAliasFields)

    def tearDown(self):
        self.database.drop_database()

    def test_insert_and_select(self):
        instance = ModelWithAliasFields(
            date_field='2016-08-30',
            int_field=-10,
            str_field='TEST'
        )
        self.database.insert([instance])
        # We can't select * from table, as it doesn't select materialized and alias fields
        query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str' \
                ' FROM $db.%s ORDER BY alias_date' % ModelWithAliasFields.table_name()
        # Run the query once with the model class and once with an ad-hoc model
        for model_cls in (ModelWithAliasFields, None):
            results = list(self.database.select(query, model_cls))
            self.assertEqual(len(results), 1)
            self.assertEqual(results[0].date_field, instance.date_field)
            self.assertEqual(results[0].int_field, instance.int_field)
            self.assertEqual(results[0].str_field, instance.str_field)
            self.assertEqual(results[0].alias_int, instance.int_field)
            self.assertEqual(results[0].alias_str, instance.str_field)
            self.assertEqual(results[0].alias_date, instance.date_field)

    def test_assignment_error(self):
        # Assignment to alias fields can't be prevented altogether, because db.select
        # with a model class sets them; invalid values must still be rejected.
        instance = ModelWithAliasFields()
        for value in ('x', [date.today()], ['aaa'], [None]):
            with self.assertRaises(ValueError):
                instance.alias_date = value

    def test_wrong_field(self):
        with self.assertRaises(AssertionError):
            StringField(alias=123)

    def test_duplicate_default(self):
        with self.assertRaises(AssertionError):
            StringField(alias='str_field', default='with default')
        with self.assertRaises(AssertionError):
            StringField(alias='str_field', materialized='str_field')
class ModelWithAliasFields(Model):
    # Regular fields, stored in the table
    int_field = Int32Field()
    date_field = DateField()
    str_field = StringField()

    # Alias fields, each one referring to the regular field of the same type
    alias_str = StringField(alias='str_field')
    alias_int = Int32Field(alias='int_field')
    alias_date = DateField(alias='date_field')

    engine = MergeTree('date_field', ('date_field',))

View File

@ -2,7 +2,7 @@
import unittest import unittest
from infi.clickhouse_orm.database import Database from infi.clickhouse_orm.database import Database, DatabaseException
from infi.clickhouse_orm.models import Model from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import * from infi.clickhouse_orm.engines import *
@ -117,6 +117,18 @@ class DatabaseTestCase(unittest.TestCase):
p = list(self.database.select("SELECT * from $table", Person))[0] p = list(self.database.select("SELECT * from $table", Person))[0]
self.assertEquals(p.first_name, s) self.assertEquals(p.first_name, s)
def test_readonly(self):
    '''
    A Database created with readonly=True must raise DatabaseException on
    insert, drop_table and drop_database, while count() keeps working.
    '''
    orig_database = self.database
    self.database = Database(orig_database.db_name, readonly=True)
    try:
        with self.assertRaises(DatabaseException):
            self._insert_and_check(self._sample_data(), len(data))
        self.assertEqual(self.database.count(Person), 0)
        with self.assertRaises(DatabaseException):
            self.database.drop_table(Person)
        with self.assertRaises(DatabaseException):
            self.database.drop_database()
    finally:
        # Put the original instance back even when an assertion above fails,
        # so that whatever runs after the test (e.g. tearDown) does not end up
        # working with the read-only database.
        self.database = orig_database
def _sample_data(self): def _sample_data(self):
for entry in data: for entry in data:
yield Person(**entry) yield Person(**entry)

View File

@ -0,0 +1,69 @@
import unittest
from datetime import date
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
class MaterializedFieldsTest(unittest.TestCase):
    '''
    Tests for model fields that are declared with the ``materialized`` parameter.
    '''

    def setUp(self):
        self.database = Database('test-db')
        self.database.create_table(ModelWithMaterializedFields)

    def tearDown(self):
        self.database.drop_database()

    def test_insert_and_select(self):
        instance = ModelWithMaterializedFields(
            date_time_field='2016-08-30 11:00:00',
            int_field=-10,
            str_field='TEST'
        )
        self.database.insert([instance])
        # We can't select * from table, as it doesn't select materialized and alias fields
        query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str' \
                ' FROM $db.%s ORDER BY mat_date' % ModelWithMaterializedFields.table_name()
        # Run the query once with the model class and once with an ad-hoc model
        for model_cls in (ModelWithMaterializedFields, None):
            results = list(self.database.select(query, model_cls))
            self.assertEqual(len(results), 1)
            self.assertEqual(results[0].date_time_field, instance.date_time_field)
            self.assertEqual(results[0].int_field, instance.int_field)
            self.assertEqual(results[0].str_field, instance.str_field)
            # Materialized values are computed by the server from the inserted ones
            self.assertEqual(results[0].mat_int, abs(instance.int_field))
            self.assertEqual(results[0].mat_str, instance.str_field.lower())
            self.assertEqual(results[0].mat_date, instance.date_time_field.date())

    def test_assignment_error(self):
        # Assignment to materialized fields can't be prevented altogether, because
        # db.select with a model class sets them; invalid values must still be rejected.
        instance = ModelWithMaterializedFields()
        for value in ('x', [date.today()], ['aaa'], [None]):
            with self.assertRaises(ValueError):
                instance.mat_date = value

    def test_wrong_field(self):
        with self.assertRaises(AssertionError):
            StringField(materialized=123)

    def test_duplicate_default(self):
        with self.assertRaises(AssertionError):
            StringField(materialized='str_field', default='with default')
        with self.assertRaises(AssertionError):
            StringField(materialized='str_field', alias='str_field')
class ModelWithMaterializedFields(Model):
    # Regular fields, stored as inserted
    int_field = Int32Field()
    date_time_field = DateTimeField()
    str_field = StringField()

    # Materialized fields, each defined by an expression over a regular field
    mat_str = StringField(materialized='lower(str_field)')
    mat_int = Int32Field(materialized='abs(int_field)')
    mat_date = DateField(materialized='toDate(date_time_field)')

    engine = MergeTree('mat_date', ('mat_date',))

View File

@ -60,6 +60,15 @@ class MigrationsTestCase(unittest.TestCase):
self.assertTrue(self.tableExists(EnumModel1)) self.assertTrue(self.tableExists(EnumModel1))
self.assertEquals(self.getTableFields(EnumModel2), self.assertEquals(self.getTableFields(EnumModel2),
[('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")]) [('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")])
self.database.migrate('tests.sample_migrations', 8)
self.assertTrue(self.tableExists(MaterializedModel))
self.assertEquals(self.getTableFields(MaterializedModel),
[('date_time', "DateTime"), ('date', 'Date')])
self.database.migrate('tests.sample_migrations', 9)
self.assertTrue(self.tableExists(AliasModel))
self.assertEquals(self.getTableFields(AliasModel),
[('date', 'Date'), ('date_alias', "Date")])
# Several different models with the same table name, to simulate a table that changes over time # Several different models with the same table name, to simulate a table that changes over time
@ -127,3 +136,25 @@ class EnumModel2(Model):
@classmethod @classmethod
def table_name(cls): def table_name(cls):
return 'enum_mig' return 'enum_mig'
class MaterializedModel(Model):
    # Model with a materialized Date column derived from its DateTime column
    date_time = DateTimeField()
    date = DateField(materialized='toDate(date_time)')

    engine = MergeTree('date', ('date',))

    @classmethod
    def table_name(cls):
        # Explicit table name, returned instead of the default one
        return 'materalized_date'
class AliasModel(Model):
    # Model with an alias column that refers to its Date column
    date = DateField()
    date_alias = DateField(alias='date')

    engine = MergeTree('date', ('date',))

    @classmethod
    def table_name(cls):
        # Explicit table name, returned instead of the default one
        return 'alias_date'

View File

@ -0,0 +1,53 @@
import unittest
from infi.clickhouse_orm.fields import *
from datetime import date, datetime
import pytz
class SimpleFieldsTest(unittest.TestCase):
    '''
    Tests the conversion and validation logic of individual field classes,
    without involving a database.
    '''

    def test_date_field(self):
        f = DateField()
        # Valid values
        for value in (date(1970, 1, 1), datetime(1970, 1, 1), '1970-01-01', '0000-00-00', 0):
            self.assertEqual(f.to_python(value, pytz.utc), date(1970, 1, 1))
        # Invalid values
        for value in ('nope', '21/7/1999', 0.5):
            with self.assertRaises(ValueError):
                f.to_python(value, pytz.utc)
        # Range check
        for value in (date(1900, 1, 1), date(2900, 1, 1)):
            with self.assertRaises(ValueError):
                f.validate(value)

    def test_datetime_field(self):
        f = DateTimeField()
        epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
        # Valid values - every one of them represents the Unix epoch
        for value in (date(1970, 1, 1), datetime(1970, 1, 1), epoch,
                      epoch.astimezone(pytz.timezone('US/Eastern')), epoch.astimezone(pytz.timezone('Asia/Jerusalem')),
                      '1970-01-01 00:00:00', '0000-00-00 00:00:00', 0):
            dt = f.to_python(value, pytz.utc)
            self.assertEqual(dt.tzinfo, pytz.utc)
            self.assertEqual(dt, epoch)
            # Verify that conversion to and from db string does not change value
            dt2 = f.to_python(int(f.to_db_string(dt)), pytz.utc)
            self.assertEqual(dt, dt2)
        # Invalid values
        for value in ('nope', '21/7/1999', 0.5):
            with self.assertRaises(ValueError):
                f.to_python(value, pytz.utc)

    def test_uint8_field(self):
        f = UInt8Field()
        # Valid values
        for value in (17, '17', 17.0):
            self.assertEqual(f.to_python(value, pytz.utc), 17)
        # Invalid values
        for value in ('nope', date.today()):
            with self.assertRaises(ValueError):
                f.to_python(value, pytz.utc)
        # Range check
        for value in (-1, 1000):
            with self.assertRaises(ValueError):
                f.validate(value)