Mirror of https://github.com/Infinidat/infi.clickhouse_orm.git (synced 2024-11-22 17:16:34 +03:00)

Merge pull request #2 from Infinidat/develop

Merge of main develop into forked develop

Commit 8d23c18714

CHANGELOG.rst (new file, 27 lines)
@ -0,0 +1,27 @@
Change Log
==========

[Unreleased]
------------
- Always keep datetime fields in UTC internally, and convert server timezone to UTC when parsing query results
- Support for ALIAS and MATERIALIZED fields (M1ha)
- Pagination: passing -1 as the page number now returns the last page
- Accept datetime values for date fields (Zloool)
- Support readonly mode in Database class (tswr)

v0.7.1
------
- Accept '0000-00-00 00:00:00' as a datetime value (tsionyx)
- Bug fix: parse_array fails on int arrays
- Improve performance when inserting many rows

v0.7.0
------
- Support array fields
- Support enum fields

v0.6.3
------
- Python 3 support


README.rst (68 changed lines)
@ -31,6 +31,8 @@ Models are defined in a way reminiscent of Django's ORM::
         engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))

 It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.).
+Alternatively it is possible to pass alias or materialized parameters (see below for usage examples).
+Only one of ``default``, ``alias`` and ``materialized`` parameters can be provided.

 See below for the supported field types and table engines.

@ -90,6 +92,11 @@ Using the ``Database`` instance you can create a table for your model, and inser

 The ``insert`` method can take any iterable of model instances, but they all must belong to the same model class.

+Creating a read-only database is also supported. Such a ``Database`` instance can only read data, and cannot
+modify data or schemas::
+
+    db = Database('my_test_db', readonly=True)
+
 Reading from the Database
 -------------------------

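As an illustration of the read-only mode described above (this sketch is not part of the diff; it is pieced together from the README example and the ``test_readonly`` test added in this PR), any modifying statement is rejected by the server and surfaces as a ``DatabaseException``::

    from infi.clickhouse_orm.database import Database, DatabaseException
    from infi.clickhouse_orm.models import Model
    from infi.clickhouse_orm.fields import StringField, DateField
    from infi.clickhouse_orm.engines import MergeTree

    class Person(Model):
        first_name = StringField()
        birthday = DateField()
        engine = MergeTree('birthday', ('first_name', 'birthday'))

    db = Database('my_test_db', readonly=True)    # create_database() is skipped in readonly mode
    try:
        db.insert([Person(first_name='Alice')])   # writes and DDL are rejected by the server
    except DatabaseException:
        print('read-only database refused the insert')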
@ -152,7 +159,7 @@ Pagination
 It is possible to paginate through model instances::

     >>> order_by = 'first_name, last_name'
-    >>> page = db.paginate(Person, order_by, page_num=1, page_size=100)
+    >>> page = db.paginate(Person, order_by, page_num=1, page_size=10)
     >>> print page.number_of_objects
     2507
     >>> print page.pages_total
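A small aside, not in the README diff itself: per the changelog entry above, ``-1`` can now be passed as the page number to get the last page. A sketch of that usage (the model and ordering are the README's ``Person`` example)::

    last_page = db.paginate(Person, 'first_name, last_name', page_num=-1, page_size=100)
    print(last_page.number)        # index of the last page
    print(last_page.pages_total)   # equals last_page.number when on the final page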
@ -189,9 +196,9 @@ Field Types

 Currently the following field types are supported:

-============= ======== ================= ===================================================
+=================== ======== ================= ===================================================
 Class               DB Type  Pythonic Type     Comments
-============= ======== ================= ===================================================
+=================== ======== ================= ===================================================
 StringField         String   unicode           Encoded as UTF-8 when written to ClickHouse
 DateField           Date     datetime.date     Range 1970-01-01 to 2038-01-19
 DateTimeField       DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC
@ -208,7 +215,25 @@ Float64Field Float64 float
 Enum8Field          Enum8    Enum              See below
 Enum16Field         Enum16   Enum              See below
 ArrayField          Array    list              See below
-============= ======== ================= ===================================================
+=================== ======== ================= ===================================================

+DateTimeField and Time Zones
+****************************
+
+A ``DateTimeField`` can be assigned values from one of the following types:
+
+- datetime
+- date
+- integer - number of seconds since the Unix epoch
+- string in ``YYYY-MM-DD HH:MM:SS`` format
+
+The assigned value always gets converted to a timezone-aware ``datetime`` in UTC. If the assigned
+value is a timezone-aware ``datetime`` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC.
+
+DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the
+timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse version which is new
+enough to support the ``timezone()`` function; otherwise it is assumed to be using UTC. In any case, we recommend
+setting the server timezone to UTC in order to prevent confusion.
+
 Working with enum fields
 ************************
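The conversion rules above can be exercised directly against the ``DateTimeField`` code added in this PR. A minimal sketch (not part of the README; the values are arbitrary)::

    from datetime import datetime
    import pytz
    from infi.clickhouse_orm.fields import DateTimeField

    f = DateTimeField()
    eastern = pytz.timezone('US/Eastern')

    # Each of these becomes the same timezone-aware datetime in UTC (1970-01-01 00:00:00+00:00):
    print(f.to_python(datetime(1970, 1, 1), pytz.utc))                          # naive datetime, assumed UTC
    print(f.to_python(eastern.localize(datetime(1969, 12, 31, 19)), pytz.utc))  # aware datetime, converted to UTC
    print(f.to_python(0, pytz.utc))                                             # seconds since the Unix epoch
    print(f.to_python('1970-01-01 00:00:00', pytz.utc))                         # string, parsed in the given timezone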
@ -249,6 +274,37 @@ You can create array fields containing any data type, for example::

     data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])


+Working with materialized and alias fields
+******************************************
+
+ClickHouse supports MATERIALIZED and ALIAS fields.
+See the documentation `here <https://clickhouse.yandex/reference_en.html#Default values>`_.
+
+Neither field type can be inserted into the database directly, so both are ignored when using the ``Database.insert()`` method.
+ClickHouse does not return the field values if you use ``"SELECT * FROM ..."`` - you have to list these field
+names explicitly in the query.
+
+Usage::
+
+    class Event(models.Model):
+
+        created = fields.DateTimeField()
+        created_date = fields.DateTimeField(materialized='toDate(created)')
+        name = fields.StringField()
+        username = fields.StringField(alias='name')
+
+        engine = engines.MergeTree('created_date', ('created_date', 'created'))
+
+    obj = Event(created=datetime.now(), name='MyEvent')
+    db = Database('my_test_db')
+    db.insert([obj])
+    # All values will be retrieved from the database
+    db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
+    # created_date and username will contain a default value
+    db.select('SELECT * FROM $db.event', model_class=Event)
+

 Table Engines
 -------------

@ -292,3 +348,7 @@ After cloning the project, run the following commands::
 To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run::

     bin/nosetests
+
+To see test coverage information run::
+
+    bin/nosetests --with-coverage --cover-package=infi.clickhouse_orm
@ -4,9 +4,12 @@ from .models import ModelBase
 from .utils import escape, parse_tsv, import_submodules
 from math import ceil
 import datetime
-import logging
 from string import Template
 from six import PY3, string_types
+import pytz
+
+import logging
+logger = logging.getLogger('clickhouse_orm')


 Page = namedtuple('Page', 'objects number_of_objects pages_total number page_size')
@ -26,6 +29,7 @@ class Database(object):
         self.readonly = readonly
         if not self.readonly:
             self.create_database()
+        self.server_timezone = self._get_server_timezone()

     def create_database(self):
         self._send('CREATE DATABASE IF NOT EXISTS `%s`' % self.db_name)
@ -50,11 +54,11 @@ class Database(object):
         model_class = first_instance.__class__
         def gen():
             yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
-            yield (first_instance.to_tsv() + '\n').encode('utf-8')
+            yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8')
             # Collect lines in batches of batch_size
             batch = []
             for instance in i:
-                batch.append(instance.to_tsv())
+                batch.append(instance.to_tsv(insertable_only=True))
                 if len(batch) >= batch_size:
                     # Return the current batch of lines
                     yield ('\n'.join(batch) + '\n').encode('utf-8')
@ -82,7 +86,7 @@ class Database(object):
         field_types = parse_tsv(next(lines))
         model_class = model_class or ModelBase.create_ad_hoc_model(zip(field_names, field_types))
         for line in lines:
-            yield model_class.from_tsv(line, field_names)
+            yield model_class.from_tsv(line, field_names, self.server_timezone)

     def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None):
         count = self.count(model_class, conditions)
@ -142,6 +146,8 @@ class Database(object):
             params['user'] = self.username
         if self.password:
             params['password'] = self.password
+        if self.readonly:
+            params['readonly'] = '1'
         return params

     def _substitute(self, query, model_class=None):
@ -154,3 +160,11 @@ class Database(object):
             mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name())
         query = Template(query).substitute(mapping)
         return query
+
+    def _get_server_timezone(self):
+        try:
+            r = self._send('SELECT timezone()')
+            return pytz.timezone(r.text.strip())
+        except DatabaseException:
+            logger.exception('Cannot determine server timezone, assuming UTC')
+            return pytz.utc
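Taken together with the ``from_tsv(..., self.server_timezone)`` change above, this is roughly what the new behaviour looks like from the caller's side (an illustrative sketch, not from the diff; ``Event`` is the model from the README example)::

    db = Database('my_test_db')
    print(db.server_timezone)    # e.g. <UTC> or whatever zone SELECT timezone() reports

    for event in db.select('SELECT created, name FROM $db.event', model_class=Event):
        assert event.created.tzinfo is not None   # DateTime values come back timezone-aware
        print(event.created)                       # ...and already converted to UTC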
@ -2,6 +2,7 @@ from six import string_types, text_type, binary_type
 import datetime
 import pytz
 import time
+from calendar import timegm

 from .utils import escape, parse_array

@ -12,15 +13,23 @@ class Field(object):
     class_default = 0
     db_type = None

-    def __init__(self, default=None):
+    def __init__(self, default=None, alias=None, materialized=None):
+        assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \
+            "Only one of default, alias and materialized parameters can be given"
+        assert alias is None or isinstance(alias, str), "Alias field must be string field name, if given"
+        assert materialized is None or isinstance(materialized, str), "Materialized field must be string, if given"
+
         self.creation_counter = Field.creation_counter
         Field.creation_counter += 1
         self.default = self.class_default if default is None else default
+        self.alias = alias
+        self.materialized = materialized

-    def to_python(self, value):
+    def to_python(self, value, timezone_in_use):
         '''
         Converts the input value into the expected Python data type, raising ValueError if the
         data can't be converted. Returns the converted value. Subclasses should override this.
+        The timezone_in_use parameter should be consulted when parsing datetime fields.
         '''
         return value

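A quick note on the set-membership assertion above (not from the diff): it is a compact way of saying "at most one of ``default``, ``alias`` and ``materialized`` may be given" - if any two were set, none of the three pairs could be ``(None, None)``. A tiny standalone check with made-up values::

    def at_most_one(default, alias, materialized):
        # Mirrors the assertion in Field.__init__
        return (None, None) in {(default, alias), (alias, materialized), (default, materialized)}

    assert at_most_one(None, None, None)          # nothing given
    assert at_most_one('x', None, None)           # only a default
    assert not at_most_one('x', 'name', None)     # default + alias - rejected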
@ -48,20 +57,29 @@ class Field(object):
     def get_sql(self, with_default=True):
         '''
         Returns an SQL expression describing the field (e.g. for CREATE TABLE).
+        :param with_default: If True, adds default value to sql.
+            It doesn't affect fields with alias and materialized values.
         '''
-        if with_default:
+        if self.alias:
+            return '%s ALIAS %s' % (self.db_type, self.alias)
+        elif self.materialized:
+            return '%s MATERIALIZED %s' % (self.db_type, self.materialized)
+        elif with_default:
             default = self.to_db_string(self.default)
             return '%s DEFAULT %s' % (self.db_type, default)
         else:
             return self.db_type

+    def is_insertable(self):
+        return self.alias is None and self.materialized is None
+

 class StringField(Field):

     class_default = ''
     db_type = 'String'

-    def to_python(self, value):
+    def to_python(self, value, timezone_in_use):
         if isinstance(value, text_type):
             return value
         if isinstance(value, binary_type):
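To make the new branches concrete, this is roughly what ``get_sql`` now yields for the three kinds of fields (an illustrative sketch derived from the code above, not part of the diff)::

    from infi.clickhouse_orm.fields import StringField, DateTimeField

    print(StringField(default='anon').get_sql())                    # String DEFAULT 'anon'
    print(StringField(alias='name').get_sql())                      # String ALIAS name
    print(DateTimeField(materialized='toDate(created)').get_sql())  # DateTime MATERIALIZED toDate(created)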
@ -76,11 +94,11 @@ class DateField(Field):
     class_default = min_value
     db_type = 'Date'

-    def to_python(self, value):
-        if isinstance(value, datetime.date):
-            return value
+    def to_python(self, value, timezone_in_use):
         if isinstance(value, datetime.datetime):
             return value.date()
+        if isinstance(value, datetime.date):
+            return value
         if isinstance(value, int):
             return DateField.class_default + datetime.timedelta(days=value)
         if isinstance(value, string_types):
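The reordering above matters because ``datetime.datetime`` is a subclass of ``datetime.date``: with the old order a ``datetime`` value hit the ``date`` branch and was returned with its time part intact, whereas now it is truncated to a date first (the "accept datetime values for date fields" changelog entry). A short illustration (not from the diff)::

    from datetime import date, datetime

    value = datetime(2016, 8, 30, 11, 0, 0)
    assert isinstance(value, date)       # True - datetime is a subclass of date
    print(value.date())                  # 2016-08-30, which is what DateField now stores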
@ -101,26 +119,27 @@ class DateTimeField(Field):
     class_default = datetime.datetime.fromtimestamp(0, pytz.utc)
     db_type = 'DateTime'

-    def to_python(self, value):
+    def to_python(self, value, timezone_in_use):
         if isinstance(value, datetime.datetime):
-            return value
+            return value.astimezone(pytz.utc) if value.tzinfo else value.replace(tzinfo=pytz.utc)
         if isinstance(value, datetime.date):
-            return datetime.datetime(value.year, value.month, value.day)
+            return datetime.datetime(value.year, value.month, value.day, tzinfo=pytz.utc)
         if isinstance(value, int):
-            return datetime.datetime.fromtimestamp(value, pytz.utc)
+            return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc)
         if isinstance(value, string_types):
             if value == '0000-00-00 00:00:00':
                 return self.class_default
-            return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
+            dt = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
+            return timezone_in_use.localize(dt).astimezone(pytz.utc)
         raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value))

     def to_db_string(self, value, quote=True):
-        return escape(int(time.mktime(value.timetuple())), quote)
+        return escape(timegm(value.utctimetuple()), quote)


 class BaseIntField(Field):

-    def to_python(self, value):
+    def to_python(self, value, timezone_in_use):
         try:
             return int(value)
         except:
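The ``to_db_string`` change above is the UTC half of the same fix: ``time.mktime`` interprets a time tuple in the machine's local timezone, while ``calendar.timegm`` interprets it as UTC, so the stored value no longer depends on where the code runs. A quick check (not from the diff)::

    import calendar
    from datetime import datetime
    import pytz

    dt = datetime(1970, 1, 1, tzinfo=pytz.utc)
    assert calendar.timegm(dt.utctimetuple()) == 0   # the UTC epoch is 0 on any machine
    # int(time.mktime(dt.timetuple())) would instead vary with the local timezone.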
@ -188,7 +207,7 @@ class Int64Field(BaseIntField):

 class BaseFloatField(Field):

-    def to_python(self, value):
+    def to_python(self, value, timezone_in_use):
         try:
             return float(value)
         except:
@ -207,13 +226,13 @@ class Float64Field(BaseFloatField):

 class BaseEnumField(Field):

-    def __init__(self, enum_cls, default=None):
+    def __init__(self, enum_cls, default=None, alias=None, materialized=None):
         self.enum_cls = enum_cls
         if default is None:
             default = list(enum_cls)[0]
-        super(BaseEnumField, self).__init__(default)
+        super(BaseEnumField, self).__init__(default, alias, materialized)

-    def to_python(self, value):
+    def to_python(self, value, timezone_in_use):
         if isinstance(value, self.enum_cls):
             return value
         try:
@ -271,18 +290,18 @@ class ArrayField(Field):

     class_default = []

-    def __init__(self, inner_field, default=None):
+    def __init__(self, inner_field, default=None, alias=None, materialized=None):
         self.inner_field = inner_field
-        super(ArrayField, self).__init__(default)
+        super(ArrayField, self).__init__(default, alias, materialized)

-    def to_python(self, value):
+    def to_python(self, value, timezone_in_use):
         if isinstance(value, text_type):
             value = parse_array(value)
         elif isinstance(value, binary_type):
             value = parse_array(value.decode('UTF-8'))
         elif not isinstance(value, (list, tuple)):
             raise ValueError('ArrayField expects list or tuple, not %s' % type(value))
-        return [self.inner_field.to_python(v) for v in value]
+        return [self.inner_field.to_python(v, timezone_in_use) for v in value]

     def validate(self, value):
         for v in value:
@ -295,3 +314,4 @@ class ArrayField(Field):
     def get_sql(self, with_default=True):
         from .utils import escape
         return 'Array(%s)' % self.inner_field.get_sql(with_default=False)
+
@ -3,6 +3,7 @@ from .engines import *
 from .fields import Field

 from six import with_metaclass
+import pytz

 from logging import getLogger
 logger = getLogger('clickhouse_orm')
@ -96,7 +97,7 @@ class Model(with_metaclass(ModelBase)):
         '''
         field = self.get_field(name)
         if field:
-            value = field.to_python(value)
+            value = field.to_python(value, pytz.utc)
             field.validate(value)
         super(Model, self).__setattr__(name, value)

@ -136,7 +137,7 @@ class Model(with_metaclass(ModelBase)):
         return 'DROP TABLE IF EXISTS `%s`.`%s`' % (db_name, cls.table_name())

     @classmethod
-    def from_tsv(cls, line, field_names=None):
+    def from_tsv(cls, line, field_names=None, timezone_in_use=pytz.utc):
         '''
         Create a model instance from a tab-separated line. The line may or may not include a newline.
         The field_names list must match the fields defined in the model, but does not have to include all of them.
@ -147,12 +148,19 @@ class Model(with_metaclass(ModelBase)):
         values = iter(parse_tsv(line))
         kwargs = {}
         for name in field_names:
-            kwargs[name] = next(values)
+            field = getattr(cls, name)
+            kwargs[name] = field.to_python(next(values), timezone_in_use)
         return cls(**kwargs)

-    def to_tsv(self):
+    def to_tsv(self, insertable_only=False):
         '''
         Returns the instance's column values as a tab-separated line. A newline is not included.
+        :param bool insertable_only: If True, returns only fields that can be inserted into the database
         '''
         data = self.__dict__
-        return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in self._fields)
+        fields = self._fields
+        if insertable_only:
+            fields = [f for f in fields if f[1].is_insertable()]
+        return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields)

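A short sketch of what ``insertable_only`` changes (not from the diff; the model is the ``Event`` example from the README section above)::

    from datetime import datetime
    from infi.clickhouse_orm import models, fields, engines

    class Event(models.Model):
        created = fields.DateTimeField()
        created_date = fields.DateTimeField(materialized='toDate(created)')
        name = fields.StringField()
        username = fields.StringField(alias='name')
        engine = engines.MergeTree('created_date', ('created_date', 'created'))

    obj = Event(created=datetime(2016, 8, 30, 11, 0, 0), name='MyEvent')
    print(obj.to_tsv())                       # all four columns, including the alias/materialized ones
    print(obj.to_tsv(insertable_only=True))   # only created and name - what Database.insert() sends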
tests/sample_migrations/0008.py (new file, 6 lines)
@ -0,0 +1,6 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *

operations = [
    migrations.CreateTable(MaterializedModel)
]


tests/sample_migrations/0009.py (new file, 6 lines)
@ -0,0 +1,6 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *

operations = [
    migrations.CreateTable(AliasModel)
]


tests/test_alias_fields.py (new file, 69 lines)
@ -0,0 +1,69 @@
import unittest
from datetime import date

from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *


class MaterializedFieldsTest(unittest.TestCase):

    def setUp(self):
        self.database = Database('test-db')
        self.database.create_table(ModelWithAliasFields)

    def tearDown(self):
        self.database.drop_database()

    def test_insert_and_select(self):
        instance = ModelWithAliasFields(
            date_field='2016-08-30',
            int_field=-10,
            str_field='TEST'
        )
        self.database.insert([instance])
        # We can't select * from the table, as it doesn't return materialized and alias fields
        query = 'SELECT date_field, int_field, str_field, alias_int, alias_date, alias_str' \
                ' FROM $db.%s ORDER BY alias_date' % ModelWithAliasFields.table_name()
        for model_cls in (ModelWithAliasFields, None):
            results = list(self.database.select(query, model_cls))
            self.assertEquals(len(results), 1)
            self.assertEquals(results[0].date_field, instance.date_field)
            self.assertEquals(results[0].int_field, instance.int_field)
            self.assertEquals(results[0].str_field, instance.str_field)
            self.assertEquals(results[0].alias_int, instance.int_field)
            self.assertEquals(results[0].alias_str, instance.str_field)
            self.assertEquals(results[0].alias_date, instance.date_field)

    def test_assignment_error(self):
        # Assignment can't be prevented altogether, since db.select() with a model class also sets model fields.
        instance = ModelWithAliasFields()
        for value in ('x', [date.today()], ['aaa'], [None]):
            with self.assertRaises(ValueError):
                instance.alias_date = value

    def test_wrong_field(self):
        with self.assertRaises(AssertionError):
            StringField(alias=123)

    def test_duplicate_default(self):
        with self.assertRaises(AssertionError):
            StringField(alias='str_field', default='with default')

        with self.assertRaises(AssertionError):
            StringField(alias='str_field', materialized='str_field')


class ModelWithAliasFields(Model):
    int_field = Int32Field()
    date_field = DateField()
    str_field = StringField()

    alias_str = StringField(alias='str_field')
    alias_int = Int32Field(alias='int_field')
    alias_date = DateField(alias='date_field')

    engine = MergeTree('date_field', ('date_field',))
@ -2,7 +2,7 @@

 import unittest

-from infi.clickhouse_orm.database import Database
+from infi.clickhouse_orm.database import Database, DatabaseException
 from infi.clickhouse_orm.models import Model
 from infi.clickhouse_orm.fields import *
 from infi.clickhouse_orm.engines import *
@ -117,6 +117,18 @@ class DatabaseTestCase(unittest.TestCase):
         p = list(self.database.select("SELECT * from $table", Person))[0]
         self.assertEquals(p.first_name, s)

+    def test_readonly(self):
+        orig_database = self.database
+        self.database = Database(orig_database.db_name, readonly=True)
+        with self.assertRaises(DatabaseException):
+            self._insert_and_check(self._sample_data(), len(data))
+        self.assertEquals(self.database.count(Person), 0)
+        with self.assertRaises(DatabaseException):
+            self.database.drop_table(Person)
+        with self.assertRaises(DatabaseException):
+            self.database.drop_database()
+        self.database = orig_database
+
     def _sample_data(self):
         for entry in data:
             yield Person(**entry)


tests/test_materialized_fields.py (new file, 69 lines)
@ -0,0 +1,69 @@
import unittest
from datetime import date

from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *


class MaterializedFieldsTest(unittest.TestCase):

    def setUp(self):
        self.database = Database('test-db')
        self.database.create_table(ModelWithMaterializedFields)

    def tearDown(self):
        self.database.drop_database()

    def test_insert_and_select(self):
        instance = ModelWithMaterializedFields(
            date_time_field='2016-08-30 11:00:00',
            int_field=-10,
            str_field='TEST'
        )
        self.database.insert([instance])
        # We can't select * from the table, as it doesn't return materialized and alias fields
        query = 'SELECT date_time_field, int_field, str_field, mat_int, mat_date, mat_str' \
                ' FROM $db.%s ORDER BY mat_date' % ModelWithMaterializedFields.table_name()
        for model_cls in (ModelWithMaterializedFields, None):
            results = list(self.database.select(query, model_cls))
            self.assertEquals(len(results), 1)
            self.assertEquals(results[0].date_time_field, instance.date_time_field)
            self.assertEquals(results[0].int_field, instance.int_field)
            self.assertEquals(results[0].str_field, instance.str_field)
            self.assertEquals(results[0].mat_int, abs(instance.int_field))
            self.assertEquals(results[0].mat_str, instance.str_field.lower())
            self.assertEquals(results[0].mat_date, instance.date_time_field.date())

    def test_assignment_error(self):
        # Assignment can't be prevented altogether, since db.select() with a model class also sets model fields.
        instance = ModelWithMaterializedFields()
        for value in ('x', [date.today()], ['aaa'], [None]):
            with self.assertRaises(ValueError):
                instance.mat_date = value

    def test_wrong_field(self):
        with self.assertRaises(AssertionError):
            StringField(materialized=123)

    def test_duplicate_default(self):
        with self.assertRaises(AssertionError):
            StringField(materialized='str_field', default='with default')

        with self.assertRaises(AssertionError):
            StringField(materialized='str_field', alias='str_field')


class ModelWithMaterializedFields(Model):
    int_field = Int32Field()
    date_time_field = DateTimeField()
    str_field = StringField()

    mat_str = StringField(materialized='lower(str_field)')
    mat_int = Int32Field(materialized='abs(int_field)')
    mat_date = DateField(materialized='toDate(date_time_field)')

    engine = MergeTree('mat_date', ('mat_date',))
@ -60,6 +60,15 @@ class MigrationsTestCase(unittest.TestCase):
         self.assertTrue(self.tableExists(EnumModel1))
         self.assertEquals(self.getTableFields(EnumModel2),
                           [('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")])
+        self.database.migrate('tests.sample_migrations', 8)
+        self.assertTrue(self.tableExists(MaterializedModel))
+        self.assertEquals(self.getTableFields(MaterializedModel),
+                          [('date_time', "DateTime"), ('date', 'Date')])
+        self.database.migrate('tests.sample_migrations', 9)
+        self.assertTrue(self.tableExists(AliasModel))
+        self.assertEquals(self.getTableFields(AliasModel),
+                          [('date', 'Date'), ('date_alias', "Date")])


 # Several different models with the same table name, to simulate a table that changes over time
@ -127,3 +136,25 @@ class EnumModel2(Model):
     @classmethod
     def table_name(cls):
         return 'enum_mig'
+
+
+class MaterializedModel(Model):
+    date_time = DateTimeField()
+    date = DateField(materialized='toDate(date_time)')
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'materalized_date'
+
+
+class AliasModel(Model):
+    date = DateField()
+    date_alias = DateField(alias='date')
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'alias_date'


tests/test_simple_fields.py (new file, 53 lines)
@ -0,0 +1,53 @@
import unittest
from infi.clickhouse_orm.fields import *
from datetime import date, datetime
import pytz


class SimpleFieldsTest(unittest.TestCase):

    def test_date_field(self):
        f = DateField()
        # Valid values
        for value in (date(1970, 1, 1), datetime(1970, 1, 1), '1970-01-01', '0000-00-00', 0):
            self.assertEquals(f.to_python(value, pytz.utc), date(1970, 1, 1))
        # Invalid values
        for value in ('nope', '21/7/1999', 0.5):
            with self.assertRaises(ValueError):
                f.to_python(value, pytz.utc)
        # Range check
        for value in (date(1900, 1, 1), date(2900, 1, 1)):
            with self.assertRaises(ValueError):
                f.validate(value)

    def test_datetime_field(self):
        f = DateTimeField()
        epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
        # Valid values
        for value in (date(1970, 1, 1), datetime(1970, 1, 1), epoch,
                      epoch.astimezone(pytz.timezone('US/Eastern')), epoch.astimezone(pytz.timezone('Asia/Jerusalem')),
                      '1970-01-01 00:00:00', '0000-00-00 00:00:00', 0):
            dt = f.to_python(value, pytz.utc)
            self.assertEquals(dt.tzinfo, pytz.utc)
            self.assertEquals(dt, epoch)
            # Verify that conversion to and from db string does not change value
            dt2 = f.to_python(int(f.to_db_string(dt)), pytz.utc)
            self.assertEquals(dt, dt2)
        # Invalid values
        for value in ('nope', '21/7/1999', 0.5):
            with self.assertRaises(ValueError):
                f.to_python(value, pytz.utc)

    def test_uint8_field(self):
        f = UInt8Field()
        # Valid values
        for value in (17, '17', 17.0):
            self.assertEquals(f.to_python(value, pytz.utc), 17)
        # Invalid values
        for value in ('nope', date.today()):
            with self.assertRaises(ValueError):
                f.to_python(value, pytz.utc)
        # Range check
        for value in (-1, 1000):
            with self.assertRaises(ValueError):
                f.validate(value)