From 6c4640bb24f71e0109c7d0c808afc1fa56cd2d62 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 7 Feb 2017 12:20:47 +0200 Subject: [PATCH 1/7] TRIVIAL code style --- src/infi/clickhouse_orm/models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 16f6f77..a5c16df 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -156,8 +156,9 @@ class Model(with_metaclass(ModelBase)): :param bool insertable_only: If True, returns only fields, that can be inserted into database ''' data = self.__dict__ - - fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields + fields = self._fields + if insertable_only: + fields = [f for f in fields if f[1].is_insertable()] return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields) From a73a69ef523037c3777ecb777a7af5e3332dc67a Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 7 Feb 2017 12:21:16 +0200 Subject: [PATCH 2/7] TRIVIAL add note about coverage --- README.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 70ab8af..432519c 100644 --- a/README.rst +++ b/README.rst @@ -327,4 +327,8 @@ After cloning the project, run the following commands:: To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run:: - bin/nosetests \ No newline at end of file + bin/nosetests + +To see test coverage information run:: + + bin/nosetests --with-coverage --cover-package=infi.clickhouse_orm From f29d737f29c0643216ca2a5f84ce0ab1c359812e Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 7 Feb 2017 15:25:16 +0200 Subject: [PATCH 3/7] Always keep datetime fields in UTC internally, and convert server timezone to UTC when parsing query results. 
--- src/infi/clickhouse_orm/database.py | 16 +++++++-- src/infi/clickhouse_orm/fields.py | 35 ++++++++++--------- src/infi/clickhouse_orm/models.py | 8 +++-- tests/test_simple_fields.py | 53 +++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 21 deletions(-) create mode 100644 tests/test_simple_fields.py diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 73bbc13..7b4b398 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -4,9 +4,12 @@ from .models import ModelBase from .utils import escape, parse_tsv, import_submodules from math import ceil import datetime -import logging from string import Template from six import PY3, string_types +import pytz + +import logging +logger = logging.getLogger('clickhouse_orm') Page = namedtuple('Page', 'objects number_of_objects pages_total number page_size') @@ -26,6 +29,7 @@ class Database(object): self.readonly = readonly if not self.readonly: self.create_database() + self.server_timezone = self._get_server_timezone() def create_database(self): self._send('CREATE DATABASE IF NOT EXISTS `%s`' % self.db_name) @@ -82,7 +86,7 @@ class Database(object): field_types = parse_tsv(next(lines)) model_class = model_class or ModelBase.create_ad_hoc_model(zip(field_names, field_types)) for line in lines: - yield model_class.from_tsv(line, field_names) + yield model_class.from_tsv(line, field_names, self.server_timezone) def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None): count = self.count(model_class, conditions) @@ -154,3 +158,11 @@ class Database(object): mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name()) query = Template(query).substitute(mapping) return query + + def _get_server_timezone(self): + try: + r = self._send('SELECT timezone()') + return pytz.timezone(r.text.strip()) + except DatabaseException: + logger.exception('Cannot determine server timezone, assuming 
UTC') + return pytz.utc diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 51b67de..e4115e8 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -2,6 +2,7 @@ from six import string_types, text_type, binary_type import datetime import pytz import time +from calendar import timegm from .utils import escape, parse_array @@ -24,10 +25,11 @@ class Field(object): self.alias = alias self.materialized = materialized - def to_python(self, value): + def to_python(self, value, timezone_in_use): ''' Converts the input value into the expected Python data type, raising ValueError if the data can't be converted. Returns the converted value. Subclasses should override this. + The timezone_in_use parameter should be consulted when parsing datetime fields. ''' return value @@ -77,7 +79,7 @@ class StringField(Field): class_default = '' db_type = 'String' - def to_python(self, value): + def to_python(self, value, timezone_in_use): if isinstance(value, text_type): return value if isinstance(value, binary_type): @@ -92,11 +94,11 @@ class DateField(Field): class_default = min_value db_type = 'Date' - def to_python(self, value): - if isinstance(value, datetime.date): - return value + def to_python(self, value, timezone_in_use): if isinstance(value, datetime.datetime): return value.date() + if isinstance(value, datetime.date): + return value if isinstance(value, int): return DateField.class_default + datetime.timedelta(days=value) if isinstance(value, string_types): @@ -117,26 +119,27 @@ class DateTimeField(Field): class_default = datetime.datetime.fromtimestamp(0, pytz.utc) db_type = 'DateTime' - def to_python(self, value): + def to_python(self, value, timezone_in_use): if isinstance(value, datetime.datetime): - return value + return value.astimezone(pytz.utc) if value.tzinfo else value.replace(tzinfo=pytz.utc) if isinstance(value, datetime.date): - return datetime.datetime(value.year, value.month, value.day) + 
return datetime.datetime(value.year, value.month, value.day, tzinfo=pytz.utc) if isinstance(value, int): - return datetime.datetime.fromtimestamp(value, pytz.utc) + return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc) if isinstance(value, string_types): if value == '0000-00-00 00:00:00': return self.class_default - return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') + dt = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') + return timezone_in_use.localize(dt).astimezone(pytz.utc) raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) def to_db_string(self, value, quote=True): - return escape(int(time.mktime(value.timetuple())), quote) + return escape(timegm(value.utctimetuple()), quote) class BaseIntField(Field): - def to_python(self, value): + def to_python(self, value, timezone_in_use): try: return int(value) except: @@ -204,7 +207,7 @@ class Int64Field(BaseIntField): class BaseFloatField(Field): - def to_python(self, value): + def to_python(self, value, timezone_in_use): try: return float(value) except: @@ -229,7 +232,7 @@ class BaseEnumField(Field): default = list(enum_cls)[0] super(BaseEnumField, self).__init__(default, alias, materialized) - def to_python(self, value): + def to_python(self, value, timezone_in_use): if isinstance(value, self.enum_cls): return value try: @@ -291,14 +294,14 @@ class ArrayField(Field): self.inner_field = inner_field super(ArrayField, self).__init__(default, alias, materialized) - def to_python(self, value): + def to_python(self, value, timezone_in_use): if isinstance(value, text_type): value = parse_array(value) elif isinstance(value, binary_type): value = parse_array(value.decode('UTF-8')) elif not isinstance(value, (list, tuple)): raise ValueError('ArrayField expects list or tuple, not %s' % type(value)) - return [self.inner_field.to_python(v) for v in value] + return [self.inner_field.to_python(v, timezone_in_use) for v in value] def validate(self, value): for v in 
value: diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index a5c16df..444e32e 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -3,6 +3,7 @@ from .engines import * from .fields import Field from six import with_metaclass +import pytz from logging import getLogger logger = getLogger('clickhouse_orm') @@ -96,7 +97,7 @@ class Model(with_metaclass(ModelBase)): ''' field = self.get_field(name) if field: - value = field.to_python(value) + value = field.to_python(value, pytz.utc) field.validate(value) super(Model, self).__setattr__(name, value) @@ -136,7 +137,7 @@ class Model(with_metaclass(ModelBase)): return 'DROP TABLE IF EXISTS `%s`.`%s`' % (db_name, cls.table_name()) @classmethod - def from_tsv(cls, line, field_names=None): + def from_tsv(cls, line, field_names=None, timezone_in_use=pytz.utc): ''' Create a model instance from a tab-separated line. The line may or may not include a newline. The field_names list must match the fields defined in the model, but does not have to include all of them. 
@@ -147,7 +148,8 @@ class Model(with_metaclass(ModelBase)): values = iter(parse_tsv(line)) kwargs = {} for name in field_names: - kwargs[name] = next(values) + field = getattr(cls, name) + kwargs[name] = field.to_python(next(values), timezone_in_use) return cls(**kwargs) def to_tsv(self, insertable_only=False): diff --git a/tests/test_simple_fields.py b/tests/test_simple_fields.py new file mode 100644 index 0000000..c955574 --- /dev/null +++ b/tests/test_simple_fields.py @@ -0,0 +1,53 @@ +import unittest +from infi.clickhouse_orm.fields import * +from datetime import date, datetime +import pytz + + +class SimpleFieldsTest(unittest.TestCase): + + def test_date_field(self): + f = DateField() + # Valid values + for value in (date(1970, 1, 1), datetime(1970, 1, 1), '1970-01-01', '0000-00-00', 0): + self.assertEquals(f.to_python(value, pytz.utc), date(1970, 1, 1)) + # Invalid values + for value in ('nope', '21/7/1999', 0.5): + with self.assertRaises(ValueError): + f.to_python(value, pytz.utc) + # Range check + for value in (date(1900, 1, 1), date(2900, 1, 1)): + with self.assertRaises(ValueError): + f.validate(value) + + def test_datetime_field(self): + f = DateTimeField() + epoch = datetime(1970, 1, 1, tzinfo=pytz.utc) + # Valid values + for value in (date(1970, 1, 1), datetime(1970, 1, 1), epoch, + epoch.astimezone(pytz.timezone('US/Eastern')), epoch.astimezone(pytz.timezone('Asia/Jerusalem')), + '1970-01-01 00:00:00', '0000-00-00 00:00:00', 0): + dt = f.to_python(value, pytz.utc) + self.assertEquals(dt.tzinfo, pytz.utc) + self.assertEquals(dt, epoch) + # Verify that conversion to and from db string does not change value + dt2 = f.to_python(int(f.to_db_string(dt)), pytz.utc) + self.assertEquals(dt, dt2) + # Invalid values + for value in ('nope', '21/7/1999', 0.5): + with self.assertRaises(ValueError): + f.to_python(value, pytz.utc) + + def test_uint8_field(self): + f = UInt8Field() + # Valid values + for value in (17, '17', 17.0): + 
self.assertEquals(f.to_python(value, pytz.utc), 17) + # Invalid values + for value in ('nope', date.today()): + with self.assertRaises(ValueError): + f.to_python(value, pytz.utc) + # Range check + for value in (-1, 1000): + with self.assertRaises(ValueError): + f.validate(value) \ No newline at end of file From f22073e2e6f7d26bfb40ed59f4e0a57699b07807 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 7 Feb 2017 16:22:04 +0200 Subject: [PATCH 4/7] Added change log --- CHANGELOG.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 CHANGELOG.rst diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 0000000..59d97c0 --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,27 @@ +Change Log +========== + +[Unreleased] +------------ +- Always keep datetime fields in UTC internally, and convert server timezone to UTC when parsing query results +- Support for ALIAS and MATERIALIZED fields (M1ha) +- Pagination: passing -1 as the page number now returns the last page +- Accept datetime values for date fields (Zloool) +- Support readonly mode in Database class (tswr) + +v0.7.1 +------ +- Accept '0000-00-00 00:00:00' as a datetime value (tsionyx) +- Bug fix: parse_array fails on int arrays +- Improve performance when inserting many rows + +v0.7.0 +------ +- Support array fields +- Support enum fields + +v0.6.3 +------ +- Python 3 support + + From 5b03e660486c30222583311dabbc5b170019b5b4 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 7 Feb 2017 16:36:41 +0200 Subject: [PATCH 5/7] Send readonly=1 when database is created in readonly mode --- src/infi/clickhouse_orm/database.py | 2 ++ tests/test_database.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 7b4b398..43ca36e 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -146,6 +146,8 @@ class Database(object): params['user'] 
= self.username if self.password: params['password'] = self.password + if self.readonly: + params['readonly'] = '1' return params def _substitute(self, query, model_class=None): diff --git a/tests/test_database.py b/tests/test_database.py index 30c25d3..1e62472 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -2,7 +2,7 @@ import unittest -from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.database import Database, DatabaseException from infi.clickhouse_orm.models import Model from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * @@ -117,6 +117,18 @@ class DatabaseTestCase(unittest.TestCase): p = list(self.database.select("SELECT * from $table", Person))[0] self.assertEquals(p.first_name, s) + def test_readonly(self): + orig_database = self.database + self.database = Database(orig_database.db_name, readonly=True) + with self.assertRaises(DatabaseException): + self._insert_and_check(self._sample_data(), len(data)) + self.assertEquals(self.database.count(Person), 0) + with self.assertRaises(DatabaseException): + self.database.drop_table(Person) + with self.assertRaises(DatabaseException): + self.database.drop_database() + self.database = orig_database + def _sample_data(self): for entry in data: yield Person(**entry) From b95046893be68424f4fc3aa21a5e088bdb534b67 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 7 Feb 2017 17:14:18 +0200 Subject: [PATCH 6/7] Add documentation about timezone handling --- README.rst | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 432519c..891fc3f 100644 --- a/README.rst +++ b/README.rst @@ -210,7 +210,25 @@ Float64Field Float64 float Enum8Field Enum8 Enum See below Enum16Field Enum16 Enum See below ArrayField Array list See below -=================== ========== ================= =================================================== +=================== ======== ================= 
=================================================== + +DateTimeField and Time Zones +**************************** + +A ``DateTimeField`` can be assigned values from one of the following types: + +- datetime +- date +- integer - number of seconds since the Unix epoch +- string in ``YYYY-MM-DD HH:MM:SS`` format + +The assigned value always gets converted to a timezone-aware ``datetime`` in UTC. If the assigned +value is a timezone-aware ``datetime`` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC. + +DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the +timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse version which is new +enough to support the ``timezone()`` function, otherwise it is assumed to be using UTC. In any case, we recommend +setting the server timezone to UTC in order to prevent confusion. Working with enum fields ************************ @@ -255,15 +273,12 @@ You can create array fields containing any data type, for example:: Working with materialized and alias fields ****************************************** -ClickHouse provides an opportunity to create MATERIALIZED and ALIAS Fields. +ClickHouse provides an opportunity to create MATERIALIZED and ALIAS fields. +See documentation `here `_. -See documentation `here `. - -Both field types can't be inserted into database directly. -These field values are ignored, when using database.insert() method. -These fields are set to default values if you use database.select('SELECT * FROM mymodel', model_class=MyModel), -because ClickHouse doesn't return them. -Nevertheless, attribute values (as well as defaults) can be set for model object from python. +Both field types can't be inserted into the database directly, so they are ignored when using the ``Database.insert()`` method. 
+ClickHouse does not return the field values if you use ``"SELECT * FROM ..."`` - you have to list these field +names explicitly in the query. Usage:: @@ -281,7 +296,7 @@ Usage:: db.insert([obj]) # All values will be retrieved from database db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event) - # created_date, username will contain default value + # created_date and username will contain a default value db.select('SELECT * FROM $db.event', model_class=Event) From dec45a0436d9c0d8aba5ab381a9235e7ca752faf Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 7 Feb 2017 17:24:54 +0200 Subject: [PATCH 7/7] Document the Database.readonly option --- README.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 891fc3f..7dba0f9 100644 --- a/README.rst +++ b/README.rst @@ -31,8 +31,8 @@ Models are defined in a way reminiscent of Django's ORM:: engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.). -It is always possible to pass alias or materialized parameters. See below for usage examples. -Only one of default, alias and materialized parameters can be provided +Alternatively it is possible to pass alias or materialized parameters (see below for usage examples). +Only one of ``default``, ``alias`` and ``materialized`` parameters can be provided. See below for the supported field types and table engines. @@ -92,6 +92,11 @@ Using the ``Database`` instance you can create a table for your model, and inser The ``insert`` method can take any iterable of model instances, but they all must belong to the same model class. +Creating a read-only database is also supported. 
Such a ``Database`` instance can only read data, and cannot +modify data or schemas:: + + db = Database('my_test_db', readonly=True) + Reading from the Database -------------------------