From ed51ad5be6c7979b3109b293b663649fef4ee138 Mon Sep 17 00:00:00 2001 From: Niyaz Batyrshin Date: Sun, 7 Jun 2020 12:50:45 +0300 Subject: [PATCH] DateTime64 field closes #145 --- docs/class_reference.md | 7 ++++++ docs/field_types.md | 3 ++- docs/toc.md | 1 + src/infi/clickhouse_orm/fields.py | 42 +++++++++++++++++++++++++++++++ src/infi/clickhouse_orm/funcs.py | 5 ++++ src/infi/clickhouse_orm/models.py | 9 ++++++- tests/test_datetime_fields.py | 14 ++++++++--- tests/test_funcs.py | 1 + 8 files changed, 77 insertions(+), 5 deletions(-) diff --git a/docs/class_reference.md b/docs/class_reference.md index 285f9b4..1d1ee16 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -635,6 +635,13 @@ Extends Field #### DateTimeField(default=None, alias=None, materialized=None, readonly=None, codec=None) +### DateTime64Field + +Extends DateTimeField + +#### DateTime64Field(default=None, alias=None, materialized=None, readonly=None, codec=None, precision=6, timezone=None) + + ### Decimal128Field Extends DecimalField diff --git a/docs/field_types.md b/docs/field_types.md index 95e77e1..2bfea79 100644 --- a/docs/field_types.md +++ b/docs/field_types.md @@ -11,6 +11,7 @@ The following field types are supported: | FixedStringField | FixedString| str | Encoded as UTF-8 when written to ClickHouse | DateField | Date | datetime.date | Range 1970-01-01 to 2105-12-31 | DateTimeField | DateTime | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Always in UTC +| DateTime64Field | DateTime64 | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Always in UTC | Int8Field | Int8 | int | Range -128 to 127 | Int16Field | Int16 | int | Range -32768 to 32767 | Int32Field | Int32 | int | Range -2147483648 to 2147483647 @@ -185,4 +186,4 @@ class BooleanField(Field): --- -[<< Field Options](field_options.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md) \ No newline at end of file +[<< Field Options](field_options.md) | [Table of 
Contents](toc.md) | [Table Engines >>](table_engines.md) diff --git a/docs/toc.md b/docs/toc.md index 5805eaa..4da8cb9 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -82,6 +82,7 @@ * [BaseIntField](class_reference.md#baseintfield) * [DateField](class_reference.md#datefield) * [DateTimeField](class_reference.md#datetimefield) + * [DateTime64Field](class_reference.md#datetime64field) * [Decimal128Field](class_reference.md#decimal128field) * [Decimal32Field](class_reference.md#decimal32field) * [Decimal64Field](class_reference.md#decimal64field) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 0ee90e9..6b7fee4 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -1,11 +1,14 @@ from __future__ import unicode_literals import datetime +from typing import List + import iso8601 import pytz from calendar import timegm from decimal import Decimal, localcontext from uuid import UUID from logging import getLogger +from pytz import UnknownTimeZoneError from .utils import escape, parse_array, comma_join, string_or_func, get_subclass_names from .funcs import F, FunctionOperatorsMixin from ipaddress import IPv4Address, IPv6Address @@ -86,10 +89,17 @@ class Field(FunctionOperatorsMixin): - `db`: Database, used for checking supported features. 
'''
         sql = self.db_type
+        args = self.get_db_type_args()
+        if args:
+            sql += '(%s)' % ', '.join(args)
         if with_default_expression:
             sql += self._extra_params(db)
         return sql
 
+    def get_db_type_args(self) -> List[str]:
+        """Returns field type arguments"""
+        return []
+
     def _extra_params(self, db):
         sql = ''
         if self.alias:
@@ -219,6 +229,68 @@ class DateTimeField(Field):
         return escape('%010d' % timegm(value.utctimetuple()), quote)
 
 
+class DateTime64Field(DateTimeField):
+    '''
+    A date-and-time field with sub-second precision (ClickHouse `DateTime64`).
+    '''
+    db_type = 'DateTime64'
+
+    def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None,
+                 precision: int = 6, timezone: str = None):
+        '''
+        - `precision`: number of fractional-second digits kept by the column;
+          `None` falls back to 6.
+        - `timezone`: optional IANA timezone name rendered into the column type.
+
+        Raises `ValueError` when `timezone` is not known to pytz.
+        '''
+        super().__init__(default, alias, materialized, readonly, codec)
+        assert precision is None or isinstance(precision, int), 'Precision must be int type'
+        assert timezone is None or isinstance(timezone, str), 'Timezone must be string type'
+        if timezone:
+            try:
+                pytz.timezone(timezone)
+            except UnknownTimeZoneError as e:
+                # ValueError is still an Exception, so existing handlers keep working
+                raise ValueError('Timezone must be a valid IANA timezone identifier') from e
+        # None used to leak into the SQL as "DateTime64(None)" and break to_db_string
+        self.precision = 6 if precision is None else precision
+        self.timezone = timezone
+
+    def get_db_type_args(self) -> List[str]:
+        '''
+        Returns the type arguments: the precision, then the escaped timezone if one was given.
+        '''
+        args = [str(self.precision)]
+        if self.timezone:
+            args.append(escape(self.timezone))
+        return args
+
+    def to_db_string(self, value: datetime.datetime, quote=True):
+        '''
+        Returns the field's value prepared for writing to the database.
+
+        The result is a UNIX timestamp in 0000000000.000000 format, where the number of
+        fractional digits equals the precision (no fractional part when precision is 0).
+        '''
+        # Integer arithmetic only: float timestamps cannot hold more than microseconds,
+        # so formatting them with 7+ digits wrote noise. timegm(utctimetuple()) also
+        # treats naive values as UTC, like DateTimeField.to_db_string.
+        micros = timegm(value.utctimetuple()) * 10 ** 6 + value.microsecond
+        sign = '-' if micros < 0 else ''
+        micros = abs(micros)
+        if self.precision >= 6:
+            ticks = micros * 10 ** (self.precision - 6)
+        else:
+            unit = 10 ** (6 - self.precision)
+            ticks = (micros + unit // 2) // unit  # round to nearest, as the float format did
+        seconds, fraction = divmod(ticks, 10 ** self.precision)
+        text = '%s%010d' % (sign, seconds)
+        if self.precision:
+            text += '.%0*d' % (self.precision, fraction)
+        return escape(text, quote)
+
+
 class BaseIntField(Field):
     '''
     Abstract base class for all integer-type fields.
diff --git a/src/infi/clickhouse_orm/funcs.py b/src/infi/clickhouse_orm/funcs.py index 8d59528..409022b 100644 --- a/src/infi/clickhouse_orm/funcs.py +++ b/src/infi/clickhouse_orm/funcs.py @@ -767,6 +767,11 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta): def toDateTime(x): return F('toDateTime', x) + @staticmethod + @type_conversion + def toDateTime64(x, precision, timezone=NO_VALUE): + return F('toDateTime64', x, precision, timezone) + @staticmethod def toString(x): return F('toString', x) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index e4766e5..c335d53 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -6,7 +6,7 @@ from logging import getLogger import pytz from .fields import Field, StringField -from .utils import parse_tsv, NO_VALUE, get_subclass_names +from .utils import parse_tsv, NO_VALUE, get_subclass_names, unescape from .query import QuerySet from .funcs import F from .engines import Merge, Distributed @@ -89,6 +89,13 @@ class ModelBase(type): if db_type.startswith('DateTime('): # Some functions return DateTimeField with timezone in brackets return orm_fields.DateTimeField() + # DateTime64 with precision and an optional quoted timezone in brackets + if db_type.startswith('DateTime64('): + precision, *timezone = [s.strip() for s in db_type[11:-1].split(',')] + return orm_fields.DateTime64Field( + precision=int(precision), + timezone=timezone[0][1:-1] if timezone else None + ) # Arrays if db_type.startswith('Array'): inner_field = cls.create_ad_hoc_field(db_type[6 : -1]) diff --git a/tests/test_datetime_fields.py b/tests/test_datetime_fields.py index 3387ee9..6031396 100644 --- a/tests/test_datetime_fields.py +++ b/tests/test_datetime_fields.py @@ -20,8 +20,14 @@ class DateFieldsTest(unittest.TestCase): def test_ad_hoc_model(self): self.database.insert([ - ModelWithDate(date_field='2016-08-30', datetime_field='2016-08-30 03:50:00'), - ModelWithDate(date_field='2016-08-31', datetime_field='2016-08-31 01:30:00') + 
ModelWithDate( + date_field='2016-08-30', + datetime_field='2016-08-30 03:50:00', + datetime64_field='2016-08-30 03:50:00.001'), + ModelWithDate( + date_field='2016-08-31', + datetime_field='2016-08-31 01:30:00', + datetime64_field='2016-08-31 01:30:00.002') ]) # toStartOfHour returns DateTime('Asia/Yekaterinburg') in my case, so I test it here to @@ -30,15 +36,17 @@ class DateFieldsTest(unittest.TestCase): self.assertEqual(len(results), 2) self.assertEqual(results[0].date_field, datetime.date(2016, 8, 30)) self.assertEqual(results[0].datetime_field, datetime.datetime(2016, 8, 30, 3, 50, 0, tzinfo=pytz.UTC)) + self.assertEqual(results[0].datetime64_field, datetime.datetime(2016, 8, 30, 3, 50, 0, 1000, tzinfo=pytz.UTC)) self.assertEqual(results[0].hour_start, datetime.datetime(2016, 8, 30, 3, 0, 0, tzinfo=pytz.UTC)) self.assertEqual(results[1].date_field, datetime.date(2016, 8, 31)) self.assertEqual(results[1].datetime_field, datetime.datetime(2016, 8, 31, 1, 30, 0, tzinfo=pytz.UTC)) + self.assertEqual(results[1].datetime64_field, datetime.datetime(2016, 8, 31, 1, 30, 0, 2000, tzinfo=pytz.UTC)) self.assertEqual(results[1].hour_start, datetime.datetime(2016, 8, 31, 1, 0, 0, tzinfo=pytz.UTC)) class ModelWithDate(Model): - date_field = DateField() datetime_field = DateTimeField() + datetime64_field = DateTime64Field() engine = MergeTree('date_field', ('date_field',)) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index fa352d8..ee627c2 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -351,6 +351,7 @@ class FuncsTestCase(TestCaseWithData): if self.database.server_timezone != pytz.utc: raise unittest.SkipTest('This test must run with UTC as the server timezone') self._test_func(F.toDateTime('2018-12-31 11:22:33'), datetime(2018, 12, 31, 11, 22, 33, tzinfo=pytz.utc)) + self._test_func(F.toDateTime64('2018-12-31 11:22:33.001', 6), datetime(2018, 12, 31, 11, 22, 33, 1000, tzinfo=pytz.utc)) self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM'), 
datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc)) self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))