Merge branch 'NiyazNz-feature/DateTime64' into develop

This commit is contained in:
Itai Shirav 2020-06-22 23:24:12 +03:00
commit 0dece65b7b
10 changed files with 213 additions and 22 deletions

View File

@ -862,7 +862,14 @@ Extends Field
Extends Field
#### DateTimeField(default=None, alias=None, materialized=None, readonly=None, codec=None)
#### DateTimeField(default=None, alias=None, materialized=None, readonly=None, codec=None, timezone=None)
### DateTime64Field
Extends DateTimeField
#### DateTime64Field(default=None, alias=None, materialized=None, readonly=None, codec=None, timezone=None, precision=6)
### Decimal128Field

View File

@ -10,7 +10,8 @@ The following field types are supported:
| StringField | String | str | Encoded as UTF-8 when written to ClickHouse
| FixedStringField | FixedString| str | Encoded as UTF-8 when written to ClickHouse
| DateField | Date | datetime.date | Range 1970-01-01 to 2105-12-31
| DateTimeField | DateTime | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Always in UTC
| DateTimeField | DateTime | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Timezone aware
| DateTime64Field | DateTime64 | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Timezone aware
| Int8Field | Int8 | int | Range -128 to 127
| Int16Field | Int16 | int | Range -32768 to 32767
| Int32Field | Int32 | int | Range -2147483648 to 2147483647

View File

@ -94,6 +94,7 @@
* [BaseIntField](class_reference.md#baseintfield)
* [DateField](class_reference.md#datefield)
* [DateTimeField](class_reference.md#datetimefield)
* [DateTime64Field](class_reference.md#datetime64field)
* [Decimal128Field](class_reference.md#decimal128field)
* [Decimal32Field](class_reference.md#decimal32field)
* [Decimal64Field](class_reference.md#decimal64field)

View File

@ -1,11 +1,13 @@
from __future__ import unicode_literals
import datetime
from typing import List, Union
import iso8601
import pytz
from calendar import timegm
from decimal import Decimal, localcontext
from uuid import UUID
from logging import getLogger
from pytz import BaseTzInfo
from .utils import escape, parse_array, comma_join, string_or_func, get_subclass_names
from .funcs import F, FunctionOperatorsMixin
from ipaddress import IPv4Address, IPv6Address
@ -86,10 +88,17 @@ class Field(FunctionOperatorsMixin):
- `db`: Database, used for checking supported features.
'''
sql = self.db_type
args = self.get_db_type_args()
if args:
sql += '(%s)' % ', '.join(args)
if with_default_expression:
sql += self._extra_params(db)
return sql
def get_db_type_args(self) -> List[str]:
    """
    Returns the arguments of the field's database type as a list of strings.

    When the list is not empty, the code that builds the field's SQL joins the
    items with ", " and appends them in parentheses right after `db_type`.
    This base implementation returns an empty list, so nothing is appended.
    """
    return []
def _extra_params(self, db):
sql = ''
if self.alias:
@ -187,9 +196,23 @@ class DateTimeField(Field):
class_default = datetime.datetime.fromtimestamp(0, pytz.utc)
db_type = 'DateTime'
def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None,
             timezone: Union[BaseTzInfo, str] = None):
    """
    Initializes the field, optionally binding it to a timezone.

    - `timezone`: a pytz timezone object or a timezone name. A name is resolved
      with `pytz.timezone`; a falsy value is stored unchanged.
    """
    super().__init__(default, alias, materialized, readonly, codec)
    # Only non-empty values that are not already pytz timezone objects need resolving
    if timezone and not isinstance(timezone, BaseTzInfo):
        timezone = pytz.timezone(timezone)
    self.timezone: BaseTzInfo = timezone
def get_db_type_args(self) -> List[str]:
    """
    Returns the field type arguments: the escaped zone name of the field's
    timezone, or an empty list when no timezone is set.
    """
    if not self.timezone:
        return []
    return [escape(self.timezone.zone)]
def to_python(self, value, timezone_in_use):
if isinstance(value, datetime.datetime):
return value.astimezone(pytz.utc) if value.tzinfo else value.replace(tzinfo=pytz.utc)
return value if value.tzinfo else value.replace(tzinfo=pytz.utc)
if isinstance(value, datetime.date):
return datetime.datetime(value.year, value.month, value.day, tzinfo=pytz.utc)
if isinstance(value, int):
@ -212,13 +235,60 @@ class DateTimeField(Field):
# convert naive to aware
if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None:
dt = timezone_in_use.localize(dt)
return dt.astimezone(pytz.utc)
return dt
raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value))
def to_db_string(self, value, quote=True):
    """
    Returns the value prepared for writing to the database: its UTC epoch
    seconds, zero-padded to at least 10 digits and passed through `escape`
    together with the `quote` flag.
    """
    epoch_seconds = timegm(value.utctimetuple())
    return escape('%010d' % epoch_seconds, quote)
class DateTime64Field(DateTimeField):
    """
    Datetime field stored as `DateTime64`: like `DateTimeField`, with an
    additional `precision` type argument (number of sub-second digits) and an
    optional timezone.
    """
    db_type = 'DateTime64'

    def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None,
                 timezone: Union[BaseTzInfo, str] = None, precision: int = 6):
        """
        - `timezone`: passed on to `DateTimeField`.
        - `precision`: number of digits after the decimal point, used both in
          the column type and when serializing values.
        """
        super().__init__(default, alias, materialized, readonly, codec, timezone)
        # NOTE(review): None passes this check, but get_db_type_args and
        # to_db_string both need an int - confirm whether None should be allowed.
        assert precision is None or isinstance(precision, int), 'Precision must be int type'
        self.precision = precision

    def get_db_type_args(self) -> List[str]:
        """
        Returns the field type arguments: the precision first, followed by the
        escaped zone name when the field has a timezone.
        """
        args = [str(self.precision)]
        if self.timezone:
            args.append(escape(self.timezone.zone))
        return args

    def to_db_string(self, value: datetime.datetime, quote=True):
        """
        Returns the field's value prepared for writing to the database

        Returns string in 0000000000.000000 format, where remainder digits count is equal to precision
        """
        # width = 10 integer digits + the decimal point + `precision` fractional digits.
        # The fractional digit count must follow the field's precision (it used to be
        # hard-coded to 6, which disagreed with the width for any other precision).
        return escape(
            '{timestamp:0{width}.{precision}f}'.format(
                timestamp=value.timestamp(),
                width=11 + self.precision,
                precision=self.precision),
            quote
        )

    def to_python(self, value, timezone_in_use):
        """
        Converts the input value into a timezone-aware datetime.

        Delegates to `DateTimeField.to_python` first. Only when that raises
        ValueError are the extra forms tried:
        - int/float: a UTC timestamp in seconds
        - str '0000-00-00 00:00:00' with an optional fractional part: the class default
        - str whose part before the first '.' is 10 characters long: parsed as
          a float UTC timestamp
        If none of them applies, the original ValueError is re-raised.
        """
        try:
            return super().to_python(value, timezone_in_use)
        except ValueError:
            if isinstance(value, (int, float)):
                return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc)
            if isinstance(value, str):
                left_part = value.split('.')[0]
                if left_part == '0000-00-00 00:00:00':
                    return self.class_default
                if len(left_part) == 10:
                    try:
                        value = float(value)
                        return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc)
                    except ValueError:
                        # Not a numeric timestamp - fall through to re-raise the parent's error
                        pass
            raise
class BaseIntField(Field):
'''
Abstract base class for all integer-type fields.

View File

@ -732,6 +732,11 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
def toDateTime(x):
return F('toDateTime', x)
@staticmethod
@type_conversion
def toDateTime64(x, precision, timezone=NO_VALUE):
    # Builds an F expression named 'toDateTime64' from `x`, `precision` and `timezone`.
    # `timezone` defaults to the NO_VALUE sentinel - presumably such arguments are left
    # out of the generated SQL; confirm against F's argument handling.
    return F('toDateTime64', x, precision, timezone)
@staticmethod
def toString(x):
return F('toString', x)

View File

@ -7,7 +7,7 @@ from logging import getLogger
import pytz
from .fields import Field, StringField
from .utils import parse_tsv, NO_VALUE, get_subclass_names, arg_to_sql
from .utils import parse_tsv, NO_VALUE, get_subclass_names, arg_to_sql, unescape
from .query import QuerySet
from .funcs import F
from .engines import Merge, Distributed
@ -206,8 +206,17 @@ class ModelBase(type):
return orm_fields.BaseEnumField.create_ad_hoc_field(db_type)
# DateTime with timezone
if db_type.startswith('DateTime('):
# Some functions return DateTimeField with timezone in brackets
return orm_fields.DateTimeField()
timezone = db_type[9:-1]
return orm_fields.DateTimeField(
timezone=timezone[1:-1] if timezone else None
)
# DateTime64
if db_type.startswith('DateTime64('):
precision, *timezone = [s.strip() for s in db_type[11:-1].split(',')]
return orm_fields.DateTime64Field(
precision=int(precision),
timezone=timezone[0][1:-1] if timezone else None
)
# Arrays
if db_type.startswith('Array'):
inner_field = cls.create_ad_hoc_field(db_type[6 : -1])
@ -375,14 +384,15 @@ class Model(metaclass=ModelBase):
- `line`: the TSV-formatted data.
- `field_names`: names of the model fields in the data.
- `timezone_in_use`: the timezone to use when parsing dates and datetimes.
- `timezone_in_use`: the timezone to use when parsing dates and datetimes. Some fields use their own timezones.
- `database`: if given, sets the database that this instance belongs to.
'''
values = iter(parse_tsv(line))
kwargs = {}
for name in field_names:
field = getattr(cls, name)
kwargs[name] = field.to_python(next(values), timezone_in_use)
field_timezone = getattr(field, 'timezone', None) or timezone_in_use
kwargs[name] = field.to_python(next(values), field_timezone)
obj = cls(**kwargs)
if database is not None:

View File

@ -20,8 +20,17 @@ class DateFieldsTest(unittest.TestCase):
def test_ad_hoc_model(self):
self.database.insert([
ModelWithDate(date_field='2016-08-30', datetime_field='2016-08-30 03:50:00'),
ModelWithDate(date_field='2016-08-31', datetime_field='2016-08-31 01:30:00')
ModelWithDate(
date_field='2016-08-30',
datetime_field='2016-08-30 03:50:00',
datetime64_field='2016-08-30 03:50:00.123456',
datetime64_3_field='2016-08-30 03:50:00.123456'
),
ModelWithDate(
date_field='2016-08-31',
datetime_field='2016-08-31 01:30:00',
datetime64_field='2016-08-31 01:30:00.123456',
datetime64_3_field='2016-08-31 01:30:00.123456')
])
# toStartOfHour returns DateTime('Asia/Yekaterinburg') in my case, so I test it here to
@ -35,10 +44,72 @@ class DateFieldsTest(unittest.TestCase):
self.assertEqual(results[1].datetime_field, datetime.datetime(2016, 8, 31, 1, 30, 0, tzinfo=pytz.UTC))
self.assertEqual(results[1].hour_start, datetime.datetime(2016, 8, 31, 1, 0, 0, tzinfo=pytz.UTC))
self.assertEqual(results[0].datetime64_field, datetime.datetime(2016, 8, 30, 3, 50, 0, 123456, tzinfo=pytz.UTC))
self.assertEqual(results[0].datetime64_3_field, datetime.datetime(2016, 8, 30, 3, 50, 0, 123000,
tzinfo=pytz.UTC))
self.assertEqual(results[1].datetime64_field, datetime.datetime(2016, 8, 31, 1, 30, 0, 123456, tzinfo=pytz.UTC))
self.assertEqual(results[1].datetime64_3_field, datetime.datetime(2016, 8, 31, 1, 30, 0, 123000,
tzinfo=pytz.UTC))
# Test model with one field of each date/time type: a date, a datetime, and two
# DateTime64 fields (default precision and precision 3).
class ModelWithDate(Model):

    date_field = DateField()
    datetime_field = DateTimeField()
    datetime64_field = DateTime64Field()  # default precision
    datetime64_3_field = DateTime64Field(precision=3)

    engine = MergeTree('date_field', ('date_field',))
# Test model whose datetime fields differ only in how the timezone is given:
# none, a zone name string (for both DateTime and DateTime64), and a pytz object.
class ModelWithTz(Model):
    datetime_no_tz_field = DateTimeField()  # server tz
    datetime_tz_field = DateTimeField(timezone='Europe/Madrid')  # timezone given by name
    datetime64_tz_field = DateTime64Field(timezone='Europe/Madrid')  # timezone given by name
    datetime_utc_field = DateTimeField(timezone=pytz.UTC)  # timezone given as a pytz object

    engine = MergeTree('datetime_no_tz_field', ('datetime_no_tz_field',))
# Round-trip test for datetime fields with per-field timezones: values are inserted
# through ModelWithTz and read back with a raw SELECT into an ad-hoc model.
class DateTimeFieldWithTzTest(unittest.TestCase):

    def setUp(self):
        # Each test works against the 'test-db' database with a fresh ModelWithTz table
        self.database = Database('test-db', log_statements=True)
        self.database.create_table(ModelWithTz)

    def tearDown(self):
        self.database.drop_database()

    def test_ad_hoc_model(self):
        # Row 1 uses strings without an offset; row 2 uses strings with a +0300 offset.
        # 07:00 at +03:00 is 04:00 UTC, the same value the assertions expect for row 1.
        self.database.insert([
            ModelWithTz(
                datetime_no_tz_field='2020-06-11 04:00:00',
                datetime_tz_field='2020-06-11 04:00:00',
                datetime64_tz_field='2020-06-11 04:00:00',
                datetime_utc_field='2020-06-11 04:00:00',
            ),
            ModelWithTz(
                datetime_no_tz_field='2020-06-11 07:00:00+0300',
                datetime_tz_field='2020-06-11 07:00:00+0300',
                datetime64_tz_field='2020-06-11 07:00:00+0300',
                datetime_utc_field='2020-06-11 07:00:00+0300',
            ),
        ])
        query = 'SELECT * from $db.modelwithtz ORDER BY datetime_no_tz_field'
        results = list(self.database.select(query))

        # Every field of both rows must compare equal to 2020-06-11 04:00:00 UTC
        self.assertEqual(results[0].datetime_no_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
        self.assertEqual(results[0].datetime_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
        self.assertEqual(results[0].datetime64_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
        self.assertEqual(results[0].datetime_utc_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
        self.assertEqual(results[1].datetime_no_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
        self.assertEqual(results[1].datetime_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
        self.assertEqual(results[1].datetime64_tz_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))
        self.assertEqual(results[1].datetime_utc_field, datetime.datetime(2020, 6, 11, 4, 0, 0, tzinfo=pytz.UTC))

        # The returned values must also carry the expected zone: the server timezone
        # for the field without one, otherwise the timezone the field was declared with
        self.assertEqual(results[0].datetime_no_tz_field.tzinfo.zone, self.database.server_timezone.zone)
        self.assertEqual(results[0].datetime_tz_field.tzinfo.zone, pytz.timezone('Europe/Madrid').zone)
        self.assertEqual(results[0].datetime64_tz_field.tzinfo.zone, pytz.timezone('Europe/Madrid').zone)
        self.assertEqual(results[0].datetime_utc_field.tzinfo.zone, pytz.timezone('UTC').zone)
        self.assertEqual(results[1].datetime_no_tz_field.tzinfo.zone, self.database.server_timezone.zone)
        self.assertEqual(results[1].datetime_tz_field.tzinfo.zone, pytz.timezone('Europe/Madrid').zone)
        self.assertEqual(results[1].datetime64_tz_field.tzinfo.zone, pytz.timezone('Europe/Madrid').zone)
        self.assertEqual(results[1].datetime_utc_field.tzinfo.zone, pytz.timezone('UTC').zone)

View File

@ -351,6 +351,7 @@ class FuncsTestCase(TestCaseWithData):
if self.database.server_timezone != pytz.utc:
raise unittest.SkipTest('This test must run with UTC as the server timezone')
self._test_func(F.toDateTime('2018-12-31 11:22:33'), datetime(2018, 12, 31, 11, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toDateTime64('2018-12-31 11:22:33.001', 6), datetime(2018, 12, 31, 11, 22, 33, 1000, tzinfo=pytz.utc))
self._test_func(F.parseDateTimeBestEffort('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))
self._test_func(F.parseDateTimeBestEffortOrNull('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))
self._test_func(F.parseDateTimeBestEffortOrZero('31/12/2019 10:05AM'), datetime(2019, 12, 31, 10, 5, tzinfo=pytz.utc))

View File

@ -38,7 +38,7 @@ class NullableFieldsTest(unittest.TestCase):
if value == '\\N':
self.assertIsNone(dt)
else:
self.assertEqual(dt.tzinfo, pytz.utc)
self.assertTrue(dt.tzinfo)
# Verify that conversion to and from db string does not change value
dt2 = f.to_python(f.to_db_string(dt, quote=False), pytz.utc)
self.assertEqual(dt, dt2)

View File

@ -6,18 +6,21 @@ import pytz
class SimpleFieldsTest(unittest.TestCase):
def test_datetime_field(self):
f = DateTimeField()
epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
# Valid values
for value in (date(1970, 1, 1), datetime(1970, 1, 1), epoch,
dates = [
date(1970, 1, 1), datetime(1970, 1, 1), epoch,
epoch.astimezone(pytz.timezone('US/Eastern')), epoch.astimezone(pytz.timezone('Asia/Jerusalem')),
'1970-01-01 00:00:00', '1970-01-17 00:00:17', '0000-00-00 00:00:00', 0,
'2017-07-26T08:31:05', '2017-07-26T08:31:05Z', '2017-07-26 08:31',
'2017-07-26T13:31:05+05', '2017-07-26 13:31:05+0500'):
'2017-07-26T13:31:05+05', '2017-07-26 13:31:05+0500'
]
def test_datetime_field(self):
f = DateTimeField()
for value in self.dates:
dt = f.to_python(value, pytz.utc)
self.assertEqual(dt.tzinfo, pytz.utc)
self.assertTrue(dt.tzinfo)
# Verify that conversion to and from db string does not change value
dt2 = f.to_python(f.to_db_string(dt, quote=False), pytz.utc)
self.assertEqual(dt, dt2)
@ -27,6 +30,28 @@ class SimpleFieldsTest(unittest.TestCase):
with self.assertRaises(ValueError):
f.to_python(value, pytz.utc)
def test_datetime64_field(self):
    """
    Checks DateTime64Field.to_python on the shared `dates` values plus values
    with a fractional-second part, that a round trip through to_db_string
    preserves the value, and that malformed strings raise ValueError.
    """
    f = DateTime64Field()
    # Valid values
    for value in self.dates + [
        datetime(1970, 1, 1, microsecond=100000),
        datetime(1970, 1, 1, microsecond=100000).astimezone(pytz.timezone('US/Eastern')),
        '1970-01-01 00:00:00.1', '1970-01-17 00:00:17.1', '0000-00-00 00:00:00.1', 0.1,
        '2017-07-26T08:31:05.1', '2017-07-26T08:31:05.1Z', '2017-07-26 08:31.1',
        '2017-07-26T13:31:05.1+05', '2017-07-26 13:31:05.1+0500'
    ]:
        dt = f.to_python(value, pytz.utc)
        self.assertTrue(dt.tzinfo)
        # Verify that conversion to and from db string does not change value
        dt2 = f.to_python(f.to_db_string(dt, quote=False), pytz.utc)
        self.assertEqual(dt, dt2)
    # Invalid values
    for value in ('nope', '21/7/1999',
                  '2017-01 15:06:00', '2017-01-01X15:06:00', '2017-13-01T15:06:00'):
        with self.assertRaises(ValueError):
            f.to_python(value, pytz.utc)
def test_date_field(self):
f = DateField()
epoch = date(1970, 1, 1)