mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2025-07-25 15:29:50 +03:00
Merge branch 'develop' of https://github.com/Infinidat/infi.clickhouse_orm into develop
# Conflicts: # README.rst # src/infi/clickhouse_orm/models.py
This commit is contained in:
commit
463ac2b786
27
CHANGELOG.rst
Normal file
27
CHANGELOG.rst
Normal file
|
@ -0,0 +1,27 @@
|
|||
Change Log
|
||||
==========
|
||||
|
||||
[Unreleased]
|
||||
------------
|
||||
- Always keep datetime fields in UTC internally, and convert server timezone to UTC when parsing query results
|
||||
- Support for ALIAS and MATERIALIZED fields (M1ha)
|
||||
- Pagination: passing -1 as the page number now returns the last page
|
||||
- Accept datetime values for date fields (Zloool)
|
||||
- Support readonly mode in Database class (tswr)
|
||||
|
||||
v0.7.1
|
||||
------
|
||||
- Accept '0000-00-00 00:00:00' as a datetime value (tsionyx)
|
||||
- Bug fix: parse_array fails on int arrays
|
||||
- Improve performance when inserting many rows
|
||||
|
||||
v0.7.0
|
||||
------
|
||||
- Support array fields
|
||||
- Support enum fields
|
||||
|
||||
v0.6.3
|
||||
------
|
||||
- Python 3 support
|
||||
|
||||
|
48
README.rst
48
README.rst
|
@ -31,8 +31,8 @@ Models are defined in a way reminiscent of Django's ORM::
|
|||
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
|
||||
|
||||
It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.).
|
||||
It is always possible to pass alias or materialized parameters. See below for usage examples.
|
||||
Only one of default, alias and materialized parameters can be provided
|
||||
Alternatively it is possible to pass alias or materialized parameters (see below for usage examples).
|
||||
Only one of ``default``, ``alias`` and ``materialized`` parameters can be provided.
|
||||
|
||||
See below for the supported field types and table engines.
|
||||
|
||||
|
@ -92,6 +92,11 @@ Using the ``Database`` instance you can create a table for your model, and inser
|
|||
|
||||
The ``insert`` method can take any iterable of model instances, but they all must belong to the same model class.
|
||||
|
||||
Creating a read-only database is also supported. Such a ``Database`` instance can only read data, and cannot
|
||||
modify data or schemas::
|
||||
|
||||
db = Database('my_test_db', readonly=True)
|
||||
|
||||
Reading from the Database
|
||||
-------------------------
|
||||
|
||||
|
@ -256,7 +261,25 @@ Float64Field Float64 float
|
|||
Enum8Field Enum8 Enum See below
|
||||
Enum16Field Enum16 Enum See below
|
||||
ArrayField Array list See below
|
||||
=================== ========== ================= ===================================================
|
||||
=================== ======== ================= ===================================================
|
||||
|
||||
DateTimeField and Time Zones
|
||||
****************************
|
||||
|
||||
A ``DateTimeField`` can be assigned values from one of the following types:
|
||||
|
||||
- datetime
|
||||
- date
|
||||
- integer - number of seconds since the Unix epoch
|
||||
- string in ``YYYY-MM-DD HH:MM:SS`` format
|
||||
|
||||
The assigned value always gets converted to a timezone-aware ``datetime`` in UTC. If the assigned
|
||||
value is a timezone-aware ``datetime`` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC.
|
||||
|
||||
DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the
|
||||
timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse version which is new
|
||||
enough to support the ``timezone()`` function; otherwise the server is assumed to be using UTC. In any case, we recommend
|
||||
setting the server timezone to UTC in order to prevent confusion.
|
||||
|
||||
Working with enum fields
|
||||
************************
|
||||
|
@ -301,15 +324,12 @@ You can create array fields containing any data type, for example::
|
|||
Working with materialized and alias fields
|
||||
******************************************
|
||||
|
||||
ClickHouse provides an opportunity to create MATERIALIZED and ALIAS Fields.
|
||||
|
||||
ClickHouse provides an opportunity to create MATERIALIZED and ALIAS fields.
|
||||
See documentation `here <https://clickhouse.yandex/reference_en.html#Default%20values>`_.
|
||||
|
||||
Both field types can't be inserted into database directly.
|
||||
These field values are ignored, when using database.insert() method.
|
||||
These fields are set to default values if you use database.select('SELECT * FROM mymodel', model_class=MyModel),
|
||||
because ClickHouse doesn't return them.
|
||||
Nevertheless, attribute values (as well as defaults) can be set for model object from python.
|
||||
Both field types can't be inserted into the database directly, so they are ignored when using the ``Database.insert()`` method.
|
||||
ClickHouse does not return the field values if you use ``"SELECT * FROM ..."`` - you have to list these field
|
||||
names explicitly in the query.
|
||||
|
||||
Usage::
|
||||
|
||||
|
@ -327,7 +347,7 @@ Usage::
|
|||
db.insert([obj])
|
||||
# All values will be retrieved from database
|
||||
db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
|
||||
# created_date, username will contain default value
|
||||
# created_date and username will contain a default value
|
||||
db.select('SELECT * FROM $db.event', model_class=Event)
|
||||
|
||||
|
||||
|
@ -373,4 +393,8 @@ After cloning the project, run the following commands::
|
|||
|
||||
To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run::
|
||||
|
||||
bin/nosetests
|
||||
bin/nosetests
|
||||
|
||||
To see test coverage information run::
|
||||
|
||||
bin/nosetests --with-coverage --cover-package=infi.clickhouse_orm
|
||||
|
|
|
@ -4,9 +4,12 @@ from .models import ModelBase
|
|||
from .utils import escape, parse_tsv, import_submodules
|
||||
from math import ceil
|
||||
import datetime
|
||||
import logging
|
||||
from string import Template
|
||||
from six import PY3, string_types
|
||||
import pytz
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger('clickhouse_orm')
|
||||
|
||||
|
||||
Page = namedtuple('Page', 'objects number_of_objects pages_total number page_size')
|
||||
|
@ -26,6 +29,7 @@ class Database(object):
|
|||
self.readonly = readonly
|
||||
if not self.readonly:
|
||||
self.create_database()
|
||||
self.server_timezone = self._get_server_timezone()
|
||||
|
||||
def create_database(self):
|
||||
self._send('CREATE DATABASE IF NOT EXISTS `%s`' % self.db_name)
|
||||
|
@ -90,7 +94,7 @@ class Database(object):
|
|||
field_types = parse_tsv(next(lines))
|
||||
model_class = model_class or ModelBase.create_ad_hoc_model(zip(field_names, field_types))
|
||||
for line in lines:
|
||||
yield model_class.from_tsv(line, field_names)
|
||||
yield model_class.from_tsv(line, field_names, self.server_timezone)
|
||||
|
||||
def raw(self, query, settings=None, stream=False):
|
||||
"""
|
||||
|
@ -161,6 +165,8 @@ class Database(object):
|
|||
params['user'] = self.username
|
||||
if self.password:
|
||||
params['password'] = self.password
|
||||
if self.readonly:
|
||||
params['readonly'] = '1'
|
||||
return params
|
||||
|
||||
def _substitute(self, query, model_class=None):
|
||||
|
@ -173,3 +179,11 @@ class Database(object):
|
|||
mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name())
|
||||
query = Template(query).substitute(mapping)
|
||||
return query
|
||||
|
||||
def _get_server_timezone(self):
|
||||
try:
|
||||
r = self._send('SELECT timezone()')
|
||||
return pytz.timezone(r.text.strip())
|
||||
except DatabaseException:
|
||||
logger.exception('Cannot determine server timezone, assuming UTC')
|
||||
return pytz.utc
|
||||
|
|
|
@ -2,6 +2,7 @@ from six import string_types, text_type, binary_type
|
|||
import datetime
|
||||
import pytz
|
||||
import time
|
||||
from calendar import timegm
|
||||
|
||||
from .utils import escape, parse_array
|
||||
|
||||
|
@ -24,10 +25,11 @@ class Field(object):
|
|||
self.alias = alias
|
||||
self.materialized = materialized
|
||||
|
||||
def to_python(self, value):
|
||||
def to_python(self, value, timezone_in_use):
|
||||
'''
|
||||
Converts the input value into the expected Python data type, raising ValueError if the
|
||||
data can't be converted. Returns the converted value. Subclasses should override this.
|
||||
The timezone_in_use parameter should be consulted when parsing datetime fields.
|
||||
'''
|
||||
return value
|
||||
|
||||
|
@ -78,7 +80,7 @@ class StringField(Field):
|
|||
class_default = ''
|
||||
db_type = 'String'
|
||||
|
||||
def to_python(self, value):
|
||||
def to_python(self, value, timezone_in_use):
|
||||
if isinstance(value, text_type):
|
||||
return value
|
||||
if isinstance(value, binary_type):
|
||||
|
@ -93,11 +95,11 @@ class DateField(Field):
|
|||
class_default = min_value
|
||||
db_type = 'Date'
|
||||
|
||||
def to_python(self, value):
|
||||
if isinstance(value, datetime.date):
|
||||
return value
|
||||
def to_python(self, value, timezone_in_use):
|
||||
if isinstance(value, datetime.datetime):
|
||||
return value.date()
|
||||
if isinstance(value, datetime.date):
|
||||
return value
|
||||
if isinstance(value, int):
|
||||
return DateField.class_default + datetime.timedelta(days=value)
|
||||
if isinstance(value, string_types):
|
||||
|
@ -118,26 +120,27 @@ class DateTimeField(Field):
|
|||
class_default = datetime.datetime.fromtimestamp(0, pytz.utc)
|
||||
db_type = 'DateTime'
|
||||
|
||||
def to_python(self, value):
|
||||
def to_python(self, value, timezone_in_use):
|
||||
if isinstance(value, datetime.datetime):
|
||||
return value
|
||||
return value.astimezone(pytz.utc) if value.tzinfo else value.replace(tzinfo=pytz.utc)
|
||||
if isinstance(value, datetime.date):
|
||||
return datetime.datetime(value.year, value.month, value.day)
|
||||
return datetime.datetime(value.year, value.month, value.day, tzinfo=pytz.utc)
|
||||
if isinstance(value, int):
|
||||
return datetime.datetime.fromtimestamp(value, pytz.utc)
|
||||
return datetime.datetime.utcfromtimestamp(value).replace(tzinfo=pytz.utc)
|
||||
if isinstance(value, string_types):
|
||||
if value == '0000-00-00 00:00:00':
|
||||
return self.class_default
|
||||
return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
|
||||
dt = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
|
||||
return timezone_in_use.localize(dt).astimezone(pytz.utc)
|
||||
raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value))
|
||||
|
||||
def to_db_string(self, value, quote=True):
|
||||
return escape(int(time.mktime(value.timetuple())), quote)
|
||||
return escape(timegm(value.utctimetuple()), quote)
|
||||
|
||||
|
||||
class BaseIntField(Field):
|
||||
|
||||
def to_python(self, value):
|
||||
def to_python(self, value, timezone_in_use):
|
||||
try:
|
||||
return int(value)
|
||||
except:
|
||||
|
@ -205,7 +208,7 @@ class Int64Field(BaseIntField):
|
|||
|
||||
class BaseFloatField(Field):
|
||||
|
||||
def to_python(self, value):
|
||||
def to_python(self, value, timezone_in_use):
|
||||
try:
|
||||
return float(value)
|
||||
except:
|
||||
|
@ -230,7 +233,7 @@ class BaseEnumField(Field):
|
|||
default = list(enum_cls)[0]
|
||||
super(BaseEnumField, self).__init__(default, alias, materialized)
|
||||
|
||||
def to_python(self, value):
|
||||
def to_python(self, value, timezone_in_use):
|
||||
if isinstance(value, self.enum_cls):
|
||||
return value
|
||||
try:
|
||||
|
@ -292,14 +295,14 @@ class ArrayField(Field):
|
|||
self.inner_field = inner_field
|
||||
super(ArrayField, self).__init__(default, alias, materialized)
|
||||
|
||||
def to_python(self, value):
|
||||
def to_python(self, value, timezone_in_use):
|
||||
if isinstance(value, text_type):
|
||||
value = parse_array(value)
|
||||
elif isinstance(value, binary_type):
|
||||
value = parse_array(value.decode('UTF-8'))
|
||||
elif not isinstance(value, (list, tuple)):
|
||||
raise ValueError('ArrayField expects list or tuple, not %s' % type(value))
|
||||
return [self.inner_field.to_python(v) for v in value]
|
||||
return [self.inner_field.to_python(v, timezone_in_use) for v in value]
|
||||
|
||||
def validate(self, value):
|
||||
for v in value:
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from logging import getLogger
|
||||
|
||||
from six import with_metaclass
|
||||
import pytz
|
||||
|
||||
from .fields import Field
|
||||
from .utils import parse_tsv
|
||||
|
@ -97,7 +98,7 @@ class Model(with_metaclass(ModelBase)):
|
|||
'''
|
||||
field = self.get_field(name)
|
||||
if field:
|
||||
value = field.to_python(value)
|
||||
value = field.to_python(value, pytz.utc)
|
||||
field.validate(value)
|
||||
super(Model, self).__setattr__(name, value)
|
||||
|
||||
|
@ -137,7 +138,7 @@ class Model(with_metaclass(ModelBase)):
|
|||
return 'DROP TABLE IF EXISTS `%s`.`%s`' % (db_name, cls.table_name())
|
||||
|
||||
@classmethod
|
||||
def from_tsv(cls, line, field_names=None):
|
||||
def from_tsv(cls, line, field_names=None, timezone_in_use=pytz.utc):
|
||||
'''
|
||||
Create a model instance from a tab-separated line. The line may or may not include a newline.
|
||||
The field_names list must match the fields defined in the model, but does not have to include all of them.
|
||||
|
@ -148,7 +149,8 @@ class Model(with_metaclass(ModelBase)):
|
|||
values = iter(parse_tsv(line))
|
||||
kwargs = {}
|
||||
for name in field_names:
|
||||
kwargs[name] = next(values)
|
||||
field = getattr(cls, name)
|
||||
kwargs[name] = field.to_python(next(values), timezone_in_use)
|
||||
return cls(**kwargs)
|
||||
|
||||
def to_tsv(self, insertable_only=False):
|
||||
|
@ -157,8 +159,9 @@ class Model(with_metaclass(ModelBase)):
|
|||
:param bool insertable_only: If True, returns only fields, that can be inserted into database
|
||||
'''
|
||||
data = self.__dict__
|
||||
|
||||
fields = [f for f in self._fields if not f[1].readonly] if insertable_only else self._fields
|
||||
fields = self._fields
|
||||
if insertable_only:
|
||||
fields = [f for f in fields if not f[1].readonly]
|
||||
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields)
|
||||
|
||||
def to_dict(self, insertable_only=False, field_names=None):
|
||||
|
|
|
@ -117,6 +117,18 @@ class DatabaseTestCase(unittest.TestCase):
|
|||
p = list(self.database.select("SELECT * from $table", Person))[0]
|
||||
self.assertEquals(p.first_name, s)
|
||||
|
||||
def test_readonly(self):
|
||||
orig_database = self.database
|
||||
self.database = Database(orig_database.db_name, readonly=True)
|
||||
with self.assertRaises(DatabaseException):
|
||||
self._insert_and_check(self._sample_data(), len(data))
|
||||
self.assertEquals(self.database.count(Person), 0)
|
||||
with self.assertRaises(DatabaseException):
|
||||
self.database.drop_table(Person)
|
||||
with self.assertRaises(DatabaseException):
|
||||
self.database.drop_database()
|
||||
self.database = orig_database
|
||||
|
||||
def _sample_data(self):
|
||||
for entry in data:
|
||||
yield Person(**entry)
|
||||
|
|
53
tests/test_simple_fields.py
Normal file
53
tests/test_simple_fields.py
Normal file
|
@ -0,0 +1,53 @@
|
|||
import unittest
|
||||
from infi.clickhouse_orm.fields import *
|
||||
from datetime import date, datetime
|
||||
import pytz
|
||||
|
||||
|
||||
class SimpleFieldsTest(unittest.TestCase):
|
||||
|
||||
def test_date_field(self):
|
||||
f = DateField()
|
||||
# Valid values
|
||||
for value in (date(1970, 1, 1), datetime(1970, 1, 1), '1970-01-01', '0000-00-00', 0):
|
||||
self.assertEquals(f.to_python(value, pytz.utc), date(1970, 1, 1))
|
||||
# Invalid values
|
||||
for value in ('nope', '21/7/1999', 0.5):
|
||||
with self.assertRaises(ValueError):
|
||||
f.to_python(value, pytz.utc)
|
||||
# Range check
|
||||
for value in (date(1900, 1, 1), date(2900, 1, 1)):
|
||||
with self.assertRaises(ValueError):
|
||||
f.validate(value)
|
||||
|
||||
def test_datetime_field(self):
|
||||
f = DateTimeField()
|
||||
epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
|
||||
# Valid values
|
||||
for value in (date(1970, 1, 1), datetime(1970, 1, 1), epoch,
|
||||
epoch.astimezone(pytz.timezone('US/Eastern')), epoch.astimezone(pytz.timezone('Asia/Jerusalem')),
|
||||
'1970-01-01 00:00:00', '0000-00-00 00:00:00', 0):
|
||||
dt = f.to_python(value, pytz.utc)
|
||||
self.assertEquals(dt.tzinfo, pytz.utc)
|
||||
self.assertEquals(dt, epoch)
|
||||
# Verify that conversion to and from db string does not change value
|
||||
dt2 = f.to_python(int(f.to_db_string(dt)), pytz.utc)
|
||||
self.assertEquals(dt, dt2)
|
||||
# Invalid values
|
||||
for value in ('nope', '21/7/1999', 0.5):
|
||||
with self.assertRaises(ValueError):
|
||||
f.to_python(value, pytz.utc)
|
||||
|
||||
def test_uint8_field(self):
|
||||
f = UInt8Field()
|
||||
# Valid values
|
||||
for value in (17, '17', 17.0):
|
||||
self.assertEquals(f.to_python(value, pytz.utc), 17)
|
||||
# Invalid values
|
||||
for value in ('nope', date.today()):
|
||||
with self.assertRaises(ValueError):
|
||||
f.to_python(value, pytz.utc)
|
||||
# Range check
|
||||
for value in (-1, 1000):
|
||||
with self.assertRaises(ValueError):
|
||||
f.validate(value)
|
Loading…
Reference in New Issue
Block a user