Finished Release v1.2.0

This commit is contained in:
Itai Shirav 2019-07-13 10:46:16 +03:00
commit 4749918014
14 changed files with 371 additions and 84 deletions

View File

@ -1,6 +1,11 @@
Change Log
==========
v1.2.0
------
- Add support for per-field compression codecs (rbelio, Chocorean)
- Add support for low cardinality fields (rbelio)
v1.1.0
------
- Add PREWHERE support to querysets (M1hacka)

View File

@ -10,7 +10,7 @@ infi.clickhouse_orm.database
Database instances connect to a specific ClickHouse database for running queries,
inserting data and other operations.
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True, timeout=60, verify_ssl_cert=True)
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True, timeout=60, verify_ssl_cert=True, log_statements=False)
Initializes a database instance. Unless it's readonly, the database will be
@ -24,6 +24,7 @@ created on the ClickHouse server if it does not already exist.
- `autocreate`: automatically create the database if it does not exist (unless in readonly mode).
- `timeout`: the connection timeout in seconds.
- `verify_ssl_cert`: whether to verify the server's certificate when connecting via HTTPS.
- `log_statements`: when True, all database statements are logged.
#### add_setting(name, value)
@ -510,7 +511,7 @@ infi.clickhouse_orm.fields
Extends Field
#### ArrayField(inner_field, default=None, alias=None, materialized=None, readonly=None)
#### ArrayField(inner_field, default=None, alias=None, materialized=None, readonly=None, codec=None)
### BaseEnumField
@ -520,7 +521,7 @@ Extends Field
Abstract base class for all enum-type fields.
#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None, readonly=None)
#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None, readonly=None, codec=None)
### BaseFloatField
@ -530,7 +531,7 @@ Extends Field
Abstract base class for all float-type fields.
#### BaseFloatField(default=None, alias=None, materialized=None, readonly=None)
#### BaseFloatField(default=None, alias=None, materialized=None, readonly=None, codec=None)
### BaseIntField
@ -540,21 +541,21 @@ Extends Field
Abstract base class for all integer-type fields.
#### BaseIntField(default=None, alias=None, materialized=None, readonly=None)
#### BaseIntField(default=None, alias=None, materialized=None, readonly=None, codec=None)
### DateField
Extends Field
#### DateField(default=None, alias=None, materialized=None, readonly=None)
#### DateField(default=None, alias=None, materialized=None, readonly=None, codec=None)
### DateTimeField
Extends Field
#### DateTimeField(default=None, alias=None, materialized=None, readonly=None)
#### DateTimeField(default=None, alias=None, materialized=None, readonly=None, codec=None)
### Decimal128Field
@ -592,14 +593,14 @@ Base class for all decimal fields. Can also be used directly.
Extends BaseEnumField
#### Enum16Field(enum_cls, default=None, alias=None, materialized=None, readonly=None)
#### Enum16Field(enum_cls, default=None, alias=None, materialized=None, readonly=None, codec=None)
### Enum8Field
Extends BaseEnumField
#### Enum8Field(enum_cls, default=None, alias=None, materialized=None, readonly=None)
#### Enum8Field(enum_cls, default=None, alias=None, materialized=None, readonly=None, codec=None)
### Field
@ -607,7 +608,7 @@ Extends BaseEnumField
Abstract base class for all field types.
#### Field(default=None, alias=None, materialized=None, readonly=None)
#### Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### FixedStringField
@ -621,91 +622,98 @@ Extends StringField
Extends BaseFloatField
#### Float32Field(default=None, alias=None, materialized=None, readonly=None)
#### Float32Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### Float64Field
Extends BaseFloatField
#### Float64Field(default=None, alias=None, materialized=None, readonly=None)
#### Float64Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### Int16Field
Extends BaseIntField
#### Int16Field(default=None, alias=None, materialized=None, readonly=None)
#### Int16Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### Int32Field
Extends BaseIntField
#### Int32Field(default=None, alias=None, materialized=None, readonly=None)
#### Int32Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### Int64Field
Extends BaseIntField
#### Int64Field(default=None, alias=None, materialized=None, readonly=None)
#### Int64Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### Int8Field
Extends BaseIntField
#### Int8Field(default=None, alias=None, materialized=None, readonly=None)
#### Int8Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### LowCardinalityField
Extends Field
#### LowCardinalityField(inner_field, default=None, alias=None, materialized=None, readonly=None, codec=None)
### NullableField
Extends Field
#### NullableField(inner_field, default=None, alias=None, materialized=None, extra_null_values=None)
#### NullableField(inner_field, default=None, alias=None, materialized=None, extra_null_values=None, codec=None)
### StringField
Extends Field
#### StringField(default=None, alias=None, materialized=None, readonly=None)
#### StringField(default=None, alias=None, materialized=None, readonly=None, codec=None)
### UInt16Field
Extends BaseIntField
#### UInt16Field(default=None, alias=None, materialized=None, readonly=None)
#### UInt16Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### UInt32Field
Extends BaseIntField
#### UInt32Field(default=None, alias=None, materialized=None, readonly=None)
#### UInt32Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### UInt64Field
Extends BaseIntField
#### UInt64Field(default=None, alias=None, materialized=None, readonly=None)
#### UInt64Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### UInt8Field
Extends BaseIntField
#### UInt8Field(default=None, alias=None, materialized=None, readonly=None)
#### UInt8Field(default=None, alias=None, materialized=None, readonly=None, codec=None)
### UUIDField
Extends Field
#### UUIDField(default=None, alias=None, materialized=None, readonly=None)
#### UUIDField(default=None, alias=None, materialized=None, readonly=None, codec=None)
infi.clickhouse_orm.engines

View File

@ -121,8 +121,7 @@ db.select('SELECT * FROM $db.event', model_class=Event)
Working with nullable fields
----------------------------
From [some time](https://github.com/yandex/ClickHouse/pull/70) ClickHouse provides a NULL value support.
Also see some information [here](https://github.com/yandex/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00395_nullable.sql).
[ClickHouse provides support for NULL values](https://clickhouse.yandex/docs/en/data_types/nullable).
Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example:
@ -148,6 +147,79 @@ to `None`.
NOTE: `ArrayField` of `NullableField` is not supported. Also `EnumField` cannot be nullable.
NOTE: Using `Nullable` almost always negatively affects performance, keep this in mind when designing your databases.
Working with field compression codecs
-------------------------------------
In addition to the default data compression defined in the server settings, a compression codec can be specified per field.
Supported compression algorithms:
| Codec | Argument | Comment
| -------------------- | -------------------------------------------| ----------------------------------------------------
| NONE | None | No compression.
| LZ4 | None | LZ4 compression.
| LZ4HC(`level`) | Possible `level` range: [3, 12]. | Default value: 9. Greater values stand for better compression and higher CPU usage. Recommended value range: [4, 9].
| ZSTD(`level`) | Possible `level` range: [1, 22]. | Default value: 1. Greater values stand for better compression and higher CPU usage. Levels >= 20 should be used with caution, as they require more memory.
| Delta(`delta_bytes`) | Possible `delta_bytes` values: 1, 2, 4, 8. | Default value for `delta_bytes` is `sizeof(type)` if it is equal to 1, 2, 4 or 8, and 1 otherwise.
Codecs can be combined in a pipeline. The default table codec is not included in the pipeline (if it should be applied to a field, you have to specify it explicitly in the pipeline).
Recommended usage for codecs:
- Usually, values for a particular metric, stored in path, do not differ significantly from point to point. Using delta encoding makes it possible to reduce disk space usage significantly.
- DateTime works great with a pipeline of Delta and ZSTD; the column can be compressed to 2-3% of its original size (given smooth datetime data).
- Numeric types usually enjoy best compression rates with ZSTD
- String types enjoy good compression rates with LZ4HC
Usage:
```python
class Stats(models.Model):
id = fields.UInt64Field(codec='ZSTD(10)')
timestamp = fields.DateTimeField(codec='Delta,ZSTD')
timestamp_date = fields.DateField(codec='Delta(4),ZSTD(22)')
metadata_id = fields.Int64Field(codec='LZ4')
status = fields.StringField(codec='LZ4HC(10)')
calculation = fields.NullableField(fields.Float32Field(), codec='ZSTD')
alerts = fields.ArrayField(fields.FixedStringField(length=15), codec='Delta(2),LZ4HC')
engine = MergeTree('timestamp_date', ('id', 'timestamp'))
```
Note: This feature is supported on ClickHouse version 19.1.16 and above. Codec arguments will be ignored by the ORM for older versions of ClickHouse.
Working with LowCardinality fields
----------------------------------
Starting with version 19.0 ClickHouse offers a new type of field to improve the performance of queries
and compaction of columns for low entropy data.
[More specifically](https://github.com/yandex/ClickHouse/issues/4074), the LowCardinality data type builds dictionaries automatically. It can use multiple different dictionaries if necessary.
If the number of distinct values is pretty large, the dictionaries become local: several different dictionaries will be used for different ranges of data. For example, if you have too many distinct values in total, but fewer than about a million values each day, then queries by day will be processed efficiently, and queries for larger ranges will be processed rather efficiently.
LowCardinality works independently of (generic) fields compression.
LowCardinality fields are subsequently compressed as usual.
The compression ratios of LowCardinality fields for text data may be significantly better than without LowCardinality.
LowCardinality gives a performance boost, in the form of processing speed, if the number of distinct values is less than a few million. This is because data is processed in dictionary-encoded form.
You can find further information about LowCardinality in [this presentation](https://github.com/yandex/clickhouse-presentations/blob/master/meetup19/string_optimization.pdf).
Usage example:
```python
class LowCardinalityModel(models.Model):
date = fields.DateField()
int32 = fields.LowCardinalityField(fields.Int32Field())
float32 = fields.LowCardinalityField(fields.Float32Field())
string = fields.LowCardinalityField(fields.StringField())
nullable = fields.LowCardinalityField(fields.NullableField(fields.StringField()))
array = fields.ArrayField(fields.LowCardinalityField(fields.UInt64Field()))
engine = MergeTree('date', ('date',))
```
Note: `LowCardinality` field with an inner array field is not supported. Use an `ArrayField` with a `LowCardinality` inner field as seen in the example.
Creating custom field types
---------------------------
Sometimes it is convenient to use data types that are supported in Python, but have no corresponding column type in ClickHouse. In these cases it is possible to define a custom field class that knows how to convert the Pythonic object to a suitable representation in the database, and vice versa.

View File

@ -36,6 +36,8 @@
* [Working with array fields](field_types.md#working-with-array-fields)
* [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields)
* [Working with nullable fields](field_types.md#working-with-nullable-fields)
* [Working with field compression codecs](field_types.md#working-with-field-compression-codecs)
* [Working with LowCardinality fields](field_types.md#working-with-lowcardinality-fields)
* [Creating custom field types](field_types.md#creating-custom-field-types)
* [Table Engines](table_engines.md#table-engines)
@ -86,6 +88,7 @@
* [Int32Field](class_reference.md#int32field)
* [Int64Field](class_reference.md#int64field)
* [Int8Field](class_reference.md#int8field)
* [LowCardinalityField](class_reference.md#lowcardinalityfield)
* [NullableField](class_reference.md#nullablefield)
* [StringField](class_reference.md#stringfield)
* [UInt16Field](class_reference.md#uint16field)

View File

@ -120,6 +120,10 @@ class Database(object):
self.server_version = self._get_server_version()
# Versions 1.1.53981 and below don't have timezone function
self.server_timezone = self._get_server_timezone() if self.server_version > (1, 1, 53981) else pytz.utc
# Versions 19.1.16 and above support codec compression
self.has_codec_support = self.server_version >= (19, 1, 16)
# Version 19.0 and above support LowCardinality
self.has_low_cardinality_support = self.server_version >= (19, 0)
def create_database(self):
'''

View File

@ -40,7 +40,7 @@ class MergeTree(Engine):
assert date_col is None or isinstance(date_col, six.string_types), 'date_col must be string if present'
assert partition_key is None or type(partition_key) in (list, tuple),\
'partition_key must be tuple or list if present'
assert (replica_table_path is None) == (replica_name == None), \
assert (replica_table_path is None) == (replica_name is None), \
'both replica_table_path and replica_name must be specified'
# These values conflict with each other (old and new syntax of table engines).

View File

@ -3,13 +3,13 @@ from six import string_types, text_type, binary_type, integer_types
import datetime
import iso8601
import pytz
import time
from calendar import timegm
from decimal import Decimal, localcontext
from uuid import UUID
from logging import getLogger
from .utils import escape, parse_array, comma_join
logger = getLogger('clickhouse_orm')
class Field(object):
'''
@ -19,14 +19,16 @@ class Field(object):
class_default = 0
db_type = None
def __init__(self, default=None, alias=None, materialized=None, readonly=None):
def __init__(self, default=None, alias=None, materialized=None, readonly=None, codec=None):
assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \
"Only one of default, alias and materialized parameters can be given"
assert alias is None or isinstance(alias, string_types) and alias != "",\
"Alias field must be string field name, if given"
assert materialized is None or isinstance(materialized, string_types) and alias != "",\
"Alias field must be a string, if given"
assert materialized is None or isinstance(materialized, string_types) and materialized != "",\
"Materialized field must be string, if given"
assert readonly is None or type(readonly) is bool, "readonly parameter must be bool if given"
assert codec is None or isinstance(codec, string_types) and codec != "", \
"Codec field must be string, if given"
self.creation_counter = Field.creation_counter
Field.creation_counter += 1
@ -34,6 +36,7 @@ class Field(object):
self.alias = alias
self.materialized = materialized
self.readonly = bool(self.alias or self.materialized or readonly)
self.codec = codec
def to_python(self, value, timezone_in_use):
'''
@ -64,22 +67,30 @@ class Field(object):
'''
return escape(value, quote)
def get_sql(self, with_default_expression=True):
def get_sql(self, with_default_expression=True, db=None):
'''
Returns an SQL expression describing the field (e.g. for CREATE TABLE).
:param with_default_expression: If True, adds default value to sql.
It doesn't affect fields with alias and materialized values.
:param db: Database, used for checking supported features.
'''
sql = self.db_type
if with_default_expression:
sql += self._extra_params(db)
return sql
def _extra_params(self, db):
sql = ''
if self.alias:
return '%s ALIAS %s' % (self.db_type, self.alias)
sql += ' ALIAS %s' % self.alias
elif self.materialized:
return '%s MATERIALIZED %s' % (self.db_type, self.materialized)
else:
sql += ' MATERIALIZED %s' % self.materialized
elif self.default:
default = self.to_db_string(self.default)
return '%s DEFAULT %s' % (self.db_type, default)
else:
return self.db_type
sql += ' DEFAULT %s' % default
if self.codec and db and db.has_codec_support:
sql += ' CODEC(%s)' % self.codec
return sql
def isinstance(self, types):
"""
@ -361,11 +372,11 @@ class BaseEnumField(Field):
Abstract base class for all enum-type fields.
'''
def __init__(self, enum_cls, default=None, alias=None, materialized=None, readonly=None):
def __init__(self, enum_cls, default=None, alias=None, materialized=None, readonly=None, codec=None):
self.enum_cls = enum_cls
if default is None:
default = list(enum_cls)[0]
super(BaseEnumField, self).__init__(default, alias, materialized, readonly)
super(BaseEnumField, self).__init__(default, alias, materialized, readonly, codec)
def to_python(self, value, timezone_in_use):
if isinstance(value, self.enum_cls):
@ -384,12 +395,14 @@ class BaseEnumField(Field):
def to_db_string(self, value, quote=True):
return escape(value.name, quote)
def get_sql(self, with_default_expression=True):
def get_sql(self, with_default_expression=True, db=None):
values = ['%s = %d' % (escape(item.name), item.value) for item in self.enum_cls]
sql = '%s(%s)' % (self.db_type, ' ,'.join(values))
if with_default_expression:
default = self.to_db_string(self.default)
sql = '%s DEFAULT %s' % (sql, default)
if self.codec and db and db.has_codec_support:
sql+= ' CODEC(%s)' % self.codec
return sql
@classmethod
@ -399,10 +412,7 @@ class BaseEnumField(Field):
this method returns a matching enum field.
'''
import re
try:
Enum # exists in Python 3.4+
except NameError:
from enum import Enum # use the enum34 library instead
from enum import Enum
members = {}
for match in re.finditer("'(\w+)' = (\d+)", db_type):
members[match.group(1)] = int(match.group(2))
@ -425,11 +435,11 @@ class ArrayField(Field):
class_default = []
def __init__(self, inner_field, default=None, alias=None, materialized=None, readonly=None):
def __init__(self, inner_field, default=None, alias=None, materialized=None, readonly=None, codec=None):
assert isinstance(inner_field, Field), "The first argument of ArrayField must be a Field instance"
assert not isinstance(inner_field, ArrayField), "Multidimensional array fields are not supported by the ORM"
self.inner_field = inner_field
super(ArrayField, self).__init__(default, alias, materialized, readonly)
super(ArrayField, self).__init__(default, alias, materialized, readonly, codec)
def to_python(self, value, timezone_in_use):
if isinstance(value, text_type):
@ -448,9 +458,11 @@ class ArrayField(Field):
array = [self.inner_field.to_db_string(v, quote=True) for v in value]
return '[' + comma_join(array) + ']'
def get_sql(self, with_default_expression=True):
from .utils import escape
return 'Array(%s)' % self.inner_field.get_sql(with_default_expression=False)
def get_sql(self, with_default_expression=True, db=None):
sql = 'Array(%s)' % self.inner_field.get_sql(with_default_expression=False, db=db)
if with_default_expression and self.codec and db and db.has_codec_support:
sql+= ' CODEC(%s)' % self.codec
return sql
class UUIDField(Field):
@ -481,12 +493,12 @@ class NullableField(Field):
class_default = None
def __init__(self, inner_field, default=None, alias=None, materialized=None,
extra_null_values=None):
extra_null_values=None, codec=None):
self.inner_field = inner_field
self._null_values = [None]
if extra_null_values:
self._null_values.extend(extra_null_values)
super(NullableField, self).__init__(default, alias, materialized, readonly=None)
super(NullableField, self).__init__(default, alias, materialized, readonly=None, codec=codec)
def to_python(self, value, timezone_in_use):
if value == '\\N' or value in self._null_values:
@ -501,14 +513,38 @@ class NullableField(Field):
return '\\N'
return self.inner_field.to_db_string(value, quote=quote)
def get_sql(self, with_default_expression=True):
s = 'Nullable(%s)' % self.inner_field.get_sql(with_default_expression=False)
def get_sql(self, with_default_expression=True, db=None):
sql = 'Nullable(%s)' % self.inner_field.get_sql(with_default_expression=False, db=db)
if with_default_expression:
if self.alias:
s = '%s ALIAS %s' % (s, self.alias)
elif self.materialized:
s = '%s MATERIALIZED %s' % (s, self.materialized)
elif self.default:
default = self.to_db_string(self.default)
s = '%s DEFAULT %s' % (s, default)
return s
sql += self._extra_params(db)
return sql
class LowCardinalityField(Field):
    '''
    Wraps another field so that its column is declared as `LowCardinality(<inner type>)`.

    Value conversion, validation and serialization are delegated to the wrapped field.
    If the database passed to `get_sql` does not report LowCardinality support
    (or no database is passed at all), the plain inner type is emitted instead
    and a warning is logged.
    '''

    def __init__(self, inner_field, default=None, alias=None, materialized=None, readonly=None, codec=None):
        '''
        :param inner_field: the `Field` instance being wrapped. It may not be another
            `LowCardinalityField` or an `ArrayField`.
        The remaining parameters are passed on unchanged to `Field.__init__`.
        '''
        assert isinstance(inner_field, Field), "The first argument of LowCardinalityField must be a Field instance. Not: {}".format(inner_field)
        assert not isinstance(inner_field, LowCardinalityField), "LowCardinality inner fields are not supported by the ORM"
        assert not isinstance(inner_field, ArrayField), "Array field inside LowCardinality are not supported by the ORM. Use Array(LowCardinality) instead"
        self.inner_field = inner_field
        # The wrapper shares the implicit default of the field it wraps
        self.class_default = self.inner_field.class_default
        super(LowCardinalityField, self).__init__(default, alias, materialized, readonly, codec)

    def to_python(self, value, timezone_in_use):
        '''
        Converts the value using the inner field's `to_python`.
        '''
        return self.inner_field.to_python(value, timezone_in_use)

    def validate(self, value):
        '''
        Validates the value using the inner field's `validate`.
        '''
        self.inner_field.validate(value)

    def to_db_string(self, value, quote=True):
        '''
        Serializes the value using the inner field's `to_db_string`.
        '''
        return self.inner_field.to_db_string(value, quote=quote)

    def get_sql(self, with_default_expression=True, db=None):
        '''
        Returns an SQL expression describing the field (e.g. for CREATE TABLE).
        :param with_default_expression: If True, appends the extra parameters
            produced by `_extra_params` to the type.
        :param db: Database, used for checking supported features.
        '''
        # Forward db to the inner field, the same way ArrayField and NullableField do,
        # so that nested fields can check the supported features as well.
        inner_sql = self.inner_field.get_sql(with_default_expression=False, db=db)
        if db and db.has_low_cardinality_support:
            sql = 'LowCardinality(%s)' % inner_sql
        else:
            sql = inner_sql
            logger.warning('LowCardinalityField not supported on clickhouse-server version < 19.0 using {} as fallback'.format(self.inner_field.__class__.__name__))
        if with_default_expression:
            sql += self._extra_params(db)
        return sql

View File

@ -79,7 +79,7 @@ class AlterTable(Operation):
if name not in table_fields:
logger.info(' Add column %s', name)
assert prev_name, 'Cannot add a column to the beginning of the table'
cmd = 'ADD COLUMN %s %s' % (name, field.get_sql())
cmd = 'ADD COLUMN %s %s' % (name, field.get_sql(db=database))
if is_regular_field:
cmd += ' AFTER %s' % prev_name
self._alter_table(database, cmd)
@ -93,7 +93,7 @@ class AlterTable(Operation):
# The order of class attributes can be changed any time, so we can't count on it
# Secondly, MATERIALIZED and ALIAS fields are always at the end of the DESC, so we can't expect them to save
# attribute position. Watch https://github.com/Infinidat/infi.clickhouse_orm/issues/47
model_fields = {name: field.get_sql(with_default_expression=False)
model_fields = {name: field.get_sql(with_default_expression=False, db=database)
for name, field in iteritems(self.model_class.fields())}
for field_name, field_sql in self._get_table_fields(database):
# All fields must have been created and dropped by this moment

View File

@ -190,7 +190,7 @@ class Model(with_metaclass(ModelBase)):
parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db.db_name, cls.table_name())]
cols = []
for name, field in iteritems(cls.fields()):
cols.append(' %s %s' % (name, field.get_sql()))
cols.append(' %s %s' % (name, field.get_sql(db=db)))
parts.append(',\n'.join(cols))
parts.append(')')
parts.append('ENGINE = ' + cls.engine.create_table_sql(db))
@ -316,7 +316,7 @@ class MergeModel(Model):
cols = []
for name, field in iteritems(cls.fields()):
if name != '_table':
cols.append(' %s %s' % (name, field.get_sql()))
cols.append(' %s %s' % (name, field.get_sql(db=db)))
parts.append(',\n'.join(cols))
parts.append(')')
parts.append('ENGINE = ' + cls.engine.create_table_sql(db))

View File

@ -0,0 +1,7 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *
# Migration operations for this step: alter the tables of the two models below.
# The model classes are brought into scope by the star import from test_migrations.
operations = [
migrations.AlterTable(Model4_compressed),
migrations.AlterTable(Model2LowCardinality)
]

View File

@ -0,0 +1,123 @@
from __future__ import unicode_literals
import unittest
import datetime
import pytz
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.utils import parse_tsv
class CompressedFieldsTestCase(unittest.TestCase):
    '''
    Tests for a model whose fields declare per-field compression codecs.
    Each test runs against a freshly created `CompressedModel` table in the
    'test-db' database, which is dropped afterwards.
    '''

    def setUp(self):
        self.database = Database('test-db', log_statements=True)
        self.database.create_table(CompressedModel)

    def tearDown(self):
        self.database.drop_database()

    def test_defaults(self):
        # Check that all fields have their explicit or implicit defaults
        instance = CompressedModel()
        self.database.insert([instance])
        self.assertEqual(instance.date_field, datetime.date(1970, 1, 1))
        self.assertEqual(instance.datetime_field, datetime.datetime(1970, 1, 1, tzinfo=pytz.utc))
        self.assertEqual(instance.string_field, 'dozo')
        self.assertEqual(instance.int64_field, 42)
        self.assertEqual(instance.float_field, 0)
        self.assertEqual(instance.nullable_field, None)
        self.assertEqual(instance.array_field, [])

    def test_assignment(self):
        # Check that all fields are assigned during construction
        kwargs = dict(
            uint64_field=217,
            date_field=datetime.date(1973, 12, 6),
            datetime_field=datetime.datetime(2000, 5, 24, 10, 22, tzinfo=pytz.utc),
            string_field='aloha',
            int64_field=-50,
            float_field=3.14,
            nullable_field=-2.718281,
            array_field=['123456789123456', '', 'a']
        )
        instance = CompressedModel(**kwargs)
        self.database.insert([instance])
        for name, value in kwargs.items():
            self.assertEqual(value, getattr(instance, name))

    def test_string_conversion(self):
        # Check field conversion from string during construction
        instance = CompressedModel(date_field='1973-12-06', int64_field='100', float_field='7', nullable_field=None, array_field='[a,b,c]')
        self.assertEqual(instance.date_field, datetime.date(1973, 12, 6))
        self.assertEqual(instance.int64_field, 100)
        self.assertEqual(instance.float_field, 7)
        self.assertEqual(instance.nullable_field, None)
        self.assertEqual(instance.array_field, ['a', 'b', 'c'])
        # Check field conversion from string during assignment
        instance.int64_field = '99'
        self.assertEqual(instance.int64_field, 99)

    def test_to_dict(self):
        instance = CompressedModel(date_field='1973-12-06', int64_field='100', float_field='7', array_field='[a,b,c]')
        # By default all fields are included, the readonly alias field too
        self.assertDictEqual(instance.to_dict(), {
            "date_field": datetime.date(1973, 12, 6),
            "int64_field": 100,
            "float_field": 7.0,
            "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
            "alias_field": 0.0,
            'string_field': 'dozo',
            'nullable_field': None,
            'uint64_field': 0,
            'array_field': ['a', 'b', 'c']
        })
        # Without readonly fields, alias_field is left out
        self.assertDictEqual(instance.to_dict(include_readonly=False), {
            "date_field": datetime.date(1973, 12, 6),
            "int64_field": 100,
            "float_field": 7.0,
            "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
            'string_field': 'dozo',
            'nullable_field': None,
            'uint64_field': 0,
            'array_field': ['a', 'b', 'c']
        })
        # An explicitly requested readonly field is still left out
        self.assertDictEqual(
            instance.to_dict(include_readonly=False, field_names=('int64_field', 'alias_field', 'datetime_field')), {
                "int64_field": 100,
                "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc)
            })

    def test_confirm_compression_codec(self):
        # Servers without codec support (ClickHouse < 19.1.16) get no CODEC clauses
        # from the ORM, so there is nothing to confirm - skip instead of failing.
        if not self.database.has_codec_support:
            self.skipTest('Compression codecs require ClickHouse 19.1.16 or above')
        instance = CompressedModel(date_field='1973-12-06', int64_field='100', float_field='7', array_field='[a,b,c]')
        self.database.insert([instance])
        r = self.database.raw("select name, compression_codec from system.columns where table = '{}' and database='{}' FORMAT TabSeparatedWithNamesAndTypes".format(instance.table_name(), self.database.db_name))
        # The first two lines of the response hold the column names and types
        lines = r.splitlines()
        data = [tuple(parse_tsv(line)) for line in lines[2:]]
        self.assertListEqual(data, [('uint64_field', 'CODEC(ZSTD(10))'),
                                    ('datetime_field', 'CODEC(Delta(4), ZSTD(1))'),
                                    ('date_field', 'CODEC(Delta(4), ZSTD(22))'),
                                    ('int64_field', 'CODEC(LZ4)'),
                                    ('string_field', 'CODEC(LZ4HC(10))'),
                                    ('nullable_field', 'CODEC(ZSTD(1))'),
                                    ('array_field', 'CODEC(Delta(2), LZ4HC(0))'),
                                    ('float_field', 'CODEC(NONE)'),
                                    ('alias_field', 'CODEC(ZSTD(4))')])
# Model used by CompressedFieldsTestCase: every field declares a compression codec,
# covering single codecs, codecs with arguments and comma-separated codec pipelines.
class CompressedModel(Model):
uint64_field = UInt64Field(codec='ZSTD(10)')
datetime_field = DateTimeField(codec='Delta,ZSTD')
date_field = DateField(codec='Delta(4),ZSTD(22)')
int64_field = Int64Field(default=42, codec='LZ4')
string_field = StringField(default='dozo', codec='LZ4HC(10)')
# The codec is given to the Nullable / Array wrapper, not to the inner field
nullable_field = NullableField(Float32Field(), codec='ZSTD')
array_field = ArrayField(FixedStringField(length=15), codec='Delta(2),LZ4HC')
float_field = Float32Field(codec='NONE')
# Alias of float_field that also carries a codec of its own
alias_field = Float32Field(alias='float_field', codec='ZSTD(4)')
engine = MergeTree('datetime_field', ('uint64_field', 'datetime_field'))

View File

@ -6,10 +6,7 @@ from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
try:
Enum # exists in Python 3.4+
except NameError:
from enum import Enum # use the enum34 library instead
from enum import Enum
class EnumFieldsTest(unittest.TestCase):

View File

@ -7,14 +7,11 @@ from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.migrations import MigrationHistory
from enum import Enum
# Add tests to path so that migrations will be importable
import sys, os
sys.path.append(os.path.dirname(__file__))
try:
Enum # exists in Python 3.4+
except NameError:
from enum import Enum # use the enum34 library instead
import logging
logging.basicConfig(level=logging.DEBUG, format='%(message)s')
@ -97,6 +94,17 @@ class MigrationsTestCase(unittest.TestCase):
self.assertTrue(self.tableExists(AliasModel1))
self.assertEqual(self.getTableFields(AliasModel1),
[('date', 'Date'), ('int_field', 'Int8'), ('date_alias', 'Date'), ('int_field_plus_one', 'Int8')])
self.database.migrate('tests.sample_migrations', 15)
self.assertTrue(self.tableExists(Model4_compressed))
if self.database.has_low_cardinality_support:
self.assertEqual(self.getTableFields(Model2LowCardinality),
[('date', 'Date'), ('f1', 'LowCardinality(Int32)'), ('f3', 'LowCardinality(Float32)'),
('f2', 'LowCardinality(String)'), ('f4', 'LowCardinality(Nullable(String))'), ('f5', 'Array(LowCardinality(UInt64))')])
else:
logging.warning('No support for low cardinality')
self.assertEqual(self.getTableFields(Model2),
[('date', 'Date'), ('f1', 'Int32'), ('f3', 'Float32'), ('f2', 'String'), ('f4', 'Nullable(String)'),
('f5', 'Array(UInt64)')])
# Several different models with the same table name, to simulate a table that changes over time
@ -258,3 +266,31 @@ class Model4Buffer_changed(BufferModel, Model4_changed):
@classmethod
def table_name(cls):
return 'model4buffer'
# Variant of the model stored in the 'model4' table (see table_name below),
# in which the f3 and f2 fields declare compression codecs.
class Model4_compressed(Model):
date = DateField()
f3 = DateTimeField(codec='Delta,ZSTD(10)')
f2 = StringField(codec='LZ4HC')
engine = MergeTree('date', ('date',))
@classmethod
def table_name(cls):
# Fixed table name, independent of the class name
return 'model4'
# Variant of the model stored in the 'mig' table (see table_name below),
# in which f1-f4 are wrapped in LowCardinalityField and f5 is an array of a low cardinality field.
class Model2LowCardinality(Model):
date = DateField()
f1 = LowCardinalityField(Int32Field())
f3 = LowCardinalityField(Float32Field())
f2 = LowCardinalityField(StringField())
f4 = LowCardinalityField(NullableField(StringField()))
# Arrays go outside: the low cardinality wrapper is the array's inner field
f5 = ArrayField(LowCardinalityField(UInt64Field()))
engine = MergeTree('date', ('date',))
@classmethod
def table_name(cls):
# Fixed table name, independent of the class name
return 'mig'

View File

@ -7,11 +7,7 @@ from infi.clickhouse_orm.query import Q
from .base_test_with_data import *
import logging
from datetime import date, datetime
try:
Enum # exists in Python 3.4+
except NameError:
from enum import Enum # use the enum34 library instead
from enum import Enum
class QuerySetTestCase(TestCaseWithData):
@ -227,7 +223,7 @@ class QuerySetTestCase(TestCaseWithData):
qs = Person.objects_in(self.database).order_by('first_name', 'last_name')
# Try different page sizes
for page_size in (1, 2, 7, 10, 30, 100, 150):
# Iterate over pages and collect all intances
# Iterate over pages and collect all instances
page_num = 1
instances = set()
while True: