diff --git a/CHANGELOG.md b/CHANGELOG.md index a5ff2d6..b0c13e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ Change Log ========== +v1.0.3 +------ +- Bug fix: `QuerySet.count()` ignores slicing +- Bug fix: wrong parentheses when building queries using Q objects +- Support Decimal fields +- Added `Database.add_setting` method + v1.0.2 ---------- - Include alias and materialized fields in queryset results diff --git a/docs/class_reference.md b/docs/class_reference.md index dc12d4e..264dd72 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -24,6 +24,16 @@ created on the ClickHouse server if it does not already exist. - `autocreate`: automatically create the database if does not exist (unless in readonly mode). +#### add_setting(name, value) + + +Adds a database setting that will be sent with every request. +For example, `db.add_setting("max_execution_time", 10)` will +limit query execution time to 10 seconds. +The name must be string, and the value is converted to string in case +it isn't. To remove a setting, pass `None` as the value. + + #### count(model_class, conditions=None) @@ -494,19 +504,41 @@ Returns the instance's column values as a tab-separated line. A newline is not i infi.clickhouse_orm.fields -------------------------- -### Field - - -Abstract base class for all field types. - -#### Field(default=None, alias=None, materialized=None, readonly=None) - - -### StringField +### ArrayField Extends Field -#### StringField(default=None, alias=None, materialized=None, readonly=None) +#### ArrayField(inner_field, default=None, alias=None, materialized=None, readonly=None) + + +### BaseEnumField + +Extends Field + + +Abstract base class for all enum-type fields. + +#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None, readonly=None) + + +### BaseFloatField + +Extends Field + + +Abstract base class for all float-type fields. 
+ +#### BaseFloatField(default=None, alias=None, materialized=None, readonly=None) + + +### BaseIntField + +Extends Field + + +Abstract base class for all integer-type fields. + +#### BaseIntField(default=None, alias=None, materialized=None, readonly=None) ### DateField @@ -523,48 +555,57 @@ Extends Field #### DateTimeField(default=None, alias=None, materialized=None, readonly=None) -### BaseIntField +### Decimal128Field + +Extends DecimalField + +#### Decimal128Field(scale, default=None, alias=None, materialized=None, readonly=None) + + +### Decimal32Field + +Extends DecimalField + +#### Decimal32Field(scale, default=None, alias=None, materialized=None, readonly=None) + + +### Decimal64Field + +Extends DecimalField + +#### Decimal64Field(scale, default=None, alias=None, materialized=None, readonly=None) + + +### DecimalField Extends Field -Abstract base class for all integer-type fields. +Base class for all decimal fields. Can also be used directly. -#### BaseIntField(default=None, alias=None, materialized=None, readonly=None) +#### DecimalField(precision, scale, default=None, alias=None, materialized=None, readonly=None) -### BaseFloatField +### Enum16Field -Extends Field +Extends BaseEnumField + +#### Enum16Field(enum_cls, default=None, alias=None, materialized=None, readonly=None) -Abstract base class for all float-type fields. +### Enum8Field -#### BaseFloatField(default=None, alias=None, materialized=None, readonly=None) +Extends BaseEnumField + +#### Enum8Field(enum_cls, default=None, alias=None, materialized=None, readonly=None) -### BaseEnumField - -Extends Field +### Field -Abstract base class for all enum-type fields. +Abstract base class for all field types. 
-#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None, readonly=None) - - -### ArrayField - -Extends Field - -#### ArrayField(inner_field, default=None, alias=None, materialized=None, readonly=None) - - -### NullableField - -Extends Field - -#### NullableField(inner_field, default=None, alias=None, materialized=None, extra_null_values=None) +#### Field(default=None, alias=None, materialized=None, readonly=None) ### FixedStringField @@ -574,39 +615,18 @@ Extends StringField #### FixedStringField(length, default=None, alias=None, materialized=None, readonly=None) -### UInt8Field +### Float32Field -Extends BaseIntField +Extends BaseFloatField -#### UInt8Field(default=None, alias=None, materialized=None, readonly=None) +#### Float32Field(default=None, alias=None, materialized=None, readonly=None) -### UInt16Field +### Float64Field -Extends BaseIntField +Extends BaseFloatField -#### UInt16Field(default=None, alias=None, materialized=None, readonly=None) - - -### UInt32Field - -Extends BaseIntField - -#### UInt32Field(default=None, alias=None, materialized=None, readonly=None) - - -### UInt64Field - -Extends BaseIntField - -#### UInt64Field(default=None, alias=None, materialized=None, readonly=None) - - -### Int8Field - -Extends BaseIntField - -#### Int8Field(default=None, alias=None, materialized=None, readonly=None) +#### Float64Field(default=None, alias=None, materialized=None, readonly=None) ### Int16Field @@ -630,32 +650,53 @@ Extends BaseIntField #### Int64Field(default=None, alias=None, materialized=None, readonly=None) -### Float32Field +### Int8Field -Extends BaseFloatField +Extends BaseIntField -#### Float32Field(default=None, alias=None, materialized=None, readonly=None) +#### Int8Field(default=None, alias=None, materialized=None, readonly=None) -### Float64Field +### NullableField -Extends BaseFloatField +Extends Field -#### Float64Field(default=None, alias=None, materialized=None, readonly=None) +#### NullableField(inner_field, 
default=None, alias=None, materialized=None, extra_null_values=None) -### Enum8Field +### StringField -Extends BaseEnumField +Extends Field -#### Enum8Field(enum_cls, default=None, alias=None, materialized=None, readonly=None) +#### StringField(default=None, alias=None, materialized=None, readonly=None) -### Enum16Field +### UInt16Field -Extends BaseEnumField +Extends BaseIntField -#### Enum16Field(enum_cls, default=None, alias=None, materialized=None, readonly=None) +#### UInt16Field(default=None, alias=None, materialized=None, readonly=None) + + +### UInt32Field + +Extends BaseIntField + +#### UInt32Field(default=None, alias=None, materialized=None, readonly=None) + + +### UInt64Field + +Extends BaseIntField + +#### UInt64Field(default=None, alias=None, materialized=None, readonly=None) + + +### UInt8Field + +Extends BaseIntField + +#### UInt8Field(default=None, alias=None, materialized=None, readonly=None) infi.clickhouse_orm.engines diff --git a/docs/field_types.md b/docs/field_types.md index 016f77f..d94782e 100644 --- a/docs/field_types.md +++ b/docs/field_types.md @@ -21,6 +21,10 @@ Currently the following field types are supported: | UInt64Field | UInt64 | int/long | Range 0 to 18446744073709551615 | Float32Field | Float32 | float | | Float64Field | Float64 | float | +| DecimalField | Decimal | Decimal | Pythonic values are rounded to fit the scale of the database field +| Decimal32Field | Decimal32 | Decimal | Ditto +| Decimal64Field | Decimal64 | Decimal | Ditto +| Decimal128Field | Decimal128 | Decimal | Ditto | Enum8Field | Enum8 | Enum | See below | Enum16Field | Enum16 | Enum | See below | ArrayField | Array | list | See below diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md index 230fbbb..b062608 100644 --- a/docs/models_and_databases.md +++ b/docs/models_and_databases.md @@ -37,7 +37,7 @@ To allow null values in a field, wrap it inside a `NullableField`: birthday = fields.NullableField(fields.DateField()) -In this case, the 
default value for that fields becomes `null` unless otherwide specified. +In this case, the default value for that field becomes `null` unless otherwise specified. ### Materialized fields @@ -51,9 +51,9 @@ It is not possible to specify a default value for a materialized field. ### Alias fields -An alias field is simply a different way to call another field in the model. For example: +An alias field is a field whose value is calculated by ClickHouse on the fly, as a function of other fields. It is not physically stored by the database. For example: - date_born = field.DateField(alias="birthday") + weekday_born = fields.UInt8Field(alias="toDayOfWeek(birthday)") Alias fields are read-only, meaning that their values are not sent to the database when inserting records. diff --git a/docs/toc.md b/docs/toc.md index 2ed6e89..e60a055 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -65,28 +65,32 @@ * [BufferModel](class_reference.md#buffermodel) * [DistributedModel](class_reference.md#distributedmodel) * [infi.clickhouse_orm.fields](class_reference.md#infi.clickhouse_orm.fields) - * [Field](class_reference.md#field) - * [StringField](class_reference.md#stringfield) + * [ArrayField](class_reference.md#arrayfield) + * [BaseEnumField](class_reference.md#baseenumfield) + * [BaseFloatField](class_reference.md#basefloatfield) + * [BaseIntField](class_reference.md#baseintfield) * [DateField](class_reference.md#datefield) * [DateTimeField](class_reference.md#datetimefield) - * [BaseIntField](class_reference.md#baseintfield) - * [BaseFloatField](class_reference.md#basefloatfield) - * [BaseEnumField](class_reference.md#baseenumfield) - * [ArrayField](class_reference.md#arrayfield) - * [NullableField](class_reference.md#nullablefield) + * [Decimal128Field](class_reference.md#decimal128field) + * [Decimal32Field](class_reference.md#decimal32field) + * [Decimal64Field](class_reference.md#decimal64field) + * [DecimalField](class_reference.md#decimalfield) + * 
[Enum16Field](class_reference.md#enum16field) + * [Enum8Field](class_reference.md#enum8field) + * [Field](class_reference.md#field) * [FixedStringField](class_reference.md#fixedstringfield) - * [UInt8Field](class_reference.md#uint8field) - * [UInt16Field](class_reference.md#uint16field) - * [UInt32Field](class_reference.md#uint32field) - * [UInt64Field](class_reference.md#uint64field) - * [Int8Field](class_reference.md#int8field) + * [Float32Field](class_reference.md#float32field) + * [Float64Field](class_reference.md#float64field) * [Int16Field](class_reference.md#int16field) * [Int32Field](class_reference.md#int32field) * [Int64Field](class_reference.md#int64field) - * [Float32Field](class_reference.md#float32field) - * [Float64Field](class_reference.md#float64field) - * [Enum8Field](class_reference.md#enum8field) - * [Enum16Field](class_reference.md#enum16field) + * [Int8Field](class_reference.md#int8field) + * [NullableField](class_reference.md#nullablefield) + * [StringField](class_reference.md#stringfield) + * [UInt16Field](class_reference.md#uint16field) + * [UInt32Field](class_reference.md#uint32field) + * [UInt64Field](class_reference.md#uint64field) + * [UInt8Field](class_reference.md#uint8field) * [infi.clickhouse_orm.engines](class_reference.md#infi.clickhouse_orm.engines) * [Engine](class_reference.md#engine) * [TinyLog](class_reference.md#tinylog) diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py index 6a89d1c..c7d31b1 100644 --- a/scripts/generate_ref.py +++ b/scripts/generate_ref.py @@ -133,6 +133,6 @@ if __name__ == '__main__': print module_doc([database.Database, database.DatabaseException]) module_doc([models.Model, models.BufferModel, models.DistributedModel]) - module_doc([fields.Field] + all_subclasses(fields.Field), False) + module_doc(sorted([fields.Field] + all_subclasses(fields.Field), key=lambda x: x.__name__), False) module_doc([engines.Engine] + all_subclasses(engines.Engine), False) module_doc([query.QuerySet, 
query.AggregateQuerySet]) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index b6018a9..a8d8afb 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -90,6 +90,7 @@ class Database(object): self.username = username self.password = password self.readonly = False + self.settings = {} self.db_exists = False self.db_exists = self._is_existing_database() if readonly: @@ -143,6 +144,20 @@ class Database(object): r = self._send(sql % (self.db_name, model_class.table_name())) return r.text.strip() == '1' + def add_setting(self, name, value): + ''' + Adds a database setting that will be sent with every request. + For example, `db.add_setting("max_execution_time", 10)` will + limit query execution time to 10 seconds. + The name must be string, and the value is converted to string in case + it isn't. To remove a setting, pass `None` as the value. + ''' + assert isinstance(name, string_types), 'Setting name must be a string' + if value is None: + self.settings.pop(name, None) + else: + self.settings[name] = str(value) + def insert(self, model_instances, batch_size=1000): ''' Insert records into the database. @@ -311,6 +326,7 @@ class Database(object): def _build_params(self, settings): params = dict(settings or {}) + params.update(self.settings) if self.db_exists: params['database'] = self.db_name if self.username: diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index dc51a57..8c11805 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -5,6 +5,7 @@ import iso8601 import pytz import time from calendar import timegm +from decimal import Decimal, localcontext from .utils import escape, parse_array, comma_join @@ -293,6 +294,67 @@ class Float64Field(BaseFloatField): db_type = 'Float64' +class DecimalField(Field): + ''' + Base class for all decimal fields. Can also be used directly. 
+ ''' + + def __init__(self, precision, scale, default=None, alias=None, materialized=None, readonly=None): + assert 1 <= precision <= 38, 'Precision must be between 1 and 38' + assert 0 <= scale <= precision, 'Scale must be between 0 and the given precision' + self.precision = precision + self.scale = scale + self.db_type = 'Decimal(%d,%d)' % (self.precision, self.scale) + with localcontext() as ctx: + ctx.prec = 38 + self.exp = Decimal(10) ** -self.scale # for rounding to the required scale + self.max_value = Decimal(10 ** (self.precision - self.scale)) - self.exp + self.min_value = -self.max_value + super(DecimalField, self).__init__(default, alias, materialized, readonly) + + def to_python(self, value, timezone_in_use): + if not isinstance(value, Decimal): + try: + value = Decimal(value) + except: + raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) + if not value.is_finite(): + raise ValueError('Non-finite value for %s - %r' % (self.__class__.__name__, value)) + return self._round(value) + + def to_db_string(self, value, quote=True): + # There's no need to call escape since numbers do not contain + # special characters, and never need quoting + return text_type(value) + + def _round(self, value): + return value.quantize(self.exp) + + def validate(self, value): + self._range_check(value, self.min_value, self.max_value) + + +class Decimal32Field(DecimalField): + + def __init__(self, scale, default=None, alias=None, materialized=None, readonly=None): + super(Decimal32Field, self).__init__(9, scale, default, alias, materialized, readonly) + self.db_type = 'Decimal32(%d)' % scale + + +class Decimal64Field(DecimalField): + + def __init__(self, scale, default=None, alias=None, materialized=None, readonly=None): + super(Decimal64Field, self).__init__(18, scale, default, alias, materialized, readonly) + self.db_type = 'Decimal64(%d)' % scale + + +class Decimal128Field(DecimalField): + + def __init__(self, scale, default=None, alias=None, 
materialized=None, readonly=None): + super(Decimal128Field, self).__init__(38, scale, default, alias, materialized, readonly) + self.db_type = 'Decimal128(%d)' % scale + + class BaseEnumField(Field): ''' Abstract base class for all enum-type fields. diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index d292462..d008513 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -80,6 +80,10 @@ class ModelBase(type): if db_type.startswith('FixedString'): length = int(db_type[12 : -1]) return orm_fields.FixedStringField(length) + # Decimal + if db_type.startswith('Decimal'): + precision, scale = [int(n.strip()) for n in db_type[8 : -1].split(',')] + return orm_fields.DecimalField(precision, scale) # Nullable if db_type.startswith('Nullable'): inner_field = cls.create_ad_hoc_field(db_type[9 : -1]) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index c05244e..1035dd4 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -188,7 +188,7 @@ class Q(object): q = Q() q._l_child = l_child q._r_child = r_child - q._mode = mode + q._mode = mode # AND/OR return q def _build_fov(self, key, value): @@ -203,7 +203,7 @@ class Q(object): sql = ' {} '.format(self._mode).join(fov.to_sql(model_cls) for fov in self._fovs) else: if self._l_child and self._r_child: - sql = '({}) {} ({})'.format( + sql = '({} {} {})'.format( self._l_child.to_sql(model_cls), self._mode, self._r_child.to_sql(model_cls)) else: return '1' @@ -316,11 +316,12 @@ class QuerySet(object): """ Returns the number of matching model instances. 
""" - if self._distinct: + if self._distinct or self._limits: # Use a subquery, since a simple count won't be accurate sql = u'SELECT count() FROM (%s)' % self.as_sql() raw = self._database.raw(sql) return int(raw) if raw else 0 + # Simple case return self._database.count(self._model_cls, self.conditions_as_sql()) def order_by(self, *field_names): diff --git a/tests/test_database.py b/tests/test_database.py index 4e36e33..c603dde 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -184,3 +184,15 @@ class DatabaseTestCase(TestCaseWithData): pass self.assertTrue(self.database.does_table_exist(Person)) self.assertFalse(self.database.does_table_exist(Person2)) + + def test_add_setting(self): + # Non-string setting name should not be accepted + with self.assertRaises(AssertionError): + self.database.add_setting(0, 1) + # Add a setting and see that it makes the query fail + self.database.add_setting('max_columns_to_read', 1) + with self.assertRaises(ServerError): + list(self.database.select('SELECT * from system.tables')) + # Remove the setting and see that now it works + self.database.add_setting('max_columns_to_read', None) + list(self.database.select('SELECT * from system.tables')) diff --git a/tests/test_decimal_fields.py b/tests/test_decimal_fields.py new file mode 100644 index 0000000..db87d62 --- /dev/null +++ b/tests/test_decimal_fields.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals +import unittest +from decimal import Decimal + +from infi.clickhouse_orm.database import Database, ServerError +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.engines import * + + +class DecimalFieldsTest(unittest.TestCase): + + def setUp(self): + self.database = Database('test-db') + self.database.add_setting('allow_experimental_decimal_type', 1) + try: + self.database.create_table(DecimalModel) + except ServerError as e: + if 'Unknown setting' in e.message: + # 
This ClickHouse version does not support decimals yet + raise unittest.SkipTest(e.message) + else: + raise + + def tearDown(self): + self.database.drop_database() + + def _insert_sample_data(self): + self.database.insert([ + DecimalModel(date_field='2016-08-20'), + DecimalModel(date_field='2016-08-21', dec=Decimal('1.234')), + DecimalModel(date_field='2016-08-22', dec32=Decimal('12342.2345')), + DecimalModel(date_field='2016-08-23', dec64=Decimal('12342.23456')), + DecimalModel(date_field='2016-08-24', dec128=Decimal('-4545456612342.234567')), + ]) + + def _assert_sample_data(self, results): + self.assertEqual(len(results), 5) + self.assertEqual(results[0].dec, Decimal(0)) + self.assertEqual(results[0].dec32, Decimal(17)) + self.assertEqual(results[1].dec, Decimal('1.234')) + self.assertEqual(results[2].dec32, Decimal('12342.2345')) + self.assertEqual(results[3].dec64, Decimal('12342.23456')) + self.assertEqual(results[4].dec128, Decimal('-4545456612342.234567')) + + def test_insert_and_select(self): + self._insert_sample_data() + query = 'SELECT * from $table ORDER BY date_field' + results = list(self.database.select(query, DecimalModel)) + self._assert_sample_data(results) + + def test_ad_hoc_model(self): + self._insert_sample_data() + query = 'SELECT * from decimalmodel ORDER BY date_field' + results = list(self.database.select(query)) + self._assert_sample_data(results) + + def test_rounding(self): + d = Decimal('11111.2340000000000000001') + self.database.insert([DecimalModel(date_field='2016-08-20', dec=d, dec32=d, dec64=d, dec128=d)]) + m = DecimalModel.objects_in(self.database)[0] + for val in (m.dec, m.dec32, m.dec64, m.dec128): + self.assertEqual(val, Decimal('11111.234')) + + def test_assignment_ok(self): + for value in (True, False, 17, 3.14, '20.5', Decimal('20.5')): + DecimalModel(dec=value) + + def test_assignment_error(self): + for value in ('abc', u'זה ארוך', None, float('NaN'), Decimal('-Infinity')): + with self.assertRaises(ValueError): + 
DecimalModel(dec=value) + + def test_aggregation(self): + self._insert_sample_data() + result = DecimalModel.objects_in(self.database).aggregate(m='min(dec)', n='max(dec)') + self.assertEqual(result[0].m, Decimal(0)) + self.assertEqual(result[0].n, Decimal('1.234')) + + def test_precision_and_scale(self): + # Go over all valid combinations + for precision in range(1, 39): + for scale in range(0, precision + 1): + f = DecimalField(precision, scale) + # Some invalid combinations + for precision, scale in [(0, 0), (-1, 7), (7, -1), (39, 5), (20, 21)]: + with self.assertRaises(AssertionError): + f = DecimalField(precision, scale) + + def test_min_max(self): + # In range + f = DecimalField(3, 1) + f.validate(f.to_python('99.9', None)) + f.validate(f.to_python('-99.9', None)) + # In range after rounding + f.validate(f.to_python('99.94', None)) + f.validate(f.to_python('-99.94', None)) + # Out of range + with self.assertRaises(ValueError): + f.validate(f.to_python('99.99', None)) + with self.assertRaises(ValueError): + f.validate(f.to_python('-99.99', None)) + # In range + f = Decimal32Field(4) + f.validate(f.to_python('99999.9999', None)) + f.validate(f.to_python('-99999.9999', None)) + # In range after rounding + f.validate(f.to_python('99999.99994', None)) + f.validate(f.to_python('-99999.99994', None)) + # Out of range + with self.assertRaises(ValueError): + f.validate(f.to_python('100000', None)) + with self.assertRaises(ValueError): + f.validate(f.to_python('-100000', None)) + + +class DecimalModel(Model): + + date_field = DateField() + dec = DecimalField(15, 3) + dec32 = Decimal32Field(4, default=17) + dec64 = Decimal64Field(5) + dec128 = Decimal128Field(6) + + engine = Memory() diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 1dd677b..a4fef14 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -75,6 +75,10 @@ class QuerySetTestCase(TestCaseWithData): self._test_qs(qs.filter((Q(first_name__in=['Warren', 'Whilemina', 'Whitney']) 
& Q(height__gte=1.7) | (Q(first_name__in=['Victoria', 'Victor', 'Venus']) & Q(height__lt=1.7)))), 4) self._test_qs(qs.filter(Q(first_name='Elton') & ~Q(last_name='Smith')), 1) + # Check operator precedence + self._test_qs(qs.filter(first_name='Cassady').filter(Q(last_name='Knapp') | Q(last_name='Rogers') | Q(last_name='Gregory')), 2) + self._test_qs(qs.filter(Q(first_name='Cassady') & Q(last_name='Knapp') | Q(first_name='Beatrice') & Q(last_name='Gregory')), 2) + self._test_qs(qs.filter(Q(first_name='Courtney') | Q(first_name='Cassady') & Q(last_name='Knapp')), 3) def test_filter_unicode_string(self): self.database.insert([ @@ -239,6 +243,12 @@ class QuerySetTestCase(TestCaseWithData): for obj in qs: self.assertTrue(obj.num_squared == obj.num ** 2) + def test_count_of_slice(self): + qs = Person.objects_in(self.database) + self._test_qs(qs[:70], 70) + self._test_qs(qs[70:80], 10) + self._test_qs(qs[80:], 20) + class AggregateTestCase(TestCaseWithData):