From b976899f75c7132d524be1135a95050dcfc1455b Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 31 Oct 2017 10:11:29 +0200 Subject: [PATCH] Added documentation about custom fields --- CHANGELOG.md | 1 + docs/field_types.md | 152 +++++++++++++++++++++++++++--------- docs/toc.md | 1 + tests/test_custom_fields.py | 115 +++++++++++++++++++++++++++ 4 files changed, 232 insertions(+), 37 deletions(-) create mode 100644 tests/test_custom_fields.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1adc4ec..30284fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Unreleased - Show field name in error message when invalid value assigned (tsionyx) - Bug fix: select query fails when query contains '$' symbol (M1hacka) - Prevent problems with AlterTable migrations related to field order (M1hacka) +- Added documentation about custom fields. v0.9.7 ------ diff --git a/docs/field_types.md b/docs/field_types.md index fae5c6c..0205804 100644 --- a/docs/field_types.md +++ b/docs/field_types.md @@ -48,33 +48,37 @@ Python 3.4 and higher supports Enums natively. 
When using previous Python versio Example of a model with an enum field: - Gender = Enum('Gender', 'male female unspecified') +```python +Gender = Enum('Gender', 'male female unspecified') - class Person(models.Model): +class Person(models.Model): - first_name = fields.StringField() - last_name = fields.StringField() - birthday = fields.DateField() - gender = fields.Enum32Field(Gender) + first_name = fields.StringField() + last_name = fields.StringField() + birthday = fields.DateField() + gender = fields.Enum32Field(Gender) - engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) + engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) - suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female) +suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female) +``` Working with array fields ------------------------- You can create array fields containing any data type, for example: - class SensorData(models.Model): +```python +class SensorData(models.Model): - date = fields.DateField() - temperatures = fields.ArrayField(fields.Float32Field()) - humidity_levels = fields.ArrayField(fields.UInt8Field()) + date = fields.DateField() + temperatures = fields.ArrayField(fields.Float32Field()) + humidity_levels = fields.ArrayField(fields.UInt8Field()) - engine = engines.MergeTree('date', ('date',)) + engine = engines.MergeTree('date', ('date',)) - data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66]) +data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66]) +``` Working with materialized and alias fields ------------------------------------------ @@ -87,22 +91,24 @@ Both field types can't be inserted into the database directly, so they are ignor Usage: - class Event(models.Model): +```python +class Event(models.Model): - created = fields.DateTimeField() - created_date = 
fields.DateTimeField(materialized='toDate(created)') - name = fields.StringField() - username = fields.StringField(alias='name') + created = fields.DateTimeField() + created_date = fields.DateTimeField(materialized='toDate(created)') + name = fields.StringField() + username = fields.StringField(alias='name') - engine = engines.MergeTree('created_date', ('created_date', 'created')) + engine = engines.MergeTree('created_date', ('created_date', 'created')) - obj = Event(created=datetime.now(), name='MyEvent') - db = Database('my_test_db') - db.insert([obj]) - # All values will be retrieved from database - db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event) - # created_date and username will contain a default value - db.select('SELECT * FROM $db.event', model_class=Event) +obj = Event(created=datetime.now(), name='MyEvent') +db = Database('my_test_db') +db.insert([obj]) +# All values will be retrieved from database +db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event) +# created_date and username will contain a default value +db.select('SELECT * FROM $db.event', model_class=Event) +``` Working with nullable fields ---------------------------- @@ -111,26 +117,98 @@ Also see some information [here](https://github.com/yandex/ClickHouse/blob/maste Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. 
For example: - class EventData(models.Model): +```python +class EventData(models.Model): - date = fields.DateField() - comment = fields.NullableField(fields.StringField(), extra_null_values={''}) - score = fields.NullableField(fields.UInt8Field()) - serie = fields.NullableField(fields.ArrayField(fields.UInt8Field())) + date = fields.DateField() + comment = fields.NullableField(fields.StringField(), extra_null_values={''}) + score = fields.NullableField(fields.UInt8Field()) + serie = fields.NullableField(fields.ArrayField(fields.UInt8Field())) - engine = engines.MergeTree('date', ('date',)) + engine = engines.MergeTree('date', ('date',)) - score_event = EventData(date=date.today(), comment=None, score=5, serie=None) - comment_event = EventData(date=date.today(), comment='Excellent!', score=None, serie=None) - another_event = EventData(date=date.today(), comment='', score=None, serie=None) - action_event = EventData(date=date.today(), comment='', score=None, serie=[1, 2, 3]) +score_event = EventData(date=date.today(), comment=None, score=5, serie=None) +comment_event = EventData(date=date.today(), comment='Excellent!', score=None, serie=None) +another_event = EventData(date=date.today(), comment='', score=None, serie=None) +action_event = EventData(date=date.today(), comment='', score=None, serie=[1, 2, 3]) +``` The `extra_null_values` parameter is an iterable of additional values that should be converted to `None`. NOTE: `ArrayField` of `NullableField` is not supported. Also `EnumField` cannot be nullable. +Creating custom field types +--------------------------- +Sometimes it is convenient to use data types that are supported in Python, but have no corresponding column type in ClickHouse. In these cases it is possible to define a custom field class that knows how to convert the Pythonic object to a suitable representation in the database, and vice versa. 
+ +For example, we can create a `BooleanField` which will hold `True` and `False` values, but write them to the database as 1 and 0 (in a `UInt8` column). For this purpose we'll subclass the `Field` class, and implement two methods: + +- `to_python` which converts any supported value to a `bool`. The method should know how to handle strings (which typically come from the database), booleans, and possibly other valid options. In case the value is not supported, it should raise a `ValueError`. +- `to_db_string` which converts a `bool` into a string for writing to the database. + +Here's the full implementation: + +```python +from infi.clickhouse_orm.fields import Field + +class BooleanField(Field): + + # The ClickHouse column type to use + db_type = 'UInt8' + + # The default value + class_default = False + + def to_python(self, value, timezone_in_use): + # Convert valid values to bool + if value in (1, '1', True): + return True + elif value in (0, '0', False): + return False + else: + raise ValueError('Invalid value for BooleanField: %r' % value) + + def to_db_string(self, value, quote=True): + # The value was already converted by to_python, so it's a bool + return '1' if value else '0' +``` + +Here's another example - a field for storing UUIDs in the database as 16-byte strings. We'll use the `UUID` class from Python's standard `uuid` module to handle the conversion from strings, ints and tuples into UUID instances.
So in our Python code we'll have the convenience of working with UUID objects, but they will be stored in the database as efficiently as possible: + +```python +from infi.clickhouse_orm.fields import Field +from infi.clickhouse_orm.utils import escape +from six import integer_types, string_types +from uuid import UUID + +class UUIDField(Field): + + # The ClickHouse column type to use + db_type = 'FixedString(16)' + + # The default value if empty + class_default = UUID(int=0) + + def to_python(self, value, timezone_in_use): + # Convert valid values to UUID instance + if isinstance(value, UUID): + return value + elif isinstance(value, string_types): + return UUID(bytes=value) if len(value) == 16 else UUID(value) + elif isinstance(value, integer_types): + return UUID(int=value) + elif isinstance(value, tuple): + return UUID(fields=value) + else: + raise ValueError('Invalid value for UUIDField: %r' % value) + + def to_db_string(self, value, quote=True): + # The value was already converted by to_python, so it's a UUID instance + return escape(value.bytes, quote) +``` + --- [<< Querysets](querysets.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md) \ No newline at end of file diff --git a/docs/toc.md b/docs/toc.md index cc1c8a6..3575c3f 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -31,6 +31,7 @@ * [Working with array fields](field_types.md#working-with-array-fields) * [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields) * [Working with nullable fields](field_types.md#working-with-nullable-fields) + * [Creating custom field types](field_types.md#creating-custom-field-types) * [Table Engines](table_engines.md#table-engines) * [Simple Engines](table_engines.md#simple-engines) diff --git a/tests/test_custom_fields.py b/tests/test_custom_fields.py new file mode 100644 index 0000000..dc4bd63 --- /dev/null +++ b/tests/test_custom_fields.py @@ -0,0 +1,115 @@ +from __future__ import unicode_literals +import unittest
+from six import integer_types, string_types
+from uuid import UUID
+from infi.clickhouse_orm.database import Database
+from infi.clickhouse_orm.fields import Field, Int16Field
+from infi.clickhouse_orm.models import Model
+from infi.clickhouse_orm.engines import Memory
+from infi.clickhouse_orm.utils import escape
+
+
+class CustomFieldsTest(unittest.TestCase):
+
+    def setUp(self):
+        self.database = Database('test-db')
+
+    def tearDown(self):
+        self.database.drop_database()
+
+    def test_boolean_field(self):
+        # Create a model: `i` records insertion order, `f` is the custom field under test
+        class TestModel(Model):
+            i = Int16Field()
+            f = BooleanField()
+            engine = Memory()
+        self.database.create_table(TestModel)
+        # Check valid values: insert one record per accepted spelling, then read them back ordered by `i`
+        for index, value in enumerate([1, '1', True, 0, '0', False]):
+            rec = TestModel(i=index, f=value)
+            self.database.insert([rec])
+        self.assertEqual([rec.f for rec in TestModel.objects_in(self.database).order_by('i')],
+                         [True, True, True, False, False, False])
+        # Check invalid values: building the model instance is expected to raise ValueError
+        for value in [None, 'zzz', -5, 7]:
+            with self.assertRaises(ValueError):
+                TestModel(i=1, f=value)
+
+    def test_uuid_field(self):
+        # Create a model: `i` records insertion order, `f` is the custom field under test
+        class TestModel(Model):
+            i = Int16Field()
+            f = UUIDField()
+            engine = Memory()
+        self.database.create_table(TestModel)
+        # Check valid values (all values are different spellings of the same UUID)
+        values = [
+            '{12345678-1234-5678-1234-567812345678}',
+            '12345678123456781234567812345678',
+            'urn:uuid:12345678-1234-5678-1234-567812345678',
+            '\x12\x34\x56\x78'*4,
+            (0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678),
+            0x12345678123456781234567812345678,
+        ]
+        for index, value in enumerate(values):
+            rec = TestModel(i=index, f=value)
+            self.database.insert([rec])
+        for rec in TestModel.objects_in(self.database):
+            self.assertEqual(rec.f, UUID(values[0]))
+        # Check that ClickHouse encoding functions are supported
+        for rec in self.database.select("SELECT i, UUIDNumToString(f) AS f FROM testmodel", TestModel):
+            self.assertEqual(rec.f, UUID(values[0]))
+        for rec in self.database.select("SELECT 1 as i, UUIDStringToNum('12345678-1234-5678-1234-567812345678') AS f", TestModel):
+            self.assertEqual(rec.f, UUID(values[0]))
+        # Check invalid values: building the model instance is expected to raise ValueError
+        for value in [None, 'zzz', -1, '123']:
+            with self.assertRaises(ValueError):
+                TestModel(i=1, f=value)
+
+
+class BooleanField(Field):
+
+    # The ClickHouse column type to use
+    db_type = 'UInt8'
+
+    # The default value if empty
+    class_default = False
+
+    def to_python(self, value, timezone_in_use):
+        # Convert valid values to bool; anything outside the two accepted sets is rejected
+        if value in (1, '1', True):
+            return True
+        elif value in (0, '0', False):
+            return False
+        else:
+            raise ValueError('Invalid value for BooleanField: %r' % value)
+
+    def to_db_string(self, value, quote=True):
+        # The value was already converted by to_python, so it's a bool
+        return '1' if value else '0'
+
+
+class UUIDField(Field):
+
+    # The ClickHouse column type to use
+    db_type = 'FixedString(16)'
+
+    # The default value if empty
+    class_default = UUID(int=0)
+
+    def to_python(self, value, timezone_in_use):
+        # Convert valid values to a UUID instance; a 16-character string is taken as raw bytes
+        if isinstance(value, UUID):
+            return value
+        elif isinstance(value, string_types):
+            return UUID(bytes=value) if len(value) == 16 else UUID(value)
+        elif isinstance(value, integer_types):  # (int, long) on Python 2, (int,) on Python 3
+            return UUID(int=value)
+        elif isinstance(value, tuple):
+            return UUID(fields=value)
+        else:
+            raise ValueError('Invalid value for UUIDField: %r' % value)
+
+    def to_db_string(self, value, quote=True):
+        # The value was already converted by to_python, so it's a UUID instance
+        return escape(value.bytes, quote)