Added documentation about custom fields

This commit is contained in:
Itai Shirav 2017-10-31 10:11:29 +02:00
parent 49a76b2ef4
commit b976899f75
4 changed files with 232 additions and 37 deletions

View File

@ -9,6 +9,7 @@ Unreleased
- Show field name in error message when invalid value assigned (tsionyx)
- Bug fix: select query fails when query contains '$' symbol (M1hacka)
- Prevent problems with AlterTable migrations related to field order (M1hacka)
- Added documentation about custom fields.
v0.9.7
------

View File

@ -48,33 +48,37 @@ Python 3.4 and higher supports Enums natively. When using previous Python versio
Example of a model with an enum field:
Gender = Enum('Gender', 'male female unspecified')
```python
Gender = Enum('Gender', 'male female unspecified')
class Person(models.Model):
class Person(models.Model):
first_name = fields.StringField()
last_name = fields.StringField()
birthday = fields.DateField()
gender = fields.Enum32Field(Gender)
first_name = fields.StringField()
last_name = fields.StringField()
birthday = fields.DateField()
gender = fields.Enum32Field(Gender)
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female)
suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female)
```
Working with array fields
-------------------------
You can create array fields containing any data type, for example:
class SensorData(models.Model):
```python
class SensorData(models.Model):
date = fields.DateField()
temperatures = fields.ArrayField(fields.Float32Field())
humidity_levels = fields.ArrayField(fields.UInt8Field())
date = fields.DateField()
temperatures = fields.ArrayField(fields.Float32Field())
humidity_levels = fields.ArrayField(fields.UInt8Field())
engine = engines.MergeTree('date', ('date',))
engine = engines.MergeTree('date', ('date',))
data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
```
Working with materialized and alias fields
------------------------------------------
@ -87,22 +91,24 @@ Both field types can't be inserted into the database directly, so they are ignor
Usage:
class Event(models.Model):
```python
class Event(models.Model):
created = fields.DateTimeField()
created_date = fields.DateTimeField(materialized='toDate(created)')
name = fields.StringField()
username = fields.StringField(alias='name')
created = fields.DateTimeField()
created_date = fields.DateTimeField(materialized='toDate(created)')
name = fields.StringField()
username = fields.StringField(alias='name')
engine = engines.MergeTree('created_date', ('created_date', 'created'))
engine = engines.MergeTree('created_date', ('created_date', 'created'))
obj = Event(created=datetime.now(), name='MyEvent')
db = Database('my_test_db')
db.insert([obj])
# All values will be retrieved from database
db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
# created_date and username will contain a default value
db.select('SELECT * FROM $db.event', model_class=Event)
obj = Event(created=datetime.now(), name='MyEvent')
db = Database('my_test_db')
db.insert([obj])
# All values will be retrieved from database
db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
# created_date and username will contain a default value
db.select('SELECT * FROM $db.event', model_class=Event)
```
Working with nullable fields
----------------------------
@ -111,26 +117,98 @@ Also see some information [here](https://github.com/yandex/ClickHouse/blob/maste
Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example:
class EventData(models.Model):
```python
class EventData(models.Model):
date = fields.DateField()
comment = fields.NullableField(fields.StringField(), extra_null_values={''})
score = fields.NullableField(fields.UInt8Field())
serie = fields.NullableField(fields.ArrayField(fields.UInt8Field()))
date = fields.DateField()
comment = fields.NullableField(fields.StringField(), extra_null_values={''})
score = fields.NullableField(fields.UInt8Field())
serie = fields.NullableField(fields.ArrayField(fields.UInt8Field()))
engine = engines.MergeTree('date', ('date',))
engine = engines.MergeTree('date', ('date',))
score_event = EventData(date=date.today(), comment=None, score=5, serie=None)
comment_event = EventData(date=date.today(), comment='Excellent!', score=None, serie=None)
another_event = EventData(date=date.today(), comment='', score=None, serie=None)
action_event = EventData(date=date.today(), comment='', score=None, serie=[1, 2, 3])
score_event = EventData(date=date.today(), comment=None, score=5, serie=None)
comment_event = EventData(date=date.today(), comment='Excellent!', score=None, serie=None)
another_event = EventData(date=date.today(), comment='', score=None, serie=None)
action_event = EventData(date=date.today(), comment='', score=None, serie=[1, 2, 3])
```
The `extra_null_values` parameter is an iterable of additional values that should be converted
to `None`.
NOTE: `ArrayField` of `NullableField` is not supported. Also `EnumField` cannot be nullable.
Creating custom field types
---------------------------
Sometimes it is convenient to use data types that are supported in Python, but have no corresponding column type in ClickHouse. In these cases it is possible to define a custom field class that knows how to convert the Pythonic object to a suitable representation in the database, and vice versa.
For example, we can create a `BooleanField` which will hold `True` and `False` values, but write them to the database as 1 and 0 respectively (in a `UInt8` column). For this purpose we'll subclass the `Field` class, and implement two methods:
- `to_python` which converts any supported value to a `bool`. The method should know how to handle strings (which typically come from the database), booleans, and possibly other valid options. In case the value is not supported, it should raise a `ValueError`.
- `to_db_string` which converts a `bool` into a string for writing to the database.
Here's the full implementation:
```python
from infi.clickhouse_orm.fields import Field
class BooleanField(Field):

    # Column type used for this field in ClickHouse
    db_type = 'UInt8'

    # The default value
    class_default = False

    def to_python(self, value, timezone_in_use):
        # Map each accepted representation to its bool; reject anything else.
        # Tuples (not sets) are used on purpose so unhashable values still
        # end up in the ValueError below.
        if value in (1, '1', True):
            return True
        if value in (0, '0', False):
            return False
        raise ValueError('Invalid value for BooleanField: %r' % value)

    def to_db_string(self, value, quote=True):
        # to_python has already turned the value into a bool
        if value:
            return '1'
        return '0'
```
Here's another example - a field for storing UUIDs in the database as 16-byte strings. We'll use Python's built-in `UUID` class to handle the conversion from strings, ints and tuples into UUID instances. So in our Python code we'll have the convenience of working with UUID objects, but they will be stored in the database as efficiently as possible:
```python
from infi.clickhouse_orm.fields import Field
from infi.clickhouse_orm.utils import escape
from six import integer_types, string_types
from uuid import UUID

class UUIDField(Field):

    # The ClickHouse column type to use
    db_type = 'FixedString(16)'

    # The default value if empty
    class_default = UUID(int=0)

    def to_python(self, value, timezone_in_use):
        # Convert valid values to UUID instance.
        # Accepted inputs: a UUID, a string (either the 16 raw bytes or any
        # textual form the UUID constructor parses), an integer, or a tuple
        # of UUID fields. Anything else raises ValueError.
        if isinstance(value, UUID):
            return value
        elif isinstance(value, string_types):
            if len(value) == 16:
                # On Python 3, UUID(bytes=...) needs a real bytes object, so
                # text is encoded as latin-1 (one byte per code point).
                # Python 2 byte strings are passed through untouched.
                raw = value if isinstance(value, bytes) else value.encode('latin1')
                return UUID(bytes=raw)
            return UUID(value)
        elif isinstance(value, integer_types):
            # integer_types is (int, long) on Python 2 and (int,) on Python 3,
            # where the name `long` does not exist
            return UUID(int=value)
        elif isinstance(value, tuple):
            return UUID(fields=value)
        else:
            raise ValueError('Invalid value for UUIDField: %r' % value)

    def to_db_string(self, value, quote=True):
        # The value was already converted by to_python, so it's a UUID instance
        raw = value.bytes
        if not isinstance(raw, str):
            # Python 3: UUID.bytes is a bytes object; turn it into text first.
            # NOTE(review): assumes escape() only handles text strings - confirm
            raw = raw.decode('latin1')
        return escape(raw, quote)
```
---
[<< Querysets](querysets.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md)

View File

@ -31,6 +31,7 @@
* [Working with array fields](field_types.md#working-with-array-fields)
* [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields)
* [Working with nullable fields](field_types.md#working-with-nullable-fields)
* [Creating custom field types](field_types.md#creating-custom-field-types)
* [Table Engines](table_engines.md#table-engines)
* [Simple Engines](table_engines.md#simple-engines)

115
tests/test_custom_fields.py Normal file
View File

@ -0,0 +1,115 @@
from __future__ import unicode_literals

import unittest
from uuid import UUID

from six import integer_types, string_types

from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.engines import Memory
from infi.clickhouse_orm.fields import Field, Int16Field
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.utils import escape
class CustomFieldsTest(unittest.TestCase):
    """Round-trip tests for the custom BooleanField and UUIDField classes.

    Each test creates a table for a small model, inserts records built from
    several input representations, reads them back, and verifies that invalid
    values are rejected with ValueError.
    """

    def setUp(self):
        self.database = Database('test-db')

    def tearDown(self):
        self.database.drop_database()

    def test_boolean_field(self):
        # Create a model
        class TestModel(Model):
            i = Int16Field()
            f = BooleanField()
            engine = Memory()
        self.database.create_table(TestModel)
        # Check valid values
        for index, value in enumerate([1, '1', True, 0, '0', False]):
            rec = TestModel(i=index, f=value)
            self.database.insert([rec])
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual([rec.f for rec in TestModel.objects_in(self.database).order_by('i')],
                         [True, True, True, False, False, False])
        # Check invalid values
        for value in [None, 'zzz', -5, 7]:
            with self.assertRaises(ValueError):
                TestModel(i=1, f=value)

    def test_uuid_field(self):
        # Create a model
        class TestModel(Model):
            i = Int16Field()
            f = UUIDField()
            engine = Memory()
        self.database.create_table(TestModel)
        # Check valid values (all values are the same UUID)
        values = [
            '{12345678-1234-5678-1234-567812345678}',
            '12345678123456781234567812345678',
            'urn:uuid:12345678-1234-5678-1234-567812345678',
            '\x12\x34\x56\x78'*4,
            (0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678),
            0x12345678123456781234567812345678,
        ]
        for index, value in enumerate(values):
            rec = TestModel(i=index, f=value)
            self.database.insert([rec])
        for rec in TestModel.objects_in(self.database):
            self.assertEqual(rec.f, UUID(values[0]))
        # Check that ClickHouse encoding functions are supported
        for rec in self.database.select("SELECT i, UUIDNumToString(f) AS f FROM testmodel", TestModel):
            self.assertEqual(rec.f, UUID(values[0]))
        for rec in self.database.select("SELECT 1 as i, UUIDStringToNum('12345678-1234-5678-1234-567812345678') AS f", TestModel):
            self.assertEqual(rec.f, UUID(values[0]))
        # Check invalid values
        for value in [None, 'zzz', -1, '123']:
            with self.assertRaises(ValueError):
                TestModel(i=1, f=value)
class BooleanField(Field):

    # Column type used for this field in ClickHouse
    db_type = 'UInt8'

    # The default value if empty
    class_default = False

    def to_python(self, value, timezone_in_use):
        # Map each accepted representation to its bool; reject anything else.
        # Tuples (not sets) are used on purpose so unhashable values still
        # end up in the ValueError below.
        if value in (1, '1', True):
            return True
        if value in (0, '0', False):
            return False
        raise ValueError('Invalid value for BooleanField: %r' % value)

    def to_db_string(self, value, quote=True):
        # to_python has already turned the value into a bool
        if value:
            return '1'
        return '0'
class UUIDField(Field):

    # The ClickHouse column type to use
    db_type = 'FixedString(16)'

    # The default value if empty
    class_default = UUID(int=0)

    def to_python(self, value, timezone_in_use):
        # Convert valid values to UUID instance.
        # Accepted inputs: a UUID, a string (either the 16 raw bytes or any
        # textual form the UUID constructor parses), an integer, or a tuple
        # of UUID fields. Anything else raises ValueError.
        if isinstance(value, UUID):
            return value
        elif isinstance(value, string_types):
            if len(value) == 16:
                # On Python 3, UUID(bytes=...) needs a real bytes object, so
                # text is encoded as latin-1 (one byte per code point).
                # Python 2 byte strings are passed through untouched.
                raw = value if isinstance(value, bytes) else value.encode('latin1')
                return UUID(bytes=raw)
            return UUID(value)
        elif isinstance(value, integer_types):
            # integer_types is (int, long) on Python 2 and (int,) on Python 3,
            # where the name `long` does not exist
            return UUID(int=value)
        elif isinstance(value, tuple):
            return UUID(fields=value)
        else:
            raise ValueError('Invalid value for UUIDField: %r' % value)

    def to_db_string(self, value, quote=True):
        # The value was already converted by to_python, so it's a UUID instance
        raw = value.bytes
        if not isinstance(raw, str):
            # Python 3: UUID.bytes is a bytes object; turn it into text first.
            # NOTE(review): assumes escape() only handles text strings - confirm
            raw = raw.decode('latin1')
        return escape(raw, quote)