Mirror of https://github.com/Infinidat/infi.clickhouse_orm.git (synced 2024-11-10 19:36:33 +03:00)

Commit bbab55e6d6: Merge remote-tracking branch 'origin/develop' into feature/DateTime64

# Conflicts:
#	src/infi/clickhouse_orm/models.py
@@ -1,6 +1,11 @@
 Change Log
 ==========
 
+Unreleased
+----------
+- Support for model constraints
+- Support for data skipping indexes
+
 v2.0.1
 ------
 - Remove unnecessary import of `six`
@@ -178,7 +178,7 @@ Unrecognized field names will cause an `AttributeError`.
 #### Model.create_table_sql(db)
 
 
-Returns the SQL command for creating a table for this model.
+Returns the SQL statement for creating a table for this model.
 
 
 #### Model.drop_table_sql(db)
@@ -308,7 +308,7 @@ Unrecognized field names will cause an `AttributeError`.
 #### BufferModel.create_table_sql(db)
 
 
-Returns the SQL command for creating a table for this model.
+Returns the SQL statement for creating a table for this model.
 
 
 #### BufferModel.drop_table_sql(db)
@@ -422,12 +422,147 @@ Returns the instance's column values as a tab-separated line. A newline is not included.
 - `include_readonly`: if false, returns only fields that can be inserted into database.
 
 
+### MergeModel
+
+Extends Model
+
+
+Model for the Merge engine.
+Predefines a virtual `_table` column, and ensures that rows cannot be inserted into this table type.
+https://clickhouse.tech/docs/en/single/index.html#document-table_engines/merge
+
+#### MergeModel(**kwargs)
+
+
+Creates a model instance, using keyword arguments as field values.
+Since values are immediately converted to their Pythonic type,
+invalid values will cause a `ValueError` to be raised.
+Unrecognized field names will cause an `AttributeError`.
+
+
+#### MergeModel.create_table_sql(db)
+
+
+Returns the SQL statement for creating a table for this model.
+
+
+#### MergeModel.drop_table_sql(db)
+
+
+Returns the SQL command for deleting this model's table.
+
+
+#### MergeModel.fields(writable=False)
+
+
+Returns an `OrderedDict` of the model's fields (from name to `Field` instance).
+If `writable` is true, only writable fields are included.
+Callers should not modify the dictionary.
+
+
+#### MergeModel.from_tsv(line, field_names, timezone_in_use=UTC, database=None)
+
+
+Create a model instance from a tab-separated line. The line may or may not include a newline.
+The `field_names` list must match the fields defined in the model, but does not have to include all of them.
+
+- `line`: the TSV-formatted data.
+- `field_names`: names of the model fields in the data.
+- `timezone_in_use`: the timezone to use when parsing dates and datetimes.
+- `database`: if given, sets the database that this instance belongs to.
+
+
+#### get_database()
+
+
+Gets the `Database` that this model instance belongs to.
+Returns `None` unless the instance was read from the database or written to it.
+
+
+#### get_field(name)
+
+
+Gets a `Field` instance given its name, or `None` if not found.
+
+
+#### MergeModel.has_funcs_as_defaults()
+
+
+Return True if some of the model's fields use a function expression
+as a default value. This requires special handling when inserting instances.
+
+
+#### MergeModel.is_read_only()
+
+
+Returns true if the model is marked as read only.
+
+
+#### MergeModel.is_system_model()
+
+
+Returns true if the model represents a system table.
+
+
+#### MergeModel.objects_in(database)
+
+
+Returns a `QuerySet` for selecting instances of this model class.
+
+
+#### set_database(db)
+
+
+Sets the `Database` that this model instance belongs to.
+This is done automatically when the instance is read from the database or written to it.
+
+
+#### MergeModel.table_name()
+
+
+Returns the model's database table name. By default this is the
+class name converted to lowercase. Override this if you want to use
+a different table name.
+
+
+#### to_db_string()
+
+
+Returns the instance as a bytestring ready to be inserted into the database.
+
+
+#### to_dict(include_readonly=True, field_names=None)
+
+
+Returns the instance's column values as a dict.
+
+- `include_readonly`: if false, returns only fields that can be inserted into database.
+- `field_names`: an iterable of field names to return (optional)
+
+
+#### to_tskv(include_readonly=True)
+
+
+Returns the instance's column keys and values as a tab-separated line. A newline is not included.
+Fields that were not assigned a value are omitted.
+
+- `include_readonly`: if false, returns only fields that can be inserted into database.
+
+
+#### to_tsv(include_readonly=True)
+
+
+Returns the instance's column values as a tab-separated line. A newline is not included.
+
+- `include_readonly`: if false, returns only fields that can be inserted into database.
+
+
 ### DistributedModel
 
 Extends Model
 
 
-Model for Distributed engine
+Model class for use with a `Distributed` engine.
 
 #### DistributedModel(**kwargs)
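To make the new `MergeModel` reference concrete, here is a minimal sketch, assuming identically-structured tables whose names match the regex passed to the `Merge` engine (all model and table names here are invented for the example):

```python
from infi.clickhouse_orm import Model, MergeModel, StringField, DateField, Merge, MergeTree

class Events2019(Model):
    # One of several identically-structured tables (hypothetical)
    date = DateField()
    event = StringField()
    engine = MergeTree('date', ('date',))

class AllEvents(MergeModel):
    # Read-only model over every table matching the regex; the virtual
    # `_table` column reports which underlying table each row came from
    date = DateField()
    event = StringField()
    engine = Merge('^events')
```

Rows are read through `AllEvents.objects_in(database)` as usual; inserting through this model type is rejected, as noted above.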
@@ -441,6 +576,9 @@ Unrecognized field names will cause an `AttributeError`.
 #### DistributedModel.create_table_sql(db)
 
 
+Returns the SQL statement for creating a table for this model.
+
+
 #### DistributedModel.drop_table_sql(db)
@@ -541,6 +679,10 @@ Returns a `QuerySet` for selecting instances of this model class.
 #### set_database(db)
 
 
+Sets the `Database` that this model instance belongs to.
+This is done automatically when the instance is read from the database or written to it.
+
+
 #### DistributedModel.table_name()
@@ -581,6 +723,94 @@ Returns the instance's column values as a tab-separated line. A newline is not included.
 - `include_readonly`: if false, returns only fields that can be inserted into database.
 
 
+### Constraint
+
+
+Defines a model constraint.
+
+#### Constraint(expr)
+
+
+Initializer. Expects an expression that ClickHouse will verify when inserting data.
+
+
+#### create_table_sql()
+
+
+Returns the SQL statement for defining this constraint during table creation.
+
+
+### Index
+
+
+Defines a data-skipping index.
+
+#### Index(expr, type, granularity)
+
+
+Initializer.
+
+- `expr` - a column, expression, or tuple of columns and expressions to index.
+- `type` - the index type. Use one of the following methods to specify the type:
+  `Index.minmax`, `Index.set`, `Index.ngrambf_v1`, `Index.tokenbf_v1` or `Index.bloom_filter`.
+- `granularity` - index block size (number of multiples of the `index_granularity` defined by the engine).
+
+
+#### bloom_filter(false_positive=0.025)
+
+
+An index that stores a Bloom filter containing values of the index expression.
+
+- `false_positive` - the probability (between 0 and 1) of receiving a false positive
+  response from the filter
+
+
+#### create_table_sql()
+
+
+Returns the SQL statement for defining this index during table creation.
+
+
+#### minmax()
+
+
+An index that stores extremes of the specified expression (if the expression is a tuple, then it stores
+extremes for each element of the tuple). The stored info is used for skipping blocks of data like the primary key.
+
+
+#### ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)
+
+
+An index that stores a Bloom filter containing all ngrams from a block of data.
+Works only with strings. Can be used for optimization of `equals`, `like` and `in` expressions.
+
+- `n` — ngram size
+- `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here,
+  for example 256 or 512, because it can be compressed well).
+- `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
+- `random_seed` — The seed for Bloom filter hash functions.
+
+
+#### set(max_rows)
+
+
+An index that stores unique values of the specified expression (no more than max_rows rows,
+or unlimited if max_rows=0). Uses the values to check if the WHERE expression is not satisfiable
+on a block of data.
+
+
+#### tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)
+
+
+An index that stores a Bloom filter containing string tokens. Tokens are sequences
+separated by non-alphanumeric characters.
+
+- `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here,
+  for example 256 or 512, because it can be compressed well).
+- `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
+- `random_seed` — The seed for Bloom filter hash functions.
+
+
 infi.clickhouse_orm.fields
 --------------------------
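To make the two reference entries above concrete, a small sketch (model and field names are illustrative) of the DDL fragments these classes contribute, based on the `create_table_sql` implementations added in this commit:

```python
from infi.clickhouse_orm import Model, DateField, Int32Field, Constraint, Index, F, MergeTree

class Event(Model):
    date = DateField()
    amount = Int32Field()

    # Verified by ClickHouse on every insert
    not_in_future = Constraint(date <= F.today())
    # Data-skipping index over the amount column
    amount_index = Index(amount, type=Index.minmax(), granularity=1)

    engine = MergeTree('date', ('date',))

print(Event._constraints['not_in_future'].create_table_sql())
# CONSTRAINT `not_in_future` CHECK lessOrEquals(`date`, today())
print(Event._indexes['amount_index'].create_table_sql())
# INDEX `amount_index` `amount` TYPE minmax GRANULARITY 1
```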
@@ -9,17 +9,18 @@ Defining Models
 ---------------
 
 Models are defined in a way reminiscent of Django's ORM, by subclassing `Model`:
 
 ```python
 from infi.clickhouse_orm import Model, StringField, DateField, Float32Field, MergeTree
 
 class Person(Model):
 
     first_name = StringField()
     last_name = StringField()
     birthday = DateField()
     height = Float32Field()
 
     engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
 ```
 
 The columns in the database table are represented by model fields. Each field has a type, which matches the type of the corresponding database column. All the supported fields types are listed [here](field_types.md).
@@ -66,14 +67,46 @@ For additional details see [here](field_options.md).
 ### Table Names
 
 The table name used for the model is its class name, converted to lowercase. To override the default name, implement the `table_name` method:
 
 ```python
 class Person(Model):
 
     ...
 
     @classmethod
     def table_name(cls):
         return 'people'
 ```
 
+### Model Constraints
+
+It is possible to define constraints which ClickHouse verifies when data is inserted. Trying to insert invalid records will raise a `ServerError`. Each constraint has a name and an expression to validate. For example:
+
+```python
+class Person(Model):
+
+    ...
+
+    # Ensure that the birthday is not a future date
+    birthday_is_in_the_past = Constraint(birthday <= F.today())
+```
+
+### Data Skipping Indexes
+
+Models that use an engine from the `MergeTree` family can define additional indexes over one or more columns or expressions. These indexes are used in SELECT queries for reducing the amount of data to read from the disk by skipping big blocks of data that do not satisfy the query's conditions.
+
+For example:
+
+```python
+class Person(Model):
+
+    ...
+
+    # A minmax index that can help find people taller or shorter than some height
+    height_index = Index(height, type=Index.minmax(), granularity=2)
+
+    # A trigram index that can help find substrings inside people names
+    names_index = Index((F.lower(first_name), F.lower(last_name)),
+                        type=Index.ngrambf_v1(3, 256, 2, 0), granularity=1)
+```
+
 Using Models
 ------------
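A brief, hedged illustration of the payoff (assuming `database` is an existing `Database` instance and the `Person` table above is populated): queries whose conditions involve the indexed expressions let ClickHouse skip whole blocks of non-matching rows.

```python
# The minmax index on height lets ClickHouse skip blocks of rows whose
# height range cannot possibly satisfy the condition:
tall_people = list(Person.objects_in(database).filter(Person.height > 2.0))
```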
@@ -33,19 +33,19 @@ Each migration file is expected to contain a list of `operations`, for example:
 The following operations are supported:
 
 
-**CreateTable**
+### CreateTable
 
 A migration operation that creates a table for a given model class. If the table already exists, the operation does nothing.
 
 In case the model class is a `BufferModel`, the operation first creates the underlying on-disk table, and then creates the buffer table.
 
 
-**DropTable**
+### DropTable
 
 A migration operation that drops the table of a given model class. If the table does not exist, the operation does nothing.
 
 
-**AlterTable**
+### AlterTable
 
 A migration operation that compares the table of a given model class to the model's fields, and alters the table to match the model. The operation can:
@@ -56,14 +56,19 @@ A migration operation that compares the table of a given model class to the model's fields, and alters the table to match the model. The operation can:
 Default values are not altered by this operation.
 
 
-**AlterTableWithBuffer**
+### AlterTableWithBuffer
 
 A compound migration operation for altering a buffer table and its underlying on-disk table. The buffer table is dropped, the on-disk table is altered, and then the buffer table is re-created. This is the procedure recommended in the ClickHouse documentation for handling scenarios in which the underlying table needs to be modified.
 
 Applying this migration operation to a regular table has the same effect as an `AlterTable` operation.
 
 
-**RunPython**
+### AlterConstraints
+
+A migration operation that adds new constraints from the model to the database table, and drops obsolete ones. Constraints are identified by their names, so a change in an existing constraint will not be detected unless its name was changed too. ClickHouse does not check that the constraints hold for existing data in the table.
+
+
+### RunPython
 
 A migration operation that runs a Python function. The function receives the `Database` instance to operate on.
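A hedged sketch of a migration file that applies `AlterConstraints` (the module path and model are hypothetical; the pattern mirrors the sample migrations added in this commit):

```python
# e.g. my_migrations/0002.py (illustrative path)
from infi.clickhouse_orm import migrations
from my_app.models import Person  # hypothetical model whose constraints changed

operations = [
    migrations.AlterConstraints(Person)
]
```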
@@ -77,9 +82,9 @@ A migration operation that runs a Python function. The function receives the `Database` instance to operate on.
 ]
 
 
-**RunSQL**
+### RunSQL
 
-A migration operation that runs raw SQL queries. It expects a string containing an SQL query, or an array of SQL-query strings.
+A migration operation that runs raw SQL statements. It expects a string containing an SQL statement, or a list of statements.
 
 Example:
docs/toc.md
@@ -10,6 +10,8 @@
     * [Materialized fields](models_and_databases.md#materialized-fields)
     * [Alias fields](models_and_databases.md#alias-fields)
     * [Table Names](models_and_databases.md#table-names)
+    * [Model Constraints](models_and_databases.md#model-constraints)
+    * [Data Skipping Indexes](models_and_databases.md#data-skipping-indexes)
 * [Using Models](models_and_databases.md#using-models)
     * [Inserting to the Database](models_and_databases.md#inserting-to-the-database)
     * [Reading from the Database](models_and_databases.md#reading-from-the-database)
@@ -58,6 +60,13 @@
 
 * [Schema Migrations](schema_migrations.md#schema-migrations)
     * [Writing Migrations](schema_migrations.md#writing-migrations)
+        * [CreateTable](schema_migrations.md#createtable)
+        * [DropTable](schema_migrations.md#droptable)
+        * [AlterTable](schema_migrations.md#altertable)
+        * [AlterTableWithBuffer](schema_migrations.md#altertablewithbuffer)
+        * [AlterConstraints](schema_migrations.md#alterconstraints)
+        * [RunPython](schema_migrations.md#runpython)
+        * [RunSQL](schema_migrations.md#runsql)
     * [Running Migrations](schema_migrations.md#running-migrations)
 
 * [System Models](system_models.md#system-models)
@@ -74,7 +83,10 @@
 * [infi.clickhouse_orm.models](class_reference.md#inficlickhouse_ormmodels)
     * [Model](class_reference.md#model)
     * [BufferModel](class_reference.md#buffermodel)
+    * [MergeModel](class_reference.md#mergemodel)
     * [DistributedModel](class_reference.md#distributedmodel)
+    * [Constraint](class_reference.md#constraint)
+    * [Index](class_reference.md#index)
 * [infi.clickhouse_orm.fields](class_reference.md#inficlickhouse_ormfields)
     * [ArrayField](class_reference.md#arrayfield)
     * [BaseEnumField](class_reference.md#baseenumfield)
@@ -132,7 +132,7 @@ if __name__ == '__main__':
     print('===============')
     print()
     module_doc([database.Database, database.DatabaseException])
-    module_doc([models.Model, models.BufferModel, models.DistributedModel])
+    module_doc([models.Model, models.BufferModel, models.MergeModel, models.DistributedModel, models.Constraint, models.Index])
     module_doc(sorted([fields.Field] + all_subclasses(fields.Field), key=lambda x: x.__name__), False)
     module_doc([engines.Engine] + all_subclasses(engines.Engine), False)
     module_doc([query.QuerySet, query.AggregateQuerySet, query.Q])
@@ -1,9 +1,8 @@
 from datetime import date, datetime, tzinfo, timedelta
 from functools import wraps
 from inspect import signature, Parameter
 from types import FunctionType
 
-from .utils import is_iterable, comma_join, NO_VALUE
+from .utils import is_iterable, comma_join, NO_VALUE, arg_to_sql
 from .query import Cond, QuerySet
@@ -263,43 +262,9 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
         else:
             prefix = self.name
             sep = ', '
-        arg_strs = (F._arg_to_sql(arg) for arg in self.args if arg != NO_VALUE)
+        arg_strs = (arg_to_sql(arg) for arg in self.args if arg != NO_VALUE)
         return prefix + '(' + sep.join(arg_strs) + ')'
 
-    @staticmethod
-    def _arg_to_sql(arg):
-        """
-        Converts a function argument to SQL string according to its type.
-        Supports functions, model fields, strings, dates, datetimes, timedeltas, booleans,
-        None, numbers, timezones, arrays/iterables.
-        """
-        from .fields import Field, StringField, DateTimeField, DateField
-        if isinstance(arg, F):
-            return arg.to_sql()
-        if isinstance(arg, Field):
-            return "`%s`" % arg
-        if isinstance(arg, str):
-            return StringField().to_db_string(arg)
-        if isinstance(arg, datetime):
-            return "toDateTime(%s)" % DateTimeField().to_db_string(arg)
-        if isinstance(arg, date):
-            return "toDate('%s')" % arg.isoformat()
-        if isinstance(arg, timedelta):
-            return "toIntervalSecond(%d)" % int(arg.total_seconds())
-        if isinstance(arg, bool):
-            return str(int(arg))
-        if isinstance(arg, tzinfo):
-            return StringField().to_db_string(arg.tzname(None))
-        if arg is None:
-            return 'NULL'
-        if isinstance(arg, QuerySet):
-            return "(%s)" % arg
-        if isinstance(arg, tuple):
-            return '(' + comma_join(F._arg_to_sql(x) for x in arg) + ')'
-        if is_iterable(arg):
-            return '[' + comma_join(F._arg_to_sql(x) for x in arg) + ']'
-        return str(arg)
-
     # Arithmetic functions
 
     @staticmethod
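For context on the refactoring above: `F.to_sql()` now delegates argument rendering to the shared `utils.arg_to_sql`. A small sketch of the resulting behavior (the values are arbitrary):

```python
from datetime import date
from infi.clickhouse_orm import F

# Comparison operators on F build function expressions; each argument is
# rendered by arg_to_sql according to its Python type:
expr = F.today() > date(2020, 1, 1)
print(expr.to_sql())  # greater(today(), toDate('2020-01-01'))
```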
@@ -7,7 +7,7 @@ import logging
 logger = logging.getLogger('migrations')
 
 
-class Operation(object):
+class Operation():
     '''
     Base class for migration operations.
     '''
@@ -16,22 +16,40 @@ class Operation(object):
         raise NotImplementedError()   # pragma: no cover
 
 
-class CreateTable(Operation):
+class ModelOperation(Operation):
+    '''
+    Base class for migration operations that work on a specific model.
+    '''
+
+    def __init__(self, model_class):
+        '''
+        Initializer.
+        '''
+        self.model_class = model_class
+        self.table_name = model_class.table_name()
+
+    def _alter_table(self, database, cmd):
+        '''
+        Utility for running ALTER TABLE commands.
+        '''
+        cmd = "ALTER TABLE $db.`%s` %s" % (self.table_name, cmd)
+        logger.debug(cmd)
+        database.raw(cmd)
+
+
+class CreateTable(ModelOperation):
     '''
     A migration operation that creates a table for a given model class.
     '''
 
-    def __init__(self, model_class):
-        self.model_class = model_class
-
     def apply(self, database):
-        logger.info('    Create table %s', self.model_class.table_name())
+        logger.info('    Create table %s', self.table_name)
         if issubclass(self.model_class, BufferModel):
             database.create_table(self.model_class.engine.main_model)
         database.create_table(self.model_class)
 
 
-class AlterTable(Operation):
+class AlterTable(ModelOperation):
     '''
     A migration operation that compares the table of a given model class to
     the model's fields, and alters the table to match the model. The operation can:
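The new `ModelOperation` base class factors the per-model setup and the ALTER TABLE helper out of the individual operations, which also makes custom operations easy to sketch (a hypothetical example, not part of the library):

```python
from infi.clickhouse_orm import migrations

class TruncateTable(migrations.ModelOperation):
    # Hypothetical custom operation; it reuses the table_name captured
    # by ModelOperation.__init__ and the $db substitution done by raw()
    def apply(self, database):
        database.raw('TRUNCATE TABLE $db.`%s`' % self.table_name)
```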
@@ -41,20 +59,12 @@ class AlterTable(Operation):
     Default values are not altered by this operation.
     '''
 
-    def __init__(self, model_class):
-        self.model_class = model_class
-
     def _get_table_fields(self, database):
-        query = "DESC `%s`.`%s`" % (database.db_name, self.model_class.table_name())
+        query = "DESC `%s`.`%s`" % (database.db_name, self.table_name)
         return [(row.name, row.type) for row in database.select(query)]
 
-    def _alter_table(self, database, cmd):
-        cmd = "ALTER TABLE `%s`.`%s` %s" % (database.db_name, self.model_class.table_name(), cmd)
-        logger.debug(cmd)
-        database._send(cmd)
-
     def apply(self, database):
-        logger.info('    Alter table %s', self.model_class.table_name())
+        logger.info('    Alter table %s', self.table_name)
 
         # Note that MATERIALIZED and ALIAS fields are always at the end of the DESC,
         # so ADD COLUMN ... AFTER doesn't affect it
@@ -100,16 +110,13 @@ class AlterTable(Operation):
             self._alter_table(database, 'MODIFY COLUMN %s %s' % (field_name, model_fields[field_name]))
 
 
-class AlterTableWithBuffer(Operation):
+class AlterTableWithBuffer(ModelOperation):
     '''
     A migration operation for altering a buffer table and its underlying on-disk table.
     The buffer table is dropped, the on-disk table is altered, and then the buffer table
     is re-created.
     '''
 
-    def __init__(self, model_class):
-        self.model_class = model_class
-
     def apply(self, database):
         if issubclass(self.model_class, BufferModel):
             DropTable(self.model_class).apply(database)
@@ -119,25 +126,108 @@ class AlterTableWithBuffer(Operation):
             AlterTable(self.model_class).apply(database)
 
 
-class DropTable(Operation):
+class DropTable(ModelOperation):
     '''
     A migration operation that drops the table of a given model class.
     '''
 
-    def __init__(self, model_class):
-        self.model_class = model_class
-
     def apply(self, database):
-        logger.info('    Drop table %s', self.model_class.table_name())
+        logger.info('    Drop table %s', self.table_name)
         database.drop_table(self.model_class)
 
 
+class AlterConstraints(ModelOperation):
+    '''
+    A migration operation that adds new constraints from the model to the database
+    table, and drops obsolete ones. Constraints are identified by their names, so
+    a change in an existing constraint will not be detected unless its name was changed too.
+    ClickHouse does not check that the constraints hold for existing data in the table.
+    '''
+
+    def apply(self, database):
+        logger.info('    Alter constraints for %s', self.table_name)
+        existing = self._get_constraint_names(database)
+        # Go over constraints in the model
+        for constraint in self.model_class._constraints.values():
+            # Check if it's a new constraint
+            if constraint.name not in existing:
+                logger.info('        Add constraint %s', constraint.name)
+                self._alter_table(database, 'ADD %s' % constraint.create_table_sql())
+            else:
+                existing.remove(constraint.name)
+        # Remaining constraints in `existing` are obsolete
+        for name in existing:
+            logger.info('        Drop constraint %s', name)
+            self._alter_table(database, 'DROP CONSTRAINT `%s`' % name)
+
+    def _get_constraint_names(self, database):
+        '''
+        Returns a set containing the names of existing constraints in the table.
+        '''
+        import re
+        table_def = database.raw('SHOW CREATE TABLE $db.`%s`' % self.table_name)
+        matches = re.findall(r'\sCONSTRAINT\s+`?(.+?)`?\s+CHECK\s', table_def)
+        return set(matches)
+
+
+class AlterIndexes(ModelOperation):
+    '''
+    A migration operation that adds new indexes from the model to the database
+    table, and drops obsolete ones. Indexes are identified by their names, so
+    a change in an existing index will not be detected unless its name was changed too.
+    '''
+
+    def __init__(self, model_class, reindex=False):
+        '''
+        Initializer.
+        By default ClickHouse does not build indexes over existing data, only for
+        new data. Passing `reindex=True` will run `OPTIMIZE TABLE` in order to build
+        the indexes over the existing data.
+        '''
+        super().__init__(model_class)
+        self.reindex = reindex
+
+    def apply(self, database):
+        logger.info('    Alter indexes for %s', self.table_name)
+        existing = self._get_index_names(database)
+        logger.info(existing)
+        # Go over indexes in the model
+        for index in self.model_class._indexes.values():
+            # Check if it's a new index
+            if index.name not in existing:
+                logger.info('        Add index %s', index.name)
+                self._alter_table(database, 'ADD %s' % index.create_table_sql())
+            else:
+                existing.remove(index.name)
+        # Remaining indexes in `existing` are obsolete
+        for name in existing:
+            logger.info('        Drop index %s', name)
+            self._alter_table(database, 'DROP INDEX `%s`' % name)
+        # Reindex
+        if self.reindex:
+            logger.info('    Build indexes on table')
+            database.raw('OPTIMIZE TABLE $db.`%s` FINAL' % self.table_name)
+
+    def _get_index_names(self, database):
+        '''
+        Returns a set containing the names of existing indexes in the table.
+        '''
+        import re
+        table_def = database.raw('SHOW CREATE TABLE $db.`%s`' % self.table_name)
+        matches = re.findall(r'\sINDEX\s+`?(.+?)`?\s+', table_def)
+        return set(matches)
+
+
 class RunPython(Operation):
     '''
-    A migration operation that executes given python function on database
+    A migration operation that executes a Python function.
     '''
+
     def __init__(self, func):
-        assert callable(func), "'func' parameter must be function"
+        '''
+        Initializer. The given Python function will be called with a single
+        argument - the Database instance to apply the migration to.
+        '''
+        assert callable(func), "'func' argument must be function"
         self._func = func
 
     def apply(self, database):
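A hedged sketch of how `AlterIndexes` slots into a migrations module (the path and model name are illustrative; `reindex=True` mirrors the sample migration added in this commit):

```python
# e.g. my_migrations/0003.py (illustrative path)
from infi.clickhouse_orm import migrations
from my_app.models import Event  # hypothetical model with Index definitions

operations = [
    # Add/drop data-skipping indexes to match the model; reindex=True also
    # runs OPTIMIZE TABLE ... FINAL so existing rows get indexed
    migrations.AlterIndexes(Event, reindex=True),
]
```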
@@ -147,14 +237,17 @@ class RunPython(Operation):
 
 class RunSQL(Operation):
     '''
-    A migration operation that executes given SQL on database
+    A migration operation that executes arbitrary SQL statements.
     '''
 
     def __init__(self, sql):
+        '''
+        Initializer. The given sql argument must be a valid SQL statement or
+        list of statements.
+        '''
         if isinstance(sql, str):
             sql = [sql]
 
-        assert isinstance(sql, list), "'sql' parameter must be string or list of strings"
+        assert isinstance(sql, list), "'sql' argument must be string or list of strings"
         self._sql = sql
 
     def apply(self, database):
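And a usage sketch for `RunSQL` (the statements are invented for illustration):

```python
from infi.clickhouse_orm import migrations

operations = [
    # Either a single statement...
    migrations.RunSQL("OPTIMIZE TABLE `some_table` FINAL"),
    # ...or a list of statements
    migrations.RunSQL([
        "ALTER TABLE `some_table` DROP COLUMN obsolete_field",
        "OPTIMIZE TABLE `some_table` FINAL",
    ]),
]
```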
@@ -1,12 +1,13 @@
 from __future__ import unicode_literals
 import sys
 from collections import OrderedDict
+from itertools import chain
 from logging import getLogger
 
 import pytz
 
 from .fields import Field, StringField
-from .utils import parse_tsv, NO_VALUE, get_subclass_names, unescape
+from .utils import parse_tsv, NO_VALUE, get_subclass_names, arg_to_sql, unescape
 from .query import QuerySet
 from .funcs import F
 from .engines import Merge, Distributed
@@ -14,6 +15,110 @@ from .engines import Merge, Distributed
 logger = getLogger('clickhouse_orm')
 
 
+class Constraint:
+    '''
+    Defines a model constraint.
+    '''
+
+    name = None    # this is set by the parent model
+    parent = None  # this is set by the parent model
+
+    def __init__(self, expr):
+        '''
+        Initializer. Expects an expression that ClickHouse will verify when inserting data.
+        '''
+        self.expr = expr
+
+    def create_table_sql(self):
+        '''
+        Returns the SQL statement for defining this constraint during table creation.
+        '''
+        return 'CONSTRAINT `%s` CHECK %s' % (self.name, arg_to_sql(self.expr))
+
+
+class Index:
+    '''
+    Defines a data-skipping index.
+    '''
+
+    name = None    # this is set by the parent model
+    parent = None  # this is set by the parent model
+
+    def __init__(self, expr, type, granularity):
+        '''
+        Initializer.
+
+        - `expr` - a column, expression, or tuple of columns and expressions to index.
+        - `type` - the index type. Use one of the following methods to specify the type:
+          `Index.minmax`, `Index.set`, `Index.ngrambf_v1`, `Index.tokenbf_v1` or `Index.bloom_filter`.
+        - `granularity` - index block size (number of multiples of the `index_granularity` defined by the engine).
+        '''
+        self.expr = expr
+        self.type = type
+        self.granularity = granularity
+
+    def create_table_sql(self):
+        '''
+        Returns the SQL statement for defining this index during table creation.
+        '''
+        return 'INDEX `%s` %s TYPE %s GRANULARITY %d' % (self.name, arg_to_sql(self.expr), self.type, self.granularity)
+
+    @staticmethod
+    def minmax():
+        '''
+        An index that stores extremes of the specified expression (if the expression is tuple, then it stores
+        extremes for each element of tuple). The stored info is used for skipping blocks of data like the primary key.
+        '''
+        return 'minmax'
+
+    @staticmethod
+    def set(max_rows):
+        '''
+        An index that stores unique values of the specified expression (no more than max_rows rows,
+        or unlimited if max_rows=0). Uses the values to check if the WHERE expression is not satisfiable
+        on a block of data.
+        '''
+        return 'set(%d)' % max_rows
+
+    @staticmethod
+    def ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed):
+        '''
+        An index that stores a Bloom filter containing all ngrams from a block of data.
+        Works only with strings. Can be used for optimization of equals, like and in expressions.
+
+        - `n` — ngram size
+        - `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here,
+          for example 256 or 512, because it can be compressed well).
+        - `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
+        - `random_seed` — The seed for Bloom filter hash functions.
+        '''
+        return 'ngrambf_v1(%d, %d, %d, %d)' % (n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)
+
+    @staticmethod
+    def tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed):
+        '''
+        An index that stores a Bloom filter containing string tokens. Tokens are sequences
+        separated by non-alphanumeric characters.
+
+        - `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here,
+          for example 256 or 512, because it can be compressed well).
+        - `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
+        - `random_seed` — The seed for Bloom filter hash functions.
+        '''
+        return 'tokenbf_v1(%d, %d, %d)' % (size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)
+
+    @staticmethod
+    def bloom_filter(false_positive=0.025):
+        '''
+        An index that stores a Bloom filter containing values of the index expression.
+
+        - `false_positive` - the probability (between 0 and 1) of receiving a false positive
+          response from the filter
+        '''
+        return 'bloom_filter(%f)' % false_positive
+
+
 class ModelBase(type):
     '''
     A metaclass for ORM models. It adds the _fields list to model classes.
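A quick interactive sketch of what the type helpers evaluate to (the outputs follow directly from the format strings above):

```python
>>> Index.minmax()
'minmax'
>>> Index.set(1000)
'set(1000)'
>>> Index.ngrambf_v1(3, 256, 2, 0)
'ngrambf_v1(3, 256, 2, 0)'
>>> Index.bloom_filter()
'bloom_filter(0.025000)'
```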
@@ -22,16 +127,27 @@ class ModelBase(type):
     ad_hoc_model_cache = {}
 
     def __new__(cls, name, bases, attrs):
-        # Collect fields from parent classes
-        base_fields = dict()
+
+        # Collect fields, constraints and indexes from parent classes
+        fields = {}
+        constraints = {}
+        indexes = {}
         for base in bases:
             if isinstance(base, ModelBase):
-                base_fields.update(base._fields)
+                fields.update(base._fields)
+                constraints.update(base._constraints)
+                indexes.update(base._indexes)
 
-        fields = base_fields
+        # Add fields, constraints and indexes from this class
+        for n, obj in attrs.items():
+            if isinstance(obj, Field):
+                fields[n] = obj
+            elif isinstance(obj, Constraint):
+                constraints[n] = obj
+            elif isinstance(obj, Index):
+                indexes[n] = obj
 
-        # Build a list of fields, in the order they were listed in the class
-        fields.update({n: f for n, f in attrs.items() if isinstance(f, Field)})
+        # Convert fields to a list of (name, field) tuples, in the order they were listed in the class
         fields = sorted(fields.items(), key=lambda item: item[1].creation_counter)
 
         # Build a dictionary of default values
@@ -46,19 +162,22 @@ class ModelBase(type):
             else:
                 defaults[n] = f.to_python(f.default, pytz.UTC)
 
         # Create the model class
         attrs = dict(
             attrs,
             _fields=OrderedDict(fields),
+            _constraints=constraints,
+            _indexes=indexes,
             _writable_fields=OrderedDict([f for f in fields if not f[1].readonly]),
             _defaults=defaults,
             _has_funcs_as_defaults=has_funcs_as_defaults
         )
         model = super(ModelBase, cls).__new__(cls, str(name), bases, attrs)
 
-        # Let each field know its parent and its own name
-        for n, f in fields:
-            setattr(f, 'parent', model)
-            setattr(f, 'name', n)
+        # Let each field, constraint and index know its parent and its own name
+        for n, obj in chain(fields, constraints.items(), indexes.items()):
+            setattr(obj, 'parent', model)
+            setattr(obj, 'name', n)
 
         return model
@@ -89,7 +208,7 @@ class ModelBase(type):
         if db_type.startswith('DateTime('):
-            # Some functions return DateTimeField with timezone in brackets
+            # DateTime with timezone
             return orm_fields.DateTimeField()
+        # DateTime64
         if db_type.startswith('DateTime64('):
             precision, *timezone = [s.strip() for s in db_type[11:-1].split(',')]
             return orm_fields.DateTime64Field(
@@ -229,13 +348,21 @@ class Model(metaclass=ModelBase):
     @classmethod
     def create_table_sql(cls, db):
         '''
-        Returns the SQL command for creating a table for this model.
+        Returns the SQL statement for creating a table for this model.
         '''
         parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db.db_name, cls.table_name())]
-        cols = []
+        # Fields
+        items = []
         for name, field in cls.fields().items():
-            cols.append('    %s %s' % (name, field.get_sql(db=db)))
-        parts.append(',\n'.join(cols))
+            items.append('    %s %s' % (name, field.get_sql(db=db)))
+        # Constraints
+        for c in cls._constraints.values():
+            items.append('    %s' % c.create_table_sql())
+        # Indexes
+        for i in cls._indexes.values():
+            items.append('    %s' % i.create_table_sql())
+        parts.append(',\n'.join(items))
+        # Engine
         parts.append(')')
         parts.append('ENGINE = ' + cls.engine.create_table_sql(db))
         return '\n'.join(parts)
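For intuition, the statement assembled above comes out roughly as follows for a model with one constraint and one index. This is a sketch using the hypothetical `Event` model from the earlier example; the exact column types and engine clause depend on the fields, the engine, and the server version:

```python
from infi.clickhouse_orm import Database

db = Database('my_db')  # assumes a running ClickHouse; 'my_db' is illustrative
print(Event.create_table_sql(db))
# Roughly:
# CREATE TABLE IF NOT EXISTS `my_db`.`event` (
#     date Date,
#     amount Int32,
#     CONSTRAINT `not_in_future` CHECK lessOrEquals(`date`, today()),
#     INDEX `amount_index` `amount` TYPE minmax GRANULARITY 1
# )
# ENGINE = MergeTree(...)
```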
@@ -355,7 +482,7 @@ class BufferModel(Model):
     @classmethod
     def create_table_sql(cls, db):
         '''
-        Returns the SQL command for creating a table for this model.
+        Returns the SQL statement for creating a table for this model.
         '''
         parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` AS `%s`.`%s`' % (db.db_name, cls.table_name(), db.db_name,
                                                                         cls.engine.main_model.table_name())]
@@ -377,6 +504,9 @@ class MergeModel(Model):
 
     @classmethod
     def create_table_sql(cls, db):
+        '''
+        Returns the SQL statement for creating a table for this model.
+        '''
         assert isinstance(cls.engine, Merge), "engine must be an instance of engines.Merge"
         parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db.db_name, cls.table_name())]
         cols = []
@@ -393,10 +523,14 @@ class MergeModel(Model):
 
 class DistributedModel(Model):
     """
-    Model for Distributed engine
+    Model class for use with a `Distributed` engine.
     """
 
     def set_database(self, db):
+        '''
+        Sets the `Database` that this model instance belongs to.
+        This is done automatically when the instance is read from the database or written to it.
+        '''
         assert isinstance(self.engine, Distributed), "engine must be an instance of engines.Distributed"
         res = super(DistributedModel, self).set_database(db)
         return res
@@ -454,6 +588,9 @@ class DistributedModel(Model):
 
     @classmethod
     def create_table_sql(cls, db):
+        '''
+        Returns the SQL statement for creating a table for this model.
+        '''
         assert isinstance(cls.engine, Distributed), "engine must be engines.Distributed instance"
 
         cls.fix_engine_table()
@@ -466,4 +603,4 @@ class DistributedModel(Model):
 
 
 # Expose only relevant classes in import *
-__all__ = get_subclass_names(locals(), Model)
+__all__ = get_subclass_names(locals(), (Model, Constraint, Index))
@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 import codecs
 import re
-from datetime import date, datetime
+from datetime import date, datetime, tzinfo, timedelta
 
 
 SPECIAL_CHARS = {
@@ -42,6 +42,40 @@ def string_or_func(obj):
     return obj.to_sql() if hasattr(obj, 'to_sql') else obj
 
 
+def arg_to_sql(arg):
+    """
+    Converts a function argument to SQL string according to its type.
+    Supports functions, model fields, strings, dates, datetimes, timedeltas, booleans,
+    None, numbers, timezones, arrays/iterables.
+    """
+    from infi.clickhouse_orm import Field, StringField, DateTimeField, DateField, F, QuerySet
+    if isinstance(arg, F):
+        return arg.to_sql()
+    if isinstance(arg, Field):
+        return "`%s`" % arg
+    if isinstance(arg, str):
+        return StringField().to_db_string(arg)
+    if isinstance(arg, datetime):
+        return "toDateTime(%s)" % DateTimeField().to_db_string(arg)
+    if isinstance(arg, date):
+        return "toDate('%s')" % arg.isoformat()
+    if isinstance(arg, timedelta):
+        return "toIntervalSecond(%d)" % int(arg.total_seconds())
+    if isinstance(arg, bool):
+        return str(int(arg))
+    if isinstance(arg, tzinfo):
+        return StringField().to_db_string(arg.tzname(None))
+    if arg is None:
+        return 'NULL'
+    if isinstance(arg, QuerySet):
+        return "(%s)" % arg
+    if isinstance(arg, tuple):
+        return '(' + comma_join(arg_to_sql(x) for x in arg) + ')'
+    if is_iterable(arg):
+        return '[' + comma_join(arg_to_sql(x) for x in arg) + ']'
+    return str(arg)
+
+
 def parse_tsv(line):
     if isinstance(line, bytes):
         line = line.decode()
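A few illustrative conversions (the results follow from the branches above; the values are arbitrary):

```python
from datetime import date, timedelta

from infi.clickhouse_orm.utils import arg_to_sql

print(arg_to_sql(None))                  # NULL
print(arg_to_sql(True))                  # 1
print(arg_to_sql(date(2020, 1, 1)))      # toDate('2020-01-01')
print(arg_to_sql(timedelta(minutes=5)))  # toIntervalSecond(300)
print(arg_to_sql([1, 2, 3]))             # [1, 2, 3]
```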
tests/sample_migrations/0016.py (new file)

@@ -0,0 +1,6 @@
+from infi.clickhouse_orm import migrations
+from ..test_migrations import *
+
+operations = [
+    migrations.CreateTable(ModelWithConstraints)
+]
tests/sample_migrations/0017.py (new file)

@@ -0,0 +1,6 @@
+from infi.clickhouse_orm import migrations
+from ..test_migrations import *
+
+operations = [
+    migrations.AlterConstraints(ModelWithConstraints2)
+]
tests/sample_migrations/0018.py (new file)

@@ -0,0 +1,6 @@
+from infi.clickhouse_orm import migrations
+from ..test_migrations import *
+
+operations = [
+    migrations.CreateTable(ModelWithIndex)
+]
tests/sample_migrations/0019.py (new file)

@@ -0,0 +1,6 @@
+from infi.clickhouse_orm import migrations
+from ..test_migrations import *
+
+operations = [
+    migrations.AlterIndexes(ModelWithIndex2, reindex=True)
+]
tests/test_constraints.py (new file)

@@ -0,0 +1,44 @@
+import unittest
+
+from infi.clickhouse_orm import *
+from .base_test_with_data import Person
+
+
+class ConstraintsTest(unittest.TestCase):
+
+    def setUp(self):
+        self.database = Database('test-db', log_statements=True)
+        if self.database.server_version < (19, 14, 3, 3):
+            raise unittest.SkipTest('ClickHouse version too old')
+        self.database.create_table(PersonWithConstraints)
+
+    def tearDown(self):
+        self.database.drop_database()
+
+    def test_insert_valid_values(self):
+        self.database.insert([
+            PersonWithConstraints(first_name="Mike", last_name="Caruzo", birthday="2000-01-01", height=1.66)
+        ])
+
+    def test_insert_invalid_values(self):
+        with self.assertRaises(ServerError) as e:
+            self.database.insert([
+                PersonWithConstraints(first_name="Mike", last_name="Caruzo", birthday="2100-01-01", height=1.66)
+            ])
+            self.assertEqual(e.code, 469)
+            self.assertTrue('Constraint `birthday_in_the_past`' in e.message)
+
+        with self.assertRaises(ServerError) as e:
+            self.database.insert([
+                PersonWithConstraints(first_name="Mike", last_name="Caruzo", birthday="1970-01-01", height=3)
+            ])
+            self.assertEqual(e.code, 469)
+            self.assertTrue('Constraint `max_height`' in e.message)
+
+
+class PersonWithConstraints(Person):
+
+    birthday_in_the_past = Constraint(Person.birthday <= F.today())
+    max_height = Constraint(Person.height <= 2.75)
|
32
tests/test_indexes.py
Normal file
32
tests/test_indexes.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
import unittest
|
||||
|
||||
from infi.clickhouse_orm import *
|
||||
|
||||
|
||||
class IndexesTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.database = Database('test-db', log_statements=True)
|
||||
if self.database.server_version < (20, 1, 2, 4):
|
||||
raise unittest.SkipTest('ClickHouse version too old')
|
||||
|
||||
def tearDown(self):
|
||||
self.database.drop_database()
|
||||
|
||||
def test_all_index_types(self):
|
||||
self.database.create_table(ModelWithIndexes)
|
||||
|
||||
|
||||
class ModelWithIndexes(Model):
|
||||
|
||||
date = DateField()
|
||||
f1 = Int32Field()
|
||||
f2 = StringField()
|
||||
|
||||
i1 = Index(f1, type=Index.minmax(), granularity=1)
|
||||
i2 = Index(f1, type=Index.set(1000), granularity=2)
|
||||
i3 = Index(f2, type=Index.ngrambf_v1(3, 256, 2, 0), granularity=1)
|
||||
i4 = Index(F.lower(f2), type=Index.tokenbf_v1(256, 2, 0), granularity=2)
|
||||
i5 = Index((F.toQuarter(date), f2), type=Index.bloom_filter(), granularity=3)
|
||||
|
||||
engine = MergeTree('date', ('date',))
|
|
@@ -1,8 +1,8 @@
 from __future__ import unicode_literals
 import unittest
 
-from infi.clickhouse_orm.database import Database
-from infi.clickhouse_orm.models import Model, BufferModel
+from infi.clickhouse_orm.database import Database, ServerError
+from infi.clickhouse_orm.models import Model, BufferModel, Constraint, Index
 from infi.clickhouse_orm.fields import *
 from infi.clickhouse_orm.engines import *
 from infi.clickhouse_orm.migrations import MigrationHistory
@@ -27,55 +27,58 @@ class MigrationsTestCase(unittest.TestCase):
     def tearDown(self):
         self.database.drop_database()
 
-    def tableExists(self, model_class):
+    def table_exists(self, model_class):
         query = "EXISTS TABLE $db.`%s`" % model_class.table_name()
         return next(self.database.select(query)).result == 1
 
-    def getTableFields(self, model_class):
+    def get_table_fields(self, model_class):
         query = "DESC `%s`.`%s`" % (self.database.db_name, model_class.table_name())
         return [(row.name, row.type) for row in self.database.select(query)]
 
+    def get_table_def(self, model_class):
+        return self.database.raw('SHOW CREATE TABLE $db.`%s`' % model_class.table_name())
+
     def test_migrations(self):
         # Creation and deletion of table
         self.database.migrate('tests.sample_migrations', 1)
-        self.assertTrue(self.tableExists(Model1))
+        self.assertTrue(self.table_exists(Model1))
         self.database.migrate('tests.sample_migrations', 2)
-        self.assertFalse(self.tableExists(Model1))
+        self.assertFalse(self.table_exists(Model1))
         self.database.migrate('tests.sample_migrations', 3)
-        self.assertTrue(self.tableExists(Model1))
+        self.assertTrue(self.table_exists(Model1))
         # Adding, removing and altering simple fields
-        self.assertEqual(self.getTableFields(Model1), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
+        self.assertEqual(self.get_table_fields(Model1), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
         self.database.migrate('tests.sample_migrations', 4)
-        self.assertEqual(self.getTableFields(Model2), [('date', 'Date'), ('f1', 'Int32'), ('f3', 'Float32'), ('f2', 'String'), ('f4', 'String'), ('f5', 'Array(UInt64)')])
+        self.assertEqual(self.get_table_fields(Model2), [('date', 'Date'), ('f1', 'Int32'), ('f3', 'Float32'), ('f2', 'String'), ('f4', 'String'), ('f5', 'Array(UInt64)')])
         self.database.migrate('tests.sample_migrations', 5)
-        self.assertEqual(self.getTableFields(Model3), [('date', 'Date'), ('f1', 'Int64'), ('f3', 'Float64'), ('f4', 'String')])
+        self.assertEqual(self.get_table_fields(Model3), [('date', 'Date'), ('f1', 'Int64'), ('f3', 'Float64'), ('f4', 'String')])
         # Altering enum fields
         self.database.migrate('tests.sample_migrations', 6)
-        self.assertTrue(self.tableExists(EnumModel1))
-        self.assertEqual(self.getTableFields(EnumModel1),
+        self.assertTrue(self.table_exists(EnumModel1))
+        self.assertEqual(self.get_table_fields(EnumModel1),
                          [('date', 'Date'), ('f1', "Enum8('dog' = 1, 'cat' = 2, 'cow' = 3)")])
         self.database.migrate('tests.sample_migrations', 7)
-        self.assertTrue(self.tableExists(EnumModel1))
-        self.assertEqual(self.getTableFields(EnumModel2),
+        self.assertTrue(self.table_exists(EnumModel1))
+        self.assertEqual(self.get_table_fields(EnumModel2),
                          [('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")])
         # Materialized fields and alias fields
         self.database.migrate('tests.sample_migrations', 8)
-        self.assertTrue(self.tableExists(MaterializedModel))
-        self.assertEqual(self.getTableFields(MaterializedModel),
+        self.assertTrue(self.table_exists(MaterializedModel))
+        self.assertEqual(self.get_table_fields(MaterializedModel),
                          [('date_time', "DateTime"), ('date', 'Date')])
         self.database.migrate('tests.sample_migrations', 9)
-        self.assertTrue(self.tableExists(AliasModel))
-        self.assertEqual(self.getTableFields(AliasModel),
+        self.assertTrue(self.table_exists(AliasModel))
+        self.assertEqual(self.get_table_fields(AliasModel),
                          [('date', 'Date'), ('date_alias', "Date")])
         # Buffer models creation and alteration
         self.database.migrate('tests.sample_migrations', 10)
-        self.assertTrue(self.tableExists(Model4))
-        self.assertTrue(self.tableExists(Model4Buffer))
-        self.assertEqual(self.getTableFields(Model4), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
-        self.assertEqual(self.getTableFields(Model4Buffer), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
+        self.assertTrue(self.table_exists(Model4))
+        self.assertTrue(self.table_exists(Model4Buffer))
+        self.assertEqual(self.get_table_fields(Model4), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
+        self.assertEqual(self.get_table_fields(Model4Buffer), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
         self.database.migrate('tests.sample_migrations', 11)
-        self.assertEqual(self.getTableFields(Model4), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])
-        self.assertEqual(self.getTableFields(Model4Buffer), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])
+        self.assertEqual(self.get_table_fields(Model4), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])
+        self.assertEqual(self.get_table_fields(Model4Buffer), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])
 
         self.database.migrate('tests.sample_migrations', 12)
         self.assertEqual(self.database.count(Model3), 3)
@@ -88,24 +91,54 @@ class MigrationsTestCase(unittest.TestCase):
         self.assertListEqual(data, [1, 2, 3, 4])
 
         self.database.migrate('tests.sample_migrations', 14)
-        self.assertTrue(self.tableExists(MaterializedModel1))
-        self.assertEqual(self.getTableFields(MaterializedModel1),
+        self.assertTrue(self.table_exists(MaterializedModel1))
+        self.assertEqual(self.get_table_fields(MaterializedModel1),
                          [('date_time', 'DateTime'), ('int_field', 'Int8'), ('date', 'Date'), ('int_field_plus_one', 'Int8')])
-        self.assertTrue(self.tableExists(AliasModel1))
-        self.assertEqual(self.getTableFields(AliasModel1),
+        self.assertTrue(self.table_exists(AliasModel1))
+        self.assertEqual(self.get_table_fields(AliasModel1),
                          [('date', 'Date'), ('int_field', 'Int8'), ('date_alias', 'Date'), ('int_field_plus_one', 'Int8')])
         # Codecs and low cardinality
         self.database.migrate('tests.sample_migrations', 15)
-        self.assertTrue(self.tableExists(Model4_compressed))
+        self.assertTrue(self.table_exists(Model4_compressed))
         if self.database.has_low_cardinality_support:
-            self.assertEqual(self.getTableFields(Model2LowCardinality),
+            self.assertEqual(self.get_table_fields(Model2LowCardinality),
                              [('date', 'Date'), ('f1', 'LowCardinality(Int32)'), ('f3', 'LowCardinality(Float32)'),
                               ('f2', 'LowCardinality(String)'), ('f4', 'LowCardinality(Nullable(String))'), ('f5', 'Array(LowCardinality(UInt64))')])
         else:
             logging.warning('No support for low cardinality')
-            self.assertEqual(self.getTableFields(Model2),
+            self.assertEqual(self.get_table_fields(Model2),
                              [('date', 'Date'), ('f1', 'Int32'), ('f3', 'Float32'), ('f2', 'String'), ('f4', 'Nullable(String)'),
                               ('f5', 'Array(UInt64)')])
 
+        if self.database.server_version >= (19, 14, 3, 3):
+            # Creating constraints
+            self.database.migrate('tests.sample_migrations', 16)
+            self.assertTrue(self.table_exists(ModelWithConstraints))
+            self.database.insert([ModelWithConstraints(f1=101, f2='a')])
+            with self.assertRaises(ServerError):
+                self.database.insert([ModelWithConstraints(f1=99, f2='a')])
+            with self.assertRaises(ServerError):
+                self.database.insert([ModelWithConstraints(f1=101, f2='x')])
+            # Modifying constraints
+            self.database.migrate('tests.sample_migrations', 17)
+            self.database.insert([ModelWithConstraints(f1=99, f2='a')])
+            with self.assertRaises(ServerError):
+                self.database.insert([ModelWithConstraints(f1=101, f2='a')])
+            with self.assertRaises(ServerError):
+                self.database.insert([ModelWithConstraints(f1=99, f2='x')])
+
+        if self.database.server_version >= (20, 1, 2, 4):
+            # Creating indexes
+            self.database.migrate('tests.sample_migrations', 18)
+            self.assertTrue(self.table_exists(ModelWithIndex))
+            self.assertIn('INDEX index ', self.get_table_def(ModelWithIndex))
+            self.assertIn('INDEX another_index ', self.get_table_def(ModelWithIndex))
+            # Modifying indexes
+            self.database.migrate('tests.sample_migrations', 19)
+            self.assertNotIn('INDEX index ', self.get_table_def(ModelWithIndex))
+            self.assertIn('INDEX index2 ', self.get_table_def(ModelWithIndex))
+            self.assertIn('INDEX another_index ', self.get_table_def(ModelWithIndex))
+
 
 # Several different models with the same table name, to simulate a table that changes over time
@@ -294,3 +327,68 @@ class Model2LowCardinality(Model):
     @classmethod
     def table_name(cls):
         return 'mig'
+
+
+class ModelWithConstraints(Model):
+
+    date = DateField()
+    f1 = Int32Field()
+    f2 = StringField()
+
+    constraint = Constraint(f2.isIn(['a', 'b', 'c']))  # check reserved keyword as constraint name
+    f1_constraint = Constraint(f1 > 100)
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'modelwithconstraints'
+
+
+class ModelWithConstraints2(Model):
+
+    date = DateField()
+    f1 = Int32Field()
+    f2 = StringField()
+
+    constraint = Constraint(f2.isIn(['a', 'b', 'c']))
+    f1_constraint_new = Constraint(f1 < 100)
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'modelwithconstraints'
+
+
+class ModelWithIndex(Model):
+
+    date = DateField()
+    f1 = Int32Field()
+    f2 = StringField()
+
+    index = Index(f1, type=Index.minmax(), granularity=1)
+    another_index = Index(f2, type=Index.set(0), granularity=1)
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'modelwithindex'
+
+
+class ModelWithIndex2(Model):
+
+    date = DateField()
+    f1 = Int32Field()
+    f2 = StringField()
+
+    index2 = Index(f1, type=Index.bloom_filter(), granularity=2)
+    another_index = Index(f2, type=Index.set(0), granularity=1)
+
+    engine = MergeTree('date', ('date',))
+
+    @classmethod
+    def table_name(cls):
+        return 'modelwithindex'