From 393209e624c54aa81704af12ce25a8342c400320 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 6 Jun 2020 11:07:01 +0300 Subject: [PATCH] Support for model constraints --- CHANGELOG.md | 4 + docs/class_reference.md | 168 +++++++++++++++++++++++++- docs/models_and_databases.md | 19 +++ docs/schema_migrations.md | 19 +-- docs/toc.md | 10 ++ scripts/generate_ref.py | 2 +- src/infi/clickhouse_orm/migrations.py | 94 ++++++++++---- 7 files changed, 280 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da7d3c4..b6165b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ Change Log ========== +Unreleased +---------- +- Support for model constraints + v2.0.1 ------ - Remove unnecessary import of `six` diff --git a/docs/class_reference.md b/docs/class_reference.md index 285f9b4..616863f 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -178,7 +178,7 @@ Unrecognized field names will cause an `AttributeError`. #### Model.create_table_sql(db) -Returns the SQL command for creating a table for this model. +Returns the SQL statement for creating a table for this model. #### Model.drop_table_sql(db) @@ -308,7 +308,7 @@ Unrecognized field names will cause an `AttributeError`. #### BufferModel.create_table_sql(db) -Returns the SQL command for creating a table for this model. +Returns the SQL statement for creating a table for this model. #### BufferModel.drop_table_sql(db) @@ -422,12 +422,147 @@ Returns the instance's column values as a tab-separated line. A newline is not i - `include_readonly`: if false, returns only fields that can be inserted into database. +### MergeModel + +Extends Model + + +Model for Merge engine +Predefines virtual _table column and controls that rows can't be inserted to this table type +https://clickhouse.tech/docs/en/single/index.html#document-table_engines/merge + +#### MergeModel(**kwargs) + + +Creates a model instance, using keyword arguments as field values. 
+Since values are immediately converted to their Pythonic type, +invalid values will cause a `ValueError` to be raised. +Unrecognized field names will cause an `AttributeError`. + + +#### MergeModel.create_table_sql(db) + + +Returns the SQL statement for creating a table for this model. + + +#### MergeModel.drop_table_sql(db) + + +Returns the SQL command for deleting this model's table. + + +#### MergeModel.fields(writable=False) + + +Returns an `OrderedDict` of the model's fields (from name to `Field` instance). +If `writable` is true, only writable fields are included. +Callers should not modify the dictionary. + + +#### MergeModel.from_tsv(line, field_names, timezone_in_use=UTC, database=None) + + +Create a model instance from a tab-separated line. The line may or may not include a newline. +The `field_names` list must match the fields defined in the model, but does not have to include all of them. + +- `line`: the TSV-formatted data. +- `field_names`: names of the model fields in the data. +- `timezone_in_use`: the timezone to use when parsing dates and datetimes. +- `database`: if given, sets the database that this instance belongs to. + + +#### get_database() + + +Gets the `Database` that this model instance belongs to. +Returns `None` unless the instance was read from the database or written to it. + + +#### get_field(name) + + +Gets a `Field` instance given its name, or `None` if not found. + + +#### MergeModel.has_funcs_as_defaults() + + +Return True if some of the model's fields use a function expression +as a default value. This requires special handling when inserting instances. + + +#### MergeModel.is_read_only() + + +Returns true if the model is marked as read only. + + +#### MergeModel.is_system_model() + + +Returns true if the model represents a system table. + + +#### MergeModel.objects_in(database) + + +Returns a `QuerySet` for selecting instances of this model class. 
+ + +#### set_database(db) + + +Sets the `Database` that this model instance belongs to. +This is done automatically when the instance is read from the database or written to it. + + +#### MergeModel.table_name() + + +Returns the model's database table name. By default this is the +class name converted to lowercase. Override this if you want to use +a different table name. + + +#### to_db_string() + + +Returns the instance as a bytestring ready to be inserted into the database. + + +#### to_dict(include_readonly=True, field_names=None) + + +Returns the instance's column values as a dict. + +- `include_readonly`: if false, returns only fields that can be inserted into database. +- `field_names`: an iterable of field names to return (optional) + + +#### to_tskv(include_readonly=True) + + +Returns the instance's column keys and values as a tab-separated line. A newline is not included. +Fields that were not assigned a value are omitted. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + +#### to_tsv(include_readonly=True) + + +Returns the instance's column values as a tab-separated line. A newline is not included. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + ### DistributedModel Extends Model -Model for Distributed engine +Model class for use with a `Distributed` engine. #### DistributedModel(**kwargs) @@ -441,6 +576,9 @@ Unrecognized field names will cause an `AttributeError`. #### DistributedModel.create_table_sql(db) +Returns the SQL statement for creating a table for this model. + + #### DistributedModel.drop_table_sql(db) @@ -541,6 +679,10 @@ Returns a `QuerySet` for selecting instances of this model class. #### set_database(db) +Sets the `Database` that this model instance belongs to. +This is done automatically when the instance is read from the database or written to it. 
+ + #### DistributedModel.table_name() @@ -581,6 +723,26 @@ Returns the instance's column values as a tab-separated line. A newline is not i - `include_readonly`: if false, returns only fields that can be inserted into database. +### Constraint + + +Defines a model constraint. + +#### Constraint(expr) + + +Initializer. Requires an expression that ClickHouse will verify when inserting data. + + +#### create_table_sql() + + +Returns the SQL statement for defining this constraint on table creation. + + +#### str() + + infi.clickhouse_orm.fields -------------------------- diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md index b1f262c..928489d 100644 --- a/docs/models_and_databases.md +++ b/docs/models_and_databases.md @@ -75,6 +75,25 @@ The table name used for the model is its class name, converted to lowercase. To def table_name(cls): return 'people' +### Model Constraints + +It is possible to define constraints which ClickHouse verifies when data is inserted. Trying to insert invalid records will raise a `ServerError`. Each constraint has a name and an expression to validate. For example: + + from infi.clickhouse_orm import Model, Constraint, F, StringField, DateField, Float32Field, MergeTree + + class Person(Model): + + first_name = StringField() + last_name = StringField() + birthday = DateField() + height = Float32Field() + + # Ensure that the birthday is not a future date + birthday_is_in_the_past = Constraint(birthday <= F.today()) + + engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday')) + + Using Models ------------ diff --git a/docs/schema_migrations.md b/docs/schema_migrations.md index 1556395..9e3fa01 100644 --- a/docs/schema_migrations.md +++ b/docs/schema_migrations.md @@ -33,19 +33,19 @@ Each migration file is expected to contain a list of `operations`, for example: The following operations are supported: -**CreateTable** +### CreateTable A migration operation that creates a table for a given model class. 
If the table already exists, the operation does nothing. In case the model class is a `BufferModel`, the operation first creates the underlying on-disk table, and then creates the buffer table. -**DropTable** +### DropTable A migration operation that drops the table of a given model class. If the table does not exist, the operation does nothing. -**AlterTable** +### AlterTable A migration operation that compares the table of a given model class to the model’s fields, and alters the table to match the model. The operation can: @@ -56,14 +56,19 @@ A migration operation that compares the table of a given model class to the mode Default values are not altered by this operation. -**AlterTableWithBuffer** +### AlterTableWithBuffer A compound migration operation for altering a buffer table and its underlying on-disk table. The buffer table is dropped, the on-disk table is altered, and then the buffer table is re-created. This is the procedure recommended in the ClickHouse documentation for handling scenarios in which the underlying table needs to be modified. Applying this migration operation to a regular table has the same effect as an `AlterTable` operation. -**RunPython** +### AlterConstraints + +A migration operation that adds new constraints from the model to the database table, and drops obsolete ones. Constraints are identified by their names, so a change in an existing constraint will not be detected unless its name was changed too. ClickHouse does not check that the constraints hold for existing data in the table. + + +### RunPython A migration operation that runs a Python function. The function receives the `Database` instance to operate on. @@ -77,9 +82,9 @@ A migration operation that runs a Python function. The function receives the `Da ] -**RunSQL** +### RunSQL -A migration operation that runs raw SQL queries. It expects a string containing an SQL query, or an array of SQL-query strings. +A migration operation that runs raw SQL statements. 
It expects a string containing an SQL statement, or a list of statements. Example: diff --git a/docs/toc.md b/docs/toc.md index 5805eaa..6dd5da0 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -10,6 +10,7 @@ * [Materialized fields](models_and_databases.md#materialized-fields) * [Alias fields](models_and_databases.md#alias-fields) * [Table Names](models_and_databases.md#table-names) + * [Model Constraints](models_and_databases.md#model-constraints) * [Using Models](models_and_databases.md#using-models) * [Inserting to the Database](models_and_databases.md#inserting-to-the-database) * [Reading from the Database](models_and_databases.md#reading-from-the-database) @@ -58,6 +59,13 @@ * [Schema Migrations](schema_migrations.md#schema-migrations) * [Writing Migrations](schema_migrations.md#writing-migrations) + * [CreateTable](schema_migrations.md#createtable) + * [DropTable](schema_migrations.md#droptable) + * [AlterTable](schema_migrations.md#altertable) + * [AlterTableWithBuffer](schema_migrations.md#altertablewithbuffer) + * [AlterConstraints](schema_migrations.md#alterconstraints) + * [RunPython](schema_migrations.md#runpython) + * [RunSQL](schema_migrations.md#runsql) * [Running Migrations](schema_migrations.md#running-migrations) * [System Models](system_models.md#system-models) @@ -74,7 +82,9 @@ * [infi.clickhouse_orm.models](class_reference.md#inficlickhouse_ormmodels) * [Model](class_reference.md#model) * [BufferModel](class_reference.md#buffermodel) + * [MergeModel](class_reference.md#mergemodel) * [DistributedModel](class_reference.md#distributedmodel) + * [Constraint](class_reference.md#constraint) * [infi.clickhouse_orm.fields](class_reference.md#inficlickhouse_ormfields) * [ArrayField](class_reference.md#arrayfield) * [BaseEnumField](class_reference.md#baseenumfield) diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py index 6e537ec..8dc0477 100644 --- a/scripts/generate_ref.py +++ b/scripts/generate_ref.py @@ -132,7 +132,7 @@ if __name__ == 
'__main__': print('===============') print() module_doc([database.Database, database.DatabaseException]) - module_doc([models.Model, models.BufferModel, models.DistributedModel]) + module_doc([models.Model, models.BufferModel, models.MergeModel, models.DistributedModel, models.Constraint]) module_doc(sorted([fields.Field] + all_subclasses(fields.Field), key=lambda x: x.__name__), False) module_doc([engines.Engine] + all_subclasses(engines.Engine), False) module_doc([query.QuerySet, query.AggregateQuerySet, query.Q]) diff --git a/src/infi/clickhouse_orm/migrations.py b/src/infi/clickhouse_orm/migrations.py index cf93d9a..5361b2b 100644 --- a/src/infi/clickhouse_orm/migrations.py +++ b/src/infi/clickhouse_orm/migrations.py @@ -7,7 +7,7 @@ import logging logger = logging.getLogger('migrations') -class Operation(object): +class Operation(): ''' Base class for migration operations. ''' @@ -16,14 +16,31 @@ class Operation(object): raise NotImplementedError() # pragma: no cover -class CreateTable(Operation): +class ModelOperation(Operation): ''' - A migration operation that creates a table for a given model class. + Base class for migration operations that work on a specific model. ''' def __init__(self, model_class): + ''' + Initializer. + ''' self.model_class = model_class + def _alter_table(self, database, cmd): + ''' + Utility for running ALTER TABLE commands. + ''' + cmd = "ALTER TABLE $db.`%s` %s" % (self.model_class.table_name(), cmd) + logger.debug(cmd) + database.raw(cmd) + + +class CreateTable(ModelOperation): + ''' + A migration operation that creates a table for a given model class. 
+ ''' + def apply(self, database): logger.info(' Create table %s', self.model_class.table_name()) if issubclass(self.model_class, BufferModel): @@ -31,7 +48,7 @@ class CreateTable(Operation): database.create_table(self.model_class) -class AlterTable(Operation): +class AlterTable(ModelOperation): ''' A migration operation that compares the table of a given model class to the model's fields, and alters the table to match the model. The operation can: @@ -41,18 +58,10 @@ class AlterTable(Operation): Default values are not altered by this operation. ''' - def __init__(self, model_class): - self.model_class = model_class - def _get_table_fields(self, database): query = "DESC `%s`.`%s`" % (database.db_name, self.model_class.table_name()) return [(row.name, row.type) for row in database.select(query)] - def _alter_table(self, database, cmd): - cmd = "ALTER TABLE `%s`.`%s` %s" % (database.db_name, self.model_class.table_name(), cmd) - logger.debug(cmd) - database._send(cmd) - def apply(self, database): logger.info(' Alter table %s', self.model_class.table_name()) @@ -100,16 +109,13 @@ class AlterTable(Operation): self._alter_table(database, 'MODIFY COLUMN %s %s' % (field_name, model_fields[field_name])) -class AlterTableWithBuffer(Operation): +class AlterTableWithBuffer(ModelOperation): ''' A migration operation for altering a buffer table and its underlying on-disk table. The buffer table is dropped, the on-disk table is altered, and then the buffer table is re-created. ''' - def __init__(self, model_class): - self.model_class = model_class - def apply(self, database): if issubclass(self.model_class, BufferModel): DropTable(self.model_class).apply(database) @@ -119,25 +125,60 @@ class AlterTableWithBuffer(Operation): AlterTable(self.model_class).apply(database) -class DropTable(Operation): +class DropTable(ModelOperation): ''' A migration operation that drops the table of a given model class. 
''' - def __init__(self, model_class): - self.model_class = model_class - def apply(self, database): logger.info(' Drop table %s', self.model_class.table_name()) database.drop_table(self.model_class) +class AlterConstraints(ModelOperation): + ''' + A migration operation that adds new constraints from the model to the database + table, and drops obsolete ones. Constraints are identified by their names, so + a change in an existing constraint will not be detected unless its name was changed too. + ClickHouse does not check that the constraints hold for existing data in the table. + ''' + + def apply(self, database): + logger.info(' Alter constraints for %s', self.model_class.table_name()) + existing = self._get_constraint_names(database) + # Go over constraints in the model + for constraint in self.model_class._constraints.values(): + # Check if it's a new constraint + if constraint.name not in existing: + logger.info(' Add constraint %s', constraint.name) + self._alter_table(database, 'ADD %s' % constraint.create_table_sql()) + else: + existing.remove(constraint.name) + # Remaining constraints in `existing` are obsolete + for name in existing: + logger.info(' Drop constraint %s', name) + self._alter_table(database, 'DROP CONSTRAINT `%s`' % name) + + def _get_constraint_names(self, database): + ''' + Returns a set containing the names of existing constraints in the table. + ''' + import re + create_table_sql = database.raw('SHOW CREATE TABLE $db.`%s`' % self.model_class.table_name()) + matches = re.findall(r'\sCONSTRAINT\s+`?(.+?)`?\s+CHECK\s', create_table_sql, flags=re.IGNORECASE) + return set(matches) + + class RunPython(Operation): ''' - A migration operation that executes given python function on database + A migration operation that executes a Python function. ''' def __init__(self, func): - assert callable(func), "'func' parameter must be function" + ''' + Initializer. 
The given Python function will be called with a single + argument - the Database instance to apply the migration to. + ''' + assert callable(func), "'func' argument must be function" self._func = func def apply(self, database): @@ -147,14 +188,17 @@ class RunPython(Operation): class RunSQL(Operation): ''' - A migration operation that executes given SQL on database + A migration operation that executes arbitrary SQL statements. ''' def __init__(self, sql): + ''' + Initializer. The given sql argument must be a valid SQL statement or + list of statements. + ''' if isinstance(sql, str): sql = [sql] - - assert isinstance(sql, list), "'sql' parameter must be string or list of strings" + assert isinstance(sql, list), "'sql' argument must be string or list of strings" self._sql = sql def apply(self, database):