diff --git a/CHANGELOG.md b/CHANGELOG.md index d302dc4..a32d9d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ Change Log ========== +v0.9.7 +------ +- Add `distinct` method to querysets +- Add `AlterTableWithBuffer` migration operation +- Support Merge engine (M1hacka) + v0.9.6 ------ - Fix python3 compatibility (TvoroG) diff --git a/docs/class_reference.md b/docs/class_reference.md index 7e4bc74..e56fc7a 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -7,7 +7,7 @@ infi.clickhouse_orm.database ### Database -Database instances connect to a specific ClickHouse database for running queries, +Database instances connect to a specific ClickHouse database for running queries, inserting data and other operations. #### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True) @@ -71,7 +71,7 @@ Insert records into the database. Executes schema migrations. -- `migrations_package_name` - fully qualified name of the Python package +- `migrations_package_name` - fully qualified name of the Python package containing the migrations. - `up_to` - number of the last migration to apply. @@ -89,7 +89,7 @@ Selects records and returns a single page of model instances. - `conditions`: optional SQL conditions (contents of the WHERE clause). - `settings`: query settings to send as HTTP GET parameters -The result is a namedtuple containing `objects` (list), `number_of_objects`, +The result is a namedtuple containing `objects` (list), `number_of_objects`, `pages_total`, `number` (of the current page), and `page_size`. @@ -128,7 +128,7 @@ infi.clickhouse_orm.models A base class for ORM models. Each model class represent a ClickHouse table. For example: - + class CPUStats(Model): timestamp = DateTimeField() cpu_id = UInt16Field() @@ -172,7 +172,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o #### get_database() -Gets the `Database` that this model instance belongs to. +Gets the `Database` that this model instance belongs to. Returns `None` unless the instance was read from the database or written to it. @@ -191,7 +191,7 @@ Returns a `QuerySet` for selecting instances of this model class. #### set_database(db) -Sets the `Database` that this model instance belongs to. +Sets the `Database` that this model instance belongs to. This is done automatically when the instance is read from the database or written to it. @@ -261,7 +261,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o #### get_database() -Gets the `Database` that this model instance belongs to. +Gets the `Database` that this model instance belongs to. Returns `None` unless the instance was read from the database or written to it. @@ -280,7 +280,7 @@ Returns a `QuerySet` for selecting instances of this model class. #### set_database(db) -Sets the `Database` that this model instance belongs to. +Sets the `Database` that this model instance belongs to. This is done automatically when the instance is read from the database or written to it. @@ -317,28 +317,28 @@ infi.clickhouse_orm.fields Abstract base class for all field types. -#### Field(default=None, alias=None, materialized=None) +#### Field(default=None, alias=None, materialized=None, readonly=None) ### StringField Extends Field -#### StringField(default=None, alias=None, materialized=None) +#### StringField(default=None, alias=None, materialized=None, readonly=None) ### DateField Extends Field -#### DateField(default=None, alias=None, materialized=None) +#### DateField(default=None, alias=None, materialized=None, readonly=None) ### DateTimeField Extends Field -#### DateTimeField(default=None, alias=None, materialized=None) +#### DateTimeField(default=None, alias=None, materialized=None, readonly=None) ### BaseIntField @@ -348,7 +348,7 @@ Extends Field Abstract base class for all integer-type fields. -#### BaseIntField(default=None, alias=None, materialized=None) +#### BaseIntField(default=None, alias=None, materialized=None, readonly=None) ### BaseFloatField @@ -358,7 +358,7 @@ Extends Field Abstract base class for all float-type fields. -#### BaseFloatField(default=None, alias=None, materialized=None) +#### BaseFloatField(default=None, alias=None, materialized=None, readonly=None) ### BaseEnumField @@ -368,14 +368,14 @@ Extends Field Abstract base class for all enum-type fields. -#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None) +#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None, readonly=None) ### ArrayField Extends Field -#### ArrayField(inner_field, default=None, alias=None, materialized=None) +#### ArrayField(inner_field, default=None, alias=None, materialized=None, readonly=None) ### NullableField @@ -389,91 +389,91 @@ Extends Field Extends StringField -#### FixedStringField(length, default=None, alias=None, materialized=None) +#### FixedStringField(length, default=None, alias=None, materialized=None, readonly=None) ### UInt8Field Extends BaseIntField -#### UInt8Field(default=None, alias=None, materialized=None) +#### UInt8Field(default=None, alias=None, materialized=None, readonly=None) ### UInt16Field Extends BaseIntField -#### UInt16Field(default=None, alias=None, materialized=None) +#### UInt16Field(default=None, alias=None, materialized=None, readonly=None) ### UInt32Field Extends BaseIntField -#### UInt32Field(default=None, alias=None, materialized=None) +#### UInt32Field(default=None, alias=None, materialized=None, readonly=None) ### UInt64Field Extends BaseIntField -#### UInt64Field(default=None, alias=None, materialized=None) +#### UInt64Field(default=None, alias=None, materialized=None, readonly=None) ### Int8Field Extends BaseIntField -#### Int8Field(default=None, alias=None, materialized=None) +#### Int8Field(default=None, alias=None, materialized=None, readonly=None) ### Int16Field Extends BaseIntField -#### Int16Field(default=None, alias=None, materialized=None) +#### Int16Field(default=None, alias=None, materialized=None, readonly=None) ### Int32Field Extends BaseIntField -#### Int32Field(default=None, alias=None, materialized=None) +#### Int32Field(default=None, alias=None, materialized=None, readonly=None) ### Int64Field Extends BaseIntField -#### Int64Field(default=None, alias=None, materialized=None) +#### Int64Field(default=None, alias=None, materialized=None, readonly=None) ### Float32Field Extends BaseFloatField -#### Float32Field(default=None, alias=None, materialized=None) +#### Float32Field(default=None, alias=None, materialized=None, readonly=None) ### Float64Field Extends BaseFloatField -#### Float64Field(default=None, alias=None, materialized=None) +#### Float64Field(default=None, alias=None, materialized=None, readonly=None) ### Enum8Field Extends BaseEnumField -#### Enum8Field(enum_cls, default=None, alias=None, materialized=None) +#### Enum8Field(enum_cls, default=None, alias=None, materialized=None, readonly=None) ### Enum16Field Extends BaseEnumField -#### Enum16Field(enum_cls, default=None, alias=None, materialized=None) +#### Enum16Field(enum_cls, default=None, alias=None, materialized=None, readonly=None) infi.clickhouse_orm.engines @@ -512,6 +512,19 @@ Read more [here](https://clickhouse.yandex/reference_en.html#Buffer). #### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000) +### Merge + +Extends Engine + + +The Merge engine (not to be confused with MergeTree) does not store data itself, +but allows reading from any number of other tables simultaneously. +Writing to a table is not supported +https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge + +#### Merge(table_regex) + + ### CollapsingMergeTree Extends MergeTree @@ -585,6 +598,13 @@ Returns the contents of the query's `WHERE` clause as a string. Returns the number of matching model instances. +#### distinct() + + +Adds a DISTINCT clause to the query, meaning that any duplicate rows +in the results will be omitted. + + #### exclude(**kwargs) @@ -678,6 +698,13 @@ Returns the contents of the query's `WHERE` clause as a string. Returns the number of rows after aggregation. +#### distinct() + + +Adds a DISTINCT clause to the query, meaning that any duplicate rows +in the results will be omitted. + + #### exclude(**kwargs) diff --git a/docs/querysets.md b/docs/querysets.md index 2bbefd9..d27c836 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -99,6 +99,16 @@ When some of the model fields aren't needed, it is more efficient to omit them f qs = Person.objects_in(database).only('first_name', 'birthday') +Distinct +-------- + +Adds a DISTINCT clause to the query, meaning that any duplicate rows in the results will be omitted. + + >>> Person.objects_in(database).only('first_name').count() + 100 + >>> Person.objects_in(database).only('first_name').distinct().count() + 94 + Slicing ------- diff --git a/docs/schema_migrations.md b/docs/schema_migrations.md index e4647bf..ce56b04 100644 --- a/docs/schema_migrations.md +++ b/docs/schema_migrations.md @@ -34,11 +34,13 @@ The following operations are supported: **CreateTable** -A migration operation that creates a table for a given model class. +A migration operation that creates a table for a given model class. If the table already exists, the operation does nothing. + +In case the model class is a `BufferModel`, the operation first creates the underlying on-disk table, and then creates the buffer table. **DropTable** -A migration operation that drops the table of a given model class. +A migration operation that drops the table of a given model class. If the table does not exist, the operation does nothing. **AlterTable** @@ -50,6 +52,13 @@ A migration operation that compares the table of a given model class to the mode Default values are not altered by this operation. +**AlterTableWithBuffer** + +A compound migration operation for altering a buffer table and its underlying on-disk table. The buffer table is dropped, the on-disk table is altered, and then the buffer table is re-created. This is the procedure recommended in the ClickHouse documentation for handling scenarios in which the underlying table needs to be modified. + +Applying this migration operation to a regular table has the same effect as an `AlterTable` operation. + + Running Migrations ------------------ diff --git a/docs/table_engines.md b/docs/table_engines.md index 2f92183..30aa07b 100644 --- a/docs/table_engines.md +++ b/docs/table_engines.md @@ -15,6 +15,7 @@ The following engines are supported by the ORM: - SummingMergeTree / ReplicatedSummingMergeTree - ReplacingMergeTree / ReplicatedReplacingMergeTree - Buffer +- Merge Simple Engines @@ -85,6 +86,18 @@ Then you can insert objects into Buffer model and they will be handled by ClickH suzy = PersonBuffer(first_name='Suzy', last_name='Jones') dan = PersonBuffer(first_name='Dan', last_name='Schwartz') db.insert([dan, suzy]) + + +Merge Engine +------------- + +[ClickHouse docs](https://clickhouse.yandex/docs/en/single/index.html#merge) +A `Merge` engine is only used in conjunction with a `MergeModel`. +This table does not store data itself, but allows reading from any number of other tables simultaneously. So you can't insert in it. +Engine parameter specifies re2 (similar to PCRE) regular expression, from which data is selected. + + class MergeTable(models.MergeModel): + engine = engines.Merge('^table_prefix') --- diff --git a/docs/toc.md b/docs/toc.md index aa5bb3b..cc1c8a6 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -20,6 +20,7 @@ * [Counting and Checking Existence](querysets.md#counting-and-checking-existence) * [Ordering](querysets.md#ordering) * [Omitting Fields](querysets.md#omitting-fields) + * [Distinct](querysets.md#distinct) * [Slicing](querysets.md#slicing) * [Pagination](querysets.md#pagination) * [Aggregation](querysets.md#aggregation) @@ -36,6 +37,7 @@ * [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family) * [Data Replication](table_engines.md#data-replication) * [Buffer Engine](table_engines.md#buffer-engine) + * [Merge Engine](table_engines.md#merge-engine) * [Schema Migrations](schema_migrations.md#schema-migrations) * [Writing Migrations](schema_migrations.md#writing-migrations) @@ -85,6 +87,7 @@ * [Memory](class_reference.md#memory) * [MergeTree](class_reference.md#mergetree) * [Buffer](class_reference.md#buffer) + * [Merge](class_reference.md#merge) * [CollapsingMergeTree](class_reference.md#collapsingmergetree) * [SummingMergeTree](class_reference.md#summingmergetree) * [ReplacingMergeTree](class_reference.md#replacingmergetree) diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py index c35e881..d2863fd 100644 --- a/scripts/generate_ref.py +++ b/scripts/generate_ref.py @@ -51,7 +51,10 @@ def get_method_sig(method): for arg in argspec.args: default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index) if default_arg.has_default: - args.append("%s=%s" % (arg, default_arg.default_value)) + val = default_arg.default_value + if isinstance(val, basestring): + val = '"' + val + '"' + args.append("%s=%s" % (arg, val)) else: args.append(arg) arg_index += 1 diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 5c816d6..4f94b6b 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -75,16 +75,16 @@ class Database(object): Creates a table for the given model class, if it does not exist already. ''' # TODO check that model has an engine - if model_class.readonly: - raise DatabaseException("You can't create read only table") + if model_class.system: + raise DatabaseException("You can't create system table") self._send(model_class.create_table_sql(self.db_name)) def drop_table(self, model_class): ''' Drops the database table of the given model class, if it exists. ''' - if model_class.readonly: - raise DatabaseException("You can't drop read only table") + if model_class.system: + raise DatabaseException("You can't drop system table") self._send(model_class.drop_table_sql(self.db_name)) def insert(self, model_instances, batch_size=1000): @@ -103,8 +103,8 @@ class Database(object): return # model_instances is empty model_class = first_instance.__class__ - if first_instance.readonly: - raise DatabaseException("You can't insert into read only table") + if first_instance.readonly or first_instance.system: + raise DatabaseException("You can't insert into read only and system tables") def gen(): buf = BytesIO() diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index caa05c7..777bd2e 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -1,4 +1,7 @@ from __future__ import unicode_literals + +import six + from .utils import comma_join @@ -118,7 +121,6 @@ class Buffer(Engine): self.min_bytes = min_bytes self.max_bytes = max_bytes - def create_table_sql(self, db_name): # Overriden create_table_sql example: #sql = 'ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)' @@ -128,3 +130,27 @@ class Buffer(Engine): self.max_rows, self.min_bytes, self.max_bytes ) return sql + + +class Merge(Engine): + """ + The Merge engine (not to be confused with MergeTree) does not store data itself, + but allows reading from any number of other tables simultaneously. + Writing to a table is not supported + https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge + """ + + def __init__(self, table_regex): + assert isinstance(table_regex, six.string_types), "'table_regex' parameter must be string" + + self.table_regex = table_regex + + # Use current database as default + self.db_name = 'currentDatabase()' + + def create_table_sql(self): + return "Merge(%s, '%s')" % (self.db_name, self.table_regex) + + def set_db_name(self, db_name): + assert isinstance(db_name, six.string_types), "'db_name' parameter must be string" + self.db_name = db_name diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index a57314e..3e3207a 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -16,19 +16,21 @@ class Field(object): class_default = 0 db_type = None - def __init__(self, default=None, alias=None, materialized=None): + def __init__(self, default=None, alias=None, materialized=None, readonly=None): assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \ "Only one of default, alias and materialized parameters can be given" assert alias is None or isinstance(alias, string_types) and alias != "",\ "Alias field must be string field name, if given" assert materialized is None or isinstance(materialized, string_types) and alias != "",\ "Materialized field must be string, if given" + assert readonly is None or type(readonly) is bool, "readonly parameter must be bool if given" self.creation_counter = Field.creation_counter Field.creation_counter += 1 self.default = self.class_default if default is None else default self.alias = alias self.materialized = materialized + self.readonly = bool(self.alias or self.materialized or readonly) def to_python(self, value, timezone_in_use): ''' @@ -75,10 +77,6 @@ class Field(object): else: return self.db_type - @property - def readonly(self): - return bool(self.alias or self.materialized) - class StringField(Field): @@ -95,10 +93,10 @@ class StringField(Field): class FixedStringField(StringField): - def __init__(self, length, default=None, alias=None, materialized=None): + def __init__(self, length, default=None, alias=None, materialized=None, readonly=None): self._length = length self.db_type = 'FixedString(%d)' % length - super(FixedStringField, self).__init__(default, alias, materialized) + super(FixedStringField, self).__init__(default, alias, materialized, readonly) def to_python(self, value, timezone_in_use): value = super(FixedStringField, self).to_python(value, timezone_in_use) @@ -274,11 +272,11 @@ class BaseEnumField(Field): Abstract base class for all enum-type fields. ''' - def __init__(self, enum_cls, default=None, alias=None, materialized=None): + def __init__(self, enum_cls, default=None, alias=None, materialized=None, readonly=None): self.enum_cls = enum_cls if default is None: default = list(enum_cls)[0] - super(BaseEnumField, self).__init__(default, alias, materialized) + super(BaseEnumField, self).__init__(default, alias, materialized, readonly) def to_python(self, value, timezone_in_use): if isinstance(value, self.enum_cls): @@ -338,9 +336,9 @@ class ArrayField(Field): class_default = [] - def __init__(self, inner_field, default=None, alias=None, materialized=None): + def __init__(self, inner_field, default=None, alias=None, materialized=None, readonly=None): self.inner_field = inner_field - super(ArrayField, self).__init__(default, alias, materialized) + super(ArrayField, self).__init__(default, alias, materialized, readonly) def to_python(self, value, timezone_in_use): if isinstance(value, text_type): @@ -374,7 +372,7 @@ class NullableField(Field): self._null_values = [None] if extra_null_values: self._null_values.extend(extra_null_values) - super(NullableField, self).__init__(default, alias, materialized) + super(NullableField, self).__init__(default, alias, materialized, readonly=None) def to_python(self, value, timezone_in_use): if value == '\\N' or value is None: diff --git a/src/infi/clickhouse_orm/migrations.py b/src/infi/clickhouse_orm/migrations.py index 1a82a5b..a7843a7 100644 --- a/src/infi/clickhouse_orm/migrations.py +++ b/src/infi/clickhouse_orm/migrations.py @@ -82,6 +82,25 @@ class AlterTable(Operation): self._alter_table(database, 'MODIFY COLUMN %s %s' % model_field) +class AlterTableWithBuffer(Operation): + ''' + A migration operation for altering a buffer table and its underlying on-disk table. + The buffer table is dropped, the on-disk table is altered, and then the buffer table + is re-created. + ''' + + def __init__(self, model_class): + self.model_class = model_class + + def apply(self, database): + if issubclass(self.model_class, BufferModel): + DropTable(self.model_class).apply(database) + AlterTable(self.model_class.engine.main_model).apply(database) + CreateTable(self.model_class).apply(database) + else: + AlterTable(self.model_class).apply(database) + + class DropTable(Operation): ''' A migration operation that drops the table of a given model class. @@ -91,7 +110,7 @@ class DropTable(Operation): self.model_class = model_class def apply(self, database): - logger.info(' Drop table %s', self.model_class.__name__) + logger.info(' Drop table %s', self.model_class.table_name()) database.drop_table(self.model_class) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 8714447..33cfde2 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -4,9 +4,10 @@ from logging import getLogger from six import with_metaclass import pytz -from .fields import Field +from .fields import Field, StringField from .utils import parse_tsv from .query import QuerySet +from .engines import Merge logger = getLogger('clickhouse_orm') @@ -86,8 +87,13 @@ class Model(with_metaclass(ModelBase)): ''' engine = None + + # Insert operations are restricted for read only models readonly = False + # Create table, drop table, insert operations are restricted for system models + system = False + def __init__(self, **kwargs): ''' Creates a model instance, using keyword arguments as field values. @@ -246,3 +252,25 @@ class BufferModel(Model): engine_str = cls.engine.create_table_sql(db_name) parts.append(engine_str) return ' '.join(parts) + + +class MergeModel(Model): + ''' + Model for Merge engine + Predefines virtual _table column an controls that rows can't be inserted to this table type + https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge + ''' + readonly = True + + # Virtual fields can't be inserted into database + _table = StringField(readonly=True) + + def set_database(self, db): + ''' + Gets the `Database` that this model instance belongs to. + Returns `None` unless the instance was read from the database or written to it. + ''' + assert isinstance(self.engine, Merge), "engine must be engines.Merge instance" + res = super(MergeModel, self).set_database(db) + self.engine.set_db_name(db.db_name) + return res diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index c1c1dd0..0bf764a 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -187,6 +187,7 @@ class QuerySet(object): self._q = [] self._fields = [] self._limits = None + self._distinct = False def __iter__(self): """ @@ -228,14 +229,15 @@ class QuerySet(object): """ Returns the whole query as a SQL string. """ + distinct = 'DISTINCT ' if self._distinct else '' fields = '*' if self._fields: fields = comma_join('`%s`' % field for field in self._fields) ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else '' limit = '\nLIMIT %d, %d' % self._limits if self._limits else '' - params = (fields, self._model_cls.table_name(), + params = (distinct, fields, self._model_cls.table_name(), self.conditions_as_sql(), ordering, limit) - return u'SELECT %s\nFROM `%s`\nWHERE %s%s%s' % params + return u'SELECT %s%s\nFROM `%s`\nWHERE %s%s%s' % params def order_by_as_sql(self): """ @@ -259,6 +261,11 @@ class QuerySet(object): """ Returns the number of matching model instances. """ + if self._distinct: + # Use a subquery, since a simple count won't be accurate + sql = u'SELECT count() FROM (%s)' % self.as_sql() + raw = self._database.raw(sql) + return int(raw) if raw else 0 return self._database.count(self._model_cls, self.conditions_as_sql()) def order_by(self, *field_names): @@ -296,7 +303,7 @@ class QuerySet(object): return qs def paginate(self, page_num=1, page_size=100): - ''' + """ Returns a single page of model instances that match the queryset. Note that `order_by` should be used first, to ensure a correct partitioning of records into pages. @@ -306,7 +313,7 @@ class QuerySet(object): The result is a namedtuple containing `objects` (list), `number_of_objects`, `pages_total`, `number` (of the current page), and `page_size`. - ''' + """ from .database import Page count = self.count() pages_total = int(ceil(count / float(page_size))) @@ -323,8 +330,17 @@ class QuerySet(object): page_size=page_size ) + def distinct(self): + """ + Adds a DISTINCT clause to the query, meaning that any duplicate rows + in the results will be omitted. + """ + qs = copy(self) + qs._distinct = True + return qs + def aggregate(self, *args, **kwargs): - ''' + """ Returns an `AggregateQuerySet` over this query, with `args` serving as grouping fields and `kwargs` serving as calculated fields. At least one calculated field is required. For example: @@ -337,7 +353,7 @@ class QuerySet(object): WHERE data > '2017-08-01' GROUP BY event_type ``` - ''' + """ return AggregateQuerySet(self, args, kwargs) @@ -368,6 +384,7 @@ class AggregateQuerySet(QuerySet): self._order_by = list(base_qs._order_by) self._q = list(base_qs._q) self._limits = base_qs._limits + self._distinct = base_qs._distinct def group_by(self, *args): """ @@ -398,15 +415,17 @@ class AggregateQuerySet(QuerySet): """ Returns the whole query as a SQL string. """ + distinct = 'DISTINCT ' if self._distinct else '' grouping = comma_join('`%s`' % field for field in self._grouping_fields) fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) params = dict( + distinct=distinct, grouping=grouping or "''", fields=fields, table=self._model_cls.table_name(), conds=self.conditions_as_sql() ) - sql = u'SELECT %(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params + sql = u'SELECT %(distinct)s%(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params if self._order_by: sql += '\nORDER BY ' + self.order_by_as_sql() if self._limits: diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index 49247d9..5ca3efd 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -20,6 +20,7 @@ class SystemPart(Model): OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'}) readonly = True + system = True database = StringField() # Name of the database where the table that this part belongs to is located. table = StringField() # Name of the table that this part belongs to. @@ -27,6 +28,10 @@ class SystemPart(Model): partition = StringField() # Name of the partition, in the format YYYYMM. name = StringField() # Name of the part. + # This field is present in the docs (https://clickhouse.yandex/docs/en/single/index.html#system-parts), + # but is absent in ClickHouse (in version 1.1.54245) + # replicated = UInt8Field() # Whether the part belongs to replicated data. + # Whether the part is used in a table, or is no longer needed and will be deleted soon. # Inactive parts remain after merging. active = UInt8Field() diff --git a/tests/sample_migrations/0010.py b/tests/sample_migrations/0010.py new file mode 100644 index 0000000..3892583 --- /dev/null +++ b/tests/sample_migrations/0010.py @@ -0,0 +1,6 @@ +from infi.clickhouse_orm import migrations +from ..test_migrations import * + +operations = [ + migrations.CreateTable(Model4Buffer) +] diff --git a/tests/sample_migrations/0011.py b/tests/sample_migrations/0011.py new file mode 100644 index 0000000..dd9d09e --- /dev/null +++ b/tests/sample_migrations/0011.py @@ -0,0 +1,6 @@ +from infi.clickhouse_orm import migrations +from ..test_migrations import * + +operations = [ + migrations.AlterTableWithBuffer(Model4Buffer_changed) +] diff --git a/tests/test_engines.py b/tests/test_engines.py index ddc3a85..65497ca 100644 --- a/tests/test_engines.py +++ b/tests/test_engines.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals import unittest from infi.clickhouse_orm.database import Database, DatabaseException -from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.models import Model, MergeModel from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * @@ -74,6 +74,56 @@ class EnginesTestCase(unittest.TestCase): engine = Memory() self._create_and_insert(TestModel) + def test_merge(self): + class TestModel1(SampleModel): + engine = TinyLog() + + class TestModel2(SampleModel): + engine = TinyLog() + + class TestMergeModel(MergeModel, SampleModel): + engine = Merge('^testmodel') + + self.database.create_table(TestModel1) + self.database.create_table(TestModel2) + self.database.create_table(TestMergeModel) + + # Insert operations are restricted for this model type + with self.assertRaises(DatabaseException): + self.database.insert([ + TestMergeModel(date='2017-01-01', event_id=23423, event_group=13, event_count=7, event_version=1) + ]) + + # Testing select + self.database.insert([ + TestModel1(date='2017-01-01', event_id=1, event_group=1, event_count=1, event_version=1) + ]) + self.database.insert([ + TestModel2(date='2017-01-02', event_id=2, event_group=2, event_count=2, event_version=2) + ]) + # event_uversion is materialized field. So * won't select it and it will be zero + res = self.database.select('SELECT *, event_uversion FROM $table ORDER BY event_id', model_class=TestMergeModel) + res = [row for row in res] + self.assertEqual(2, len(res)) + self.assertDictEqual({ + '_table': 'testmodel1', + 'date': datetime.date(2017, 1, 1), + 'event_id': 1, + 'event_group': 1, + 'event_count': 1, + 'event_version': 1, + 'event_uversion': 1 + }, res[0].to_dict(include_readonly=True)) + self.assertDictEqual({ + '_table': 'testmodel2', + 'date': datetime.date(2017, 1, 2), + 'event_id': 2, + 'event_group': 2, + 'event_count': 2, + 'event_version': 2, + 'event_uversion': 2 + }, res[1].to_dict(include_readonly=True)) + class SampleModel(Model): diff --git a/tests/test_migrations.py b/tests/test_migrations.py index 3478f9f..7e31c84 100644 --- a/tests/test_migrations.py +++ b/tests/test_migrations.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals import unittest from infi.clickhouse_orm.database import Database -from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.models import Model, BufferModel from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * from infi.clickhouse_orm.migrations import MigrationHistory @@ -61,6 +61,7 @@ class MigrationsTestCase(unittest.TestCase): self.assertTrue(self.tableExists(EnumModel1)) self.assertEquals(self.getTableFields(EnumModel2), [('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")]) + # Materialized fields and alias fields self.database.migrate('tests.sample_migrations', 8) self.assertTrue(self.tableExists(MaterializedModel)) self.assertEquals(self.getTableFields(MaterializedModel), @@ -69,6 +70,15 @@ class MigrationsTestCase(unittest.TestCase): self.assertTrue(self.tableExists(AliasModel)) self.assertEquals(self.getTableFields(AliasModel), [('date', 'Date'), ('date_alias', "Date")]) + # Buffer models creation and alteration + self.database.migrate('tests.sample_migrations', 10) + self.assertTrue(self.tableExists(Model4)) + self.assertTrue(self.tableExists(Model4Buffer)) + self.assertEquals(self.getTableFields(Model4), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')]) + self.assertEquals(self.getTableFields(Model4Buffer), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')]) + self.database.migrate('tests.sample_migrations', 11) + self.assertEquals(self.getTableFields(Model4), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')]) + self.assertEquals(self.getTableFields(Model4Buffer), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')]) # Several different models with the same table name, to simulate a table that changes over time @@ -159,3 +169,47 @@ class AliasModel(Model): @classmethod def table_name(cls): return 'alias_date' + + +class Model4(Model): + + date = DateField() + f1 = Int32Field() + f2 = StringField() + + engine = MergeTree('date', ('date',)) + + @classmethod + def table_name(cls): + return 'model4' + + +class Model4Buffer(BufferModel, Model4): + + engine = Buffer(Model4) + + @classmethod + def table_name(cls): + return 'model4buffer' + + +class Model4_changed(Model): + + date = DateField() + f3 = DateTimeField() + f2 = StringField() + + engine = MergeTree('date', ('date',)) + + @classmethod + def table_name(cls): + return 'model4' + + +class Model4Buffer_changed(BufferModel, Model4_changed): + + engine = Buffer(Model4_changed) + + @classmethod + def table_name(cls): + return 'model4buffer' diff --git a/tests/test_querysets.py b/tests/test_querysets.py index ad834bb..cbbc65d 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -21,8 +21,11 @@ class QuerySetTestCase(TestCaseWithData): def _test_qs(self, qs, expected_count): logging.info(qs.as_sql()) + count = 0 for instance in qs: - logging.info('\t%s' % instance.to_dict()) + count += 1 + logging.info('\t[%d]\t%s' % (count, instance.to_dict())) + self.assertEquals(count, expected_count) self.assertEquals(qs.count(), expected_count) def test_no_filtering(self): @@ -202,6 +205,11 @@ class QuerySetTestCase(TestCaseWithData): page = qs.paginate(1, 100) self.assertEquals(page.number_of_objects, 10) + def test_distinct(self): + qs = Person.objects_in(self.database).distinct() + self._test_qs(qs, 100) + self._test_qs(qs.only('first_name'), 94) + class AggregateTestCase(TestCaseWithData): @@ -310,6 +318,12 @@ class AggregateTestCase(TestCaseWithData): qs = qs.filter(weekday=1) self.assertEquals(qs.count(), 1) + def test_aggregate_with_distinct(self): + # In this case distinct has no effect + qs = Person.objects_in(self.database).aggregate(average_height='avg(height)').distinct() + print(qs.as_sql()) + self.assertEquals(qs.count(), 1) + Color = Enum('Color', u'red blue green yellow brown white black') diff --git a/tests/test_readonly.py b/tests/test_readonly.py index ae3d54f..facbaa0 100644 --- a/tests/test_readonly.py +++ b/tests/test_readonly.py @@ -1,12 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import unittest -import six -from infi.clickhouse_orm.database import Database, DatabaseException -from infi.clickhouse_orm.models import Model -from infi.clickhouse_orm.fields import * -from infi.clickhouse_orm.engines import * +from infi.clickhouse_orm.database import DatabaseException from .base_test_with_data import * @@ -45,15 +40,15 @@ class ReadonlyTestCase(TestCaseWithData): self.database.insert([m]) def test_create_readonly_table(self): - with self.assertRaises(DatabaseException): - self.database.create_table(ReadOnlyModel) + self.database.create_table(ReadOnlyModel) def test_drop_readonly_table(self): - with self.assertRaises(DatabaseException): - self.database.drop_table(ReadOnlyModel) + self.database.drop_table(ReadOnlyModel) class ReadOnlyModel(Model): readonly = True name = StringField() + date = DateField() + engine = MergeTree('date', ('name',)) diff --git a/tests/test_system_models.py b/tests/test_system_models.py index 1a3b49a..54b6650 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -2,14 +2,34 @@ from __future__ import unicode_literals import unittest from datetime import date import os -import shutil -from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.database import Database, DatabaseException from infi.clickhouse_orm.engines import * from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.models import Model from infi.clickhouse_orm.system_models import SystemPart +class SystemTest(unittest.TestCase): + def setUp(self): + self.database = Database('test-db') + + def tearDown(self): + self.database.drop_database() + + def test_insert_system(self): + m = SystemPart() + with self.assertRaises(DatabaseException): + self.database.insert([m]) + + def test_create_readonly_table(self): + with self.assertRaises(DatabaseException): + self.database.create_table(SystemTestModel) + + def test_drop_readonly_table(self): + with self.assertRaises(DatabaseException): + self.database.drop_table(SystemTestModel) + + class SystemPartTest(unittest.TestCase): BACKUP_DIRS = ['/var/lib/clickhouse/shadow', '/opt/clickhouse/shadow/'] @@ -75,3 +95,7 @@ class TestTable(Model): date_field = DateField() engine = MergeTree('date_field', ('date_field',)) + + +class SystemTestModel(Model): + system = True