Merge branch 'develop' of https://github.com/Infinidat/infi.clickhouse_orm into raw_migrations

# Conflicts:
#	docs/schema_migrations.md
#	tests/sample_migrations/0010.py
#	tests/sample_migrations/0011.py
This commit is contained in:
M1ha 2017-10-02 15:49:53 +05:00
commit 51a814732b
15 changed files with 238 additions and 63 deletions

View File

@ -1,6 +1,12 @@
Change Log
==========
v0.9.7
------
- Add `distinct` method to querysets
- Add `AlterTableWithBuffer` migration operation
- Support Merge engine (M1hacka)
v0.9.6
------
- Fix python3 compatibility (TvoroG)

View File

@ -7,7 +7,7 @@ infi.clickhouse_orm.database
### Database
Database instances connect to a specific ClickHouse database for running queries,
Database instances connect to a specific ClickHouse database for running queries,
inserting data and other operations.
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True)
@ -71,7 +71,7 @@ Insert records into the database.
Executes schema migrations.
- `migrations_package_name` - fully qualified name of the Python package
- `migrations_package_name` - fully qualified name of the Python package
containing the migrations.
- `up_to` - number of the last migration to apply.
@ -89,7 +89,7 @@ Selects records and returns a single page of model instances.
- `conditions`: optional SQL conditions (contents of the WHERE clause).
- `settings`: query settings to send as HTTP GET parameters
The result is a namedtuple containing `objects` (list), `number_of_objects`,
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
@ -128,7 +128,7 @@ infi.clickhouse_orm.models
A base class for ORM models. Each model class represents a ClickHouse table. For example:
class CPUStats(Model):
timestamp = DateTimeField()
cpu_id = UInt16Field()
@ -172,7 +172,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o
#### get_database()
Gets the `Database` that this model instance belongs to.
Gets the `Database` that this model instance belongs to.
Returns `None` unless the instance was read from the database or written to it.
@ -191,7 +191,7 @@ Returns a `QuerySet` for selecting instances of this model class.
#### set_database(db)
Sets the `Database` that this model instance belongs to.
Sets the `Database` that this model instance belongs to.
This is done automatically when the instance is read from the database or written to it.
@ -261,7 +261,7 @@ If omitted, it is assumed to be the names of all fields in the model, in order o
#### get_database()
Gets the `Database` that this model instance belongs to.
Gets the `Database` that this model instance belongs to.
Returns `None` unless the instance was read from the database or written to it.
@ -280,7 +280,7 @@ Returns a `QuerySet` for selecting instances of this model class.
#### set_database(db)
Sets the `Database` that this model instance belongs to.
Sets the `Database` that this model instance belongs to.
This is done automatically when the instance is read from the database or written to it.
@ -317,28 +317,28 @@ infi.clickhouse_orm.fields
Abstract base class for all field types.
#### Field(default=None, alias=None, materialized=None)
#### Field(default=None, alias=None, materialized=None, readonly=None)
### StringField
Extends Field
#### StringField(default=None, alias=None, materialized=None)
#### StringField(default=None, alias=None, materialized=None, readonly=None)
### DateField
Extends Field
#### DateField(default=None, alias=None, materialized=None)
#### DateField(default=None, alias=None, materialized=None, readonly=None)
### DateTimeField
Extends Field
#### DateTimeField(default=None, alias=None, materialized=None)
#### DateTimeField(default=None, alias=None, materialized=None, readonly=None)
### BaseIntField
@ -348,7 +348,7 @@ Extends Field
Abstract base class for all integer-type fields.
#### BaseIntField(default=None, alias=None, materialized=None)
#### BaseIntField(default=None, alias=None, materialized=None, readonly=None)
### BaseFloatField
@ -358,7 +358,7 @@ Extends Field
Abstract base class for all float-type fields.
#### BaseFloatField(default=None, alias=None, materialized=None)
#### BaseFloatField(default=None, alias=None, materialized=None, readonly=None)
### BaseEnumField
@ -368,14 +368,14 @@ Extends Field
Abstract base class for all enum-type fields.
#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None)
#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None, readonly=None)
### ArrayField
Extends Field
#### ArrayField(inner_field, default=None, alias=None, materialized=None)
#### ArrayField(inner_field, default=None, alias=None, materialized=None, readonly=None)
### NullableField
@ -389,91 +389,91 @@ Extends Field
Extends StringField
#### FixedStringField(length, default=None, alias=None, materialized=None)
#### FixedStringField(length, default=None, alias=None, materialized=None, readonly=None)
### UInt8Field
Extends BaseIntField
#### UInt8Field(default=None, alias=None, materialized=None)
#### UInt8Field(default=None, alias=None, materialized=None, readonly=None)
### UInt16Field
Extends BaseIntField
#### UInt16Field(default=None, alias=None, materialized=None)
#### UInt16Field(default=None, alias=None, materialized=None, readonly=None)
### UInt32Field
Extends BaseIntField
#### UInt32Field(default=None, alias=None, materialized=None)
#### UInt32Field(default=None, alias=None, materialized=None, readonly=None)
### UInt64Field
Extends BaseIntField
#### UInt64Field(default=None, alias=None, materialized=None)
#### UInt64Field(default=None, alias=None, materialized=None, readonly=None)
### Int8Field
Extends BaseIntField
#### Int8Field(default=None, alias=None, materialized=None)
#### Int8Field(default=None, alias=None, materialized=None, readonly=None)
### Int16Field
Extends BaseIntField
#### Int16Field(default=None, alias=None, materialized=None)
#### Int16Field(default=None, alias=None, materialized=None, readonly=None)
### Int32Field
Extends BaseIntField
#### Int32Field(default=None, alias=None, materialized=None)
#### Int32Field(default=None, alias=None, materialized=None, readonly=None)
### Int64Field
Extends BaseIntField
#### Int64Field(default=None, alias=None, materialized=None)
#### Int64Field(default=None, alias=None, materialized=None, readonly=None)
### Float32Field
Extends BaseFloatField
#### Float32Field(default=None, alias=None, materialized=None)
#### Float32Field(default=None, alias=None, materialized=None, readonly=None)
### Float64Field
Extends BaseFloatField
#### Float64Field(default=None, alias=None, materialized=None)
#### Float64Field(default=None, alias=None, materialized=None, readonly=None)
### Enum8Field
Extends BaseEnumField
#### Enum8Field(enum_cls, default=None, alias=None, materialized=None)
#### Enum8Field(enum_cls, default=None, alias=None, materialized=None, readonly=None)
### Enum16Field
Extends BaseEnumField
#### Enum16Field(enum_cls, default=None, alias=None, materialized=None)
#### Enum16Field(enum_cls, default=None, alias=None, materialized=None, readonly=None)
infi.clickhouse_orm.engines
@ -512,6 +512,19 @@ Read more [here](https://clickhouse.yandex/reference_en.html#Buffer).
#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000)
### Merge
Extends Engine
The Merge engine (not to be confused with MergeTree) does not store data itself,
but allows reading from any number of other tables simultaneously.
Writing to a table is not supported.
https://clickhouse.yandex/docs/en/single/index.html#document-table_engines/merge
#### Merge(table_regex)
### CollapsingMergeTree
Extends MergeTree
@ -585,6 +598,13 @@ Returns the contents of the query's `WHERE` clause as a string.
Returns the number of matching model instances.
#### distinct()
Adds a DISTINCT clause to the query, meaning that any duplicate rows
in the results will be omitted.
#### exclude(**kwargs)
@ -678,6 +698,13 @@ Returns the contents of the query's `WHERE` clause as a string.
Returns the number of rows after aggregation.
#### distinct()
Adds a DISTINCT clause to the query, meaning that any duplicate rows
in the results will be omitted.
#### exclude(**kwargs)

View File

@ -99,6 +99,16 @@ When some of the model fields aren't needed, it is more efficient to omit them f
qs = Person.objects_in(database).only('first_name', 'birthday')
Distinct
--------
Adds a DISTINCT clause to the query, meaning that any duplicate rows in the results will be omitted.
>>> Person.objects_in(database).only('first_name').count()
100
>>> Person.objects_in(database).only('first_name').distinct().count()
94
Slicing
-------

View File

@ -34,11 +34,13 @@ The following operations are supported:
**CreateTable**
A migration operation that creates a table for a given model class.
A migration operation that creates a table for a given model class. If the table already exists, the operation does nothing.
In case the model class is a `BufferModel`, the operation first creates the underlying on-disk table, and then creates the buffer table.
**DropTable**
A migration operation that drops the table of a given model class.
A migration operation that drops the table of a given model class. If the table does not exist, the operation does nothing.
**AlterTable**
@ -50,6 +52,13 @@ A migration operation that compares the table of a given model class to the mode
Default values are not altered by this operation.
**AlterTableWithBuffer**
A compound migration operation for altering a buffer table and its underlying on-disk table. The buffer table is dropped, the on-disk table is altered, and then the buffer table is re-created. This is the procedure recommended in the ClickHouse documentation for handling scenarios in which the underlying table needs to be modified.
Applying this migration operation to a regular table has the same effect as an `AlterTable` operation.
**RunPython**
A migration operation that runs a Python function during the migration.

View File

@ -20,6 +20,7 @@
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
* [Ordering](querysets.md#ordering)
* [Omitting Fields](querysets.md#omitting-fields)
* [Distinct](querysets.md#distinct)
* [Slicing](querysets.md#slicing)
* [Pagination](querysets.md#pagination)
* [Aggregation](querysets.md#aggregation)
@ -86,6 +87,7 @@
* [Memory](class_reference.md#memory)
* [MergeTree](class_reference.md#mergetree)
* [Buffer](class_reference.md#buffer)
* [Merge](class_reference.md#merge)
* [CollapsingMergeTree](class_reference.md#collapsingmergetree)
* [SummingMergeTree](class_reference.md#summingmergetree)
* [ReplacingMergeTree](class_reference.md#replacingmergetree)

View File

@ -51,7 +51,10 @@ def get_method_sig(method):
for arg in argspec.args:
default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)
if default_arg.has_default:
args.append("%s=%s" % (arg, default_arg.default_value))
val = default_arg.default_value
if isinstance(val, basestring):
val = '"' + val + '"'
args.append("%s=%s" % (arg, val))
else:
args.append(arg)
arg_index += 1

View File

@ -96,7 +96,7 @@ class FixedStringField(StringField):
def __init__(self, length, default=None, alias=None, materialized=None, readonly=None):
self._length = length
self.db_type = 'FixedString(%d)' % length
super(FixedStringField, self).__init__(default, alias, materialized,readonly)
super(FixedStringField, self).__init__(default, alias, materialized, readonly)
def to_python(self, value, timezone_in_use):
value = super(FixedStringField, self).to_python(value, timezone_in_use)

View File

@ -84,6 +84,25 @@ class AlterTable(Operation):
self._alter_table(database, 'MODIFY COLUMN %s %s' % model_field)
class AlterTableWithBuffer(Operation):
'''
A migration operation for altering a buffer table and its underlying on-disk table.
The buffer table is dropped, the on-disk table is altered, and then the buffer table
is re-created.
'''
def __init__(self, model_class):
self.model_class = model_class
def apply(self, database):
if issubclass(self.model_class, BufferModel):
DropTable(self.model_class).apply(database)
AlterTable(self.model_class.engine.main_model).apply(database)
CreateTable(self.model_class).apply(database)
else:
AlterTable(self.model_class).apply(database)
class DropTable(Operation):
'''
A migration operation that drops the table of a given model class.
@ -93,7 +112,7 @@ class DropTable(Operation):
self.model_class = model_class
def apply(self, database):
logger.info(' Drop table %s', self.model_class.__name__)
logger.info(' Drop table %s', self.model_class.table_name())
database.drop_table(self.model_class)

View File

@ -187,6 +187,7 @@ class QuerySet(object):
self._q = []
self._fields = []
self._limits = None
self._distinct = False
def __iter__(self):
"""
@ -228,14 +229,15 @@ class QuerySet(object):
"""
Returns the whole query as a SQL string.
"""
distinct = 'DISTINCT ' if self._distinct else ''
fields = '*'
if self._fields:
fields = comma_join('`%s`' % field for field in self._fields)
ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
limit = '\nLIMIT %d, %d' % self._limits if self._limits else ''
params = (fields, self._model_cls.table_name(),
params = (distinct, fields, self._model_cls.table_name(),
self.conditions_as_sql(), ordering, limit)
return u'SELECT %s\nFROM `%s`\nWHERE %s%s%s' % params
return u'SELECT %s%s\nFROM `%s`\nWHERE %s%s%s' % params
def order_by_as_sql(self):
"""
@ -259,6 +261,11 @@ class QuerySet(object):
"""
Returns the number of matching model instances.
"""
if self._distinct:
# Use a subquery, since a simple count won't be accurate
sql = u'SELECT count() FROM (%s)' % self.as_sql()
raw = self._database.raw(sql)
return int(raw) if raw else 0
return self._database.count(self._model_cls, self.conditions_as_sql())
def order_by(self, *field_names):
@ -296,7 +303,7 @@ class QuerySet(object):
return qs
def paginate(self, page_num=1, page_size=100):
'''
"""
Returns a single page of model instances that match the queryset.
Note that `order_by` should be used first, to ensure a correct
partitioning of records into pages.
@ -306,7 +313,7 @@ class QuerySet(object):
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
'''
"""
from .database import Page
count = self.count()
pages_total = int(ceil(count / float(page_size)))
@ -323,8 +330,17 @@ class QuerySet(object):
page_size=page_size
)
def distinct(self):
"""
Adds a DISTINCT clause to the query, meaning that any duplicate rows
in the results will be omitted.
"""
qs = copy(self)
qs._distinct = True
return qs
def aggregate(self, *args, **kwargs):
'''
"""
Returns an `AggregateQuerySet` over this query, with `args` serving as
grouping fields and `kwargs` serving as calculated fields. At least one
calculated field is required. For example:
@ -337,7 +353,7 @@ class QuerySet(object):
WHERE data > '2017-08-01'
GROUP BY event_type
```
'''
"""
return AggregateQuerySet(self, args, kwargs)
@ -368,6 +384,7 @@ class AggregateQuerySet(QuerySet):
self._order_by = list(base_qs._order_by)
self._q = list(base_qs._q)
self._limits = base_qs._limits
self._distinct = base_qs._distinct
def group_by(self, *args):
"""
@ -398,15 +415,17 @@ class AggregateQuerySet(QuerySet):
"""
Returns the whole query as a SQL string.
"""
distinct = 'DISTINCT ' if self._distinct else ''
grouping = comma_join('`%s`' % field for field in self._grouping_fields)
fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
params = dict(
distinct=distinct,
grouping=grouping or "''",
fields=fields,
table=self._model_cls.table_name(),
conds=self.conditions_as_sql()
)
sql = u'SELECT %(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
sql = u'SELECT %(distinct)s%(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
if self._order_by:
sql += '\nORDER BY ' + self.order_by_as_sql()
if self._limits:

View File

@ -1,9 +1,6 @@
from infi.clickhouse_orm import migrations
from ..test_migrations import *
operations = [
migrations.RunSQL("INSERT INTO `mig` (date, f1, f3, f4) VALUES ('2016-01-01', 1, 1, 'test') "),
migrations.RunSQL([
"INSERT INTO `mig` (date, f1, f3, f4) VALUES ('2016-01-02', 2, 2, 'test2') ",
"INSERT INTO `mig` (date, f1, f3, f4) VALUES ('2016-01-03', 3, 3, 'test3') ",
])
]
migrations.CreateTable(Model4Buffer)
]

View File

@ -1,15 +1,6 @@
import datetime
from infi.clickhouse_orm import migrations
from test_migrations import Model3
def forward(database):
database.insert([
Model3(date=datetime.date(2016, 1, 4), f1=4, f3=1, f4='test4')
])
from ..test_migrations import *
operations = [
migrations.RunPython(forward)
]
migrations.AlterTableWithBuffer(Model4Buffer_changed)
]

View File

@ -0,0 +1,9 @@
from infi.clickhouse_orm import migrations
operations = [
migrations.RunSQL("INSERT INTO `mig` (date, f1, f3, f4) VALUES ('2016-01-01', 1, 1, 'test') "),
migrations.RunSQL([
"INSERT INTO `mig` (date, f1, f3, f4) VALUES ('2016-01-02', 2, 2, 'test2') ",
"INSERT INTO `mig` (date, f1, f3, f4) VALUES ('2016-01-03', 3, 3, 'test3') ",
])
]

View File

@ -0,0 +1,15 @@
import datetime
from infi.clickhouse_orm import migrations
from test_migrations import Model3
def forward(database):
database.insert([
Model3(date=datetime.date(2016, 1, 4), f1=4, f3=1, f4='test4')
])
operations = [
migrations.RunPython(forward)
]

View File

@ -2,7 +2,7 @@ from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.models import Model, BufferModel
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from infi.clickhouse_orm.migrations import MigrationHistory
@ -61,6 +61,7 @@ class MigrationsTestCase(unittest.TestCase):
self.assertTrue(self.tableExists(EnumModel1))
self.assertEquals(self.getTableFields(EnumModel2),
[('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")])
# Materialized fields and alias fields
self.database.migrate('tests.sample_migrations', 8)
self.assertTrue(self.tableExists(MaterializedModel))
self.assertEquals(self.getTableFields(MaterializedModel),
@ -69,13 +70,22 @@ class MigrationsTestCase(unittest.TestCase):
self.assertTrue(self.tableExists(AliasModel))
self.assertEquals(self.getTableFields(AliasModel),
[('date', 'Date'), ('date_alias', "Date")])
# Buffer models creation and alteration
self.database.migrate('tests.sample_migrations', 10)
self.assertTrue(self.tableExists(Model4))
self.assertTrue(self.tableExists(Model4Buffer))
self.assertEquals(self.getTableFields(Model4), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
self.assertEquals(self.getTableFields(Model4Buffer), [('date', 'Date'), ('f1', 'Int32'), ('f2', 'String')])
self.database.migrate('tests.sample_migrations', 11)
self.assertEquals(self.getTableFields(Model4), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])
self.assertEquals(self.getTableFields(Model4Buffer), [('date', 'Date'), ('f3', 'DateTime'), ('f2', 'String')])
self.database.migrate('tests.sample_migrations', 12)
self.assertEqual(self.database.count(Model3), 3)
data = [item.f1 for item in self.database.select('SELECT f1 FROM $table ORDER BY f1', model_class=Model3)]
self.assertListEqual(data, [1, 2, 3])
self.database.migrate('tests.sample_migrations', 11)
self.database.migrate('tests.sample_migrations', 13)
self.assertEqual(self.database.count(Model3), 4)
data = [item.f1 for item in self.database.select('SELECT f1 FROM $table ORDER BY f1', model_class=Model3)]
self.assertListEqual(data, [1, 2, 3, 4])
@ -169,3 +179,47 @@ class AliasModel(Model):
@classmethod
def table_name(cls):
return 'alias_date'
class Model4(Model):
date = DateField()
f1 = Int32Field()
f2 = StringField()
engine = MergeTree('date', ('date',))
@classmethod
def table_name(cls):
return 'model4'
class Model4Buffer(BufferModel, Model4):
engine = Buffer(Model4)
@classmethod
def table_name(cls):
return 'model4buffer'
class Model4_changed(Model):
date = DateField()
f3 = DateTimeField()
f2 = StringField()
engine = MergeTree('date', ('date',))
@classmethod
def table_name(cls):
return 'model4'
class Model4Buffer_changed(BufferModel, Model4_changed):
engine = Buffer(Model4_changed)
@classmethod
def table_name(cls):
return 'model4buffer'

View File

@ -21,8 +21,11 @@ class QuerySetTestCase(TestCaseWithData):
def _test_qs(self, qs, expected_count):
logging.info(qs.as_sql())
count = 0
for instance in qs:
logging.info('\t%s' % instance.to_dict())
count += 1
logging.info('\t[%d]\t%s' % (count, instance.to_dict()))
self.assertEquals(count, expected_count)
self.assertEquals(qs.count(), expected_count)
def test_no_filtering(self):
@ -202,6 +205,11 @@ class QuerySetTestCase(TestCaseWithData):
page = qs.paginate(1, 100)
self.assertEquals(page.number_of_objects, 10)
def test_distinct(self):
qs = Person.objects_in(self.database).distinct()
self._test_qs(qs, 100)
self._test_qs(qs.only('first_name'), 94)
class AggregateTestCase(TestCaseWithData):
@ -310,6 +318,12 @@ class AggregateTestCase(TestCaseWithData):
qs = qs.filter(weekday=1)
self.assertEquals(qs.count(), 1)
def test_aggregate_with_distinct(self):
# In this case distinct has no effect
qs = Person.objects_in(self.database).aggregate(average_height='avg(height)').distinct()
print(qs.as_sql())
self.assertEquals(qs.count(), 1)
Color = Enum('Color', u'red blue green yellow brown white black')