From b9fce94b042bc7b273efa1457b2a5d68b0d690d3 Mon Sep 17 00:00:00 2001 From: M1ha Date: Tue, 31 Jan 2017 12:43:11 +0500 Subject: [PATCH 01/16] 1) Added readonly models 2) Added SystemPart models in order to execute partition operations --- src/infi/clickhouse_orm/database.py | 19 ++++ src/infi/clickhouse_orm/models.py | 10 +- src/infi/clickhouse_orm/system_models.py | 137 +++++++++++++++++++++++ tests/test_database.py | 28 ++++- tests/test_system_models.py | 69 ++++++++++++ 5 files changed, 257 insertions(+), 6 deletions(-) create mode 100644 src/infi/clickhouse_orm/system_models.py create mode 100644 tests/test_system_models.py diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 73bbc13..892d92a 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -35,9 +35,13 @@ class Database(object): def create_table(self, model_class): # TODO check that model has an engine + if model_class.readonly: + raise DatabaseException("You can't create read only table") self._send(model_class.create_table_sql(self.db_name)) def drop_table(self, model_class): + if model_class.readonly: + raise DatabaseException("You can't drop read only table") self._send(model_class.drop_table_sql(self.db_name)) def insert(self, model_instances, batch_size=1000): @@ -48,6 +52,10 @@ class Database(object): except StopIteration: return # model_instances is empty model_class = first_instance.__class__ + + if first_instance.readonly: + raise DatabaseException("You can't insert into read only table") + def gen(): yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8') yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8') @@ -84,6 +92,17 @@ class Database(object): for line in lines: yield model_class.from_tsv(line, field_names) + def raw(self, query, settings=None, stream=False): + """ + Performs raw query to database. Returns its output + :param query: Query to execute + :param settings: Query settings to send as query GET parameters + :param stream: If flag is true, Http response from ClickHouse will be streamed. + :return: Query execution result + """ + query = self._substitute(query, None) + return self._send(query, settings=settings, stream=stream).text + def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None): count = self.count(model_class, conditions) pages_total = int(ceil(count / float(page_size))) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 16f6f77..09127e3 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -1,10 +1,10 @@ -from .utils import escape, parse_tsv -from .engines import * -from .fields import Field +from logging import getLogger from six import with_metaclass -from logging import getLogger +from .fields import Field +from .utils import parse_tsv + logger = getLogger('clickhouse_orm') @@ -68,6 +68,7 @@ class Model(with_metaclass(ModelBase)): ''' engine = None + readonly = False def __init__(self, **kwargs): ''' @@ -160,4 +161,3 @@ class Model(with_metaclass(ModelBase)): fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields) - diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py new file mode 100644 index 0000000..25ea0e0 --- /dev/null +++ b/src/infi/clickhouse_orm/system_models.py @@ -0,0 +1,137 @@ +""" +This file contains system readonly models that can be got from database +https://clickhouse.yandex/reference_en.html#System tables +""" +from .database import Database # Can't import it globally, due to circular import +from .fields import * +from .models import Model +from .engines import MergeTree + + +class SystemPart(Model): + """ + Contains information about parts of a table in the MergeTree family. + This model operates only fields, described in the reference. Other fields are ignored. + https://clickhouse.yandex/reference_en.html#system.parts + """ + OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'}) + + readonly = True + + database = StringField() # Name of the database where the table that this part belongs to is located. + table = StringField() # Name of the table that this part belongs to. + engine = StringField() # Name of the table engine, without parameters. + partition = StringField() # Name of the partition, in the format YYYYMM. + name = StringField() # Name of the part. + replicated = UInt8Field() # Whether the part belongs to replicated data. + + # Whether the part is used in a table, or is no longer needed and will be deleted soon. + # Inactive parts remain after merging. + active = UInt8Field() + + # Number of marks - multiply by the index granularity (usually 8192) + # to get the approximate number of rows in the part. + marks = UInt64Field() + + bytes = UInt64Field() # Number of bytes when compressed. + + # Time the directory with the part was modified. Usually corresponds to the part's creation time. + modification_time = DateTimeField() + remove_time = DateTimeField() # For inactive parts only - the time when the part became inactive. + + # The number of places where the part is used. A value greater than 2 indicates + # that this part participates in queries or merges. + refcount = UInt32Field() + + @classmethod + def table_name(cls): + return 'system.parts' + + """ + Next methods return SQL for some operations, which can be done with partitions + https://clickhouse.yandex/reference_en.html#Manipulations with partitions and parts + """ + def _partition_operation_sql(self, db, operation, settings=None, from_part=None): + """ + Performs some operation over partition + :param db: Database object to execute operation on + :param operation: Operation to execute from SystemPart.OPERATIONS set + :param settings: Settings for executing request to ClickHouse over db.raw() method + :return: Operation execution result + """ + operation = operation.upper() + assert operation in self.OPERATIONS, "operation must be in [%s]" % ', '.join(self.OPERATIONS) + sql = "ALTER TABLE `%s`.`%s` %s PARTITION '%s'" % (db.db_name, self.table, operation, self.partition) + if from_part is not None: + sql += " FROM %s" % from_part + db.raw(sql, settings=settings, stream=False) + + def detach(self, database, settings=None): + """ + Move a partition to the 'detached' directory and forget it. + :param database: Database object to execute operation on + :param settings: Settings for executing request to ClickHouse over db.raw() method + :return: SQL Query + """ + return self._partition_operation_sql(database, 'DETACH', settings=settings) + + def drop(self, database, settings=None): + """ + Delete a partition + :param database: Database object to execute operation on + :param settings: Settings for executing request to ClickHouse over db.raw() method + :return: SQL Query + """ + return self._partition_operation_sql(database, 'DROP', settings=settings) + + def attach(self, database, settings=None): + """ + Add a new part or partition from the 'detached' directory to the table. + :param database: Database object to execute operation on + :param settings: Settings for executing request to ClickHouse over db.raw() method + :return: SQL Query + """ + return self._partition_operation_sql(database, 'ATTACH', settings=settings) + + def freeze(self, database, settings=None): + """ + Create a backup of a partition. + :param database: Database object to execute operation on + :param settings: Settings for executing request to ClickHouse over db.raw() method + :return: SQL Query + """ + return self._partition_operation_sql(database, 'FREEZE', settings=settings) + + def fetch(self, database, zookeeper_path, settings=None): + """ + Download a partition from another server. + :param database: Database object to execute operation on + :param zookeeper_path: Path in zookeeper to fetch from + :param settings: Settings for executing request to ClickHouse over db.raw() method + :return: SQL Query + """ + return self._partition_operation_sql(database, 'FETCH', settings=settings, from_part=zookeeper_path) + + @classmethod + def get_active(cls, database): + """ + Get all active parts + :param database: A database object to fetch data from. + :return: A list of SystemPart objects + """ + assert isinstance(database, Database), "database must be database.Database class instance" + field_names = ','.join([f[0] for f in cls._fields]) + return database.select("SELECT %s FROM %s WHERE active AND database='%s'" % + (field_names, cls.table_name(), database.db_name), model_class=cls) + + @classmethod + def all(cls, database): + """ + Gets all data from system.parts database + :param database: + :return: + """ + assert isinstance(database, Database), "database must be database.Database class instance" + field_names = ','.join([f[0] for f in cls._fields]) + return database.select("SELECT %s FROM %s WHERE database='%s'" % + (field_names, cls.table_name(), database.db_name), model_class=cls) diff --git a/tests/test_database.py b/tests/test_database.py index 30c25d3..8e9ba16 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -2,7 +2,7 @@ import unittest -from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.database import Database, DatabaseException from infi.clickhouse_orm.models import Model from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * @@ -121,6 +121,25 @@ class DatabaseTestCase(unittest.TestCase): for entry in data: yield Person(**entry) + def test_raw(self): + self._insert_and_check(self._sample_data(), len(data)) + query = "SELECT * FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name" + results = self.database.raw(query) + self.assertEqual(results, "Whitney\tDurham\t1977-09-15\t1.72\nWhitney\tScott\t1971-07-04\t1.7\n") + + def test_insert_readonly(self): + m = ReadOnlyModel(name='readonly') + with self.assertRaises(DatabaseException): + self.database.insert([m]) + + def test_create_readonly_table(self): + with self.assertRaises(DatabaseException): + self.database.create_table(ReadOnlyModel) + + def test_drop_readonly_table(self): + with self.assertRaises(DatabaseException): + self.database.drop_table(ReadOnlyModel) + class Person(Model): @@ -132,6 +151,13 @@ class Person(Model): engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday')) +class ReadOnlyModel(Model): + readonly = True + + name = StringField() + + + data = [ {"first_name": "Abdul", "last_name": "Hester", "birthday": "1970-12-02", "height": "1.63"}, {"first_name": "Adam", "last_name": "Goodman", "birthday": "1986-01-07", "height": "1.74"}, diff --git a/tests/test_system_models.py b/tests/test_system_models.py new file mode 100644 index 0000000..db97e5d --- /dev/null +++ b/tests/test_system_models.py @@ -0,0 +1,69 @@ +import unittest +from datetime import date + +import os +import shutil +from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.engines import * +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.system_models import SystemPart + + +class SystemPartTest(unittest.TestCase): + BACKUP_DIR = '/opt/clickhouse/shadow/' + + def setUp(self): + self.database = Database('test-db') + self.database.create_table(TestTable) + self.database.insert([TestTable(date_field=date.today())]) + + def tearDown(self): + self.database.drop_database() + + def _get_backups_count(self): + _, dirnames, _ = next(os.walk(self.BACKUP_DIR)) + return len(dirnames) + + def test_get_all(self): + parts = SystemPart.all(self.database) + self.assertEqual(len(list(parts)), 1) + + def test_get_active(self): + parts = list(SystemPart.get_active(self.database)) + self.assertEqual(len(parts), 1) + parts[0].detach(self.database) + self.assertEqual(len(list(SystemPart.get_active(self.database))), 0) + + def test_attach_detach(self): + parts = list(SystemPart.get_active(self.database)) + self.assertEqual(len(parts), 1) + parts[0].detach(self.database) + self.assertEqual(len(list(SystemPart.get_active(self.database))), 0) + parts[0].attach(self.database) + self.assertEqual(len(list(SystemPart.get_active(self.database))), 1) + + def test_drop(self): + parts = list(SystemPart.get_active(self.database)) + parts[0].drop(self.database) + self.assertEqual(len(list(SystemPart.get_active(self.database))), 0) + + def test_freeze(self): + parts = list(SystemPart.all(self.database)) + # There can be other backups in the folder + backups_count = self._get_backups_count() + parts[0].freeze(self.database) + backup_number = self._get_backups_count() + self.assertEqual(backup_number, backups_count + 1) + # Clean created backup + shutil.rmtree(self.BACKUP_DIR + '{0}'.format(backup_number)) + + def test_fetch(self): + # TODO Not tested, as I have no replication set + pass + + +class TestTable(Model): + date_field = DateField() + + engine = MergeTree('date_field', ('date_field',)) From 9f81ed27c6f84d7d0fef1638577091e87f0f4864 Mon Sep 17 00:00:00 2001 From: M1ha Date: Tue, 31 Jan 2017 18:13:46 +0500 Subject: [PATCH 02/16] 1) Added to_dict model method 2) Fixed bug in test_freeze cleaning, if backups don't contain all directory names (e. g. 1, 2, 3, 6, 7 - count=5, created_backup=8, not 6) --- src/infi/clickhouse_orm/models.py | 12 ++++++++++++ tests/test_models.py | 24 ++++++++++++++++++++++++ tests/test_system_models.py | 12 ++++++------ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 09127e3..78033d4 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -161,3 +161,15 @@ class Model(with_metaclass(ModelBase)): fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields) + def to_dict(self, insertable_only=False, field_names=None): + ''' + Returns the instance's column values as a dict. + :param bool insertable_only: If True, returns only fields, that can be inserted into database + :param field_names: An iterable of field names to return + ''' + fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields + if field_names is not None: + fields = [f for f in fields if f[0] in field_names] + + data = self.__dict__ + return {name: field.to_python(data[name]) for name, field in fields} diff --git a/tests/test_models.py b/tests/test_models.py index 4b259a9..239f0c8 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -55,6 +55,29 @@ class ModelTestCase(unittest.TestCase): instance.int_field = '99' self.assertEquals(instance.int_field, 99) + def test_to_dict(self): + instance = SimpleModel(date_field='1973-12-06', int_field='100', float_field='7') + self.assertDictEqual(instance.to_dict(), { + "date_field": datetime.date(1973, 12, 6), + "int_field": 100, + "float_field": 7.0, + "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + "alias_field": 0.0, + 'str_field': 'dozo' + }) + self.assertDictEqual(instance.to_dict(insertable_only=True), { + "date_field": datetime.date(1973, 12, 6), + "int_field": 100, + "float_field": 7.0, + "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + 'str_field': 'dozo' + }) + self.assertDictEqual( + instance.to_dict(insertable_only=True, field_names=('int_field', 'alias_field', 'datetime_field')), { + "int_field": 100, + "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc) + }) + class SimpleModel(Model): @@ -63,6 +86,7 @@ class SimpleModel(Model): str_field = StringField(default='dozo') int_field = Int32Field(default=17) float_field = Float32Field() + alias_field = Float32Field(alias='float_field') engine = MergeTree('date_field', ('int_field', 'date_field')) diff --git a/tests/test_system_models.py b/tests/test_system_models.py index db97e5d..d8e075a 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -21,9 +21,9 @@ class SystemPartTest(unittest.TestCase): def tearDown(self): self.database.drop_database() - def _get_backups_count(self): + def _get_backups(self): _, dirnames, _ = next(os.walk(self.BACKUP_DIR)) - return len(dirnames) + return dirnames def test_get_all(self): parts = SystemPart.all(self.database) @@ -51,12 +51,12 @@ class SystemPartTest(unittest.TestCase): def test_freeze(self): parts = list(SystemPart.all(self.database)) # There can be other backups in the folder - backups_count = self._get_backups_count() + prev_backups = set(self._get_backups()) parts[0].freeze(self.database) - backup_number = self._get_backups_count() - self.assertEqual(backup_number, backups_count + 1) + backups = set(self._get_backups()) + self.assertEqual(len(backups), len(prev_backups) + 1) # Clean created backup - shutil.rmtree(self.BACKUP_DIR + '{0}'.format(backup_number)) + shutil.rmtree(self.BACKUP_DIR + '{0}'.format(list(backups - prev_backups)[0])) def test_fetch(self): # TODO Not tested, as I have no replication set From 64435c77fa5646ef461c1a8e1444a7661ab8b61e Mon Sep 17 00:00:00 2001 From: M1ha Date: Wed, 8 Feb 2017 10:31:19 +0500 Subject: [PATCH 03/16] 1) Replaced is_insertable() field mehtod with readonly property (unification with model and tables) 2) Method SystemPart.all() was replaced with get() 3) Added conditions parameter to SystemPart.get() and SystemPart.get_active() methods. --- README.rst | 23 +++++++++++++++++++ src/infi/clickhouse_orm/fields.py | 5 ++-- src/infi/clickhouse_orm/models.py | 4 ++-- src/infi/clickhouse_orm/system_models.py | 29 ++++++++++++++---------- tests/test_system_models.py | 10 ++++++-- 5 files changed, 53 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index 70ab8af..a4fe755 100644 --- a/README.rst +++ b/README.rst @@ -177,6 +177,29 @@ You can optionally pass conditions to the query:: Note that ``order_by`` must be chosen so that the ordering is unique, otherwise there might be inconsistencies in the pagination (such as an instance that appears on two different pages). +System models +------------- +`Clickhouse docs ` +System models are read only models for implementing part of the system's functionality, +and for providing access to information about how the system is working. + +Usage example: + + >>>> from infi.clickhouse_orm import system_models + >>>> print(system_models.SystemPart.all()) + +Currently the fllowing system models are supported: + +=================== ======== ================= =================================================== +Class DB Table Pythonic Type Comments +=================== ======== ================= =================================================== +SystemPart + +Partitions and parts +-------------------- +`ClickHouse docs ` + + Schema Migrations ----------------- diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 51b67de..7114505 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -68,8 +68,9 @@ class Field(object): else: return self.db_type - def is_insertable(self): - return self.alias is None and self.materialized is None + @property + def readonly(self): + return self.alias is not None or self.materialized is not None class StringField(Field): diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 78033d4..aa93c32 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -158,7 +158,7 @@ class Model(with_metaclass(ModelBase)): ''' data = self.__dict__ - fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields + fields = [f for f in self._fields if not f[1].readonly] if insertable_only else self._fields return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields) def to_dict(self, insertable_only=False, field_names=None): @@ -167,7 +167,7 @@ class Model(with_metaclass(ModelBase)): :param bool insertable_only: If True, returns only fields, that can be inserted into database :param field_names: An iterable of field names to return ''' - fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields + fields = [f for f in self._fields if not f[1].readonly] if insertable_only else self._fields if field_names is not None: fields = [f for f in fields if f[0] in field_names] diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index 25ea0e0..ce94cb6 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -113,25 +113,30 @@ class SystemPart(Model): return self._partition_operation_sql(database, 'FETCH', settings=settings, from_part=zookeeper_path) @classmethod - def get_active(cls, database): + def get(cls, database, conditions=""): """ - Get all active parts + Get all data from system.parts table :param database: A database object to fetch data from. + :param conditions: WHERE clause conditions. Database condition is added automatically :return: A list of SystemPart objects """ assert isinstance(database, Database), "database must be database.Database class instance" + assert isinstance(conditions, str), "conditions must be a string" + if conditions: + conditions += " AND" field_names = ','.join([f[0] for f in cls._fields]) - return database.select("SELECT %s FROM %s WHERE active AND database='%s'" % - (field_names, cls.table_name(), database.db_name), model_class=cls) + return database.select("SELECT %s FROM %s WHERE %s database='%s'" % + (field_names, cls.table_name(), conditions, database.db_name), model_class=cls) @classmethod - def all(cls, database): + def get_active(cls, database, conditions=""): """ - Gets all data from system.parts database - :param database: - :return: + Gets active data from system.parts table + :param database: A database object to fetch data from. + :param conditions: WHERE clause conditions. Database and active conditions are added automatically + :return: A list of SystemPart objects """ - assert isinstance(database, Database), "database must be database.Database class instance" - field_names = ','.join([f[0] for f in cls._fields]) - return database.select("SELECT %s FROM %s WHERE database='%s'" % - (field_names, cls.table_name(), database.db_name), model_class=cls) + if conditions: + conditions += ' AND ' + conditions += 'active' + return SystemPart.get(database, conditions=conditions) diff --git a/tests/test_system_models.py b/tests/test_system_models.py index d8e075a..ac1616c 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -26,7 +26,7 @@ class SystemPartTest(unittest.TestCase): return dirnames def test_get_all(self): - parts = SystemPart.all(self.database) + parts = SystemPart.get(self.database) self.assertEqual(len(list(parts)), 1) def test_get_active(self): @@ -35,6 +35,12 @@ class SystemPartTest(unittest.TestCase): parts[0].detach(self.database) self.assertEqual(len(list(SystemPart.get_active(self.database))), 0) + def test_get_conditions(self): + parts = list(SystemPart.get(self.database, conditions="table='testtable'")) + self.assertEqual(len(parts), 1) + parts = list(SystemPart.get(self.database, conditions="table='othertable'")) + self.assertEqual(len(parts), 0) + def test_attach_detach(self): parts = list(SystemPart.get_active(self.database)) self.assertEqual(len(parts), 1) @@ -49,7 +55,7 @@ class SystemPartTest(unittest.TestCase): self.assertEqual(len(list(SystemPart.get_active(self.database))), 0) def test_freeze(self): - parts = list(SystemPart.all(self.database)) + parts = list(SystemPart.get(self.database)) # There can be other backups in the folder prev_backups = set(self._get_backups()) parts[0].freeze(self.database) From 0ae76edc5ee98bc428504c972897cd2ab5fc1db1 Mon Sep 17 00:00:00 2001 From: M1ha Date: Wed, 8 Feb 2017 10:56:30 +0500 Subject: [PATCH 04/16] Corrected docs --- README.rst | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index a4fe755..cbc0d0a 100644 --- a/README.rst +++ b/README.rst @@ -179,7 +179,9 @@ inconsistencies in the pagination (such as an instance that appears on two diffe System models ------------- + `Clickhouse docs ` + System models are read only models for implementing part of the system's functionality, and for providing access to information about how the system is working. @@ -188,17 +190,34 @@ Usage example: >>>> from infi.clickhouse_orm import system_models >>>> print(system_models.SystemPart.all()) -Currently the fllowing system models are supported: +Currently the following system models are supported: -=================== ======== ================= =================================================== -Class DB Table Pythonic Type Comments -=================== ======== ================= =================================================== -SystemPart +=================== ============ =================================================== +Class DB Table Comments +=================== ============ =================================================== +SystemPart system.parts Gives methods to work with partitions. See below. Partitions and parts -------------------- + `ClickHouse docs ` +A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part. + +=================== ======================= ============================================================================================= +Method Parameters Comments +=================== ======================= ============================================================================================= +get(static) database, conditions="" Gets database partitions, filtered by conditions +get_active(static) database, conditions="" Gets only active (not detached or dropped) partitions, filtered by conditions +detach database, settings=None Detaches the partition. Settings is a dict of params to pass to http request +drop database, settings=None Drops the partition. Settings is a dict of params to pass to http request +attach database, settings=None Attaches already detached partition. Settings is a dict of params to pass to http request +freeze database, settings=None Freezes (makes backup) of the partition. Settings is a dict of params to pass to http request +fetch database, settings=None Fetches partition. Settings is a dict of params to pass to http request + +``Note``: system.parts stores information for all databases. To be correct, +SystemPart model was designed to receive only given database parts. + Schema Migrations ----------------- From 5e63e813e0e5dcd56440279f99db1066577d8138 Mon Sep 17 00:00:00 2001 From: M1ha Date: Wed, 8 Feb 2017 10:59:28 +0500 Subject: [PATCH 05/16] Corrected tables in docs --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index cbc0d0a..4615adb 100644 --- a/README.rst +++ b/README.rst @@ -177,6 +177,7 @@ You can optionally pass conditions to the query:: Note that ``order_by`` must be chosen so that the ordering is unique, otherwise there might be inconsistencies in the pagination (such as an instance that appears on two different pages). + System models ------------- @@ -196,6 +197,8 @@ Currently the following system models are supported: Class DB Table Comments =================== ============ =================================================== SystemPart system.parts Gives methods to work with partitions. See below. +=================== ============ =================================================== + Partitions and parts -------------------- @@ -214,6 +217,7 @@ drop database, settings=None Drops the partition. Settings is attach database, settings=None Attaches already detached partition. Settings is a dict of params to pass to http request freeze database, settings=None Freezes (makes backup) of the partition. Settings is a dict of params to pass to http request fetch database, settings=None Fetches partition. Settings is a dict of params to pass to http request +=================== ======================= ============================================================================================= ``Note``: system.parts stores information for all databases. To be correct, SystemPart model was designed to receive only given database parts. From 907cff9704bb8e972409d2a12b1914047ff871d1 Mon Sep 17 00:00:00 2001 From: M1ha Date: Wed, 8 Feb 2017 11:05:13 +0500 Subject: [PATCH 06/16] Corrected links and code --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 4615adb..151880b 100644 --- a/README.rst +++ b/README.rst @@ -181,15 +181,15 @@ inconsistencies in the pagination (such as an instance that appears on two diffe System models ------------- -`Clickhouse docs ` +`Clickhouse docs `_. System models are read only models for implementing part of the system's functionality, and for providing access to information about how the system is working. -Usage example: +Usage example:: >>>> from infi.clickhouse_orm import system_models - >>>> print(system_models.SystemPart.all()) + >>>> print(system_models.SystemPart.get(Database())) Currently the following system models are supported: @@ -203,7 +203,7 @@ SystemPart system.parts Gives methods to work with partitions. See Partitions and parts -------------------- -`ClickHouse docs ` +`ClickHouse docs `_. A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part. @@ -303,7 +303,7 @@ Working with materialized and alias fields ClickHouse provides an opportunity to create MATERIALIZED and ALIAS Fields. -See documentation `here `. +See documentation `here `_. Both field types can't be inserted into database directly. These field values are ignored, when using database.insert() method. From 80eddeb293d182f75c631c4e0fbc11d487720e61 Mon Sep 17 00:00:00 2001 From: M1ha Date: Wed, 8 Feb 2017 11:37:40 +0500 Subject: [PATCH 07/16] Fixed bugs, connected to merge --- src/infi/clickhouse_orm/models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 92c29bb..4248f89 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -164,15 +164,16 @@ class Model(with_metaclass(ModelBase)): fields = [f for f in fields if not f[1].readonly] return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields) - def to_dict(self, insertable_only=False, field_names=None): + def to_dict(self, insertable_only=False, field_names=None, timezone_in_use=pytz.utc): ''' Returns the instance's column values as a dict. :param bool insertable_only: If True, returns only fields, that can be inserted into database :param field_names: An iterable of field names to return + :param timezone_in_use: timezone to convert DateField and DateTimeField. ''' fields = [f for f in self._fields if not f[1].readonly] if insertable_only else self._fields if field_names is not None: fields = [f for f in fields if f[0] in field_names] data = self.__dict__ - return {name: field.to_python(data[name]) for name, field in fields} + return {name: field.to_python(data[name], timezone_in_use) for name, field in fields} From 683e1acc6ac963dda0ba4d2aa253d1c0cfaa2fbb Mon Sep 17 00:00:00 2001 From: M1ha Date: Wed, 8 Feb 2017 15:23:27 +0500 Subject: [PATCH 08/16] Fixed: 1) Added partition working to readme 2) replaced insertable_only parameter with include_readonly 3) Added empty string alias and materialized field control --- README.rst | 10 ++++++++-- src/infi/clickhouse_orm/database.py | 4 ++-- src/infi/clickhouse_orm/fields.py | 8 +++++--- src/infi/clickhouse_orm/models.py | 18 ++++++++---------- tests/test_models.py | 4 ++-- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/README.rst b/README.rst index ce8c2b8..5627ab1 100644 --- a/README.rst +++ b/README.rst @@ -193,8 +193,14 @@ and for providing access to information about how the system is working. Usage example:: - >>>> from infi.clickhouse_orm import system_models - >>>> print(system_models.SystemPart.get(Database())) + from infi.clickhouse_orm.database import Database + from infi.clickhouse_orm.system_models import SystemPart + db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger') + partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database + if len(partitions) > 0: + partitions = sorted(partitions, key=lambda obj: obj.name) # Partition name is YYYYMM, so we can sort so + partitions[0].freeze(db) # Make a backup in /opt/clickhouse/shadow directory + partitions[0].drop() # Dropped partition Currently the following system models are supported: diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 48a7c91..6ef5d97 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -62,11 +62,11 @@ class Database(object): def gen(): yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8') - yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8') + yield (first_instance.to_tsv(include_readonly=False) + '\n').encode('utf-8') # Collect lines in batches of batch_size batch = [] for instance in i: - batch.append(instance.to_tsv(insertable_only=True)) + batch.append(instance.to_tsv(include_readonly=False)) if len(batch) >= batch_size: # Return the current batch of lines yield ('\n'.join(batch) + '\n').encode('utf-8') diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 0d5b50d..1113dd6 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -16,8 +16,10 @@ class Field(object): def __init__(self, default=None, alias=None, materialized=None): assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \ "Only one of default, alias and materialized parameters can be given" - assert alias is None or isinstance(alias, str), "Alias field must be string field name, if given" - assert materialized is None or isinstance(materialized, str), "Materialized field must be string, if given" + assert alias is None or isinstance(alias, str) and alias != "",\ + "Alias field must be string field name, if given" + assert materialized is None or isinstance(materialized, str) and alias != "",\ + "Materialized field must be string, if given" self.creation_counter = Field.creation_counter Field.creation_counter += 1 @@ -72,7 +74,7 @@ class Field(object): @property def readonly(self): - return self.alias is not None or self.materialized is not None + return bool(self.alias or self.materialized) class StringField(Field): diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 4248f89..80cafb9 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -153,27 +153,25 @@ class Model(with_metaclass(ModelBase)): kwargs[name] = field.to_python(next(values), timezone_in_use) return cls(**kwargs) - def to_tsv(self, insertable_only=False): + def to_tsv(self, include_readonly=True): ''' Returns the instance's column values as a tab-separated line. A newline is not included. - :param bool insertable_only: If True, returns only fields, that can be inserted into database + :param bool include_readonly: If False, returns only fields, that can be inserted into database ''' data = self.__dict__ - fields = self._fields - if insertable_only: - fields = [f for f in fields if not f[1].readonly] + fields = self._fields if include_readonly else [f for f in self._fields if not f[1].readonly] return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields) - def to_dict(self, insertable_only=False, field_names=None, timezone_in_use=pytz.utc): + def to_dict(self, include_readonly=True, field_names=None): ''' Returns the instance's column values as a dict. - :param bool insertable_only: If True, returns only fields, that can be inserted into database + :param bool include_readonly: If False, returns only fields, that can be inserted into database :param field_names: An iterable of field names to return - :param timezone_in_use: timezone to convert DateField and DateTimeField. ''' - fields = [f for f in self._fields if not f[1].readonly] if insertable_only else self._fields + fields = self._fields if include_readonly else [f for f in self._fields if not f[1].readonly] + if field_names is not None: fields = [f for f in fields if f[0] in field_names] data = self.__dict__ - return {name: field.to_python(data[name], timezone_in_use) for name, field in fields} + return {name: data[name] for name, field in fields} diff --git a/tests/test_models.py b/tests/test_models.py index 239f0c8..b52a2c6 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -65,7 +65,7 @@ class ModelTestCase(unittest.TestCase): "alias_field": 0.0, 'str_field': 'dozo' }) - self.assertDictEqual(instance.to_dict(insertable_only=True), { + self.assertDictEqual(instance.to_dict(include_readonly=False), { "date_field": datetime.date(1973, 12, 6), "int_field": 100, "float_field": 7.0, @@ -73,7 +73,7 @@ class ModelTestCase(unittest.TestCase): 'str_field': 'dozo' }) self.assertDictEqual( - instance.to_dict(insertable_only=True, field_names=('int_field', 'alias_field', 'datetime_field')), { + instance.to_dict(include_readonly=False, field_names=('int_field', 'alias_field', 'datetime_field')), { "int_field": 100, "datetime_field": datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc) }) From b6967901fac8758e4a52f42c1ca9fddcacad65b1 Mon Sep 17 00:00:00 2001 From: M1ha Date: Thu, 9 Feb 2017 09:50:32 +0500 Subject: [PATCH 09/16] Moved code example to another section --- README.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index 5627ab1..7f20fb2 100644 --- a/README.rst +++ b/README.rst @@ -191,17 +191,6 @@ System models System models are read only models for implementing part of the system's functionality, and for providing access to information about how the system is working. -Usage example:: - - from infi.clickhouse_orm.database import Database - from infi.clickhouse_orm.system_models import SystemPart - db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger') - partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database - if len(partitions) > 0: - partitions = sorted(partitions, key=lambda obj: obj.name) # Partition name is YYYYMM, so we can sort so - partitions[0].freeze(db) # Make a backup in /opt/clickhouse/shadow directory - partitions[0].drop() # Dropped partition - Currently the following system models are supported: =================== ============ =================================================== @@ -230,6 +219,17 @@ freeze database, settings=None Freezes (makes backup) of the pa fetch database, settings=None Fetches partition. Settings is a dict of params to pass to http request =================== ======================= ============================================================================================= +Usage example:: + + from infi.clickhouse_orm.database import Database + from infi.clickhouse_orm.system_models import SystemPart + db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger') + partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database + if len(partitions) > 0: + partitions = sorted(partitions, key=lambda obj: obj.name) # Partition name is YYYYMM, so we can sort so + partitions[0].freeze(db) # Make a backup in /opt/clickhouse/shadow directory + partitions[0].drop() # Dropped partition + ``Note``: system.parts stores information for all databases. To be correct, SystemPart model was designed to receive only given database parts. From ff04ab4db8a6cd2ba5ae2f64ca12ffc0aa04caa0 Mon Sep 17 00:00:00 2001 From: M1ha Date: Thu, 9 Feb 2017 16:49:25 +0500 Subject: [PATCH 10/16] 1) Removed database params for working with SystemPart operations 2) Added _database attribute to each model, got through select --- src/infi/clickhouse_orm/database.py | 2 +- src/infi/clickhouse_orm/models.py | 24 +++++++++++++++-- src/infi/clickhouse_orm/system_models.py | 34 ++++++++++-------------- tests/test_system_models.py | 10 +++---- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 6ef5d97..5c804c0 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -94,7 +94,7 @@ class Database(object): field_types = parse_tsv(next(lines)) model_class = model_class or ModelBase.create_ad_hoc_model(zip(field_names, field_types)) for line in lines: - yield model_class.from_tsv(line, field_names, self.server_timezone) + yield model_class.from_tsv(line, field_names, self.server_timezone, self) def raw(self, query, settings=None, stream=False): """ diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 80cafb9..4b7b1b2 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -79,6 +79,9 @@ class Model(with_metaclass(ModelBase)): Unrecognized field names will cause an AttributeError. ''' super(Model, self).__init__() + + self._database = None + # Assign field values from keyword arguments for name, value in kwargs.items(): field = self.get_field(name) @@ -102,6 +105,17 @@ class Model(with_metaclass(ModelBase)): field.validate(value) super(Model, self).__setattr__(name, value) + def set_database(self, db): + """ + Sets _database attribute for current model instance + :param db: Database instance + :return: None + """ + # This can not be imported globally due to circular import + from .database import Database + assert isinstance(db, Database), "database must be database.Database instance" + self._database = db + def get_field(self, name): ''' Get a Field instance given its name, or None if not found. @@ -138,11 +152,12 @@ class Model(with_metaclass(ModelBase)): return 'DROP TABLE IF EXISTS `%s`.`%s`' % (db_name, cls.table_name()) @classmethod - def from_tsv(cls, line, field_names=None, timezone_in_use=pytz.utc): + def from_tsv(cls, line, field_names=None, timezone_in_use=pytz.utc, database=None): ''' Create a model instance from a tab-separated line. The line may or may not include a newline. The field_names list must match the fields defined in the model, but does not have to include all of them. If omitted, it is assumed to be the names of all fields in the model, in order of definition. + :param database: if given, model receives database ''' from six import next field_names = field_names or [name for name, field in cls._fields] @@ -151,7 +166,12 @@ class Model(with_metaclass(ModelBase)): for name in field_names: field = getattr(cls, name) kwargs[name] = field.to_python(next(values), timezone_in_use) - return cls(**kwargs) + + obj = cls(**kwargs) + if database is not None: + obj.set_database(database) + + return obj def to_tsv(self, include_readonly=True): ''' diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index ce94cb6..8c66550 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -2,10 +2,9 @@ This file contains system readonly models that can be got from database https://clickhouse.yandex/reference_en.html#System tables """ -from .database import Database # Can't import it globally, due to circular import +from .database import Database from .fields import * from .models import Model -from .engines import MergeTree class SystemPart(Model): @@ -51,7 +50,7 @@ class SystemPart(Model): Next methods return SQL for some operations, which can be done with partitions https://clickhouse.yandex/reference_en.html#Manipulations with partitions and parts """ - def _partition_operation_sql(self, db, operation, settings=None, from_part=None): + def _partition_operation_sql(self, operation, settings=None, from_part=None): """ Performs some operation over partition :param db: Database object to execute operation on @@ -61,56 +60,51 @@ class SystemPart(Model): """ operation = operation.upper() assert operation in self.OPERATIONS, "operation must be in [%s]" % ', '.join(self.OPERATIONS) - sql = "ALTER TABLE `%s`.`%s` %s PARTITION '%s'" % (db.db_name, self.table, operation, self.partition) + sql = "ALTER TABLE `%s`.`%s` %s PARTITION '%s'" % (self._database.db_name, self.table, operation, self.partition) if from_part is not None: sql += " FROM %s" % from_part - db.raw(sql, settings=settings, stream=False) + self._database.raw(sql, settings=settings, stream=False) - def detach(self, database, settings=None): + def detach(self, settings=None): """ Move a partition to the 'detached' directory and forget it. - :param database: Database object to execute operation on :param settings: Settings for executing request to ClickHouse over db.raw() method :return: SQL Query """ - return self._partition_operation_sql(database, 'DETACH', settings=settings) + return self._partition_operation_sql('DETACH', settings=settings) - def drop(self, database, settings=None): + def drop(self, settings=None): """ Delete a partition - :param database: Database object to execute operation on :param settings: Settings for executing request to ClickHouse over db.raw() method :return: SQL Query """ - return self._partition_operation_sql(database, 'DROP', settings=settings) + return self._partition_operation_sql('DROP', settings=settings) - def attach(self, database, settings=None): + def attach(self, settings=None): """ Add a new part or partition from the 'detached' directory to the table. - :param database: Database object to execute operation on :param settings: Settings for executing request to ClickHouse over db.raw() method :return: SQL Query """ - return self._partition_operation_sql(database, 'ATTACH', settings=settings) + return self._partition_operation_sql('ATTACH', settings=settings) - def freeze(self, database, settings=None): + def freeze(self, settings=None): """ Create a backup of a partition. - :param database: Database object to execute operation on :param settings: Settings for executing request to ClickHouse over db.raw() method :return: SQL Query """ - return self._partition_operation_sql(database, 'FREEZE', settings=settings) + return self._partition_operation_sql('FREEZE', settings=settings) - def fetch(self, database, zookeeper_path, settings=None): + def fetch(self, zookeeper_path, settings=None): """ Download a partition from another server. - :param database: Database object to execute operation on :param zookeeper_path: Path in zookeeper to fetch from :param settings: Settings for executing request to ClickHouse over db.raw() method :return: SQL Query """ - return self._partition_operation_sql(database, 'FETCH', settings=settings, from_part=zookeeper_path) + return self._partition_operation_sql('FETCH', settings=settings, from_part=zookeeper_path) @classmethod def get(cls, database, conditions=""): diff --git a/tests/test_system_models.py b/tests/test_system_models.py index ac1616c..756e5cd 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -32,7 +32,7 @@ class SystemPartTest(unittest.TestCase): def test_get_active(self): parts = list(SystemPart.get_active(self.database)) self.assertEqual(len(parts), 1) - parts[0].detach(self.database) + parts[0].detach() self.assertEqual(len(list(SystemPart.get_active(self.database))), 0) def test_get_conditions(self): @@ -44,21 +44,21 @@ class SystemPartTest(unittest.TestCase): def test_attach_detach(self): parts = list(SystemPart.get_active(self.database)) self.assertEqual(len(parts), 1) - parts[0].detach(self.database) + parts[0].detach() self.assertEqual(len(list(SystemPart.get_active(self.database))), 0) - parts[0].attach(self.database) + parts[0].attach() self.assertEqual(len(list(SystemPart.get_active(self.database))), 1) def test_drop(self): parts = list(SystemPart.get_active(self.database)) - parts[0].drop(self.database) + parts[0].drop() self.assertEqual(len(list(SystemPart.get_active(self.database))), 0) def test_freeze(self): parts = list(SystemPart.get(self.database)) # There can be other backups in the folder prev_backups = set(self._get_backups()) - parts[0].freeze(self.database) + parts[0].freeze() backups = set(self._get_backups()) self.assertEqual(len(backups), len(prev_backups) + 1) # Clean created backup From 59ff18f09accdea3cc8f46a0a0731456cc2ee448 Mon Sep 17 00:00:00 2001 From: M1ha Date: Thu, 9 Feb 2017 16:52:05 +0500 Subject: [PATCH 11/16] Edited docs (SystemPart) --- README.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 7f20fb2..4842360 100644 --- a/README.rst +++ b/README.rst @@ -212,11 +212,11 @@ Method Parameters Comments =================== ======================= ============================================================================================= get(static) database, conditions="" Gets database partitions, filtered by conditions get_active(static) database, conditions="" Gets only active (not detached or dropped) partitions, filtered by conditions -detach database, settings=None Detaches the partition. Settings is a dict of params to pass to http request -drop database, settings=None Drops the partition. Settings is a dict of params to pass to http request -attach database, settings=None Attaches already detached partition. Settings is a dict of params to pass to http request -freeze database, settings=None Freezes (makes backup) of the partition. Settings is a dict of params to pass to http request -fetch database, settings=None Fetches partition. Settings is a dict of params to pass to http request +detach settings=None Detaches the partition. Settings is a dict of params to pass to http request +drop settings=None Drops the partition. Settings is a dict of params to pass to http request +attach settings=None Attaches already detached partition. Settings is a dict of params to pass to http request +freeze settings=None Freezes (makes backup) of the partition. Settings is a dict of params to pass to http request +fetch settings=None Fetches partition. Settings is a dict of params to pass to http request =================== ======================= ============================================================================================= Usage example:: @@ -227,7 +227,7 @@ Usage example:: partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database if len(partitions) > 0: partitions = sorted(partitions, key=lambda obj: obj.name) # Partition name is YYYYMM, so we can sort so - partitions[0].freeze(db) # Make a backup in /opt/clickhouse/shadow directory + partitions[0].freeze() # Make a backup in /opt/clickhouse/shadow directory partitions[0].drop() # Dropped partition ``Note``: system.parts stores information for all databases. To be correct, From dc14d523c17f1955f7d2d8eb646d44f513812da3 Mon Sep 17 00:00:00 2001 From: M1ha Date: Thu, 9 Feb 2017 17:10:48 +0500 Subject: [PATCH 12/16] 1) Added get_database method to Model 2) Added some assertions in tests for adding _database attribute in selects and inserts 3) database.insert() method sets _database --- src/infi/clickhouse_orm/database.py | 2 ++ src/infi/clickhouse_orm/models.py | 7 +++++++ tests/test_database.py | 8 ++++++++ 3 files changed, 17 insertions(+) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 5c804c0..2be8534 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -62,10 +62,12 @@ class Database(object): def gen(): yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8') + first_instance.set_database(self) yield (first_instance.to_tsv(include_readonly=False) + '\n').encode('utf-8') # Collect lines in batches of batch_size batch = [] for instance in i: + instance.set_database(self) batch.append(instance.to_tsv(include_readonly=False)) if len(batch) >= batch_size: # Return the current batch of lines diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 4b7b1b2..6b82244 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -116,6 +116,13 @@ class Model(with_metaclass(ModelBase)): assert isinstance(db, Database), "database must be database.Database instance" self._database = db + def get_database(self): + """ + Gets _database attribute for current model instance + :return: database.Database instance, model was inserted or selected from or None + """ + return self._database + def get_field(self, name): ''' Get a Field instance given its name, or None if not found. diff --git a/tests/test_database.py b/tests/test_database.py index a3ad15c..aa7a4df 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -24,6 +24,8 @@ class DatabaseTestCase(unittest.TestCase): def _insert_and_check(self, data, count): self.database.insert(data) self.assertEquals(count, self.database.count(Person)) + for instance in data: + self.assertEquals(self.database, instance.get_database()) def test_insert__generator(self): self._insert_and_check(self._sample_data(), len(data)) @@ -53,6 +55,8 @@ class DatabaseTestCase(unittest.TestCase): self.assertEquals(results[0].height, 1.72) self.assertEquals(results[1].last_name, 'Scott') self.assertEquals(results[1].height, 1.70) + self.assertEqual(results[0].get_database(), self.database) + self.assertEqual(results[1].get_database(), self.database) def test_select_partial_fields(self): self._insert_and_check(self._sample_data(), len(data)) @@ -63,6 +67,8 @@ class DatabaseTestCase(unittest.TestCase): self.assertEquals(results[0].height, 0) # default value self.assertEquals(results[1].last_name, 'Scott') self.assertEquals(results[1].height, 0) # default value + self.assertEqual(results[0].get_database(), self.database) + self.assertEqual(results[1].get_database(), self.database) def test_select_ad_hoc_model(self): self._insert_and_check(self._sample_data(), len(data)) @@ -74,6 +80,8 @@ class DatabaseTestCase(unittest.TestCase): self.assertEquals(results[0].height, 1.72) self.assertEquals(results[1].last_name, 'Scott') self.assertEquals(results[1].height, 1.70) + self.assertEqual(results[0].get_database(), self.database) + self.assertEqual(results[1].get_database(), self.database) def test_pagination(self): self._insert_and_check(self._sample_data(), len(data)) From 76718426bfba89f80f26d8c667c3d77ebb7894a6 Mon Sep 17 00:00:00 2001 From: M1ha Date: Thu, 6 Apr 2017 10:54:00 +0500 Subject: [PATCH 13/16] Fixed bug with StringIO import, incompatible with python 3 --- src/infi/clickhouse_orm/database.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 7226f82..f65399e 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -50,7 +50,7 @@ class Database(object): def insert(self, model_instances, batch_size=1000): from six import next - from cStringIO import StringIO + from io import BytesIO i = iter(model_instances) try: first_instance = next(i) @@ -62,7 +62,7 @@ class Database(object): raise DatabaseException("You can't insert into read only table") def gen(): - buf = StringIO() + buf = BytesIO() buf.write(self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')) first_instance.set_database(self) buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8')) @@ -78,7 +78,7 @@ class Database(object): # Return the current batch of lines yield buf.getvalue() # Start a new batch - buf = StringIO() + buf = BytesIO() lines = 0 # Return any remaining lines in partial batch if lines: From 533e6e187aadcc7bfe9865278a2ec4a24cf07c91 Mon Sep 17 00:00:00 2001 From: M1ha Date: Thu, 6 Apr 2017 11:10:33 +0500 Subject: [PATCH 14/16] Fixed "TypeError: a bytes-like object is required, not 'str'" on \n insertions --- src/infi/clickhouse_orm/database.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index f65399e..1103a1b 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -55,7 +55,7 @@ class Database(object): try: first_instance = next(i) except StopIteration: - return # model_instances is empty + return # model_instances is empty model_class = first_instance.__class__ if first_instance.readonly: @@ -66,13 +66,13 @@ class Database(object): buf.write(self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')) first_instance.set_database(self) buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8')) - buf.write('\n') + buf.write('\n'.encode('utf-8')) # Collect lines in batches of batch_size lines = 2 for instance in i: instance.set_database(self) buf.write(instance.to_tsv(include_readonly=False).encode('utf-8')) - buf.write('\n') + buf.write('\n'.encode('utf-8')) lines += 1 if lines >= batch_size: # Return the current batch of lines From dfbf277c87bf69245a0bf06e096e2367a558dae5 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 6 Apr 2017 15:11:00 +0300 Subject: [PATCH 15/16] add a few development eggs for profiling --- buildout.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/buildout.cfg b/buildout.cfg index ad10a3e..e3de92a 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -49,6 +49,9 @@ eggs = ${project:name} enum34 infi.unittest infi.traceback + memory_profiler + profilehooks + psutil zc.buildout scripts = ipython nosetests From 2a2bd2ce29dbb4322591026f9a42d1b8f98ea750 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 6 Apr 2017 15:12:39 +0300 Subject: [PATCH 16/16] Releasing v0.8.2 --- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4579012..e933d06 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,10 @@ Change Log ========== +v0.8.2 +------ +- Fix broken Python 3 support (M1hacka) + v0.8.1 ------ - Add support for ReplacingMergeTree (leenr)