mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2024-11-22 17:16:34 +03:00
1) Added readonly models
2) Added SystemPart models in order to execute partition operations
This commit is contained in:
parent
2509b5b2e3
commit
b9fce94b04
|
@ -35,9 +35,13 @@ class Database(object):
|
||||||
|
|
||||||
def create_table(self, model_class):
|
def create_table(self, model_class):
|
||||||
# TODO check that model has an engine
|
# TODO check that model has an engine
|
||||||
|
if model_class.readonly:
|
||||||
|
raise DatabaseException("You can't create read only table")
|
||||||
self._send(model_class.create_table_sql(self.db_name))
|
self._send(model_class.create_table_sql(self.db_name))
|
||||||
|
|
||||||
def drop_table(self, model_class):
|
def drop_table(self, model_class):
|
||||||
|
if model_class.readonly:
|
||||||
|
raise DatabaseException("You can't drop read only table")
|
||||||
self._send(model_class.drop_table_sql(self.db_name))
|
self._send(model_class.drop_table_sql(self.db_name))
|
||||||
|
|
||||||
def insert(self, model_instances, batch_size=1000):
|
def insert(self, model_instances, batch_size=1000):
|
||||||
|
@ -48,6 +52,10 @@ class Database(object):
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
return # model_instances is empty
|
return # model_instances is empty
|
||||||
model_class = first_instance.__class__
|
model_class = first_instance.__class__
|
||||||
|
|
||||||
|
if first_instance.readonly:
|
||||||
|
raise DatabaseException("You can't insert into read only table")
|
||||||
|
|
||||||
def gen():
|
def gen():
|
||||||
yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
|
yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
|
||||||
yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8')
|
yield (first_instance.to_tsv(insertable_only=True) + '\n').encode('utf-8')
|
||||||
|
@ -84,6 +92,17 @@ class Database(object):
|
||||||
for line in lines:
|
for line in lines:
|
||||||
yield model_class.from_tsv(line, field_names)
|
yield model_class.from_tsv(line, field_names)
|
||||||
|
|
||||||
|
def raw(self, query, settings=None, stream=False):
|
||||||
|
"""
|
||||||
|
Performs raw query to database. Returns its output
|
||||||
|
:param query: Query to execute
|
||||||
|
:param settings: Query settings to send as query GET parameters
|
||||||
|
:param stream: If flag is true, Http response from ClickHouse will be streamed.
|
||||||
|
:return: Query execution result
|
||||||
|
"""
|
||||||
|
query = self._substitute(query, None)
|
||||||
|
return self._send(query, settings=settings, stream=stream).text
|
||||||
|
|
||||||
def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None):
|
def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None):
|
||||||
count = self.count(model_class, conditions)
|
count = self.count(model_class, conditions)
|
||||||
pages_total = int(ceil(count / float(page_size)))
|
pages_total = int(ceil(count / float(page_size)))
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
from .utils import escape, parse_tsv
|
from logging import getLogger
|
||||||
from .engines import *
|
|
||||||
from .fields import Field
|
|
||||||
|
|
||||||
from six import with_metaclass
|
from six import with_metaclass
|
||||||
|
|
||||||
from logging import getLogger
|
from .fields import Field
|
||||||
|
from .utils import parse_tsv
|
||||||
|
|
||||||
logger = getLogger('clickhouse_orm')
|
logger = getLogger('clickhouse_orm')
|
||||||
|
|
||||||
|
|
||||||
|
@ -68,6 +68,7 @@ class Model(with_metaclass(ModelBase)):
|
||||||
'''
|
'''
|
||||||
|
|
||||||
engine = None
|
engine = None
|
||||||
|
readonly = False
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
'''
|
'''
|
||||||
|
@ -160,4 +161,3 @@ class Model(with_metaclass(ModelBase)):
|
||||||
fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields
|
fields = [f for f in self._fields if f[1].is_insertable()] if insertable_only else self._fields
|
||||||
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields)
|
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields)
|
||||||
|
|
||||||
|
|
||||||
|
|
137
src/infi/clickhouse_orm/system_models.py
Normal file
137
src/infi/clickhouse_orm/system_models.py
Normal file
|
@ -0,0 +1,137 @@
|
||||||
|
"""
|
||||||
|
This file contains system readonly models that can be got from database
|
||||||
|
https://clickhouse.yandex/reference_en.html#System tables
|
||||||
|
"""
|
||||||
|
from .database import Database # Can't import it globally, due to circular import
|
||||||
|
from .fields import *
|
||||||
|
from .models import Model
|
||||||
|
from .engines import MergeTree
|
||||||
|
|
||||||
|
|
||||||
|
class SystemPart(Model):
|
||||||
|
"""
|
||||||
|
Contains information about parts of a table in the MergeTree family.
|
||||||
|
This model operates only fields, described in the reference. Other fields are ignored.
|
||||||
|
https://clickhouse.yandex/reference_en.html#system.parts
|
||||||
|
"""
|
||||||
|
OPERATIONS = frozenset({'DETACH', 'DROP', 'ATTACH', 'FREEZE', 'FETCH'})
|
||||||
|
|
||||||
|
readonly = True
|
||||||
|
|
||||||
|
database = StringField() # Name of the database where the table that this part belongs to is located.
|
||||||
|
table = StringField() # Name of the table that this part belongs to.
|
||||||
|
engine = StringField() # Name of the table engine, without parameters.
|
||||||
|
partition = StringField() # Name of the partition, in the format YYYYMM.
|
||||||
|
name = StringField() # Name of the part.
|
||||||
|
replicated = UInt8Field() # Whether the part belongs to replicated data.
|
||||||
|
|
||||||
|
# Whether the part is used in a table, or is no longer needed and will be deleted soon.
|
||||||
|
# Inactive parts remain after merging.
|
||||||
|
active = UInt8Field()
|
||||||
|
|
||||||
|
# Number of marks - multiply by the index granularity (usually 8192)
|
||||||
|
# to get the approximate number of rows in the part.
|
||||||
|
marks = UInt64Field()
|
||||||
|
|
||||||
|
bytes = UInt64Field() # Number of bytes when compressed.
|
||||||
|
|
||||||
|
# Time the directory with the part was modified. Usually corresponds to the part's creation time.
|
||||||
|
modification_time = DateTimeField()
|
||||||
|
remove_time = DateTimeField() # For inactive parts only - the time when the part became inactive.
|
||||||
|
|
||||||
|
# The number of places where the part is used. A value greater than 2 indicates
|
||||||
|
# that this part participates in queries or merges.
|
||||||
|
refcount = UInt32Field()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def table_name(cls):
|
||||||
|
return 'system.parts'
|
||||||
|
|
||||||
|
"""
|
||||||
|
Next methods return SQL for some operations, which can be done with partitions
|
||||||
|
https://clickhouse.yandex/reference_en.html#Manipulations with partitions and parts
|
||||||
|
"""
|
||||||
|
def _partition_operation_sql(self, db, operation, settings=None, from_part=None):
|
||||||
|
"""
|
||||||
|
Performs some operation over partition
|
||||||
|
:param db: Database object to execute operation on
|
||||||
|
:param operation: Operation to execute from SystemPart.OPERATIONS set
|
||||||
|
:param settings: Settings for executing request to ClickHouse over db.raw() method
|
||||||
|
:return: Operation execution result
|
||||||
|
"""
|
||||||
|
operation = operation.upper()
|
||||||
|
assert operation in self.OPERATIONS, "operation must be in [%s]" % ', '.join(self.OPERATIONS)
|
||||||
|
sql = "ALTER TABLE `%s`.`%s` %s PARTITION '%s'" % (db.db_name, self.table, operation, self.partition)
|
||||||
|
if from_part is not None:
|
||||||
|
sql += " FROM %s" % from_part
|
||||||
|
db.raw(sql, settings=settings, stream=False)
|
||||||
|
|
||||||
|
def detach(self, database, settings=None):
|
||||||
|
"""
|
||||||
|
Move a partition to the 'detached' directory and forget it.
|
||||||
|
:param database: Database object to execute operation on
|
||||||
|
:param settings: Settings for executing request to ClickHouse over db.raw() method
|
||||||
|
:return: SQL Query
|
||||||
|
"""
|
||||||
|
return self._partition_operation_sql(database, 'DETACH', settings=settings)
|
||||||
|
|
||||||
|
def drop(self, database, settings=None):
|
||||||
|
"""
|
||||||
|
Delete a partition
|
||||||
|
:param database: Database object to execute operation on
|
||||||
|
:param settings: Settings for executing request to ClickHouse over db.raw() method
|
||||||
|
:return: SQL Query
|
||||||
|
"""
|
||||||
|
return self._partition_operation_sql(database, 'DROP', settings=settings)
|
||||||
|
|
||||||
|
def attach(self, database, settings=None):
|
||||||
|
"""
|
||||||
|
Add a new part or partition from the 'detached' directory to the table.
|
||||||
|
:param database: Database object to execute operation on
|
||||||
|
:param settings: Settings for executing request to ClickHouse over db.raw() method
|
||||||
|
:return: SQL Query
|
||||||
|
"""
|
||||||
|
return self._partition_operation_sql(database, 'ATTACH', settings=settings)
|
||||||
|
|
||||||
|
def freeze(self, database, settings=None):
|
||||||
|
"""
|
||||||
|
Create a backup of a partition.
|
||||||
|
:param database: Database object to execute operation on
|
||||||
|
:param settings: Settings for executing request to ClickHouse over db.raw() method
|
||||||
|
:return: SQL Query
|
||||||
|
"""
|
||||||
|
return self._partition_operation_sql(database, 'FREEZE', settings=settings)
|
||||||
|
|
||||||
|
def fetch(self, database, zookeeper_path, settings=None):
|
||||||
|
"""
|
||||||
|
Download a partition from another server.
|
||||||
|
:param database: Database object to execute operation on
|
||||||
|
:param zookeeper_path: Path in zookeeper to fetch from
|
||||||
|
:param settings: Settings for executing request to ClickHouse over db.raw() method
|
||||||
|
:return: SQL Query
|
||||||
|
"""
|
||||||
|
return self._partition_operation_sql(database, 'FETCH', settings=settings, from_part=zookeeper_path)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_active(cls, database):
|
||||||
|
"""
|
||||||
|
Get all active parts
|
||||||
|
:param database: A database object to fetch data from.
|
||||||
|
:return: A list of SystemPart objects
|
||||||
|
"""
|
||||||
|
assert isinstance(database, Database), "database must be database.Database class instance"
|
||||||
|
field_names = ','.join([f[0] for f in cls._fields])
|
||||||
|
return database.select("SELECT %s FROM %s WHERE active AND database='%s'" %
|
||||||
|
(field_names, cls.table_name(), database.db_name), model_class=cls)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def all(cls, database):
|
||||||
|
"""
|
||||||
|
Gets all data from system.parts database
|
||||||
|
:param database:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
assert isinstance(database, Database), "database must be database.Database class instance"
|
||||||
|
field_names = ','.join([f[0] for f in cls._fields])
|
||||||
|
return database.select("SELECT %s FROM %s WHERE database='%s'" %
|
||||||
|
(field_names, cls.table_name(), database.db_name), model_class=cls)
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from infi.clickhouse_orm.database import Database
|
from infi.clickhouse_orm.database import Database, DatabaseException
|
||||||
from infi.clickhouse_orm.models import Model
|
from infi.clickhouse_orm.models import Model
|
||||||
from infi.clickhouse_orm.fields import *
|
from infi.clickhouse_orm.fields import *
|
||||||
from infi.clickhouse_orm.engines import *
|
from infi.clickhouse_orm.engines import *
|
||||||
|
@ -121,6 +121,25 @@ class DatabaseTestCase(unittest.TestCase):
|
||||||
for entry in data:
|
for entry in data:
|
||||||
yield Person(**entry)
|
yield Person(**entry)
|
||||||
|
|
||||||
|
def test_raw(self):
|
||||||
|
self._insert_and_check(self._sample_data(), len(data))
|
||||||
|
query = "SELECT * FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name"
|
||||||
|
results = self.database.raw(query)
|
||||||
|
self.assertEqual(results, "Whitney\tDurham\t1977-09-15\t1.72\nWhitney\tScott\t1971-07-04\t1.7\n")
|
||||||
|
|
||||||
|
def test_insert_readonly(self):
|
||||||
|
m = ReadOnlyModel(name='readonly')
|
||||||
|
with self.assertRaises(DatabaseException):
|
||||||
|
self.database.insert([m])
|
||||||
|
|
||||||
|
def test_create_readonly_table(self):
|
||||||
|
with self.assertRaises(DatabaseException):
|
||||||
|
self.database.create_table(ReadOnlyModel)
|
||||||
|
|
||||||
|
def test_drop_readonly_table(self):
|
||||||
|
with self.assertRaises(DatabaseException):
|
||||||
|
self.database.drop_table(ReadOnlyModel)
|
||||||
|
|
||||||
|
|
||||||
class Person(Model):
|
class Person(Model):
|
||||||
|
|
||||||
|
@ -132,6 +151,13 @@ class Person(Model):
|
||||||
engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
|
engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
|
||||||
|
|
||||||
|
|
||||||
|
class ReadOnlyModel(Model):
|
||||||
|
readonly = True
|
||||||
|
|
||||||
|
name = StringField()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
data = [
|
data = [
|
||||||
{"first_name": "Abdul", "last_name": "Hester", "birthday": "1970-12-02", "height": "1.63"},
|
{"first_name": "Abdul", "last_name": "Hester", "birthday": "1970-12-02", "height": "1.63"},
|
||||||
{"first_name": "Adam", "last_name": "Goodman", "birthday": "1986-01-07", "height": "1.74"},
|
{"first_name": "Adam", "last_name": "Goodman", "birthday": "1986-01-07", "height": "1.74"},
|
||||||
|
|
69
tests/test_system_models.py
Normal file
69
tests/test_system_models.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
import unittest
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from infi.clickhouse_orm.database import Database
|
||||||
|
from infi.clickhouse_orm.engines import *
|
||||||
|
from infi.clickhouse_orm.fields import *
|
||||||
|
from infi.clickhouse_orm.models import Model
|
||||||
|
from infi.clickhouse_orm.system_models import SystemPart
|
||||||
|
|
||||||
|
|
||||||
|
class SystemPartTest(unittest.TestCase):
|
||||||
|
BACKUP_DIR = '/opt/clickhouse/shadow/'
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.database = Database('test-db')
|
||||||
|
self.database.create_table(TestTable)
|
||||||
|
self.database.insert([TestTable(date_field=date.today())])
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
self.database.drop_database()
|
||||||
|
|
||||||
|
def _get_backups_count(self):
|
||||||
|
_, dirnames, _ = next(os.walk(self.BACKUP_DIR))
|
||||||
|
return len(dirnames)
|
||||||
|
|
||||||
|
def test_get_all(self):
|
||||||
|
parts = SystemPart.all(self.database)
|
||||||
|
self.assertEqual(len(list(parts)), 1)
|
||||||
|
|
||||||
|
def test_get_active(self):
|
||||||
|
parts = list(SystemPart.get_active(self.database))
|
||||||
|
self.assertEqual(len(parts), 1)
|
||||||
|
parts[0].detach(self.database)
|
||||||
|
self.assertEqual(len(list(SystemPart.get_active(self.database))), 0)
|
||||||
|
|
||||||
|
def test_attach_detach(self):
|
||||||
|
parts = list(SystemPart.get_active(self.database))
|
||||||
|
self.assertEqual(len(parts), 1)
|
||||||
|
parts[0].detach(self.database)
|
||||||
|
self.assertEqual(len(list(SystemPart.get_active(self.database))), 0)
|
||||||
|
parts[0].attach(self.database)
|
||||||
|
self.assertEqual(len(list(SystemPart.get_active(self.database))), 1)
|
||||||
|
|
||||||
|
def test_drop(self):
|
||||||
|
parts = list(SystemPart.get_active(self.database))
|
||||||
|
parts[0].drop(self.database)
|
||||||
|
self.assertEqual(len(list(SystemPart.get_active(self.database))), 0)
|
||||||
|
|
||||||
|
def test_freeze(self):
|
||||||
|
parts = list(SystemPart.all(self.database))
|
||||||
|
# There can be other backups in the folder
|
||||||
|
backups_count = self._get_backups_count()
|
||||||
|
parts[0].freeze(self.database)
|
||||||
|
backup_number = self._get_backups_count()
|
||||||
|
self.assertEqual(backup_number, backups_count + 1)
|
||||||
|
# Clean created backup
|
||||||
|
shutil.rmtree(self.BACKUP_DIR + '{0}'.format(backup_number))
|
||||||
|
|
||||||
|
def test_fetch(self):
|
||||||
|
# TODO Not tested, as I have no replication set
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestTable(Model):
|
||||||
|
date_field = DateField()
|
||||||
|
|
||||||
|
engine = MergeTree('date_field', ('date_field',))
|
Loading…
Reference in New Issue
Block a user