Django models and managers refactoring, but not tested

This commit is contained in:
M1ha 2018-11-14 15:18:06 +05:00
parent 4e340e6212
commit 0967614318
6 changed files with 187 additions and 124 deletions

View File

@ -312,7 +312,7 @@ class ClickHouseModel(InfiModel):
if len(model_ids) > 0: if len(model_ids) > 0:
if cls.sync_type == 'redis': if cls.sync_type == 'redis':
cls.django_model.register_clickhouse_operation('INSERT', *model_ids, database=(database or 'default')) cls.django_model.register_clickhouse_operations('INSERT', *model_ids, database=(database or 'default'))
else: # if self.sync_type == 'postgres' else: # if self.sync_type == 'postgres'
from utils.models import ClickHouseModelOperation from utils.models import ClickHouseModelOperation
ClickHouseModelOperation.objects.bulk_update_or_create([ ClickHouseModelOperation.objects.bulk_update_or_create([
@ -707,7 +707,7 @@ class ClickHouseCollapseModel(ClickHouseModel):
if len(model_ids) > 0: if len(model_ids) > 0:
if cls.sync_type == 'redis': if cls.sync_type == 'redis':
cls.django_model.register_clickhouse_operation('UPDATE', *list(model_ids), database=database) cls.django_model.register_clickhouse_operations('UPDATE', *list(model_ids), database=database)
else: # if self.sync_type == 'postgres' else: # if self.sync_type == 'postgres'
from utils.models import ClickHouseModelOperation from utils.models import ClickHouseModelOperation
ClickHouseModelOperation.objects.bulk_update_or_create([ ClickHouseModelOperation.objects.bulk_update_or_create([

View File

@ -16,7 +16,7 @@ DEFAULTS = {
'SYNC_STORAGE': 'django_clickhouse.storage.DBStorage', 'SYNC_STORAGE': 'django_clickhouse.storage.DBStorage',
'SYNC_DELAY': 5, 'SYNC_DELAY': 5,
'REDIS_CONFIG': None, 'REDIS_CONFIG': None,
'STATSD_PREFIX': 'clickhouse' 'STATSD_PREFIX': 'clickhouse',
} }

View File

@ -1,151 +1,187 @@
"""
This file contains base django model to be synced with ClickHouse.
It saves all operations to storage in order to write them to ClickHouse later.
"""
from typing import Optional, Any, List
import six
from django.db import transaction
from django.db.models.signals import post_save, post_delete from django.db.models.signals import post_save, post_delete
from django.dispatch import receiver from django.dispatch import receiver
from django.db.models import QuerySet as DjangoQuerySet, Manager as DjangoManager, Model as DjangoModel from django.db.models import QuerySet as DjangoQuerySet, Manager as DjangoManager, Model as DjangoModel
class ClickHouseDjangoModelQuerySet(DjangoQuerySet): from .configuration import config
from .storage import Storage
from .utils import lazy_class_import
try:
from django_pg_returning.manager import UpdateReturningMixin
except ImportError:
class UpdateReturningMixin:
pass
try:
from django_pg_bulk_update.manager import BulkUpdateManagerMixin
except ImportError:
class BulkUpdateManagerMixin:
pass
class ClickHouseSyncUpdateReturningQuerySetMixin(UpdateReturningMixin):
""" """
Переопределяет update, чтобы он сгенерировал данные для обновления ClickHouse This mixin adopts methods of django-pg-returning library
""" """
def __init__(self, *args, **kwargs): def _register_ops(self, result):
super(ClickHouseDjangoModelQuerySet, self).__init__(*args, **kwargs) pk_name = self.model._meta.pk.name
pk_list = result.values_list(pk_name, flat=True)
self.model.register_clickhouse_operations('update', *pk_list, using=self.db)
def update_returning(self, **updates):
result = super().update_returning(**updates)
self._register_ops(result)
return result
def delete_returning(self):
result = super().delete_returning()
self._register_ops(result)
return result
class ClickHouseSyncBulkUpdateManagerMixin(BulkUpdateManagerMixin):
def _update_returning_param(self, returning):
pk_name = self.model._meta.pk.name
if returning is None:
returning = pk_name
elif isinstance(returning, six.string_types):
returning = [pk_name, returning]
else:
returning = list(returning) + [pk_name]
return returning
def _register_ops(self, result):
pk_name = self.model._meta.pk.name
pk_list = [getattr(item, pk_name) for item in result]
self.model.register_clickhouse_operations('update', *pk_list, using=self.db)
def bulk_update(self, *args, **kwargs):
original_returning = kwargs.pop('returning', None)
kwargs['returning'] = self._update_returning_param(original_returning)
result = super().bulk_update(*args, **kwargs)
self._register_ops(result)
return result.count() if original_returning is None else result
def bulk_update_or_create(self, *args, **kwargs):
original_returning = kwargs.pop('returning', None)
kwargs['returning'] = self._update_returning_param(original_returning)
result = super().bulk_update_or_create(*args, **kwargs)
self._register_ops(result)
return result.count() if original_returning is None else result
class ClickHouseSyncQuerySetMixin:
def update(self, **kwargs): def update(self, **kwargs):
if self.model.clickhouse_sync_type == 'redis': if self.model.clickhouse_sync_type == 'redis':
pk_name = self.model._meta.pk.name pk_name = self.model._meta.pk.name
res = self.only(pk_name).update_returning(**kwargs).values_list(pk_name, flat=True) res = self.only(pk_name).update_returning(**kwargs).values_list(pk_name, flat=True)
self.model.register_clickhouse_operation('UPDATE', *res, database=(self._db or 'default')) self.model.register_clickhouse_operations('update', *res, using=self.db)
return len(res) return len(res)
else: else:
return super(ClickHouseDjangoModelQuerySet, self).update(**kwargs) return super().update(**kwargs)
def update_returning(self, **updates):
result = super(ClickHouseDjangoModelQuerySet, self).update_returning(**updates)
if self.model.clickhouse_sync_type == 'redis':
pk_name = self.model._meta.pk.name
pk_list = result.values_list(pk_name, flat=True)
self.model.register_clickhouse_operation('UPDATE', *pk_list, database=(self._db or 'default'))
return result
def delete_returning(self):
result = super(ClickHouseDjangoModelQuerySet, self).delete_returning()
if self.model.clickhouse_sync_type == 'redis':
pk_name = self.model._meta.pk.name
pk_list = result.values_list(pk_name, flat=True)
self.model.register_clickhouse_operation('DELETE', *pk_list, database=(self._db or 'default'))
return result
class ClickHouseDjangoModelManager(DjangoManager):
def get_queryset(self):
"""
Инициализирует кастомный QuerySet
:return: BaseQuerySet модели
"""
return ClickHouseDjangoModelQuerySet(model=self.model, using=self._db)
def bulk_create(self, objs, batch_size=None): def bulk_create(self, objs, batch_size=None):
objs = super(ClickHouseDjangoModelManager, self).bulk_create(objs, batch_size=batch_size) objs = super().bulk_create(objs, batch_size=batch_size)
self.model.register_clickhouse_operation('INSERT', *[obj.pk for obj in objs], database=(self._db or 'default')) self.model.register_clickhouse_operations('insert', *[obj.pk for obj in objs], using=self.db)
return objs return objs
class ClickHouseDjangoModel(DjangoModel): class ClickHouseSyncModelMixin:
""" def get_queryset(self):
Определяет базовую абстрактную модель, синхронизируемую с кликхаусом return ClickHouseSyncModelQuerySet(model=self.model, using=self._db)
"""
# TODO PostgreSQL, используемый сейчас не поддерживает UPSERT. Эта функция появилась в PostgreSQL 9.5
# INSERT INTO "{clickhouse_update_table}" ("table", "model_id", "operation")
# VALUES (TG_TABLE_NAME, NEW.{pk_field_name}, TG_OP) ON CONFLICT DO NOTHING;
# DEPRECATED Пока не удаляю, вдруг все таки решим переписать
# Синхронизация через Postgres основана на триггерах, которые не работают меж шардами
CREATE_TRIGGER_SQL_TEMPLATE = """
CREATE OR REPLACE FUNCTION {table}_clickhouse_update() RETURNS TRIGGER AS ${table}_clickhouse_update$
BEGIN
INSERT INTO "{clickhouse_update_table}" ("table", "model_id", "operation", "database")
SELECT TG_TABLE_NAME, NEW.{pk_field_name}, TG_OP, 'default' WHERE NOT EXISTS (
SELECT id FROM "{clickhouse_update_table}" WHERE "table"=TG_TABLE_NAME AND "model_id"=NEW.{pk_field_name}
);
RETURN NEW;
END;
${table}_clickhouse_update$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS {table}_collapsing_model_update ON {table}; class ClickHouseSyncModelQuerySet(ClickHouseSyncQuerySetMixin, DjangoQuerySet):
CREATE TRIGGER {table}_collapsing_model_update AFTER INSERT OR UPDATE ON {table} pass
FOR EACH ROW EXECUTE PROCEDURE {table}_clickhouse_update();
class ClickHouseSyncModelManager(ClickHouseSyncModelMixin, DjangoManager):
pass
class ClickHouseSyncModel(DjangoModel):
""" """
Base model for syncing data. Each django model synced with data must inherit this
# DEPRECATED Пока не удаляю, вдруг все таки решим переписать
# Синхронизация через Postgres основана на триггерах, которые не работают меж шардами
DROP_TRIGGER_SQL_TEMPLATE = """
DROP TRIGGER IF EXISTS {table}_collapsing_model_update ON {table};
DROP FUNCTION IF EXISTS {table}_clickhouse_update();
""" """
_clickhouse_sync_models = []
clickhouse_sync_type = None objects = ClickHouseSyncModelManager()
objects = ClickHouseDjangoModelManager()
class Meta: class Meta:
abstract = True abstract = True
def __init__(self, *args, **kwargs): @classmethod
# Добавил, чтобы PyCharm не ругался на неопределенный __init__ def get_clickhouse_storage(cls): # type: () -> Storage
super().__init__(*args, **kwargs) """
Returns Storage instance to save clickhouse sync data to
:return:
"""
storage_cls = lazy_class_import(config.SYNC_STORAGE)
return storage_cls()
@classmethod @classmethod
def register_clickhouse_operation(cls, operation, *model_ids, database=None): def register_clickhouse_sync_model(cls, model_cls): # type: (Type[ClickHouseModel]) -> None
""" """
Добавляет в redis запись о том, что произошел Insert, update или delete модели Registers ClickHouse model to listen to this model updates
:param operation: Тип операции INSERT, UPDATE, DELETE :param model_cls: Model class to register
:param model_ids: Id элементов для регистрации
:param database: База данных, в которой лежит данное значение
:return: None :return: None
""" """
if cls.clickhouse_sync_type != 'redis': cls._clickhouse_sync_models.append(model_cls)
return
assert operation in {'INSERT', 'UPDATE', 'DELETE'}, 'operation must be one of [INSERT, UPDATE, DELETE]'
model_ids = get_parameter_pk_list(model_ids)
if len(model_ids) > 0:
key = 'clickhouse_sync:{database}:{table}:{operation}'.format(table=cls._meta.db_table, operation=operation,
database=(database or 'default'))
on_transaction_commit(settings.REDIS.sadd, args=[key] + model_ids)
@classmethod @classmethod
def get_trigger_sql(cls, drop=False, table=None): def get_clickhouse_sync_models(cls): # type: () -> List[ClickHouseModel]
""" """
Формирует SQL для создания или удаления триггера на обновление модели синхронизации с ClickHouse Returns all clickhouse models, listening to this class
:param drop: Если флаг указан, формирует SQL для удаления триггера. Иначе - для создания :return:
:return: Строка SQL
""" """
# DEPRECATED Пока не удаляю, вдруг все таки решим переписать return cls._clickhouse_sync_models
# Синхронизация через Postgres основана на триггерах, которые не работают меж шардами
raise Exception('This method is deprecated due to sharding released')
# table = table or cls._meta.db_table @classmethod
# from utils.models import ClickHouseModelOperation def register_clickhouse_operations(cls, operation, *model_pks, using=None):
# sql = cls.DROP_TRIGGER_SQL_TEMPLATE if drop else cls.CREATE_TRIGGER_SQL_TEMPLATE # type: (str, *Any, Optional[str]) -> None
# sql = sql.format(table=table, pk_field_name=cls._meta.pk.name, """
# clickhouse_update_table=ClickHouseModelOperation._meta.db_table) Registers model operation in storage
# return sql :param operation: Operation type - one of [insert, update, delete]
:param model_pks: Elements to import
:param using: Database alias registered instances are from
:return: None
"""
if len(model_pks) > 0:
storage = cls.get_clickhouse_storage()
def post_save(self, created, using=None): def _on_commit():
self.register_clickhouse_operation('INSERT' if created else 'UPDATE', self.pk, database=(using or 'default')) for model_cls in cls.get_clickhouse_sync_models():
storage.register_operations_wrapped(model_cls.get_import_key(), operation, *model_pks)
def post_delete(self, using=None): transaction.on_commit(_on_commit, using=using)
self.register_clickhouse_operation('DELETE', self.pk, database=(using or 'default'))
def post_save(self, created, using=None): # type: (bool, Optional[str]) -> None
self.register_clickhouse_operations('insert' if created else 'update', self.pk, using=using)
def post_delete(self, using=None): # type: (Optional[str]) -> None
self.register_clickhouse_operations('delete', self.pk, using=using)
@receiver(post_save) @receiver(post_save)
def post_save(sender, instance, **kwargs): def post_save(sender, instance, **kwargs):
if issubclass(sender, ClickHouseDjangoModel): if issubclass(sender, ClickHouseSyncModel):
instance.post_save(kwargs.get('created'), using=kwargs.get('using')) instance.post_save(kwargs.get('created', False), using=kwargs.get('using'))
@receiver(post_delete) @receiver(post_delete)
def post_delete(sender, instance, **kwargs): def post_delete(sender, instance, **kwargs):
if issubclass(sender, ClickHouseDjangoModel): if issubclass(sender, ClickHouseSyncModel):
instance.post_delete(using=kwargs.get('using')) instance.post_delete(using=kwargs.get('using'))

View File

@ -7,6 +7,7 @@ Important:
Storage should be able to restore current importing batch, if something goes wrong. Storage should be able to restore current importing batch, if something goes wrong.
""" """
import datetime import datetime
from itertools import chain
from typing import Any, Optional, List, Tuple, Iterable from typing import Any, Optional, List, Tuple, Iterable
from .exceptions import ConfigurationError from .exceptions import ConfigurationError
@ -81,7 +82,7 @@ class Storage:
""" """
raise NotImplemented() raise NotImplemented()
def register_operation(self, import_key, operation, pk): # type: (str, str, Any) -> None def register_operations(self, import_key, operation, *pks): # type: (str, str, *Iterable[Any]) -> None
""" """
Registers new incoming operation Registers new incoming operation
:param import_key: A key, returned by ClickHouseModel.get_import_key() method :param import_key: A key, returned by ClickHouseModel.get_import_key() method
@ -91,8 +92,8 @@ class Storage:
""" """
raise NotImplementedError() raise NotImplementedError()
def register_operation_wrapped(self, import_key, operation, pk): def register_operations_wrapped(self, import_key, operation, *pks):
# type: (str, str, Any) -> None # type: (str, str, *Iterable[Any]) -> None
""" """
This is a wrapper for register_operation method, checking main parameters. This is a wrapper for register_operations method, checking main parameters.
This method should be called from inner functions. This method should be called from inner functions.
@ -104,7 +105,7 @@ class Storage:
if operation not in {'insert', 'update', 'delete'}: if operation not in {'insert', 'update', 'delete'}:
raise ValueError('operation must be one of [insert, update, delete]') raise ValueError('operation must be one of [insert, update, delete]')
return self.register_operation(import_key, operation, pk) return self.register_operations(import_key, operation, *pks)
class RedisStorage(Storage): class RedisStorage(Storage):
@ -126,12 +127,14 @@ class RedisStorage(Storage):
from redis import StrictRedis from redis import StrictRedis
self._redis = StrictRedis(**config.REDIS_CONFIG) self._redis = StrictRedis(**config.REDIS_CONFIG)
def register_operation(self, import_key, operation, pk): def register_operations(self, import_key, operation, *pks):
key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key) key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
score = datetime.datetime.now().timestamp() score = datetime.datetime.now().timestamp()
# key, score, value items = chain(*((score, '%s:%s' % (operation, str(pk))) for pk in pks))
self._redis.zadd(key, score, '%s:%s' % (operation, str(pk)))
# key, score1, value1, score2, value2, ...
self._redis.zadd(key, *items)
def get_operations(self, import_key, count, **kwargs): def get_operations(self, import_key, count, **kwargs):
ops_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key) ops_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)

View File

@ -1,3 +1,8 @@
from typing import Union, Any
import six
from importlib import import_module
def get_clickhouse_tz_offset(): def get_clickhouse_tz_offset():
""" """
@ -31,3 +36,22 @@ def format_datetime(dt, timezone_offset=0, day_end=False):
# Если даты форматируются вручную, то сервер воспринимает их как локаль сервера. # Если даты форматируются вручную, то сервер воспринимает их как локаль сервера.
return (dt - datetime.timedelta(minutes=timezone_offset - get_clickhouse_tz_offset())).strftime("%Y-%m-%d %H:%M:%S") return (dt - datetime.timedelta(minutes=timezone_offset - get_clickhouse_tz_offset())).strftime("%Y-%m-%d %H:%M:%S")
def lazy_class_import(obj): # type: (Union[str, Any]) -> Any
"""
If string is given, imports object by given module path.
Otherwise returns the object
:param obj: A string class path or object to return
:return: Imported object
"""
if isinstance(obj, six.string_types):
module_name, obj_name = obj.rsplit('.', 1)
module = import_module(module_name)
try:
return getattr(module, obj_name)
except AttributeError:
raise ImportError('Invalid import path `%s`' % obj)
else:
return obj

View File

@ -15,9 +15,9 @@ class StorageTest(TestCase):
redis.delete(*keys) redis.delete(*keys)
def test_operation_pks(self): def test_operation_pks(self):
self.storage.register_operation_wrapped('test', 'insert', 100500) self.storage.register_operations_wrapped('test', 'insert', 100500)
self.storage.register_operation_wrapped('test', 'insert', 100501) self.storage.register_operations_wrapped('test', 'insert', 100501)
self.storage.register_operation_wrapped('test', 'insert', 100502) self.storage.register_operations_wrapped('test', 'insert', 100502)
self.assertListEqual([ self.assertListEqual([
('insert', '100500'), ('insert', '100500'),
('insert', '100501'), ('insert', '100501'),
@ -25,9 +25,9 @@ class StorageTest(TestCase):
], self.storage.get_operations('test', 10)) ], self.storage.get_operations('test', 10))
def test_operation_types(self): def test_operation_types(self):
self.storage.register_operation_wrapped('test', 'insert', 100500) self.storage.register_operations_wrapped('test', 'insert', 100500)
self.storage.register_operation_wrapped('test', 'update', 100500) self.storage.register_operations_wrapped('test', 'update', 100500)
self.storage.register_operation_wrapped('test', 'delete', 100500) self.storage.register_operations_wrapped('test', 'delete', 100500)
self.assertListEqual([ self.assertListEqual([
('insert', '100500'), ('insert', '100500'),
('update', '100500'), ('update', '100500'),
@ -35,9 +35,9 @@ class StorageTest(TestCase):
], self.storage.get_operations('test', 10)) ], self.storage.get_operations('test', 10))
def test_operation_import_keys(self): def test_operation_import_keys(self):
self.storage.register_operation_wrapped('test1', 'insert', 100500) self.storage.register_operations_wrapped('test1', 'insert', 100500)
self.storage.register_operation_wrapped('test2', 'insert', 100500) self.storage.register_operations_wrapped('test2', 'insert', 100500)
self.storage.register_operation_wrapped('test2', 'insert', 100501) self.storage.register_operations_wrapped('test2', 'insert', 100501)
self.assertListEqual([ self.assertListEqual([
('insert', '100500') ('insert', '100500')
], self.storage.get_operations('test1', 10)) ], self.storage.get_operations('test1', 10))
@ -51,11 +51,11 @@ class StorageTest(TestCase):
self.assertTupleEqual(tuple(str(i) for i in range(10)), self.storage.get_import_batch('test')) self.assertTupleEqual(tuple(str(i) for i in range(10)), self.storage.get_import_batch('test'))
def test_post_sync(self): def test_post_sync(self):
self.storage.register_operation_wrapped('test', 'insert', 100500) self.storage.register_operations_wrapped('test', 'insert', 100500)
self.storage.register_operation_wrapped('test', 'insert', 100501) self.storage.register_operations_wrapped('test', 'insert', 100501)
self.storage.get_operations('test', 10) self.storage.get_operations('test', 10)
self.storage.write_import_batch('test', [str(i) for i in range(10)]) self.storage.write_import_batch('test', [str(i) for i in range(10)])
self.storage.register_operation_wrapped('test', 'insert', 100502) self.storage.register_operations_wrapped('test', 'insert', 100502)
self.storage.post_sync('test') self.storage.post_sync('test')
self.assertListEqual([ self.assertListEqual([