From a6bea15b362b81c18db4f0b92c586e5619fd601f Mon Sep 17 00:00:00 2001 From: M1ha-Shvn Date: Wed, 31 Mar 2021 18:22:45 +0500 Subject: [PATCH] 1. Added debug logs for sync process 2. Micro refactoring --- src/django_clickhouse/clickhouse_models.py | 24 ++++++++++++++++++---- src/django_clickhouse/storages.py | 2 +- src/django_clickhouse/tasks.py | 5 +++-- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/django_clickhouse/clickhouse_models.py b/src/django_clickhouse/clickhouse_models.py index 343df18..ed2d7fa 100644 --- a/src/django_clickhouse/clickhouse_models.py +++ b/src/django_clickhouse/clickhouse_models.py @@ -236,7 +236,10 @@ class ClickHouseModel(with_metaclass(ClickHouseModelMeta, InfiModel)): with statsd.timer(statsd_key.format('steps.get_operations')): operations = storage.get_operations(import_key, cls.get_sync_batch_size()) - statsd.incr(statsd_key.format('operations'), len(operations)) + + statsd.incr(statsd_key.format('operations'), len(operations)) + logger.debug('django-clickhouse: got %d operations from storage (key: %s)' + % (len(operations), import_key)) if operations: with statsd.timer(statsd_key.format('steps.get_sync_objects')): @@ -245,13 +248,18 @@ class ClickHouseModel(with_metaclass(ClickHouseModelMeta, InfiModel)): import_objects = [] statsd.incr(statsd_key.format('import_objects'), len(import_objects)) + logger.debug('django-clickhouse: got %d objects to import from database (key: %s)' + % (len(import_objects), import_key)) if import_objects: with statsd.timer(statsd_key.format('steps.get_insert_batch')): # NOTE I don't use generator pattern here, as it move all time into insert. - # That makes hard to understand where real problem is in monitoring + # That makes hard to understand where real problem is in monitoring batch = tuple(cls.get_insert_batch(import_objects)) + logger.debug('django-clickhouse: formed %d ClickHouse objects to insert (key: %s)' + % (len(batch), import_key)) + with statsd.timer(statsd_key.format('steps.insert')): cls.insert_batch(batch) @@ -312,7 +320,10 @@ class ClickHouseMultiModel(ClickHouseModel): with statsd.timer(statsd_key.format('steps.get_operations')): operations = storage.get_operations(import_key, cls.get_sync_batch_size()) - statsd.incr(statsd_key.format('operations'), len(operations)) + + statsd.incr(statsd_key.format('operations'), len(operations)) + logger.debug('django-clickhouse: got %d operations from storage (key: %s)' + % (len(operations), import_key)) if operations: with statsd.timer(statsd_key.format('steps.get_sync_objects')): @@ -321,6 +332,8 @@ class ClickHouseMultiModel(ClickHouseModel): import_objects = [] statsd.incr(statsd_key.format('import_objects'), len(import_objects)) + logger.debug('django-clickhouse: got %d objects to import from database (key: %s)' + % (len(import_objects), import_key)) if import_objects: batches = {} @@ -331,7 +344,10 @@ class ClickHouseMultiModel(ClickHouseModel): # NOTE I don't use generator pattern here, as it move all time into insert. # That makes hard to understand where real problem is in monitoring batch = tuple(model_cls.get_insert_batch(import_objects)) - return model_cls, batch + + logger.debug('django-clickhouse: formed %d ClickHouse objects to insert' + ' (model_cls: %s, key: %s)' % (len(batch), model_cls.__name__, import_key)) + return model_cls, batch res = exec_multi_arg_func(_sub_model_func, cls.sub_models, threads_count=len(cls.sub_models)) batches = dict(res) diff --git a/src/django_clickhouse/storages.py b/src/django_clickhouse/storages.py index dcdfceb..43d972b 100644 --- a/src/django_clickhouse/storages.py +++ b/src/django_clickhouse/storages.py @@ -251,7 +251,7 @@ class RedisStorage(with_metaclass(SingletonMeta, Storage)): self.post_batch_removed(import_key, batch_size) self.get_lock(import_key, **kwargs).release() - logger.info('django-clickhouse: synced %d items (key: %s)' % (batch_size, import_key)) + logger.info('django-clickhouse: removed %d operations from storage (key: %s)' % (batch_size, import_key)) def post_sync_failed(self, import_key, **kwargs): # unblock lock after sync completed diff --git a/src/django_clickhouse/tasks.py b/src/django_clickhouse/tasks.py index 0d8120d..3609119 100644 --- a/src/django_clickhouse/tasks.py +++ b/src/django_clickhouse/tasks.py @@ -1,5 +1,6 @@ import datetime import importlib +from typing import Type, Union from celery import shared_task from django.conf import settings @@ -11,7 +12,7 @@ from .utils import get_subclasses, lazy_class_import @shared_task(queue=config.CELERY_QUEUE) -def sync_clickhouse_model(model_cls) -> None: +def sync_clickhouse_model(model_cls: Union[Type[ClickHouseModel], str]) -> None: """ Syncs one batch of given ClickHouseModel :param model_cls: ClickHouseModel subclass or python path to it @@ -25,7 +26,7 @@ def sync_clickhouse_model(model_cls) -> None: @shared_task(queue=config.CELERY_QUEUE) -def clickhouse_auto_sync(): +def clickhouse_auto_sync() -> None: """ Plans syncing models :return: None