mirror of
https://github.com/carrotquest/django-clickhouse.git
synced 2024-11-23 09:33:47 +03:00
1) RedisStorage and tests for it
2) Some fixes
This commit is contained in:
parent
2a9567d27f
commit
bac333d03a
|
@ -5,3 +5,4 @@ typing
|
|||
psycopg2
|
||||
infi.clickhouse-orm
|
||||
celery
|
||||
statsd
|
23
runtests.py
Normal file
23
runtests.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
This suite runs tests in django environment. See:
|
||||
https://docs.djangoproject.com/en/1.11/topics/testing/advanced/#using-the-django-test-runner-to-test-reusable-applications
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import django
|
||||
from django.conf import settings
|
||||
from django.test.utils import get_runner
|
||||
|
||||
if __name__ == "__main__":
|
||||
print('Django: ', django.VERSION)
|
||||
print('Python: ', sys.version)
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'tests.settings'
|
||||
django.setup()
|
||||
TestRunner = get_runner(settings)
|
||||
test_runner = TestRunner()
|
||||
failures = test_runner.run_tests(["tests"])
|
||||
sys.exit(bool(failures))
|
5
setup.cfg
Normal file
5
setup.cfg
Normal file
|
@ -0,0 +1,5 @@
|
|||
[metadata]
|
||||
description-file = README.md
|
||||
|
||||
[bdist_wheel]
|
||||
universal = 1
|
27
setup.py
Normal file
27
setup.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
from setuptools import setup
|
||||
|
||||
with open("README.md", "r") as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
requires = []
|
||||
with open('requirements.txt') as f:
|
||||
for line in f.readlines():
|
||||
line = line.strip() # Remove spaces
|
||||
line = line.split('#')[0] # Remove comments
|
||||
if line: # Remove empty lines
|
||||
requires.append(line)
|
||||
|
||||
setup(
|
||||
name='django-clickhouse',
|
||||
version='0.0.1',
|
||||
packages=['django_clickhouse'],
|
||||
package_dir={'': 'src'},
|
||||
url='https://github.com/carrotquest/django-clickhouse',
|
||||
license='BSD 3-clause "New" or "Revised" License',
|
||||
author='Mikhail Shvein',
|
||||
author_email='work_shvein_mihail@mail.ru',
|
||||
description='Django extension to integrate with ClickHouse database',
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
# requires=requires
|
||||
)
|
|
@ -14,7 +14,9 @@ DEFAULTS = {
|
|||
'DATABASES': {},
|
||||
'SYNC_BATCH_SIZE': 10000,
|
||||
'SYNC_STORAGE': 'django_clickhouse.storage.DBStorage',
|
||||
'SYNC_DELAY': 5
|
||||
'SYNC_DELAY': 5,
|
||||
'REDIS_CONFIG': None,
|
||||
'STATSD_PREFIX': 'clickhouse'
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,10 @@
|
|||
|
||||
from .configuration import PREFIX
|
||||
|
||||
class ClickHouseError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ConfigurationError(Exception):
|
||||
def __init__(self, param_name):
|
||||
param_name = PREFIX + param_name
|
||||
super(ConfigurationError, self).__init__("Config parameter '%s' is not set properly" % param_name)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from django.db.models.signals import post_save, post_delete
|
||||
from django.dispatch import receiver
|
||||
from django.db.models import QuerySet as DjangoQuerySet, Manager as DjangoManager, Model as DjangoModel
|
||||
|
||||
|
||||
class ClickHouseDjangoModelQuerySet(DjangoBaseQuerySet):
|
||||
class ClickHouseDjangoModelQuerySet(DjangoQuerySet):
|
||||
"""
|
||||
Переопределяет update, чтобы он сгенерировал данные для обновления ClickHouse
|
||||
"""
|
||||
|
@ -36,7 +36,7 @@ class ClickHouseDjangoModelQuerySet(DjangoBaseQuerySet):
|
|||
return result
|
||||
|
||||
|
||||
class ClickHouseDjangoModelManager(DjangoBaseManager):
|
||||
class ClickHouseDjangoModelManager(DjangoManager):
|
||||
def get_queryset(self):
|
||||
"""
|
||||
Инициализирует кастомный QuerySet
|
||||
|
@ -51,7 +51,7 @@ class ClickHouseDjangoModelManager(DjangoBaseManager):
|
|||
return objs
|
||||
|
||||
|
||||
class ClickHouseDjangoModel(DjangoBaseModel):
|
||||
class ClickHouseDjangoModel(DjangoModel):
|
||||
"""
|
||||
Определяет базовую абстрактную модель, синхронизируемую с кликхаусом
|
||||
"""
|
||||
|
|
|
@ -6,59 +6,159 @@ This data is periodically fetched from storage and applied to ClickHouse tables.
|
|||
Important:
|
||||
Storage should be able to restore current importing batch, if something goes wrong.
|
||||
"""
|
||||
import datetime
|
||||
from typing import Any, Optional, List, Tuple, Iterable
|
||||
|
||||
from .exceptions import ConfigurationError
|
||||
from .configuration import config
|
||||
|
||||
|
||||
class Storage:
|
||||
"""
|
||||
Base abstract storage class, defining interface for all storages.
|
||||
The storage work algorithm:
|
||||
1) pre_sync()
|
||||
2) get_import_batch(). If batch is present go to 5)
|
||||
3) If batch is None, call get_operations()
|
||||
4) Transform operations to batch and call write_import_batch()
|
||||
5) Import batch to ClickHouse
|
||||
6) call post_sync(). If succeeded, it should remove the batch and it's data from sync_queue.
|
||||
|
||||
def pre_sync(self): # type: () -> None
|
||||
If anything goes wrong before write_import_batch(), it is guaranteed that ClickHouse import hasn't been started yet,
|
||||
And we can repeat the procedure from the beginning.
|
||||
If anything goes wrong after write_import_batch(), we don't know it the part has been imported to ClickHouse.
|
||||
But ClickHouse is idempotent to duplicate inserts. So we can insert one batch twice correctly.
|
||||
"""
|
||||
|
||||
def pre_sync(self, import_key, **kwargs): # type: (str, **dict) -> None
|
||||
"""
|
||||
This method is called before import process starts
|
||||
:param import_key: A key, returned by ClickHouseModel.get_import_key() method
|
||||
:param kwargs: Storage dependant arguments
|
||||
:return: None
|
||||
"""
|
||||
pass
|
||||
|
||||
def post_sync(self, success): # type: (bool) -> None
|
||||
def post_sync(self, import_key, **kwargs): # type: (str, **dict) -> None
|
||||
"""
|
||||
This method is called after import process has finished.
|
||||
:param success: A flag, if process ended with success or error
|
||||
:param import_key: A key, returned by ClickHouseModel.get_import_key() method
|
||||
:param kwargs: Storage dependant arguments
|
||||
:return: None
|
||||
"""
|
||||
pass
|
||||
|
||||
def get_sync_ids(self, **kwargs): # type(**dict) -> Tuple[Set[Any], Set[Any], Set[Any]]
|
||||
def get_import_batch(self, import_key, **kwargs):
|
||||
# type: (str, **dict) -> Optional[Tuple[str]]
|
||||
"""
|
||||
Must return 3 sets of ids: to insert, update and delete records.
|
||||
Method should be error safe - if something goes wrong, import data should not be lost
|
||||
Returns a saved batch for ClickHouse import or None, if it was not found
|
||||
:param import_key: A key, returned by ClickHouseModel.get_import_key() method
|
||||
:param kwargs: Storage dependant arguments
|
||||
:return: 3 sets of primary keys
|
||||
:return: None, if no batch has been formed. A tuple strings, saved in write_import_batch() method.
|
||||
"""
|
||||
raise NotImplemented()
|
||||
|
||||
def write_import_batch(self, import_key, batch, **kwargs):
|
||||
# type: (str, Iterable[str], **dict) -> None
|
||||
"""
|
||||
Saves batch for ClickHouse import
|
||||
:param import_key: A key, returned by ClickHouseModel.get_import_key() method
|
||||
:param batch: An iterable of strings to save as a batch
|
||||
:param kwargs: Storage dependant arguments
|
||||
:return: None
|
||||
"""
|
||||
raise NotImplemented()
|
||||
|
||||
def get_operations(self, import_key, count, **kwargs):
|
||||
# type: (str, int, **dict) -> List[Tuple[str, str]]
|
||||
"""
|
||||
Must return a list of operations on the model.
|
||||
Method should be error safe - if something goes wrong, import data should not be lost.
|
||||
:param import_key: A key, returned by ClickHouseModel.get_import_key() method
|
||||
:param count: A batch size to get
|
||||
:param kwargs: Storage dependant arguments
|
||||
:return: A list of tuples (operation, pk) in incoming order.
|
||||
"""
|
||||
raise NotImplemented()
|
||||
|
||||
def register_operation(self, import_key, operation, pk): # type: (str, str, Any) -> None
|
||||
"""
|
||||
Registers new incoming operation
|
||||
:param import_key: A key, returned by ClickHouseModel.get_import_key() method
|
||||
:param operation: One of insert, update, delete
|
||||
:param pk: Primary key to find records in main database. Should be string-serializable with str() method.
|
||||
:return: None
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def register_operation_wrapped(self, import_key, operation, pk):
|
||||
# type: (str, str, Any) -> None
|
||||
"""
|
||||
This is a wrapper for register_operation method, checking main parameters.
|
||||
This method should be called from inner functions.
|
||||
:param import_key: A key, returned by ClickHouseModel.get_import_key() method
|
||||
:param operation: One of insert, update, delete
|
||||
:param pk: Primary key to find records in main database. Should be string-serializable with str() method.
|
||||
:return: None
|
||||
"""
|
||||
if operation not in {'insert', 'update', 'delete'}:
|
||||
raise ValueError('operation must be one of [insert, update, delete]')
|
||||
|
||||
return self.register_operation(import_key, operation, pk)
|
||||
|
||||
|
||||
class RedisStorage(Storage):
|
||||
"""
|
||||
Fast in-memory storage made on bases of redis and redis-py library.
|
||||
Requires:
|
||||
1) REDIS database
|
||||
2) CLICKHOUSE_REDIS_CONFIG parameter defined. This should be a dict of kwargs for redis.StrictRedis(**kwargs).
|
||||
"""
|
||||
REDIS_KEY_OPS_TEMPLATE = 'clickhouse_sync:operations:{import_key}'
|
||||
REDIS_KEY_TS_TEMPLATE = 'clickhouse_sync:timstamp:{import_key}'
|
||||
REDIS_KEY_BATCH_TEMPLATE = 'clickhouse_sync:batch:{import_key}'
|
||||
|
||||
def __init__(self):
|
||||
# Create redis library connection. If redis is not connected properly errors should be raised
|
||||
if config.REDIS_CONFIG is None:
|
||||
raise ConfigurationError('REDIS_CONFIG')
|
||||
|
||||
from redis import StrictRedis
|
||||
self._redis = StrictRedis(**config.REDIS_CONFIG)
|
||||
|
||||
@classmethod
|
||||
def get_sync_ids(cls, **kwargs):
|
||||
# Шардинговый формат
|
||||
key = 'clickhouse_sync:{using}:{table}:{operation}'.format(table=cls.django_model._meta.db_table,
|
||||
operation='*', using=(using or 'default'))
|
||||
def register_operation(self, import_key, operation, pk):
|
||||
key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
|
||||
|
||||
# Множества id для вставки, обновления, удаления
|
||||
insert_model_ids, update_model_ids, delete_model_ids = set(), set(), set()
|
||||
# key, score, value
|
||||
self._redis.zadd(key, datetime.datetime.now().timestamp(), '%s:%s' % (operation, str(pk)))
|
||||
|
||||
for key in settings.REDIS.keys(key):
|
||||
model_ids = settings.REDIS.pipeline().smembers(key).delete(key).execute()[0]
|
||||
model_ids = {int(mid) for mid in model_ids}
|
||||
def get_operations(self, import_key, count, **kwargs):
|
||||
ops_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
|
||||
ops, scores = zip(*self._redis.zrangebyscore(ops_key, '-inf', datetime.datetime.now().timestamp(),
|
||||
start=0, num=count, withscores=True))
|
||||
|
||||
op = key.decode('utf-8').split(':')[-1]
|
||||
if op == 'INSERT':
|
||||
insert_model_ids = set(model_ids)
|
||||
elif op == 'UPDATE':
|
||||
update_model_ids = set(model_ids)
|
||||
else: # if op == 'DELETE'
|
||||
delete_model_ids = set(model_ids)
|
||||
ts_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
|
||||
self._redis.set(ts_key, max(scores))
|
||||
|
||||
return insert_model_ids, update_model_ids, delete_model_ids
|
||||
return list(tuple(op.decode().split(':')) for op in ops)
|
||||
|
||||
def get_import_batch(self, import_key, **kwargs):
|
||||
batch_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
|
||||
return tuple(item.decode() for item in self._redis.lrange(batch_key, 0, -1))
|
||||
|
||||
def write_import_batch(self, import_key, batch, **kwargs):
|
||||
batch_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
|
||||
|
||||
# Elements are pushed to the head, so we need to invert batch in order to save correct order
|
||||
self._redis.lpush(batch_key, *reversed(batch))
|
||||
|
||||
def post_sync(self, import_key, **kwargs):
|
||||
ts_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
|
||||
ops_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
|
||||
batch_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
|
||||
|
||||
score = float(self._redis.pipeline().get(ts_key))
|
||||
self._redis.pipeline()\
|
||||
.zremrangebyscore(ops_key, '-inf', score)\
|
||||
.delete(batch_key)\
|
||||
.execute()
|
||||
|
|
|
@ -38,8 +38,14 @@ LOGGING = {
|
|||
}
|
||||
|
||||
INSTALLED_APPS = [
|
||||
"src.django_clickhouse",
|
||||
"src",
|
||||
"tests"
|
||||
]
|
||||
|
||||
CLICKHOUSE_BATCH_SIZE = 5000
|
||||
CLICKHOUSE_SYNC_BATCH_SIZE = 5000
|
||||
CLICKHOUSE_REDIS_CONFIG = {
|
||||
'host': '127.0.0.1',
|
||||
'port': 6379,
|
||||
'db': 8,
|
||||
'socket_timeout': 10
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ class ConfigTest(TestCase):
|
|||
self.assertEqual(5, config.SYNC_DELAY)
|
||||
|
||||
def test_value(self):
|
||||
self.assertEqual(5000, config.BATCH_SIZE)
|
||||
self.assertEqual(5000, config.SYNC_BATCH_SIZE)
|
||||
|
||||
def test_not_lib_prop(self):
|
||||
with self.assertRaises(AttributeError):
|
||||
|
|
48
tests/test_storages.py
Normal file
48
tests/test_storages.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
from django.test import TestCase
|
||||
|
||||
from django_clickhouse.storage import RedisStorage
|
||||
|
||||
|
||||
class StorageTest(TestCase):
|
||||
storage = RedisStorage()
|
||||
|
||||
def setUp(self):
|
||||
# Clean storage
|
||||
redis = self.storage._redis
|
||||
redis.delete(*redis.keys('clickhouse_sync*'))
|
||||
|
||||
def test_operation_pks(self):
|
||||
self.storage.register_operation_wrapped('test', 'insert', 100500)
|
||||
self.storage.register_operation_wrapped('test', 'insert', 100501)
|
||||
self.storage.register_operation_wrapped('test', 'insert', 100502)
|
||||
self.assertListEqual([
|
||||
('insert', '100500'),
|
||||
('insert', '100501'),
|
||||
('insert', '100502'),
|
||||
], self.storage.get_operations('test', 10))
|
||||
|
||||
def test_operation_types(self):
|
||||
self.storage.register_operation_wrapped('test', 'insert', 100500)
|
||||
self.storage.register_operation_wrapped('test', 'update', 100500)
|
||||
self.storage.register_operation_wrapped('test', 'delete', 100500)
|
||||
self.assertListEqual([
|
||||
('insert', '100500'),
|
||||
('update', '100500'),
|
||||
('delete', '100500'),
|
||||
], self.storage.get_operations('test', 10))
|
||||
|
||||
def test_operation_import_keys(self):
|
||||
self.storage.register_operation_wrapped('test1', 'insert', 100500)
|
||||
self.storage.register_operation_wrapped('test2', 'insert', 100500)
|
||||
self.storage.register_operation_wrapped('test2', 'insert', 100501)
|
||||
self.assertListEqual([
|
||||
('insert', '100500')
|
||||
], self.storage.get_operations('test1', 10))
|
||||
self.assertListEqual([
|
||||
('insert', '100500'),
|
||||
('insert', '100501'),
|
||||
], self.storage.get_operations('test2', 10))
|
||||
|
||||
def test_import_batch(self):
|
||||
self.storage.write_import_batch('test', [str(i) for i in range(10)])
|
||||
self.assertTupleEqual(tuple(str(i) for i in range(10)), self.storage.get_import_batch('test'))
|
Loading…
Reference in New Issue
Block a user