mirror of
https://github.com/carrotquest/django-clickhouse.git
synced 2024-11-22 00:56:37 +03:00
1) Fixed SyncKillTest
2) Added some logs
This commit is contained in:
parent
b52bfaccae
commit
7e502ee8a8
|
@ -5,7 +5,6 @@ from typing import List, TypeVar, Type
|
|||
|
||||
from django.db.models import Model as DjangoModel
|
||||
from infi.clickhouse_orm import engines as infi_engines
|
||||
from infi.clickhouse_orm.database import Database
|
||||
from infi.clickhouse_orm.models import Model as InfiModel
|
||||
from statsd.defaults.django import statsd
|
||||
|
||||
|
@ -98,7 +97,7 @@ class CollapsingMergeTree(InsertOnlyEngineMixin, infi_engines.CollapsingMergeTre
|
|||
|
||||
def set_obj_sign(self, obj, sign): # type: (InfiModel, int) -> None
|
||||
"""
|
||||
Sets objects sign. By default gets attribute nmae from sign_col
|
||||
Sets objects sign. By default gets attribute name from sign_col
|
||||
:return: None
|
||||
"""
|
||||
setattr(obj, self.sign_col, sign)
|
||||
|
|
|
@ -113,7 +113,8 @@ class Storage:
|
|||
|
||||
statsd_key = "%s.sync.%s.queue" % (config.STATSD_PREFIX, import_key)
|
||||
statsd.gauge(statsd_key, len(pks), delta=True)
|
||||
logger.debug('django-clickhouse: registered %d items (%s) to storage' % (len(pks), import_key))
|
||||
logger.debug('django-clickhouse: registered %s on %d items (%s) to storage'
|
||||
% (operation, len(pks), import_key))
|
||||
|
||||
return self.register_operations(import_key, operation, *pks)
|
||||
|
||||
|
@ -147,7 +148,7 @@ class RedisStorage(Storage):
|
|||
2) CLICKHOUSE_REDIS_CONFIG parameter defined. This should be a dict of kwargs for redis.StrictRedis(**kwargs).
|
||||
"""
|
||||
REDIS_KEY_OPS_TEMPLATE = 'clickhouse_sync:operations:{import_key}'
|
||||
REDIS_KEY_TS_TEMPLATE = 'clickhouse_sync:timstamp:{import_key}'
|
||||
REDIS_KEY_RANK_TEMPLATE = 'clickhouse_sync:timstamp:{import_key}'
|
||||
REDIS_KEY_LOCK = 'clickhouse_sync:lock:{import_key}'
|
||||
REDIS_KEY_LOCK_PID = 'clickhouse_sync:lock_pid:{import_key}'
|
||||
REDIS_KEY_LAST_SYNC_TS = 'clickhouse_sync:last_sync:{import_key}'
|
||||
|
@ -180,8 +181,8 @@ class RedisStorage(Storage):
|
|||
if res:
|
||||
ops, scores = zip(*res)
|
||||
|
||||
ts_key = self.REDIS_KEY_TS_TEMPLATE.format(import_key=import_key)
|
||||
self._redis.set(ts_key, max(scores))
|
||||
rank_key = self.REDIS_KEY_RANK_TEMPLATE.format(import_key=import_key)
|
||||
self._redis.set(rank_key, len(ops) - 1)
|
||||
|
||||
return list(tuple(op.decode().split(':')) for op in ops)
|
||||
else:
|
||||
|
@ -210,7 +211,8 @@ class RedisStorage(Storage):
|
|||
# Let's check if pid exists
|
||||
pid = int(self._redis.get(lock_pid_key) or 0)
|
||||
if pid and not check_pid(pid):
|
||||
logger.warning('django-clickhouse: hard releasing lock "%s" locked by pid %d' % (import_key, pid))
|
||||
logger.warning('django-clickhouse: hard releasing lock "%s" locked by pid %d (process is dead)'
|
||||
% (import_key, pid))
|
||||
self._redis.delete(lock_pid_key)
|
||||
lock.hard_release()
|
||||
self.pre_sync(import_key, **kwargs)
|
||||
|
@ -218,12 +220,12 @@ class RedisStorage(Storage):
|
|||
raise
|
||||
|
||||
def post_sync(self, import_key, **kwargs):
|
||||
ts_key = self.REDIS_KEY_TS_TEMPLATE.format(import_key=import_key)
|
||||
rank_key = self.REDIS_KEY_RANK_TEMPLATE.format(import_key=import_key)
|
||||
ops_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
|
||||
|
||||
score = self._redis.get(ts_key)
|
||||
if score:
|
||||
res = self._redis.zremrangebyscore(ops_key, '-inf', float(score))
|
||||
top_rank = self._redis.get(rank_key)
|
||||
if top_rank:
|
||||
res = self._redis.zremrangebyrank(ops_key, 0, top_rank)
|
||||
batch_size = int(res)
|
||||
else:
|
||||
batch_size = 0
|
||||
|
@ -239,7 +241,7 @@ class RedisStorage(Storage):
|
|||
|
||||
def flush(self):
|
||||
key_tpls = [
|
||||
self.REDIS_KEY_TS_TEMPLATE.format(import_key='*'),
|
||||
self.REDIS_KEY_RANK_TEMPLATE.format(import_key='*'),
|
||||
self.REDIS_KEY_OPS_TEMPLATE.format(import_key='*'),
|
||||
self.REDIS_KEY_LOCK.format(import_key='*'),
|
||||
self.REDIS_KEY_LAST_SYNC_TS.format(import_key='*')
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import datetime
|
||||
import os
|
||||
from itertools import chain
|
||||
from typing import Union, Any, Optional, TypeVar, Set, Dict, Iterable
|
||||
from typing import Union, Any, Optional, TypeVar, Set, Dict, Iterable, Tuple, Iterator
|
||||
|
||||
import pytz
|
||||
import six
|
||||
|
@ -133,4 +133,28 @@ def check_pid(pid):
|
|||
except OSError:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
return True
|
||||
|
||||
|
||||
def int_ranges(items: Iterable[int]) -> Iterator[Tuple[int, int]]:
|
||||
"""
|
||||
Finds continuous intervals in integer iterable.
|
||||
:param items: Items to search in
|
||||
:return: Iterator over Tuple[start, end]
|
||||
"""
|
||||
interval_start = None
|
||||
prev_item = None
|
||||
for item in sorted(items):
|
||||
if prev_item is None:
|
||||
interval_start = prev_item = item
|
||||
elif prev_item + 1 == item:
|
||||
prev_item = item
|
||||
else:
|
||||
interval = interval_start, prev_item
|
||||
interval_start = prev_item = item
|
||||
yield interval
|
||||
|
||||
if interval_start is None:
|
||||
raise StopIteration()
|
||||
else:
|
||||
yield interval_start, prev_item
|
||||
|
|
|
@ -8,6 +8,7 @@ from tests.models import TestModel
|
|||
class ClickHouseTestModel(ClickHouseModel):
|
||||
django_model = TestModel
|
||||
sync_delay = 2
|
||||
sync_enabled = True
|
||||
|
||||
id = fields.Int32Field()
|
||||
created_date = fields.DateField()
|
||||
|
@ -33,3 +34,5 @@ class ClickHouseCollapseTestModel(ClickHouseModel):
|
|||
class ClickHouseMultiTestModel(ClickHouseMultiModel):
|
||||
django_model = TestModel
|
||||
sub_models = [ClickHouseTestModel, ClickHouseCollapseTestModel]
|
||||
sync_delay = 2
|
||||
sync_enabled = True
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import logging
|
||||
import sys
|
||||
|
||||
import argparse
|
||||
|
@ -18,42 +19,53 @@ from django.db.models import F
|
|||
from tests.clickhouse_models import ClickHouseCollapseTestModel
|
||||
from tests.models import TestModel
|
||||
|
||||
logger = logging.getLogger('django-clickhouse')
|
||||
|
||||
|
||||
def create(batch_size=1000, test_time=60, period=1, **kwargs):
|
||||
for iteration in range(int(test_time / period)):
|
||||
TestModel.objects.db_manager('test_db').bulk_create([
|
||||
res = TestModel.objects.db_manager('test_db').bulk_create([
|
||||
TestModel(created_date='2018-01-01', value=iteration * batch_size + i) for i in range(batch_size)
|
||||
])
|
||||
logger.info('django-clickhouse: test created %d records' % len(res))
|
||||
sleep(period)
|
||||
|
||||
|
||||
def update(batch_size=1000, test_time=60, period=1, **kwargs):
|
||||
def update(batch_size=500, test_time=60, period=1, **kwargs):
|
||||
for iteration in range(int(test_time / period)):
|
||||
TestModel.objects.db_manager('test_db').filter(id__gte=iteration * batch_size).annotate(idmod10=F('id') % 10). \
|
||||
filter(idmod10=0).update(value=-1)
|
||||
updated = TestModel.objects.db_manager('test_db').\
|
||||
filter(value__gte=iteration * batch_size, value__lt=(iteration + 1) * batch_size).\
|
||||
annotate(valmod10=F('value') % 10).filter(valmod10=0).update(value=-1)
|
||||
logger.debug('django-clickhouse: test updated %d records' % updated)
|
||||
sleep(period)
|
||||
|
||||
|
||||
def delete(batch_size=1000, test_time=60, period=1, **kwargs):
|
||||
def delete(batch_size=500, test_time=60, period=1, **kwargs):
|
||||
for iteration in range(int(test_time / period)):
|
||||
TestModel.objects.db_manager('test_db').filter(id__gte=iteration * batch_size).annotate(idmod10=F('id') % 10). \
|
||||
filter(idmod10=1).delete()
|
||||
deleted, _ = TestModel.objects.db_manager('test_db'). \
|
||||
filter(value__gte=iteration * batch_size, value__lt=(iteration + 1) * batch_size). \
|
||||
annotate(valmod10=F('value') % 10).filter(valmod10=1).delete()
|
||||
logger.debug('django-clickhouse: test deleted %d records' % deleted)
|
||||
sleep(period)
|
||||
|
||||
|
||||
def sync(period=1, test_time=60, **kwargs):
|
||||
start = datetime.datetime.now()
|
||||
while (datetime.datetime.now() - start).total_seconds() < test_time:
|
||||
if kwargs['once']:
|
||||
ClickHouseCollapseTestModel.sync_batch_from_storage()
|
||||
sleep(period)
|
||||
else:
|
||||
start = datetime.datetime.now()
|
||||
while (datetime.datetime.now() - start).total_seconds() < test_time:
|
||||
ClickHouseCollapseTestModel.sync_batch_from_storage()
|
||||
sleep(period)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('process', type=str, choices=('sync', 'create', 'update', 'delete'))
|
||||
parser.add_argument('--test-time', type=int, required=False, default=60)
|
||||
parser.add_argument('--batch-size', type=str, required=False, default=1000)
|
||||
parser.add_argument('--period', type=str, required=False, default=1)
|
||||
parser.add_argument('--batch-size', type=int, required=False, default=1000)
|
||||
parser.add_argument('--period', type=int, required=False, default=1)
|
||||
parser.add_argument('--once', type=bool, required=False, default=False)
|
||||
params = vars(parser.parse_args())
|
||||
|
||||
func_name = params['process']
|
||||
|
|
|
@ -12,6 +12,7 @@ class ClickHouseModelTest(TestCase):
|
|||
|
||||
def test_need_sync(self):
|
||||
# sync is disabled by default
|
||||
ClickHouseTestModel.sync_enabled = False
|
||||
self.assertFalse(ClickHouseTestModel.need_sync())
|
||||
|
||||
# There were no syncs. So it should be done
|
||||
|
|
|
@ -2,7 +2,7 @@ import datetime
|
|||
import logging
|
||||
from subprocess import Popen
|
||||
from time import sleep
|
||||
from unittest import expectedFailure
|
||||
from unittest import expectedFailure, skip
|
||||
|
||||
import os
|
||||
from django.test import TransactionTestCase
|
||||
|
@ -11,6 +11,7 @@ from random import randint
|
|||
|
||||
from django_clickhouse.database import connections
|
||||
from django_clickhouse.migrations import migrate_app
|
||||
from django_clickhouse.utils import int_ranges
|
||||
from tests.clickhouse_models import ClickHouseTestModel, ClickHouseCollapseTestModel, ClickHouseMultiTestModel
|
||||
from tests.models import TestModel
|
||||
|
||||
|
@ -101,7 +102,7 @@ class SyncTest(TransactionTestCase):
|
|||
|
||||
|
||||
class KillTest(TransactionTestCase):
|
||||
TEST_TIME = 30
|
||||
TEST_TIME = 60
|
||||
maxDiff = None
|
||||
|
||||
def setUp(self):
|
||||
|
@ -110,17 +111,39 @@ class KillTest(TransactionTestCase):
|
|||
connections['default'].create_database()
|
||||
migrate_app('tests', 'default')
|
||||
|
||||
# Disable sync for not interesting models
|
||||
ClickHouseMultiTestModel.sync_enabled = False
|
||||
ClickHouseTestModel.sync_enabled = False
|
||||
|
||||
def tearDown(self):
|
||||
# Disable sync for not interesting models
|
||||
ClickHouseMultiTestModel.sync_enabled = True
|
||||
ClickHouseTestModel.sync_enabled = True
|
||||
|
||||
def _check_data(self):
|
||||
logger.debug('django-clickhouse: syncing left test data')
|
||||
|
||||
# Sync all data that is not synced
|
||||
# Data is expected to be in test_db, not default. So we need to call subprocess
|
||||
# in order everything works correctly
|
||||
while ClickHouseCollapseTestModel.get_storage().operations_count(ClickHouseCollapseTestModel.get_import_key()):
|
||||
import_key = ClickHouseCollapseTestModel.get_import_key()
|
||||
storage = ClickHouseCollapseTestModel.get_storage()
|
||||
sync_left = storage.operations_count(import_key)
|
||||
while sync_left:
|
||||
logger.debug('django-clickhouse: final sync (%d left)' % sync_left)
|
||||
self.sync_iteration(False)
|
||||
sync_left = storage.operations_count(import_key)
|
||||
|
||||
ch_data = list(connections['default'].select('SELECT * FROM $table FINAL ORDER BY id',
|
||||
model_class=ClickHouseCollapseTestModel))
|
||||
pg_data = list(TestModel.objects.all().order_by('id'))
|
||||
|
||||
if len(pg_data) != len(ch_data):
|
||||
absent_ids = set(item.id for item in pg_data) - set(item.id for item in ch_data)
|
||||
logger.debug('django_clickhouse: absent ranges: %s (min: %d, max: %d)'
|
||||
% (','.join(('(%d, %d)' % r) for r in int_ranges(absent_ids)),
|
||||
min(item.id for item in pg_data), max(item.id for item in pg_data)))
|
||||
|
||||
self.assertEqual(len(pg_data), len(ch_data))
|
||||
serializer = ClickHouseCollapseTestModel.get_django_model_serializer()
|
||||
self.assertListEqual(ch_data, [serializer.serialize(item) for item in pg_data])
|
||||
|
@ -128,11 +151,15 @@ class KillTest(TransactionTestCase):
|
|||
@classmethod
|
||||
def sync_iteration(cls, kill=True):
|
||||
test_script = os.path.join(os.path.dirname(__file__), 'kill_test_sub_process.py')
|
||||
p_sync = Popen(['python3', test_script, 'sync', '--test-time', str(cls.TEST_TIME)])
|
||||
if kill:
|
||||
args = ['--test-time', str(cls.TEST_TIME)]
|
||||
else:
|
||||
args = ['--once', 'true']
|
||||
p_sync = Popen(['python3', test_script, 'sync'] + args)
|
||||
|
||||
if kill:
|
||||
sleep(randint(0, 5))
|
||||
logger.debug('django-clickhouse: killing: %d' % p_sync.pid)
|
||||
logger.debug('django-clickhouse: test killing: %d' % p_sync.pid)
|
||||
p_sync.kill()
|
||||
else:
|
||||
p_sync.wait()
|
||||
|
@ -140,7 +167,9 @@ class KillTest(TransactionTestCase):
|
|||
def test_kills(self):
|
||||
test_script = os.path.join(os.path.dirname(__file__), 'kill_test_sub_process.py')
|
||||
p_create = Popen(['python3', test_script, 'create', '--test-time', str(self.TEST_TIME)])
|
||||
p_update = Popen(['python3', test_script, 'update', '--test-time', str(self.TEST_TIME)])
|
||||
|
||||
# Updates must be slower than inserts, or they will do nothing
|
||||
p_update = Popen(['python3', test_script, 'update', '--test-time', str(self.TEST_TIME), '--batch-size', '500'])
|
||||
|
||||
start = now()
|
||||
while (now() - start).total_seconds() < self.TEST_TIME:
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
import datetime
|
||||
from unittest import TestCase
|
||||
|
||||
import pytz
|
||||
from django.test import TestCase
|
||||
|
||||
from django_clickhouse.models import ClickHouseSyncModel
|
||||
from django_clickhouse.utils import get_tz_offset, format_datetime, lazy_class_import
|
||||
from django_clickhouse.utils import get_tz_offset, format_datetime, lazy_class_import, int_ranges
|
||||
|
||||
|
||||
class GetTZOffsetTest(TestCase):
|
||||
|
@ -54,3 +54,16 @@ class TestLazyClassImport(TestCase):
|
|||
|
||||
def test_cls(self):
|
||||
self.assertEqual(ClickHouseSyncModel, lazy_class_import(ClickHouseSyncModel))
|
||||
|
||||
|
||||
class TestIntRanges(TestCase):
|
||||
def test_simple(self):
|
||||
self.assertListEqual([(1, 3), (5, 6), (8, 10)],
|
||||
list(int_ranges([1, 2, 3, 5, 6, 8, 9, 10])))
|
||||
|
||||
def test_empty(self):
|
||||
self.assertListEqual([], list(int_ranges([])))
|
||||
|
||||
def test_bounds(self):
|
||||
self.assertListEqual([(1, 1), (5, 6), (10, 10)],
|
||||
list(int_ranges([1, 5, 6, 10])))
|
||||
|
|
Loading…
Reference in New Issue
Block a user