Compare commits

..

No commits in common. "master" and "v1.2.0" have entirely different histories.

19 changed files with 54 additions and 194 deletions

View File

@ -11,56 +11,12 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
postgres-version: ["9.6", "10", "11", "12", "13", "14", "15", "16"]
django-version: ["3.2", "4.0", "4.1", "4.2", "5.0", "5.1"]
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
postgres-version: ["9.6", "10", "11", "12", "13"]
django-version: ["2.1", "2.2", "3.0", "3.1", "3.2"]
clickhouse-version: ["latest"]
redis-version: ["latest"]
exclude:
# Django 4.0+ doesn't support PostgreSQL 9.6
- django-version: "4.0"
postgres-version: "9.6"
- django-version: "4.1"
postgres-version: "9.6"
- django-version: "4.2"
postgres-version: "9.6"
- django-version: "5.0"
postgres-version: "9.6"
- django-version: "5.1"
postgres-version: "9.6"
# Django 4.1+ doesn't support PostgreSQL 10
- django-version: "4.1"
postgres-version: "10"
- django-version: "4.2"
postgres-version: "10"
- django-version: "5.0"
postgres-version: "10"
- django-version: "5.1"
postgres-version: "10"
# Django 4.2+ doesn't support PostgreSQL 11
- django-version: "4.2"
postgres-version: "11"
- django-version: "5.0"
postgres-version: "11"
- django-version: "5.1"
postgres-version: "11"
# Django 5.1+ doesn't support PostgreSQL 12
- django-version: "5.1"
postgres-version: "12"
# Django 5.0+ does not support python 3.8, 3.9
- django-version: "5.0"
python-version: "3.8"
- django-version: "5.0"
python-version: "3.9"
- django-version: "5.1"
python-version: "3.8"
- django-version: "5.1"
python-version: "3.9"
services:
postgres:
image: postgres:${{ matrix.postgres-version }}

View File

@ -24,7 +24,7 @@ services:
build:
context: .
args:
- PYTHON_IMAGE_TAG=latest
- PYTHON_VER=latest
environment:
- REDIS_HOST=redis_db
- PGHOST=postgres_db

View File

@ -96,19 +96,6 @@ class ClickHouseUser(ClickHouseModel):
engine = MergeTree('birthday', ('birthday',))
```
**Important note**: the `clickhouse_models.py` file is not imported automatically by Django initialization code. So if your models are not used anywhere outside this file, you should import it somewhere in your code if you want synchronization to work correctly. For instance, you can customise [AppConfig](https://docs.djangoproject.com/en/5.0/ref/applications/#django.apps.AppConfig.ready) like:
```python
from django.apps import AppConfig
class MyAppConfig(AppConfig):
name = 'my_app'
def ready(self):
from my_app.clickhouse_models import ClickHouseUser
```
## Migration to create table in ClickHouse
1. Read [migrations](migrations.md) section
2. Create `clickhouse_migrations` package in your django app
@ -125,7 +112,7 @@ class MyAppConfig(AppConfig):
4. Add content to file `0001_initial.py`:
```python
from django_clickhouse import migrations
from my_app.clickhouse_models import ClickHouseUser
from my_app.cilckhouse_models import ClickHouseUser
class Migration(migrations.Migration):
operations = [

View File

@ -25,17 +25,12 @@ Router is a class, defining 3 methods:
Returns `database alias` to use for given `model` for `SELECT` queries.
* `def db_for_write(self, model: ClickHouseModel, **hints) -> str`
Returns `database alias` to use for given `model` for `INSERT` queries.
* `def allow_migrate(self, db_alias: str, app_label: str, operation: Operation, **hints: dict) -> bool`
* `def allow_migrate(self, db_alias: str, app_label: str, operation: Operation, model: Optional[ClickHouseModel] = None, **hints: dict) -> bool`
Checks if migration `operation` should be applied in django application `app_label` on database `db_alias`.
Optional `hints` help to pass additional info which can be used to test migrations availability on concrete model.
Optional `model` field can be used to determine migrations on concrete model.
By default [CLICKHOUSE_DATABASE_ROUTER](configuration.md#clickhouse_database_router) is used.
It gets routing information from model fields, described below.
It also gives ability to use 2 kinds of hints:
* `force_migrate_on_databases: Iterable[str]` - concrete database aliases where migration should be applied
* `model: Type[ClickHouseModel]` - a model class, to read routing attributes from.
Can be set as class or its string name.
If name is set, class is searched in current `<app_label>.<config.MODELS_MODULE>` package.
## ClickHouseModel routing attributes
Default database router reads routing settings from model attributes.

View File

@ -13,7 +13,7 @@ with open('requirements.txt') as f:
setup(
name='django-clickhouse',
version='1.2.2',
version='1.2.0',
packages=['django_clickhouse', 'django_clickhouse.management.commands'],
package_dir={'': 'src'},
url='https://github.com/carrotquest/django-clickhouse',

View File

@ -9,7 +9,6 @@ from itertools import chain
from typing import List, Tuple, Iterable, Set, Any, Optional
from django.db.models import Model as DjangoModel, QuerySet as DjangoQuerySet
from django.utils.timezone import now
from infi.clickhouse_orm.engines import CollapsingMergeTree
from infi.clickhouse_orm.models import Model as InfiModel, ModelBase as InfiModelBase
from statsd.defaults.django import statsd
@ -291,7 +290,7 @@ class ClickHouseModel(InfiModel, metaclass=ClickHouseModelMeta):
res = (datetime.datetime.now() - last_sync_time).total_seconds() >= cls.get_sync_delay()
logger.debug('django-clickhouse: need_sync returned %s for class %s as no last sync found'
' (now: %s, last: %s, delay: %d)'
% (res, cls.__name__, now().isoformat(), last_sync_time.isoformat(),
% (res, cls.__name__, datetime.datetime.now().isoformat(), last_sync_time.isoformat(),
cls.get_sync_delay()))
return res

View File

@ -40,9 +40,10 @@ class Migration:
database = database or connections[db_alias]
for op in self.operations:
model_class = getattr(op, 'model_class', None)
hints = getattr(op, 'hints', {})
if db_router.allow_migrate(db_alias, self.__module__, op, **hints):
if db_router.allow_migrate(db_alias, self.__module__, op, model_class, **hints):
op.apply(database)

View File

@ -30,26 +30,24 @@ class DefaultRouter:
"""
return random.choice(model.write_db_aliases)
def allow_migrate(self, db_alias: str, app_label: str, operation: Operation, **hints) -> bool:
def allow_migrate(self, db_alias: str, app_label: str, operation: Operation,
model=None, **hints) -> bool:
"""
Checks if migration can be applied to given database
:param db_alias: Database alias to check
:param app_label: App from which migration is got
:param operation: Operation object to perform
:param model: Model migration is applied to
:param hints: Hints to make correct decision
:return: boolean
"""
if hints.get("force_migrate_on_databases", None):
return db_alias in hints["force_migrate_on_databases"]
model = hints.get('model') or getattr(operation, 'model_class', None)
if model is None:
raise ValueError('"model_class" attribute is not defined for operation "%s". '
'Please provide "force_migrate_on_databases" or "model" in hints.'
% operation.__class__.__name__)
if hints.get('model'):
model = '%s.%s.%s' % (app_label, config.MODELS_MODULE, hints['model']) \
if isinstance(hints['model'], str) else hints['model']
model = '%s.%s.%s' % (app_label, config.MODELS_MODULE, model) \
if isinstance(model, str) else model
model = lazy_class_import(model)
if operation.__class__ not in {CreateTable, DropTable}:

View File

@ -11,15 +11,12 @@ import logging
from typing import Any, Optional, List, Tuple
import os
from celery.utils.nodenames import gethostname
from django.utils.timezone import now
from statsd.defaults.django import statsd
from .configuration import config
from .exceptions import ConfigurationError, RedisLockTimeoutError
from .redis import redis_zadd
from .utils import check_pid_exists, get_subclasses, SingletonMeta
from .utils import check_pid, get_subclasses, SingletonMeta
logger = logging.getLogger('django-clickhouse')
@ -189,7 +186,8 @@ class RedisStorage(Storage, metaclass=SingletonMeta):
def get_operations(self, import_key, count, **kwargs):
ops_key = self.REDIS_KEY_OPS_TEMPLATE.format(import_key=import_key)
res = self._redis.zrangebyscore(ops_key, '-inf', now().timestamp(), start=0, num=count, withscores=True)
res = self._redis.zrangebyscore(ops_key, '-inf', datetime.datetime.now().timestamp(), start=0, num=count,
withscores=True)
if res:
ops, scores = zip(*res)
@ -216,31 +214,19 @@ class RedisStorage(Storage, metaclass=SingletonMeta):
# Block process to be single threaded. Default sync delay is 10 * default sync delay.
# It can be changed for model, by passing `lock_timeout` argument to pre_sync
lock = self.get_lock(import_key, **kwargs)
current_host_name = gethostname()
lock_pid_key = self.REDIS_KEY_LOCK_PID.format(import_key=import_key)
try:
lock.acquire()
self._redis.set(lock_pid_key, '%s:%s' % (current_host_name, os.getpid()))
self._redis.set(lock_pid_key, os.getpid())
except RedisLockTimeoutError:
statsd.incr('%s.sync.%s.lock.timeout' % (config.STATSD_PREFIX, import_key))
# Lock is busy. But If the process has been killed, I don't want to wait any more.
# I assume that lock has been killed if it works on the same host (other than localhost)
# and there is no process alive.
# I also assume that there are no hosts with same hostname other than localhost.
# Note: previously value contained only pid. Let's support old value for back compatibility
active_lock_data = self._redis.get(lock_pid_key).split(b":")
active_pid = int(active_lock_data[-1] or 0)
active_host_name = active_lock_data[0] \
if len(active_lock_data) > 1 and active_lock_data[0] != "localhost" else None
if (
active_pid and active_host_name
and active_host_name == current_host_name and not check_pid_exists(active_pid)
):
# Let's check if pid exists
pid = int(self._redis.get(lock_pid_key) or 0)
if pid and not check_pid(pid):
statsd.incr('%s.sync.%s.lock.hard_release' % (config.STATSD_PREFIX, import_key))
logger.warning('django-clickhouse: hard releasing lock "%s" locked by pid %d (process is dead)'
% (import_key, active_pid))
% (import_key, pid))
self._redis.delete(lock_pid_key)
lock.hard_release()
self.pre_sync(import_key, **kwargs)

View File

@ -127,7 +127,7 @@ def model_to_dict(instance: DjangoModel, fields: Optional[Iterable[str]] = None
return data
def check_pid_exists(pid):
def check_pid(pid):
"""
Check For the existence of a unix pid.
"""

View File

@ -5,7 +5,7 @@
"fields": {
"value": 100,
"created_date": "2018-01-01",
"created": "2018-01-01 00:00:00+0000"
"created": "2018-01-01 00:00:00"
}
},
{
@ -14,7 +14,7 @@
"fields": {
"value": 200,
"created_date": "2018-02-01",
"created": "2018-02-01 00:00:00+0000"
"created": "2018-02-01 00:00:00"
}
},
{
@ -23,7 +23,7 @@
"fields": {
"value": 300,
"created_date": "2018-03-01",
"created": "2018-03-01 00:00:00+0000"
"created": "2018-03-01 00:00:00"
}
},
{
@ -32,7 +32,7 @@
"fields": {
"value": 400,
"created_date": "2018-04-01",
"created": "2018-04-01 00:00:00+0000"
"created": "2018-04-01 00:00:00"
}
},
{
@ -41,7 +41,7 @@
"fields": {
"value": 500,
"created_date": "2018-05-01",
"created": "2018-05-01 00:00:00+0000"
"created": "2018-05-01 00:00:00"
}
}
]

View File

@ -5,7 +5,7 @@
"fields": {
"value": 100,
"created_date": "2018-01-01",
"created": "2018-02-01 00:00:00+0000"
"created": "2018-02-01 00:00:00"
}
},
{
@ -14,7 +14,7 @@
"fields": {
"value": 200,
"created_date": "2018-02-01",
"created": "2018-02-01 00:00:00+0000"
"created": "2018-02-01 00:00:00"
}
}
]

View File

@ -8,7 +8,6 @@ from time import sleep
import datetime
from django.utils.timezone import now
# set Django environment
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@ -26,7 +25,7 @@ logger = logging.getLogger('django-clickhouse')
def create(batch_size=1000, test_time=60, period=1, **kwargs):
for iteration in range(int(test_time / period)):
res = TestModel.objects.db_manager('test_db').bulk_create([
TestModel(created=now(), created_date='2018-01-01', value=iteration * batch_size + i)
TestModel(created=datetime.datetime.now(), created_date='2018-01-01', value=iteration * batch_size + i)
for i in range(batch_size)
])
logger.info('django-clickhouse: test created %d records' % len(res))
@ -55,8 +54,8 @@ def sync(period=1, test_time=60, **kwargs):
if kwargs['once']:
ClickHouseCollapseTestModel.sync_batch_from_storage()
else:
start = now()
while (now() - start).total_seconds() < test_time:
start = datetime.datetime.now()
while (datetime.datetime.now() - start).total_seconds() < test_time:
ClickHouseCollapseTestModel.sync_batch_from_storage()
sleep(period)

View File

@ -4,7 +4,6 @@ This file contains django settings to run tests with runtests.py
from os import environ
SECRET_KEY = 'fake-key'
USE_TZ = True
DATABASES = {
'default': {

View File

@ -1,7 +1,6 @@
import datetime
from django.test import TestCase
from django.utils.timezone import now
from tests.clickhouse_models import ClickHouseTestModel
@ -21,11 +20,11 @@ class ClickHouseModelTest(TestCase):
self.assertTrue(ClickHouseTestModel.need_sync())
# Time hasn't passed - no sync
self.storage.set_last_sync_time(ClickHouseTestModel.get_import_key(), now())
self.storage.set_last_sync_time(ClickHouseTestModel.get_import_key(), datetime.datetime.now())
self.assertFalse(ClickHouseTestModel.need_sync())
# Time has passed
sync_delay = ClickHouseTestModel.get_sync_delay()
self.storage.set_last_sync_time(ClickHouseTestModel.get_import_key(),
now() - datetime.timedelta(seconds=sync_delay + 1))
datetime.datetime.now() - datetime.timedelta(seconds=sync_delay + 1))
self.assertTrue(ClickHouseTestModel.need_sync())

View File

@ -13,7 +13,7 @@ from tests.clickhouse_models import ClickHouseTestModel
class NoMigrateRouter(DefaultRouter):
def allow_migrate(self, db_alias, app_label, operation, **hints):
def allow_migrate(self, db_alias, app_label, operation, model=None, **hints):
return False

View File

@ -40,7 +40,7 @@ class TestOperations(TransactionTestCase):
def test_save(self):
# INSERT operation
instance = self.django_model(created_date=datetime.date.today(), created=now(), value=2)
instance = self.django_model(created_date=datetime.date.today(), created=datetime.datetime.now(), value=2)
instance.save()
self.assertListEqual([('insert', "%s.%d" % (self.db_alias, instance.pk))],
self.storage.get_operations(self.clickhouse_model.get_import_key(), 10))
@ -52,13 +52,13 @@ class TestOperations(TransactionTestCase):
self.storage.get_operations(self.clickhouse_model.get_import_key(), 10))
def test_create(self):
instance = self.django_model.objects.create(pk=100555, created_date=datetime.date.today(), created=now(),
value=2)
instance = self.django_model.objects.create(pk=100555, created_date=datetime.date.today(),
created=datetime.datetime.now(), value=2)
self.assertListEqual([('insert', "%s.%d" % (self.db_alias, instance.pk))],
self.storage.get_operations(self.clickhouse_model.get_import_key(), 10))
def test_bulk_create(self):
items = [self.django_model(created_date=datetime.date.today(), created=now(), value=i)
items = [self.django_model(created_date=datetime.date.today(), created=datetime.datetime.now(), value=i)
for i in range(5)]
items = self.django_model.objects.bulk_create(items)
self.assertEqual(5, len(items))
@ -187,7 +187,7 @@ class TestOperations(TransactionTestCase):
def test_get_or_create(self):
instance, created = self.django_model.objects. \
get_or_create(pk=100, defaults={'created_date': datetime.date.today(), 'created': now(),
get_or_create(pk=100, defaults={'created_date': datetime.date.today(), 'created': datetime.datetime.now(),
'value': 2})
self.assertTrue(created)
@ -203,7 +203,8 @@ class TestOperations(TransactionTestCase):
def test_update_or_create(self):
instance, created = self.django_model.objects. \
update_or_create(pk=100, defaults={'created_date': datetime.date.today(), 'created': now(), 'value': 2})
update_or_create(pk=100, defaults={'created_date': datetime.date.today(),
'created': datetime.datetime.now(), 'value': 2})
self.assertTrue(created)
self.assertListEqual([('insert', "%s.%d" % (self.db_alias, instance.pk))],
self.storage.get_operations(self.clickhouse_model.get_import_key(), 10))
@ -228,7 +229,7 @@ class TestOperations(TransactionTestCase):
def test_bulk_create_returning(self):
items = [
self.django_model(created_date=datetime.date.today(), created=now(), value=i)
self.django_model(created_date=datetime.date.today(), created=datetime.datetime.now(), value=i)
for i in range(5)
]
items = self.django_model.objects.bulk_create_returning(items)
@ -259,7 +260,7 @@ class TestOperations(TransactionTestCase):
def test_save_returning(self):
# INSERT operation
instance = self.django_model(created_date=datetime.date.today(), created=now(), value=2)
instance = self.django_model(created_date=datetime.date.today(), created=datetime.datetime.now(), value=2)
instance.save_returning()
self.assertListEqual([('insert', "%s.%d" % (self.db_alias, instance.pk))],
self.storage.get_operations(self.clickhouse_model.get_import_key(), 10))
@ -283,7 +284,6 @@ class TestOperations(TransactionTestCase):
class TestSecondaryOperations(TestOperations):
# from django.db.models.fields import *
fixtures = ['test_secondary_model']
django_model = SecondaryTestModel
clickhouse_model = ClickHouseSecondTestModel

View File

@ -1,59 +0,0 @@
from django.test import SimpleTestCase
from django_clickhouse.migrations import RunSQL, CreateTable
from django_clickhouse.routers import DefaultRouter
from tests.clickhouse_models import ClickHouseTestModel
class DefaultRouterAllowMigrateTest(SimpleTestCase):
    """Checks of ``DefaultRouter.allow_migrate`` driven by hints, by the operation's model, or by neither."""

    def setUp(self):
        # A RunSQL operation is shared by the hint-based tests and by test_no_model.
        self.operation = RunSQL('SELECT 1')
        self.router = DefaultRouter()

    def _check_aliases(self, allowed_alias, rejected_alias, app_label, operation_factory, **hints):
        """
        Runs the 'Allow migrate' / 'Reject migrate' sub-test pair.
        :param allowed_alias: Database alias on which allow_migrate must return a truthy value
        :param rejected_alias: Database alias on which allow_migrate must return a falsy value
        :param app_label: App label passed to the router
        :param operation_factory: Zero-argument callable, invoked inside each sub-test to get the operation
        :param hints: Hints forwarded to allow_migrate
        :return: None
        """
        expectations = (
            ('Allow migrate', allowed_alias, self.assertTrue),
            ('Reject migrate', rejected_alias, self.assertFalse),
        )
        for message, db_alias, verify in expectations:
            with self.subTest(message):
                res = self.router.allow_migrate(db_alias, app_label, operation_factory(), **hints)
                verify(res)

    def test_hints_model_class(self):
        """The model is passed in hints as a class."""
        self._check_aliases('default', 'other', 'tests', lambda: self.operation,
                            model=ClickHouseTestModel)

    def test_hints_model_name(self):
        """The model is passed in hints as its string name."""
        self._check_aliases('default', 'other', 'tests', lambda: self.operation,
                            model='ClickHouseTestModel')

    def test_hints_force_migrate_on_databases(self):
        """Only the aliases listed in the force_migrate_on_databases hint are allowed."""
        self._check_aliases('secondary', 'default', 'apps', lambda: self.operation,
                            force_migrate_on_databases=['secondary'])

    def test_model_operation(self):
        """No hints: a fresh CreateTable(ClickHouseTestModel) operation is built for each sub-test."""
        self._check_aliases('default', 'other', 'apps', lambda: CreateTable(ClickHouseTestModel))

    def test_no_model(self):
        """Calling allow_migrate with the RunSQL operation and no hints is expected to raise ValueError."""
        with self.assertRaises(ValueError):
            self.router.allow_migrate('default', 'apps', self.operation)

View File

@ -30,7 +30,7 @@ class SyncTest(TransactionTestCase):
ClickHouseTestModel.get_storage().flush()
def test_simple(self):
obj = TestModel.objects.create(value=1, created=now(), created_date=datetime.date.today())
obj = TestModel.objects.create(value=1, created=datetime.datetime.now(), created_date=datetime.date.today())
ClickHouseTestModel.sync_batch_from_storage()
synced_data = list(ClickHouseTestModel.objects.all())
@ -40,7 +40,7 @@ class SyncTest(TransactionTestCase):
self.assertEqual(obj.id, synced_data[0].id)
def test_collapsing_update_by_final(self):
obj = TestModel.objects.create(value=1, created=now(), created_date=datetime.date.today())
obj = TestModel.objects.create(value=1, created=datetime.datetime.now(), created_date=datetime.date.today())
obj.value = 2
obj.save()
ClickHouseCollapseTestModel.sync_batch_from_storage()
@ -63,7 +63,7 @@ class SyncTest(TransactionTestCase):
def test_collapsing_update_by_version(self):
ClickHouseCollapseTestModel.engine.version_col = 'version'
obj = TestModel.objects.create(value=1, created=now(), created_date=datetime.date.today())
obj = TestModel.objects.create(value=1, created=datetime.datetime.now(), created_date=datetime.date.today())
obj.value = 2
obj.save()
ClickHouseCollapseTestModel.sync_batch_from_storage()
@ -97,7 +97,7 @@ class SyncTest(TransactionTestCase):
self.assertEqual(0, len(synced_data))
def test_multi_model(self):
obj = TestModel.objects.create(value=1, created=now(), created_date=datetime.date.today())
obj = TestModel.objects.create(value=1, created=datetime.datetime.now(), created_date=datetime.date.today())
obj.value = 2
obj.save()
ClickHouseMultiTestModel.sync_batch_from_storage()
@ -268,7 +268,7 @@ class ProfileTest(TransactionTestCase):
ClickHouseTestModel.sync_enabled = False
TestModel.objects.bulk_create([
TestModel(created=now(), created_date='2018-01-01', value=i)
TestModel(created=datetime.datetime.now(), created_date='2018-01-01', value=i)
for i in range(self.BATCH_SIZE)
])