Speed up init of multiple clickhouse models in a row

This commit is contained in:
M1ha 2019-01-23 15:43:46 +05:00
parent e8549b9509
commit 6db5f2b5bb
4 changed files with 44 additions and 9 deletions

View File

@ -4,7 +4,7 @@ This file defines base abstract models to inherit from
import datetime import datetime
from collections import defaultdict from collections import defaultdict
from itertools import chain from itertools import chain
from typing import List, Tuple, Iterable, Set, Any from typing import List, Tuple, Iterable, Set, Any, Dict
from django.db.models import Model as DjangoModel, QuerySet as DjangoQuerySet from django.db.models import Model as DjangoModel, QuerySet as DjangoQuerySet
from infi.clickhouse_orm.database import Database from infi.clickhouse_orm.database import Database
@ -19,7 +19,7 @@ from .exceptions import RedisLockTimeoutError
from .models import ClickHouseSyncModel from .models import ClickHouseSyncModel
from .query import QuerySet from .query import QuerySet
from .serializers import Django2ClickHouseModelSerializer from .serializers import Django2ClickHouseModelSerializer
from .utils import lazy_class_import, exec_multi_arg_func, exec_in_parallel from .utils import lazy_class_import, exec_multi_arg_func
class ClickHouseModelMeta(InfiModelBase): class ClickHouseModelMeta(InfiModelBase):
@ -58,6 +58,34 @@ class ClickHouseModel(with_metaclass(ClickHouseModelMeta, InfiModel)):
# This attribute is initialized in metaclass, as it must get model class as a parameter # This attribute is initialized in metaclass, as it must get model class as a parameter
objects = None # type: QuerySet objects = None # type: QuerySet
def __init__(self, **kwargs):
    """
    Initializes a model instance.
    If the '__multi_init' keyword is passed with a truthy value, the parent initializer is skipped
    and the instance is left for the caller to fill (init_many() creates instances this way).
    Otherwise all remaining keyword arguments are forwarded to the parent initializer.
    :param kwargs: Field values plus the optional '__multi_init' flag
    """
    # The flag is always removed from kwargs, so it never reaches the parent initializer
    if kwargs.pop('__multi_init', False):
        return

    super(ClickHouseModel, self).__init__(**kwargs)
@classmethod
def init_many(cls, kwargs_list):  # type: (Iterable[Dict[str, Any]]) -> List['ClickHouseModel']
    """
    Builds many model instances at once, skipping the parent initializer for each of them.
    The basic __init__ method is not effective if we need to init 100k objects.
    Every instance __dict__ is filled directly: class defaults first, then the given values.
    Given values are stored as is, they are not passed through the parent initializer.
    :param kwargs_list: An iterable of dicts, each mapping field names to values of one instance
    :return: A list of initialized instances, in the order of kwargs_list
    :raises AttributeError: If any dict contains a key which is not a field of the model
    """
    # Both are the same for every item, so they are looked up once
    valid_field_names = set(cls._fields)
    defaults = cls._defaults

    result = []
    for kwargs in kwargs_list:
        invalid_fields = set(kwargs) - valid_field_names
        if invalid_fields:
            # Sorted, so the message does not depend on set iteration order
            raise AttributeError('%s does not have a fields called %s'
                                 % (cls.__name__, ', '.join(sorted(invalid_fields))))

        # The flag tells ClickHouseModel.__init__ to skip the parent initializer
        item = cls(__multi_init=True)
        item.__dict__.update(defaults)
        item.__dict__.update(kwargs)
        result.append(item)

    return result
@classmethod @classmethod
def objects_in(cls, database): # type: (Database) -> QuerySet def objects_in(cls, database): # type: (Database) -> QuerySet
return QuerySet(cls, database) return QuerySet(cls, database)

View File

@ -26,7 +26,7 @@ class InsertOnlyEngineMixin:
:return: A list of model_cls objects :return: A list of model_cls objects
""" """
serializer = model_cls.get_django_model_serializer(writable=True) serializer = model_cls.get_django_model_serializer(writable=True)
return [serializer.serialize(obj) for obj in objects] return serializer.serialize_many(objects)
class MergeTree(InsertOnlyEngineMixin, infi_engines.MergeTree): class MergeTree(InsertOnlyEngineMixin, infi_engines.MergeTree):

View File

@ -1,4 +1,5 @@
from django.db.models import Model as DjangoModel from django.db.models import Model as DjangoModel
from typing import List, Iterable
from django_clickhouse.utils import model_to_dict from django_clickhouse.utils import model_to_dict
@ -13,17 +14,23 @@ class Django2ClickHouseModelSerializer:
self.exclude_serialize_fields = exclude_fields self.exclude_serialize_fields = exclude_fields
def serialize(self, obj): # type: (DjangoModel) -> 'ClickHouseModel' def _get_serialize_kwargs(self, obj):
data = model_to_dict(obj, fields=self.serialize_fields, exclude_fields=self.exclude_serialize_fields) data = model_to_dict(obj, fields=self.serialize_fields, exclude_fields=self.exclude_serialize_fields)
# Remove None values, they should be initialized as defaults # Remove None values, they should be initialized as defaults
params = {} result = {}
for key, value in data.items(): for key, value in data.items():
if value is None: if value is None:
pass pass
elif isinstance(value, bool): elif isinstance(value, bool):
params[key] = int(value) result[key] = int(value)
else: else:
params[key] = value result[key] = value
return self._model_cls(**params) return result
def serialize(self, obj):  # type: (DjangoModel) -> 'ClickHouseModel'
    """
    Converts a single django model instance to a ClickHouse model instance
    :param obj: Django model instance to convert
    :return: Model class instance, created with keyword arguments built from obj
    """
    init_kwargs = self._get_serialize_kwargs(obj)
    return self._model_cls(**init_kwargs)
def serialize_many(self, objs):  # type: (Iterable[DjangoModel]) -> List['ClickHouseModel']
    """
    Converts multiple django model instances with a single init_many() call of the model class
    :param objs: An iterable of django model instances to convert
    :return: The result of init_many() called on the model class
    """
    # A lazy generator: keyword arguments are built while init_many() iterates
    kwargs_iter = (self._get_serialize_kwargs(instance) for instance in objs)
    return self._model_cls.init_many(kwargs_iter)

View File

@ -166,7 +166,7 @@ class KillTest(TransactionTestCase):
self.assertEqual(len(pg_data), len(ch_data)) self.assertEqual(len(pg_data), len(ch_data))
serializer = ClickHouseCollapseTestModel.get_django_model_serializer() serializer = ClickHouseCollapseTestModel.get_django_model_serializer()
self.assertListEqual(ch_data, [serializer.serialize(item) for item in pg_data]) self.assertListEqual(ch_data, serializer.serialize_many(pg_data))
@classmethod @classmethod
def sync_iteration(cls, kill=True): def sync_iteration(cls, kill=True):