""" This file contains wrappers for infi.clckhouse_orm engines to use in django-clickhouse """ from collections import defaultdict from itertools import chain from typing import List, Tuple, TypeVar, Type, Optional from django.db.models import Model as DjangoModel from infi.clickhouse_orm import engines as infi_engines from infi.clickhouse_orm.database import Database T = TypeVar('T') class InsertOnlyEngineMixin: def get_insert_batch(self, model_cls, database, objects): # type: (Type[T], Database, List[DjangoModel]) -> List[T] """ Gets a list of model_cls instances to insert into database :param model_cls: ClickHouseModel subclass to import :param database: infi.clickhouse_orm Database instance to sync data with :param objects: A list of django Model instances to sync :return: A list of model_cls objects """ serializer = model_cls.get_django_model_serializer() return [serializer.serialize(obj, model_cls) for obj in objects] class MergeTree(InsertOnlyEngineMixin, infi_engines.MergeTree): pass class CollapsingMergeTree(InsertOnlyEngineMixin, infi_engines.CollapsingMergeTree): def get_final_versions(self, model_cls, objects): """ Get objects, that are currently stored in ClickHouse. Depending on the partition key this can be different for different models. In common case, this method is optimized for date field that doesn't change. It also supposes primary key to by id :param model_cls: ClickHouseModel subclass to import :param objects: Objects for which final versions are searched :return: A list of """ min_date, max_date = None, None for obj in objects: obj_date = getattr(obj, self.date_col) if min_date is None or min_date > obj_date: min_date = obj_date if max_date is None or max_date < obj_date: max_date = obj_date obj_ids = [str(obj.id) for obj in objects] query = "SELECT * FROM $table FINAL WHERE `%s` >= '%s' AND `%s` <= '%s', id IN (%s)" \ % (self.date_col, min_date.isoformat(), self.date_col, max_date.isoformat(), ', '.join(obj_ids)) qs = model_cls.select(query, model_class=model_cls) return list(qs) def get_insert_batch(self, model_cls, database, objects): # type: (Type[T], Database, List[DjangoModel]) -> List[T] """ Gets a list of model_cls instances to insert into database :param model_cls: ClickHouseModel subclass to import :param database: infi.clickhouse_orm Database instance to sync data with :param objects: A list of django Model instances to sync :return: A list of model_cls objects """ new_objs = super(CollapsingMergeTree, self).get_insert_batch(model_cls, database, objects) old_objs = self.get_final_versions(model_cls, new_objs) for obj in old_objs: self.set_obj_sign(obj, -1) for obj in new_objs: self.set_obj_sign(obj, 1) return old_objs + new_objs def set_obj_sign(self, obj, sign): # type: (InfiModel, int) -> None """ Sets objects sign. By default gets attribute nmae from sign_col :return: None """ setattr(obj, self.sign_col, sign)