From 7b3287cf5c3ab9eef16da2219c066b97a8a05007 Mon Sep 17 00:00:00 2001 From: M1hacka Date: Sun, 1 Dec 2019 13:33:43 +0500 Subject: [PATCH] Added some docs --- docs/index.md | 11 ++- docs/migrations.md | 2 + docs/models.md | 110 +++++++++++++++++++++ docs/queries.md | 4 + docs/routing.md | 62 ++++++++++++ docs/synchronization.md | 1 + docs/usage.md | 2 - src/django_clickhouse/clickhouse_models.py | 8 ++ src/django_clickhouse/migrations.py | 2 +- 9 files changed, 197 insertions(+), 5 deletions(-) create mode 100644 docs/migrations.md create mode 100644 docs/models.md create mode 100644 docs/queries.md create mode 100644 docs/routing.md create mode 100644 docs/synchronization.md delete mode 100644 docs/usage.md diff --git a/docs/index.md b/docs/index.md index 016ea04..8afcfce 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,5 +6,12 @@ * [Requirements](basic_information.md#requirements) * [Installation](basic_information.md#installation) * Usage - * [Storages](storages.md) - * [RedisStorage](storages.md#redis_storage) \ No newline at end of file + * [Models](models.md) + * [DjangoModel](models.md#DjangoModel) + * [ClickHouseModel](models.md#ClickHouseModel) + * [Making queries](queries.md) + * [Database routing](routing.md) + * [Migrations](migrations.md) + * [Synchronization](synchronization.md) + * [Storages](storages.md) + * [RedisStorage](storages.md#redis_storage) diff --git a/docs/migrations.md b/docs/migrations.md new file mode 100644 index 0000000..08a8e4e --- /dev/null +++ b/docs/migrations.md @@ -0,0 +1,2 @@ +# Migrations +Migration system is based on diff --git a/docs/models.md b/docs/models.md new file mode 100644 index 0000000..fdbc105 --- /dev/null +++ b/docs/models.md @@ -0,0 +1,110 @@ +# Models +A model is a pythonic class representing a database table in your code. + It also defines an interface (methods) to perform operations on this table + and describes its configuration inside the framework. 
+ +This library operates 2 kinds of models: +* Django model, describing tables in the source relational database +* ClickHouseModel, describing models in the [ClickHouse](https://clickhouse.yandex/docs/en) database + +In order to distinguish them, I will refer to them as ClickHouseModel and DjangoModel in further documentation. + +## DjangoModel +Django provides a [model system](https://docs.djangoproject.com/en/2.2/topics/db/models/) + to interact with relational databases. + In order to perform [synchronization](synchronization.md) we need to "catch" all DML operations + on the source django model and save information about them in [storage](storages.md). + To achieve this, the library introduces the abstract `django_clickhouse.models.ClickHouseSyncModel` class. + Each model inherited from `ClickHouseSyncModel` will automatically save the information needed for sync to storage. +Read the [synchronization](synchronization.md) section for more info. + +`ClickHouseSyncModel` saves information about: +* `Model.objects.create()`, `Model.objects.bulk_create()` +* `Model.save()`, `Model.delete()` +* `QuerySet.update()`, `QuerySet.delete()` +* All queries of the [django-pg-returning](https://pypi.org/project/django-pg-returning/) library +* All queries of the [django-pg-bulk-update](https://pypi.org/project/django-pg-bulk-update/) library + +You can also combine your custom django manager and queryset using mixins from the `django_clickhouse.models` package. + +**Important note**: Operations are saved in [transaction.on_commit()](https://docs.djangoproject.com/en/2.2/topics/db/transactions/#django.db.transaction.on_commit). + The goal is to avoid syncing operations that were not committed to the relational database. + But this may also have a bad side effect: a situation when the transaction is committed, + but the operation hasn't been registered because something went wrong during registration. 
+ +Example: +```python +from django_clickhouse.models import ClickHouseSyncModel +from django.db import models +from datetime import date + +class User(ClickHouseSyncModel): + first_name = models.CharField(max_length=50) + age = models.IntegerField() + birthday = models.DateField() + +# All operations will be registered to sync with ClickHouse models: +User.objects.create(first_name='Alice', age=16, birthday=date(2003, 6, 1)) +User(first_name='Bob', age=17, birthday=date(2002, 1, 1)).save() +User.objects.update(first_name='Candy') +``` + +## ClickHouseModel +This kind of model is based on [infi.clickhouse_orm Model](https://github.com/Infinidat/infi.clickhouse_orm/blob/develop/docs/models_and_databases.md#defining-models) + and represents a table in [ClickHouse database](https://clickhouse.yandex/docs/en). + +You should define a `ClickHouseModel` subclass for each table you want to access and sync in ClickHouse. +Each model should be inherited from `django_clickhouse.clickhouse_models.ClickHouseModel`. +By default, models are searched in the `clickhouse_models` module of each django app. +You can change the module name using the setting [CLICKHOUSE_MODELS_MODULE](configuration.md#models_module) + +You can read more about creating models and fields [here](https://github.com/Infinidat/infi.clickhouse_orm/blob/develop/docs/models_and_databases.md#defining-models): + all capabilities are supported. 
At the same time, the django-clickhouse library adds: +* [routing attributes and methods](routing.md) +* [sync attributes and methods](synchronization.md) + +Example: +```python +from django_clickhouse.clickhouse_models import ClickHouseModel +from django_clickhouse.engines import MergeTree +from infi.clickhouse_orm import fields + +class HeightData(ClickHouseModel): + django_model = User + + first_name = fields.StringField() + birthday = fields.DateField() + height = fields.Float32Field() + + engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday')) + + +class AgeData(ClickHouseModel): + django_model = User + + first_name = fields.StringField() + birthday = fields.DateField() + age = fields.Int32Field() + + engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday')) +``` + +### ClickHouseMultiModel +In some cases you may need to sync a single DjangoModel to multiple ClickHouse models. +This model gives the ability to reduce the number of relational database operations. +You can read more in the [sync](synchronization.md) section. + +Example: +```python +from django_clickhouse.clickhouse_models import ClickHouseMultiModel + +class MyMultiModel(ClickHouseMultiModel): + django_model = User + sub_models = [AgeData, HeightData] +``` + +## Engines +Engine is a way of storing, indexing, replicating and sorting data in [ClickHouse](https://clickhouse.yandex/docs/en/operations/table_engines/). +Engine system is based on [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm/blob/develop/docs/table_engines.md#table-engines). +django-clickhouse extends the original engine classes, as each engine can have its own synchronization mechanics. +Engines are defined in the `django_clickhouse.engines` module. 
diff --git a/docs/queries.md b/docs/queries.md new file mode 100644 index 0000000..d0178a8 --- /dev/null +++ b/docs/queries.md @@ -0,0 +1,4 @@ +# Making queries +The library's query system extends [infi.clickhouse-orm](https://github.com/Infinidat/infi.clickhouse_orm/blob/develop/docs/querysets.md). + +TODO diff --git a/docs/routing.md b/docs/routing.md new file mode 100644 index 0000000..69d399d --- /dev/null +++ b/docs/routing.md @@ -0,0 +1,62 @@ +# Database routing +One of this library's goals was to create easy and extendable automatic database routing. + +## Motivation +In the original [infi.clickhouse-orm](https://github.com/Infinidat/infi.clickhouse_orm) + you had to explicitly create [Database](https://github.com/Infinidat/infi.clickhouse_orm/blob/develop/docs/models_and_databases.md#inserting-to-the-database) objects + and set the database for each query with the `objects_in(db)` method. + But common projects use quite a small number of database connections. + As a result, it's easier to set up routing once and use it as [django](https://docs.djangoproject.com/en/2.2/topics/db/multi-db/) does. +Unlike traditional relational databases, [ClickHouse](https://clickhouse.yandex/docs/en/) + has per-table replication. + This means that: + 1) Each model can have its own replication scheme + 2) Some migration queries are replicated automatically, others are not. + 3) To make the system more extendable we need default routing, per-model routing and a router class for complex cases. + +## Introduction +All database connections are defined in the [CLICKHOUSE_DATABASES](configuration.md#databases) setting. + Each connection has its own alias name to refer to it by. + If no routing is configured, [CLICKHOUSE_DEFAULT_DB_ALIAS](configuration.md#default_db_alias) is used. + +## Router +Router is a class, defining 3 methods: +* `def db_for_read(self, model: ClickHouseModel, **hints) -> str` + Returns `database alias` to use for given `model` for `SELECT` queries. 
+* `def db_for_write(self, model: ClickHouseModel, **hints) -> str` + Returns `database alias` to use for given `model` for `INSERT` queries. +* `def allow_migrate(self, db_alias: str, app_label: str, operation: Operation, model: Optional[ClickHouseModel] = None, **hints: dict) -> bool` + Checks if migration `operation` should be applied in django application `app_label` on database `db_alias`. + Optional `model` field can be used to determine migrations on concrete model. + +By default [CLICKHOUSE_DATABASE_ROUTER](configuration.md#database_router) is used. + It gets routing information from model fields, described below. + +## ClickHouseModel routing attributes +Default database router reads routing settings from model attributes. +```python +from django_clickhouse.configuration import config +from django_clickhouse.clickhouse_models import ClickHouseModel + +class MyModel(ClickHouseModel): + # Servers, model is replicated to. + # Router takes random database to read or write from. + read_db_aliases = (config.DEFAULT_DB_ALIAS,) + write_db_aliases = (config.DEFAULT_DB_ALIAS,) + + # Databases to perform replicated migration queries, such as ALTER TABLE. + # Migration is applied to random database from the list. + migrate_replicated_db_aliases = (config.DEFAULT_DB_ALIAS,) + + # Databases to perform non-replicated migrations (CREATE TABLE, DROP TABLE). + # Migration is applied to all databases from the list. 
+ migrate_non_replicated_db_aliases = (config.DEFAULT_DB_ALIAS,) + ``` + +## Setting database in QuerySet +Database can be set in each [QuerySet](queries.md) explicitly by using one of the following methods: +* With [infi approach](https://github.com/Infinidat/infi.clickhouse_orm/blob/develop/docs/querysets.md#querysets): `MyModel.objects_in(db_object).filter(id__in=[1,2,3]).count()` +* With `using()` method: `MyModel.objects.filter(id__in=[1,2,3]).using(db_alias).count()` + +If no explicit database is provided, the database connection to use is determined lazily with the router's `db_for_read` or `db_for_write` + method, depending on the query type. \ No newline at end of file diff --git a/docs/synchronization.md b/docs/synchronization.md new file mode 100644 index 0000000..ef0d0ee --- /dev/null +++ b/docs/synchronization.md @@ -0,0 +1 @@ +# Synchronization diff --git a/docs/usage.md b/docs/usage.md deleted file mode 100644 index 0ff38af..0000000 --- a/docs/usage.md +++ /dev/null @@ -1,2 +0,0 @@ -# Usage - diff --git a/src/django_clickhouse/clickhouse_models.py b/src/django_clickhouse/clickhouse_models.py index 1b34c8d..eaa47a1 100644 --- a/src/django_clickhouse/clickhouse_models.py +++ b/src/django_clickhouse/clickhouse_models.py @@ -48,9 +48,17 @@ class ClickHouseModel(with_metaclass(ClickHouseModelMeta, InfiModel)): django_model = None django_model_serializer = Django2ClickHouseModelSerializer + # Servers, model is replicated to. + # Router takes random database to read or write from. read_db_aliases = (config.DEFAULT_DB_ALIAS,) write_db_aliases = (config.DEFAULT_DB_ALIAS,) + + # Databases to perform replicated migration queries, such as ALTER TABLE. + # Migration is applied to random database from the list. migrate_replicated_db_aliases = (config.DEFAULT_DB_ALIAS,) + + # Databases to perform non-replicated migrations (CREATE TABLE, DROP TABLE). + # Migration is applied to all databases from the list. 
migrate_non_replicated_db_aliases = (config.DEFAULT_DB_ALIAS,) sync_enabled = False diff --git a/src/django_clickhouse/migrations.py b/src/django_clickhouse/migrations.py index 5227baa..2f1d9e7 100644 --- a/src/django_clickhouse/migrations.py +++ b/src/django_clickhouse/migrations.py @@ -37,7 +37,7 @@ class Migration: model_class = getattr(op, 'model_class', None) hints = getattr(op, 'hints', {}) - if db_router.allow_migrate(db_alias, self.__module__, op, model=model_class, **hints): + if db_router.allow_migrate(db_alias, self.__module__, op, model_class, **hints): op.apply(database)