diff --git a/README.md b/README.md
index 8ecbd52..c2563ce 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,8 @@ Supports both synchronous and asynchronous ways to interact with the clickhouse
 
 | Build | [![Python 3.7 Tests](https://github.com/sswest/ch-orm/workflows/Python%203.7%20Tests/badge.svg)](https://github.com/sswest/ch-orm/actions?query=Python+3.7+Tests)[![Python 3.8 Tests](https://github.com/sswest/ch-orm/workflows/Python%203.8%20Tests/badge.svg)](https://github.com/sswest/ch-orm/actions?query=Python+3.8+Tests)[![Python 3.9 Tests](https://github.com/sswest/ch-orm/workflows/Python%203.9%20Tests/badge.svg)](https://github.com/sswest/ch-orm/actions?query=Python+3.9+Tests)[![Python 3.10 Tests](https://github.com/sswest/ch-orm/workflows/Python%203.10%20Tests/badge.svg)](https://github.com/sswest/ch-orm/actions?query=Python+3.10+Tests) |
 | ----------- |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| **Package** | [![PyPI](https://img.shields.io/pypi/v/ch-orm.svg)](https://pypi.python.org/pypi/ch-orm)[![PyPI version](https://img.shields.io/pypi/pyversions/ch-orm.svg)](https://pypi.python.org/pypi/ch-orm)[![PyPI Wheel](https://img.shields.io/pypi/wheel/ch-orm.svg)](https://pypi.python.org/pypi/ch-orm)[![Coverage Status](https://coveralls.io/repos/github/sswest/ch-orm/badge.svg?branch=master)](https://coveralls.io/github/sswest/ch-orm?branch=master)[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
- |
-| **Docs** | [![Documentation](https://camo.githubusercontent.com/bbb44987324f9324879ccae8ff5ad5c30b7e8b37ccee7235841a9628772595fe/68747470733a2f2f72656164746865646f63732e6f72672f70726f6a656374732f73616e69632f62616467652f3f76657273696f6e3d6c6174657374)](https://sswest.github.io/ch-orm) |
+| **Package** | [![PyPI](https://img.shields.io/pypi/v/ch-orm.svg)](https://pypi.python.org/pypi/ch-orm)[![PyPI version](https://img.shields.io/pypi/pyversions/ch-orm.svg)](https://pypi.python.org/pypi/ch-orm)[![PyPI Wheel](https://img.shields.io/pypi/wheel/ch-orm.svg)](https://pypi.python.org/pypi/ch-orm)[![Coverage Status](https://coveralls.io/repos/github/sswest/ch-orm/badge.svg?branch=master)](https://coveralls.io/github/sswest/ch-orm?branch=master)[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)|
+| **Docs** | [![Documentation](https://camo.githubusercontent.com/bbb44987324f9324879ccae8ff5ad5c30b7e8b37ccee7235841a9628772595fe/68747470733a2f2f72656164746865646f63732e6f72672f70726f6a656374732f73616e69632f62616467652f3f76657273696f6e3d6c6174657374)](https://sswest.github.io/ch-orm)|
 
 Introduction
 ============
@@ -21,3 +20,4 @@ It allows you to define model classes whose instances can be written to the data
 This and other examples can be found in the `examples` folder.
 
 To learn more please visit the [documentation](https://sswest.github.io/ch-orm/).
+
diff --git a/docs/class_reference.md b/docs/class_reference.md
index 174230b..90b1150 100644
--- a/docs/class_reference.md
+++ b/docs/class_reference.md
@@ -10,7 +10,7 @@ clickhouse_orm.database
 
 Database instances connect to a specific ClickHouse database for running queries, inserting data and other operations.
 
-#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, auto_create=True, timeout=60, verify_ssl_cert=True, log_statements=False)
+#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, auto_create=True, timeout=60, verify_ssl_cert=True, log_statements=False, engine=Atomic())
 
 Initializes a database instance. Unless it's readonly, the database will be
 
@@ -26,6 +26,7 @@ created on the ClickHouse server if it does not already exist.
 - `timeout`: the connection timeout in seconds.
 - `verify_ssl_cert`: whether to verify the server's certificate when connecting via HTTPS.
 - `log_statements`: when True, all database statements are logged.
+- `engine`: the database engine to create the database with. By default, ClickHouse uses the Atomic engine.
 
 #### add_setting(name, value)
 
@@ -180,6 +181,7 @@ created on the ClickHouse server if it does not already exist.
 - `timeout`: the connection timeout in seconds.
 - `verify_ssl_cert`: whether to verify the server's certificate when connecting via HTTPS.
 - `log_statements`: when True, all database statements are logged.
+- `engine`: the database engine to create the database with. By default, ClickHouse uses the Atomic engine.
 
 #### add_setting(name, value)
 
@@ -1241,30 +1243,95 @@ Extends Field
 clickhouse_orm.engines
 ----------------------
 
-### Engine
+### DatabaseEngine
+
+### Atomic
+
+Extends DatabaseEngine
+
+
+Supports non-blocking DROP TABLE and RENAME TABLE queries and atomic EXCHANGE TABLES queries.
+The Atomic engine is used by default.
+
+### Lazy
+
+Extends DatabaseEngine
+
+
+Keeps tables in RAM only for expiration_time_in_seconds seconds after the last access.
+Can be used only with *Log tables.
+
+It is optimized for storing many small *Log tables,
+for which there is a long time interval between accesses.
+
+### MySQL
+
+Extends DatabaseEngine
+
+
+Allows connecting to databases on a remote MySQL server and performing INSERT and
+SELECT queries to exchange data between ClickHouse and MySQL.
+
+The MySQL database engine translates queries to the MySQL server, so you can perform
+operations such as SHOW TABLES or SHOW CREATE TABLE.
+
+### PostgreSQL
+
+Extends DatabaseEngine
+
+
+Allows connecting to databases on a remote PostgreSQL server. Supports read and write
+operations (SELECT and INSERT queries) to exchange data between ClickHouse and PostgreSQL.
+
+Gives real-time access to the table list and table structure of the remote PostgreSQL
+database via SHOW TABLES and DESCRIBE TABLE queries.
+
+Supports table structure modifications (ALTER TABLE ... ADD|DROP COLUMN).
+If the use_table_cache parameter is set to 1, the table structure is cached and not
+checked for modifications, but it can be updated with DETACH and ATTACH queries.
+
+### SQLite
+
+Extends DatabaseEngine
+
+
+Allows connecting to an SQLite database and performing INSERT and SELECT queries to
+exchange data between ClickHouse and SQLite.
+
+#### SQLite(db_path)
+
+
+- `db_path`: Path to the SQLite database file.
+
+
+clickhouse_orm.engines
+----------------------
+
+### TableEngine
 
 ### TinyLog
 
-Extends Engine
+Extends TableEngine
 
 
 ### Log
 
-Extends Engine
+Extends TableEngine
 
 
 ### Memory
 
-Extends Engine
+Extends TableEngine
 
 
 ### MergeTree
 
-Extends Engine
+Extends TableEngine
 
 #### MergeTree(date_col=None, order_by=(), sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None, partition_key=None, primary_key=None)
 
 
 ### Buffer
 
-Extends Engine
+Extends TableEngine
 
 
 Buffers the data to write in RAM, periodically flushing it to another table.
@@ -1276,7 +1343,7 @@ Read more [here](https://clickhouse.tech/docs/en/engines/table-engines/special/b
 
 ### Merge
 
-Extends Engine
+Extends TableEngine
 
 
 The Merge engine (not to be confused with MergeTree) does not store data itself,
@@ -1289,7 +1356,7 @@ https://clickhouse.tech/docs/en/engines/table-engines/special/merge/
 
 ### Distributed
 
-Extends Engine
+Extends TableEngine
 
 
 The Distributed engine by itself does not store data,
@@ -2208,7 +2275,7 @@ Initializer.
 
 #### floor(n=None)
 
-#### formatDateTime(format, timezone=NO_VALUE)
+#### formatDateTime(fmt, timezone=NO_VALUE)
 
 #### gcd(b)
 
@@ -3234,7 +3301,7 @@ Initializer.
 
 #### toStartOfTenMinutes()
 
-#### toStartOfWeek(mode=0)
+#### toStartOfWeek()
 
 #### toStartOfYear()
 
@@ -3602,7 +3669,8 @@ Returns: A list of SystemPart objects
 Gets active data from system.parts table
 
 - `database`: A database object to fetch data from.
-- `conditions`: WHERE clause conditions. Database and active conditions are added automatically
+- `conditions`: WHERE clause conditions.
+  Database and active conditions are added automatically
 Returns: A list of SystemPart objects
 
diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py
index 37be8d3..eaad4b0 100644
--- a/scripts/generate_ref.py
+++ b/scripts/generate_ref.py
@@ -138,7 +138,8 @@ if __name__ == '__main__':
     module_doc([aio_database.AioDatabase])
     module_doc([models.Model, models.BufferModel, models.MergeModel, models.DistributedModel, models.Constraint, models.Index])
     module_doc(sorted([fields.Field] + all_subclasses(fields.Field), key=lambda x: x.__name__), False)
-    module_doc([engines.Engine] + all_subclasses(engines.Engine), False)
+    module_doc([engines.DatabaseEngine] + all_subclasses(engines.DatabaseEngine), False)
+    module_doc([engines.TableEngine] + all_subclasses(engines.TableEngine), False)
     module_doc([query.QuerySet, query.AggregateQuerySet, query.Q])
     module_doc([funcs.F])
     module_doc([system_models.SystemPart])
diff --git a/src/clickhouse_orm/aio/database.py b/src/clickhouse_orm/aio/database.py
index be423b5..d6c64b2 100644
--- a/src/clickhouse_orm/aio/database.py
+++ b/src/clickhouse_orm/aio/database.py
@@ -80,7 +80,10 @@ class AioDatabase(Database):
         """
         Creates the database on the ClickHouse server if it does not already exist.
""" - await self._send("CREATE DATABASE IF NOT EXISTS `%s`" % self.db_name) + await self._send( + "CREATE DATABASE IF NOT EXISTS `%s` ENGINE = %s" + % (self.db_name, self.engine.create_database_sql()) + ) self.db_exists = True async def drop_database(self): diff --git a/src/clickhouse_orm/database.py b/src/clickhouse_orm/database.py index b7ab5e4..d907793 100644 --- a/src/clickhouse_orm/database.py +++ b/src/clickhouse_orm/database.py @@ -11,6 +11,7 @@ from typing import Optional, Generator, Union, Any import pytz import httpx +from .engines import DatabaseEngine, Atomic from .models import ModelBase, MODEL from .utils import parse_tsv, import_submodules from .session import ctx_session_id, ctx_session_timeout @@ -110,6 +111,7 @@ class Database: timeout=60, verify_ssl_cert=True, log_statements=False, + engine: DatabaseEngine = Atomic(), ): """ Initializes a database instance. Unless it's readonly, the database will be @@ -125,6 +127,7 @@ class Database: - `timeout`: the connection timeout in seconds. - `verify_ssl_cert`: whether to verify the server's certificate when connecting via HTTPS. - `log_statements`: when True, all database statements are logged. + - `engine`: By default, ClickHouse uses the Atomic database engine. """ self.db_name = db_name self.db_url = db_url @@ -132,6 +135,7 @@ class Database: self._readonly = readonly self.auto_create = auto_create self.timeout = timeout + self.engine = engine self.request_session = self._client_class(verify=verify_ssl_cert, timeout=timeout) if username: self.request_session.auth = (username, password or "") @@ -175,7 +179,10 @@ class Database: """ Creates the database on the ClickHouse server if it does not already exist. """ - self._send("CREATE DATABASE IF NOT EXISTS `%s`" % self.db_name) + self._send( + "CREATE DATABASE IF NOT EXISTS `%s` ENGINE = %s" + % (self.db_name, self.engine.create_database_sql()) + ) self.db_exists = True def drop_database(self): diff --git a/src/clickhouse_orm/engines.py b/src/clickhouse_orm/engines.py index 21ba6b4..27513f0 100644 --- a/src/clickhouse_orm/engines.py +++ b/src/clickhouse_orm/engines.py @@ -14,27 +14,153 @@ if TYPE_CHECKING: logger = logging.getLogger("clickhouse_orm") -class Engine: +class TableEngine: def create_table_sql(self, db: Database) -> str: raise NotImplementedError() # pragma: no cover -class TinyLog(Engine): +class DatabaseEngine: + def create_database_sql(self) -> str: + raise NotImplementedError() # pragma: no cover + + +class Atomic(DatabaseEngine): + """ + It supports non-blocking DROP TABLE and RENAME TABLE queries and atomic EXCHANGE TABLES queries. + Atomic database engine is used by default. + """ + + def create_database_sql(self) -> str: + return "Atomic" + + +class Lazy(DatabaseEngine): + """ + Keeps tables in RAM only expiration_time_in_seconds seconds after last access. + Can be used only with *Log tables. + + It’s optimized for storing many small *Log tables, + for which there is a long time interval between accesses. + """ + + def __int__(self, expiration_time_in_seconds: int): + self.expiration_time_in_seconds = expiration_time_in_seconds + + def create_database_sql(self) -> str: + return f"Lazy({self.expiration_time_in_seconds})" + + +class MySQL(DatabaseEngine): + """ + Allows to connect to databases on a remote MySQL server and perform INSERT and + SELECT queries to exchange data between ClickHouse and MySQL. + + The MySQL database engine translate queries to the MySQL server so you can perform operations + such as SHOW TABLES or SHOW CREATE TABLE. 
diff --git a/src/clickhouse_orm/engines.py b/src/clickhouse_orm/engines.py
index 21ba6b4..27513f0 100644
--- a/src/clickhouse_orm/engines.py
+++ b/src/clickhouse_orm/engines.py
@@ -14,27 +14,153 @@ if TYPE_CHECKING:
 logger = logging.getLogger("clickhouse_orm")
 
 
-class Engine:
+class TableEngine:
     def create_table_sql(self, db: Database) -> str:
         raise NotImplementedError()  # pragma: no cover
 
 
-class TinyLog(Engine):
+class DatabaseEngine:
+    def create_database_sql(self) -> str:
+        raise NotImplementedError()  # pragma: no cover
+
+
+class Atomic(DatabaseEngine):
+    """
+    Supports non-blocking DROP TABLE and RENAME TABLE queries and atomic EXCHANGE TABLES queries.
+    The Atomic engine is used by default.
+    """
+
+    def create_database_sql(self) -> str:
+        return "Atomic"
+
+
+class Lazy(DatabaseEngine):
+    """
+    Keeps tables in RAM only for expiration_time_in_seconds seconds after the last access.
+    Can be used only with *Log tables.
+
+    It is optimized for storing many small *Log tables,
+    for which there is a long time interval between accesses.
+    """
+
+    def __init__(self, expiration_time_in_seconds: int):
+        self.expiration_time_in_seconds = expiration_time_in_seconds
+
+    def create_database_sql(self) -> str:
+        return f"Lazy({self.expiration_time_in_seconds})"
+
+
+class MySQL(DatabaseEngine):
+    """
+    Allows connecting to databases on a remote MySQL server and performing INSERT and
+    SELECT queries to exchange data between ClickHouse and MySQL.
+
+    The MySQL database engine translates queries to the MySQL server, so you can perform
+    operations such as SHOW TABLES or SHOW CREATE TABLE.
+    """
+
+    def __init__(self, host: str, port: int, database: str, user: str, password: str):
+        """
+        - `host`: MySQL server address.
+        - `port`: MySQL server port.
+        - `database`: Remote database name.
+        - `user`: MySQL user.
+        - `password`: User password.
+        """
+        self.host_port = f"{host}:{port}"
+        self.database = database
+        self.user = user
+        self.password = password
+
+    def create_database_sql(self) -> str:
+        return f"MySQL('{self.host_port}', '{self.database}', '{self.user}', '{self.password}')"
+
+
+class PostgreSQL(DatabaseEngine):
+    """
+    Allows connecting to databases on a remote PostgreSQL server. Supports read and write
+    operations (SELECT and INSERT queries) to exchange data between ClickHouse and PostgreSQL.
+
+    Gives real-time access to the table list and table structure of the remote PostgreSQL
+    database via SHOW TABLES and DESCRIBE TABLE queries.
+
+    Supports table structure modifications (ALTER TABLE ... ADD|DROP COLUMN).
+    If the use_table_cache parameter is set to 1, the table structure is cached and not
+    checked for modifications, but it can be updated with DETACH and ATTACH queries.
+    """
+
+    def __init__(
+        self,
+        host: str,
+        port: int,
+        database: str,
+        user: str,
+        password: str,
+        schema: str = None,
+        use_table_cache: int = None,
+    ):
+        """
+        - `host`: PostgreSQL server address.
+        - `port`: PostgreSQL server port.
+        - `database`: Remote database name.
+        - `user`: PostgreSQL user.
+        - `password`: User password.
+        - `schema`: PostgreSQL schema.
+        - `use_table_cache`: Defines if the database table structure is cached or not.
+        """
+        self.host_port = f"{host}:{port}"
+        self.database = database
+        self.user = user
+        self.password = password
+        self.schema = schema
+        self.use_table_cache = use_table_cache
+        self._params = [
+            f"'{self.host_port}'",
+            f"'{self.database}'",
+            f"'{self.user}'",
+            f"'{self.password}'",
+        ]
+        if self.schema is not None:
+            self._params.append(f"'{self.schema}'")
+        if self.use_table_cache is not None:
+            self._params.append(str(self.use_table_cache))
+
+    def create_database_sql(self) -> str:
+        return f"PostgreSQL({','.join(self._params)})"
+
+
+class SQLite(DatabaseEngine):
+    """
+    Allows connecting to an SQLite database and performing INSERT and SELECT queries to
+    exchange data between ClickHouse and SQLite.
+    """
+
+    def __init__(self, db_path: str):
+        """
+        - `db_path`: Path to the SQLite database file.
+        """
+        self.db_path = db_path
+
+    def create_database_sql(self) -> str:
+        return f"SQLite('{self.db_path}')"
+
+
+class TinyLog(TableEngine):
     def create_table_sql(self, db):
         return "TinyLog"
 
 
-class Log(Engine):
+class Log(TableEngine):
     def create_table_sql(self, db):
         return "Log"
 
 
-class Memory(Engine):
+class Memory(TableEngine):
     def create_table_sql(self, db):
         return "Memory"
 
 
-class MergeTree(Engine):
+class MergeTree(TableEngine):
     def __init__(
         self,
         date_col: Optional[str] = None,
@@ -245,7 +371,7 @@ class ReplacingMergeTree(MergeTree):
         return params
 
 
-class Buffer(Engine):
+class Buffer(TableEngine):
     """
     Buffers the data to write in RAM, periodically flushing it to another table.
     Must be used in conjuction with a `BufferModel`.
@@ -289,7 +415,7 @@
         return sql
 
 
-class Merge(Engine):
+class Merge(TableEngine):
     """
     The Merge engine (not to be confused with MergeTree) does not store data itself,
     but allows reading from any number of other tables simultaneously.
@@ -305,7 +431,7 @@ class Merge(Engine):
         return "Merge(`%s`, '%s')" % (db.db_name, self.table_regex)
 
 
-class Distributed(Engine):
+class Distributed(TableEngine):
     """
     The Distributed engine by itself does not store data,
     but allows distributed query processing on multiple servers.
@@ -360,4 +486,4 @@
 
 
 # Expose only relevant classes in import *
-__all__ = get_subclass_names(locals(), Engine)
+__all__ = get_subclass_names(locals(), TableEngine)
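As a sanity check, not part of the patch: the SQL fragments the new DatabaseEngine classes produce, assuming the corrected `__init__` methods and PostgreSQL parameter order above. All connection values are placeholders.

```python
from clickhouse_orm.engines import Atomic, Lazy, SQLite, PostgreSQL

Atomic().create_database_sql()                     # 'Atomic'
Lazy(180).create_database_sql()                    # 'Lazy(180)'
SQLite("/tmp/data.sqlite3").create_database_sql()  # "SQLite('/tmp/data.sqlite3')"

# Placeholder credentials; yields "PostgreSQL('pg.example:5432','shop','reader','secret')"
PostgreSQL("pg.example", 5432, "shop", "reader", "secret").create_database_sql()
```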
diff --git a/src/clickhouse_orm/funcs.py b/src/clickhouse_orm/funcs.py
index 1f7da39..db7f7c7 100644
--- a/src/clickhouse_orm/funcs.py
+++ b/src/clickhouse_orm/funcs.py
@@ -5,6 +5,8 @@ from types import FunctionType
 from .utils import is_iterable, comma_join, NO_VALUE, arg_to_sql
 from .query import Cond, QuerySet
 
+# pylint: disable=C0103,C0116
+
 
 def binary_operator(func):
     """
@@ -179,10 +181,10 @@ class FunctionOperatorsMixin:
         return F._not(self)
 
     def isIn(self, others):
-        return F._in(self, others)
+        return F._in(self, others)  # pylint: disable=W0212
 
     def isNotIn(self, others):
-        return F._notIn(self, others)
+        return F._notIn(self, others)  # pylint: disable=W0212
 
 
 class FMeta(type):
@@ -215,7 +217,8 @@ class FMeta(type):
     @staticmethod
     def _add_func(cls, base_func, new_name, extra_args):
         """
-        Adds a new func to the cls, based on the signature of the given base_func but with a new name.
+        Adds a new func to the cls, based on the signature of the given
+        base_func but with a new name.
         """
         # Get the function's signature
         sig = signature(base_func)
@@ -247,7 +250,7 @@ class FMeta(type):
         setattr(cls, new_name, new_func)
 
 
-class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
+class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):  # pylint: disable=R0904
     """
     Represents a database function call and its arguments.
     It doubles as a query condition when the function returns a boolean result.
@@ -476,7 +479,7 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
         return F("toStartOfTenMinutes", d)
 
     @staticmethod
-    def toStartOfWeek(d, mode=0):
+    def toStartOfWeek(d):
         return F("toStartOfWeek", d)
 
     @staticmethod
@@ -572,8 +575,8 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
         return F("timeSlots", start_time, F.toUInt32(duration))
 
     @staticmethod
-    def formatDateTime(d, format, timezone=NO_VALUE):
-        return F("formatDateTime", d, format, timezone)
+    def formatDateTime(d, fmt, timezone=NO_VALUE):
+        return F("formatDateTime", d, fmt, timezone)
 
     @staticmethod
     def addDays(d, n, timezone=NO_VALUE):
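A small illustration of the renamed keyword, again not part of the patch: `fmt` replaces `format`, which shadowed the Python builtin.

```python
from clickhouse_orm.funcs import F

# Renders roughly as formatDateTime(now(), '%Y-%m-%d') in the generated SQL
expr = F.formatDateTime(F.now(), "%Y-%m-%d")
```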
diff --git a/src/clickhouse_orm/migrations.py b/src/clickhouse_orm/migrations.py
index 279bc47..a9359a6 100644
--- a/src/clickhouse_orm/migrations.py
+++ b/src/clickhouse_orm/migrations.py
@@ -1,9 +1,12 @@
+import re
+import logging
+
 from .models import Model, BufferModel
 from .fields import DateField, StringField
 from .engines import MergeTree
 from .utils import get_subclass_names
 
-import logging
+# pylint: disable=R0903
 
 logger = logging.getLogger("migrations")
 
@@ -93,13 +96,15 @@ class AlterTable(ModelOperation):
                 self._alter_table(database, cmd)
 
             if is_regular_field:
-                # ALIAS and MATERIALIZED fields are not stored in the database, and raise DatabaseError
+                # ALIAS and MATERIALIZED fields are not stored in the database,
+                # and raise DatabaseError
                 # (no AFTER column). So we will skip them
                 prev_name = name
 
         # Identify fields whose type was changed
 
         # The order of class attributes can be changed any time, so we can't count on it
-        # Secondly, MATERIALIZED and ALIAS fields are always at the end of the DESC, so we can't expect them to save
+        # Secondly, MATERIALIZED and ALIAS fields are always at the end of the DESC,
+        # so we can't expect them to save
         # attribute position. Watch https://github.com/Infinidat/infi.clickhouse_orm/issues/47
         model_fields = {
             name: field.get_sql(with_default_expression=False, db=database)
@@ -159,7 +164,7 @@ class AlterConstraints(ModelOperation):
         logger.info("    Alter constraints for %s", self.table_name)
         existing = self._get_constraint_names(database)
         # Go over constraints in the model
-        for constraint in self.model_class._constraints.values():
+        for constraint in self.model_class._constraints.values():  # pylint: disable=W0212
             # Check if it's a new constraint
             if constraint.name not in existing:
                 logger.info("    Add constraint %s", constraint.name)
@@ -175,8 +180,6 @@
         """
         Returns a set containing the names of existing constraints in the table.
         """
-        import re
-
         table_def = database.raw("SHOW CREATE TABLE $db.`%s`" % self.table_name)
         matches = re.findall(r"\sCONSTRAINT\s+`?(.+?)`?\s+CHECK\s", table_def)
         return set(matches)
@@ -204,7 +207,7 @@ class AlterIndexes(ModelOperation):
         existing = self._get_index_names(database)
         logger.info(existing)
         # Go over indexes in the model
-        for index in self.model_class._indexes.values():
+        for index in self.model_class._indexes.values():  # pylint: disable=W0212
             # Check if it's a new index
             if index.name not in existing:
                 logger.info("    Add index %s", index.name)
@@ -224,8 +227,6 @@
         """
         Returns a set containing the names of existing indexes in the table.
         """
-        import re
-
         table_def = database.raw("SHOW CREATE TABLE $db.`%s`" % self.table_name)
         matches = re.findall(r"\sINDEX\s+`?(.+?)`?\s+", table_def)
         return set(matches)
diff --git a/src/clickhouse_orm/session.py b/src/clickhouse_orm/session.py
index feb063c..1955ab5 100644
--- a/src/clickhouse_orm/session.py
+++ b/src/clickhouse_orm/session.py
@@ -1,3 +1,11 @@
+"""
+Support for ClickHouse sessions over the HTTP protocol.
+Use the provided context manager to attach a session_id parameter to each request.
+By default, a session is terminated after 60 seconds of inactivity.
+
+https://clickhouse.com/docs/en/interfaces/http/
+"""
+
 import uuid
 from typing import Optional
 from contextvars import ContextVar, Token
@@ -7,6 +15,8 @@ ctx_session_timeout: ContextVar[float] = ContextVar("ck.session_timeout")
 
 
 class SessionContext:
+    """Session context manager"""
+
     def __init__(self, session: str, timeout: float):
         self.session = session
         self.timeout = timeout
@@ -24,5 +34,9 @@ class SessionContext:
 
 
 def in_session(session: Optional[str] = None, timeout: int = 60):
+    """
+    Adds a session_id to subsequent requests.
+    This context manager can be used safely from coroutines or threads.
+    """
     session = session or str(uuid.uuid4())
     return SessionContext(session, timeout)
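For readers unfamiliar with the session module, a hedged sketch of how `in_session` is meant to be used; the database object and statements are placeholders.

```python
from clickhouse_orm.database import Database
from clickhouse_orm.session import in_session

db = Database("analytics")  # placeholder database

# Both statements below carry the same session_id, so ClickHouse treats them
# as one HTTP session with a 120-second idle timeout.
with in_session(timeout=120):
    db.raw("SET max_threads = 2")
    db.raw("SELECT 1")
```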
diff --git a/src/clickhouse_orm/system_models.py b/src/clickhouse_orm/system_models.py
index d17635f..7efe9d8 100644
--- a/src/clickhouse_orm/system_models.py
+++ b/src/clickhouse_orm/system_models.py
@@ -30,7 +30,8 @@ class SystemPart(Model):
     partition = StringField()  # Name of the partition, in the format YYYYMM.
     name = StringField()  # Name of the part.
 
-    # This field is present in the docs (https://clickhouse.tech/docs/en/single/index.html#system-parts),
+    # This field is present in the docs
+    # https://clickhouse.tech/docs/en/single/index.html#system-parts
     # but is absent in ClickHouse (in version 1.1.54245)
     # replicated = UInt8Field()  # Whether the part belongs to replicated data.
 
@@ -44,7 +45,8 @@ class SystemPart(Model):
 
     bytes = UInt64Field()  # Number of bytes when compressed.
 
-    # Time the directory with the part was modified. Usually corresponds to the part's creation time.
+    # Time the directory with the part was modified.
+    # Usually corresponds to the part's creation time.
     modification_time = DateTimeField()
     remove_time = (
         DateTimeField()
@@ -166,7 +168,8 @@ class SystemPart(Model):
         Gets active data from system.parts table
 
         - `database`: A database object to fetch data from.
-        - `conditions`: WHERE clause conditions. Database and active conditions are added automatically
+        - `conditions`: WHERE clause conditions.
+          Database and active conditions are added automatically
         Returns: A list of SystemPart objects
         """
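Finally, a short illustration of the `get_active` contract described in the reworded docstring; the database and table names are placeholders.

```python
from clickhouse_orm.database import Database
from clickhouse_orm.system_models import SystemPart

db = Database("analytics")  # placeholder

# The database and "active" filters are added automatically to the WHERE clause.
parts = SystemPart.get_active(db, conditions="table = 'events'")
for part in parts:
    print(part.partition, part.name, part.bytes)
```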