diff --git a/docs/class_reference.md b/docs/class_reference.md new file mode 100644 index 0000000..0f4f57f --- /dev/null +++ b/docs/class_reference.md @@ -0,0 +1,594 @@ +Class Reference +=============== + +infi.clickhouse_orm.database +---------------------------- + +### Database + + +Database instances connect to a specific ClickHouse database for running queries, +inserting data and other operations. + +#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False) + + +Initializes a database instance. Unless it's readonly, the database will be +created on the ClickHouse server if it does not already exist. + +- `db_name`: name of the database to connect to. +- `db_url`: URL of the ClickHouse server. +- `username`: optional connection credentials. +- `password`: optional connection credentials. +- `readonly`: use a read-only connection. + + +#### count(model_class, conditions=None) + + +Counts the number of records in the model's table. + +- `model_class`: the model to count. +- `conditions`: optional SQL conditions (contents of the WHERE clause). + + +#### create_database() + + +Creates the database on the ClickHouse server if it does not already exist. + + +#### create_table(model_class) + + +Creates a table for the given model class, if it does not exist already. + + +#### drop_database() + + +Deletes the database on the ClickHouse server. + + +#### drop_table(model_class) + + +Drops the database table of the given model class, if it exists. + + +#### insert(model_instances, batch_size=1000) + + +Insert records into the database. + +- `model_instances`: any iterable containing instances of a single model class. +- `batch_size`: number of records to send per chunk (use a lower number if your records are very large). + + +#### migrate(migrations_package_name, up_to=9999) + + +Executes schema migrations. + +- `migrations_package_name` - fully qualified name of the Python package + containing the migrations. 
+- `up_to` - number of the last migration to apply. + + +#### paginate(model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None) + + +Selects records and returns a single page of model instances. + +- `model_class`: the model class matching the query's table, + or `None` for getting back instances of an ad-hoc model. +- `order_by`: columns to use for sorting the query (contents of the ORDER BY clause). +- `page_num`: the page number (1-based), or -1 to get the last page. +- `page_size`: number of records to return per page. +- `conditions`: optional SQL conditions (contents of the WHERE clause). +- `settings`: query settings to send as HTTP GET parameters + +The result is a namedtuple containing `objects` (list), `number_of_objects`, +`pages_total`, `number` (of the current page), and `page_size`. + + +#### raw(query, settings=None, stream=False) + + +Performs a query and returns its output as text. + +- `query`: the SQL query to execute. +- `settings`: query settings to send as HTTP GET parameters +- `stream`: if true, the HTTP response from ClickHouse will be streamed. + + +#### select(query, model_class=None, settings=None) + + +Performs a query and returns a generator of model instances. + +- `query`: the SQL query to execute. +- `model_class`: the model class matching the query's table, + or `None` for getting back instances of an ad-hoc model. +- `settings`: query settings to send as HTTP GET parameters + + +### DatabaseException + +Extends Exception + + +Raised when a database operation fails. + +infi.clickhouse_orm.models +-------------------------- + +### Model + + +A base class for ORM models. Each model class represents a ClickHouse table. For example: + + class CPUStats(Model): + timestamp = DateTimeField() + cpu_id = UInt16Field() + cpu_percent = Float32Field() + engine = Memory() + +#### Model(**kwargs) + + +Creates a model instance, using keyword arguments as field values. 
+Since values are immediately converted to their Pythonic type, +invalid values will cause a `ValueError` to be raised. +Unrecognized field names will cause an `AttributeError`. + + +#### Model.create_table_sql(db_name) + + +Returns the SQL command for creating a table for this model. + + +#### Model.drop_table_sql(db_name) + + +Returns the SQL command for deleting this model's table. + + +#### Model.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None) + + +Create a model instance from a tab-separated line. The line may or may not include a newline. +The `field_names` list must match the fields defined in the model, but does not have to include all of them. +If omitted, it is assumed to be the names of all fields in the model, in order of definition. + +- `line`: the TSV-formatted data. +- `field_names`: names of the model fields in the data. +- `timezone_in_use`: the timezone to use when parsing dates and datetimes. +- `database`: if given, sets the database that this instance belongs to. + + +#### get_database() + + +Gets the `Database` that this model instance belongs to. +Returns `None` unless the instance was read from the database or written to it. + + +#### get_field(name) + + +Gets a `Field` instance given its name, or `None` if not found. + + +#### Model.objects_in(database) + + +Returns a `QuerySet` for selecting instances of this model class. + + +#### set_database(db) + + +Sets the `Database` that this model instance belongs to. +This is done automatically when the instance is read from the database or written to it. + + +#### Model.table_name() + + +Returns the model's database table name. By default this is the +class name converted to lowercase. Override this if you want to use +a different table name. + + +#### to_dict(include_readonly=True, field_names=None) + + +Returns the instance's column values as a dict. + +- `include_readonly`: if false, returns only fields that can be inserted into database. 
+- `field_names`: an iterable of field names to return (optional) + + +#### to_tsv(include_readonly=True) + + +Returns the instance's column values as a tab-separated line. A newline is not included. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + +### BufferModel + +Extends Model + +#### BufferModel(**kwargs) + + +Creates a model instance, using keyword arguments as field values. +Since values are immediately converted to their Pythonic type, +invalid values will cause a `ValueError` to be raised. +Unrecognized field names will cause an `AttributeError`. + + +#### BufferModel.create_table_sql(db_name) + + +Returns the SQL command for creating a table for this model. + + +#### BufferModel.drop_table_sql(db_name) + + +Returns the SQL command for deleting this model's table. + + +#### BufferModel.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None) + + +Create a model instance from a tab-separated line. The line may or may not include a newline. +The `field_names` list must match the fields defined in the model, but does not have to include all of them. +If omitted, it is assumed to be the names of all fields in the model, in order of definition. + +- `line`: the TSV-formatted data. +- `field_names`: names of the model fields in the data. +- `timezone_in_use`: the timezone to use when parsing dates and datetimes. +- `database`: if given, sets the database that this instance belongs to. + + +#### get_database() + + +Gets the `Database` that this model instance belongs to. +Returns `None` unless the instance was read from the database or written to it. + + +#### get_field(name) + + +Gets a `Field` instance given its name, or `None` if not found. + + +#### BufferModel.objects_in(database) + + +Returns a `QuerySet` for selecting instances of this model class. + + +#### set_database(db) + + +Sets the `Database` that this model instance belongs to. 
+This is done automatically when the instance is read from the database or written to it. + + +#### BufferModel.table_name() + + +Returns the model's database table name. By default this is the +class name converted to lowercase. Override this if you want to use +a different table name. + + +#### to_dict(include_readonly=True, field_names=None) + + +Returns the instance's column values as a dict. + +- `include_readonly`: if false, returns only fields that can be inserted into database. +- `field_names`: an iterable of field names to return (optional) + + +#### to_tsv(include_readonly=True) + + +Returns the instance's column values as a tab-separated line. A newline is not included. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + +infi.clickhouse_orm.fields +-------------------------- + +### Field + + +Abstract base class for all field types. + +#### Field(default=None, alias=None, materialized=None) + + +### StringField + +Extends Field + +#### StringField(default=None, alias=None, materialized=None) + + +### DateField + +Extends Field + +#### DateField(default=None, alias=None, materialized=None) + + +### DateTimeField + +Extends Field + +#### DateTimeField(default=None, alias=None, materialized=None) + + +### BaseIntField + +Extends Field + + +Abstract base class for all integer-type fields. + +#### BaseIntField(default=None, alias=None, materialized=None) + + +### BaseFloatField + +Extends Field + + +Abstract base class for all float-type fields. + +#### BaseFloatField(default=None, alias=None, materialized=None) + + +### BaseEnumField + +Extends Field + + +Abstract base class for all enum-type fields. 
+ +#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None) + + +### ArrayField + +Extends Field + +#### ArrayField(inner_field, default=None, alias=None, materialized=None) + + +### FixedStringField + +Extends StringField + +#### FixedStringField(length, default=None, alias=None, materialized=None) + + +### UInt8Field + +Extends BaseIntField + +#### UInt8Field(default=None, alias=None, materialized=None) + + +### UInt16Field + +Extends BaseIntField + +#### UInt16Field(default=None, alias=None, materialized=None) + + +### UInt32Field + +Extends BaseIntField + +#### UInt32Field(default=None, alias=None, materialized=None) + + +### UInt64Field + +Extends BaseIntField + +#### UInt64Field(default=None, alias=None, materialized=None) + + +### Int8Field + +Extends BaseIntField + +#### Int8Field(default=None, alias=None, materialized=None) + + +### Int16Field + +Extends BaseIntField + +#### Int16Field(default=None, alias=None, materialized=None) + + +### Int32Field + +Extends BaseIntField + +#### Int32Field(default=None, alias=None, materialized=None) + + +### Int64Field + +Extends BaseIntField + +#### Int64Field(default=None, alias=None, materialized=None) + + +### Float32Field + +Extends BaseFloatField + +#### Float32Field(default=None, alias=None, materialized=None) + + +### Float64Field + +Extends BaseFloatField + +#### Float64Field(default=None, alias=None, materialized=None) + + +### Enum8Field + +Extends BaseEnumField + +#### Enum8Field(enum_cls, default=None, alias=None, materialized=None) + + +### Enum16Field + +Extends BaseEnumField + +#### Enum16Field(enum_cls, default=None, alias=None, materialized=None) + + +infi.clickhouse_orm.engines +--------------------------- + +### Engine + +### TinyLog + +Extends Engine + +### Log + +Extends Engine + +### Memory + +Extends Engine + +### MergeTree + +Extends Engine + +#### MergeTree(date_col, key_cols, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +### 
Buffer + +Extends Engine + + +Buffers the data to write in RAM, periodically flushing it to another table. +Must be used in conjunction with a `BufferModel`. +Read more [here](https://clickhouse.yandex/reference_en.html#Buffer). + +#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000) + + +### CollapsingMergeTree + +Extends MergeTree + +#### CollapsingMergeTree(date_col, key_cols, sign_col, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +### SummingMergeTree + +Extends MergeTree + +#### SummingMergeTree(date_col, key_cols, summing_cols=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +### ReplacingMergeTree + +Extends MergeTree + +#### ReplacingMergeTree(date_col, key_cols, ver_col=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +infi.clickhouse_orm.query +------------------------- + +### QuerySet + + +A queryset is an object that represents a database query using a specific `Model`. +It is lazy, meaning that it does not hit the database until you iterate over its +matching rows (model instances). + +#### QuerySet(model_cls, database) + + +Initializer. It is possible to create a queryset like this, but the standard +way is to use `MyModel.objects_in(database)`. + + +#### as_sql() + + +Returns the whole query as a SQL string. + + +#### conditions_as_sql() + + +Returns the contents of the query's `WHERE` clause as a string. + + +#### count() + + +Returns the number of matching model instances. + + +#### exclude(**kwargs) + + +Returns a new `QuerySet` instance that excludes all rows matching the conditions. + + +#### filter(**kwargs) + + +Returns a new `QuerySet` instance that includes only rows matching the conditions. + + +#### only(*field_names) + + +Returns a new `QuerySet` instance limited to the specified field names. 
+Useful when there are large fields that are not needed, +or for creating a subquery to use with an IN operator. + + +#### order_by(*field_names) + + +Returns a new `QuerySet` instance with the ordering changed. + + +#### order_by_as_sql() + + +Returns the contents of the query's `ORDER BY` clause as a string. + + diff --git a/docs/toc.md b/docs/toc.md index fa9dd70..ae05203 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -42,45 +42,45 @@ * [Contributing](contributing.md#contributing) - * [Class Reference](ref.md#class-reference) - * [infi.clickhouse_orm.database](ref.md#infi.clickhouse_orm.database) - * [Database](ref.md#database) - * [DatabaseException](ref.md#databaseexception) - * [infi.clickhouse_orm.models](ref.md#infi.clickhouse_orm.models) - * [Model](ref.md#model) - * [BufferModel](ref.md#buffermodel) - * [infi.clickhouse_orm.fields](ref.md#infi.clickhouse_orm.fields) - * [Field](ref.md#field) - * [StringField](ref.md#stringfield) - * [DateField](ref.md#datefield) - * [DateTimeField](ref.md#datetimefield) - * [BaseIntField](ref.md#baseintfield) - * [BaseFloatField](ref.md#basefloatfield) - * [BaseEnumField](ref.md#baseenumfield) - * [ArrayField](ref.md#arrayfield) - * [FixedStringField](ref.md#fixedstringfield) - * [UInt8Field](ref.md#uint8field) - * [UInt16Field](ref.md#uint16field) - * [UInt32Field](ref.md#uint32field) - * [UInt64Field](ref.md#uint64field) - * [Int8Field](ref.md#int8field) - * [Int16Field](ref.md#int16field) - * [Int32Field](ref.md#int32field) - * [Int64Field](ref.md#int64field) - * [Float32Field](ref.md#float32field) - * [Float64Field](ref.md#float64field) - * [Enum8Field](ref.md#enum8field) - * [Enum16Field](ref.md#enum16field) - * [infi.clickhouse_orm.engines](ref.md#infi.clickhouse_orm.engines) - * [Engine](ref.md#engine) - * [TinyLog](ref.md#tinylog) - * [Log](ref.md#log) - * [Memory](ref.md#memory) - * [MergeTree](ref.md#mergetree) - * [Buffer](ref.md#buffer) - * [CollapsingMergeTree](ref.md#collapsingmergetree) - * 
[SummingMergeTree](ref.md#summingmergetree) - * [ReplacingMergeTree](ref.md#replacingmergetree) - * [infi.clickhouse_orm.query](ref.md#infi.clickhouse_orm.query) - * [QuerySet](ref.md#queryset) + * [Class Reference](class_reference.md#class-reference) + * [infi.clickhouse_orm.database](class_reference.md#infi.clickhouse_orm.database) + * [Database](class_reference.md#database) + * [DatabaseException](class_reference.md#databaseexception) + * [infi.clickhouse_orm.models](class_reference.md#infi.clickhouse_orm.models) + * [Model](class_reference.md#model) + * [BufferModel](class_reference.md#buffermodel) + * [infi.clickhouse_orm.fields](class_reference.md#infi.clickhouse_orm.fields) + * [Field](class_reference.md#field) + * [StringField](class_reference.md#stringfield) + * [DateField](class_reference.md#datefield) + * [DateTimeField](class_reference.md#datetimefield) + * [BaseIntField](class_reference.md#baseintfield) + * [BaseFloatField](class_reference.md#basefloatfield) + * [BaseEnumField](class_reference.md#baseenumfield) + * [ArrayField](class_reference.md#arrayfield) + * [FixedStringField](class_reference.md#fixedstringfield) + * [UInt8Field](class_reference.md#uint8field) + * [UInt16Field](class_reference.md#uint16field) + * [UInt32Field](class_reference.md#uint32field) + * [UInt64Field](class_reference.md#uint64field) + * [Int8Field](class_reference.md#int8field) + * [Int16Field](class_reference.md#int16field) + * [Int32Field](class_reference.md#int32field) + * [Int64Field](class_reference.md#int64field) + * [Float32Field](class_reference.md#float32field) + * [Float64Field](class_reference.md#float64field) + * [Enum8Field](class_reference.md#enum8field) + * [Enum16Field](class_reference.md#enum16field) + * [infi.clickhouse_orm.engines](class_reference.md#infi.clickhouse_orm.engines) + * [Engine](class_reference.md#engine) + * [TinyLog](class_reference.md#tinylog) + * [Log](class_reference.md#log) + * [Memory](class_reference.md#memory) + * 
[MergeTree](class_reference.md#mergetree) + * [Buffer](class_reference.md#buffer) + * [CollapsingMergeTree](class_reference.md#collapsingmergetree) + * [SummingMergeTree](class_reference.md#summingmergetree) + * [ReplacingMergeTree](class_reference.md#replacingmergetree) + * [infi.clickhouse_orm.query](class_reference.md#infi.clickhouse_orm.query) + * [QuerySet](class_reference.md#queryset) diff --git a/scripts/README.md b/scripts/README.md index 5782dc9..aaf2f27 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -4,6 +4,7 @@ generate_toc ------------ Generates the table of contents (toc.md). Requires Pandoc. Usage: + cd docs ../scripts/generate_toc.sh @@ -22,6 +23,29 @@ Usage: ../scripts/docs2html.sh +generate_ref +------------ +Generates a class reference. +Usage: + + cd docs + ../bin/python ../scripts/generate_ref.py > class_reference.md + + +generate_all +------------ +Does everything: + + - Generates the class reference using generate_ref + - Generates the table of contents using generate_toc + - Converts to HTML for visual inspection using docs2html + +Usage: + + cd docs + ../scripts/generate_all.sh + + test_python3 ------------ Creates a Python 3 virtualenv, clones the project into it, and runs the tests. 
diff --git a/scripts/generate_all.sh b/scripts/generate_all.sh new file mode 100755 index 0000000..eabf65c --- /dev/null +++ b/scripts/generate_all.sh @@ -0,0 +1,8 @@ +# Class reference +../bin/python ../scripts/generate_ref.py > class_reference.md + +# Table of contents +../scripts/generate_toc.sh + +# Convert to HTML for visual inspection +../scripts/docs2html.sh diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py index d2d731c..8d11249 100644 --- a/scripts/generate_ref.py +++ b/scripts/generate_ref.py @@ -63,10 +63,14 @@ def get_method_sig(method): def docstring(obj): - doc = (obj.__doc__ or '').strip() + doc = (obj.__doc__ or '').rstrip() if doc: - for line in doc.split('\n'): - print line.strip() + lines = doc.split('\n') + # Find the length of the whitespace prefix common to all non-empty lines + indentation = min(len(line) - len(line.lstrip()) for line in lines if line.strip()) + # Output the lines without the indentation + for line in lines: + print line[indentation:] print diff --git a/scripts/generate_toc.sh b/scripts/generate_toc.sh index 32ca599..7ed82ce 100755 --- a/scripts/generate_toc.sh +++ b/scripts/generate_toc.sh @@ -14,4 +14,4 @@ generate_one "table_engines.md" generate_one "schema_migrations.md" generate_one "system_models.md" generate_one "contributing.md" -generate_one "ref.md" +generate_one "class_reference.md" diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 3ae9535..dd5c004 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -23,6 +23,10 @@ class DatabaseException(Exception): class Database(object): + ''' + Database instances connect to a specific ClickHouse database for running queries, + inserting data and other operations. 
+ ''' def __init__(self, db_name, db_url='http://localhost:8123/', username=None, password=None, readonly=False): ''' diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index 7a011af..9db37da 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -98,8 +98,10 @@ class ReplacingMergeTree(MergeTree): class Buffer(Engine): - """Here we define Buffer engine - Read more here https://clickhouse.yandex/reference_en.html#Buffer + """ + Buffers the data to write in RAM, periodically flushing it to another table. + Must be used in conjunction with a `BufferModel`. + Read more [here](https://clickhouse.yandex/reference_en.html#Buffer). """ #Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index a06f515..1938103 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -71,7 +71,13 @@ class ModelBase(type): class Model(with_metaclass(ModelBase)): ''' - A base class for ORM models. + A base class for ORM models. Each model class represents a ClickHouse table. For example: + + class CPUStats(Model): + timestamp = DateTimeField() + cpu_id = UInt16Field() + cpu_percent = Float32Field() + engine = Memory() ''' engine = None diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index f79c2eb..95c4ea7 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -152,8 +152,17 @@ class Q(object): class QuerySet(object): + """ + A queryset is an object that represents a database query using a specific `Model`. + It is lazy, meaning that it does not hit the database until you iterate over its + matching rows (model instances). + """ def __init__(self, model_cls, database): + """ + Initializer. It is possible to create a queryset like this, but the standard + way is to use `MyModel.objects_in(database)`. 
+ """ self._model_cls = model_cls self._database = database self._order_by = [f[0] for f in model_cls._fields] @@ -168,7 +177,7 @@ class QuerySet(object): def __bool__(self): """ - Return true if this queryset matches any rows. + Returns true if this queryset matches any rows. """ return bool(self.count()) @@ -180,7 +189,7 @@ class QuerySet(object): def as_sql(self): """ - Return the whole queryset as SQL. + Returns the whole query as a SQL string. """ fields = '*' if self._fields: @@ -190,7 +199,7 @@ class QuerySet(object): def order_by_as_sql(self): """ - Return the contents of the queryset's ORDER BY clause. + Returns the contents of the query's `ORDER BY` clause as a string. """ return ', '.join([ '%s DESC' % field[1:] if field[0] == '-' else field @@ -199,7 +208,7 @@ class QuerySet(object): def conditions_as_sql(self): """ - Return the contents of the queryset's WHERE clause. + Returns the contents of the query's `WHERE` clause as a string. """ if self._q: return ' AND '.join([q.to_sql(self._model_cls) for q in self._q]) @@ -214,7 +223,7 @@ class QuerySet(object): def order_by(self, *field_names): """ - Returns a new QuerySet instance with the ordering changed. + Returns a new `QuerySet` instance with the ordering changed. """ qs = copy(self) qs._order_by = field_names @@ -222,7 +231,7 @@ class QuerySet(object): def only(self, *field_names): """ - Limit the query to return only the specified field names. + Returns a new `QuerySet` instance limited to the specified field names. Useful when there are large fields that are not needed, or for creating a subquery to use with an IN operator. """ @@ -232,7 +241,7 @@ class QuerySet(object): def filter(self, **kwargs): """ - Returns a new QuerySet instance that includes only rows matching the conditions. + Returns a new `QuerySet` instance that includes only rows matching the conditions. 
""" qs = copy(self) qs._q = list(self._q) + [Q(**kwargs)] @@ -240,7 +249,7 @@ class QuerySet(object): def exclude(self, **kwargs): """ - Returns a new QuerySet instance that excludes all rows matching the conditions. + Returns a new `QuerySet` instance that excludes all rows matching the conditions. """ qs = copy(self) qs._q = list(self._q) + [~Q(**kwargs)]