Merge branch 'develop' into develop

Commit 0829936940 by M1ha Shvn, 2017-08-22 13:33:01 +05:00, committed by GitHub.
52 changed files with 3995 additions and 735 deletions.

.gitignore (vendored): 2 lines changed

@@ -57,3 +57,5 @@ buildout.in
src/infi/clickhouse_orm/__version__.py
bootstrap.py
htmldocs/

@@ -1,6 +1,44 @@
Change Log
==========
Unreleased
----------
- Fix python3 compatibility (TvoroG)
- Nullable arrays not supported in latest ClickHouse version
- system.parts table no longer includes "replicated" column in latest ClickHouse version
v0.9.5
------
- Added `QuerySet.paginate()`
- Support for basic aggregation in querysets
v0.9.4
------
- Migrations: when creating a table for a `BufferModel`, create the underlying table too if necessary
v0.9.3
------
- Changed license from PSF to BSD
- Nullable fields support (yamiou)
- Support for queryset slicing
v0.9.2
------
- Added `ne` and `not_in` queryset operators
- Querysets no longer have a default order unless `order_by` is called
- Added `autocreate` flag to database initializer
- Fix some Python 2/3 incompatibilities (TvoroG, tsionyx)
- To work around a JOIN bug in ClickHouse, `$table` now inserts only the table name,
and the database name is sent in the query params instead
v0.9.0
------
- Major new feature: building model queries using QuerySets
- Refactor and expand the documentation
- Add support for FixedString fields
- Add support for more engine types: TinyLog, Log, Memory
- Bug fix: Do not send readonly=1 when connection is already in readonly mode
v0.8.2
------
- Fix broken Python 3 support (M1hacka)

LICENSE: 26 lines (new file)

@@ -0,0 +1,26 @@
Copyright (c) 2017 INFINIDAT
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README: deleted (1 line)

@@ -1 +0,0 @@
README.rst

README.md: 57 lines (new file)

@@ -0,0 +1,57 @@
Introduction
============
This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/).
It allows you to define model classes whose instances can be written to the database and read from it.
Let's jump right in with a simple example of monitoring CPU usage. First we need to define the model class,
connect to the database and create a table for the model:
```python
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import Memory

class CPUStats(Model):
    timestamp = DateTimeField()
    cpu_id = UInt16Field()
    cpu_percent = Float32Field()
    engine = Memory()

db = Database('demo')
db.create_table(CPUStats)
```
Now we can collect usage statistics per CPU, and write them to the database:
```python
import psutil, time, datetime

psutil.cpu_percent(percpu=True)  # the first sample should be discarded
while True:
    time.sleep(1)
    stats = psutil.cpu_percent(percpu=True)
    timestamp = datetime.datetime.now()
    db.insert([
        CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent)
        for cpu_id, cpu_percent in enumerate(stats)
    ])
```
Querying the table is easy, using either the query builder or raw SQL:
```python
# Calculate what percentage of the time CPU 1 was over 95% busy
total = CPUStats.objects_in(db).filter(cpu_id=1).count()
busy = CPUStats.objects_in(db).filter(cpu_id=1, cpu_percent__gt=95).count()
print('CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total))

# Calculate the average usage per CPU
for row in CPUStats.objects_in(db).aggregate('cpu_id', average='avg(cpu_percent)'):
    print('CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row))
```
To learn more please visit the [documentation](docs/toc.md).

README.rst: deleted (435 lines)

@@ -1,435 +0,0 @@
Overview
========
This project is a simple ORM for working with the `ClickHouse database <https://clickhouse.yandex/>`_.
It allows you to define model classes whose instances can be written to the database and read from it.
Installation
============
To install infi.clickhouse_orm::
pip install infi.clickhouse_orm
Usage
=====
Defining Models
---------------
Models are defined in a way reminiscent of Django's ORM::
from infi.clickhouse_orm import models, fields, engines

class Person(models.Model):
    first_name = fields.StringField()
    last_name = fields.StringField()
    birthday = fields.DateField()
    height = fields.Float32Field()
    engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.).
Alternatively it is possible to pass alias or materialized parameters (see below for usage examples).
Only one of ``default``, ``alias`` and ``materialized`` parameters can be provided.
See below for the supported field types and table engines.
Table Names
***********
The table name used for the model is its class name, converted to lowercase. To override the default name,
implement the ``table_name`` method::
class Person(models.Model):
    ...
    @classmethod
    def table_name(cls):
        return 'people'
Using Models
------------
Once you have a model, you can create model instances::
>>> dan = Person(first_name='Dan', last_name='Schwartz')
>>> suzy = Person(first_name='Suzy', last_name='Jones')
>>> dan.first_name
u'Dan'
When values are assigned to model fields, they are immediately converted to their Pythonic data type.
In case the value is invalid, a ``ValueError`` is raised::
>>> suzy.birthday = '1980-01-17'
>>> suzy.birthday
datetime.date(1980, 1, 17)
>>> suzy.birthday = 0.5
ValueError: Invalid value for DateField - 0.5
>>> suzy.birthday = '1922-05-31'
ValueError: DateField out of range - 1922-05-31 is not between 1970-01-01 and 2038-01-19
Inserting to the Database
-------------------------
To write your instances to ClickHouse, you need a ``Database`` instance::
from infi.clickhouse_orm.database import Database
db = Database('my_test_db')
This automatically connects to http://localhost:8123 and creates a database called my_test_db, unless it already exists.
If necessary, you can specify a different database URL and optional credentials::
db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
Using the ``Database`` instance you can create a table for your model, and insert instances to it::
db.create_table(Person)
db.insert([dan, suzy])
The ``insert`` method can take any iterable of model instances, but they all must belong to the same model class.
Creating a read-only database is also supported. Such a ``Database`` instance can only read data, and cannot
modify data or schemas::
db = Database('my_test_db', readonly=True)
Reading from the Database
-------------------------
Loading model instances from the database is simple::
for person in db.select("SELECT * FROM my_test_db.person", model_class=Person):
    print person.first_name, person.last_name
Do not include a ``FORMAT`` clause in the query, since the ORM automatically sets the format to ``TabSeparatedWithNamesAndTypes``.
It is possible to select only a subset of the columns, and the rest will receive their default values::
for person in db.select("SELECT first_name FROM my_test_db.person WHERE last_name='Smith'", model_class=Person):
    print person.first_name
SQL Placeholders
****************
There are a couple of special placeholders that you can use inside the SQL to make it easier to write:
``$db`` and ``$table``. The first one is replaced by the database name, and the second is replaced by
the database name plus table name (but is available only when the model is specified).
So instead of this::
db.select("SELECT * FROM my_test_db.person", model_class=Person)
you can use::
db.select("SELECT * FROM $db.person", model_class=Person)
or even::
db.select("SELECT * FROM $table", model_class=Person)
Ad-Hoc Models
*************
Specifying a model class is not required. In case you do not provide a model class, an ad-hoc class will
be defined based on the column names and types returned by the query::
for row in db.select("SELECT max(height) as max_height FROM my_test_db.person"):
    print row.max_height
This is a very convenient feature that saves you the need to define a model for each query, while still letting
you work with Pythonic column values and an elegant syntax.
Counting
--------
The ``Database`` class also supports counting records easily::
>>> db.count(Person)
117
>>> db.count(Person, conditions="height > 1.90")
6
Pagination
----------
It is possible to paginate through model instances::
>>> order_by = 'first_name, last_name'
>>> page = db.paginate(Person, order_by, page_num=1, page_size=10)
>>> print page.number_of_objects
2507
>>> print page.pages_total
251
>>> for person in page.objects:
>>> # do something
The ``paginate`` method returns a ``namedtuple`` containing the following fields:
- ``objects`` - the list of objects in this page
- ``number_of_objects`` - total number of objects in all pages
- ``pages_total`` - total number of pages
- ``number`` - the page number, starting from 1; the special value -1 may be used to retrieve the last page
- ``page_size`` - the number of objects per page
You can optionally pass conditions to the query::
>>> page = db.paginate(Person, order_by, page_num=1, page_size=100, conditions='height > 1.90')
Note that ``order_by`` must be chosen so that the ordering is unique, otherwise there might be
inconsistencies in the pagination (such as an instance that appears on two different pages).
System models
-------------
`ClickHouse docs <https://clickhouse.yandex/reference_en.html#System tables>`_.
System models are read-only models for implementing part of the system's functionality,
and for providing access to information about how the system is working.
Currently the following system models are supported:
=================== ============ ===================================================
Class DB Table Comments
=================== ============ ===================================================
SystemPart system.parts Gives methods to work with partitions. See below.
=================== ============ ===================================================
Partitions and parts
--------------------
`ClickHouse docs <https://clickhouse.yandex/reference_en.html#Manipulations with partitions and parts>`_.
A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part.
=================== ======================= =============================================================================================
Method Parameters Comments
=================== ======================= =============================================================================================
get(static) database, conditions="" Gets database partitions, filtered by conditions
get_active(static) database, conditions="" Gets only active (not detached or dropped) partitions, filtered by conditions
detach settings=None Detaches the partition. Settings is a dict of params to pass to http request
drop settings=None Drops the partition. Settings is a dict of params to pass to http request
attach settings=None Attaches already detached partition. Settings is a dict of params to pass to http request
freeze settings=None Freezes (makes backup) of the partition. Settings is a dict of params to pass to http request
fetch settings=None Fetches partition. Settings is a dict of params to pass to http request
=================== ======================= =============================================================================================
Usage example::
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.system_models import SystemPart
db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database
if len(partitions) > 0:
    partitions = sorted(partitions, key=lambda obj: obj.name)  # partition names are YYYYMM, so they sort chronologically
    partitions[0].freeze()  # makes a backup in the /opt/clickhouse/shadow directory
    partitions[0].drop()  # drops the partition
``Note``: system.parts stores information about all databases, so the SystemPart model
is designed to return only the parts of the given database.
Schema Migrations
-----------------
Over time, your models may change and the database will have to be modified accordingly.
Migrations allow you to describe these changes succinctly using Python, and to apply them
to the database. A migrations table automatically keeps track of which migrations were already applied.
For details please refer to the MIGRATIONS.rst document.
Field Types
-----------
Currently the following field types are supported:
=================== ======== ================= ===================================================
Class DB Type Pythonic Type Comments
=================== ======== ================= ===================================================
StringField String unicode Encoded as UTF-8 when written to ClickHouse
DateField Date datetime.date Range 1970-01-01 to 2038-01-19
DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC
Int8Field Int8 int Range -128 to 127
Int16Field Int16 int Range -32768 to 32767
Int32Field Int32 int Range -2147483648 to 2147483647
Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807
UInt8Field UInt8 int Range 0 to 255
UInt16Field UInt16 int Range 0 to 65535
UInt32Field UInt32 int Range 0 to 4294967295
UInt64Field UInt64 int/long Range 0 to 18446744073709551615
Float32Field Float32 float
Float64Field Float64 float
Enum8Field Enum8 Enum See below
Enum16Field Enum16 Enum See below
ArrayField Array list See below
=================== ======== ================= ===================================================
DateTimeField and Time Zones
****************************
A ``DateTimeField`` can be assigned values from one of the following types:
- datetime
- date
- integer - number of seconds since the Unix epoch
- string in ``YYYY-MM-DD HH:MM:SS`` format
The assigned value always gets converted to a timezone-aware ``datetime`` in UTC. If the assigned
value is a timezone-aware ``datetime`` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC.
DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the
timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse version which is new
enough to support the ``timezone()`` function, otherwise it is assumed to be using UTC. In any case, we recommend
setting the server timezone to UTC in order to prevent confusion.
Working with enum fields
************************
``Enum8Field`` and ``Enum16Field`` provide support for working with ClickHouse enum columns. They accept
strings or integers as values, and convert them to the matching Pythonic Enum member.
Python 3.4 and higher supports Enums natively. When using previous Python versions you
need to install the `enum34` library.
Example of a model with an enum field::
Gender = Enum('Gender', 'male female unspecified')

class Person(models.Model):
    first_name = fields.StringField()
    last_name = fields.StringField()
    birthday = fields.DateField()
    gender = fields.Enum8Field(Gender)
    engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))

suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female)
Working with array fields
*************************
You can create array fields containing any data type, for example::
class SensorData(models.Model):
    date = fields.DateField()
    temperatures = fields.ArrayField(fields.Float32Field())
    humidity_levels = fields.ArrayField(fields.UInt8Field())
    engine = engines.MergeTree('date', ('date',))

data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
Working with materialized and alias fields
******************************************
ClickHouse lets you define MATERIALIZED and ALIAS fields.
See documentation `here <https://clickhouse.yandex/reference_en.html#Default values>`_.
Both field types can't be inserted into the database directly, so they are ignored when using the ``Database.insert()`` method.
ClickHouse does not return the field values if you use ``"SELECT * FROM ..."`` - you have to list these field
names explicitly in the query.
Usage::
class Event(models.Model):
    created = fields.DateTimeField()
    created_date = fields.DateTimeField(materialized='toDate(created)')
    name = fields.StringField()
    username = fields.StringField(alias='name')
    engine = engines.MergeTree('created_date', ('created_date', 'created'))

obj = Event(created=datetime.now(), name='MyEvent')
db = Database('my_test_db')
db.insert([obj])
# All values will be retrieved from database
db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
# created_date and username will contain a default value
db.select('SELECT * FROM $db.event', model_class=Event)
Table Engines
-------------
Each model must have an engine instance, used when creating the table in ClickHouse.
To define a ``MergeTree`` engine, supply the date column name and the names (or expressions) for the key columns::
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'))
You may also provide a sampling expression::
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)')
A ``CollapsingMergeTree`` engine is defined in a similar manner, but requires also a sign column::
engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')
For a ``SummingMergeTree`` you can optionally specify the summing columns::
engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
                                  summing_cols=('Shows', 'Clicks', 'Cost'))
For a ``ReplacingMergeTree`` you can optionally specify the version column::
engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')
A ``Buffer`` engine is available for buffer models (see below how to use ``BufferModel``). You can specify the following parameters::

engine = engines.Buffer(Person)  # initialize the engine with the main model; defaults are used for the other parameters
# or:
engine = engines.Buffer(Person, num_layers=16, min_time=10,
                        max_time=100, min_rows=10000, max_rows=1000000,
                        min_bytes=10000000, max_bytes=100000000)
Buffer Models
-------------
Here's how to define a model for the Buffer engine. The buffer model should inherit from both ``models.BufferModel`` and the main model::

class PersonBuffer(models.BufferModel, Person):
    engine = engines.Buffer(Person)

Then you can insert objects into the buffer model, and ClickHouse will handle them properly::
db.create_table(PersonBuffer)
suzy = PersonBuffer(first_name='Suzy', last_name='Jones')
dan = PersonBuffer(first_name='Dan', last_name='Schwartz')
db.insert([dan, suzy])
Data Replication
****************
Any of the above engines can be converted to a replicated engine (e.g. ``ReplicatedMergeTree``) by adding two parameters, ``replica_table_path`` and ``replica_name``::
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'),
                           replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
                           replica_name='{replica}')
Development
===========
After cloning the project, run the following commands::
easy_install -U infi.projector
cd infi.clickhouse_orm
projector devenv build
To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run::
bin/nosetests
To see test coverage information run::
bin/nosetests --with-coverage --cover-package=infi.clickhouse_orm

buildout.cfg: changed

@@ -3,7 +3,7 @@ prefer-final = false
newest = false
download-cache = .cache
develop = .
parts =
parts =
[project]
name = infi.clickhouse_orm
@@ -17,6 +17,7 @@ install_requires = [
]
version_file = src/infi/clickhouse_orm/__version__.py
description = A Python library for working with the ClickHouse database
long_description = A Python library for working with the ClickHouse database
console_scripts = []
gui_scripts = []
package_data = []
@@ -28,7 +29,7 @@ homepage = https://github.com/Infinidat/infi.clickhouse_orm
[isolated-python]
recipe = infi.recipe.python
version = v2.7.8.4
version = v2.7.12.4
[setup.py]
recipe = infi.recipe.template.version
@@ -43,7 +44,7 @@ output = ${project:version_file}
dependent-scripts = true
recipe = infi.recipe.console_scripts
eggs = ${project:name}
ipython
ipython<6
nose
coverage
enum34

docs/class_reference.md: 732 lines (new file)

@@ -0,0 +1,732 @@
Class Reference
===============
infi.clickhouse_orm.database
----------------------------
### Database
Database instances connect to a specific ClickHouse database for running queries,
inserting data and other operations.
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True)
Initializes a database instance. Unless it's readonly, the database will be
created on the ClickHouse server if it does not already exist.
- `db_name`: name of the database to connect to.
- `db_url`: URL of the ClickHouse server.
- `username`: optional connection credentials.
- `password`: optional connection credentials.
- `readonly`: use a read-only connection.
- `autocreate`: automatically create the database if it does not exist (unless in readonly mode).
#### count(model_class, conditions=None)
Counts the number of records in the model's table.
- `model_class`: the model to count.
- `conditions`: optional SQL conditions (contents of the WHERE clause).
#### create_database()
Creates the database on the ClickHouse server if it does not already exist.
#### create_table(model_class)
Creates a table for the given model class, if it does not exist already.
#### drop_database()
Deletes the database on the ClickHouse server.
#### drop_table(model_class)
Drops the database table of the given model class, if it exists.
#### insert(model_instances, batch_size=1000)
Insert records into the database.
- `model_instances`: any iterable containing instances of a single model class.
- `batch_size`: number of records to send per chunk (use a lower number if your records are very large).
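Because `insert` accepts any iterable, a generator can stream a large dataset into the database without building a list in memory. A minimal sketch, assuming the `Person` model from the usage examples:
```
def generate_people(n):
    # Yields instances one at a time; insert() sends them in chunks of batch_size
    for i in range(n):
        yield Person(first_name='Person', last_name=str(i))

db.insert(generate_people(1000000), batch_size=500)
```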
#### migrate(migrations_package_name, up_to=9999)
Executes schema migrations.
- `migrations_package_name` - fully qualified name of the Python package
containing the migrations.
- `up_to` - number of the last migration to apply.
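A sketch of what a migrations package might contain; the package and module names here are hypothetical, and `CreateTable` is assumed to be one of the operations provided by `infi.clickhouse_orm.migrations`:
```
# my_app/my_migrations/0001_initial.py (hypothetical module; names must sort in application order)
from infi.clickhouse_orm import migrations
from my_app.models import Person

operations = [
    migrations.CreateTable(Person)
]
```
Applying it is then a single call: `db.migrate('my_app.my_migrations')`.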
#### paginate(model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None)
Selects records and returns a single page of model instances.
- `model_class`: the model class matching the query's table,
or `None` for getting back instances of an ad-hoc model.
- `order_by`: columns to use for sorting the query (contents of the ORDER BY clause).
- `page_num`: the page number (1-based), or -1 to get the last page.
- `page_size`: number of records to return per page.
- `conditions`: optional SQL conditions (contents of the WHERE clause).
- `settings`: query settings to send as HTTP GET parameters
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
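For example, fetching the last page of `Person` records (a sketch using only the parameters documented above):
```
page = db.paginate(Person, order_by='first_name, last_name', page_num=-1, page_size=100)
print('{} objects in {} pages'.format(page.number_of_objects, page.pages_total))
```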
#### raw(query, settings=None, stream=False)
Performs a query and returns its output as text.
- `query`: the SQL query to execute.
- `settings`: query settings to send as HTTP GET parameters
- `stream`: if true, the HTTP response from ClickHouse will be streamed.
#### select(query, model_class=None, settings=None)
Performs a query and returns a generator of model instances.
- `query`: the SQL query to execute.
- `model_class`: the model class matching the query's table,
or `None` for getting back instances of an ad-hoc model.
- `settings`: query settings to send as HTTP GET parameters
### DatabaseException
Extends Exception
Raised when a database operation fails.
infi.clickhouse_orm.models
--------------------------
### Model
A base class for ORM models. Each model class represents a ClickHouse table. For example:

class CPUStats(Model):
    timestamp = DateTimeField()
    cpu_id = UInt16Field()
    cpu_percent = Float32Field()
    engine = Memory()
#### Model(**kwargs)
Creates a model instance, using keyword arguments as field values.
Since values are immediately converted to their Pythonic type,
invalid values will cause a `ValueError` to be raised.
Unrecognized field names will cause an `AttributeError`.
#### Model.create_table_sql(db_name)
Returns the SQL command for creating a table for this model.
#### Model.drop_table_sql(db_name)
Returns the SQL command for deleting this model's table.
#### Model.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None)
Create a model instance from a tab-separated line. The line may or may not include a newline.
The `field_names` list must match the fields defined in the model, but does not have to include all of them.
If omitted, it is assumed to be the names of all fields in the model, in order of definition.
- `line`: the TSV-formatted data.
- `field_names`: names of the model fields in the data.
- `timezone_in_use`: the timezone to use when parsing dates and datetimes.
- `database`: if given, sets the database that this instance belongs to.
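For example, assuming the `Person` model used throughout these docs, a hypothetical line with values in field definition order:
```
person = Person.from_tsv('Dan\tSchwartz\t1995-04-30\t1.80')
```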
#### get_database()
Gets the `Database` that this model instance belongs to.
Returns `None` unless the instance was read from the database or written to it.
#### get_field(name)
Gets a `Field` instance given its name, or `None` if not found.
#### Model.objects_in(database)
Returns a `QuerySet` for selecting instances of this model class.
#### set_database(db)
Sets the `Database` that this model instance belongs to.
This is done automatically when the instance is read from the database or written to it.
#### Model.table_name()
Returns the model's database table name. By default this is the
class name converted to lowercase. Override this if you want to use
a different table name.
#### to_dict(include_readonly=True, field_names=None)
Returns the instance's column values as a dict.
- `include_readonly`: if false, returns only fields that can be inserted into the database.
- `field_names`: an iterable of field names to return (optional)
#### to_tsv(include_readonly=True)
Returns the instance's column values as a tab-separated line. A newline is not included.
- `include_readonly`: if false, returns only fields that can be inserted into the database.
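A small sketch, using the `Person` instance from the tutorial (dict ordering may vary):
```
>>> dan.to_dict(include_readonly=False, field_names=('first_name', 'last_name'))
{'first_name': 'Dan', 'last_name': 'Schwartz'}
```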
### BufferModel
Extends Model
#### BufferModel(**kwargs)
Creates a model instance, using keyword arguments as field values.
Since values are immediately converted to their Pythonic type,
invalid values will cause a `ValueError` to be raised.
Unrecognized field names will cause an `AttributeError`.
#### BufferModel.create_table_sql(db_name)
Returns the SQL command for creating a table for this model.
#### BufferModel.drop_table_sql(db_name)
Returns the SQL command for deleting this model's table.
#### BufferModel.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None)
Create a model instance from a tab-separated line. The line may or may not include a newline.
The `field_names` list must match the fields defined in the model, but does not have to include all of them.
If omitted, it is assumed to be the names of all fields in the model, in order of definition.
- `line`: the TSV-formatted data.
- `field_names`: names of the model fields in the data.
- `timezone_in_use`: the timezone to use when parsing dates and datetimes.
- `database`: if given, sets the database that this instance belongs to.
#### get_database()
Gets the `Database` that this model instance belongs to.
Returns `None` unless the instance was read from the database or written to it.
#### get_field(name)
Gets a `Field` instance given its name, or `None` if not found.
#### BufferModel.objects_in(database)
Returns a `QuerySet` for selecting instances of this model class.
#### set_database(db)
Sets the `Database` that this model instance belongs to.
This is done automatically when the instance is read from the database or written to it.
#### BufferModel.table_name()
Returns the model's database table name. By default this is the
class name converted to lowercase. Override this if you want to use
a different table name.
#### to_dict(include_readonly=True, field_names=None)
Returns the instance's column values as a dict.
- `include_readonly`: if false, returns only fields that can be inserted into the database.
- `field_names`: an iterable of field names to return (optional)
#### to_tsv(include_readonly=True)
Returns the instance's column values as a tab-separated line. A newline is not included.
- `include_readonly`: if false, returns only fields that can be inserted into the database.
infi.clickhouse_orm.fields
--------------------------
### Field
Abstract base class for all field types.
#### Field(default=None, alias=None, materialized=None)
### StringField
Extends Field
#### StringField(default=None, alias=None, materialized=None)
### DateField
Extends Field
#### DateField(default=None, alias=None, materialized=None)
### DateTimeField
Extends Field
#### DateTimeField(default=None, alias=None, materialized=None)
### BaseIntField
Extends Field
Abstract base class for all integer-type fields.
#### BaseIntField(default=None, alias=None, materialized=None)
### BaseFloatField
Extends Field
Abstract base class for all float-type fields.
#### BaseFloatField(default=None, alias=None, materialized=None)
### BaseEnumField
Extends Field
Abstract base class for all enum-type fields.
#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None)
### ArrayField
Extends Field
#### ArrayField(inner_field, default=None, alias=None, materialized=None)
### NullableField
Extends Field
#### NullableField(inner_field, default=None, alias=None, materialized=None, extra_null_values=None)
### FixedStringField
Extends StringField
#### FixedStringField(length, default=None, alias=None, materialized=None)
### UInt8Field
Extends BaseIntField
#### UInt8Field(default=None, alias=None, materialized=None)
### UInt16Field
Extends BaseIntField
#### UInt16Field(default=None, alias=None, materialized=None)
### UInt32Field
Extends BaseIntField
#### UInt32Field(default=None, alias=None, materialized=None)
### UInt64Field
Extends BaseIntField
#### UInt64Field(default=None, alias=None, materialized=None)
### Int8Field
Extends BaseIntField
#### Int8Field(default=None, alias=None, materialized=None)
### Int16Field
Extends BaseIntField
#### Int16Field(default=None, alias=None, materialized=None)
### Int32Field
Extends BaseIntField
#### Int32Field(default=None, alias=None, materialized=None)
### Int64Field
Extends BaseIntField
#### Int64Field(default=None, alias=None, materialized=None)
### Float32Field
Extends BaseFloatField
#### Float32Field(default=None, alias=None, materialized=None)
### Float64Field
Extends BaseFloatField
#### Float64Field(default=None, alias=None, materialized=None)
### Enum8Field
Extends BaseEnumField
#### Enum8Field(enum_cls, default=None, alias=None, materialized=None)
### Enum16Field
Extends BaseEnumField
#### Enum16Field(enum_cls, default=None, alias=None, materialized=None)
infi.clickhouse_orm.engines
---------------------------
### Engine
### TinyLog
Extends Engine
### Log
Extends Engine
### Memory
Extends Engine
### MergeTree
Extends Engine
#### MergeTree(date_col, key_cols, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None)
### Buffer
Extends Engine
Buffers the data to write in RAM, periodically flushing it to another table.
Must be used in conjunction with a `BufferModel`.
Read more [here](https://clickhouse.yandex/reference_en.html#Buffer).
#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000)
### CollapsingMergeTree
Extends MergeTree
#### CollapsingMergeTree(date_col, key_cols, sign_col, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None)
### SummingMergeTree
Extends MergeTree
#### SummingMergeTree(date_col, key_cols, summing_cols=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None)
### ReplacingMergeTree
Extends MergeTree
#### ReplacingMergeTree(date_col, key_cols, ver_col=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None)
infi.clickhouse_orm.query
-------------------------
### QuerySet
A queryset is an object that represents a database query using a specific `Model`.
It is lazy, meaning that it does not hit the database until you iterate over its
matching rows (model instances).
#### QuerySet(model_cls, database)
Initializer. It is possible to create a queryset like this, but the standard
way is to use `MyModel.objects_in(database)`.
#### aggregate(*args, **kwargs)
Returns an `AggregateQuerySet` over this query, with `args` serving as
grouping fields and `kwargs` serving as calculated fields. At least one
calculated field is required. For example:
```
Event.objects_in(database).filter(date__gt='2017-08-01').aggregate('event_type', count='count()')
```
is equivalent to:
```
SELECT event_type, count() AS count FROM event
WHERE date > '2017-08-01'
GROUP BY event_type
```
#### as_sql()
Returns the whole query as a SQL string.
#### conditions_as_sql()
Returns the contents of the query's `WHERE` clause as a string.
#### count()
Returns the number of matching model instances.
#### exclude(**kwargs)
Returns a copy of this queryset that excludes all rows matching the conditions.
#### filter(**kwargs)
Returns a copy of this queryset that includes only rows matching the conditions.
#### only(*field_names)
Returns a copy of this queryset limited to the specified field names.
Useful when there are large fields that are not needed,
or for creating a subquery to use with an IN operator.
#### order_by(*field_names)
Returns a copy of this queryset with the ordering changed.
#### order_by_as_sql()
Returns the contents of the query's `ORDER BY` clause as a string.
#### paginate(page_num=1, page_size=100)
Returns a single page of model instances that match the queryset.
Note that `order_by` should be used first, to ensure a correct
partitioning of records into pages.
- `page_num`: the page number (1-based), or -1 to get the last page.
- `page_size`: number of records to return per page.
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
### AggregateQuerySet
Extends QuerySet
A queryset used for aggregation.
#### AggregateQuerySet(base_qs, grouping_fields, calculated_fields)
Initializer. Normally you should not call this but rather use `QuerySet.aggregate()`.
The grouping fields should be a list/tuple of field names from the model. For example:
```
('event_type', 'event_subtype')
```
The calculated fields should be a mapping from name to a ClickHouse aggregation function. For example:
```
{'weekday': 'toDayOfWeek(event_date)', 'number_of_events': 'count()'}
```
At least one calculated field is required.
#### aggregate(*args, **kwargs)
This method is not supported on `AggregateQuerySet`.
#### as_sql()
Returns the whole query as a SQL string.
#### conditions_as_sql()
Returns the contents of the query's `WHERE` clause as a string.
#### count()
Returns the number of rows after aggregation.
#### exclude(**kwargs)
Returns a copy of this queryset that excludes all rows matching the conditions.
#### filter(**kwargs)
Returns a copy of this queryset that includes only rows matching the conditions.
#### group_by(*args)
This method lets you specify the grouping fields explicitly. The `args` must
be names of grouping fields or calculated fields that this queryset was
created with.
#### only(*field_names)
This method is not supported on `AggregateQuerySet`.
#### order_by(*field_names)
Returns a copy of this queryset with the ordering changed.
#### order_by_as_sql()
Returns the contents of the query's `ORDER BY` clause as a string.
#### paginate(page_num=1, page_size=100)
Returns a single page of model instances that match the queryset.
Note that `order_by` should be used first, to ensure a correct
partitioning of records into pages.
- `page_num`: the page number (1-based), or -1 to get the last page.
- `page_size`: number of records to return per page.
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.

docs/contributing.md: 36 lines (new file)

@@ -0,0 +1,36 @@
Contributing
============
This project is hosted on GitHub - [https://github.com/Infinidat/infi.clickhouse_orm/](https://github.com/Infinidat/infi.clickhouse_orm/).
Please open an issue there if you encounter a bug or want to request a feature.
Pull requests are also welcome.
Building
--------
After cloning the project, run the following commands:
easy_install -U infi.projector
cd infi.clickhouse_orm
projector devenv build
A `setup.py` file will be generated, which you can use to install the development version of the package:
python setup.py install
Tests
-----
To run the tests, ensure that the ClickHouse server is running on <http://localhost:8123/> (this is the default), and run:
bin/nosetests
To see test coverage information run:
bin/nosetests --with-coverage --cover-package=infi.clickhouse_orm
---
[<< System Models](system_models.md) | [Table of Contents](toc.md) | [Class Reference >>](class_reference.md)

docs/field_types.md: 136 lines (new file)

@@ -0,0 +1,136 @@
Field Types
===========
Currently the following field types are supported:
| Class | DB Type | Pythonic Type | Comments
| ------------------ | ---------- | ------------------- | -----------------------------------------------------
| StringField | String | unicode | Encoded as UTF-8 when written to ClickHouse
| FixedStringField | String | unicode | Encoded as UTF-8 when written to ClickHouse
| DateField | Date | datetime.date | Range 1970-01-01 to 2038-01-19
| DateTimeField | DateTime | datetime.datetime | Minimal value is 1970-01-01 00:00:00; Always in UTC
| Int8Field | Int8 | int | Range -128 to 127
| Int16Field | Int16 | int | Range -32768 to 32767
| Int32Field | Int32 | int | Range -2147483648 to 2147483647
| Int64Field | Int64 | int/long | Range -9223372036854775808 to 9223372036854775807
| UInt8Field | UInt8 | int | Range 0 to 255
| UInt16Field | UInt16 | int | Range 0 to 65535
| UInt32Field | UInt32 | int | Range 0 to 4294967295
| UInt64Field | UInt64 | int/long | Range 0 to 18446744073709551615
| Float32Field | Float32 | float |
| Float64Field | Float64 | float |
| Enum8Field | Enum8 | Enum | See below
| Enum16Field | Enum16 | Enum | See below
| ArrayField | Array | list | See below
| NullableField | Nullable | See below | See below
DateTimeField and Time Zones
----------------------------
A `DateTimeField` can be assigned values from one of the following types:
- datetime
- date
- integer - number of seconds since the Unix epoch
- string in `YYYY-MM-DD HH:MM:SS` format
The assigned value always gets converted to a timezone-aware `datetime` in UTC. If the assigned value is a timezone-aware `datetime` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC.
DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse
version which is new enough to support the `timezone()` function, otherwise it is assumed to be using UTC. In any case, we recommend setting the server timezone to UTC in order to prevent confusion.
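For example, given a model with a hypothetical `timestamp = fields.DateTimeField()`, assigning an epoch value yields an aware UTC datetime (the exact tzinfo repr depends on the pytz version):

>>> instance.timestamp = 1500000000  # seconds since the Unix epoch
>>> instance.timestamp
datetime.datetime(2017, 7, 14, 2, 40, tzinfo=<UTC>)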
Working with enum fields
------------------------
`Enum8Field` and `Enum16Field` provide support for working with ClickHouse enum columns. They accept strings or integers as values, and convert them to the matching Pythonic Enum member.
Python 3.4 and higher supports Enums natively. When using previous Python versions you need to install the enum34 library.
Example of a model with an enum field:
Gender = Enum('Gender', 'male female unspecified')

class Person(models.Model):
    first_name = fields.StringField()
    last_name = fields.StringField()
    birthday = fields.DateField()
    gender = fields.Enum8Field(Gender)
    engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))

suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female)
Working with array fields
-------------------------
You can create array fields containing any data type, for example:
class SensorData(models.Model):
    date = fields.DateField()
    temperatures = fields.ArrayField(fields.Float32Field())
    humidity_levels = fields.ArrayField(fields.UInt8Field())
    engine = engines.MergeTree('date', ('date',))

data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
Working with materialized and alias fields
------------------------------------------
ClickHouse lets you define MATERIALIZED and ALIAS fields.
See the documentation [here](https://clickhouse.yandex/reference_en.html#Default%20values).
Both field types can't be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query.
Usage:
class Event(models.Model):
    created = fields.DateTimeField()
    created_date = fields.DateTimeField(materialized='toDate(created)')
    name = fields.StringField()
    username = fields.StringField(alias='name')
    engine = engines.MergeTree('created_date', ('created_date', 'created'))

obj = Event(created=datetime.now(), name='MyEvent')
db = Database('my_test_db')
db.insert([obj])

# All values will be retrieved from database
db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)

# created_date and username will contain a default value
db.select('SELECT * FROM $db.event', model_class=Event)
Working with nullable fields
----------------------------
ClickHouse has supported NULL values [for some time](https://github.com/yandex/ClickHouse/pull/70).
Also see some information [here](https://github.com/yandex/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00395_nullable.sql).
Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example:
class EventData(models.Model):
    date = fields.DateField()
    comment = fields.NullableField(fields.StringField(), extra_null_values={''})
    score = fields.NullableField(fields.UInt8Field())
    serie = fields.NullableField(fields.ArrayField(fields.UInt8Field()))
    engine = engines.MergeTree('date', ('date',))

score_event = EventData(date=date.today(), comment=None, score=5, serie=None)
comment_event = EventData(date=date.today(), comment='Excellent!', score=None, serie=None)
another_event = EventData(date=date.today(), comment='', score=None, serie=None)
action_event = EventData(date=date.today(), comment='', score=None, serie=[1, 2, 3])
The `extra_null_values` parameter is an iterable of additional values that should be converted
to `None`.
NOTE: an `ArrayField` of `NullableField` is not supported, and enum fields cannot be nullable.
---
[<< Querysets](querysets.md) | [Table of Contents](toc.md) | [Table Engines >>](table_engines.md)

docs/index.md: 17 lines (new file)

@@ -0,0 +1,17 @@
Overview
========
This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/). It allows you to define model classes whose instances can be written to the database and read from it.
It was tested on Python 2.7 and 3.5.
Installation
------------
To install infi.clickhouse_orm:
pip install infi.clickhouse_orm
---
[Table of Contents](toc.md) | [Models and Databases >>](models_and_databases.md)

docs/models_and_databases.md: 173 lines (new file)

@@ -0,0 +1,173 @@
Models and Databases
====================
Models represent ClickHouse tables, allowing you to work with them using familiar pythonic syntax.
Database instances connect to a specific ClickHouse database for running queries, inserting data and other operations.
Defining Models
---------------
Models are defined in a way reminiscent of Django's ORM:
from infi.clickhouse_orm import models, fields, engines

class Person(models.Model):
    first_name = fields.StringField()
    last_name = fields.StringField()
    birthday = fields.DateField()
    height = fields.Float32Field()
    engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.). Alternatively it is possible to pass alias or materialized parameters (see below for usage examples). Only one of `default`, `alias` and `materialized` parameters can be provided.
For more details see [Field Types](field_types.md) and [Table Engines](table_engines.md).
### Table Names
The table name used for the model is its class name, converted to lowercase. To override the default name, implement the `table_name` method:
class Person(models.Model):
    ...
    @classmethod
    def table_name(cls):
        return 'people'
Using Models
------------
Once you have a model, you can create model instances:
>>> dan = Person(first_name='Dan', last_name='Schwartz')
>>> suzy = Person(first_name='Suzy', last_name='Jones')
>>> dan.first_name
u'Dan'
When values are assigned to model fields, they are immediately converted to their Pythonic data type. In case the value is invalid, a `ValueError` is raised:
>>> suzy.birthday = '1980-01-17'
>>> suzy.birthday
datetime.date(1980, 1, 17)
>>> suzy.birthday = 0.5
ValueError: Invalid value for DateField - 0.5
>>> suzy.birthday = '1922-05-31'
ValueError: DateField out of range - 1922-05-31 is not between 1970-01-01 and 2038-01-19
Inserting to the Database
-------------------------
To write your instances to ClickHouse, you need a `Database` instance:
from infi.clickhouse_orm.database import Database
db = Database('my_test_db')
This automatically connects to <http://localhost:8123> and creates a database called my_test_db, unless it already exists. If necessary, you can specify a different database URL and optional credentials:
db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
Using the `Database` instance you can create a table for your model, and insert instances to it:
db.create_table(Person)
db.insert([dan, suzy])
The `insert` method can take any iterable of model instances, but they all must belong to the same model class.
Creating a read-only database is also supported. Such a `Database` instance can only read data, and cannot modify data or schemas:
db = Database('my_test_db', readonly=True)
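Write operations through such an instance fail. A minimal sketch of what to expect, assuming the `DatabaseException` described in the class reference is raised:

from infi.clickhouse_orm.database import DatabaseException

try:
    db.insert([dan])
except DatabaseException as e:
    print('Insert rejected: {}'.format(e))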
Reading from the Database
-------------------------
Loading model instances from the database is simple:
for person in db.select("SELECT * FROM my_test_db.person", model_class=Person):
print person.first_name, person.last_name
Do not include a `FORMAT` clause in the query, since the ORM automatically sets the format to `TabSeparatedWithNamesAndTypes`.
It is possible to select only a subset of the columns, and the rest will receive their default values:
for person in db.select("SELECT first_name FROM my_test_db.person WHERE last_name='Smith'", model_class=Person):
print person.first_name
The ORM provides a way to build simple queries without writing SQL by hand. The previous snippet can be written like this:
for person in Person.objects_in(db).filter(last_name='Smith').only('first_name'):
    print(person.first_name)
See [Querysets](querysets.md) for more information.
Reading without a Model
-----------------------
When running a query, specifying a model class is not required. In case you do not provide a model class, an ad-hoc class will be defined based on the column names and types returned by the query:
for row in db.select("SELECT max(height) as max_height FROM my_test_db.person"):
    print(row.max_height)
This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax.
SQL Placeholders
----------------
There are a couple of special placeholders that you can use inside the SQL to make it easier to write: `$db` and `$table`. The first one is replaced by the database name, and the second is replaced by the table name (but is available only when the model is specified).
So instead of this:
db.select("SELECT * FROM my_test_db.person", model_class=Person)
you can use:
db.select("SELECT * FROM $db.$table", model_class=Person)
Note: normally it is not necessary to specify the database name, since it's already sent in the query parameters to ClickHouse. It is enough to specify the table name.
Counting
--------
The `Database` class also supports counting records easily:
>>> db.count(Person)
117
>>> db.count(Person, conditions="height > 1.90")
6
Pagination
----------
It is possible to paginate through model instances:
>>> order_by = 'first_name, last_name'
>>> page = db.paginate(Person, order_by, page_num=1, page_size=10)
>>> print(page.number_of_objects)
2507
>>> print(page.pages_total)
251
>>> for person in page.objects:
...     pass  # do something
The `paginate` method returns a `namedtuple` containing the following fields:
- `objects` - the list of objects in this page
- `number_of_objects` - total number of objects in all pages
- `pages_total` - total number of pages
- `number` - the page number, starting from 1; the special value -1 may be used to retrieve the last page
- `page_size` - the number of objects per page
You can optionally pass conditions to the query:
>>> page = db.paginate(Person, order_by, page_num=1, page_size=100, conditions='height > 1.90')
Note that `order_by` must be chosen so that the ordering is unique, otherwise there might be inconsistencies in the pagination (such as an instance that appears on two different pages).
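For example, to process all records page by page (a sketch that assumes the table is not empty):

page_num = 1
while True:
    page = db.paginate(Person, order_by, page_num=page_num, page_size=100)
    for person in page.objects:
        pass  # process each person here
    if page_num >= page.pages_total:
        break
    page_num += 1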
---
[<< Overview](index.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md)

docs/querysets.md: 178 lines (new file)

@@ -0,0 +1,178 @@
Querysets
=========
A queryset is an object that represents a database query using a specific Model. It is lazy, meaning that it does not hit the database until you iterate over its matching rows (model instances). To create a base queryset for a model class, use:
qs = Person.objects_in(database)
This queryset matches all Person instances in the database. You can get these instances using iteration:
for person in qs:
    print(person.first_name, person.last_name)
Filtering
---------
The `filter` and `exclude` methods are used for filtering the matching instances. Calling these methods returns a new queryset instance, with the added conditions. For example:
>>> qs = Person.objects_in(database)
>>> qs = qs.filter(first_name__startswith='V').exclude(birthday__lt='2000-01-01')
>>> qs.conditions_as_sql()
u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')"
It is possible to specify several fields to filter or exclude by:
>>> qs = Person.objects_in(database).filter(last_name='Smith', height__gt=1.75)
>>> qs.conditions_as_sql()
u"last_name = 'Smith' AND height > 1.75"
There are different operators that can be used by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators.
| Operator | Equivalent SQL | Comments |
| -------- | -------------------------------------------- | ---------------------------------- |
| `eq` | `field = value` | |
| `ne` | `field != value` | |
| `gt` | `field > value` | |
| `gte` | `field >= value` | |
| `lt` | `field < value` | |
| `lte` | `field <= value` | |
| `in` | `field IN (values)` | See below |
| `not_in` | `field NOT IN (values)` | See below |
| `contains` | `field LIKE '%value%'` | For string fields only |
| `startswith` | `field LIKE 'value%'` | For string fields only |
| `endswith` | `field LIKE '%value'` | For string fields only |
| `icontains` | `lowerUTF8(field) LIKE lowerUTF8('%value%')` | For string fields only |
| `istartswith` | `lowerUTF8(field) LIKE lowerUTF8('value%')` | For string fields only |
| `iendswith` | `lowerUTF8(field) LIKE lowerUTF8('%value')` | For string fields only |
| `iexact` | `lowerUTF8(field) = lowerUTF8(value)` | For string fields only |
### Using the `in` Operator
The `in` and `not_in` operators expect one of three types of values:
* A list or tuple of simple values
* A string, which is used verbatim as the contents of the parentheses
* Another queryset (subquery)
For example if we want to select only people with Irish last names:
# A list of simple values
qs = Person.objects_in(database).filter(last_name__in=["Murphy", "O'Sullivan"])
# A string
subquery = "SELECT name from $db.irishlastname"
qs = Person.objects_in(database).filter(last_name__in=subquery)
# A queryset
subquery = IrishLastName.objects_in(database).only("name")
qs = Person.objects_in(database).filter(last_name__in=subquery)
Counting and Checking Existence
-------------------------------
Use the `count` method to get the number of matches:
Person.objects_in(database).count()
To check if there are any matches at all, you can use any of the following equivalent options:
if qs.count(): ...
if bool(qs): ...
if qs: ...
Ordering
--------
The sorting order of the results can be controlled using the `order_by` method:
qs = Person.objects_in(database).order_by('last_name', 'first_name')
The default order is ascending. To use descending order, add a minus sign before the field name:
qs = Person.objects_in(database).order_by('-height')
If you do not use `order_by`, the rows are returned in arbitrary order.
Omitting Fields
---------------
When some of the model fields aren't needed, it is more efficient to omit them from the query. This is especially true when there are large fields that may slow the query down. Use the `only` method to specify which fields to retrieve:
qs = Person.objects_in(database).only('first_name', 'birthday')
Slicing
-------
It is possible to get a specific item from the queryset by index:
qs = Person.objects_in(database).order_by('last_name', 'first_name')
first = qs[0]
It is also possible to get a range of instances using a slice. This returns a queryset
that you can either iterate over or convert to a list.
qs = Person.objects_in(database).order_by('last_name', 'first_name')
first_ten_people = list(qs[:10])
next_ten_people = list(qs[10:20])
You should use `order_by` to ensure a consistent ordering of the results.
Trying to use negative indexes or a slice with a step (e.g. [0:100:2]) is not supported and will raise an `AssertionError`.
Pagination
----------
Similar to `Database.paginate`, you can go over the queryset results one page at a time:
>>> qs = Person.objects_in(database).order_by('last_name', 'first_name')
>>> page = qs.paginate(page_num=1, page_size=10)
>>> print(page.number_of_objects)
2507
>>> print(page.pages_total)
251
>>> for person in page.objects:
...     pass  # do something
The `paginate` method returns a `namedtuple` containing the following fields:
- `objects` - the list of objects in this page
- `number_of_objects` - total number of objects in all pages
- `pages_total` - total number of pages
- `number` - the page number, starting from 1; the special value -1 may be used to retrieve the last page
- `page_size` - the number of objects per page
Note that you should use `QuerySet.order_by` so that the ordering is unique, otherwise there might be inconsistencies in the pagination (such as an instance that appears on two different pages).
Aggregation
-----------
It is possible to use aggregation functions over querysets using the `aggregate` method. The simplest form of aggregation works over all rows in the queryset:
>>> qs = Person.objects_in(database).aggregate(average_height='avg(height)')
>>> print(qs.count())
1
>>> for row in qs: print(row.average_height)
1.71
The returned row or rows are no longer instances of the base model (`Person` in this example), but rather instances of an ad-hoc model that includes only the fields specified in the call to `aggregate`.
You can pass names of fields from the model that will be included in the query. By default, they will also be used in the GROUP BY clause. For example, to count the number of people per last name you could do this:
qs = Person.objects_in(database).aggregate('last_name', num='count()')
The underlying SQL query would be something like this:
SELECT last_name, count() AS num FROM person GROUP BY last_name
If you would like to control the GROUP BY explicitly, use the `group_by` method. This is useful when you need to group by a calculated field, instead of a field that exists in the model. For example, to count the number of people born on each weekday:
qs = Person.objects_in(database).aggregate(weekday='toDayOfWeek(birthday)', num='count()').group_by('weekday')
This queryset is translated to:
SELECT toDayOfWeek(birthday) AS weekday, count() AS num FROM person GROUP BY weekday
After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model.
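For instance, a short sketch (building on the examples above) that orders the aggregated rows and then pages through them:
qs = Person.objects_in(database).aggregate('last_name', num='count()')
qs = qs.order_by('-num')  # most common last names first
page = qs.paginate(page_num=1, page_size=10)  # pagination still works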
---
[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md)

526
docs/ref.md Normal file

@ -0,0 +1,526 @@
Class Reference
===============
infi.clickhouse_orm.database
----------------------------
### Database
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False)
Initializes a database instance. Unless it's readonly, the database will be
created on the ClickHouse server if it does not already exist.
- `db_name`: name of the database to connect to.
- `db_url`: URL of the ClickHouse server.
- `username`: optional connection credentials.
- `password`: optional connection credentials.
- `readonly`: use a read-only connection.
#### count(model_class, conditions=None)
Counts the number of records in the model's table.
- `model_class`: the model to count.
- `conditions`: optional SQL conditions (contents of the WHERE clause).
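For example, a hypothetical usage sketch (the `Person` model is assumed):
db.count(Person)  # count all records
db.count(Person, conditions="height > 1.65")  # count records matching a WHERE clause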
#### create_database()
Creates the database on the ClickHouse server if it does not already exist.
#### create_table(model_class)
Creates a table for the given model class, if it does not exist already.
#### drop_database()
Deletes the database on the ClickHouse server.
#### drop_table(model_class)
Drops the database table of the given model class, if it exists.
#### insert(model_instances, batch_size=1000)
Insert records into the database.
- `model_instances`: any iterable containing instances of a single model class.
- `batch_size`: number of records to send per chunk (use a lower number if your records are very large).
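A minimal sketch, assuming a `Person` model:
people = [Person(first_name='Dan'), Person(first_name='Suzy')]
db.insert(people)  # uses the default batch_size of 1000
db.insert(people, batch_size=100)  # smaller chunks, e.g. for very large records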
#### migrate(migrations_package_name, up_to=9999)
Executes schema migrations.
- `migrations_package_name`: fully qualified name of the Python package
containing the migrations.
- `up_to`: number of the last migration to apply.
#### paginate(model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None)
Selects records and returns a single page of model instances.
- `model_class`: the model class matching the query's table,
or `None` for getting back instances of an ad-hoc model.
- `order_by`: columns to use for sorting the query (contents of the ORDER BY clause).
- `page_num`: the page number (1-based), or -1 to get the last page.
- `page_size`: number of records to return per page.
- `conditions`: optional SQL conditions (contents of the WHERE clause).
- `settings`: query settings to send as HTTP GET parameters
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
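For example, a sketch using the `Person` model from earlier examples:
page = db.paginate(Person, order_by='last_name', page_num=1, page_size=100)
print('%d objects in %d pages' % (page.number_of_objects, page.pages_total))
for person in page.objects:
    print(person.last_name)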
#### raw(query, settings=None, stream=False)
Performs a query and returns its output as text.
- `query`: the SQL query to execute.
- `settings`: query settings to send as HTTP GET parameters
- `stream`: if true, the HTTP response from ClickHouse will be streamed.
#### select(query, model_class=None, settings=None)
Performs a query and returns a generator of model instances.
- `query`: the SQL query to execute.
- `model_class`: the model class matching the query's table,
or `None` for getting back instances of an ad-hoc model.
- `settings`: query settings to send as HTTP GET parameters
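A hypothetical query, using the `$db` placeholder and the `Person` model from earlier examples:
for person in db.select("SELECT * FROM $db.person WHERE height > 1.70", model_class=Person):
    print(person.first_name)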
### DatabaseException
Extends Exception
Raised when a database operation fails.
infi.clickhouse_orm.models
--------------------------
### Model
A base class for ORM models.
#### Model(**kwargs)
Creates a model instance, using keyword arguments as field values.
Since values are immediately converted to their Pythonic type,
invalid values will cause a `ValueError` to be raised.
Unrecognized field names will cause an `AttributeError`.
#### Model.create_table_sql(db_name)
Returns the SQL command for creating a table for this model.
#### Model.drop_table_sql(db_name)
Returns the SQL command for deleting this model's table.
#### Model.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None)
Create a model instance from a tab-separated line. The line may or may not include a newline.
The `field_names` list must match the fields defined in the model, but does not have to include all of them.
If omitted, it is assumed to be the names of all fields in the model, in order of definition.
- `line`: the TSV-formatted data.
- `field_names`: names of the model fields in the data.
- `timezone_in_use`: the timezone to use when parsing dates and datetimes.
- `database`: if given, sets the database that this instance belongs to.
#### get_database()
Gets the `Database` that this model instance belongs to.
Returns `None` unless the instance was read from the database or written to it.
#### get_field(name)
Gets a `Field` instance given its name, or `None` if not found.
#### Model.objects_in(database)
Returns a `QuerySet` for selecting instances of this model class.
#### set_database(db)
Sets the `Database` that this model instance belongs to.
This is done automatically when the instance is read from the database or written to it.
#### Model.table_name()
Returns the model's database table name. By default this is the
class name converted to lowercase. Override this if you want to use
a different table name.
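A sketch of overriding the default name:
class Person(models.Model):
    # field and engine definitions omitted
    @classmethod
    def table_name(cls):
        return 'people'  # instead of the default 'person'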
#### to_dict(include_readonly=True, field_names=None)
Returns the instance's column values as a dict.
- `include_readonly`: if false, returns only fields that can be inserted into the database.
- `field_names`: an iterable of field names to return (optional)
#### to_tsv(include_readonly=True)
Returns the instance's column values as a tab-separated line. A newline is not included.
- `include_readonly`: if false, returns only fields that can be inserted into the database.
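A brief illustration of both serialization methods, assuming the `Person` model:
suzy = Person(first_name='Suzy', last_name='Jones')
suzy.to_dict(field_names=('first_name', 'last_name'))  # {'first_name': 'Suzy', 'last_name': 'Jones'}
suzy.to_tsv()  # the field values separated by tabs, without a trailing newline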
### BufferModel
Extends Model
#### BufferModel(**kwargs)
Creates a model instance, using keyword arguments as field values.
Since values are immediately converted to their Pythonic type,
invalid values will cause a `ValueError` to be raised.
Unrecognized field names will cause an `AttributeError`.
#### BufferModel.create_table_sql(db_name)
Returns the SQL command for creating a table for this model.
#### BufferModel.drop_table_sql(db_name)
Returns the SQL command for deleting this model's table.
#### BufferModel.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None)
Create a model instance from a tab-separated line. The line may or may not include a newline.
The `field_names` list must match the fields defined in the model, but does not have to include all of them.
If omitted, it is assumed to be the names of all fields in the model, in order of definition.
- `line`: the TSV-formatted data.
- `field_names`: names of the model fields in the data.
- `timezone_in_use`: the timezone to use when parsing dates and datetimes.
- `database`: if given, sets the database that this instance belongs to.
#### get_database()
Gets the `Database` that this model instance belongs to.
Returns `None` unless the instance was read from the database or written to it.
#### get_field(name)
Gets a `Field` instance given its name, or `None` if not found.
#### BufferModel.objects_in(database)
Returns a `QuerySet` for selecting instances of this model class.
#### set_database(db)
Sets the `Database` that this model instance belongs to.
This is done automatically when the instance is read from the database or written to it.
#### BufferModel.table_name()
Returns the model's database table name. By default this is the
class name converted to lowercase. Override this if you want to use
a different table name.
#### to_dict(include_readonly=True, field_names=None)
Returns the instance's column values as a dict.
- `include_readonly`: if false, returns only fields that can be inserted into the database.
- `field_names`: an iterable of field names to return (optional)
#### to_tsv(include_readonly=True)
Returns the instance's column values as a tab-separated line. A newline is not included.
- `include_readonly`: if false, returns only fields that can be inserted into the database.
infi.clickhouse_orm.fields
--------------------------
### Field
Abstract base class for all field types.
#### Field(default=None, alias=None, materialized=None)
### StringField
Extends Field
#### StringField(default=None, alias=None, materialized=None)
### DateField
Extends Field
#### DateField(default=None, alias=None, materialized=None)
### DateTimeField
Extends Field
#### DateTimeField(default=None, alias=None, materialized=None)
### BaseIntField
Extends Field
Abstract base class for all integer-type fields.
#### BaseIntField(default=None, alias=None, materialized=None)
### BaseFloatField
Extends Field
Abstract base class for all float-type fields.
#### BaseFloatField(default=None, alias=None, materialized=None)
### BaseEnumField
Extends Field
Abstract base class for all enum-type fields.
#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None)
### ArrayField
Extends Field
#### ArrayField(inner_field, default=None, alias=None, materialized=None)
### FixedStringField
Extends StringField
#### FixedStringField(length, default=None, alias=None, materialized=None)
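A hypothetical field definition inside a model class:
code = fields.FixedStringField(8)  # a FixedString(8) column; values longer than 8 bytes raise a ValueError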
### UInt8Field
Extends BaseIntField
#### UInt8Field(default=None, alias=None, materialized=None)
### UInt16Field
Extends BaseIntField
#### UInt16Field(default=None, alias=None, materialized=None)
### UInt32Field
Extends BaseIntField
#### UInt32Field(default=None, alias=None, materialized=None)
### UInt64Field
Extends BaseIntField
#### UInt64Field(default=None, alias=None, materialized=None)
### Int8Field
Extends BaseIntField
#### Int8Field(default=None, alias=None, materialized=None)
### Int16Field
Extends BaseIntField
#### Int16Field(default=None, alias=None, materialized=None)
### Int32Field
Extends BaseIntField
#### Int32Field(default=None, alias=None, materialized=None)
### Int64Field
Extends BaseIntField
#### Int64Field(default=None, alias=None, materialized=None)
### Float32Field
Extends BaseFloatField
#### Float32Field(default=None, alias=None, materialized=None)
### Float64Field
Extends BaseFloatField
#### Float64Field(default=None, alias=None, materialized=None)
### Enum8Field
Extends BaseEnumField
#### Enum8Field(enum_cls, default=None, alias=None, materialized=None)
### Enum16Field
Extends BaseEnumField
#### Enum16Field(enum_cls, default=None, alias=None, materialized=None)
infi.clickhouse_orm.engines
---------------------------
### Engine
### TinyLog
Extends Engine
### Log
Extends Engine
### Memory
Extends Engine
### MergeTree
Extends Engine
#### MergeTree(date_col, key_cols, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None)
### Buffer
Extends Engine
Buffers the data to write in RAM, periodically flushing it to another table.
Must be used in conjunction with a `BufferModel`.
Read more here: https://clickhouse.yandex/reference_en.html#Buffer
#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000)
### CollapsingMergeTree
Extends MergeTree
#### CollapsingMergeTree(date_col, key_cols, sign_col, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None)
### SummingMergeTree
Extends MergeTree
#### SummingMergeTree(date_col, key_cols, summing_cols=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None)
### ReplacingMergeTree
Extends MergeTree
#### ReplacingMergeTree(date_col, key_cols, ver_col=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None)
infi.clickhouse_orm.query
-------------------------
### QuerySet
#### QuerySet(model_cls, database)
#### conditions_as_sql()
Returns the contents of the queryset's WHERE clause.
#### count()
Returns the number of matching model instances.
#### exclude(**kwargs)
Returns a new QuerySet instance that excludes all rows matching the conditions.
#### filter(**kwargs)
Returns a new QuerySet instance that includes only rows matching the conditions.
#### only(*field_names)
Limit the query to return only the specified field names.
Useful when there are large fields that are not needed,
or for creating a subquery to use with an IN operator.
#### order_by(*field_names)
Returns a new QuerySet instance with the ordering changed.
#### order_by_as_sql()
Returns the contents of the queryset's ORDER BY clause.
#### query()
Returns the queryset as SQL.


@ -1,17 +1,14 @@
Migrations
==========
Schema Migrations
=================
Over time, the ORM models in your application may change. Migrations provide a way to modify the database
tables according to the changes in your models, without writing raw SQL.
Over time, the ORM models in your application may change. Migrations provide a way to modify the database tables according to the changes in your models, without writing raw SQL.
The migrations that were applied to the database are recorded in the ``infi_clickhouse_orm_migrations`` table,
so migrating the database will only apply any missing migrations.
The migrations that were applied to the database are recorded in the `infi_clickhouse_orm_migrations` table, so migrating the database will only apply any missing migrations.
Writing Migrations
------------------
To write migrations, create a Python package. Then create a python file for the initial migration. The migration
files must begin with a four-digit number, and will be applied in sequence. For example::
To write migrations, create a Python package. Then create a Python file for the initial migration. The migration files must begin with a four-digit number, and will be applied in sequence. For example:
analytics
|
@ -23,7 +20,7 @@ files must begin with a four-digit number, and will be applied in sequence. For
|
+-- 0002_add_user_agents_table.py
Each migration file is expected to contain a list of ``operations``, for example::
Each migration file is expected to contain a list of `operations`, for example:
from infi.clickhouse_orm import migrations
from analytics import models
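A minimal sketch of such an `operations` list (the model names here are hypothetical):
operations = [
    migrations.CreateTable(models.Visits),
    migrations.CreateTable(models.Visitors)
]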
@ -45,21 +42,24 @@ A migration operation that drops the table of a given model class.
**AlterTable**
A migration operation that compares the table of a given model class to
the model's fields, and alters the table to match the model. The operation can:
A migration operation that compares the table of a given model class to the model's fields, and alters the table to match the model. The operation can:
- add new columns
- drop obsolete columns
- modify column types
Default values are not altered by this operation.
Running Migrations
------------------
To migrate a database, create a ``Database`` instance and call its ``migrate`` method with the package
name containing your migrations::
To migrate a database, create a `Database` instance and call its `migrate` method with the package name containing your migrations:
Database('analytics_db').migrate('analytics.analytics_migrations')
Note that you may have more than one migrations package.
---
[<< Table Engines](table_engines.md) | [Table of Contents](toc.md) | [System Models >>](system_models.md)

47
docs/system_models.md Normal file

@ -0,0 +1,47 @@
System Models
=============
[Clickhouse docs](https://clickhouse.yandex/reference_en.html#System%20tables).
System models are read-only models that implement part of the system's functionality and provide access to information about how the system is working.
Currently the following system models are supported:
| Class | DB Table | Comments
| ------------ | -------------- | ---------------------------------------------------
| SystemPart | system.parts | Gives methods to work with partitions. See below.
Partitions and Parts
--------------------
[ClickHouse docs](https://clickhouse.yandex/reference_en.html#Manipulations%20with%20partitions%20and%20parts).
A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part.
| Method | Parameters | Comments
| --------------------- | ------------------------- | -----------------------------------------------------------------------------------------------
| get(static) | database, conditions="" | Gets database partitions, filtered by conditions
| get_active(static) | database, conditions="" | Gets only active (not detached or dropped) partitions, filtered by conditions
| detach | settings=None | Detaches the partition. Settings is a dict of params to pass to http request
| drop | settings=None | Drops the partition. Settings is a dict of params to pass to http request
| attach | settings=None | Attaches already detached partition. Settings is a dict of params to pass to http request
| freeze | settings=None | Freezes (makes backup) of the partition. Settings is a dict of params to pass to http request
| fetch | settings=None | Fetches the partition. Settings is a dict of params to pass to http request
Usage example:
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.system_models import SystemPart
db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database
if len(partitions) > 0:
partitions = sorted(partitions, key=lambda obj: obj.name) # Partition names have the form YYYYMM, so this sorts them chronologically
partitions[0].freeze() # Make a backup in /opt/clickhouse/shadow directory
partitions[0].drop() # Drop the partition
`Note`: system.parts stores information for all databases. For correctness, the SystemPart model is designed to fetch only the parts belonging to the given database instance.
---
[<< Schema Migrations](schema_migrations.md) | [Table of Contents](toc.md) | [Contributing >>](contributing.md)

92
docs/table_engines.md Normal file

@ -0,0 +1,92 @@
Table Engines
=============
See: [ClickHouse Documentation](https://clickhouse.yandex/reference_en.html#Table+engines)
Each model must have an engine instance, used when creating the table in ClickHouse.
The following engines are supported by the ORM:
- TinyLog
- Log
- Memory
- MergeTree / ReplicatedMergeTree
- CollapsingMergeTree / ReplicatedCollapsingMergeTree
- SummingMergeTree / ReplicatedSummingMergeTree
- ReplacingMergeTree / ReplicatedReplacingMergeTree
- Buffer
Simple Engines
--------------
`TinyLog`, `Log` and `Memory` engines do not require any parameters:
engine = engines.TinyLog()
engine = engines.Log()
engine = engines.Memory()
Engines in the MergeTree Family
-------------------------------
To define a `MergeTree` engine, supply the date column name and the names (or expressions) for the key columns:
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'))
You may also provide a sampling expression:
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)')
A `CollapsingMergeTree` engine is defined in a similar manner, but also requires a sign column:
engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')
For a `SummingMergeTree` you can optionally specify the summing columns:
engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
summing_cols=('Shows', 'Clicks', 'Cost'))
For a `ReplacingMergeTree` you can optionally specify the version column:
engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')
### Data Replication
Any of the above engines can be converted to a replicated engine (e.g. `ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`:
engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'),
replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
replica_name='{replica}')
Buffer Engine
-------------
A `Buffer` engine is only used in conjunction with a `BufferModel`.
The model should be a subclass of both `models.BufferModel` and the main model.
The main model is also passed to the engine:
class PersonBuffer(models.BufferModel, Person):
engine = engines.Buffer(Person)
Additional buffer parameters can optionally be specified:
engine = engines.Buffer(Person, num_layers=16, min_time=10,
max_time=100, min_rows=10000, max_rows=1000000,
min_bytes=10000000, max_bytes=100000000)
You can then insert objects into the buffer model, and ClickHouse will handle them properly:
db.create_table(PersonBuffer)
suzy = PersonBuffer(first_name='Suzy', last_name='Jones')
dan = PersonBuffer(first_name='Dan', last_name='Schwartz')
db.insert([dan, suzy])
---
[<< Field Types](field_types.md) | [Table of Contents](toc.md) | [Schema Migrations >>](schema_migrations.md)

94
docs/toc.md Normal file

@ -0,0 +1,94 @@
# Table of Contents
* [Overview](index.md#overview)
* [Installation](index.md#installation)
* [Models and Databases](models_and_databases.md#models-and-databases)
* [Defining Models](models_and_databases.md#defining-models)
* [Table Names](models_and_databases.md#table-names)
* [Using Models](models_and_databases.md#using-models)
* [Inserting to the Database](models_and_databases.md#inserting-to-the-database)
* [Reading from the Database](models_and_databases.md#reading-from-the-database)
* [Reading without a Model](models_and_databases.md#reading-without-a-model)
* [SQL Placeholders](models_and_databases.md#sql-placeholders)
* [Counting](models_and_databases.md#counting)
* [Pagination](models_and_databases.md#pagination)
* [Querysets](querysets.md#querysets)
* [Filtering](querysets.md#filtering)
* [Using the in Operator](querysets.md#using-the-in-operator)
* [Counting and Checking Existence](querysets.md#counting-and-checking-existence)
* [Ordering](querysets.md#ordering)
* [Omitting Fields](querysets.md#omitting-fields)
* [Slicing](querysets.md#slicing)
* [Pagination](querysets.md#pagination)
* [Aggregation](querysets.md#aggregation)
* [Field Types](field_types.md#field-types)
* [DateTimeField and Time Zones](field_types.md#datetimefield-and-time-zones)
* [Working with enum fields](field_types.md#working-with-enum-fields)
* [Working with array fields](field_types.md#working-with-array-fields)
* [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields)
* [Working with nullable fields](field_types.md#working-with-nullable-fields)
* [Table Engines](table_engines.md#table-engines)
* [Simple Engines](table_engines.md#simple-engines)
* [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family)
* [Data Replication](table_engines.md#data-replication)
* [Buffer Engine](table_engines.md#buffer-engine)
* [Schema Migrations](schema_migrations.md#schema-migrations)
* [Writing Migrations](schema_migrations.md#writing-migrations)
* [Running Migrations](schema_migrations.md#running-migrations)
* [System Models](system_models.md#system-models)
* [Partitions and Parts](system_models.md#partitions-and-parts)
* [Contributing](contributing.md#contributing)
* [Building](contributing.md#building)
* [Tests](contributing.md#tests)
* [Class Reference](class_reference.md#class-reference)
* [infi.clickhouse_orm.database](class_reference.md#infi.clickhouse_orm.database)
* [Database](class_reference.md#database)
* [DatabaseException](class_reference.md#databaseexception)
* [infi.clickhouse_orm.models](class_reference.md#infi.clickhouse_orm.models)
* [Model](class_reference.md#model)
* [BufferModel](class_reference.md#buffermodel)
* [infi.clickhouse_orm.fields](class_reference.md#infi.clickhouse_orm.fields)
* [Field](class_reference.md#field)
* [StringField](class_reference.md#stringfield)
* [DateField](class_reference.md#datefield)
* [DateTimeField](class_reference.md#datetimefield)
* [BaseIntField](class_reference.md#baseintfield)
* [BaseFloatField](class_reference.md#basefloatfield)
* [BaseEnumField](class_reference.md#baseenumfield)
* [ArrayField](class_reference.md#arrayfield)
* [NullableField](class_reference.md#nullablefield)
* [FixedStringField](class_reference.md#fixedstringfield)
* [UInt8Field](class_reference.md#uint8field)
* [UInt16Field](class_reference.md#uint16field)
* [UInt32Field](class_reference.md#uint32field)
* [UInt64Field](class_reference.md#uint64field)
* [Int8Field](class_reference.md#int8field)
* [Int16Field](class_reference.md#int16field)
* [Int32Field](class_reference.md#int32field)
* [Int64Field](class_reference.md#int64field)
* [Float32Field](class_reference.md#float32field)
* [Float64Field](class_reference.md#float64field)
* [Enum8Field](class_reference.md#enum8field)
* [Enum16Field](class_reference.md#enum16field)
* [infi.clickhouse_orm.engines](class_reference.md#infi.clickhouse_orm.engines)
* [Engine](class_reference.md#engine)
* [TinyLog](class_reference.md#tinylog)
* [Log](class_reference.md#log)
* [Memory](class_reference.md#memory)
* [MergeTree](class_reference.md#mergetree)
* [Buffer](class_reference.md#buffer)
* [CollapsingMergeTree](class_reference.md#collapsingmergetree)
* [SummingMergeTree](class_reference.md#summingmergetree)
* [ReplacingMergeTree](class_reference.md#replacingmergetree)
* [infi.clickhouse_orm.query](class_reference.md#infi.clickhouse_orm.query)
* [QuerySet](class_reference.md#queryset)
* [AggregateQuerySet](class_reference.md#aggregatequeryset)

53
scripts/README.md Normal file

@ -0,0 +1,53 @@
This directory contains various scripts for use while developing.
generate_toc
------------
Generates the table of contents (toc.md). Requires Pandoc.
Usage:
cd docs
../scripts/generate_toc.sh
html_to_markdown_toc.py
-----------------------
Used by generate_toc.
docs2html
---------
Converts markdown docs to html for preview. Requires Pandoc.
Usage:
cd docs
../scripts/docs2html.sh
generate_ref
------------
Generates a class reference.
Usage:
cd docs
../bin/python ../scripts/generate_ref.py > class_reference.md
generate_all
------------
Does everything:
- Generates the class reference using generate_ref
- Generates the table of contents using generate_toc
- Converts to HTML for visual inspection using docs2html
Usage:
cd docs
../scripts/generate_all.sh
test_python3
------------
Creates a Python 3 virtualenv, clones the project into it, and runs the tests.
Usage:
./test_python3.sh

13
scripts/docs2html.sh Executable file

@ -0,0 +1,13 @@
mkdir -p ../htmldocs
find ./ -iname "*.md" -type f -exec sh -c 'echo "Converting ${0}"; pandoc "${0}" -s -o "../htmldocs/${0%.md}.html"' {} \;
echo "Converting README.md"
pandoc ../README.md -s -o "../htmldocs/README.html"
echo "Converting CHANGELOG.md"
pandoc ../CHANGELOG.md -s -o "../htmldocs/CHANGELOG.html"
echo "Fixing links"
sed -i 's/\.md/\.html/g' ../htmldocs/*.html

8
scripts/generate_all.sh Executable file

@ -0,0 +1,8 @@
# Class reference
../bin/python ../scripts/generate_ref.py > class_reference.md
# Table of contents
../scripts/generate_toc.sh
# Convert to HTML for visual inspection
../scripts/docs2html.sh

135
scripts/generate_ref.py Normal file

@ -0,0 +1,135 @@
import inspect
from collections import namedtuple
DefaultArgSpec = namedtuple('DefaultArgSpec', 'has_default default_value')
def _get_default_arg(args, defaults, arg_index):
""" Method that determines if an argument has default value or not,
and if yes what is the default value for the argument
:param args: array of arguments, eg: ['first_arg', 'second_arg', 'third_arg']
:param defaults: array of default values, eg: (42, 'something')
:param arg_index: index of the argument in the argument array for which,
this function checks if a default value exists or not. And if default value
exists it would return the default value. Example argument: 1
:return: Tuple of whether there is a default or not, and if yes the default
value, eg: for index 1, i.e. for "second_arg", this function returns (True, 42)
"""
if not defaults:
return DefaultArgSpec(False, None)
args_with_no_defaults = len(args) - len(defaults)
if arg_index < args_with_no_defaults:
return DefaultArgSpec(False, None)
else:
value = defaults[arg_index - args_with_no_defaults]
if (type(value) is str):
value = '"%s"' % value
return DefaultArgSpec(True, value)
def get_method_sig(method):
""" Given a function, it returns a string that pretty much looks how the
function signature would be written in python.
:param method: a python method
:return: A string similar describing the pythong method signature.
eg: "my_method(first_argArg, second_arg=42, third_arg='something')"
"""
# The return value of ArgSpec is a bit weird, as the list of arguments and
# list of defaults are returned in separate arrays.
# eg: ArgSpec(args=['first_arg', 'second_arg', 'third_arg'],
# varargs=None, keywords=None, defaults=(42, 'something'))
argspec = inspect.getargspec(method)
arg_index=0
args = []
# Use the args and defaults array returned by argspec and find out
# which arguments have defaults
for arg in argspec.args:
default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)
if default_arg.has_default:
args.append("%s=%s" % (arg, default_arg.default_value))
else:
args.append(arg)
arg_index += 1
if argspec.varargs:
args.append('*' + argspec.varargs)
if argspec.keywords:
args.append('**' + argspec.keywords)
return "%s(%s)" % (method.__name__, ", ".join(args[1:]))
def docstring(obj):
doc = (obj.__doc__ or '').rstrip()
if doc:
lines = doc.split('\n')
# Find the length of the whitespace prefix common to all non-empty lines
indentation = min(len(line) - len(line.lstrip()) for line in lines if line.strip())
# Output the lines without the indentation
for line in lines:
print line[indentation:]
print
def class_doc(cls, list_methods=True):
bases = ', '.join([b.__name__ for b in cls.__bases__])
print '###', cls.__name__
print
if bases != 'object':
print 'Extends', bases
print
docstring(cls)
for name, method in inspect.getmembers(cls, inspect.ismethod):
if name == '__init__':
# Initializer
print '####', get_method_sig(method).replace(name, cls.__name__)
elif name[0] == '_':
# Private method
continue
elif method.__self__ == cls:
# Class method
if not list_methods:
continue
print '#### %s.%s' % (cls.__name__, get_method_sig(method))
else:
# Regular method
if not list_methods:
continue
print '####', get_method_sig(method)
print
docstring(method)
print
def module_doc(classes, list_methods=True):
mdl = classes[0].__module__
print mdl
print '-' * len(mdl)
print
for cls in classes:
class_doc(cls, list_methods)
def all_subclasses(cls):
return cls.__subclasses__() + [g for s in cls.__subclasses__() for g in all_subclasses(s)]
if __name__ == '__main__':
from infi.clickhouse_orm import database
from infi.clickhouse_orm import fields
from infi.clickhouse_orm import engines
from infi.clickhouse_orm import models
from infi.clickhouse_orm import query
print 'Class Reference'
print '==============='
print
module_doc([database.Database, database.DatabaseException])
module_doc([models.Model, models.BufferModel])
module_doc([fields.Field] + all_subclasses(fields.Field), False)
module_doc([engines.Engine] + all_subclasses(engines.Engine), False)
module_doc([query.QuerySet, query.AggregateQuerySet])

17
scripts/generate_toc.sh Executable file

@ -0,0 +1,17 @@
generate_one() {
# Converts Markdown to HTML using Pandoc, and then extracts the header tags
pandoc "$1" | python "../scripts/html_to_markdown_toc.py" "$1" >> toc.md
}
printf "# Table of Contents\n\n" > toc.md
generate_one "index.md"
generate_one "models_and_databases.md"
generate_one "querysets.md"
generate_one "field_types.md"
generate_one "table_engines.md"
generate_one "schema_migrations.md"
generate_one "system_models.md"
generate_one "contributing.md"
generate_one "class_reference.md"

scripts/html_to_markdown_toc.py Normal file

@ -0,0 +1,31 @@
from HTMLParser import HTMLParser
import sys
HEADER_TAGS = ('h1', 'h2', 'h3')
class HeadersToMarkdownParser(HTMLParser):
inside = None
text = ''
def handle_starttag(self, tag, attrs):
if tag.lower() in HEADER_TAGS:
self.inside = tag
def handle_endtag(self, tag):
if tag.lower() in HEADER_TAGS:
indent = ' ' * int(self.inside[1])
fragment = self.text.lower().replace(' ', '-')
print '%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment)
self.inside = None
self.text = ''
def handle_data(self, data):
if self.inside:
self.text += data
HeadersToMarkdownParser().feed(sys.stdin.read())
print

11
scripts/test_python3.sh Executable file

@ -0,0 +1,11 @@
#!/bin/bash
cd /tmp
rm -rf /tmp/orm_env*
virtualenv -p python3 /tmp/orm_env
cd /tmp/orm_env
source bin/activate
pip install infi.projector
git clone https://github.com/Infinidat/infi.clickhouse_orm.git
cd infi.clickhouse_orm
projector devenv build
bin/nosetests


@ -6,14 +6,14 @@ SETUP_INFO = dict(
author_email = '${infi.recipe.template.version:author_email}',
url = ${infi.recipe.template.version:homepage},
license = 'PSF',
license = 'BSD',
description = """${project:description}""",
# http://pypi.python.org/pypi?%3Aaction=list_classifiers
classifiers = [
"Intended Audience :: Developers",
"Intended Audience :: System Administrators",
"License :: OSI Approved :: Python Software Foundation License",
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 2.7",

src/infi/clickhouse_orm/database.py

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import requests
from collections import namedtuple
from .models import ModelBase
@ -16,39 +18,82 @@ Page = namedtuple('Page', 'objects number_of_objects pages_total number page_siz
class DatabaseException(Exception):
'''
Raised when a database operation fails.
'''
pass
class Database(object):
'''
Database instances connect to a specific ClickHouse database for running queries,
inserting data and other operations.
'''
def __init__(self, db_name, db_url='http://localhost:8123/', username=None, password=None, readonly=False):
def __init__(self, db_name, db_url='http://localhost:8123/',
username=None, password=None, readonly=False, autocreate=True):
'''
Initializes a database instance. Unless it's readonly, the database will be
created on the ClickHouse server if it does not already exist.
- `db_name`: name of the database to connect to.
- `db_url`: URL of the ClickHouse server.
- `username`: optional connection credentials.
- `password`: optional connection credentials.
- `readonly`: use a read-only connection.
- `autocreate`: automatically create the database if it does not exist (unless in readonly mode).
'''
self.db_name = db_name
self.db_url = db_url
self.username = username
self.password = password
self.readonly = readonly
if not self.readonly:
self.readonly = False
self.db_exists = True
if readonly:
self.connection_readonly = self._is_connection_readonly()
self.readonly = True
elif autocreate:
self.db_exists = False
self.create_database()
self.server_timezone = self._get_server_timezone()
def create_database(self):
'''
Creates the database on the ClickHouse server if it does not already exist.
'''
self._send('CREATE DATABASE IF NOT EXISTS `%s`' % self.db_name)
self.db_exists = True
def drop_database(self):
'''
Deletes the database on the ClickHouse server.
'''
self._send('DROP DATABASE `%s`' % self.db_name)
def create_table(self, model_class):
'''
Creates a table for the given model class, if it does not exist already.
'''
# TODO check that model has an engine
if model_class.readonly:
raise DatabaseException("You can't create read only table")
self._send(model_class.create_table_sql(self.db_name))
def drop_table(self, model_class):
'''
Drops the database table of the given model class, if it exists.
'''
if model_class.readonly:
raise DatabaseException("You can't drop read only table")
self._send(model_class.drop_table_sql(self.db_name))
def insert(self, model_instances, batch_size=1000):
'''
Insert records into the database.
- `model_instances`: any iterable containing instances of a single model class.
- `batch_size`: number of records to send per chunk (use a lower number if your records are very large).
'''
from six import next
from io import BytesIO
i = iter(model_instances)
@ -86,6 +131,12 @@ class Database(object):
self._send(gen())
def count(self, model_class, conditions=None):
'''
Counts the number of records in the model's table.
- `model_class`: the model to count.
- `conditions`: optional SQL conditions (contents of the WHERE clause).
'''
query = 'SELECT count() FROM $table'
if conditions:
query += ' WHERE ' + conditions
@ -94,6 +145,14 @@ class Database(object):
return int(r.text) if r.text else 0
def select(self, query, model_class=None, settings=None):
'''
Performs a query and returns a generator of model instances.
- `query`: the SQL query to execute.
- `model_class`: the model class matching the query's table,
or `None` for getting back instances of an ad-hoc model.
- `settings`: query settings to send as HTTP GET parameters
'''
query += ' FORMAT TabSeparatedWithNamesAndTypes'
query = self._substitute(query, model_class)
r = self._send(query, settings, True)
@ -107,17 +166,31 @@ class Database(object):
yield model_class.from_tsv(line, field_names, self.server_timezone, self)
def raw(self, query, settings=None, stream=False):
"""
Performs raw query to database. Returns its output
:param query: Query to execute
:param settings: Query settings to send as query GET parameters
:param stream: If flag is true, Http response from ClickHouse will be streamed.
:return: Query execution result
"""
'''
Performs a query and returns its output as text.
- `query`: the SQL query to execute.
- `settings`: query settings to send as HTTP GET parameters
- `stream`: if true, the HTTP response from ClickHouse will be streamed.
'''
query = self._substitute(query, None)
return self._send(query, settings=settings, stream=stream).text
def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None):
'''
Selects records and returns a single page of model instances.
- `model_class`: the model class matching the query's table,
or `None` for getting back instances of an ad-hoc model.
- `order_by`: columns to use for sorting the query (contents of the ORDER BY clause).
- `page_num`: the page number (1-based), or -1 to get the last page.
- `page_size`: number of records to return per page.
- `conditions`: optional SQL conditions (contents of the WHERE clause).
- `settings`: query settings to send as HTTP GET parameters
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
'''
count = self.count(model_class, conditions)
pages_total = int(ceil(count / float(page_size)))
if page_num == -1:
@ -140,6 +213,13 @@ class Database(object):
)
def migrate(self, migrations_package_name, up_to=9999):
'''
Executes schema migrations.
- `migrations_package_name` - fully qualified name of the Python package
containing the migrations.
- `up_to` - number of the last migration to apply.
'''
from .migrations import MigrationHistory
logger = logging.getLogger('migrations')
applied_migrations = self._get_applied_migrations(migrations_package_name)
@ -161,7 +241,7 @@ class Database(object):
return set(obj.module_name for obj in self.select(query))
def _send(self, data, settings=None, stream=False):
if PY3 and isinstance(data, string_types):
if isinstance(data, string_types):
data = data.encode('utf-8')
params = self._build_params(settings)
r = requests.post(self.db_url, params=params, data=data, stream=stream)
@ -171,11 +251,14 @@ class Database(object):
def _build_params(self, settings):
params = dict(settings or {})
if self.db_exists:
params['database'] = self.db_name
if self.username:
params['user'] = self.username
if self.password:
params['password'] = self.password
if self.readonly:
# Send the readonly flag, unless the connection is already readonly (to prevent db error)
if self.readonly and not self.connection_readonly:
params['readonly'] = '1'
return params
@ -197,3 +280,7 @@ class Database(object):
except DatabaseException:
logger.exception('Cannot determine server timezone, assuming UTC')
return pytz.utc
def _is_connection_readonly(self):
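# Ask ClickHouse for the value of the 'readonly' setting on this connection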
r = self._send("SELECT value FROM system.settings WHERE name = 'readonly'")
return r.text.strip() != '0'

src/infi/clickhouse_orm/engines.py

@ -1,8 +1,29 @@
from __future__ import unicode_literals
from .utils import comma_join
class Engine(object):
def create_table_sql(self):
raise NotImplementedError()
raise NotImplementedError() # pragma: no cover
class TinyLog(Engine):
def create_table_sql(self):
return 'TinyLog'
class Log(Engine):
def create_table_sql(self):
return 'Log'
class Memory(Engine):
def create_table_sql(self):
return 'Memory'
class MergeTree(Engine):
@ -23,7 +44,7 @@ class MergeTree(Engine):
if self.replica_name:
name = 'Replicated' + name
params = self._build_sql_params()
return '%s(%s)' % (name, ', '.join(params))
return '%s(%s)' % (name, comma_join(params))
def _build_sql_params(self):
params = []
@ -32,7 +53,7 @@ class MergeTree(Engine):
params.append(self.date_col)
if self.sampling_expr:
params.append(self.sampling_expr)
params.append('(%s)' % ', '.join(self.key_cols))
params.append('(%s)' % comma_join(self.key_cols))
params.append(str(self.index_granularity))
return params
@ -61,7 +82,7 @@ class SummingMergeTree(MergeTree):
def _build_sql_params(self):
params = super(SummingMergeTree, self)._build_sql_params()
if self.summing_cols:
params.append('(%s)' % ', '.join(self.summing_cols))
params.append('(%s)' % comma_join(self.summing_cols))
return params
@ -80,10 +101,12 @@ class ReplacingMergeTree(MergeTree):
class Buffer(Engine):
"""Here we define Buffer engine
Read more here https://clickhouse.yandex/reference_en.html#Buffer
"""
Buffers the data to write in RAM, periodically flushing it to another table.
Must be used in conjunction with a `BufferModel`.
Read more [here](https://clickhouse.yandex/reference_en.html#Buffer).
"""
#Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
def __init__(self, main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000):
self.main_model = main_model
@ -97,7 +120,7 @@ class Buffer(Engine):
def create_table_sql(self, db_name):
# Overridden create_table_sql example:
#sql = 'ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)'
sql = 'ENGINE = Buffer(`%s`, `%s`, %d, %d, %d, %d, %d, %d, %d)' % (
db_name, self.main_model.table_name(), self.num_layers,

src/infi/clickhouse_orm/fields.py

@ -1,14 +1,17 @@
from __future__ import unicode_literals
from six import string_types, text_type, binary_type
import datetime
import pytz
import time
from calendar import timegm
from .utils import escape, parse_array
from .utils import escape, parse_array, comma_join
class Field(object):
'''
Abstract base class for all field types.
'''
creation_counter = 0
class_default = 0
db_type = None
@ -16,9 +19,9 @@ class Field(object):
def __init__(self, default=None, alias=None, materialized=None):
assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \
"Only one of default, alias and materialized parameters can be given"
assert alias is None or isinstance(alias, str) and alias != "",\
assert alias is None or isinstance(alias, string_types) and alias != "",\
"Alias field must be string field name, if given"
assert materialized is None or isinstance(materialized, str) and alias != "",\
assert materialized is None or isinstance(materialized, string_types) and materialized != "",\
"Materialized field must be string, if given"
self.creation_counter = Field.creation_counter
@ -90,6 +93,24 @@ class StringField(Field):
raise ValueError('Invalid value for %s: %r' % (self.__class__.__name__, value))
class FixedStringField(StringField):
def __init__(self, length, default=None, alias=None, materialized=None):
self._length = length
self.db_type = 'FixedString(%d)' % length
super(FixedStringField, self).__init__(default, alias, materialized)
def to_python(self, value, timezone_in_use):
value = super(FixedStringField, self).to_python(value, timezone_in_use)
return value.rstrip('\0')
def validate(self, value):
if isinstance(value, text_type):
value = value.encode('UTF-8')
if len(value) > self._length:
raise ValueError('Value of %d bytes is too long for FixedStringField(%d)' % (len(value), self._length))
class DateField(Field):
min_value = datetime.date(1970, 1, 1)
@ -147,7 +168,9 @@ class DateTimeField(Field):
class BaseIntField(Field):
'''
Abstract base class for all integer-type fields.
'''
def to_python(self, value, timezone_in_use):
try:
return int(value)
@ -155,7 +178,7 @@ class BaseIntField(Field):
raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value))
def to_db_string(self, value, quote=True):
# There's no need to call escape since numbers do not contain
# There's no need to call escape since numbers do not contain
# special characters, and never need quoting
return text_type(value)
@ -220,6 +243,9 @@ class Int64Field(BaseIntField):
class BaseFloatField(Field):
'''
Abstract base class for all float-type fields.
'''
def to_python(self, value, timezone_in_use):
try:
@ -228,7 +254,7 @@ class BaseFloatField(Field):
raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value))
def to_db_string(self, value, quote=True):
# There's no need to call escape since numbers do not contain
# There's no need to call escape since numbers do not contain
# special characters, and never need quoting
return text_type(value)
@ -244,6 +270,9 @@ class Float64Field(BaseFloatField):
class BaseEnumField(Field):
'''
Abstract base class for all enum-type fields.
'''
def __init__(self, enum_cls, default=None, alias=None, materialized=None):
self.enum_cls = enum_cls
@ -328,9 +357,38 @@ class ArrayField(Field):
def to_db_string(self, value, quote=True):
array = [self.inner_field.to_db_string(v, quote=True) for v in value]
return '[' + ', '.join(array) + ']'
return '[' + comma_join(array) + ']'
def get_sql(self, with_default=True):
from .utils import escape
return 'Array(%s)' % self.inner_field.get_sql(with_default=False)
class NullableField(Field):
class_default = None
def __init__(self, inner_field, default=None, alias=None, materialized=None,
extra_null_values=None):
self.inner_field = inner_field
self._null_values = [None]
if extra_null_values:
self._null_values.extend(extra_null_values)
super(NullableField, self).__init__(default, alias, materialized)
def to_python(self, value, timezone_in_use):
if value == '\\N' or value is None:
return None
return self.inner_field.to_python(value, timezone_in_use)
def validate(self, value):
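# None is always valid; otherwise delegate validation to the inner field (short-circuit)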
value is None or self.inner_field.validate(value)
def to_db_string(self, value, quote=True):
if value in self._null_values:
return '\\N'
return self.inner_field.to_db_string(value, quote=quote)
def get_sql(self, with_default=True):
from .utils import escape
return 'Nullable(%s)' % self.inner_field.get_sql(with_default=False)

src/infi/clickhouse_orm/migrations.py

@ -1,4 +1,4 @@
from .models import Model
from .models import Model, BufferModel
from .fields import DateField, StringField
from .engines import MergeTree
from .utils import escape
@ -15,7 +15,7 @@ class Operation(object):
'''
def apply(self, database):
raise NotImplementedError()
raise NotImplementedError() # pragma: no cover
class CreateTable(Operation):
@ -28,6 +28,8 @@ class CreateTable(Operation):
def apply(self, database):
logger.info(' Create table %s', self.model_class.table_name())
if issubclass(self.model_class, BufferModel):
database.create_table(self.model_class.engine.main_model)
database.create_table(self.model_class)

src/infi/clickhouse_orm/models.py

@ -1,3 +1,4 @@
from __future__ import unicode_literals
from logging import getLogger
from six import with_metaclass
@ -5,6 +6,7 @@ import pytz
from .fields import Field
from .utils import parse_tsv
from .query import QuerySet
logger = getLogger('clickhouse_orm')
@ -17,7 +19,7 @@ class ModelBase(type):
ad_hoc_model_cache = {}
def __new__(cls, name, bases, attrs):
new_cls = super(ModelBase, cls).__new__(cls, name, bases, attrs)
new_cls = super(ModelBase, cls).__new__(cls, str(name), bases, attrs)
# Collect fields from parent classes
base_fields = []
for base in bases:
@ -31,7 +33,7 @@ class ModelBase(type):
return new_cls
@classmethod
def create_ad_hoc_model(cls, fields):
def create_ad_hoc_model(cls, fields, model_name='AdHocModel'):
# fields is a list of tuples (name, db_type)
# Check if model exists in cache
fields = list(fields)
@ -42,7 +44,7 @@ class ModelBase(type):
attrs = {}
for name, db_type in fields:
attrs[name] = cls.create_ad_hoc_field(db_type)
model_class = cls.__new__(cls, 'AdHocModel', (Model,), attrs)
model_class = cls.__new__(cls, model_name, (Model,), attrs)
# Add the model class to the cache
cls.ad_hoc_model_cache[cache_key] = model_class
return model_class
@ -57,6 +59,14 @@ class ModelBase(type):
if db_type.startswith('Array'):
inner_field = cls.create_ad_hoc_field(db_type[6 : -1])
return orm_fields.ArrayField(inner_field)
# FixedString
if db_type.startswith('FixedString'):
length = int(db_type[12 : -1])
return orm_fields.FixedStringField(length)
# Nullable
if db_type.startswith('Nullable'):
inner_field = cls.create_ad_hoc_field(db_type[9 : -1])
return orm_fields.NullableField(inner_field)
# Simple fields
name = db_type + 'Field'
if not hasattr(orm_fields, name):
@ -66,7 +76,13 @@ class ModelBase(type):
class Model(with_metaclass(ModelBase)):
'''
A base class for ORM models.
A base class for ORM models. Each model class represent a ClickHouse table. For example:
class CPUStats(Model):
timestamp = DateTimeField()
cpu_id = UInt16Field()
cpu_percent = Float32Field()
engine = Memory()
'''
engine = None
@ -76,8 +92,8 @@ class Model(with_metaclass(ModelBase)):
'''
Creates a model instance, using keyword arguments as field values.
Since values are immediately converted to their Pythonic type,
invalid values will cause a ValueError to be raised.
Unrecognized field names will cause an AttributeError.
invalid values will cause a `ValueError` to be raised.
Unrecognized field names will cause an `AttributeError`.
'''
super(Model, self).__init__()
@ -98,7 +114,7 @@ class Model(with_metaclass(ModelBase)):
def __setattr__(self, name, value):
'''
When setting a field value, converts the value to its Pythonic type and validates it.
This may raise a ValueError.
This may raise a `ValueError`.
'''
field = self.get_field(name)
if field:
@ -107,26 +123,25 @@ class Model(with_metaclass(ModelBase)):
super(Model, self).__setattr__(name, value)
def set_database(self, db):
"""
Sets _database attribute for current model instance
:param db: Database instance
:return: None
"""
'''
Sets the `Database` that this model instance belongs to.
This is done automatically when the instance is read from the database or written to it.
'''
# This can not be imported globally due to circular import
from .database import Database
assert isinstance(db, Database), "database must be database.Database instance"
self._database = db
def get_database(self):
"""
Gets _database attribute for current model instance
:return: database.Database instance, model was inserted or selected from or None
"""
'''
Gets the `Database` that this model instance belongs to.
Returns `None` unless the instance was read from the database or written to it.
'''
return self._database
def get_field(self, name):
'''
Get a Field instance given its name, or None if not found.
Gets a `Field` instance given its name, or `None` if not found.
'''
field = getattr(self.__class__, name, None)
return field if isinstance(field, Field) else None
@ -134,7 +149,9 @@ class Model(with_metaclass(ModelBase)):
@classmethod
def table_name(cls):
'''
Returns the model's database table name.
Returns the model's database table name. By default this is the
class name converted to lowercase. Override this if you want to use
a different table name.
'''
return cls.__name__.lower()
@ -163,9 +180,13 @@ class Model(with_metaclass(ModelBase)):
def from_tsv(cls, line, field_names=None, timezone_in_use=pytz.utc, database=None):
'''
Create a model instance from a tab-separated line. The line may or may not include a newline.
The field_names list must match the fields defined in the model, but does not have to include all of them.
The `field_names` list must match the fields defined in the model, but does not have to include all of them.
If omitted, it is assumed to be the names of all fields in the model, in order of definition.
:param database: if given, model receives database
- `line`: the TSV-formatted data.
- `field_names`: names of the model fields in the data.
- `timezone_in_use`: the timezone to use when parsing dates and datetimes.
- `database`: if given, sets the database that this instance belongs to.
'''
from six import next
field_names = field_names or [name for name, field in cls._fields]
@ -184,7 +205,8 @@ class Model(with_metaclass(ModelBase)):
def to_tsv(self, include_readonly=True):
'''
Returns the instance's column values as a tab-separated line. A newline is not included.
:param bool include_readonly: If False, returns only fields, that can be inserted into database
- `include_readonly`: if false, returns only fields that can be inserted into the database.
'''
data = self.__dict__
fields = self._fields if include_readonly else self._writable_fields
@ -193,8 +215,9 @@ class Model(with_metaclass(ModelBase)):
def to_dict(self, include_readonly=True, field_names=None):
'''
Returns the instance's column values as a dict.
:param bool include_readonly: If False, returns only fields, that can be inserted into database
:param field_names: An iterable of field names to return
- `include_readonly`: if false, returns only fields that can be inserted into the database.
- `field_names`: an iterable of field names to return (optional)
'''
fields = self._fields if include_readonly else self._writable_fields
@ -204,7 +227,14 @@ class Model(with_metaclass(ModelBase)):
data = self.__dict__
return {name: data[name] for name, field in fields}
@classmethod
def objects_in(cls, database):
'''
Returns a `QuerySet` for selecting instances of this model class.
'''
return QuerySet(cls, database)
class BufferModel(Model):
@classmethod

src/infi/clickhouse_orm/query.py Normal file

@ -0,0 +1,425 @@
from __future__ import unicode_literals
import six
import pytz
from copy import copy
from math import ceil
from .utils import comma_join
# TODO
# - and/or between Q objects
# - check that field names are valid
# - operators for arrays: length, has, empty
class Operator(object):
"""
Base class for filtering operators.
"""
def to_sql(self, model_cls, field_name, value):
"""
Subclasses should implement this method. It returns an SQL string
that applies this operator on the given field and value.
"""
raise NotImplementedError # pragma: no cover
class SimpleOperator(Operator):
"""
A simple binary operator such as a=b, a<b, a>b etc.
"""
def __init__(self, sql_operator):
self._sql_operator = sql_operator
def to_sql(self, model_cls, field_name, value):
field = getattr(model_cls, field_name)
value = field.to_db_string(field.to_python(value, pytz.utc))
return ' '.join([field_name, self._sql_operator, value])
class InOperator(Operator):
"""
An operator that implements IN.
Accepts 3 different types of values:
- a list or tuple of simple values
- a string (used verbatim as the contents of the parenthesis)
- a queryset (subquery)
"""
def to_sql(self, model_cls, field_name, value):
field = getattr(model_cls, field_name)
if isinstance(value, QuerySet):
value = value.as_sql()
elif isinstance(value, six.string_types):
pass
else:
value = comma_join([field.to_db_string(field.to_python(v, pytz.utc)) for v in value])
return '%s IN (%s)' % (field_name, value)
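# Illustrative renderings of the three accepted value types:
#
#     first_name__in=['Connor', 'Courtney']   -> "first_name IN ('Connor', 'Courtney')"
#     first_name__in="'Connor', 'Courtney'"   -> "first_name IN ('Connor', 'Courtney')"
#     first_name__in=qs.only('last_name')     -> "first_name IN (SELECT `last_name` FROM ...)"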
class LikeOperator(Operator):
"""
A LIKE operator that matches the field to a given pattern. Can be
case sensitive or insensitive.
"""
def __init__(self, pattern, case_sensitive=True):
self._pattern = pattern
self._case_sensitive = case_sensitive
def to_sql(self, model_cls, field_name, value):
field = getattr(model_cls, field_name)
value = field.to_db_string(field.to_python(value, pytz.utc), quote=False)
value = value.replace('\\', '\\\\').replace('%', '\\\\%').replace('_', '\\\\_')
pattern = self._pattern.format(value)
if self._case_sensitive:
return '%s LIKE \'%s\'' % (field_name, pattern)
else:
return 'lowerUTF8(%s) LIKE lowerUTF8(\'%s\')' % (field_name, pattern)
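# Note that '%' and '_' are escaped in the *value* before it is substituted
# into the pattern, so e.g. contains='50%' matches a literal percent sign
# rather than acting as a wildcard (illustrative).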
class IExactOperator(Operator):
"""
An operator for case insensitive string comparison.
"""
def to_sql(self, model_cls, field_name, value):
field = getattr(model_cls, field_name)
value = field.to_db_string(field.to_python(value, pytz.utc))
return 'lowerUTF8(%s) = lowerUTF8(%s)' % (field_name, value)
class NotOperator(Operator):
"""
A wrapper around another operator, which negates it.
"""
def __init__(self, base_operator):
self._base_operator = base_operator
def to_sql(self, model_cls, field_name, value):
# Negate the base operator
return 'NOT (%s)' % self._base_operator.to_sql(model_cls, field_name, value)
# Define the set of builtin operators
_operators = {}
def register_operator(name, sql):
_operators[name] = sql
register_operator('eq', SimpleOperator('='))
register_operator('ne', SimpleOperator('!='))
register_operator('gt', SimpleOperator('>'))
register_operator('gte', SimpleOperator('>='))
register_operator('lt', SimpleOperator('<'))
register_operator('lte', SimpleOperator('<='))
register_operator('in', InOperator())
register_operator('not_in', NotOperator(InOperator()))
register_operator('contains', LikeOperator('%{}%'))
register_operator('startswith', LikeOperator('{}%'))
register_operator('endswith', LikeOperator('%{}'))
register_operator('icontains', LikeOperator('%{}%', False))
register_operator('istartswith', LikeOperator('{}%', False))
register_operator('iendswith', LikeOperator('%{}', False))
register_operator('iexact', IExactOperator())
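# Since `register_operator` is module-level, additional operators can be
# plugged in the same way. A hypothetical sketch (not part of the library):
#
#     register_operator('ne2', SimpleOperator('<>'))   # then: qs.filter(num__ne2=5)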
class FOV(object):
"""
An object for storing Field + Operator + Value.
"""
def __init__(self, field_name, operator, value):
self._field_name = field_name
self._operator = _operators[operator]
self._value = value
def to_sql(self, model_cls):
return self._operator.to_sql(model_cls, self._field_name, self._value)
class Q(object):
def __init__(self, **kwargs):
self._fovs = [self._build_fov(k, v) for k, v in six.iteritems(kwargs)]
self._negate = False
def _build_fov(self, key, value):
if '__' in key:
field_name, operator = key.rsplit('__', 1)
else:
field_name, operator = key, 'eq'
return FOV(field_name, operator, value)
def to_sql(self, model_cls):
if not self._fovs:
return '1'
sql = ' AND '.join(fov.to_sql(model_cls) for fov in self._fovs)
if self._negate:
sql = 'NOT (%s)' % sql
return sql
def __invert__(self):
q = copy(self)
q._negate = True
return q
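# Illustrative: each keyword is parsed as <field>__<operator> (the operator
# defaults to 'eq') and the resulting conditions are ANDed together:
#
#     Q(first_name='Ciaran', height__gt=1.7).to_sql(Person)
#     # -> "first_name = 'Ciaran' AND height > 1.7"
#     (~Q(first_name='Ciaran')).to_sql(Person)
#     # -> "NOT (first_name = 'Ciaran')"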
@six.python_2_unicode_compatible
class QuerySet(object):
"""
A queryset is an object that represents a database query using a specific `Model`.
It is lazy, meaning that it does not hit the database until you iterate over its
matching rows (model instances).
"""
def __init__(self, model_cls, database):
"""
Initializer. It is possible to create a queryset like this, but the standard
way is to use `MyModel.objects_in(database)`.
"""
self._model_cls = model_cls
self._database = database
self._order_by = []
self._q = []
self._fields = []
self._limits = None
def __iter__(self):
"""
Iterates over the model instances matching this queryset
"""
return self._database.select(self.as_sql(), self._model_cls)
def __bool__(self):
"""
Returns true if this queryset matches any rows.
"""
return bool(self.count())
def __nonzero__(self): # Python 2 compatibility
return type(self).__bool__(self)
def __str__(self):
return self.as_sql()
def __getitem__(self, s):
if isinstance(s, six.integer_types):
# Single index
assert s >= 0, 'negative indexes are not supported'
qs = copy(self)
qs._limits = (s, 1)
return six.next(iter(qs))
else:
# Slice
assert s.step in (None, 1), 'step is not supported in slices'
start = s.start or 0
stop = s.stop or 2**63 - 1
assert start >= 0 and stop >= 0, 'negative indexes are not supported'
assert start <= stop, 'start of slice cannot be smaller than its end'
qs = copy(self)
qs._limits = (start, stop - start)
return qs
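# Illustrative: slicing translates to ClickHouse's "LIMIT offset, count" form:
#
#     qs[10:20]   ->  LIMIT 10, 10   (a new queryset, still lazy)
#     qs[5]       ->  LIMIT 5, 1     (the single matching instance is returned)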
def as_sql(self):
"""
Returns the whole query as a SQL string.
"""
fields = '*'
if self._fields:
fields = comma_join('`%s`' % field for field in self._fields)
ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
limit = '\nLIMIT %d, %d' % self._limits if self._limits else ''
params = (fields, self._model_cls.table_name(),
self.conditions_as_sql(), ordering, limit)
return u'SELECT %s\nFROM `%s`\nWHERE %s%s%s' % params
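# Illustrative output for a filtered, ordered, sliced queryset over Person:
#
#     SELECT `first_name`, `last_name`
#     FROM `person`
#     WHERE height > 1.7
#     ORDER BY first_name
#     LIMIT 0, 10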
def order_by_as_sql(self):
"""
Returns the contents of the query's `ORDER BY` clause as a string.
"""
return comma_join([
'%s DESC' % field[1:] if field[0] == '-' else field
for field in self._order_by
])
def conditions_as_sql(self):
"""
Returns the contents of the query's `WHERE` clause as a string.
"""
if self._q:
return u' AND '.join([q.to_sql(self._model_cls) for q in self._q])
else:
return u'1'
def count(self):
"""
Returns the number of matching model instances.
"""
return self._database.count(self._model_cls, self.conditions_as_sql())
def order_by(self, *field_names):
"""
Returns a copy of this queryset with the ordering changed.
"""
qs = copy(self)
qs._order_by = field_names
return qs
def only(self, *field_names):
"""
Returns a copy of this queryset limited to the specified field names.
Useful when there are large fields that are not needed,
or for creating a subquery to use with an IN operator.
"""
qs = copy(self)
qs._fields = field_names
return qs
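# Illustrative (mirrors the tests): `only` is handy for IN subqueries -
#
#     qs.filter(first_name__in=qs.only('last_name'))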
def filter(self, **kwargs):
"""
Returns a copy of this queryset that includes only rows matching the conditions.
"""
qs = copy(self)
qs._q = list(self._q) + [Q(**kwargs)]
return qs
def exclude(self, **kwargs):
"""
Returns a copy of this queryset that excludes all rows matching the conditions.
"""
qs = copy(self)
qs._q = list(self._q) + [~Q(**kwargs)]
return qs
def paginate(self, page_num=1, page_size=100):
'''
Returns a single page of model instances that match the queryset.
Note that `order_by` should be used first, to ensure a correct
partitioning of records into pages.
- `page_num`: the page number (1-based), or -1 to get the last page.
- `page_size`: number of records to return per page.
The result is a namedtuple containing `objects` (list), `number_of_objects`,
`pages_total`, `number` (of the current page), and `page_size`.
'''
from .database import Page
count = self.count()
pages_total = int(ceil(count / float(page_size)))
if page_num == -1:
page_num = pages_total
elif page_num < 1:
raise ValueError('Invalid page number: %d' % page_num)
offset = (page_num - 1) * page_size
return Page(
objects=list(self[offset : offset + page_size]),
number_of_objects=count,
pages_total=pages_total,
number=page_num,
page_size=page_size
)
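# Usage sketch (not part of the original file): iterating over all pages of an
# ordered queryset; `process` is a hypothetical handler:
#
#     page_num = 1
#     while True:
#         page = qs.paginate(page_num, page_size=100)
#         for obj in page.objects:
#             process(obj)
#         if page_num == page.pages_total:
#             break
#         page_num += 1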
def aggregate(self, *args, **kwargs):
'''
Returns an `AggregateQuerySet` over this query, with `args` serving as
grouping fields and `kwargs` serving as calculated fields. At least one
calculated field is required. For example:
```
Event.objects_in(database).filter(date__gt='2017-08-01').aggregate('event_type', count='count()')
```
is equivalent to:
```
SELECT event_type, count() AS count FROM event
WHERE date > '2017-08-01'
GROUP BY event_type
```
'''
return AggregateQuerySet(self, args, kwargs)
class AggregateQuerySet(QuerySet):
"""
A queryset used for aggregation.
"""
def __init__(self, base_qs, grouping_fields, calculated_fields):
"""
Initializer. Normally you should not call this but rather use `QuerySet.aggregate()`.
The grouping fields should be a list/tuple of field names from the model. For example:
```
('event_type', 'event_subtype')
```
The calculated fields should be a mapping from name to a ClickHouse aggregation function. For example:
```
{'weekday': 'toDayOfWeek(event_date)', 'number_of_events': 'count()'}
```
At least one calculated field is required.
"""
super(AggregateQuerySet, self).__init__(base_qs._model_cls, base_qs._database)
assert calculated_fields, 'No calculated fields specified for aggregation'
self._fields = grouping_fields
self._grouping_fields = grouping_fields
self._calculated_fields = calculated_fields
self._order_by = list(base_qs._order_by)
self._q = list(base_qs._q)
self._limits = base_qs._limits
def group_by(self, *args):
"""
This method lets you specify the grouping fields explicitly. The `args` must
be names of grouping fields or calculated fields that this queryset was
created with.
"""
for name in args:
assert name in self._fields or name in self._calculated_fields, \
'Cannot group by `%s` since it is not included in the query' % name
qs = copy(self)
qs._grouping_fields = args
return qs
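# Illustrative (mirrors the tests): grouping by a calculated field rather than
# a model field -
#
#     Person.objects_in(db) \
#           .aggregate(weekday='toDayOfWeek(birthday)', count='count()') \
#           .group_by('weekday')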
def only(self, *field_names):
"""
This method is not supported on `AggregateQuerySet`.
"""
raise NotImplementedError('Cannot use "only" with AggregateQuerySet')
def aggregate(self, *args, **kwargs):
"""
This method is not supported on `AggregateQuerySet`.
"""
raise NotImplementedError('Cannot re-aggregate an AggregateQuerySet')
def as_sql(self):
"""
Returns the whole query as a SQL string.
"""
grouping = comma_join('`%s`' % field for field in self._grouping_fields)
fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()])
params = dict(
grouping=grouping or "''",
fields=fields,
table=self._model_cls.table_name(),
conds=self.conditions_as_sql()
)
sql = u'SELECT %(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params
if self._order_by:
sql += '\nORDER BY ' + self.order_by_as_sql()
if self._limits:
sql += '\nLIMIT %d, %d' % self._limits
return sql
def __iter__(self):
return self._database.select(self.as_sql()) # using an ad-hoc model
def count(self):
"""
Returns the number of rows after aggregation.
"""
sql = u'SELECT count() FROM (%s)' % self.as_sql()
raw = self._database.raw(sql)
return int(raw) if raw else 0

View File

@ -2,9 +2,13 @@
This file contains readonly system models that can be read from the database
https://clickhouse.yandex/reference_en.html#System tables
"""
from __future__ import unicode_literals
from six import string_types
from .database import Database
from .fields import *
from .models import Model
from .utils import comma_join
class SystemPart(Model):
@ -62,7 +66,7 @@ class SystemPart(Model):
:return: Operation execution result
"""
operation = operation.upper()
assert operation in self.OPERATIONS, "operation must be in [%s]" % comma_join(self.OPERATIONS)
sql = "ALTER TABLE `%s`.`%s` %s PARTITION '%s'" % (self._database.db_name, self.table, operation, self.partition)
if from_part is not None:
sql += " FROM %s" % from_part
@ -118,7 +122,7 @@ class SystemPart(Model):
:return: A list of SystemPart objects
"""
assert isinstance(database, Database), "database must be database.Database class instance"
assert isinstance(conditions, string_types), "conditions must be a string"
if conditions:
conditions += " AND"
field_names = ','.join([f[0] for f in cls._fields])

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
from six import string_types, binary_type, text_type, PY3
import codecs
import re
@ -21,7 +22,7 @@ SPECIAL_CHARS_REGEX = re.compile("[" + ''.join(SPECIAL_CHARS.values()) + "]")
def escape(value, quote=True):
'''
If the value is a string, escapes any special characters and optionally
surrounds it with single quotes. If the value is not a string (e.g. a number),
converts it to one.
'''
def escape_one(match):
@ -43,7 +44,7 @@ def parse_tsv(line):
line = line.decode()
if line and line[-1] == '\n':
line = line[:-1]
return [unescape(value) for value in line.split(str('\t'))]
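# Illustrative: one TSV line in, a list of unescaped column values out -
#
#     parse_tsv('Whitney\tDurham\t1977-09-15\t1.72\n')
#     # -> ['Whitney', 'Durham', '1977-09-15', '1.72']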
def parse_array(array_string):
@ -56,7 +57,7 @@ def parse_array(array_string):
if len(array_string) < 2 or array_string[0] != '[' or array_string[-1] != ']':
raise ValueError('Invalid array string: "%s"' % array_string)
# Drop opening brace
array_string = array_string[1:]
# Go over the string, lopping off each value at the beginning until nothing is left
values = []
while True:
@ -65,7 +66,7 @@ def parse_array(array_string):
return values
elif array_string[0] in ', ':
# In between values
array_string = array_string[1:]
elif array_string[0] == "'":
# Start of quoted value, find its end
match = re.search(r"[^\\]'", array_string)
@ -90,3 +91,10 @@ def import_submodules(package_name):
name: importlib.import_module(package_name + '.' + name)
for _, name, _ in pkgutil.iter_modules(package.__path__)
}
def comma_join(items):
"""
Joins an iterable of strings with commas.
"""
return ', '.join(items)

View File

@ -0,0 +1,146 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
import logging
logging.getLogger("requests").setLevel(logging.WARNING)
class TestCaseWithData(unittest.TestCase):
def setUp(self):
self.database = Database('test-db')
self.database.create_table(Person)
def tearDown(self):
self.database.drop_table(Person)
self.database.drop_database()
def _insert_and_check(self, data, count):
self.database.insert(data)
self.assertEquals(count, self.database.count(Person))
for instance in data:
self.assertEquals(self.database, instance.get_database())
def _sample_data(self):
for entry in data:
yield Person(**entry)
class Person(Model):
first_name = StringField()
last_name = StringField()
birthday = DateField()
height = Float32Field()
engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
data = [
{"first_name": "Abdul", "last_name": "Hester", "birthday": "1970-12-02", "height": "1.63"},
{"first_name": "Adam", "last_name": "Goodman", "birthday": "1986-01-07", "height": "1.74"},
{"first_name": "Adena", "last_name": "Norman", "birthday": "1979-05-14", "height": "1.66"},
{"first_name": "Aline", "last_name": "Crane", "birthday": "1988-05-01", "height": "1.62"},
{"first_name": "Althea", "last_name": "Barrett", "birthday": "2004-07-28", "height": "1.71"},
{"first_name": "Amanda", "last_name": "Vang", "birthday": "1973-02-23", "height": "1.68"},
{"first_name": "Angela", "last_name": "Sanders", "birthday": "2016-01-08", "height": "1.66"},
{"first_name": "Anne", "last_name": "Rasmussen", "birthday": "1995-04-03", "height": "1.77"},
{"first_name": "Ariana", "last_name": "Cole", "birthday": "1977-12-20", "height": "1.72"},
{"first_name": "Ashton", "last_name": "Fuller", "birthday": "1995-11-17", "height": "1.75"},
{"first_name": "Ava", "last_name": "Sanders", "birthday": "1997-08-10", "height": "1.60"},
{"first_name": "Barrett", "last_name": "Clemons", "birthday": "1985-07-03", "height": "1.71"},
{"first_name": "Beatrice", "last_name": "Gregory", "birthday": "1992-01-19", "height": "1.80"},
{"first_name": "Buffy", "last_name": "Webb", "birthday": "1990-03-06", "height": "1.68"},
{"first_name": "Callie", "last_name": "Wiley", "birthday": "1987-11-24", "height": "1.69"},
{"first_name": "Cara", "last_name": "Fox", "birthday": "2004-05-15", "height": "1.71"},
{"first_name": "Caryn", "last_name": "Sears", "birthday": "1999-02-17", "height": "1.71"},
{"first_name": "Cassady", "last_name": "Knapp", "birthday": "1977-12-15", "height": "1.72"},
{"first_name": "Cassady", "last_name": "Rogers", "birthday": "2013-11-04", "height": "1.71"},
{"first_name": "Catherine", "last_name": "Hicks", "birthday": "1989-05-23", "height": "1.80"},
{"first_name": "Cathleen", "last_name": "Frank", "birthday": "1977-09-04", "height": "1.61"},
{"first_name": "Celeste", "last_name": "James", "birthday": "1990-03-08", "height": "1.67"},
{"first_name": "Chelsea", "last_name": "Castro", "birthday": "2001-08-10", "height": "1.71"},
{"first_name": "Ciaran", "last_name": "Carver", "birthday": "2016-12-25", "height": "1.76"},
{"first_name": "Ciaran", "last_name": "Hurley", "birthday": "1995-10-25", "height": "1.65"},
{"first_name": "Clementine", "last_name": "Moon", "birthday": "1994-03-29", "height": "1.73"},
{"first_name": "Connor", "last_name": "Jenkins", "birthday": "1999-07-23", "height": "1.67"},
{"first_name": "Courtney", "last_name": "Cannon", "birthday": "1997-10-26", "height": "1.76"},
{"first_name": "Courtney", "last_name": "Hoffman", "birthday": "1994-11-07", "height": "1.65"},
{"first_name": "Denton", "last_name": "Sanchez", "birthday": "1971-10-16", "height": "1.72"},
{"first_name": "Dominique", "last_name": "Sandoval", "birthday": "1972-02-01", "height": "1.72"},
{"first_name": "Dora", "last_name": "Cabrera", "birthday": "2016-04-26", "height": "1.68"},
{"first_name": "Eagan", "last_name": "Dodson", "birthday": "2015-10-22", "height": "1.67"},
{"first_name": "Edan", "last_name": "Dennis", "birthday": "1989-09-18", "height": "1.73"},
{"first_name": "Ella", "last_name": "Castillo", "birthday": "1973-03-28", "height": "1.73"},
{"first_name": "Elton", "last_name": "Ayers", "birthday": "1994-06-20", "height": "1.68"},
{"first_name": "Elton", "last_name": "Smith", "birthday": "1982-06-20", "height": "1.66"},
{"first_name": "Emma", "last_name": "Clements", "birthday": "1996-08-07", "height": "1.75"},
{"first_name": "Evangeline", "last_name": "Weber", "birthday": "1984-06-03", "height": "1.70"},
{"first_name": "Faith", "last_name": "Emerson", "birthday": "1989-12-30", "height": "1.62"},
{"first_name": "Fritz", "last_name": "Atkinson", "birthday": "2011-06-15", "height": "1.73"},
{"first_name": "Galvin", "last_name": "Phillips", "birthday": "2004-01-17", "height": "1.74"},
{"first_name": "Georgia", "last_name": "Kennedy", "birthday": "1974-12-29", "height": "1.66"},
{"first_name": "Griffith", "last_name": "Henry", "birthday": "1985-04-02", "height": "1.66"},
{"first_name": "Hedy", "last_name": "Strong", "birthday": "2001-10-04", "height": "1.60"},
{"first_name": "Hu", "last_name": "May", "birthday": "1976-10-01", "height": "1.76"},
{"first_name": "Hyacinth", "last_name": "Kent", "birthday": "1971-07-18", "height": "1.72"},
{"first_name": "Idola", "last_name": "Fulton", "birthday": "1974-11-27", "height": "1.66"},
{"first_name": "Jarrod", "last_name": "Gibbs", "birthday": "1987-06-13", "height": "1.62"},
{"first_name": "Jesse", "last_name": "Gomez", "birthday": "2011-01-28", "height": "1.71"},
{"first_name": "Josiah", "last_name": "Hodges", "birthday": "2011-09-04", "height": "1.68"},
{"first_name": "Karleigh", "last_name": "Bartlett", "birthday": "1991-10-24", "height": "1.69"},
{"first_name": "Keelie", "last_name": "Mathis", "birthday": "1993-10-26", "height": "1.69"},
{"first_name": "Kieran", "last_name": "Solomon", "birthday": "1993-10-30", "height": "1.69"},
{"first_name": "Laith", "last_name": "Howell", "birthday": "1991-07-07", "height": "1.70"},
{"first_name": "Leroy", "last_name": "Pacheco", "birthday": "1998-12-30", "height": "1.70"},
{"first_name": "Lesley", "last_name": "Stephenson", "birthday": "2010-04-10", "height": "1.64"},
{"first_name": "Macaulay", "last_name": "Rowe", "birthday": "1982-03-02", "height": "1.68"},
{"first_name": "Macey", "last_name": "Griffin", "birthday": "1971-09-18", "height": "1.63"},
{"first_name": "Madeline", "last_name": "Kidd", "birthday": "1984-12-09", "height": "1.69"},
{"first_name": "Maia", "last_name": "Hyde", "birthday": "1972-06-09", "height": "1.74"},
{"first_name": "Mary", "last_name": "Kirkland", "birthday": "1987-10-09", "height": "1.73"},
{"first_name": "Molly", "last_name": "Salas", "birthday": "1994-04-23", "height": "1.70"},
{"first_name": "Montana", "last_name": "Bruce", "birthday": "1982-06-28", "height": "1.66"},
{"first_name": "Naomi", "last_name": "Hays", "birthday": "2004-11-27", "height": "1.70"},
{"first_name": "Norman", "last_name": "Santos", "birthday": "1989-01-10", "height": "1.68"},
{"first_name": "Octavius", "last_name": "Floyd", "birthday": "1985-02-22", "height": "1.68"},
{"first_name": "Odette", "last_name": "Mcneil", "birthday": "1978-05-21", "height": "1.76"},
{"first_name": "Oliver", "last_name": "Ashley", "birthday": "2004-08-13", "height": "1.68"},
{"first_name": "Quon", "last_name": "Wiggins", "birthday": "1992-05-06", "height": "1.74"},
{"first_name": "Rafael", "last_name": "Parker", "birthday": "2016-01-24", "height": "1.76"},
{"first_name": "Reese", "last_name": "Noel", "birthday": "1996-11-04", "height": "1.77"},
{"first_name": "Rhona", "last_name": "Camacho", "birthday": "1976-12-17", "height": "1.59"},
{"first_name": "Rigel", "last_name": "Oneal", "birthday": "1993-11-05", "height": "1.63"},
{"first_name": "Roary", "last_name": "Simmons", "birthday": "1986-07-23", "height": "1.63"},
{"first_name": "Russell", "last_name": "Pruitt", "birthday": "1979-05-04", "height": "1.63"},
{"first_name": "Sawyer", "last_name": "Fischer", "birthday": "1995-04-01", "height": "1.78"},
{"first_name": "Scarlett", "last_name": "Durham", "birthday": "2005-09-29", "height": "1.65"},
{"first_name": "Seth", "last_name": "Serrano", "birthday": "2017-06-02", "height": "1.71"},
{"first_name": "Shad", "last_name": "Bradshaw", "birthday": "1998-08-25", "height": "1.72"},
{"first_name": "Shana", "last_name": "Jarvis", "birthday": "1997-05-21", "height": "1.72"},
{"first_name": "Sharon", "last_name": "Shelton", "birthday": "1970-05-02", "height": "1.65"},
{"first_name": "Shoshana", "last_name": "Solis", "birthday": "1998-07-18", "height": "1.65"},
{"first_name": "Stephen", "last_name": "Baxter", "birthday": "2004-09-24", "height": "1.74"},
{"first_name": "Sydney", "last_name": "Stevens", "birthday": "1989-07-11", "height": "1.70"},
{"first_name": "Tasha", "last_name": "Campos", "birthday": "1984-02-11", "height": "1.72"},
{"first_name": "Ulla", "last_name": "Arnold", "birthday": "1990-06-04", "height": "1.63"},
{"first_name": "Vaughan", "last_name": "Schmidt", "birthday": "1985-06-19", "height": "1.61"},
{"first_name": "Velma", "last_name": "English", "birthday": "1999-01-18", "height": "1.65"},
{"first_name": "Venus", "last_name": "Hurst", "birthday": "1993-10-22", "height": "1.72"},
{"first_name": "Victor", "last_name": "Woods", "birthday": "1989-06-23", "height": "1.67"},
{"first_name": "Victoria", "last_name": "Slater", "birthday": "2009-07-19", "height": "1.72"},
{"first_name": "Wang", "last_name": "Goodwin", "birthday": "1983-05-15", "height": "1.66"},
{"first_name": "Warren", "last_name": "Bowen", "birthday": "2000-07-20", "height": "1.76"},
{"first_name": "Warren", "last_name": "Dudley", "birthday": "1995-10-23", "height": "1.59"},
{"first_name": "Whilemina", "last_name": "Blankenship", "birthday": "1970-07-14", "height": "1.66"},
{"first_name": "Whitney", "last_name": "Durham", "birthday": "1977-09-15", "height": "1.72"},
{"first_name": "Whitney", "last_name": "Scott", "birthday": "1971-07-04", "height": "1.70"},
{"first_name": "Wynter", "last_name": "Garcia", "birthday": "1975-01-10", "height": "1.69"},
{"first_name": "Yolanda", "last_name": "Duke", "birthday": "1997-02-25", "height": "1.74"}
]

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import unittest
from datetime import date
@ -60,10 +61,8 @@ class ModelWithAliasFields(Model):
date_field = DateField()
str_field = StringField()
alias_str = StringField(alias=u'str_field')
alias_int = Int32Field(alias='int_field')
alias_date = DateField(alias='date_field')
engine = MergeTree('date_field', ('date_field',))

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import unittest
from datetime import date
@ -18,8 +19,8 @@ class ArrayFieldsTest(unittest.TestCase):
def test_insert_and_select(self):
instance = ModelWithArrays(
date_field='2016-08-30',
arr_str=['goodbye,', 'cruel', 'world', 'special chars: ,"\\\'` \n\t\\[]'],
arr_date=['2010-01-01']
)
self.database.insert([instance])
@ -52,11 +53,11 @@ class ArrayFieldsTest(unittest.TestCase):
self.assertEquals(parse_array("[1, 2, 395, -44]"), ["1", "2", "395", "-44"])
self.assertEquals(parse_array("['big','mouse','','!']"), ["big", "mouse", "", "!"])
self.assertEquals(parse_array(unescape("['\\r\\n\\0\\t\\b']")), ["\r\n\0\t\b"])
for s in ("",
"[",
"]",
"[1, 2",
"3, 4]",
for s in ("",
"[",
"]",
"[1, 2",
"3, 4]",
"['aaa', 'aaa]"):
with self.assertRaises(ValueError):
parse_array(s)
@ -70,4 +71,3 @@ class ModelWithArrays(Model):
arr_date = ArrayField(DateField())
engine = MergeTree('date_field', ('date_field',))

27
tests/test_buffer.py Normal file
View File

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.models import BufferModel
from infi.clickhouse_orm.engines import *
from .base_test_with_data import *
class BufferTestCase(TestCaseWithData):
def _insert_and_check_buffer(self, data, count):
self.database.insert(data)
self.assertEquals(count, self.database.count(PersonBuffer))
def _sample_buffer_data(self):
for entry in data:
yield PersonBuffer(**entry)
def test_insert_buffer(self):
self.database.create_table(PersonBuffer)
self._insert_and_check_buffer(self._sample_buffer_data(), len(data))
class PersonBuffer(BufferModel, Person):
engine = Buffer(Person)

View File

@ -1,37 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.database import Database, DatabaseException
from .base_test_with_data import *
class DatabaseTestCase(TestCaseWithData):
def test_insert__generator(self):
self._insert_and_check(self._sample_data(), len(data))
@ -124,7 +99,7 @@ class DatabaseTestCase(unittest.TestCase):
page_a = self.database.paginate(Person, 'first_name, last_name', -1, page_size)
page_b = self.database.paginate(Person, 'first_name, last_name', page_a.pages_total, page_size)
self.assertEquals(page_a[1:], page_b[1:])
self.assertEquals([obj.to_tsv() for obj in page_a.objects],
[obj.to_tsv() for obj in page_b.objects])
def test_pagination_invalid_page(self):
@ -133,6 +108,11 @@ class DatabaseTestCase(unittest.TestCase):
with self.assertRaises(ValueError):
self.database.paginate(Person, 'first_name, last_name', page_num, 100)
def test_pagination_with_conditions(self):
self._insert_and_check(self._sample_data(), len(data))
page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions="first_name < 'Ava'")
self.assertEquals(page.number_of_objects, 10)
def test_special_chars(self):
s = u'אבגד \\\'"`,.;éåäöšž\n\t\0\b\r'
p = Person(first_name=s)
@ -140,171 +120,21 @@ class DatabaseTestCase(unittest.TestCase):
p = list(self.database.select("SELECT * from $table", Person))[0]
self.assertEquals(p.first_name, s)
def test_raw(self):
self._insert_and_check(self._sample_data(), len(data))
query = "SELECT * FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name"
results = self.database.raw(query)
self.assertEqual(results, "Whitney\tDurham\t1977-09-15\t1.72\nWhitney\tScott\t1971-07-04\t1.7\n")
def test_invalid_user(self):
with self.assertRaises(DatabaseException):
Database(self.database.db_name, username='default', password='wrong')
def test_nonexisting_db(self):
db = Database('db_not_here', autocreate=False)
with self.assertRaises(DatabaseException):
db.create_table(Person)
def test_preexisting_db(self):
db = Database(self.database.db_name, autocreate=False)
db.count(Person)

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.database import Database, DatabaseException
@ -38,6 +39,11 @@ class EnginesTestCase(unittest.TestCase):
engine = MergeTree('date', ('date', 'event_id', 'event_group'), index_granularity=4096)
self._create_and_insert(TestModel)
def test_replicated_merge_tree(self):
engine = MergeTree('date', ('date', 'event_id', 'event_group'), replica_table_path='/clickhouse/tables/{layer}-{shard}/hits', replica_name='{replica}')
expected = "ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', date, (date, event_id, event_group), 8192)"
self.assertEquals(engine.create_table_sql(), expected)
def test_collapsing_merge_tree(self):
class TestModel(SampleModel):
engine = CollapsingMergeTree('date', ('date', 'event_id', 'event_group'), 'event_version')
@ -53,6 +59,21 @@ class EnginesTestCase(unittest.TestCase):
engine = ReplacingMergeTree('date', ('date', 'event_id', 'event_group'), 'event_uversion')
self._create_and_insert(TestModel)
def test_tiny_log(self):
class TestModel(SampleModel):
engine = TinyLog()
self._create_and_insert(TestModel)
def test_log(self):
class TestModel(SampleModel):
engine = Log()
self._create_and_insert(TestModel)
def test_memory(self):
class TestModel(SampleModel):
engine = Memory()
self._create_and_insert(TestModel)
class SampleModel(Model):

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.database import Database

View File

@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
class FixedStringFieldsTest(unittest.TestCase):
def setUp(self):
self.database = Database('test-db')
self.database.create_table(FixedStringModel)
def tearDown(self):
self.database.drop_database()
def _insert_sample_data(self):
self.database.insert([
FixedStringModel(date_field='2016-08-30', fstr_field=''),
FixedStringModel(date_field='2016-08-30'),
FixedStringModel(date_field='2016-08-31', fstr_field='foo'),
FixedStringModel(date_field='2016-08-31', fstr_field=u'לילה')
])
def _assert_sample_data(self, results):
self.assertEquals(len(results), 4)
self.assertEquals(results[0].fstr_field, '')
self.assertEquals(results[1].fstr_field, 'ABCDEFGHIJK')
self.assertEquals(results[2].fstr_field, 'foo')
self.assertEquals(results[3].fstr_field, u'לילה')
def test_insert_and_select(self):
self._insert_sample_data()
query = 'SELECT * from $table ORDER BY date_field'
results = list(self.database.select(query, FixedStringModel))
self._assert_sample_data(results)
def test_ad_hoc_model(self):
self._insert_sample_data()
query = 'SELECT * from $db.fixedstringmodel ORDER BY date_field'
results = list(self.database.select(query))
self._assert_sample_data(results)
def test_assignment_error(self):
for value in (17, 'this is too long', u'זה ארוך', None, 99.9):
with self.assertRaises(ValueError):
FixedStringModel(fstr_field=value)
class FixedStringModel(Model):
date_field = DateField()
fstr_field = FixedStringField(12, default='ABCDEFGHIJK')
engine = MergeTree('date_field', ('date_field',))

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import unittest
import datetime
import pytz
@ -49,4 +50,3 @@ class Model1(ParentModel):
class Model2(ParentModel):
float_field = Float32Field()

44
tests/test_join.py Normal file
View File

@ -0,0 +1,44 @@
from __future__ import unicode_literals, print_function
import unittest
import json
from infi.clickhouse_orm import database, engines, fields, models
class JoinTest(unittest.TestCase):
def setUp(self):
self.database = database.Database('test-db')
self.database.create_table(Foo)
self.database.create_table(Bar)
self.database.insert([Foo(id=i) for i in range(3)])
self.database.insert([Bar(id=i, b=i * i) for i in range(3)])
def print_res(self, query):
print(query)
print(json.dumps([row.to_dict() for row in self.database.select(query)]))
def test_without_db_name(self):
self.print_res("SELECT * FROM {}".format(Foo.table_name()))
self.print_res("SELECT * FROM {}".format(Bar.table_name()))
self.print_res("SELECT b FROM {} ALL LEFT JOIN {} USING id".format(Foo.table_name(), Bar.table_name()))
@unittest.skip('ClickHouse issue - https://github.com/yandex/ClickHouse/issues/635')
def test_with_db_name(self):
self.print_res("SELECT * FROM $db.{}".format(Foo.table_name()))
self.print_res("SELECT * FROM $db.{}".format(Bar.table_name()))
self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN $db.{} USING id".format(Foo.table_name(), Bar.table_name()))
def test_with_subquery(self):
self.print_res("SELECT b FROM {} ALL LEFT JOIN (SELECT * from {}) USING id".format(Foo.table_name(), Bar.table_name()))
self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN (SELECT * from $db.{}) USING id".format(Foo.table_name(), Bar.table_name()))
class Foo(models.Model):
id = fields.UInt8Field()
engine = engines.Memory()
class Bar(Foo):
b = fields.UInt8Field()

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import unittest
from datetime import date
@ -62,8 +63,6 @@ class ModelWithMaterializedFields(Model):
mat_str = StringField(materialized='lower(str_field)')
mat_int = Int32Field(materialized='abs(int_field)')
mat_date = DateField(materialized=u'toDate(date_time_field)')
engine = MergeTree('mat_date', ('mat_date',))

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.database import Database
@ -54,11 +55,11 @@ class MigrationsTestCase(unittest.TestCase):
# Altering enum fields
self.database.migrate('tests.sample_migrations', 6)
self.assertTrue(self.tableExists(EnumModel1))
self.assertEquals(self.getTableFields(EnumModel1),
[('date', 'Date'), ('f1', "Enum8('dog' = 1, 'cat' = 2, 'cow' = 3)")])
self.database.migrate('tests.sample_migrations', 7)
self.assertTrue(self.tableExists(EnumModel1))
self.assertEquals(self.getTableFields(EnumModel2),
[('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")])
self.database.migrate('tests.sample_migrations', 8)
self.assertTrue(self.tableExists(MaterializedModel))
@ -157,4 +158,4 @@ class AliasModel(Model):
@classmethod
def table_name(cls):
return 'alias_date'

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import unittest
import datetime
import pytz
@ -89,4 +90,3 @@ class SimpleModel(Model):
alias_field = Float32Field(alias='float_field')
engine = MergeTree('date_field', ('int_field', 'date_field'))

View File

@ -0,0 +1,121 @@
from __future__ import unicode_literals
import unittest
import pytz
from infi.clickhouse_orm.database import Database
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from datetime import date, datetime
class NullableFieldsTest(unittest.TestCase):
def setUp(self):
self.database = Database('test-db')
self.database.create_table(ModelWithNullable)
def tearDown(self):
self.database.drop_database()
def test_nullable_datetime_field(self):
f = NullableField(DateTimeField())
epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
# Valid values
for value in (date(1970, 1, 1),
datetime(1970, 1, 1),
epoch,
epoch.astimezone(pytz.timezone('US/Eastern')),
epoch.astimezone(pytz.timezone('Asia/Jerusalem')),
'1970-01-01 00:00:00',
'1970-01-17 00:00:17',
'0000-00-00 00:00:00',
0,
'\\N'):
dt = f.to_python(value, pytz.utc)
if value == '\\N':
self.assertIsNone(dt)
else:
self.assertEquals(dt.tzinfo, pytz.utc)
# Verify that conversion to and from db string does not change value
dt2 = f.to_python(f.to_db_string(dt, quote=False), pytz.utc)
self.assertEquals(dt, dt2)
# Invalid values
for value in ('nope', '21/7/1999', 0.5):
with self.assertRaises(ValueError):
f.to_python(value, pytz.utc)
def test_nullable_uint8_field(self):
f = NullableField(UInt8Field())
# Valid values
for value in (17, '17', 17.0, '\\N'):
python_value = f.to_python(value, pytz.utc)
if value == '\\N':
self.assertIsNone(python_value)
self.assertEqual(value, f.to_db_string(python_value))
else:
self.assertEquals(python_value, 17)
# Invalid values
for value in ('nope', date.today()):
with self.assertRaises(ValueError):
f.to_python(value, pytz.utc)
def test_nullable_string_field(self):
f = NullableField(StringField())
# Valid values
for value in ('\\\\N', 'N', 'some text', '\\N'):
python_value = f.to_python(value, pytz.utc)
if value == '\\N':
self.assertIsNone(python_value)
self.assertEqual(value, f.to_db_string(python_value))
else:
self.assertEquals(python_value, value)
def _insert_sample_data(self):
dt = date(1970, 1, 1)
self.database.insert([
ModelWithNullable(date_field='2016-08-30', null_str='', null_int=42, null_date=dt),
ModelWithNullable(date_field='2016-08-30', null_str='nothing', null_int=None, null_date=None),
ModelWithNullable(date_field='2016-08-31', null_str=None, null_int=42, null_date=dt),
ModelWithNullable(date_field='2016-08-31', null_str=None, null_int=None, null_date=None)
])
def _assert_sample_data(self, results):
dt = date(1970, 1, 1)
self.assertEquals(len(results), 4)
self.assertIsNone(results[0].null_str)
self.assertEquals(results[0].null_int, 42)
self.assertEquals(results[0].null_date, dt)
self.assertIsNone(results[1].null_date)
self.assertEquals(results[1].null_str, 'nothing')
self.assertIsNone(results[1].null_date)
self.assertIsNone(results[2].null_str)
self.assertEquals(results[2].null_date, dt)
self.assertEquals(results[2].null_int, 42)
self.assertIsNone(results[3].null_int)
self.assertIsNone(results[3].null_str)
self.assertIsNone(results[3].null_date)
def test_insert_and_select(self):
self._insert_sample_data()
query = 'SELECT * from $table ORDER BY date_field'
results = list(self.database.select(query, ModelWithNullable))
self._assert_sample_data(results)
def test_ad_hoc_model(self):
self._insert_sample_data()
query = 'SELECT * from $db.modelwithnullable ORDER BY date_field'
results = list(self.database.select(query))
self._assert_sample_data(results)
class ModelWithNullable(Model):
date_field = DateField()
null_str = NullableField(StringField(), extra_null_values={''})
null_int = NullableField(Int32Field())
null_date = NullableField(DateField())
engine = MergeTree('date_field', ('date_field',))

329
tests/test_querysets.py Normal file
View File

@ -0,0 +1,329 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import unittest
from infi.clickhouse_orm.database import Database
from .base_test_with_data import *
import logging
from datetime import date, datetime
try:
Enum # exists in Python 3.4+
except NameError:
from enum import Enum # use the enum34 library instead
class QuerySetTestCase(TestCaseWithData):
def setUp(self):
super(QuerySetTestCase, self).setUp()
self.database.insert(self._sample_data())
def _test_qs(self, qs, expected_count):
logging.info(qs.as_sql())
for instance in qs:
logging.info('\t%s' % instance.to_dict())
self.assertEquals(qs.count(), expected_count)
def test_no_filtering(self):
qs = Person.objects_in(self.database)
self._test_qs(qs, len(data))
def test_truthiness(self):
qs = Person.objects_in(self.database)
self.assertTrue(qs.filter(first_name='Connor'))
self.assertFalse(qs.filter(first_name='Willy'))
def test_filter_string_field(self):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(first_name='Ciaran'), 2)
self._test_qs(qs.filter(first_name='ciaran'), 0) # case sensitive
self._test_qs(qs.filter(first_name__iexact='ciaran'), 2) # case insensitive
self._test_qs(qs.filter(first_name__gt='Whilemina'), 4)
self._test_qs(qs.filter(first_name__gte='Whilemina'), 5)
self._test_qs(qs.filter(first_name__lt='Adam'), 1)
self._test_qs(qs.filter(first_name__lte='Adam'), 2)
self._test_qs(qs.filter(first_name__in=('Connor', 'Courtney')), 3) # in tuple
self._test_qs(qs.filter(first_name__in=['Connor', 'Courtney']), 3) # in list
self._test_qs(qs.filter(first_name__in="'Connor', 'Courtney'"), 3) # in string
self._test_qs(qs.filter(first_name__not_in="'Connor', 'Courtney'"), 97)
self._test_qs(qs.filter(first_name__contains='sh'), 3) # case sensitive
self._test_qs(qs.filter(first_name__icontains='sh'), 6) # case insensitive
self._test_qs(qs.filter(first_name__startswith='le'), 0) # case sensitive
self._test_qs(qs.filter(first_name__istartswith='Le'), 2) # case insensitive
self._test_qs(qs.filter(first_name__istartswith=''), 100) # empty prefix
self._test_qs(qs.filter(first_name__endswith='IA'), 0) # case sensitive
self._test_qs(qs.filter(first_name__iendswith='ia'), 3) # case insensitive
self._test_qs(qs.filter(first_name__iendswith=''), 100) # empty suffix
def test_filter_unicode_string(self):
self.database.insert([
Person(first_name=u'דונלד', last_name=u'דאק')
])
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(first_name=u'דונלד'), 1)
def test_filter_float_field(self):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(height__gt=2), 0)
self._test_qs(qs.filter(height__lt=1.61), 4)
self._test_qs(qs.filter(height__lt='1.61'), 4)
self._test_qs(qs.exclude(height__lt='1.61'), 96)
self._test_qs(qs.filter(height__gt=0), 100)
self._test_qs(qs.exclude(height__gt=0), 0)
def test_filter_date_field(self):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(birthday='1970-12-02'), 1)
self._test_qs(qs.filter(birthday__eq='1970-12-02'), 1)
self._test_qs(qs.filter(birthday__ne='1970-12-02'), 99)
self._test_qs(qs.filter(birthday=date(1970, 12, 2)), 1)
self._test_qs(qs.filter(birthday__lte=date(1970, 12, 2)), 3)
def test_only(self):
qs = Person.objects_in(self.database).only('first_name', 'last_name')
for person in qs:
self.assertTrue(person.first_name)
self.assertTrue(person.last_name)
self.assertFalse(person.height)
self.assertEquals(person.birthday, date(1970, 1, 1))
def test_order_by(self):
qs = Person.objects_in(self.database)
self.assertFalse('ORDER BY' in qs.as_sql())
self.assertFalse(qs.order_by_as_sql())
person = list(qs.order_by('first_name', 'last_name'))[0]
self.assertEquals(person.first_name, 'Abdul')
person = list(qs.order_by('-first_name', '-last_name'))[0]
self.assertEquals(person.first_name, 'Yolanda')
person = list(qs.order_by('height'))[0]
self.assertEquals(person.height, 1.59)
person = list(qs.order_by('-height'))[0]
self.assertEquals(person.height, 1.8)
def test_in_subquery(self):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(height__in='SELECT max(height) FROM $table'), 2)
self._test_qs(qs.filter(first_name__in=qs.only('last_name')), 2)
self._test_qs(qs.filter(first_name__not_in=qs.only('last_name')), 98)
def _insert_sample_model(self):
self.database.create_table(SampleModel)
now = datetime.now()
self.database.insert([
SampleModel(timestamp=now, num=1, color=Color.red),
SampleModel(timestamp=now, num=2, color=Color.red),
SampleModel(timestamp=now, num=3, color=Color.blue),
SampleModel(timestamp=now, num=4, color=Color.white),
])
def test_filter_enum_field(self):
self._insert_sample_model()
qs = SampleModel.objects_in(self.database)
self._test_qs(qs.filter(color=Color.red), 2)
self._test_qs(qs.exclude(color=Color.white), 3)
# Different ways to specify blue
self._test_qs(qs.filter(color__gt=Color.blue), 1)
self._test_qs(qs.filter(color__gt='blue'), 1)
self._test_qs(qs.filter(color__gt=2), 1)
def test_filter_int_field(self):
self._insert_sample_model()
qs = SampleModel.objects_in(self.database)
self._test_qs(qs.filter(num=1), 1)
self._test_qs(qs.filter(num__eq=1), 1)
self._test_qs(qs.filter(num__ne=1), 3)
self._test_qs(qs.filter(num__gt=1), 3)
self._test_qs(qs.filter(num__gte=1), 4)
self._test_qs(qs.filter(num__in=(1, 2, 3)), 3)
self._test_qs(qs.filter(num__in=range(1, 4)), 3)
def test_slicing(self):
db = Database('system')
numbers = list(range(100))
qs = Numbers.objects_in(db)
self.assertEquals(qs[0].number, numbers[0])
self.assertEquals(qs[5].number, numbers[5])
self.assertEquals([row.number for row in qs[:1]], numbers[:1])
self.assertEquals([row.number for row in qs[:10]], numbers[:10])
self.assertEquals([row.number for row in qs[3:10]], numbers[3:10])
self.assertEquals([row.number for row in qs[9:10]], numbers[9:10])
self.assertEquals([row.number for row in qs[10:10]], numbers[10:10])
def test_invalid_slicing(self):
db = Database('system')
qs = Numbers.objects_in(db)
with self.assertRaises(AssertionError):
qs[3:10:2]
with self.assertRaises(AssertionError):
qs[-5]
with self.assertRaises(AssertionError):
qs[:-5]
with self.assertRaises(AssertionError):
qs[50:1]
def test_pagination(self):
qs = Person.objects_in(self.database).order_by('first_name', 'last_name')
# Try different page sizes
for page_size in (1, 2, 7, 10, 30, 100, 150):
# Iterate over pages and collect all instances
page_num = 1
instances = set()
while True:
page = qs.paginate(page_num, page_size)
self.assertEquals(page.number_of_objects, len(data))
self.assertGreater(page.pages_total, 0)
[instances.add(obj.to_tsv()) for obj in page.objects]
if page.pages_total == page_num:
break
page_num += 1
# Verify that all instances were returned
self.assertEquals(len(instances), len(data))
def test_pagination_last_page(self):
qs = Person.objects_in(self.database).order_by('first_name', 'last_name')
# Try different page sizes
for page_size in (1, 2, 7, 10, 30, 100, 150):
# Ask for the last page in two different ways and verify equality
page_a = qs.paginate(-1, page_size)
page_b = qs.paginate(page_a.pages_total, page_size)
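# The page is a namedtuple whose first field is objects, so [1:] compares
# only the page metadata; the objects themselves are compared separately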
self.assertEquals(page_a[1:], page_b[1:])
self.assertEquals([obj.to_tsv() for obj in page_a.objects],
[obj.to_tsv() for obj in page_b.objects])
def test_pagination_invalid_page(self):
qs = Person.objects_in(self.database).order_by('first_name', 'last_name')
for page_num in (0, -2, -100):
with self.assertRaises(ValueError):
qs.paginate(page_num, 100)
def test_pagination_with_conditions(self):
qs = Person.objects_in(self.database).order_by('first_name', 'last_name').filter(first_name__lt='Ava')
page = qs.paginate(1, 100)
self.assertEquals(page.number_of_objects, 10)
class AggregateTestCase(TestCaseWithData):
def setUp(self):
super(AggregateTestCase, self).setUp()
self.database.insert(self._sample_data())
def test_aggregate_no_grouping(self):
qs = Person.objects_in(self.database).aggregate(average_height='avg(height)', count='count()')
print(qs.as_sql())
self.assertEquals(qs.count(), 1)
for row in qs:
self.assertAlmostEqual(row.average_height, 1.6923, places=4)
self.assertEquals(row.count, 100)
def test_aggregate_with_filter(self):
# When filter comes before aggregate
qs = Person.objects_in(self.database).filter(first_name='Warren').aggregate(average_height='avg(height)', count='count()')
print(qs.as_sql())
self.assertEquals(qs.count(), 1)
for row in qs:
self.assertAlmostEqual(row.average_height, 1.675, places=4)
self.assertEquals(row.count, 2)
# When filter comes after aggregate
qs = Person.objects_in(self.database).aggregate(average_height='avg(height)', count='count()').filter(first_name='Warren')
print(qs.as_sql())
self.assertEquals(qs.count(), 1)
for row in qs:
self.assertAlmostEqual(row.average_height, 1.675, places=4)
self.assertEquals(row.count, 2)
def test_aggregate_with_implicit_grouping(self):
qs = Person.objects_in(self.database).aggregate('first_name', average_height='avg(height)', count='count()')
print(qs.as_sql())
self.assertEquals(qs.count(), 94)
total = 0
for row in qs:
self.assertTrue(1.5 < row.average_height < 2)
self.assertTrue(0 < row.count < 3)
total += row.count
self.assertEquals(total, 100)
def test_aggregate_with_explicit_grouping(self):
qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday')
print(qs.as_sql())
self.assertEquals(qs.count(), 7)
total = 0
for row in qs:
total += row.count
self.assertEquals(total, 100)
def test_aggregate_with_order_by(self):
qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday')
days = [row.weekday for row in qs.order_by('weekday')]
self.assertEquals(days, list(range(1, 8)))
def test_aggregate_with_indexing(self):
qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday')
total = 0
for i in range(7):
total += qs[i].count
self.assertEquals(total, 100)
def test_aggregate_with_slicing(self):
qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday')
total = sum(row.count for row in qs[:3]) + sum(row.count for row in qs[3:])
self.assertEquals(total, 100)
def test_aggregate_with_pagination(self):
qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday')
total = 0
page_num = 1
while True:
page = qs.paginate(page_num, page_size=3)
self.assertEquals(page.number_of_objects, 7)
total += sum(row.count for row in page.objects)
if page.pages_total == page_num:
break
page_num += 1
self.assertEquals(total, 100)
def test_aggregate_with_wrong_grouping(self):
with self.assertRaises(AssertionError):
Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('first_name')
def test_aggregate_with_no_calculated_fields(self):
with self.assertRaises(AssertionError):
Person.objects_in(self.database).aggregate()
def test_aggregate_with_only(self):
# Cannot put only() after aggregate()
with self.assertRaises(NotImplementedError):
Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').only('weekday')
# When only() comes before aggregate(), it gets overridden
qs = Person.objects_in(self.database).only('last_name').aggregate(average_height='avg(height)', count='count()')
self.assertTrue('last_name' not in qs.as_sql())
def test_aggregate_on_aggregate(self):
with self.assertRaises(NotImplementedError):
Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').aggregate(s='sum(height)')
def test_filter_on_calculated_field(self):
# This is currently not supported, so we expect it to fail
with self.assertRaises(AttributeError):
qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday')
qs = qs.filter(weekday=1)
self.assertEquals(qs.count(), 1)
Color = Enum('Color', u'red blue green yellow brown white black')
class SampleModel(Model):
timestamp = DateTimeField()
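# A materialized field is computed by ClickHouse from the given expression
# and is never sent by the client on insert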
materialized_date = DateField(materialized='toDate(timestamp)')
num = Int32Field()
color = Enum8Field(Color)
engine = MergeTree('materialized_date', ('materialized_date',))
class Numbers(Model):
number = UInt64Field()

tests/test_readonly.py (new file)

@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import unittest
import six
from infi.clickhouse_orm.database import Database, DatabaseException
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *
from .base_test_with_data import *
class ReadonlyTestCase(TestCaseWithData):
def _test_readonly_db(self, username):
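# Insert with a writable connection first, then reconnect as the given
# user with readonly=True and verify that all write operations fail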
self._insert_and_check(self._sample_data(), len(data))
orig_database = self.database
try:
self.database = Database(orig_database.db_name, username=username, readonly=True)
with self.assertRaises(DatabaseException):
self._insert_and_check(self._sample_data(), len(data))
self.assertEquals(self.database.count(Person), 100)
list(self.database.select('SELECT * from $table', Person))
with self.assertRaises(DatabaseException):
self.database.drop_table(Person)
with self.assertRaises(DatabaseException):
self.database.drop_database()
except DatabaseException as e:
if 'Unknown user' in six.text_type(e):
raise unittest.SkipTest('Database user "%s" is not defined' % username)
else:
raise
finally:
self.database = orig_database
def test_readonly_db_with_default_user(self):
self._test_readonly_db('default')
def test_readonly_db_with_readonly_user(self):
self._test_readonly_db('readonly')
def test_insert_readonly(self):
m = ReadOnlyModel(name='readonly')
with self.assertRaises(DatabaseException):
self.database.insert([m])
def test_create_readonly_table(self):
with self.assertRaises(DatabaseException):
self.database.create_table(ReadOnlyModel)
def test_drop_readonly_table(self):
with self.assertRaises(DatabaseException):
self.database.drop_table(ReadOnlyModel)
class ReadOnlyModel(Model):
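# readonly = True marks the model as read-only: the ORM refuses to
# insert into it or to create/drop its table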
readonly = True
name = StringField()

tests/test_fields.py

@@ -1,3 +1,4 @@
from __future__ import unicode_literals
import unittest
from infi.clickhouse_orm.fields import *
from datetime import date, datetime
@@ -10,7 +11,7 @@ class SimpleFieldsTest(unittest.TestCase):
f = DateTimeField()
epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
# Valid values
for value in (date(1970, 1, 1), datetime(1970, 1, 1), epoch,
epoch.astimezone(pytz.timezone('US/Eastern')), epoch.astimezone(pytz.timezone('Asia/Jerusalem')),
'1970-01-01 00:00:00', '1970-01-17 00:00:17', '0000-00-00 00:00:00', 0):
dt = f.to_python(value, pytz.utc)
@@ -60,4 +61,4 @@ class SimpleFieldsTest(unittest.TestCase):
# Range check
for value in (-1, 1000):
with self.assertRaises(ValueError):
f.validate(value)

tests/test_system_models.py

@@ -1,6 +1,6 @@
from __future__ import unicode_literals
import unittest
from datetime import date
import os
import shutil
from infi.clickhouse_orm.database import Database
@@ -11,7 +11,8 @@ from infi.clickhouse_orm.system_models import SystemPart
class SystemPartTest(unittest.TestCase):
BACKUP_DIR = '/opt/clickhouse/shadow/'
BACKUP_DIRS = ['/var/lib/clickhouse/shadow', '/opt/clickhouse/shadow/']
def setUp(self):
self.database = Database('test-db')
@@ -22,10 +23,11 @@ class SystemPartTest(unittest.TestCase):
self.database.drop_database()
def _get_backups(self):
if not os.path.exists(self.BACKUP_DIR):
return []
_, dirnames, _ = next(os.walk(self.BACKUP_DIR))
return dirnames
for dir in self.BACKUP_DIRS:
if os.path.exists(dir):
_, dirnames, _ = next(os.walk(dir))
return dirnames
raise unittest.SkipTest('Cannot find backups dir')
def test_get_all(self):
parts = SystemPart.get(self.database)
@@ -40,7 +42,7 @@ class SystemPartTest(unittest.TestCase):
def test_get_conditions(self):
parts = list(SystemPart.get(self.database, conditions="table='testtable'"))
self.assertEqual(len(parts), 1)
parts = list(SystemPart.get(self.database, conditions="table='othertable'"))
parts = list(SystemPart.get(self.database, conditions=u"table='othertable'"))
self.assertEqual(len(parts), 0)
def test_attach_detach(self):
@@ -63,8 +65,6 @@ class SystemPartTest(unittest.TestCase):
parts[0].freeze()
backups = set(self._get_backups())
self.assertEqual(len(backups), len(prev_backups) + 1)
# Clean created backup
shutil.rmtree(self.BACKUP_DIR + '{0}'.format(list(backups - prev_backups)[0]))
def test_fetch(self):
# TODO: Not tested, as I have no replication set up