Finished Release v0.9.1

This commit is contained in:
Itai Shirav 2017-06-15 14:48:15 +03:00
commit 72becd75be
19 changed files with 128 additions and 28 deletions

View File

@ -1,6 +1,15 @@
Change Log
==========
Unreleased
----------
- Added `ne` and `not_in` queryset operators
- Querysets no longer have a default order unless `order_by` is called
- Added `autocreate` flag to database initializer
- Fix some Python 2/3 incompatibilities (TvoroG, tsionyx)
- To work around a JOIN bug in ClickHouse, `$table` now inserts only the table name,
and the database name is sent in the query params instead
v0.9.0
------
- Major new feature: building model queries using QuerySets

View File

@ -43,7 +43,7 @@ output = ${project:version_file}
dependent-scripts = true
recipe = infi.recipe.console_scripts
eggs = ${project:name}
ipython
ipython<6
nose
coverage
enum34

View File

@ -10,7 +10,7 @@ infi.clickhouse_orm.database
Database instances connect to a specific ClickHouse database for running queries,
inserting data and other operations.
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False)
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True)
Initializes a database instance. Unless it's readonly, the database will be
@ -21,6 +21,7 @@ created on the ClickHouse server if it does not already exist.
- `username`: optional connection credentials.
- `password`: optional connection credentials.
- `readonly`: use a read-only connection.
- `autocreate`: automatically create the database if does not exist (unless in readonly mode).
#### count(model_class, conditions=None)

View File

@ -117,7 +117,7 @@ This is a very convenient feature that saves you the need to define a model for
SQL Placeholders
----------------
There are a couple of special placeholders that you can use inside the SQL to make it easier to write: `$db` and `$table`. The first one is replaced by the database name, and the second is replaced by the database name plus table name (but is available only when the model is specified).
There are a couple of special placeholders that you can use inside the SQL to make it easier to write: `$db` and `$table`. The first one is replaced by the database name, and the second is replaced by the table name (but is available only when the model is specified).
So instead of this:
@ -125,11 +125,9 @@ So instead of this:
you can use:
db.select("SELECT * FROM $db.person", model_class=Person)
db.select("SELECT * FROM $db.$table", model_class=Person)
or even:
db.select("SELECT * FROM $table", model_class=Person)
Note: normally it is not necessary to specify the database name, since it's already sent in the query parameters to ClickHouse. It is enough to specify the table name.
Counting
--------

View File

@ -31,11 +31,13 @@ There are different operators that can be used, by passing `<fieldname>__<operat
| Operator | Equivalent SQL | Comments |
| -------- | -------------------------------------------- | ---------------------------------- |
| `eq` | `field = value` | |
| `ne` | `field != value` | |
| `gt` | `field > value` | |
| `gte` | `field >= value` | |
| `lt` | `field < value` | |
| `lte` | `field <= value` | |
| `in` | `field IN (values)` | See below |
| `not_in` | `field NOT IN (values)` | See below |
| `contains` | `field LIKE '%value%'` | For string fields only |
| `startswith` | `field LIKE 'value%'` | For string fields only |
| `endswith` | `field LIKE '%value'` | For string fields only |
@ -46,7 +48,7 @@ There are different operators that can be used, by passing `<fieldname>__<operat
### Using the `in` Operator
The `in` operator expects one of three types of values:
The `in` and `not_in` operators expect one of three types of values:
* A list or tuple of simple values
* A string, which is used verbatim as the contents of the parentheses
* Another queryset (subquery)
@ -88,6 +90,8 @@ The default order is ascending. To use descending order, add a minus sign before
qs = Person.objects_in(database).order_by('-height')
If you do not use `order_by`, the rows are returned in arbitrary order.
Omitting Fields
---------------

View File

@ -35,10 +35,9 @@ Usage:
generate_all
------------
Does everything:
- Generates the class reference using generate_ref
- Generates the table of contents using generate_toc
- Converts to HTML for visual inspection using docs2html
- Generates the class reference using generate_ref
- Generates the table of contents using generate_toc
- Converts to HTML for visual inspection using docs2html
Usage:

View File

@ -6,5 +6,8 @@ find ./ -iname "*.md" -type f -exec sh -c 'echo "Converting ${0}"; pandoc "${0}"
echo "Converting README.md"
pandoc ../README.md -s -o "../htmldocs/README.html"
echo "Converting CHANGELOG.md"
pandoc ../CHANGELOG.md -s -o "../htmldocs/CHANGELOG.html"
echo "Fixing links"
sed -i 's/\.md/\.html/g' ../htmldocs/*.html

View File

@ -28,7 +28,8 @@ class Database(object):
inserting data and other operations.
'''
def __init__(self, db_name, db_url='http://localhost:8123/', username=None, password=None, readonly=False):
def __init__(self, db_name, db_url='http://localhost:8123/',
username=None, password=None, readonly=False, autocreate=True):
'''
Initializes a database instance. Unless it's readonly, the database will be
created on the ClickHouse server if it does not already exist.
@ -38,16 +39,19 @@ class Database(object):
- `username`: optional connection credentials.
- `password`: optional connection credentials.
- `readonly`: use a read-only connection.
- `autocreate`: automatically create the database if does not exist (unless in readonly mode).
'''
self.db_name = db_name
self.db_url = db_url
self.username = username
self.password = password
self.readonly = False
self.db_exists = True
if readonly:
self.connection_readonly = self._is_connection_readonly()
self.readonly = True
else:
elif autocreate:
self.db_exists = False
self.create_database()
self.server_timezone = self._get_server_timezone()
@ -56,6 +60,7 @@ class Database(object):
Creates the database on the ClickHouse server if it does not already exist.
'''
self._send('CREATE DATABASE IF NOT EXISTS `%s`' % self.db_name)
self.db_exists = True
def drop_database(self):
'''
@ -244,6 +249,8 @@ class Database(object):
def _build_params(self, settings):
params = dict(settings or {})
if self.db_exists:
params['database'] = self.db_name
if self.username:
params['user'] = self.username
if self.password:

View File

@ -18,9 +18,9 @@ class Field(object):
def __init__(self, default=None, alias=None, materialized=None):
assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \
"Only one of default, alias and materialized parameters can be given"
assert alias is None or isinstance(alias, str) and alias != "",\
assert alias is None or isinstance(alias, string_types) and alias != "",\
"Alias field must be string field name, if given"
assert materialized is None or isinstance(materialized, str) and alias != "",\
assert materialized is None or isinstance(materialized, string_types) and alias != "",\
"Materialized field must be string, if given"
self.creation_counter = Field.creation_counter

View File

@ -32,7 +32,7 @@ class ModelBase(type):
return new_cls
@classmethod
def create_ad_hoc_model(cls, fields):
def create_ad_hoc_model(cls, fields, model_name='AdHocModel'):
# fields is a list of tuples (name, db_type)
# Check if model exists in cache
fields = list(fields)
@ -43,7 +43,7 @@ class ModelBase(type):
attrs = {}
for name, db_type in fields:
attrs[name] = cls.create_ad_hoc_field(db_type)
model_class = cls.__new__(cls, 'AdHocModel', (Model,), attrs)
model_class = cls.__new__(cls, model_name, (Model,), attrs)
# Add the model class to the cache
cls.ad_hoc_model_cache[cache_key] = model_class
return model_class

View File

@ -88,6 +88,19 @@ class IExactOperator(Operator):
return 'lowerUTF8(%s) = lowerUTF8(%s)' % (field_name, value)
class NotOperator(Operator):
"""
A wrapper around another operator, which negates it.
"""
def __init__(self, base_operator):
self._base_operator = base_operator
def to_sql(self, model_cls, field_name, value):
# Negate the base operator
return 'NOT (%s)' % self._base_operator.to_sql(model_cls, field_name, value)
# Define the set of builtin operators
_operators = {}
@ -96,11 +109,13 @@ def register_operator(name, sql):
_operators[name] = sql
register_operator('eq', SimpleOperator('='))
register_operator('ne', SimpleOperator('!='))
register_operator('gt', SimpleOperator('>'))
register_operator('gte', SimpleOperator('>='))
register_operator('lt', SimpleOperator('<'))
register_operator('lte', SimpleOperator('<='))
register_operator('in', InOperator())
register_operator('not_in', NotOperator(InOperator()))
register_operator('contains', LikeOperator('%{}%'))
register_operator('startswith', LikeOperator('{}%'))
register_operator('endswith', LikeOperator('%{}'))
@ -165,7 +180,7 @@ class QuerySet(object):
"""
self._model_cls = model_cls
self._database = database
self._order_by = [f[0] for f in model_cls._fields]
self._order_by = []
self._q = []
self._fields = []
@ -194,8 +209,9 @@ class QuerySet(object):
fields = '*'
if self._fields:
fields = ', '.join('`%s`' % field for field in self._fields)
params = (fields, self._database.db_name, self._model_cls.table_name(), self.conditions_as_sql(), self.order_by_as_sql())
return u'SELECT %s\nFROM `%s`.`%s`\nWHERE %s\nORDER BY %s' % params
ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
params = (fields, self._database.db_name, self._model_cls.table_name(), self.conditions_as_sql(), ordering)
return u'SELECT %s\nFROM `%s`.`%s`\nWHERE %s%s' % params
def order_by_as_sql(self):
"""

View File

@ -2,6 +2,8 @@
This file contains system readonly models that can be got from database
https://clickhouse.yandex/reference_en.html#System tables
"""
from six import string_types
from .database import Database
from .fields import *
from .models import Model
@ -115,7 +117,7 @@ class SystemPart(Model):
:return: A list of SystemPart objects
"""
assert isinstance(database, Database), "database must be database.Database class instance"
assert isinstance(conditions, str), "conditions must be a string"
assert isinstance(conditions, string_types), "conditions must be a string"
if conditions:
conditions += " AND"
field_names = ','.join([f[0] for f in cls._fields])

View File

@ -60,7 +60,7 @@ class ModelWithAliasFields(Model):
date_field = DateField()
str_field = StringField()
alias_str = StringField(alias='str_field')
alias_str = StringField(alias=u'str_field')
alias_int = Int32Field(alias='int_field')
alias_date = DateField(alias='date_field')

View File

@ -128,4 +128,13 @@ class DatabaseTestCase(TestCaseWithData):
def test_invalid_user(self):
with self.assertRaises(DatabaseException):
Database(self.database.db_name, username='default', password='wrong')
Database(self.database.db_name, username='default', password='wrong')
def test_nonexisting_db(self):
db = Database('db_not_here', autocreate=False)
with self.assertRaises(DatabaseException):
db.create_table(Person)
def test_preexisting_db(self):
db = Database(self.database.db_name, autocreate=False)
db.count(Person)

44
tests/test_join.py Normal file
View File

@ -0,0 +1,44 @@
from __future__ import unicode_literals, print_function
import unittest
import json
from infi.clickhouse_orm import database, engines, fields, models
class JoinTest(unittest.TestCase):
def setUp(self):
self.database = database.Database('test-db')
self.database.create_table(Foo)
self.database.create_table(Bar)
self.database.insert([Foo(id=i) for i in range(3)])
self.database.insert([Bar(id=i, b=i * i) for i in range(3)])
def print_res(self, query):
print(query)
print(json.dumps([row.to_dict() for row in self.database.select(query)]))
def test_without_db_name(self):
self.print_res("SELECT * FROM {}".format(Foo.table_name()))
self.print_res("SELECT * FROM {}".format(Bar.table_name()))
self.print_res("SELECT b FROM {} ALL LEFT JOIN {} USING id".format(Foo.table_name(), Bar.table_name()))
@unittest.skip('ClickHouse issue - https://github.com/yandex/ClickHouse/issues/635')
def test_with_db_name(self):
self.print_res("SELECT * FROM $db.{}".format(Foo.table_name()))
self.print_res("SELECT * FROM $db.{}".format(Bar.table_name()))
self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN $db.{} USING id".format(Foo.table_name(), Bar.table_name()))
def test_with_subquery(self):
self.print_res("SELECT b FROM {} ALL LEFT JOIN (SELECT * from {}) USING id".format(Foo.table_name(), Bar.table_name()))
self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN (SELECT * from $db.{}) USING id".format(Foo.table_name(), Bar.table_name()))
class Foo(models.Model):
id = fields.UInt8Field()
engine = engines.Memory()
class Bar(Foo):
b = fields.UInt8Field()

View File

@ -62,7 +62,7 @@ class ModelWithMaterializedFields(Model):
mat_str = StringField(materialized='lower(str_field)')
mat_int = Int32Field(materialized='abs(int_field)')
mat_date = DateField(materialized='toDate(date_time_field)')
mat_date = DateField(materialized=u'toDate(date_time_field)')
engine = MergeTree('mat_date', ('mat_date',))

View File

@ -46,6 +46,7 @@ class QuerySetTestCase(TestCaseWithData):
self._test_qs(qs.filter(first_name__in=('Connor', 'Courtney')), 3) # in tuple
self._test_qs(qs.filter(first_name__in=['Connor', 'Courtney']), 3) # in list
self._test_qs(qs.filter(first_name__in="'Connor', 'Courtney'"), 3) # in string
self._test_qs(qs.filter(first_name__not_in="'Connor', 'Courtney'"), 97)
self._test_qs(qs.filter(first_name__contains='sh'), 3) # case sensitive
self._test_qs(qs.filter(first_name__icontains='sh'), 6) # case insensitive
self._test_qs(qs.filter(first_name__startswith='le'), 0) # case sensitive
@ -74,6 +75,8 @@ class QuerySetTestCase(TestCaseWithData):
def test_filter_date_field(self):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(birthday='1970-12-02'), 1)
self._test_qs(qs.filter(birthday__eq='1970-12-02'), 1)
self._test_qs(qs.filter(birthday__ne='1970-12-02'), 99)
self._test_qs(qs.filter(birthday=date(1970, 12, 2)), 1)
self._test_qs(qs.filter(birthday__lte=date(1970, 12, 2)), 3)
@ -87,6 +90,8 @@ class QuerySetTestCase(TestCaseWithData):
def test_order_by(self):
qs = Person.objects_in(self.database)
self.assertFalse('ORDER BY' in qs.as_sql())
self.assertFalse(qs.order_by_as_sql())
person = list(qs.order_by('first_name', 'last_name'))[0]
self.assertEquals(person.first_name, 'Abdul')
person = list(qs.order_by('-first_name', '-last_name'))[0]
@ -100,6 +105,7 @@ class QuerySetTestCase(TestCaseWithData):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(height__in='SELECT max(height) FROM $table'), 2)
self._test_qs(qs.filter(first_name__in=qs.only('last_name')), 2)
self._test_qs(qs.filter(first_name__not_in=qs.only('last_name')), 98)
def _insert_sample_model(self):
self.database.create_table(SampleModel)
@ -125,6 +131,8 @@ class QuerySetTestCase(TestCaseWithData):
self._insert_sample_model()
qs = SampleModel.objects_in(self.database)
self._test_qs(qs.filter(num=1), 1)
self._test_qs(qs.filter(num__eq=1), 1)
self._test_qs(qs.filter(num__ne=1), 3)
self._test_qs(qs.filter(num__gt=1), 3)
self._test_qs(qs.filter(num__gte=1), 4)
self._test_qs(qs.filter(num__in=(1, 2, 3)), 3)

View File

@ -2,6 +2,7 @@
import unittest
import six
from infi.clickhouse_orm.database import Database, DatabaseException
from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
@ -25,7 +26,7 @@ class ReadonlyTestCase(TestCaseWithData):
with self.assertRaises(DatabaseException):
self.database.drop_database()
except DatabaseException as e:
if 'Unknown user' in unicode(e):
if 'Unknown user' in six.text_type(e):
raise unittest.SkipTest('Database user "%s" is not defined' % username)
else:
raise
@ -56,4 +57,3 @@ class ReadOnlyModel(Model):
readonly = True
name = StringField()

View File

@ -40,7 +40,7 @@ class SystemPartTest(unittest.TestCase):
def test_get_conditions(self):
parts = list(SystemPart.get(self.database, conditions="table='testtable'"))
self.assertEqual(len(parts), 1)
parts = list(SystemPart.get(self.database, conditions="table='othertable'"))
parts = list(SystemPart.get(self.database, conditions=u"table='othertable'"))
self.assertEqual(len(parts), 0)
def test_attach_detach(self):