- Added ne and not_in queryset operators

- Querysets no longer have a default order when `order_by` is not called
- Added `autocreate` flag to database initializer
- Fix for SELECT FROM JOIN (#37)
This commit is contained in:
Itai Shirav 2017-06-15 11:19:56 +03:00
parent 30902d0fe0
commit d02d6b14eb
11 changed files with 114 additions and 15 deletions

View File

@ -1,6 +1,15 @@
Change Log
==========
Unreleased
----------
- Added `ne` and `not_in` queryset operators
- Querysets no longer have a default order unless `order_by` is called
- Added `autocreate` flag to database initializer
- Fix some Python 2/3 incompatibilities (TvoroG, tsionyx)
- To work around a JOIN bug in ClickHouse, `$table` now inserts only the table name,
and the database name is sent in the query params instead
v0.9.0
------
- Major new feature: building model queries using QuerySets

View File

@ -10,7 +10,7 @@ infi.clickhouse_orm.database
Database instances connect to a specific ClickHouse database for running queries,
inserting data and other operations.
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False)
#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True)
Initializes a database instance. Unless it's readonly, the database will be
@ -21,6 +21,7 @@ created on the ClickHouse server if it does not already exist.
- `username`: optional connection credentials.
- `password`: optional connection credentials.
- `readonly`: use a read-only connection.
- `autocreate`: automatically create the database if does not exist (unless in readonly mode).
#### count(model_class, conditions=None)

View File

@ -117,7 +117,7 @@ This is a very convenient feature that saves you the need to define a model for
SQL Placeholders
----------------
There are a couple of special placeholders that you can use inside the SQL to make it easier to write: `$db` and `$table`. The first one is replaced by the database name, and the second is replaced by the database name plus table name (but is available only when the model is specified).
There are a couple of special placeholders that you can use inside the SQL to make it easier to write: `$db` and `$table`. The first one is replaced by the database name, and the second is replaced by the table name (but is available only when the model is specified).
So instead of this:
@ -125,11 +125,9 @@ So instead of this:
you can use:
db.select("SELECT * FROM $db.person", model_class=Person)
db.select("SELECT * FROM $db.$table", model_class=Person)
or even:
db.select("SELECT * FROM $table", model_class=Person)
Note: normally it is not necessary to specify the database name, since it's already sent in the query parameters to ClickHouse. It is enough to specify the table name.
Counting
--------

View File

@ -31,11 +31,13 @@ There are different operators that can be used, by passing `<fieldname>__<operat
| Operator | Equivalent SQL | Comments |
| -------- | -------------------------------------------- | ---------------------------------- |
| `eq` | `field = value` | |
| `ne` | `field != value` | |
| `gt` | `field > value` | |
| `gte` | `field >= value` | |
| `lt` | `field < value` | |
| `lte` | `field <= value` | |
| `in` | `field IN (values)` | See below |
| `not_in` | `field NOT IN (values)` | See below |
| `contains` | `field LIKE '%value%'` | For string fields only |
| `startswith` | `field LIKE 'value%'` | For string fields only |
| `endswith` | `field LIKE '%value'` | For string fields only |
@ -46,7 +48,7 @@ There are different operators that can be used, by passing `<fieldname>__<operat
### Using the `in` Operator
The `in` operator expects one of three types of values:
The `in` and `not_in` operators expect one of three types of values:
* A list or tuple of simple values
* A string, which is used verbatim as the contents of the parentheses
* Another queryset (subquery)
@ -88,6 +90,8 @@ The default order is ascending. To use descending order, add a minus sign before
qs = Person.objects_in(database).order_by('-height')
If you do not use `order_by`, the rows are returned in arbitrary order.
Omitting Fields
---------------

View File

@ -6,5 +6,8 @@ find ./ -iname "*.md" -type f -exec sh -c 'echo "Converting ${0}"; pandoc "${0}"
echo "Converting README.md"
pandoc ../README.md -s -o "../htmldocs/README.html"
echo "Converting CHANGELOG.md"
pandoc ../CHANGELOG.md -s -o "../htmldocs/CHANGELOG.html"
echo "Fixing links"
sed -i 's/\.md/\.html/g' ../htmldocs/*.html

View File

@ -28,7 +28,8 @@ class Database(object):
inserting data and other operations.
'''
def __init__(self, db_name, db_url='http://localhost:8123/', username=None, password=None, readonly=False):
def __init__(self, db_name, db_url='http://localhost:8123/',
username=None, password=None, readonly=False, autocreate=True):
'''
Initializes a database instance. Unless it's readonly, the database will be
created on the ClickHouse server if it does not already exist.
@ -38,16 +39,19 @@ class Database(object):
- `username`: optional connection credentials.
- `password`: optional connection credentials.
- `readonly`: use a read-only connection.
- `autocreate`: automatically create the database if does not exist (unless in readonly mode).
'''
self.db_name = db_name
self.db_url = db_url
self.username = username
self.password = password
self.readonly = False
self.db_exists = True
if readonly:
self.connection_readonly = self._is_connection_readonly()
self.readonly = True
else:
elif autocreate:
self.db_exists = False
self.create_database()
self.server_timezone = self._get_server_timezone()
@ -56,6 +60,7 @@ class Database(object):
Creates the database on the ClickHouse server if it does not already exist.
'''
self._send('CREATE DATABASE IF NOT EXISTS `%s`' % self.db_name)
self.db_exists = True
def drop_database(self):
'''
@ -244,6 +249,8 @@ class Database(object):
def _build_params(self, settings):
params = dict(settings or {})
if self.db_exists:
params['database'] = self.db_name
if self.username:
params['user'] = self.username
if self.password:

View File

@ -32,7 +32,7 @@ class ModelBase(type):
return new_cls
@classmethod
def create_ad_hoc_model(cls, fields):
def create_ad_hoc_model(cls, fields, model_name='AdHocModel'):
# fields is a list of tuples (name, db_type)
# Check if model exists in cache
fields = list(fields)
@ -43,7 +43,7 @@ class ModelBase(type):
attrs = {}
for name, db_type in fields:
attrs[name] = cls.create_ad_hoc_field(db_type)
model_class = cls.__new__(cls, 'AdHocModel', (Model,), attrs)
model_class = cls.__new__(cls, model_name, (Model,), attrs)
# Add the model class to the cache
cls.ad_hoc_model_cache[cache_key] = model_class
return model_class

View File

@ -88,6 +88,19 @@ class IExactOperator(Operator):
return 'lowerUTF8(%s) = lowerUTF8(%s)' % (field_name, value)
class NotOperator(Operator):
"""
A wrapper around another operator, which negates it.
"""
def __init__(self, base_operator):
self._base_operator = base_operator
def to_sql(self, model_cls, field_name, value):
# Negate the base operator
return 'NOT (%s)' % self._base_operator.to_sql(model_cls, field_name, value)
# Define the set of builtin operators
_operators = {}
@ -96,11 +109,13 @@ def register_operator(name, sql):
_operators[name] = sql
register_operator('eq', SimpleOperator('='))
register_operator('ne', SimpleOperator('!='))
register_operator('gt', SimpleOperator('>'))
register_operator('gte', SimpleOperator('>='))
register_operator('lt', SimpleOperator('<'))
register_operator('lte', SimpleOperator('<='))
register_operator('in', InOperator())
register_operator('not_in', NotOperator(InOperator()))
register_operator('contains', LikeOperator('%{}%'))
register_operator('startswith', LikeOperator('{}%'))
register_operator('endswith', LikeOperator('%{}'))
@ -165,7 +180,7 @@ class QuerySet(object):
"""
self._model_cls = model_cls
self._database = database
self._order_by = [f[0] for f in model_cls._fields]
self._order_by = []
self._q = []
self._fields = []
@ -194,8 +209,9 @@ class QuerySet(object):
fields = '*'
if self._fields:
fields = ', '.join('`%s`' % field for field in self._fields)
params = (fields, self._database.db_name, self._model_cls.table_name(), self.conditions_as_sql(), self.order_by_as_sql())
return u'SELECT %s\nFROM `%s`.`%s`\nWHERE %s\nORDER BY %s' % params
ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
params = (fields, self._database.db_name, self._model_cls.table_name(), self.conditions_as_sql(), ordering)
return u'SELECT %s\nFROM `%s`.`%s`\nWHERE %s%s' % params
def order_by_as_sql(self):
"""

View File

@ -128,4 +128,13 @@ class DatabaseTestCase(TestCaseWithData):
def test_invalid_user(self):
with self.assertRaises(DatabaseException):
Database(self.database.db_name, username='default', password='wrong')
Database(self.database.db_name, username='default', password='wrong')
def test_nonexisting_db(self):
db = Database('db_not_here', autocreate=False)
with self.assertRaises(DatabaseException):
db.create_table(Person)
def test_preexisting_db(self):
db = Database(self.database.db_name, autocreate=False)
db.count(Person)

44
tests/test_join.py Normal file
View File

@ -0,0 +1,44 @@
from __future__ import unicode_literals, print_function
import unittest
import json
from infi.clickhouse_orm import database, engines, fields, models
class JoinTest(unittest.TestCase):
def setUp(self):
self.database = database.Database('test-db')
self.database.create_table(Foo)
self.database.create_table(Bar)
self.database.insert([Foo(id=i) for i in range(3)])
self.database.insert([Bar(id=i, b=i * i) for i in range(3)])
def print_res(self, query):
print(query)
print(json.dumps([row.to_dict() for row in self.database.select(query)]))
def test_without_db_name(self):
self.print_res("SELECT * FROM {}".format(Foo.table_name()))
self.print_res("SELECT * FROM {}".format(Bar.table_name()))
self.print_res("SELECT b FROM {} ALL LEFT JOIN {} USING id".format(Foo.table_name(), Bar.table_name()))
@unittest.skip('ClickHouse issue - https://github.com/yandex/ClickHouse/issues/635')
def test_with_db_name(self):
self.print_res("SELECT * FROM $db.{}".format(Foo.table_name()))
self.print_res("SELECT * FROM $db.{}".format(Bar.table_name()))
self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN $db.{} USING id".format(Foo.table_name(), Bar.table_name()))
def test_with_subquery(self):
self.print_res("SELECT b FROM {} ALL LEFT JOIN (SELECT * from {}) USING id".format(Foo.table_name(), Bar.table_name()))
self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN (SELECT * from $db.{}) USING id".format(Foo.table_name(), Bar.table_name()))
class Foo(models.Model):
id = fields.UInt8Field()
engine = engines.Memory()
class Bar(Foo):
b = fields.UInt8Field()

View File

@ -46,6 +46,7 @@ class QuerySetTestCase(TestCaseWithData):
self._test_qs(qs.filter(first_name__in=('Connor', 'Courtney')), 3) # in tuple
self._test_qs(qs.filter(first_name__in=['Connor', 'Courtney']), 3) # in list
self._test_qs(qs.filter(first_name__in="'Connor', 'Courtney'"), 3) # in string
self._test_qs(qs.filter(first_name__not_in="'Connor', 'Courtney'"), 97)
self._test_qs(qs.filter(first_name__contains='sh'), 3) # case sensitive
self._test_qs(qs.filter(first_name__icontains='sh'), 6) # case insensitive
self._test_qs(qs.filter(first_name__startswith='le'), 0) # case sensitive
@ -74,6 +75,8 @@ class QuerySetTestCase(TestCaseWithData):
def test_filter_date_field(self):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(birthday='1970-12-02'), 1)
self._test_qs(qs.filter(birthday__eq='1970-12-02'), 1)
self._test_qs(qs.filter(birthday__ne='1970-12-02'), 99)
self._test_qs(qs.filter(birthday=date(1970, 12, 2)), 1)
self._test_qs(qs.filter(birthday__lte=date(1970, 12, 2)), 3)
@ -87,6 +90,8 @@ class QuerySetTestCase(TestCaseWithData):
def test_order_by(self):
qs = Person.objects_in(self.database)
self.assertFalse('ORDER BY' in qs.as_sql())
self.assertFalse(qs.order_by_as_sql())
person = list(qs.order_by('first_name', 'last_name'))[0]
self.assertEquals(person.first_name, 'Abdul')
person = list(qs.order_by('-first_name', '-last_name'))[0]
@ -100,6 +105,7 @@ class QuerySetTestCase(TestCaseWithData):
qs = Person.objects_in(self.database)
self._test_qs(qs.filter(height__in='SELECT max(height) FROM $table'), 2)
self._test_qs(qs.filter(first_name__in=qs.only('last_name')), 2)
self._test_qs(qs.filter(first_name__not_in=qs.only('last_name')), 98)
def _insert_sample_model(self):
self.database.create_table(SampleModel)
@ -125,6 +131,8 @@ class QuerySetTestCase(TestCaseWithData):
self._insert_sample_model()
qs = SampleModel.objects_in(self.database)
self._test_qs(qs.filter(num=1), 1)
self._test_qs(qs.filter(num__eq=1), 1)
self._test_qs(qs.filter(num__ne=1), 3)
self._test_qs(qs.filter(num__gt=1), 3)
self._test_qs(qs.filter(num__gte=1), 4)
self._test_qs(qs.filter(num__in=(1, 2, 3)), 3)