From 3487f3b2419136b5bf87ad194da171fc29f3328b Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 23 Apr 2017 14:08:14 +0300 Subject: [PATCH 01/53] initial work on query builder --- src/infi/clickhouse_orm/models.py | 8 ++ src/infi/clickhouse_orm/query.py | 210 ++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+) create mode 100644 src/infi/clickhouse_orm/query.py diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 44437a9..434254e 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -5,6 +5,7 @@ import pytz from .fields import Field from .utils import parse_tsv +from .query import QuerySet logger = getLogger('clickhouse_orm') @@ -204,6 +205,13 @@ class Model(with_metaclass(ModelBase)): data = self.__dict__ return {name: data[name] for name, field in fields} + @classmethod + def objects_in(cls, database): + ''' + Returns a queryset for selecting instances of this model class. + ''' + return QuerySet(cls, database) + class BufferModel(Model): diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py new file mode 100644 index 0000000..88404c7 --- /dev/null +++ b/src/infi/clickhouse_orm/query.py @@ -0,0 +1,210 @@ +import six +import pytz +from copy import copy + + +# TODO +# - comments +# - docs +# - tests +# - and/or between Q objects +# - check that field names are valid +# - add Model.using(db) method that returns a queryset +# - support functions and expressions? + + +class Operator(object): + + def to_sql(self, model_cls, field_name, value): + raise NotImplementedError + + +class SimpleOperator(Operator): + + def __init__(self, sql_operator): + self._sql_operator = sql_operator + + def to_sql(self, model_cls, field_name, value): + field = getattr(model_cls, field_name) + value = field.to_db_string(field.to_python(value, pytz.utc)) + return ' '.join([field_name, self._sql_operator, value]) + + +class InOperator(Operator): + + def to_sql(self, model_cls, field_name, value): + field = getattr(model_cls, field_name) + if isinstance(value, QuerySet): + value = value.query() + elif isinstance(value, six.string_types): + pass + else: + value = ', '.join([field.to_db_string(field.to_python(v, pytz.utc)) for v in value]) + return '%s IN (%s)' % (field_name, value) + + +class LikeOperator(Operator): + + def __init__(self, pattern, case_sensitive=True): + self._pattern = pattern + self._case_sensitive = case_sensitive + + def to_sql(self, model_cls, field_name, value): + field = getattr(model_cls, field_name) + value = field.to_db_string(field.to_python(value, pytz.utc), quote=False) + value = value.replace('\\', '\\\\').replace('%', '\\\\%').replace('_', '\\\\_') + pattern = self._pattern.format(value) + if self._case_sensitive: + return '%s LIKE \'%s\'' % (field_name, pattern) + else: + return 'lowerUTF8(%s) LIKE lowerUTF8(\'%s\')' % (field_name, pattern) + + +class IExactOperator(Operator): + + def to_sql(self, model_cls, field_name, value): + field = getattr(model_cls, field_name) + value = field.to_db_string(field.to_python(value, pytz.utc)) + return 'lowerUTF8(%s) = lowerUTF8(%s)' % (field_name, value) + + +_operators = {} + +def register_operator(name, sql): + _operators[name] = sql + +register_operator('eq', SimpleOperator('=')) +register_operator('gt', SimpleOperator('>')) +register_operator('gte', SimpleOperator('>=')) +register_operator('lt', SimpleOperator('<')) +register_operator('lte', SimpleOperator('<=')) +register_operator('in', InOperator()) 
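+# Note: 'in' accepts a list/tuple of values, a raw SQL string, or another
+# QuerySet (rendered as a subquery); e.g. first_name__in=('Ava', 'Anne')
+# produces: first_name IN ('Ava', 'Anne')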
+register_operator('contains', LikeOperator('%{}%'))
+register_operator('startswith', LikeOperator('{}%'))
+register_operator('endswith', LikeOperator('%{}'))
+register_operator('icontains', LikeOperator('%{}%', False))
+register_operator('istartswith', LikeOperator('{}%', False))
+register_operator('iendswith', LikeOperator('%{}', False))
+register_operator('iexact', IExactOperator())
+
+
+class FOV(object):
+
+    def __init__(self, field_name, operator, value):
+        self._field_name = field_name
+        self._operator = _operators[operator]
+        self._value = value
+
+    def to_sql(self, model_cls):
+        return self._operator.to_sql(model_cls, self._field_name, self._value)
+
+
+class Q(object):
+
+    def __init__(self, **kwargs):
+        self._fovs = [self._build_fov(k, v) for k, v in six.iteritems(kwargs)]
+        self._negate = False
+
+    def _build_fov(self, key, value):
+        if '__' in key:
+            field_name, operator = key.rsplit('__', 1)
+        else:
+            field_name, operator = key, 'eq'
+        return FOV(field_name, operator, value)
+
+    def to_sql(self, model_cls):
+        if not self._fovs:
+            return '1'
+        sql = ' AND '.join(fov.to_sql(model_cls) for fov in self._fovs)
+        if self._negate:
+            sql = 'NOT (%s)' % sql
+        return sql
+
+    def __invert__(self):
+        q = copy(self)
+        q._negate = True
+        return q
+
+
+class QuerySet(object):
+
+    def __init__(self, model_cls, database):
+        self._model_cls = model_cls
+        self._database = database
+        self._order_by = [f[0] for f in model_cls._fields]
+        self._q = []
+        self._fields = []
+
+    def __iter__(self):
+        """
+        Iterates over the model instances matching this queryset
+        """
+        return self._database.select(self.query(), self._model_cls)
+
+    def query(self):
+        """
+        Return the queryset as SQL.
+        """
+        fields = '*'
+        if self._fields:
+            fields = ', '.join('`%s`' % field for field in self._fields)
+        params = (fields, self._database.db_name, self._model_cls.table_name(), self.conditions_as_sql(), self.order_by_as_sql())
+        return 'SELECT %s\nFROM `%s`.`%s`\nWHERE %s\nORDER BY %s' % params
+
+    def order_by_as_sql(self):
+        """
+        Return the contents of the queryset's ORDER BY clause.
+        """
+        return ', '.join([
+            '%s DESC' % field[1:] if field[0] == '-' else field
+            for field in self._order_by
+        ])
+
+    def conditions_as_sql(self):
+        """
+        Return the contents of the queryset's WHERE clause.
+        """
+        if self._q:
+            return ' AND '.join([q.to_sql(self._model_cls) for q in self._q])
+        else:
+            return '1'
+
+    def count(self):
+        """
+        Returns the number of matching model instances.
+        """
+        return self._database.count(self._model_cls, self.conditions_as_sql())
+
+    def order_by(self, *field_names):
+        """
+        Returns a new QuerySet instance with the ordering changed.
+        """
+        qs = copy(self)
+        qs._order_by = field_names
+        return qs
+
+    def only(self, *field_names):
+        """
+        Limit the query to return only the specified field names.
+        Useful when there are large fields that are not needed,
+        or for creating a subquery to use with an IN operator.
+        """
+        qs = copy(self)
+        qs._fields = field_names
+        return qs
+
+    def filter(self, **kwargs):
+        """
+        Returns a new QuerySet instance that includes only rows matching the conditions.
+        """
+        qs = copy(self)
+        qs._q = list(self._q) + [Q(**kwargs)]
+        return qs
+
+    def exclude(self, **kwargs):
+        """
+        Returns a new QuerySet instance that excludes all rows matching the conditions.
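+        For example, exclude(first_name='Dan') appends the condition
+        NOT (first_name = 'Dan') to the generated WHERE clause.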
+ """ + qs = copy(self) + qs._q = list(self._q) + [~Q(**kwargs)] + return qs From 7c26c4da5a4df176a720d0439e7d7370536faa92 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Mon, 24 Apr 2017 13:14:13 +0300 Subject: [PATCH 02/53] Refactor tests with data --- tests/base_test_with_data.py | 146 ++++++++++++++++++++++++++ tests/test_buffer.py | 29 ++++++ tests/test_database.py | 193 +---------------------------------- tests/test_readonly.py | 44 ++++++++ 4 files changed, 222 insertions(+), 190 deletions(-) create mode 100644 tests/base_test_with_data.py create mode 100644 tests/test_buffer.py create mode 100644 tests/test_readonly.py diff --git a/tests/base_test_with_data.py b/tests/base_test_with_data.py new file mode 100644 index 0000000..d50d311 --- /dev/null +++ b/tests/base_test_with_data.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- + +import unittest + +from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.engines import * + +import logging +logging.getLogger("requests").setLevel(logging.WARNING) + + +class TestCaseWithData(unittest.TestCase): + + def setUp(self): + self.database = Database('test-db') + self.database.create_table(Person) + + def tearDown(self): + self.database.drop_table(Person) + self.database.drop_database() + + def _insert_and_check(self, data, count): + self.database.insert(data) + self.assertEquals(count, self.database.count(Person)) + for instance in data: + self.assertEquals(self.database, instance.get_database()) + + def _sample_data(self): + for entry in data: + yield Person(**entry) + + +class Person(Model): + + first_name = StringField() + last_name = StringField() + birthday = DateField() + height = Float32Field() + + engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday')) + + +data = [ + {"first_name": "Abdul", "last_name": "Hester", "birthday": "1970-12-02", "height": "1.63"}, + {"first_name": "Adam", "last_name": "Goodman", "birthday": "1986-01-07", "height": "1.74"}, + {"first_name": "Adena", "last_name": "Norman", "birthday": "1979-05-14", "height": "1.66"}, + {"first_name": "Aline", "last_name": "Crane", "birthday": "1988-05-01", "height": "1.62"}, + {"first_name": "Althea", "last_name": "Barrett", "birthday": "2004-07-28", "height": "1.71"}, + {"first_name": "Amanda", "last_name": "Vang", "birthday": "1973-02-23", "height": "1.68"}, + {"first_name": "Angela", "last_name": "Sanders", "birthday": "2016-01-08", "height": "1.66"}, + {"first_name": "Anne", "last_name": "Rasmussen", "birthday": "1995-04-03", "height": "1.77"}, + {"first_name": "Ariana", "last_name": "Cole", "birthday": "1977-12-20", "height": "1.72"}, + {"first_name": "Ashton", "last_name": "Fuller", "birthday": "1995-11-17", "height": "1.75"}, + {"first_name": "Ava", "last_name": "Sanders", "birthday": "1997-08-10", "height": "1.60"}, + {"first_name": "Barrett", "last_name": "Clemons", "birthday": "1985-07-03", "height": "1.71"}, + {"first_name": "Beatrice", "last_name": "Gregory", "birthday": "1992-01-19", "height": "1.80"}, + {"first_name": "Buffy", "last_name": "Webb", "birthday": "1990-03-06", "height": "1.68"}, + {"first_name": "Callie", "last_name": "Wiley", "birthday": "1987-11-24", "height": "1.69"}, + {"first_name": "Cara", "last_name": "Fox", "birthday": "2004-05-15", "height": "1.71"}, + {"first_name": "Caryn", "last_name": "Sears", "birthday": "1999-02-17", "height": "1.71"}, + {"first_name": "Cassady", "last_name": "Knapp", "birthday": "1977-12-15", 
"height": "1.72"}, + {"first_name": "Cassady", "last_name": "Rogers", "birthday": "2013-11-04", "height": "1.71"}, + {"first_name": "Catherine", "last_name": "Hicks", "birthday": "1989-05-23", "height": "1.80"}, + {"first_name": "Cathleen", "last_name": "Frank", "birthday": "1977-09-04", "height": "1.61"}, + {"first_name": "Celeste", "last_name": "James", "birthday": "1990-03-08", "height": "1.67"}, + {"first_name": "Chelsea", "last_name": "Castro", "birthday": "2001-08-10", "height": "1.71"}, + {"first_name": "Ciaran", "last_name": "Carver", "birthday": "2016-12-25", "height": "1.76"}, + {"first_name": "Ciaran", "last_name": "Hurley", "birthday": "1995-10-25", "height": "1.65"}, + {"first_name": "Clementine", "last_name": "Moon", "birthday": "1994-03-29", "height": "1.73"}, + {"first_name": "Connor", "last_name": "Jenkins", "birthday": "1999-07-23", "height": "1.67"}, + {"first_name": "Courtney", "last_name": "Cannon", "birthday": "1997-10-26", "height": "1.76"}, + {"first_name": "Courtney", "last_name": "Hoffman", "birthday": "1994-11-07", "height": "1.65"}, + {"first_name": "Denton", "last_name": "Sanchez", "birthday": "1971-10-16", "height": "1.72"}, + {"first_name": "Dominique", "last_name": "Sandoval", "birthday": "1972-02-01", "height": "1.72"}, + {"first_name": "Dora", "last_name": "Cabrera", "birthday": "2016-04-26", "height": "1.68"}, + {"first_name": "Eagan", "last_name": "Dodson", "birthday": "2015-10-22", "height": "1.67"}, + {"first_name": "Edan", "last_name": "Dennis", "birthday": "1989-09-18", "height": "1.73"}, + {"first_name": "Ella", "last_name": "Castillo", "birthday": "1973-03-28", "height": "1.73"}, + {"first_name": "Elton", "last_name": "Ayers", "birthday": "1994-06-20", "height": "1.68"}, + {"first_name": "Elton", "last_name": "Smith", "birthday": "1982-06-20", "height": "1.66"}, + {"first_name": "Emma", "last_name": "Clements", "birthday": "1996-08-07", "height": "1.75"}, + {"first_name": "Evangeline", "last_name": "Weber", "birthday": "1984-06-03", "height": "1.70"}, + {"first_name": "Faith", "last_name": "Emerson", "birthday": "1989-12-30", "height": "1.62"}, + {"first_name": "Fritz", "last_name": "Atkinson", "birthday": "2011-06-15", "height": "1.73"}, + {"first_name": "Galvin", "last_name": "Phillips", "birthday": "2004-01-17", "height": "1.74"}, + {"first_name": "Georgia", "last_name": "Kennedy", "birthday": "1974-12-29", "height": "1.66"}, + {"first_name": "Griffith", "last_name": "Henry", "birthday": "1985-04-02", "height": "1.66"}, + {"first_name": "Hedy", "last_name": "Strong", "birthday": "2001-10-04", "height": "1.60"}, + {"first_name": "Hu", "last_name": "May", "birthday": "1976-10-01", "height": "1.76"}, + {"first_name": "Hyacinth", "last_name": "Kent", "birthday": "1971-07-18", "height": "1.72"}, + {"first_name": "Idola", "last_name": "Fulton", "birthday": "1974-11-27", "height": "1.66"}, + {"first_name": "Jarrod", "last_name": "Gibbs", "birthday": "1987-06-13", "height": "1.62"}, + {"first_name": "Jesse", "last_name": "Gomez", "birthday": "2011-01-28", "height": "1.71"}, + {"first_name": "Josiah", "last_name": "Hodges", "birthday": "2011-09-04", "height": "1.68"}, + {"first_name": "Karleigh", "last_name": "Bartlett", "birthday": "1991-10-24", "height": "1.69"}, + {"first_name": "Keelie", "last_name": "Mathis", "birthday": "1993-10-26", "height": "1.69"}, + {"first_name": "Kieran", "last_name": "Solomon", "birthday": "1993-10-30", "height": "1.69"}, + {"first_name": "Laith", "last_name": "Howell", "birthday": "1991-07-07", "height": "1.70"}, + 
{"first_name": "Leroy", "last_name": "Pacheco", "birthday": "1998-12-30", "height": "1.70"}, + {"first_name": "Lesley", "last_name": "Stephenson", "birthday": "2010-04-10", "height": "1.64"}, + {"first_name": "Macaulay", "last_name": "Rowe", "birthday": "1982-03-02", "height": "1.68"}, + {"first_name": "Macey", "last_name": "Griffin", "birthday": "1971-09-18", "height": "1.63"}, + {"first_name": "Madeline", "last_name": "Kidd", "birthday": "1984-12-09", "height": "1.69"}, + {"first_name": "Maia", "last_name": "Hyde", "birthday": "1972-06-09", "height": "1.74"}, + {"first_name": "Mary", "last_name": "Kirkland", "birthday": "1987-10-09", "height": "1.73"}, + {"first_name": "Molly", "last_name": "Salas", "birthday": "1994-04-23", "height": "1.70"}, + {"first_name": "Montana", "last_name": "Bruce", "birthday": "1982-06-28", "height": "1.66"}, + {"first_name": "Naomi", "last_name": "Hays", "birthday": "2004-11-27", "height": "1.70"}, + {"first_name": "Norman", "last_name": "Santos", "birthday": "1989-01-10", "height": "1.68"}, + {"first_name": "Octavius", "last_name": "Floyd", "birthday": "1985-02-22", "height": "1.68"}, + {"first_name": "Odette", "last_name": "Mcneil", "birthday": "1978-05-21", "height": "1.76"}, + {"first_name": "Oliver", "last_name": "Ashley", "birthday": "2004-08-13", "height": "1.68"}, + {"first_name": "Quon", "last_name": "Wiggins", "birthday": "1992-05-06", "height": "1.74"}, + {"first_name": "Rafael", "last_name": "Parker", "birthday": "2016-01-24", "height": "1.76"}, + {"first_name": "Reese", "last_name": "Noel", "birthday": "1996-11-04", "height": "1.77"}, + {"first_name": "Rhona", "last_name": "Camacho", "birthday": "1976-12-17", "height": "1.59"}, + {"first_name": "Rigel", "last_name": "Oneal", "birthday": "1993-11-05", "height": "1.63"}, + {"first_name": "Roary", "last_name": "Simmons", "birthday": "1986-07-23", "height": "1.63"}, + {"first_name": "Russell", "last_name": "Pruitt", "birthday": "1979-05-04", "height": "1.63"}, + {"first_name": "Sawyer", "last_name": "Fischer", "birthday": "1995-04-01", "height": "1.78"}, + {"first_name": "Scarlett", "last_name": "Durham", "birthday": "2005-09-29", "height": "1.65"}, + {"first_name": "Seth", "last_name": "Serrano", "birthday": "2017-06-02", "height": "1.71"}, + {"first_name": "Shad", "last_name": "Bradshaw", "birthday": "1998-08-25", "height": "1.72"}, + {"first_name": "Shana", "last_name": "Jarvis", "birthday": "1997-05-21", "height": "1.72"}, + {"first_name": "Sharon", "last_name": "Shelton", "birthday": "1970-05-02", "height": "1.65"}, + {"first_name": "Shoshana", "last_name": "Solis", "birthday": "1998-07-18", "height": "1.65"}, + {"first_name": "Stephen", "last_name": "Baxter", "birthday": "2004-09-24", "height": "1.74"}, + {"first_name": "Sydney", "last_name": "Stevens", "birthday": "1989-07-11", "height": "1.70"}, + {"first_name": "Tasha", "last_name": "Campos", "birthday": "1984-02-11", "height": "1.72"}, + {"first_name": "Ulla", "last_name": "Arnold", "birthday": "1990-06-04", "height": "1.63"}, + {"first_name": "Vaughan", "last_name": "Schmidt", "birthday": "1985-06-19", "height": "1.61"}, + {"first_name": "Velma", "last_name": "English", "birthday": "1999-01-18", "height": "1.65"}, + {"first_name": "Venus", "last_name": "Hurst", "birthday": "1993-10-22", "height": "1.72"}, + {"first_name": "Victor", "last_name": "Woods", "birthday": "1989-06-23", "height": "1.67"}, + {"first_name": "Victoria", "last_name": "Slater", "birthday": "2009-07-19", "height": "1.72"}, + {"first_name": "Wang", "last_name": 
"Goodwin", "birthday": "1983-05-15", "height": "1.66"}, + {"first_name": "Warren", "last_name": "Bowen", "birthday": "2000-07-20", "height": "1.76"}, + {"first_name": "Warren", "last_name": "Dudley", "birthday": "1995-10-23", "height": "1.59"}, + {"first_name": "Whilemina", "last_name": "Blankenship", "birthday": "1970-07-14", "height": "1.66"}, + {"first_name": "Whitney", "last_name": "Durham", "birthday": "1977-09-15", "height": "1.72"}, + {"first_name": "Whitney", "last_name": "Scott", "birthday": "1971-07-04", "height": "1.70"}, + {"first_name": "Wynter", "last_name": "Garcia", "birthday": "1975-01-10", "height": "1.69"}, + {"first_name": "Yolanda", "last_name": "Duke", "birthday": "1997-02-25", "height": "1.74"} +]; diff --git a/tests/test_buffer.py b/tests/test_buffer.py new file mode 100644 index 0000000..91bf656 --- /dev/null +++ b/tests/test_buffer.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +import unittest + +from infi.clickhouse_orm.models import BufferModel +from infi.clickhouse_orm.engines import * +from .base_test_with_data import * + + +class BufferTestCase(TestCaseWithData): + + def _insert_and_check_buffer(self, data, count): + self.database.insert(data) + self.assertEquals(count, self.database.count(PersonBuffer)) + + def _sample_buffer_data(self): + for entry in data: + yield PersonBuffer(**entry) + + def test_insert_buffer(self): + self.database.create_table(PersonBuffer) + self._insert_and_check_buffer(self._sample_buffer_data(), len(data)) + + +class PersonBuffer(BufferModel, Person): + + engine = Buffer(Person) + + diff --git a/tests/test_database.py b/tests/test_database.py index 1897d8f..91d7af6 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -2,36 +2,11 @@ import unittest -from infi.clickhouse_orm.database import Database, DatabaseException -from infi.clickhouse_orm.models import Model, BufferModel -from infi.clickhouse_orm.fields import * -from infi.clickhouse_orm.engines import * - -import logging -logging.getLogger("requests").setLevel(logging.WARNING) +from infi.clickhouse_orm.database import Database +from .base_test_with_data import * -class DatabaseTestCase(unittest.TestCase): - - def setUp(self): - self.database = Database('test-db') - self.database.create_table(Person) - self.database.create_table(PersonBuffer) - - def tearDown(self): - self.database.drop_table(PersonBuffer) - self.database.drop_table(Person) - self.database.drop_database() - - def _insert_and_check(self, data, count): - self.database.insert(data) - self.assertEquals(count, self.database.count(Person)) - for instance in data: - self.assertEquals(self.database, instance.get_database()) - - def _insert_and_check_buffer(self, data, count): - self.database.insert(data) - self.assertEquals(count, self.database.count(PersonBuffer)) +class DatabaseTestCase(TestCaseWithData): def test_insert__generator(self): self._insert_and_check(self._sample_data(), len(data)) @@ -140,171 +115,9 @@ class DatabaseTestCase(unittest.TestCase): p = list(self.database.select("SELECT * from $table", Person))[0] self.assertEquals(p.first_name, s) - def test_readonly(self): - orig_database = self.database - self.database = Database(orig_database.db_name, readonly=True) - with self.assertRaises(DatabaseException): - self._insert_and_check(self._sample_data(), len(data)) - self.assertEquals(self.database.count(Person), 0) - with self.assertRaises(DatabaseException): - self.database.drop_table(Person) - with self.assertRaises(DatabaseException): - self.database.drop_database() - self.database = 
orig_database - - def test_insert_buffer(self): - self._insert_and_check_buffer(self._sample_buffer_data(), len(data)) - - def _sample_data(self): - for entry in data: - yield Person(**entry) - def test_raw(self): self._insert_and_check(self._sample_data(), len(data)) query = "SELECT * FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name" results = self.database.raw(query) self.assertEqual(results, "Whitney\tDurham\t1977-09-15\t1.72\nWhitney\tScott\t1971-07-04\t1.7\n") - def test_insert_readonly(self): - m = ReadOnlyModel(name='readonly') - with self.assertRaises(DatabaseException): - self.database.insert([m]) - - def test_create_readonly_table(self): - with self.assertRaises(DatabaseException): - self.database.create_table(ReadOnlyModel) - - def test_drop_readonly_table(self): - with self.assertRaises(DatabaseException): - self.database.drop_table(ReadOnlyModel) - - def _sample_buffer_data(self): - for entry in data: - yield PersonBuffer(**entry) - - - -class Person(Model): - - first_name = StringField() - last_name = StringField() - birthday = DateField() - height = Float32Field() - - engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday')) - - -class ReadOnlyModel(Model): - readonly = True - - name = StringField() - - -class PersonBuffer(BufferModel, Person): - - engine = Buffer(Person) - - - -data = [ - {"first_name": "Abdul", "last_name": "Hester", "birthday": "1970-12-02", "height": "1.63"}, - {"first_name": "Adam", "last_name": "Goodman", "birthday": "1986-01-07", "height": "1.74"}, - {"first_name": "Adena", "last_name": "Norman", "birthday": "1979-05-14", "height": "1.66"}, - {"first_name": "Aline", "last_name": "Crane", "birthday": "1988-05-01", "height": "1.62"}, - {"first_name": "Althea", "last_name": "Barrett", "birthday": "2004-07-28", "height": "1.71"}, - {"first_name": "Amanda", "last_name": "Vang", "birthday": "1973-02-23", "height": "1.68"}, - {"first_name": "Angela", "last_name": "Sanders", "birthday": "2016-01-08", "height": "1.66"}, - {"first_name": "Anne", "last_name": "Rasmussen", "birthday": "1995-04-03", "height": "1.77"}, - {"first_name": "Ariana", "last_name": "Cole", "birthday": "1977-12-20", "height": "1.72"}, - {"first_name": "Ashton", "last_name": "Fuller", "birthday": "1995-11-17", "height": "1.75"}, - {"first_name": "Ava", "last_name": "Sanders", "birthday": "1997-08-10", "height": "1.60"}, - {"first_name": "Barrett", "last_name": "Clemons", "birthday": "1985-07-03", "height": "1.71"}, - {"first_name": "Beatrice", "last_name": "Gregory", "birthday": "1992-01-19", "height": "1.80"}, - {"first_name": "Buffy", "last_name": "Webb", "birthday": "1990-03-06", "height": "1.68"}, - {"first_name": "Callie", "last_name": "Wiley", "birthday": "1987-11-24", "height": "1.69"}, - {"first_name": "Cara", "last_name": "Fox", "birthday": "2004-05-15", "height": "1.71"}, - {"first_name": "Caryn", "last_name": "Sears", "birthday": "1999-02-17", "height": "1.71"}, - {"first_name": "Cassady", "last_name": "Knapp", "birthday": "1977-12-15", "height": "1.72"}, - {"first_name": "Cassady", "last_name": "Rogers", "birthday": "2013-11-04", "height": "1.71"}, - {"first_name": "Catherine", "last_name": "Hicks", "birthday": "1989-05-23", "height": "1.80"}, - {"first_name": "Cathleen", "last_name": "Frank", "birthday": "1977-09-04", "height": "1.61"}, - {"first_name": "Celeste", "last_name": "James", "birthday": "1990-03-08", "height": "1.67"}, - {"first_name": "Chelsea", "last_name": "Castro", "birthday": "2001-08-10", "height": "1.71"}, - {"first_name": 
"Ciaran", "last_name": "Carver", "birthday": "2016-12-25", "height": "1.76"}, - {"first_name": "Ciaran", "last_name": "Hurley", "birthday": "1995-10-25", "height": "1.65"}, - {"first_name": "Clementine", "last_name": "Moon", "birthday": "1994-03-29", "height": "1.73"}, - {"first_name": "Connor", "last_name": "Jenkins", "birthday": "1999-07-23", "height": "1.67"}, - {"first_name": "Courtney", "last_name": "Cannon", "birthday": "1997-10-26", "height": "1.76"}, - {"first_name": "Courtney", "last_name": "Hoffman", "birthday": "1994-11-07", "height": "1.65"}, - {"first_name": "Denton", "last_name": "Sanchez", "birthday": "1971-10-16", "height": "1.72"}, - {"first_name": "Dominique", "last_name": "Sandoval", "birthday": "1972-02-01", "height": "1.72"}, - {"first_name": "Dora", "last_name": "Cabrera", "birthday": "2016-04-26", "height": "1.68"}, - {"first_name": "Eagan", "last_name": "Dodson", "birthday": "2015-10-22", "height": "1.67"}, - {"first_name": "Edan", "last_name": "Dennis", "birthday": "1989-09-18", "height": "1.73"}, - {"first_name": "Ella", "last_name": "Castillo", "birthday": "1973-03-28", "height": "1.73"}, - {"first_name": "Elton", "last_name": "Ayers", "birthday": "1994-06-20", "height": "1.68"}, - {"first_name": "Elton", "last_name": "Smith", "birthday": "1982-06-20", "height": "1.66"}, - {"first_name": "Emma", "last_name": "Clements", "birthday": "1996-08-07", "height": "1.75"}, - {"first_name": "Evangeline", "last_name": "Weber", "birthday": "1984-06-03", "height": "1.70"}, - {"first_name": "Faith", "last_name": "Emerson", "birthday": "1989-12-30", "height": "1.62"}, - {"first_name": "Fritz", "last_name": "Atkinson", "birthday": "2011-06-15", "height": "1.73"}, - {"first_name": "Galvin", "last_name": "Phillips", "birthday": "2004-01-17", "height": "1.74"}, - {"first_name": "Georgia", "last_name": "Kennedy", "birthday": "1974-12-29", "height": "1.66"}, - {"first_name": "Griffith", "last_name": "Henry", "birthday": "1985-04-02", "height": "1.66"}, - {"first_name": "Hedy", "last_name": "Strong", "birthday": "2001-10-04", "height": "1.60"}, - {"first_name": "Hu", "last_name": "May", "birthday": "1976-10-01", "height": "1.76"}, - {"first_name": "Hyacinth", "last_name": "Kent", "birthday": "1971-07-18", "height": "1.72"}, - {"first_name": "Idola", "last_name": "Fulton", "birthday": "1974-11-27", "height": "1.66"}, - {"first_name": "Jarrod", "last_name": "Gibbs", "birthday": "1987-06-13", "height": "1.62"}, - {"first_name": "Jesse", "last_name": "Gomez", "birthday": "2011-01-28", "height": "1.71"}, - {"first_name": "Josiah", "last_name": "Hodges", "birthday": "2011-09-04", "height": "1.68"}, - {"first_name": "Karleigh", "last_name": "Bartlett", "birthday": "1991-10-24", "height": "1.69"}, - {"first_name": "Keelie", "last_name": "Mathis", "birthday": "1993-10-26", "height": "1.69"}, - {"first_name": "Kieran", "last_name": "Solomon", "birthday": "1993-10-30", "height": "1.69"}, - {"first_name": "Laith", "last_name": "Howell", "birthday": "1991-07-07", "height": "1.70"}, - {"first_name": "Leroy", "last_name": "Pacheco", "birthday": "1998-12-30", "height": "1.70"}, - {"first_name": "Lesley", "last_name": "Stephenson", "birthday": "2010-04-10", "height": "1.64"}, - {"first_name": "Macaulay", "last_name": "Rowe", "birthday": "1982-03-02", "height": "1.68"}, - {"first_name": "Macey", "last_name": "Griffin", "birthday": "1971-09-18", "height": "1.63"}, - {"first_name": "Madeline", "last_name": "Kidd", "birthday": "1984-12-09", "height": "1.69"}, - {"first_name": "Maia", "last_name": "Hyde", 
"birthday": "1972-06-09", "height": "1.74"}, - {"first_name": "Mary", "last_name": "Kirkland", "birthday": "1987-10-09", "height": "1.73"}, - {"first_name": "Molly", "last_name": "Salas", "birthday": "1994-04-23", "height": "1.70"}, - {"first_name": "Montana", "last_name": "Bruce", "birthday": "1982-06-28", "height": "1.66"}, - {"first_name": "Naomi", "last_name": "Hays", "birthday": "2004-11-27", "height": "1.70"}, - {"first_name": "Norman", "last_name": "Santos", "birthday": "1989-01-10", "height": "1.68"}, - {"first_name": "Octavius", "last_name": "Floyd", "birthday": "1985-02-22", "height": "1.68"}, - {"first_name": "Odette", "last_name": "Mcneil", "birthday": "1978-05-21", "height": "1.76"}, - {"first_name": "Oliver", "last_name": "Ashley", "birthday": "2004-08-13", "height": "1.68"}, - {"first_name": "Quon", "last_name": "Wiggins", "birthday": "1992-05-06", "height": "1.74"}, - {"first_name": "Rafael", "last_name": "Parker", "birthday": "2016-01-24", "height": "1.76"}, - {"first_name": "Reese", "last_name": "Noel", "birthday": "1996-11-04", "height": "1.77"}, - {"first_name": "Rhona", "last_name": "Camacho", "birthday": "1976-12-17", "height": "1.59"}, - {"first_name": "Rigel", "last_name": "Oneal", "birthday": "1993-11-05", "height": "1.63"}, - {"first_name": "Roary", "last_name": "Simmons", "birthday": "1986-07-23", "height": "1.63"}, - {"first_name": "Russell", "last_name": "Pruitt", "birthday": "1979-05-04", "height": "1.63"}, - {"first_name": "Sawyer", "last_name": "Fischer", "birthday": "1995-04-01", "height": "1.78"}, - {"first_name": "Scarlett", "last_name": "Durham", "birthday": "2005-09-29", "height": "1.65"}, - {"first_name": "Seth", "last_name": "Serrano", "birthday": "2017-06-02", "height": "1.71"}, - {"first_name": "Shad", "last_name": "Bradshaw", "birthday": "1998-08-25", "height": "1.72"}, - {"first_name": "Shana", "last_name": "Jarvis", "birthday": "1997-05-21", "height": "1.72"}, - {"first_name": "Sharon", "last_name": "Shelton", "birthday": "1970-05-02", "height": "1.65"}, - {"first_name": "Shoshana", "last_name": "Solis", "birthday": "1998-07-18", "height": "1.65"}, - {"first_name": "Stephen", "last_name": "Baxter", "birthday": "2004-09-24", "height": "1.74"}, - {"first_name": "Sydney", "last_name": "Stevens", "birthday": "1989-07-11", "height": "1.70"}, - {"first_name": "Tasha", "last_name": "Campos", "birthday": "1984-02-11", "height": "1.72"}, - {"first_name": "Ulla", "last_name": "Arnold", "birthday": "1990-06-04", "height": "1.63"}, - {"first_name": "Vaughan", "last_name": "Schmidt", "birthday": "1985-06-19", "height": "1.61"}, - {"first_name": "Velma", "last_name": "English", "birthday": "1999-01-18", "height": "1.65"}, - {"first_name": "Venus", "last_name": "Hurst", "birthday": "1993-10-22", "height": "1.72"}, - {"first_name": "Victor", "last_name": "Woods", "birthday": "1989-06-23", "height": "1.67"}, - {"first_name": "Victoria", "last_name": "Slater", "birthday": "2009-07-19", "height": "1.72"}, - {"first_name": "Wang", "last_name": "Goodwin", "birthday": "1983-05-15", "height": "1.66"}, - {"first_name": "Warren", "last_name": "Bowen", "birthday": "2000-07-20", "height": "1.76"}, - {"first_name": "Warren", "last_name": "Dudley", "birthday": "1995-10-23", "height": "1.59"}, - {"first_name": "Whilemina", "last_name": "Blankenship", "birthday": "1970-07-14", "height": "1.66"}, - {"first_name": "Whitney", "last_name": "Durham", "birthday": "1977-09-15", "height": "1.72"}, - {"first_name": "Whitney", "last_name": "Scott", "birthday": "1971-07-04", "height": 
"1.70"}, - {"first_name": "Wynter", "last_name": "Garcia", "birthday": "1975-01-10", "height": "1.69"}, - {"first_name": "Yolanda", "last_name": "Duke", "birthday": "1997-02-25", "height": "1.74"} -]; diff --git a/tests/test_readonly.py b/tests/test_readonly.py new file mode 100644 index 0000000..ca992ec --- /dev/null +++ b/tests/test_readonly.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- + +import unittest + +from infi.clickhouse_orm.database import Database, DatabaseException +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.engines import * +from .base_test_with_data import * + + +class ReadonlyTestCase(TestCaseWithData): + + def test_readonly(self): + orig_database = self.database + self.database = Database(orig_database.db_name, readonly=True) + with self.assertRaises(DatabaseException): + self._insert_and_check(self._sample_data(), len(data)) + self.assertEquals(self.database.count(Person), 0) + with self.assertRaises(DatabaseException): + self.database.drop_table(Person) + with self.assertRaises(DatabaseException): + self.database.drop_database() + self.database = orig_database + + def test_insert_readonly(self): + m = ReadOnlyModel(name='readonly') + with self.assertRaises(DatabaseException): + self.database.insert([m]) + + def test_create_readonly_table(self): + with self.assertRaises(DatabaseException): + self.database.create_table(ReadOnlyModel) + + def test_drop_readonly_table(self): + with self.assertRaises(DatabaseException): + self.database.drop_table(ReadOnlyModel) + + +class ReadOnlyModel(Model): + readonly = True + + name = StringField() + From 2b8c0b6c380d129f158f393eaae0ab5e7609d1e7 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 25 Apr 2017 08:39:24 +0300 Subject: [PATCH 03/53] Do not send readonly=1 when connection is already in readonly mode (this causes a database error) --- src/infi/clickhouse_orm/database.py | 14 +++++++++--- tests/test_readonly.py | 35 ++++++++++++++++++++--------- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 1103a1b..2911d96 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -26,8 +26,11 @@ class Database(object): self.db_url = db_url self.username = username self.password = password - self.readonly = readonly - if not self.readonly: + self.readonly = False + if readonly: + self.connection_readonly = self._is_connection_readonly() + self.readonly = True + else: self.create_database() self.server_timezone = self._get_server_timezone() @@ -175,7 +178,8 @@ class Database(object): params['user'] = self.username if self.password: params['password'] = self.password - if self.readonly: + # Send the readonly flag, unless the connection is already readonly (to prevent db error) + if self.readonly and not self.connection_readonly: params['readonly'] = '1' return params @@ -197,3 +201,7 @@ class Database(object): except DatabaseException: logger.exception('Cannot determine server timezone, assuming UTC') return pytz.utc + + def _is_connection_readonly(self): + r = self._send("SELECT value FROM system.settings WHERE name = 'readonly'") + return r.text.strip() != '0' diff --git a/tests/test_readonly.py b/tests/test_readonly.py index ca992ec..f67c093 100644 --- a/tests/test_readonly.py +++ b/tests/test_readonly.py @@ -11,17 +11,32 @@ from .base_test_with_data import * class ReadonlyTestCase(TestCaseWithData): - def test_readonly(self): + def 
_test_readonly_db(self, username): + self._insert_and_check(self._sample_data(), len(data)) orig_database = self.database - self.database = Database(orig_database.db_name, readonly=True) - with self.assertRaises(DatabaseException): - self._insert_and_check(self._sample_data(), len(data)) - self.assertEquals(self.database.count(Person), 0) - with self.assertRaises(DatabaseException): - self.database.drop_table(Person) - with self.assertRaises(DatabaseException): - self.database.drop_database() - self.database = orig_database + try: + self.database = Database(orig_database.db_name, username=username, readonly=True) + with self.assertRaises(DatabaseException): + self._insert_and_check(self._sample_data(), len(data)) + self.assertEquals(self.database.count(Person), 100) + list(self.database.select('SELECT * from $table', Person)) + with self.assertRaises(DatabaseException): + self.database.drop_table(Person) + with self.assertRaises(DatabaseException): + self.database.drop_database() + except DatabaseException, e: + if 'Unknown user' in unicode(e): + raise unittest.SkipTest('Database user "%s" is not defined' % username) + else: + raise + finally: + self.database = orig_database + + def test_readonly_db_with_default_user(self): + self._test_readonly_db('default') + + def test_readonly_db_with_readonly_user(self): + self._test_readonly_db('readonly') def test_insert_readonly(self): m = ReadOnlyModel(name='readonly') From dbea017d60ed7ce346c40cc5eeded19735aef4bb Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 25 Apr 2017 16:03:52 +0300 Subject: [PATCH 04/53] Add support for FixedString fields --- README.rst | 1 + src/infi/clickhouse_orm/fields.py | 18 ++++++++++ src/infi/clickhouse_orm/models.py | 4 +++ tests/test_fixed_string_fields.py | 58 +++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+) create mode 100644 tests/test_fixed_string_fields.py diff --git a/README.rst b/README.rst index 6c2c6d9..0951776 100644 --- a/README.rst +++ b/README.rst @@ -252,6 +252,7 @@ Currently the following field types are supported: Class DB Type Pythonic Type Comments =================== ======== ================= =================================================== StringField String unicode Encoded as UTF-8 when written to ClickHouse +FixedStringField String unicode Encoded as UTF-8 when written to ClickHouse DateField Date datetime.date Range 1970-01-01 to 2038-01-19 DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC Int8Field Int8 int Range -128 to 127 diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index f7d3992..03d6c82 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -90,6 +90,24 @@ class StringField(Field): raise ValueError('Invalid value for %s: %r' % (self.__class__.__name__, value)) +class FixedStringField(StringField): + + def __init__(self, length, default=None, alias=None, materialized=None): + self._length = length + self.db_type = 'FixedString(%d)' % length + super(FixedStringField, self).__init__(default, alias, materialized) + + def to_python(self, value, timezone_in_use): + value = super(FixedStringField, self).to_python(value, timezone_in_use) + return value.rstrip('\0') + + def validate(self, value): + if isinstance(value, text_type): + value = value.encode('UTF-8') + if len(value) > self._length: + raise ValueError('Value of %d bytes is too long for FixedStringField(%d)' % (len(value), self._length)) + + class DateField(Field): min_value = 
datetime.date(1970, 1, 1) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 434254e..ef5ab5e 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -58,6 +58,10 @@ class ModelBase(type): if db_type.startswith('Array'): inner_field = cls.create_ad_hoc_field(db_type[6 : -1]) return orm_fields.ArrayField(inner_field) + # FixedString + if db_type.startswith('FixedString'): + length = int(db_type[12 : -1]) + return orm_fields.FixedStringField(length) # Simple fields name = db_type + 'Field' if not hasattr(orm_fields, name): diff --git a/tests/test_fixed_string_fields.py b/tests/test_fixed_string_fields.py new file mode 100644 index 0000000..f9490af --- /dev/null +++ b/tests/test_fixed_string_fields.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- + +import unittest + +from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.engines import * + + +class FixedStringFieldsTest(unittest.TestCase): + + def setUp(self): + self.database = Database('test-db') + self.database.create_table(FixedStringModel) + + def tearDown(self): + self.database.drop_database() + + def _insert_sample_data(self): + self.database.insert([ + FixedStringModel(date_field='2016-08-30', fstr_field=''), + FixedStringModel(date_field='2016-08-30'), + FixedStringModel(date_field='2016-08-31', fstr_field='foo'), + FixedStringModel(date_field='2016-08-31', fstr_field=u'לילה') + ]) + + def _assert_sample_data(self, results): + self.assertEquals(len(results), 4) + self.assertEquals(results[0].fstr_field, '') + self.assertEquals(results[1].fstr_field, 'ABCDEFGHIJK') + self.assertEquals(results[2].fstr_field, 'foo') + self.assertEquals(results[3].fstr_field, u'לילה') + + def test_insert_and_select(self): + self._insert_sample_data() + query = 'SELECT * from $table ORDER BY date_field' + results = list(self.database.select(query, FixedStringModel)) + self._assert_sample_data(results) + + def test_ad_hoc_model(self): + self._insert_sample_data() + query = 'SELECT * from $db.fixedstringmodel ORDER BY date_field' + results = list(self.database.select(query)) + self._assert_sample_data(results) + + def test_assignment_error(self): + for value in (17, 'this is too long', u'זה ארוך', None, 99.9): + with self.assertRaises(ValueError): + FixedStringModel(fstr_field=value) + + +class FixedStringModel(Model): + + date_field = DateField() + fstr_field = FixedStringField(12, default='ABCDEFGHIJK') + + engine = MergeTree('date_field', ('date_field',)) From abbe334875b2b3be5791702a22368c92ac5ae989 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Wed, 26 Apr 2017 15:46:34 +0300 Subject: [PATCH 05/53] queryset tests --- src/infi/clickhouse_orm/query.py | 70 ++++++++++++---- tests/test_querysets.py | 137 +++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+), 18 deletions(-) create mode 100644 tests/test_querysets.py diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 88404c7..e11c7ef 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -4,22 +4,28 @@ from copy import copy # TODO -# - comments -# - docs -# - tests # - and/or between Q objects # - check that field names are valid -# - add Model.using(db) method that returns a queryset -# - support functions and expressions? 
-
+# - qs slicing
+# - operators for arrays: length, has, empty
 
 class Operator(object):
+    """
+    Base class for filtering operators.
+    """
 
     def to_sql(self, model_cls, field_name, value):
+        """
+        Subclasses should implement this method. It returns an SQL string
+        that applies this operator on the given field and value.
+        """
         raise NotImplementedError
 
 
 class SimpleOperator(Operator):
+    """
+    A simple binary operator such as a=b, a<b, a>=b etc.
+    """
 
     def __init__(self, sql_operator):
         self._sql_operator = sql_operator
@@ -31,6 +37,13 @@
 
 class InOperator(Operator):
+    """
+    An operator that implements IN.
+    Accepts 3 different types of values:
+    - a list or tuple of simple values
+    - a string (used verbatim as the contents of the parenthesis)
+    - a queryset (subquery)
+    """
 
     def to_sql(self, model_cls, field_name, value):
         field = getattr(model_cls, field_name)
@@ -44,6 +57,10 @@ class InOperator(Operator):
 
 class LikeOperator(Operator):
+    """
+    A LIKE operator that matches the field to a given pattern. Can be
+    case sensitive or insensitive.
+    """
 
     def __init__(self, pattern, case_sensitive=True):
         self._pattern = pattern
@@ -61,6 +78,9 @@ class LikeOperator(Operator):
 
 class IExactOperator(Operator):
+    """
+    An operator for case insensitive string comparison.
+    """
 
     def to_sql(self, model_cls, field_name, value):
         field = getattr(model_cls, field_name)
@@ -68,27 +88,32 @@ class IExactOperator(Operator):
         return 'lowerUTF8(%s) = lowerUTF8(%s)' % (field_name, value)
 
 
+# Define the set of builtin operators
+
 _operators = {}
 
 def register_operator(name, sql):
     _operators[name] = sql
 
-register_operator('eq', SimpleOperator('='))
-register_operator('gt', SimpleOperator('>'))
-register_operator('gte', SimpleOperator('>='))
-register_operator('lt', SimpleOperator('<'))
-register_operator('lte', SimpleOperator('<='))
-register_operator('in', InOperator())
-register_operator('contains', LikeOperator('%{}%'))
-register_operator('startswith', LikeOperator('{}%'))
-register_operator('endswith', LikeOperator('%{}'))
-register_operator('icontains', LikeOperator('%{}%', False))
+register_operator('eq',          SimpleOperator('='))
+register_operator('gt',          SimpleOperator('>'))
+register_operator('gte',         SimpleOperator('>='))
+register_operator('lt',          SimpleOperator('<'))
+register_operator('lte',         SimpleOperator('<='))
+register_operator('in',          InOperator())
+register_operator('contains',    LikeOperator('%{}%'))
+register_operator('startswith',  LikeOperator('{}%'))
+register_operator('endswith',    LikeOperator('%{}'))
+register_operator('icontains',   LikeOperator('%{}%', False))
 register_operator('istartswith', LikeOperator('{}%', False))
-register_operator('iendswith', LikeOperator('%{}', False))
-register_operator('iexact', IExactOperator())
+register_operator('iendswith',   LikeOperator('%{}', False))
+register_operator('iexact',      IExactOperator())
 
 
 class FOV(object):
+    """
+    An object for storing Field + Operator + Value.
+    """
 
     def __init__(self, field_name, operator, value):
         self._field_name = field_name
@@ -141,6 +166,15 @@ class QuerySet(object):
         """
         return self._database.select(self.query(), self._model_cls)
 
+    def __bool__(self):
+        """
+        Return true if this queryset matches any rows.
+        """
+        return bool(self.count())
+
+    def __nonzero__(self):      # Python 2 compatibility
+        return type(self).__bool__(self)
+
     def query(self):
         """
         Return the queryset as SQL.
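
The `__bool__`/`__nonzero__` methods added above make a queryset usable directly in a boolean context, at the cost of an extra count() query. A minimal sketch of the intended usage (assuming the `Person` model and `Database` from the test suite):

    qs = Person.objects_in(Database('test-db'))
    print bool(qs.filter(first_name='Connor'))   # True - at least one matching row
    print bool(qs.filter(first_name='Willy'))    # False - no matching rows
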
diff --git a/tests/test_querysets.py b/tests/test_querysets.py new file mode 100644 index 0000000..35e2e6c --- /dev/null +++ b/tests/test_querysets.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- + +import unittest + +from infi.clickhouse_orm.database import Database +from .base_test_with_data import * +import logging +from datetime import date, datetime + +try: + Enum # exists in Python 3.4+ +except NameError: + from enum import Enum # use the enum34 library instead + + +class QuerySetTestCase(TestCaseWithData): + + def setUp(self): + super(QuerySetTestCase, self).setUp() + self.database.insert(self._sample_data()) + + def _test_qs(self, qs, expected_count): + logging.info(qs.query()) + for instance in qs: + logging.info('\t%s' % instance.to_dict()) + self.assertEquals(qs.count(), expected_count) + + def test_no_filtering(self): + qs = Person.objects_in(self.database) + self._test_qs(qs, len(data)) + + def test_truthiness(self): + qs = Person.objects_in(self.database) + self.assertTrue(qs.filter(first_name='Connor')) + self.assertFalse(qs.filter(first_name='Willy')) + + def test_filter_string_field(self): + qs = Person.objects_in(self.database) + self._test_qs(qs.filter(first_name='Ciaran'), 2) + self._test_qs(qs.filter(first_name='ciaran'), 0) # case sensitive + self._test_qs(qs.filter(first_name__iexact='ciaran'), 2) # case insensitive + self._test_qs(qs.filter(first_name__gt='Whilemina'), 4) + self._test_qs(qs.filter(first_name__gte='Whilemina'), 5) + self._test_qs(qs.filter(first_name__lt='Adam'), 1) + self._test_qs(qs.filter(first_name__lte='Adam'), 2) + self._test_qs(qs.filter(first_name__in=('Connor', 'Courtney')), 3) # in tuple + self._test_qs(qs.filter(first_name__in=['Connor', 'Courtney']), 3) # in list + self._test_qs(qs.filter(first_name__in="'Connor', 'Courtney'"), 3) # in string + self._test_qs(qs.filter(first_name__contains='sh'), 3) # case sensitive + self._test_qs(qs.filter(first_name__icontains='sh'), 6) # case insensitive + self._test_qs(qs.filter(first_name__startswith='le'), 0) # case sensitive + self._test_qs(qs.filter(first_name__istartswith='Le'), 2) # case insensitive + self._test_qs(qs.filter(first_name__istartswith=''), 100) # empty prefix + self._test_qs(qs.filter(first_name__endswith='IA'), 0) # case sensitive + self._test_qs(qs.filter(first_name__iendswith='ia'), 3) # case insensitive + self._test_qs(qs.filter(first_name__iendswith=''), 100) # empty suffix + + def test_filter_float_field(self): + qs = Person.objects_in(self.database) + self._test_qs(qs.filter(height__gt=2), 0) + self._test_qs(qs.filter(height__lt=1.61), 4) + self._test_qs(qs.filter(height__lt='1.61'), 4) + self._test_qs(qs.exclude(height__lt='1.61'), 96) + self._test_qs(qs.filter(height__gt=0), 100) + self._test_qs(qs.exclude(height__gt=0), 0) + + def test_filter_date_field(self): + qs = Person.objects_in(self.database) + self._test_qs(qs.filter(birthday='1970-12-02'), 1) + self._test_qs(qs.filter(birthday=date(1970, 12, 2)), 1) + self._test_qs(qs.filter(birthday__lte=date(1970, 12, 2)), 3) + + def test_only(self): + qs = Person.objects_in(self.database).only('first_name', 'last_name') + for person in qs: + self.assertTrue(person.first_name) + self.assertTrue(person.last_name) + self.assertFalse(person.height) + self.assertEquals(person.birthday, date(1970, 1, 1)) + + def test_order_by(self): + qs = Person.objects_in(self.database) + person = list(qs.order_by('first_name', 'last_name'))[0] + self.assertEquals(person.first_name, 'Abdul') + person = list(qs.order_by('-first_name', 
'-last_name'))[0] + self.assertEquals(person.first_name, 'Yolanda') + person = list(qs.order_by('height'))[0] + self.assertEquals(person.height, 1.59) + person = list(qs.order_by('-height'))[0] + self.assertEquals(person.height, 1.8) + + def test_in_subquery(self): + qs = Person.objects_in(self.database) + self._test_qs(qs.filter(height__in='SELECT max(height) FROM $table'), 2) + self._test_qs(qs.filter(first_name__in=qs.only('last_name')), 2) + + def _insert_sample_model(self): + self.database.create_table(SampleModel) + now = datetime.now() + self.database.insert([ + SampleModel(timestamp=now, num=1, color=Color.red), + SampleModel(timestamp=now, num=2, color=Color.red), + SampleModel(timestamp=now, num=3, color=Color.blue), + SampleModel(timestamp=now, num=4, color=Color.white), + ]) + + def test_filter_enum_field(self): + self._insert_sample_model() + qs = SampleModel.objects_in(self.database) + self._test_qs(qs.filter(color=Color.red), 2) + self._test_qs(qs.exclude(color=Color.white), 3) + # Different ways to specify blue + self._test_qs(qs.filter(color__gt=Color.blue), 1) + self._test_qs(qs.filter(color__gt='blue'), 1) + self._test_qs(qs.filter(color__gt=2), 1) + + def test_filter_int_field(self): + self._insert_sample_model() + qs = SampleModel.objects_in(self.database) + self._test_qs(qs.filter(num=1), 1) + self._test_qs(qs.filter(num__gt=1), 3) + self._test_qs(qs.filter(num__gte=1), 4) + self._test_qs(qs.filter(num__in=(1, 2, 3)), 3) + self._test_qs(qs.filter(num__in=xrange(1, 4)), 3) + + +Color = Enum('Color', u'red blue green yellow brown white black') + + +class SampleModel(Model): + + timestamp = DateTimeField() + materialized_date = DateField(materialized='toDate(timestamp)') + num = Int32Field() + color = Enum8Field(Color) + + engine = MergeTree('materialized_date', ('materialized_date',)) \ No newline at end of file From 78bb857c8a0c68c3133d448f03e07362dd8267c7 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Wed, 26 Apr 2017 15:47:02 +0300 Subject: [PATCH 06/53] refactor documentation --- .gitignore | 2 + docs/contributing.md | 16 +++ docs/field_types.md | 104 ++++++++++++++++++++ docs/index.md | 11 +++ docs/models_and_databases.md | 172 ++++++++++++++++++++++++++++++++ docs/querysets.md | 96 ++++++++++++++++++ docs/schema_migrations.md | 60 ++++++++++++ docs/system_models.md | 42 ++++++++ docs/table_engines.md | 58 +++++++++++ docs/toc.md | 33 +++++++ scripts/README.md | 22 +++++ scripts/docs2html.sh | 19 ++++ scripts/gh-md-toc | 185 +++++++++++++++++++++++++++++++++++ scripts/test_python3.sh | 10 ++ 14 files changed, 830 insertions(+) create mode 100644 docs/contributing.md create mode 100644 docs/field_types.md create mode 100644 docs/index.md create mode 100644 docs/models_and_databases.md create mode 100644 docs/querysets.md create mode 100644 docs/schema_migrations.md create mode 100644 docs/system_models.md create mode 100644 docs/table_engines.md create mode 100644 docs/toc.md create mode 100644 scripts/README.md create mode 100755 scripts/docs2html.sh create mode 100755 scripts/gh-md-toc create mode 100644 scripts/test_python3.sh diff --git a/.gitignore b/.gitignore index c1eacdb..ae23e11 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,5 @@ buildout.in src/infi/clickhouse_orm/__version__.py bootstrap.py + +htmldocs/ \ No newline at end of file diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..717c71c --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,16 @@ +Contributing +============ + +After cloning the project, 
run the following commands:
+
+    easy_install -U infi.projector
+    cd infi.clickhouse_orm
+    projector devenv build
+
+To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run:
+
+    bin/nosetests
+
+To see test coverage information run:
+
+    bin/nosetests --with-coverage --cover-package=infi.clickhouse_orm
diff --git a/docs/field_types.md b/docs/field_types.md
new file mode 100644
index 0000000..e3e4330
--- /dev/null
+++ b/docs/field_types.md
@@ -0,0 +1,104 @@
+Field Types
+===========
+
+Currently the following field types are supported:
+
+| Class              | DB Type    | Pythonic Type       | Comments
+| ------------------ | ---------- | ------------------- | -----------------------------------------------------
+| StringField        | String     | unicode             | Encoded as UTF-8 when written to ClickHouse
+| FixedStringField   | String     | unicode             | Encoded as UTF-8 when written to ClickHouse
+| DateField          | Date       | datetime.date       | Range 1970-01-01 to 2038-01-19
+| DateTimeField      | DateTime   | datetime.datetime   | Minimal value is 1970-01-01 00:00:00; Always in UTC
+| Int8Field          | Int8       | int                 | Range -128 to 127
+| Int16Field         | Int16      | int                 | Range -32768 to 32767
+| Int32Field         | Int32      | int                 | Range -2147483648 to 2147483647
+| Int64Field         | Int64      | int/long            | Range -9223372036854775808 to 9223372036854775807
+| UInt8Field         | UInt8      | int                 | Range 0 to 255
+| UInt16Field        | UInt16     | int                 | Range 0 to 65535
+| UInt32Field        | UInt32     | int                 | Range 0 to 4294967295
+| UInt64Field        | UInt64     | int/long            | Range 0 to 18446744073709551615
+| Float32Field       | Float32    | float               |
+| Float64Field       | Float64    | float               |
+| Enum8Field         | Enum8      | Enum                | See below
+| Enum16Field        | Enum16     | Enum                | See below
+| ArrayField         | Array      | list                | See below
+
+DateTimeField and Time Zones
+----------------------------
+
+A `DateTimeField` can be assigned values from one of the following types:
+
+- datetime
+- date
+- integer - number of seconds since the Unix epoch
+- string in `YYYY-MM-DD HH:MM:SS` format
+
+The assigned value always gets converted to a timezone-aware `datetime` in UTC. If the assigned value is a timezone-aware `datetime` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC.
+
+DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse version which is new enough to support the `timezone()` function, otherwise it is assumed to be using UTC. In any case, we recommend setting the server timezone to UTC in order to prevent confusion.
+
+Working with enum fields
+------------------------
+
+`Enum8Field` and `Enum16Field` provide support for working with ClickHouse enum columns. They accept strings or integers as values, and convert them to the matching Pythonic Enum member.
+
+Python 3.4 and higher supports Enums natively. When using previous Python versions you need to install the enum34 library.
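+
+For example, the following import pattern (a sketch mirroring this project's own tests) works on both old and new Python versions:
+
+    try:
+        Enum  # exists in Python 3.4+
+    except NameError:
+        from enum import Enum  # use the enum34 library instead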
+
+Example of a model with an enum field:
+
+    Gender = Enum('Gender', 'male female unspecified')
+
+    class Person(models.Model):
+
+        first_name = fields.StringField()
+        last_name = fields.StringField()
+        birthday = fields.DateField()
+        gender = fields.Enum8Field(Gender)
+
+        engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
+
+    suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female)
+
+Working with array fields
+-------------------------
+
+You can create array fields containing any data type, for example:
+
+    class SensorData(models.Model):
+
+        date = fields.DateField()
+        temperatures = fields.ArrayField(fields.Float32Field())
+        humidity_levels = fields.ArrayField(fields.UInt8Field())
+
+        engine = engines.MergeTree('date', ('date',))
+
+    data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66])
+
+Working with materialized and alias fields
+------------------------------------------
+
+ClickHouse supports MATERIALIZED and ALIAS fields.
+
+See documentation [here](https://clickhouse.yandex/reference_en.html#Default%20values).
+
+Values for both field types cannot be inserted into the database directly, so they are ignored when using the `Database.insert()` method. ClickHouse does not return the field values if you use `"SELECT * FROM ..."` - you have to list these field names explicitly in the query.
+
+Usage:
+
+    class Event(models.Model):
+
+        created = fields.DateTimeField()
+        created_date = fields.DateTimeField(materialized='toDate(created)')
+        name = fields.StringField()
+        username = fields.StringField(alias='name')
+
+        engine = engines.MergeTree('created_date', ('created_date', 'created'))
+
+    obj = Event(created=datetime.now(), name='MyEvent')
+    db = Database('my_test_db')
+    db.insert([obj])
+    # All values will be retrieved from the database
+    db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event)
+    # created_date and username will contain a default value
+    db.select('SELECT * FROM $db.event', model_class=Event)
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..ce6ae6b
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,11 @@
+Overview
+========
+
+This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/). It allows you to define model classes whose instances can be written to the database and read from it.
+
+Installation
+------------
+
+To install infi.clickhouse_orm:
+
+    pip install infi.clickhouse_orm
diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md
new file mode 100644
index 0000000..03d5ee3
--- /dev/null
+++ b/docs/models_and_databases.md
@@ -0,0 +1,172 @@
+Models and Databases
+====================
+
+Models represent ClickHouse tables, allowing you to work with them using familiar pythonic syntax.
+
+Database instances connect to a specific ClickHouse database for running queries, inserting data and other operations.
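+
+For example, a minimal sketch of opening such a connection (the URL shown is ClickHouse's standard HTTP endpoint; adjust it to your setup):
+
+    from infi.clickhouse_orm.database import Database
+
+    db = Database('my_test_db', db_url='http://localhost:8123/')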
+
+Defining Models
+---------------
+
+Models are defined in a way reminiscent of Django's ORM:
+
+    from infi.clickhouse_orm import models, fields, engines
+
+    class Person(models.Model):
+
+        first_name = fields.StringField()
+        last_name = fields.StringField()
+        birthday = fields.DateField()
+        height = fields.Float32Field()
+
+        engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday'))
+
+It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.). Alternatively it is possible to pass alias or materialized parameters (see below for usage examples). Only one of `default`, `alias` and `materialized` parameters can be provided.
+
+For more details see [Field Types](field_types.md) and [Table Engines](table_engines.md).
+
+### Table Names
+
+The table name used for the model is its class name, converted to lowercase. To override the default name, implement the `table_name` method:
+
+    class Person(models.Model):
+
+        ...
+
+        @classmethod
+        def table_name(cls):
+            return 'people'
+
+Using Models
+------------
+
+Once you have a model, you can create model instances:
+
+    >>> dan = Person(first_name='Dan', last_name='Schwartz')
+    >>> suzy = Person(first_name='Suzy', last_name='Jones')
+    >>> dan.first_name
+    u'Dan'
+
+When values are assigned to model fields, they are immediately converted to their Pythonic data type. In case the value is invalid, a `ValueError` is raised:
+
+    >>> suzy.birthday = '1980-01-17'
+    >>> suzy.birthday
+    datetime.date(1980, 1, 17)
+    >>> suzy.birthday = 0.5
+    ValueError: Invalid value for DateField - 0.5
+    >>> suzy.birthday = '1922-05-31'
+    ValueError: DateField out of range - 1922-05-31 is not between 1970-01-01 and 2038-01-19
+
+Inserting to the Database
+-------------------------
+
+To write your instances to ClickHouse, you need a `Database` instance:
+
+    from infi.clickhouse_orm.database import Database
+
+    db = Database('my_test_db')
+
+This automatically connects to http://localhost:8123 and creates a database called my_test_db, unless it already exists. If necessary, you can specify a different database URL and optional credentials:
+
+    db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
+
+Using the `Database` instance you can create a table for your model, and insert instances to it:
+
+    db.create_table(Person)
+    db.insert([dan, suzy])
+
+The `insert` method can take any iterable of model instances, but they all must belong to the same model class.
+
+Creating a read-only database is also supported. Such a `Database` instance can only read data, and cannot modify data or schemas:
+
+    db = Database('my_test_db', readonly=True)
+
+Reading from the Database
+-------------------------
+
+Loading model instances from the database is simple:
+
+    for person in db.select("SELECT * FROM my_test_db.person", model_class=Person):
+        print person.first_name, person.last_name
+
+Do not include a `FORMAT` clause in the query, since the ORM automatically sets the format to `TabSeparatedWithNamesAndTypes`.
+
+It is possible to select only a subset of the columns, and the rest will receive their default values:
+
+    for person in db.select("SELECT first_name FROM my_test_db.person WHERE last_name='Smith'", model_class=Person):
+        print person.first_name
+
+The ORM provides a way to build simple queries without writing SQL by hand.
The previous snippet can be written like this: + + for person in Person.objects_in(db).filter(last_name='Smith').only('first_name'): + print person.first_name + +See [Querysets](querysets.md) for more information. + + +Reading without a Model +----------------------- + +When running a query, specifying a model class is not required. In case you do not provide a model class, an ad-hoc class will be defined based on the column names and types returned by the query: + + for row in db.select("SELECT max(height) as max_height FROM my_test_db.person"): + print row.max_height + +This is a very convenient feature that saves you the need to define a model for each query, while still letting you work with Pythonic column values and an elegant syntax. + +SQL Placeholders +---------------- + +There are a couple of special placeholders that you can use inside the SQL to make it easier to write: `$db` and `$table`. The first one is replaced by the database name, and the second is replaced by the database name plus table name (but is available only when the model is specified). + +So instead of this: + + db.select("SELECT * FROM my_test_db.person", model_class=Person) + +you can use: + + db.select("SELECT * FROM $db.person", model_class=Person) + +or even: + + db.select("SELECT * FROM $table", model_class=Person) + +Counting +-------- + +The `Database` class also supports counting records easily: + + >>> db.count(Person) + 117 + >>> db.count(Person, conditions="height > 1.90") + 6 + +Pagination +---------- + +It is possible to paginate through model instances: + + >>> order_by = 'first_name, last_name' + >>> page = db.paginate(Person, order_by, page_num=1, page_size=10) + >>> print page.number_of_objects + 2507 + >>> print page.pages_total + 251 + >>> for person in page.objects: + >>> # do something + +The `paginate` method returns a `namedtuple` containing the following fields: + +- `objects` - the list of objects in this page +- `number_of_objects` - total number of objects in all pages +- `pages_total` - total number of pages +- `number` - the page number, starting from 1; the special value -1 + may be used to retrieve the last page +- `page_size` - the number of objects per page + +You can optionally pass conditions to the query: + + >>> page = db.paginate(Person, order_by, page_num=1, page_size=100, conditions='height > 1.90') + +Note that `order_by` must be chosen so that the ordering is unique, otherwise there might be inconsistencies in the pagination (such as an instance that appears on two different pages). + diff --git a/docs/querysets.md b/docs/querysets.md new file mode 100644 index 0000000..911c5ed --- /dev/null +++ b/docs/querysets.md @@ -0,0 +1,96 @@ +Querysets +========= + +A queryset is an object that represents a database query using a specific Model. It is lazy, meaning that it does not hit the database until you iterate over its matching rows (model instances). To create a base queryset for a model class, use: + + qs = Person.objects_in(database) + +This queryset matches all Person instances in the database. You can get these instances using iteration: + + for person in qs: + print person.first_name, person.last_name + +Filtering +--------- + +The `filter` and `exclude` methods are used for filtering the matching instances. Calling these methods returns a new queryset instance, with the added conditions. 
For example:
+
+    >>> qs = Person.objects_in(database)
+    >>> qs = qs.filter(first_name__startswith='V').exclude(birthday__lt='2000-01-01')
+    >>> qs.conditions_as_sql()
+    u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01') "
+
+It is possible to specify several fields to filter or exclude by:
+
+    >>> qs = Person.objects_in(database).filter(last_name='Smith', height__gt=1.75)
+    >>> qs.conditions_as_sql()
+    u"last_name = 'Smith' AND height > 1.75"
+
+There are different operators that can be used, by passing `<fieldname>__<operator>=<value>` (two underscores separate the field name from the operator). In case no operator is given, `eq` is used by default. Below are all the supported operators.
+
+| Operator | Equivalent SQL | Comments |
+| -------- | -------------------------------------------- | ---------------------------------- |
+| `eq` | `field = value` | |
+| `gt` | `field > value` | |
+| `gte` | `field >= value` | |
+| `lt` | `field < value` | |
+| `lte` | `field <= value` | |
+| `in` | `field IN (values)` | See below |
+| `contains` | `field LIKE '%value%'` | For string fields only |
+| `startswith` | `field LIKE 'value%'` | For string fields only |
+| `endswith` | `field LIKE '%value'` | For string fields only |
+| `icontains` | `lowerUTF8(field) LIKE lowerUTF8('%value%')` | For string fields only |
+| `istartswith` | `lowerUTF8(field) LIKE lowerUTF8('value%')` | For string fields only |
+| `iendswith` | `lowerUTF8(field) LIKE lowerUTF8('%value')` | For string fields only |
+| `iexact` | `lowerUTF8(field) = lowerUTF8(value)` | For string fields only |
+
+### Using the `in` Operator
+
+The `in` operator expects one of three types of values:
+* A list or tuple of simple values
+* A string, which is used verbatim as the contents of the parentheses
+* Another queryset (subquery)
+
+For example if we want to select only people with Irish last names:
+
+    # A list of simple values
+    qs = Person.objects_in(database).filter(last_name__in=["Murphy", "O'Sullivan"])
+
+    # A string
+    subquery = "SELECT name from $db.irishlastname"
+    qs = Person.objects_in(database).filter(last_name__in=subquery)
+
+    # A queryset
+    subquery = IrishLastName.objects_in(database).only("name")
+    qs = Person.objects_in(database).filter(last_name__in=subquery)
+
+Counting and Checking Existence
+-------------------------------
+
+Use the `count` method to get the number of matches:
+
+    Person.objects_in(database).count()
+
+To check if there are any matches at all, you can use any of the following equivalent options:
+
+    if qs.count(): ...
+    if bool(qs): ...
+    if qs: ...
+
+Ordering
+--------
+
+To sorting order of the results can be controlled using the `order_by` method:
+
+    qs = Person.objects_in(database).order_by('last_name', 'first_name')
+
+The default order is ascending. To use descending order, add a minus sign before the field name:
+
+    qs = Person.objects_in(database).order_by('-height')
+
+Omitting Fields
+---------------
+
+When not all model fields are needed, it is more efficient to omit them from the query. This is especially true when there are large fields that may slow the query down. Use the `only` method to specify which fields to retrieve:
+
+    qs = Person.objects_in(database).only('first_name', 'birthday')
diff --git a/docs/schema_migrations.md b/docs/schema_migrations.md
new file mode 100644
index 0000000..75b802e
--- /dev/null
+++ b/docs/schema_migrations.md
@@ -0,0 +1,60 @@
+Schema Migrations
+=================
+
+Over time, the ORM models in your application may change.
Migrations provide a way to modify the database tables according to the changes in your models, without writing raw SQL.
+
+The migrations that were applied to the database are recorded in the `infi_clickhouse_orm_migrations` table, so migrating the database will only apply any missing migrations.
+
+Writing Migrations
+------------------
+
+To write migrations, create a Python package. Then create a Python file for the initial migration. The migration files must begin with a four-digit number, and will be applied in sequence. For example:
+
+    analytics
+       |
+       +-- analytics_migrations
+              |
+              +-- __init__.py
+              |
+              +-- 0001_initial.py
+              |
+              +-- 0002_add_user_agents_table.py
+
+Each migration file is expected to contain a list of `operations`, for example:
+
+    from infi.clickhouse_orm import migrations
+    from analytics import models
+
+    operations = [
+       migrations.CreateTable(models.Visits),
+       migrations.CreateTable(models.Visitors)
+    ]
+
+The following operations are supported:
+
+**CreateTable**
+
+A migration operation that creates a table for a given model class.
+
+**DropTable**
+
+A migration operation that drops the table of a given model class.
+
+**AlterTable**
+
+A migration operation that compares the table of a given model class to the model's fields, and alters the table to match the model. The operation can:
+
+- add new columns
+- drop obsolete columns
+- modify column types
+
+Default values are not altered by this operation.
+
+Running Migrations
+------------------
+
+To migrate a database, create a `Database` instance and call its `migrate` method with the package name containing your migrations:
+
+    Database('analytics_db').migrate('analytics.analytics_migrations')
+
+Note that you may have more than one migrations package.
\ No newline at end of file
diff --git a/docs/system_models.md b/docs/system_models.md
new file mode 100644
index 0000000..c3a3331
--- /dev/null
+++ b/docs/system_models.md
@@ -0,0 +1,42 @@
+System models
+=============
+
+[Clickhouse docs](https://clickhouse.yandex/reference_en.html#System%20tables).
+
+System models are read-only models for implementing part of the system's functionality, and for providing access to information about how the system is working.
+
+Currently the following system models are supported:
+
+| Class | DB Table | Comments
+| ------------ | -------------- | ---------------------------------------------------
+| SystemPart | system.parts | Gives methods to work with partitions. See below.
+
+Partitions and parts
+--------------------
+
+[ClickHouse docs](https://clickhouse.yandex/reference_en.html#Manipulations%20with%20partitions%20and%20parts).
+
+A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part.
+
+| Method | Parameters | Comments
+| --------------------- | ------------------------- | -----------------------------------------------------------------------------------------------
+| get(static) | database, conditions="" | Gets database partitions, filtered by conditions
+| get_active(static) | database, conditions="" | Gets only active (not detached or dropped) partitions, filtered by conditions
+| detach | settings=None | Detaches the partition. Settings is a dict of params to pass to http request
+| drop | settings=None | Drops the partition. Settings is a dict of params to pass to http request
+| attach | settings=None | Attaches an already detached partition.
Settings is a dict of params to pass to http request
+| freeze | settings=None | Freezes (makes a backup of) the partition. Settings is a dict of params to pass to http request
+| fetch | settings=None | Fetches the partition. Settings is a dict of params to pass to http request
+
+Usage example:
+
+    from infi.clickhouse_orm.database import Database
+    from infi.clickhouse_orm.system_models import SystemPart
+    db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger')
+    partitions = SystemPart.get_active(db, conditions='')  # Getting all active partitions of the database
+    if len(partitions) > 0:
+        partitions = sorted(partitions, key=lambda obj: obj.name)  # Partition name is YYYYMM, so we can sort by it
+        partitions[0].freeze()  # Make a backup in /opt/clickhouse/shadow directory
+        partitions[0].drop()  # Dropped partition
+
+`Note`: system.parts stores information for all databases. To be correct, SystemPart model was designed to receive only parts belonging to the given database instance.
diff --git a/docs/table_engines.md b/docs/table_engines.md
new file mode 100644
index 0000000..79f40df
--- /dev/null
+++ b/docs/table_engines.md
@@ -0,0 +1,58 @@
+Table Engines
+=============
+
+Each model must have an engine instance, used when creating the table in ClickHouse.
+
+To define a `MergeTree` engine, supply the date column name and the names (or expressions) for the key columns:
+
+    engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'))
+
+You may also provide a sampling expression:
+
+    engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)')
+
+A `CollapsingMergeTree` engine is defined in a similar manner, but also requires a sign column:
+
+    engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign')
+
+For a `SummingMergeTree` you can optionally specify the summing columns:
+
+    engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'),
+                                      summing_cols=('Shows', 'Clicks', 'Cost'))
+
+For a `ReplacingMergeTree` you can optionally specify the version column:
+
+    engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version')
+
+A `Buffer` engine is available for BufferModels. (See below how to use BufferModel). You can specify following parameters:
+
+    engine = engines.Buffer(Person) # you need to initialize engine with main Model. Other default parameters will be used
+    # or:
+    engine = engines.Buffer(Person, num_layers=16, min_time=10,
+                            max_time=100, min_rows=10000, max_rows=1000000,
+                            min_bytes=10000000, max_bytes=100000000)
+
+Buffer Models
+-------------
+
+Here's how you can define Model for Buffer Engine. The Buffer Model should be inherited from models.BufferModel and main Model:
+
+    class PersonBuffer(models.BufferModel, Person):
+
+        engine = engines.Buffer(Person)
+
+Then you can insert objects into Buffer model and they will be handled by ClickHouse properly:
+
+    db.create_table(PersonBuffer)
+    suzy = PersonBuffer(first_name='Suzy', last_name='Jones')
+    dan = PersonBuffer(first_name='Dan', last_name='Schwartz')
+    db.insert([dan, suzy])
+
+Data Replication
+----------------
+
+Any of the above engines can be converted to a replicated engine (e.g.
`ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`: + + engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), + replica_table_path='/clickhouse/tables/{layer}-{shard}/hits', + replica_name='{replica}') diff --git a/docs/toc.md b/docs/toc.md new file mode 100644 index 0000000..9e014fb --- /dev/null +++ b/docs/toc.md @@ -0,0 +1,33 @@ + + * [Overview](index.md#overview) + * [Installation](index.md#installation) + + * [Models and Databases](models_and_databases.md#models-and-databases) + * [Defining Models](models_and_databases.md#defining-models) + * [Table Names](models_and_databases.md#table-names) + * [Using Models](models_and_databases.md#using-models) + * [Inserting to the Database](models_and_databases.md#inserting-to-the-database) + * [Reading from the Database](models_and_databases.md#reading-from-the-database) + * [Reading without a Model](models_and_databases.md#reading-without-a-model) + * [SQL Placeholders](models_and_databases.md#sql-placeholders) + * [Counting](models_and_databases.md#counting) + * [Pagination](models_and_databases.md#pagination) + + * [Field Types](field_types.md#field-types) + * [DateTimeField and Time Zones](field_types.md#datetimefield-and-time-zones) + * [Working with enum fields](field_types.md#working-with-enum-fields) + * [Working with array fields](field_types.md#working-with-array-fields) + * [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields) + + * [Table Engines](table_engines.md#table-engines) + * [Buffer Models](table_engines.md#buffer-models) + * [Data Replication](table_engines.md#data-replication) + + * [Schema Migrations](schema_migrations.md#schema-migrations) + * [Writing Migrations](schema_migrations.md#writing-migrations) + * [Running Migrations](schema_migrations.md#running-migrations) + + * [System models](system_models.md#system-models) + * [Partitions and parts](system_models.md#partitions-and-parts) + + * [Contributing](contributing.md#contributing) diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..6f4e306 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,22 @@ +This directory contains various scripts for use while developing. + +docs2html +--------- +Converts markdown docs to html for preview. Requires Pandoc. +Usage: + + cd docs + ../scripts/docs2html.sh + + +gh-md-toc +--------- +Used by docs2html to generate the table of contents. + + +test_python3 +------------ +Creates a Python 3 virtualenv, clones the project into it, and runs the tests. +Usage: + + ./test_python3.sh diff --git a/scripts/docs2html.sh b/scripts/docs2html.sh new file mode 100755 index 0000000..6ed4f2d --- /dev/null +++ b/scripts/docs2html.sh @@ -0,0 +1,19 @@ + +mkdir -p ../htmldocs + +echo "Generating table of contents" +../scripts/gh-md-toc \ + index.md \ + models_and_databases.md \ + querysets.md \ + field_types.md \ + table_engines.md \ + schema_migrations.md \ + system_models.md \ + contributing.md \ + > toc.md + +find ./ -iname "*.md" -type f -exec sh -c 'echo "Converting ${0}"; pandoc "${0}" -s -o "../htmldocs/${0%.md}.html"' {} \; + +echo "Fixing links" +sed -i 's/\.md/\.html/g' ../htmldocs/*.html diff --git a/scripts/gh-md-toc b/scripts/gh-md-toc new file mode 100755 index 0000000..158bc5f --- /dev/null +++ b/scripts/gh-md-toc @@ -0,0 +1,185 @@ +#!/usr/bin/env bash + +# +# Source: https://github.com/ekalinin/github-markdown-toc +# +# Steps: +# +# 1. 
Download corresponding html file for some README.md: +# curl -s $1 +# +# 2. Discard rows where no substring 'user-content-' (github's markup): +# awk '/user-content-/ { ... +# +# 3.1 Get last number in each row like ' ... sitemap.js.*<\/h/)+2, RLENGTH-5) +# +# 5. Find anchor and insert it inside "(...)": +# substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8) +# + +gh_toc_version="0.4.8" + +gh_user_agent="gh-md-toc v$gh_toc_version" + +# +# Download rendered into html README.md by its url. +# +# +gh_toc_load() { + local gh_url=$1 + + if type curl &>/dev/null; then + curl --user-agent "$gh_user_agent" -s "$gh_url" + elif type wget &>/dev/null; then + wget --user-agent="$gh_user_agent" -qO- "$gh_url" + else + echo "Please, install 'curl' or 'wget' and try again." + exit 1 + fi +} + +# +# Converts local md file into html by GitHub +# +# ➥ curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown +#
<p>Hello world github/linguist#1 <strong>cool</strong>, and #1!</p>
'" +gh_toc_md2html() { + local gh_file_md=$1 + curl -s --user-agent "$gh_user_agent" \ + --data-binary @"$gh_file_md" -H "Content-Type:text/plain" \ + https://api.github.com/markdown/raw +} + +# +# Is passed string url +# +gh_is_url() { + if [[ $1 == https* || $1 == http* ]]; then + echo "yes" + else + echo "no" + fi +} + +# +# TOC generator +# +gh_toc(){ + local gh_src=$1 + local gh_src_copy=$1 + local gh_ttl_docs=$2 + + if [ "$gh_src" = "" ]; then + echo "Please, enter URL or local path for a README.md" + exit 1 + fi + + + # Show "TOC" string only if working with one document + if [ "$gh_ttl_docs" = "1" ]; then + + echo "Table of Contents" + echo "=================" + echo "" + gh_src_copy="" + + fi + + if [ "$(gh_is_url "$gh_src")" == "yes" ]; then + gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy" + else + gh_toc_md2html "$gh_src" | gh_toc_grab "$gh_src_copy" + fi +} + +# +# Grabber of the TOC from rendered html +# +# $1 — a source url of document. +# It's need if TOC is generated for multiple documents. +# +gh_toc_grab() { + # if closed is on the new line, then move it on the prev line + # for example: + # was: The command foo1 + # + # became: The command foo1 + sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' | + # find strings that corresponds to template + grep -E -o '//' | sed 's/<\/code>//' | + # now all rows are like: + # ... .*<\/h/)+2, RLENGTH-5)"](" gh_url substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8) ")"}' | sed 'y/+/ /; s/%/\\x/g')" +} + +# +# Returns filename only from full path or url +# +gh_toc_get_filename() { + echo "${1##*/}" +} + +# +# Options hendlers +# +gh_toc_app() { + local app_name="gh-md-toc" + + if [ "$1" = '--help' ] || [ $# -eq 0 ] ; then + echo "GitHub TOC generator ($app_name): $gh_toc_version" + echo "" + echo "Usage:" + echo " $app_name src [src] Create TOC for a README file (url or local path)" + echo " $app_name - Create TOC for markdown from STDIN" + echo " $app_name --help Show help" + echo " $app_name --version Show version" + return + fi + + if [ "$1" = '--version' ]; then + echo "$gh_toc_version" + return + fi + + if [ "$1" = "-" ]; then + if [ -z "$TMPDIR" ]; then + TMPDIR="/tmp" + elif [ -n "$TMPDIR" -a ! 
-d "$TMPDIR" ]; then + mkdir -p "$TMPDIR" + fi + local gh_tmp_md + gh_tmp_md=$(mktemp $TMPDIR/tmp.XXXXXX) + while read input; do + echo "$input" >> "$gh_tmp_md" + done + gh_toc_md2html "$gh_tmp_md" | gh_toc_grab "" + return + fi + + for md in "$@" + do + echo "" + gh_toc "$md" "$#" + done + + #echo "" + #echo "Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc)" +} + +# +# Entry point +# +gh_toc_app "$@" \ No newline at end of file diff --git a/scripts/test_python3.sh b/scripts/test_python3.sh new file mode 100644 index 0000000..0fe01e3 --- /dev/null +++ b/scripts/test_python3.sh @@ -0,0 +1,10 @@ +cd /tmp +rm -rf /tmp/orm_env* +virtualenv -p python3 /tmp/orm_env +cd /tmp/orm_env +source bin/activate +pip install infi.projector +git clone https://github.com/Infinidat/infi.clickhouse_orm.git +cd infi.clickhouse_orm +projector devenv build +bin/nosetests From 59b29db74620409d19c89e48ea687d56aca303d2 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 28 Apr 2017 13:44:45 +0300 Subject: [PATCH 07/53] refactor documentation --- docs/contributing.md | 5 +++++ docs/field_types.md | 5 +++++ docs/index.md | 4 ++++ docs/models_and_databases.md | 4 ++++ docs/querysets.md | 11 ++++++++--- docs/schema_migrations.md | 7 ++++++- docs/system_models.md | 9 +++++++-- docs/table_engines.md | 5 +++++ docs/toc.md | 13 +++++++++++-- scripts/README.md | 18 +++++++++++++----- scripts/docs2html.sh | 12 ------------ scripts/generate_toc.sh | 13 +++++++++++++ 12 files changed, 81 insertions(+), 25 deletions(-) create mode 100755 scripts/generate_toc.sh diff --git a/docs/contributing.md b/docs/contributing.md index 717c71c..6268733 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -14,3 +14,8 @@ To run the tests, ensure that the ClickHouse server is running on >](table_engines.md) \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index ce6ae6b..9b5199e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,3 +9,7 @@ Installation To install infi.clickhouse_orm: pip install infi.clickhouse_orm + +--- + +[Table of Contents](toc.md) | [Models and Databases >>](models_and_databases.md) \ No newline at end of file diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md index 03d5ee3..2879c32 100644 --- a/docs/models_and_databases.md +++ b/docs/models_and_databases.md @@ -170,3 +170,7 @@ You can optionally pass conditions to the query: Note that `order_by` must be chosen so that the ordering is unique, otherwise there might be inconsistencies in the pagination (such as an instance that appears on two different pages). 
+ +--- + +[<< Overview](index.md) | [Table of Contents](toc.md) | [Querysets >>](querysets.md) \ No newline at end of file diff --git a/docs/querysets.md b/docs/querysets.md index 911c5ed..fd7e253 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -18,7 +18,7 @@ The `filter` and `exclude` methods are used for filtering the matching instances >>> qs = Person.objects_in(database) >>> qs = qs.filter(first_name__startswith='V').exclude(birthday__lt='2000-01-01') >>> qs.conditions_as_sql() - u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01') " + u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')" It is possible to specify several fields to filter or exclude by: @@ -80,7 +80,7 @@ To check if there are any matches at all, you can use any of the following equiv Ordering -------- -To sorting order of the results can be controlled using the `order_by` method: +The sorting order of the results can be controlled using the `order_by` method: qs = Person.objects_in(database).order_by('last_name', 'first_name') @@ -91,6 +91,11 @@ The default order is ascending. To use descending order, add a minus sign before Omitting Fields --------------- -When not all model fields are needed, it is more efficient to omit them from the query. This is especially true when there are large fields that may slow the query down. Use the `only` method to specify which fields to retrieve: +When some of the model fields aren't needed, it is more efficient to omit them from the query. This is especially true when there are large fields that may slow the query down. Use the `only` method to specify which fields to retrieve: qs = Person.objects_in(database).only('first_name', 'birthday') + + +--- + +[<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md) \ No newline at end of file diff --git a/docs/schema_migrations.md b/docs/schema_migrations.md index 75b802e..2d67f8d 100644 --- a/docs/schema_migrations.md +++ b/docs/schema_migrations.md @@ -57,4 +57,9 @@ To migrate a database, create a `Database` instance and call its `migrate` metho Database('analytics_db').migrate('analytics.analytics_migrations') -Note that you may have more than one migrations package. \ No newline at end of file +Note that you may have more than one migrations package. + + +--- + +[<< Table Engines](table_engines.md) | [Table of Contents](toc.md) | [System Models >>](system_models.md) \ No newline at end of file diff --git a/docs/system_models.md b/docs/system_models.md index c3a3331..beed825 100644 --- a/docs/system_models.md +++ b/docs/system_models.md @@ -1,4 +1,4 @@ -System models +System Models ============= [Clickhouse docs](https://clickhouse.yandex/reference_en.html#System%20tables). @@ -11,7 +11,7 @@ Currently the following system models are supported: | ------------ | -------------- | --------------------------------------------------- | SystemPart | system.parts | Gives methods to work with partitions. See below. -Partitions and parts +Partitions and Parts -------------------- [ClickHouse docs](https://clickhouse.yandex/reference_en.html#Manipulations%20with%20partitions%20and%20parts). @@ -40,3 +40,8 @@ Usage example: partitions[0].drop() # Dropped partition `Note`: system.parts stores information for all databases. To be correct, SystemPart model was designed to receive only parts belonging to the given database instance. 
+ + +--- + +[<< Schema Migrations](schema_migrations.md) | [Table of Contents](toc.md) | [Contributing >>](contributing.md) \ No newline at end of file diff --git a/docs/table_engines.md b/docs/table_engines.md index 79f40df..2450fec 100644 --- a/docs/table_engines.md +++ b/docs/table_engines.md @@ -56,3 +56,8 @@ Any of the above engines can be converted to a replicated engine (e.g. `Replicat engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), replica_table_path='/clickhouse/tables/{layer}-{shard}/hits', replica_name='{replica}') + + +--- + +[<< Field Types](field_types.md) | [Table of Contents](toc.md) | [Schema Migrations >>](schema_migrations.md) \ No newline at end of file diff --git a/docs/toc.md b/docs/toc.md index 9e014fb..1848631 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -1,3 +1,5 @@ +Table of Contents +================= * [Overview](index.md#overview) * [Installation](index.md#installation) @@ -13,6 +15,13 @@ * [Counting](models_and_databases.md#counting) * [Pagination](models_and_databases.md#pagination) + * [Querysets](querysets.md#querysets) + * [Filtering](querysets.md#filtering) + * [Using the in Operator](querysets.md#using-the-in-operator) + * [Counting and Checking Existence](querysets.md#counting-and-checking-existence) + * [Ordering](querysets.md#ordering) + * [Omitting Fields](querysets.md#omitting-fields) + * [Field Types](field_types.md#field-types) * [DateTimeField and Time Zones](field_types.md#datetimefield-and-time-zones) * [Working with enum fields](field_types.md#working-with-enum-fields) @@ -27,7 +36,7 @@ * [Writing Migrations](schema_migrations.md#writing-migrations) * [Running Migrations](schema_migrations.md#running-migrations) - * [System models](system_models.md#system-models) - * [Partitions and parts](system_models.md#partitions-and-parts) + * [System Models](system_models.md#system-models) + * [Partitions and Parts](system_models.md#partitions-and-parts) * [Contributing](contributing.md#contributing) diff --git a/scripts/README.md b/scripts/README.md index 6f4e306..f3e4096 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,5 +1,18 @@ This directory contains various scripts for use while developing. +generate_toc +------------ +Generates the table of contents (toc.md) +Usage: + cd docs + ../scripts/generate_toc.sh + + +gh-md-toc +--------- +Used by generate_toc. + + docs2html --------- Converts markdown docs to html for preview. Requires Pandoc. @@ -9,11 +22,6 @@ Usage: ../scripts/docs2html.sh -gh-md-toc ---------- -Used by docs2html to generate the table of contents. - - test_python3 ------------ Creates a Python 3 virtualenv, clones the project into it, and runs the tests. 
diff --git a/scripts/docs2html.sh b/scripts/docs2html.sh index 6ed4f2d..4916530 100755 --- a/scripts/docs2html.sh +++ b/scripts/docs2html.sh @@ -1,18 +1,6 @@ mkdir -p ../htmldocs -echo "Generating table of contents" -../scripts/gh-md-toc \ - index.md \ - models_and_databases.md \ - querysets.md \ - field_types.md \ - table_engines.md \ - schema_migrations.md \ - system_models.md \ - contributing.md \ - > toc.md - find ./ -iname "*.md" -type f -exec sh -c 'echo "Converting ${0}"; pandoc "${0}" -s -o "../htmldocs/${0%.md}.html"' {} \; echo "Fixing links" diff --git a/scripts/generate_toc.sh b/scripts/generate_toc.sh new file mode 100755 index 0000000..7b57403 --- /dev/null +++ b/scripts/generate_toc.sh @@ -0,0 +1,13 @@ +echo "Table of Contents" > toc.md +echo "=================" >> toc.md + +../scripts/gh-md-toc \ + index.md \ + models_and_databases.md \ + querysets.md \ + field_types.md \ + table_engines.md \ + schema_migrations.md \ + system_models.md \ + contributing.md \ + >> toc.md From 3d02d89e2ac5e4b4aeb8153aa2546890bb37fc58 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 28 Apr 2017 18:17:42 +0300 Subject: [PATCH 08/53] update isolated python version --- buildout.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildout.cfg b/buildout.cfg index e3de92a..0078704 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -28,7 +28,7 @@ homepage = https://github.com/Infinidat/infi.clickhouse_orm [isolated-python] recipe = infi.recipe.python -version = v2.7.8.4 +version = v2.7.9.4 [setup.py] recipe = infi.recipe.template.version From 10cc021acfe04158b5ebcccc752d38e54e4d78d2 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 28 Apr 2017 18:18:04 +0300 Subject: [PATCH 09/53] need to run script under bash, otherwise virtualenv doesn't get activated --- scripts/test_python3.sh | 1 + 1 file changed, 1 insertion(+) mode change 100644 => 100755 scripts/test_python3.sh diff --git a/scripts/test_python3.sh b/scripts/test_python3.sh old mode 100644 new mode 100755 index 0fe01e3..86016cd --- a/scripts/test_python3.sh +++ b/scripts/test_python3.sh @@ -1,3 +1,4 @@ +#!/bin/bash cd /tmp rm -rf /tmp/orm_env* virtualenv -p python3 /tmp/orm_env From 64bf3b423ef034768b3df7fac2a7b4a671731130 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 28 Apr 2017 18:18:23 +0300 Subject: [PATCH 10/53] python 3 compatibility --- tests/test_querysets.py | 2 +- tests/test_readonly.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 35e2e6c..817f7d4 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -121,7 +121,7 @@ class QuerySetTestCase(TestCaseWithData): self._test_qs(qs.filter(num__gt=1), 3) self._test_qs(qs.filter(num__gte=1), 4) self._test_qs(qs.filter(num__in=(1, 2, 3)), 3) - self._test_qs(qs.filter(num__in=xrange(1, 4)), 3) + self._test_qs(qs.filter(num__in=range(1, 4)), 3) Color = Enum('Color', u'red blue green yellow brown white black') diff --git a/tests/test_readonly.py b/tests/test_readonly.py index f67c093..371fdcb 100644 --- a/tests/test_readonly.py +++ b/tests/test_readonly.py @@ -24,7 +24,7 @@ class ReadonlyTestCase(TestCaseWithData): self.database.drop_table(Person) with self.assertRaises(DatabaseException): self.database.drop_database() - except DatabaseException, e: + except DatabaseException as e: if 'Unknown user' in unicode(e): raise unittest.SkipTest('Database user "%s" is not defined' % username) else: From be474b3aede053d492a269c3aa4719d129b604c7 Mon Sep 17 00:00:00 2001 From: Itai 
Shirav Date: Fri, 28 Apr 2017 18:36:40 +0300 Subject: [PATCH 11/53] Add simple engines: TinyLog, Log, Memory --- docs/table_engines.md | 81 ++++++++++++++++++++---------- src/infi/clickhouse_orm/engines.py | 18 +++++++ tests/test_engines.py | 15 ++++++ 3 files changed, 88 insertions(+), 26 deletions(-) diff --git a/docs/table_engines.md b/docs/table_engines.md index 2450fec..2f92183 100644 --- a/docs/table_engines.md +++ b/docs/table_engines.md @@ -1,8 +1,37 @@ Table Engines ============= +See: [ClickHouse Documentation](https://clickhouse.yandex/reference_en.html#Table+engines) + Each model must have an engine instance, used when creating the table in ClickHouse. +The following engines are supported by the ORM: + +- TinyLog +- Log +- Memory +- MergeTree / ReplicatedMergeTree +- CollapsingMergeTree / ReplicatedCollapsingMergeTree +- SummingMergeTree / ReplicatedSummingMergeTree +- ReplacingMergeTree / ReplicatedReplacingMergeTree +- Buffer + + +Simple Engines +-------------- + +`TinyLog`, `Log` and `Memory` engines do not require any parameters: + + engine = engines.TinyLog() + + engine = engines.Log() + + engine = engines.Memory() + + +Engines in the MergeTree Family +------------------------------- + To define a `MergeTree` engine, supply the date column name and the names (or expressions) for the key columns: engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate')) @@ -24,32 +53,7 @@ For a `ReplacingMergeTree` you can optionally specify the version column: engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version') -A `Buffer` engine is available for BufferModels. (See below how to use BufferModel). You can specify following parameters: - - engine = engines.Buffer(Person) # you need to initialize engine with main Model. Other default parameters will be used - # or: - engine = engines.Buffer(Person, num_layers=16, min_time=10, - max_time=100, min_rows=10000, max_rows=1000000, - min_bytes=10000000, max_bytes=100000000) - -Buffer Models -------------- - -Here's how you can define Model for Buffer Engine. The Buffer Model should be inherited from models.BufferModel and main Model: - - class PersonBuffer(models.BufferModel, Person): - - engine = engines.Buffer(Person) - -Then you can insert objects into Buffer model and they will be handled by ClickHouse properly: - - db.create_table(PersonBuffer) - suzy = PersonBuffer(first_name='Suzy', last_name='Jones') - dan = PersonBuffer(first_name='Dan', last_name='Schwartz') - db.insert([dan, suzy]) - -Data Replication ----------------- +### Data Replication Any of the above engines can be converted to a replicated engine (e.g. `ReplicatedMergeTree`) by adding two parameters, `replica_table_path` and `replica_name`: @@ -58,6 +62,31 @@ Any of the above engines can be converted to a replicated engine (e.g. `Replicat replica_name='{replica}') +Buffer Engine +------------- + +A `Buffer` engine is only used in conjunction with a `BufferModel`. +The model should be a subclass of both `models.BufferModel` and the main model. 
+The main model is also passed to the engine:
+
+    class PersonBuffer(models.BufferModel, Person):
+
+        engine = engines.Buffer(Person)
+
+Additional buffer parameters can optionally be specified:
+
+    engine = engines.Buffer(Person, num_layers=16, min_time=10,
+                            max_time=100, min_rows=10000, max_rows=1000000,
+                            min_bytes=10000000, max_bytes=100000000)
+
+Then you can insert objects into Buffer model and they will be handled by ClickHouse properly:
+
+    db.create_table(PersonBuffer)
+    suzy = PersonBuffer(first_name='Suzy', last_name='Jones')
+    dan = PersonBuffer(first_name='Dan', last_name='Schwartz')
+    db.insert([dan, suzy])
+
+
 ---
 
 [<< Field Types](field_types.md) | [Table of Contents](toc.md) | [Schema Migrations >>](schema_migrations.md)
\ No newline at end of file
diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py
index c26b451..7a011af 100644
--- a/src/infi/clickhouse_orm/engines.py
+++ b/src/infi/clickhouse_orm/engines.py
@@ -5,6 +5,24 @@ class Engine(object):
         raise NotImplementedError()
 
 
+class TinyLog(Engine):
+
+    def create_table_sql(self):
+        return 'TinyLog'
+
+
+class Log(Engine):
+
+    def create_table_sql(self):
+        return 'Log'
+
+
+class Memory(Engine):
+
+    def create_table_sql(self):
+        return 'Memory'
+
+
 class MergeTree(Engine):
 
     def __init__(self, date_col, key_cols, sampling_expr=None,
diff --git a/tests/test_engines.py b/tests/test_engines.py
index d3d8865..4aea6d1 100644
--- a/tests/test_engines.py
+++ b/tests/test_engines.py
@@ -53,6 +53,21 @@ class EnginesTestCase(unittest.TestCase):
         engine = ReplacingMergeTree('date', ('date', 'event_id', 'event_group'), 'event_uversion')
         self._create_and_insert(TestModel)
 
+    def test_tiny_log(self):
+        class TestModel(SampleModel):
+            engine = TinyLog()
+        self._create_and_insert(TestModel)
+
+    def test_log(self):
+        class TestModel(SampleModel):
+            engine = Log()
+        self._create_and_insert(TestModel)
+
+    def test_memory(self):
+        class TestModel(SampleModel):
+            engine = Memory()
+        self._create_and_insert(TestModel)
+
 
 class SampleModel(Model):
 
From 2ababe8b5e8c00b475b8232a9aadb411be58b7ae Mon Sep 17 00:00:00 2001
From: Itai Shirav
Date: Fri, 28 Apr 2017 19:03:34 +0300
Subject: [PATCH 12/53] refactor documentation

---
 README               |  58 +++++-
 README.rst           | 436 ------------------------------------------
 scripts/docs2html.sh |   3 +
 3 files changed, 60 insertions(+), 437 deletions(-)
 delete mode 100644 README.rst

diff --git a/README b/README
index a1320b1..636c9cb 100644
--- a/README
+++ b/README
@@ -1 +1,57 @@
-README.rst
+Introduction
+============
+
+This project is a simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/).
+It allows you to define model classes whose instances can be written to the database and read from it.
+
+Let's jump right in with a simple example of monitoring CPU usage.
First we need to define the model class, +connect to the database and create a table for the model: + +```python +from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.engines import Memory + +class CPUStats(Model): + + timestamp = DateTimeField() + cpu_id = UInt16Field() + cpu_percent = Float32Field() + + engine = Memory() + +db = Database('demo') +db.create_table(CPUStats) +``` + +Now we can collect usage statistics per CPU, and write them to the database: + +```python +import psutil, time, datetime + +psutil.cpu_percent(percpu=True) # first sample should be discarded +while True: + time.sleep(1) + stats = psutil.cpu_percent(percpu=True) + timestamp = datetime.datetime.now() + db.insert([ + CPUStats(timestamp=timestamp, cpu_id=cpu_id, cpu_percent=cpu_percent) + for cpu_id, cpu_percent in enumerate(stats) + ]) +``` + +Querying the table is easy, using either the query builder or raw SQL: + +```python +# Calculate what percentage of the time CPU 1 was over 95% busy +total = CPUStats.objects_in(db).filter(cpu_id=1).count() +busy = CPUStats.objects_in(db).filter(cpu_id=1, cpu_percent__gt=95).count() +print 'CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total) + +# Calculate the average usage per CPU +for row in db.select('SELECT cpu_id, avg(cpu_percent) AS average FROM demo.cpustats GROUP BY cpu_id'): + print 'CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row) +``` + +To learn more please visit the [documentation](docs/index.md). \ No newline at end of file diff --git a/README.rst b/README.rst deleted file mode 100644 index 0951776..0000000 --- a/README.rst +++ /dev/null @@ -1,436 +0,0 @@ -Overview -======== - -This project is simple ORM for working with the `ClickHouse database `_. -It allows you to define model classes whose instances can be written to the database and read from it. - -Installation -============ - -To install infi.clickhouse_orm:: - - pip install infi.clickhouse_orm - -Usage -===== - -Defining Models ---------------- - -Models are defined in a way reminiscent of Django's ORM:: - - from infi.clickhouse_orm import models, fields, engines - - class Person(models.Model): - - first_name = fields.StringField() - last_name = fields.StringField() - birthday = fields.DateField() - height = fields.Float32Field() - - engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) - -It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.). -Alternatively it is possible to pass alias or materialized parameters (see below for usage examples). -Only one of ``default``, ``alias`` and ``materialized`` parameters can be provided. - -See below for the supported field types and table engines. - -Table Names -*********** - -The table name used for the model is its class name, converted to lowercase. To override the default name, -implement the ``table_name`` method:: - - class Person(models.Model): - - ... - - @classmethod - def table_name(cls): - return 'people' - -Using Models ------------- - -Once you have a model, you can create model instances:: - - >>> dan = Person(first_name='Dan', last_name='Schwartz') - >>> suzy = Person(first_name='Suzy', last_name='Jones') - >>> dan.first_name - u'Dan' - -When values are assigned to model fields, they are immediately converted to their Pythonic data type. 
-In case the value is invalid, a ``ValueError`` is raised:: - - >>> suzy.birthday = '1980-01-17' - >>> suzy.birthday - datetime.date(1980, 1, 17) - >>> suzy.birthday = 0.5 - ValueError: Invalid value for DateField - 0.5 - >>> suzy.birthday = '1922-05-31' - ValueError: DateField out of range - 1922-05-31 is not between 1970-01-01 and 2038-01-19 - -Inserting to the Database -------------------------- - -To write your instances to ClickHouse, you need a ``Database`` instance:: - - from infi.clickhouse_orm.database import Database - - db = Database('my_test_db') - -This automatically connects to http://localhost:8123 and creates a database called my_test_db, unless it already exists. -If necessary, you can specify a different database URL and optional credentials:: - - db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger') - -Using the ``Database`` instance you can create a table for your model, and insert instances to it:: - - db.create_table(Person) - db.insert([dan, suzy]) - -The ``insert`` method can take any iterable of model instances, but they all must belong to the same model class. - -Creating a read-only database is also supported. Such a ``Database`` instance can only read data, and cannot -modify data or schemas:: - - db = Database('my_test_db', readonly=True) - -Reading from the Database -------------------------- - -Loading model instances from the database is simple:: - - for person in db.select("SELECT * FROM my_test_db.person", model_class=Person): - print person.first_name, person.last_name - -Do not include a ``FORMAT`` clause in the query, since the ORM automatically sets the format to ``TabSeparatedWithNamesAndTypes``. - -It is possible to select only a subset of the columns, and the rest will receive their default values:: - - for person in db.select("SELECT first_name FROM my_test_db.person WHERE last_name='Smith'", model_class=Person): - print person.first_name - -SQL Placeholders -**************** - -There are a couple of special placeholders that you can use inside the SQL to make it easier to write: -``$db`` and ``$table``. The first one is replaced by the database name, and the second is replaced by -the database name plus table name (but is available only when the model is specified). - -So instead of this:: - - db.select("SELECT * FROM my_test_db.person", model_class=Person) - -you can use:: - - db.select("SELECT * FROM $db.person", model_class=Person) - -or even:: - - db.select("SELECT * FROM $table", model_class=Person) - -Ad-Hoc Models -************* - -Specifying a model class is not required. In case you do not provide a model class, an ad-hoc class will -be defined based on the column names and types returned by the query:: - - for row in db.select("SELECT max(height) as max_height FROM my_test_db.person"): - print row.max_height - -This is a very convenient feature that saves you the need to define a model for each query, while still letting -you work with Pythonic column values and an elegant syntax. 
- -Counting --------- - -The ``Database`` class also supports counting records easily:: - - >>> db.count(Person) - 117 - >>> db.count(Person, conditions="height > 1.90") - 6 - -Pagination ----------- - -It is possible to paginate through model instances:: - - >>> order_by = 'first_name, last_name' - >>> page = db.paginate(Person, order_by, page_num=1, page_size=10) - >>> print page.number_of_objects - 2507 - >>> print page.pages_total - 251 - >>> for person in page.objects: - >>> # do something - -The ``paginate`` method returns a ``namedtuple`` containing the following fields: - -- ``objects`` - the list of objects in this page -- ``number_of_objects`` - total number of objects in all pages -- ``pages_total`` - total number of pages -- ``number`` - the page number, starting from 1; the special value -1 may be used to retrieve the last page -- ``page_size`` - the number of objects per page - -You can optionally pass conditions to the query:: - - >>> page = db.paginate(Person, order_by, page_num=1, page_size=100, conditions='height > 1.90') - -Note that ``order_by`` must be chosen so that the ordering is unique, otherwise there might be -inconsistencies in the pagination (such as an instance that appears on two different pages). - - -System models -------------- - -`Clickhouse docs `_. - -System models are read only models for implementing part of the system's functionality, -and for providing access to information about how the system is working. - -Currently the following system models are supported: - -=================== ============ =================================================== -Class DB Table Comments -=================== ============ =================================================== -SystemPart system.parts Gives methods to work with partitions. See below. -=================== ============ =================================================== - - -Partitions and parts --------------------- - -`ClickHouse docs `_. - -A partition in a table is data for a single calendar month. Table "system.parts" contains information about each part. - -=================== ======================= ============================================================================================= -Method Parameters Comments -=================== ======================= ============================================================================================= -get(static) database, conditions="" Gets database partitions, filtered by conditions -get_active(static) database, conditions="" Gets only active (not detached or dropped) partitions, filtered by conditions -detach settings=None Detaches the partition. Settings is a dict of params to pass to http request -drop settings=None Drops the partition. Settings is a dict of params to pass to http request -attach settings=None Attaches already detached partition. Settings is a dict of params to pass to http request -freeze settings=None Freezes (makes backup) of the partition. Settings is a dict of params to pass to http request -fetch settings=None Fetches partition. 
Settings is a dict of params to pass to http request -=================== ======================= ============================================================================================= - -Usage example:: - - from infi.clickhouse_orm.database import Database - from infi.clickhouse_orm.system_models import SystemPart - db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger') - partitions = SystemPart.get_active(db, conditions='') # Getting all active partitions of the database - if len(partitions) > 0: - partitions = sorted(partitions, key=lambda obj: obj.name) # Partition name is YYYYMM, so we can sort so - partitions[0].freeze() # Make a backup in /opt/clickhouse/shadow directory - partitions[0].drop() # Dropped partition - -``Note``: system.parts stores information for all databases. To be correct, -SystemPart model was designed to receive only given database parts. - - -Schema Migrations ------------------ - -Over time, your models may change and the database will have to be modified accordingly. -Migrations allow you to describe these changes succinctly using Python, and to apply them -to the database. A migrations table automatically keeps track of which migrations were already applied. - -For details please refer to the MIGRATIONS.rst document. - -Field Types ------------ - -Currently the following field types are supported: - -=================== ======== ================= =================================================== -Class DB Type Pythonic Type Comments -=================== ======== ================= =================================================== -StringField String unicode Encoded as UTF-8 when written to ClickHouse -FixedStringField String unicode Encoded as UTF-8 when written to ClickHouse -DateField Date datetime.date Range 1970-01-01 to 2038-01-19 -DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC -Int8Field Int8 int Range -128 to 127 -Int16Field Int16 int Range -32768 to 32767 -Int32Field Int32 int Range -2147483648 to 2147483647 -Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807 -UInt8Field UInt8 int Range 0 to 255 -UInt16Field UInt16 int Range 0 to 65535 -UInt32Field UInt32 int Range 0 to 4294967295 -UInt64Field UInt64 int/long Range 0 to 18446744073709551615 -Float32Field Float32 float -Float64Field Float64 float -Enum8Field Enum8 Enum See below -Enum16Field Enum16 Enum See below -ArrayField Array list See below -=================== ======== ================= =================================================== - -DateTimeField and Time Zones -**************************** - -A ``DateTimeField`` can be assigned values from one of the following types: - -- datetime -- date -- integer - number of seconds since the Unix epoch -- string in ``YYYY-MM-DD HH:MM:SS`` format - -The assigned value always gets converted to a timezone-aware ``datetime`` in UTC. If the assigned -value is a timezone-aware ``datetime`` in another timezone, it will be converted to UTC. Otherwise, the assigned value is assumed to already be in UTC. - -DateTime values that are read from the database are also converted to UTC. ClickHouse formats them according to the -timezone of the server, and the ORM makes the necessary conversions. This requires a ClickHouse version which is new -enough to support the ``timezone()`` function, otherwise it is assumed to be using UTC. In any case, we recommend -settings the server timezone to UTC in order to prevent confusion. 
- -Working with enum fields -************************ - -``Enum8Field`` and ``Enum16Field`` provide support for working with ClickHouse enum columns. They accept -strings or integers as values, and convert them to the matching Pythonic Enum member. - -Python 3.4 and higher supports Enums natively. When using previous Python versions you -need to install the `enum34` library. - -Example of a model with an enum field:: - - Gender = Enum('Gender', 'male female unspecified') - - class Person(models.Model): - - first_name = fields.StringField() - last_name = fields.StringField() - birthday = fields.DateField() - gender = fields.Enum32Field(Gender) - - engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) - - suzy = Person(first_name='Suzy', last_name='Jones', gender=Gender.female) - -Working with array fields -************************* - -You can create array fields containing any data type, for example:: - - class SensorData(models.Model): - - date = fields.DateField() - temperatures = fields.ArrayField(fields.Float32Field()) - humidity_levels = fields.ArrayField(fields.UInt8Field()) - - engine = engines.MergeTree('date', ('date',)) - - data = SensorData(date=date.today(), temperatures=[25.5, 31.2, 28.7], humidity_levels=[41, 39, 66]) - - -Working with materialized and alias fields -****************************************** - -ClickHouse provides an opportunity to create MATERIALIZED and ALIAS Fields. - -See documentation `here `_. - -Both field types can't be inserted into the database directly, so they are ignored when using the ``Database.insert()`` method. -ClickHouse does not return the field values if you use ``"SELECT * FROM ..."`` - you have to list these field -names explicitly in the query. - -Usage:: - - class Event(models.Model): - - created = fields.DateTimeField() - created_date = fields.DateTimeField(materialized='toDate(created)') - name = fields.StringField() - username = fields.StringField(alias='name') - - engine = engines.MergeTree('created_date', ('created_date', 'created')) - - obj = Event(created=datetime.now(), name='MyEvent') - db = Database('my_test_db') - db.insert([obj]) - # All values will be retrieved from database - db.select('SELECT created, created_date, username, name FROM $db.event', model_class=Event) - # created_date and username will contain a default value - db.select('SELECT * FROM $db.event', model_class=Event) - - -Table Engines -------------- - -Each model must have an engine instance, used when creating the table in ClickHouse. - -To define a ``MergeTree`` engine, supply the date column name and the names (or expressions) for the key columns:: - - engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate')) - -You may also provide a sampling expression:: - - engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)') - -A ``CollapsingMergeTree`` engine is defined in a similar manner, but requires also a sign column:: - - engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign') - -For a ``SummingMergeTree`` you can optionally specify the summing columns:: - - engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), - summing_cols=('Shows', 'Clicks', 'Cost')) - -For a ``ReplacingMergeTree`` you can optionally specify the version column:: - - engine = engines.ReplacingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), ver_col='Version') - -A ``Buffer`` engine is available for BufferModels. 
(See below for how to use a BufferModel.) You can specify the following parameters::
-
-    engine = engines.Buffer(Person)  # initialize the engine with the main Model; defaults are used for the other parameters
-    # or:
-    engine = engines.Buffer(Person, num_layers=16, min_time=10,
-                            max_time=100, min_rows=10000, max_rows=1000000,
-                            min_bytes=10000000, max_bytes=100000000)
-
-Buffer Models
--------------
-Here's how to define a model for the Buffer engine. A buffer model must inherit from both models.BufferModel and the main model::
-
-    class PersonBuffer(models.BufferModel, Person):
-
-        engine = engines.Buffer(Person)
-
-Then you can insert objects into the buffer model, and ClickHouse will handle them properly::
-
-    db.create_table(PersonBuffer)
-    suzy = PersonBuffer(first_name='Suzy', last_name='Jones')
-    dan = PersonBuffer(first_name='Dan', last_name='Schwartz')
-    db.insert([dan, suzy])
-
-
-Data Replication
-****************
-
-Any of the above engines can be converted to a replicated engine (e.g. ``ReplicatedMergeTree``) by adding two parameters, ``replica_table_path`` and ``replica_name``::
-
-    engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'),
-                               replica_table_path='/clickhouse/tables/{layer}-{shard}/hits',
-                               replica_name='{replica}')
-
-Development
-===========
-
-After cloning the project, run the following commands::
-
-    easy_install -U infi.projector
-    cd infi.clickhouse_orm
-    projector devenv build
-
-To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run::
-
-    bin/nosetests
-
-To see test coverage information run::
-
-    bin/nosetests --with-coverage --cover-package=infi.clickhouse_orm

diff --git a/scripts/docs2html.sh b/scripts/docs2html.sh
index 4916530..b8d87dd 100755
--- a/scripts/docs2html.sh
+++ b/scripts/docs2html.sh
@@ -3,5 +3,8 @@
 mkdir -p ../htmldocs
 find ./ -iname "*.md" -type f -exec sh -c 'echo "Converting ${0}"; pandoc "${0}" -s -o "../htmldocs/${0%.md}.html"' {} \;
+echo "Converting README"
+pandoc ../README -s -o "../htmldocs/README.html"
+
 echo "Fixing links"
 sed -i 's/\.md/\.html/g' ../htmldocs/*.html

From 7a044e0888fdb3c14b0c864089aabb45c4d74e08 Mon Sep 17 00:00:00 2001
From: Itai Shirav
Date: Fri, 28 Apr 2017 19:10:45 +0300
Subject: [PATCH 13/53] fix link

---
 README | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README b/README
index 636c9cb..4021f28 100644
--- a/README
+++ b/README
@@ -54,4 +54,4 @@ for row in db.select('SELECT cpu_id, avg(cpu_percent) AS average FROM demo.cpust
 print 'CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row)
 ```
 
-To learn more please visit the [documentation](docs/index.md).
\ No newline at end of file
+To learn more please visit the [documentation](docs/toc.md).
\ No newline at end of file From d1a0fe7ee80872ac965c42553b7ecd4a72f1a127 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 28 Apr 2017 19:45:54 +0300 Subject: [PATCH 14/53] rename README --- README => README.md | 0 scripts/docs2html.sh | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename README => README.md (100%) diff --git a/README b/README.md similarity index 100% rename from README rename to README.md diff --git a/scripts/docs2html.sh b/scripts/docs2html.sh index b8d87dd..3ed15a5 100755 --- a/scripts/docs2html.sh +++ b/scripts/docs2html.sh @@ -3,8 +3,8 @@ mkdir -p ../htmldocs find ./ -iname "*.md" -type f -exec sh -c 'echo "Converting ${0}"; pandoc "${0}" -s -o "../htmldocs/${0%.md}.html"' {} \; -echo "Converting README" -pandoc ../README -s -o "../htmldocs/README.html" +echo "Converting README.md" +pandoc ../README.md -s -o "../htmldocs/README.html" echo "Fixing links" sed -i 's/\.md/\.html/g' ../htmldocs/*.html From f1ab9b6179996f3e3e0569c27ce31796238c6cc8 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 2 May 2017 17:46:47 +0300 Subject: [PATCH 15/53] Better TOC generation --- docs/toc.md | 10 +- scripts/README.md | 6 +- scripts/generate_toc.sh | 27 ++--- scripts/gh-md-toc | 185 -------------------------------- scripts/html_to_markdown_toc.py | 31 ++++++ 5 files changed, 55 insertions(+), 204 deletions(-) delete mode 100755 scripts/gh-md-toc create mode 100644 scripts/html_to_markdown_toc.py diff --git a/docs/toc.md b/docs/toc.md index 1848631..b339fd3 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -1,5 +1,4 @@ -Table of Contents -================= +# Table of Contents * [Overview](index.md#overview) * [Installation](index.md#installation) @@ -29,8 +28,10 @@ Table of Contents * [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields) * [Table Engines](table_engines.md#table-engines) - * [Buffer Models](table_engines.md#buffer-models) - * [Data Replication](table_engines.md#data-replication) + * [Simple Engines](table_engines.md#simple-engines) + * [Engines in the MergeTree Family](table_engines.md#engines-in-the-mergetree-family) + * [Data Replication](table_engines.md#data-replication) + * [Buffer Engine](table_engines.md#buffer-engine) * [Schema Migrations](schema_migrations.md#schema-migrations) * [Writing Migrations](schema_migrations.md#writing-migrations) @@ -40,3 +41,4 @@ Table of Contents * [Partitions and Parts](system_models.md#partitions-and-parts) * [Contributing](contributing.md#contributing) + diff --git a/scripts/README.md b/scripts/README.md index f3e4096..5782dc9 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -2,14 +2,14 @@ This directory contains various scripts for use while developing. generate_toc ------------ -Generates the table of contents (toc.md) +Generates the table of contents (toc.md). Requires Pandoc. Usage: cd docs ../scripts/generate_toc.sh -gh-md-toc ---------- +html_to_markdown_toc.py +----------------------- Used by generate_toc. 
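
It reads Pandoc-generated HTML on stdin and prints a Markdown TOC fragment whose
links point into the file named by its single argument. For example, this is
(roughly) how generate_toc.sh invokes it for each document:

    pandoc index.md | python ../scripts/html_to_markdown_toc.py index.md >> toc.md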
diff --git a/scripts/generate_toc.sh b/scripts/generate_toc.sh
index 7b57403..1b1f1ed 100755
--- a/scripts/generate_toc.sh
+++ b/scripts/generate_toc.sh
@@ -1,13 +1,16 @@
-echo "Table of Contents" > toc.md
-echo "=================" >> toc.md
-../scripts/gh-md-toc \
-    index.md \
-    models_and_databases.md \
-    querysets.md \
-    field_types.md \
-    table_engines.md \
-    schema_migrations.md \
-    system_models.md \
-    contributing.md \
-    >> toc.md
+generate_one() {
+    # Converts Markdown to HTML using Pandoc, and then extracts the header tags
+    pandoc "$1" | python "../scripts/html_to_markdown_toc.py" "$1" >> toc.md
+}
+
+printf "# Table of Contents\n\n" > toc.md
+
+generate_one "index.md"
+generate_one "models_and_databases.md"
+generate_one "querysets.md"
+generate_one "field_types.md"
+generate_one "table_engines.md"
+generate_one "schema_migrations.md"
+generate_one "system_models.md"
+generate_one "contributing.md"

diff --git a/scripts/gh-md-toc b/scripts/gh-md-toc
deleted file mode 100755
index 158bc5f..0000000
--- a/scripts/gh-md-toc
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env bash
-
-#
-# Source: https://github.com/ekalinin/github-markdown-toc
-#
-# Steps:
-#
-# 1. Download corresponding html file for some README.md:
-#    curl -s $1
-#
-# 2. Discard rows where no substring 'user-content-' (github's markup):
-#    awk '/user-content-/ { ...
-#
-# 3.1 Get last number in each row like ' ... sitemap.js.*<\/h/)+2, RLENGTH-5)
-#
-# 5. Find anchor and insert it inside "(...)":
-#    substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8)
-#

Hello world github/linguist#1 cool, and #1!

'" -gh_toc_md2html() { - local gh_file_md=$1 - curl -s --user-agent "$gh_user_agent" \ - --data-binary @"$gh_file_md" -H "Content-Type:text/plain" \ - https://api.github.com/markdown/raw -} - -# -# Is passed string url -# -gh_is_url() { - if [[ $1 == https* || $1 == http* ]]; then - echo "yes" - else - echo "no" - fi -} - -# -# TOC generator -# -gh_toc(){ - local gh_src=$1 - local gh_src_copy=$1 - local gh_ttl_docs=$2 - - if [ "$gh_src" = "" ]; then - echo "Please, enter URL or local path for a README.md" - exit 1 - fi - - - # Show "TOC" string only if working with one document - if [ "$gh_ttl_docs" = "1" ]; then - - echo "Table of Contents" - echo "=================" - echo "" - gh_src_copy="" - - fi - - if [ "$(gh_is_url "$gh_src")" == "yes" ]; then - gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy" - else - gh_toc_md2html "$gh_src" | gh_toc_grab "$gh_src_copy" - fi -} - -# -# Grabber of the TOC from rendered html -# -# $1 — a source url of document. -# It's need if TOC is generated for multiple documents. -# -gh_toc_grab() { - # if closed is on the new line, then move it on the prev line - # for example: - # was: The command foo1 - # - # became: The command foo1 - sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' | - # find strings that corresponds to template - grep -E -o '//' | sed 's/<\/code>//' | - # now all rows are like: - # ... .*<\/h/)+2, RLENGTH-5)"](" gh_url substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8) ")"}' | sed 'y/+/ /; s/%/\\x/g')" -} - -# -# Returns filename only from full path or url -# -gh_toc_get_filename() { - echo "${1##*/}" -} - -# -# Options hendlers -# -gh_toc_app() { - local app_name="gh-md-toc" - - if [ "$1" = '--help' ] || [ $# -eq 0 ] ; then - echo "GitHub TOC generator ($app_name): $gh_toc_version" - echo "" - echo "Usage:" - echo " $app_name src [src] Create TOC for a README file (url or local path)" - echo " $app_name - Create TOC for markdown from STDIN" - echo " $app_name --help Show help" - echo " $app_name --version Show version" - return - fi - - if [ "$1" = '--version' ]; then - echo "$gh_toc_version" - return - fi - - if [ "$1" = "-" ]; then - if [ -z "$TMPDIR" ]; then - TMPDIR="/tmp" - elif [ -n "$TMPDIR" -a ! 
-d "$TMPDIR" ]; then - mkdir -p "$TMPDIR" - fi - local gh_tmp_md - gh_tmp_md=$(mktemp $TMPDIR/tmp.XXXXXX) - while read input; do - echo "$input" >> "$gh_tmp_md" - done - gh_toc_md2html "$gh_tmp_md" | gh_toc_grab "" - return - fi - - for md in "$@" - do - echo "" - gh_toc "$md" "$#" - done - - #echo "" - #echo "Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc)" -} - -# -# Entry point -# -gh_toc_app "$@" \ No newline at end of file diff --git a/scripts/html_to_markdown_toc.py b/scripts/html_to_markdown_toc.py new file mode 100644 index 0000000..494f32a --- /dev/null +++ b/scripts/html_to_markdown_toc.py @@ -0,0 +1,31 @@ +from HTMLParser import HTMLParser +import sys + + +HEADER_TAGS = ('h1', 'h2', 'h3') + + +class HeadersToMarkdownParser(HTMLParser): + + inside = None + text = '' + + def handle_starttag(self, tag, attrs): + if tag.lower() in HEADER_TAGS: + self.inside = tag + + def handle_endtag(self, tag): + if tag.lower() in HEADER_TAGS: + indent = ' ' * int(self.inside[1]) + fragment = self.text.lower().replace(' ', '-') + print '%s* [%s](%s#%s)' % (indent, self.text, sys.argv[1], fragment) + self.inside = None + self.text = '' + + def handle_data(self, data): + if self.inside: + self.text += data + + +HeadersToMarkdownParser.feed(sys.stdin.read()) +print From 4625a7e00ff0b7acf20d8d261e3febb83ec935fb Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Wed, 3 May 2017 08:36:47 +0300 Subject: [PATCH 16/53] Generate a class reference document --- docs/ref.md | 526 ++++++++++++++++++++++++++++ docs/toc.md | 42 +++ scripts/generate_ref.py | 131 +++++++ scripts/generate_toc.sh | 1 + scripts/html_to_markdown_toc.py | 2 +- src/infi/clickhouse_orm/database.py | 80 ++++- src/infi/clickhouse_orm/fields.py | 14 +- src/infi/clickhouse_orm/models.py | 47 +-- 8 files changed, 813 insertions(+), 30 deletions(-) create mode 100644 docs/ref.md create mode 100644 scripts/generate_ref.py diff --git a/docs/ref.md b/docs/ref.md new file mode 100644 index 0000000..f789578 --- /dev/null +++ b/docs/ref.md @@ -0,0 +1,526 @@ +Class Reference +=============== + +infi.clickhouse_orm.database +---------------------------- + +### Database + +#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False) + +Initializes a database instance. Unless it's readonly, the database will be +created on the ClickHouse server if it does not already exist. + +- `db_name`: name of the database to connect to. +- `db_url`: URL of the ClickHouse server. +- `username`: optional connection credentials. +- `password`: optional connection credentials. +- `readonly`: use a read-only connection. + + +#### count(model_class, conditions=None) + +Counts the number of records in the model's table. + +- `model_class`: the model to count. +- `conditions`: optional SQL conditions (contents of the WHERE clause). + + +#### create_database() + +Creates the database on the ClickHouse server if it does not already exist. + + +#### create_table(model_class) + +Creates a table for the given model class, if it does not exist already. + + +#### drop_database() + +Deletes the database on the ClickHouse server. + + +#### drop_table(model_class) + +Drops the database table of the given model class, if it exists. + + +#### insert(model_instances, batch_size=1000) + +Insert records into the database. + +- `model_instances`: any iterable containing instances of a single model class. +- `batch_size`: number of records to send per chunk (use a lower number if your records are very large). 
+ + +#### migrate(migrations_package_name, up_to=9999) + +Executes schema migrations. + +- `migrations_package_name` - fully qualified name of the Python package +containing the migrations. +- `up_to` - number of the last migration to apply. + + +#### paginate(model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None) + +Selects records and returns a single page of model instances. + +- `model_class`: the model class matching the query's table, +or `None` for getting back instances of an ad-hoc model. +- `order_by`: columns to use for sorting the query (contents of the ORDER BY clause). +- `page_num`: the page number (1-based), or -1 to get the last page. +- `page_size`: number of records to return per page. +- `conditions`: optional SQL conditions (contents of the WHERE clause). +- `settings`: query settings to send as HTTP GET parameters + +The result is a namedtuple containing `objects` (list), `number_of_objects`, +`pages_total`, `number` (of the current page), and `page_size`. + + +#### raw(query, settings=None, stream=False) + +Performs a query and returns its output as text. + +- `query`: the SQL query to execute. +- `settings`: query settings to send as HTTP GET parameters +- `stream`: if true, the HTTP response from ClickHouse will be streamed. + + +#### select(query, model_class=None, settings=None) + +Performs a query and returns a generator of model instances. + +- `query`: the SQL query to execute. +- `model_class`: the model class matching the query's table, +or `None` for getting back instances of an ad-hoc model. +- `settings`: query settings to send as HTTP GET parameters + + +### DatabaseException + +Extends Exception + +Raised when a database operation fails. + +infi.clickhouse_orm.models +-------------------------- + +### Model + +A base class for ORM models. + +#### Model(**kwargs) + +Creates a model instance, using keyword arguments as field values. +Since values are immediately converted to their Pythonic type, +invalid values will cause a `ValueError` to be raised. +Unrecognized field names will cause an `AttributeError`. + + +#### Model.create_table_sql(db_name) + +Returns the SQL command for creating a table for this model. + + +#### Model.drop_table_sql(db_name) + +Returns the SQL command for deleting this model's table. + + +#### Model.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None) + +Create a model instance from a tab-separated line. The line may or may not include a newline. +The `field_names` list must match the fields defined in the model, but does not have to include all of them. +If omitted, it is assumed to be the names of all fields in the model, in order of definition. + +- `line`: the TSV-formatted data. +- `field_names`: names of the model fields in the data. +- `timezone_in_use`: the timezone to use when parsing dates and datetimes. +- `database`: if given, sets the database that this instance belongs to. + + +#### get_database() + +Gets the `Database` that this model instance belongs to. +Returns `None` unless the instance was read from the database or written to it. + + +#### get_field(name) + +Gets a `Field` instance given its name, or `None` if not found. + + +#### Model.objects_in(database) + +Returns a `QuerySet` for selecting instances of this model class. + + +#### set_database(db) + +Sets the `Database` that this model instance belongs to. +This is done automatically when the instance is read from the database or written to it. 
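
For instance (a sketch; `Person` and `db` are assumed from the usage examples), an
instance read back through a query already knows which database it came from:

    suzy = next(iter(Person.objects_in(db).filter(first_name='Suzy')))
    assert suzy.get_database() is db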
+ + +#### Model.table_name() + +Returns the model's database table name. By default this is the +class name converted to lowercase. Override this if you want to use +a different table name. + + +#### to_dict(include_readonly=True, field_names=None) + +Returns the instance's column values as a dict. + +- `include_readonly`: if false, returns only fields that can be inserted into database. +- `field_names`: an iterable of field names to return (optional) + + +#### to_tsv(include_readonly=True) + +Returns the instance's column values as a tab-separated line. A newline is not included. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + +### BufferModel + +Extends Model + +#### BufferModel(**kwargs) + +Creates a model instance, using keyword arguments as field values. +Since values are immediately converted to their Pythonic type, +invalid values will cause a `ValueError` to be raised. +Unrecognized field names will cause an `AttributeError`. + + +#### BufferModel.create_table_sql(db_name) + +Returns the SQL command for creating a table for this model. + + +#### BufferModel.drop_table_sql(db_name) + +Returns the SQL command for deleting this model's table. + + +#### BufferModel.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None) + +Create a model instance from a tab-separated line. The line may or may not include a newline. +The `field_names` list must match the fields defined in the model, but does not have to include all of them. +If omitted, it is assumed to be the names of all fields in the model, in order of definition. + +- `line`: the TSV-formatted data. +- `field_names`: names of the model fields in the data. +- `timezone_in_use`: the timezone to use when parsing dates and datetimes. +- `database`: if given, sets the database that this instance belongs to. + + +#### get_database() + +Gets the `Database` that this model instance belongs to. +Returns `None` unless the instance was read from the database or written to it. + + +#### get_field(name) + +Gets a `Field` instance given its name, or `None` if not found. + + +#### BufferModel.objects_in(database) + +Returns a `QuerySet` for selecting instances of this model class. + + +#### set_database(db) + +Sets the `Database` that this model instance belongs to. +This is done automatically when the instance is read from the database or written to it. + + +#### BufferModel.table_name() + +Returns the model's database table name. By default this is the +class name converted to lowercase. Override this if you want to use +a different table name. + + +#### to_dict(include_readonly=True, field_names=None) + +Returns the instance's column values as a dict. + +- `include_readonly`: if false, returns only fields that can be inserted into database. +- `field_names`: an iterable of field names to return (optional) + + +#### to_tsv(include_readonly=True) + +Returns the instance's column values as a tab-separated line. A newline is not included. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + +infi.clickhouse_orm.fields +-------------------------- + +### Field + +Abstract base class for all field types. 
+ +#### Field(default=None, alias=None, materialized=None) + + +### StringField + +Extends Field + +#### StringField(default=None, alias=None, materialized=None) + + +### DateField + +Extends Field + +#### DateField(default=None, alias=None, materialized=None) + + +### DateTimeField + +Extends Field + +#### DateTimeField(default=None, alias=None, materialized=None) + + +### BaseIntField + +Extends Field + +Abstract base class for all integer-type fields. + +#### BaseIntField(default=None, alias=None, materialized=None) + + +### BaseFloatField + +Extends Field + +Abstract base class for all float-type fields. + +#### BaseFloatField(default=None, alias=None, materialized=None) + + +### BaseEnumField + +Extends Field + +Abstract base class for all enum-type fields. + +#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None) + + +### ArrayField + +Extends Field + +#### ArrayField(inner_field, default=None, alias=None, materialized=None) + + +### FixedStringField + +Extends StringField + +#### FixedStringField(length, default=None, alias=None, materialized=None) + + +### UInt8Field + +Extends BaseIntField + +#### UInt8Field(default=None, alias=None, materialized=None) + + +### UInt16Field + +Extends BaseIntField + +#### UInt16Field(default=None, alias=None, materialized=None) + + +### UInt32Field + +Extends BaseIntField + +#### UInt32Field(default=None, alias=None, materialized=None) + + +### UInt64Field + +Extends BaseIntField + +#### UInt64Field(default=None, alias=None, materialized=None) + + +### Int8Field + +Extends BaseIntField + +#### Int8Field(default=None, alias=None, materialized=None) + + +### Int16Field + +Extends BaseIntField + +#### Int16Field(default=None, alias=None, materialized=None) + + +### Int32Field + +Extends BaseIntField + +#### Int32Field(default=None, alias=None, materialized=None) + + +### Int64Field + +Extends BaseIntField + +#### Int64Field(default=None, alias=None, materialized=None) + + +### Float32Field + +Extends BaseFloatField + +#### Float32Field(default=None, alias=None, materialized=None) + + +### Float64Field + +Extends BaseFloatField + +#### Float64Field(default=None, alias=None, materialized=None) + + +### Enum8Field + +Extends BaseEnumField + +#### Enum8Field(enum_cls, default=None, alias=None, materialized=None) + + +### Enum16Field + +Extends BaseEnumField + +#### Enum16Field(enum_cls, default=None, alias=None, materialized=None) + + +infi.clickhouse_orm.engines +--------------------------- + +### Engine + +### TinyLog + +Extends Engine + +### Log + +Extends Engine + +### Memory + +Extends Engine + +### MergeTree + +Extends Engine + +#### MergeTree(date_col, key_cols, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +### Buffer + +Extends Engine + +Here we define Buffer engine +Read more here https://clickhouse.yandex/reference_en.html#Buffer + +#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000) + + +### CollapsingMergeTree + +Extends MergeTree + +#### CollapsingMergeTree(date_col, key_cols, sign_col, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +### SummingMergeTree + +Extends MergeTree + +#### SummingMergeTree(date_col, key_cols, summing_cols=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +### ReplacingMergeTree + +Extends MergeTree + +#### ReplacingMergeTree(date_col, key_cols, ver_col=None, 
sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +infi.clickhouse_orm.query +------------------------- + +### QuerySet + +#### QuerySet(model_cls, database) + + +#### conditions_as_sql() + +Return the contents of the queryset's WHERE clause. + + +#### count() + +Returns the number of matching model instances. + + +#### exclude(**kwargs) + +Returns a new QuerySet instance that excludes all rows matching the conditions. + + +#### filter(**kwargs) + +Returns a new QuerySet instance that includes only rows matching the conditions. + + +#### only(*field_names) + +Limit the query to return only the specified field names. +Useful when there are large fields that are not needed, +or for creating a subquery to use with an IN operator. + + +#### order_by(*field_names) + +Returns a new QuerySet instance with the ordering changed. + + +#### order_by_as_sql() + +Return the contents of the queryset's ORDER BY clause. + + +#### query() + +Return the the queryset as SQL. + + diff --git a/docs/toc.md b/docs/toc.md index b339fd3..fa9dd70 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -42,3 +42,45 @@ * [Contributing](contributing.md#contributing) + * [Class Reference](ref.md#class-reference) + * [infi.clickhouse_orm.database](ref.md#infi.clickhouse_orm.database) + * [Database](ref.md#database) + * [DatabaseException](ref.md#databaseexception) + * [infi.clickhouse_orm.models](ref.md#infi.clickhouse_orm.models) + * [Model](ref.md#model) + * [BufferModel](ref.md#buffermodel) + * [infi.clickhouse_orm.fields](ref.md#infi.clickhouse_orm.fields) + * [Field](ref.md#field) + * [StringField](ref.md#stringfield) + * [DateField](ref.md#datefield) + * [DateTimeField](ref.md#datetimefield) + * [BaseIntField](ref.md#baseintfield) + * [BaseFloatField](ref.md#basefloatfield) + * [BaseEnumField](ref.md#baseenumfield) + * [ArrayField](ref.md#arrayfield) + * [FixedStringField](ref.md#fixedstringfield) + * [UInt8Field](ref.md#uint8field) + * [UInt16Field](ref.md#uint16field) + * [UInt32Field](ref.md#uint32field) + * [UInt64Field](ref.md#uint64field) + * [Int8Field](ref.md#int8field) + * [Int16Field](ref.md#int16field) + * [Int32Field](ref.md#int32field) + * [Int64Field](ref.md#int64field) + * [Float32Field](ref.md#float32field) + * [Float64Field](ref.md#float64field) + * [Enum8Field](ref.md#enum8field) + * [Enum16Field](ref.md#enum16field) + * [infi.clickhouse_orm.engines](ref.md#infi.clickhouse_orm.engines) + * [Engine](ref.md#engine) + * [TinyLog](ref.md#tinylog) + * [Log](ref.md#log) + * [Memory](ref.md#memory) + * [MergeTree](ref.md#mergetree) + * [Buffer](ref.md#buffer) + * [CollapsingMergeTree](ref.md#collapsingmergetree) + * [SummingMergeTree](ref.md#summingmergetree) + * [ReplacingMergeTree](ref.md#replacingmergetree) + * [infi.clickhouse_orm.query](ref.md#infi.clickhouse_orm.query) + * [QuerySet](ref.md#queryset) + diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py new file mode 100644 index 0000000..d2d731c --- /dev/null +++ b/scripts/generate_ref.py @@ -0,0 +1,131 @@ + +import inspect +from collections import namedtuple + +DefaultArgSpec = namedtuple('DefaultArgSpec', 'has_default default_value') + +def _get_default_arg(args, defaults, arg_index): + """ Method that determines if an argument has default value or not, + and if yes what is the default value for the argument + + :param args: array of arguments, eg: ['first_arg', 'second_arg', 'third_arg'] + :param defaults: array of default values, eg: (42, 'something') + :param arg_index: index of the argument in the 
argument array for which
+    this function checks whether a default value exists. If a default value
+    exists, it is returned as well. Example argument: 1
+    :return: Tuple of whether there is a default or not, and if yes the default
+    value, eg: for index 1, i.e. for "second_arg", this function returns (True, 42)
+    """
+    if not defaults:
+        return DefaultArgSpec(False, None)
+
+    args_with_no_defaults = len(args) - len(defaults)
+
+    if arg_index < args_with_no_defaults:
+        return DefaultArgSpec(False, None)
+    else:
+        value = defaults[arg_index - args_with_no_defaults]
+        if (type(value) is str):
+            value = '"%s"' % value
+        return DefaultArgSpec(True, value)
+
+def get_method_sig(method):
+    """ Given a function, it returns a string that looks much like how the
+    function signature would be written in Python.
+
+    :param method: a python method
+    :return: A string describing the Python method signature,
+    eg: "my_method(first_arg, second_arg=42, third_arg='something')"
+    """
+
+    # The return value of ArgSpec is a bit weird, as the list of arguments and
+    # the list of defaults are returned in separate arrays.
+    # eg: ArgSpec(args=['first_arg', 'second_arg', 'third_arg'],
+    #             varargs=None, keywords=None, defaults=(42, 'something'))
+    argspec = inspect.getargspec(method)
+    arg_index=0
+    args = []
+
+    # Use the args and defaults arrays returned by argspec and find out
+    # which arguments have defaults
+    for arg in argspec.args:
+        default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)
+        if default_arg.has_default:
+            args.append("%s=%s" % (arg, default_arg.default_value))
+        else:
+            args.append(arg)
+        arg_index += 1
+    if argspec.varargs:
+        args.append('*' + argspec.varargs)
+    if argspec.keywords:
+        args.append('**' + argspec.keywords)
+    return "%s(%s)" % (method.__name__, ", ".join(args[1:]))
+
+
+def docstring(obj):
+    doc = (obj.__doc__ or '').strip()
+    if doc:
+        for line in doc.split('\n'):
+            print line.strip()
+        print
+
+
+def class_doc(cls, list_methods=True):
+    bases = ', '.join([b.__name__ for b in cls.__bases__])
+    print '###', cls.__name__
+    print
+    if bases != 'object':
+        print 'Extends', bases
+        print
+    docstring(cls)
+    for name, method in inspect.getmembers(cls, inspect.ismethod):
+        if name == '__init__':
+            # Initializer
+            print '####', get_method_sig(method).replace(name, cls.__name__)
+        elif name[0] == '_':
+            # Private method
+            continue
+        elif method.__self__ == cls:
+            # Class method
+            if not list_methods:
+                continue
+            print '#### %s.%s' % (cls.__name__, get_method_sig(method))
+        else:
+            # Regular method
+            if not list_methods:
+                continue
+            print '####', get_method_sig(method)
+        print
+        docstring(method)
+        print
+
+
+def module_doc(classes, list_methods=True):
+    mdl = classes[0].__module__
+    print mdl
+    print '-' * len(mdl)
+    print
+    for cls in classes:
+        class_doc(cls, list_methods)
+
+
+def all_subclasses(cls):
+    return cls.__subclasses__() + [g for s in cls.__subclasses__() for g in all_subclasses(s)]
+
+
+if __name__ == '__main__':
+
+    from infi.clickhouse_orm import database
+    from infi.clickhouse_orm import fields
+    from infi.clickhouse_orm import engines
+    from infi.clickhouse_orm import models
+    from infi.clickhouse_orm import query
+
+    print 'Class Reference'
+    print '==============='
+    print
+    module_doc([database.Database, database.DatabaseException])
+    module_doc([models.Model, models.BufferModel])
+    module_doc([fields.Field] + all_subclasses(fields.Field), False)
+    module_doc([engines.Engine] + all_subclasses(engines.Engine), False)
module_doc([query.QuerySet]) diff --git a/scripts/generate_toc.sh b/scripts/generate_toc.sh index 1b1f1ed..32ca599 100755 --- a/scripts/generate_toc.sh +++ b/scripts/generate_toc.sh @@ -14,3 +14,4 @@ generate_one "table_engines.md" generate_one "schema_migrations.md" generate_one "system_models.md" generate_one "contributing.md" +generate_one "ref.md" diff --git a/scripts/html_to_markdown_toc.py b/scripts/html_to_markdown_toc.py index 494f32a..169e698 100644 --- a/scripts/html_to_markdown_toc.py +++ b/scripts/html_to_markdown_toc.py @@ -27,5 +27,5 @@ class HeadersToMarkdownParser(HTMLParser): self.text += data -HeadersToMarkdownParser.feed(sys.stdin.read()) +HeadersToMarkdownParser().feed(sys.stdin.read()) print diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 2911d96..3ae9535 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -16,12 +16,25 @@ Page = namedtuple('Page', 'objects number_of_objects pages_total number page_siz class DatabaseException(Exception): + ''' + Raised when a database operation fails. + ''' pass class Database(object): def __init__(self, db_name, db_url='http://localhost:8123/', username=None, password=None, readonly=False): + ''' + Initializes a database instance. Unless it's readonly, the database will be + created on the ClickHouse server if it does not already exist. + + - `db_name`: name of the database to connect to. + - `db_url`: URL of the ClickHouse server. + - `username`: optional connection credentials. + - `password`: optional connection credentials. + - `readonly`: use a read-only connection. + ''' self.db_name = db_name self.db_url = db_url self.username = username @@ -35,23 +48,41 @@ class Database(object): self.server_timezone = self._get_server_timezone() def create_database(self): + ''' + Creates the database on the ClickHouse server if it does not already exist. + ''' self._send('CREATE DATABASE IF NOT EXISTS `%s`' % self.db_name) def drop_database(self): + ''' + Deletes the database on the ClickHouse server. + ''' self._send('DROP DATABASE `%s`' % self.db_name) def create_table(self, model_class): + ''' + Creates a table for the given model class, if it does not exist already. + ''' # TODO check that model has an engine if model_class.readonly: raise DatabaseException("You can't create read only table") self._send(model_class.create_table_sql(self.db_name)) def drop_table(self, model_class): + ''' + Drops the database table of the given model class, if it exists. + ''' if model_class.readonly: raise DatabaseException("You can't drop read only table") self._send(model_class.drop_table_sql(self.db_name)) def insert(self, model_instances, batch_size=1000): + ''' + Insert records into the database. + + - `model_instances`: any iterable containing instances of a single model class. + - `batch_size`: number of records to send per chunk (use a lower number if your records are very large). + ''' from six import next from io import BytesIO i = iter(model_instances) @@ -89,6 +120,12 @@ class Database(object): self._send(gen()) def count(self, model_class, conditions=None): + ''' + Counts the number of records in the model's table. + + - `model_class`: the model to count. + - `conditions`: optional SQL conditions (contents of the WHERE clause). 
+ ''' query = 'SELECT count() FROM $table' if conditions: query += ' WHERE ' + conditions @@ -97,6 +134,14 @@ class Database(object): return int(r.text) if r.text else 0 def select(self, query, model_class=None, settings=None): + ''' + Performs a query and returns a generator of model instances. + + - `query`: the SQL query to execute. + - `model_class`: the model class matching the query's table, + or `None` for getting back instances of an ad-hoc model. + - `settings`: query settings to send as HTTP GET parameters + ''' query += ' FORMAT TabSeparatedWithNamesAndTypes' query = self._substitute(query, model_class) r = self._send(query, settings, True) @@ -110,17 +155,31 @@ class Database(object): yield model_class.from_tsv(line, field_names, self.server_timezone, self) def raw(self, query, settings=None, stream=False): - """ - Performs raw query to database. Returns its output - :param query: Query to execute - :param settings: Query settings to send as query GET parameters - :param stream: If flag is true, Http response from ClickHouse will be streamed. - :return: Query execution result - """ + ''' + Performs a query and returns its output as text. + + - `query`: the SQL query to execute. + - `settings`: query settings to send as HTTP GET parameters + - `stream`: if true, the HTTP response from ClickHouse will be streamed. + ''' query = self._substitute(query, None) return self._send(query, settings=settings, stream=stream).text def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None): + ''' + Selects records and returns a single page of model instances. + + - `model_class`: the model class matching the query's table, + or `None` for getting back instances of an ad-hoc model. + - `order_by`: columns to use for sorting the query (contents of the ORDER BY clause). + - `page_num`: the page number (1-based), or -1 to get the last page. + - `page_size`: number of records to return per page. + - `conditions`: optional SQL conditions (contents of the WHERE clause). + - `settings`: query settings to send as HTTP GET parameters + + The result is a namedtuple containing `objects` (list), `number_of_objects`, + `pages_total`, `number` (of the current page), and `page_size`. + ''' count = self.count(model_class, conditions) pages_total = int(ceil(count / float(page_size))) if page_num == -1: @@ -143,6 +202,13 @@ class Database(object): ) def migrate(self, migrations_package_name, up_to=9999): + ''' + Executes schema migrations. + + - `migrations_package_name` - fully qualified name of the Python package + containing the migrations. + - `up_to` - number of the last migration to apply. + ''' from .migrations import MigrationHistory logger = logging.getLogger('migrations') applied_migrations = self._get_applied_migrations(migrations_package_name) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 03d6c82..a136601 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -8,7 +8,9 @@ from .utils import escape, parse_array class Field(object): - + ''' + Abstract base class for all field types. + ''' creation_counter = 0 class_default = 0 db_type = None @@ -165,7 +167,9 @@ class DateTimeField(Field): class BaseIntField(Field): - + ''' + Abstract base class for all integer-type fields. + ''' def to_python(self, value, timezone_in_use): try: return int(value) @@ -238,6 +242,9 @@ class Int64Field(BaseIntField): class BaseFloatField(Field): + ''' + Abstract base class for all float-type fields. 
+ ''' def to_python(self, value, timezone_in_use): try: @@ -262,6 +269,9 @@ class Float64Field(BaseFloatField): class BaseEnumField(Field): + ''' + Abstract base class for all enum-type fields. + ''' def __init__(self, enum_cls, default=None, alias=None, materialized=None): self.enum_cls = enum_cls diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index ef5ab5e..a06f515 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -81,8 +81,8 @@ class Model(with_metaclass(ModelBase)): ''' Creates a model instance, using keyword arguments as field values. Since values are immediately converted to their Pythonic type, - invalid values will cause a ValueError to be raised. - Unrecognized field names will cause an AttributeError. + invalid values will cause a `ValueError` to be raised. + Unrecognized field names will cause an `AttributeError`. ''' super(Model, self).__init__() @@ -103,7 +103,7 @@ class Model(with_metaclass(ModelBase)): def __setattr__(self, name, value): ''' When setting a field value, converts the value to its Pythonic type and validates it. - This may raise a ValueError. + This may raise a `ValueError`. ''' field = self.get_field(name) if field: @@ -112,26 +112,25 @@ class Model(with_metaclass(ModelBase)): super(Model, self).__setattr__(name, value) def set_database(self, db): - """ - Sets _database attribute for current model instance - :param db: Database instance - :return: None - """ + ''' + Sets the `Database` that this model instance belongs to. + This is done automatically when the instance is read from the database or written to it. + ''' # This can not be imported globally due to circular import from .database import Database assert isinstance(db, Database), "database must be database.Database instance" self._database = db def get_database(self): - """ - Gets _database attribute for current model instance - :return: database.Database instance, model was inserted or selected from or None - """ + ''' + Gets the `Database` that this model instance belongs to. + Returns `None` unless the instance was read from the database or written to it. + ''' return self._database def get_field(self, name): ''' - Get a Field instance given its name, or None if not found. + Gets a `Field` instance given its name, or `None` if not found. ''' field = getattr(self.__class__, name, None) return field if isinstance(field, Field) else None @@ -139,7 +138,9 @@ class Model(with_metaclass(ModelBase)): @classmethod def table_name(cls): ''' - Returns the model's database table name. + Returns the model's database table name. By default this is the + class name converted to lowercase. Override this if you want to use + a different table name. ''' return cls.__name__.lower() @@ -168,9 +169,13 @@ class Model(with_metaclass(ModelBase)): def from_tsv(cls, line, field_names=None, timezone_in_use=pytz.utc, database=None): ''' Create a model instance from a tab-separated line. The line may or may not include a newline. - The field_names list must match the fields defined in the model, but does not have to include all of them. + The `field_names` list must match the fields defined in the model, but does not have to include all of them. If omitted, it is assumed to be the names of all fields in the model, in order of definition. - :param database: if given, model receives database + + - `line`: the TSV-formatted data. + - `field_names`: names of the model fields in the data. 
+ - `timezone_in_use`: the timezone to use when parsing dates and datetimes. + - `database`: if given, sets the database that this instance belongs to. ''' from six import next field_names = field_names or [name for name, field in cls._fields] @@ -189,7 +194,8 @@ class Model(with_metaclass(ModelBase)): def to_tsv(self, include_readonly=True): ''' Returns the instance's column values as a tab-separated line. A newline is not included. - :param bool include_readonly: If False, returns only fields, that can be inserted into database + + - `include_readonly`: if false, returns only fields that can be inserted into database. ''' data = self.__dict__ fields = self._fields if include_readonly else self._writable_fields @@ -198,8 +204,9 @@ class Model(with_metaclass(ModelBase)): def to_dict(self, include_readonly=True, field_names=None): ''' Returns the instance's column values as a dict. - :param bool include_readonly: If False, returns only fields, that can be inserted into database - :param field_names: An iterable of field names to return + + - `include_readonly`: if false, returns only fields that can be inserted into database. + - `field_names`: an iterable of field names to return (optional) ''' fields = self._fields if include_readonly else self._writable_fields @@ -212,7 +219,7 @@ class Model(with_metaclass(ModelBase)): @classmethod def objects_in(cls, database): ''' - Returns a queryset for selecting instances of this model class. + Returns a `QuerySet` for selecting instances of this model class. ''' return QuerySet(cls, database) From 3f8434fe509e6074a843f40537ca8cfd38d69528 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 5 May 2017 15:21:55 +0300 Subject: [PATCH 17/53] rename QuerySet.query to as_sql --- src/infi/clickhouse_orm/query.py | 11 +++++++---- tests/test_querysets.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index e11c7ef..f79c2eb 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -48,7 +48,7 @@ class InOperator(Operator): def to_sql(self, model_cls, field_name, value): field = getattr(model_cls, field_name) if isinstance(value, QuerySet): - value = value.query() + value = value.as_sql() elif isinstance(value, six.string_types): pass else: @@ -164,7 +164,7 @@ class QuerySet(object): """ Iterates over the model instances matching this queryset """ - return self._database.select(self.query(), self._model_cls) + return self._database.select(self.as_sql(), self._model_cls) def __bool__(self): """ @@ -175,9 +175,12 @@ class QuerySet(object): def __nonzero__(self): # Python 2 compatibility return type(self).__bool__(self) - def query(self): + def __unicode__(self): + return self.as_sql() + + def as_sql(self): """ - Return the the queryset as SQL. + Return the whole queryset as SQL. 
""" fields = '*' if self._fields: diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 817f7d4..3dd0889 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -20,7 +20,7 @@ class QuerySetTestCase(TestCaseWithData): self.database.insert(self._sample_data()) def _test_qs(self, qs, expected_count): - logging.info(qs.query()) + logging.info(qs.as_sql()) for instance in qs: logging.info('\t%s' % instance.to_dict()) self.assertEquals(qs.count(), expected_count) From b3437dae7ef9f2e4269b0b68c53685519a83cdc0 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 5 May 2017 15:22:16 +0300 Subject: [PATCH 18/53] Add a couple of small tests --- tests/test_database.py | 10 +++++++++- tests/test_engines.py | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_database.py b/tests/test_database.py index 91d7af6..8f94230 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -2,7 +2,7 @@ import unittest -from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.database import Database, DatabaseException from .base_test_with_data import * @@ -108,6 +108,11 @@ class DatabaseTestCase(TestCaseWithData): with self.assertRaises(ValueError): self.database.paginate(Person, 'first_name, last_name', page_num, 100) + def test_pagination_with_conditions(self): + self._insert_and_check(self._sample_data(), len(data)) + page = self.database.paginate(Person, 'first_name, last_name', 1, 100, conditions="first_name < 'Ava'") + self.assertEquals(page.number_of_objects, 10) + def test_special_chars(self): s = u'אבגד \\\'"`,.;éåäöšž\n\t\0\b\r' p = Person(first_name=s) @@ -121,3 +126,6 @@ class DatabaseTestCase(TestCaseWithData): results = self.database.raw(query) self.assertEqual(results, "Whitney\tDurham\t1977-09-15\t1.72\nWhitney\tScott\t1971-07-04\t1.7\n") + def test_invalid_user(self): + with self.assertRaises(DatabaseException): + Database(self.database.db_name, username='default', password='wrong') \ No newline at end of file diff --git a/tests/test_engines.py b/tests/test_engines.py index 4aea6d1..3639960 100644 --- a/tests/test_engines.py +++ b/tests/test_engines.py @@ -38,6 +38,11 @@ class EnginesTestCase(unittest.TestCase): engine = MergeTree('date', ('date', 'event_id', 'event_group'), index_granularity=4096) self._create_and_insert(TestModel) + def test_replicated_merge_tree(self): + engine = MergeTree('date', ('date', 'event_id', 'event_group'), replica_table_path='/clickhouse/tables/{layer}-{shard}/hits', replica_name='{replica}') + expected = "ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', date, (date, event_id, event_group), 8192)" + self.assertEquals(engine.create_table_sql(), expected) + def test_collapsing_merge_tree(self): class TestModel(SampleModel): engine = CollapsingMergeTree('date', ('date', 'event_id', 'event_group'), 'event_version') From 00ca503b7c52fcd6ccb515b5a7b7d5a098d62d2b Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 5 May 2017 15:31:08 +0300 Subject: [PATCH 19/53] Generate a class reference document --- docs/class_reference.md | 594 ++++++++++++++++++++++++++++ docs/toc.md | 82 ++-- scripts/README.md | 24 ++ scripts/generate_all.sh | 8 + scripts/generate_ref.py | 10 +- scripts/generate_toc.sh | 2 +- src/infi/clickhouse_orm/database.py | 4 + src/infi/clickhouse_orm/engines.py | 6 +- src/infi/clickhouse_orm/models.py | 8 +- src/infi/clickhouse_orm/query.py | 25 +- 10 files changed, 707 insertions(+), 56 deletions(-) create mode 100644 docs/class_reference.md create 
mode 100755 scripts/generate_all.sh diff --git a/docs/class_reference.md b/docs/class_reference.md new file mode 100644 index 0000000..0f4f57f --- /dev/null +++ b/docs/class_reference.md @@ -0,0 +1,594 @@ +Class Reference +=============== + +infi.clickhouse_orm.database +---------------------------- + +### Database + + +Database instances connect to a specific ClickHouse database for running queries, +inserting data and other operations. + +#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False) + + +Initializes a database instance. Unless it's readonly, the database will be +created on the ClickHouse server if it does not already exist. + +- `db_name`: name of the database to connect to. +- `db_url`: URL of the ClickHouse server. +- `username`: optional connection credentials. +- `password`: optional connection credentials. +- `readonly`: use a read-only connection. + + +#### count(model_class, conditions=None) + + +Counts the number of records in the model's table. + +- `model_class`: the model to count. +- `conditions`: optional SQL conditions (contents of the WHERE clause). + + +#### create_database() + + +Creates the database on the ClickHouse server if it does not already exist. + + +#### create_table(model_class) + + +Creates a table for the given model class, if it does not exist already. + + +#### drop_database() + + +Deletes the database on the ClickHouse server. + + +#### drop_table(model_class) + + +Drops the database table of the given model class, if it exists. + + +#### insert(model_instances, batch_size=1000) + + +Insert records into the database. + +- `model_instances`: any iterable containing instances of a single model class. +- `batch_size`: number of records to send per chunk (use a lower number if your records are very large). + + +#### migrate(migrations_package_name, up_to=9999) + + +Executes schema migrations. + +- `migrations_package_name` - fully qualified name of the Python package + containing the migrations. +- `up_to` - number of the last migration to apply. + + +#### paginate(model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None) + + +Selects records and returns a single page of model instances. + +- `model_class`: the model class matching the query's table, + or `None` for getting back instances of an ad-hoc model. +- `order_by`: columns to use for sorting the query (contents of the ORDER BY clause). +- `page_num`: the page number (1-based), or -1 to get the last page. +- `page_size`: number of records to return per page. +- `conditions`: optional SQL conditions (contents of the WHERE clause). +- `settings`: query settings to send as HTTP GET parameters + +The result is a namedtuple containing `objects` (list), `number_of_objects`, +`pages_total`, `number` (of the current page), and `page_size`. + + +#### raw(query, settings=None, stream=False) + + +Performs a query and returns its output as text. + +- `query`: the SQL query to execute. +- `settings`: query settings to send as HTTP GET parameters +- `stream`: if true, the HTTP response from ClickHouse will be streamed. + + +#### select(query, model_class=None, settings=None) + + +Performs a query and returns a generator of model instances. + +- `query`: the SQL query to execute. +- `model_class`: the model class matching the query's table, + or `None` for getting back instances of an ad-hoc model. 
+- `settings`: query settings to send as HTTP GET parameters + + +### DatabaseException + +Extends Exception + + +Raised when a database operation fails. + +infi.clickhouse_orm.models +-------------------------- + +### Model + + +A base class for ORM models. Each model class represent a ClickHouse table. For example: + + class CPUStats(Model): + timestamp = DateTimeField() + cpu_id = UInt16Field() + cpu_percent = Float32Field() + engine = Memory() + +#### Model(**kwargs) + + +Creates a model instance, using keyword arguments as field values. +Since values are immediately converted to their Pythonic type, +invalid values will cause a `ValueError` to be raised. +Unrecognized field names will cause an `AttributeError`. + + +#### Model.create_table_sql(db_name) + + +Returns the SQL command for creating a table for this model. + + +#### Model.drop_table_sql(db_name) + + +Returns the SQL command for deleting this model's table. + + +#### Model.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None) + + +Create a model instance from a tab-separated line. The line may or may not include a newline. +The `field_names` list must match the fields defined in the model, but does not have to include all of them. +If omitted, it is assumed to be the names of all fields in the model, in order of definition. + +- `line`: the TSV-formatted data. +- `field_names`: names of the model fields in the data. +- `timezone_in_use`: the timezone to use when parsing dates and datetimes. +- `database`: if given, sets the database that this instance belongs to. + + +#### get_database() + + +Gets the `Database` that this model instance belongs to. +Returns `None` unless the instance was read from the database or written to it. + + +#### get_field(name) + + +Gets a `Field` instance given its name, or `None` if not found. + + +#### Model.objects_in(database) + + +Returns a `QuerySet` for selecting instances of this model class. + + +#### set_database(db) + + +Sets the `Database` that this model instance belongs to. +This is done automatically when the instance is read from the database or written to it. + + +#### Model.table_name() + + +Returns the model's database table name. By default this is the +class name converted to lowercase. Override this if you want to use +a different table name. + + +#### to_dict(include_readonly=True, field_names=None) + + +Returns the instance's column values as a dict. + +- `include_readonly`: if false, returns only fields that can be inserted into database. +- `field_names`: an iterable of field names to return (optional) + + +#### to_tsv(include_readonly=True) + + +Returns the instance's column values as a tab-separated line. A newline is not included. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + +### BufferModel + +Extends Model + +#### BufferModel(**kwargs) + + +Creates a model instance, using keyword arguments as field values. +Since values are immediately converted to their Pythonic type, +invalid values will cause a `ValueError` to be raised. +Unrecognized field names will cause an `AttributeError`. + + +#### BufferModel.create_table_sql(db_name) + + +Returns the SQL command for creating a table for this model. + + +#### BufferModel.drop_table_sql(db_name) + + +Returns the SQL command for deleting this model's table. + + +#### BufferModel.from_tsv(line, field_names=None, timezone_in_use=UTC, database=None) + + +Create a model instance from a tab-separated line. The line may or may not include a newline. 
+The `field_names` list must match the fields defined in the model, but does not have to include all of them. +If omitted, it is assumed to be the names of all fields in the model, in order of definition. + +- `line`: the TSV-formatted data. +- `field_names`: names of the model fields in the data. +- `timezone_in_use`: the timezone to use when parsing dates and datetimes. +- `database`: if given, sets the database that this instance belongs to. + + +#### get_database() + + +Gets the `Database` that this model instance belongs to. +Returns `None` unless the instance was read from the database or written to it. + + +#### get_field(name) + + +Gets a `Field` instance given its name, or `None` if not found. + + +#### BufferModel.objects_in(database) + + +Returns a `QuerySet` for selecting instances of this model class. + + +#### set_database(db) + + +Sets the `Database` that this model instance belongs to. +This is done automatically when the instance is read from the database or written to it. + + +#### BufferModel.table_name() + + +Returns the model's database table name. By default this is the +class name converted to lowercase. Override this if you want to use +a different table name. + + +#### to_dict(include_readonly=True, field_names=None) + + +Returns the instance's column values as a dict. + +- `include_readonly`: if false, returns only fields that can be inserted into database. +- `field_names`: an iterable of field names to return (optional) + + +#### to_tsv(include_readonly=True) + + +Returns the instance's column values as a tab-separated line. A newline is not included. + +- `include_readonly`: if false, returns only fields that can be inserted into database. + + +infi.clickhouse_orm.fields +-------------------------- + +### Field + + +Abstract base class for all field types. + +#### Field(default=None, alias=None, materialized=None) + + +### StringField + +Extends Field + +#### StringField(default=None, alias=None, materialized=None) + + +### DateField + +Extends Field + +#### DateField(default=None, alias=None, materialized=None) + + +### DateTimeField + +Extends Field + +#### DateTimeField(default=None, alias=None, materialized=None) + + +### BaseIntField + +Extends Field + + +Abstract base class for all integer-type fields. + +#### BaseIntField(default=None, alias=None, materialized=None) + + +### BaseFloatField + +Extends Field + + +Abstract base class for all float-type fields. + +#### BaseFloatField(default=None, alias=None, materialized=None) + + +### BaseEnumField + +Extends Field + + +Abstract base class for all enum-type fields. 
+ +#### BaseEnumField(enum_cls, default=None, alias=None, materialized=None) + + +### ArrayField + +Extends Field + +#### ArrayField(inner_field, default=None, alias=None, materialized=None) + + +### FixedStringField + +Extends StringField + +#### FixedStringField(length, default=None, alias=None, materialized=None) + + +### UInt8Field + +Extends BaseIntField + +#### UInt8Field(default=None, alias=None, materialized=None) + + +### UInt16Field + +Extends BaseIntField + +#### UInt16Field(default=None, alias=None, materialized=None) + + +### UInt32Field + +Extends BaseIntField + +#### UInt32Field(default=None, alias=None, materialized=None) + + +### UInt64Field + +Extends BaseIntField + +#### UInt64Field(default=None, alias=None, materialized=None) + + +### Int8Field + +Extends BaseIntField + +#### Int8Field(default=None, alias=None, materialized=None) + + +### Int16Field + +Extends BaseIntField + +#### Int16Field(default=None, alias=None, materialized=None) + + +### Int32Field + +Extends BaseIntField + +#### Int32Field(default=None, alias=None, materialized=None) + + +### Int64Field + +Extends BaseIntField + +#### Int64Field(default=None, alias=None, materialized=None) + + +### Float32Field + +Extends BaseFloatField + +#### Float32Field(default=None, alias=None, materialized=None) + + +### Float64Field + +Extends BaseFloatField + +#### Float64Field(default=None, alias=None, materialized=None) + + +### Enum8Field + +Extends BaseEnumField + +#### Enum8Field(enum_cls, default=None, alias=None, materialized=None) + + +### Enum16Field + +Extends BaseEnumField + +#### Enum16Field(enum_cls, default=None, alias=None, materialized=None) + + +infi.clickhouse_orm.engines +--------------------------- + +### Engine + +### TinyLog + +Extends Engine + +### Log + +Extends Engine + +### Memory + +Extends Engine + +### MergeTree + +Extends Engine + +#### MergeTree(date_col, key_cols, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +### Buffer + +Extends Engine + + +Buffers the data to write in RAM, periodically flushing it to another table. +Must be used in conjuction with a `BufferModel`. +Read more [here](https://clickhouse.yandex/reference_en.html#Buffer). + +#### Buffer(main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000) + + +### CollapsingMergeTree + +Extends MergeTree + +#### CollapsingMergeTree(date_col, key_cols, sign_col, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +### SummingMergeTree + +Extends MergeTree + +#### SummingMergeTree(date_col, key_cols, summing_cols=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +### ReplacingMergeTree + +Extends MergeTree + +#### ReplacingMergeTree(date_col, key_cols, ver_col=None, sampling_expr=None, index_granularity=8192, replica_table_path=None, replica_name=None) + + +infi.clickhouse_orm.query +------------------------- + +### QuerySet + + +A queryset is an object that represents a database query using a specific `Model`. +It is lazy, meaning that it does not hit the database until you iterate over its +matching rows (model instances). + +#### QuerySet(model_cls, database) + + +Initializer. It is possible to create a queryset like this, but the standard +way is to use `MyModel.objects_in(database)`. + + +#### as_sql() + + +Returns the whole query as a SQL string. 
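For example, the SQL generated for a queryset can be inspected like this (a sketch, assuming the `CPUStats` model shown above and a `Database` instance named `db`):

    qs = CPUStats.objects_in(db).filter(cpu_percent__gt=95)
    print(qs.as_sql())  # the SELECT statement that iterating over the queryset would run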
+ + +#### conditions_as_sql() + + +Returns the contents of the query's `WHERE` clause as a string. + + +#### count() + + +Returns the number of matching model instances. + + +#### exclude(**kwargs) + + +Returns a new `QuerySet` instance that excludes all rows matching the conditions. + + +#### filter(**kwargs) + + +Returns a new `QuerySet` instance that includes only rows matching the conditions. + + +#### only(*field_names) + + +Returns a new `QuerySet` instance limited to the specified field names. +Useful when there are large fields that are not needed, +or for creating a subquery to use with an IN operator. + + +#### order_by(*field_names) + + +Returns a new `QuerySet` instance with the ordering changed. + + +#### order_by_as_sql() + + +Returns the contents of the query's `ORDER BY` clause as a string. + + diff --git a/docs/toc.md b/docs/toc.md index fa9dd70..ae05203 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -42,45 +42,45 @@ * [Contributing](contributing.md#contributing) - * [Class Reference](ref.md#class-reference) - * [infi.clickhouse_orm.database](ref.md#infi.clickhouse_orm.database) - * [Database](ref.md#database) - * [DatabaseException](ref.md#databaseexception) - * [infi.clickhouse_orm.models](ref.md#infi.clickhouse_orm.models) - * [Model](ref.md#model) - * [BufferModel](ref.md#buffermodel) - * [infi.clickhouse_orm.fields](ref.md#infi.clickhouse_orm.fields) - * [Field](ref.md#field) - * [StringField](ref.md#stringfield) - * [DateField](ref.md#datefield) - * [DateTimeField](ref.md#datetimefield) - * [BaseIntField](ref.md#baseintfield) - * [BaseFloatField](ref.md#basefloatfield) - * [BaseEnumField](ref.md#baseenumfield) - * [ArrayField](ref.md#arrayfield) - * [FixedStringField](ref.md#fixedstringfield) - * [UInt8Field](ref.md#uint8field) - * [UInt16Field](ref.md#uint16field) - * [UInt32Field](ref.md#uint32field) - * [UInt64Field](ref.md#uint64field) - * [Int8Field](ref.md#int8field) - * [Int16Field](ref.md#int16field) - * [Int32Field](ref.md#int32field) - * [Int64Field](ref.md#int64field) - * [Float32Field](ref.md#float32field) - * [Float64Field](ref.md#float64field) - * [Enum8Field](ref.md#enum8field) - * [Enum16Field](ref.md#enum16field) - * [infi.clickhouse_orm.engines](ref.md#infi.clickhouse_orm.engines) - * [Engine](ref.md#engine) - * [TinyLog](ref.md#tinylog) - * [Log](ref.md#log) - * [Memory](ref.md#memory) - * [MergeTree](ref.md#mergetree) - * [Buffer](ref.md#buffer) - * [CollapsingMergeTree](ref.md#collapsingmergetree) - * [SummingMergeTree](ref.md#summingmergetree) - * [ReplacingMergeTree](ref.md#replacingmergetree) - * [infi.clickhouse_orm.query](ref.md#infi.clickhouse_orm.query) - * [QuerySet](ref.md#queryset) + * [Class Reference](class_reference.md#class-reference) + * [infi.clickhouse_orm.database](class_reference.md#infi.clickhouse_orm.database) + * [Database](class_reference.md#database) + * [DatabaseException](class_reference.md#databaseexception) + * [infi.clickhouse_orm.models](class_reference.md#infi.clickhouse_orm.models) + * [Model](class_reference.md#model) + * [BufferModel](class_reference.md#buffermodel) + * [infi.clickhouse_orm.fields](class_reference.md#infi.clickhouse_orm.fields) + * [Field](class_reference.md#field) + * [StringField](class_reference.md#stringfield) + * [DateField](class_reference.md#datefield) + * [DateTimeField](class_reference.md#datetimefield) + * [BaseIntField](class_reference.md#baseintfield) + * [BaseFloatField](class_reference.md#basefloatfield) + * [BaseEnumField](class_reference.md#baseenumfield) + * 
[ArrayField](class_reference.md#arrayfield) + * [FixedStringField](class_reference.md#fixedstringfield) + * [UInt8Field](class_reference.md#uint8field) + * [UInt16Field](class_reference.md#uint16field) + * [UInt32Field](class_reference.md#uint32field) + * [UInt64Field](class_reference.md#uint64field) + * [Int8Field](class_reference.md#int8field) + * [Int16Field](class_reference.md#int16field) + * [Int32Field](class_reference.md#int32field) + * [Int64Field](class_reference.md#int64field) + * [Float32Field](class_reference.md#float32field) + * [Float64Field](class_reference.md#float64field) + * [Enum8Field](class_reference.md#enum8field) + * [Enum16Field](class_reference.md#enum16field) + * [infi.clickhouse_orm.engines](class_reference.md#infi.clickhouse_orm.engines) + * [Engine](class_reference.md#engine) + * [TinyLog](class_reference.md#tinylog) + * [Log](class_reference.md#log) + * [Memory](class_reference.md#memory) + * [MergeTree](class_reference.md#mergetree) + * [Buffer](class_reference.md#buffer) + * [CollapsingMergeTree](class_reference.md#collapsingmergetree) + * [SummingMergeTree](class_reference.md#summingmergetree) + * [ReplacingMergeTree](class_reference.md#replacingmergetree) + * [infi.clickhouse_orm.query](class_reference.md#infi.clickhouse_orm.query) + * [QuerySet](class_reference.md#queryset) diff --git a/scripts/README.md b/scripts/README.md index 5782dc9..aaf2f27 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -4,6 +4,7 @@ generate_toc ------------ Generates the table of contents (toc.md). Requires Pandoc. Usage: + cd docs ../scripts/generate_toc.sh @@ -22,6 +23,29 @@ Usage: ../scripts/docs2html.sh +generate_ref +------------ +Generates a class reference. +Usage: + + cd docs + ../bin/python ../scripts/generate_ref.py > class_reference.md + + +generate_all +------------ +Does everything: + + - Generates the class reference using generate_ref + - Generates the table of contents using generate_toc + - Converts to HTML for visual inspection using docs2html + +Usage: + + cd docs + ../scripts/generate_all.sh + + test_python3 ------------ Creates a Python 3 virtualenv, clones the project into it, and runs the tests. 
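For reference, the indentation-stripping logic that generate_ref.py applies to docstrings (see its diff below) is equivalent to this standalone Python 3 sketch (the `dedent` name is illustrative, not part of the script):

    def dedent(doc):
        lines = doc.rstrip().split('\n')
        # Width of the whitespace prefix shared by all non-empty lines
        # (assumes at least one non-empty line, which the script checks beforehand)
        indentation = min(len(line) - len(line.lstrip()) for line in lines if line.strip())
        return '\n'.join(line[indentation:] for line in lines)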
diff --git a/scripts/generate_all.sh b/scripts/generate_all.sh new file mode 100755 index 0000000..eabf65c --- /dev/null +++ b/scripts/generate_all.sh @@ -0,0 +1,8 @@ +# Class reference +../bin/python ../scripts/generate_ref.py > class_reference.md + +# Table of contents +../scripts/generate_toc.sh + +# Convert to HTML for visual inspection +../scripts/docs2html.sh diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py index d2d731c..8d11249 100644 --- a/scripts/generate_ref.py +++ b/scripts/generate_ref.py @@ -63,10 +63,14 @@ def get_method_sig(method): def docstring(obj): - doc = (obj.__doc__ or '').strip() + doc = (obj.__doc__ or '').rstrip() if doc: - for line in doc.split('\n'): - print line.strip() + lines = doc.split('\n') + # Find the length of the whitespace prefix common to all non-empty lines + indentation = min(len(line) - len(line.lstrip()) for line in lines if line.strip()) + # Output the lines without the indentation + for line in lines: + print line[indentation:] print diff --git a/scripts/generate_toc.sh b/scripts/generate_toc.sh index 32ca599..7ed82ce 100755 --- a/scripts/generate_toc.sh +++ b/scripts/generate_toc.sh @@ -14,4 +14,4 @@ generate_one "table_engines.md" generate_one "schema_migrations.md" generate_one "system_models.md" generate_one "contributing.md" -generate_one "ref.md" +generate_one "class_reference.md" diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 3ae9535..dd5c004 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -23,6 +23,10 @@ class DatabaseException(Exception): class Database(object): + ''' + Database instances connect to a specific ClickHouse database for running queries, + inserting data and other operations. + ''' def __init__(self, db_name, db_url='http://localhost:8123/', username=None, password=None, readonly=False): ''' diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index 7a011af..9db37da 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -98,8 +98,10 @@ class ReplacingMergeTree(MergeTree): class Buffer(Engine): - """Here we define Buffer engine - Read more here https://clickhouse.yandex/reference_en.html#Buffer + """ + Buffers the data to write in RAM, periodically flushing it to another table. + Must be used in conjuction with a `BufferModel`. + Read more [here](https://clickhouse.yandex/reference_en.html#Buffer). """ #Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index a06f515..1938103 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -71,7 +71,13 @@ class ModelBase(type): class Model(with_metaclass(ModelBase)): ''' - A base class for ORM models. + A base class for ORM models. Each model class represent a ClickHouse table. For example: + + class CPUStats(Model): + timestamp = DateTimeField() + cpu_id = UInt16Field() + cpu_percent = Float32Field() + engine = Memory() ''' engine = None diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index f79c2eb..95c4ea7 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -152,8 +152,17 @@ class Q(object): class QuerySet(object): + """ + A queryset is an object that represents a database query using a specific `Model`. 
+ It is lazy, meaning that it does not hit the database until you iterate over its + matching rows (model instances). + """ def __init__(self, model_cls, database): + """ + Initializer. It is possible to create a queryset like this, but the standard + way is to use `MyModel.objects_in(database)`. + """ self._model_cls = model_cls self._database = database self._order_by = [f[0] for f in model_cls._fields] @@ -168,7 +177,7 @@ class QuerySet(object): def __bool__(self): """ - Return true if this queryset matches any rows. + Returns true if this queryset matches any rows. """ return bool(self.count()) @@ -180,7 +189,7 @@ class QuerySet(object): def as_sql(self): """ - Return the whole queryset as SQL. + Returns the whole query as a SQL string. """ fields = '*' if self._fields: @@ -190,7 +199,7 @@ class QuerySet(object): def order_by_as_sql(self): """ - Return the contents of the queryset's ORDER BY clause. + Returns the contents of the query's `ORDER BY` clause as a string. """ return ', '.join([ '%s DESC' % field[1:] if field[0] == '-' else field @@ -199,7 +208,7 @@ class QuerySet(object): def conditions_as_sql(self): """ - Return the contents of the queryset's WHERE clause. + Returns the contents of the query's `WHERE` clause as a string. """ if self._q: return ' AND '.join([q.to_sql(self._model_cls) for q in self._q]) @@ -214,7 +223,7 @@ class QuerySet(object): def order_by(self, *field_names): """ - Returns a new QuerySet instance with the ordering changed. + Returns a new `QuerySet` instance with the ordering changed. """ qs = copy(self) qs._order_by = field_names @@ -222,7 +231,7 @@ class QuerySet(object): def only(self, *field_names): """ - Limit the query to return only the specified field names. + Returns a new `QuerySet` instance limited to the specified field names. Useful when there are large fields that are not needed, or for creating a subquery to use with an IN operator. """ @@ -232,7 +241,7 @@ class QuerySet(object): def filter(self, **kwargs): """ - Returns a new QuerySet instance that includes only rows matching the conditions. + Returns a new `QuerySet` instance that includes only rows matching the conditions. """ qs = copy(self) qs._q = list(self._q) + [Q(**kwargs)] @@ -240,7 +249,7 @@ class QuerySet(object): def exclude(self, **kwargs): """ - Returns a new QuerySet instance that excludes all rows matching the conditions. + Returns a new `QuerySet` instance that excludes all rows matching the conditions. 
""" qs = copy(self) qs._q = list(self._q) + [~Q(**kwargs)] From 6301ab468ec8e9514a44d6567dea66f4d8628ebf Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 5 May 2017 15:39:01 +0300 Subject: [PATCH 20/53] fix queryset problem with non-ascii chars --- src/infi/clickhouse_orm/database.py | 2 +- src/infi/clickhouse_orm/query.py | 8 ++++---- tests/test_querysets.py | 7 +++++++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index dd5c004..6af6494 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -234,7 +234,7 @@ class Database(object): return set(obj.module_name for obj in self.select(query)) def _send(self, data, settings=None, stream=False): - if PY3 and isinstance(data, string_types): + if isinstance(data, string_types): data = data.encode('utf-8') params = self._build_params(settings) r = requests.post(self.db_url, params=params, data=data, stream=stream) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 95c4ea7..465e500 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -195,13 +195,13 @@ class QuerySet(object): if self._fields: fields = ', '.join('`%s`' % field for field in self._fields) params = (fields, self._database.db_name, self._model_cls.table_name(), self.conditions_as_sql(), self.order_by_as_sql()) - return 'SELECT %s\nFROM `%s`.`%s`\nWHERE %s\nORDER BY %s' % params + return u'SELECT %s\nFROM `%s`.`%s`\nWHERE %s\nORDER BY %s' % params def order_by_as_sql(self): """ Returns the contents of the query's `ORDER BY` clause as a string. """ - return ', '.join([ + return u', '.join([ '%s DESC' % field[1:] if field[0] == '-' else field for field in self._order_by ]) @@ -211,9 +211,9 @@ class QuerySet(object): Returns the contents of the query's `WHERE` clause as a string. 
""" if self._q: - return ' AND '.join([q.to_sql(self._model_cls) for q in self._q]) + return u' AND '.join([q.to_sql(self._model_cls) for q in self._q]) else: - return '1' + return u'1' def count(self): """ diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 3dd0889..27e6c5c 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -55,6 +55,13 @@ class QuerySetTestCase(TestCaseWithData): self._test_qs(qs.filter(first_name__iendswith='ia'), 3) # case insensitive self._test_qs(qs.filter(first_name__iendswith=''), 100) # empty suffix + def test_filter_unicode_string(self): + self.database.insert([ + Person(first_name=u'דונלד', last_name=u'דאק') + ]) + qs = Person.objects_in(self.database) + self._test_qs(qs.filter(first_name=u'דונלד'), 1) + def test_filter_float_field(self): qs = Person.objects_in(self.database) self._test_qs(qs.filter(height__gt=2), 0) From a79e9f97ee4cca35b4f479d14912a23019649b48 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 11 May 2017 05:30:17 +0300 Subject: [PATCH 21/53] Documentation --- CHANGELOG.rst => CHANGELOG.md | 8 +++++ MIGRATIONS.rst | 65 ----------------------------------- docs/contributing.md | 17 ++++++++- docs/schema_migrations.md | 6 ++-- docs/toc.md | 2 ++ 5 files changed, 29 insertions(+), 69 deletions(-) rename CHANGELOG.rst => CHANGELOG.md (81%) delete mode 100644 MIGRATIONS.rst diff --git a/CHANGELOG.rst b/CHANGELOG.md similarity index 81% rename from CHANGELOG.rst rename to CHANGELOG.md index e933d06..e74a7e6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ Change Log ========== +Unreleased +---------- +- Major new feature: building model queries using QuerySets +- Refactor and expand the documentation +- Add support for FixedString fields +- Add support for more engine types: TinyLog, Log, Memory +- Bug fix: Do not send readonly=1 when connection is already in readonly mode + v0.8.2 ------ - Fix broken Python 3 support (M1hacka) diff --git a/MIGRATIONS.rst b/MIGRATIONS.rst deleted file mode 100644 index 0f0d5e2..0000000 --- a/MIGRATIONS.rst +++ /dev/null @@ -1,65 +0,0 @@ -Migrations -========== - -Over time, the ORM models in your application may change. Migrations provide a way to modify the database -tables according to the changes in your models, without writing raw SQL. - -The migrations that were applied to the database are recorded in the ``infi_clickhouse_orm_migrations`` table, -so migrating the database will only apply any missing migrations. - -Writing Migrations ------------------- - -To write migrations, create a Python package. Then create a python file for the initial migration. The migration -files must begin with a four-digit number, and will be applied in sequence. For example:: - - analytics - | - +-- analytics_migrations - | - +-- __init__.py - | - +-- 0001_initial.py - | - +-- 0002_add_user_agents_table.py - -Each migration file is expected to contain a list of ``operations``, for example:: - - from infi.clickhouse_orm import migrations - from analytics import models - - operations = [ - migrations.CreateTable(models.Visits), - migrations.CreateTable(models.Visitors) - ] - -The following operations are supported: - -**CreateTable** - -A migration operation that creates a table for a given model class. - -**DropTable** - -A migration operation that drops the table of a given model class. - -**AlterTable** - -A migration operation that compares the table of a given model class to -the model's fields, and alters the table to match the model. 
The operation can: - -- add new columns -- drop obsolete columns -- modify column types - -Default values are not altered by this operation. - -Running Migrations ------------------- - -To migrate a database, create a ``Database`` instance and call its ``migrate`` method with the package -name containing your migrations:: - - Database('analytics_db').migrate('analytics.analytics_migrations') - -Note that you may have more than one migrations package. \ No newline at end of file diff --git a/docs/contributing.md b/docs/contributing.md index 6268733..53af669 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -1,12 +1,27 @@ Contributing ============ +This project is hosted on GitHub - [https://github.com/Infinidat/infi.clickhouse_orm/](https://github.com/Infinidat/infi.clickhouse_orm/). + +Please open an issue there if you encounter a bug or want to request a feature. +Pull requests are also welcome. + +Building +-------- + After cloning the project, run the following commands: easy_install -U infi.projector cd infi.clickhouse_orm projector devenv build +A `setup.py` file will be generate, which you can use to install the development version of the package: + + python setup.py install + +Tests +----- + To run the tests, ensure that the ClickHouse server is running on (this is the default), and run: bin/nosetests @@ -18,4 +33,4 @@ To see test coverage information run: --- -[<< System Models](system_models.md) | [Table of Contents](toc.md) \ No newline at end of file +[<< System Models](system_models.md) | [Table of Contents](toc.md) | [Class Reference >>](class_reference.md) \ No newline at end of file diff --git a/docs/schema_migrations.md b/docs/schema_migrations.md index 2d67f8d..e4647bf 100644 --- a/docs/schema_migrations.md +++ b/docs/schema_migrations.md @@ -8,7 +8,7 @@ The migrations that were applied to the database are recorded in the `infi_click Writing Migrations ------------------ -To write migrations, create a Python package. Then create a python file for the initial migration. The migration files must begin with a four-digit number, and will be applied in sequence. For example:: +To write migrations, create a Python package. Then create a python file for the initial migration. The migration files must begin with a four-digit number, and will be applied in sequence. For example: analytics | @@ -20,7 +20,7 @@ To write migrations, create a Python package. Then create a python file for the | +-- 0002_add_user_agents_table.py -Each migration file is expected to contain a list of `operations`, for example:: +Each migration file is expected to contain a list of `operations`, for example: from infi.clickhouse_orm import migrations from analytics import models @@ -53,7 +53,7 @@ Default values are not altered by this operation. 
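For example, after changing a model you could add a migration that uses `AlterTable` to bring the existing table up to date (a sketch, reusing the hypothetical `analytics` package from above):

    from infi.clickhouse_orm import migrations
    from analytics import models

    operations = [
        migrations.AlterTable(models.Visits)
    ]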
Running Migrations ------------------ -To migrate a database, create a `Database` instance and call its `migrate` method with the package name containing your migrations:: +To migrate a database, create a `Database` instance and call its `migrate` method with the package name containing your migrations: Database('analytics_db').migrate('analytics.analytics_migrations') diff --git a/docs/toc.md b/docs/toc.md index ae05203..4167e64 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -41,6 +41,8 @@ * [Partitions and Parts](system_models.md#partitions-and-parts) * [Contributing](contributing.md#contributing) + * [Building](contributing.md#building) + * [Tests](contributing.md#tests) * [Class Reference](class_reference.md#class-reference) * [infi.clickhouse_orm.database](class_reference.md#infi.clickhouse_orm.database) From 5735ca301a7e3c941b290b390ce842c429a3e3cb Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 11 May 2017 05:34:16 +0300 Subject: [PATCH 22/53] Fix typo --- docs/contributing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/contributing.md b/docs/contributing.md index 53af669..cb64e57 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -15,7 +15,7 @@ After cloning the project, run the following commands: cd infi.clickhouse_orm projector devenv build -A `setup.py` file will be generate, which you can use to install the development version of the package: +A `setup.py` file will be generated, which you can use to install the development version of the package: python setup.py install From ac8a843eee6ceb9cb4dec1ca21cc46313afef6aa Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 11 May 2017 05:46:42 +0300 Subject: [PATCH 23/53] Releasing v0.9.0 --- CHANGELOG.md | 4 ++-- docs/index.md | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e74a7e6..8da6ee6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ Change Log ========== -Unreleased ----------- +v0.9.0 +------ - Major new feature: building model queries using QuerySets - Refactor and expand the documentation - Add support for FixedString fields diff --git a/docs/index.md b/docs/index.md index 9b5199e..d6df2c1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -3,6 +3,8 @@ Overview This project is simple ORM for working with the [ClickHouse database](https://clickhouse.yandex/). It allows you to define model classes whose instances can be written to the database and read from it. +It was tested on Python 2.7 and 3.5. 
+ Installation ------------ From 1ff82a57e157a6aebd59c30fc5a65bcedf44ab72 Mon Sep 17 00:00:00 2001 From: Marsel Date: Sun, 14 May 2017 23:11:58 +0300 Subject: [PATCH 24/53] Fix "NameError: name 'unicode' is not defined" in python3 --- tests/test_readonly.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_readonly.py b/tests/test_readonly.py index 371fdcb..a192b62 100644 --- a/tests/test_readonly.py +++ b/tests/test_readonly.py @@ -2,6 +2,7 @@ import unittest +import six from infi.clickhouse_orm.database import Database, DatabaseException from infi.clickhouse_orm.models import Model from infi.clickhouse_orm.fields import * @@ -25,7 +26,7 @@ class ReadonlyTestCase(TestCaseWithData): with self.assertRaises(DatabaseException): self.database.drop_database() except DatabaseException as e: - if 'Unknown user' in unicode(e): + if 'Unknown user' in six.text_type(e): raise unittest.SkipTest('Database user "%s" is not defined' % username) else: raise @@ -56,4 +57,3 @@ class ReadOnlyModel(Model): readonly = True name = StringField() - From e6dba1f89f2bc4451bb53b11c271dd99a9f37026 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Mon, 15 May 2017 08:35:29 +0300 Subject: [PATCH 25/53] TRIVIAL --- scripts/README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index aaf2f27..b666b6c 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -35,10 +35,9 @@ Usage: generate_all ------------ Does everything: - - - Generates the class reference using generate_ref - - Generates the table of contents using generate_toc - - Converts to HTML for visual inspection using docs2html +- Generates the class reference using generate_ref +- Generates the table of contents using generate_toc +- Converts to HTML for visual inspection using docs2html Usage: From c388f543d22bd015e12c035971bcf42453340f42 Mon Sep 17 00:00:00 2001 From: Marsel Date: Mon, 15 May 2017 13:11:20 +0300 Subject: [PATCH 26/53] ipython<6: closes #32 --- buildout.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildout.cfg b/buildout.cfg index 0078704..a8c2fec 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -43,7 +43,7 @@ output = ${project:version_file} dependent-scripts = true recipe = infi.recipe.console_scripts eggs = ${project:name} - ipython + ipython<6 nose coverage enum34 From fcb8196d3db5bc9eb5ada835acdddeb0c4b31774 Mon Sep 17 00:00:00 2001 From: Ivan Ladelshchikov Date: Tue, 6 Jun 2017 20:00:15 +0500 Subject: [PATCH 27/53] fix unicode params for Py2 --- src/infi/clickhouse_orm/fields.py | 4 ++-- src/infi/clickhouse_orm/system_models.py | 4 +++- tests/test_alias_fields.py | 2 +- tests/test_materialized_fields.py | 2 +- tests/test_system_models.py | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index a136601..1d2d59a 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -18,9 +18,9 @@ class Field(object): def __init__(self, default=None, alias=None, materialized=None): assert (None, None) in {(default, alias), (alias, materialized), (default, materialized)}, \ "Only one of default, alias and materialized parameters can be given" - assert alias is None or isinstance(alias, str) and alias != "",\ + assert alias is None or isinstance(alias, string_types) and alias != "",\ "Alias field must be string field name, if given" - assert materialized is None or isinstance(materialized, str) and alias != "",\ + 
assert materialized is None or isinstance(materialized, string_types) and alias != "",\ "Materialized field must be string, if given" self.creation_counter = Field.creation_counter diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index 8c66550..c151302 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -2,6 +2,8 @@ This file contains system readonly models that can be got from database https://clickhouse.yandex/reference_en.html#System tables """ +from six import string_types + from .database import Database from .fields import * from .models import Model @@ -115,7 +117,7 @@ class SystemPart(Model): :return: A list of SystemPart objects """ assert isinstance(database, Database), "database must be database.Database class instance" - assert isinstance(conditions, str), "conditions must be a string" + assert isinstance(conditions, string_types), "conditions must be a string" if conditions: conditions += " AND" field_names = ','.join([f[0] for f in cls._fields]) diff --git a/tests/test_alias_fields.py b/tests/test_alias_fields.py index af7bbc8..e8d896f 100644 --- a/tests/test_alias_fields.py +++ b/tests/test_alias_fields.py @@ -60,7 +60,7 @@ class ModelWithAliasFields(Model): date_field = DateField() str_field = StringField() - alias_str = StringField(alias='str_field') + alias_str = StringField(alias=u'str_field') alias_int = Int32Field(alias='int_field') alias_date = DateField(alias='date_field') diff --git a/tests/test_materialized_fields.py b/tests/test_materialized_fields.py index 3151dc3..f877116 100644 --- a/tests/test_materialized_fields.py +++ b/tests/test_materialized_fields.py @@ -62,7 +62,7 @@ class ModelWithMaterializedFields(Model): mat_str = StringField(materialized='lower(str_field)') mat_int = Int32Field(materialized='abs(int_field)') - mat_date = DateField(materialized='toDate(date_time_field)') + mat_date = DateField(materialized=u'toDate(date_time_field)') engine = MergeTree('mat_date', ('mat_date',)) diff --git a/tests/test_system_models.py b/tests/test_system_models.py index 544713b..3e48b0c 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -40,7 +40,7 @@ class SystemPartTest(unittest.TestCase): def test_get_conditions(self): parts = list(SystemPart.get(self.database, conditions="table='testtable'")) self.assertEqual(len(parts), 1) - parts = list(SystemPart.get(self.database, conditions="table='othertable'")) + parts = list(SystemPart.get(self.database, conditions=u"table='othertable'")) self.assertEqual(len(parts), 0) def test_attach_detach(self): From d02d6b14eb4e3237c38d7a14fcb48293d9ce0876 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 15 Jun 2017 11:19:56 +0300 Subject: [PATCH 28/53] - Added `ne` and `not_in` queryset operators - Querysets no longer have a default order when `order_by` is not called - Added `autocreate` flag to database initializer - Fix for SELECT FROM JOIN (#37) --- CHANGELOG.md | 9 ++++++ docs/class_reference.md | 3 +- docs/models_and_databases.md | 8 ++---- docs/querysets.md | 6 +++- scripts/docs2html.sh | 3 ++ src/infi/clickhouse_orm/database.py | 11 ++++++-- src/infi/clickhouse_orm/models.py | 4 +-- src/infi/clickhouse_orm/query.py | 22 +++++++++++++-- tests/test_database.py | 11 +++++++- tests/test_join.py | 44 +++++++++++++++++++++++++++++ tests/test_querysets.py | 8 ++++++ 11 files changed, 114 insertions(+), 15 deletions(-) create mode 100644 tests/test_join.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 
8da6ee6..d48f361 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ Change Log ========== +Unreleased +---------- +- Added `ne` and `not_in` queryset operators +- Querysets no longer have a default order unless `order_by` is called +- Added `autocreate` flag to database initializer +- Fix some Python 2/3 incompatibilities (TvoroG, tsionyx) +- To work around a JOIN bug in ClickHouse, `$table` now inserts only the table name, + and the database name is sent in the query params instead + v0.9.0 ------ - Major new feature: building model queries using QuerySets diff --git a/docs/class_reference.md b/docs/class_reference.md index 0f4f57f..0c53eb3 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -10,7 +10,7 @@ infi.clickhouse_orm.database Database instances connect to a specific ClickHouse database for running queries, inserting data and other operations. -#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False) +#### Database(db_name, db_url="http://localhost:8123/", username=None, password=None, readonly=False, autocreate=True) Initializes a database instance. Unless it's readonly, the database will be @@ -21,6 +21,7 @@ created on the ClickHouse server if it does not already exist. - `username`: optional connection credentials. - `password`: optional connection credentials. - `readonly`: use a read-only connection. +- `autocreate`: automatically create the database if does not exist (unless in readonly mode). #### count(model_class, conditions=None) diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md index 2879c32..ffeb54e 100644 --- a/docs/models_and_databases.md +++ b/docs/models_and_databases.md @@ -117,7 +117,7 @@ This is a very convenient feature that saves you the need to define a model for SQL Placeholders ---------------- -There are a couple of special placeholders that you can use inside the SQL to make it easier to write: `$db` and `$table`. The first one is replaced by the database name, and the second is replaced by the database name plus table name (but is available only when the model is specified). +There are a couple of special placeholders that you can use inside the SQL to make it easier to write: `$db` and `$table`. The first one is replaced by the database name, and the second is replaced by the table name (but is available only when the model is specified). So instead of this: @@ -125,11 +125,9 @@ So instead of this: you can use: - db.select("SELECT * FROM $db.person", model_class=Person) + db.select("SELECT * FROM $db.$table", model_class=Person) -or even: - - db.select("SELECT * FROM $table", model_class=Person) +Note: normally it is not necessary to specify the database name, since it's already sent in the query parameters to ClickHouse. It is enough to specify the table name. 
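For example, since the database name is sent in the query parameters anyway, specifying only the table works (assuming the same `Person` model and `db` instance as above):

    db.select("SELECT * FROM $table", model_class=Person)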
 Counting
 --------
diff --git a/docs/querysets.md b/docs/querysets.md
index fd7e253..2990a77 100644
--- a/docs/querysets.md
+++ b/docs/querysets.md
@@ -31,11 +31,13 @@ There are different operators that can be used, by passing `<fieldname>__<operator>=<value>`
 | `eq` | `field = value` | |
+| `ne` | `field != value` | |
 | `gt` | `field > value` | |
 | `gte` | `field >= value` | |
 | `lt` | `field < value` | |
 | `lte` | `field <= value` | |
 | `in` | `field IN (values)` | See below |
+| `not_in` | `field NOT IN (values)` | See below |
 | `contains` | `field LIKE '%value%'` | For string fields only |
 | `startswith` | `field LIKE 'value%'` | For string fields only |
 | `endswith` | `field LIKE '%value'` | For string fields only |
@@ -46,7 +48,7 @@ There are different operators that can be used, by passing `<fieldname>__<operator>=<value>`
diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py
--- a/src/infi/clickhouse_orm/query.py
+++ b/src/infi/clickhouse_orm/query.py
+class NotOperator(Operator):
+
+    def __init__(self, base_operator):
+        self._base_operator = base_operator
+
+    def to_sql(self, model_cls, field_name, value):
+        # Negate the base operator
+        return 'NOT (%s)' % self._base_operator.to_sql(model_cls, field_name, value)
+
 register_operator('eq', SimpleOperator('='))
+register_operator('ne', SimpleOperator('!='))
 register_operator('gt', SimpleOperator('>'))
 register_operator('gte', SimpleOperator('>='))
 register_operator('lt', SimpleOperator('<'))
 register_operator('lte', SimpleOperator('<='))
 register_operator('in', InOperator())
+register_operator('not_in', NotOperator(InOperator()))
 register_operator('contains', LikeOperator('%{}%'))
 register_operator('startswith', LikeOperator('{}%'))
 register_operator('endswith', LikeOperator('%{}'))
@@ -165,7 +180,7 @@ class QuerySet(object):
         """
         self._model_cls = model_cls
         self._database = database
-        self._order_by = [f[0] for f in model_cls._fields]
+        self._order_by = []
         self._q = []
         self._fields = []
@@ -194,8 +209,9 @@
         fields = '*'
         if self._fields:
             fields = ', '.join('`%s`' % field for field in self._fields)
-        params = (fields, self._database.db_name, self._model_cls.table_name(), self.conditions_as_sql(), self.order_by_as_sql())
-        return u'SELECT %s\nFROM `%s`.`%s`\nWHERE %s\nORDER BY %s' % params
+        ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else ''
+        params = (fields, self._database.db_name, self._model_cls.table_name(), self.conditions_as_sql(), ordering)
+        return u'SELECT %s\nFROM `%s`.`%s`\nWHERE %s%s' % params
 
     def order_by_as_sql(self):
         """
diff --git a/tests/test_database.py b/tests/test_database.py
index 8f94230..fddf383 100644
--- a/tests/test_database.py
+++ b/tests/test_database.py
@@ -128,4 +128,13 @@ class DatabaseTestCase(TestCaseWithData):
 
     def test_invalid_user(self):
         with self.assertRaises(DatabaseException):
-            Database(self.database.db_name, username='default', password='wrong')
\ No newline at end of file
+            Database(self.database.db_name, username='default', password='wrong')
+
+    def test_nonexisting_db(self):
+        db = Database('db_not_here', autocreate=False)
+        with self.assertRaises(DatabaseException):
+            db.create_table(Person)
+
+    def test_preexisting_db(self):
+        db = Database(self.database.db_name, autocreate=False)
+        db.count(Person)
diff --git a/tests/test_join.py b/tests/test_join.py
new file mode 100644
index 0000000..7f3e2df
--- /dev/null
+++ b/tests/test_join.py
@@ -0,0 +1,44 @@
+from __future__ import unicode_literals, print_function
+
+import unittest
+import json
+
+from infi.clickhouse_orm import database, engines, fields, models
+
+
+class JoinTest(unittest.TestCase):
+
+    def setUp(self):
+        self.database = database.Database('test-db')
+        self.database.create_table(Foo)
+        self.database.create_table(Bar)
+        self.database.insert([Foo(id=i) for i in range(3)])
+        self.database.insert([Bar(id=i, b=i * i) for i in range(3)])
+
+    def print_res(self, query):
+        print(query)
+        print(json.dumps([row.to_dict() for row in self.database.select(query)]))
+
+    def test_without_db_name(self):
+        self.print_res("SELECT * FROM {}".format(Foo.table_name()))
+        self.print_res("SELECT * FROM {}".format(Bar.table_name()))
+        self.print_res("SELECT b FROM {}
ALL LEFT JOIN {} USING id".format(Foo.table_name(), Bar.table_name())) + + @unittest.skip('ClickHouse issue - https://github.com/yandex/ClickHouse/issues/635') + def test_with_db_name(self): + self.print_res("SELECT * FROM $db.{}".format(Foo.table_name())) + self.print_res("SELECT * FROM $db.{}".format(Bar.table_name())) + self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN $db.{} USING id".format(Foo.table_name(), Bar.table_name())) + + def test_with_subquery(self): + self.print_res("SELECT b FROM {} ALL LEFT JOIN (SELECT * from {}) USING id".format(Foo.table_name(), Bar.table_name())) + self.print_res("SELECT b FROM $db.{} ALL LEFT JOIN (SELECT * from $db.{}) USING id".format(Foo.table_name(), Bar.table_name())) + + +class Foo(models.Model): + id = fields.UInt8Field() + engine = engines.Memory() + + +class Bar(Foo): + b = fields.UInt8Field() diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 27e6c5c..50fd1ba 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -46,6 +46,7 @@ class QuerySetTestCase(TestCaseWithData): self._test_qs(qs.filter(first_name__in=('Connor', 'Courtney')), 3) # in tuple self._test_qs(qs.filter(first_name__in=['Connor', 'Courtney']), 3) # in list self._test_qs(qs.filter(first_name__in="'Connor', 'Courtney'"), 3) # in string + self._test_qs(qs.filter(first_name__not_in="'Connor', 'Courtney'"), 97) self._test_qs(qs.filter(first_name__contains='sh'), 3) # case sensitive self._test_qs(qs.filter(first_name__icontains='sh'), 6) # case insensitive self._test_qs(qs.filter(first_name__startswith='le'), 0) # case sensitive @@ -74,6 +75,8 @@ class QuerySetTestCase(TestCaseWithData): def test_filter_date_field(self): qs = Person.objects_in(self.database) self._test_qs(qs.filter(birthday='1970-12-02'), 1) + self._test_qs(qs.filter(birthday__eq='1970-12-02'), 1) + self._test_qs(qs.filter(birthday__ne='1970-12-02'), 99) self._test_qs(qs.filter(birthday=date(1970, 12, 2)), 1) self._test_qs(qs.filter(birthday__lte=date(1970, 12, 2)), 3) @@ -87,6 +90,8 @@ class QuerySetTestCase(TestCaseWithData): def test_order_by(self): qs = Person.objects_in(self.database) + self.assertFalse('ORDER BY' in qs.as_sql()) + self.assertFalse(qs.order_by_as_sql()) person = list(qs.order_by('first_name', 'last_name'))[0] self.assertEquals(person.first_name, 'Abdul') person = list(qs.order_by('-first_name', '-last_name'))[0] @@ -100,6 +105,7 @@ class QuerySetTestCase(TestCaseWithData): qs = Person.objects_in(self.database) self._test_qs(qs.filter(height__in='SELECT max(height) FROM $table'), 2) self._test_qs(qs.filter(first_name__in=qs.only('last_name')), 2) + self._test_qs(qs.filter(first_name__not_in=qs.only('last_name')), 98) def _insert_sample_model(self): self.database.create_table(SampleModel) @@ -125,6 +131,8 @@ class QuerySetTestCase(TestCaseWithData): self._insert_sample_model() qs = SampleModel.objects_in(self.database) self._test_qs(qs.filter(num=1), 1) + self._test_qs(qs.filter(num__eq=1), 1) + self._test_qs(qs.filter(num__ne=1), 3) self._test_qs(qs.filter(num__gt=1), 3) self._test_qs(qs.filter(num__gte=1), 4) self._test_qs(qs.filter(num__in=(1, 2, 3)), 3) From f2ac744bc75aea6f471b0230ce1e263eae3c61f8 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 15 Jun 2017 14:48:39 +0300 Subject: [PATCH 29/53] Releasing v0.9.1 --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d48f361..ab47d82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ Change Log ========== -Unreleased ----------- 
+v0.9.1 +------ - Added `ne` and `not_in` queryset operators - Querysets no longer have a default order unless `order_by` is called - Added `autocreate` flag to database initializer From b72772f1b91a4d46f3224dcd7404a9609e697768 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 15 Jun 2017 14:49:43 +0300 Subject: [PATCH 30/53] Oops, need to rename 0.9.1 to 0.9.2 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab47d82..24a1668 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ Change Log ========== -v0.9.1 +v0.9.2 ------ - Added `ne` and `not_in` queryset operators - Querysets no longer have a default order unless `order_by` is called From 1b0ea035b80308bcfcc7be68eaa6d6c451b6f649 Mon Sep 17 00:00:00 2001 From: Dzianis Sivets <12rvt2008@gmail.com> Date: Fri, 16 Jun 2017 20:37:37 +0300 Subject: [PATCH 31/53] Nullable fields support --- src/infi/clickhouse_orm/fields.py | 27 +++++++ src/infi/clickhouse_orm/models.py | 4 + tests/test_nullable_fields.py | 124 ++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 tests/test_nullable_fields.py diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 1d2d59a..61f5e00 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -362,3 +362,30 @@ class ArrayField(Field): from .utils import escape return 'Array(%s)' % self.inner_field.get_sql(with_default=False) + +class NullableField(Field): + + class_default = None + + def __init__(self, inner_field, default=None, alias=None, materialized=None, + extra_null_values=set()): + self.inner_field = inner_field + self._extra_null_values = extra_null_values + super(NullableField, self).__init__(default, alias, materialized) + + def to_python(self, value, timezone_in_use): + if value == '\\N' or value is None: + return None + return self.inner_field.to_python(value, timezone_in_use) + + def validate(self, value): + value is None or self.inner_field.validate(value) + + def to_db_string(self, value, quote=True): + if value is None or value in self._extra_null_values: + return '\\N' + return self.inner_field.to_db_string(value, quote=quote) + + def get_sql(self, with_default=True): + from .utils import escape + return 'Nullable(%s)' % self.inner_field.get_sql(with_default=False) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index c3bf52d..b7f4fa5 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -62,6 +62,10 @@ class ModelBase(type): if db_type.startswith('FixedString'): length = int(db_type[12 : -1]) return orm_fields.FixedStringField(length) + # Nullable + if db_type.startswith('Nullable'): + inner_field = cls.create_ad_hoc_field(db_type[9 : -1]) + return orm_fields.NullableField(inner_field) # Simple fields name = db_type + 'Field' if not hasattr(orm_fields, name): diff --git a/tests/test_nullable_fields.py b/tests/test_nullable_fields.py new file mode 100644 index 0000000..54c96a3 --- /dev/null +++ b/tests/test_nullable_fields.py @@ -0,0 +1,124 @@ +import unittest +import pytz + +from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.engines import * + +from datetime import date, datetime + + +class NullableFieldsTest(unittest.TestCase): + + def setUp(self): + self.database = Database('test-db') + self.database.create_table(ModelWithNullable) + + def 
tearDown(self): + self.database.drop_database() + + def test_nullable_datetime_field(self): + f = NullableField(DateTimeField()) + epoch = datetime(1970, 1, 1, tzinfo=pytz.utc) + # Valid values + for value in (date(1970, 1, 1), + datetime(1970, 1, 1), + epoch, + epoch.astimezone(pytz.timezone('US/Eastern')), + epoch.astimezone(pytz.timezone('Asia/Jerusalem')), + '1970-01-01 00:00:00', + '1970-01-17 00:00:17', + '0000-00-00 00:00:00', + 0, + '\\N'): + dt = f.to_python(value, pytz.utc) + if value == '\\N': + self.assertIsNone(dt) + else: + self.assertEquals(dt.tzinfo, pytz.utc) + # Verify that conversion to and from db string does not change value + dt2 = f.to_python(f.to_db_string(dt, quote=False), pytz.utc) + self.assertEquals(dt, dt2) + # Invalid values + for value in ('nope', '21/7/1999', 0.5): + with self.assertRaises(ValueError): + f.to_python(value, pytz.utc) + + def test_nullable_uint8_field(self): + f = NullableField(UInt8Field()) + # Valid values + for value in (17, '17', 17.0, '\\N'): + python_value = f.to_python(value, pytz.utc) + if value == '\\N': + self.assertIsNone(python_value) + self.assertEqual(value, f.to_db_string(python_value)) + else: + self.assertEquals(python_value, 17) + + # Invalid values + for value in ('nope', date.today()): + with self.assertRaises(ValueError): + f.to_python(value, pytz.utc) + + def test_nullable_string_field(self): + f = NullableField(StringField()) + # Valid values + for value in ('\\\\N', 'N', 'some text', '\\N'): + python_value = f.to_python(value, pytz.utc) + if value == '\\N': + self.assertIsNone(python_value) + self.assertEqual(value, f.to_db_string(python_value)) + else: + self.assertEquals(python_value, value) + + def _insert_sample_data(self): + dt = date(1970, 1, 1) + self.database.insert([ + ModelWithNullable(date_field='2016-08-30', + null_str='', null_int=42, null_date=dt), + ModelWithNullable(date_field='2016-08-30', + null_str='nothing', null_int=None, null_date=None), + ModelWithNullable(date_field='2016-08-31', + null_str=None, null_int=42, null_date=dt), + ModelWithNullable(date_field='2016-08-31', + null_str=None, null_int=None, null_date=None) + ]) + + def _assert_sample_data(self, results): + dt = date(1970, 1, 1) + self.assertEquals(len(results), 4) + self.assertIsNone(results[0].null_str) + self.assertEquals(results[0].null_int, 42) + self.assertEquals(results[0].null_date, dt) + self.assertIsNone(results[1].null_date) + self.assertEquals(results[1].null_str, 'nothing') + self.assertIsNone(results[1].null_date) + self.assertIsNone(results[2].null_str) + self.assertEquals(results[2].null_date, dt) + self.assertEquals(results[2].null_int, 42) + self.assertIsNone(results[3].null_int) + self.assertIsNone(results[3].null_str) + self.assertIsNone(results[3].null_date) + + def test_insert_and_select(self): + self._insert_sample_data() + query = 'SELECT * from $table ORDER BY date_field' + results = list(self.database.select(query, ModelWithNullable)) + self._assert_sample_data(results) + + def test_ad_hoc_model(self): + self._insert_sample_data() + query = 'SELECT * from $db.modelwithnullable ORDER BY date_field' + results = list(self.database.select(query)) + self._assert_sample_data(results) + + +class ModelWithNullable(Model): + + date_field = DateField() + null_str = NullableField(StringField(), extra_null_values={''}) + null_int = NullableField(Int32Field()) + null_date = NullableField(DateField()) + + engine = MergeTree('date_field', ('date_field',)) From 9b7e7a179e1bdfc7115efb4577f696d17d0859bf Mon Sep 17 00:00:00 2001 
From: Dzianis Sivets <12rvt2008@gmail.com> Date: Fri, 16 Jun 2017 20:27:05 +0300 Subject: [PATCH 32/53] NullableField of ArrayField --- src/infi/clickhouse_orm/fields.py | 7 +++++-- tests/test_nullable_fields.py | 18 ++++++++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 61f5e00..6c06d44 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -368,9 +368,12 @@ class NullableField(Field): class_default = None def __init__(self, inner_field, default=None, alias=None, materialized=None, - extra_null_values=set()): + extra_null_values=None): self.inner_field = inner_field - self._extra_null_values = extra_null_values + if extra_null_values is None: + self._extra_null_values = list() + else: + self._extra_null_values = extra_null_values super(NullableField, self).__init__(default, alias, materialized) def to_python(self, value, timezone_in_use): diff --git a/tests/test_nullable_fields.py b/tests/test_nullable_fields.py index 54c96a3..7dcab92 100644 --- a/tests/test_nullable_fields.py +++ b/tests/test_nullable_fields.py @@ -76,13 +76,17 @@ class NullableFieldsTest(unittest.TestCase): dt = date(1970, 1, 1) self.database.insert([ ModelWithNullable(date_field='2016-08-30', - null_str='', null_int=42, null_date=dt), + null_str='', null_int=42, null_date=dt, + null_array=None), ModelWithNullable(date_field='2016-08-30', - null_str='nothing', null_int=None, null_date=None), + null_str='nothing', null_int=None, null_date=None, + null_array=[1, 2, 3]), ModelWithNullable(date_field='2016-08-31', - null_str=None, null_int=42, null_date=dt), + null_str=None, null_int=42, null_date=dt, + null_array=[]), ModelWithNullable(date_field='2016-08-31', - null_str=None, null_int=None, null_date=None) + null_str=None, null_int=None, null_date=None, + null_array=[3, 2, 1]) ]) def _assert_sample_data(self, results): @@ -101,6 +105,11 @@ class NullableFieldsTest(unittest.TestCase): self.assertIsNone(results[3].null_str) self.assertIsNone(results[3].null_date) + self.assertIsNone(results[0].null_array) + self.assertEquals(results[1].null_array, [1, 2, 3]) + self.assertEquals(results[2].null_array, []) + self.assertEquals(results[3].null_array, [3, 2, 1]) + def test_insert_and_select(self): self._insert_sample_data() query = 'SELECT * from $table ORDER BY date_field' @@ -120,5 +129,6 @@ class ModelWithNullable(Model): null_str = NullableField(StringField(), extra_null_values={''}) null_int = NullableField(Int32Field()) null_date = NullableField(DateField()) + null_array = NullableField(ArrayField(Int32Field())) engine = MergeTree('date_field', ('date_field',)) From 3a56041da6e73a100fb47d4898e838a83cd4cb81 Mon Sep 17 00:00:00 2001 From: Dzianis Sivets <12rvt2008@gmail.com> Date: Fri, 16 Jun 2017 20:27:25 +0300 Subject: [PATCH 33/53] documentation for NullableField --- docs/field_types.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/docs/field_types.md b/docs/field_types.md index 8936ef1..fbf033b 100644 --- a/docs/field_types.md +++ b/docs/field_types.md @@ -22,6 +22,7 @@ Currently the following field types are supported: | Enum8Field | Enum8 | Enum | See below | Enum16Field | Enum16 | Enum | See below | ArrayField | Array | list | See below +| NullableField | Nullable | See below | See below DateTimeField and Time Zones ---------------------------- @@ -103,6 +104,30 @@ Usage: # created_date and username will contain a default value db.select('SELECT * FROM 
$db.event', model_class=Event) +Working with nullable fields +------------------------------------------ +From [some time](https://github.com/yandex/ClickHouse/pull/70) ClickHouse provides a NULL value support. +Also see some information [here](https://github.com/yandex/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00395_nullable.sql). + +You can create fields, that can contain any data type (except Enum) +or 'None' value, for example: + + class EventData(models.Model): + + date = fields.DateField() + comment = fields.NullableField(fields.StringField(), extra_null_values={''}) + score = fields.NullableField(fields.UInt8Field()) + serie = fields.NullableField(fields.ArrayField(fields.UInt8Field())) + + engine = engines.MergeTree('date', ('date',)) + + + score_event = EventData(date=date.today(), comment=None, score=5, serie=None) + comment_event = EventData(date=date.today(), comment='Excellent!', score=None, serie=None) + another_event = EventData(date=date.today(), comment='', score=None, serie=None) + action_event = EventData(date=date.today(), comment='', score=None, serie=[1, 2, 3]) + +NOTE: ArrayField of NullableField yet not supported. --- From e7b31018eb609365f0859d1d7a9c6835922148c8 Mon Sep 17 00:00:00 2001 From: Arnon Yaari Date: Sun, 18 Jun 2017 12:35:08 +0300 Subject: [PATCH 34/53] HOSTDEV-2736 change license and add license file --- LICENSE | 26 ++++++++++++++++++++++++++ setup.in | 4 ++-- 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ff7bfb4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,26 @@ +Copyright (c) 2017 INFINIDAT + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/setup.in b/setup.in index 9b65942..e62522f 100644 --- a/setup.in +++ b/setup.in @@ -6,14 +6,14 @@ SETUP_INFO = dict( author_email = '${infi.recipe.template.version:author_email}', url = ${infi.recipe.template.version:homepage}, - license = 'PSF', + license = 'BSD', description = """${project:description}""", # http://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers = [ "Intended Audience :: Developers", "Intended Audience :: System Administrators", - "License :: OSI Approved :: Python Software Foundation License", + "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2.7", From 2c0ef08ee1e7d72a1a7a0e1aa021eb5b9aa4a1bd Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 23 Jun 2017 11:10:49 +0300 Subject: [PATCH 35/53] Minor refactoring of extra_null_values --- src/infi/clickhouse_orm/fields.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 6c06d44..988ace7 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -177,7 +177,7 @@ class BaseIntField(Field): raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) def to_db_string(self, value, quote=True): - # There's no need to call escape since numbers do not contain + # There's no need to call escape since numbers do not contain # special characters, and never need quoting return text_type(value) @@ -253,7 +253,7 @@ class BaseFloatField(Field): raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) def to_db_string(self, value, quote=True): - # There's no need to call escape since numbers do not contain + # There's no need to call escape since numbers do not contain # special characters, and never need quoting return text_type(value) @@ -370,10 +370,9 @@ class NullableField(Field): def __init__(self, inner_field, default=None, alias=None, materialized=None, extra_null_values=None): self.inner_field = inner_field - if extra_null_values is None: - self._extra_null_values = list() - else: - self._extra_null_values = extra_null_values + self._null_values = [None] + if extra_null_values: + self._null_values.extend(extra_null_values) super(NullableField, self).__init__(default, alias, materialized) def to_python(self, value, timezone_in_use): @@ -385,7 +384,7 @@ class NullableField(Field): value is None or self.inner_field.validate(value) def to_db_string(self, value, quote=True): - if value is None or value in self._extra_null_values: + if value in self._null_values: return '\\N' return self.inner_field.to_db_string(value, quote=quote) From 53e67fb59f922ec737591b40e4e3cc0433bfa401 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 23 Jun 2017 11:56:05 +0300 Subject: [PATCH 36/53] Update docs for nullable fields --- CHANGELOG.md | 5 +++++ docs/class_reference.md | 7 +++++++ docs/field_types.md | 20 +++++++++++--------- docs/toc.md | 2 ++ 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24a1668..9574c9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ Change Log ========== +Unreleased +---------- +- Changed license from PSF to BSD +- Nullable fields support (yamiou) + v0.9.2 ------ - Added `ne` and `not_in` queryset operators diff --git a/docs/class_reference.md b/docs/class_reference.md index 0c53eb3..b02bf62 100644 --- a/docs/class_reference.md +++ 
b/docs/class_reference.md @@ -378,6 +378,13 @@ Extends Field #### ArrayField(inner_field, default=None, alias=None, materialized=None) +### NullableField + +Extends Field + +#### NullableField(inner_field, default=None, alias=None, materialized=None, extra_null_values=None) + + ### FixedStringField Extends StringField diff --git a/docs/field_types.md b/docs/field_types.md index fbf033b..f07f9d5 100644 --- a/docs/field_types.md +++ b/docs/field_types.md @@ -17,8 +17,8 @@ Currently the following field types are supported: | UInt16Field | UInt16 | int | Range 0 to 65535 | UInt32Field | UInt32 | int | Range 0 to 4294967295 | UInt64Field | UInt64 | int/long | Range 0 to 18446744073709551615 -| Float32Field | Float32 | float | -| Float64Field | Float64 | float | +| Float32Field | Float32 | float | +| Float64Field | Float64 | float | | Enum8Field | Enum8 | Enum | See below | Enum16Field | Enum16 | Enum | See below | ArrayField | Array | list | See below @@ -105,12 +105,11 @@ Usage: db.select('SELECT * FROM $db.event', model_class=Event) Working with nullable fields ------------------------------------------- +---------------------------- From [some time](https://github.com/yandex/ClickHouse/pull/70) ClickHouse provides a NULL value support. Also see some information [here](https://github.com/yandex/ClickHouse/blob/master/dbms/tests/queries/0_stateless/00395_nullable.sql). -You can create fields, that can contain any data type (except Enum) -or 'None' value, for example: +Wrapping another field in a `NullableField` makes it possible to assign `None` to that field. For example: class EventData(models.Model): @@ -120,14 +119,17 @@ or 'None' value, for example: serie = fields.NullableField(fields.ArrayField(fields.UInt8Field())) engine = engines.MergeTree('date', ('date',)) - - + + score_event = EventData(date=date.today(), comment=None, score=5, serie=None) comment_event = EventData(date=date.today(), comment='Excellent!', score=None, serie=None) another_event = EventData(date=date.today(), comment='', score=None, serie=None) action_event = EventData(date=date.today(), comment='', score=None, serie=[1, 2, 3]) - -NOTE: ArrayField of NullableField yet not supported. + +The `extra_null_values` parameter is an iterable of additional values that should be converted +to `None`. + +NOTE: `ArrayField` of `NullableField` is not supported. Also `EnumField` cannot be nullable. 
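For illustration, a minimal round-trip sketch of the nullable-fields API documented above. It assumes the `EventData` model from the example, a running ClickHouse server, and a database named `demo` that is made up for this sketch:

    from datetime import date
    from infi.clickhouse_orm.database import Database

    db = Database('demo')  # hypothetical database name
    db.create_table(EventData)
    db.insert([
        EventData(date=date.today(), comment='Excellent!', score=5),
        EventData(date=date.today(), comment='', score=None),
    ])
    for event in EventData.objects_in(db):
        # The empty comment is read back as None, because '' was listed
        # in extra_null_values for that field.
        print(event.to_dict())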
--- diff --git a/docs/toc.md b/docs/toc.md index 4167e64..cadb096 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -26,6 +26,7 @@ * [Working with enum fields](field_types.md#working-with-enum-fields) * [Working with array fields](field_types.md#working-with-array-fields) * [Working with materialized and alias fields](field_types.md#working-with-materialized-and-alias-fields) + * [Working with nullable fields](field_types.md#working-with-nullable-fields) * [Table Engines](table_engines.md#table-engines) * [Simple Engines](table_engines.md#simple-engines) @@ -60,6 +61,7 @@ * [BaseFloatField](class_reference.md#basefloatfield) * [BaseEnumField](class_reference.md#baseenumfield) * [ArrayField](class_reference.md#arrayfield) + * [NullableField](class_reference.md#nullablefield) * [FixedStringField](class_reference.md#fixedstringfield) * [UInt8Field](class_reference.md#uint8field) * [UInt16Field](class_reference.md#uint16field) From 1966896850a67112de89f55a5962e24b57a3734e Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 24 Jun 2017 12:28:42 +0300 Subject: [PATCH 37/53] Support queryset slicing --- CHANGELOG.md | 1 + docs/class_reference.md | 4 ++-- docs/querysets.md | 29 ++++++++++++++++++++----- docs/toc.md | 1 + src/infi/clickhouse_orm/query.py | 37 +++++++++++++++++++++++++------- tests/test_querysets.py | 35 +++++++++++++++++++++++++++--- 6 files changed, 89 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9574c9e..8bf34c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Unreleased ---------- - Changed license from PSF to BSD - Nullable fields support (yamiou) +- Support for queryset slicing v0.9.2 ------ diff --git a/docs/class_reference.md b/docs/class_reference.md index b02bf62..d526e20 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -539,8 +539,8 @@ infi.clickhouse_orm.query ### QuerySet -A queryset is an object that represents a database query using a specific `Model`. -It is lazy, meaning that it does not hit the database until you iterate over its +A queryset is an object that represents a database query using a specific `Model`. +It is lazy, meaning that it does not hit the database until you iterate over its matching rows (model instances). #### QuerySet(model_cls, database) diff --git a/docs/querysets.md b/docs/querysets.md index 2990a77..bb10332 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -4,7 +4,7 @@ Querysets A queryset is an object that represents a database query using a specific Model. It is lazy, meaning that it does not hit the database until you iterate over its matching rows (model instances). To create a base queryset for a model class, use: qs = Person.objects_in(database) - + This queryset matches all Person instances in the database. 
You can get these instances using iteration: for person in qs: @@ -19,7 +19,7 @@ The `filter` and `exclude` methods are used for filtering the matching instances >>> qs = qs.filter(first_name__startswith='V').exclude(birthday__lt='2000-01-01') >>> qs.conditions_as_sql() u"first_name LIKE 'V%' AND NOT (birthday < '2000-01-01')" - + It is possible to specify several fields to filter or exclude by: >>> qs = Person.objects_in(database).filter(last_name='Smith', height__gt=1.75) @@ -57,7 +57,7 @@ For example if we want to select only people with Irish last names: # A list of simple values qs = Person.objects_in(database).filter(last_name__in=["Murphy", "O'Sullivan"]) - + # A string subquery = "SELECT name from $db.irishlastname" qs = Person.objects_in(database).filter(last_name__in=subquery) @@ -72,7 +72,7 @@ Counting and Checking Existence Use the `count` method to get the number of matches: Person.objects_in(database).count() - + To check if there are any matches at all, you can use any of the following equivalent options: if qs.count(): ... @@ -85,7 +85,7 @@ Ordering The sorting order of the results can be controlled using the `order_by` method: qs = Person.objects_in(database).order_by('last_name', 'first_name') - + The default order is ascending. To use descending order, add a minus sign before the field name: qs = Person.objects_in(database).order_by('-height') @@ -100,6 +100,25 @@ When some of the model fields aren't needed, it is more efficient to omit them f qs = Person.objects_in(database).only('first_name', 'birthday') +Slicing +------- + +It is possible to get a specific item from the queryset by index. + + qs = Person.objects_in(database).order_by('last_name', 'first_name') + first = qs[0] + +It is also possible to get a range of instances using a slice. This returns a queryset +that you can either iterate over or convert to a list. + + qs = Person.objects_in(database).order_by('last_name', 'first_name') + first_ten_people = list(qs[:10]) + next_ten_people = list(qs[10:20]) + +You should use `order_by` to ensure a consistent ordering of the results. + +Trying to use negative indexes or a slice with a step (e.g. [0:100:2]) is not supported and will raise an `AssertionError`. + --- [<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md) \ No newline at end of file diff --git a/docs/toc.md b/docs/toc.md index cadb096..da69986 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -20,6 +20,7 @@ * [Counting and Checking Existence](querysets.md#counting-and-checking-existence) * [Ordering](querysets.md#ordering) * [Omitting Fields](querysets.md#omitting-fields) + * [Slicing](querysets.md#slicing) * [Field Types](field_types.md#field-types) * [DateTimeField and Time Zones](field_types.md#datetimefield-and-time-zones) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index a5dbbe4..cecee19 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -59,7 +59,7 @@ class InOperator(Operator): class LikeOperator(Operator): """ A LIKE operator that matches the field to a given pattern. Can be - case sensitive or insensitive. + case sensitive or insensitive. """ def __init__(self, pattern, case_sensitive=True): @@ -168,8 +168,8 @@ class Q(object): class QuerySet(object): """ -A queryset is an object that represents a database query using a specific `Model`.
- It is lazy, meaning that it does not hit the database until you iterate over its + A queryset is an object that represents a database query using a specific `Model`. + It is lazy, meaning that it does not hit the database until you iterate over its matching rows (model instances). """ @@ -183,10 +183,11 @@ class QuerySet(object): self._order_by = [] self._q = [] self._fields = [] + self._limits = None def __iter__(self): """ - Iterates over the model instances matching this queryset + Iterates over the model instances matching this queryset """ return self._database.select(self.as_sql(), self._model_cls) @@ -201,7 +202,25 @@ class QuerySet(object): def __unicode__(self): return self.as_sql() - + + def __getitem__(self, s): + if isinstance(s, six.integer_types): + # Single index + assert s >= 0, 'negative indexes are not supported' + qs = copy(self) + qs._limits = (s, 1) + return iter(qs).next() + else: + # Slice + assert s.step in (None, 1), 'step is not supported in slices' + start = s.start or 0 + stop = s.stop or 2**63 - 1 + assert start >= 0 and stop >= 0, 'negative indexes are not supported' + assert start <= stop, 'start of slice cannot be smaller than its end' + qs = copy(self) + qs._limits = (start, stop - start) + return qs + def as_sql(self): """ Returns the whole query as a SQL string. @@ -210,8 +229,10 @@ class QuerySet(object): if self._fields: fields = ', '.join('`%s`' % field for field in self._fields) ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else '' - params = (fields, self._database.db_name, self._model_cls.table_name(), self.conditions_as_sql(), ordering) - return u'SELECT %s\nFROM `%s`.`%s`\nWHERE %s%s' % params + limit = '\nLIMIT %d, %d' % self._limits if self._limits else '' + params = (fields, self._model_cls.table_name(), + self.conditions_as_sql(), ordering, limit) + return u'SELECT %s\nFROM `%s`\nWHERE %s%s%s' % params def order_by_as_sql(self): """ @@ -236,7 +257,7 @@ class QuerySet(object): Returns the number of matching model instances. """ return self._database.count(self._model_cls, self.conditions_as_sql()) - + def order_by(self, *field_names): """ Returns a new `QuerySet` instance with the ordering changed. 
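As a usage sketch of the slicing implementation above: slicing only populates `_limits`, which `as_sql()` renders as a `LIMIT offset, count` clause. Assuming the `Person` model used throughout the docs and a `Database` instance named `database`:

    qs = Person.objects_in(database).order_by('last_name')
    print(qs[10:20].as_sql())
    # Expected output (the table name depends on the model class):
    #   SELECT *
    #   FROM `person`
    #   WHERE 1
    #   ORDER BY last_name
    #   LIMIT 10, 10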
diff --git a/tests/test_querysets.py b/tests/test_querysets.py index 50fd1ba..c26e3c4 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -18,11 +18,11 @@ class QuerySetTestCase(TestCaseWithData): def setUp(self): super(QuerySetTestCase, self).setUp() self.database.insert(self._sample_data()) - + def _test_qs(self, qs, expected_count): logging.info(qs.as_sql()) for instance in qs: - logging.info('\t%s' % instance.to_dict()) + logging.info('\t%s' % instance.to_dict()) self.assertEquals(qs.count(), expected_count) def test_no_filtering(self): @@ -138,6 +138,30 @@ class QuerySetTestCase(TestCaseWithData): self._test_qs(qs.filter(num__in=(1, 2, 3)), 3) self._test_qs(qs.filter(num__in=range(1, 4)), 3) + def test_slicing(self): + db = Database('system') + numbers = range(100) + qs = Numbers.objects_in(db) + self.assertEquals(qs[0].number, numbers[0]) + self.assertEquals(qs[5].number, numbers[5]) + self.assertEquals([row.number for row in qs[:1]], numbers[:1]) + self.assertEquals([row.number for row in qs[:10]], numbers[:10]) + self.assertEquals([row.number for row in qs[3:10]], numbers[3:10]) + self.assertEquals([row.number for row in qs[9:10]], numbers[9:10]) + self.assertEquals([row.number for row in qs[10:10]], numbers[10:10]) + + def test_invalid_slicing(self): + db = Database('system') + qs = Numbers.objects_in(db) + with self.assertRaises(AssertionError): + qs[3:10:2] + with self.assertRaises(AssertionError): + qs[-5] + with self.assertRaises(AssertionError): + qs[:-5] + with self.assertRaises(AssertionError): + qs[50:1] + Color = Enum('Color', u'red blue green yellow brown white black') @@ -149,4 +173,9 @@ class SampleModel(Model): num = Int32Field() color = Enum8Field(Color) - engine = MergeTree('materialized_date', ('materialized_date',)) \ No newline at end of file + engine = MergeTree('materialized_date', ('materialized_date',)) + + +class Numbers(Model): + + number = UInt64Field() \ No newline at end of file From 33209542d63c50fec9d68a45d73532b25d4f294e Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sat, 24 Jun 2017 12:59:35 +0300 Subject: [PATCH 38/53] Releasing v0.9.3 --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bf34c3..02795f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ Change Log ========== -Unreleased ----------- +v0.9.3 +------ - Changed license from PSF to BSD - Nullable fields support (yamiou) - Support for queryset slicing From cb6c329d32f571936bd34881710ea11efb4e5014 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Mon, 26 Jun 2017 11:09:57 +0300 Subject: [PATCH 39/53] Migrations: when creating a table for a `BufferModel`, create the underlying table too if necessary --- CHANGELOG.md | 4 ++++ buildout.cfg | 1 + src/infi/clickhouse_orm/migrations.py | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02795f2..421955a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ Change Log ========== +Unreleased +---------- +- Migrations: when creating a table for a `BufferModel`, create the underlying table too if necessary + v0.9.3 ------ - Changed license from PSF to BSD diff --git a/buildout.cfg b/buildout.cfg index a8c2fec..004a6fd 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -17,6 +17,7 @@ install_requires = [ ] version_file = src/infi/clickhouse_orm/__version__.py description = A Python library for working with the ClickHouse database +long_description = A Python library for working with the ClickHouse 
database console_scripts = [] gui_scripts = [] package_data = [] diff --git a/src/infi/clickhouse_orm/migrations.py b/src/infi/clickhouse_orm/migrations.py index 8167a20..ebcbacc 100644 --- a/src/infi/clickhouse_orm/migrations.py +++ b/src/infi/clickhouse_orm/migrations.py @@ -1,4 +1,4 @@ -from .models import Model +from .models import Model, BufferModel from .fields import DateField, StringField from .engines import MergeTree from .utils import escape @@ -28,6 +28,8 @@ class CreateTable(Operation): def apply(self, database): logger.info(' Create table %s', self.model_class.table_name()) + if issubclass(self.model_class, BufferModel): + database.create_table(self.model_class.engine.main_model) database.create_table(self.model_class) From a2acb9a025c3205df4b490a1f46d73eec4e7a2fc Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Mon, 26 Jun 2017 11:10:18 +0300 Subject: [PATCH 40/53] Releasing v0.9.4 --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 421955a..f875d12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ Change Log ========== -Unreleased ----------- +v0.9.4 +------ - Migrations: when creating a table for a `BufferModel`, create the underlying table too if necessary v0.9.3 From 639867bb32586d0b439547f29d16948adbc71c5c Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Fri, 11 Aug 2017 17:26:46 +0300 Subject: [PATCH 41/53] - Added `QuerySet.paginate()` - Support for basic aggregation in querysets --- CHANGELOG.md | 5 + docs/class_reference.md | 138 ++++++++++++++++++++- docs/models_and_databases.md | 3 +- docs/querysets.md | 58 ++++++++- docs/toc.md | 3 + scripts/generate_ref.py | 4 +- src/infi/clickhouse_orm/engines.py | 14 ++- src/infi/clickhouse_orm/fields.py | 4 +- src/infi/clickhouse_orm/migrations.py | 2 +- src/infi/clickhouse_orm/query.py | 149 +++++++++++++++++++++-- src/infi/clickhouse_orm/system_models.py | 3 +- src/infi/clickhouse_orm/utils.py | 13 +- tests/test_querysets.py | 148 ++++++++++++++++++++++ 13 files changed, 512 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f875d12..a60ecda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ Change Log ========== +Unreleased +---------- +- Added `QuerySet.paginate()` +- Support for basic aggregation in querysets + v0.9.4 ------ - Migrations: when creating a table for a `BufferModel`, create the underlying table too if necessary diff --git a/docs/class_reference.md b/docs/class_reference.md index d526e20..7e4bc74 100644 --- a/docs/class_reference.md +++ b/docs/class_reference.md @@ -550,6 +550,23 @@ Initializer. It is possible to create a queryset like this, but the standard way is to use `MyModel.objects_in(database)`. +#### aggregate(*args, **kwargs) + + +Returns an `AggregateQuerySet` over this query, with `args` serving as +grouping fields and `kwargs` serving as calculated fields. At least one +calculated field is required. For example: +``` + Event.objects_in(database).filter(date__gt='2017-08-01').aggregate('event_type', count='count()') +``` +is equivalent to: +``` + SELECT event_type, count() AS count FROM event + WHERE data > '2017-08-01' + GROUP BY event_type +``` + + #### as_sql() @@ -571,19 +588,19 @@ Returns the number of matching model instances. #### exclude(**kwargs) -Returns a new `QuerySet` instance that excludes all rows matching the conditions. +Returns a copy of this queryset that excludes all rows matching the conditions. 
#### filter(**kwargs) -Returns a new `QuerySet` instance that includes only rows matching the conditions. +Returns a copy of this queryset that includes only rows matching the conditions. #### only(*field_names) -Returns a new `QuerySet` instance limited to the specified field names. +Returns a copy of this queryset limited to the specified field names. Useful when there are large fields that are not needed, or for creating a subquery to use with an IN operator. @@ -591,7 +608,7 @@ or for creating a subquery to use with an IN operator. #### order_by(*field_names) -Returns a new `QuerySet` instance with the ordering changed. +Returns a copy of this queryset with the ordering changed. #### order_by_as_sql() @@ -600,3 +617,116 @@ Returns a new `QuerySet` instance with the ordering changed. Returns the contents of the query's `ORDER BY` clause as a string. +#### paginate(page_num=1, page_size=100) + + +Returns a single page of model instances that match the queryset. +Note that `order_by` should be used first, to ensure a correct +partitioning of records into pages. + +- `page_num`: the page number (1-based), or -1 to get the last page. +- `page_size`: number of records to return per page. + +The result is a namedtuple containing `objects` (list), `number_of_objects`, +`pages_total`, `number` (of the current page), and `page_size`. + + +### AggregateQuerySet + +Extends QuerySet + + +A queryset used for aggregation. + +#### AggregateQuerySet(base_qs, grouping_fields, calculated_fields) + + +Initializer. Normally you should not call this but rather use `QuerySet.aggregate()`. + +The grouping fields should be a list/tuple of field names from the model. For example: +``` + ('event_type', 'event_subtype') +``` +The calculated fields should be a mapping from name to a ClickHouse aggregation function. For example: +``` + {'weekday': 'toDayOfWeek(event_date)', 'number_of_events': 'count()'} +``` +At least one calculated field is required. + + +#### aggregate(*args, **kwargs) + + +This method is not supported on `AggregateQuerySet`. + + +#### as_sql() + + +Returns the whole query as a SQL string. + + +#### conditions_as_sql() + + +Returns the contents of the query's `WHERE` clause as a string. + + +#### count() + + +Returns the number of rows after aggregation. + + +#### exclude(**kwargs) + + +Returns a copy of this queryset that excludes all rows matching the conditions. + + +#### filter(**kwargs) + + +Returns a copy of this queryset that includes only rows matching the conditions. + + +#### group_by(*args) + + +This method lets you specify the grouping fields explicitly. The `args` must +be names of grouping fields or calculated fields that this queryset was +created with. + + +#### only(*field_names) + + +This method is not supported on `AggregateQuerySet`. + + +#### order_by(*field_names) + + +Returns a copy of this queryset with the ordering changed. + + +#### order_by_as_sql() + + +Returns the contents of the query's `ORDER BY` clause as a string. + + +#### paginate(page_num=1, page_size=100) + + +Returns a single page of model instances that match the queryset. +Note that `order_by` should be used first, to ensure a correct +partitioning of records into pages. + +- `page_num`: the page number (1-based), or -1 to get the last page. +- `page_size`: number of records to return per page. + +The result is a namedtuple containing `objects` (list), `number_of_objects`, +`pages_total`, `number` (of the current page), and `page_size`. 
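A short usage sketch of the `paginate()` contract described above, mirroring the pattern used in the project's tests (assuming a `Person` model and a `Database` instance named `database`):

    page_num = 1
    while True:
        page = Person.objects_in(database).order_by('last_name').paginate(page_num, page_size=100)
        for person in page.objects:
            pass  # process one model instance
        if page_num >= page.pages_total:
            break  # pages_total == 0 when there are no results, so this also ends the loop
        page_num += 1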
+ + diff --git a/docs/models_and_databases.md b/docs/models_and_databases.md index ffeb54e..2b84f99 100644 --- a/docs/models_and_databases.md +++ b/docs/models_and_databases.md @@ -158,8 +158,7 @@ The `paginate` method returns a `namedtuple` containing the following fields: - `objects` - the list of objects in this page - `number_of_objects` - total number of objects in all pages - `pages_total` - total number of pages -- `number` - the page number, starting from 1; the special value -1 - may be used to retrieve the last page +- `number` - the page number, starting from 1; the special value -1 may be used to retrieve the last page - `page_size` - the number of objects per page You can optionally pass conditions to the query: diff --git a/docs/querysets.md b/docs/querysets.md index bb10332..2bbefd9 100644 --- a/docs/querysets.md +++ b/docs/querysets.md @@ -99,11 +99,10 @@ When some of the model fields aren't needed, it is more efficient to omit them f qs = Person.objects_in(database).only('first_name', 'birthday') - Slicing ------- -It is possible to get a specific item from the queryset by index. +It is possible to get a specific item from the queryset by index: qs = Person.objects_in(database).order_by('last_name', 'first_name') first = qs[0] @@ -119,6 +118,61 @@ You should use `order_by` to ensure a consistent ordering of the results. Trying to use negative indexes or a slice with a step (e.g. [0:100:2]) is not supported and will raise an `AssertionError`. +Pagination +---------- + +Similar to `Database.paginate`, you can go over the queryset results one page at a time: + + >>> qs = Person.objects_in(database).order_by('last_name', 'first_name') + >>> page = qs.paginate(page_num=1, page_size=10) + >>> print page.number_of_objects + 2507 + >>> print page.pages_total + 251 + >>> for person in page.objects: + >>> # do something + +The `paginate` method returns a `namedtuple` containing the following fields: + +- `objects` - the list of objects in this page +- `number_of_objects` - total number of objects in all pages +- `pages_total` - total number of pages +- `number` - the page number, starting from 1; the special value -1 may be used to retrieve the last page +- `page_size` - the number of objects per page + +Note that you should use `QuerySet.order_by` so that the ordering is unique, otherwise there might be inconsistencies in the pagination (such as an instance that appears on two different pages). + +Aggregation +----------- + +It is possible to use aggregation functions over querysets using the `aggregate` method. The simplest form of aggregation works over all rows in the queryset: + + >>> qs = Person.objects_in(database).aggregate(average_height='avg(height)') + >>> print qs.count() + 1 + >>> for row in qs: print row.average_height + 1.71 + +The returned row or rows are no longer instances of the base model (`Person` in this example), but rather instances of an ad-hoc model that includes only the fields specified in the call to `aggregate`. + +You can pass names of fields from the model that will be included in the query. By default, they will be also used in the GROUP BY clause. For example to count the number of people per last name you could do this: + + qs = Person.objects_in(database).aggregate('last_name', num='count()') + +The underlying SQL query would be something like this: + + SELECT last_name, count() AS num FROM person GROUP BY last_name + +If you would like to control the GROUP BY explicitly, use the `group_by` method. 
This is useful when you need to group by a calculated field, instead of a field that exists in the model. For example, to count the number of people born on each weekday: + + qs = Person.objects_in(database).aggregate(weekday='toDayOfWeek(birthday)', num='count()').group_by('weekday') + +This queryset is translated to: + + SELECT toDayOfWeek(birthday) AS weekday, count() AS num FROM person GROUP BY weekday + +After calling `aggregate` you can still use most of the regular queryset methods, such as `count`, `order_by` and `paginate`. It is not possible, however, to call `only` or `aggregate`. It is also not possible to filter the queryset on calculated fields, only on fields that exist in the model. + --- [<< Models and Databases](models_and_databases.md) | [Table of Contents](toc.md) | [Field Types >>](field_types.md) \ No newline at end of file diff --git a/docs/toc.md b/docs/toc.md index da69986..aa5bb3b 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -21,6 +21,8 @@ * [Ordering](querysets.md#ordering) * [Omitting Fields](querysets.md#omitting-fields) * [Slicing](querysets.md#slicing) + * [Pagination](querysets.md#pagination) + * [Aggregation](querysets.md#aggregation) * [Field Types](field_types.md#field-types) * [DateTimeField and Time Zones](field_types.md#datetimefield-and-time-zones) @@ -88,4 +90,5 @@ * [ReplacingMergeTree](class_reference.md#replacingmergetree) * [infi.clickhouse_orm.query](class_reference.md#infi.clickhouse_orm.query) * [QuerySet](class_reference.md#queryset) + * [AggregateQuerySet](class_reference.md#aggregatequeryset) diff --git a/scripts/generate_ref.py b/scripts/generate_ref.py index 8d11249..c35e881 100644 --- a/scripts/generate_ref.py +++ b/scripts/generate_ref.py @@ -110,7 +110,7 @@ def module_doc(classes, list_methods=True): print '-' * len(mdl) print for cls in classes: - class_doc(cls, list_methods) + class_doc(cls, list_methods) def all_subclasses(cls): @@ -132,4 +132,4 @@ if __name__ == '__main__': module_doc([models.Model, models.BufferModel]) module_doc([fields.Field] + all_subclasses(fields.Field), False) module_doc([engines.Engine] + all_subclasses(engines.Engine), False) - module_doc([query.QuerySet]) + module_doc([query.QuerySet, query.AggregateQuerySet]) diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index 9db37da..b28fedd 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -1,8 +1,10 @@ +from .utils import comma_join + class Engine(object): def create_table_sql(self): - raise NotImplementedError() + raise NotImplementedError() # pragma: no cover class TinyLog(Engine): @@ -41,7 +43,7 @@ class MergeTree(Engine): if self.replica_name: name = 'Replicated' + name params = self._build_sql_params() - return '%s(%s)' % (name, ', '.join(params)) + return '%s(%s)' % (name, comma_join(params)) def _build_sql_params(self): params = [] @@ -50,7 +52,7 @@ class MergeTree(Engine): params.append(self.date_col) if self.sampling_expr: params.append(self.sampling_expr) - params.append('(%s)' % ', '.join(self.key_cols)) + params.append('(%s)' % comma_join(self.key_cols)) params.append(str(self.index_granularity)) return params @@ -79,7 +81,7 @@ class SummingMergeTree(MergeTree): def _build_sql_params(self): params = super(SummingMergeTree, self)._build_sql_params() if self.summing_cols: - params.append('(%s)' % ', '.join(self.summing_cols)) + params.append('(%s)' % comma_join(self.summing_cols)) return params @@ -103,7 +105,7 @@ class Buffer(Engine): Must be used in conjuction with a 
`BufferModel`. Read more [here](https://clickhouse.yandex/reference_en.html#Buffer). """ - + #Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) def __init__(self, main_model, num_layers=16, min_time=10, max_time=100, min_rows=10000, max_rows=1000000, min_bytes=10000000, max_bytes=100000000): self.main_model = main_model @@ -117,7 +119,7 @@ class Buffer(Engine): def create_table_sql(self, db_name): - # Overriden create_table_sql example: + # Overriden create_table_sql example: #sql = 'ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)' sql = 'ENGINE = Buffer(`%s`, `%s`, %d, %d, %d, %d, %d, %d, %d)' % ( db_name, self.main_model.table_name(), self.num_layers, diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 988ace7..e6e100a 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -4,7 +4,7 @@ import pytz import time from calendar import timegm -from .utils import escape, parse_array +from .utils import escape, parse_array, comma_join class Field(object): @@ -356,7 +356,7 @@ class ArrayField(Field): def to_db_string(self, value, quote=True): array = [self.inner_field.to_db_string(v, quote=True) for v in value] - return '[' + ', '.join(array) + ']' + return '[' + comma_join(array) + ']' def get_sql(self, with_default=True): from .utils import escape diff --git a/src/infi/clickhouse_orm/migrations.py b/src/infi/clickhouse_orm/migrations.py index ebcbacc..1a82a5b 100644 --- a/src/infi/clickhouse_orm/migrations.py +++ b/src/infi/clickhouse_orm/migrations.py @@ -15,7 +15,7 @@ class Operation(object): ''' def apply(self, database): - raise NotImplementedError() + raise NotImplementedError() # pragma: no cover class CreateTable(Operation): diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index cecee19..217fb68 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -1,12 +1,13 @@ import six import pytz from copy import copy +from math import ceil +from .utils import comma_join # TODO # - and/or between Q objects # - check that field names are valid -# - qs slicing # - operators for arrays: length, has, empty class Operator(object): @@ -19,7 +20,7 @@ class Operator(object): Subclasses should implement this method. It returns an SQL string that applies this operator on the given field and value. 
""" - raise NotImplementedError + raise NotImplementedError # pragma: no cover class SimpleOperator(Operator): @@ -52,7 +53,7 @@ class InOperator(Operator): elif isinstance(value, six.string_types): pass else: - value = ', '.join([field.to_db_string(field.to_python(v, pytz.utc)) for v in value]) + value = comma_join([field.to_db_string(field.to_python(v, pytz.utc)) for v in value]) return '%s IN (%s)' % (field_name, value) @@ -189,6 +190,7 @@ class QuerySet(object): """ Iterates over the model instances matching this queryset """ + print self.as_sql() return self._database.select(self.as_sql(), self._model_cls) def __bool__(self): @@ -227,7 +229,7 @@ class QuerySet(object): """ fields = '*' if self._fields: - fields = ', '.join('`%s`' % field for field in self._fields) + fields = comma_join('`%s`' % field for field in self._fields) ordering = '\nORDER BY ' + self.order_by_as_sql() if self._order_by else '' limit = '\nLIMIT %d, %d' % self._limits if self._limits else '' params = (fields, self._model_cls.table_name(), @@ -238,7 +240,7 @@ class QuerySet(object): """ Returns the contents of the query's `ORDER BY` clause as a string. """ - return u', '.join([ + return comma_join([ '%s DESC' % field[1:] if field[0] == '-' else field for field in self._order_by ]) @@ -260,7 +262,7 @@ class QuerySet(object): def order_by(self, *field_names): """ - Returns a new `QuerySet` instance with the ordering changed. + Returns a copy of this queryset with the ordering changed. """ qs = copy(self) qs._order_by = field_names @@ -268,7 +270,7 @@ class QuerySet(object): def only(self, *field_names): """ - Returns a new `QuerySet` instance limited to the specified field names. + Returns a copy of this queryset limited to the specified field names. Useful when there are large fields that are not needed, or for creating a subquery to use with an IN operator. """ @@ -278,7 +280,7 @@ class QuerySet(object): def filter(self, **kwargs): """ - Returns a new `QuerySet` instance that includes only rows matching the conditions. + Returns a copy of this queryset that includes only rows matching the conditions. """ qs = copy(self) qs._q = list(self._q) + [Q(**kwargs)] @@ -286,8 +288,137 @@ class QuerySet(object): def exclude(self, **kwargs): """ - Returns a new `QuerySet` instance that excludes all rows matching the conditions. + Returns a copy of this queryset that excludes all rows matching the conditions. """ qs = copy(self) qs._q = list(self._q) + [~Q(**kwargs)] return qs + + def paginate(self, page_num=1, page_size=100): + ''' + Returns a single page of model instances that match the queryset. + Note that `order_by` should be used first, to ensure a correct + partitioning of records into pages. + + - `page_num`: the page number (1-based), or -1 to get the last page. + - `page_size`: number of records to return per page. + + The result is a namedtuple containing `objects` (list), `number_of_objects`, + `pages_total`, `number` (of the current page), and `page_size`. 
+ ''' + from .database import Page + count = self.count() + pages_total = int(ceil(count / float(page_size))) + if page_num == -1: + page_num = pages_total + elif page_num < 1: + raise ValueError('Invalid page number: %d' % page_num) + offset = (page_num - 1) * page_size + return Page( + objects=list(self[offset : offset + page_size]), + number_of_objects=count, + pages_total=pages_total, + number=page_num, + page_size=page_size + ) + + def aggregate(self, *args, **kwargs): + ''' + Returns an `AggregateQuerySet` over this query, with `args` serving as + grouping fields and `kwargs` serving as calculated fields. At least one + calculated field is required. For example: + ``` + Event.objects_in(database).filter(date__gt='2017-08-01').aggregate('event_type', count='count()') + ``` + is equivalent to: + ``` + SELECT event_type, count() AS count FROM event + WHERE data > '2017-08-01' + GROUP BY event_type + ``` + ''' + return AggregateQuerySet(self, args, kwargs) + + +class AggregateQuerySet(QuerySet): + """ + A queryset used for aggregation. + """ + + def __init__(self, base_qs, grouping_fields, calculated_fields): + """ + Initializer. Normally you should not call this but rather use `QuerySet.aggregate()`. + + The grouping fields should be a list/tuple of field names from the model. For example: + ``` + ('event_type', 'event_subtype') + ``` + The calculated fields should be a mapping from name to a ClickHouse aggregation function. For example: + ``` + {'weekday': 'toDayOfWeek(event_date)', 'number_of_events': 'count()'} + ``` + At least one calculated field is required. + """ + super(AggregateQuerySet, self).__init__(base_qs._model_cls, base_qs._database) + assert calculated_fields, 'No calculated fields specified for aggregation' + self._fields = grouping_fields + self._grouping_fields = grouping_fields + self._calculated_fields = calculated_fields + self._order_by = list(base_qs._order_by) + self._q = list(base_qs._q) + self._limits = base_qs._limits + + def group_by(self, *args): + """ + This method lets you specify the grouping fields explicitly. The `args` must + be names of grouping fields or calculated fields that this queryset was + created with. + """ + for name in args: + assert name in self._fields or name in self._calculated_fields, \ + 'Cannot group by `%s` since it is not included in the query' % name + qs = copy(self) + qs._grouping_fields = args + return qs + + def only(self, *field_names): + """ + This method is not supported on `AggregateQuerySet`. + """ + raise NotImplementedError('Cannot use "only" with AggregateQuerySet') + + def aggregate(self, *args, **kwargs): + """ + This method is not supported on `AggregateQuerySet`. + """ + raise NotImplementedError('Cannot re-aggregate an AggregateQuerySet') + + def as_sql(self): + """ + Returns the whole query as a SQL string. 
+ """ + grouping = comma_join('`%s`' % field for field in self._grouping_fields) + fields = comma_join(list(self._fields) + ['%s AS %s' % (v, k) for k, v in self._calculated_fields.items()]) + params = dict( + grouping=grouping or "''", + fields=fields, + table=self._model_cls.table_name(), + conds=self.conditions_as_sql() + ) + sql = u'SELECT %(fields)s\nFROM `%(table)s`\nWHERE %(conds)s\nGROUP BY %(grouping)s' % params + if self._order_by: + sql += '\nORDER BY ' + self.order_by_as_sql() + if self._limits: + sql += '\nLIMIT %d, %d' % self._limits + return sql + + def __iter__(self): + return self._database.select(self.as_sql()) # using an ad-hoc model + + def count(self): + """ + Returns the number of rows after aggregation. + """ + sql = u'SELECT count() FROM (%s)' % self.as_sql() + raw = self._database.raw(sql) + return int(raw) if raw else 0 diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index c151302..b1e1bb7 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -7,6 +7,7 @@ from six import string_types from .database import Database from .fields import * from .models import Model +from .utils import comma_join class SystemPart(Model): @@ -61,7 +62,7 @@ class SystemPart(Model): :return: Operation execution result """ operation = operation.upper() - assert operation in self.OPERATIONS, "operation must be in [%s]" % ', '.join(self.OPERATIONS) + assert operation in self.OPERATIONS, "operation must be in [%s]" % comma_join(self.OPERATIONS) sql = "ALTER TABLE `%s`.`%s` %s PARTITION '%s'" % (self._database.db_name, self.table, operation, self.partition) if from_part is not None: sql += " FROM %s" % from_part diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index 83d11e0..5a8a17a 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -21,7 +21,7 @@ SPECIAL_CHARS_REGEX = re.compile("[" + ''.join(SPECIAL_CHARS.values()) + "]") def escape(value, quote=True): ''' If the value is a string, escapes any special characters and optionally - surrounds it with single quotes. If the value is not a string (e.g. a number), + surrounds it with single quotes. If the value is not a string (e.g. a number), converts it to one. ''' def escape_one(match): @@ -56,7 +56,7 @@ def parse_array(array_string): if len(array_string) < 2 or array_string[0] != '[' or array_string[-1] != ']': raise ValueError('Invalid array string: "%s"' % array_string) # Drop opening brace - array_string = array_string[1:] + array_string = array_string[1:] # Go over the string, lopping off each value at the beginning until nothing is left values = [] while True: @@ -65,7 +65,7 @@ def parse_array(array_string): return values elif array_string[0] in ', ': # In between values - array_string = array_string[1:] + array_string = array_string[1:] elif array_string[0] == "'": # Start of quoted value, find its end match = re.search(r"[^\\]'", array_string) @@ -90,3 +90,10 @@ def import_submodules(package_name): name: importlib.import_module(package_name + '.' + name) for _, name, _ in pkgutil.iter_modules(package.__path__) } + + +def comma_join(items): + """ + Joins an iterable of strings with commas. 
+ """ + return ', '.join(items) diff --git a/tests/test_querysets.py b/tests/test_querysets.py index c26e3c4..ded1d84 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -162,6 +162,154 @@ class QuerySetTestCase(TestCaseWithData): with self.assertRaises(AssertionError): qs[50:1] + def test_pagination(self): + qs = Person.objects_in(self.database).order_by('first_name', 'last_name') + # Try different page sizes + for page_size in (1, 2, 7, 10, 30, 100, 150): + # Iterate over pages and collect all intances + page_num = 1 + instances = set() + while True: + page = qs.paginate(page_num, page_size) + self.assertEquals(page.number_of_objects, len(data)) + self.assertGreater(page.pages_total, 0) + [instances.add(obj.to_tsv()) for obj in page.objects] + if page.pages_total == page_num: + break + page_num += 1 + # Verify that all instances were returned + self.assertEquals(len(instances), len(data)) + + def test_pagination_last_page(self): + qs = Person.objects_in(self.database).order_by('first_name', 'last_name') + # Try different page sizes + for page_size in (1, 2, 7, 10, 30, 100, 150): + # Ask for the last page in two different ways and verify equality + page_a = qs.paginate(-1, page_size) + page_b = qs.paginate(page_a.pages_total, page_size) + self.assertEquals(page_a[1:], page_b[1:]) + self.assertEquals([obj.to_tsv() for obj in page_a.objects], + [obj.to_tsv() for obj in page_b.objects]) + + def test_pagination_invalid_page(self): + qs = Person.objects_in(self.database).order_by('first_name', 'last_name') + for page_num in (0, -2, -100): + with self.assertRaises(ValueError): + qs.paginate(page_num, 100) + + def test_pagination_with_conditions(self): + qs = Person.objects_in(self.database).order_by('first_name', 'last_name').filter(first_name__lt='Ava') + page = qs.paginate(1, 100) + self.assertEquals(page.number_of_objects, 10) + + +class AggregateTestCase(TestCaseWithData): + + def setUp(self): + super(AggregateTestCase, self).setUp() + self.database.insert(self._sample_data()) + + def test_aggregate_no_grouping(self): + qs = Person.objects_in(self.database).aggregate(average_height='avg(height)', count='count()') + print qs.as_sql() + self.assertEquals(qs.count(), 1) + for row in qs: + self.assertAlmostEqual(row.average_height, 1.6923, places=4) + self.assertEquals(row.count, 100) + + def test_aggregate_with_filter(self): + # When filter comes before aggregate + qs = Person.objects_in(self.database).filter(first_name='Warren').aggregate(average_height='avg(height)', count='count()') + print qs.as_sql() + self.assertEquals(qs.count(), 1) + for row in qs: + self.assertAlmostEqual(row.average_height, 1.675, places=4) + self.assertEquals(row.count, 2) + # When filter comes after aggregate + qs = Person.objects_in(self.database).aggregate(average_height='avg(height)', count='count()').filter(first_name='Warren') + print qs.as_sql() + self.assertEquals(qs.count(), 1) + for row in qs: + self.assertAlmostEqual(row.average_height, 1.675, places=4) + self.assertEquals(row.count, 2) + + def test_aggregate_with_implicit_grouping(self): + qs = Person.objects_in(self.database).aggregate('first_name', average_height='avg(height)', count='count()') + print qs.as_sql() + self.assertEquals(qs.count(), 94) + total = 0 + for row in qs: + self.assertTrue(1.5 < row.average_height < 2) + self.assertTrue(0 < row.count < 3) + total += row.count + self.assertEquals(total, 100) + + def test_aggregate_with_explicit_grouping(self): + qs = 
Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday') + print qs.as_sql() + self.assertEquals(qs.count(), 7) + total = 0 + for row in qs: + total += row.count + self.assertEquals(total, 100) + + def test_aggregate_with_order_by(self): + qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday') + days = [row.weekday for row in qs.order_by('weekday')] + self.assertEquals(days, range(1, 8)) + + def test_aggregate_with_indexing(self): + qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday') + total = 0 + for i in range(7): + total += qs[i].count + self.assertEquals(total, 100) + + def test_aggregate_with_slicing(self): + qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday') + total = sum(row.count for row in qs[:3]) + sum(row.count for row in qs[3:]) + self.assertEquals(total, 100) + + def test_aggregate_with_pagination(self): + qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday') + total = 0 + page_num = 1 + while True: + page = qs.paginate(page_num, page_size=3) + self.assertEquals(page.number_of_objects, 7) + total += sum(row.count for row in page.objects) + if page.pages_total == page_num: + break + page_num += 1 + self.assertEquals(total, 100) + + def test_aggregate_with_wrong_grouping(self): + with self.assertRaises(AssertionError): + Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('first_name') + + def test_aggregate_with_no_calculated_fields(self): + with self.assertRaises(AssertionError): + Person.objects_in(self.database).aggregate() + + def test_aggregate_with_only(self): + # Cannot put only() after aggregate() + with self.assertRaises(NotImplementedError): + Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').only('weekday') + # When only() comes before aggregate(), it gets overridden + qs = Person.objects_in(self.database).only('last_name').aggregate(average_height='avg(height)', count='count()') + self.assertTrue('last_name' not in qs.as_sql()) + + def test_aggregate_on_aggregate(self): + with self.assertRaises(NotImplementedError): + Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').aggregate(s='sum(height)') + + def test_filter_on_calculated_field(self): + # This is currently not supported, so we expect it to fail + with self.assertRaises(AttributeError): + qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday') + qs = qs.filter(weekday=1) + self.assertEquals(qs.count(), 1) + Color = Enum('Color', u'red blue green yellow brown white black') From 5db4f113ac0a68232763cbcfea2d8b6523ccb7d5 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Mon, 14 Aug 2017 12:17:38 +0300 Subject: [PATCH 42/53] Update example in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4021f28..9820948 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ busy = CPUStats.objects_in(db).filter(cpu_id=1, cpu_percent__gt=95).count() print 'CPU 1 was busy {:.2f}% of the time'.format(busy * 100.0 / total) # Calculate the average usage per CPU -for row in db.select('SELECT cpu_id, avg(cpu_percent) AS average FROM demo.cpustats GROUP BY cpu_id'): 
+for row in CPUStats.objects_in(db).aggregate('cpu_id', average='avg(cpu_percent)'): print 'CPU {row.cpu_id}: {row.average:.2f}%'.format(row=row) ``` From 3daba3429f10dcae6ba97e858b8e3e1dc26eb267 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Tue, 15 Aug 2017 23:26:21 +0300 Subject: [PATCH 43/53] Releasing v0.9.5 --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a60ecda..c11cd37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ Change Log ========== -Unreleased ----------- +v0.9.5 +------ - Added `QuerySet.paginate()` - Support for basic aggregation in querysets From 8f8f1d217605c7b7ee33f208e8fbe65877d64c89 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Wed, 16 Aug 2017 13:07:39 +0300 Subject: [PATCH 44/53] Update version of isolated python --- buildout.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buildout.cfg b/buildout.cfg index 004a6fd..5ea6da5 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -3,7 +3,7 @@ prefer-final = false newest = false download-cache = .cache develop = . -parts = +parts = [project] name = infi.clickhouse_orm @@ -29,7 +29,7 @@ homepage = https://github.com/Infinidat/infi.clickhouse_orm [isolated-python] recipe = infi.recipe.python -version = v2.7.9.4 +version = v2.7.12.4 [setup.py] recipe = infi.recipe.template.version From 67e0442645e1ae819616c07d9b1f161950924e45 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 20 Aug 2017 09:30:40 +0300 Subject: [PATCH 45/53] - Nullable arrays not supported in latest ClickHouse version - system.parts table no longer includes "replicated" column in latest ClickHouse version --- CHANGELOG.md | 5 +++++ src/infi/clickhouse_orm/system_models.py | 1 - tests/test_nullable_fields.py | 22 ++++------------------ 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c11cd37..470a9a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ Change Log ========== +Unreleased +---------- +- Nullable arrays not supported in latest ClickHouse version +- system.parts table no longer includes "replicated" column in latest ClickHouse version + v0.9.5 ------ - Added `QuerySet.paginate()` diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index b1e1bb7..174b21f 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -25,7 +25,6 @@ class SystemPart(Model): engine = StringField() # Name of the table engine, without parameters. partition = StringField() # Name of the partition, in the format YYYYMM. name = StringField() # Name of the part. - replicated = UInt8Field() # Whether the part belongs to replicated data. # Whether the part is used in a table, or is no longer needed and will be deleted soon. # Inactive parts remain after merging. 
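For context, a brief sketch of how the `SystemPart` model touched by this patch is typically used; `get()` and `freeze()` appear in the project's tests, and a `Database` instance named `db` is assumed:

    from infi.clickhouse_orm.system_models import SystemPart

    parts = SystemPart.get(db)  # one instance per active data part
    for part in parts:
        print('%s %s %s' % (part.table, part.partition, part.name))
    # parts[0].freeze() would create a local backup under ClickHouse's shadow directory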
diff --git a/tests/test_nullable_fields.py b/tests/test_nullable_fields.py index 7dcab92..0c485a8 100644 --- a/tests/test_nullable_fields.py +++ b/tests/test_nullable_fields.py @@ -75,18 +75,10 @@ class NullableFieldsTest(unittest.TestCase): def _insert_sample_data(self): dt = date(1970, 1, 1) self.database.insert([ - ModelWithNullable(date_field='2016-08-30', - null_str='', null_int=42, null_date=dt, - null_array=None), - ModelWithNullable(date_field='2016-08-30', - null_str='nothing', null_int=None, null_date=None, - null_array=[1, 2, 3]), - ModelWithNullable(date_field='2016-08-31', - null_str=None, null_int=42, null_date=dt, - null_array=[]), - ModelWithNullable(date_field='2016-08-31', - null_str=None, null_int=None, null_date=None, - null_array=[3, 2, 1]) + ModelWithNullable(date_field='2016-08-30', null_str='', null_int=42, null_date=dt), + ModelWithNullable(date_field='2016-08-30', null_str='nothing', null_int=None, null_date=None), + ModelWithNullable(date_field='2016-08-31', null_str=None, null_int=42, null_date=dt), + ModelWithNullable(date_field='2016-08-31', null_str=None, null_int=None, null_date=None) ]) def _assert_sample_data(self, results): @@ -105,11 +97,6 @@ class NullableFieldsTest(unittest.TestCase): self.assertIsNone(results[3].null_str) self.assertIsNone(results[3].null_date) - self.assertIsNone(results[0].null_array) - self.assertEquals(results[1].null_array, [1, 2, 3]) - self.assertEquals(results[2].null_array, []) - self.assertEquals(results[3].null_array, [3, 2, 1]) - def test_insert_and_select(self): self._insert_sample_data() query = 'SELECT * from $table ORDER BY date_field' @@ -129,6 +116,5 @@ class ModelWithNullable(Model): null_str = NullableField(StringField(), extra_null_values={''}) null_int = NullableField(Int32Field()) null_date = NullableField(DateField()) - null_array = NullableField(ArrayField(Int32Field())) engine = MergeTree('date_field', ('date_field',)) From 59a4f1cecc73d17afdbbd0550f11bf62ced8b8d2 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 20 Aug 2017 09:36:26 +0300 Subject: [PATCH 46/53] test_freeze fails, possibly due to race condition --- tests/test_system_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_system_models.py b/tests/test_system_models.py index 3e48b0c..0412ad6 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -1,6 +1,6 @@ import unittest from datetime import date - +from time import sleep import os import shutil from infi.clickhouse_orm.database import Database @@ -61,6 +61,7 @@ class SystemPartTest(unittest.TestCase): # There can be other backups in the folder prev_backups = set(self._get_backups()) parts[0].freeze() + sleep(1) backups = set(self._get_backups()) self.assertEqual(len(backups), len(prev_backups) + 1) # Clean created backup From 06ed53e4ecfc635273f45b577bbbfaedd08ab00e Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 20 Aug 2017 09:46:00 +0300 Subject: [PATCH 47/53] fix test_freeze --- tests/test_system_models.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/test_system_models.py b/tests/test_system_models.py index 0412ad6..d932bd6 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -1,6 +1,5 @@ import unittest from datetime import date -from time import sleep import os import shutil from infi.clickhouse_orm.database import Database @@ -11,7 +10,8 @@ from infi.clickhouse_orm.system_models import SystemPart class SystemPartTest(unittest.TestCase): - BACKUP_DIR = 
'/opt/clickhouse/shadow/' + + BACKUP_DIRS = ['/var/lib/clickhouse/shadow', '/opt/clickhouse/shadow/'] def setUp(self): self.database = Database('test-db') @@ -22,10 +22,11 @@ class SystemPartTest(unittest.TestCase): self.database.drop_database() def _get_backups(self): - if not os.path.exists(self.BACKUP_DIR): - return [] - _, dirnames, _ = next(os.walk(self.BACKUP_DIR)) - return dirnames + for dir in self.BACKUP_DIRS: + if os.path.exists(dir): + _, dirnames, _ = next(os.walk(dir)) + return dirnames + raise unittest.SkipTest('Cannot find backups dir') def test_get_all(self): parts = SystemPart.get(self.database) @@ -61,11 +62,8 @@ class SystemPartTest(unittest.TestCase): # There can be other backups in the folder prev_backups = set(self._get_backups()) parts[0].freeze() - sleep(1) backups = set(self._get_backups()) self.assertEqual(len(backups), len(prev_backups) + 1) - # Clean created backup - shutil.rmtree(self.BACKUP_DIR + '{0}'.format(list(backups - prev_backups)[0])) def test_fetch(self): # TODO Not tested, as I have no replication set From 70d34b097f4d923384566ee4319b0959b171566c Mon Sep 17 00:00:00 2001 From: Marsel Date: Wed, 16 Aug 2017 17:03:49 +0300 Subject: [PATCH 48/53] Fix python3 compatibility --- src/infi/clickhouse_orm/query.py | 3 +-- tests/test_querysets.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 217fb68..496b2b4 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -190,7 +190,6 @@ class QuerySet(object): """ Iterates over the model instances matching this queryset """ - print self.as_sql() return self._database.select(self.as_sql(), self._model_cls) def __bool__(self): @@ -211,7 +210,7 @@ class QuerySet(object): assert s >= 0, 'negative indexes are not supported' qs = copy(self) qs._limits = (s, 1) - return iter(qs).next() + return next(iter(qs)) else: # Slice assert s.step in (None, 1), 'step is not supported in slices' diff --git a/tests/test_querysets.py b/tests/test_querysets.py index ded1d84..a888a29 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -140,7 +140,7 @@ class QuerySetTestCase(TestCaseWithData): def test_slicing(self): db = Database('system') - numbers = range(100) + numbers = list(range(100)) qs = Numbers.objects_in(db) self.assertEquals(qs[0].number, numbers[0]) self.assertEquals(qs[5].number, numbers[5]) @@ -211,7 +211,7 @@ class AggregateTestCase(TestCaseWithData): def test_aggregate_no_grouping(self): qs = Person.objects_in(self.database).aggregate(average_height='avg(height)', count='count()') - print qs.as_sql() + print(qs.as_sql()) self.assertEquals(qs.count(), 1) for row in qs: self.assertAlmostEqual(row.average_height, 1.6923, places=4) @@ -220,14 +220,14 @@ class AggregateTestCase(TestCaseWithData): def test_aggregate_with_filter(self): # When filter comes before aggregate qs = Person.objects_in(self.database).filter(first_name='Warren').aggregate(average_height='avg(height)', count='count()') - print qs.as_sql() + print(qs.as_sql()) self.assertEquals(qs.count(), 1) for row in qs: self.assertAlmostEqual(row.average_height, 1.675, places=4) self.assertEquals(row.count, 2) # When filter comes after aggregate qs = Person.objects_in(self.database).aggregate(average_height='avg(height)', count='count()').filter(first_name='Warren') - print qs.as_sql() + print(qs.as_sql()) self.assertEquals(qs.count(), 1) for row in qs: self.assertAlmostEqual(row.average_height, 1.675, 
places=4) @@ -235,7 +235,7 @@ class AggregateTestCase(TestCaseWithData): def test_aggregate_with_implicit_grouping(self): qs = Person.objects_in(self.database).aggregate('first_name', average_height='avg(height)', count='count()') - print qs.as_sql() + print(qs.as_sql()) self.assertEquals(qs.count(), 94) total = 0 for row in qs: @@ -246,7 +246,7 @@ class AggregateTestCase(TestCaseWithData): def test_aggregate_with_explicit_grouping(self): qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday') - print qs.as_sql() + print(qs.as_sql()) self.assertEquals(qs.count(), 7) total = 0 for row in qs: @@ -256,7 +256,7 @@ class AggregateTestCase(TestCaseWithData): def test_aggregate_with_order_by(self): qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday') days = [row.weekday for row in qs.order_by('weekday')] - self.assertEquals(days, range(1, 8)) + self.assertEquals(days, list(range(1, 8))) def test_aggregate_with_indexing(self): qs = Person.objects_in(self.database).aggregate(weekday='toDayOfWeek(birthday)', count='count()').group_by('weekday') @@ -326,4 +326,4 @@ class SampleModel(Model): class Numbers(Model): - number = UInt64Field() \ No newline at end of file + number = UInt64Field() From c4e60ca2a8cb4d7ae87de8b4cd30ab7db5edbba0 Mon Sep 17 00:00:00 2001 From: Marsel Date: Wed, 16 Aug 2017 23:48:18 +0300 Subject: [PATCH 49/53] Include unicode_literals --- src/infi/clickhouse_orm/database.py | 10 ++++++---- src/infi/clickhouse_orm/engines.py | 1 + src/infi/clickhouse_orm/fields.py | 1 + src/infi/clickhouse_orm/models.py | 13 +++++++------ src/infi/clickhouse_orm/query.py | 6 ++++-- src/infi/clickhouse_orm/system_models.py | 1 + src/infi/clickhouse_orm/utils.py | 3 ++- tests/base_test_with_data.py | 2 +- tests/test_alias_fields.py | 3 +-- tests/test_array_fields.py | 16 ++++++++-------- tests/test_buffer.py | 6 ++---- tests/test_database.py | 4 ++-- tests/test_engines.py | 1 + tests/test_enum_fields.py | 1 + tests/test_fixed_string_fields.py | 4 ++-- tests/test_inheritance.py | 2 +- tests/test_materialized_fields.py | 3 +-- tests/test_migrations.py | 7 ++++--- tests/test_models.py | 2 +- tests/test_nullable_fields.py | 1 + tests/test_querysets.py | 2 +- tests/test_readonly.py | 2 +- tests/test_simple_fields.py | 5 +++-- tests/test_system_models.py | 1 + 24 files changed, 54 insertions(+), 43 deletions(-) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index ae73059..5c816d6 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import requests from collections import namedtuple from .models import ModelBase @@ -24,11 +26,11 @@ class DatabaseException(Exception): class Database(object): ''' - Database instances connect to a specific ClickHouse database for running queries, + Database instances connect to a specific ClickHouse database for running queries, inserting data and other operations. ''' - def __init__(self, db_name, db_url='http://localhost:8123/', + def __init__(self, db_name, db_url='http://localhost:8123/', username=None, password=None, readonly=False, autocreate=True): ''' Initializes a database instance. Unless it's readonly, the database will be @@ -186,7 +188,7 @@ class Database(object): - `conditions`: optional SQL conditions (contents of the WHERE clause). 
- `settings`: query settings to send as HTTP GET parameters - The result is a namedtuple containing `objects` (list), `number_of_objects`, + The result is a namedtuple containing `objects` (list), `number_of_objects`, `pages_total`, `number` (of the current page), and `page_size`. ''' count = self.count(model_class, conditions) @@ -214,7 +216,7 @@ class Database(object): ''' Executes schema migrations. - - `migrations_package_name` - fully qualified name of the Python package + - `migrations_package_name` - fully qualified name of the Python package containing the migrations. - `up_to` - number of the last migration to apply. ''' diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py index b28fedd..caa05c7 100644 --- a/src/infi/clickhouse_orm/engines.py +++ b/src/infi/clickhouse_orm/engines.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals from .utils import comma_join diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index e6e100a..a57314e 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals from six import string_types, text_type, binary_type import datetime import pytz diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index b7f4fa5..8714447 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals from logging import getLogger from six import with_metaclass @@ -18,7 +19,7 @@ class ModelBase(type): ad_hoc_model_cache = {} def __new__(cls, name, bases, attrs): - new_cls = super(ModelBase, cls).__new__(cls, name, bases, attrs) + new_cls = super(ModelBase, cls).__new__(cls, str(name), bases, attrs) # Collect fields from parent classes base_fields = [] for base in bases: @@ -76,7 +77,7 @@ class ModelBase(type): class Model(with_metaclass(ModelBase)): ''' A base class for ORM models. Each model class represent a ClickHouse table. For example: - + class CPUStats(Model): timestamp = DateTimeField() cpu_id = UInt16Field() @@ -123,7 +124,7 @@ class Model(with_metaclass(ModelBase)): def set_database(self, db): ''' - Sets the `Database` that this model instance belongs to. + Sets the `Database` that this model instance belongs to. This is done automatically when the instance is read from the database or written to it. ''' # This can not be imported globally due to circular import @@ -133,7 +134,7 @@ class Model(with_metaclass(ModelBase)): def get_database(self): ''' - Gets the `Database` that this model instance belongs to. + Gets the `Database` that this model instance belongs to. Returns `None` unless the instance was read from the database or written to it. ''' return self._database @@ -214,7 +215,7 @@ class Model(with_metaclass(ModelBase)): def to_dict(self, include_readonly=True, field_names=None): ''' Returns the instance's column values as a dict. - + - `include_readonly`: if false, returns only fields that can be inserted into database. 
- `field_names`: an iterable of field names to return (optional) ''' @@ -233,7 +234,7 @@ class Model(with_metaclass(ModelBase)): ''' return QuerySet(cls, database) - + class BufferModel(Model): @classmethod diff --git a/src/infi/clickhouse_orm/query.py b/src/infi/clickhouse_orm/query.py index 496b2b4..c1c1dd0 100644 --- a/src/infi/clickhouse_orm/query.py +++ b/src/infi/clickhouse_orm/query.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import six import pytz from copy import copy @@ -167,6 +168,7 @@ class Q(object): return q +@six.python_2_unicode_compatible class QuerySet(object): """ A queryset is an object that represents a database query using a specific `Model`. @@ -201,7 +203,7 @@ class QuerySet(object): def __nonzero__(self): # Python 2 compatibility return type(self).__bool__(self) - def __unicode__(self): + def __str__(self): return self.as_sql() def __getitem__(self, s): @@ -210,7 +212,7 @@ class QuerySet(object): assert s >= 0, 'negative indexes are not supported' qs = copy(self) qs._limits = (s, 1) - return next(iter(qs)) + return six.next(iter(qs)) else: # Slice assert s.step in (None, 1), 'step is not supported in slices' diff --git a/src/infi/clickhouse_orm/system_models.py b/src/infi/clickhouse_orm/system_models.py index 174b21f..49247d9 100644 --- a/src/infi/clickhouse_orm/system_models.py +++ b/src/infi/clickhouse_orm/system_models.py @@ -2,6 +2,7 @@ This file contains system readonly models that can be got from database https://clickhouse.yandex/reference_en.html#System tables """ +from __future__ import unicode_literals from six import string_types from .database import Database diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index 5a8a17a..5f788f3 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals from six import string_types, binary_type, text_type, PY3 import codecs import re @@ -43,7 +44,7 @@ def parse_tsv(line): line = line.decode() if line and line[-1] == '\n': line = line[:-1] - return [unescape(value) for value in line.split('\t')] + return [unescape(value) for value in line.split(b'\t')] def parse_array(array_string): diff --git a/tests/base_test_with_data.py b/tests/base_test_with_data.py index d50d311..352d3d3 100644 --- a/tests/base_test_with_data.py +++ b/tests/base_test_with_data.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +from __future__ import unicode_literals import unittest from infi.clickhouse_orm.database import Database diff --git a/tests/test_alias_fields.py b/tests/test_alias_fields.py index e8d896f..1a17c73 100644 --- a/tests/test_alias_fields.py +++ b/tests/test_alias_fields.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest from datetime import date @@ -65,5 +66,3 @@ class ModelWithAliasFields(Model): alias_date = DateField(alias='date_field') engine = MergeTree('date_field', ('date_field',)) - - diff --git a/tests/test_array_fields.py b/tests/test_array_fields.py index ba4e94b..5676f04 100644 --- a/tests/test_array_fields.py +++ b/tests/test_array_fields.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest from datetime import date @@ -18,8 +19,8 @@ class ArrayFieldsTest(unittest.TestCase): def test_insert_and_select(self): instance = ModelWithArrays( - date_field='2016-08-30', - arr_str=['goodbye,', 'cruel', 'world', 'special chars: ,"\\\'` \n\t\\[]'], + date_field='2016-08-30', + arr_str=['goodbye,', 'cruel', 'world', 'special chars: ,"\\\'` \n\t\\[]'], 
arr_date=['2010-01-01'] ) self.database.insert([instance]) @@ -52,11 +53,11 @@ class ArrayFieldsTest(unittest.TestCase): self.assertEquals(parse_array("[1, 2, 395, -44]"), ["1", "2", "395", "-44"]) self.assertEquals(parse_array("['big','mouse','','!']"), ["big", "mouse", "", "!"]) self.assertEquals(parse_array(unescape("['\\r\\n\\0\\t\\b']")), ["\r\n\0\t\b"]) - for s in ("", - "[", - "]", - "[1, 2", - "3, 4]", + for s in ("", + "[", + "]", + "[1, 2", + "3, 4]", "['aaa', 'aaa]"): with self.assertRaises(ValueError): parse_array(s) @@ -70,4 +71,3 @@ class ModelWithArrays(Model): arr_date = ArrayField(DateField()) engine = MergeTree('date_field', ('date_field',)) - diff --git a/tests/test_buffer.py b/tests/test_buffer.py index 91bf656..744b6cf 100644 --- a/tests/test_buffer.py +++ b/tests/test_buffer.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +from __future__ import unicode_literals import unittest from infi.clickhouse_orm.models import BufferModel @@ -23,7 +23,5 @@ class BufferTestCase(TestCaseWithData): class PersonBuffer(BufferModel, Person): - - engine = Buffer(Person) - + engine = Buffer(Person) diff --git a/tests/test_database.py b/tests/test_database.py index fddf383..2eed732 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +from __future__ import unicode_literals import unittest from infi.clickhouse_orm.database import Database, DatabaseException @@ -99,7 +99,7 @@ class DatabaseTestCase(TestCaseWithData): page_a = self.database.paginate(Person, 'first_name, last_name', -1, page_size) page_b = self.database.paginate(Person, 'first_name, last_name', page_a.pages_total, page_size) self.assertEquals(page_a[1:], page_b[1:]) - self.assertEquals([obj.to_tsv() for obj in page_a.objects], + self.assertEquals([obj.to_tsv() for obj in page_a.objects], [obj.to_tsv() for obj in page_b.objects]) def test_pagination_invalid_page(self): diff --git a/tests/test_engines.py b/tests/test_engines.py index 3639960..ddc3a85 100644 --- a/tests/test_engines.py +++ b/tests/test_engines.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest from infi.clickhouse_orm.database import Database, DatabaseException diff --git a/tests/test_enum_fields.py b/tests/test_enum_fields.py index 78df6d3..1364b95 100644 --- a/tests/test_enum_fields.py +++ b/tests/test_enum_fields.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest from infi.clickhouse_orm.database import Database diff --git a/tests/test_fixed_string_fields.py b/tests/test_fixed_string_fields.py index f9490af..05c544d 100644 --- a/tests/test_fixed_string_fields.py +++ b/tests/test_fixed_string_fields.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +from __future__ import unicode_literals import unittest from infi.clickhouse_orm.database import Database @@ -37,7 +37,7 @@ class FixedStringFieldsTest(unittest.TestCase): query = 'SELECT * from $table ORDER BY date_field' results = list(self.database.select(query, FixedStringModel)) self._assert_sample_data(results) - + def test_ad_hoc_model(self): self._insert_sample_data() query = 'SELECT * from $db.fixedstringmodel ORDER BY date_field' diff --git a/tests/test_inheritance.py b/tests/test_inheritance.py index 08bc084..f209995 100644 --- a/tests/test_inheritance.py +++ b/tests/test_inheritance.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest import datetime import pytz @@ -49,4 +50,3 @@ class Model1(ParentModel): class Model2(ParentModel): float_field = Float32Field() - diff --git 
a/tests/test_materialized_fields.py b/tests/test_materialized_fields.py index f877116..fccd722 100644 --- a/tests/test_materialized_fields.py +++ b/tests/test_materialized_fields.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest from datetime import date @@ -65,5 +66,3 @@ class ModelWithMaterializedFields(Model): mat_date = DateField(materialized=u'toDate(date_time_field)') engine = MergeTree('mat_date', ('mat_date',)) - - diff --git a/tests/test_migrations.py b/tests/test_migrations.py index 4541a6b..3478f9f 100644 --- a/tests/test_migrations.py +++ b/tests/test_migrations.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest from infi.clickhouse_orm.database import Database @@ -54,11 +55,11 @@ class MigrationsTestCase(unittest.TestCase): # Altering enum fields self.database.migrate('tests.sample_migrations', 6) self.assertTrue(self.tableExists(EnumModel1)) - self.assertEquals(self.getTableFields(EnumModel1), + self.assertEquals(self.getTableFields(EnumModel1), [('date', 'Date'), ('f1', "Enum8('dog' = 1, 'cat' = 2, 'cow' = 3)")]) self.database.migrate('tests.sample_migrations', 7) self.assertTrue(self.tableExists(EnumModel1)) - self.assertEquals(self.getTableFields(EnumModel2), + self.assertEquals(self.getTableFields(EnumModel2), [('date', 'Date'), ('f1', "Enum16('dog' = 1, 'cat' = 2, 'horse' = 3, 'pig' = 4)")]) self.database.migrate('tests.sample_migrations', 8) self.assertTrue(self.tableExists(MaterializedModel)) @@ -157,4 +158,4 @@ class AliasModel(Model): @classmethod def table_name(cls): - return 'alias_date' \ No newline at end of file + return 'alias_date' diff --git a/tests/test_models.py b/tests/test_models.py index b52a2c6..3228a5b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest import datetime import pytz @@ -89,4 +90,3 @@ class SimpleModel(Model): alias_field = Float32Field(alias='float_field') engine = MergeTree('date_field', ('int_field', 'date_field')) - diff --git a/tests/test_nullable_fields.py b/tests/test_nullable_fields.py index 0c485a8..c062ca3 100644 --- a/tests/test_nullable_fields.py +++ b/tests/test_nullable_fields.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest import pytz diff --git a/tests/test_querysets.py b/tests/test_querysets.py index a888a29..ad834bb 100644 --- a/tests/test_querysets.py +++ b/tests/test_querysets.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +from __future__ import unicode_literals, print_function import unittest from infi.clickhouse_orm.database import Database diff --git a/tests/test_readonly.py b/tests/test_readonly.py index a192b62..ae3d54f 100644 --- a/tests/test_readonly.py +++ b/tests/test_readonly.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +from __future__ import unicode_literals import unittest import six diff --git a/tests/test_simple_fields.py b/tests/test_simple_fields.py index 645d9ed..7720dc1 100644 --- a/tests/test_simple_fields.py +++ b/tests/test_simple_fields.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest from infi.clickhouse_orm.fields import * from datetime import date, datetime @@ -10,7 +11,7 @@ class SimpleFieldsTest(unittest.TestCase): f = DateTimeField() epoch = datetime(1970, 1, 1, tzinfo=pytz.utc) # Valid values - for value in (date(1970, 1, 1), datetime(1970, 1, 1), epoch, + for value in (date(1970, 1, 1), datetime(1970, 1, 1), epoch, epoch.astimezone(pytz.timezone('US/Eastern')), 
epoch.astimezone(pytz.timezone('Asia/Jerusalem')), '1970-01-01 00:00:00', '1970-01-17 00:00:17', '0000-00-00 00:00:00', 0): dt = f.to_python(value, pytz.utc) @@ -60,4 +61,4 @@ class SimpleFieldsTest(unittest.TestCase): # Range check for value in (-1, 1000): with self.assertRaises(ValueError): - f.validate(value) \ No newline at end of file + f.validate(value) diff --git a/tests/test_system_models.py b/tests/test_system_models.py index d932bd6..1a3b49a 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import unittest from datetime import date import os From 88b1cea98c2b97c88f263fbd5a621d181a9a8109 Mon Sep 17 00:00:00 2001 From: Marsel Date: Thu, 17 Aug 2017 00:00:47 +0300 Subject: [PATCH 50/53] Fix TypeError in Py3 --- src/infi/clickhouse_orm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py index 5f788f3..e3eb4bb 100644 --- a/src/infi/clickhouse_orm/utils.py +++ b/src/infi/clickhouse_orm/utils.py @@ -44,7 +44,7 @@ def parse_tsv(line): line = line.decode() if line and line[-1] == '\n': line = line[:-1] - return [unescape(value) for value in line.split(b'\t')] + return [unescape(value) for value in line.split(str('\t'))] def parse_array(array_string): From 9891ccffafc865644b65b7adfcc83b86fa9179be Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 20 Aug 2017 09:36:26 +0300 Subject: [PATCH 51/53] test_freeze fails, possibly due to race condition --- tests/test_system_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_system_models.py b/tests/test_system_models.py index 1a3b49a..feff71a 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -63,6 +63,7 @@ class SystemPartTest(unittest.TestCase): # There can be other backups in the folder prev_backups = set(self._get_backups()) parts[0].freeze() + sleep(1) backups = set(self._get_backups()) self.assertEqual(len(backups), len(prev_backups) + 1) From 6bea4cfc681b33a6a3f6f308512967ee3c505eec Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 20 Aug 2017 09:46:00 +0300 Subject: [PATCH 52/53] fix test_freeze --- tests/test_system_models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_system_models.py b/tests/test_system_models.py index feff71a..1a3b49a 100644 --- a/tests/test_system_models.py +++ b/tests/test_system_models.py @@ -63,7 +63,6 @@ class SystemPartTest(unittest.TestCase): # There can be other backups in the folder prev_backups = set(self._get_backups()) parts[0].freeze() - sleep(1) backups = set(self._get_backups()) self.assertEqual(len(backups), len(prev_backups) + 1) From bcd3c2ae45dc9c754dced78599ae0e46bf27b459 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 20 Aug 2017 10:36:21 +0300 Subject: [PATCH 53/53] Fix python3 compatibility (TvoroG) --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 470a9a2..f1f50a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ Change Log Unreleased ---------- +- Fix python3 compatibility (TvoroG) - Nullable arrays not supported in latest ClickHouse version - system.parts table no longer includes "replicated" column in latest ClickHouse version