From c6c9f13e5114c5fd31f1b119d8f3230f7c1a9752 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 30 Jun 2016 11:36:54 +0300 Subject: [PATCH 1/6] Support dashes and other special characters in database names and table names --- src/infi/clickhouse_orm/database.py | 8 ++++---- src/infi/clickhouse_orm/models.py | 4 ++-- tests/test_database.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index b8c2e9f..2db3294 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -14,7 +14,7 @@ class Database(object): self.db_url = db_url self.username = username self.password = password - self._send('CREATE DATABASE IF NOT EXISTS ' + db_name) + self._send('CREATE DATABASE IF NOT EXISTS `%s`' % db_name) def create_table(self, model_class): # TODO check that model has an engine @@ -24,7 +24,7 @@ class Database(object): self._send(model_class.drop_table_sql(self.db_name)) def drop_database(self): - self._send('DROP DATABASE ' + self.db_name) + self._send('DROP DATABASE `%s`' % self.db_name) def insert(self, model_instances): i = iter(model_instances) @@ -34,7 +34,7 @@ class Database(object): return # model_instances is empty model_class = first_instance.__class__ def gen(): - yield 'INSERT INTO %s.%s FORMAT TabSeparated\n' % (self.db_name, model_class.table_name()) + yield 'INSERT INTO `%s`.`%s` FORMAT TabSeparated\n' % (self.db_name, model_class.table_name()) yield first_instance.to_tsv() yield '\n' for instance in i: @@ -43,7 +43,7 @@ class Database(object): self._send(gen()) def count(self, model_class, conditions=None): - query = 'SELECT count() FROM %s.%s' % (self.db_name, model_class.table_name()) + query = 'SELECT count() FROM `%s`.`%s`' % (self.db_name, model_class.table_name()) if conditions: query += ' WHERE ' + conditions r = self._send(query) diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py index 
ece5557..e71a8e9 100644 --- a/src/infi/clickhouse_orm/models.py +++ b/src/infi/clickhouse_orm/models.py @@ -92,7 +92,7 @@ class Model(object): ''' Returns the SQL command for creating a table for this model. ''' - parts = ['CREATE TABLE IF NOT EXISTS %s.%s (' % (db_name, cls.table_name())] + parts = ['CREATE TABLE IF NOT EXISTS `%s`.`%s` (' % (db_name, cls.table_name())] cols = [] for name, field in cls._fields: default = field.get_db_prep_value(field.default) @@ -107,7 +107,7 @@ class Model(object): ''' Returns the SQL command for deleting this model's table. ''' - return 'DROP TABLE IF EXISTS %s.%s' % (db_name, cls.table_name()) + return 'DROP TABLE IF EXISTS `%s`.`%s`' % (db_name, cls.table_name()) @classmethod def from_tsv(cls, line, field_names=None): diff --git a/tests/test_database.py b/tests/test_database.py index fa6b7cc..46cf3b2 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -9,7 +9,7 @@ from infi.clickhouse_orm.engines import * class DatabaseTestCase(unittest.TestCase): def setUp(self): - self.database = Database('test_db') + self.database = Database('test-db') self.database.create_table(Person) def tearDown(self): @@ -41,7 +41,7 @@ class DatabaseTestCase(unittest.TestCase): def test_select(self): self._insert_and_check(self._sample_data(), len(data)) - query = "SELECT * FROM test_db.person WHERE first_name = 'Whitney' ORDER BY last_name" + query = "SELECT * FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name" results = list(self.database.select(query, Person)) self.assertEquals(len(results), 2) self.assertEquals(results[0].last_name, 'Durham') @@ -51,7 +51,7 @@ class DatabaseTestCase(unittest.TestCase): def test_select_partial_fields(self): self._insert_and_check(self._sample_data(), len(data)) - query = "SELECT first_name, last_name FROM test_db.person WHERE first_name = 'Whitney' ORDER BY last_name" + query = "SELECT first_name, last_name FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name" 
results = list(self.database.select(query, Person)) self.assertEquals(len(results), 2) self.assertEquals(results[0].last_name, 'Durham') @@ -61,7 +61,7 @@ class DatabaseTestCase(unittest.TestCase): def test_select_ad_hoc_model(self): self._insert_and_check(self._sample_data(), len(data)) - query = "SELECT * FROM test_db.person WHERE first_name = 'Whitney' ORDER BY last_name" + query = "SELECT * FROM `test-db`.person WHERE first_name = 'Whitney' ORDER BY last_name" results = list(self.database.select(query)) self.assertEquals(len(results), 2) self.assertEquals(results[0].__class__.__name__, 'AdHocModel') From 9489115d6eee5ab138fc6a4208fdb47cb85dfe55 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 30 Jun 2016 12:11:47 +0300 Subject: [PATCH 2/6] Fix bug in datetime parsing --- src/infi/clickhouse_orm/fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 1dfe3ee..20052f9 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -97,7 +97,7 @@ class DateTimeField(Field): if isinstance(value, int): return datetime.datetime.fromtimestamp(value, pytz.utc) if isinstance(value, basestring): - return datetime.datetime.strptime(value, '%Y-%m-%d %H-%M-%S') + return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) def get_db_prep_value(self, value): From 4efdcda97b11404f45e42788732445af260c915c Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 30 Jun 2016 16:12:02 +0300 Subject: [PATCH 3/6] Fix bug in date parsing --- src/infi/clickhouse_orm/fields.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 20052f9..2829910 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -73,7 +73,8 @@ class DateField(Field): if isinstance(value, 
int): return DateField.class_default + datetime.timedelta(days=value) if isinstance(value, basestring): - # TODO parse '0000-00-00' + if value == '0000-00-00': + return DateField.min_value return datetime.datetime.strptime(value, '%Y-%m-%d').date() raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) From a3bd21213331fa45fd3f7f9222273a2d01424f7e Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Thu, 30 Jun 2016 16:17:53 +0300 Subject: [PATCH 4/6] Simplify conversion of int and float fields --- src/infi/clickhouse_orm/fields.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py index 2829910..44e833a 100644 --- a/src/infi/clickhouse_orm/fields.py +++ b/src/infi/clickhouse_orm/fields.py @@ -108,11 +108,10 @@ class DateTimeField(Field): class BaseIntField(Field): def to_python(self, value): - if isinstance(value, int): - return value - if isinstance(value, basestring): + try: return int(value) - raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) + except: + raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) def validate(self, value): self._range_check(value, self.min_value, self.max_value) @@ -177,11 +176,10 @@ class Int64Field(BaseIntField): class BaseFloatField(Field): def to_python(self, value): - if isinstance(value, float): - return value - if isinstance(value, basestring) or isinstance(value, int): + try: return float(value) - raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) + except: + raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) class Float32Field(BaseFloatField): From b4f4af8c75866e49efc4ab6b021f8a3f78213a8c Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Sun, 3 Jul 2016 15:39:08 +0300 Subject: [PATCH 5/6] support pagination --- README.rst | 28 +++++++++++++++++++++++++++- src/infi/clickhouse_orm/database.py 
| 22 ++++++++++++++++++++++ tests/test_database.py | 21 +++++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 09f5e0a..13a5519 100644 --- a/README.rst +++ b/README.rst @@ -113,6 +113,32 @@ The ``Database`` class also supports counting records easily:: >>> db.count(Person, conditions="height > 1.90") 6 +Pagination +---------- + +It is possible to paginate through model instances:: + + >>> order_by = 'first_name, last_name' + >>> page = db.paginate(Person, order_by, page_num=1, page_size=100) + >>> print page.number_of_objects + 2507 + >>> print page.pages_total + 251 + >>> for person in page.objects: + >>> # do something + +The ``paginate`` method returns a ``namedtuple`` containing the following fields: + +- ``objects`` - the list of objects in this page +- ``number_of_objects`` - total number of objects in all pages +- ``pages_total`` - total number of pages +- ``number`` - the page number +- ``page_size`` - the number of objects per page + +You can optionally pass conditions to the query:: + + >>> page = db.paginate(Person, order_by, page_num=1, page_size=100, conditions='height > 1.90') + Field Types ----------- @@ -178,4 +204,4 @@ After cloning the project, run the following commands:: To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run:: - bin/nosetests + bin/nosetests \ No newline at end of file diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py index 2db3294..fd85044 100644 --- a/src/infi/clickhouse_orm/database.py +++ b/src/infi/clickhouse_orm/database.py @@ -1,6 +1,11 @@ import requests +from collections import namedtuple from models import ModelBase from utils import escape, parse_tsv +from math import ceil + + +Page = namedtuple('Page', 'objects number_of_objects pages_total number page_size') class DatabaseException(Exception): @@ -59,6 +64,23 @@ class Database(object): for line in lines: 
yield model_class.from_tsv(line, field_names) + def paginate(self, model_class, order_by, page_num=1, page_size=100, conditions=None, settings=None): + count = self.count(model_class, conditions) + pages_total = int(ceil(count / float(page_size))) + offset = (page_num - 1) * page_size + query = 'SELECT * FROM `%s`.`%s`' % (self.db_name, model_class.table_name()) + if conditions: + query += ' WHERE ' + conditions + query += ' ORDER BY %s' % order_by + query += ' LIMIT %d, %d' % (offset, page_size) + return Page( + objects=list(self.select(query, model_class, settings)), + number_of_objects=count, + pages_total=pages_total, + number=page_num, + page_size=page_size + ) + def _send(self, data, settings=None): params = self._build_params(settings) r = requests.post(self.db_url, params=params, data=data, stream=True) diff --git a/tests/test_database.py b/tests/test_database.py index 46cf3b2..6950753 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -5,6 +5,9 @@ from infi.clickhouse_orm.models import Model from infi.clickhouse_orm.fields import * from infi.clickhouse_orm.engines import * +import logging +logging.getLogger("requests").setLevel(logging.WARNING) + class DatabaseTestCase(unittest.TestCase): @@ -70,6 +73,24 @@ class DatabaseTestCase(unittest.TestCase): self.assertEquals(results[1].last_name, 'Scott') self.assertEquals(results[1].height, 1.70) + def test_pagination(self): + self._insert_and_check(self._sample_data(), len(data)) + # Try different page sizes + for page_size in (1, 2, 7, 10, 30, 100, 150): + # Iterate over pages and collect all instances + page_num = 1 + instances = set() + while True: + page = self.database.paginate(Person, 'first_name, last_name', page_num, page_size) + self.assertEquals(page.number_of_objects, len(data)) + self.assertGreater(page.pages_total, 0) + [instances.add(obj.to_tsv()) for obj in page.objects] + if page.pages_total == page_num: + break + page_num += 1 + # Verify that all instances were returned + 
self.assertEquals(len(instances), len(data)) + def _sample_data(self): for entry in data: yield Person(**entry) From 255df6091ab54534575c6d683386e97fbc05f918 Mon Sep 17 00:00:00 2001 From: Itai Shirav Date: Mon, 4 Jul 2016 11:42:30 +0300 Subject: [PATCH 6/6] added README note --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 13a5519..7070323 100644 --- a/README.rst +++ b/README.rst @@ -139,6 +139,9 @@ You can optionally pass conditions to the query:: >>> page = db.paginate(Person, order_by, page_num=1, page_size=100, conditions='height > 1.90') +Note that ``order_by`` must be chosen so that the ordering is unique, otherwise there might be +inconsistencies in the pagination (such as an instance that appears on two different pages). + Field Types -----------