diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c1eacdb --- /dev/null +++ b/.gitignore @@ -0,0 +1,59 @@ + +# generic +*.tmp +*.swp +*.pyc +*.pyo +*.bak +*.egg-info +*.EGG-INFO +.coverage +.settings + +# eclipse +.project +.pydevproject + +# setuptools +bin/ +build/ +dist/ +setup.py + +# buildout +buildout.in.cfg +.installed*.cfg +.cache +.cache/ +eggs +eggs/ +develop-eggs/ +parts +parts/ +build/ +dist/ +src/infi/hello/__version__.py +.achievements + +# scm +.bzr/ +.svn/ +.hg/ + +# msi-related +parts/wix +parts/product* +parts/*.wixpdb +parts/*.msi + + +src/infi/projector/__version__.py +.codeintel +*sublime* + +tmp* +!tmp*py +buildout.in + +src/infi/clickhouse_orm/__version__.py +bootstrap.py diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..8c8c107 --- /dev/null +++ b/README.rst @@ -0,0 +1,211 @@ +Overview +======== + +This project is a simple ORM for working with the `ClickHouse database <https://clickhouse.com/>`_. +It allows you to define model classes whose instances can be written to the database and read from it. + +Installation +============ + +To install infi.clickhouse_orm:: + + pip install infi.clickhouse_orm + +Usage +===== + +Defining Models +--------------- + +Models are defined in a way reminiscent of Django's ORM: + +.. code:: python + + from infi.clickhouse_orm import models, fields, engines + + class Person(models.Model): + + first_name = fields.StringField() + last_name = fields.StringField() + birthday = fields.DateField() + height = fields.Float32Field() + + engine = engines.MergeTree('birthday', ('first_name', 'last_name', 'birthday')) + +It is possible to provide a default value for a field, instead of its "natural" default (empty string for string fields, zero for numeric fields etc.). + +See below for the supported field types and table engines. + +Using Models +------------ + +Once you have a model, you can create model instances: + +.. 
code:: python + + >>> dan = Person(first_name='Dan', last_name='Schwartz') + >>> suzy = Person(first_name='Suzy', last_name='Jones') + >>> dan.first_name + u'Dan' + +When values are assigned to model fields, they are immediately converted to their Pythonic data type. +In case the value is invalid, a ``ValueError`` is raised: + +.. code:: python + + >>> suzy.birthday = '1980-01-17' + >>> suzy.birthday + datetime.date(1980, 1, 17) + >>> suzy.birthday = 0.5 + ValueError: Invalid value for DateField - 0.5 + >>> suzy.birthday = '1922-05-31' + ValueError: DateField out of range - 1922-05-31 is not between 1970-01-01 and 2038-01-19 + +Inserting to the Database +------------------------- + +To write your instances to ClickHouse, you need a ``Database`` instance: + +.. code:: python + + from infi.clickhouse_orm.database import Database + + db = Database('my_test_db') + +This automatically connects to http://localhost:8123 and creates a database called my_test_db, unless it already exists. +If necessary, you can specify a different database URL and optional credentials: + +.. code:: python + + db = Database('my_test_db', db_url='http://192.168.1.1:8050', username='scott', password='tiger') + +Using the ``Database`` instance you can create a table for your model, and insert instances to it: + +.. code:: python + + db.create_table(Person) + db.insert([dan, suzy]) + +The ``insert`` method can take any iterable of model instances, but they all must belong to the same model class. + +Reading from the Database +------------------------- + +Loading model instances from the database is simple: + +.. code:: python + + for person in db.select("SELECT * FROM my_test_db.person", model_class=Person): + print person.first_name, person.last_name + +Do not include a ``FORMAT`` clause in the query, since the ORM automatically sets the format to ``TabSeparatedWithNamesAndTypes``. + +It is possible to select only a subset of the columns, and the rest will receive their default values: + +.. 
code:: python + + for person in db.select("SELECT first_name FROM my_test_db.person WHERE last_name='Smith'", model_class=Person): + print person.first_name + +Ad-Hoc Models +************* + +Specifying a model class is not required. In case you do not provide a model class, an ad-hoc class will +be defined based on the column names and types returned by the query: + +.. code:: python + + for row in db.select("SELECT max(height) as max_height FROM my_test_db.person"): + print row.max_height + +This is a very convenient feature that saves you the need to define a model for each query, while still letting +you work with Pythonic column values and an elegant syntax. + +Counting +-------- + +The ``Database`` class also supports counting records easily: + +.. code:: python + + >>> db.count(Person) + 117 + >>> db.count(Person, conditions="height > 1.90") + 6 + +Field Types +----------- + +Currently the following field types are supported: + +============= ======== ================= =================================================== +Class DB Type Pythonic Type Comments +============= ======== ================= =================================================== +StringField String unicode Encoded as UTF-8 when written to ClickHouse +DateField Date datetime.date Range 1970-01-01 to 2038-01-19 +DateTimeField DateTime datetime.datetime Minimal value is 1970-01-01 00:00:00; Always in UTC +Int8Field Int8 int Range -128 to 127 +Int16Field Int16 int Range -32768 to 32767 +Int32Field Int32 int Range -2147483648 to 2147483647 +Int64Field Int64 int/long Range -9223372036854775808 to 9223372036854775807 +UInt8Field UInt8 int Range 0 to 255 +UInt16Field UInt16 int Range 0 to 65535 +UInt32Field UInt32 int Range 0 to 4294967295 +UInt64Field UInt64 int/long Range 0 to 18446744073709551615 +Float32Field Float32 float +Float64Field Float64 float +============= ======== ================= =================================================== + +Table Engines +------------- + +Each model 
must have an engine instance, used when creating the table in ClickHouse. + +To define a ``MergeTree`` engine, supply the date column name and the names (or expressions) for the key columns: + +.. code:: python + + engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate')) + +You may also provide a sampling expression: + +.. code:: python + + engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), sampling_expr='intHash32(UserID)') + +A ``CollapsingMergeTree`` engine is defined in a similar manner, but also requires a sign column: + +.. code:: python + + engine = engines.CollapsingMergeTree('EventDate', ('CounterID', 'EventDate'), 'Sign') + +For a ``SummingMergeTree`` you can optionally specify the summing columns: + +.. code:: python + + engine = engines.SummingMergeTree('EventDate', ('OrderID', 'EventDate', 'BannerID'), + summing_cols=('Shows', 'Clicks', 'Cost')) + +Data Replication +**************** + +Any of the above engines can be converted to a replicated engine (e.g. ``ReplicatedMergeTree``) by adding two parameters, ``replica_table_path`` and ``replica_name``: + +.. code:: python + + engine = engines.MergeTree('EventDate', ('CounterID', 'EventDate'), + replica_table_path='/clickhouse/tables/{layer}-{shard}/hits', + replica_name='{replica}') + +Development +=========== + +After cloning the project, run the following commands:: + + easy_install -U infi.projector + cd infi.clickhouse_orm + projector devenv build + +To run the tests, ensure that the ClickHouse server is running on http://localhost:8123/ (this is the default), and run:: + + bin/nosetests diff --git a/buildout.cfg b/buildout.cfg new file mode 100644 index 0000000..8238f89 --- /dev/null +++ b/buildout.cfg @@ -0,0 +1,60 @@ +[buildout] +prefer-final = false +newest = false +download-cache = .cache +develop = . 
+parts = + +[project] +name = infi.clickhouse_orm +company = Infinidat +namespace_packages = ['infi'] +install_requires = [ + 'pytz', + 'requests', + 'setuptools' + ] +version_file = src/infi/clickhouse_orm/__version__.py +description = A Python library for working with the ClickHouse database +long_description = A Python library for working with the ClickHouse database +console_scripts = [] +gui_scripts = [] +package_data = [] +upgrade_code = {58530fba-3932-11e6-a20e-7071bc32067f} +product_name = infi.clickhouse_orm +post_install_script_name = None +pre_uninstall_script_name = None + +[isolated-python] +recipe = infi.recipe.python +version = v2.7.8.4 + +[setup.py] +recipe = infi.recipe.template.version +input = setup.in +output = setup.py + +[__version__.py] +recipe = infi.recipe.template.version +output = ${project:version_file} + +[development-scripts] +dependent-scripts = true +recipe = infi.recipe.console_scripts +eggs = ${project:name} + ipython + nose + infi.unittest + infi.traceback + zc.buildout +scripts = ipython + nosetests +interpreter = python + +[pack] +recipe = infi.recipe.application_packager + +[sublime] +recipe = corneti.recipes.codeintel +eggs = ${development-scripts:eggs} + diff --git a/setup.in b/setup.in new file mode 100644 index 0000000..2f3cef0 --- /dev/null +++ b/setup.in @@ -0,0 +1,48 @@ + +SETUP_INFO = dict( + name = '${project:name}', + version = '${infi.recipe.template.version:version}', + author = '${infi.recipe.template.version:author}', + author_email = '${infi.recipe.template.version:author_email}', + + url = ${infi.recipe.template.version:homepage}, + license = 'PSF', + description = """${project:description}""", + long_description = """${project:long_description}""", + + # http://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers = [ + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "License :: OSI Approved :: Python Software Foundation License", + "Operating System :: OS 
Independent", + "Programming Language :: Python", + "Topic :: Software Development :: Libraries :: Python Modules", + ], + + install_requires = ${project:install_requires}, + namespace_packages = ${project:namespace_packages}, + + package_dir = {'': 'src'}, + package_data = {'': ${project:package_data}}, + include_package_data = True, + zip_safe = False, + + entry_points = dict( + console_scripts = ${project:console_scripts}, + gui_scripts = ${project:gui_scripts}, + ), +) + +if SETUP_INFO['url'] is None: + _ = SETUP_INFO.pop('url') + +def setup(): + from setuptools import setup as _setup + from setuptools import find_packages + SETUP_INFO['packages'] = find_packages('src') + _setup(**SETUP_INFO) + +if __name__ == '__main__': + setup() + diff --git a/src/infi/__init__.py b/src/infi/__init__.py new file mode 100644 index 0000000..5284146 --- /dev/null +++ b/src/infi/__init__.py @@ -0,0 +1 @@ +__import__("pkg_resources").declare_namespace(__name__) diff --git a/src/infi/clickhouse_orm/__init__.py b/src/infi/clickhouse_orm/__init__.py new file mode 100644 index 0000000..5284146 --- /dev/null +++ b/src/infi/clickhouse_orm/__init__.py @@ -0,0 +1 @@ +__import__("pkg_resources").declare_namespace(__name__) diff --git a/src/infi/clickhouse_orm/database.py b/src/infi/clickhouse_orm/database.py new file mode 100644 index 0000000..b8c2e9f --- /dev/null +++ b/src/infi/clickhouse_orm/database.py @@ -0,0 +1,75 @@ +import requests +from models import ModelBase +from utils import escape, parse_tsv + + +class DatabaseException(Exception): + pass + + +class Database(object): + + def __init__(self, db_name, db_url='http://localhost:8123/', username=None, password=None): + self.db_name = db_name + self.db_url = db_url + self.username = username + self.password = password + self._send('CREATE DATABASE IF NOT EXISTS ' + db_name) + + def create_table(self, model_class): + # TODO check that model has an engine + self._send(model_class.create_table_sql(self.db_name)) + + def 
drop_table(self, model_class): + self._send(model_class.drop_table_sql(self.db_name)) + + def drop_database(self): + self._send('DROP DATABASE ' + self.db_name) + + def insert(self, model_instances): + i = iter(model_instances) + try: + first_instance = next(i) + except StopIteration: + return # model_instances is empty + model_class = first_instance.__class__ + def gen(): + yield 'INSERT INTO %s.%s FORMAT TabSeparated\n' % (self.db_name, model_class.table_name()) + yield first_instance.to_tsv() + yield '\n' + for instance in i: + yield instance.to_tsv() + yield '\n' + self._send(gen()) + + def count(self, model_class, conditions=None): + query = 'SELECT count() FROM %s.%s' % (self.db_name, model_class.table_name()) + if conditions: + query += ' WHERE ' + conditions + r = self._send(query) + return int(r.text) if r.text else 0 + + def select(self, query, model_class=None, settings=None): + query += ' FORMAT TabSeparatedWithNamesAndTypes' + r = self._send(query, settings) + lines = r.iter_lines() + field_names = parse_tsv(next(lines)) + field_types = parse_tsv(next(lines)) + model_class = model_class or ModelBase.create_ad_hoc_model(zip(field_names, field_types)) + for line in lines: + yield model_class.from_tsv(line, field_names) + + def _send(self, data, settings=None): + params = self._build_params(settings) + r = requests.post(self.db_url, params=params, data=data, stream=True) + if r.status_code != 200: + raise DatabaseException(r.text) + return r + + def _build_params(self, settings): + params = dict(settings or {}) + if self.username: + params['user'] = self.username + if self.password: + params['password'] = self.password + return params diff --git a/src/infi/clickhouse_orm/engines.py b/src/infi/clickhouse_orm/engines.py new file mode 100644 index 0000000..3f4870b --- /dev/null +++ b/src/infi/clickhouse_orm/engines.py @@ -0,0 +1,64 @@ + +class Engine(object): + + def create_table_sql(self): + raise NotImplementedError() + + +class MergeTree(Engine): + + def 
__init__(self, date_col, key_cols, sampling_expr=None, + index_granularity=8192, replica_table_path=None, replica_name=None): + self.date_col = date_col + self.key_cols = key_cols + self.sampling_expr = sampling_expr + self.index_granularity = index_granularity + self.replica_table_path = replica_table_path + self.replica_name = replica_name + # TODO verify that both replica fields are either present or missing + + def create_table_sql(self): + name = self.__class__.__name__ + if self.replica_name: + name = 'Replicated' + name + params = self._build_sql_params() + return '%s(%s)' % (name, ', '.join(params)) + + def _build_sql_params(self): + params = [] + if self.replica_name: + params += ["'%s'" % self.replica_table_path, "'%s'" % self.replica_name] + params.append(self.date_col) + if self.sampling_expr: + params.append(self.sampling_expr) + params.append('(%s)' % ', '.join(self.key_cols)) + params.append(str(self.index_granularity)) + return params + + +class CollapsingMergeTree(MergeTree): + + def __init__(self, date_col, key_cols, sign_col, sampling_expr=None, + index_granularity=8192, replica_table_path=None, replica_name=None): + super(CollapsingMergeTree, self).__init__(date_col, key_cols, sampling_expr, index_granularity, replica_table_path, replica_name) + self.sign_col = sign_col + + def _build_sql_params(self): + params = super(CollapsingMergeTree, self)._build_sql_params() + params.append(self.sign_col) + return params + + +class SummingMergeTree(MergeTree): + + def __init__(self, date_col, key_cols, summing_cols=None, sampling_expr=None, + index_granularity=8192, replica_table_path=None, replica_name=None): + super(SummingMergeTree, self).__init__(date_col, key_cols, sampling_expr, index_granularity, replica_table_path, replica_name) + self.summing_cols = summing_cols + + def _build_sql_params(self): + params = super(SummingMergeTree, self)._build_sql_params() + if self.summing_cols: + params.append('(%s)' % ', '.join(self.summing_cols)) + return 
params + diff --git a/src/infi/clickhouse_orm/fields.py b/src/infi/clickhouse_orm/fields.py new file mode 100644 index 0000000..1dfe3ee --- /dev/null +++ b/src/infi/clickhouse_orm/fields.py @@ -0,0 +1,194 @@ +import datetime +import pytz +import time + + +class Field(object): + + creation_counter = 0 + class_default = 0 + db_type = None + + def __init__(self, default=None): + self.creation_counter = Field.creation_counter + Field.creation_counter += 1 + self.default = default or self.class_default + + def to_python(self, value): + ''' + Converts the input value into the expected Python data type, raising ValueError if the + data can't be converted. Returns the converted value. Subclasses should override this. + ''' + return value + + def validate(self, value): + ''' + Called after to_python to validate that the value is suitable for the field's database type. + Subclasses should override this. + ''' + pass + + def _range_check(self, value, min_value, max_value): + ''' + Utility method to check that the given value is between min_value and max_value. + ''' + if value < min_value or value > max_value: + raise ValueError('%s out of range - %s is not between %s and %s' % (self.__class__.__name__, value, min_value, max_value)) + + def get_db_prep_value(self, value): + ''' + Returns the field's value prepared for interacting with the database. 
''' + return value + + +class StringField(Field): + + class_default = '' + db_type = 'String' + + def to_python(self, value): + if isinstance(value, unicode): + return value + if isinstance(value, str): + return value.decode('UTF-8') + raise ValueError('Invalid value for %s: %r' % (self.__class__.__name__, value)) + + def get_db_prep_value(self, value): + if isinstance(value, unicode): + return value.encode('UTF-8') + return value + + +class DateField(Field): + + min_value = datetime.date(1970, 1, 1) + max_value = datetime.date(2038, 1, 19) + class_default = min_value + db_type = 'Date' + + def to_python(self, value): + if isinstance(value, datetime.date): + return value + if isinstance(value, int): + return DateField.class_default + datetime.timedelta(days=value) + if isinstance(value, basestring): + # TODO parse '0000-00-00' + return datetime.datetime.strptime(value, '%Y-%m-%d').date() + raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) + + def validate(self, value): + self._range_check(value, DateField.min_value, DateField.max_value) + + def get_db_prep_value(self, value): + return value.isoformat() + + +class DateTimeField(Field): + + class_default = datetime.datetime.fromtimestamp(0, pytz.utc) + db_type = 'DateTime' + + def to_python(self, value): + if isinstance(value, datetime.datetime): + return value + if isinstance(value, datetime.date): + return datetime.datetime(value.year, value.month, value.day) + if isinstance(value, int): + return datetime.datetime.fromtimestamp(value, pytz.utc) + if isinstance(value, basestring): + return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') + raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) + + def get_db_prep_value(self, value): + return int(time.mktime(value.timetuple())) + + +class BaseIntField(Field): + + def to_python(self, value): + if isinstance(value, (int, long)): + return value + if isinstance(value, basestring): + return int(value) + raise 
ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) + + def validate(self, value): + self._range_check(value, self.min_value, self.max_value) + + +class UInt8Field(BaseIntField): + + min_value = 0 + max_value = 2**8 - 1 + db_type = 'UInt8' + + +class UInt16Field(BaseIntField): + + min_value = 0 + max_value = 2**16 - 1 + db_type = 'UInt16' + + +class UInt32Field(BaseIntField): + + min_value = 0 + max_value = 2**32 - 1 + db_type = 'UInt32' + + +class UInt64Field(BaseIntField): + + min_value = 0 + max_value = 2**64 - 1 + db_type = 'UInt64' + + +class Int8Field(BaseIntField): + + min_value = -2**7 + max_value = 2**7 - 1 + db_type = 'Int8' + + +class Int16Field(BaseIntField): + + min_value = -2**15 + max_value = 2**15 - 1 + db_type = 'Int16' + + +class Int32Field(BaseIntField): + + min_value = -2**31 + max_value = 2**31 - 1 + db_type = 'Int32' + + +class Int64Field(BaseIntField): + + min_value = -2**63 + max_value = 2**63 - 1 + db_type = 'Int64' + + +class BaseFloatField(Field): + + def to_python(self, value): + if isinstance(value, float): + return value + if isinstance(value, basestring) or isinstance(value, int): + return float(value) + raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) + + +class Float32Field(BaseFloatField): + + db_type = 'Float32' + + +class Float64Field(BaseFloatField): + + db_type = 'Float64' + diff --git a/src/infi/clickhouse_orm/models.py b/src/infi/clickhouse_orm/models.py new file mode 100644 index 0000000..13dffa4 --- /dev/null +++ b/src/infi/clickhouse_orm/models.py @@ -0,0 +1,129 @@ +from utils import escape, parse_tsv +from engines import * +from fields import Field + + +class ModelBase(type): + ''' + A metaclass for ORM models. It adds the _fields list to model classes. 
+ ''' + + def __new__(cls, name, bases, attrs): + new_cls = super(ModelBase, cls).__new__(cls, name, bases, attrs) + # Build a list of fields, in the order they were listed in the class + fields = [item for item in attrs.items() if isinstance(item[1], Field)] + fields.sort(key=lambda item: item[1].creation_counter) + setattr(new_cls, '_fields', fields) + return new_cls + + @classmethod + def create_ad_hoc_model(cls, fields): + # fields is a list of tuples (name, db_type) + import fields as orm_fields + attrs = {} + for name, db_type in fields: + field_class = db_type + 'Field' + if not hasattr(orm_fields, field_class): + raise NotImplementedError('No field class for %s' % db_type) + attrs[name] = getattr(orm_fields, field_class)() + return cls.__new__(cls, 'AdHocModel', (Model,), attrs) + + +class Model(object): + ''' + A base class for ORM models. + ''' + + __metaclass__ = ModelBase + engine = None + + def __init__(self, **kwargs): + ''' + Creates a model instance, using keyword arguments as field values. + Since values are immediately converted to their Pythonic type, + invalid values will cause a ValueError to be raised. + Unrecognized field names will cause an AttributeError. + ''' + super(Model, self).__init__() + # Assign field values from keyword arguments + for name, value in kwargs.iteritems(): + field = self.get_field(name) + if field: + setattr(self, name, value) + else: + raise AttributeError('%s does not have a field called %s' % (self.__class__.__name__, name)) + # Assign default values for fields not included in the keyword arguments + for name, field in self._fields: + if name not in kwargs: + setattr(self, name, field.default) + + def __setattr__(self, name, value): + ''' + When setting a field value, converts the value to its Pythonic type and validates it. + This may raise a ValueError. 
+ ''' + field = self.get_field(name) + if field: + value = field.to_python(value) + field.validate(value) + super(Model, self).__setattr__(name, value) + + def get_field(self, name): + ''' + Get a Field instance given its name, or None if not found. + ''' + field = getattr(self.__class__, name, None) + return field if isinstance(field, Field) else None + + @classmethod + def table_name(cls): + ''' + Returns the model's database table name. + ''' + return cls.__name__.lower() + + @classmethod + def create_table_sql(cls, db_name): + ''' + Returns the SQL command for creating a table for this model. + ''' + parts = ['CREATE TABLE IF NOT EXISTS %s.%s (' % (db_name, cls.table_name())] + cols = [] + for name, field in cls._fields: + default = field.get_db_prep_value(field.default) + cols.append(' %s %s DEFAULT %s' % (name, field.db_type, escape(default))) + parts.append(',\n'.join(cols)) + parts.append(')') + parts.append('ENGINE = ' + cls.engine.create_table_sql()) + return '\n'.join(parts) + + @classmethod + def drop_table_sql(cls, db_name): + ''' + Returns the SQL command for deleting this model's table. + ''' + return 'DROP TABLE IF EXISTS %s.%s' % (db_name, cls.table_name()) + + @classmethod + def from_tsv(cls, line, field_names=None): + ''' + Create a model instance from a tab-separated line. The line may or may not include a newline. + The field_names list must match the fields defined in the model, but does not have to include all of them. + If omitted, it is assumed to be the names of all fields in the model, in order of definition. + ''' + field_names = field_names or [name for name, field in cls._fields] + values = iter(parse_tsv(line)) + kwargs = {} + for name in field_names: + kwargs[name] = values.next() + return cls(**kwargs) + + def to_tsv(self): + ''' + Returns the instance's column values as a tab-separated line. A newline is not included. 
''' + parts = [] + for name, field in self._fields: + value = field.get_db_prep_value(field.to_python(getattr(self, name))) + parts.append(escape(value, quote=False)) + return '\t'.join(parts) diff --git a/src/infi/clickhouse_orm/utils.py b/src/infi/clickhouse_orm/utils.py new file mode 100644 index 0000000..62c61e2 --- /dev/null +++ b/src/infi/clickhouse_orm/utils.py @@ -0,0 +1,28 @@ + +SPECIAL_CHARS = { + "\b" : "\\b", + "\f" : "\\f", + "\r" : "\\r", + "\n" : "\\n", + "\t" : "\\t", + "\0" : "\\0", + "\\" : "\\\\", + "'" : "\\'" +} + + +def escape(value, quote=True): + if isinstance(value, basestring): + chars = (SPECIAL_CHARS.get(c, c) for c in value) + return "'" + "".join(chars) + "'" if quote else "".join(chars) + return str(value) + + +def unescape(value): + return value.decode('string_escape') + + +def parse_tsv(line): + if line and line[-1] == '\n': + line = line[:-1] + return [unescape(value) for value in line.split('\t')] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_database.py b/tests/test_database.py new file mode 100644 index 0000000..fa6b7cc --- /dev/null +++ b/tests/test_database.py @@ -0,0 +1,189 @@ +import unittest + +from infi.clickhouse_orm.database import Database +from infi.clickhouse_orm.models import Model +from infi.clickhouse_orm.fields import * +from infi.clickhouse_orm.engines import * + + +class DatabaseTestCase(unittest.TestCase): + + def setUp(self): + self.database = Database('test_db') + self.database.create_table(Person) + + def tearDown(self): + self.database.drop_table(Person) + self.database.drop_database() + + def _insert_and_check(self, data, count): + self.database.insert(data) + self.assertEquals(count, self.database.count(Person)) + + def test_insert__generator(self): + self._insert_and_check(self._sample_data(), len(data)) + + def test_insert__list(self): + self._insert_and_check(list(self._sample_data()), len(data)) + + def test_insert__iterator(self): + 
self._insert_and_check(iter(self._sample_data()), len(data)) + + def test_insert__empty(self): + self._insert_and_check([], 0) + + def test_count(self): + self.database.insert(self._sample_data()) + self.assertEquals(self.database.count(Person), 100) + self.assertEquals(self.database.count(Person, "first_name = 'Courtney'"), 2) + self.assertEquals(self.database.count(Person, "birthday > '2000-01-01'"), 22) + self.assertEquals(self.database.count(Person, "birthday < '1970-03-01'"), 0) + + def test_select(self): + self._insert_and_check(self._sample_data(), len(data)) + query = "SELECT * FROM test_db.person WHERE first_name = 'Whitney' ORDER BY last_name" + results = list(self.database.select(query, Person)) + self.assertEquals(len(results), 2) + self.assertEquals(results[0].last_name, 'Durham') + self.assertEquals(results[0].height, 1.72) + self.assertEquals(results[1].last_name, 'Scott') + self.assertEquals(results[1].height, 1.70) + + def test_select_partial_fields(self): + self._insert_and_check(self._sample_data(), len(data)) + query = "SELECT first_name, last_name FROM test_db.person WHERE first_name = 'Whitney' ORDER BY last_name" + results = list(self.database.select(query, Person)) + self.assertEquals(len(results), 2) + self.assertEquals(results[0].last_name, 'Durham') + self.assertEquals(results[0].height, 0) # default value + self.assertEquals(results[1].last_name, 'Scott') + self.assertEquals(results[1].height, 0) # default value + + def test_select_ad_hoc_model(self): + self._insert_and_check(self._sample_data(), len(data)) + query = "SELECT * FROM test_db.person WHERE first_name = 'Whitney' ORDER BY last_name" + results = list(self.database.select(query)) + self.assertEquals(len(results), 2) + self.assertEquals(results[0].__class__.__name__, 'AdHocModel') + self.assertEquals(results[0].last_name, 'Durham') + self.assertEquals(results[0].height, 1.72) + self.assertEquals(results[1].last_name, 'Scott') + self.assertEquals(results[1].height, 1.70) + + def 
_sample_data(self): + for entry in data: + yield Person(**entry) + + +class Person(Model): + + first_name = StringField() + last_name = StringField() + birthday = DateField() + height = Float32Field() + + engine = MergeTree('birthday', ('first_name', 'last_name', 'birthday')) + + +data = [ + {"first_name": "Abdul", "last_name": "Hester", "birthday": "1970-12-02", "height": "1.63"}, + {"first_name": "Adam", "last_name": "Goodman", "birthday": "1986-01-07", "height": "1.74"}, + {"first_name": "Adena", "last_name": "Norman", "birthday": "1979-05-14", "height": "1.66"}, + {"first_name": "Aline", "last_name": "Crane", "birthday": "1988-05-01", "height": "1.62"}, + {"first_name": "Althea", "last_name": "Barrett", "birthday": "2004-07-28", "height": "1.71"}, + {"first_name": "Amanda", "last_name": "Vang", "birthday": "1973-02-23", "height": "1.68"}, + {"first_name": "Angela", "last_name": "Sanders", "birthday": "2016-01-08", "height": "1.66"}, + {"first_name": "Anne", "last_name": "Rasmussen", "birthday": "1995-04-03", "height": "1.77"}, + {"first_name": "Ariana", "last_name": "Cole", "birthday": "1977-12-20", "height": "1.72"}, + {"first_name": "Ashton", "last_name": "Fuller", "birthday": "1995-11-17", "height": "1.75"}, + {"first_name": "Ava", "last_name": "Sanders", "birthday": "1997-08-10", "height": "1.60"}, + {"first_name": "Barrett", "last_name": "Clemons", "birthday": "1985-07-03", "height": "1.71"}, + {"first_name": "Beatrice", "last_name": "Gregory", "birthday": "1992-01-19", "height": "1.80"}, + {"first_name": "Buffy", "last_name": "Webb", "birthday": "1990-03-06", "height": "1.68"}, + {"first_name": "Callie", "last_name": "Wiley", "birthday": "1987-11-24", "height": "1.69"}, + {"first_name": "Cara", "last_name": "Fox", "birthday": "2004-05-15", "height": "1.71"}, + {"first_name": "Caryn", "last_name": "Sears", "birthday": "1999-02-17", "height": "1.71"}, + {"first_name": "Cassady", "last_name": "Knapp", "birthday": "1977-12-15", "height": "1.72"}, + 
{"first_name": "Cassady", "last_name": "Rogers", "birthday": "2013-11-04", "height": "1.71"}, + {"first_name": "Catherine", "last_name": "Hicks", "birthday": "1989-05-23", "height": "1.80"}, + {"first_name": "Cathleen", "last_name": "Frank", "birthday": "1977-09-04", "height": "1.61"}, + {"first_name": "Celeste", "last_name": "James", "birthday": "1990-03-08", "height": "1.67"}, + {"first_name": "Chelsea", "last_name": "Castro", "birthday": "2001-08-10", "height": "1.71"}, + {"first_name": "Ciaran", "last_name": "Carver", "birthday": "2016-12-25", "height": "1.76"}, + {"first_name": "Ciaran", "last_name": "Hurley", "birthday": "1995-10-25", "height": "1.65"}, + {"first_name": "Clementine", "last_name": "Moon", "birthday": "1994-03-29", "height": "1.73"}, + {"first_name": "Connor", "last_name": "Jenkins", "birthday": "1999-07-23", "height": "1.67"}, + {"first_name": "Courtney", "last_name": "Cannon", "birthday": "1997-10-26", "height": "1.76"}, + {"first_name": "Courtney", "last_name": "Hoffman", "birthday": "1994-11-07", "height": "1.65"}, + {"first_name": "Denton", "last_name": "Sanchez", "birthday": "1971-10-16", "height": "1.72"}, + {"first_name": "Dominique", "last_name": "Sandoval", "birthday": "1972-02-01", "height": "1.72"}, + {"first_name": "Dora", "last_name": "Cabrera", "birthday": "2016-04-26", "height": "1.68"}, + {"first_name": "Eagan", "last_name": "Dodson", "birthday": "2015-10-22", "height": "1.67"}, + {"first_name": "Edan", "last_name": "Dennis", "birthday": "1989-09-18", "height": "1.73"}, + {"first_name": "Ella", "last_name": "Castillo", "birthday": "1973-03-28", "height": "1.73"}, + {"first_name": "Elton", "last_name": "Ayers", "birthday": "1994-06-20", "height": "1.68"}, + {"first_name": "Elton", "last_name": "Smith", "birthday": "1982-06-20", "height": "1.66"}, + {"first_name": "Emma", "last_name": "Clements", "birthday": "1996-08-07", "height": "1.75"}, + {"first_name": "Evangeline", "last_name": "Weber", "birthday": "1984-06-03", "height": 
"1.70"}, + {"first_name": "Faith", "last_name": "Emerson", "birthday": "1989-12-30", "height": "1.62"}, + {"first_name": "Fritz", "last_name": "Atkinson", "birthday": "2011-06-15", "height": "1.73"}, + {"first_name": "Galvin", "last_name": "Phillips", "birthday": "2004-01-17", "height": "1.74"}, + {"first_name": "Georgia", "last_name": "Kennedy", "birthday": "1974-12-29", "height": "1.66"}, + {"first_name": "Griffith", "last_name": "Henry", "birthday": "1985-04-02", "height": "1.66"}, + {"first_name": "Hedy", "last_name": "Strong", "birthday": "2001-10-04", "height": "1.60"}, + {"first_name": "Hu", "last_name": "May", "birthday": "1976-10-01", "height": "1.76"}, + {"first_name": "Hyacinth", "last_name": "Kent", "birthday": "1971-07-18", "height": "1.72"}, + {"first_name": "Idola", "last_name": "Fulton", "birthday": "1974-11-27", "height": "1.66"}, + {"first_name": "Jarrod", "last_name": "Gibbs", "birthday": "1987-06-13", "height": "1.62"}, + {"first_name": "Jesse", "last_name": "Gomez", "birthday": "2011-01-28", "height": "1.71"}, + {"first_name": "Josiah", "last_name": "Hodges", "birthday": "2011-09-04", "height": "1.68"}, + {"first_name": "Karleigh", "last_name": "Bartlett", "birthday": "1991-10-24", "height": "1.69"}, + {"first_name": "Keelie", "last_name": "Mathis", "birthday": "1993-10-26", "height": "1.69"}, + {"first_name": "Kieran", "last_name": "Solomon", "birthday": "1993-10-30", "height": "1.69"}, + {"first_name": "Laith", "last_name": "Howell", "birthday": "1991-07-07", "height": "1.70"}, + {"first_name": "Leroy", "last_name": "Pacheco", "birthday": "1998-12-30", "height": "1.70"}, + {"first_name": "Lesley", "last_name": "Stephenson", "birthday": "2010-04-10", "height": "1.64"}, + {"first_name": "Macaulay", "last_name": "Rowe", "birthday": "1982-03-02", "height": "1.68"}, + {"first_name": "Macey", "last_name": "Griffin", "birthday": "1971-09-18", "height": "1.63"}, + {"first_name": "Madeline", "last_name": "Kidd", "birthday": "1984-12-09", "height": 
"1.69"}, + {"first_name": "Maia", "last_name": "Hyde", "birthday": "1972-06-09", "height": "1.74"}, + {"first_name": "Mary", "last_name": "Kirkland", "birthday": "1987-10-09", "height": "1.73"}, + {"first_name": "Molly", "last_name": "Salas", "birthday": "1994-04-23", "height": "1.70"}, + {"first_name": "Montana", "last_name": "Bruce", "birthday": "1982-06-28", "height": "1.66"}, + {"first_name": "Naomi", "last_name": "Hays", "birthday": "2004-11-27", "height": "1.70"}, + {"first_name": "Norman", "last_name": "Santos", "birthday": "1989-01-10", "height": "1.68"}, + {"first_name": "Octavius", "last_name": "Floyd", "birthday": "1985-02-22", "height": "1.68"}, + {"first_name": "Odette", "last_name": "Mcneil", "birthday": "1978-05-21", "height": "1.76"}, + {"first_name": "Oliver", "last_name": "Ashley", "birthday": "2004-08-13", "height": "1.68"}, + {"first_name": "Quon", "last_name": "Wiggins", "birthday": "1992-05-06", "height": "1.74"}, + {"first_name": "Rafael", "last_name": "Parker", "birthday": "2016-01-24", "height": "1.76"}, + {"first_name": "Reese", "last_name": "Noel", "birthday": "1996-11-04", "height": "1.77"}, + {"first_name": "Rhona", "last_name": "Camacho", "birthday": "1976-12-17", "height": "1.59"}, + {"first_name": "Rigel", "last_name": "Oneal", "birthday": "1993-11-05", "height": "1.63"}, + {"first_name": "Roary", "last_name": "Simmons", "birthday": "1986-07-23", "height": "1.63"}, + {"first_name": "Russell", "last_name": "Pruitt", "birthday": "1979-05-04", "height": "1.63"}, + {"first_name": "Sawyer", "last_name": "Fischer", "birthday": "1995-04-01", "height": "1.78"}, + {"first_name": "Scarlett", "last_name": "Durham", "birthday": "2005-09-29", "height": "1.65"}, + {"first_name": "Seth", "last_name": "Serrano", "birthday": "2017-06-02", "height": "1.71"}, + {"first_name": "Shad", "last_name": "Bradshaw", "birthday": "1998-08-25", "height": "1.72"}, + {"first_name": "Shana", "last_name": "Jarvis", "birthday": "1997-05-21", "height": "1.72"}, + 
{"first_name": "Sharon", "last_name": "Shelton", "birthday": "1970-05-02", "height": "1.65"}, + {"first_name": "Shoshana", "last_name": "Solis", "birthday": "1998-07-18", "height": "1.65"}, + {"first_name": "Stephen", "last_name": "Baxter", "birthday": "2004-09-24", "height": "1.74"}, + {"first_name": "Sydney", "last_name": "Stevens", "birthday": "1989-07-11", "height": "1.70"}, + {"first_name": "Tasha", "last_name": "Campos", "birthday": "1984-02-11", "height": "1.72"}, + {"first_name": "Ulla", "last_name": "Arnold", "birthday": "1990-06-04", "height": "1.63"}, + {"first_name": "Vaughan", "last_name": "Schmidt", "birthday": "1985-06-19", "height": "1.61"}, + {"first_name": "Velma", "last_name": "English", "birthday": "1999-01-18", "height": "1.65"}, + {"first_name": "Venus", "last_name": "Hurst", "birthday": "1993-10-22", "height": "1.72"}, + {"first_name": "Victor", "last_name": "Woods", "birthday": "1989-06-23", "height": "1.67"}, + {"first_name": "Victoria", "last_name": "Slater", "birthday": "2009-07-19", "height": "1.72"}, + {"first_name": "Wang", "last_name": "Goodwin", "birthday": "1983-05-15", "height": "1.66"}, + {"first_name": "Warren", "last_name": "Bowen", "birthday": "2000-07-20", "height": "1.76"}, + {"first_name": "Warren", "last_name": "Dudley", "birthday": "1995-10-23", "height": "1.59"}, + {"first_name": "Whilemina", "last_name": "Blankenship", "birthday": "1970-07-14", "height": "1.66"}, + {"first_name": "Whitney", "last_name": "Durham", "birthday": "1977-09-15", "height": "1.72"}, + {"first_name": "Whitney", "last_name": "Scott", "birthday": "1971-07-04", "height": "1.70"}, + {"first_name": "Wynter", "last_name": "Garcia", "birthday": "1975-01-10", "height": "1.69"}, + {"first_name": "Yolanda", "last_name": "Duke", "birthday": "1997-02-25", "height": "1.74"} +]; diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..4b259a9 --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,68 @@ +import unittest +import 
import datetime
import pytz

from infi.clickhouse_orm.models import Model
from infi.clickhouse_orm.fields import *
from infi.clickhouse_orm.engines import *


class ModelTestCase(unittest.TestCase):
    """Exercise construction, defaults and value conversion of ``SimpleModel``."""

    def test_defaults(self):
        """A freshly constructed model exposes explicit or implicit defaults."""
        instance = SimpleModel()
        self.assertEqual(instance.date_field, datetime.date(1970, 1, 1))
        self.assertEqual(
            instance.datetime_field,
            datetime.datetime(1970, 1, 1, tzinfo=pytz.utc),
        )
        # str_field and int_field declare explicit defaults on SimpleModel.
        self.assertEqual(instance.str_field, 'dozo')
        self.assertEqual(instance.int_field, 17)
        self.assertEqual(instance.float_field, 0)

    def test_assignment(self):
        """Keyword arguments given to the constructor end up on the instance."""
        kwargs = dict(
            date_field=datetime.date(1973, 12, 6),
            datetime_field=datetime.datetime(2000, 5, 24, 10, 22, tzinfo=pytz.utc),
            str_field='aloha',
            int_field=-50,
            float_field=3.14,
        )
        instance = SimpleModel(**kwargs)
        for name, expected in kwargs.items():
            self.assertEqual(expected, getattr(instance, name))

    def test_assignment_error(self):
        """Unknown fields and invalid values are rejected."""
        # Non-existing field during construction
        with self.assertRaises(AttributeError):
            SimpleModel(int_field=7450, pineapple='tasty')
        # Invalid field values during construction
        with self.assertRaises(ValueError):
            SimpleModel(int_field='nope')
        with self.assertRaises(ValueError):
            SimpleModel(date_field='nope')
        # Invalid field value during attribute assignment
        instance = SimpleModel()
        with self.assertRaises(ValueError):
            instance.datetime_field = datetime.timedelta(days=1)

    def test_string_conversion(self):
        """String inputs are converted to the field's Python type."""
        # Conversion from string during construction
        instance = SimpleModel(date_field='1973-12-06', int_field='100', float_field='7')
        self.assertEqual(instance.date_field, datetime.date(1973, 12, 6))
        self.assertEqual(instance.int_field, 100)
        self.assertEqual(instance.float_field, 7)
        # Conversion from string during assignment
        instance.int_field = '99'
        self.assertEqual(instance.int_field, 99)


class SimpleModel(Model):
    """Model fixture with one field of each basic kind used by the tests."""

    date_field = DateField()
    datetime_field = DateTimeField()
    # Explicit defaults; test_defaults asserts on these exact values.
    str_field = StringField(default='dozo')
    int_field = Int32Field(default=17)
    float_field = Float32Field()

    engine = MergeTree('date_field', ('int_field', 'date_field'))