Functions WIP

This commit is contained in:
Itai Shirav 2020-04-14 23:03:11 +03:00
parent 19439e45ef
commit 7b3eb943e2
7 changed files with 68 additions and 12 deletions

View File

@ -58,6 +58,16 @@ class Person(Model):
... ...
``` ```
### Parametric functions
Some of ClickHouse's aggregate functions can accept not only argument columns, but a set of parameters - constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. For example:
```python
# Most common last names
F.topK(5)(Person.last_name)
# Find 90th, 95th and 99th percentile of heights
F.quantiles(0.9, 0.95, 0.99)(Person.height)
```
### Creating new "functions" ### Creating new "functions"
Since expressions are just Python objects until they get converted to SQL, it is possible to invent new "functions" by combining existing ones into useful building blocks. For example, we can create a reusable expression that takes a string and trims whitespace, converts it to uppercase, and changes blanks to underscores: Since expressions are just Python objects until they get converted to SQL, it is possible to invent new "functions" by combining existing ones into useful building blocks. For example, we can create a reusable expression that takes a string and trims whitespace, converts it to uppercase, and changes blanks to underscores:

View File

@ -165,6 +165,24 @@ class Database(object):
r = self._send(sql % (self.db_name, model_class.table_name())) r = self._send(sql % (self.db_name, model_class.table_name()))
return r.text.strip() == '1' return r.text.strip() == '1'
def get_model_for_table(self, table_name, system_table=False):
'''
Generates a model class from an existing table in the database.
This can be used for querying tables which don't have a corresponding model class,
for example system tables.
- `table_name`: the table to create a model for
- `system_table`: whether the table is a system table, or belongs to the current database
'''
db_name = 'system' if system_table else self.db_name
sql = "DESCRIBE `%s`.`%s` FORMAT TSV" % (db_name, table_name)
lines = self._send(sql).iter_lines()
fields = [parse_tsv(line)[:2] for line in lines]
model = ModelBase.create_ad_hoc_model(fields, table_name)
if system_table:
model._system = model._readonly = True
return model
def add_setting(self, name, value): def add_setting(self, name, value):
''' '''
Adds a database setting that will be sent with every request. Adds a database setting that will be sent with every request.
@ -363,7 +381,7 @@ class Database(object):
mapping = dict(db="`%s`" % self.db_name) mapping = dict(db="`%s`" % self.db_name)
if model_class: if model_class:
if model_class.is_system_model(): if model_class.is_system_model():
mapping['table'] = model_class.table_name() mapping['table'] = "`system`.`%s`" % model_class.table_name()
else: else:
mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name()) mapping['table'] = "`%s`.`%s`" % (self.db_name, model_class.table_name())
query = Template(query).safe_substitute(mapping) query = Template(query).safe_substitute(mapping)

View File

@ -423,7 +423,7 @@ class BaseEnumField(Field):
import re import re
from enum import Enum from enum import Enum
members = {} members = {}
for match in re.finditer("'(\w+)' = (\d+)", db_type): for match in re.finditer("'([\w ]+)' = (\d+)", db_type):
members[match.group(1)] = int(match.group(2)) members[match.group(1)] = int(match.group(2))
enum_cls = Enum('AdHocEnum', members) enum_cls = Enum('AdHocEnum', members)
field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field

View File

@ -68,7 +68,7 @@ class ModelBase(type):
# fields is a list of tuples (name, db_type) # fields is a list of tuples (name, db_type)
# Check if model exists in cache # Check if model exists in cache
fields = list(fields) fields = list(fields)
cache_key = str(fields) cache_key = model_name + ' ' + str(fields)
if cache_key in cls.ad_hoc_model_cache: if cache_key in cls.ad_hoc_model_cache:
return cls.ad_hoc_model_cache[cache_key] return cls.ad_hoc_model_cache[cache_key]
# Create an ad hoc model class # Create an ad hoc model class

View File

@ -375,10 +375,9 @@ class QuerySet(object):
""" """
distinct = 'DISTINCT ' if self._distinct else '' distinct = 'DISTINCT ' if self._distinct else ''
final = ' FINAL' if self._final else '' final = ' FINAL' if self._final else ''
table_name = self._model_cls.table_name() table_name = '`%s`' % self._model_cls.table_name()
if not self._model_cls.is_system_model(): if self._model_cls.is_system_model():
table_name = '`%s`' % table_name table_name = '`system`.' + table_name
params = (distinct, self.select_fields_as_sql(), table_name, final) params = (distinct, self.select_fields_as_sql(), table_name, final)
sql = u'SELECT %s%s\nFROM %s%s' % params sql = u'SELECT %s%s\nFROM %s%s' % params

View File

@ -51,7 +51,7 @@ class SystemPart(Model):
@classmethod @classmethod
def table_name(cls): def table_name(cls):
return 'system.parts' return 'parts'
""" """
Next methods return SQL for some operations, which can be done with partitions Next methods return SQL for some operations, which can be done with partitions
@ -141,7 +141,7 @@ class SystemPart(Model):
if conditions: if conditions:
conditions += " AND" conditions += " AND"
field_names = ','.join(cls.fields()) field_names = ','.join(cls.fields())
return database.select("SELECT %s FROM %s WHERE %s database='%s'" % return database.select("SELECT %s FROM `system`.%s WHERE %s database='%s'" %
(field_names, cls.table_name(), conditions, database.db_name), model_class=cls) (field_names, cls.table_name(), conditions, database.db_name), model_class=cls)
@classmethod @classmethod

View File

@ -166,8 +166,12 @@ class DatabaseTestCase(TestCaseWithData):
Database(self.database.db_name, username='default', password='wrong') Database(self.database.db_name, username='default', password='wrong')
exc = cm.exception exc = cm.exception
self.assertEqual(exc.code, 193) print(exc.code, exc.message)
self.assertTrue(exc.message.startswith('Wrong password for user default')) self.assertIn(exc.code, (193, 516))
if exc.code == 193:
self.assertTrue('Wrong password for user default' in exc.message)
else:
self.assertTrue('default: Authentication failed: password is incorrect' in exc.message)
def test_nonexisting_db(self): def test_nonexisting_db(self):
db = Database('db_not_here', autocreate=False) db = Database('db_not_here', autocreate=False)
@ -233,3 +237,28 @@ class DatabaseTestCase(TestCaseWithData):
query = "SELECT DISTINCT type FROM system.columns" query = "SELECT DISTINCT type FROM system.columns"
for row in self.database.select(query): for row in self.database.select(query):
ModelBase.create_ad_hoc_field(row.type) ModelBase.create_ad_hoc_field(row.type)
def test_get_model_for_table(self):
# Tests that get_model_for_table works for a non-system model
model = self.database.get_model_for_table('person')
self.assertFalse(model.is_system_model())
self.assertFalse(model.is_read_only())
self.assertEqual(model.table_name(), 'person')
# Read a few records
list(model.objects_in(self.database)[:10])
# Inserts should work too
self.database.insert([
model(first_name='aaa', last_name='bbb', height=1.77)
])
def test_get_model_for_table__system(self):
# Tests that get_model_for_table works for all system tables
query = "SELECT name FROM system.tables WHERE database='system'"
for row in self.database.select(query):
print(row.name)
model = self.database.get_model_for_table(row.name, system_table=True)
self.assertTrue(model.is_system_model())
self.assertTrue(model.is_read_only())
self.assertEqual(model.table_name(), row.name)
# Read a few records
list(model.objects_in(self.database)[:10])