Functions WIP

This commit is contained in:
Itai Shirav 2020-04-09 16:40:02 +03:00
parent 9f36b17fee
commit db3dc70ebf
3 changed files with 396 additions and 58 deletions

View File

@ -22,6 +22,8 @@ def binary_operator(func):
def type_conversion(func):
"""
Decorates a function to mark it as a type conversion function.
The metaclass automatically generates "OrZero" and "OrNull" combinators
for the decorated function.
"""
@wraps(func)
def wrapper(*args, **kwargs):
@ -33,6 +35,8 @@ def type_conversion(func):
def aggregate(func):
"""
Decorates a function to mark it as an aggregate function.
The metaclass automatically generates combinators such as "OrDefault",
"OrNull", "If" etc. for the decorated function.
"""
@wraps(func)
def wrapper(*args, **kwargs):
@ -41,6 +45,36 @@ def aggregate(func):
return wrapper
def with_utf8_support(func):
    """
    Decorates a function to mark it as a string function that has a UTF8 variant.
    The metaclass automatically generates a "UTF8" combinator for the decorated function.

    The returned wrapper forwards every positional and keyword argument to
    `func` unchanged and carries the marker `f_type = 'with_utf8_support'`.
    """
    def forward(*args, **kwargs):
        return func(*args, **kwargs)
    # Copy the metadata of `func` onto the forwarding function, then tag it.
    marked = wraps(func)(forward)
    marked.f_type = 'with_utf8_support'
    return marked
def parametric(func):
    """
    Decorates a function to convert it to a parametric function, such
    as `quantile(level)(expr)`.

    The decorated callable accepts only the parameters and returns a second
    callable. That second callable forwards its arguments to `func`, then
    rewrites the `name` of the returned object to `name(parameters)`.
    """
    @wraps(func)
    def bind_parameters(*parameters):
        @wraps(func)
        def build(*args, **kwargs):
            result = func(*args, **kwargs)
            # Append the parameter to the function name
            rendered = comma_join(list(map(str, parameters)))
            result.name = '%s(%s)' % (result.name, rendered)
            return result
        return build
    # Marker attribute identifying the function as parametric.
    bind_parameters.f_parametric = True
    return bind_parameters
class FunctionOperatorsMixin(object):
"""
@ -93,6 +127,12 @@ class FunctionOperatorsMixin(object):
# Reflected true division (`other / self`): delegates to F.divide(other, self).
def __rtruediv__(self, other):
return F.divide(other, self)
# Floor division (`self // other`): delegates to F.intDiv(self, other).
def __floordiv__(self, other):
return F.intDiv(self, other)
# Reflected floor division (`other // self`): delegates to F.intDiv(other, self).
def __rfloordiv__(self, other):
return F.intDiv(other, self)
# Modulo operator (`self % other`): delegates to F.modulo(self, other).
def __mod__(self, other):
return F.modulo(self, other)
@ -139,9 +179,12 @@ class FMeta(type):
'aggregate': [
{'suffix': 'OrDefault'},
{'suffix': 'OrNull'},
{'suffix': 'If', 'args': ['cond']},
{'suffix': 'If', 'args': ['cond']},
{'suffix': 'OrDefaultIf', 'args': ['cond']},
{'suffix': 'OrNullIf', 'args': ['cond']},
{'suffix': 'OrNullIf', 'args': ['cond']},
],
'with_utf8_support': [
{'suffix': 'UTF8'},
]
}
@ -175,6 +218,9 @@ class FMeta(type):
# Build the new function
new_code = compile(f'def {new_name}({new_sig}): return F("{new_name}", {args})', __file__, 'exec')
new_func = FunctionType(code=new_code.co_consts[0], globals=globals(), name=new_name, argdefs=argdefs)
# If base_func was parametric, new_func should be too
if getattr(base_func, 'f_parametric', False):
new_func = parametric(new_func)
# Attach to class
setattr(cls, new_name, new_func)
@ -350,10 +396,30 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
def toYear(d):
return F('toYear', d)
# Returns the expression F('toISOYear', d, timezone); timezone defaults to ''.
@staticmethod
def toISOYear(d, timezone=''):
return F('toISOYear', d, timezone)
# Returns the expression F('toQuarter', d, timezone); timezone defaults to ''.
@staticmethod
def toQuarter(d, timezone=''):
return F('toQuarter', d, timezone)
# Returns the expression F('toMonth', d).
@staticmethod
def toMonth(d):
return F('toMonth', d)
# Returns the expression F('toWeek', d, mode, timezone); mode defaults to 0 and timezone to ''.
@staticmethod
def toWeek(d, mode=0, timezone=''):
return F('toWeek', d, mode, timezone)
# Returns the expression F('toISOWeek', d, timezone); timezone defaults to ''.
@staticmethod
def toISOWeek(d, timezone=''):
return F('toISOWeek', d, timezone)
# Returns the expression F('toDayOfYear', d).
@staticmethod
def toDayOfYear(d):
return F('toDayOfYear', d)
# Returns the expression F('toDayOfMonth', d).
@staticmethod
def toDayOfMonth(d):
return F('toDayOfMonth', d)
@ -390,6 +456,18 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
def toStartOfYear(d):
return F('toStartOfYear', d)
# Returns the expression F('toStartOfISOYear', d).
@staticmethod
def toStartOfISOYear(d):
return F('toStartOfISOYear', d)
# Returns the expression F('toStartOfTenMinutes', d).
@staticmethod
def toStartOfTenMinutes(d):
return F('toStartOfTenMinutes', d)
@staticmethod
def toStartOfWeek(d, mode=0):
    """
    Build the `toStartOfWeek` function expression for `d`.

    `mode` is forwarded as the function's second argument so that a
    non-default value actually takes effect instead of being dropped.
    """
    return F('toStartOfWeek', d, mode)
# Returns the expression F('toStartOfMinute', d).
@staticmethod
def toStartOfMinute(d):
return F('toStartOfMinute', d)
@ -414,6 +492,26 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
def toTime(d, timezone=''):
return F('toTime', d, timezone)
# Returns the expression F('toTimeZone', dt, timezone); timezone is required here (no default).
@staticmethod
def toTimeZone(dt, timezone):
return F('toTimeZone', dt, timezone)
# Returns the expression F('toUnixTimestamp', dt, timezone); timezone defaults to ''.
@staticmethod
def toUnixTimestamp(dt, timezone=''):
return F('toUnixTimestamp', dt, timezone)
# Returns the expression F('toYYYYMM', dt, timezone); timezone defaults to ''.
@staticmethod
def toYYYYMM(dt, timezone=''):
return F('toYYYYMM', dt, timezone)
# Returns the expression F('toYYYYMMDD', dt, timezone); timezone defaults to ''.
@staticmethod
def toYYYYMMDD(dt, timezone=''):
return F('toYYYYMMDD', dt, timezone)
# Returns the expression F('toYYYYMMDDhhmmss', dt, timezone); timezone defaults to ''.
@staticmethod
def toYYYYMMDDhhmmss(dt, timezone=''):
return F('toYYYYMMDDhhmmss', dt, timezone)
# Returns the expression F('toRelativeYearNum', d, timezone); timezone defaults to ''.
@staticmethod
def toRelativeYearNum(d, timezone=''):
return F('toRelativeYearNum', d, timezone)
@ -639,49 +737,34 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
return F('notEmpty', s)
# Returns F('length', s). Decorated with with_utf8_support, which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def length(s):
return F('length', s)
# Returns F('lengthUTF8', s).
# NOTE(review): `length` is marked with with_utf8_support, which is documented to
# generate a "UTF8" combinator - this explicit definition may be redundant; confirm.
@staticmethod
def lengthUTF8(s):
return F('lengthUTF8', s)
# Returns F('lower', s). Decorated with with_utf8_support, which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def lower(s):
return F('lower', s)
# Returns F('upper', s). Decorated with with_utf8_support, which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def upper(s):
return F('upper', s)
# Returns F('lowerUTF8', s).
# NOTE(review): `lower` is marked with with_utf8_support, which is documented to
# generate a "UTF8" combinator - this explicit definition may be redundant; confirm.
@staticmethod
def lowerUTF8(s):
return F('lowerUTF8', s)
# Returns F('upperUTF8', s).
# NOTE(review): `upper` is marked with with_utf8_support, which is documented to
# generate a "UTF8" combinator - this explicit definition may be redundant; confirm.
@staticmethod
def upperUTF8(s):
return F('upperUTF8', s)
# Returns F('reverse', s). Decorated with with_utf8_support, which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def reverse(s):
return F('reverse', s)
# Returns F('reverseUTF8', s).
# NOTE(review): `reverse` is marked with with_utf8_support, which is documented to
# generate a "UTF8" combinator - this explicit definition may be redundant; confirm.
@staticmethod
def reverseUTF8(s):
return F('reverseUTF8', s)
# Returns F('concat', *args), forwarding every positional argument unchanged.
@staticmethod
def concat(*args):
return F('concat', *args)
# Returns F('substring', s, offset, length). Decorated with with_utf8_support,
# which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def substring(s, offset, length):
return F('substring', s, offset, length)
# Returns F('substringUTF8', s, offset, length).
# NOTE(review): `substring` is marked with with_utf8_support, which is documented to
# generate a "UTF8" combinator - this explicit definition may be redundant; confirm.
@staticmethod
def substringUTF8(s, offset, length):
return F('substringUTF8', s, offset, length)
# Returns F('appendTrailingCharIfAbsent', s, c).
@staticmethod
def appendTrailingCharIfAbsent(s, c):
return F('appendTrailingCharIfAbsent', s, c)
@ -726,6 +809,58 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
def CRC32(s):
return F('CRC32', s)
# Functions for searching in strings
# Returns F('position', haystack, needle). Decorated with with_utf8_support,
# which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def position(haystack, needle):
return F('position', haystack, needle)
# Returns F('positionCaseInsensitive', haystack, needle). Decorated with
# with_utf8_support, which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def positionCaseInsensitive(haystack, needle):
return F('positionCaseInsensitive', haystack, needle)
# Returns F('like', haystack, pattern).
@staticmethod
def like(haystack, pattern):
return F('like', haystack, pattern)
# Returns F('notLike', haystack, pattern).
@staticmethod
def notLike(haystack, pattern):
return F('notLike', haystack, pattern)
# Returns F('match', haystack, pattern).
@staticmethod
def match(haystack, pattern):
return F('match', haystack, pattern)
# Returns F('extract', haystack, pattern).
@staticmethod
def extract(haystack, pattern):
return F('extract', haystack, pattern)
# Returns F('extractAll', haystack, pattern).
@staticmethod
def extractAll(haystack, pattern):
return F('extractAll', haystack, pattern)
# Returns F('ngramDistance', haystack, needle). Decorated with with_utf8_support,
# which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def ngramDistance(haystack, needle):
return F('ngramDistance', haystack, needle)
# Returns F('ngramDistanceCaseInsensitive', haystack, needle). Decorated with
# with_utf8_support, which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def ngramDistanceCaseInsensitive(haystack, needle):
return F('ngramDistanceCaseInsensitive', haystack, needle)
# Returns F('ngramSearch', haystack, needle). Decorated with with_utf8_support,
# which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def ngramSearch(haystack, needle):
return F('ngramSearch', haystack, needle)
# Returns F('ngramSearchCaseInsensitive', haystack, needle). Decorated with
# with_utf8_support, which marks it for a generated "UTF8" variant.
@staticmethod
@with_utf8_support
def ngramSearchCaseInsensitive(haystack, needle):
return F('ngramSearchCaseInsensitive', haystack, needle)
# Functions for replacing in strings
@staticmethod
@ -1012,11 +1147,11 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
@staticmethod
def arrayResize(array, size, extender=None):
    """
    Build the `arrayResize` function expression.

    `extender` is passed as a third argument only when it is not None;
    otherwise the two-argument form is produced.
    """
    if extender is None:
        return F('arrayResize', array, size)
    return F('arrayResize', array, size, extender)
@staticmethod
def arraySlice(array, offset, length=None):
    """
    Build the `arraySlice` function expression.

    `length` is passed as a third argument only when it is not None;
    otherwise the two-argument form is produced.
    """
    if length is None:
        return F('arraySlice', array, offset)
    return F('arraySlice', array, offset, length)
@staticmethod
def arrayUniq(*args):
@ -1466,45 +1601,159 @@ class F(Cond, FunctionOperatorsMixin, metaclass=FMeta):
def varSamp(x):
return F('varSamp', x)
# Higher-order functions
# arrayMap: Function arrayMap needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42)
# Parametric aggregate: call as quantile(<parameters>)(expr). The inner call returns
# F('quantile', expr), with the parameters appended to its name by `parametric`.
@staticmethod
@aggregate
@parametric
def quantile(expr):
return F('quantile', expr)
# Returns F('arrayCount', *args), forwarding every positional argument unchanged.
@staticmethod
def arrayCount(*args):
return F('arrayCount', *args)
@staticmethod
@aggregate
@parametric
def quantileDeterministic(expr, determinator):
    """
    Parametric aggregate built on F('quantileDeterministic', expr, determinator).

    Call as `quantileDeterministic(<parameters>)(expr, determinator)`. Declared
    as a static method, consistent with the sibling aggregate functions.
    """
    return F('quantileDeterministic', expr, determinator)
# Returns F('arraySum', *args), forwarding every positional argument unchanged.
@staticmethod
def arraySum(*args):
return F('arraySum', *args)
@staticmethod
@aggregate
@parametric
def quantileExact(expr):
    """
    Parametric aggregate built on F('quantileExact', expr).

    Call as `quantileExact(<parameters>)(expr)`. Declared as a static method,
    consistent with the sibling aggregate functions.
    """
    return F('quantileExact', expr)
# Returns F('arrayExists', *args), forwarding every positional argument unchanged.
@staticmethod
def arrayExists(*args):
return F('arrayExists', *args)
@staticmethod
@aggregate
@parametric
def quantileExactWeighted(expr, weight):
    """
    Parametric aggregate built on F('quantileExactWeighted', expr, weight).

    Call as `quantileExactWeighted(<parameters>)(expr, weight)`. Declared as a
    static method, consistent with the sibling aggregate functions.
    """
    return F('quantileExactWeighted', expr, weight)
# Returns F('arrayAll', *args), forwarding every positional argument unchanged.
@staticmethod
def arrayAll(*args):
return F('arrayAll', *args)
# arrayFilter: Function arrayFilter needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42)
# arrayFirst: Function arrayFirst needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42)
# arrayFirstIndex: Function arrayFirstIndex needs at least 2 argument; passed 0. (version 19.8.3.8 (official build)) (42)
@staticmethod
@aggregate
@parametric
def quantileTiming(expr):
    """
    Parametric aggregate built on F('quantileTiming', expr).

    Call as `quantileTiming(<parameters>)(expr)`. Declared as a static method,
    consistent with the sibling aggregate functions.
    """
    return F('quantileTiming', expr)
# Returns F('arrayCumSum', *args), forwarding every positional argument unchanged.
@staticmethod
def arrayCumSum(*args):
return F('arrayCumSum', *args)
@staticmethod
@aggregate
@parametric
def quantileTimingWeighted(expr, weight):
    """
    Parametric aggregate built on F('quantileTimingWeighted', expr, weight).

    Call as `quantileTimingWeighted(<parameters>)(expr, weight)`. Declared as a
    static method, consistent with the sibling aggregate functions.
    """
    return F('quantileTimingWeighted', expr, weight)
# Returns F('arrayCumSumNonNegative', *args), forwarding every positional argument unchanged.
@staticmethod
def arrayCumSumNonNegative(*args):
return F('arrayCumSumNonNegative', *args)
@staticmethod
@aggregate
@parametric
def quantileTDigest(expr):
    """
    Parametric aggregate built on F('quantileTDigest', expr).

    Call as `quantileTDigest(<parameters>)(expr)`. Declared as a static method,
    consistent with the sibling aggregate functions.
    """
    return F('quantileTDigest', expr)
# Returns F('arraySort', *args), forwarding every positional argument unchanged.
@staticmethod
def arraySort(*args):
return F('arraySort', *args)
@staticmethod
@aggregate
@parametric
def quantileTDigestWeighted(expr, weight):
    """
    Parametric aggregate built on F('quantileTDigestWeighted', expr, weight).

    Call as `quantileTDigestWeighted(<parameters>)(expr, weight)`. Declared as a
    static method, consistent with the sibling aggregate functions.
    """
    return F('quantileTDigestWeighted', expr, weight)
# Returns F('arrayReverseSort', *args), forwarding every positional argument unchanged.
@staticmethod
def arrayReverseSort(*args):
return F('arrayReverseSort', *args)
@staticmethod
@aggregate
@parametric
def quantiles(expr):
    """
    Parametric aggregate built on F('quantiles', expr).

    Call as `quantiles(<parameters>)(expr)`. Declared as a static method,
    consistent with the sibling aggregate functions.
    """
    return F('quantiles', expr)
# Parametric aggregate: call as quantilesDeterministic(<parameters>)(expr, determinator).
# The inner call returns F('quantilesDeterministic', expr, determinator).
@staticmethod
@aggregate
@parametric
def quantilesDeterministic(expr, determinator):
return F('quantilesDeterministic', expr, determinator)
# Parametric aggregate: call as quantilesExact(<parameters>)(expr).
# The inner call returns F('quantilesExact', expr).
@staticmethod
@aggregate
@parametric
def quantilesExact(expr):
return F('quantilesExact', expr)
# Parametric aggregate: call as quantilesExactWeighted(<parameters>)(expr, weight).
# The inner call returns F('quantilesExactWeighted', expr, weight).
@staticmethod
@aggregate
@parametric
def quantilesExactWeighted(expr, weight):
return F('quantilesExactWeighted', expr, weight)
# Parametric aggregate: call as quantilesTiming(<parameters>)(expr).
# The inner call returns F('quantilesTiming', expr).
@staticmethod
@aggregate
@parametric
def quantilesTiming(expr):
return F('quantilesTiming', expr)
# Parametric aggregate: call as quantilesTimingWeighted(<parameters>)(expr, weight).
# The inner call returns F('quantilesTimingWeighted', expr, weight).
@staticmethod
@aggregate
@parametric
def quantilesTimingWeighted(expr, weight):
return F('quantilesTimingWeighted', expr, weight)
# Parametric aggregate: call as quantilesTDigest(<parameters>)(expr).
# The inner call returns F('quantilesTDigest', expr).
@staticmethod
@aggregate
@parametric
def quantilesTDigest(expr):
return F('quantilesTDigest', expr)
# Parametric aggregate: call as quantilesTDigestWeighted(<parameters>)(expr, weight).
# The inner call returns F('quantilesTDigestWeighted', expr, weight).
@staticmethod
@aggregate
@parametric
def quantilesTDigestWeighted(expr, weight):
return F('quantilesTDigestWeighted', expr, weight)
# Parametric aggregate: call as topK(<parameters>)(expr).
# The inner call returns F('topK', expr).
@staticmethod
@aggregate
@parametric
def topK(expr):
return F('topK', expr)
# Parametric aggregate: call as topKWeighted(<parameters>)(expr, weight).
# The inner call returns F('topKWeighted', expr, weight).
@staticmethod
@aggregate
@parametric
def topKWeighted(expr, weight):
return F('topKWeighted', expr, weight)
# Null handling functions
# Returns the expression F('ifNull', x, y).
@staticmethod
def ifNull(x, y):
return F('ifNull', x, y)
# Returns the expression F('nullIf', x, y).
@staticmethod
def nullIf(x, y):
return F('nullIf', x, y)
# Returns the expression F('isNotNull', x).
@staticmethod
def isNotNull(x):
return F('isNotNull', x)
# Returns the expression F('isNull', x).
@staticmethod
def isNull(x):
return F('isNull', x)
# Returns F('coalesce', *args), forwarding every positional argument unchanged.
@staticmethod
def coalesce(*args):
return F('coalesce', *args)
# Misc functions
# Returns the expression F('ifNotFinite', x, y).
@staticmethod
def ifNotFinite(x, y):
return F('ifNotFinite', x, y)
# Returns the expression F('isFinite', x).
@staticmethod
def isFinite(x):
return F('isFinite', x)
# Returns the expression F('isInfinite', x).
@staticmethod
def isInfinite(x):
return F('isInfinite', x)
# Returns the expression F('isNaN', x).
@staticmethod
def isNaN(x):
return F('isNaN', x)
# Returns the expression F('least', x, y); takes exactly two operands.
@staticmethod
def least(x, y):
return F('least', x, y)
# Returns the expression F('greatest', x, y); takes exactly two operands.
@staticmethod
def greatest(x, y):
return F('greatest', x, y)

View File

@ -301,7 +301,6 @@ class QuerySet(object):
self._grouping_fields = []
self._grouping_with_totals = False
self._fields = model_cls.fields().keys()
self._extra = {}
self._limits = None
self._limit_by = None
self._limit_by_fields = None
@ -368,8 +367,6 @@ class QuerySet(object):
fields = '*'
if self._fields:
fields = comma_join('`%s`' % field for field in self._fields)
for name, func in self._extra.items():
fields += ', %s AS %s' % (func.to_sql(), name)
return fields
def as_sql(self):
@ -457,11 +454,6 @@ class QuerySet(object):
qs._fields = field_names
return qs
def extra(self, **kwargs):
    """
    Return a shallow copy of this object whose `_extra` attribute is the
    given keyword arguments; the original object is left untouched.
    """
    clone = copy(self)
    setattr(clone, '_extra', kwargs)
    return clone
def _filter_or_exclude(self, *q, **kwargs):
from .funcs import F

View File

@ -139,6 +139,11 @@ class FuncsTestCase(TestCaseWithData):
self._test_func(one / two, 0.5)
self._test_func(one / 2, 0.5)
self._test_func(1 / two, 0.5)
# //
self._test_func(one // two, 0)
self._test_func(two // one, 2)
self._test_func(one // 2, 0)
self._test_func(1 // two, 0)
# %
self._test_func(one % two, 1)
self._test_func(one % 2, 1)
@ -178,8 +183,17 @@ class FuncsTestCase(TestCaseWithData):
dt = datetime(2018, 12, 31, 11, 22, 33)
self._test_func(F.toYear(d), 2018)
self._test_func(F.toYear(dt), 2018)
self._test_func(F.toISOYear(dt, 'Europe/Athens'), 2019) # 2018-12-31 is ISO year 2019, week 1, day 1
self._test_func(F.toQuarter(d), 4)
self._test_func(F.toQuarter(dt), 4)
self._test_func(F.toMonth(d), 12)
self._test_func(F.toMonth(dt), 12)
self._test_func(F.toWeek(d), 52)
self._test_func(F.toWeek(dt), 52)
self._test_func(F.toISOWeek(d), 1) # 2018-12-31 is ISO year 2019, week 1, day 1
self._test_func(F.toISOWeek(dt), 1)
self._test_func(F.toDayOfYear(d), 365)
self._test_func(F.toDayOfYear(dt), 365)
self._test_func(F.toDayOfMonth(d), 31)
self._test_func(F.toDayOfMonth(dt), 31)
self._test_func(F.toDayOfWeek(d), 1)
@ -200,10 +214,24 @@ class FuncsTestCase(TestCaseWithData):
self._test_func(F.toStartOfFifteenMinutes(dt), datetime(2018, 12, 31, 11, 15, 0, tzinfo=pytz.utc))
self._test_func(F.toStartOfHour(dt), datetime(2018, 12, 31, 11, 0, 0, tzinfo=pytz.utc))
self._test_func(F.toStartOfDay(dt), datetime(2018, 12, 31, 0, 0, 0, tzinfo=pytz.utc))
self._test_func(F.toStartOfISOYear(dt), date(2018, 12, 31))
self._test_func(F.toStartOfTenMinutes(dt), datetime(2018, 12, 31, 11, 20, 0, tzinfo=pytz.utc))
self._test_func(F.toStartOfWeek(dt), date(2018, 12, 30))
self._test_func(F.toTime(dt), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toTime(dt, pytz.utc), datetime(1970, 1, 2, 11, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toTime(dt, 'Europe/Athens'), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toTime(dt, pytz.timezone('Europe/Athens')), datetime(1970, 1, 2, 13, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toTimeZone(dt, 'Europe/Athens'), datetime(2018, 12, 31, 13, 22, 33, tzinfo=pytz.utc))
self._test_func(F.toUnixTimestamp(dt, 'UTC'), int(dt.replace(tzinfo=pytz.utc).timestamp()))
self._test_func(F.toYYYYMM(d), 201812)
self._test_func(F.toYYYYMM(dt), 201812)
self._test_func(F.toYYYYMM(dt, 'Europe/Athens'), 201812)
self._test_func(F.toYYYYMMDD(d), 20181231)
self._test_func(F.toYYYYMMDD(dt), 20181231)
self._test_func(F.toYYYYMMDD(dt, 'Europe/Athens'), 20181231)
self._test_func(F.toYYYYMMDDhhmmss(d), 20181231000000)
self._test_func(F.toYYYYMMDDhhmmss(dt), 20181231112233)
self._test_func(F.toYYYYMMDDhhmmss(dt, 'Europe/Athens'), 20181231132233)
self._test_func(F.toRelativeYearNum(dt), 2018)
self._test_func(F.toRelativeYearNum(dt, 'Europe/Athens'), 2018)
self._test_func(F.toRelativeMonthNum(dt), 2018 * 12 + 12)
@ -313,6 +341,25 @@ class FuncsTestCase(TestCaseWithData):
self._test_func(F.trimBoth(' abc '), 'abc')
self._test_func(F.CRC32('whoops'), 3361378926)
# Checks the string-search functions (position, like/notLike, match, extract/extractAll,
# ngramDistance, ngramSearch and their CaseInsensitive / UTF8 variants) against
# literal inputs and expected values via self._test_func.
def test_string_search_functions(self):
self._test_func(F.position('Hello, world!', '!'), 13)
self._test_func(F.positionCaseInsensitive('Hello, world!', 'hello'), 1)
self._test_func(F.positionUTF8('Привет, мир!', '!'), 12)
self._test_func(F.positionCaseInsensitiveUTF8('Привет, мир!', 'Мир'), 9)
self._test_func(F.like('Hello, world!', '%ll%'), 1)
self._test_func(F.notLike('Hello, world!', '%ll%'), 0)
self._test_func(F.match('Hello, world!', '[lmnop]{3}'), 1)
self._test_func(F.extract('Hello, world!', '[lmnop]{3}'), 'llo')
self._test_func(F.extractAll('Hello, world!', '[a-z]+'), ['ello', 'world'])
self._test_func(F.ngramDistance('Hello', 'Hello'), 0)
self._test_func(F.ngramDistanceCaseInsensitive('Hello', 'hello'), 0)
self._test_func(F.ngramDistanceUTF8('Hello', 'Hello'), 0)
self._test_func(F.ngramDistanceCaseInsensitiveUTF8('Hello', 'hello'), 0)
self._test_func(F.ngramSearch('Hello', 'Hello'), 1)
self._test_func(F.ngramSearchCaseInsensitive('Hello', 'hello'), 1)
self._test_func(F.ngramSearchUTF8('Hello', 'Hello'), 1)
self._test_func(F.ngramSearchCaseInsensitiveUTF8('Hello', 'hello'), 1)
def test_base64_functions(self):
try:
self._test_func(F.base64Decode(F.base64Encode('Hello')), 'Hello')
@ -573,3 +620,53 @@ class FuncsTestCase(TestCaseWithData):
self._test_aggr(F.argMinOrNullIf(Person.first_name, Person.height, Person.last_name > 'Z'))
self._test_aggr(F.countOrNullIf(Person.last_name > 'Z'), None)
self._test_aggr(F.minOrNullIf(Person.height, Person.last_name > 'Z'), None)
# Exercises the parametric quantile*/quantiles* aggregates, including names with
# combinator suffixes (OrDefault, OrNull, If, OrDefaultIf, OrNullIf), through
# self._test_aggr. No expected value is passed to any of these calls.
def test_quantile_funcs(self):
self._test_aggr(F.quantile(0.9)(Person.height))
self._test_aggr(F.quantileOrDefault(0.9)(Person.height))
self._test_aggr(F.quantileOrNull(0.9)(Person.height))
self._test_aggr(F.quantileIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileOrDefaultIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileOrNullIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileDeterministic(0.9)(Person.height, 17))
self._test_aggr(F.quantileExactOrDefault(0.9)(Person.height))
# Weight expression shared by the *Weighted variants below.
weight_expr = F.toUInt32(F.round(Person.height))
self._test_aggr(F.quantileExactWeightedOrNull(0.9)(Person.height, weight_expr))
self._test_aggr(F.quantileTimingIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileTimingWeightedOrDefaultIf(0.9)(Person.height, weight_expr, Person.last_name > 'H'))
self._test_aggr(F.quantileTDigestOrNullIf(0.9)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantileTDigestWeighted(0.9)(Person.height, weight_expr))
self._test_aggr(F.quantiles(0.9, 0.95, 0.99)(Person.height))
self._test_aggr(F.quantilesExactWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
self._test_aggr(F.quantilesTimingIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantilesTimingWeightedOrDefaultIf(0.9, 0.95, 0.99)(Person.height, weight_expr, Person.last_name > 'H'))
self._test_aggr(F.quantilesTDigestIf(0.9, 0.95, 0.99)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.quantilesTDigestWeighted(0.9, 0.95, 0.99)(Person.height, weight_expr))
# Exercises topK and topKWeighted (parameter 3) plus their OrDefault / If /
# OrDefaultIf combinator names through self._test_aggr. No expected value is passed.
def test_top_k_funcs(self):
self._test_aggr(F.topK(3)(Person.height))
self._test_aggr(F.topKOrDefault(3)(Person.height))
self._test_aggr(F.topKIf(3)(Person.height, Person.last_name > 'H'))
self._test_aggr(F.topKOrDefaultIf(3)(Person.height, Person.last_name > 'H'))
# Weight expression shared by the topKWeighted variants below.
weight_expr = F.toUInt32(F.round(Person.height))
self._test_aggr(F.topKWeighted(3)(Person.height, weight_expr))
self._test_aggr(F.topKWeightedOrDefault(3)(Person.height, weight_expr))
self._test_aggr(F.topKWeightedIf(3)(Person.height, weight_expr, Person.last_name > 'H'))
self._test_aggr(F.topKWeightedOrDefaultIf(3)(Person.height, weight_expr, Person.last_name > 'H'))
# Checks the null-handling functions (ifNull, nullIf, isNotNull, isNull, coalesce)
# against literal inputs and expected values via self._test_func.
def test_null_funcs(self):
self._test_func(F.ifNull(17, 18), 17)
self._test_func(F.ifNull(None, 18), 18)
self._test_func(F.nullIf(17, 18), 17)
self._test_func(F.nullIf(18, 18), None)
self._test_func(F.isNotNull(17), 1)
self._test_func(F.isNull(17), 0)
self._test_func(F.coalesce(None, None, 17, 18), 17)
# Checks the miscellaneous functions (ifNotFinite, isFinite, isInfinite, isNaN,
# least, greatest) against literal inputs and expected values via self._test_func.
def test_misc_funcs(self):
self._test_func(F.ifNotFinite(17, 18), 17)
self._test_func(F.isFinite(17), 1)
self._test_func(F.isInfinite(17), 0)
self._test_func(F.isNaN(17), 0)
self._test_func(F.least(17, 18), 17)
self._test_func(F.greatest(17, 18), 18)