From 1dc4caac651958b9fedaaeaa9fd99e8e524a6756 Mon Sep 17 00:00:00 2001 From: Ran Benita Date: Thu, 2 May 2019 18:46:02 +0300 Subject: [PATCH] Avoid Django's lazy() when creating validators because it is too slow As a fix for issue #3354, commit 607e4edca77441f057ce40cd90175 made the evaluation of some validation error messages lazy. To achieve that, Django's django.utils.functional.lazy() function was used. However, that function is extremely heavy and slow, and slows down string validation significantly (lazy() is evaluated each time for each validator for each field which has one). We noticed this in our production system. Use a custom lazy_format() object instead which does the formatting lazily with less overhead. Using the benchmark attached to the PR (snipped to tottime>100ms): Before, model serializer: 9225123 function calls (9200068 primitive calls) in 8.337 seconds Ordered by: internal time ncalls tottime percall cumtime percall filename:lineno(function) 25000 1.299 0.000 3.534 0.000 functional.py:125(__prepare_class__) 2415001 0.954 0.000 0.954 0.000 {built-in method builtins.hasattr} 1350000 0.901 0.000 0.901 0.000 functional.py:145(__promise__) 1550001 0.521 0.000 0.521 0.000 {built-in method builtins.setattr} 25000 0.494 0.000 0.735 0.000 {built-in method builtins.__build_class__} 30000 0.298 0.000 0.385 0.000 fields.py:297(__init__) 25000 0.289 0.000 5.895 0.000 fields.py:740(__init__) 25000 0.264 0.000 0.802 0.000 field_mapping.py:66(get_field_kwargs) 25000 0.241 0.000 0.241 0.000 functional.py:100(__proxy__) 670003/670001 0.203 0.000 0.211 0.000 {built-in method builtins.getattr} 5000 0.189 0.000 7.722 0.002 serializers.py:990(get_fields) 25000 0.186 0.000 0.400 0.000 functools.py:186(total_ordering) 25000 0.158 0.000 0.299 0.000 functional.py:234(wrapper) 5000 0.129 0.000 0.136 0.000 serializers.py:1066(get_field_names) 25000 0.104 0.000 1.002 0.000 serializers.py:1195(build_standard_field) After, model serializer: 3265096 function calls (3240059 primitive calls) in 2.645 seconds Ordered by: internal time ncalls tottime percall cumtime percall filename:lineno(function) 30000 0.237 0.000 0.315 0.000 fields.py:295(__init__) 25000 0.218 0.000 0.639 0.000 field_mapping.py:66(get_field_kwargs) 25000 0.214 0.000 0.665 0.000 fields.py:743(__init__) 5000 0.156 0.000 2.086 0.000 serializers.py:988(get_fields) 25000 0.107 0.000 0.210 0.000 functional.py:234(wrapper) Before, regular serializer: 8060003 function calls (7960003 primitive calls) in 7.123 seconds Ordered by: internal time ncalls tottime percall cumtime percall filename:lineno(function) 25000 1.569 0.000 3.897 0.000 functional.py:125(__prepare_class__) 1350000 1.013 0.000 1.013 0.000 functional.py:145(__promise__) 2365000 0.925 0.000 0.925 0.000 {built-in method builtins.hasattr} 1550000 0.512 0.000 0.512 0.000 {built-in method builtins.setattr} 25000 0.378 0.000 0.550 0.000 {built-in method builtins.__build_class__} 25000 0.307 0.000 5.946 0.000 fields.py:740(__init__) 30000 0.277 0.000 0.360 0.000 fields.py:297(__init__) 80000/5000 0.202 0.000 6.526 0.001 copy.py:132(deepcopy) 25000 0.172 0.000 0.172 0.000 functional.py:100(__proxy__) 540000 0.152 0.000 0.152 0.000 {built-in method builtins.getattr} 25000 0.119 0.000 6.199 0.000 fields.py:604(__deepcopy__) After, regular serializer: 2150003 function calls (2050003 primitive calls) in 1.609 seconds Ordered by: internal time ncalls tottime percall cumtime percall filename:lineno(function) 30000 0.224 0.000 0.293 0.000 fields.py:295(__init__) 25000 0.181 0.000 0.607 0.000 fields.py:743(__init__) 80000/5000 0.151 0.000 1.074 0.000 copy.py:132(deepcopy) 25000 0.102 0.000 0.819 0.000 fields.py:607(__deepcopy__) --- rest_framework/compat.py | 30 ------------------ rest_framework/fields.py | 51 ++++++++++-------------------- rest_framework/utils/formatting.py | 26 +++++++++++++++ tests/test_utils.py | 19 +++++++++++ 4 files changed, 62 insertions(+), 64 deletions(-) diff --git a/rest_framework/compat.py b/rest_framework/compat.py index 2e6a8adda..4bee21f91 100644 --- a/rest_framework/compat.py +++ b/rest_framework/compat.py @@ -5,7 +5,6 @@ versions of Django/Python, and compatibility wrappers around optional packages. import sys from django.conf import settings -from django.core import validators from django.views.generic import View try: @@ -238,34 +237,5 @@ LONG_SEPARATORS = (', ', ': ') INDENT_SEPARATORS = (',', ': ') -class CustomValidatorMessage: - """ - We need to avoid evaluation of `lazy` translated `message` in `django.core.validators.BaseValidator.__init__`. - https://github.com/django/django/blob/75ed5900321d170debef4ac452b8b3cf8a1c2384/django/core/validators.py#L297 - - Ref: https://github.com/encode/django-rest-framework/pull/5452 - """ - - def __init__(self, *args, **kwargs): - self.message = kwargs.pop('message', self.message) - super().__init__(*args, **kwargs) - - -class MinValueValidator(CustomValidatorMessage, validators.MinValueValidator): - pass - - -class MaxValueValidator(CustomValidatorMessage, validators.MaxValueValidator): - pass - - -class MinLengthValidator(CustomValidatorMessage, validators.MinLengthValidator): - pass - - -class MaxLengthValidator(CustomValidatorMessage, validators.MaxLengthValidator): - pass - - # Version Constants. PY36 = sys.version_info >= (3, 6) diff --git a/rest_framework/fields.py b/rest_framework/fields.py index 213134793..860ae03ae 100644 --- a/rest_framework/fields.py +++ b/rest_framework/fields.py @@ -12,7 +12,8 @@ from django.conf import settings from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ValidationError as DjangoValidationError from django.core.validators import ( - EmailValidator, RegexValidator, URLValidator, ip_address_validators + EmailValidator, MaxLengthValidator, MaxValueValidator, MinLengthValidator, + MinValueValidator, RegexValidator, URLValidator, ip_address_validators ) from django.forms import FilePathField as DjangoFilePathField from django.forms import ImageField as DjangoImageField @@ -23,20 +24,17 @@ from django.utils.dateparse import ( from django.utils.duration import duration_string from django.utils.encoding import is_protected_type, smart_text from django.utils.formats import localize_input, sanitize_separators -from django.utils.functional import lazy from django.utils.ipv6 import clean_ipv6_address from django.utils.timezone import utc from django.utils.translation import gettext_lazy as _ from pytz.exceptions import InvalidTimeError from rest_framework import ISO_8601 -from rest_framework.compat import ( - MaxLengthValidator, MaxValueValidator, MinLengthValidator, - MinValueValidator, ProhibitNullCharactersValidator -) +from rest_framework.compat import ProhibitNullCharactersValidator from rest_framework.exceptions import ErrorDetail, ValidationError from rest_framework.settings import api_settings from rest_framework.utils import html, humanize_datetime, json, representation +from rest_framework.utils.formatting import lazy_format class empty: @@ -749,12 +747,11 @@ class CharField(Field): self.min_length = kwargs.pop('min_length', None) super().__init__(**kwargs) if self.max_length is not None: - message = lazy(self.error_messages['max_length'].format, str)(max_length=self.max_length) + message = lazy_format(self.error_messages['max_length'], max_length=self.max_length) self.validators.append( MaxLengthValidator(self.max_length, message=message)) if self.min_length is not None: - message = lazy( - self.error_messages['min_length'].format, str)(min_length=self.min_length) + message = lazy_format(self.error_messages['min_length'], min_length=self.min_length) self.validators.append( MinLengthValidator(self.min_length, message=message)) @@ -915,13 +912,11 @@ class IntegerField(Field): self.min_value = kwargs.pop('min_value', None) super().__init__(**kwargs) if self.max_value is not None: - message = lazy( - self.error_messages['max_value'].format, str)(max_value=self.max_value) + message = lazy_format(self.error_messages['max_value'], max_value=self.max_value) self.validators.append( MaxValueValidator(self.max_value, message=message)) if self.min_value is not None: - message = lazy( - self.error_messages['min_value'].format, str)(min_value=self.min_value) + message = lazy_format(self.error_messages['min_value'], min_value=self.min_value) self.validators.append( MinValueValidator(self.min_value, message=message)) @@ -953,15 +948,11 @@ class FloatField(Field): self.min_value = kwargs.pop('min_value', None) super().__init__(**kwargs) if self.max_value is not None: - message = lazy( - self.error_messages['max_value'].format, - str)(max_value=self.max_value) + message = lazy_format(self.error_messages['max_value'], max_value=self.max_value) self.validators.append( MaxValueValidator(self.max_value, message=message)) if self.min_value is not None: - message = lazy( - self.error_messages['min_value'].format, - str)(min_value=self.min_value) + message = lazy_format(self.error_messages['min_value'], min_value=self.min_value) self.validators.append( MinValueValidator(self.min_value, message=message)) @@ -1012,14 +1003,11 @@ class DecimalField(Field): super().__init__(**kwargs) if self.max_value is not None: - message = lazy( - self.error_messages['max_value'].format, - str)(max_value=self.max_value) + message = lazy_format(self.error_messages['max_value'], max_value=self.max_value) self.validators.append( MaxValueValidator(self.max_value, message=message)) if self.min_value is not None: - message = lazy( - self.error_messages['min_value'].format, str)(min_value=self.min_value) + message = lazy_format(self.error_messages['min_value'], min_value=self.min_value) self.validators.append( MinValueValidator(self.min_value, message=message)) @@ -1357,15 +1345,11 @@ class DurationField(Field): self.min_value = kwargs.pop('min_value', None) super().__init__(**kwargs) if self.max_value is not None: - message = lazy( - self.error_messages['max_value'].format, - str)(max_value=self.max_value) + message = lazy_format(self.error_messages['max_value'], max_value=self.max_value) self.validators.append( MaxValueValidator(self.max_value, message=message)) if self.min_value is not None: - message = lazy( - self.error_messages['min_value'].format, - str)(min_value=self.min_value) + message = lazy_format(self.error_messages['min_value'], min_value=self.min_value) self.validators.append( MinValueValidator(self.min_value, message=message)) @@ -1610,10 +1594,10 @@ class ListField(Field): super().__init__(*args, **kwargs) self.child.bind(field_name='', parent=self) if self.max_length is not None: - message = lazy(self.error_messages['max_length'].format, str)(max_length=self.max_length) + message = lazy_format(self.error_messages['max_length'], max_length=self.max_length) self.validators.append(MaxLengthValidator(self.max_length, message=message)) if self.min_length is not None: - message = lazy(self.error_messages['min_length'].format, str)(min_length=self.min_length) + message = lazy_format(self.error_messages['min_length'], min_length=self.min_length) self.validators.append(MinLengthValidator(self.min_length, message=message)) def get_value(self, dictionary): @@ -1886,8 +1870,7 @@ class ModelField(Field): max_length = kwargs.pop('max_length', None) super().__init__(**kwargs) if max_length is not None: - message = lazy( - self.error_messages['max_length'].format, str)(max_length=self.max_length) + message = lazy_format(self.error_messages['max_length'], max_length=self.max_length) self.validators.append( MaxLengthValidator(self.max_length, message=message)) diff --git a/rest_framework/utils/formatting.py b/rest_framework/utils/formatting.py index 4e003f614..e96cc6719 100644 --- a/rest_framework/utils/formatting.py +++ b/rest_framework/utils/formatting.py @@ -65,3 +65,29 @@ def markup_description(description): description = escape(description).replace('\n', '
') description = '

' + description + '

' return mark_safe(description) + + +class lazy_format: + """ + Delay formatting until it's actually needed. + + Useful when the format string or one of the arguments is lazy. + + Not using Django's lazy because it is too slow. + """ + __slots__ = ('format_string', 'args', 'kwargs', 'result') + + def __init__(self, format_string, *args, **kwargs): + self.result = None + self.format_string = format_string + self.args = args + self.kwargs = kwargs + + def __str__(self): + if self.result is None: + self.result = self.format_string.format(*self.args, **self.kwargs) + self.format_string, self.args, self.kwargs = None, None, None + return self.result + + def __mod__(self, value): + return str(self) % value diff --git a/tests/test_utils.py b/tests/test_utils.py index 0d61cc2f9..885b5a58d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,3 +1,5 @@ +from unittest import mock + from django.conf.urls import url from django.test import TestCase, override_settings @@ -6,6 +8,7 @@ from rest_framework.routers import SimpleRouter from rest_framework.serializers import ModelSerializer from rest_framework.utils import json from rest_framework.utils.breadcrumbs import get_breadcrumbs +from rest_framework.utils.formatting import lazy_format from rest_framework.utils.urls import remove_query_param, replace_query_param from rest_framework.views import APIView from rest_framework.viewsets import ModelViewSet @@ -257,3 +260,19 @@ class UrlsRemoveQueryParamTests(TestCase): removed_key = 'page' assert key in remove_query_param(q, removed_key) + + +class LazyFormatTests(TestCase): + def test_it_formats_correctly(self): + formatted = lazy_format('Does {} work? {answer}: %s', 'it', answer='Yes') + assert str(formatted) == 'Does it work? Yes: %s' + assert formatted % 'it does' == 'Does it work? Yes: it does' + + def test_it_formats_lazily(self): + message = mock.Mock(wraps='message') + formatted = lazy_format(message) + assert message.format.call_count == 0 + str(formatted) + assert message.format.call_count == 1 + str(formatted) + assert message.format.call_count == 1