Align SearchFilter behaviour to django.contrib.admin search (#9017)

* Use subquery to remove duplicates in SearchFilter

* Align SearchFilter behaviour to django.contrib.admin

* Add compatibility with older django/python versions

* Allow search to split also by comma after smart split

* Use generator to build search conditions to reduce iterations

* Improve search documentation

* Update docs/api-guide/filtering.md

---------

Co-authored-by: Asif Saif Uddin <auvipy@gmail.com>
This commit is contained in:
Devid 2023-07-25 14:01:23 +01:00 committed by GitHub
parent 5c3b6e496c
commit b99df0cf78
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 146 additions and 40 deletions

View File

@ -213,19 +213,23 @@ This will allow the client to filter the items in the list by making queries suc
You can also perform a related lookup on a ForeignKey or ManyToManyField with the lookup API double-underscore notation: You can also perform a related lookup on a ForeignKey or ManyToManyField with the lookup API double-underscore notation:
search_fields = ['username', 'email', 'profile__profession'] search_fields = ['username', 'email', 'profile__profession']
For [JSONField][JSONField] and [HStoreField][HStoreField] fields you can filter based on nested values within the data structure using the same double-underscore notation: For [JSONField][JSONField] and [HStoreField][HStoreField] fields you can filter based on nested values within the data structure using the same double-underscore notation:
search_fields = ['data__breed', 'data__owner__other_pets__0__name'] search_fields = ['data__breed', 'data__owner__other_pets__0__name']
By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. Searches may contain _quoted phrases_ with spaces; each phrase is treated as a single search term.
The search behavior may be restricted by prepending various characters to the `search_fields`.
* '^' Starts-with search. The search behavior may be specified by prefixing field names in `search_fields` with one of the following characters (which is equivalent to adding `__<lookup>` to the field):
* '=' Exact matches.
* '@' Full-text search. (Currently only supported Django's [PostgreSQL backend][postgres-search].) | Prefix | Lookup | |
* '$' Regex search. | ------ | --------------| ------------------ |
| `^` | `istartswith` | Starts-with search.|
| `=` | `iexact` | Exact matches. |
| `$` | `iregex` | Regex search. |
| `@` | `search` | Full-text search (currently only supported by Django's [PostgreSQL backend][postgres-search]). |
| None | `icontains` | Contains search (Default). |
For example: For example:

View File

@ -3,7 +3,6 @@ The `compat` module provides support for backwards compatibility with older
versions of Django/Python, and compatibility wrappers around optional packages. versions of Django/Python, and compatibility wrappers around optional packages.
""" """
import django import django
from django.conf import settings
from django.views.generic import View from django.views.generic import View
@ -14,13 +13,6 @@ def unicode_http_header(value):
return value return value
def distinct(queryset, base):
if settings.DATABASES[queryset.db]["ENGINE"] == "django.db.backends.oracle":
# distinct analogue for Oracle users
return base.filter(pk__in=set(queryset.values_list('pk', flat=True)))
return queryset.distinct()
# django.contrib.postgres requires psycopg2 # django.contrib.postgres requires psycopg2
try: try:
from django.contrib.postgres import fields as postgres_fields from django.contrib.postgres import fields as postgres_fields

View File

@ -6,18 +6,35 @@ import operator
import warnings import warnings
from functools import reduce from functools import reduce
from django.core.exceptions import ImproperlyConfigured from django.core.exceptions import FieldDoesNotExist, ImproperlyConfigured
from django.db import models from django.db import models
from django.db.models.constants import LOOKUP_SEP from django.db.models.constants import LOOKUP_SEP
from django.template import loader from django.template import loader
from django.utils.encoding import force_str from django.utils.encoding import force_str
from django.utils.text import smart_split, unescape_string_literal
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from rest_framework import RemovedInDRF317Warning from rest_framework import RemovedInDRF317Warning
from rest_framework.compat import coreapi, coreschema, distinct from rest_framework.compat import coreapi, coreschema
from rest_framework.fields import CharField
from rest_framework.settings import api_settings from rest_framework.settings import api_settings
def search_smart_split(search_terms):
"""generator that first splits the string by spaces, leaving quoted phrases together,
then it splits non-quoted phrases by commas.
"""
for term in smart_split(search_terms):
# trim commas to avoid bad matching for quoted phrases
term = term.strip(',')
if term.startswith(('"', "'")) and term[0] == term[-1]:
# quoted phrases are kept together without any other split
yield unescape_string_literal(term)
else:
# non-quoted tokens are split by comma, keeping only non-empty ones
yield from (sub_term.strip() for sub_term in term.split(',') if sub_term)
class BaseFilterBackend: class BaseFilterBackend:
""" """
A base class from which all filter backend classes should inherit. A base class from which all filter backend classes should inherit.
@ -64,18 +81,41 @@ class SearchFilter(BaseFilterBackend):
def get_search_terms(self, request): def get_search_terms(self, request):
""" """
Search terms are set by a ?search=... query parameter, Search terms are set by a ?search=... query parameter,
and may be comma and/or whitespace delimited. and may be comma and/or whitespace delimited.
""" """
params = request.query_params.get(self.search_param, '') value = request.query_params.get(self.search_param, '')
params = params.replace('\x00', '') # strip null characters field = CharField(trim_whitespace=False, allow_blank=True)
params = params.replace(',', ' ') return field.run_validation(value)
return params.split()
def construct_search(self, field_name): def construct_search(self, field_name, queryset):
lookup = self.lookup_prefixes.get(field_name[0]) lookup = self.lookup_prefixes.get(field_name[0])
if lookup: if lookup:
field_name = field_name[1:] field_name = field_name[1:]
else: else:
# Use field_name if it includes a lookup.
opts = queryset.model._meta
lookup_fields = field_name.split(LOOKUP_SEP)
# Go through the fields, following all relations.
prev_field = None
for path_part in lookup_fields:
if path_part == "pk":
path_part = opts.pk.name
try:
field = opts.get_field(path_part)
except FieldDoesNotExist:
# Use valid query lookups.
if prev_field and prev_field.get_lookup(path_part):
return field_name
else:
prev_field = field
if hasattr(field, "path_infos"):
# Update opts to follow the relation.
opts = field.path_infos[-1].to_opts
# django < 4.1
elif hasattr(field, 'get_path_info'):
# Update opts to follow the relation.
opts = field.get_path_info()[-1].to_opts
# Otherwise, use the field with icontains.
lookup = 'icontains' lookup = 'icontains'
return LOOKUP_SEP.join([field_name, lookup]) return LOOKUP_SEP.join([field_name, lookup])
@ -113,26 +153,27 @@ class SearchFilter(BaseFilterBackend):
return queryset return queryset
orm_lookups = [ orm_lookups = [
self.construct_search(str(search_field)) self.construct_search(str(search_field), queryset)
for search_field in search_fields for search_field in search_fields
] ]
base = queryset base = queryset
conditions = [] # generator which for each term builds the corresponding search
for search_term in search_terms: conditions = (
queries = [ reduce(
models.Q(**{orm_lookup: search_term}) operator.or_,
for orm_lookup in orm_lookups (models.Q(**{orm_lookup: term}) for orm_lookup in orm_lookups)
] ) for term in search_smart_split(search_terms)
conditions.append(reduce(operator.or_, queries)) )
queryset = queryset.filter(reduce(operator.and_, conditions)) queryset = queryset.filter(reduce(operator.and_, conditions))
# Remove duplicates from results, if necessary
if self.must_call_distinct(queryset, search_fields): if self.must_call_distinct(queryset, search_fields):
# Filtering against a many-to-many field requires us to # inspired by django.contrib.admin
# call queryset.distinct() in order to avoid duplicate items # this is more accurate than .distinct() for M2M relationships
# in the resulting queryset. # also is cross-database
# We try to avoid this if possible, for performance reasons. queryset = queryset.filter(pk=models.OuterRef('pk'))
queryset = distinct(queryset, base) queryset = base.filter(models.Exists(queryset))
return queryset return queryset
def to_html(self, request, queryset, view): def to_html(self, request, queryset, view):

View File

@ -6,16 +6,36 @@ from django.core.exceptions import ImproperlyConfigured
from django.db import models from django.db import models
from django.db.models import CharField, Transform from django.db.models import CharField, Transform
from django.db.models.functions import Concat, Upper from django.db.models.functions import Concat, Upper
from django.test import TestCase from django.test import SimpleTestCase, TestCase
from django.test.utils import override_settings from django.test.utils import override_settings
from rest_framework import filters, generics, serializers from rest_framework import filters, generics, serializers
from rest_framework.compat import coreschema from rest_framework.compat import coreschema
from rest_framework.exceptions import ValidationError
from rest_framework.test import APIRequestFactory from rest_framework.test import APIRequestFactory
factory = APIRequestFactory() factory = APIRequestFactory()
class SearchSplitTests(SimpleTestCase):
def test_keep_quoted_togheter_regardless_of_commas(self):
assert ['hello, world'] == list(filters.search_smart_split('"hello, world"'))
def test_strips_commas_around_quoted(self):
assert ['hello, world'] == list(filters.search_smart_split(',,"hello, world"'))
assert ['hello, world'] == list(filters.search_smart_split(',,"hello, world",,'))
assert ['hello, world'] == list(filters.search_smart_split('"hello, world",,'))
def test_splits_by_comma(self):
assert ['hello', 'world'] == list(filters.search_smart_split(',,hello, world'))
assert ['hello', 'world'] == list(filters.search_smart_split(',,hello, world,,'))
assert ['hello', 'world'] == list(filters.search_smart_split('hello, world,,'))
def test_splits_quotes_followed_by_comma_and_sentence(self):
assert ['"hello', 'world"', 'found'] == list(filters.search_smart_split('"hello, world",found'))
class BaseFilterTests(TestCase): class BaseFilterTests(TestCase):
def setUp(self): def setUp(self):
self.original_coreapi = filters.coreapi self.original_coreapi = filters.coreapi
@ -50,7 +70,8 @@ class SearchFilterSerializer(serializers.ModelSerializer):
class SearchFilterTests(TestCase): class SearchFilterTests(TestCase):
def setUp(self): @classmethod
def setUpTestData(cls):
# Sequence of title/text is: # Sequence of title/text is:
# #
# z abc # z abc
@ -66,6 +87,9 @@ class SearchFilterTests(TestCase):
) )
SearchFilterModel(title=title, text=text).save() SearchFilterModel(title=title, text=text).save()
SearchFilterModel(title='A title', text='The long text').save()
SearchFilterModel(title='The title', text='The "text').save()
def test_search(self): def test_search(self):
class SearchListView(generics.ListAPIView): class SearchListView(generics.ListAPIView):
queryset = SearchFilterModel.objects.all() queryset = SearchFilterModel.objects.all()
@ -186,9 +210,21 @@ class SearchFilterTests(TestCase):
request = factory.get('/?search=\0as%00d\x00f') request = factory.get('/?search=\0as%00d\x00f')
request = view.initialize_request(request) request = view.initialize_request(request)
terms = filters.SearchFilter().get_search_terms(request) with self.assertRaises(ValidationError):
filters.SearchFilter().get_search_terms(request)
assert terms == ['asdf'] def test_search_field_with_custom_lookup(self):
class SearchListView(generics.ListAPIView):
queryset = SearchFilterModel.objects.all()
serializer_class = SearchFilterSerializer
filter_backends = (filters.SearchFilter,)
search_fields = ('text__iendswith',)
view = SearchListView.as_view()
request = factory.get('/', {'search': 'c'})
response = view(request)
assert response.data == [
{'id': 1, 'title': 'z', 'text': 'abc'},
]
def test_search_field_with_additional_transforms(self): def test_search_field_with_additional_transforms(self):
from django.test.utils import register_lookup from django.test.utils import register_lookup
@ -242,6 +278,32 @@ class SearchFilterTests(TestCase):
) )
assert search_query in rendered_search_field assert search_query in rendered_search_field
def test_search_field_with_escapes(self):
class SearchListView(generics.ListAPIView):
queryset = SearchFilterModel.objects.all()
serializer_class = SearchFilterSerializer
filter_backends = (filters.SearchFilter,)
search_fields = ('title', 'text',)
view = SearchListView.as_view()
request = factory.get('/', {'search': '"\\\"text"'})
response = view(request)
assert response.data == [
{'id': 12, 'title': 'The title', 'text': 'The "text'},
]
def test_search_field_with_quotes(self):
class SearchListView(generics.ListAPIView):
queryset = SearchFilterModel.objects.all()
serializer_class = SearchFilterSerializer
filter_backends = (filters.SearchFilter,)
search_fields = ('title', 'text',)
view = SearchListView.as_view()
request = factory.get('/', {'search': '"long text"'})
response = view(request)
assert response.data == [
{'id': 11, 'title': 'A title', 'text': 'The long text'},
]
class AttributeModel(models.Model): class AttributeModel(models.Model):
label = models.CharField(max_length=32) label = models.CharField(max_length=32)
@ -284,6 +346,13 @@ class SearchFilterFkTests(TestCase):
["%sattribute__label" % prefix, "%stitle" % prefix] ["%sattribute__label" % prefix, "%stitle" % prefix]
) )
def test_custom_lookup_to_related_model(self):
# In this test case the attribute of the fk model comes first in the
# list of search fields.
filter_ = filters.SearchFilter()
assert 'attribute__label__icontains' == filter_.construct_search('attribute__label', SearchFilterModelFk._meta)
assert 'attribute__label__iendswith' == filter_.construct_search('attribute__label__iendswith', SearchFilterModelFk._meta)
class SearchFilterModelM2M(models.Model): class SearchFilterModelM2M(models.Model):
title = models.CharField(max_length=20) title = models.CharField(max_length=20)