From dee83cebf4c25b409f9e2e4c01d24546ff502bec Mon Sep 17 00:00:00 2001 From: sevdog Date: Thu, 22 Jun 2023 10:39:52 +0200 Subject: [PATCH 1/7] Use subquery to remove duplicates in SearchFilter --- rest_framework/compat.py | 8 -------- rest_framework/filters.py | 13 +++++++------ 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/rest_framework/compat.py b/rest_framework/compat.py index ac5cbc572a..7e80704e11 100644 --- a/rest_framework/compat.py +++ b/rest_framework/compat.py @@ -3,7 +3,6 @@ versions of Django/Python, and compatibility wrappers around optional packages. """ import django -from django.conf import settings from django.views.generic import View @@ -14,13 +13,6 @@ def unicode_http_header(value): return value -def distinct(queryset, base): - if settings.DATABASES[queryset.db]["ENGINE"] == "django.db.backends.oracle": - # distinct analogue for Oracle users - return base.filter(pk__in=set(queryset.values_list('pk', flat=True))) - return queryset.distinct() - - # django.contrib.postgres requires psycopg2 try: from django.contrib.postgres import fields as postgres_fields diff --git a/rest_framework/filters.py b/rest_framework/filters.py index c48504957c..ec7fdbb53c 100644 --- a/rest_framework/filters.py +++ b/rest_framework/filters.py @@ -14,7 +14,7 @@ from django.utils.translation import gettext_lazy as _ from rest_framework import RemovedInDRF317Warning -from rest_framework.compat import coreapi, coreschema, distinct +from rest_framework.compat import coreapi, coreschema from rest_framework.settings import api_settings @@ -127,12 +127,13 @@ def filter_queryset(self, request, queryset, view): conditions.append(reduce(operator.or_, queries)) queryset = queryset.filter(reduce(operator.and_, conditions)) + # Remove duplicates from results, if necessary if self.must_call_distinct(queryset, search_fields): - # Filtering against a many-to-many field requires us to - # call queryset.distinct() in order to avoid duplicate items - # in the resulting queryset. - # We try to avoid this if possible, for performance reasons. - queryset = distinct(queryset, base) + # inspired by django.contrib.admin + # this is more accurate than .distinct form M2M relationship + # also is cross-database + queryset = queryset.filter(pk=models.OuterRef('pk')) + queryset = base.filter(models.Exists(queryset)) return queryset def to_html(self, request, queryset, view): From ad96159234a04481fc2bd4be0384d86aa959410c Mon Sep 17 00:00:00 2001 From: sevdog Date: Thu, 22 Jun 2023 11:37:21 +0200 Subject: [PATCH 2/7] Align SearchFilter behaviour to django.contrib.admin --- docs/api-guide/filtering.md | 4 +-- rest_framework/filters.py | 43 ++++++++++++++++++++------- tests/test_filters.py | 59 ++++++++++++++++++++++++++++++++++--- 3 files changed, 90 insertions(+), 16 deletions(-) diff --git a/docs/api-guide/filtering.md b/docs/api-guide/filtering.md index 47ea8592df..6479efd2d4 100644 --- a/docs/api-guide/filtering.md +++ b/docs/api-guide/filtering.md @@ -213,12 +213,12 @@ This will allow the client to filter the items in the list by making queries suc You can also perform a related lookup on a ForeignKey or ManyToManyField with the lookup API double-underscore notation: search_fields = ['username', 'email', 'profile__profession'] - + For [JSONField][JSONField] and [HStoreField][HStoreField] fields you can filter based on nested values within the data structure using the same double-underscore notation: search_fields = ['data__breed', 'data__owner__other_pets__0__name'] -By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. +By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. The search behavior may be restricted by prepending various characters to the `search_fields`. diff --git a/rest_framework/filters.py b/rest_framework/filters.py index ec7fdbb53c..01437f0f34 100644 --- a/rest_framework/filters.py +++ b/rest_framework/filters.py @@ -6,15 +6,17 @@ import warnings from functools import reduce -from django.core.exceptions import ImproperlyConfigured +from django.core.exceptions import FieldDoesNotExist, ImproperlyConfigured from django.db import models from django.db.models.constants import LOOKUP_SEP from django.template import loader from django.utils.encoding import force_str +from django.utils.text import smart_split, unescape_string_literal from django.utils.translation import gettext_lazy as _ from rest_framework import RemovedInDRF317Warning from rest_framework.compat import coreapi, coreschema +from rest_framework.fields import CharField from rest_framework.settings import api_settings @@ -64,18 +66,37 @@ def get_search_fields(self, view, request): def get_search_terms(self, request): """ Search terms are set by a ?search=... query parameter, - and may be comma and/or whitespace delimited. + and may be whitespace delimited. """ - params = request.query_params.get(self.search_param, '') - params = params.replace('\x00', '') # strip null characters - params = params.replace(',', ' ') - return params.split() + value = request.query_params.get(self.search_param, '') + field = CharField(trim_whitespace=False, allow_blank=True) + return field.run_validation(value) - def construct_search(self, field_name): + def construct_search(self, field_name, queryset): lookup = self.lookup_prefixes.get(field_name[0]) if lookup: field_name = field_name[1:] else: + # Use field_name if it includes a lookup. + opts = queryset.model._meta + lookup_fields = field_name.split(LOOKUP_SEP) + # Go through the fields, following all relations. + prev_field = None + for path_part in lookup_fields: + if path_part == "pk": + path_part = opts.pk.name + try: + field = opts.get_field(path_part) + except FieldDoesNotExist: + # Use valid query lookups. + if prev_field and prev_field.get_lookup(path_part): + return field_name + else: + prev_field = field + if hasattr(field, "path_infos"): + # Update opts to follow the relation. + opts = field.path_infos[-1].to_opts + # Otherwise, use the field with icontains. lookup = 'icontains' return LOOKUP_SEP.join([field_name, lookup]) @@ -113,15 +134,17 @@ def filter_queryset(self, request, queryset, view): return queryset orm_lookups = [ - self.construct_search(str(search_field)) + self.construct_search(str(search_field), queryset) for search_field in search_fields ] base = queryset conditions = [] - for search_term in search_terms: + for term in smart_split(search_terms): + if term.startswith(('"', "'")) and term[0] == term[-1]: + term = unescape_string_literal(term) queries = [ - models.Q(**{orm_lookup: search_term}) + models.Q(**{orm_lookup: term}) for orm_lookup in orm_lookups ] conditions.append(reduce(operator.or_, queries)) diff --git a/tests/test_filters.py b/tests/test_filters.py index 2a22e30f97..f8eed4b97f 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -11,6 +11,7 @@ from rest_framework import filters, generics, serializers from rest_framework.compat import coreschema +from rest_framework.exceptions import ValidationError from rest_framework.test import APIRequestFactory factory = APIRequestFactory() @@ -50,7 +51,8 @@ class Meta: class SearchFilterTests(TestCase): - def setUp(self): + @classmethod + def setUpTestData(cls): # Sequence of title/text is: # # z abc @@ -66,6 +68,9 @@ def setUp(self): ) SearchFilterModel(title=title, text=text).save() + SearchFilterModel(title='A title', text='The long text').save() + SearchFilterModel(title='The title', text='The "text').save() + def test_search(self): class SearchListView(generics.ListAPIView): queryset = SearchFilterModel.objects.all() @@ -177,6 +182,7 @@ class SearchListView(generics.ListAPIView): request = factory.get('/', {'search': r'^\w{3}$', 'title_only': 'true'}) response = view(request) + print(response.data) assert response.data == [ {'id': 3, 'title': 'zzz', 'text': 'cde'} ] @@ -186,9 +192,21 @@ def test_search_field_with_null_characters(self): request = factory.get('/?search=\0as%00d\x00f') request = view.initialize_request(request) - terms = filters.SearchFilter().get_search_terms(request) + with self.assertRaises(ValidationError): + filters.SearchFilter().get_search_terms(request) - assert terms == ['asdf'] + def test_search_field_with_custom_lookup(self): + class SearchListView(generics.ListAPIView): + queryset = SearchFilterModel.objects.all() + serializer_class = SearchFilterSerializer + filter_backends = (filters.SearchFilter,) + search_fields = ('text__iendswith',) + view = SearchListView.as_view() + request = factory.get('/', {'search': 'c'}) + response = view(request) + assert response.data == [ + {'id': 1, 'title': 'z', 'text': 'abc'}, + ] def test_search_field_with_additional_transforms(self): from django.test.utils import register_lookup @@ -242,6 +260,32 @@ class SearchListView(generics.ListAPIView): ) assert search_query in rendered_search_field + def test_search_field_with_escapes(self): + class SearchListView(generics.ListAPIView): + queryset = SearchFilterModel.objects.all() + serializer_class = SearchFilterSerializer + filter_backends = (filters.SearchFilter,) + search_fields = ('title', 'text',) + view = SearchListView.as_view() + request = factory.get('/', {'search': '"\\\"text"'}) + response = view(request) + assert response.data == [ + {'id': 12, 'title': 'The title', 'text': 'The "text'}, + ] + + def test_search_field_with_quotes(self): + class SearchListView(generics.ListAPIView): + queryset = SearchFilterModel.objects.all() + serializer_class = SearchFilterSerializer + filter_backends = (filters.SearchFilter,) + search_fields = ('title', 'text',) + view = SearchListView.as_view() + request = factory.get('/', {'search': '"long text"'}) + response = view(request) + assert response.data == [ + {'id': 11, 'title': 'A title', 'text': 'The long text'}, + ] + class AttributeModel(models.Model): label = models.CharField(max_length=32) @@ -284,6 +328,13 @@ def test_must_call_distinct_restores_meta_for_each_field(self): ["%sattribute__label" % prefix, "%stitle" % prefix] ) + def test_custom_lookup_to_related_model(self): + # In this test case the attribute of the fk model comes first in the + # list of search fields. + filter_ = filters.SearchFilter() + assert 'attribute__label__icontains' == filter_.construct_search('attribute__label', SearchFilterModelFk._meta) + assert 'attribute__label__iendswith' == filter_.construct_search('attribute__label__iendswith', SearchFilterModelFk._meta) + class SearchFilterModelM2M(models.Model): title = models.CharField(max_length=20) @@ -385,7 +436,7 @@ class SearchListView(generics.ListAPIView): search_fields = ('=name', 'entry__headline', '=entry__pub_date__year') view = SearchListView.as_view() - request = factory.get('/', {'search': 'Lennon,1979'}) + request = factory.get('/', {'search': 'Lennon 1979'}) response = view(request) assert len(response.data) == 1 From 82c42dc5db75c23fe28c88a95d5c04566a00d771 Mon Sep 17 00:00:00 2001 From: sevdog Date: Thu, 22 Jun 2023 14:41:48 +0200 Subject: [PATCH 3/7] Add compatibility with older django/python versions --- rest_framework/filters.py | 4 ++++ tests/test_filters.py | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/rest_framework/filters.py b/rest_framework/filters.py index 01437f0f34..375242b6be 100644 --- a/rest_framework/filters.py +++ b/rest_framework/filters.py @@ -96,6 +96,10 @@ def construct_search(self, field_name, queryset): if hasattr(field, "path_infos"): # Update opts to follow the relation. opts = field.path_infos[-1].to_opts + # django < 4.1 + elif hasattr(field, 'get_path_info'): + # Update opts to follow the relation. + opts = field.get_path_info()[-1].to_opts # Otherwise, use the field with icontains. lookup = 'icontains' return LOOKUP_SEP.join([field_name, lookup]) diff --git a/tests/test_filters.py b/tests/test_filters.py index f8eed4b97f..2a87165ca3 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -182,7 +182,6 @@ class SearchListView(generics.ListAPIView): request = factory.get('/', {'search': r'^\w{3}$', 'title_only': 'true'}) response = view(request) - print(response.data) assert response.data == [ {'id': 3, 'title': 'zzz', 'text': 'cde'} ] From 1ce85d01b51929b32b0e2c95659fcbad591fa622 Mon Sep 17 00:00:00 2001 From: sevdog Date: Fri, 23 Jun 2023 09:33:06 +0200 Subject: [PATCH 4/7] Allow search to split also by comma after smart split --- rest_framework/filters.py | 19 ++++++++++++++++--- tests/test_filters.py | 23 +++++++++++++++++++++-- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/rest_framework/filters.py b/rest_framework/filters.py index 375242b6be..6a625ec5fc 100644 --- a/rest_framework/filters.py +++ b/rest_framework/filters.py @@ -20,6 +20,21 @@ from rest_framework.settings import api_settings +def search_smart_split(search_terms): + """generator that first splits string by spaces, leaving quoted phrases togheter, + then it splits non-quoted phrases by commas. + """ + for term in smart_split(search_terms): + # trim commas to avoid bad matching for quoted phrases + term = term.strip(',') + if term.startswith(('"', "'")) and term[0] == term[-1]: + # quoted phrases are kept togheter without any other split + yield unescape_string_literal(term) + else: + # non-quoted tokens are split by comma, keeping only non-empty ones + yield from (sub_term.strip() for sub_term in term.split(',') if sub_term) + + class BaseFilterBackend: """ A base class from which all filter backend classes should inherit. @@ -144,9 +159,7 @@ def filter_queryset(self, request, queryset, view): base = queryset conditions = [] - for term in smart_split(search_terms): - if term.startswith(('"', "'")) and term[0] == term[-1]: - term = unescape_string_literal(term) + for term in search_smart_split(search_terms): queries = [ models.Q(**{orm_lookup: term}) for orm_lookup in orm_lookups diff --git a/tests/test_filters.py b/tests/test_filters.py index 2a87165ca3..6db0c3deb2 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -6,7 +6,7 @@ from django.db import models from django.db.models import CharField, Transform from django.db.models.functions import Concat, Upper -from django.test import TestCase +from django.test import SimpleTestCase, TestCase from django.test.utils import override_settings from rest_framework import filters, generics, serializers @@ -17,6 +17,25 @@ factory = APIRequestFactory() +class SearchSplitTests(SimpleTestCase): + + def test_keep_quoted_togheter_regardless_of_commas(self): + assert ['hello, world'] == list(filters.search_smart_split('"hello, world"')) + + def test_strips_commas_around_quoted(self): + assert ['hello, world'] == list(filters.search_smart_split(',,"hello, world"')) + assert ['hello, world'] == list(filters.search_smart_split(',,"hello, world",,')) + assert ['hello, world'] == list(filters.search_smart_split('"hello, world",,')) + + def test_splits_by_comma(self): + assert ['hello', 'world'] == list(filters.search_smart_split(',,hello, world')) + assert ['hello', 'world'] == list(filters.search_smart_split(',,hello, world,,')) + assert ['hello', 'world'] == list(filters.search_smart_split('hello, world,,')) + + def test_splits_quotes_followed_by_comma_and_sentence(self): + assert ['"hello', 'world"', 'found'] == list(filters.search_smart_split('"hello, world",found')) + + class BaseFilterTests(TestCase): def setUp(self): self.original_coreapi = filters.coreapi @@ -435,7 +454,7 @@ class SearchListView(generics.ListAPIView): search_fields = ('=name', 'entry__headline', '=entry__pub_date__year') view = SearchListView.as_view() - request = factory.get('/', {'search': 'Lennon 1979'}) + request = factory.get('/', {'search': 'Lennon,1979'}) response = view(request) assert len(response.data) == 1 From a7bafb7392681d36ce120b0a795da71ca73386a9 Mon Sep 17 00:00:00 2001 From: sevdog Date: Wed, 12 Jul 2023 17:39:08 +0200 Subject: [PATCH 5/7] Use generator to build search conditions to reduce iterations --- rest_framework/filters.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rest_framework/filters.py b/rest_framework/filters.py index 6a625ec5fc..065e72f94a 100644 --- a/rest_framework/filters.py +++ b/rest_framework/filters.py @@ -158,13 +158,13 @@ def filter_queryset(self, request, queryset, view): ] base = queryset - conditions = [] - for term in search_smart_split(search_terms): - queries = [ - models.Q(**{orm_lookup: term}) - for orm_lookup in orm_lookups - ] - conditions.append(reduce(operator.or_, queries)) + # generator which for each term builds the corresponding search + conditions = ( + reduce( + operator.or_, + (models.Q(**{orm_lookup: term}) for orm_lookup in orm_lookups) + ) for term in search_smart_split(search_terms) + ) queryset = queryset.filter(reduce(operator.and_, conditions)) # Remove duplicates from results, if necessary From 67cd9c22596847beaaa950cc2aa0d497ba288b44 Mon Sep 17 00:00:00 2001 From: sevdog Date: Mon, 24 Jul 2023 12:23:31 +0200 Subject: [PATCH 6/7] Improve search documentation --- docs/api-guide/filtering.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/api-guide/filtering.md b/docs/api-guide/filtering.md index 6479efd2d4..f7bdb3fe43 100644 --- a/docs/api-guide/filtering.md +++ b/docs/api-guide/filtering.md @@ -218,14 +218,18 @@ For [JSONField][JSONField] and [HStoreField][HStoreField] fields you can filter search_fields = ['data__breed', 'data__owner__other_pets__0__name'] -By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. +By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated . If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. Searches may contain _quoted phrases_ with spaces, each phrase is considered as a single search term. -The search behavior may be restricted by prepending various characters to the `search_fields`. -* '^' Starts-with search. -* '=' Exact matches. -* '@' Full-text search. (Currently only supported Django's [PostgreSQL backend][postgres-search].) -* '$' Regex search. +The search behavior may be specified by prefixing field names in `search_fields` with one of the following characters (which is equivalent to adding `__` to the field): + +| Prefix | Lookup | | +| ------ | --------------| ------------------ | +| `^` | `istartswith` | Starts-with search.| +| `=` | `iexact` | Exact matches. | +| `$` | `iregex` | Regex search. | +| `@` | `search` | Full-text search (Currently only supported Django's [PostgreSQL backend][postgres-search]). | +| None | `icontains` | Contains search (Default). | For example: From 140b3986b853fe52929fa56054ef5d64658be70c Mon Sep 17 00:00:00 2001 From: Asif Saif Uddin Date: Tue, 25 Jul 2023 13:37:13 +0600 Subject: [PATCH 7/7] Update docs/api-guide/filtering.md --- docs/api-guide/filtering.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api-guide/filtering.md b/docs/api-guide/filtering.md index f7bdb3fe43..ff5f3c7759 100644 --- a/docs/api-guide/filtering.md +++ b/docs/api-guide/filtering.md @@ -218,7 +218,7 @@ For [JSONField][JSONField] and [HStoreField][HStoreField] fields you can filter search_fields = ['data__breed', 'data__owner__other_pets__0__name'] -By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated . If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. Searches may contain _quoted phrases_ with spaces, each phrase is considered as a single search term. +By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. Searches may contain _quoted phrases_ with spaces, each phrase is considered as a single search term. The search behavior may be specified by prefixing field names in `search_fields` with one of the following characters (which is equivalent to adding `__` to the field):