diff --git a/docs/api-guide/filtering.md b/docs/api-guide/filtering.md index 47ea8592df..ff5f3c7759 100644 --- a/docs/api-guide/filtering.md +++ b/docs/api-guide/filtering.md @@ -213,19 +213,23 @@ This will allow the client to filter the items in the list by making queries suc You can also perform a related lookup on a ForeignKey or ManyToManyField with the lookup API double-underscore notation: search_fields = ['username', 'email', 'profile__profession'] - + For [JSONField][JSONField] and [HStoreField][HStoreField] fields you can filter based on nested values within the data structure using the same double-underscore notation: search_fields = ['data__breed', 'data__owner__other_pets__0__name'] -By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. +By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. Searches may contain _quoted phrases_ with spaces, each phrase is considered as a single search term. + -The search behavior may be restricted by prepending various characters to the `search_fields`. +The search behavior may be specified by prefixing field names in `search_fields` with one of the following characters (which is equivalent to adding `__` to the field): -* '^' Starts-with search. -* '=' Exact matches. -* '@' Full-text search. (Currently only supported Django's [PostgreSQL backend][postgres-search].) -* '$' Regex search. +| Prefix | Lookup | | +| ------ | --------------| ------------------ | +| `^` | `istartswith` | Starts-with search.| +| `=` | `iexact` | Exact matches. | +| `$` | `iregex` | Regex search. | +| `@` | `search` | Full-text search (Currently only supported Django's [PostgreSQL backend][postgres-search]). | +| None | `icontains` | Contains search (Default). | For example: diff --git a/rest_framework/compat.py b/rest_framework/compat.py index ac5cbc572a..7e80704e11 100644 --- a/rest_framework/compat.py +++ b/rest_framework/compat.py @@ -3,7 +3,6 @@ versions of Django/Python, and compatibility wrappers around optional packages. """ import django -from django.conf import settings from django.views.generic import View @@ -14,13 +13,6 @@ def unicode_http_header(value): return value -def distinct(queryset, base): - if settings.DATABASES[queryset.db]["ENGINE"] == "django.db.backends.oracle": - # distinct analogue for Oracle users - return base.filter(pk__in=set(queryset.values_list('pk', flat=True))) - return queryset.distinct() - - # django.contrib.postgres requires psycopg2 try: from django.contrib.postgres import fields as postgres_fields diff --git a/rest_framework/filters.py b/rest_framework/filters.py index c48504957c..065e72f94a 100644 --- a/rest_framework/filters.py +++ b/rest_framework/filters.py @@ -6,18 +6,35 @@ import warnings from functools import reduce -from django.core.exceptions import ImproperlyConfigured +from django.core.exceptions import FieldDoesNotExist, ImproperlyConfigured from django.db import models from django.db.models.constants import LOOKUP_SEP from django.template import loader from django.utils.encoding import force_str +from django.utils.text import smart_split, unescape_string_literal from django.utils.translation import gettext_lazy as _ from rest_framework import RemovedInDRF317Warning -from rest_framework.compat import coreapi, coreschema, distinct +from rest_framework.compat import coreapi, coreschema +from rest_framework.fields import CharField from rest_framework.settings import api_settings +def search_smart_split(search_terms): + """generator that first splits string by spaces, leaving quoted phrases togheter, + then it splits non-quoted phrases by commas. + """ + for term in smart_split(search_terms): + # trim commas to avoid bad matching for quoted phrases + term = term.strip(',') + if term.startswith(('"', "'")) and term[0] == term[-1]: + # quoted phrases are kept togheter without any other split + yield unescape_string_literal(term) + else: + # non-quoted tokens are split by comma, keeping only non-empty ones + yield from (sub_term.strip() for sub_term in term.split(',') if sub_term) + + class BaseFilterBackend: """ A base class from which all filter backend classes should inherit. @@ -64,18 +81,41 @@ def get_search_fields(self, view, request): def get_search_terms(self, request): """ Search terms are set by a ?search=... query parameter, - and may be comma and/or whitespace delimited. + and may be whitespace delimited. """ - params = request.query_params.get(self.search_param, '') - params = params.replace('\x00', '') # strip null characters - params = params.replace(',', ' ') - return params.split() + value = request.query_params.get(self.search_param, '') + field = CharField(trim_whitespace=False, allow_blank=True) + return field.run_validation(value) - def construct_search(self, field_name): + def construct_search(self, field_name, queryset): lookup = self.lookup_prefixes.get(field_name[0]) if lookup: field_name = field_name[1:] else: + # Use field_name if it includes a lookup. + opts = queryset.model._meta + lookup_fields = field_name.split(LOOKUP_SEP) + # Go through the fields, following all relations. + prev_field = None + for path_part in lookup_fields: + if path_part == "pk": + path_part = opts.pk.name + try: + field = opts.get_field(path_part) + except FieldDoesNotExist: + # Use valid query lookups. + if prev_field and prev_field.get_lookup(path_part): + return field_name + else: + prev_field = field + if hasattr(field, "path_infos"): + # Update opts to follow the relation. + opts = field.path_infos[-1].to_opts + # django < 4.1 + elif hasattr(field, 'get_path_info'): + # Update opts to follow the relation. + opts = field.get_path_info()[-1].to_opts + # Otherwise, use the field with icontains. lookup = 'icontains' return LOOKUP_SEP.join([field_name, lookup]) @@ -113,26 +153,27 @@ def filter_queryset(self, request, queryset, view): return queryset orm_lookups = [ - self.construct_search(str(search_field)) + self.construct_search(str(search_field), queryset) for search_field in search_fields ] base = queryset - conditions = [] - for search_term in search_terms: - queries = [ - models.Q(**{orm_lookup: search_term}) - for orm_lookup in orm_lookups - ] - conditions.append(reduce(operator.or_, queries)) + # generator which for each term builds the corresponding search + conditions = ( + reduce( + operator.or_, + (models.Q(**{orm_lookup: term}) for orm_lookup in orm_lookups) + ) for term in search_smart_split(search_terms) + ) queryset = queryset.filter(reduce(operator.and_, conditions)) + # Remove duplicates from results, if necessary if self.must_call_distinct(queryset, search_fields): - # Filtering against a many-to-many field requires us to - # call queryset.distinct() in order to avoid duplicate items - # in the resulting queryset. - # We try to avoid this if possible, for performance reasons. - queryset = distinct(queryset, base) + # inspired by django.contrib.admin + # this is more accurate than .distinct form M2M relationship + # also is cross-database + queryset = queryset.filter(pk=models.OuterRef('pk')) + queryset = base.filter(models.Exists(queryset)) return queryset def to_html(self, request, queryset, view): diff --git a/tests/test_filters.py b/tests/test_filters.py index 2a22e30f97..6db0c3deb2 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -6,16 +6,36 @@ from django.db import models from django.db.models import CharField, Transform from django.db.models.functions import Concat, Upper -from django.test import TestCase +from django.test import SimpleTestCase, TestCase from django.test.utils import override_settings from rest_framework import filters, generics, serializers from rest_framework.compat import coreschema +from rest_framework.exceptions import ValidationError from rest_framework.test import APIRequestFactory factory = APIRequestFactory() +class SearchSplitTests(SimpleTestCase): + + def test_keep_quoted_togheter_regardless_of_commas(self): + assert ['hello, world'] == list(filters.search_smart_split('"hello, world"')) + + def test_strips_commas_around_quoted(self): + assert ['hello, world'] == list(filters.search_smart_split(',,"hello, world"')) + assert ['hello, world'] == list(filters.search_smart_split(',,"hello, world",,')) + assert ['hello, world'] == list(filters.search_smart_split('"hello, world",,')) + + def test_splits_by_comma(self): + assert ['hello', 'world'] == list(filters.search_smart_split(',,hello, world')) + assert ['hello', 'world'] == list(filters.search_smart_split(',,hello, world,,')) + assert ['hello', 'world'] == list(filters.search_smart_split('hello, world,,')) + + def test_splits_quotes_followed_by_comma_and_sentence(self): + assert ['"hello', 'world"', 'found'] == list(filters.search_smart_split('"hello, world",found')) + + class BaseFilterTests(TestCase): def setUp(self): self.original_coreapi = filters.coreapi @@ -50,7 +70,8 @@ class Meta: class SearchFilterTests(TestCase): - def setUp(self): + @classmethod + def setUpTestData(cls): # Sequence of title/text is: # # z abc @@ -66,6 +87,9 @@ def setUp(self): ) SearchFilterModel(title=title, text=text).save() + SearchFilterModel(title='A title', text='The long text').save() + SearchFilterModel(title='The title', text='The "text').save() + def test_search(self): class SearchListView(generics.ListAPIView): queryset = SearchFilterModel.objects.all() @@ -186,9 +210,21 @@ def test_search_field_with_null_characters(self): request = factory.get('/?search=\0as%00d\x00f') request = view.initialize_request(request) - terms = filters.SearchFilter().get_search_terms(request) + with self.assertRaises(ValidationError): + filters.SearchFilter().get_search_terms(request) - assert terms == ['asdf'] + def test_search_field_with_custom_lookup(self): + class SearchListView(generics.ListAPIView): + queryset = SearchFilterModel.objects.all() + serializer_class = SearchFilterSerializer + filter_backends = (filters.SearchFilter,) + search_fields = ('text__iendswith',) + view = SearchListView.as_view() + request = factory.get('/', {'search': 'c'}) + response = view(request) + assert response.data == [ + {'id': 1, 'title': 'z', 'text': 'abc'}, + ] def test_search_field_with_additional_transforms(self): from django.test.utils import register_lookup @@ -242,6 +278,32 @@ class SearchListView(generics.ListAPIView): ) assert search_query in rendered_search_field + def test_search_field_with_escapes(self): + class SearchListView(generics.ListAPIView): + queryset = SearchFilterModel.objects.all() + serializer_class = SearchFilterSerializer + filter_backends = (filters.SearchFilter,) + search_fields = ('title', 'text',) + view = SearchListView.as_view() + request = factory.get('/', {'search': '"\\\"text"'}) + response = view(request) + assert response.data == [ + {'id': 12, 'title': 'The title', 'text': 'The "text'}, + ] + + def test_search_field_with_quotes(self): + class SearchListView(generics.ListAPIView): + queryset = SearchFilterModel.objects.all() + serializer_class = SearchFilterSerializer + filter_backends = (filters.SearchFilter,) + search_fields = ('title', 'text',) + view = SearchListView.as_view() + request = factory.get('/', {'search': '"long text"'}) + response = view(request) + assert response.data == [ + {'id': 11, 'title': 'A title', 'text': 'The long text'}, + ] + class AttributeModel(models.Model): label = models.CharField(max_length=32) @@ -284,6 +346,13 @@ def test_must_call_distinct_restores_meta_for_each_field(self): ["%sattribute__label" % prefix, "%stitle" % prefix] ) + def test_custom_lookup_to_related_model(self): + # In this test case the attribute of the fk model comes first in the + # list of search fields. + filter_ = filters.SearchFilter() + assert 'attribute__label__icontains' == filter_.construct_search('attribute__label', SearchFilterModelFk._meta) + assert 'attribute__label__iendswith' == filter_.construct_search('attribute__label__iendswith', SearchFilterModelFk._meta) + class SearchFilterModelM2M(models.Model): title = models.CharField(max_length=20)