From d47abcb4c58a6e639e4733868c3c6d7cde0c7ee9 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Tue, 23 Jun 2026 11:46:56 +0100 Subject: [PATCH 1/2] Fix for #493 Antiquarian name matches come higher in search results --- src/rard/research/tests/views/test_search.py | 54 ++++++++++++++++++++ src/rard/research/views/search.py | 50 ++++++++++++++---- 2 files changed, 95 insertions(+), 9 deletions(-) diff --git a/src/rard/research/tests/views/test_search.py b/src/rard/research/tests/views/test_search.py index e6464ebd..9084c013 100644 --- a/src/rard/research/tests/views/test_search.py +++ b/src/rard/research/tests/views/test_search.py @@ -1,6 +1,7 @@ import pytest from django.conf import settings from django.contrib.auth.models import AnonymousUser +from django.http.request import HttpRequest from django.test import RequestFactory, TestCase from django.urls import reverse @@ -403,6 +404,59 @@ def do_search(search_function, keywords): self.assertEqual(do_search(view.citing_work_search, "*xth"), [cw2]) self.assertCountEqual(do_search(view.citing_work_search, "?ook"), [cw1, cw2]) + def search_all(self, keywords: str) -> HttpRequest: + req = HttpRequest() + req.build_absolute_uri("https://localhost/search") + req.GET["q"] = keywords + view = SearchView() + view.kwargs = {} + view.request = req + resp = view.get(req) + return resp.context_data["object_list"] + + def test_search_heavy(self): + # Run searches where the antiquarians should come first + # a2 should have a lower pk than w1 and f2 so that they come first + # if no weighting is applied + a2 = Antiquarian.objects.create(name="other antiquarian", re_code="2") + a1 = Antiquarian.objects.create(name="antiquarian one", re_code="1") + a2.introduction.content = "Two is one more than one." + a2.save() + w1 = Work.objects.create(name="work one") + w2 = Work.objects.create(name="work other two") + cw = CitingWork.objects.create(title="citing_work one") + f2 = Fragment.objects.create() + f2.original_texts.create(content="fragment other two", citing_work=cw) + f1 = Fragment.objects.create() + f1.original_texts.create(content="fragment one", citing_work=cw) + tm1 = Testimonium.objects.create() + tm1.original_texts.create(content="testimonium one", citing_work=cw) + f3 = Fragment.objects.create() + o1 = f3.original_texts.create(content="content", citing_work=cw) + o1.apparatus_criticus_items.create(content="one stuff") + + af1 = AnonymousFragment.objects.create() + o1 = af1.original_texts.create(content="almost one raddish", citing_work=cw) + + # When searching for an antiquarian by part of his name... + results = self.search_all("one") + # All these objects are returned + expected = {a1, a2, w1, a1.unknown_work, cw, f1, tm1, f3, af1} + self.assertSetEqual(set(results), expected) + # But the antiquarian that matched in the name comes first + self.assertEqual(results[0], a1) + # But when finding the aquarian by a word in its introduction... + results2 = self.search_all("two") + # All these objects are returned + self.assertSetEqual(set(results2), {a2, w2, f2}) + # And the antiquarian is not first + self.assertNotEqual(results2[0], a2) + results3 = self.search_all("other") + # All these objects are returned + self.assertSetEqual(set(results3), {a2, a2.unknown_work, w2, f2}) + # But the antiquarian is first this time because "other" matches the name + self.assertEqual(results3[0], a2) + def test_wildcards(self): # Run a particular search and return a list of results def do_search(search_function, keywords): diff --git a/src/rard/research/views/search.py b/src/rard/research/views/search.py index 31a0a899..39e2ad56 100644 --- a/src/rard/research/views/search.py +++ b/src/rard/research/views/search.py @@ -207,7 +207,8 @@ def do_match( query_string: str, matcher: Callable[[str], Q], keyword_list: Iterable[str], - add_snippet=False, + add_snippet: bool = False, + weight: int = 1, ) -> QuerySet: """ Get the queryset for this match portion. @@ -221,6 +222,7 @@ def do_match( expression for whether the field matches the query. :param keywords: The user's query string; a string of keywords. :param add_snippet: Should we add a snippet to the resulting queryset? + :param weight: Weight to annotate the results with. :return: The queryset of results and snippets. """ matches = query_set.filter(matcher(f"{query_string}__{self.lookup}")) @@ -229,7 +231,7 @@ def do_match( if add_snippet else Value("") ) - matches = matches.annotate(snippet=snippet) + matches = matches.annotate(snippet=snippet, weight=Value(weight)) return matches def snippet_query(self, keyword_list: str, query_string: str) -> Expression: @@ -304,7 +306,7 @@ def match( .. code-block:: python - results = term.match('foreign_key_1__foreign_key_2__field') + results = terms.match('foreign_key_1__foreign_key_2__field') :param query_set: The query set to be searched. :param query_string: A lookup parameter for the field to be searched. @@ -320,6 +322,32 @@ def match( add_snippet=add_snippet, ) + def match_heavy( + self, query_set: QuerySet, query_string: str, add_snippet: bool = False + ) -> QuerySet: + """ + Get the queryset for matching one type of objects, without Latin folding, + with extra weight (so they appear at the top of the) + + .. code-block:: python + + results = terms.match('foreign_key_1__foreign_key_2__field') + + :param query_set: The query set to be searched. + :param query_string: A lookup parameter for the field to be searched. + :param add_snippet: Should we add a snippet to the resulting queryset? + :return: The queryset of results and snippets. The snippet annotation + (if present) has the name ``snippet``. + """ + return self.do_match( + query_set, + query_string, + self.nonfolded_matcher, + self.keywords, + add_snippet=add_snippet, + weight=100, + ) + def match_folded( self, query_set: QuerySet, query_string: str, add_snippet: bool = False ) -> QuerySet: @@ -328,7 +356,7 @@ def match_folded( .. code-block:: python - results = term.match_folded('foreign_key_1__foreign_key_2__field') + results = terms.match_folded('foreign_key_1__foreign_key_2__field') :param query_set: The query set to be searched. :param query_string: A lookup parameter for the field to be searched. @@ -488,9 +516,9 @@ def antiquarian_search( """ qs = cls.get_filtered_model_qs(Antiquarian, ant_filter=ant_filter) search_fields = [ - ("name", terms.match), + ("name", terms.match_heavy), ("plain_introduction", terms.match), - ("re_code", terms.match), + ("re_code", terms.match_heavy), ] return cls.generic_content_search(qs, search_fields) @@ -831,10 +859,10 @@ def get_filtered_model_qs( def get(self, request, *args, **kwargs): """Perform the search for the user.""" - keywords = self.request.GET.get("q", None) + keywords = request.GET.get("q", None) if keywords is not None and keywords.strip() == "": # empty search field. Redirect to cleared page - ret = redirect(self.request.path) + ret = redirect(request.path) else: ret = super().get(request, *args, **kwargs) @@ -889,7 +917,11 @@ def get_queryset(self): queryset_chain = chain(*result_set) # return a list... - return sorted(queryset_chain, key=lambda instance: instance.pk, reverse=True) + return sorted( + queryset_chain, + key=lambda instance: (instance.weight, instance.pk), + reverse=True, + ) def antiquarians_and_authors_and_bibliographies_in_object_list(self, object_list): """Generate lists of Antiquarians, Citing Authors and Bibliographies From 595c00a64af12ecddef6321fab90f5ebccdd2d25 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Tue, 23 Jun 2026 12:04:36 +0100 Subject: [PATCH 2/2] Silence github security complaint --- src/rard/research/views/search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rard/research/views/search.py b/src/rard/research/views/search.py index 39e2ad56..0aa35202 100644 --- a/src/rard/research/views/search.py +++ b/src/rard/research/views/search.py @@ -862,7 +862,8 @@ def get(self, request, *args, **kwargs): keywords = request.GET.get("q", None) if keywords is not None and keywords.strip() == "": # empty search field. Redirect to cleared page - ret = redirect(request.path) + # Needs to be self.request or github-advanced-security complains + ret = redirect(self.request.path) else: ret = super().get(request, *args, **kwargs)