From 54f0106f2956b968164eb0dbe7f3442c39da1e30 Mon Sep 17 00:00:00 2001
From: Safwan Rahman
Date: Fri, 22 Jun 2018 22:12:05 +0600
Subject: [PATCH] Implement exact match search and rewrite for operator ordering

---
 readthedocs/search/faceted_search.py   | 22 ++++++++++++++++++++++
 readthedocs/search/tests/test_views.py | 17 +++++++++++++++++
 tox.ini                                |  1 -
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py
index 56a1d3dec..95a074ef4 100644
--- a/readthedocs/search/faceted_search.py
+++ b/readthedocs/search/faceted_search.py
@@ -1,4 +1,5 @@
 from elasticsearch_dsl import FacetedSearch, TermsFacet
+from elasticsearch_dsl.query import SimpleQueryString, Bool
 
 
 class RTDFacetedSearch(FacetedSearch):
@@ -29,3 +30,24 @@ class FileSearch(RTDFacetedSearch):
         'project': TermsFacet(field='project'),
         'version': TermsFacet(field='version')
     }
+
+    def query(self, search, query):
+        """
+        Add query part to ``search``.
+        """
+
+        if query:
+            all_queries = []
+
+            # Need to search for both 'AND' and 'OR' operations
+            # The score of AND should be higher as it comes first
+            for operator in ['AND', 'OR']:
+                query_string = SimpleQueryString(query=query, fields=self.fields,
+                                                 default_operator=operator)
+                all_queries.append(query_string)
+
+            # Run bool query with should, so it returns result where either of the query matches
+            bool_query = Bool(should=all_queries)
+            search = search.query(bool_query)
+
+        return search
diff --git a/readthedocs/search/tests/test_views.py b/readthedocs/search/tests/test_views.py
index cda207fb4..1c7d9a18b 100644
--- a/readthedocs/search/tests/test_views.py
+++ b/readthedocs/search/tests/test_views.py
@@ -104,6 +104,23 @@ class TestPageSearch(object):
         # Check the actual text is in the result, not the cased one
         assert query_text in result.text()
 
+    def test_file_search_exact_match(self, client, project):
+        """Check quoted query match exact phrase
+
+        Making a query with quoted text like *"foo bar"* should match
+        exactly *foo bar* phrase.
+        """
+
+        # `Github` word is present both in `kuma` and `pipeline` files
+        # But the phrase Github can is available only in kuma docs.
+        # So search with this phrase to check
+        query = r'"GitHub can"'
+
+        result, _ = self._get_search_result(url=self.url, client=client,
+                                            search_params={'q': query, 'type': 'file'})
+
+        assert len(result) == 1
+
     def test_page_search_not_return_removed_page(self, client, project):
         """Check removed page are not in the search index"""
         query = get_search_query_from_project_file(project_slug=project.slug)
diff --git a/tox.ini b/tox.ini
index c7e5b7487..821fded07 100644
--- a/tox.ini
+++ b/tox.ini
@@ -15,7 +15,6 @@ setenv =
     DJANGO_SETTINGS_MODULE=readthedocs.settings.test
     LANG=C
     LC_CTYPE=C.UTF-8
-    DJANGO_SETTINGS_SKIP_LOCAL=True
 deps = -r{toxinidir}/requirements/testing.txt
 changedir = {toxinidir}/readthedocs
 commands =