diff --git a/cl/lib/search_utils.py b/cl/lib/search_utils.py
index 15bc810c9a..c1a46ad1d3 100644
--- a/cl/lib/search_utils.py
+++ b/cl/lib/search_utils.py
@@ -1,15 +1,54 @@
+import pickle
 import re
-from typing import Any, Dict, List, Optional, Tuple, cast
+from typing import Any, Dict, List, Optional, Tuple, TypedDict
 from urllib.parse import parse_qs, urlencode
 
-from asgiref.sync import sync_to_async
-from django.core.paginator import Page
+from asgiref.sync import async_to_sync, sync_to_async
+from django.conf import settings
+from django.core.cache import cache
+from django.core.paginator import EmptyPage, Page, PageNotAnInteger
 from django.http import HttpRequest
+from django.http.request import QueryDict
+from django_elasticsearch_dsl.search import Search
+from eyecite.models import FullCaseCitation
 from eyecite.tokenizers import HyperscanTokenizer
 
+from cl.citations.match_citations_queries import es_get_query_citation
 from cl.citations.utils import get_citation_depth_between_clusters
-from cl.lib.types import SearchParam
-from cl.search.forms import SearchForm
+from cl.lib.crypto import sha256
+from cl.lib.elasticsearch_utils import (
+    build_es_main_query,
+    compute_lowest_possible_estimate,
+    convert_str_date_fields_to_date_objects,
+    fetch_es_results,
+    get_facet_dict_for_search_query,
+    limit_inner_hits,
+    merge_courts_from_db,
+    merge_unavailable_fields_on_parent_document,
+    set_results_highlights,
+    simplify_estimated_count,
+)
+from cl.lib.paginators import ESPaginator
+from cl.lib.types import CleanData
+from cl.lib.utils import (
+    sanitize_unbalanced_parenthesis,
+    sanitize_unbalanced_quotes,
+)
+from cl.search.constants import RELATED_PATTERN
+from cl.search.documents import (
+    AudioDocument,
+    DocketDocument,
+    OpinionClusterDocument,
+    ParentheticalGroupDocument,
+    PersonDocument,
+)
+from cl.search.exception import (
+    BadProximityQuery,
+    DisallowedWildcardPattern,
+    UnbalancedParenthesesQuery,
+    UnbalancedQuotesQuery,
+)
+from cl.search.forms import SearchForm, _clean_form
 from cl.search.models import (
     SEARCH_TYPES,
     Court,
@@ -261,3 +300,323 @@ def store_search_api_query(
         source=SearchQuery.API,
         engine=engine,
     )
+
+
+class CachedESSearchResults(TypedDict):
+    results: Page | list
+    main_total: int | None
+    child_total: int | None
+
+
+def retrieve_cached_search_results(
+    get_params: QueryDict,
+) -> tuple[CachedESSearchResults | None, str]:
+    """
+    Retrieve cached search results based on the GET parameters.
+
+    :param get_params: The GET parameters provided by the user.
+    :return: A two-tuple containing either the cached search results and the
+    cache key based on a prefix and the GET parameters, or None and the cache
+    key if no cached results were found.
+    """
+
+    params = get_params.copy()
+    # If no page is present in the parameters, set it to 1 to generate the same
+    # hash for page 1, regardless of whether the page parameter is included.
+    # Apply the same to the q parameter when it is not present in params.
+    params.setdefault("page", "1")
+    params.setdefault("q", "")
+    sorted_params = dict(sorted(params.items()))
+    key_prefix = "search_results_cache:"
+    params_hash = sha256(pickle.dumps(sorted_params))
+    cache_key = f"{key_prefix}{params_hash}"
+    cached_results = cache.get(cache_key)
+    if cached_results:
+        return pickle.loads(cached_results), cache_key
+    return None, cache_key
+
+
+def fetch_and_paginate_results(
+    get_params: QueryDict,
+    search_query: Search,
+    child_docs_count_query: Search | None,
+    rows_per_page: int = settings.SEARCH_PAGE_SIZE,
+    cache_key: str | None = None,
+) -> tuple[Page | list, int, bool, int | None, int | None]:
+    """Fetch and paginate elasticsearch results.
+
+    :param get_params: The user GET params.
+    :param search_query: Elasticsearch DSL Search object
+    :param child_docs_count_query: The ES DSL Query to perform the count for
+    child documents if required, otherwise None.
+    :param rows_per_page: Number of records wanted per page
+    :param cache_key: The cache key to use.
+    :return: A five-tuple: the paginated results, the ES query time, whether
+    there was an error, the total number of hits for the main document, and
+    the total number of hits for the child documents.
+    """
+
+    # Run the query and set up pagination
+    if cache_key is not None:
+        # Check cache for displaying insights on the Home Page.
+        results = cache.get(cache_key)
+        if results is not None:
+            return results, 0, False, None, None
+
+    # Check micro-cache for all other search requests.
+    results_dict, micro_cache_key = retrieve_cached_search_results(get_params)
+    if results_dict:
+        # Return results and counts. Set query time to 1ms.
+        return (
+            results_dict["results"],
+            1,
+            False,
+            results_dict["main_total"],
+            results_dict["child_total"],
+        )
+
+    try:
+        page = int(get_params.get("page", 1))
+    except ValueError:
+        page = 1
+
+    # Check pagination depth
+    check_pagination_depth(page)
+
+    # Fetch results from ES
+    hits, query_time, error, main_total, child_total = fetch_es_results(
+        get_params, search_query, child_docs_count_query, page, rows_per_page
+    )
+
+    if error:
+        return [], query_time, error, main_total, child_total
+    paginator = ESPaginator(main_total, hits, rows_per_page)
+    try:
+        results = paginator.page(page)
+    except PageNotAnInteger:
+        results = paginator.page(1)
+    except EmptyPage:
+        results = paginator.page(paginator.num_pages)
+
+    search_type = get_params.get("type", SEARCH_TYPES.OPINION)
+    # Set highlights in results.
+    convert_str_date_fields_to_date_objects(results, search_type)
+    merge_courts_from_db(results, search_type)
+    limit_inner_hits(get_params, results, search_type)
+    set_results_highlights(results, search_type)
+    merge_unavailable_fields_on_parent_document(results, search_type)
+
+    if cache_key is not None:
+        # Cache only Page results for displaying insights on the Home Page.
+        cache.set(cache_key, results, settings.QUERY_RESULTS_CACHE)
+    elif settings.ELASTICSEARCH_MICRO_CACHE_ENABLED:
+        # Cache Page results and counts for all other search requests.
+        results_dict = {
+            "results": results,
+            "main_total": main_total,
+            "child_total": child_total,
+        }
+        serialized_data = pickle.dumps(results_dict)
+        cache.set(
+            micro_cache_key,
+            serialized_data,
+            settings.SEARCH_RESULTS_MICRO_CACHE,
+        )
+
+    return results, query_time, error, main_total, child_total
+
+
+def remove_missing_citations(
+    missing_citations: list[FullCaseCitation], cd: CleanData
+) -> tuple[list[str], str]:
+    """Removes missing citations from the query and returns the missing
+    citations as strings and the modified query.
+
+    :param missing_citations: A list of FullCaseCitation objects representing
+    the citations that are missing from the query.
+    :param cd: A CleanData object containing the query string.
+    :return: A two-tuple containing a list of missing citation strings and the
+    suggested query string with missing citations removed.
+    """
+    missing_citations_str = [
+        citation.corrected_citation() for citation in missing_citations
+    ]
+    query_string = cd["q"]
+    for citation in missing_citations_str:
+        query_string = query_string.replace(citation, "")
+    suggested_query = (
+        " ".join(query_string.split()) if missing_citations_str else ""
+    )
+    return missing_citations_str, suggested_query
+
+
+def do_es_search(
+    get_params: QueryDict,
+    rows: int = settings.SEARCH_PAGE_SIZE,
+    facet: bool = True,
+    cache_key: str | None = None,
+):
+    """Run Elasticsearch searching and filtering and prepare data to display
+
+    :param get_params: The request.GET params sent by user.
+    :param rows: The number of Elasticsearch results to request
+    :param facet: Whether to complete faceting in the query
+    :param cache_key: A cache key with which to save the results. Note that it
+    does not do anything clever with the actual query, so if you use this, your
+    cache key should *already* have factored in the query. If None, no caching
+    is set or used. Results are saved for six hours.
+    :return: A big dict of variables for use in the search results, homepage, or
+    other location.
+    """
+    paged_results = None
+    courts = Court.objects.filter(in_use=True)
+    query_time: int | None = 0
+    total_query_results: int | None = 0
+    top_hits_limit: int | None = 5
+    document_type = None
+    error_message = ""
+    suggested_query = ""
+    total_child_results: int | None = 0
+    related_cluster = None
+    cited_cluster = None
+    query_citation = None
+    facet_fields = []
+    missing_citations_str: list[str] = []
+    error = True
+
+    search_form = SearchForm(get_params, courts=courts)
+    match get_params.get("type", SEARCH_TYPES.OPINION):
+        case SEARCH_TYPES.PARENTHETICAL:
+            document_type = ParentheticalGroupDocument
+        case SEARCH_TYPES.ORAL_ARGUMENT:
+            document_type = AudioDocument
+        case SEARCH_TYPES.PEOPLE:
+            document_type = PersonDocument
+        case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS:
+            document_type = DocketDocument
+            # Set a different number of results per page for RECAP SEARCH
+            rows = settings.RECAP_SEARCH_PAGE_SIZE
+        case SEARCH_TYPES.OPINION:
+            document_type = OpinionClusterDocument
+
+    if search_form.is_valid() and document_type:
+        # Copy cleaned_data to preserve the original data when displaying the form
+        cd = search_form.cleaned_data.copy()
+        try:
+            # Create necessary filters to execute ES query
+            search_query = document_type.search()
+
+            if cd["type"] in [
+                SEARCH_TYPES.OPINION,
+                SEARCH_TYPES.RECAP,
+                SEARCH_TYPES.DOCKETS,
+            ]:
+                query_citation, missing_citations = es_get_query_citation(cd)
+                if cd["type"] in [
+                    SEARCH_TYPES.OPINION,
+                ]:
+                    missing_citations_str, suggested_query = (
+                        remove_missing_citations(missing_citations, cd)
+                    )
+                    cd["q"] = suggested_query if suggested_query else cd["q"]
+            (
+                s,
+                child_docs_count_query,
+                top_hits_limit,
+            ) = build_es_main_query(search_query, cd)
+            (
+                paged_results,
+                query_time,
+                error,
+                total_query_results,
+                total_child_results,
+            ) = fetch_and_paginate_results(
+                get_params,
+                s,
+                child_docs_count_query,
+                rows_per_page=rows,
+                cache_key=cache_key,
+            )
+            cited_cluster = async_to_sync(add_depth_counts)(
+                # Also returns cited cluster if found
+                search_data=cd,
+                search_results=paged_results,
+            )
+            related_prefix = RELATED_PATTERN.search(cd["q"])
+            if related_prefix:
+                related_pks = related_prefix.group("pks").split(",")
+                related_cluster = OpinionCluster.objects.filter(
+                    sub_opinions__pk__in=related_pks
+                ).distinct("pk")
+        except UnbalancedParenthesesQuery as e:
+            error = True
+            error_message = "unbalanced_parentheses"
+            if e.error_type == UnbalancedParenthesesQuery.QUERY_STRING:
+                suggested_query = sanitize_unbalanced_parenthesis(
+                    cd.get("q", "")
+                )
+        except UnbalancedQuotesQuery as e:
+            error = True
+            error_message = "unbalanced_quotes"
+            if e.error_type == UnbalancedParenthesesQuery.QUERY_STRING:
+                suggested_query = sanitize_unbalanced_quotes(cd.get("q", ""))
+        except BadProximityQuery as e:
+            error = True
+            error_message = "bad_proximity_token"
+            suggested_query = "proximity_filter"
+            if e.error_type == UnbalancedParenthesesQuery.QUERY_STRING:
+                suggested_query = "proximity_query"
+        except DisallowedWildcardPattern:
+            error = True
+            error_message = "disallowed_wildcard_pattern"
+        finally:
+            # Make sure to always call the _clean_form method
+            search_form = _clean_form(
+                get_params, search_form.cleaned_data, courts
+            )
+            if cd["type"] in [SEARCH_TYPES.OPINION] and facet:
+                # If the search query is valid, pass the cleaned data to filter and
+                # retrieve the correct number of opinions per status. Otherwise (if
+                # the query has errors), just provide a dictionary containing the
+                # search type to get the total number of opinions per status
+                facet_fields = get_facet_dict_for_search_query(
+                    search_query,
+                    cd if not error else {"type": cd["type"]},
+                    search_form,
+                )
+
+    courts, court_count_human, court_count = merge_form_with_courts(
+        courts, search_form
+    )
+    search_summary_str = search_form.as_text(court_count_human)
+    search_summary_dict = search_form.as_display_dict(court_count_human)
+    results_details = [
+        query_time,
+        total_query_results,
+        top_hits_limit,
+        total_child_results,
+    ]
+
+    return {
+        "results": paged_results,
+        "results_details": results_details,
+        "search_form": search_form,
+        "search_summary_str": search_summary_str,
+        "search_summary_dict": search_summary_dict,
+        "error": error,
+        "courts": courts,
+        "court_count_human": court_count_human,
+        "court_count": court_count,
+        "query_citation": query_citation,
+        "cited_cluster": cited_cluster,
+        "related_cluster": related_cluster,
+        "facet_fields": facet_fields,
+        "error_message": error_message,
+        "suggested_query": suggested_query,
+        "estimated_count_threshold": simplify_estimated_count(
+            compute_lowest_possible_estimate(
+                settings.ELASTICSEARCH_CARDINALITY_PRECISION
+            )
+        ),
+        "missing_citations": missing_citations_str,
+    }
diff --git a/cl/opinion_page/views.py b/cl/opinion_page/views.py
index 32fee0691b..629f7686ed 100644
--- a/cl/opinion_page/views.py
+++ b/cl/opinion_page/views.py
@@ -57,7 +57,7 @@
 from cl.lib.model_helpers import choices_to_csv
 from cl.lib.models import THUMBNAIL_STATUSES
 from cl.lib.ratelimiter import ratelimiter_all_10_per_h
-from cl.lib.search_utils import make_get_string
+from cl.lib.search_utils import do_es_search, make_get_string
 from cl.lib.string_utils import trunc
 from cl.lib.thumbnails import make_png_thumbnail_for_instance
 from cl.lib.url_utils import get_redirect_or_abort
@@ -98,7 +98,6 @@
     RECAPDocument,
 )
 from cl.search.selectors import get_clusters_from_citation_str
-from cl.search.views import do_es_search
 
 HYPERSCAN_TOKENIZER = HyperscanTokenizer(cache_dir=".hyperscan")
 
diff --git a/cl/search/tests/tests_es_recap.py b/cl/search/tests/tests_es_recap.py
index 933913ea7e..3cead60fe3 100644
--- a/cl/search/tests/tests_es_recap.py
+++ b/cl/search/tests/tests_es_recap.py
@@ -2584,7 +2584,7 @@ def test_initial_document_button(self) -> None:
         for docket in dockets_to_remove:
             docket.delete()
 
-    @mock.patch("cl.search.views.fetch_es_results")
+    @mock.patch("cl.lib.search_utils.fetch_es_results")
     @override_settings(
         RECAP_SEARCH_PAGE_SIZE=2, ELASTICSEARCH_MICRO_CACHE_ENABLED=True
     )
@@ -7162,7 +7162,7 @@ def test_search_pagination_results_limit(self) -> None:
         # 100 results, 10 pages.
         total_results = 100
         with mock.patch(
-            "cl.search.views.fetch_es_results",
+            "cl.lib.search_utils.fetch_es_results",
             side_effect=lambda *x: (
                 [],
                 1,
@@ -7182,7 +7182,7 @@
         # 101 results, 11 pages.
         total_results = 101
         with mock.patch(
-            "cl.search.views.fetch_es_results",
+            "cl.lib.search_utils.fetch_es_results",
             side_effect=lambda *x: (
                 [],
                 1,
@@ -7202,7 +7202,7 @@
         # 20,000 results, 2,000 pages.
         total_results = 20_000
         with mock.patch(
-            "cl.search.views.fetch_es_results",
+            "cl.lib.search_utils.fetch_es_results",
             side_effect=lambda *x: (
                 [],
                 1,
diff --git a/cl/search/views.py b/cl/search/views.py
index 4c10e94659..6f6df3f529 100644
--- a/cl/search/views.py
+++ b/cl/search/views.py
@@ -1,5 +1,4 @@
 import logging
-import pickle
 from datetime import date, datetime, timedelta, timezone
 from urllib.parse import quote
 
@@ -8,70 +7,32 @@
 from django.conf import settings
 from django.contrib import messages
 from django.contrib.auth.models import User
-from django.core.cache import cache
 from django.core.exceptions import PermissionDenied
-from django.core.paginator import EmptyPage, Page, PageNotAnInteger
 from django.db.models import Count, Sum
 from django.http import HttpRequest, HttpResponse
-from django.http.request import QueryDict
 from django.shortcuts import HttpResponseRedirect, get_object_or_404, render
 from django.template.response import TemplateResponse
 from django.urls import reverse
 from django.utils.timezone import make_aware
 from django.views.decorators.cache import never_cache
-from django_elasticsearch_dsl.search import Search
-from eyecite.models import FullCaseCitation
 from waffle.decorators import waffle_flag
 
 from cl.alerts.forms import CreateAlertForm
 from cl.alerts.models import Alert
 from cl.audio.models import Audio
-from cl.citations.match_citations_queries import es_get_query_citation
 from cl.custom_filters.templatetags.text_filters import naturalduration
 from cl.lib.bot_detector import is_bot
-from cl.lib.crypto import sha256
-from cl.lib.elasticsearch_utils import (
-    build_es_main_query,
-    compute_lowest_possible_estimate,
-    convert_str_date_fields_to_date_objects,
-    fetch_es_results,
-    get_facet_dict_for_search_query,
-    get_only_status_facets,
-    limit_inner_hits,
-    merge_courts_from_db,
-    merge_unavailable_fields_on_parent_document,
-    set_results_highlights,
-    simplify_estimated_count,
-)
-from cl.lib.paginators import ESPaginator
+from cl.lib.elasticsearch_utils import get_only_status_facets
 from cl.lib.redis_utils import get_redis_interface
 from cl.lib.search_utils import (
-    add_depth_counts,
+    do_es_search,
     make_get_string,
     merge_form_with_courts,
     store_search_query,
 )
-from cl.lib.types import CleanData
-from cl.lib.utils import (
-    sanitize_unbalanced_parenthesis,
-    sanitize_unbalanced_quotes,
-)
-from cl.search.constants import RELATED_PATTERN
-from cl.search.documents import (
-    AudioDocument,
-    DocketDocument,
-    OpinionClusterDocument,
-    ParentheticalGroupDocument,
-    PersonDocument,
-)
-from cl.search.exception import (
-    BadProximityQuery,
-    DisallowedWildcardPattern,
-    UnbalancedParenthesesQuery,
-    UnbalancedQuotesQuery,
-)
+from cl.search.documents import OpinionClusterDocument
 from cl.search.forms import SearchForm, _clean_form
-from cl.search.models import SEARCH_TYPES, Court, Opinion, OpinionCluster
+from cl.search.models import SEARCH_TYPES, Court, Opinion
 from cl.stats.models import Stat
 from cl.stats.utils import tally_stat
 from cl.visualizations.models import SCOTUSMap
@@ -416,316 +377,3 @@ def es_search(request: HttpRequest) -> HttpResponse:
         )
 
     return render(request, template, render_dict)
-
-
-def remove_missing_citations(
-    missing_citations: list[FullCaseCitation], cd: CleanData
-) -> tuple[list[str], str]:
-    """Removes missing citations from the query and returns the missing
-    citations as strings and the modified query.
-
-    :param missing_citations: A list of FullCaseCitation objects representing
-    the citations that are missing from the query.
-    :param cd: A CleanData object containing the query string.
-    :return: A two-tuple containing a list of missing citation strings and the
-    suggested query string with missing citations removed.
-    """
-    missing_citations_str = [
-        citation.corrected_citation() for citation in missing_citations
-    ]
-    query_string = cd["q"]
-    for citation in missing_citations_str:
-        query_string = query_string.replace(citation, "")
-    suggested_query = (
-        " ".join(query_string.split()) if missing_citations_str else ""
-    )
-    return missing_citations_str, suggested_query
-
-
-def do_es_search(
-    get_params: QueryDict,
-    rows: int = settings.SEARCH_PAGE_SIZE,
-    facet: bool = True,
-    cache_key: str = None,
-):
-    """Run Elasticsearch searching and filtering and prepare data to display
-
-    :param get_params: The request.GET params sent by user.
-    :param rows: The number of Elasticsearch results to request
-    :param facet: Whether to complete faceting in the query
-    :param cache_key: A cache key with which to save the results. Note that it
-    does not do anything clever with the actual query, so if you use this, your
-    cache key should *already* have factored in the query. If None, no caching
-    is set or used. Results are saved for six hours.
-    :return: A big dict of variables for use in the search results, homepage, or
-    other location.
-    """
-    paged_results = None
-    courts = Court.objects.filter(in_use=True)
-    query_time = total_query_results = 0
-    top_hits_limit = 5
-    document_type = None
-    error_message = ""
-    suggested_query = ""
-    total_child_results = 0
-    related_cluster = None
-    cited_cluster = None
-    query_citation = None
-    facet_fields = []
-    missing_citations_str = []
-    error = True
-
-    search_form = SearchForm(get_params, courts=courts)
-    match get_params.get("type", SEARCH_TYPES.OPINION):
-        case SEARCH_TYPES.PARENTHETICAL:
-            document_type = ParentheticalGroupDocument
-        case SEARCH_TYPES.ORAL_ARGUMENT:
-            document_type = AudioDocument
-        case SEARCH_TYPES.PEOPLE:
-            document_type = PersonDocument
-        case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS:
-            document_type = DocketDocument
-            # Set a different number of results per page for RECAP SEARCH
-            rows = settings.RECAP_SEARCH_PAGE_SIZE
-        case SEARCH_TYPES.OPINION:
-            document_type = OpinionClusterDocument
-
-    if search_form.is_valid() and document_type:
-        # Copy cleaned_data to preserve the original data when displaying the form
-        cd = search_form.cleaned_data.copy()
-        try:
-            # Create necessary filters to execute ES query
-            search_query = document_type.search()
-
-            if cd["type"] in [
-                SEARCH_TYPES.OPINION,
-                SEARCH_TYPES.RECAP,
-                SEARCH_TYPES.DOCKETS,
-            ]:
-                query_citation, missing_citations = es_get_query_citation(cd)
-                if cd["type"] in [
-                    SEARCH_TYPES.OPINION,
-                ]:
-                    missing_citations_str, suggested_query = (
-                        remove_missing_citations(missing_citations, cd)
-                    )
-                    cd["q"] = suggested_query if suggested_query else cd["q"]
-            (
-                s,
-                child_docs_count_query,
-                top_hits_limit,
-            ) = build_es_main_query(search_query, cd)
-            (
-                paged_results,
-                query_time,
-                error,
-                total_query_results,
-                total_child_results,
-            ) = fetch_and_paginate_results(
-                get_params,
-                s,
-                child_docs_count_query,
-                rows_per_page=rows,
-                cache_key=cache_key,
-            )
-            cited_cluster = async_to_sync(add_depth_counts)(
-                # Also returns cited cluster if found
-                search_data=cd,
-                search_results=paged_results,
-            )
-            related_prefix = RELATED_PATTERN.search(cd["q"])
-            if related_prefix:
-                related_pks = related_prefix.group("pks").split(",")
-                related_cluster = OpinionCluster.objects.filter(
-                    sub_opinions__pk__in=related_pks
-                ).distinct("pk")
-        except UnbalancedParenthesesQuery as e:
-            error = True
-            error_message = "unbalanced_parentheses"
-            if e.error_type == UnbalancedParenthesesQuery.QUERY_STRING:
-                suggested_query = sanitize_unbalanced_parenthesis(
-                    cd.get("q", "")
-                )
-        except UnbalancedQuotesQuery as e:
-            error = True
-            error_message = "unbalanced_quotes"
-            if e.error_type == UnbalancedParenthesesQuery.QUERY_STRING:
-                suggested_query = sanitize_unbalanced_quotes(cd.get("q", ""))
-        except BadProximityQuery as e:
-            error = True
-            error_message = "bad_proximity_token"
-            suggested_query = "proximity_filter"
-            if e.error_type == UnbalancedParenthesesQuery.QUERY_STRING:
-                suggested_query = "proximity_query"
-        except DisallowedWildcardPattern:
-            error = True
-            error_message = "disallowed_wildcard_pattern"
-        finally:
-            # Make sure to always call the _clean_form method
-            search_form = _clean_form(
-                get_params, search_form.cleaned_data, courts
-            )
-            if cd["type"] in [SEARCH_TYPES.OPINION] and facet:
-                # If the search query is valid, pass the cleaned data to filter and
-                # retrieve the correct number of opinions per status. Otherwise (if
-                # the query has errors), just provide a dictionary containing the
-                # search type to get the total number of opinions per status
-                facet_fields = get_facet_dict_for_search_query(
-                    search_query,
-                    cd if not error else {"type": cd["type"]},
-                    search_form,
-                )
-
-    courts, court_count_human, court_count = merge_form_with_courts(
-        courts, search_form
-    )
-    search_summary_str = search_form.as_text(court_count_human)
-    search_summary_dict = search_form.as_display_dict(court_count_human)
-    results_details = [
-        query_time,
-        total_query_results,
-        top_hits_limit,
-        total_child_results,
-    ]
-
-    return {
-        "results": paged_results,
-        "results_details": results_details,
-        "search_form": search_form,
-        "search_summary_str": search_summary_str,
-        "search_summary_dict": search_summary_dict,
-        "error": error,
-        "courts": courts,
-        "court_count_human": court_count_human,
-        "court_count": court_count,
-        "query_citation": query_citation,
-        "cited_cluster": cited_cluster,
-        "related_cluster": related_cluster,
-        "facet_fields": facet_fields,
-        "error_message": error_message,
-        "suggested_query": suggested_query,
-        "estimated_count_threshold": simplify_estimated_count(
-            compute_lowest_possible_estimate(
-                settings.ELASTICSEARCH_CARDINALITY_PRECISION
-            )
-        ),
-        "missing_citations": missing_citations_str,
-    }
-
-
-def retrieve_cached_search_results(
-    get_params: QueryDict,
-) -> tuple[dict[str, Page | int] | None, str]:
-    """
-    Retrieve cached search results based on the GET parameters.
-
-    :param get_params: The GET parameters provided by the user.
-    :return: A two-tuple containing either the cached search results and the
-    cache key based ona prefix and the get parameters, or None and the cache key
-    if no cached results were found.
-    """
-
-    params = get_params.copy()
-    # If no page is present in the parameters, set it to 1 to generate the same
-    # hash for page 1, regardless of whether the page parameter is included.
-    # Apply the same to the q parameter when it is not present in params.
-    params.setdefault("page", "1")
-    params.setdefault("q", "")
-    sorted_params = dict(sorted(params.items()))
-    key_prefix = "search_results_cache:"
-    params_hash = sha256(pickle.dumps(sorted_params))
-    cache_key = f"{key_prefix}{params_hash}"
-    cached_results = cache.get(cache_key)
-    if cached_results:
-        return pickle.loads(cached_results), cache_key
-    return None, cache_key
-
-
-def fetch_and_paginate_results(
-    get_params: QueryDict,
-    search_query: Search,
-    child_docs_count_query: Search | None,
-    rows_per_page: int = settings.SEARCH_PAGE_SIZE,
-    cache_key: str = None,
-) -> tuple[Page | list, int, bool, int | None, int | None]:
-    """Fetch and paginate elasticsearch results.
-
-    :param get_params: The user get params.
-    :param search_query: Elasticsearch DSL Search object
-    :param child_docs_count_query: The ES DSL Query to perform the count for
-    child documents if required, otherwise None.
-    :param rows_per_page: Number of records wanted per page
-    :param cache_key: The cache key to use.
-    :return: A five-tuple: the paginated results, the ES query time, whether
-    there was an error, the total number of hits for the main document, and
-    the total number of hits for the child document.
-    """
-
-    # Run the query and set up pagination
-    if cache_key is not None:
-        # Check cache for displaying insights on the Home Page.
-        results = cache.get(cache_key)
-        if results is not None:
-            return results, 0, False, None, None
-
-    # Check micro-cache for all other search requests.
-    results_dict, micro_cache_key = retrieve_cached_search_results(get_params)
-    if results_dict:
-        # Return results and counts. Set query time to 1ms.
-        return (
-            results_dict["results"],
-            1,
-            False,
-            results_dict["main_total"],
-            results_dict["child_total"],
-        )
-
-    try:
-        page = int(get_params.get("page", 1))
-    except ValueError:
-        page = 1
-
-    # Check pagination depth
-    check_pagination_depth(page)
-
-    # Fetch results from ES
-    hits, query_time, error, main_total, child_total = fetch_es_results(
-        get_params, search_query, child_docs_count_query, page, rows_per_page
-    )
-
-    if error:
-        return [], query_time, error, main_total, child_total
-    paginator = ESPaginator(main_total, hits, rows_per_page)
-    try:
-        results = paginator.page(page)
-    except PageNotAnInteger:
-        results = paginator.page(1)
-    except EmptyPage:
-        results = paginator.page(paginator.num_pages)
-
-    search_type = get_params.get("type", SEARCH_TYPES.OPINION)
-    # Set highlights in results.
-    convert_str_date_fields_to_date_objects(results, search_type)
-    merge_courts_from_db(results, search_type)
-    limit_inner_hits(get_params, results, search_type)
-    set_results_highlights(results, search_type)
-    merge_unavailable_fields_on_parent_document(results, search_type)
-
-    if cache_key is not None:
-        # Cache only Page results for displaying insights on the Home Page.
-        cache.set(cache_key, results, settings.QUERY_RESULTS_CACHE)
-    elif settings.ELASTICSEARCH_MICRO_CACHE_ENABLED:
-        # Cache Page results and counts for all other search requests.
-        results_dict = {
-            "results": results,
-            "main_total": main_total,
-            "child_total": child_total,
-        }
-        serialized_data = pickle.dumps(results_dict)
-        cache.set(
-            micro_cache_key,
-            serialized_data,
-            settings.SEARCH_RESULTS_MICRO_CACHE,
-        )
-
-    return results, query_time, error, main_total, child_total
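
A note on the micro-cache key scheme in retrieve_cached_search_results
above: "page" and "q" are defaulted before hashing so that equivalent
requests (with or without an explicit page=1 or an empty q) produce the
same key, the parameters are sorted for a stable order, and the pickled
dict is hashed under a fixed prefix. A minimal standalone sketch of that
scheme, assuming cl.lib.crypto.sha256 returns a hex digest string (a
hashlib stand-in is used here):

    import hashlib
    import pickle

    def sha256(data: bytes) -> str:
        # Stand-in for cl.lib.crypto.sha256; assumed to hex-digest bytes.
        return hashlib.sha256(data).hexdigest()

    def micro_cache_key(params: dict[str, str]) -> str:
        params = dict(params)
        # Default page/q so "?type=o" and "?type=o&page=1&q=" hash alike.
        params.setdefault("page", "1")
        params.setdefault("q", "")
        sorted_params = dict(sorted(params.items()))
        return f"search_results_cache:{sha256(pickle.dumps(sorted_params))}"

    # Equivalent queries map to the same cache entry:
    assert micro_cache_key({"type": "o"}) == micro_cache_key(
        {"type": "o", "page": "1", "q": ""}
    )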
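
With do_es_search exported from cl.lib.search_utils, callers such as
cl/opinion_page/views.py import it from the library module rather than
from cl.search.views. A hypothetical caller, sketched under the assumption
of a plain Django view; the view name and template path are illustrative,
not taken from this change:

    from django.http import HttpRequest, HttpResponse
    from django.shortcuts import render

    from cl.lib.search_utils import do_es_search

    def example_search_view(request: HttpRequest) -> HttpResponse:
        # do_es_search returns a dict of template variables ("results",
        # "search_form", "error", and so on), per its docstring above.
        render_dict = do_es_search(request.GET.copy())
        return render(request, "example_search.html", render_dict)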
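
The test updates follow the usual unittest.mock rule of patching a name
where it is looked up, not where it is defined: fetch_and_paginate_results
now calls fetch_es_results via cl.lib.search_utils, so patching the old
cl.search.views path would no longer intercept anything. A sketch under
that assumption; the return value is illustrative, shaped like the
(hits, query_time, error, main_total, child_total) five-tuple that
fetch_es_results returns above:

    from unittest import mock

    with mock.patch(
        "cl.lib.search_utils.fetch_es_results",
        return_value=([], 1, False, 0, None),
    ):
        ...  # exercise a search request against the view under test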