From 4be870984321eeede5bbc04da350e7791d27230e Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 20 Dec 2024 12:15:24 -0500 Subject: [PATCH] wipwip --- .../index_strategy/trove_indexcard_flats.py | 7 +- .../index_strategy/trovesearch_denorm.py | 5 + tests/trove/render/_base.py | 15 +-- trove/render/_base.py | 17 ++- trove/trovesearch/page_cursor.py | 2 +- trove/trovesearch/search_handle.py | 101 +++++++++++------- trove/views/_responder.py | 4 +- trove/views/search.py | 36 +++++-- 8 files changed, 120 insertions(+), 67 deletions(-) diff --git a/share/search/index_strategy/trove_indexcard_flats.py b/share/search/index_strategy/trove_indexcard_flats.py index 20574f9bf..30e34c2d5 100644 --- a/share/search/index_strategy/trove_indexcard_flats.py +++ b/share/search/index_strategy/trove_indexcard_flats.py @@ -546,6 +546,8 @@ def _valuesearch_handle( self._valuesearch_iri_result(_iri_bucket) for _iri_bucket in _bucket_page ], + index_strategy=self.index_strategy, + valuesearch_params=valuesearch_params, ) else: # assume date _year_buckets = ( @@ -558,6 +560,8 @@ def _valuesearch_handle( self._valuesearch_date_result(_year_bucket) for _year_bucket in _year_buckets ], + index_strategy=self.index_strategy, + valuesearch_params=valuesearch_params, ) def _valuesearch_iri_result(self, iri_bucket): @@ -660,7 +664,7 @@ def _iter_nested_date_filters(self, search_filter) -> Iterator[dict]: else: raise ValueError(f'invalid date filter operator (got {search_filter.operator})') - def _cardsearch_sort(self, sort_list: tuple[SortParam]): + def _cardsearch_sort(self, sort_list: tuple[SortParam, ...]): if not sort_list: return None return [ @@ -718,6 +722,7 @@ def _cardsearch_handle( search_result_page=_results, related_propertypath_results=_relatedproperty_list, cardsearch_params=cardsearch_params, + index_strategy=self.index_strategy, ) def _gather_textmatch_evidence(self, es8_hit) -> Iterable[TextMatchEvidence]: diff --git a/share/search/index_strategy/trovesearch_denorm.py b/share/search/index_strategy/trovesearch_denorm.py index acbcc7890..2bebaf78a 100644 --- a/share/search/index_strategy/trovesearch_denorm.py +++ b/share/search/index_strategy/trovesearch_denorm.py @@ -434,6 +434,8 @@ def _valuesearch_iris_response( self._valuesearch_iri_result(_iri_bucket) for _iri_bucket in _bucket_page ], + index_strategy=self, + valuesearch_params=valuesearch_params, ) def _valuesearch_dates_response( @@ -452,6 +454,8 @@ def _valuesearch_dates_response( self._valuesearch_date_result(_year_bucket) for _year_bucket in _year_buckets ], + index_strategy=self, + valuesearch_params=valuesearch_params, ) def _valuesearch_iri_result(self, iri_bucket) -> ValuesearchResult: @@ -511,6 +515,7 @@ def _cardsearch_handle( search_result_page=_results, related_propertypath_results=_relatedproperty_list, cardsearch_params=cardsearch_params, + index_strategy=self, ) def _gather_textmatch_evidence(self, card_iri, es8_hit) -> Iterator[TextMatchEvidence]: diff --git a/tests/trove/render/_base.py b/tests/trove/render/_base.py index 0e31a17fa..9055ebd89 100644 --- a/tests/trove/render/_base.py +++ b/tests/trove/render/_base.py @@ -15,8 +15,8 @@ class TroveRendererTests(BasicInputOutputTestCase): def compute_output(self, given_input: RdfCase): _renderer = self.renderer_class( - response_focus_iri=given_input.focus, - response_tripledict=given_input.tripledict, + response_focus=given_input.focus, + response_gathering=..., ) return _renderer.render_document() @@ -30,17 +30,6 @@ def assert_outputs_equal(self, expected_output, actual_output) -> None: self._get_rendered_output(actual_output), ) - def missing_case_message(self, name: str, given_input) -> str: - _cls = self.__class__ - _actual_output = self.compute_output(given_input) - return '\n'.join(( - super().missing_case_message(name, given_input) - 'missing test case!', - f'\tadd "{name}" to {_cls.__module__}.{_cls.__qualname__}.expected_outputs', - '\tactual output, fwiw:', - self._get_rendered_output(_actual_output) - ))) - def _get_rendered_output(self, rendering: ProtoRendering): # for now, they always iter strings (update if/when bytes are in play) return ''.join(rendering.iter_content()) # type: ignore[arg-type] diff --git a/trove/render/_base.py b/trove/render/_base.py index 2b843a1a5..fb8c9a360 100644 --- a/trove/render/_base.py +++ b/trove/render/_base.py @@ -4,7 +4,10 @@ import json from typing import ClassVar -from primitive_metadata import primitive_rdf as rdf +from primitive_metadata import ( + gather, + primitive_rdf as rdf, +) from trove import exceptions as trove_exceptions from trove.vocab import mediatypes @@ -23,8 +26,8 @@ class BaseRenderer(abc.ABC): INDEXCARD_DERIVER_IRI: ClassVar[str | None] = None # instance fields - response_focus_iri: str - response_tripledict: rdf.RdfTripleDictionary = dataclasses.field(default_factory=dict) + response_focus: gather.Focus + response_gathering: gather.Gathering iri_shorthand: rdf.IriShorthand = NAMESPACES_SHORTHAND thesaurus_tripledict: rdf.RdfTripleDictionary = dataclasses.field(default_factory=lambda: TROVE_API_THESAURUS) @@ -36,6 +39,11 @@ def thesaurus(self): def response_data(self): return rdf.RdfGraph(self.response_tripledict) + @functools.cached_property + def response_tripledict(self) -> rdf.RdfTripleDictionary: + # TODO: self.response_gathering.ask_all_about or a default ask... + return self.response_gathering.leaf_a_record() + def simple_render_document(self) -> str: raise NotImplementedError @@ -50,7 +58,8 @@ def render_document(self) -> ProtoRendering: rendered_content=_content, ) - def render_error_document(self, error: trove_exceptions.TroveError) -> ProtoRendering: + @classmethod + def render_error_document(cls, error: trove_exceptions.TroveError) -> ProtoRendering: # may override, but default to jsonapi return SimpleRendering( # type: ignore[return-value] # until ProtoRendering(typing.Protocol) with py3.12 mediatype=mediatypes.JSONAPI, diff --git a/trove/trovesearch/page_cursor.py b/trove/trovesearch/page_cursor.py index 7fb3c6cb0..1aeac7213 100644 --- a/trove/trovesearch/page_cursor.py +++ b/trove/trovesearch/page_cursor.py @@ -15,7 +15,7 @@ MAX_OFFSET = 9997 DEFAULT_PAGE_SIZE = 13 -MAX_PAGE_SIZE = 10000 +MAX_PAGE_SIZE = 101 @dataclasses.dataclass diff --git a/trove/trovesearch/search_handle.py b/trove/trovesearch/search_handle.py index e38c55023..cd6b8820b 100644 --- a/trove/trovesearch/search_handle.py +++ b/trove/trovesearch/search_handle.py @@ -1,8 +1,12 @@ from __future__ import annotations import dataclasses import functools -import itertools -from typing import Literal, Iterable, Union, Optional, Generator +from typing import ( + Generator, + Iterable, + Optional, + TYPE_CHECKING, +) from primitive_metadata import primitive_rdf @@ -11,28 +15,25 @@ ReproduciblyRandomSampleCursor, ) from trove.trovesearch.search_params import ( + BaseTroveParams, CardsearchParams, ValuesearchParams, ) from trove.vocab.namespaces import TROVE from trove.vocab.trove import trove_indexcard_namespace -# TODO: add `metadata={OWL.sameAs: ...}` to each field; use dataclass-to-rdf to simplify gatherers - - -BoundedCount = Union[ - int, # exact count, if less than ten thousands - Literal[TROVE['ten-thousands-and-more']], -] +if TYPE_CHECKING: + from share.search.index_strategy import IndexStrategy @dataclasses.dataclass class BasicSearchHandle: cursor: PageCursor - search_result_generator: Generator + index_strategy: IndexStrategy | None # TODO: make the handle the one that knows how to use the strategy + search_params: BaseTroveParams @property - def total_result_count(self) -> BoundedCount: + def total_result_count(self) -> primitive_rdf.Literal: return ( TROVE['ten-thousands-and-more'] if self.cursor.has_many_more() @@ -40,12 +41,33 @@ def total_result_count(self) -> BoundedCount: ) @functools.cached_property - def search_result_page(self) -> tuple: - # note: use either search_result_page or search_result_generator, not both - return tuple( - itertools.islice(self.search_result_generator, self.cursor.page_size) + def search_result_page(self) -> Iterable | None: + ... + + def iter_all_pages(self) -> Generator: + _handle: BasicSearchHandle | None = self + while _handle is not None: + yield from _handle.search_result_page + _handle = _handle.get_next() + + def get_next(self) -> BasicSearchHandle | None: + _next_cursor = self.cursor.next_cursor() + return ( + None + if _next_cursor is None + else dataclasses.replace( + self, + cursor=_next_cursor, + **self._next_replace_kwargs(), + ) ) + def _next_replace_kwargs(self) -> dict: + return { + 'cursor': self.cursor.next_cursor(), + 'search_result_page': None, + } + @dataclasses.dataclass class CardsearchHandle(BasicSearchHandle): @@ -54,35 +76,38 @@ class CardsearchHandle(BasicSearchHandle): def __post_init__(self): _cursor = self.cursor - if ( # TODO: move this logic into the... index strategy? + _page = self.search_result_page + if ( # TODO: move this logic into the... cursor? isinstance(_cursor, ReproduciblyRandomSampleCursor) and _cursor.is_first_page() - and not _cursor.first_page_ids - and not _cursor.has_many_more() - ): - _cursor.first_page_ids = [_result.card_id for _result in self.search_result_page] - - @functools.cached_property - def search_result_page(self) -> tuple: - _page = super().search_result_page - if ( - isinstance(self.cursor, ReproduciblyRandomSampleCursor) - and self.cursor.is_first_page() - and self.cursor.first_page_ids + and _page is not None ): - # revisiting first page; reproduce original random order - _ordering_by_id = { - _id: _i - for (_i, _id) in enumerate(self.cursor.first_page_ids) - } - return tuple( - sorted( + if _cursor.first_page_ids: + # revisiting first page; reproduce original random order + _ordering_by_id = { + _id: _i + for (_i, _id) in enumerate(_cursor.first_page_ids) + } + self.search_result_page = sorted( _page, key=lambda _r: _ordering_by_id[_r.card_id], - ), - ) + ) + elif not _cursor.has_many_more(): + # visiting first page for the first time + _cursor.first_page_ids = [_result.card_id for _result in _page] return _page + def _next_replace_kwargs(self) -> dict: + _next_kwargs = super()._next_replace_kwargs() + return { + **_next_kwargs, + 'related_propertypath_results': [], + 'cardsearch_params': dataclasses.replace( + self.cardsearch_params, + page_cursor=_next_kwargs['cursor'], + ), + } + @dataclasses.dataclass class ValuesearchHandle(BasicSearchHandle): @@ -133,7 +158,7 @@ class ValuesearchResult: total_count: int = 0 def __post_init__(self): - assert self.value_iri or self.value_value, ( + assert (self.value_iri is not None) or (self.value_value is not None), ( f'either value_iri or value_value required (on {self})' ) diff --git a/trove/views/_responder.py b/trove/views/_responder.py index e9965a3b7..3702085b8 100644 --- a/trove/views/_responder.py +++ b/trove/views/_responder.py @@ -32,10 +32,10 @@ def make_http_response( def make_http_error_response( *, error: TroveError, - renderer: BaseRenderer, + renderer_type: type[BaseRenderer], http_headers: typing.Iterable[tuple[str, str]] = () ) -> djhttp.HttpResponse: - _content_rendering = renderer.render_error_document(error) + _content_rendering = renderer_type.render_error_document(error) return djhttp.HttpResponse( _content_rendering.iter_content(), status=error.http_status, diff --git a/trove/views/search.py b/trove/views/search.py index f669d219b..a26f236be 100644 --- a/trove/views/search.py +++ b/trove/views/search.py @@ -1,3 +1,4 @@ +import abc import logging from django import http @@ -6,6 +7,7 @@ from share.search import index_strategy from trove import exceptions as trove_exceptions +from trove.trovesearch.search_handle import BasicSearchHandle from trove.trovesearch.search_params import ( BaseTroveParams, CardsearchParams, @@ -62,28 +64,30 @@ } -class _BaseTrovesearchView(View): +class _BaseTrovesearchView(View, abc.ABC): # expected on inheritors focus_type_iri: str params_dataclass: type[BaseTroveParams] def get(self, request): - _url = request.build_absolute_uri() try: _renderer_type = get_renderer_type(request) except trove_exceptions.CannotRenderMediatype as _error: return make_http_error_response( error=_error, - renderer=DEFAULT_RENDERER_TYPE(_url), + renderer_type=DEFAULT_RENDERER_TYPE, ) try: _search_gathering = self._start_gathering( search_params=self._parse_search_params(request), renderer_type=_renderer_type, ) + _url = request.build_absolute_uri() _focus = gather.Focus.new(_url, self.focus_type_iri) - _search_gathering.ask(self._get_asktree(request), focus=_focus) - _renderer = _renderer_type(_url, _search_gathering.leaf_a_record()) + # fill the gathering's cache with requested info + self._gather_by_request(_search_gathering, _focus, request) + # take gathered data into a response + _renderer = _renderer_type(_focus, _search_gathering) return make_http_response( content_rendering=_renderer.render_document(), http_request=request, @@ -91,32 +95,48 @@ def get(self, request): except trove_exceptions.TroveError as _error: return make_http_error_response( error=_error, - renderer=_renderer_type(_url), + renderer_type=_renderer_type, ) - def _parse_search_params(self, request: http.HttpRequest): + def _parse_search_params(self, request: http.HttpRequest) -> BaseTroveParams: return self.params_dataclass.from_querystring( request.META['QUERY_STRING'], ) - def _start_gathering(self, search_params, renderer_type) -> gather.Gathering: + def _start_gathering(self, search_params: BaseTroveParams, renderer_type) -> gather.Gathering: _specific_index = index_strategy.get_index_for_trovesearch(search_params) # TODO: 404 for unknown strategy return trovesearch_by_indexstrategy.new_gathering({ 'search_params': search_params, + 'search_handle': self._get_search_handle(_specific_index), 'specific_index': _specific_index, 'deriver_iri': renderer_type.INDEXCARD_DERIVER_IRI, }) + def _gather_by_request(self, gathering, focus, request) -> None: + gathering.ask(self._get_asktree(request), focus=focus) + def _get_asktree(self, request: http.HttpRequest): ... + def _get_gathering_kwargs( + self, + specific_index: index_strategy.IndexStrategy.SpecificIndex, + ) -> BasicSearchHandle: + raise NotImplementedError + class CardsearchView(_BaseTrovesearchView): focus_type_iri = TROVE.Cardsearch params_dataclass = CardsearchParams + def _get_search_handle(self, specific_index, search_params) -> BasicSearchHandle: + return specific_index.pls_handle_cardsearch(search_params) + class ValuesearchView(_BaseTrovesearchView): focus_type_iri = TROVE.Valuesearch params_dataclass = ValuesearchParams + + def _get_search_handle(self, specific_index, search_params) -> BasicSearchHandle: + return specific_index.pls_handle_valuesearch(search_params)