
Commit 4be8709
wipwip
aaxelb committed Dec 20, 2024
1 parent 8e2d7db commit 4be8709
Showing 8 changed files with 120 additions and 67 deletions.
7 changes: 6 additions & 1 deletion share/search/index_strategy/trove_indexcard_flats.py
@@ -546,6 +546,8 @@ def _valuesearch_handle(
self._valuesearch_iri_result(_iri_bucket)
for _iri_bucket in _bucket_page
],
index_strategy=self.index_strategy,
valuesearch_params=valuesearch_params,
)
else: # assume date
_year_buckets = (
@@ -558,6 +560,8 @@ def _valuesearch_handle(
self._valuesearch_date_result(_year_bucket)
for _year_bucket in _year_buckets
],
index_strategy=self.index_strategy,
valuesearch_params=valuesearch_params,
)

def _valuesearch_iri_result(self, iri_bucket):
@@ -660,7 +664,7 @@ def _iter_nested_date_filters(self, search_filter) -> Iterator[dict]:
else:
raise ValueError(f'invalid date filter operator (got {search_filter.operator})')

def _cardsearch_sort(self, sort_list: tuple[SortParam]):
def _cardsearch_sort(self, sort_list: tuple[SortParam, ...]):
if not sort_list:
return None
return [
@@ -718,6 +722,7 @@ def _cardsearch_handle(
search_result_page=_results,
related_propertypath_results=_relatedproperty_list,
cardsearch_params=cardsearch_params,
index_strategy=self.index_strategy,
)

def _gather_textmatch_evidence(self, es8_hit) -> Iterable[TextMatchEvidence]:
5 changes: 5 additions & 0 deletions share/search/index_strategy/trovesearch_denorm.py
@@ -434,6 +434,8 @@ def _valuesearch_iris_response(
self._valuesearch_iri_result(_iri_bucket)
for _iri_bucket in _bucket_page
],
index_strategy=self,
valuesearch_params=valuesearch_params,
)

def _valuesearch_dates_response(
@@ -452,6 +454,8 @@ def _valuesearch_dates_response(
self._valuesearch_date_result(_year_bucket)
for _year_bucket in _year_buckets
],
index_strategy=self,
valuesearch_params=valuesearch_params,
)

def _valuesearch_iri_result(self, iri_bucket) -> ValuesearchResult:
@@ -511,6 +515,7 @@ def _cardsearch_handle(
search_result_page=_results,
related_propertypath_results=_relatedproperty_list,
cardsearch_params=cardsearch_params,
index_strategy=self,
)

def _gather_textmatch_evidence(self, card_iri, es8_hit) -> Iterator[TextMatchEvidence]:
15 changes: 2 additions & 13 deletions tests/trove/render/_base.py
@@ -15,8 +15,8 @@ class TroveRendererTests(BasicInputOutputTestCase):

def compute_output(self, given_input: RdfCase):
_renderer = self.renderer_class(
response_focus_iri=given_input.focus,
response_tripledict=given_input.tripledict,
response_focus=given_input.focus,
response_gathering=...,
)
return _renderer.render_document()

@@ -30,17 +30,6 @@ def assert_outputs_equal(self, expected_output, actual_output) -> None:
self._get_rendered_output(actual_output),
)

def missing_case_message(self, name: str, given_input) -> str:
_cls = self.__class__
_actual_output = self.compute_output(given_input)
return '\n'.join((
super().missing_case_message(name, given_input)
'missing test case!',
f'\tadd "{name}" to {_cls.__module__}.{_cls.__qualname__}.expected_outputs',
'\tactual output, fwiw:',
self._get_rendered_output(_actual_output)
)))

def _get_rendered_output(self, rendering: ProtoRendering):
# for now, they always iter strings (update if/when bytes are in play)
return ''.join(rendering.iter_content()) # type: ignore[arg-type]
17 changes: 13 additions & 4 deletions trove/render/_base.py
@@ -4,7 +4,10 @@
import json
from typing import ClassVar

from primitive_metadata import primitive_rdf as rdf
from primitive_metadata import (
gather,
primitive_rdf as rdf,
)

from trove import exceptions as trove_exceptions
from trove.vocab import mediatypes
@@ -23,8 +26,8 @@ class BaseRenderer(abc.ABC):
INDEXCARD_DERIVER_IRI: ClassVar[str | None] = None

# instance fields
response_focus_iri: str
response_tripledict: rdf.RdfTripleDictionary = dataclasses.field(default_factory=dict)
response_focus: gather.Focus
response_gathering: gather.Gathering
iri_shorthand: rdf.IriShorthand = NAMESPACES_SHORTHAND
thesaurus_tripledict: rdf.RdfTripleDictionary = dataclasses.field(default_factory=lambda: TROVE_API_THESAURUS)

@@ -36,6 +39,11 @@ def thesaurus(self):
def response_data(self):
return rdf.RdfGraph(self.response_tripledict)

@functools.cached_property
def response_tripledict(self) -> rdf.RdfTripleDictionary:
# TODO: self.response_gathering.ask_all_about or a default ask...
return self.response_gathering.leaf_a_record()

def simple_render_document(self) -> str:
raise NotImplementedError

@@ -50,7 +58,8 @@ def render_document(self) -> ProtoRendering:
rendered_content=_content,
)

def render_error_document(self, error: trove_exceptions.TroveError) -> ProtoRendering:
@classmethod
def render_error_document(cls, error: trove_exceptions.TroveError) -> ProtoRendering:
# may override, but default to jsonapi
return SimpleRendering( # type: ignore[return-value] # until ProtoRendering(typing.Protocol) with py3.12
mediatype=mediatypes.JSONAPI,
2 changes: 1 addition & 1 deletion trove/trovesearch/page_cursor.py
@@ -15,7 +15,7 @@
MAX_OFFSET = 9997

DEFAULT_PAGE_SIZE = 13
MAX_PAGE_SIZE = 10000
MAX_PAGE_SIZE = 101


@dataclasses.dataclass
101 changes: 63 additions & 38 deletions trove/trovesearch/search_handle.py
@@ -1,8 +1,12 @@
from __future__ import annotations
import dataclasses
import functools
import itertools
from typing import Literal, Iterable, Union, Optional, Generator
from typing import (
Generator,
Iterable,
Optional,
TYPE_CHECKING,
)

from primitive_metadata import primitive_rdf

@@ -11,41 +15,59 @@
ReproduciblyRandomSampleCursor,
)
from trove.trovesearch.search_params import (
BaseTroveParams,
CardsearchParams,
ValuesearchParams,
)
from trove.vocab.namespaces import TROVE
from trove.vocab.trove import trove_indexcard_namespace

# TODO: add `metadata={OWL.sameAs: ...}` to each field; use dataclass-to-rdf to simplify gatherers


BoundedCount = Union[
int, # exact count, if less than ten thousands
Literal[TROVE['ten-thousands-and-more']],
]
if TYPE_CHECKING:
from share.search.index_strategy import IndexStrategy


@dataclasses.dataclass
class BasicSearchHandle:
cursor: PageCursor
search_result_generator: Generator
index_strategy: IndexStrategy | None # TODO: make the handle the one that knows how to use the strategy
search_params: BaseTroveParams

@property
def total_result_count(self) -> BoundedCount:
def total_result_count(self) -> primitive_rdf.Literal:
return (
TROVE['ten-thousands-and-more']
if self.cursor.has_many_more()
else self.cursor.total_count
)

@functools.cached_property
def search_result_page(self) -> tuple:
# note: use either search_result_page or search_result_generator, not both
return tuple(
itertools.islice(self.search_result_generator, self.cursor.page_size)
def search_result_page(self) -> Iterable | None:
...

def iter_all_pages(self) -> Generator:
_handle: BasicSearchHandle | None = self
while _handle is not None:
yield from _handle.search_result_page
_handle = _handle.get_next()

def get_next(self) -> BasicSearchHandle | None:
_next_cursor = self.cursor.next_cursor()
return (
None
if _next_cursor is None
else dataclasses.replace(
self,
cursor=_next_cursor,
**self._next_replace_kwargs(),
)
)

def _next_replace_kwargs(self) -> dict:
return {
'cursor': self.cursor.next_cursor(),
'search_result_page': None,
}


@dataclasses.dataclass
class CardsearchHandle(BasicSearchHandle):
@@ -54,35 +76,38 @@ class CardsearchHandle(BasicSearchHandle):

def __post_init__(self):
_cursor = self.cursor
if ( # TODO: move this logic into the... index strategy?
_page = self.search_result_page
if ( # TODO: move this logic into the... cursor?
isinstance(_cursor, ReproduciblyRandomSampleCursor)
and _cursor.is_first_page()
and not _cursor.first_page_ids
and not _cursor.has_many_more()
):
_cursor.first_page_ids = [_result.card_id for _result in self.search_result_page]

@functools.cached_property
def search_result_page(self) -> tuple:
_page = super().search_result_page
if (
isinstance(self.cursor, ReproduciblyRandomSampleCursor)
and self.cursor.is_first_page()
and self.cursor.first_page_ids
and _page is not None
):
# revisiting first page; reproduce original random order
_ordering_by_id = {
_id: _i
for (_i, _id) in enumerate(self.cursor.first_page_ids)
}
return tuple(
sorted(
if _cursor.first_page_ids:
# revisiting first page; reproduce original random order
_ordering_by_id = {
_id: _i
for (_i, _id) in enumerate(_cursor.first_page_ids)
}
self.search_result_page = sorted(
_page,
key=lambda _r: _ordering_by_id[_r.card_id],
),
)
)
elif not _cursor.has_many_more():
# visiting first page for the first time
_cursor.first_page_ids = [_result.card_id for _result in _page]
return _page

def _next_replace_kwargs(self) -> dict:
_next_kwargs = super()._next_replace_kwargs()
return {
**_next_kwargs,
'related_propertypath_results': [],
'cardsearch_params': dataclasses.replace(
self.cardsearch_params,
page_cursor=_next_kwargs['cursor'],
),
}


@dataclasses.dataclass
class ValuesearchHandle(BasicSearchHandle):
@@ -133,7 +158,7 @@ class ValuesearchResult:
total_count: int = 0

def __post_init__(self):
assert self.value_iri or self.value_value, (
assert (self.value_iri is not None) or (self.value_value is not None), (
f'either value_iri or value_value required (on {self})'
)

4 changes: 2 additions & 2 deletions trove/views/_responder.py
@@ -32,10 +32,10 @@ def make_http_response(
def make_http_error_response(
*,
error: TroveError,
renderer: BaseRenderer,
renderer_type: type[BaseRenderer],
http_headers: typing.Iterable[tuple[str, str]] = ()
) -> djhttp.HttpResponse:
_content_rendering = renderer.render_error_document(error)
_content_rendering = renderer_type.render_error_document(error)
return djhttp.HttpResponse(
_content_rendering.iter_content(),
status=error.http_status,