Skip to content

Commit

Permalink
wipwipwip
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Dec 20, 2024
1 parent 4be8709 commit 3b19790
Show file tree
Hide file tree
Showing 24 changed files with 666 additions and 342 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,4 @@ xmltodict==0.12.0 # MIT
# Allows custom-rendered IDs, hiding null values, and including data in error responses
git+https://github.com/cos-forks/[email protected]+cos0

git+https://github.com/aaxelb/[email protected].09
git+https://github.com/aaxelb/[email protected].13
15 changes: 6 additions & 9 deletions share/search/index_strategy/trove_indexcard_flats.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> Cardsear
aggs=self._cardsearch_aggs(cardsearch_params),
sort=_sort,
from_=_from_offset,
size=_cursor.page_size,
size=_cursor.bounded_page_size,
source=False, # no need to get _source; _id is enough
)
if settings.DEBUG:
Expand Down Expand Up @@ -445,7 +445,7 @@ def _valuesearch_iri_aggs(self, valuesearch_params: ValuesearchParams, cursor: O
_nested_terms_agg = {
'field': 'nested_iri.iri_value',
# WARNING: terribly inefficient pagination (part one)
'size': cursor.start_offset + cursor.page_size + 1,
'size': cursor.start_offset + cursor.bounded_page_size + 1,
}
_iris = list(valuesearch_params.valuesearch_iris())
if _iris:
Expand Down Expand Up @@ -533,7 +533,7 @@ def _valuesearch_handle(
_buckets = _iri_aggs['value_at_propertypath']['iri_values']['buckets']
_bucket_count = len(_buckets)
# WARNING: terribly inefficient pagination (part two)
_page_end_index = cursor.start_offset + cursor.page_size
_page_end_index = cursor.start_offset + cursor.bounded_page_size
_bucket_page = _buckets[cursor.start_offset:_page_end_index] # discard prior pages
cursor.total_count = (
MANY_MORE
Expand All @@ -546,8 +546,7 @@ def _valuesearch_handle(
self._valuesearch_iri_result(_iri_bucket)
for _iri_bucket in _bucket_page
],
index_strategy=self.index_strategy,
valuesearch_params=valuesearch_params,
search_params=valuesearch_params,
)
else: # assume date
_year_buckets = (
Expand All @@ -560,8 +559,7 @@ def _valuesearch_handle(
self._valuesearch_date_result(_year_bucket)
for _year_bucket in _year_buckets
],
index_strategy=self.index_strategy,
valuesearch_params=valuesearch_params,
search_params=valuesearch_params,
)

def _valuesearch_iri_result(self, iri_bucket):
Expand Down Expand Up @@ -721,8 +719,7 @@ def _cardsearch_handle(
cursor=cursor,
search_result_page=_results,
related_propertypath_results=_relatedproperty_list,
cardsearch_params=cardsearch_params,
index_strategy=self.index_strategy,
search_params=cardsearch_params,
)

def _gather_textmatch_evidence(self, es8_hit) -> Iterable[TextMatchEvidence]:
Expand Down
15 changes: 6 additions & 9 deletions share/search/index_strategy/trovesearch_denorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def _valuesearch_iris_response(
_buckets = _iri_aggs['buckets']
_bucket_count = len(_buckets)
# WARNING: terribly hacky pagination (part two)
_page_end_index = cursor.start_offset + cursor.page_size
_page_end_index = cursor.start_offset + cursor.bounded_page_size
_bucket_page = _buckets[cursor.start_offset:_page_end_index] # discard prior pages
cursor.total_count = (
MANY_MORE
Expand All @@ -434,8 +434,7 @@ def _valuesearch_iris_response(
self._valuesearch_iri_result(_iri_bucket)
for _iri_bucket in _bucket_page
],
index_strategy=self,
valuesearch_params=valuesearch_params,
search_params=valuesearch_params,
)

def _valuesearch_dates_response(
Expand All @@ -454,8 +453,7 @@ def _valuesearch_dates_response(
self._valuesearch_date_result(_year_bucket)
for _year_bucket in _year_buckets
],
index_strategy=self,
valuesearch_params=valuesearch_params,
search_params=valuesearch_params,
)

def _valuesearch_iri_result(self, iri_bucket) -> ValuesearchResult:
Expand Down Expand Up @@ -514,8 +512,7 @@ def _cardsearch_handle(
cursor=cursor,
search_result_page=_results,
related_propertypath_results=_relatedproperty_list,
cardsearch_params=cardsearch_params,
index_strategy=self,
search_params=cardsearch_params,
)

def _gather_textmatch_evidence(self, card_iri, es8_hit) -> Iterator[TextMatchEvidence]:
Expand Down Expand Up @@ -689,7 +686,7 @@ def build(self):
'aggs': self._cardsearch_aggs(),
'sort': list(self._cardsearch_sorts()) or None,
'from_': self._cardsearch_start_offset(),
'size': self.response_cursor.page_size,
'size': self.response_cursor.bounded_page_size,
}

@functools.cached_property
Expand Down Expand Up @@ -818,7 +815,7 @@ def _build_iri_valuesearch(params: ValuesearchParams, cursor: OffsetCursor) -> d
'terms': {
'field': 'iri_value.single_focus_iri',
# WARNING: terribly hacky pagination (part one)
'size': cursor.start_offset + cursor.page_size + 1,
'size': cursor.start_offset + cursor.bounded_page_size + 1,
},
'aggs': {
'agg_type_iri': {'terms': {
Expand Down
28 changes: 13 additions & 15 deletions tests/share/search/index_strategy/_common_trovesearch_tests.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Iterable, Iterator
import dataclasses
from datetime import date, timedelta
import math
from urllib.parse import urlencode
from unittest import mock

from primitive_metadata import primitive_rdf as rdf

Expand Down Expand Up @@ -142,20 +142,18 @@ def test_cardsearch_pagination(self):

def test_cardsearch_related_properties(self):
self._fill_test_data_for_querying()
_cardsearch_params = dataclasses.replace(
CardsearchParams.from_querystring(''),
related_property_paths=(
(DCTERMS.creator,),
(DCTERMS.references,),
(BLARG.nada,),
),
)
_cardsearch_handle = self.current_index.pls_handle_cardsearch(_cardsearch_params)
self.assertEqual(_cardsearch_handle.related_propertypath_results, [
PropertypathUsage((DCTERMS.creator,), 3),
PropertypathUsage((DCTERMS.references,), 2),
PropertypathUsage((BLARG.nada,), 0),
])
_cardsearch_params = CardsearchParams.from_querystring('')
with mock.patch.object(_cardsearch_params, 'related_property_paths', new=(
(DCTERMS.creator,),
(DCTERMS.references,),
(BLARG.nada,),
)):
_cardsearch_handle = self.current_index.pls_handle_cardsearch(_cardsearch_params)
self.assertEqual(_cardsearch_handle.related_propertypath_results, [
PropertypathUsage((DCTERMS.creator,), 3),
PropertypathUsage((DCTERMS.references,), 2),
PropertypathUsage((BLARG.nada,), 0),
])

def test_valuesearch(self):
self._fill_test_data_for_querying()
Expand Down
38 changes: 36 additions & 2 deletions tests/trove/render/_base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,42 @@
import json

from primitive_metadata import (
gather,
primitive_rdf as rdf,
)

from trove.trovesearch.trovesearch_gathering import trovesearch_by_indexstrategy
from trove.render._base import BaseRenderer
from trove.render._rendering import ProtoRendering
from trove.vocab.namespaces import RDF
from tests.trove._input_output_tests import BasicInputOutputTestCase
from ._inputs import UNRENDERED_RDF, UNRENDERED_SEARCH_RDF, RdfCase


class FakeGatherCache(gather.GatherCache):
    """A ``GatherCache`` stand-in that claims everything is already gathered.

    Reporting every ask as already-gathered short-circuits the gathering
    machinery, so renderer tests run against pre-seeded triples only.
    """

    def already_gathered(self, *args, **kwargs):
        # Pretend the work is done so no gatherers ever actually run.
        return True


class FakeGathering(gather.Gathering):
    """A ``Gathering`` variant that skips exhaustive asking for tests.

    NOTE: only suitable for non-streaming renderers — each asked object is
    paired with the cache's current gathered graph instead of exhausting
    the full gathering process.
    """

    def ask_exhaustively(self, *args, **kwargs):
        # pair each asked object with the (per-iteration) gathered graph
        yield from (
            (_obj, self.cache.gathered)
            for _obj in self.ask(*args, **kwargs)
        )


def _make_fake_gathering(tripledict, renderer_type):
    """Build a ``FakeGathering`` pre-seeded with ``tripledict``.

    Uses ``trovesearch_by_indexstrategy`` as the organizer (and its norms),
    passes the renderer's ``INDEXCARD_DERIVER_IRI`` through to gatherers,
    and seeds a ``FakeGatherCache`` so nothing is gathered at ask-time.
    """
    return FakeGathering(
        norms=trovesearch_by_indexstrategy.norms,
        organizer=trovesearch_by_indexstrategy,
        gatherer_kwargs={'deriver_iri': renderer_type.INDEXCARD_DERIVER_IRI},
        cache=FakeGatherCache(gathered=rdf.RdfGraph(tripledict)),
    )


class TroveRendererTests(BasicInputOutputTestCase):
inputs = UNRENDERED_RDF

Expand All @@ -15,8 +46,11 @@ class TroveRendererTests(BasicInputOutputTestCase):

def compute_output(self, given_input: RdfCase):
_renderer = self.renderer_class(
response_focus=given_input.focus,
response_gathering=...,
response_focus=gather.Focus.new(
given_input.focus,
given_input.tripledict.get(given_input.focus, {}).get(RDF.type),
),
response_gathering=_make_fake_gathering(given_input.tripledict, self.renderer_class),
)
return _renderer.render_document()

Expand Down
14 changes: 6 additions & 8 deletions tests/trove/render/_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,12 @@ class RdfCase:
'various_types': RdfCase(BLARG.aSubject, {
BLARG.aSubject: {
RDF.type: {BLARG.aType},
BLARG.aVerb: {
BLARG.anIri, # an IRI value
rdf.literal('an rdf:string literal'),
rdf.literal('a rdf:langString literal', language='en'),
rdf.literal(17), # an integer literal
rdf.literal(datetime.date(2024, 1, 1)), # a date literal
rdf.literal('a literal of strange datatype', datatype_iris=BLARG.aStrangeDatatype),
},
BLARG.hasIri: {BLARG.anIri},
BLARG.hasRdfStringLiteral: {rdf.literal('an rdf:string literal')},
BLARG.hasRdfLangStringLiteral: {rdf.literal('a rdf:langString literal', language='en')},
BLARG.hasIntegerLiteral: {rdf.literal(17)},
BLARG.hasDateLiteral: {rdf.literal(datetime.date(2024, 1, 1))},
BLARG.hasStrangeLiteral: {rdf.literal('a literal of strange datatype', datatype_iris=BLARG.aStrangeDatatype)},
},
}),
}
Expand Down
Loading

0 comments on commit 3b19790

Please sign in to comment.