Skip to content

Commit

Permalink
wipwipwip
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Dec 20, 2024
1 parent 4be8709 commit 3b19790
Show file tree
Hide file tree
Showing 24 changed files with 666 additions and 342 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,4 @@ xmltodict==0.12.0 # MIT
# Allows custom-rendered IDs, hiding null values, and including data in error responses
git+https://github.com/cos-forks/[email protected]+cos0

git+https://github.com/aaxelb/[email protected].09
git+https://github.com/aaxelb/[email protected].13
15 changes: 6 additions & 9 deletions share/search/index_strategy/trove_indexcard_flats.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> Cardsear
aggs=self._cardsearch_aggs(cardsearch_params),
sort=_sort,
from_=_from_offset,
size=_cursor.page_size,
size=_cursor.bounded_page_size,
source=False, # no need to get _source; _id is enough
)
if settings.DEBUG:
Expand Down Expand Up @@ -445,7 +445,7 @@ def _valuesearch_iri_aggs(self, valuesearch_params: ValuesearchParams, cursor: O
_nested_terms_agg = {
'field': 'nested_iri.iri_value',
# WARNING: terribly inefficient pagination (part one)
'size': cursor.start_offset + cursor.page_size + 1,
'size': cursor.start_offset + cursor.bounded_page_size + 1,
}
_iris = list(valuesearch_params.valuesearch_iris())
if _iris:
Expand Down Expand Up @@ -533,7 +533,7 @@ def _valuesearch_handle(
_buckets = _iri_aggs['value_at_propertypath']['iri_values']['buckets']
_bucket_count = len(_buckets)
# WARNING: terribly inefficient pagination (part two)
_page_end_index = cursor.start_offset + cursor.page_size
_page_end_index = cursor.start_offset + cursor.bounded_page_size
_bucket_page = _buckets[cursor.start_offset:_page_end_index] # discard prior pages
cursor.total_count = (
MANY_MORE
Expand All @@ -546,8 +546,7 @@ def _valuesearch_handle(
self._valuesearch_iri_result(_iri_bucket)
for _iri_bucket in _bucket_page
],
index_strategy=self.index_strategy,
valuesearch_params=valuesearch_params,
search_params=valuesearch_params,
)
else: # assume date
_year_buckets = (
Expand All @@ -560,8 +559,7 @@ def _valuesearch_handle(
self._valuesearch_date_result(_year_bucket)
for _year_bucket in _year_buckets
],
index_strategy=self.index_strategy,
valuesearch_params=valuesearch_params,
search_params=valuesearch_params,
)

def _valuesearch_iri_result(self, iri_bucket):
Expand Down Expand Up @@ -721,8 +719,7 @@ def _cardsearch_handle(
cursor=cursor,
search_result_page=_results,
related_propertypath_results=_relatedproperty_list,
cardsearch_params=cardsearch_params,
index_strategy=self.index_strategy,
search_params=cardsearch_params,
)

def _gather_textmatch_evidence(self, es8_hit) -> Iterable[TextMatchEvidence]:
Expand Down
15 changes: 6 additions & 9 deletions share/search/index_strategy/trovesearch_denorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def _valuesearch_iris_response(
_buckets = _iri_aggs['buckets']
_bucket_count = len(_buckets)
# WARNING: terribly hacky pagination (part two)
_page_end_index = cursor.start_offset + cursor.page_size
_page_end_index = cursor.start_offset + cursor.bounded_page_size
_bucket_page = _buckets[cursor.start_offset:_page_end_index] # discard prior pages
cursor.total_count = (
MANY_MORE
Expand All @@ -434,8 +434,7 @@ def _valuesearch_iris_response(
self._valuesearch_iri_result(_iri_bucket)
for _iri_bucket in _bucket_page
],
index_strategy=self,
valuesearch_params=valuesearch_params,
search_params=valuesearch_params,
)

def _valuesearch_dates_response(
Expand All @@ -454,8 +453,7 @@ def _valuesearch_dates_response(
self._valuesearch_date_result(_year_bucket)
for _year_bucket in _year_buckets
],
index_strategy=self,
valuesearch_params=valuesearch_params,
search_params=valuesearch_params,
)

def _valuesearch_iri_result(self, iri_bucket) -> ValuesearchResult:
Expand Down Expand Up @@ -514,8 +512,7 @@ def _cardsearch_handle(
cursor=cursor,
search_result_page=_results,
related_propertypath_results=_relatedproperty_list,
cardsearch_params=cardsearch_params,
index_strategy=self,
search_params=cardsearch_params,
)

def _gather_textmatch_evidence(self, card_iri, es8_hit) -> Iterator[TextMatchEvidence]:
Expand Down Expand Up @@ -689,7 +686,7 @@ def build(self):
'aggs': self._cardsearch_aggs(),
'sort': list(self._cardsearch_sorts()) or None,
'from_': self._cardsearch_start_offset(),
'size': self.response_cursor.page_size,
'size': self.response_cursor.bounded_page_size,
}

@functools.cached_property
Expand Down Expand Up @@ -818,7 +815,7 @@ def _build_iri_valuesearch(params: ValuesearchParams, cursor: OffsetCursor) -> d
'terms': {
'field': 'iri_value.single_focus_iri',
# WARNING: terribly hacky pagination (part one)
'size': cursor.start_offset + cursor.page_size + 1,
'size': cursor.start_offset + cursor.bounded_page_size + 1,
},
'aggs': {
'agg_type_iri': {'terms': {
Expand Down
28 changes: 13 additions & 15 deletions tests/share/search/index_strategy/_common_trovesearch_tests.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Iterable, Iterator
import dataclasses
from datetime import date, timedelta
import math
from urllib.parse import urlencode
from unittest import mock

from primitive_metadata import primitive_rdf as rdf

Expand Down Expand Up @@ -142,20 +142,18 @@ def test_cardsearch_pagination(self):

def test_cardsearch_related_properties(self):
self._fill_test_data_for_querying()
_cardsearch_params = dataclasses.replace(
CardsearchParams.from_querystring(''),
related_property_paths=(
(DCTERMS.creator,),
(DCTERMS.references,),
(BLARG.nada,),
),
)
_cardsearch_handle = self.current_index.pls_handle_cardsearch(_cardsearch_params)
self.assertEqual(_cardsearch_handle.related_propertypath_results, [
PropertypathUsage((DCTERMS.creator,), 3),
PropertypathUsage((DCTERMS.references,), 2),
PropertypathUsage((BLARG.nada,), 0),
])
_cardsearch_params = CardsearchParams.from_querystring('')
with mock.patch.object(_cardsearch_params, 'related_property_paths', new=(
(DCTERMS.creator,),
(DCTERMS.references,),
(BLARG.nada,),
)):
_cardsearch_handle = self.current_index.pls_handle_cardsearch(_cardsearch_params)
self.assertEqual(_cardsearch_handle.related_propertypath_results, [
PropertypathUsage((DCTERMS.creator,), 3),
PropertypathUsage((DCTERMS.references,), 2),
PropertypathUsage((BLARG.nada,), 0),
])

def test_valuesearch(self):
self._fill_test_data_for_querying()
Expand Down
38 changes: 36 additions & 2 deletions tests/trove/render/_base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,42 @@
import json

from primitive_metadata import (
gather,
primitive_rdf as rdf,
)

from trove.trovesearch.trovesearch_gathering import trovesearch_by_indexstrategy
from trove.render._base import BaseRenderer
from trove.render._rendering import ProtoRendering
from trove.vocab.namespaces import RDF
from tests.trove._input_output_tests import BasicInputOutputTestCase
from ._inputs import UNRENDERED_RDF, UNRENDERED_SEARCH_RDF, RdfCase


class FakeGatherCache(gather.GatherCache):
    """A ``GatherCache`` stand-in that claims everything is already gathered.

    Reporting every ask as already-gathered short-circuits the gathering
    machinery, so renderer tests run against pre-seeded triples only.
    """

    def already_gathered(self, *args, **kwargs):
        # Pretend the work is done so no gatherers ever actually run.
        return True


class FakeGathering(gather.Gathering):
    """A ``Gathering`` variant that skips exhaustive asking for tests.

    NOTE: only suitable for non-streaming renderers — each asked object is
    paired with the cache's current gathered graph instead of exhausting
    the full gathering process.
    """

    def ask_exhaustively(self, *args, **kwargs):
        # pair each asked object with the (per-iteration) gathered graph
        yield from (
            (_obj, self.cache.gathered)
            for _obj in self.ask(*args, **kwargs)
        )


def _make_fake_gathering(tripledict, renderer_type):
    """Build a ``FakeGathering`` pre-seeded with ``tripledict``.

    Uses ``trovesearch_by_indexstrategy`` as the organizer (and its norms),
    passes the renderer's ``INDEXCARD_DERIVER_IRI`` through to gatherers,
    and seeds a ``FakeGatherCache`` so nothing is gathered at ask-time.
    """
    return FakeGathering(
        norms=trovesearch_by_indexstrategy.norms,
        organizer=trovesearch_by_indexstrategy,
        gatherer_kwargs={'deriver_iri': renderer_type.INDEXCARD_DERIVER_IRI},
        cache=FakeGatherCache(gathered=rdf.RdfGraph(tripledict)),
    )


class TroveRendererTests(BasicInputOutputTestCase):
inputs = UNRENDERED_RDF

Expand All @@ -15,8 +46,11 @@ class TroveRendererTests(BasicInputOutputTestCase):

def compute_output(self, given_input: RdfCase):
_renderer = self.renderer_class(
response_focus=given_input.focus,
response_gathering=...,
response_focus=gather.Focus.new(
given_input.focus,
given_input.tripledict.get(given_input.focus, {}).get(RDF.type),
),
response_gathering=_make_fake_gathering(given_input.tripledict, self.renderer_class),
)
return _renderer.render_document()

Expand Down
14 changes: 6 additions & 8 deletions tests/trove/render/_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,12 @@ class RdfCase:
'various_types': RdfCase(BLARG.aSubject, {
BLARG.aSubject: {
RDF.type: {BLARG.aType},
BLARG.aVerb: {
BLARG.anIri, # an IRI value
rdf.literal('an rdf:string literal'),
rdf.literal('a rdf:langString literal', language='en'),
rdf.literal(17), # an integer literal
rdf.literal(datetime.date(2024, 1, 1)), # a date literal
rdf.literal('a literal of strange datatype', datatype_iris=BLARG.aStrangeDatatype),
},
BLARG.hasIri: {BLARG.anIri},
BLARG.hasRdfStringLiteral: {rdf.literal('an rdf:string literal')},
BLARG.hasRdfLangStringLiteral: {rdf.literal('a rdf:langString literal', language='en')},
BLARG.hasIntegerLiteral: {rdf.literal(17)},
BLARG.hasDateLiteral: {rdf.literal(datetime.date(2024, 1, 1))},
BLARG.hasStrangeLiteral: {rdf.literal('a literal of strange datatype', datatype_iris=BLARG.aStrangeDatatype)},
},
}),
}
Expand Down
Loading

0 comments on commit 3b19790

Please sign in to comment.