Skip to content

Commit

Permalink
prepare 25.0.0: rejoin develop's commit history
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Jan 6, 2025
2 parents deaeae4 + fd9dad5 commit da813d8
Show file tree
Hide file tree
Showing 13 changed files with 227 additions and 99 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Change Log

# [25.0.0] - 2024-12-27
- update calendar version to `25`, reset semantic versions to `0`
- trove-search api:
- support jsonapi `fields[TYPE]` query params; see https://jsonapi.org/format/#fetching-sparse-fieldsets
- when `TYPE` in `fields[TYPE]` matches the value of a `cardSearchFilter[resourceType]` query param,
interpret the given fields as shorthand property-paths and use for custom csv/tsv columns
- streaming "simple json" rendering (`acceptMediatype=application/json`)
- when sorting by integer values, treat missing values as zero
(though there may be future cases where this is wrong...)

# [24.7.0] - 2024-12-27
- allow rendering search responses as downloadable CSVs/TSVs
Expand Down
3 changes: 3 additions & 0 deletions share/search/index_strategy/trovesearch_denorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,17 +771,20 @@ def _cardsearch_aggs(self):
def _cardsearch_sorts(self):
for _sortparam in self.params.sort_list:
_fieldkey = _path_field_name(_sortparam.propertypath)
_when_missing: str | int = '_last'
if _sortparam.value_type == ValueType.DATE:
_field = f'card.date_by_propertypath.{_fieldkey}'
_unmapped_type = 'date'
elif _sortparam.value_type == ValueType.INTEGER:
_field = f'card.int_by_propertypath.{_fieldkey}'
_unmapped_type = 'long'
_when_missing = 0 # HACK: treat missing values as zero
else:
raise ValueError(f'unsupported sort value type: {_sortparam}')
yield {_field: {
'order': 'desc' if _sortparam.descending else 'asc',
'unmapped_type': _unmapped_type,
'missing': _when_missing,
}}


Expand Down
2 changes: 1 addition & 1 deletion share/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '24.7.0'
__version__ = '25.0.0'
10 changes: 5 additions & 5 deletions tests/trove/render/test_simple_csv_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ class TestSimpleCsvRenderer(_base.TrovesearchRendererTests):
expected_outputs = {
'no_results': SimpleRendering(
mediatype='text/csv',
rendered_content='@id\r\n',
rendered_content='@id,sameAs,resourceType,resourceNature,title,name,dateCreated,dateModified,rights\r\n',
),
'few_results': SimpleRendering(
mediatype='text/csv',
rendered_content=''.join((
'@id,title\r\n',
'http://blarg.example/vocab/anItem,"an item, yes"\r\n',
'http://blarg.example/vocab/anItemm,"an itemm, yes"\r\n',
'http://blarg.example/vocab/anItemmm,"an itemmm, yes"\r\n',
'@id,sameAs,resourceType,resourceNature,title,name,dateCreated,dateModified,rights\r\n',
'http://blarg.example/vocab/anItem,,,,"an item, yes",,,,\r\n',
'http://blarg.example/vocab/anItemm,,,,"an itemm, yes",,,,\r\n',
'http://blarg.example/vocab/anItemmm,,,,"an itemmm, yes",,,,\r\n',
)),
),
}
10 changes: 5 additions & 5 deletions tests/trove/render/test_simple_tsv_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ class TestSimpleTsvRenderer(_base.TrovesearchRendererTests):
expected_outputs = {
'no_results': SimpleRendering(
mediatype='text/tab-separated-values',
rendered_content='@id\r\n',
rendered_content='@id\tsameAs\tresourceType\tresourceNature\ttitle\tname\tdateCreated\tdateModified\trights\r\n',
),
'few_results': SimpleRendering(
mediatype='text/tab-separated-values',
rendered_content=''.join((
'@id\ttitle\r\n',
'http://blarg.example/vocab/anItem\tan item, yes\r\n',
'http://blarg.example/vocab/anItemm\tan itemm, yes\r\n',
'http://blarg.example/vocab/anItemmm\tan itemmm, yes\r\n',
'@id\tsameAs\tresourceType\tresourceNature\ttitle\tname\tdateCreated\tdateModified\trights\r\n',
'http://blarg.example/vocab/anItem\t\t\t\tan item, yes\t\t\t\t\r\n',
'http://blarg.example/vocab/anItemm\t\t\t\tan itemm, yes\t\t\t\t\r\n',
'http://blarg.example/vocab/anItemmm\t\t\t\tan itemmm, yes\t\t\t\t\r\n',
)),
),
}
6 changes: 3 additions & 3 deletions trove/render/_simple_trovesearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ def multicard_rendering(self, card_pages: Iterator[dict[str, dict]]) -> ProtoRen
)

def render_document(self) -> ProtoRendering:
_focustypes = set(self.response_gathering.ask(RDF.type, focus=self.response_focus))
_focustypes = self.response_focus.type_iris
if (TROVE.Cardsearch in _focustypes) or (TROVE.Valuesearch in _focustypes):
return self.multicard_rendering(self._iter_card_pages())
if TROVE.Indexcard in _focustypes:
return self.unicard_rendering(
self.response_focus.iri,
self._get_card_content(self.response_focus.iri),
self.response_focus.single_iri(),
self._get_card_content(self.response_focus.single_iri()),
)
raise trove_exceptions.UnsupportedRdfType(_focustypes)

Expand Down
65 changes: 43 additions & 22 deletions trove/render/simple_csv.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
from __future__ import annotations
import csv
import functools
import itertools
import dataclasses
import typing

from trove.trovesearch.search_params import (
Propertypath,
BaseTroveParams,
CardsearchParams,
ValuesearchParams,
)
from trove.vocab import mediatypes
from trove.vocab import osfmap
from trove.vocab.namespaces import TROVE
Expand All @@ -15,6 +22,7 @@

_MULTIVALUE_DELIMITER = ' ; ' # possible improvement: smarter in-value delimiting?
_VALUE_KEY_PREFERENCE = ('@value', '@id', 'name', 'prefLabel', 'label')
_ID_JSONPATH = ('@id',)


class TrovesearchSimpleCsvRenderer(SimpleTrovesearchRenderer):
Expand All @@ -23,10 +31,13 @@ class TrovesearchSimpleCsvRenderer(SimpleTrovesearchRenderer):
CSV_DIALECT = csv.excel

def unicard_rendering(self, card_iri: str, osfmap_json: dict):
    """Render a single card using the same tabular rendering as a multi-card response.

    The card is wrapped in a one-page iterator (`{card_iri: osfmap_json}`)
    and handed to `multicard_rendering`.

    Returns whatever `multicard_rendering` returns for that single page.
    """
    _single_page = iter([{card_iri: osfmap_json}])
    # the rendering must be returned: without `return` this method yields None
    # to its caller instead of the rendering built by `multicard_rendering`
    return self.multicard_rendering(card_pages=_single_page)

def multicard_rendering(self, card_pages: typing.Iterable[dict[str, dict]]):
_doc = TabularDoc(card_pages)
def multicard_rendering(self, card_pages: typing.Iterator[dict[str, dict]]):
_doc = TabularDoc(
card_pages,
trove_params=getattr(self.response_focus, 'search_params', None),
)
return StreamableRendering(
mediatype=self.MEDIATYPE,
content_stream=csv_stream(self.CSV_DIALECT, _doc.header(), _doc.rows()),
Expand All @@ -43,21 +54,41 @@ def csv_stream(csv_dialect, header: list, rows: typing.Iterator[list]) -> typing
@dataclasses.dataclass
class TabularDoc:
card_pages: typing.Iterator[dict[str, dict]]
trove_params: BaseTroveParams | None = None
_started: bool = False

@functools.cached_property
def field_paths(self) -> tuple[Jsonpath, ...]:
# TODO: use jsonapi's "sparse fieldsets" to allow selecting
# https://jsonapi.org/format/#fetching-sparse-fieldsets
return tuple((
('@id',),
*self._nonempty_field_paths()
))
def column_jsonpaths(self) -> tuple[Jsonpath, ...]:
_column_jsonpaths = (
_osfmap_jsonpath(_path)
for _path in self._column_paths()
)
return (_ID_JSONPATH, *_column_jsonpaths)

@functools.cached_property
def first_page(self) -> dict[str, dict]:
return next(self.card_pages, {})

def _column_paths(self) -> typing.Iterator[Propertypath]:
    """Iterate the property-paths to use as table columns.

    When `self.trove_params` is given, look up the type iris the search
    expects (`valuesearch_type_iris()` for valuesearch params,
    `cardsearch_type_iris()` for cardsearch params) and chain together
    the path lists found for them in `trove_params.attrpaths_by_type`.
    Types with no entry there are skipped.

    When there are no params, or none of the expected types has an
    entry, fall back to `osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS`.
    """
    _pathlists: list[typing.Iterable[Propertypath]] = []
    _params = self.trove_params
    if _params is not None:  # hacks
        if isinstance(_params, ValuesearchParams):
            _expected_card_types = set(_params.valuesearch_type_iris())
        elif isinstance(_params, CardsearchParams):
            _expected_card_types = set(_params.cardsearch_type_iris())
        else:
            _expected_card_types = set()
        # shortest iri first, breaking ties on the iri itself -- sorting a set
        # by length alone leaves equal-length iris in set-iteration order, which
        # is not stable across processes, so column order could vary per run
        # (assumes type iris are strings -- TODO confirm)
        _ordered_type_iris = sorted(
            _expected_card_types,
            key=lambda _iri: (len(_iri), _iri),
        )
        for _type_iri in _ordered_type_iris:
            try:
                _pathlist = _params.attrpaths_by_type[_type_iri]
            except KeyError:
                pass  # no paths given for this type
            else:
                _pathlists.append(_pathlist)
    if not _pathlists:
        _pathlists.append(osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS)
    return itertools.chain.from_iterable(_pathlists)

def _iter_card_pages(self):
assert not self._started
self._started = True
Expand All @@ -66,27 +97,17 @@ def _iter_card_pages(self):
yield from self.card_pages

def header(self) -> list[str]:
return ['.'.join(_path) for _path in self.field_paths]
return ['.'.join(_path) for _path in self.column_jsonpaths]

def rows(self) -> typing.Iterator[list[str]]:
    """Yield one row of cell values for each card, page by page.

    Pages come from `self._iter_card_pages()`; each card's content is
    converted to a row by `self._row_values`.
    """
    for _cards_by_iri in self._iter_card_pages():
        # only the card contents are needed to build a row; the iri keys are not
        yield from map(self._row_values, _cards_by_iri.values())

def _nonempty_field_paths(self) -> typing.Iterator[Jsonpath]:
for _path in osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS:
_jsonpath = _osfmap_jsonpath(_path)
_path_is_present = any(
_has_value(_card, _jsonpath)
for _card in self.first_page.values()
)
if _path_is_present:
yield _jsonpath

def _row_values(self, osfmap_json: dict) -> list[str]:
return [
self._row_field_value(osfmap_json, _field_path)
for _field_path in self.field_paths
for _field_path in self.column_jsonpaths
]

def _row_field_value(self, osfmap_json: dict, field_path: Jsonpath) -> str:
Expand Down
36 changes: 28 additions & 8 deletions trove/render/simple_json.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import json
import re
import typing

from primitive_metadata import primitive_rdf as rdf

Expand All @@ -8,6 +10,7 @@
)
from trove.vocab import mediatypes
from trove.vocab.namespaces import TROVE, RDF
from ._rendering import StreamableRendering
from ._simple_trovesearch import SimpleTrovesearchRenderer


Expand All @@ -24,15 +27,32 @@ def simple_unicard_rendering(self, card_iri, osfmap_json):
'meta': self._render_meta(),
}, indent=2)

def simple_multicard_rendering(self, cards):
return json.dumps({
'data': [
self._render_card_content(_card_iri, _osfmap_json)
for _card_iri, _osfmap_json in cards
],
'links': self._render_links(),
def multicard_rendering(self, card_pages: typing.Iterator[dict[str, dict]]):
return StreamableRendering(
mediatype=self.MEDIATYPE,
content_stream=self._stream_json(card_pages),
)

def _stream_json(self, card_pages: typing.Iterator[dict[str, dict]]):
_prefix = '{"data": ['
yield _prefix
_datum_prefix = None
for _page in card_pages:
for _card_iri, _osfmap_json in _page.items():
if _datum_prefix is not None:
yield _datum_prefix
yield json.dumps(self._render_card_content(_card_iri, _osfmap_json))
_datum_prefix = ','
_nondata = json.dumps({
'meta': self._render_meta(),
}, indent=2)
'links': self._render_links(),
})
yield re.sub(
'^{', # replace the opening {
'],', # ...with a closing ] for the "data" list
_nondata,
count=1,
)

def _render_card_content(self, card_iri: str, osfmap_json: dict):
self._add_twople(osfmap_json, 'foaf:primaryTopicOf', card_iri)
Expand Down
11 changes: 7 additions & 4 deletions trove/trovesearch/search_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
PageCursor,
ReproduciblyRandomSampleCursor,
)
from trove.trovesearch.search_params import BaseTroveParams
from trove.trovesearch.search_params import (
BaseTroveParams,
CardsearchParams,
)
from trove.vocab.namespaces import TROVE
from trove.vocab.trove import trove_indexcard_namespace

Expand Down Expand Up @@ -62,13 +65,13 @@ def __post_init__(self):
def get_next_streaming_handle(self) -> typing.Self | None:
_next_cursor = self.cursor.next_cursor()
if (_next_cursor is not None) and (self.handler is not None):
assert isinstance(self.search_params, CardsearchParams)
_next_params = dataclasses.replace(
self.search_params,
page_cursor=_next_cursor,
include=frozenset([(TROVE.searchResultPage,)]),
included_relations=frozenset([(TROVE.searchResultPage,)]),
)
if self.handler is not None:
return self.handler(_next_params)
return self.handler(_next_params)
return None


Expand Down
Loading

0 comments on commit da813d8

Please sign in to comment.