From abb54e3ab61a8da410e1284a6400e17fdabd2da6 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 2 Jan 2025 10:26:35 -0500 Subject: [PATCH 1/5] prefer names in default csv/tsv columns --- trove/vocab/osfmap.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/trove/vocab/osfmap.py b/trove/vocab/osfmap.py index b85266d3e..bf5ae94e3 100644 --- a/trove/vocab/osfmap.py +++ b/trove/vocab/osfmap.py @@ -912,11 +912,11 @@ def osfmap_shorthand() -> IriShorthand: (RDF.type,), (OSFMAP.storageRegion, SKOS.prefLabel), (OSFMAP.storageByteCount,), - (DCTERMS.creator,), - (DCTERMS.rights,), - (DCTERMS.publisher,), - (OSFMAP.affiliation,), - (OSFMAP.funder,), + (DCTERMS.creator, FOAF.name), + (DCTERMS.rights, FOAF.name), + (DCTERMS.publisher, FOAF.name), + (OSFMAP.affiliation, FOAF.name), + (OSFMAP.funder, FOAF.name), (DCTERMS.conformsTo, DCTERMS.title), (OSFMAP.usage, OSFMAP.viewCount), (OSFMAP.usage, OSFMAP.downloadCount), From f9c744dbdbc7d11a41a622b2ef5fd12637b1eda0 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 3 Jan 2025 13:29:08 -0500 Subject: [PATCH 2/5] feat: sparse fieldsets for csv/tsv columns --- .../trove/render/test_simple_csv_renderer.py | 10 +- .../trove/render/test_simple_tsv_renderer.py | 10 +- trove/render/_simple_trovesearch.py | 6 +- trove/render/simple_csv.py | 65 +++++++---- trove/trovesearch/search_params.py | 110 ++++++++++++------ trove/util/frozen.py | 33 ++++++ trove/views/search.py | 16 ++- trove/vocab/osfmap.py | 17 +-- 8 files changed, 180 insertions(+), 87 deletions(-) create mode 100644 trove/util/frozen.py diff --git a/tests/trove/render/test_simple_csv_renderer.py b/tests/trove/render/test_simple_csv_renderer.py index 00f3291c8..ca06aa273 100644 --- a/tests/trove/render/test_simple_csv_renderer.py +++ b/tests/trove/render/test_simple_csv_renderer.py @@ -10,15 +10,15 @@ class TestSimpleCsvRenderer(_base.TrovesearchRendererTests): expected_outputs = { 'no_results': SimpleRendering( mediatype='text/csv', 
- rendered_content='@id\r\n', + rendered_content='@id,sameAs,resourceType,resourceNature,title,name,dateCreated,dateModified,rights\r\n', ), 'few_results': SimpleRendering( mediatype='text/csv', rendered_content=''.join(( - '@id,title\r\n', - 'http://blarg.example/vocab/anItem,"an item, yes"\r\n', - 'http://blarg.example/vocab/anItemm,"an itemm, yes"\r\n', - 'http://blarg.example/vocab/anItemmm,"an itemmm, yes"\r\n', + '@id,sameAs,resourceType,resourceNature,title,name,dateCreated,dateModified,rights\r\n', + 'http://blarg.example/vocab/anItem,,,,"an item, yes",,,,\r\n', + 'http://blarg.example/vocab/anItemm,,,,"an itemm, yes",,,,\r\n', + 'http://blarg.example/vocab/anItemmm,,,,"an itemmm, yes",,,,\r\n', )), ), } diff --git a/tests/trove/render/test_simple_tsv_renderer.py b/tests/trove/render/test_simple_tsv_renderer.py index 7ee25e15d..752493362 100644 --- a/tests/trove/render/test_simple_tsv_renderer.py +++ b/tests/trove/render/test_simple_tsv_renderer.py @@ -10,15 +10,15 @@ class TestSimpleTsvRenderer(_base.TrovesearchRendererTests): expected_outputs = { 'no_results': SimpleRendering( mediatype='text/tab-separated-values', - rendered_content='@id\r\n', + rendered_content='@id\tsameAs\tresourceType\tresourceNature\ttitle\tname\tdateCreated\tdateModified\trights\r\n', ), 'few_results': SimpleRendering( mediatype='text/tab-separated-values', rendered_content=''.join(( - '@id\ttitle\r\n', - 'http://blarg.example/vocab/anItem\tan item, yes\r\n', - 'http://blarg.example/vocab/anItemm\tan itemm, yes\r\n', - 'http://blarg.example/vocab/anItemmm\tan itemmm, yes\r\n', + '@id\tsameAs\tresourceType\tresourceNature\ttitle\tname\tdateCreated\tdateModified\trights\r\n', + 'http://blarg.example/vocab/anItem\t\t\t\tan item, yes\t\t\t\t\r\n', + 'http://blarg.example/vocab/anItemm\t\t\t\tan itemm, yes\t\t\t\t\r\n', + 'http://blarg.example/vocab/anItemmm\t\t\t\tan itemmm, yes\t\t\t\t\r\n', )), ), } diff --git a/trove/render/_simple_trovesearch.py 
b/trove/render/_simple_trovesearch.py index f49cbfe50..6827c7918 100644 --- a/trove/render/_simple_trovesearch.py +++ b/trove/render/_simple_trovesearch.py @@ -43,13 +43,13 @@ def multicard_rendering(self, card_pages: Iterator[dict[str, dict]]) -> ProtoRen ) def render_document(self) -> ProtoRendering: - _focustypes = set(self.response_gathering.ask(RDF.type, focus=self.response_focus)) + _focustypes = self.response_focus.type_iris if (TROVE.Cardsearch in _focustypes) or (TROVE.Valuesearch in _focustypes): return self.multicard_rendering(self._iter_card_pages()) if TROVE.Indexcard in _focustypes: return self.unicard_rendering( - self.response_focus.iri, - self._get_card_content(self.response_focus.iri), + self.response_focus.single_iri(), + self._get_card_content(self.response_focus.single_iri()), ) raise trove_exceptions.UnsupportedRdfType(_focustypes) diff --git a/trove/render/simple_csv.py b/trove/render/simple_csv.py index b94949c02..0642ed179 100644 --- a/trove/render/simple_csv.py +++ b/trove/render/simple_csv.py @@ -1,9 +1,16 @@ from __future__ import annotations import csv import functools +import itertools import dataclasses import typing +from trove.trovesearch.search_params import ( + Propertypath, + BaseTroveParams, + CardsearchParams, + ValuesearchParams, +) from trove.vocab import mediatypes from trove.vocab import osfmap from trove.vocab.namespaces import TROVE @@ -15,6 +22,7 @@ _MULTIVALUE_DELIMITER = ' ; ' # possible improvement: smarter in-value delimiting? 
_VALUE_KEY_PREFERENCE = ('@value', '@id', 'name', 'prefLabel', 'label') +_ID_JSONPATH = ('@id',) class TrovesearchSimpleCsvRenderer(SimpleTrovesearchRenderer): @@ -23,10 +31,13 @@ class TrovesearchSimpleCsvRenderer(SimpleTrovesearchRenderer): CSV_DIALECT = csv.excel def unicard_rendering(self, card_iri: str, osfmap_json: dict): - self.multicard_rendering(card_pages=[{card_iri: osfmap_json}]) + self.multicard_rendering(card_pages=iter([{card_iri: osfmap_json}])) - def multicard_rendering(self, card_pages: typing.Iterable[dict[str, dict]]): - _doc = TabularDoc(card_pages) + def multicard_rendering(self, card_pages: typing.Iterator[dict[str, dict]]): + _doc = TabularDoc( + card_pages, + trove_params=getattr(self.response_focus, 'search_params', None), + ) return StreamableRendering( mediatype=self.MEDIATYPE, content_stream=csv_stream(self.CSV_DIALECT, _doc.header(), _doc.rows()), @@ -43,21 +54,41 @@ def csv_stream(csv_dialect, header: list, rows: typing.Iterator[list]) -> typing @dataclasses.dataclass class TabularDoc: card_pages: typing.Iterator[dict[str, dict]] + trove_params: BaseTroveParams | None = None _started: bool = False @functools.cached_property - def field_paths(self) -> tuple[Jsonpath, ...]: - # TODO: use jsonapi's "sparse fieldsets" to allow selecting - # https://jsonapi.org/format/#fetching-sparse-fieldsets - return tuple(( - ('@id',), - *self._nonempty_field_paths() - )) + def column_jsonpaths(self) -> tuple[Jsonpath, ...]: + _column_jsonpaths = ( + _osfmap_jsonpath(_path) + for _path in self._column_paths() + ) + return (_ID_JSONPATH, *_column_jsonpaths) @functools.cached_property def first_page(self) -> dict[str, dict]: return next(self.card_pages, {}) + def _column_paths(self) -> typing.Iterator[Propertypath]: + _pathlists: list[typing.Iterable[Propertypath]] = [] + if self.trove_params is not None: # hacks + if isinstance(self.trove_params, ValuesearchParams): + _expected_card_types = set(self.trove_params.valuesearch_type_iris()) + elif 
isinstance(self.trove_params, CardsearchParams): + _expected_card_types = set(self.trove_params.cardsearch_type_iris()) + else: + _expected_card_types = set() + for _type_iri in sorted(_expected_card_types, key=len): + try: + _pathlist = self.trove_params.attrpaths_by_type[_type_iri] + except KeyError: + pass + else: + _pathlists.append(_pathlist) + if not _pathlists: + _pathlists.append(osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS) + return itertools.chain.from_iterable(_pathlists) + def _iter_card_pages(self): assert not self._started self._started = True @@ -66,27 +97,17 @@ def _iter_card_pages(self): yield from self.card_pages def header(self) -> list[str]: - return ['.'.join(_path) for _path in self.field_paths] + return ['.'.join(_path) for _path in self.column_jsonpaths] def rows(self) -> typing.Iterator[list[str]]: for _page in self._iter_card_pages(): for _card_iri, _osfmap_json in _page.items(): yield self._row_values(_osfmap_json) - def _nonempty_field_paths(self) -> typing.Iterator[Jsonpath]: - for _path in osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS: - _jsonpath = _osfmap_jsonpath(_path) - _path_is_present = any( - _has_value(_card, _jsonpath) - for _card in self.first_page.values() - ) - if _path_is_present: - yield _jsonpath - def _row_values(self, osfmap_json: dict) -> list[str]: return [ self._row_field_value(osfmap_json, _field_path) - for _field_path in self.field_paths + for _field_path in self.column_jsonpaths ] def _row_field_value(self, osfmap_json: dict, field_path: Jsonpath) -> str: diff --git a/trove/trovesearch/search_params.py b/trove/trovesearch/search_params.py index 027ed5756..efcb2b0d5 100644 --- a/trove/trovesearch/search_params.py +++ b/trove/trovesearch/search_params.py @@ -5,6 +5,7 @@ import functools import itertools import logging +import types import typing import urllib @@ -16,6 +17,7 @@ DEFAULT_PAGE_SIZE, PageCursor, ) +from trove.util.frozen import freeze from trove.util.queryparams import ( QueryparamDict, QueryparamName, @@ 
-59,7 +61,7 @@ ONE_GLOB_PROPERTYPATH: Propertypath = (GLOB_PATHSTEP,) DEFAULT_PROPERTYPATH_SET: PropertypathSet = frozenset([ONE_GLOB_PROPERTYPATH]) -DEFAULT_INCLUDES_BY_TYPE = { +DEFAULT_INCLUDES_BY_TYPE: collections.abc.Mapping[str, frozenset[Propertypath]] = freeze({ TROVE.Cardsearch: { (TROVE.searchResultPage,), (TROVE.relatedPropertyList,), @@ -70,29 +72,29 @@ TROVE.SearchResult: { (TROVE.indexCard,), }, -} +}) -DEFAULT_FIELDS_BY_TYPE = { - TROVE.Indexcard: { +DEFAULT_FIELDS_BY_TYPE: collections.abc.Mapping[str, tuple[Propertypath, ...]] = freeze({ + TROVE.Indexcard: [ (TROVE.resourceMetadata,), (TROVE.focusIdentifier,), (DCTERMS.issued,), (DCTERMS.modified,), (FOAF.primaryTopic), - }, - TROVE.Cardsearch: { + ], + TROVE.Cardsearch: [ (TROVE.totalResultCount,), (TROVE.cardSearchText,), (TROVE.cardSearchFilter,), - }, - TROVE.Valuesearch: { + ], + TROVE.Valuesearch: [ (TROVE.propertyPath,), (TROVE.valueSearchText,), (TROVE.valueSearchFilter,), (TROVE.cardSearchText,), (TROVE.cardSearchFilter,), - }, -} + ], +}) class ValueType(enum.Enum): @@ -121,9 +123,12 @@ def to_shortname(self) -> str: @dataclasses.dataclass(frozen=True) class BaseTroveParams: + static_focus_type: typing.ClassVar[str] # expected on subclasses + iri_shorthand: primitive_rdf.IriShorthand = dataclasses.field(repr=False) - include: PropertypathSet accept_mediatype: str | None + included_relations: PropertypathSet = dataclasses.field(repr=False, compare=False) + attrpaths_by_type: collections.abc.Mapping[str, PropertypathSet] = dataclasses.field(repr=False, compare=False) @classmethod def from_querystring(cls, querystring: str) -> typing.Self: @@ -138,7 +143,8 @@ def parse_queryparams(cls, queryparams: QueryparamDict) -> dict: # subclasses should override and add their fields to super().parse_queryparams(queryparams) return { 'iri_shorthand': cls._gather_shorthand(queryparams), - 'include': frozenset(cls._gather_include(queryparams)), + 'included_relations': cls._gather_include(queryparams), + 
'attrpaths_by_type': cls._gather_attrpaths(queryparams), 'accept_mediatype': _get_single_value(queryparams, QueryparamName('acceptMediatype')), } @@ -150,7 +156,7 @@ def to_querydict(self) -> QueryDict: _querydict = QueryDict(mutable=True) if self.accept_mediatype: _querydict['acceptMediatype'] = self.accept_mediatype - # TODO: self.iri_shorthand, self.include + # TODO: iriShorthand, include, fields[...] return _querydict @classmethod @@ -166,11 +172,40 @@ def _gather_shorthand(cls, queryparams: QueryparamDict): return NAMESPACES_SHORTHAND.with_update(_prefixmap) @classmethod - def _gather_include(cls, queryparams: QueryparamDict): - return itertools.chain.from_iterable( - _parse_propertypath_set(_include_value) - for _, _include_value in queryparams.get('include', []) + def _gather_include(cls, queryparams: QueryparamDict) -> PropertypathSet: + _include_params = queryparams.get('include', []) + if _include_params: + return frozenset(itertools.chain.from_iterable( + _parse_propertypath_set(_include_value) + for _, _include_value in _include_params + )) + return DEFAULT_INCLUDES_BY_TYPE[cls.static_focus_type] + + @classmethod + def _gather_attrpaths(cls, queryparams: QueryparamDict) -> collections.abc.Mapping[ + str, + tuple[Propertypath, ...], + ]: + _attrpaths: collections.ChainMap[str, tuple[Propertypath, ...]] = collections.ChainMap( + DEFAULT_FIELDS_BY_TYPE, # type: ignore[arg-type] ) + _fields_params = queryparams.get('fields', []) + if _fields_params: + _requested: dict[str, list[Propertypath]] = collections.defaultdict(list) + for _param_name, _param_value in _fields_params: + try: + (_typenames,) = filter(bool, _param_name.bracketed_names) + except (IndexError, ValueError): + raise trove_exceptions.InvalidQueryParamName( + f'expected "fields[TYPE]" (with exactly one non-empty bracketed segment)' + f' (got "{_param_name}")' + ) + else: + for _type in split_queryparam_value(_typenames): + _type_iri = osfmap_shorthand().expand_iri(_type) + 
_requested[_type_iri].extend(_parse_propertypaths(_param_value)) + _attrpaths = _attrpaths.new_child(freeze(_requested)) + return _attrpaths @dataclasses.dataclass(frozen=True) @@ -509,6 +544,8 @@ class CardsearchParams(BaseTroveParams): sort_list: tuple[SortParam, ...] page_cursor: PageCursor + static_focus_type = TROVE.Cardsearch + @classmethod def parse_queryparams(cls, queryparams: QueryparamDict) -> dict: _filter_set = SearchFilter.from_queryparam_family(queryparams, 'cardSearchFilter') @@ -521,22 +558,19 @@ def parse_queryparams(cls, queryparams: QueryparamDict) -> dict: 'page_cursor': _get_page_cursor(queryparams), } - @classmethod - def _gather_include(cls, queryparams: QueryparamDict): - _explicit_includes = set(super()._gather_include(queryparams)) - return itertools.chain( - _explicit_includes or DEFAULT_INCLUDES_BY_TYPE[TROVE.Cardsearch], - DEFAULT_FIELDS_BY_TYPE[TROVE.Cardsearch], - ) - @functools.cached_property def related_property_paths(self) -> tuple[Propertypath, ...]: return ( _get_related_property_paths(self.cardsearch_filter_set) - if (TROVE.relatedPropertyList,) in self.include + if (TROVE.relatedPropertyList,) in self.included_relations else () ) + def cardsearch_type_iris(self): + for _filter in self.cardsearch_filter_set: + if _filter.is_type_filter(): + yield from _filter.value_set + def to_querydict(self) -> QueryDict: _querydict = super().to_querydict() for _qp_name, _qp_value in Textsegment.queryparams_from_textsegments('cardSearchText', self.cardsearch_textsegment_set): @@ -564,6 +598,8 @@ class ValuesearchParams(CardsearchParams): valuesearch_textsegment_set: frozenset[Textsegment] valuesearch_filter_set: frozenset[SearchFilter] + static_focus_type = TROVE.Valuesearch + # override CardsearchParams @classmethod def parse_queryparams(cls, queryparams: QueryparamDict) -> dict: @@ -577,14 +613,6 @@ def parse_queryparams(cls, queryparams: QueryparamDict) -> dict: 'valuesearch_filter_set': SearchFilter.from_queryparam_family(queryparams, 
'valueSearchFilter'), } - @classmethod - def _gather_include(cls, queryparams: QueryparamDict): - _explicit_includes = set(super()._gather_include(queryparams)) - return itertools.chain( - _explicit_includes or DEFAULT_INCLUDES_BY_TYPE[TROVE.Valuesearch], - DEFAULT_FIELDS_BY_TYPE[TROVE.Valuesearch], - ) - def __post_init__(self): if is_date_property(self.valuesearch_propertypath[-1]): # date-value limitations @@ -686,10 +714,12 @@ def _get_single_value( def _parse_propertypath_set(serialized_path_set: str, *, allow_globs=True) -> PropertypathSet: # comma-delimited set of dot-delimited paths - return frozenset( - _parse_propertypath(_path, allow_globs=allow_globs) - for _path in split_queryparam_value(serialized_path_set) - ) + return frozenset(_parse_propertypaths(serialized_path_set, allow_globs=allow_globs)) + + +def _parse_propertypaths(serialized_path_set: str, *, allow_globs=True) -> typing.Iterator[Propertypath]: + for _path in split_queryparam_value(serialized_path_set): + yield _parse_propertypath(_path, allow_globs=allow_globs) def _parse_propertypath(serialized_path: str, *, allow_globs=True) -> Propertypath: @@ -738,3 +768,7 @@ def _get_page_cursor(queryparams: QueryparamDict) -> PageCursor: except ValueError: raise trove_exceptions.InvalidQueryParamValue('page[size]') return PageCursor(page_size=_size) + + +def _frozen_mapping(**kwargs) -> collections.abc.Mapping: + return types.MappingProxyType(kwargs) diff --git a/trove/util/frozen.py b/trove/util/frozen.py new file mode 100644 index 000000000..0e57eb531 --- /dev/null +++ b/trove/util/frozen.py @@ -0,0 +1,33 @@ +import collections.abc +import types + + +_FROZEN_TYPES = ( + tuple, + frozenset, + types.MappingProxyType, + str, + int, + float, +) + + +def freeze(obj): + if isinstance(obj, dict): + return freeze_mapping(obj) + if isinstance(obj, set): + return frozenset(obj) + if isinstance(obj, list): + return tuple(obj) + if isinstance(obj, _FROZEN_TYPES): + return obj + raise ValueError(f'how freeze 
{obj!r}?') + + +def freeze_mapping(_base_mapping=None, /, **kwargs) -> collections.abc.Mapping: + _mutable_mapping = {} + for _map in (_base_mapping, kwargs): + if _map is not None: + for _key, _val in _map.items(): + _mutable_mapping[_key] = freeze(_val) + return types.MappingProxyType(_mutable_mapping) diff --git a/trove/views/search.py b/trove/views/search.py index 573133566..fd4043259 100644 --- a/trove/views/search.py +++ b/trove/views/search.py @@ -59,8 +59,7 @@ def get(self, request): search_handle=self.get_search_handle(_specific_index, _search_params), ) if _renderer_type.PASSIVE_RENDER: - # fill the gathering's cache with requested info - _search_gathering.ask(_search_params.include, focus=_focus) + self._fill_gathering(_search_gathering, _search_params, _focus) # take gathered data into a response _renderer = _renderer_type(_focus, _search_gathering) return make_http_response( @@ -84,6 +83,19 @@ def _start_gathering(self, renderer_type) -> gather.Gathering: 'deriver_iri': renderer_type.INDEXCARD_DERIVER_IRI, }) + def _fill_gathering(self, search_gathering, search_params, start_focus): + # fill the gathering's cache with included related resources... 
+ search_gathering.ask(search_params.included_relations, focus=start_focus) + # ...and add requested attributes on the focus and related resources + for _focus in search_gathering.cache.focus_set: + for _focustype in _focus.type_iris: + try: + _attrpaths = search_params.attrpaths_by_type[_focustype] + except KeyError: + pass # no attribute fields for this type + else: + search_gathering.ask(_attrpaths, focus=_focus) + def get_search_handle(self, specific_index, search_params) -> BasicSearchHandle: return self._get_wrapped_handler(specific_index)(search_params) diff --git a/trove/vocab/osfmap.py b/trove/vocab/osfmap.py index bf5ae94e3..10fdf002e 100644 --- a/trove/vocab/osfmap.py +++ b/trove/vocab/osfmap.py @@ -905,21 +905,14 @@ def osfmap_shorthand() -> IriShorthand: SKIPPABLE_PROPERTIES = (OSFMAP.contains, OWL.sameAs) DEFAULT_TABULAR_SEARCH_COLUMN_PATHS: tuple[tuple[str, ...], ...] = ( - (DCTERMS.title,), (OWL.sameAs,), # includes DOI + (RDF.type,), + (DCTERMS.type,), + (DCTERMS.title,), + (FOAF.name,), (DCTERMS.created,), (DCTERMS.modified,), - (RDF.type,), - (OSFMAP.storageRegion, SKOS.prefLabel), - (OSFMAP.storageByteCount,), - (DCTERMS.creator, FOAF.name), - (DCTERMS.rights, FOAF.name), - (DCTERMS.publisher, FOAF.name), - (OSFMAP.affiliation, FOAF.name), - (OSFMAP.funder, FOAF.name), - (DCTERMS.conformsTo, DCTERMS.title), - (OSFMAP.usage, OSFMAP.viewCount), - (OSFMAP.usage, OSFMAP.downloadCount), + (DCTERMS.rights,), ) From d393156df2268c470f1c237a99750e3c3efac54e Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 3 Jan 2025 14:36:25 -0500 Subject: [PATCH 3/5] fix: treat missing int as zero for sort --- share/search/index_strategy/trovesearch_denorm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/share/search/index_strategy/trovesearch_denorm.py b/share/search/index_strategy/trovesearch_denorm.py index a40242112..2dbeb4614 100644 --- a/share/search/index_strategy/trovesearch_denorm.py +++ b/share/search/index_strategy/trovesearch_denorm.py @@ 
-771,17 +771,20 @@ def _cardsearch_aggs(self): def _cardsearch_sorts(self): for _sortparam in self.params.sort_list: _fieldkey = _path_field_name(_sortparam.propertypath) + _when_missing: str | int = '_last' if _sortparam.value_type == ValueType.DATE: _field = f'card.date_by_propertypath.{_fieldkey}' _unmapped_type = 'date' elif _sortparam.value_type == ValueType.INTEGER: _field = f'card.int_by_propertypath.{_fieldkey}' _unmapped_type = 'long' + _when_missing = 0 # HACK: treat missing values as zero else: raise ValueError(f'unsupported sort value type: {_sortparam}') yield {_field: { 'order': 'desc' if _sortparam.descending else 'asc', 'unmapped_type': _unmapped_type, + 'missing': _when_missing, }} From cc0e4c5b3b7c3d3c2b0ad57aba67a86f3da2910f Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Mon, 6 Jan 2025 10:12:12 -0500 Subject: [PATCH 4/5] fix: simple-json stream --- trove/render/simple_json.py | 36 +++++++++++++++++++++++------- trove/trovesearch/search_handle.py | 11 +++++---- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/trove/render/simple_json.py b/trove/render/simple_json.py index 60271a701..a962d8aae 100644 --- a/trove/render/simple_json.py +++ b/trove/render/simple_json.py @@ -1,4 +1,6 @@ import json +import re +import typing from primitive_metadata import primitive_rdf as rdf @@ -8,6 +10,7 @@ ) from trove.vocab import mediatypes from trove.vocab.namespaces import TROVE, RDF +from ._rendering import StreamableRendering from ._simple_trovesearch import SimpleTrovesearchRenderer @@ -24,15 +27,32 @@ def simple_unicard_rendering(self, card_iri, osfmap_json): 'meta': self._render_meta(), }, indent=2) - def simple_multicard_rendering(self, cards): - return json.dumps({ - 'data': [ - self._render_card_content(_card_iri, _osfmap_json) - for _card_iri, _osfmap_json in cards - ], - 'links': self._render_links(), + def multicard_rendering(self, card_pages: typing.Iterator[dict[str, dict]]): + return StreamableRendering( + 
mediatype=self.MEDIATYPE, + content_stream=self._stream_json(card_pages), + ) + + def _stream_json(self, card_pages: typing.Iterator[dict[str, dict]]): + _prefix = '{"data": [' + yield _prefix + _datum_prefix = None + for _page in card_pages: + for _card_iri, _osfmap_json in _page.items(): + if _datum_prefix is not None: + yield _datum_prefix + yield json.dumps(self._render_card_content(_card_iri, _osfmap_json)) + _datum_prefix = ',' + _nondata = json.dumps({ 'meta': self._render_meta(), - }, indent=2) + 'links': self._render_links(), + }) + yield re.sub( + '^{', # replace the opening { + '],', # ...with a closing ] for the "data" list + _nondata, + count=1, + ) def _render_card_content(self, card_iri: str, osfmap_json: dict): self._add_twople(osfmap_json, 'foaf:primaryTopicOf', card_iri) diff --git a/trove/trovesearch/search_handle.py b/trove/trovesearch/search_handle.py index 45a3449be..3278cf8c6 100644 --- a/trove/trovesearch/search_handle.py +++ b/trove/trovesearch/search_handle.py @@ -8,7 +8,10 @@ PageCursor, ReproduciblyRandomSampleCursor, ) -from trove.trovesearch.search_params import BaseTroveParams +from trove.trovesearch.search_params import ( + BaseTroveParams, + CardsearchParams, +) from trove.vocab.namespaces import TROVE from trove.vocab.trove import trove_indexcard_namespace @@ -62,13 +65,13 @@ def __post_init__(self): def get_next_streaming_handle(self) -> typing.Self | None: _next_cursor = self.cursor.next_cursor() if (_next_cursor is not None) and (self.handler is not None): + assert isinstance(self.search_params, CardsearchParams) _next_params = dataclasses.replace( self.search_params, page_cursor=_next_cursor, - include=frozenset([(TROVE.searchResultPage,)]), + included_relations=frozenset([(TROVE.searchResultPage,)]), ) - if self.handler is not None: - return self.handler(_next_params) + return self.handler(_next_params) return None From fd9dad5667b8571b5054a6874e58f03d3af6550a Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Mon, 6 Jan 
2025 12:13:10 -0500 Subject: [PATCH 5/5] prepare release 25.0.0 --- CHANGELOG.md | 9 +++++++++ share/version.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cfe20f77..648a5a40f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Change Log +# [25.0.0] - 2024-12-27 +- update calendar version to `25`, reset semantic versions to `0` +- trove-search api: + - support jsonapi `fields[TYPE]` query params; see https://jsonapi.org/format/#fetching-sparse-fieldsets + - when `TYPE` in `fields[TYPE]` matches the value of a `cardSearchFilter[resourceType]` query param, + interpret the given fields as shorthand property-paths and use for custom csv/tsv columns + - streaming "simple json" rendering (`acceptMediatype=application/json`) + - when sorting by integer values, treat missing values as zero + (though there may be future times this is wrong...) # [24.7.0] - 2024-12-27 - allow rendering search responses as downloadable CSVs/TSVs diff --git a/share/version.py b/share/version.py index b7d78b402..afc638f51 100644 --- a/share/version.py +++ b/share/version.py @@ -1 +1 @@ -__version__ = '24.7.0' +__version__ = '25.0.0'