diff --git a/invenio_vocabularies/contrib/funders/config.py b/invenio_vocabularies/contrib/funders/config.py index 2065bb2b..c2216cd5 100644 --- a/invenio_vocabularies/contrib/funders/config.py +++ b/invenio_vocabularies/contrib/funders/config.py @@ -9,6 +9,7 @@ """Vocabulary funders configuration.""" from flask import current_app +from invenio_i18n import get_locale from invenio_i18n import lazy_gettext as _ from invenio_records_resources.services import SearchOptions from invenio_records_resources.services.records.components import DataComponent @@ -22,6 +23,7 @@ funder_fundref_doi_prefix = LocalProxy( lambda: current_app.config["VOCABULARIES_FUNDER_DOI_PREFIX"] ) +localized_title = LocalProxy(lambda: f"title.{get_locale()}^20") class FundersSearchOptions(SearchOptions): @@ -30,9 +32,12 @@ class FundersSearchOptions(SearchOptions): suggest_parser_cls = SuggestQueryParser.factory( fields=[ "name^100", + "acronym.keyword^100", + "acronym^40", + localized_title, + "id^20", + "aliases^20", "identifiers.identifier^10", - "acronym^10", - "aliases^10", ], type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness diff --git a/invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json b/invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json index ed050066..e047a159 100644 --- a/invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +++ b/invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json @@ -29,6 +29,16 @@ ] } }, + "normalizer": { + "accent_normalizer": { + "type": "custom", + "char_filter": ["strip_special_chars"], + "filter": [ + "lowercase", + "asciifolding" + ] + } + }, "filter": { "lowercase": { "type": "lowercase", @@ -112,7 +122,13 @@ "acronym": { "type": "text", "analyzer": 
"accent_edge_analyzer", - "search_analyzer": "accent_analyzer" + "search_analyzer": "accent_analyzer", + "fields": { + "keyword": { + "type": "keyword", + "normalizer": "accent_normalizer" + } + } }, "status": { "type": "keyword" diff --git a/invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json b/invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json index ed050066..e047a159 100644 --- a/invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +++ b/invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json @@ -29,6 +29,16 @@ ] } }, + "normalizer": { + "accent_normalizer": { + "type": "custom", + "char_filter": ["strip_special_chars"], + "filter": [ + "lowercase", + "asciifolding" + ] + } + }, "filter": { "lowercase": { "type": "lowercase", @@ -112,7 +122,13 @@ "acronym": { "type": "text", "analyzer": "accent_edge_analyzer", - "search_analyzer": "accent_analyzer" + "search_analyzer": "accent_analyzer", + "fields": { + "keyword": { + "type": "keyword", + "normalizer": "accent_normalizer" + } + } }, "status": { "type": "keyword" diff --git a/tests/contrib/funders/test_funders_resource.py b/tests/contrib/funders/test_funders_resource.py index 287e5b12..de5c39c3 100644 --- a/tests/contrib/funders/test_funders_resource.py +++ b/tests/contrib/funders/test_funders_resource.py @@ -127,9 +127,12 @@ def test_funders_suggest_sort(client, h, prefix, example_funders): - # Should show 2 results, and id=cern as first due to name + # Should show 3 results, and id=cern as first due to name res = client.get(f"{prefix}?suggest=CERN", headers=h) assert res.status_code == 200 - assert res.json["hits"]["total"] == 2 # should be 2 + assert res.json["hits"]["total"] == 3 assert res.json["hits"]["hits"][0]["name"] == "CERN" assert res.json["hits"]["hits"][1]["name"] == "CERT" + # Matches lower, since title is boosted less + assert res.json["hits"]["hits"][2]["name"] == "OTHER" + assert res.json["hits"]["hits"][2]["title"]["en"] == "CERN" res = 
client.get(f"{prefix}?suggest=N%C3%B5rthw%C3%AAst", headers=h) # Nõrthwêst assert res.status_code == 200