Skip to content

Commit

Permalink
fix: normalize special characters from search
Browse files Browse the repository at this point in the history
resolves #362
  • Loading branch information
sennierer committed Nov 13, 2024
1 parent 0c9d012 commit 8b95961
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion apis_ontology/filtersets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.contrib.postgres.search import TrigramWordSimilarity
from django.db.models.functions import Greatest
from django.db import models
import unicodedata

from apis_core.apis_entities.filtersets import AbstractEntityFilterSet
from apis_core.collections.models import SkosCollection, SkosCollectionContentObject
Expand All @@ -23,6 +24,12 @@ def remove_quotes(token):
return token.strip('"')


def remove_accents(input_str):
    """Return *input_str* with diacritical marks stripped.

    The string is decomposed with Unicode NFKD so that accented letters
    become a base character followed by combining marks; the combining
    marks are then dropped and the result is recomposed with NFC.

    Unlike an ASCII encode with ``errors="ignore"``, characters that have
    no ASCII decomposition (e.g. ``ß``, ``ø``, or letters from non-Latin
    scripts) are kept instead of being silently deleted, so a search
    token never collapses to a truncated or empty string.

    :param input_str: the text to normalize.
    :return: the text without combining diacritical marks.
    """
    decomposed = unicodedata.normalize("NFKD", input_str)
    # Drop only the combining marks (accents); keep every base character.
    stripped = "".join(
        char for char in decomposed if not unicodedata.combining(char)
    )
    # Recompose so scripts that NFKD splits into non-combining parts
    # (e.g. Hangul syllables into jamo) get back their usual stored form.
    return unicodedata.normalize("NFC", stripped)


################
# filter methods
################
Expand All @@ -49,7 +56,7 @@ def trigram_search_filter(queryset, fields, value):
trig_vector_list = []
for token in tokens:
for field in fields:
trig_vector_list.append(TrigramWordSimilarity(token, field))
trig_vector_list.append(TrigramWordSimilarity(remove_accents(token), field))
trig_vector = Greatest(*trig_vector_list, None)
return (
queryset.annotate(similarity=trig_vector)
Expand Down

0 comments on commit 8b95961

Please sign in to comment.