From 9ce9a09a6219abdd98b7a4297bb9f2a2338abfdc Mon Sep 17 00:00:00 2001 From: "Meti A. Bayissa" Date: Wed, 31 Jul 2024 18:39:27 +0300 Subject: [PATCH] Fix #760: Duration parser in DIEF fails for words written in non-Latin alphabets. --- .../org/dbpedia/extraction/dataparser/DurationParser.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/dbpedia/extraction/dataparser/DurationParser.scala b/core/src/main/scala/org/dbpedia/extraction/dataparser/DurationParser.scala index adad0f77d7..52c98b0291 100644 --- a/core/src/main/scala/org/dbpedia/extraction/dataparser/DurationParser.scala +++ b/core/src/main/scala/org/dbpedia/extraction/dataparser/DurationParser.scala @@ -131,7 +131,7 @@ class DurationParser( context : { def language : Language } ) extends java.io.Se val durationsMap = TimeValueUnitRegex.findAllIn(input).matchData.map{ m => { // Seconds and minutes could be indicated as ',''," - val unit = timeUnits.get(m.subgroups(1).replaceAll("""[^\'\"a-zA-Z]""", "")).getOrElse(return None) // hack to deal with e.g "min)" matches + val unit = timeUnits.get(m.subgroups(1).replaceAll(s"""[^\'\"${timeUnitsRegex}]""", "")).getOrElse(return None) // hack to deal with e.g "min)" matches val num = getNum(m).getOrElse(return None) (unit, num) } }.toMap