diff --git a/src/main/java/org/grobid/core/utilities/WordsToNumber.java b/src/main/java/org/grobid/core/utilities/WordsToNumber.java
index e9d7c623..43d084c0 100644
--- a/src/main/java/org/grobid/core/utilities/WordsToNumber.java
+++ b/src/main/java/org/grobid/core/utilities/WordsToNumber.java
@@ -35,8 +35,10 @@ public class WordsToNumber {
     private final String VALUES_PATH = "lexicon/en/values.json";
     private final Pattern NUMERIC_PATTERN = Pattern.compile("\\b(?:\\d+(?:[.,]\\d+)*|\\d+[.,]?\\d*\\b)\\b", Pattern.CASE_INSENSITIVE);
-    private final Pattern OUT_OF_PATTERN_NUMBERS = Pattern.compile("([0-9.,]+)( out)? of (the )?([0-9.,]+)", Pattern.CASE_INSENSITIVE);
-    private final Pattern OUT_OF_PATTERN_ALPHABETIC = Pattern.compile("([A-Za-z ]+) out of ([a-z]+ )?([A-Za-z]+)", Pattern.CASE_INSENSITIVE);
+    private final Pattern OUT_OF_PATTERN_NUMBERS = Pattern.compile("([0-9.,]+)( out)? of ([a-z]+ )?([0-9.,]+)", Pattern.CASE_INSENSITIVE);
+    // The numerator group is reluctant (+?) so that an optional " out" lands in its own group instead of
+    // being swallowed by the numerator; no extra literal space is needed because " out" and " of " carry their own.
+    private final Pattern OUT_OF_PATTERN_ALPHABETIC = Pattern.compile("([A-Za-z ]+?)( out)? of ([a-z]+ )?([A-Za-z]+)", Pattern.CASE_INSENSITIVE);
     private static List bases = null;
     private static List tens = null;
@@ -199,8 +201,11 @@ public BigDecimal normalize(String text, Locale locale) throws NormalizationExce
         } else if (OUT_OF_PATTERN_NUMBERS.matcher(text).find()) {
             Matcher m = OUT_OF_PATTERN_NUMBERS.matcher(text);
-            m.matches();
+            // find() only proves a partial hit; groups can be read only when the whole text matches.
+            if (!m.matches()) {
+                throw new NormalizationException("Cannot process the expression '" + text + "'. Skipping.");
+            }
             String numerator = m.group(1);
             String denominator = m.group(m.groupCount());
             BigDecimal division = null;
             BigDecimal numeratorAsBigDecimal = null;
@@ -219,31 +224,25 @@ public BigDecimal normalize(String text, Locale locale) throws NormalizationExce
             } catch (Exception e) {
                 throw new NormalizationException("Cannot process the values '" + text + "'. The conversion is failing. Skipping them.");
             }
-
-//            catch (NumberFormatException nfe) {
-//
-//
-//                String cleanedNumerator = formatter.parse(numerator.);
-//                String cleanedDenominator = StringUtils.replaceChars(denominator, ",.", "");
-//                try {
-//                    division = new BigDecimal(cleanedNumerator).divide(new BigDecimal(cleanedDenominator));
-//                } catch (ArithmeticException ae) {
-//                    division = new BigDecimal(cleanedNumerator).divide(new BigDecimal(cleanedDenominator), 10, BigDecimal.ROUND_HALF_UP);
-//                } catch (Exception e) {
-//                    throw new NormalizationException("Cannot process the values '" + text + "'. The conversion is failing. Skipping them.");
-//                }
-//            }
             return division;
         } else if (OUT_OF_PATTERN_ALPHABETIC.matcher(text).find()) {
             Matcher m = OUT_OF_PATTERN_ALPHABETIC.matcher(text);
-            m.matches();
+            // find() only proves a partial hit; groups can be read only when the whole text matches.
+            if (!m.matches()) {
+                throw new NormalizationException("Cannot process the expression '" + text + "'. Skipping.");
+            }
             String numerator = m.group(1);
             String denominator = m.group(m.groupCount());
             BigDecimal division = null;
             try {
-                division = convertIntegerPart(numerator).divide(convertIntegerPart(denominator));
-            } catch (ArithmeticException ae) {
-                division = convertIntegerPart(numerator).divide(convertIntegerPart(denominator), 10, BigDecimal.ROUND_HALF_UP);
+                // Try the exact quotient first; on ArithmeticException retry with 10 decimal places, rounding half up.
+                try {
+                    division = convertIntegerPart(numerator).divide(convertIntegerPart(denominator));
+                } catch (ArithmeticException ae) {
+                    division = convertIntegerPart(numerator).divide(convertIntegerPart(denominator), 10, BigDecimal.ROUND_HALF_UP);
+                }
+            } catch (Exception e) {
+                throw new NormalizationException("Cannot process the expression '" + text + "'. Skipping.");
             }
             return division;
         } else if (StringUtils.isNotBlank(numericPart)) {
diff --git a/src/test/kotlin/org/grobid/core/utilities/WordsToNumberTest.kt b/src/test/kotlin/org/grobid/core/utilities/WordsToNumberTest.kt
index 11db1809..8d62a426 100644
--- a/src/test/kotlin/org/grobid/core/utilities/WordsToNumberTest.kt
+++ b/src/test/kotlin/org/grobid/core/utilities/WordsToNumberTest.kt
@@ -219,4 +219,12 @@ class WordsToNumberTest {
         MatcherAssert.assertThat(number, Is.`is`(BigDecimal("0.0617928044")))
     }
 
+    @Test(expected = NormalizationException::class)
+    @Throws(Exception::class)
+    fun testErrorCase_3() {
+        // normalize() is expected to throw for this input, so there is no return value to assert on.
+        val input = "one out of currently 62"
+        target.normalize(input, Locale.ENGLISH)
+    }
+
 }
\ No newline at end of file