Skip to content

Commit

Permalink
more resilience on the regexes
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Mar 29, 2024
1 parent d9cfcef commit a2005c3
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 23 deletions.
42 changes: 19 additions & 23 deletions src/main/java/org/grobid/core/utilities/WordsToNumber.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ public class WordsToNumber {
private final String VALUES_PATH = "lexicon/en/values.json";

private final Pattern NUMERIC_PATTERN = Pattern.compile("\\b(?:\\d+(?:[.,]\\d+)*|\\d+[.,]?\\d*\\b)\\b", Pattern.CASE_INSENSITIVE);
private final Pattern OUT_OF_PATTERN_NUMBERS = Pattern.compile("([0-9.,]+)( out)? of (the )?([0-9.,]+)", Pattern.CASE_INSENSITIVE);
private final Pattern OUT_OF_PATTERN_ALPHABETIC = Pattern.compile("([A-Za-z ]+) out of ([a-z]+ )?([A-Za-z]+)", Pattern.CASE_INSENSITIVE);
private final Pattern OUT_OF_PATTERN_NUMBERS = Pattern.compile("([0-9.,]+)( out)? of ([a-z]+ )?([0-9.,]+)", Pattern.CASE_INSENSITIVE);
// Spelled-out fractions such as "three out of five" or "one of the five".
// Group 1 is reluctant and is followed directly by the optional " out" group: a literal
// space between them would demand two consecutive spaces in the text, and a greedy
// group 1 would absorb " out" into the numerator.
private final Pattern OUT_OF_PATTERN_ALPHABETIC = Pattern.compile("([A-Za-z ]+?)( out)? of ([a-z]+ )?([A-Za-z]+)", Pattern.CASE_INSENSITIVE);

private static List<String> bases = null;
private static List<String> tens = null;
Expand Down Expand Up @@ -199,8 +199,14 @@ public BigDecimal normalize(String text, Locale locale) throws NormalizationExce
} else if (OUT_OF_PATTERN_NUMBERS.matcher(text).find()) {
Matcher m = OUT_OF_PATTERN_NUMBERS.matcher(text);
m.matches();
String numerator = m.group(1);
String denominator = m.group(m.groupCount());
String numerator = "";
String denominator = "";
try {
numerator = m.group(1);
denominator = m.group(m.groupCount());
} catch(Exception e){
throw new NormalizationException("Cannot process the expression '" + text + "'. Skipping.");
}

BigDecimal division = null;
BigDecimal numeratorAsBigDecimal = null;
Expand All @@ -219,31 +225,21 @@ public BigDecimal normalize(String text, Locale locale) throws NormalizationExce
} catch (Exception e) {
throw new NormalizationException("Cannot process the values '" + text + "'. The conversion is failing. Skipping them.");
}

// catch (NumberFormatException nfe) {
//
//
// String cleanedNumerator = formatter.parse(numerator.);
// String cleanedDenominator = StringUtils.replaceChars(denominator, ",.", "");
// try {
// division = new BigDecimal(cleanedNumerator).divide(new BigDecimal(cleanedDenominator));
// } catch (ArithmeticException ae) {
// division = new BigDecimal(cleanedNumerator).divide(new BigDecimal(cleanedDenominator), 10, BigDecimal.ROUND_HALF_UP);
// } catch (Exception e) {
// throw new NormalizationException("Cannot process the values '" + text + "'. The conversion is failing. Skipping them.");
// }
// }
return division;
} else if (OUT_OF_PATTERN_ALPHABETIC.matcher(text).find()) {
Matcher m = OUT_OF_PATTERN_ALPHABETIC.matcher(text);
m.matches();
String numerator = m.group(1);
String denominator = m.group(m.groupCount());
BigDecimal division = null;
try {
division = convertIntegerPart(numerator).divide(convertIntegerPart(denominator));
} catch (ArithmeticException ae) {
division = convertIntegerPart(numerator).divide(convertIntegerPart(denominator), 10, BigDecimal.ROUND_HALF_UP);
String numerator = m.group(1);
String denominator = m.group(m.groupCount());
try {
division = convertIntegerPart(numerator).divide(convertIntegerPart(denominator));
} catch (ArithmeticException ae) {
division = convertIntegerPart(numerator).divide(convertIntegerPart(denominator), 10, BigDecimal.ROUND_HALF_UP);
}
} catch (Exception e) {
throw new NormalizationException("Cannot process the expression '" + text + "'. Skipping.");
}
return division;
} else if (StringUtils.isNotBlank(numericPart)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,4 +219,12 @@ class WordsToNumberTest {
MatcherAssert.assertThat(number, Is.`is`(BigDecimal("0.0617928044")))
}

/**
 * An "out of" expression whose denominator is preceded by an extra word and mixes a
 * spelled-out numerator with a digit denominator must be rejected with a
 * [NormalizationException] rather than normalised to a value.
 */
@Test(expected = NormalizationException::class)
@Throws(Exception::class)
fun testErrorCase_3() {
    // The exception declared on @Test is the only expectation: a value assertion after
    // this call would be unreachable on the passing path and would contradict it.
    target.normalize("one out of currently 62", Locale.ENGLISH)
}

}

0 comments on commit a2005c3

Please sign in to comment.