Skip to content

Commit

Permalink
Fix another two corner cases
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Mar 28, 2024
1 parent f23e4d3 commit 10edbf0
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 2 deletions.
1 change: 1 addition & 0 deletions resources/dataset/values/corpus/trainingdata1.tei.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
<value><base>10</base><pow>-1</pow></value>
<!-- <value>e<exp>-1</exp></value>-->
<value><number>10</number></value>
<value><alpha>Six</alpha>, <number>12</number></value>
</values>

4 changes: 2 additions & 2 deletions src/main/java/org/grobid/core/utilities/WordsToNumber.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ public class WordsToNumber {

private final String VALUES_PATH = "lexicon/en/values.json";

private final Pattern NUMERIC_PATTERN = Pattern.compile("[0-9.,]+", Pattern.CASE_INSENSITIVE);
private final Pattern NUMERIC_PATTERN = Pattern.compile("\\b(?:\\d+(?:[.,]\\d+)*|\\d+[.,]?\\d*\\b)\\b", Pattern.CASE_INSENSITIVE);
private final Pattern OUT_OF_PATTERN_NUMBERS = Pattern.compile("([0-9.,]+)( out)? of (the )?([0-9.,]+)", Pattern.CASE_INSENSITIVE);
private final Pattern OUT_OF_PATTERN_ALPHABETIC = Pattern.compile("([A-Za-z ]+) out of (the )?([A-Za-z]+)", Pattern.CASE_INSENSITIVE);
private final Pattern OUT_OF_PATTERN_ALPHABETIC = Pattern.compile("([A-Za-z ]+) out of ([a-z]+ )?([A-Za-z]+)", Pattern.CASE_INSENSITIVE);

private static List<String> bases = null;
private static List<String> tens = null;
Expand Down
26 changes: 26 additions & 0 deletions src/test/kotlin/org/grobid/core/utilities/WordsToNumberTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,22 @@ class WordsToNumberTest {
MatcherAssert.assertThat(number, Is.`is`(BigDecimal("0.75")))
}

/**
 * Normalizing the spelled-out fraction "three out of these four" must
 * produce the decimal 0.75.
 */
@Test
@Throws(Exception::class)
fun testConvertFractions6_1() {
    val expected = BigDecimal("0.75")

    val normalized = target.normalize("three out of these four", Locale.ENGLISH)

    MatcherAssert.assertThat(normalized, Is.`is`(expected))
}

/**
 * Normalizing the spelled-out fraction "three out of that four" must
 * produce the decimal 0.75.
 */
@Test
@Throws(Exception::class)
fun testConvertFractions6_2() {
    val expected = BigDecimal("0.75")

    val normalized = target.normalize("three out of that four", Locale.ENGLISH)

    MatcherAssert.assertThat(normalized, Is.`is`(expected))
}

@Test
@Throws(Exception::class)
fun testConvertFractions4Numeric() {
Expand All @@ -186,4 +202,14 @@ class WordsToNumberTest {
val input = "a temperature of 20"
target.normalize(input, Locale.ENGLISH)
}


/**
 * Feeds the mixed alphabetic/numeric input "six, 12" to normalize.
 *
 * The test makes no assertion on the returned value; it passes as long as
 * the call returns without throwing.
 *
 * NOTE(review): the expected result for this input is not established in
 * this file - confirm it and add an explicit assertion.
 */
@Test // was missing, so the test runner never picked this method up
@Throws(Exception::class)
fun testErrorCase_1() {
    val input = "six, 12"
    target.normalize(input, Locale.ENGLISH)
}

}

0 comments on commit 10edbf0

Please sign in to comment.