diff --git a/resources/dataset/values/corpus/trainingdata1.tei.xml b/resources/dataset/values/corpus/trainingdata1.tei.xml
index 3cdfbfe4..e2deda33 100644
--- a/resources/dataset/values/corpus/trainingdata1.tei.xml
+++ b/resources/dataset/values/corpus/trainingdata1.tei.xml
@@ -11,5 +11,6 @@
10-1
10
+ Six, 12
diff --git a/src/main/java/org/grobid/core/utilities/WordsToNumber.java b/src/main/java/org/grobid/core/utilities/WordsToNumber.java
index 71193b46..2c8f9285 100644
--- a/src/main/java/org/grobid/core/utilities/WordsToNumber.java
+++ b/src/main/java/org/grobid/core/utilities/WordsToNumber.java
@@ -33,9 +33,9 @@ public class WordsToNumber {
private final String VALUES_PATH = "lexicon/en/values.json";
- private final Pattern NUMERIC_PATTERN = Pattern.compile("[0-9.,]+", Pattern.CASE_INSENSITIVE);
+ private final Pattern NUMERIC_PATTERN = Pattern.compile("\\b(?:\\d+(?:[.,]\\d+)*|\\d+[.,]?\\d*\\b)\\b", Pattern.CASE_INSENSITIVE);
private final Pattern OUT_OF_PATTERN_NUMBERS = Pattern.compile("([0-9.,]+)( out)? of (the )?([0-9.,]+)", Pattern.CASE_INSENSITIVE);
- private final Pattern OUT_OF_PATTERN_ALPHABETIC = Pattern.compile("([A-Za-z ]+) out of (the )?([A-Za-z]+)", Pattern.CASE_INSENSITIVE);
+ private final Pattern OUT_OF_PATTERN_ALPHABETIC = Pattern.compile("([A-Za-z ]+) out of ([a-z]+ )?([A-Za-z]+)", Pattern.CASE_INSENSITIVE);
private static List bases = null;
private static List tens = null;
diff --git a/src/test/kotlin/org/grobid/core/utilities/WordsToNumberTest.kt b/src/test/kotlin/org/grobid/core/utilities/WordsToNumberTest.kt
index ced664c2..96420fd2 100644
--- a/src/test/kotlin/org/grobid/core/utilities/WordsToNumberTest.kt
+++ b/src/test/kotlin/org/grobid/core/utilities/WordsToNumberTest.kt
@@ -164,6 +164,22 @@ class WordsToNumberTest {
MatcherAssert.assertThat(number, Is.`is`(BigDecimal("0.75")))
}
+ @Test
+ @Throws(Exception::class)
+ fun testConvertFractions6_1() {
+ // "these" sits between "out of" and the denominator; the fraction must still resolve to 3/4.
+ val expected = BigDecimal("0.75")
+ MatcherAssert.assertThat(target.normalize("three out of these four", Locale.ENGLISH), Is.`is`(expected))
+ }
+
+ @Test
+ @Throws(Exception::class)
+ fun testConvertFractions6_2() {
+ // Same fraction as "three out of four", with "that" placed before the denominator.
+ val normalized = target.normalize("three out of that four", Locale.ENGLISH)
+ MatcherAssert.assertThat(normalized, Is.`is`(BigDecimal("0.75")))
+ }
+
@Test
@Throws(Exception::class)
fun testConvertFractions4Numeric() {
@@ -186,4 +202,14 @@ class WordsToNumberTest {
val input = "a temperature of 20"
target.normalize(input, Locale.ENGLISH)
}
+
+
+ @Test
+ @Throws(Exception::class)
+ fun testErrorCase_1() {
+ // Smoke test: no value is asserted, it only requires normalize() to return without throwing. NOTE(review): expected result for this input is not specified here - add an assertion once confirmed.
+ val input = "six, 12"
+ target.normalize(input, Locale.ENGLISH)
+ }
+
}
\ No newline at end of file