From 13e0fe003316d1a640cb94e48ab687237318d603 Mon Sep 17 00:00:00 2001
From: Michael Beckerle
Date: Thu, 26 Oct 2023 19:21:15 -0400
Subject: [PATCH 1/2] Commited tests for ICU bug
https://unicode-org.atlassian.net/browse/ICU-22558
DAFFODIL-2859
---
.../lib/util/FixedICUDecimalFormat.java | 88 +++++++++++
.../lib/util/TestFixedICUDecimalFormat.java | 143 ++++++++++++++++++
2 files changed, 231 insertions(+)
create mode 100644 daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java
create mode 100644 daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java
new file mode 100644
index 0000000000..50b75dabc7
--- /dev/null
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java
@@ -0,0 +1,88 @@
+package org.apache.daffodil.lib.util;
+
+import com.ibm.icu.text.DecimalFormat;
+import com.ibm.icu.text.DecimalFormatSymbols;
+
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class FixedICUDecimalFormat {
+
+ /**
+ * This regex enables us to correct a problem in ICUs interpretation of the pattern in the case
+ * where the pattern has no digits before the decimal points, such as "#.##"
+ * so that there should be zero leading digits if the integer part is 0.
+ *
+ * See https://unicode-org.atlassian.net/browse/ICU-22558
+ *
+ * It captures unescaped digits or '#' characters
+ * preceding a non-escaped decimal point in an ICU4J pattern string.
+ *
+ * The whole regex (without java escapes) is {@code (?
+ *
{@code (?
+ * {@code ([0-9#]*)}: Capture zero or more consecutive digits or '#' characters in group 1.
+ * {@code (?
+ *
+ *
+ * With this pattern:
+ *
+ * - Group 1 will capture any preceding unescaped digits or '#' characters.
+ * - Group 2 will capture the non-escaped decimal point.
+ *
+ *
+ * The match will fail only if there is no non-escaped decimal point.
+ *
+ * Package private for unit testing.
+ */
+ static Pattern pattern = Pattern.compile("(?
+ * See https://unicode-org.atlassian.net/browse/ICU-22558
+ *
+ *
+ * @param patternString - an ICU pattern for a DecimalFormat, to be created and fixed (if needed)
+ * @return a DecimalFormat properly initialized by the pattern.
+ */
+ public static DecimalFormat fromPattern(String patternString) {
+ DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(); // default locale
+ return fromPattern(patternString, symbols);
+ }
+ /**
+ * Package private helper version that takes symbols so that you can define a locale.
+ * This allows tests to not have to adjust for locale decimal points "." vs. ",",
+ * by specifying a locale. But it isn't needed for regular usage.
+ *
+ * @param patternString - ICU pattern to fix (if needed)
+ * @param symbols - used to specify locale (because decimal points can be "." or ","
+ * @return a DecimalFormat properly initialized by the pattern.
+ */
+ static DecimalFormat fromPattern(String patternString, DecimalFormatSymbols symbols) {
+ DecimalFormat decimalFormat = new DecimalFormat(patternString, symbols);
+ Matcher matcher = pattern.matcher(patternString);
+ if (matcher.find()) {
+ // There IS a decimal point.
+ String digits = matcher.group(1); // digits or # unescaped, before the decimal point
+ assert(Objects.equals(".", matcher.group(2))); // 2nd group matches the decimal point itself
+ long count = digits.chars().filter(Character::isDigit).count();
+ if (count == 0) {
+ // The decimal point is not preceded by any required digits.
+ // This is the fix ICU is getting wrong.
+ //
+ // Note even when ICU fixes this, this code won't break. It's just
+ // unnecessary.
+ //
+ //
+ decimalFormat.setMinimumIntegerDigits(0);
+ }
+ }
+ return decimalFormat;
+ }
+}
diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java
new file mode 100644
index 0000000000..0c230ba058
--- /dev/null
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java
@@ -0,0 +1,143 @@
+package org.apache.daffodil.lib.util;
+
+import com.ibm.icu.text.DecimalFormat;
+import com.ibm.icu.text.DecimalFormatSymbols;
+import org.junit.Test;
+import static junit.framework.TestCase.assertEquals;
+import static org.junit.Assert.*;
+
+import java.util.Locale;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Tests about ICU-22558
+ *
+ * https://unicode-org.atlassian.net/browse/ICU-22558
+ */
+public class TestFixedICUDecimalFormat {
+
+ // Create DecimalFormatSymbols for a Locale where the decimal separator is a dot
+ DecimalFormatSymbols symbols = new DecimalFormatSymbols(Locale.US);
+
+ /**
+ * Illustrates the bug in ICU.
+ */
+ @Test
+ public void testDecimalFormatWithHash() {
+ // Create DecimalFormat instance with the pattern "#.##"
+ DecimalFormat decimalFormat = new DecimalFormat("#.##", symbols);
+
+ // Convert the number 0.12 to String
+ String formattedNumber = decimalFormat.format(0.12);
+
+ // This assert tests for the current behavior. (which is buggy)
+ assertEquals("0.12", formattedNumber); // Wrong answer: See ICU-22558
+ // Assert that the formatted string is ".12"
+ if (formattedNumber == ".12") {
+ // see https://unicode-org.atlassian.net/browse/ICU-22558
+ fail("Bug ICU-22558 has been fixed. This test set is no longer needed to illustrate the problem.");
+ }
+ }
+
+
+ Pattern pattern = FixedICUDecimalFormat.pattern;
+
+ /**
+ * Tests to be sure the regex that is used in the fix is robust.
+ */
+ @Test
+ public void testExtractDecimalPointAndPrecedingUnescapedDigits0() {
+ Matcher matcher = pattern.matcher("\\.");
+ assertFalse(matcher.find());
+ }
+
+ @Test
+ public void testExtractDecimalPointAndPrecedingUnescapedDigits1() {
+ Matcher matcher = pattern.matcher("#.##");
+ assertTrue(matcher.find());
+ assertEquals("#", matcher.group(1));
+ assertEquals(".", matcher.group(2));
+ }
+
+ @Test
+ public void testExtractDecimalPointAndPrecedingUnescapedDigits2() {
+ Matcher matcher = pattern.matcher("\\#.##");
+ assertTrue(matcher.find());
+ assertEquals("", matcher.group(1));
+ assertEquals(".", matcher.group(2));
+ }
+
+ @Test
+ public void testExtractDecimalPointAndPrecedingUnescapedChar3() {
+ Matcher matcher = pattern.matcher("'text'#.##");
+ assertTrue(matcher.find());
+ assertEquals("#", matcher.group(1));
+ assertEquals(".", matcher.group(2)); }
+
+ @Test
+ public void testExtractDecimalPointAndPrecedingUnescapedChar4() {
+ Matcher matcher = pattern.matcher(".##");
+ assertTrue(matcher.find());
+ assertEquals("", matcher.group(1));
+ assertEquals(".", matcher.group(2));
+ }
+
+ @Test
+ public void testExtractDecimalPointAndPrecedingUnescapedChar5() {
+ Matcher matcher = pattern.matcher(".");
+ assertTrue(matcher.find());
+ assertEquals("", matcher.group(1));
+ assertEquals(".", matcher.group(2));
+ }
+
+ @Test
+ public void testExtractDecimalPointAndPrecedingUnescapedChar6() {
+ Matcher matcher = pattern.matcher("5.#");
+ assertTrue(matcher.find());
+ assertEquals("5", matcher.group(1));
+ assertEquals(".", matcher.group(2));
+ }
+
+ @Test
+ public void testExtractDecimalPointAndPrecedingUnescapedChar7() {
+ Matcher matcher = pattern.matcher("5#.#");
+ assertTrue(matcher.find());
+ assertEquals("5#", matcher.group(1));
+ assertEquals(".", matcher.group(2));
+ }
+
+ @Test
+ public void testExtractDecimalPointAndPrecedingUnescapedChar8() {
+ Matcher matcher = pattern.matcher("$\\.$#7\\##9#5#.#");
+ assertTrue(matcher.find());
+ assertEquals("#9#5#", matcher.group(1));
+ assertEquals(".", matcher.group(2));
+ }
+
+
+ /**
+ * Show that the fix works.
+ */
+ @Test
+ public void testFixedDecimalFormatWithHash() {
+ DecimalFormat decimalFormat = FixedICUDecimalFormat.fromPattern("#.##", symbols);
+ String formattedNumber = decimalFormat.format(0.12);
+ assertEquals(".12", formattedNumber);
+ }
+
+ /**
+ * Show that the fix doesn't break patterns with leading digits.
+ */
+ @Test
+ public void testFixedDecimalFormatWithZero() {
+ DecimalFormat decimalFormat = FixedICUDecimalFormat.fromPattern("0.##", symbols);
+ String formattedNumber = decimalFormat.format(0.12);
+ assertEquals("0.12", formattedNumber);
+ }
+}
+
+
+
+
From f8d5fd81a7d669c94affa410f5af1dec2c02b1e9 Mon Sep 17 00:00:00 2001
From: Michael Beckerle
Date: Fri, 27 Oct 2023 17:25:22 -0400
Subject: [PATCH 2/2] Add APLv2 banners to satisfy rat check
DAFFODIL-2859
---
.../daffodil/lib/util/FixedICUDecimalFormat.java | 16 ++++++++++++++++
.../lib/util/TestFixedICUDecimalFormat.java | 16 ++++++++++++++++
2 files changed, 32 insertions(+)
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java
index 50b75dabc7..b295ba0869 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.daffodil.lib.util;
import com.ibm.icu.text.DecimalFormat;
diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java
index 0c230ba058..e1721b8efc 100644
--- a/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java
+++ b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.daffodil.lib.util;
import com.ibm.icu.text.DecimalFormat;