Use the negative pattern of textNumberPattern to resolve padding ambi…

…guities ICU only uses the positive pattern of the textNumberPattern property to determine pad character and position, completely ignoring the negative pattern. If the positive pattern has no affix associated with the pad character, then there is an ambiguity if the pad character should appear before or after the negative affix when unparsing negative numbers. In this case, ICU defaults to before the affix, with no way to change it via the pattern. This effectively means it is not possible for ICU number padding to appear after a negative affix if there is no positive affix. To resolve this ambiguity and allow configuring where pad characters appear, we inspect the negative pattern. If both negative and positive patterns define padding on the same affix, and the positive pattern has an empty string for that affix, then we use the pad position from the negative pattern. In all other cases, the pad character in the negative pattern is ignored following usual ICU behavior. For example, a textNumberPattern of "*0####0;-*00" formats a negative number with zero padding after the hyphen, whereas normal ICU behavior would ignore the negative pattern and zero pad before the hyphen. Deprecation/Compatibility: The pad character in the negative part of textNumberPattern is no longer ignored if the positive part of textNumberPattern defines a pad character without an associated affix (e.g. "*0###0;-*00"). In these cases, the position of the pad charcter in the negative part is used to define whether padding occurs before or after the negative affix. All other cases follow existing rules of textNumberPattern (i.e. the pad character in the negative part is ignored). DAFFODIL-2871
apache · Jan 16, 2024 · cdb4121 · cdb4121
1 parent e2a1835
commit cdb4121
Show file tree

Hide file tree

Showing 5 changed files with 94 additions and 8 deletions.
diff --git a/...ore/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala b/...ore/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala
@@ -28,6 +28,7 @@ import org.apache.daffodil.lib.schema.annotation.props.gen.TextNumberRounding
 import org.apache.daffodil.lib.util.Maybe
 import org.apache.daffodil.lib.util.Maybe._
 import org.apache.daffodil.lib.util.MaybeDouble
+import org.apache.daffodil.lib.util.MaybeInt
 import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
 import org.apache.daffodil.runtime1.processors.Delimiter
 import org.apache.daffodil.runtime1.processors.TextNumberFormatEv
@@ -38,6 +39,10 @@ import org.apache.daffodil.runtime1.processors.unparsers.Unparser
 import org.apache.daffodil.unparsers.runtime1.ConvertTextCombinatorUnparser
 import org.apache.daffodil.unparsers.runtime1.ConvertTextNumberUnparser
 
+import com.ibm.icu.impl.number.AffixPatternProvider
+import com.ibm.icu.impl.number.Padder.PadPosition
+import com.ibm.icu.impl.number.PatternStringParser
+import com.ibm.icu.impl.number.PatternStringParser.ParsedPatternInfo
 import com.ibm.icu.text.DecimalFormat
 
 case class ConvertTextCombinator(e: ElementBase, value: Gram, converter: Gram)
@@ -399,14 +404,20 @@ trait ConvertTextNumberMixin {
     } else pattern
   }
 
-  final protected def checkPatternWithICU(e: ElementBase) = {
-    // Load the pattern to make sure it is valid
+  /**
+   * Validates the textNumberPattern using ICU's PatternStringParser. Although this class is
+   * public, it is not part of the ICU API, so it maybe not be stable. However, this is what
+   * DecimalFormat uses internally to parse patterns and extract information for initialization,
+   * and that likely won't change significantly. Plus, by using this class instead of parsing
+   * with DeciamlFormat we can return the ParsedPatternInfo to give callers raw access to what
+   * was in the pattern without having to parse it ourselves. This can be useful for additional
+   * validation or logic using parts of the pattern ICU might normally ignore.
+   */
+  final protected def checkPatternWithICU(e: ElementBase): ParsedPatternInfo = {
     try {
-      if (hasV || hasP) {
-        new DecimalFormat(runtimePattern)
-      } else {
-        new DecimalFormat(pattern)
-      }
+      val patternToCheck = if (hasV || hasP) runtimePattern else pattern
+      val parsedPatternInfo = PatternStringParser.parseToPatternInfo(patternToCheck)
+      parsedPatternInfo
     } catch {
       case ex: IllegalArgumentException =>
         if (hasV || hasP) {
@@ -551,7 +562,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
       pattern.startsWith(";"),
       "The positive part of the dfdl:textNumberPattern is required. The dfdl:textNumberPattern cannot begin with ';'.",
     )
-    checkPatternWithICU(e)
+    val parsedPatternInfo = checkPatternWithICU(e)
 
     val (roundingIncrement: MaybeDouble, roundingMode) =
       e.textNumberRounding match {
@@ -586,6 +597,29 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
         Nope
       }
 
+    // ICU does not have a way to set the pad position to after an affix if the positive pattern
+    // does not have that affix. For example, "* 0", will always have a pad position of
+    // BEFORE_PREFIX, with no way to set it to AFTER_PREFIX because the positive pattern has no
+    // prefix. In cases where formats do not have a postive affix but want to specify the pad
+    // position to AFTER, we allow them to do so in the negative pattern. For example, a pattern
+    // of "* 0;-* 0" will have a pad position of AFTER_PREFIX. ICU normally ignores the negative
+    // pattern for pad position. Note that we require the pad char to be defined on the same
+    // affix or else it is ignored.
+    val posPadLoc = parsedPatternInfo.positive.paddingLocation
+    val negPadLoc =
+      if (parsedPatternInfo.negative != null) parsedPatternInfo.negative.paddingLocation
+      else null
+    val posPrefix = parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_PREFIX)
+    val posSuffix = parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_SUFFIX)
+    val icuPadPosition =
+      (posPadLoc, negPadLoc, posPrefix, posSuffix) match {
+        case (PadPosition.BEFORE_PREFIX, PadPosition.AFTER_PREFIX, "", _) =>
+          MaybeInt(DecimalFormat.PAD_AFTER_PREFIX)
+        case (PadPosition.BEFORE_SUFFIX, PadPosition.AFTER_SUFFIX, _, "") =>
+          MaybeInt(DecimalFormat.PAD_AFTER_SUFFIX)
+        case _ => MaybeInt.Nope
+      }
+
     val ev = new TextNumberFormatEv(
       e.tci,
       decSep,
@@ -599,6 +633,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
       roundingMode,
       roundingIncrement,
       zeroRepsRaw,
+      icuPadPosition,
       e.primType,
     )
     ev.compile(tunable)

diff --git a/...dil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala b/...dil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
@@ -27,6 +27,7 @@ import org.apache.daffodil.lib.util.DecimalUtils.OverpunchLocation
 import org.apache.daffodil.lib.util.Maybe
 import org.apache.daffodil.lib.util.Maybe._
 import org.apache.daffodil.lib.util.MaybeDouble
+import org.apache.daffodil.lib.util.MaybeInt
 import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
 import org.apache.daffodil.runtime1.processors.TextNumberFormatEv
 import org.apache.daffodil.runtime1.processors.parsers.ConvertZonedCombinatorParser
@@ -179,6 +180,7 @@ case class ConvertZonedNumberPrim(e: ElementBase)
       roundingMode,
       roundingIncrement,
       Nil,
+      MaybeInt.Nope,
       e.primType,
     )
     ev.compile(tunable)

diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala
@@ -32,6 +32,7 @@ import org.apache.daffodil.lib.util.Maybe
 import org.apache.daffodil.lib.util.Maybe._
 import org.apache.daffodil.lib.util.MaybeChar
 import org.apache.daffodil.lib.util.MaybeDouble
+import org.apache.daffodil.lib.util.MaybeInt
 import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
 import org.apache.daffodil.runtime1.dsom._
 
@@ -81,6 +82,7 @@ class TextNumberFormatEv(
   roundingMode: Maybe[TextNumberRoundingMode],
   roundingIncrement: MaybeDouble,
   zeroRepsRaw: List[String],
+  icuPadPosition: MaybeInt,
   primType: PrimType,
 ) extends Evaluatable[DecimalFormat](tci)
   with InfosetCachedEvaluatable[DecimalFormat] {
@@ -186,6 +188,10 @@ class TextNumberFormatEv(
       }
     }
 
+    if (icuPadPosition.isDefined) {
+      df.setPadPosition(icuPadPosition.get)
+    }
+
     df
   }
 

diff --git a/...t/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml b/...t/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
@@ -321,6 +321,20 @@
     <xs:element name="tnp103" type="xs:integer" dfdl:textNumberPattern="###0.0##"
       dfdl:textNumberCheckPolicy="strict" />
 
+    <!--
+      pad position is after prefix because of ambiguity in the postive pattern
+      resolved by the negative pattern. Otherwise ICU defaults to before prefix
+    -->
+    <xs:element name="tnp104" type="xs:integer" dfdl:textNumberPattern="*_####0;(*_0)"
+      dfdl:textNumberCheckPolicy="strict" />
+
+    <!--
+      pad position is after suffix because of ambiguity in the postive pattern
+      resolved by the negative pattern. Otherwise ICU defaults to before suffix
+    -->
+    <xs:element name="tnp105" type="xs:integer" dfdl:textNumberPattern="####0*_;(0)*_"
+      dfdl:textNumberCheckPolicy="strict" />
+
   </tdml:defineSchema>
 
   <tdml:defineSchema name="textNumberPattern2"  elementFormDefault="qualified">
@@ -4504,4 +4518,26 @@
     </tdml:errors>
   </tdml:parserTestCase>
 
+  <tdml:parserTestCase name="textNumberPaddingAmbiguity01" root="tnp104" model="textNumberPattern">
+    <tdml:document>
+      <tdml:documentPart type="text">(__1)</tdml:documentPart>
+    </tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <tnp104>-1</tnp104>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+  <tdml:parserTestCase name="textNumberPaddingAmbiguity02" root="tnp105" model="textNumberPattern">
+    <tdml:document>
+      <tdml:documentPart type="text">(1)__</tdml:documentPart>
+    </tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <tnp105>-1</tnp105>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
 </tdml:testSuite>
diff --git a/.../src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala b/.../src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala
@@ -523,4 +523,11 @@ class TestTextNumberProps {
   @Test def test_textNumberIntegerWithDecimal04(): Unit = {
     runner.runOneTest("textNumberIntegerWithDecimal04")
   }
+
+  @Test def test_textNumberPaddingAmbiguity01(): Unit = {
+    runner.runOneTest("textNumberPaddingAmbiguity01")
+  }
+  @Test def test_textNumberPaddingAmbiguity02(): Unit = {
+    runner.runOneTest("textNumberPaddingAmbiguity02")
+  }
 }