From cdb41212d755c45f23f981c395dd24ab56303b45 Mon Sep 17 00:00:00 2001 From: Steve Lawrence Date: Thu, 11 Jan 2024 12:58:10 -0500 Subject: [PATCH] Use the negative pattern of textNumberPattern to resolve padding ambiguities ICU only uses the positive pattern of the textNumberPattern property to determine pad character and position, completely ignoring the negative pattern. If the positive pattern has no affix associated with the pad character, then there is an ambiguity if the pad character should appear before or after the negative affix when unparsing negative numbers. In this case, ICU defaults to before the affix, with no way to change it via the pattern. This effectively means it is not possible for ICU number padding to appear after a negative affix if there is no positive affix. To resolve this ambiguity and allow configuring where pad characters appear, we inspect the negative pattern. If both negative and positive patterns define padding on the same affix, and the positive pattern has an empty string for that affix, then we use the pad position from the negative pattern. In all other cases, the pad character in the negative pattern is ignored following usual ICU behavior. For example, a textNumberPattern of "*0####0;-*00" formats a negative number with zero padding after the hyphen, whereas normal ICU behavior would ignore the negative pattern and zero pad before the hyphen. Deprecation/Compatibility: The pad character in the negative part of textNumberPattern is no longer ignored if the positive part of textNumberPattern defines a pad character without an associated affix (e.g. "*0###0;-*00"). In these cases, the position of the pad charcter in the negative part is used to define whether padding occurs before or after the negative affix. All other cases follow existing rules of textNumberPattern (i.e. the pad character in the negative part is ignored). DAFFODIL-2871 --- .../primitives/PrimitivesTextNumber.scala | 51 ++++++++++++++++--- .../grammar/primitives/PrimitivesZoned.scala | 2 + .../runtime1/processors/EvTextNumber.scala | 6 +++ .../text_number_props/TextNumberProps.tdml | 36 +++++++++++++ .../TestTextNumberProps.scala | 7 +++ 5 files changed, 94 insertions(+), 8 deletions(-) diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala index 648d656b8d..95da5f6776 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala @@ -28,6 +28,7 @@ import org.apache.daffodil.lib.schema.annotation.props.gen.TextNumberRounding import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.lib.util.Maybe._ import org.apache.daffodil.lib.util.MaybeDouble +import org.apache.daffodil.lib.util.MaybeInt import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType import org.apache.daffodil.runtime1.processors.Delimiter import org.apache.daffodil.runtime1.processors.TextNumberFormatEv @@ -38,6 +39,10 @@ import org.apache.daffodil.runtime1.processors.unparsers.Unparser import org.apache.daffodil.unparsers.runtime1.ConvertTextCombinatorUnparser import org.apache.daffodil.unparsers.runtime1.ConvertTextNumberUnparser +import com.ibm.icu.impl.number.AffixPatternProvider +import com.ibm.icu.impl.number.Padder.PadPosition +import com.ibm.icu.impl.number.PatternStringParser +import com.ibm.icu.impl.number.PatternStringParser.ParsedPatternInfo import com.ibm.icu.text.DecimalFormat case class ConvertTextCombinator(e: ElementBase, value: Gram, converter: Gram) @@ -399,14 +404,20 @@ trait ConvertTextNumberMixin { } else pattern } - final protected def checkPatternWithICU(e: ElementBase) = { - // Load the pattern to make sure it is valid + /** + * Validates the textNumberPattern using ICU's PatternStringParser. Although this class is + * public, it is not part of the ICU API, so it maybe not be stable. However, this is what + * DecimalFormat uses internally to parse patterns and extract information for initialization, + * and that likely won't change significantly. Plus, by using this class instead of parsing + * with DeciamlFormat we can return the ParsedPatternInfo to give callers raw access to what + * was in the pattern without having to parse it ourselves. This can be useful for additional + * validation or logic using parts of the pattern ICU might normally ignore. + */ + final protected def checkPatternWithICU(e: ElementBase): ParsedPatternInfo = { try { - if (hasV || hasP) { - new DecimalFormat(runtimePattern) - } else { - new DecimalFormat(pattern) - } + val patternToCheck = if (hasV || hasP) runtimePattern else pattern + val parsedPatternInfo = PatternStringParser.parseToPatternInfo(patternToCheck) + parsedPatternInfo } catch { case ex: IllegalArgumentException => if (hasV || hasP) { @@ -551,7 +562,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase) pattern.startsWith(";"), "The positive part of the dfdl:textNumberPattern is required. The dfdl:textNumberPattern cannot begin with ';'.", ) - checkPatternWithICU(e) + val parsedPatternInfo = checkPatternWithICU(e) val (roundingIncrement: MaybeDouble, roundingMode) = e.textNumberRounding match { @@ -586,6 +597,29 @@ case class ConvertTextStandardNumberPrim(e: ElementBase) Nope } + // ICU does not have a way to set the pad position to after an affix if the positive pattern + // does not have that affix. For example, "* 0", will always have a pad position of + // BEFORE_PREFIX, with no way to set it to AFTER_PREFIX because the positive pattern has no + // prefix. In cases where formats do not have a postive affix but want to specify the pad + // position to AFTER, we allow them to do so in the negative pattern. For example, a pattern + // of "* 0;-* 0" will have a pad position of AFTER_PREFIX. ICU normally ignores the negative + // pattern for pad position. Note that we require the pad char to be defined on the same + // affix or else it is ignored. + val posPadLoc = parsedPatternInfo.positive.paddingLocation + val negPadLoc = + if (parsedPatternInfo.negative != null) parsedPatternInfo.negative.paddingLocation + else null + val posPrefix = parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_PREFIX) + val posSuffix = parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_SUFFIX) + val icuPadPosition = + (posPadLoc, negPadLoc, posPrefix, posSuffix) match { + case (PadPosition.BEFORE_PREFIX, PadPosition.AFTER_PREFIX, "", _) => + MaybeInt(DecimalFormat.PAD_AFTER_PREFIX) + case (PadPosition.BEFORE_SUFFIX, PadPosition.AFTER_SUFFIX, _, "") => + MaybeInt(DecimalFormat.PAD_AFTER_SUFFIX) + case _ => MaybeInt.Nope + } + val ev = new TextNumberFormatEv( e.tci, decSep, @@ -599,6 +633,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase) roundingMode, roundingIncrement, zeroRepsRaw, + icuPadPosition, e.primType, ) ev.compile(tunable) diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala index d47694f085..060613858f 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala @@ -27,6 +27,7 @@ import org.apache.daffodil.lib.util.DecimalUtils.OverpunchLocation import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.lib.util.Maybe._ import org.apache.daffodil.lib.util.MaybeDouble +import org.apache.daffodil.lib.util.MaybeInt import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType import org.apache.daffodil.runtime1.processors.TextNumberFormatEv import org.apache.daffodil.runtime1.processors.parsers.ConvertZonedCombinatorParser @@ -179,6 +180,7 @@ case class ConvertZonedNumberPrim(e: ElementBase) roundingMode, roundingIncrement, Nil, + MaybeInt.Nope, e.primType, ) ev.compile(tunable) diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala index d4de6012f3..05bba8dfa3 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala @@ -32,6 +32,7 @@ import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.lib.util.Maybe._ import org.apache.daffodil.lib.util.MaybeChar import org.apache.daffodil.lib.util.MaybeDouble +import org.apache.daffodil.lib.util.MaybeInt import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType import org.apache.daffodil.runtime1.dsom._ @@ -81,6 +82,7 @@ class TextNumberFormatEv( roundingMode: Maybe[TextNumberRoundingMode], roundingIncrement: MaybeDouble, zeroRepsRaw: List[String], + icuPadPosition: MaybeInt, primType: PrimType, ) extends Evaluatable[DecimalFormat](tci) with InfosetCachedEvaluatable[DecimalFormat] { @@ -186,6 +188,10 @@ class TextNumberFormatEv( } } + if (icuPadPosition.isDefined) { + df.setPadPosition(icuPadPosition.get) + } + df } diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml index cac85281f0..6b28b9e54b 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml @@ -321,6 +321,20 @@ + + + + + + @@ -4504,4 +4518,26 @@ + + + (__1) + + + + -1 + + + + + + + (1)__ + + + + -1 + + + + diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala index 787c6819e5..4a4a6a5ec4 100644 --- a/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala +++ b/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala @@ -523,4 +523,11 @@ class TestTextNumberProps { @Test def test_textNumberIntegerWithDecimal04(): Unit = { runner.runOneTest("textNumberIntegerWithDecimal04") } + + @Test def test_textNumberPaddingAmbiguity01(): Unit = { + runner.runOneTest("textNumberPaddingAmbiguity01") + } + @Test def test_textNumberPaddingAmbiguity02(): Unit = { + runner.runOneTest("textNumberPaddingAmbiguity02") + } }