Fix zoned EBCDIC overpunched signs

Canonical form for a positive overpunched sign of +5 in EBCDIC is C5, not F5. So positive overpunched sign numbers that don't use C0 to C9 will be parsed fine, but will be unparsed as C0 to C9. Implemented alternate negative overpunched sign chars B0 to B9 also. These are also canonicalized by unparsing to D0 to D9. Added tests at unit and TDML level. Compatibility: There is no compatibility issue for parse-only applications. This changes the behavior of unparsing for overpunched signs for EBCDIC characters. The prior behavior was incorrect, to the point of being relatively unusable for unparsing, so there is no feature in this PR to provide backward compatibility with the prior behavior. DAFFODIL-2873, DAFFODIL-2874
apache · Jan 21, 2024 · 3ce37b4 · 3ce37b4
1 parent fd70059
commit 3ce37b4
Show file tree

Hide file tree

Showing 9 changed files with 1,710 additions and 289 deletions.
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/ElementBaseGrammarMixin.scala b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/ElementBaseGrammarMixin.scala
@@ -27,13 +27,13 @@ import org.apache.daffodil.core.grammar.primitives.ConvertTextBooleanPrim
 import org.apache.daffodil.core.grammar.primitives.LeadingSkipRegion
 import org.apache.daffodil.core.grammar.primitives.LiteralValueNilOfSpecifiedLength
 import org.apache.daffodil.core.grammar.primitives.SimpleNilOrValue
-import org.apache.daffodil.core.grammar.primitives._ // there are too many to show individually
+import org.apache.daffodil.core.grammar.primitives._
 import org.apache.daffodil.core.runtime1.ElementBaseRuntime1Mixin
 import org.apache.daffodil.lib.api.WarnID
 import org.apache.daffodil.lib.exceptions.Assert
 import org.apache.daffodil.lib.schema.annotation.props.Found
 import org.apache.daffodil.lib.schema.annotation.props.NotFound
-import org.apache.daffodil.lib.schema.annotation.props.gen._ // there are too many to show individually
+import org.apache.daffodil.lib.schema.annotation.props.gen._
 import org.apache.daffodil.lib.util.PackedSignCodes
 import org.apache.daffodil.lib.xml.GlobalQName
 import org.apache.daffodil.lib.xml.XMLUtils
@@ -700,6 +700,22 @@ trait ElementBaseGrammarMixin
       ConvertZonedCombinator(this, stringValue, textZonedConverter)
     }
 
+  /**
+   * True if the encoding is known to be an EBCDIC one, that is, the encoding is not a runtime
+   * expression and it is some EBCDIC flavor. If it's any ascii flavor or a runtime expression this is false.
+   */
+  lazy val isKnownEBCDICEncoding: Boolean =
+    charsetEv.optConstant.map { _.isEbcdicFamily() }.getOrElse(false)
+
+  /**
+   * Avoids requesting the textZonedSignStyle property if we know the encoding
+   * is an EBCDIC flavor: None in that case, otherwise Some(textZonedSignStyle).
+   */
+  lazy val optTextZonedSignStyle = {
+    if (isKnownEBCDICEncoding) None
+    else Some(textZonedSignStyle)
+  }
+
   private lazy val textConverter = {
     primType match {
       case _: NodeInfo.Numeric.Kind => ConvertTextStandardNumberPrim(this)

diff --git a/...dil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala b/...dil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
@@ -201,15 +201,15 @@ case class ConvertZonedNumberPrim(e: ElementBase)
     new ConvertZonedNumberParser(
       opl,
       textNumberFormatEv,
-      e.textZonedSignStyle,
+      e.optTextZonedSignStyle,
       e.elementRuntimeData,
       textDecimalVirtualPoint,
     )
 
   override lazy val unparser: Unparser =
     new ConvertZonedNumberUnparser(
       opl,
-      e.textZonedSignStyle,
+      e.optTextZonedSignStyle,
       e.elementRuntimeData,
       textDecimalVirtualPoint,
     )

diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala b/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala
@@ -22,6 +22,7 @@ import java.nio.charset.CoderResult
 import java.nio.charset.CodingErrorAction
 import java.nio.charset.{ Charset => JavaCharset }
 import java.nio.charset.{ CharsetEncoder => JavaCharsetEncoder }
+import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`
 
 import org.apache.daffodil.lib.exceptions.Assert
 import org.apache.daffodil.lib.schema.annotation.props.gen.BitOrder
@@ -59,6 +60,15 @@ trait BitsCharset extends Serializable {
   def newDecoder(): BitsCharsetDecoder
   def newEncoder(): BitsCharsetEncoder
 
+  /**
+   * Used to determine if zoned numbers use ascii or ebcdic conventions
+   * for overpunched signs. This determines whether the textZonedSignStyle property is needed or not.
+   *
+   * Override in any EBCDIC family charset definition; this default implementation returns false.
+   * @return true if the charset is an EBCDIC family charset.
+   */
+  def isEbcdicFamily(): Boolean = false
+
   def maybeFixedWidth: MaybeInt
 
   final def padCharWidthInBits = {
@@ -108,6 +118,16 @@ trait BitsCharsetJava extends BitsCharset {
     else MaybeInt.Nope
   }
 
+  private lazy val hasNameOrAliasContainingEBCDIC = { // true if any name or alias mentions EBCDIC
+    val allCharsetNames = (javaCharset.aliases().toSeq :+ name :+ javaCharset.name()).map {
+      _.toUpperCase // upper-cased so the substring test below ignores case
+    }
+    val res = allCharsetNames.exists(_.contains("EBCDIC"))
+    res
+  }
+
+  override def isEbcdicFamily(): Boolean = hasNameOrAliasContainingEBCDIC // decided from names/aliases only
+
   override def newEncoder() =
     new BitsCharsetWrappingJavaCharsetEncoder(this, javaCharset.newEncoder())
 

diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/DecimalUtils.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/DecimalUtils.scala
@@ -386,6 +386,45 @@ object DecimalUtils {
     outArray
   }
 
+  /**
+   * These are the java characters corresponding to EBCDIC B0 to B9, written as a string
+   * literal and ordered so that a character's index is the digit (0 to 9) it stands for.
+   * NOTE(review): nothing here decodes bytes; the literal has non-ASCII chars, so confirm the source encoding.
+   *
+   * Note that in Java chars (which are unicode), these do not have adjacent code points.
+   * They are (hex) 5e, a3, a5, b7, a9, a7, b6, bc, bd, be.
+   */
+  private val B0_to_B9_chars = "^£¥·©§¶¼½¾"
+
+  /**
+   * Maps an overpunched-sign character to its digit value and sign. Despite the name, this does not
+   * decode EBCDIC: the character has already been decoded from whatever charset into a Unicode code point.
+   * Positive are 0-9 and "{ABCDEFGHI"; negative are "}JKLMNOPQR" and
+   * what we call the B0_to_B9 characters, which are "^£¥·©§¶¼½¾".
+   * @param digit the already-decoded character to interpret
+   * @return A pair of the integer the digit represents, and a boolean indicating if negative.
+   * @throws NumberFormatException if digit is none of the characters listed above
+   */
+  def convertFromZonedEBCDIC(digit: Char): (Int, Boolean) = {
+    if ((digit >= '0') && (digit <= '9')) // positive 0-9
+      (digit - 48, false) // 48 is the code point of '0'
+    else if (digit == '{') // positive 0 aka hex C0 ebcdic
+      (0, false)
+    else if ((digit >= 'A') && (digit <= 'I')) // positive 1-9 as C1 to C9 i.e, "ABCDEFGHI"
+      (digit - 'A' + 1, false)
+    else if (digit == '}') // negative 0 aka hex D0 ebcdic
+      (0, true)
+    else if ((digit >= 'J') && (digit <= 'R')) // negative 1-9 as hex D1 to D9.
+      (digit - 'J' + 1, true)
+    else {
+      val index = B0_to_B9_chars.indexOf(digit) // alternate negative 0-9; position in the string is the digit
+      if (index >= 0)
+        (index, true)
+      else
+        throw new NumberFormatException("Invalid zoned digit: " + digit)
+    }
+  }
+
   def convertFromAsciiStandard(digit: Char): (Int, Boolean) = {
     if ((digit >= '0') && (digit <= '9')) // positive 0-9
       (digit - 48, false)
@@ -417,6 +456,25 @@ object DecimalUtils {
       throw new NumberFormatException("Invalid zoned digit: " + digit)
   }
 
+  /**
+   * Does not encode to the EBCDIC charset, but converts a digit to the character that
+   * carries both the digit and its overpunched sign. Positive 0 to 9 become "{ABCDEFGHI"
+   * respectively. Negative 0 to 9 become "}JKLMNOPQR" respectively.
+   * @param digit - the digit, as a char '0' to '9'
+   * @param positive - true if positive, false if negative
+   * @return the character needed to represent this digit as an overpunched sign digit.
+   * @throws NumberFormatException if digit is not in '0' to '9'
+   */
+  def convertToZonedEBCDIC(digit: Char, positive: Boolean): Char = {
+    val pos: Int = digit - '0' // offset of the digit from '0'; 0 to 9 when valid
+    if (pos > 9 || pos < 0)
+      throw new NumberFormatException("Invalid zoned digit: " + digit)
+    if (positive)
+      "{ABCDEFGHI".charAt(pos)
+    else
+      "}JKLMNOPQR".charAt(pos)
+  }
+
   def convertToAsciiTranslatedEBCDIC(digit: Char, positive: Boolean): Char = {
     if (positive) {
       if (digit == '0')
@@ -470,7 +528,7 @@ object DecimalUtils {
 
   def zonedToNumber(
     num: String,
-    zonedStyle: TextZonedSignStyle,
+    optZonedStyle: Option[TextZonedSignStyle],
     opl: OverpunchLocation.Value,
   ): String = {
     val opindex = opl match {
@@ -483,24 +541,27 @@ object DecimalUtils {
       if (opl == OverpunchLocation.None) {
         num
       } else {
-        val (digit, opneg) = zonedStyle match {
-          case TextZonedSignStyle.AsciiStandard => convertFromAsciiStandard(num(opindex))
-          case TextZonedSignStyle.AsciiTranslatedEBCDIC =>
+        val (digit, opneg) = optZonedStyle match {
+          case None => convertFromZonedEBCDIC(num(opindex))
+          case Some(TextZonedSignStyle.AsciiStandard) => convertFromAsciiStandard(num(opindex))
+          case Some(TextZonedSignStyle.AsciiTranslatedEBCDIC) =>
             convertFromAsciiTranslatedEBCDIC(num(opindex))
-          case TextZonedSignStyle.AsciiCARealiaModified =>
+          case Some(TextZonedSignStyle.AsciiCARealiaModified) =>
             convertFromAsciiCARealiaModified(num(opindex))
-          case TextZonedSignStyle.AsciiTandemModified =>
+          case Some(TextZonedSignStyle.AsciiTandemModified) =>
             convertFromAsciiTandemModified(num(opindex))
         }
 
-        val convertedNum = (opneg, opl) match {
-          case (true, OverpunchLocation.Start) => "-" + digit + num.substring(1)
-          case (false, OverpunchLocation.Start) => digit + num.substring(1)
-          case (true, OverpunchLocation.End) => "-" + num.substring(0, opindex) + digit
-          case (false, OverpunchLocation.End) => num.substring(0, opindex) + digit
+        val allDigits = opl match {
+          case OverpunchLocation.Start => digit + num.substring(1)
+          case OverpunchLocation.End => num.substring(0, opindex) + digit
           case _ => Assert.impossible()
         }
+        val convertedNum = if (opneg) "-" + allDigits else allDigits
 
+        // It is still possible for this to be an illegal/malformed number like "-1K3"
+        // because nothing has yet checked the interior chars are all digits.
+        // But this will be caught later when the string is converted to a number.
         convertedNum
       }
     }
@@ -510,7 +571,7 @@ object DecimalUtils {
 
   def zonedFromNumber(
     num: String,
-    zonedStyle: TextZonedSignStyle,
+    optZonedStyle: Option[TextZonedSignStyle],
     opl: OverpunchLocation.Value,
   ): String = {
     val positive = (num.charAt(0) != '-')
@@ -529,14 +590,15 @@ object DecimalUtils {
         if (!positive) Assert.impossible()
         inStr
       } else {
-        val digit = zonedStyle match {
-          case TextZonedSignStyle.AsciiStandard =>
+        val digit = optZonedStyle match {
+          case None => convertToZonedEBCDIC(inStr(opindex), positive)
+          case Some(TextZonedSignStyle.AsciiStandard) =>
             convertToAsciiStandard(inStr(opindex), positive)
-          case TextZonedSignStyle.AsciiTranslatedEBCDIC =>
+          case Some(TextZonedSignStyle.AsciiTranslatedEBCDIC) =>
             convertToAsciiTranslatedEBCDIC(inStr(opindex), positive)
-          case TextZonedSignStyle.AsciiCARealiaModified =>
+          case Some(TextZonedSignStyle.AsciiCARealiaModified) =>
             convertToAsciiCARealiaModified(inStr(opindex), positive)
-          case TextZonedSignStyle.AsciiTandemModified =>
+          case Some(TextZonedSignStyle.AsciiTandemModified) =>
             convertToAsciiTandemModified(inStr(opindex), positive)
         }