Skip to content

Commit

Permalink
Fix zoned EBCDIC overpunched signs
Browse files Browse the repository at this point in the history
Canonical form for a positive overpunched sign of +5 in ebcdic is C5,
not F5.

So positive overpunched-sign digits that are not encoded as C0 to C9
(for example, F5) will still be parsed fine, but will be unparsed as C0 to C9.

Implemented alternate negative overpunched sign chars B0 to B9 also.
These are also canonicalized by unparsing to D0 to D9.

Added tests at unit and TDML level

Compatibility:

There is no compatibility issue for parse-only applications.

This changes the behavior of unparsing for overpunched signs for EBCDIC
characters. The prior behavior was incorrect, to the point of being
relatively unusable for unparsing, so there is no feature in this
PR to provide backward compatibility with the prior behavior.

DAFFODIL-2873, DAFFODIL-2874
  • Loading branch information
mbeckerle committed Jan 21, 2024
1 parent fd70059 commit 3ce37b4
Show file tree
Hide file tree
Showing 9 changed files with 1,710 additions and 289 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ import org.apache.daffodil.core.grammar.primitives.ConvertTextBooleanPrim
import org.apache.daffodil.core.grammar.primitives.LeadingSkipRegion
import org.apache.daffodil.core.grammar.primitives.LiteralValueNilOfSpecifiedLength
import org.apache.daffodil.core.grammar.primitives.SimpleNilOrValue
import org.apache.daffodil.core.grammar.primitives._ // there are too many to show individually
import org.apache.daffodil.core.grammar.primitives._
import org.apache.daffodil.core.runtime1.ElementBaseRuntime1Mixin
import org.apache.daffodil.lib.api.WarnID
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.schema.annotation.props.Found
import org.apache.daffodil.lib.schema.annotation.props.NotFound
import org.apache.daffodil.lib.schema.annotation.props.gen._ // there are too many to show individually
import org.apache.daffodil.lib.schema.annotation.props.gen._
import org.apache.daffodil.lib.util.PackedSignCodes
import org.apache.daffodil.lib.xml.GlobalQName
import org.apache.daffodil.lib.xml.XMLUtils
Expand Down Expand Up @@ -700,6 +700,22 @@ trait ElementBaseGrammarMixin
ConvertZonedCombinator(this, stringValue, textZonedConverter)
}

/**
 * Whether the encoding is statically known to be an EBCDIC flavor.
 *
 * This is true only when the charset is a compile-time constant (not a
 * runtime expression) and that charset reports itself as EBCDIC family.
 * When the charset is not a constant, the result is false.
 */
lazy val isKnownEBCDICEncoding: Boolean = {
  val optIsEbcdic = charsetEv.optConstant.map { cs => cs.isEbcdicFamily() }
  optIsEbcdic.getOrElse(false)
}

/**
 * The textZonedSignStyle property, wrapped in an Option.
 *
 * Yields None when the encoding is known to be an EBCDIC flavor, in which
 * case the textZonedSignStyle property is never requested. Otherwise
 * yields Some of the property value.
 */
lazy val optTextZonedSignStyle = {
  if (!isKnownEBCDICEncoding) Some(textZonedSignStyle)
  else None
}

private lazy val textConverter = {
primType match {
case _: NodeInfo.Numeric.Kind => ConvertTextStandardNumberPrim(this)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,15 +201,15 @@ case class ConvertZonedNumberPrim(e: ElementBase)
new ConvertZonedNumberParser(
opl,
textNumberFormatEv,
e.textZonedSignStyle,
e.optTextZonedSignStyle,
e.elementRuntimeData,
textDecimalVirtualPoint,
)

override lazy val unparser: Unparser =
new ConvertZonedNumberUnparser(
opl,
e.textZonedSignStyle,
e.optTextZonedSignStyle,
e.elementRuntimeData,
textDecimalVirtualPoint,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.nio.charset.CoderResult
import java.nio.charset.CodingErrorAction
import java.nio.charset.{ Charset => JavaCharset }
import java.nio.charset.{ CharsetEncoder => JavaCharsetEncoder }
import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`

import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.schema.annotation.props.gen.BitOrder
Expand Down Expand Up @@ -59,6 +60,15 @@ trait BitsCharset extends Serializable {
def newDecoder(): BitsCharsetDecoder
def newEncoder(): BitsCharsetEncoder

/**
 * Used to determine if zoned numbers use ascii or ebcdic conventions
 * for overpunched signs. This determines whether the textZonedSignStyle
 * property is needed or not.
 *
 * The default here is false, i.e., the charset is treated as not being
 * an EBCDIC one. Override in any EBCDIC family charset definition.
 *
 * @return true if the charset is an EBCDIC family charset.
 */
def isEbcdicFamily(): Boolean = false

def maybeFixedWidth: MaybeInt

final def padCharWidthInBits = {
Expand Down Expand Up @@ -108,6 +118,16 @@ trait BitsCharsetJava extends BitsCharset {
else MaybeInt.Nope
}

/**
 * True if the Java charset's canonical name, any of its aliases, or this
 * charset's own name contains "EBCDIC", compared case-insensitively.
 */
private lazy val hasNameOrAliasContainingEBCDIC: Boolean = {
  val allCharsetNames = javaCharset.aliases().toSeq :+ name :+ javaCharset.name()
  // Upper-case with Locale.ROOT so the comparison does not depend on the JVM
  // default locale. Under a Turkish or Azeri default locale, a lowercase 'i'
  // upper-cases to a dotted capital I, so an alias like "ebcdic-cp-us" would
  // no longer contain "EBCDIC".
  allCharsetNames.exists(_.toUpperCase(java.util.Locale.ROOT).contains("EBCDIC"))
}

/**
 * A Java-backed charset is considered EBCDIC family when
 * hasNameOrAliasContainingEBCDIC is true.
 */
override def isEbcdicFamily(): Boolean = hasNameOrAliasContainingEBCDIC

override def newEncoder() =
new BitsCharsetWrappingJavaCharsetEncoder(this, javaCharset.newEncoder())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,45 @@ object DecimalUtils {
outArray
}

/**
 * These are the java characters corresponding to EBCDIC B0 to B9, in order.
 * These are commonly: circumflex, pound sign, yen sign, middle dot,
 * copyright sign, section sign, pilcrow, one quarter, one half,
 * three quarters.
 *
 * All but the first are written as unicode escapes rather than as literal
 * characters, so the value does not depend on the encoding used to read
 * this source file.
 *
 * Note that in Java chars (which are unicode), these do not have adjacent code points.
 * They are (hex) 5e, a3, a5, b7, a9, a7, b6, bc, bd, be.
 */
private val B0_to_B9_chars = "^\u00A3\u00A5\u00B7\u00A9\u00A7\u00B6\u00BC\u00BD\u00BE"

/**
 * Interprets one already-decoded character as an EBCDIC-convention zoned
 * digit, possibly carrying an overpunched sign.
 *
 * Despite the name, no EBCDIC decoding happens here: the character is
 * already a Unicode code point. This only maps overpunched sign characters
 * to a digit value and a sign.
 *
 * Positive: '0' to '9', and "{ABCDEFGHI" (EBCDIC hex C0 to C9).
 * Negative: "}JKLMNOPQR" (EBCDIC hex D0 to D9), and the B0_to_B9 characters.
 *
 * @param digit the character to interpret
 * @return A pair of the integer the digit represents, and a boolean indicating if negative.
 * @throws NumberFormatException if the character is none of the above
 */
def convertFromZonedEBCDIC(digit: Char): (Int, Boolean) = digit match {
  case plain if plain >= '0' && plain <= '9' => (plain - '0', false) // positive 0-9
  case '{' => (0, false) // positive 0 aka hex C0 ebcdic
  case posLetter if posLetter >= 'A' && posLetter <= 'I' => // positive 1-9, hex C1 to C9
    (posLetter - 'A' + 1, false)
  case '}' => (0, true) // negative 0 aka hex D0 ebcdic
  case negLetter if negLetter >= 'J' && negLetter <= 'R' => // negative 1-9, hex D1 to D9
    (negLetter - 'J' + 1, true)
  case other =>
    // Alternate negatives: position within B0_to_B9_chars is the digit value.
    val altNegativeValue = B0_to_B9_chars.indexOf(other)
    if (altNegativeValue < 0)
      throw new NumberFormatException("Invalid zoned digit: " + other)
    (altNegativeValue, true)
}

def convertFromAsciiStandard(digit: Char): (Int, Boolean) = {
if ((digit >= '0') && (digit <= '9')) // positive 0-9
(digit - 48, false)
Expand Down Expand Up @@ -417,6 +456,25 @@ object DecimalUtils {
throw new NumberFormatException("Invalid zoned digit: " + digit)
}

/**
 * Maps a decimal digit character and a sign to the EBCDIC-convention
 * overpunched sign character. This does not encode to the EBCDIC charset;
 * it only picks the character that represents the signed digit.
 *
 * Positive 0 to 9 become "{ABCDEFGHI" respectively. Negative 0 to 9
 * become "}JKLMNOPQR" respectively.
 *
 * @param digit the digit, as a char '0' to '9'
 * @param positive true if positive, false if negative
 * @return the overpunched sign character for this digit and sign
 * @throws NumberFormatException if digit is not in '0' to '9'
 */
def convertToZonedEBCDIC(digit: Char, positive: Boolean): Char = {
  if (digit < '0' || digit > '9')
    throw new NumberFormatException("Invalid zoned digit: " + digit)
  val overpunchTable = if (positive) "{ABCDEFGHI" else "}JKLMNOPQR"
  overpunchTable.charAt(digit - '0')
}

def convertToAsciiTranslatedEBCDIC(digit: Char, positive: Boolean): Char = {
if (positive) {
if (digit == '0')
Expand Down Expand Up @@ -470,7 +528,7 @@ object DecimalUtils {

def zonedToNumber(
num: String,
zonedStyle: TextZonedSignStyle,
optZonedStyle: Option[TextZonedSignStyle],
opl: OverpunchLocation.Value,
): String = {
val opindex = opl match {
Expand All @@ -483,24 +541,27 @@ object DecimalUtils {
if (opl == OverpunchLocation.None) {
num
} else {
val (digit, opneg) = zonedStyle match {
case TextZonedSignStyle.AsciiStandard => convertFromAsciiStandard(num(opindex))
case TextZonedSignStyle.AsciiTranslatedEBCDIC =>
val (digit, opneg) = optZonedStyle match {
case None => convertFromZonedEBCDIC(num(opindex))
case Some(TextZonedSignStyle.AsciiStandard) => convertFromAsciiStandard(num(opindex))
case Some(TextZonedSignStyle.AsciiTranslatedEBCDIC) =>
convertFromAsciiTranslatedEBCDIC(num(opindex))
case TextZonedSignStyle.AsciiCARealiaModified =>
case Some(TextZonedSignStyle.AsciiCARealiaModified) =>
convertFromAsciiCARealiaModified(num(opindex))
case TextZonedSignStyle.AsciiTandemModified =>
case Some(TextZonedSignStyle.AsciiTandemModified) =>
convertFromAsciiTandemModified(num(opindex))
}

val convertedNum = (opneg, opl) match {
case (true, OverpunchLocation.Start) => "-" + digit + num.substring(1)
case (false, OverpunchLocation.Start) => digit + num.substring(1)
case (true, OverpunchLocation.End) => "-" + num.substring(0, opindex) + digit
case (false, OverpunchLocation.End) => num.substring(0, opindex) + digit
val allDigits = opl match {
case OverpunchLocation.Start => digit + num.substring(1)
case OverpunchLocation.End => num.substring(0, opindex) + digit
case _ => Assert.impossible()
}
val convertedNum = if (opneg) "-" + allDigits else allDigits

// It is still possible for this to be an illegal/malformed number like "-1K3"
// because nothing has yet checked the interior chars are all digits.
// But this will be caught later when the string is converted to a number.
convertedNum
}
}
Expand All @@ -510,7 +571,7 @@ object DecimalUtils {

def zonedFromNumber(
num: String,
zonedStyle: TextZonedSignStyle,
optZonedStyle: Option[TextZonedSignStyle],
opl: OverpunchLocation.Value,
): String = {
val positive = (num.charAt(0) != '-')
Expand All @@ -529,14 +590,15 @@ object DecimalUtils {
if (!positive) Assert.impossible()
inStr
} else {
val digit = zonedStyle match {
case TextZonedSignStyle.AsciiStandard =>
val digit = optZonedStyle match {
case None => convertToZonedEBCDIC(inStr(opindex), positive)
case Some(TextZonedSignStyle.AsciiStandard) =>
convertToAsciiStandard(inStr(opindex), positive)
case TextZonedSignStyle.AsciiTranslatedEBCDIC =>
case Some(TextZonedSignStyle.AsciiTranslatedEBCDIC) =>
convertToAsciiTranslatedEBCDIC(inStr(opindex), positive)
case TextZonedSignStyle.AsciiCARealiaModified =>
case Some(TextZonedSignStyle.AsciiCARealiaModified) =>
convertToAsciiCARealiaModified(inStr(opindex), positive)
case TextZonedSignStyle.AsciiTandemModified =>
case Some(TextZonedSignStyle.AsciiTandemModified) =>
convertToAsciiTandemModified(inStr(opindex), positive)
}

Expand Down
Loading

0 comments on commit 3ce37b4

Please sign in to comment.