// za.co.absa.cobrix.cobol.parser.common.BinaryUtils.scala (Maven / Gradle / Ivy)
/*
* Copyright 2018 Barclays Africa Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package za.co.absa.cobrix.cobol.parser.common
import java.nio.ByteBuffer
import com.typesafe.scalalogging.LazyLogging
import scodec._
import scodec.bits.BitVector
import za.co.absa.cobrix.cobol.parser.encoding.{ASCII, EBCDIC, Encoding}
import za.co.absa.cobrix.cobol.parser.position
import scala.collection.mutable.ListBuffer
import scala.util.control.NonFatal
/** Utilities for decoding COBOL binary data files. */
//noinspection RedundantBlock
object BinaryUtils extends LazyLogging {
// Binary number format codecs. The suffix tells the byte order:
// `B` is the big endian flavour and `L` is the little endian flavour of the same codec.
import scodec.codecs

// Signed integers
lazy val int8B: Codec[Int] = codecs.int8
lazy val int8L: Codec[Int] = codecs.int8L
lazy val int16B: Codec[Int] = codecs.int16
lazy val int16L: Codec[Int] = codecs.int16L
lazy val int32B: Codec[Int] = codecs.int32
lazy val int32L: Codec[Int] = codecs.int32L
lazy val int64B: Codec[Long] = codecs.int64
lazy val int64L: Codec[Long] = codecs.int64L

// Unsigned integers (the 32-bit variants decode into a Long)
lazy val uint8B: Codec[Int] = codecs.uint8
lazy val uint8L: Codec[Int] = codecs.uint8L
lazy val uint16B: Codec[Int] = codecs.uint16
lazy val uint16L: Codec[Int] = codecs.uint16L
lazy val uint32B: Codec[Long] = codecs.uint32
lazy val uint32L: Codec[Long] = codecs.uint32L

// Floating point numbers
lazy val floatB: Codec[Float] = codecs.float
lazy val floatL: Codec[Float] = codecs.floatL
lazy val doubleB: Codec[Double] = codecs.double
lazy val doubleL: Codec[Double] = codecs.doubleL
/** The EBCDIC to ASCII conversion table.
  *
  * The array has 256 entries and is indexed by the unsigned value of an EBCDIC byte.
  * Code points that have no dedicated entry below are translated to a space.
  */
lazy val ebcdic2ascii: Array[Char] = {
  // EBCDIC 0x0D (13) is a carriage return and EBCDIC 0x25 (37) is a line feed.
  // The two characters used to be assigned the other way around.
  val ccr = '\r'
  val clf = '\n'
  val spc = ' '
  val qts = '\''
  val qtd = '\"'
  val bsh = '\\'
  Array[Char](
    spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, ccr, spc, spc, // 0 - 15
    spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, // 16 - 31
    spc, spc, spc, spc, spc, clf, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, // 32 - 47
    spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, // 48 - 63
    ' ', ' ', spc, spc, spc, spc, spc, spc, spc, spc, '¢', '.', '<', '(', '+', '|', // 64 - 79
    '&', spc, spc, spc, spc, spc, spc, spc, spc, spc, '!', '$', '*', ')', ';', '¬', // 80 - 95
    '-', '/', spc, spc, spc, spc, spc, spc, spc, spc, '¦', ',', '%', '_', '>', '?', // 96 - 111
    spc, spc, spc, spc, spc, spc, spc, spc, spc, '`', ':', '#', '@', qts, '=', qtd, // 112 - 127
    spc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', spc, spc, spc, spc, spc, '±', // 128 - 143
    spc, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', spc, spc, spc, spc, spc, spc, // 144 - 159
    spc, '~', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', spc, spc, spc, spc, spc, spc, // 160 - 175
    '^', spc, spc, spc, spc, spc, spc, spc, spc, spc, '[', ']', spc, spc, spc, spc, // 176 - 191
    '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', '-', spc, spc, spc, spc, spc, // 192 - 207
    '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', spc, spc, spc, spc, spc, spc, // 208 - 223
    bsh, spc, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', spc, spc, spc, spc, spc, spc, // 224 - 239
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', spc, spc, spc, spc, spc, spc) // 240 - 255
}
/** Pads a bit vector with low (zero) bits so that it becomes exactly `wordSize` bits long.
  *
  * @param f        The bits to pad. Must not be longer than `wordSize`
  * @param wordSize The size of the resulting bit vector, in bits
  * @param align    `position.Left` adds the padding via `padLeft`, `position.Right` adds it via `padRight`.
  *                 Any other value leaves the bits untouched
  * @return A bit vector of `wordSize` bits, or `f` itself if no padding has been applied
  */
def wordAlign(f: BitVector, wordSize: Int, align: position.Position): BitVector = {
  require(f.size <= wordSize)
  // BitVector.padLeft(n) / padRight(n) expect the TARGET size of the vector, not the number of bits to add.
  // Passing the number of missing bits either throws (when it is smaller than f.size) or pads to a wrong size.
  align match {
    case position.Left if f.size != wordSize => f.padLeft(wordSize)
    case position.Right if f.size != wordSize => f.padRight(wordSize)
    case _ => f
  }
}
/** Calculates the number of bits occupied by a COBOL data type.
  *
  * The size of a single unit is the lower size bound reported by the codec.
  *
  * @param codec     A codec used for decoding a single unit of the type
  * @param comp      A type of compact storage (if any)
  * @param precision The precision (the number of digits) of the type
  * @return The size of the type in bits
  */
def getBitCount(codec: Codec[_ <: AnyVal], comp: Option[Int], precision: Int): Int = {
  val unitBits = codec.sizeBound.lowerBound.toInt
  comp match {
    case Some(3) => (precision + 1) * unitBits // bcd: one unit more than the precision
    case Some(_) => unitBits // bin/float/floatL: exactly one unit
    case None => precision * unitBits // no compaction: one unit per digit
  }
}
/** Decodes the bits located in a binary file into an array of bytes by applying a scodec codec.
  *
  * For BCD (`comp == Some(3)`) and for uncompressed values the bits are processed as a sequence of words, each
  * having the size of the codec. Every word is decoded to a number which is then truncated to a byte.
  * The ASCII and the non-compacted paths decode `scale` words, the EBCDIC BCD path decodes `scale + 1` words
  * (presumably the extra word is the sign nybble - to be confirmed against the callers).
  *
  * For any other compaction the whole bit vector is decoded as one number. The ASCII path returns it as a single
  * byte, the EBCDIC path returns the 8 bytes of its `Double` representation.
  *
  * @param codec        scodec codec used for decoding a single word
  * @param enc          encoding type
  * @param scale        the number of words to decode
  * @param bits         bits that need to be decoded
  * @param comp         compaction of the bits
  * @param align        bits alignment used for padding incomplete words (`position.Left` by default)
  * @param signPosition sign position of a signed data type (currently not used by this method)
  * @return the decoded bytes
  */
def decode(codec: Codec[_ <: AnyVal], enc: Encoding, scale: Int, bits: BitVector, comp: Option[Int], align: Option[position.Position] = None,
           signPosition: Option[position.Position]): Array[Byte] = {
  val digitBitSize = codec.sizeBound.lowerBound.toInt
  val alignment = align.getOrElse(position.Left)

  // Decodes a number and widens it to Double. The value is accessed as a java.lang.Number since the codec
  // may produce Int, Long, Float or Double. A direct cast to Double fails for everything except Double codecs.
  def decodeNumber(bts: BitVector): Double =
    Codec.decode(bts)(codec).require.value.asInstanceOf[Number].doubleValue()

  // Extracts the word with the given index and pads it to the size of the codec
  def wordAt(idx: Int): BitVector = {
    val from = idx * digitBitSize
    wordAlign(bits.slice(from, from + digitBitSize), digitBitSize, alignment)
  }

  val bytes = enc match {
    case _: ASCII => comp match {
      case Some(3) => //bcd
        (0 until scale).map(x => decodeNumber(wordAt(x)).toByte).toArray
      case Some(_) => //bin
        Array(decodeNumber(bits).toByte)
      case None => // display i.e. no comp
        (0 until scale).map(x => decodeNumber(wordAt(x)).toByte).toArray
    }
    case _: EBCDIC => comp match {
      case Some(3) => //bcd
        logger.debug("BCD, bits : " + bits)
        // Note the inclusive range: one more word than `scale` is decoded here
        (0 to scale).map { x =>
          val bts = wordAt(x)
          val value = decodeNumber(bts)
          logger.debug("bts : " + bts.toBin)
          logger.debug("codec : " + codec)
          logger.debug("value : " + value)
          value.toByte
        }.toArray
      case Some(_) => //bin
        logger.debug("bts : " + bits.toBin)
        logger.debug("codec : " + codec)
        val decValue = decodeNumber(bits)
        logger.debug("decValue : " + decValue)
        // The number is returned as the 8 bytes of its Double representation
        val byteArr = ByteBuffer.allocate(8).putDouble(decValue).array()
        logger.debug("byteArr : " + byteArr.mkString("[", ", ", "]"))
        byteArr
      case None => // display i.e. no comp
        (0 until scale).map { x =>
          val bts = wordAt(x)
          logger.debug("bts : " + bts.toBin)
          decodeNumber(bts).toByte
        }.toArray
    }
  }
  bytes
}
/** Decodes an array of bytes to actual characters that represent their binary counterparts.
  *
  *  - ASCII: every byte is converted to the character having the same (unsigned) code.
  *  - EBCDIC, BCD (`comp == Some(3)`): every byte except the last one is rendered as its decimal value,
  *    the last byte is the sign (0x0C and 0x0F are rendered as a space, 0x0D is rendered as '-').
  *    The sign goes first in the resulting string. An empty array results in an empty string.
  *  - EBCDIC, any other compaction: the array is read as a big endian `Double` (at least 8 bytes are required,
  *    otherwise `java.nio.BufferUnderflowException` is thrown).
  *  - EBCDIC, no compaction: every byte is converted using the EBCDIC to ASCII conversion table.
  *
  * @param byteArr byte array that represents the binary data
  * @param enc     encoding type
  * @param comp    binary compaction type
  * @return a string representation of the binary data
  * @throws Exception if the encoding is not specified
  */
def charDecode(byteArr: Array[Byte], enc: Option[Encoding], comp: Option[Int]): String = {
  enc match {
    case Some(ASCII()) =>
      // Build an actual string. Converting an Array[Int] with toString returns just the identity of the array.
      byteArr.map(byte => (byte & 0xFF).toChar).mkString
    case Some(EBCDIC()) =>
      comp match {
        case Some(3) => //bcd
          if (byteArr.isEmpty) {
            ""
          } else {
            val digits = byteArr.init.map(_.toString).mkString
            val sign = byteArr.last match { //last byte is sign
              case 0x0C => " " // was +
              case 0x0D => "-"
              case 0x0F => " " // was +, unsigned
              case other =>
                logger.warn(s"Unknown signature nybble encountered! ${other.toString}")
                other.toString // No sign
            }
            logger.debug("digitString : " + digits)
            // The sign is taken exactly once, so a single-byte input does not get it duplicated
            sign + digits
          }
        case Some(_) => //bin
          // Todo Why Double??? Revise the logic
          ByteBuffer.wrap(byteArr).getDouble.toString //returns number value as a string "1500"
        case None =>
          // `& 0xFF` converts a signed byte to an index in the range 0-255
          byteArr.map(byte => ebcdic2ascii(byte & 0xFF)).mkString
      }
    case _ => throw new Exception("No character set was defined for decoding")
  }
}
/** A decoder for any string fields (alphabetical or any char)
  *
  * @param enc    An encoding of the data. EBCDIC bytes are converted using the conversion table,
  *               bytes in any other encoding are converted to characters directly
  * @param bytes  A byte array that represents the binary data
  * @param length The maximum number of bytes to decode
  * @return A string representation of the binary data with leading and trailing whitespace removed
  */
def decodeString(enc: Encoding, bytes: Array[Byte], length: Int): String = {
  // Select the byte-to-character conversion once, then apply it to each byte
  val toChar: Byte => Char = enc match {
    case _: EBCDIC => (b: Byte) => ebcdic2ascii((b + 256) % 256)
    case _ => (b: Byte) => b.toChar
  }
  bytes.iterator.take(length).map(toChar).mkString.trim
}
/** A decoder for various numeric formats. Dispatches to a specific decoder according to the compaction.
  *
  * @param enc             An encoding of the data (used by the DISPLAY format only)
  * @param bytes           A byte array that represents the binary data
  * @param compact         A compaction: None for DISPLAY, 1 and 2 for floating point numbers, 3 for BCD, 4 for binary
  * @param precision       A precision of the number (not used by this method)
  * @param scale           A decimal scale of the number
  * @param explicitDecimal Set to true if the decimal point is a part of the data (DISPLAY format only)
  * @param signed          Set to true if the number is signed (binary format only)
  * @return A string representation of the binary data, None if a decoder rejects the data
  * @throws IllegalStateException if the compaction is not one of the supported values
  */
def decodeCobolNumber(enc: Encoding, bytes: Array[Byte], compact: Option[Int], precision: Int, scale: Int, explicitDecimal: Boolean, signed: Boolean): Option[String] =
  compact match {
    // DISPLAY format
    case None => decodeUncompressedNumber(enc, bytes, explicitDecimal, scale)
    // COMP-1 aka 32-bit and COMP-2 aka 64-bit floating point numbers (the size is determined by the data)
    case Some(1) | Some(2) => Some(decodeFloatingPointNumber(bytes, bigEndian = true))
    // COMP-3 aka BCD-encoded number
    case Some(3) => decodeSignedBCD(bytes, scale)
    // COMP aka BINARY encoded number
    case Some(4) => Some(decodeBinaryNumber(bytes, bigEndian = true, signed = signed, scale = scale))
    case _ => throw new IllegalStateException(s"Unknown compression format ($compact).")
  }
/** A decoder for uncompressed (aka DISPLAY) binary numbers
  *
  * @param enc             An encoding of the data. EBCDIC bytes are converted using the conversion table
  * @param bytes           A byte array that represents the binary data
  * @param explicitDecimal If true the data is used as is. If false a decimal point is inserted
  *                        `scale` characters before the end of the number
  * @param scale           The number of digits to the right of the decimal point
  * @return A string representation of the binary data, None if the result is not a valid number
  */
def decodeUncompressedNumber(enc: Encoding, bytes: Array[Byte], explicitDecimal: Boolean, scale: Int): Option[String] = {
  val dotIndex = bytes.length - scale
  val toChar: Byte => Char = enc match {
    case _: EBCDIC => (b: Byte) => ebcdic2ascii((b + 256) % 256)
    case _ => (b: Byte) => b.toChar
  }
  val text = new StringBuilder
  for (idx <- bytes.indices) {
    // When scale is zero dotIndex is past the last byte, so no decimal point is added
    if (!explicitDecimal && idx == dotIndex) {
      text += '.'
    }
    text += toChar(bytes(idx))
  }
  validateAndFormatNumber(text.toString)
}
/** Decode a binary encoded decimal (BCD) aka COMP-3 format to a String
  *
  * Each byte holds two nybbles. The last nybble is the sign (0x0C and 0x0F stand for a positive number,
  * 0x0D stands for a negative one), all the other nybbles must be decimal digits.
  *
  * @param bytes A byte array that represents the binary data
  * @param scale A decimal scale if a number is a decimal. Should be greater or equal to zero
  * @return Some(str) - a string representation of the binary data, None if the data is not properly formatted
  * @throws IllegalArgumentException if the scale is negative
  */
def decodeSignedBCD(bytes: Array[Byte], scale: Int = 0): Option[String] = {
  if (scale < 0) {
    throw new IllegalArgumentException(s"Invalid scele=$scale, should be greater or equal to zero.")
  }
  if (bytes.isEmpty) {
    Some("0")
  } else {
    // Split every byte into the high and the low nybble, keeping the order
    val nybbles: Array[Int] = bytes.flatMap(b => Seq((b >> 4) & 0x0F, b & 0x0F))
    val digits = nybbles.init
    val signOpt: Option[String] = nybbles.last match {
      case 0x0C | 0x0F => Some("") // +, signed or unsigned
      case 0x0D => Some("-")
      case _ => None // invalid sign nybble - the format is wrong
    }
    // Any nybble above 9 among the digits means the format is wrong as well
    signOpt.filter(_ => digits.forall(_ < 10)).flatMap { sign =>
      // The decimal point goes right before the digit with this index (never reached when scale is zero)
      val dotIndex = digits.length - scale
      val number = digits.zipWithIndex.map { case (digit, idx) =>
        val ch = ('0' + digit).toChar
        if (idx == dotIndex) "." + ch else ch.toString
      }.mkString
      validateAndFormatNumber(sign + number)
    }
  }
}
/** Transforms a string representation of an integer to a string representation of decimal
  * by adding a decimal point into the proper place. If there are not enough digits the number
  * is left-padded with zeros so that the result always has a digit before the decimal point.
  *
  * @param intValue A number as an integer. A leading '-' is treated as the sign
  * @param scale    A scale - the number of digits to the right of decimal point separator
  * @return A string representation of decimal
  * @throws IllegalArgumentException if the scale is negative
  */
private[cobol] def addDecimalPoint(intValue: String, scale: Int): String = {
  if (scale < 0) {
    throw new IllegalArgumentException(s"Invalid scele=$scale, should be greater or equal to zero.")
  }
  if (scale == 0) {
    intValue
  } else {
    // Handle the sign separately so that both positive and negative numbers share the same logic
    val (sign, digits) = if (intValue.startsWith("-")) ("-", intValue.substring(1)) else ("", intValue)
    if (digits.length > scale) {
      val cut = digits.length - scale
      sign + digits.substring(0, cut) + "." + digits.substring(cut)
    } else {
      sign + "0." + "0" * (scale - digits.length) + digits
    }
  }
}
/** A generic decoder for 2s complement binary numbers aka COMP
  *
  * The number can have an arbitrary size in bytes. An empty array is decoded as "0".
  *
  * @param bytes     A byte array that represents the binary data
  * @param bigEndian Set to true if the most significant byte goes first
  * @param signed    If true the data is treated as a 2s complement signed number, otherwise as an unsigned one
  * @param scale     The number of digits to the right of the decimal point
  * @return A string representation of the binary data
  */
def decodeBinaryNumber(bytes: Array[Byte], bigEndian: Boolean, signed: Boolean, scale: Int = 0): String = {
  if (bytes.isEmpty) {
    "0"
  } else {
    // BigInt expects the most significant byte first
    val mostSignificantFirst = if (bigEndian) bytes else bytes.reverse
    val number =
      if (signed) BigInt(mostSignificantFirst) // 2s complement
      else BigInt(1, mostSignificantFirst) // magnitude of a positive number
    addDecimalPoint(number.toString, scale)
  }
}
/** A decoder for floating point numbers
  *
  * @param bytes     A byte array that represents the binary data. Should contain either 4 bytes (float)
  *                  or 8 bytes (double)
  * @param bigEndian Set to true if the most significant byte goes first
  * @return A string representation of the binary data
  * @throws IllegalArgumentException if the size of the array is neither 4 nor 8
  */
def decodeFloatingPointNumber(bytes: Array[Byte], bigEndian: Boolean): String = {
  val bits = BitVector(bytes)
  // NOTE(review): the branches yield a Float and a Double. Both are intentionally kept in a single
  // match expression so that the type of the result (and hence its formatting) stays as it was.
  val value = bytes.length match {
    case 4 => (if (bigEndian) floatB else floatL).decode(bits).require.value
    case 8 => (if (bigEndian) doubleB else doubleL).decode(bits).require.value
    case size =>
      throw new IllegalArgumentException(s"Illegal number of bytes to decode ($size). Expected either 4 or 8 for floating point type.")
  }
  value.toString
}
/** Formats and validates a string as a number.
  *
  * @param str A string that is expected to contain a decimal number
  * @return The number as formatted by `BigDecimal`, None if the string doesn't pass the validation
  */
private def validateAndFormatNumber(str: String): Option[String] =
  // Try handles exactly the non-fatal exceptions, fatal errors are propagated
  scala.util.Try(BigDecimal(str).toString).toOption
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy