
// org.apache.daffodil.io.Dump.scala (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.daffodil.io
import java.nio.ByteBuffer
import java.nio.CharBuffer
import java.nio.charset.CodingErrorAction
import org.apache.daffodil.exceptions.Assert
import com.ibm.icu.lang.UCharacter
import com.ibm.icu.lang.UCharacterEnums
import com.ibm.icu.lang.UProperty
import org.apache.daffodil.util.Misc
import org.apache.daffodil.equality._
import java.nio.charset.{ CharsetDecoder => JavaCharsetDecoder }
import java.nio.charset.{ Charset => JavaCharset }
import java.nio.charset.CoderResult
/**
* Hex/Bits and text dump formats for debug/trace purposes.
*
* By definition this is a dump, so doesn't know much about where the
* fields in the data are. (To do that you'd need a format description
* language, like DFDL, but this is here to help debug DFDL descriptions,
* so it really cannot exploit any information about the data format)
*/
class DataDumper {
import scala.language.reflectiveCalls
// Structural type: anything with a get(Int): Byte method can be dumped.
// Structural member calls are what the scala.language.reflectiveCalls
// import above is for.
type ByteSource = {
def get(byteAddress0b: Int): Byte // arg must be Int, not Long, so ByteBuffers satisfy this contract.
}
// Default value of the maxLineLength parameter of dump.
val defaultMaxLineLength = 70
/**
* What kind of dump do you want?
* Text only - no hex will be displayed.
* Binary only - do you want hex? or binary bits?
* Mixed - show both hex/bits and text side by side
*
* The concrete kinds are the case classes TextOnly, MixedHexLTR and
* MixedHexRTL; the protected traits and class are the pieces they are
* assembled from.
*/
sealed trait Kind
// A kind that displays text. optCharset is the name of the charset used to
// decode the text, if any.
protected sealed class TextKind(val optCharset: Option[String]) extends Kind
protected sealed trait BinaryKind extends Kind
protected sealed trait HexKind extends BinaryKind // hexadecimal
// sealed trait BitsKind extends BinaryKind // individual ones and zeros
// Presentation direction of a dump line.
protected sealed trait Direction
protected sealed trait RTL extends Direction // used with least-signif-bit first data like mil-std-2045
protected sealed trait LTR extends Direction
// Kinds that are sketched here but not implemented:
// case object HexLTR extends HexKind with LTR
// case object BitsLTR extends BitsKind with LTR
// case object HexRTL extends HexKind with RTL
// Text only, as a single line.
case class TextOnly(override val optCharset: Option[String] = None) extends TextKind(optCharset)
// Hex and text side by side, left to right.
case class MixedHexLTR(override val optCharset: Option[String] = None) extends TextKind(optCharset) with HexKind with LTR
// Hex and text side by side, right to left.
case class MixedHexRTL(override val optCharset: Option[String] = None) extends TextKind(optCharset) with HexKind with RTL
// case class MixedBits(optCharset: Option[String] = None) extends TextKind(optCharset) with BitsKind
/**
 * Converts a region given in bits into the smallest run of whole bytes that
 * covers it.
 *
 * The byte count is exact: a region that starts and ends in the middle of
 * bytes is not padded with an extra byte, and an empty region covers no
 * bytes regardless of its alignment.
 *
 * @param startBitAddress0b zero-based bit address of the first bit; must be >= 0
 * @param lengthInBits number of bits in the region; must be >= 0
 * @return a triple of: the zero-based address of the byte holding the first bit,
 *         the number of bytes the region touches (0 when lengthInBits is 0),
 *         and the zero-based address of the last byte touched (first + count - 1,
 *         clamped so it is never negative)
 */
def convertBitsToBytesUnits(startBitAddress0b: Long, lengthInBits: Long): (Long, Int, Long) = {
  Assert.usage(startBitAddress0b >= 0)
  Assert.usage(lengthInBits >= 0)
  val startByteAddress0b = startBitAddress0b >> 3
  val lengthInBytes: Long =
    if (lengthInBits == 0) 0L
    else {
      // address of the byte holding the last bit of the region
      val lastBitAddress0b = startBitAddress0b + lengthInBits - 1
      (lastBitAddress0b >> 3) - startByteAddress0b + 1
    }
  val endByteAddress0b = math.max(startByteAddress0b + lengthInBytes - 1, 0)
  (startByteAddress0b, lengthInBytes.toInt, endByteAddress0b)
}
/**
 * Produces a dump as a sequence of lines.
 *
 * Depending on the kind, the lines can be: an indicator line that uses
 * draw-characters to point at the region of interest, a heading line that
 * numbers the byte columns, and the data lines (address, hex, text).
 *
 * If the kind names a charset, that charset is used to display the text and
 * decode errors show as the Unicode replacement character. Without a charset
 * each byte is shown as a single visible glyph.
 *
 * All positions and lengths passed in are in bits; they are converted to
 * whole bytes before dumping.
 *
 * @param kind which presentation to produce; MixedHexRTL is only supported without a charset
 * @param shamStartBitAddress0b the data-stream bit address that byteSource.get(0) corresponds to
 * @param lengthInBits how many bits to dump
 * @param byteSource a window into the data stream
 * @param maxLineLength used by the text-only kind to bound the line
 * @param includeHeadingLine whether the hex kinds emit the column heading
 * @param indicatorInfo optional (start bit address, length in bits) of the region of interest
 * @return the lines of the dump
 */
def dump(kind: Kind, shamStartBitAddress0b: Long, lengthInBits: Int, byteSource: ByteSource, maxLineLength: Int = defaultMaxLineLength,
  includeHeadingLine: Boolean = true,
  indicatorInfo: Option[(Long, Int)] = None): Seq[String] = {
  val (firstByte0b, nBytes, _) = convertBitsToBytesUnits(shamStartBitAddress0b, lengthInBits)
  val indicatorInBytes = indicatorInfo.map { case (bitPos0b, nBits) =>
    val (bytePos0b, byteCount, _) = convertBitsToBytesUnits(bitPos0b, nBits)
    (bytePos0b, byteCount)
  }
  kind match {
    case TextOnly(charsetName) =>
      dumpTextLine(maxLineLength, firstByte0b, nBytes, byteSource, charsetName, indicatorInBytes)
    case MixedHexLTR(charsetName) =>
      dumpHexAndTextBytes(firstByte0b, nBytes, byteSource, includeHeadingLine, charsetName, indicatorInBytes)
    case MixedHexRTL(None) =>
      dumpHexAndTextBytesLSBFirst(firstByte0b, nBytes, byteSource, includeHeadingLine, None)
    case _ =>
      Assert.usageError("unsupported dump kind")
  }
}
//
// These vars are used by the text dump when the multiple bytes of a
// character wrap from one line to the next.
//
// paddingFromPriorLine is the filler text written at the start of the next
// row in place of the wrapped bytes; nPadBytesFromPriorLine is how many bytes
// of that row it stands for. Both are instance state: dumpHexAndTextBytes
// resets them before its first row and textDump reads and updates them.
//
var paddingFromPriorLine = ""
var nPadBytesFromPriorLine = 0
/**
 * Appends the text column of one dump row to txtsb.
 *
 * Each byte of the row owns textByteWidth columns. A character is written
 * once, followed by "~" filler for the columns of its bytes that it does not
 * itself occupy. When a multi-byte character runs past the end of the row,
 * the filler for the overhanging bytes is remembered in paddingFromPriorLine /
 * nPadBytesFromPriorLine and written at the start of the next row instead.
 *
 * @param addr offset added to a row-relative byte index to get the position handed to convertToCharRepr
 * @param rowStart0b row-relative index of the first byte to show in this row
 * @param txtsb receives the text for the row
 * @param limit0b row-relative index of the last byte to show in this row (inclusive)
 * @param endByteAddress0b last byte position convertToCharRepr may use
 * @param byteSource the bytes being dumped
 * @param decoder decoder for the text, or None to show each byte as one glyph
 * @param textByteWidth number of text columns per byte
 */
private def textDump(addr: Long, rowStart0b: Int, txtsb: StringBuilder,
  limit0b: Int, endByteAddress0b: Long, byteSource: ByteSource, decoder: Option[JavaCharsetDecoder],
  textByteWidth: Int): Unit = {
  // Filler for the columns of a byte beyond the first one; it does not vary per character.
  val padByteRep = "~" * (textByteWidth - 1)
  // Bytes already accounted for by a character that wrapped from the prior row are skipped.
  var i = rowStart0b + nPadBytesFromPriorLine
  txtsb ++= paddingFromPriorLine
  while (i <= limit0b) {
    val bytePos0b = addr + i
    val (charRep, nBytesConsumed, width) = convertToCharRepr(bytePos0b, endByteAddress0b, byteSource, decoder)
    Assert.invariant(nBytesConsumed > 0)
    // Some characters print double width. A character is assumed never to be
    // wider, in columns, than the number of bytes it occupies.
    Assert.invariant(nBytesConsumed >= width)
    //
    // Will padding wrap to next line? Only characters of 2 to 4 bytes are
    // considered; the overhang is however many of their bytes do not fit in
    // what is left of this row.
    //
    val nBytesLeftInRow = limit0b - i + 1
    val nBytesPastEnd =
      if (nBytesConsumed >= 2 && nBytesConsumed <= 4) math.max(0, nBytesConsumed - nBytesLeftInRow)
      else 0
    paddingFromPriorLine = padByteRep * 2 * nBytesPastEnd
    nPadBytesFromPriorLine = nBytesPastEnd
    //
    // Adjust padding downward if the character is double wide.
    //
    val padding = padByteRep * ((nBytesConsumed, width) match {
      case (1, 1) => 1
      case (1, 2) => 0
      case (2, x) => 4 - x
      case (3, x) => 6 - x
      case (4, x) => 8 - x
      case _ => Assert.impossible()
    })
    // The part of the padding that belongs to the next row is not written here.
    val trimmedPadding = padding.take(padding.length - paddingFromPriorLine.length)
    txtsb ++= charRep + trimmedPadding
    i += nBytesConsumed
  }
}
/**
 * Creates a dump that looks like Emacs Hexl mode: each row is an 8-digit hex
 * address, a colon, up to 16 bytes as hex digits grouped in pairs, and the
 * same bytes as text.
 *
 * Note that the character glyphs on the right depend on the
 * font being used. These all are printing characters but whether they
 * line up perfectly under the heading columns depends on the font
 * being used. In particular, it makes use of the C0 control picture
 * unicode characters to give glyphs to those otherwise non-printing
 * characters, but these are not all monospaced widths.
 *
 * Rows start on 16-byte boundaries; positions before the start of the data in
 * the first row, and after its end in the last row, are left blank. Bytes
 * that byteSource cannot supply are shown as 00.
 *
 * For examples see the TestDump class.
 *
 * @param startByteAddress0b data-stream address of byteSource.get(0); must be >= 0
 * @param lengthInBytes number of bytes to dump; must be >= 0
 * @param byteSource the bytes
 * @param includeHeadingLine whether to emit the column heading line
 * @param optEncodingName charset to decode the text column with, if any
 * @param indicatorInfoInBytes optional (start byte address, length in bytes) to draw an indicator line for
 * @return the indicator line (if any), the heading line (if requested), then one line per row
 */
private[io] def dumpHexAndTextBytes(startByteAddress0b: Long, lengthInBytes: Int,
  byteSource: ByteSource,
  includeHeadingLine: Boolean,
  optEncodingName: Option[String],
  indicatorInfoInBytes: Option[(Long, Int)]): Seq[String] = {
  Assert.usage(startByteAddress0b >= 0)
  Assert.usage(lengthInBytes >= 0)
  val (textDataHeader, textByteWidth, optEncName) = getTextParameters(optEncodingName)
  val decoder = getReportingDecoder(optEncName)
  val endByteAddress0b = math.max(startByteAddress0b + lengthInBytes - 1, 0)
  // Same end position, but as an index into byteSource, which is the
  // coordinate system the text dump decodes in.
  val endByteSourcePos0b = endByteAddress0b - startByteAddress0b
  val firstGutter = ": "
  // The address column of a row is 8 hex digits plus firstGutter; the heading must be as wide.
  val addressHeader = "87654321" + (" " * firstGutter.length)
  val hexHeader = """0011 2233 4455 6677 8899 aabb ccdd eeff""" + " " // space on the end is needed
  val headingHex = addressHeader + hexHeader
  val offset0b = (startByteAddress0b & 0xF).toInt
  // Blank stand-ins that are exactly as wide as what they replace:
  // a byte is 2 hex digits, a pair is 2 bytes plus the gutter after it.
  val blankHexByte = " " * 2
  val blankHexPair = blankHexByte * 2 + " "
  val hexRegionInitialWhitespace =
    blankHexPair * (offset0b / 2) +
      blankHexByte * (offset0b & 0x1) // blank first half of pair
  val textRegionInitialWhitespace = (" " * textByteWidth) * offset0b
  val indicatorLine =
    makeHexAndTextIndicatorLine(indicatorInfoInBytes, startByteAddress0b, lengthInBytes,
      hexHeader.length, addressHeader.length, textByteWidth)
  var isFirstRow = true
  var isLastRow = false
  // Clears the low 4 bits of a non-negative address, giving the address of its row.
  val rowAddressMask = Long.MaxValue & ~0xFL
  val firstLeftAddress = startByteAddress0b & rowAddressMask
  val lastLeftAddress = math.max(0, (startByteAddress0b + lengthInBytes - 1)) & rowAddressMask
  val headingLine = headingHex + " " + textDataHeader
  val ab = scala.collection.mutable.ArrayBuffer[String]()
  indicatorLine.foreach { line => ab += line }
  if (includeHeadingLine) ab += headingLine
  val hexsb = new StringBuilder
  val txtsb = new StringBuilder
  var rowStart0b = offset0b
  var limit0b = 15 // except for last row it will be shortened. Inclusive limit.
  //
  // These vars are used by the txt dump when the multiple bytes of a
  // character wrap from one line to the next.
  //
  paddingFromPriorLine = ""
  nPadBytesFromPriorLine = 0
  firstLeftAddress to lastLeftAddress by 16 foreach {
    //
    // for each line/row, we assemble the address part, the hex part, and the text part
    //
    addr =>
      if (addr == lastLeftAddress) {
        isLastRow = true
        limit0b = (endByteAddress0b & 0xf).toInt // might be fewer than all 16 for last row
      }
      val addrString = "%08x".format(addr)
      hexsb ++= addrString + firstGutter
      if (isFirstRow) {
        isFirstRow = false
        hexsb ++= hexRegionInitialWhitespace
        txtsb ++= textRegionInitialWhitespace
      }
      //
      // Hex dump
      //
      rowStart0b to limit0b foreach { i =>
        val bytePos0b = addr + i - startByteAddress0b
        val byteValue = try {
          byteSource.get(bytePos0b.toInt)
        } catch {
          case e: IndexOutOfBoundsException => 0.toByte
        }
        val hex = "%02x".format(byteValue)
        val gutter = if ((i & 0x1) == 0) "" else " "
        hexsb ++= hex + gutter
      }
      //
      // Text dump
      //
      textDump(addr - startByteAddress0b, rowStart0b, txtsb,
        limit0b, endByteSourcePos0b, byteSource, decoder,
        textByteWidth)
      if (isLastRow) {
        //
        // Trailing spaces on the hex dump
        //
        (limit0b + 1) to 15 foreach { i =>
          val gutter = if ((i & 0x1) == 1) " " else ""
          hexsb ++= blankHexByte + gutter
        }
        //
        // Trailing spaces on the text dump
        //
        (limit0b + 1) to 15 foreach { i =>
          txtsb ++= (" " * textByteWidth)
        }
      }
      ab += hexsb.mkString + " " + txtsb.mkString
      hexsb.clear()
      txtsb.clear()
      //
      // we're done with first row, so subsequent rows will have
      // zero as the row start.
      //
      rowStart0b = 0
  }
  // Hand back an immutable copy rather than the working buffer.
  ab.toVector
}
// Indicators over these dumps are of maximum length 16 bytes: one part drawn
// over the hex columns and one over the text columns.
//
// An indicator ends in ┤ when the region ends within the row, and in ═ when
// the region continues past what is drawn.
//
// They can also be shorter than 16 bytes if the region starts further
// in from the left.
//
/**
 * Builds the indicator line drawn above a hex-and-text dump.
 *
 * @param indicatorInfoInBytes optional (start byte address, length in bytes) of the region to point at
 * @param startByteAddress0b address of the first byte of the dump; an indicator that starts earlier is moved up to it
 * @param lengthInBytes length of the dump; the indicator length is clamped to it
 * @param hexHeaderLength width of the hex region; the hex indicator is space-padded to this width
 * @param addressHeaderLength width of the address column, which is left blank
 * @param textByteWidth number of text columns per byte
 * @return the indicator line, or None when there is nothing to indicate
 */
private def makeHexAndTextIndicatorLine(indicatorInfoInBytes: Option[(Long, Int)], startByteAddress0b: Long, lengthInBytes: Int,
  hexHeaderLength: Int, addressHeaderLength: Int, textByteWidth: Int): Option[String] = {
  indicatorInfoInBytes.map {
    case (goalIndByteAddress0b: Long, indLengthInBytes: Int) =>
      val indByteAddress0b = math.max(goalIndByteAddress0b, startByteAddress0b)
      val delta = indByteAddress0b - startByteAddress0b
      val realLengthInBytes = math.min(indLengthInBytes, lengthInBytes)
      //
      // if the delta is more than this, the indicator will be ambiguous because what it
      // points at isn't directly below, but possibly a further row down.
      //
      Assert.usage(delta < 16)
      Assert.usage(indLengthInBytes >= 0) // if too big we'll clamp it.
      // Column of the indicated byte within its row. Reduced while still a
      // Long so that large addresses are not truncated first.
      val indicatorOffset0b = (indByteAddress0b & 0xF).toInt
      val indOffset2 = indicatorOffset0b / 2
      // Blanks exactly as wide as the hex they skip over: 2 columns per byte,
      // 5 per pair (two bytes plus the gutter that follows them).
      val blankHexByte = " " * 2
      val blankHexPair = blankHexByte * 2 + " "
      val initialHexSpaces = blankHexPair * indOffset2 +
        (blankHexByte * (indicatorOffset0b & 0x1)) // blank first half of pair
      // Only the part of the region that falls in this row is drawn.
      val pictureLengthInBytes = math.min(16 - indicatorOffset0b, realLengthInBytes)
      val hexIndicator = {
        val picture =
          (pictureLengthInBytes, indByteAddress0b % 2) match {
            case (0, _) => "│"
            case (1, _) if realLengthInBytes =#= 1 => "├┤"
            case (1, _) => "├═"
            case (2, 0) if realLengthInBytes =#= 2 => "├──┤"
            case (2, 0) => "├──═"
            case (2, 1) => "├───┤" // middle dash spans the gutter
            case (n, s) => {
              Assert.invariant(n >= 3)
              val startCap = "├─"
              val endCap =
                if (realLengthInBytes > n) "─═"
                else "─┤"
              // A region starting on the first byte of a pair also covers the second byte of that pair.
              val startBytePic = if (s =#= 0) startCap + "──" else startCap
              // Likewise a region ending on the second byte of a pair covers the first byte of it.
              val endBytePic = if (((indicatorOffset0b + n) % 2) =#= 0) "──" + endCap else endCap
              val startRoundUp2 = (indicatorOffset0b + 2) - (indicatorOffset0b + 2) % 2
              val endRoundDown2 = (indicatorOffset0b + n - 1) - (indicatorOffset0b + n - 1) % 2
              // whole pairs lying strictly between the first and last pair
              val middleBytes = (endRoundDown2 - startRoundUp2) / 2
              val middleBytePics = Seq.fill(middleBytes)("────")
              val bytePix = startBytePic +: middleBytePics :+ endBytePic
              val pic = bytePix.mkString("─") // for the single space gutters between
              pic
            }
          }
        val pictureOnly = initialHexSpaces + picture
        val endPadLength = hexHeaderLength - pictureOnly.length
        val endPad = " " * endPadLength
        val finalPicture = pictureOnly + endPad
        finalPicture
      }
      val textIndicator = {
        val initialTextSpaces = " " * textByteWidth * indicatorOffset0b
        val picture =
          (pictureLengthInBytes, textByteWidth) match {
            case (0, _) => "│"
            case (1, 1) => "║"
            case (1, 2) => "├┤"
            case (n, w) => {
              // with two columns per byte every piece is one dash wider
              val pad = if (w =#= 1) "" else "─"
              val startCap = "├" + pad
              val endCap =
                if (realLengthInBytes > n) pad + "═"
                else pad + "┤"
              val middleBytePics = 1 to (pictureLengthInBytes - 2) map { _ => "─" + pad }
              val allPix = startCap +: middleBytePics :+ endCap
              val pic = allPix.mkString
              pic
            }
          }
        val finalPicture = initialTextSpaces + picture
        finalPicture
      }
      val initialSpaces = " " * addressHeaderLength
      val line = initialSpaces + hexIndicator + " " + textIndicator
      line
  }
}
/**
 * Renders a codepoint so that it cannot visually disturb its neighbours.
 *
 * Combining marks modify the characters around them, so they are wrapped in
 * a space on each side. A mark is supposed to always modify the preceding
 * character, but sometimes appears to modify the succeeding one
 * (e.g. \u093f or \u064d), hence a space on both sides.
 *
 * @param codepoint the codepoint to render
 * @return the text to display and the number of columns it is counted as
 */
private def homogenizeChars(codepoint: Int): (String, Int) = {
  val category = UCharacter.getType(codepoint)
  val columns = charNColumns(codepoint)
  def glyph: String = Character.toChars(codepoint).mkString
  category match {
    // can occupy a spacing position by itself, so both spaces add to the width
    case UCharacterEnums.ECharacterCategory.COMBINING_SPACING_MARK =>
      (" " + glyph + " ", columns + 2)
    // does not occupy a spacing position by itself: it uses up one of the
    // spaces, leaving the other as padding
    case UCharacterEnums.ECharacterCategory.NON_SPACING_MARK =>
      (" " + glyph + " ", columns + 1)
    case _ =>
      ("" + glyph, columns)
  }
}
/**
 * How many "places" a character takes up relative to a regular monospaced
 * font character. Used to try to get East Asian and other double-wide
 * characters to line up in columns.
 *
 * See http://unicode.org/reports/tr11/tr11-8.html
 *
 * @param codepoint the codepoint to measure
 * @return 2 for full-width and wide characters, 1 for every other East Asian Width class
 */
private def charNColumns(codepoint: Int): Int =
  UCharacter.getIntPropertyValue(codepoint, UProperty.EAST_ASIAN_WIDTH) match {
    case UCharacter.EastAsianWidth.FULLWIDTH | UCharacter.EastAsianWidth.WIDE => 2
    case UCharacter.EastAsianWidth.AMBIGUOUS |
      UCharacter.EastAsianWidth.HALFWIDTH |
      UCharacter.EastAsianWidth.NARROW |
      UCharacter.EastAsianWidth.NEUTRAL => 1
  }
/**
 * Creates a fresh decoder for the named charset, if a name is given.
 *
 * The decoder is returned as newly created, with no error actions configured
 * on it here.
 *
 * @param optEncodingName charset name to look up, or None
 * @return a new decoder for that charset, or None when no name was given
 */
private def getReportingDecoder(optEncodingName: Option[String]): Option[JavaCharsetDecoder] =
  optEncodingName.map { name => JavaCharset.forName(name).newDecoder() }
/**
* Decodes a single character starting at a byte position and returns a
* displayable representation of it.
*
* Decoder must be setup for REPORT (default) on decode error.
* We will manually handle the replacing
*
* @param startingBytePos0b index into bs of the first byte of the character
* @param endingBytePos0b index into bs of the last byte that may be used (inclusive)
* @param bs the bytes; indexes it cannot supply are read as 0
* @param decoder decoder to use, or None to map the single byte to a visible glyph
* @return the text to display, the number of bytes it stands for, and the
* number of columns it is counted as
*/
private def convertToCharRepr(
startingBytePos0b: Long,
endingBytePos0b: Long,
bs: ByteSource,
decoder: Option[JavaCharsetDecoder]): (String, Int, Int) = {
Assert.invariant(decoder.map { d => Misc.isAsciiBased(d.charset()) }.getOrElse(true))
decoder match {
case Some(dec) => {
val bb = ByteBuffer.allocate(6)
// starts with room for one char; grown below if one char is not enough
var cb = CharBuffer.allocate(1)
var cr = CoderResult.OVERFLOW
var nConsumedBytes = 0
var remapped = ""
var nCols = 0
val INVALID_CODEPOINT = -1
val lastAvailableBytePos0b = scala.math.min(endingBytePos0b, startingBytePos0b + 5) // widest possible char representation is 6 bytes.
val nBytes = (lastAvailableBytePos0b - startingBytePos0b).toInt + 1
Assert.invariant(nBytes > 0) // have to have at least 1 byte left
// copy the candidate bytes into bb
0 until nBytes foreach { i =>
val thePos = (startingBytePos0b + i).toInt
Assert.invariant(thePos >= 0)
val theByte = try {
bs.get(thePos)
} catch {
case e: IndexOutOfBoundsException => 0.toByte
}
bb.put(theByte)
}
bb.flip()
Assert.invariant(bb.remaining > 0)
do {
// An overflow means we were able to start to decode at least 1 sequence of characters, but there was either insufficient
// space in the output buffer to store said decoded char or there were left over bytes after parsing. If it is
// the former, we can proceed and we'll get the left over bytes on the next run, if it was the latter
// (as can be the case with decoding a 4 byte character sequence), we will call decode with a larger buffer
// until we consume something or the output buffer is at same capacity as input buffer
cr = dec.decode(bb, cb, true)
nConsumedBytes = bb.position()
if (cr.isOverflow && nConsumedBytes == 0) {
cb = CharBuffer.allocate(cb.capacity + 1)
}
} while (cr.isOverflow && nConsumedBytes == 0 && cb.capacity <= bb.capacity)
// Once we leave the loop, we will either have consumed bytes to process (with a variety of left over bytes that we
// don't care about) or malformed/unmappable results with no consumed bytes that we do care about so we will do a
// manual replace and set consumed bytes ourselves. We should not do an automatic replace as it creates ambiguity
// with the malformed/unmapped/consumed bytes with our current implementation of handling a decoded character at a time.
// We should never have an underflow condition with no bytes consumed. As that would indicate it needs more input than
// we've provided. Even if we only provide 1 byte of a 4 byte sequence, it will return a malformed[1]
Assert.invariant(!(cr.isUnderflow && nConsumedBytes == 0))
if ((cr.isMalformed || cr.isUnmappable) && nConsumedBytes == 0) {
//do manual replacement
remapped = dec.replacement()
// grab malformed/unmappable byte so we can keep decoding
nConsumedBytes = cr.length
nCols = charNColumns(remapped(0))
} else {
// An overflow, at this point, means that we got our one character, but there were more bytes available that could
// be decoded. We're not interested in those right now.
//
// An underflow means that we got our one character, but the bytes were exactly used up
// by constructing that one character.
//
// Either way, we got our one character
Assert.invariant(nConsumedBytes > 0)
Assert.invariant(cb.hasArray)
// the whole backing array of cb, so its length is cb's capacity
val allChars = cb.array
// more than one char is expected to be a surrogate pair; anything else
// is flagged with INVALID_CODEPOINT and handled char by char below
val uCodePoint =
if (allChars.length > 1) {
if (UCharacter.isSurrogatePair(allChars(0), allChars(1))) {
UCharacter.getCodePoint(allChars(0), allChars(1))
} else {
INVALID_CODEPOINT
}
} else allChars(0)
val (r: String, n: Int) =
if (allChars.length > 1) {
if (uCodePoint == INVALID_CODEPOINT) {
allChars.map(c => homogenizeChars(c)).foldLeft(("", 0)) {
(accForRemappedAndNcols, tupResultRemappedAndNcols) =>
(accForRemappedAndNcols._1 + tupResultRemappedAndNcols._1, //concat remapped value for each char
accForRemappedAndNcols._2 + tupResultRemappedAndNcols._2) // add width value for each char
}
} else {
homogenizeChars(uCodePoint)
}
} else {
// a single char is first remapped to a visible glyph
homogenizeChars(Misc.remapCodepointToVisibleGlyph(allChars(0)))
}
remapped = r
nCols = n
}
(remapped, nConsumedBytes, nCols)
}
case None => {
// no encoding, so use the general one based on windows-1252 where
// every byte corresponds to a character with a glyph.
val byteValue = try {
bs.get(startingBytePos0b.toInt)
} catch {
case e: IndexOutOfBoundsException => 0.toByte
}
// decoding using a decoder might produce C0 or C1 control characters or
// other whitespace characters. But we want visible glyphs no matter what for those.
//
// FIXME: This will be really broken for EBCDIC-based encodings. Pass the encoding
// so that the glyph routine can be ascii/ebcdic sensitive.
val remapped = Misc.remapByteToVisibleGlyph(byteValue)
// always exactly one byte, shown in one column
(remapped.toChar.toString, 1, 1)
}
}
}
/**
 * If displaying ONLY text, then we just display one long line
 * and replace any whitespace or non-glyph characters with glyph characters.
 *
 * @param maxLineLen the most bytes that will be shown; a longer request is cut off
 * @param startByteAddress0b data-stream address of byteSource.get(0); must be >= 0
 * @param lengthInBytesRequested number of bytes the caller would like shown; must be >= 0
 * @param byteSource the bytes
 * @param optEncodingName charset to decode the text with, if any
 * @param indicatorInfoInBytes optional (start byte address, length in bytes) to draw an
 *        indicator line for; the start must not be before startByteAddress0b
 * @return the indicator line, if one was requested, followed by the text line
 */
def dumpTextLine(maxLineLen: Int, startByteAddress0b: Long, lengthInBytesRequested: Int, byteSource: ByteSource,
  optEncodingName: Option[String] = None,
  indicatorInfoInBytes: Option[(Long, Int)] = None): Seq[String] = {
  Assert.usage(startByteAddress0b >= 0)
  Assert.usage(lengthInBytesRequested >= 0)
  val lengthInBytes = math.min(lengthInBytesRequested, maxLineLen)
  val indicatorLine = indicatorInfoInBytes.map {
    case (indicatorStartByteAddress0b, indicatorLengthInBytes) =>
      Assert.usage(indicatorStartByteAddress0b >= 0)
      Assert.usage(indicatorLengthInBytes >= 0)
      val numLeadingSpaces = (indicatorStartByteAddress0b - startByteAddress0b).toInt
      Assert.invariant(numLeadingSpaces >= 0)
      val leadingSpaces = " " * numLeadingSpaces
      val maxIndicatorLength = math.min(maxLineLen - numLeadingSpaces, lengthInBytes)
      val realIndicatorLength = math.min(indicatorLengthInBytes, maxIndicatorLength)
      val indicatorEndLength = realIndicatorLength + numLeadingSpaces
      val indicator = realIndicatorLength match {
        case 0 => "│"
        case 1 => "║"
        case n =>
          Assert.invariant(n >= 2)
          // true when the request did not fit and the line was cut off at maxLineLen
          val isCutOff = lengthInBytesRequested > lengthInBytes
          val closeOrOpenEnd =
            if (!isCutOff) {
              // everything requested is displayed
              if (indicatorEndLength <= lengthInBytesRequested) "┤" // indicator ends at or before the data
              else "═" // indicator indicates past the end. This shouldn't really happen.
            } else {
              // the number of characters displayed will meet the maximum
              if (indicatorEndLength < lengthInBytes) "┤"
              else "═"
            }
          "├" + ("─" * (n - 2)) + closeOrOpenEnd
      }
      leadingSpaces + indicator
  }
  val endByteAddress0b = math.max(startByteAddress0b + lengthInBytes - 1, 0)
  // Decoding works in byteSource indexes, where index 0 is the byte at
  // startByteAddress0b, so the end has to be expressed the same way.
  val lastBytePos0b = endByteAddress0b - startByteAddress0b
  val decoder = getReportingDecoder(optEncodingName)
  val sb = new StringBuilder
  var bytePos0b = 0L
  while (bytePos0b <= lastBytePos0b) {
    val (cR, nBytesConsumed, _) = convertToCharRepr(bytePos0b, lastBytePos0b, byteSource, decoder)
    sb ++= cR
    bytePos0b += nBytesConsumed
  }
  indicatorLine.toSeq :+ sb.mkString
}
/**
 * Gets the text header line, the number of text columns per byte, and the
 * encoding name to actually use.
 *
 * The encoding name is matched case-insensitively, independent of the
 * default locale.
 *
 * @param optEncodingName the requested encoding name, if any
 * @return for the UTF encodings, a two-columns-per-byte header and the name as given;
 *         for no encoding, ASCII and ISO-8859-1, a one-column-per-byte header and the
 *         name as given; for anything else, the one-column header and no encoding
 */
private def getTextParameters(optEncodingName: Option[String]): (String, Int, Option[String]) = {
  //
  // this def and subsequent match-case are done this way to silence
  // a scala compiler warning
  //
  def unicode = ("0~1~2~3~4~5~6~7~8~9~a~b~c~d~e~f~", 2, optEncodingName)
  // Locale.ROOT: with the default locale, e.g. Turkish, "ISO-8859-1" would
  // not lower-case to "iso-8859-1" and would go unrecognised.
  optEncodingName.map { _.toLowerCase(java.util.Locale.ROOT) } match {
    case Some("utf-8") => unicode
    case Some("utf-16be") | Some("utf-16le") => unicode
    case Some("utf-32be") | Some("utf-32le") => unicode
    case None | Some("ascii") | Some("us-ascii") | Some("iso-8859-1") =>
      ("0123456789abcdef", 1, optEncodingName)
    case Some("utf-32") | Some("utf-16") => unicode
    case Some(_) => {
      // Don't know how to dump this text specific to this encoding
      // so proceed but without encoding information
      ("0123456789abcdef", 1, None)
    }
  }
}
/**
 * Create a right-to-left presentation of the kind used for LSB-first
 * little-endian data.
 *
 * The left-to-right dump is produced first, without an indicator line, and
 * each of its non-empty lines is then mirrored: text first, then the hex
 * groups, then the address. Lines that do not look like a dump row are
 * passed through unchanged.
 *
 * @param startByteAddress0b data-stream address of byteSource.get(0)
 * @param lengthInBytes number of bytes to dump
 * @param byteSource the bytes
 * @param includeHeadingLine whether the underlying dump emits its heading line
 * @param optEncodingName charset for the text column, if any
 * @return the mirrored lines
 */
private[io] def dumpHexAndTextBytesLSBFirst(startByteAddress0b: Long, lengthInBytes: Int,
  byteSource: ByteSource,
  includeHeadingLine: Boolean = true,
  optEncodingName: Option[String] = None): Seq[String] = {
  val nonEmptyLtrLines =
    dumpHexAndTextBytes(startByteAddress0b, lengthInBytes,
      byteSource, includeHeadingLine, optEncodingName, None)
      .filterNot { line => line.length() == 0 }
  val dumpRow = """([0-9a-fA-F]{8})(:?\s+)([0-9a-fA-F ]+[0-9a-fA-F])(\s+)(.*)""".r
  def mirror(ltrLine: String): String = ltrLine match {
    case dumpRow(address, addressGap, hexGroups, textGap, text) =>
      // Reversing the whole hex region below would also swap the two digits
      // of every byte, so each byte is pre-reversed here to cancel that out.
      val bytesPreReversed = hexGroups
        .split(" ")
        .map { group => group.sliding(2, 2).map(_.reverse).mkString }
        .mkString(" ")
      text.reverse + textGap + bytesPreReversed.reverse + addressGap.reverse + address
    case other => other
  }
  nonEmptyLtrLines.map(mirror)
}
}