All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.daffodil.io.Dump.scala Maven / Gradle / Ivy

There is a newer version: 2.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.daffodil.io

import java.nio.ByteBuffer
import java.nio.CharBuffer
import java.nio.charset.CodingErrorAction
import org.apache.daffodil.exceptions.Assert
import com.ibm.icu.lang.UCharacter
import com.ibm.icu.lang.UCharacterEnums
import com.ibm.icu.lang.UProperty
import org.apache.daffodil.util.Misc
import org.apache.daffodil.equality._
import java.nio.charset.{ CharsetDecoder => JavaCharsetDecoder }
import java.nio.charset.{ Charset => JavaCharset }
import java.nio.charset.CoderResult

/**
 * Hex/Bits and text dump formats for debug/trace purposes.
 *
 * By definition this is a dump, so doesn't know much about where the
 * fields in the data are. (To do that you'd need a format description
 * language, like DFDL, but this is here to help debug DFDL descriptions,
 * so it really cannot exploit any information about the data format)
 */
class DataDumper {

  // ByteSource below is a structural type; this import enables calling its member.
  import scala.language.reflectiveCalls

  /**
   * Anything that can return the byte stored at a zero-based index.
   */
  type ByteSource = {
    def get(byteAddress0b: Int): Byte // arg must be Int, not Long, so ByteBuffers satisfy this contract.
  }

  /** Default for the maxLineLength parameter of dump. */
  val defaultMaxLineLength = 70

  /**
   * What kind of dump do you want?
   * Text only - no hex will be displayed.
   * Binary only - do you want hex? or binary bits?
   * Mixed - show both hex/bits and text side by side
   *
   * The concrete kinds are the case classes TextOnly, MixedHexLTR and
   * MixedHexRTL; each carries an optional charset name for the text part.
   */
  sealed trait Kind
  // Base of every kind that shows text; optCharset names the charset to decode with, if any.
  protected sealed class TextKind(val optCharset: Option[String]) extends Kind
  protected sealed trait BinaryKind extends Kind
  protected sealed trait HexKind extends BinaryKind // hexadecimal
  // sealed trait BitsKind extends BinaryKind // individual ones and zeros
  // Presentation direction of the dump.
  protected sealed trait Direction
  protected sealed trait RTL extends Direction // used with least-signif-bit first data like mil-std-2045
  protected sealed trait LTR extends Direction
  // case object HexLTR extends HexKind with LTR
  // case object BitsLTR extends BitsKind with LTR
  // case object HexRTL extends HexKind with RTL
  /** Text only, no hex. */
  case class TextOnly(override val optCharset: Option[String] = None) extends TextKind(optCharset)
  /** Hex and text side by side, left to right. */
  case class MixedHexLTR(override val optCharset: Option[String] = None) extends TextKind(optCharset) with HexKind with LTR
  /** Hex and text side by side, right to left. */
  case class MixedHexRTL(override val optCharset: Option[String] = None) extends TextKind(optCharset) with HexKind with RTL
  // case class MixedBits(optCharset: Option[String] = None) extends TextKind(optCharset) with BitsKind

  /**
   * Converts a range expressed in bits into the range of whole bytes that
   * contains it.
   *
   * The byte count is exact: it is the number of bytes touched by the bits
   * startBitAddress0b until startBitAddress0b + lengthInBits. A range that is
   * unaligned at both ends but lies inside one byte is 1 byte long, and an
   * empty range is 0 bytes long wherever it starts.
   *
   * @param startBitAddress0b zero-based address of the first bit; must be >= 0
   * @param lengthInBits number of bits in the range; must be >= 0
   * @return (zero-based address of the first byte, number of bytes, zero-based
   *         address of the last byte). The last-byte address is clamped so it
   *         is never negative, which means it is 0 for an empty range that
   *         starts in byte 0.
   */
  def convertBitsToBytesUnits(startBitAddress0b: Long, lengthInBits: Long): (Long, Int, Long) = {
    Assert.usage(startBitAddress0b >= 0)
    Assert.usage(lengthInBits >= 0)
    val startByteAddress0b = startBitAddress0b >> 3
    val lengthInBytes: Long =
      if (lengthInBits == 0L) 0L
      else {
        // Address of the last bit that belongs to the range (inclusive).
        val lastBitAddress0b = startBitAddress0b + lengthInBits - 1
        (lastBitAddress0b >> 3) - startByteAddress0b + 1
      }
    val endByteAddress0b = math.max(startByteAddress0b + lengthInBytes - 1, 0)
    (startByteAddress0b, lengthInBytes.toInt, endByteAddress0b)
  }

  /**
   * Produces a dump of a window of data as a sequence of lines.
   *
   * A dump may begin with an indicator line that uses draw-characters to
   * point at the significant part of the data, e.g., where in the dump the
   * current element is, and with a heading line that numbers the bytes.
   * The data lines follow. They contain up to 3 sections: an address, a hex
   * dump, and a text dump.
   *
   * If the kind names a charset, that charset is used to display text, and
   * decode errors display as the decoder's replacement character. If it does
   * not, every byte is displayed as one character, using glyph characters
   * for the C0 and C1 controls and any other non-glyph codepoint.
   *
   * All positions and lengths given to this method are in bits; they are
   * converted to whole bytes before dumping.
   *
   * @param kind which presentation to produce. TextOnly and MixedHexLTR are
   *        supported with or without a charset; MixedHexRTL only without one.
   *        Anything else is a usage error.
   * @param shamStartBitAddress0b the location in the data stream where the
   *        data in byteSource starts. E.g., the byte at byteSource.get(0) is
   *        from the data stream at this address.
   * @param lengthInBits how much data to dump.
   * @param byteSource a window into the data stream.
   * @param maxLineLength longest text line to produce; used by TextOnly only.
   * @param includeHeadingLine whether the hex dumps start with a heading line.
   * @param indicatorInfo start position and length of the "region of
   *        interest". It is not used by MixedHexRTL.
   * @return the lines of the dump, in order.
   */
  def dump(kind: Kind, shamStartBitAddress0b: Long, lengthInBits: Int, byteSource: ByteSource, maxLineLength: Int = defaultMaxLineLength,
    includeHeadingLine: Boolean = true,
    indicatorInfo: Option[(Long, Int)] = None): Seq[String] = {
    val (firstByte0b, nBytes, _) = convertBitsToBytesUnits(shamStartBitAddress0b, lengthInBits)
    // The region of interest, converted from bits to bytes as well.
    val byteIndicator = indicatorInfo.map {
      case (bitPos0b, nBits) =>
        val (indFirstByte0b, indNBytes, _) = convertBitsToBytesUnits(bitPos0b, nBits)
        (indFirstByte0b, indNBytes)
    }
    kind match {
      case TextOnly(charsetName) =>
        dumpTextLine(maxLineLength, firstByte0b, nBytes, byteSource, charsetName, byteIndicator)
      case MixedHexLTR(charsetName) =>
        dumpHexAndTextBytes(firstByte0b, nBytes, byteSource, includeHeadingLine, charsetName, byteIndicator)
      case MixedHexRTL(None) =>
        dumpHexAndTextBytesLSBFirst(firstByte0b, nBytes, byteSource, includeHeadingLine, None)
      case _ =>
        Assert.usageError("unsupported dump kind")
    }
  }

  //
  // These vars are used by the txt dump when the multiple bytes of a
  // character wrap from one line to the next.
  //
  // They are mutable state of the instance: dumpHexAndTextBytes resets both
  // before it produces its first row, and textDump reassigns both for every
  // character it emits.
  //
  // paddingFromPriorLine is the filler text to put at the start of the next row.
  // nPadBytesFromPriorLine is how many bytes at the start of the next row
  // already belong to a character shown on the prior row.
  //
  var paddingFromPriorLine = ""
  var nPadBytesFromPriorLine = 0

  /**
   * Appends the text section of one dump row to txtsb.
   *
   * Characters are decoded one at a time starting at row column rowStart0b
   * (after skipping any bytes already claimed by a character that began on
   * the prior row) and continuing through column limit0b. Each character is
   * followed by filler so that it occupies textByteWidth columns per byte it
   * consumed, less any extra columns the character itself takes up.
   *
   * When a character's bytes run past limit0b, the filler that did not fit is
   * remembered in paddingFromPriorLine and the number of overhanging bytes in
   * nPadBytesFromPriorLine, so the next call can start the following row
   * with them.
   *
   * @param addr value added to a row column to obtain the position handed
   *        to convertToCharRepr
   * @param rowStart0b first column of the row to display
   * @param txtsb receives the text
   * @param limit0b last column of the row to display, inclusive
   * @param endByteAddress0b passed through to convertToCharRepr as the last
   *        position it may read
   * @param byteSource where the bytes come from
   * @param decoder charset decoder to use, if any
   * @param textByteWidth number of columns each byte occupies in the text section
   */
  private def textDump(addr: Long, rowStart0b: Int, txtsb: StringBuilder,
    limit0b: Int, endByteAddress0b: Long, byteSource: ByteSource, decoder: Option[JavaCharsetDecoder],
    textByteWidth: Int): Unit = {
    // Filler for the columns of a byte beyond the first one. Empty when each
    // byte occupies a single column.
    val padByteRep = "~" * (textByteWidth - 1)
    var i = rowStart0b + nPadBytesFromPriorLine
    txtsb ++= paddingFromPriorLine
    while (i <= limit0b) {
      val bytePos0b = addr + i
      val (charRep, nBytesConsumed, width) = convertToCharRepr(bytePos0b, endByteAddress0b, byteSource, decoder)
      Assert.invariant(nBytesConsumed > 0)
      // some characters will print double width. It is assumed all such
      // characters occupy at least one byte.
      Assert.invariant(nBytesConsumed >= width)
      //
      // Will padding wrap to next line? It does when the character needs
      // more bytes than are left in this row.
      //
      val nBytesLeftInRow = limit0b - i + 1
      val nBytesPastEnd =
        if (nBytesConsumed >= 2 && nBytesConsumed <= 4) math.max(0, nBytesConsumed - nBytesLeftInRow)
        else 0
      paddingFromPriorLine = padByteRep * 2 * nBytesPastEnd
      nPadBytesFromPriorLine = nBytesPastEnd
      //
      // Adjust padding downward if the character is double wide.
      //
      val nPadUnits = (nBytesConsumed, width) match {
        case (1, 1) => 1
        case (1, 2) => 0
        case (n, w) if n >= 2 && n <= 4 => 2 * n - w
        case _ => Assert.impossible()
      }
      val padding = padByteRep * nPadUnits
      // The part of the padding that wraps is emitted at the start of the next row instead.
      val trimmedPadding = padding.take(padding.length - paddingFromPriorLine.length)
      txtsb ++= charRep + trimmedPadding
      i += nBytesConsumed
    }
  }
  /**
   * Creates a dump that looks like Emacs Hexl mode.
   *
   * Note that the character glyphs on the right depend on the
   * font being used. These all are printing characters but whether they
   * line up perfectly under the heading columns depends on the font
   * being used. In particular, it makes use of the C0 control picture
   * unicode characters to give glyphs to those otherwise non-printing
   * characters, but these are not all monospaced widths.
   *
   * For examples see the TestDump class.
   *
   * Each row shows an 8 digit hex address and ": " (10 columns), then 16
   * bytes as 8 pairs of hex digits where each pair is followed by a space
   * (40 columns), then a space, then the text.
   *
   * @param startByteAddress0b address of the byte at byteSource.get(0); must be >= 0
   * @param lengthInBytes number of bytes to dump; must be >= 0
   * @param byteSource where the bytes come from. Positions it rejects with
   *        IndexOutOfBoundsException are shown as zero bytes.
   * @param includeHeadingLine whether to emit the column heading line
   * @param optEncodingName charset to decode the text section with, if any
   * @param indicatorInfoInBytes start address and length of the region to
   *        point at with an indicator line, if any
   * @return the indicator line (if any), the heading line (if requested), and the rows
   */
  private[io] def dumpHexAndTextBytes(startByteAddress0b: Long, lengthInBytes: Int, byteSource: ByteSource,
    includeHeadingLine: Boolean,
    optEncodingName: Option[String],
    indicatorInfoInBytes: Option[(Long, Int)]): Seq[String] = {

    Assert.usage(startByteAddress0b >= 0)
    Assert.usage(lengthInBytes >= 0)
    val (textDataHeader, textByteWidth, optEncName) = getTextParameters(optEncodingName)
    val decoder = getReportingDecoder(optEncName)
    val endByteAddress0b = math.max(startByteAddress0b + lengthInBytes - 1, 0)
    // The same position as an index into byteSource, which starts at startByteAddress0b.
    val lastByteIndex0b = endByteAddress0b - startByteAddress0b
    val addressHeader = """87654321  """ // as wide as an address plus firstGutter
    val hexHeader = """0011 2233 4455 6677 8899 aabb ccdd eeff"""
    val headingHex = addressHeader + hexHeader
    val firstGutter = ": "
    val offset0b = (startByteAddress0b & 0xF).toInt
    val hexRegionInitialWhitespace = {
      val offset2 = offset0b / 2
      // 5 columns for each skipped pair, and 2 to blank the first half of a pair
      "     " * offset2 + ("  " * (offset0b & 0x1))
    }
    val textRegionInitialWhitespace = (" " * textByteWidth) * offset0b
    val indicatorLine = makeHexAndTextIndicatorLine(indicatorInfoInBytes, startByteAddress0b, lengthInBytes,
      hexHeader.length, addressHeader.length, textByteWidth)
    var isFirstRow = true
    var isLastRow = false
    // Rows start on multiples of 16. The same mask is used for both ends of the range.
    val rowAddressMask = 0x7FFFFFFFFFFFFFF0L
    val firstLeftAddress = startByteAddress0b & rowAddressMask
    val lastLeftAddress = endByteAddress0b & rowAddressMask
    // hexHeader has no trailing space, but a row of hex does, hence two spaces here.
    val headingLine = headingHex + "  " + textDataHeader
    val ab = scala.collection.mutable.ArrayBuffer[String]()
    indicatorLine.foreach { line => ab += line }
    if (includeHeadingLine) ab += headingLine
    val hexsb = new StringBuilder
    val txtsb = new StringBuilder
    var rowStart0b = offset0b
    var limit0b = 15 // except for last row it will be shortened. Inclusive limit.
    //
    // These vars are used by the txt dump when the multiple bytes of a
    // character wrap from one line to the next.
    //
    paddingFromPriorLine = ""
    nPadBytesFromPriorLine = 0
    firstLeftAddress to lastLeftAddress by 16 foreach { addr =>
      //
      // for each line/row, we assemble the address part, the hex part, and the text part
      //
      if (addr == lastLeftAddress) {
        isLastRow = true
        limit0b = (endByteAddress0b & 0xf).toInt // might be fewer than all 16 for last row
      }
      val addrString = "%08x".format(addr)
      hexsb ++= addrString + firstGutter
      if (isFirstRow) {
        isFirstRow = false
        hexsb ++= hexRegionInitialWhitespace
        txtsb ++= textRegionInitialWhitespace
      }
      //
      // Hex dump
      //
      rowStart0b to limit0b foreach { i =>
        val bytePos0b = addr + i - startByteAddress0b
        val byteValue =
          try {
            byteSource.get(bytePos0b.toInt)
          } catch {
            case _: IndexOutOfBoundsException => 0.toByte
          }
        val hex = "%02x".format(byteValue)
        val gutter = if ((i & 0x1) == 0) "" else " "
        hexsb ++= hex + gutter
      }
      //
      // Text dump
      //
      // Positions given to textDump are indexes into byteSource, so the end
      // limit must be an index into byteSource too.
      //
      textDump(addr - startByteAddress0b, rowStart0b, txtsb, limit0b, lastByteIndex0b, byteSource, decoder, textByteWidth)
      if (isLastRow) {
        //
        // Trailing spaces on the hex dump: 2 columns per missing byte, plus the gutter
        //
        (limit0b + 1) to 15 foreach { i =>
          val gutter = if ((i & 0x1) == 1) " " else ""
          hexsb ++= "  " + gutter
        }
        //
        // Trailing spaces on the text dump
        //
        (limit0b + 1) to 15 foreach { _ =>
          txtsb ++= (" " * textByteWidth)
        }
      }
      ab += hexsb.mkString + " " + txtsb.mkString
      hexsb.clear()
      txtsb.clear()
      //
      // we're done with first row, so subsequent rows will have
      // zero as the row start.
      //
      rowStart0b = 0
    }
    ab
  }

  // indicators over these dumps are of maximum length 16 bytes.
  //
  // like this:
  //           ├─────────────────────────────────────═  ├──────────────═
  // 87654321  0011 2233 4455 6677 8899 aabb ccdd eeff  0123456789abcdef
  // or
  //           ├─────────────────────────────────────┤  ├──────────────┤
  // 87654321  0011 2233 4455 6677 8899 aabb ccdd eeff  0123456789abcdef
  //
  // but they can also be shorter than 16 bytes if the region starts further
  // in from the left.
  //
  /**
   * Builds the line of draw-characters that points at the region of interest
   * in a hex and text dump.
   *
   * @param indicatorInfoInBytes start address and length of the region, if any
   * @param startByteAddress0b address of the first byte of the dump. A region
   *        that starts before it is treated as starting at it; a region that
   *        starts 16 or more bytes after it is a usage error.
   * @param lengthInBytes length of the dump; the region is clamped to it
   * @param hexHeaderLength width of the hex section's heading
   * @param addressHeaderLength width of the address section
   * @param textByteWidth number of columns each byte occupies in the text section
   * @return the indicator line, or None when there is no region
   */
  private def makeHexAndTextIndicatorLine(indicatorInfoInBytes: Option[(Long, Int)], startByteAddress0b: Long, lengthInBytes: Int,
    hexHeaderLength: Int, addressHeaderLength: Int, textByteWidth: Int) = {
    indicatorInfoInBytes.map {
      case (goalIndByteAddress0b: Long, indLengthInBytes: Int) =>
        val indByteAddress0b = math.max(goalIndByteAddress0b, startByteAddress0b)
        val delta = indByteAddress0b - startByteAddress0b
        val realLengthInBytes = math.min(indLengthInBytes, lengthInBytes)
        //
        // if the delta is more than this, the indicator will be ambiguous because what it
        // points at isn't directly below, but possibly a further row down.
        //
        Assert.usage(delta < 16)
        Assert.usage(indLengthInBytes >= 0) // if too big we'll clamp it.
        // Mask before narrowing so that a large address cannot produce a negative column.
        val indicatorOffset0b = (indByteAddress0b & 0xF).toInt
        val indOffset2 = indicatorOffset0b / 2
        // 5 columns for each skipped pair, and 2 to blank the first half of a pair
        val initialHexSpaces = "     " * indOffset2 + ("  " * (indicatorOffset0b & 0x1))
        // Only the part of the region that is on the first row is drawn.
        val pictureLengthInBytes = math.min(16 - indicatorOffset0b, realLengthInBytes)
        val hexIndicator = {
          val picture = (pictureLengthInBytes, indByteAddress0b % 2) match {
            case (0, _) => "│"
            case (1, _) if realLengthInBytes =#= 1 => "├┤"
            case (1, _) => "├═"
            case (2, 0) if realLengthInBytes =#= 2 => "├──┤"
            case (2, 0) => "├──═"
            case (2, 1) => "├───┤" // middle dash spans the gutter
            case (n, s) => {
              Assert.invariant(n >= 3)
              val startCap = "├─"
              // An open end means the region continues past what is drawn.
              val endCap = if (realLengthInBytes > n) "─═" else "─┤"
              val startBytePic = if (s =#= 0) startCap + "──" else startCap
              val endBytePic = if (((indicatorOffset0b + n) % 2) =#= 0) "──" + endCap else endCap
              val startRoundUp2 = (indicatorOffset0b + 2) - (indicatorOffset0b + 2) % 2
              val endRoundDown2 = (indicatorOffset0b + n - 1) - (indicatorOffset0b + n - 1) % 2
              // Number of whole pairs between the first and last pictures.
              val middleBytes = (endRoundDown2 - startRoundUp2) / 2
              val middleBytePics = Seq.fill(middleBytes)("────")
              val bytePix = startBytePic +: middleBytePics :+ endBytePic
              bytePix.mkString("─") // for the single space gutters between
            }
          }
          val pictureOnly = initialHexSpaces + picture
          // Pad to the width of the hex heading so the text indicator lines up.
          val endPad = " " * (hexHeaderLength - pictureOnly.length)
          pictureOnly + endPad
        }
        val textIndicator = {
          val initialTextSpaces = " " * textByteWidth * indicatorOffset0b
          val picture = (pictureLengthInBytes, textByteWidth) match {
            case (0, _) => "│"
            case (1, 1) => "║"
            case (1, 2) => "├┤"
            case (n, w) => {
              val pad = if (w =#= 1) "" else "─"
              val startCap = "├" + pad
              val endCap = if (realLengthInBytes > n) pad + "═" else pad + "┤"
              val middleBytePics = 1 to (pictureLengthInBytes - 2) map { _ => "─" + pad }
              val allPix = startCap +: middleBytePics :+ endCap
              allPix.mkString
            }
          }
          initialTextSpaces + picture
        }
        val initialSpaces = " " * addressHeaderLength
        // Two spaces, to match the heading line.
        initialSpaces + hexIndicator + "  " + textIndicator
    }
  }

  /**
   * Some characters act as combining marks and modify characters surrounding them.
   * In order for us to display these characters as they are, we need to combine them with
   * the appropriate number of spaces so they don't disturb other characters around them
   *
   * @param codepoint the character to display
   * @return the text to display for it, and the number of columns that text is counted as
   */
  private def homogenizeChars(codepoint: Int): (String, Int) = {
    val charType = UCharacter.getType(codepoint)
    val nCols = charNColumns(codepoint)
    val charText = Character.toChars(codepoint).mkString
    // supposed to always modify preceding character,
    // but sometimes appears to modify succeeding character
    // e.g \u093f or \u064d. Dual spacing is here to protect from this
    charType match {
      case UCharacterEnums.ECharacterCategory.COMBINING_SPACING_MARK => {
        // can occupy spacing position by themselves
        (" " + charText + " ", nCols + 2)
      }
      case UCharacterEnums.ECharacterCategory.NON_SPACING_MARK => {
        // do not occupy spacing position by themselves
        // (so 1 space should be consumed, leaving an extra 1 for padding)
        (" " + charText + " ", nCols + 1)
      }
      case _ => {
        ("" + charText, nCols)
      }
    }
  }

  /**
   * The width of the character in terms of how many "places" it uses up
   * relative to a regular monospaced font character. This is for trying to get
   * east asian and other double-wide characters to line up properly in columns.
   *
   * @return 2 for fullwidth and wide characters, otherwise 1
   */
  private def charNColumns(codepoint: Int): Int = {
    val charWidth = UCharacter.getIntPropertyValue(codepoint, UProperty.EAST_ASIAN_WIDTH)
    charWidth match {
      //
      // see http://unicode.org/reports/tr11/tr11-8.html
      //
      case UCharacter.EastAsianWidth.AMBIGUOUS => 1
      case UCharacter.EastAsianWidth.FULLWIDTH => 2
      case UCharacter.EastAsianWidth.HALFWIDTH => 1
      case UCharacter.EastAsianWidth.NARROW => 1
      case UCharacter.EastAsianWidth.WIDE => 2
      case UCharacter.EastAsianWidth.NEUTRAL => 1
    }
  }

  /**
   * Creates a new decoder for the named charset, or None when no charset is named.
   *
   * The decoder's error actions are left as they are on a newly created decoder.
   */
  private def getReportingDecoder(optEncodingName: Option[String]): Option[JavaCharsetDecoder] =
    optEncodingName.map { JavaCharset.forName(_).newDecoder() }

  /**
   * Produces the displayable form of the one character that starts at
   * startingBytePos0b.
   *
   * Decoder must be setup for REPORT (default) on decode error.
   * We will manually handle the replacing
   *
   * @param startingBytePos0b position in bs of the first byte of the character
   * @param endingBytePos0b last position in bs that may be read, inclusive;
   *        must not be before startingBytePos0b when a decoder is given
   * @param bs where the bytes come from. Positions it rejects with
   *        IndexOutOfBoundsException are read as zero bytes.
   * @param decoder charset decoder to use. Without one, the single byte at
   *        startingBytePos0b is mapped to a visible glyph.
   * @return (text to display, number of bytes consumed, number of columns the text is counted as)
   */
  private def convertToCharRepr(
    startingBytePos0b: Long,
    endingBytePos0b: Long,
    bs: ByteSource,
    decoder: Option[JavaCharsetDecoder]): (String, Int, Int) = {

    Assert.invariant(decoder.map { d => Misc.isAsciiBased(d.charset()) }.getOrElse(true))
    decoder match {
      case Some(dec) => {
        val bb = ByteBuffer.allocate(6)
        var cb = CharBuffer.allocate(1)
        var cr = CoderResult.OVERFLOW
        var nConsumedBytes = 0
        var remapped = ""
        var nCols = 0
        val INVALID_CODEPOINT = -1
        // widest possible char representation is 6 bytes.
        val lastAvailableBytePos0b = scala.math.min(endingBytePos0b, startingBytePos0b + 5)
        val nBytes = (lastAvailableBytePos0b - startingBytePos0b).toInt + 1
        Assert.invariant(nBytes > 0) // have to have at least 1 byte left
        0 until nBytes foreach { i =>
          val thePos = (startingBytePos0b + i).toInt
          Assert.invariant(thePos >= 0)
          val theByte =
            try {
              bs.get(thePos)
            } catch {
              case _: IndexOutOfBoundsException => 0.toByte
            }
          bb.put(theByte)
        }
        bb.flip()
        Assert.invariant(bb.remaining > 0)
        do {
          // An overflow means we were able to start to decode at least 1 sequence of characters, but there was either insufficient
          // space in the output buffer to store said decoded char or there were left over bytes after parsing. If it is
          // the former, we can proceed and we'll get the left over bytes on the next run, if it was the latter
          // (as can be the case with decoding a 4 byte character sequence), we will call decode with a larger buffer
          // until we consume something or the output buffer is at same capacity as input buffer
          cr = dec.decode(bb, cb, true)
          nConsumedBytes = bb.position()
          if (cr.isOverflow && nConsumedBytes == 0) {
            cb = CharBuffer.allocate(cb.capacity + 1)
          }
        } while (cr.isOverflow && nConsumedBytes == 0 && cb.capacity <= bb.capacity)
        // Once we leave the loop, we will either have consumed bytes to process (with a variety of left over bytes that we
        // don't care about) or malformed/unmappable results with no consumed bytes that we do care about so we will do a
        // manual replace and set consumed bytes ourselves. We should not do an automatic replace as it creates ambiguity
        // with the malformed/unmapped/consumed bytes with our current implementation of handling a decoded character at a time.

        // We should never have an underflow condition with no bytes consumed. As that would indicate it needs more input than
        // we've provided. Even if we only provide 1 byte of a 4 byte sequence, it will return a malformed[1]
        Assert.invariant(!(cr.isUnderflow && nConsumedBytes == 0))
        if ((cr.isMalformed || cr.isUnmappable) && nConsumedBytes == 0) {
          // do manual replacement
          remapped = dec.replacement()
          // grab malformed/unmappable byte so we can keep decoding
          nConsumedBytes = cr.length
          nCols = charNColumns(remapped(0))
        } else {
          // An overflow, at this point, means that we got our one character, but there were more bytes available that could
          // be decoded. We're not interested in those right now.
          //
          // An underflow means that we got our one character, but the bytes were exactly used up
          // by constructing that one character.
          //
          // Either way, we got our one character
          Assert.invariant(nConsumedBytes > 0)
          Assert.invariant(cb.hasArray)
          val allChars = cb.array
          val uCodePoint =
            if (allChars.length > 1) {
              if (UCharacter.isSurrogatePair(allChars(0), allChars(1))) {
                UCharacter.getCodePoint(allChars(0), allChars(1))
              } else {
                INVALID_CODEPOINT
              }
            } else allChars(0)
          val (r: String, n: Int) =
            if (allChars.length > 1) {
              if (uCodePoint == INVALID_CODEPOINT) {
                allChars.map(c => homogenizeChars(c)).foldLeft(("", 0)) {
                  (accForRemappedAndNcols, tupResultRemappedAndNcols) =>
                    (accForRemappedAndNcols._1 + tupResultRemappedAndNcols._1, // concat remapped value for each char
                      accForRemappedAndNcols._2 + tupResultRemappedAndNcols._2) // add width value for each char
                }
              } else {
                homogenizeChars(uCodePoint)
              }
            } else {
              homogenizeChars(Misc.remapCodepointToVisibleGlyph(allChars(0)))
            }
          remapped = r
          nCols = n
        }
        (remapped, nConsumedBytes, nCols)
      }
      case None => {
        // no encoding, so use the general one based on windows-1252 where
        // every byte corresponds to a character with a glyph.
        val byteValue =
          try {
            bs.get(startingBytePos0b.toInt)
          } catch {
            case _: IndexOutOfBoundsException => 0.toByte
          }
        // decoding using a decoder might produce C0 or C1 control characters or
        // other whitespace characters. But we want visible glyphs no matter what for those.
        //
        // FIXME: This will be really broken for EBCDIC-based encodings. Pass the encoding
        // so that the glyph routine can be ascii/ebcdic sensitive.
        val remapped = Misc.remapByteToVisibleGlyph(byteValue)
        (remapped.toChar.toString, 1, 1)
      }
    }
  }

  /**
   * If displaying ONLY text, then we just display one long line
   * and replace any whitespace or non-glyph characters with glyph characters.
   *
   * @param maxLineLen at most this many bytes are displayed
   * @param startByteAddress0b address of the byte at byteSource.get(0); must be >= 0
   * @param lengthInBytesRequested number of bytes to display; must be >= 0
   * @param byteSource where the bytes come from
   * @param optEncodingName charset to decode with, if any
   * @param indicatorInfoInBytes start address and length of the region to
   *        point at with an indicator line, if any. The region must not
   *        start before startByteAddress0b.
   * @return the indicator line (if any) followed by the line of text
   */
  def dumpTextLine(maxLineLen: Int, startByteAddress0b: Long, lengthInBytesRequested: Int, byteSource: ByteSource,
    optEncodingName: Option[String] = None,
    indicatorInfoInBytes: Option[(Long, Int)] = None): Seq[String] = {
    Assert.usage(startByteAddress0b >= 0)
    Assert.usage(lengthInBytesRequested >= 0)
    val lengthInBytes = math.min(lengthInBytesRequested, maxLineLen)
    val indicatorLine = indicatorInfoInBytes.map {
      case (indicatorStartByteAddress0b, indicatorLengthInBytes) => {
        Assert.usage(indicatorStartByteAddress0b >= 0)
        Assert.usage(indicatorLengthInBytes >= 0)
        val numLeadingSpaces = (indicatorStartByteAddress0b - startByteAddress0b).toInt
        Assert.invariant(numLeadingSpaces >= 0)
        val leadingSpaces = " " * numLeadingSpaces
        val maxIndicatorLength = math.min(maxLineLen - numLeadingSpaces, lengthInBytes)
        val realIndicatorLength = math.min(indicatorLengthInBytes, maxIndicatorLength)
        val maxLineLength = math.min(maxLineLen, lengthInBytes)
        val indicatorEndLength = realIndicatorLength + numLeadingSpaces
        val indicator = realIndicatorLength match {
          case 0 => "│"
          case 1 => "║"
          case n => {
            Assert.invariant(n >= 2)
            val nDashes = n - 2
            val closeOrOpenEnd =
              if (lengthInBytesRequested <= maxLineLength) {
                // the number of characters displayed will be shorter than
                // the max width
                if (indicatorEndLength <= lengthInBytesRequested) "┤" // indicator ends at or before the data
                else "═" // indicator indicates past the end. This shouldn't really happen.
              } else {
                // the number of characters displayed will meet the maximum
                if (indicatorEndLength < maxLineLength) "┤"
                else "═"
              }
            "├" + ("─" * nDashes) + closeOrOpenEnd
          }
        }
        leadingSpaces + indicator
      }
    }
    val endByteAddress0b = math.max(startByteAddress0b + lengthInBytes - 1, 0)
    // The same position as an index into byteSource, which starts at startByteAddress0b.
    val lastByteIndex0b = endByteAddress0b - startByteAddress0b
    val decoder = getReportingDecoder(optEncodingName)
    var i = startByteAddress0b
    val sb = new StringBuilder
    while (i <= endByteAddress0b) {
      // Both positions given to convertToCharRepr are indexes into byteSource.
      val (cR, nBytesConsumed, _) = convertToCharRepr(i - startByteAddress0b, lastByteIndex0b, byteSource, decoder)
      sb ++= cR
      i += nBytesConsumed
    }
    val s = sb.mkString
    val lines: Seq[String] = indicatorLine.toSeq :+ s
    lines
  }

  /**
   * gets header line, width of a character, and encoding name to actually use
   *
   * Encoding names are compared without regard to case. A name that is not
   * recognized gets the single-column layout and no encoding.
   */
  private def getTextParameters(optEncodingName: Option[String]): (String, Int, Option[String]) = {
    //
    // this def and subsequent match-case are done this way to silence
    // a scala compiler warning
    //
    def unicode = ("0~1~2~3~4~5~6~7~8~9~a~b~c~d~e~f~", 2, optEncodingName)
    // Locale.ROOT so that the result does not depend on the default locale
    // (in a Turkish locale "ISO-8859-1" would not lower-case to "iso-8859-1").
    optEncodingName.map { _.toLowerCase(java.util.Locale.ROOT) } match {
      case Some("utf-8") => unicode
      case Some("utf-16be") | Some("utf-16le") => unicode
      case Some("utf-32be") | Some("utf-32le") => unicode
      case None | Some("ascii") | Some("us-ascii") | Some("iso-8859-1") => ("0123456789abcdef", 1, optEncodingName)
      case Some("utf-32") | Some("utf-16") => unicode
      case Some(_) => {
        // Don't know how to dump this text specific to this encoding
        // so proceed but without encoding information
        ("0123456789abcdef", 1, None)
      }
    }
  }

  /**
   * Create a right-to-left presentation of the kind used for LSB-first
   * little-endian data
   *
   * The left-to-right dump is produced first, without an indicator line,
   * and each of its rows is then mirrored: text, hex and address change
   * sides, and the two digits of every byte are kept in reading order.
   * Lines that are not rows (such as the heading) are passed through as is.
   */
  private[io] def dumpHexAndTextBytesLSBFirst(startByteAddress0b: Long, lengthInBytes: Int, byteSource: ByteSource,
    includeHeadingLine: Boolean = true,
    optEncodingName: Option[String] = None): Seq[String] = {
    val ltrDump = dumpHexAndTextBytes(startByteAddress0b, lengthInBytes, byteSource, includeHeadingLine, optEncodingName, None)
    val ltrLines = ltrDump.filterNot { _.length() == 0 }
    val wholeLineRegex = """([0-9a-fA-F]{8})(:?\s+)([0-9a-fA-F ]+[0-9a-fA-F])(\s+)(.*)""".r
    val rtlLines = ltrLines.map {
      case wholeLineRegex(addr, sep1, hexlBytes, sep2, asciiText) => {
        // Reverse the digits within each byte now, so that reversing the
        // whole hex section below leaves each byte reading correctly.
        val hexlNibblesSwitch = hexlBytes
          .split(" ")
          .map { hexlGroup =>
            hexlGroup
              .sliding(2, 2) // grab each byte
              .map(_.reverse) // reverse the byte
              .mkString // and convert back to string
          }
          .mkString(" ") // convert back to string
        asciiText.reverse + sep2 + hexlNibblesSwitch.reverse + sep1.reverse + addr
      }
      case x => x
    }
    rtlLines
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy