org.apache.daffodil.unparsers.runtime1.SpecifiedLengthUnparsers.scala Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of daffodil-runtime1-unparser_2.12
daffodil-runtime1-unparser
There is a newer version: 3.8.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.daffodil.unparsers.runtime1

import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.schema.annotation.props.gen.LengthUnits
import org.apache.daffodil.lib.schema.annotation.props.gen.Representation
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
import org.apache.daffodil.lib.util.MaybeJULong
import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
import org.apache.daffodil.runtime1.infoset.DIElement
import org.apache.daffodil.runtime1.infoset.DISimple
import org.apache.daffodil.runtime1.infoset.Infoset
import org.apache.daffodil.runtime1.infoset.RetryableException
import org.apache.daffodil.runtime1.processors.CharsetEv
import org.apache.daffodil.runtime1.processors.ElementRuntimeData
import org.apache.daffodil.runtime1.processors.Evaluatable
import org.apache.daffodil.runtime1.processors.ParseOrUnparseState
import org.apache.daffodil.runtime1.processors.Success
import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInBitsEv
import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInCharactersEv
import org.apache.daffodil.runtime1.processors.unparsers._

import passera.unsigned.ULong

/**
 * Restricts the bits available for unparsing to just those within
 * the specified length computed.
 *
 * If a unparser (supplied as arg) runs past the available space,
 * that's an unparse error.
 *
 * Truncation of strings - the only case where we truncate, and only when
 * dfdl:truncateSpecifiedLengthString is 'yes', is handled elsewhere.
 */
sealed abstract class SpecifiedLengthUnparserBase(eUnparser: Unparser, erd: ElementRuntimeData)

final class SpecifiedLengthExplicitImplicitUnparser(
  eUnparser: Unparser,
  erd: ElementRuntimeData,
  targetLengthInBitsEv: UnparseTargetLengthInBitsEv,
  maybeTargetLengthInCharactersEv: Maybe[UnparseTargetLengthInCharactersEv]
) extends CombinatorUnparser(erd) {

  override lazy val runtimeDependencies = Vector()

  override lazy val childProcessors = Vector(eUnparser)

  private val libEv = targetLengthInBitsEv.lengthInBitsEv
  private val mcsEv = libEv.maybeCharsetEv
  private val lengthUnits = libEv.lengthUnits
  private val lengthKind = libEv.lengthKind

  private def getCharset(state: UState) = {
    val csEv: CharsetEv = mcsEv.get
    val dcs = csEv.evaluate(state)
    dcs
  }

  override final def unparse(state: UState): Unit = {

    erd.impliedRepresentation match {
      case Representation.Binary =>
        unparseBits(state)
      case Representation.Text => {
        val dcs = getCharset(state)
        if (dcs.maybeFixedWidth.isDefined)
          unparseBits(state)
        else {
          // we know the encoding is variable width characters
          // but we don't know if the length units characters or bits/bytes.
          lengthUnits match {
            case LengthUnits.Bits | LengthUnits.Bytes =>
              unparseVarWidthCharactersInBits(state)
            case LengthUnits.Characters =>
              unparseVarWidthCharactersInCharacters(state)
          }
        }
      }
    }
  }

  /**
   * Encoding is variable width (e.g., utf-8, but the
   * target length is expressed in bits.
   *
   * Truncation, in this case, requires determining how many
   * of the string's characters fit within the available target length
   * bits.
   */
  def unparseVarWidthCharactersInBits(state: UState): Unit = {
    val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)

    if (maybeTLBits.isDefined) {
      //
      // We know the target length. We can use it.
      //
      if (areTruncating) {
        val diSimple = state.currentInfosetNode.asSimple
        val v = diSimple.dataValue.getString
        val tl = maybeTLBits.get
        val cs = getCharset(state)
        val newV = state.truncateToBits(v, cs, tl)
        //
        // JIRA DFDL-1592
        //
        // BUG: should not modify the actual dataset value.
        // as fn:string-length of the value should always return the same
        // value which is the un-truncated length.
        //
        diSimple.overwriteDataValue(newV)
      }
      eUnparser.unparse1(state)
    } else {
      // target length unknown
      // ignore constraining the output length. Just unparse it.
      //
      // This happens when we're unparsing, and this element depends on a prior element for
      // determining its length, but that prior element has dfdl:outputValueCalc that depends
      // on this element.
      // This breaks the chicken-egg cycle.
      //
      eUnparser.unparse1(state)
    }
  }

  private def areTruncating = {
    if (erd.isSimpleType && (erd.optPrimType.get eq PrimType.String)) {
      Assert.invariant(erd.optTruncateSpecifiedLengthString.isDefined)
      erd.optTruncateSpecifiedLengthString.get
    } else
      false
  }

  /**
   * Encoding is variable width (e.g., utf-8). The target
   * length is expressed in characters.
   */
  def unparseVarWidthCharactersInCharacters(state: UState): Unit = {

    //
    // variable-width encodings and lengthUnits characters, and lengthKind explicit
    // is not supported (currently) for complex types
    //
    state.schemaDefinitionUnless(
      erd.isSimpleType,
      "Variable width character encoding '%s', dfdl:lengthKind '%s' and dfdl:lengthUnits '%s' are not supported for complex types.",
      getCharset(state).name,
      lengthKind.toString,
      lengthUnits.toString
    )

    Assert.invariant(erd.isSimpleType)
    Assert.invariant(this.maybeTargetLengthInCharactersEv.isDefined)
    val tlEv = this.maybeTargetLengthInCharactersEv.get
    val tlChars = this.getMaybeTL(state, tlEv)

    if (tlChars.isDefined) {
      //
      // possibly truncate
      //
      if (areTruncating) {
        val diSimple = state.currentInfosetNode.asSimple
        val v = diSimple.dataValue.getString
        val tl = tlChars.get
        if (v.length > tl) {
          // string is too long, truncate to target length
          val newV = v.substring(0, tl.toInt)
          //
          // BUG: JIRA DFDL-1592 - should not be overwriting the value with
          // truncated value.
          //
          diSimple.overwriteDataValue(newV)
        }
      }
      eUnparser.unparse1(state, erd)
    } else {
      // target length unknown
      // ignore constraining the output length. Just unparse it.
      //
      // This happens when we're unparsing, and this element depends on a prior element for
      // determining its length, but that prior element has dfdl:outputValueCalc that depends
      // on this element.
      // This breaks the chicken-egg cycle.
      //
      eUnparser.unparse1(state, erd)
    }
  }

  private def getMaybeTL(state: UState, TLEv: Evaluatable[MaybeJULong]): MaybeJULong = {
    val maybeTLBits =
      try {
        val tlRes = TLEv.evaluate(state)
        Assert.invariant(tlRes.isDefined) // otherwise we shouldn't be in this method at all
        tlRes
      } catch {
        case e: RetryableException => {
          //
          // TargetLength expression couldn't be evaluated.
          //
          MaybeJULong.Nope
        }
      }
    maybeTLBits
  }

  /**
   * Regardless of the type (text or binary), the target length
   * will be provided in bits.
   */
  def unparseBits(state: UState): Unit = {
    val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)

    if (maybeTLBits.isDefined) {
      //
      // We know the target length. We can use it.
      //
      //      val nBits = maybeTLBits.get
      //      val dos = state.dataOutputStream

      //
      // withBitLengthLimit is incorrect. It doesn't take into account
      // that after the unparse, we could be looking at a current state
      // with a distinct DOS
      //
      //      val isLimitOk = dos.withBitLengthLimit(nBits) {
      //        eUnparser.unparse1(state, erd)
      //      }
      //
      //      if (!isLimitOk) {
      //        val availBits = if (dos.remainingBits.isDefined) dos.remainingBits.get.toString else "(unknown)"
      //        UE(state, "Insufficient bits available. Required %s bits, but only %s were available.", nBits, availBits)
      //      }

      eUnparser.unparse1(state, erd)

      // at this point the recursive parse of the children is finished

      if (state.processorStatus ne Success) return

      // We might not have used up all the bits. So some bits may need to
      // be skipped and filled in by fillbyte.
      //
      // In the DFDL data grammar the region being skipped is either the
      // RightFill region, or the ElementUnused region. Skipping this is handled
      // elsewhere, along with insertion of padding before/after a string.
      //

    } else {
      //
      // we couldn't get the target length
      //
      // This happens when we're unparsing, and this element depends on a prior element for
      // determining its length via a length expression, but that prior element
      // has dfdl:outputValueCalc that depends on this element, typically by a
      // call to dfdl:valueLength of this, or of some structure that includes
      // this.
      //
      // This breaks the chicken-egg cycle, by just unparsing it without
      // constraint. That produces the value which (ignoring truncation)
      // can be unparsed to produce the dfdl:valueLength of this element.
      //
      // This does assume that the value will get truncated properly for the
      // case where we do truncation (type string, with
      // dfdl:truncateSpecifiedLengthString 'yes') by some other mechanism.
      //
      eUnparser.unparse1(state, erd)
    }
  }
}

// TODO: implement the capture length unparsers as just using this trait?
trait CaptureUnparsingValueLength {

  def captureValueLengthStart(state: UState, elem: DIElement): Unit = {
    val dos = state.dataOutputStream
    if (dos.maybeAbsBitPos0b.isDefined) {
      elem.valueLength.setAbsStartPos0bInBits(dos.maybeAbsBitPos0b.getULong)
    } else {
      elem.valueLength.setRelStartPos0bInBits(dos.relBitPos0b, dos)
    }
  }

  def captureValueLengthEnd(state: UState, elem: DIElement): Unit = {
    val dos = state.dataOutputStream
    if (dos.maybeAbsBitPos0b.isDefined) {
      elem.valueLength.setAbsEndPos0bInBits(dos.maybeAbsBitPos0b.getULong)
    } else {
      elem.valueLength.setRelEndPos0bInBits(dos.relBitPos0b, dos)
    }
  }
}

/**
 * This trait is to be used with prefixed length unparsers where the length is
 * known without needing to unparse the data. This means there is either a
 * fixed length (like in the case of some binary numbers), or the length can be
 * determined completly be inspecting the infoset data (like in the case of
 * packed decimals). The length calculation performed in the getBitLength
 * function, which returns the length of the data in bits.
 */
trait KnownPrefixedLengthUnparserMixin {
  def prefixedLengthERD: ElementRuntimeData
  def prefixedLengthUnparser: Unparser
  def lengthUnits: LengthUnits
  def getBitLength(s: ParseOrUnparseState): Int
  def prefixedLengthAdjustmentInUnits: Long

  def unparsePrefixedLength(state: UState): Unit = {
    val bits = getBitLength(state)
    val lenInUnits =
      if (lengthUnits == LengthUnits.Bytes) {
        bits >> 3
      } else {
        bits
      }
    val adjustedLenInUnits = lenInUnits + prefixedLengthAdjustmentInUnits
    // Create a "detached" DIDocument with a single child element that the
    // prefix length will be unparsed from. This creates a completely new
    // infoset and unparses from that, so care is taken to ensure this infoset
    // is only used for the prefix length unparsing and is removed afterwards
    val plElement = Infoset.newDetachedElement(state, prefixedLengthERD).asInstanceOf[DISimple]

    plElement.setDataValue(java.lang.Integer.valueOf(adjustedLenInUnits.toInt))

    // unparse the prefixed length element
    state.currentInfosetNodeStack.push(One(plElement))
    prefixedLengthUnparser.unparse1(state)
    state.currentInfosetNodeStack.pop
  }
}

/**
 * This trait is to be used with prefixed length unparsers where the length
 * must be calculated based on the value length of the data. This means the
 * data must be unparsed, the value length calculated, and that value will be
 * assigned to the prefix length element.
 */
trait CalculatedPrefixedLengthUnparserMixin {
  def lengthUnits: LengthUnits
  def prefixedLengthAdjustmentInUnits: Long

  /**
   * Gets the length of a provided element and stores the unit adjusted length
   * as the value in the element that represent its prefix length
   *
   * @param elem The element who's length to get
   * @param plElem The element to store the length
   * @param lengthUnits The length units (bytes or bits)
   */
  def assignPrefixLength(state: UState, elem: DIElement, plElem: DISimple): Unit = {
    val lenInUnits = lengthUnits match {
      case LengthUnits.Bits => elem.valueLength.lengthInBits
      case LengthUnits.Bytes => elem.valueLength.lengthInBytes
      case LengthUnits.Characters => {
        val maybeFixedWidth =
          elem.erd.encInfo.getEncoderInfo(state).coder.bitsCharset.maybeFixedWidth
        val lengthInChars =
          if (maybeFixedWidth.isDefined) {
            val fixedWidth = maybeFixedWidth.get
            Assert.invariant((elem.valueLength.lengthInBits % fixedWidth) == 0) // divisible
            elem.valueLength.lengthInBits / fixedWidth
          } else {
            // This is checked for statically, so should not get here.
            // $COVERAGE-OFF$
            Assert.invariantFailed(
              "Not supported: prefixed length with variable-width or non-constant encoding."
            )
            // $COVERAGE-ON$
          }
        ULong(lengthInChars)
      }
    }
    val adjustedLenInUnits = lenInUnits + prefixedLengthAdjustmentInUnits
    plElem.setDataValue(java.lang.Integer.valueOf(adjustedLenInUnits.toInt))
  }
}

class SpecifiedLengthPrefixedUnparser(
  eUnparser: Unparser,
  erd: ElementRuntimeData,
  prefixedLengthUnparser: Unparser,
  prefixedLengthERD: ElementRuntimeData,
  override val lengthUnits: LengthUnits,
  override val prefixedLengthAdjustmentInUnits: Long
) extends CombinatorUnparser(erd)
  with CaptureUnparsingValueLength
  with CalculatedPrefixedLengthUnparserMixin {

  override lazy val runtimeDependencies = Vector()

  override lazy val childProcessors = Vector(prefixedLengthUnparser, eUnparser)

  override def unparse(state: UState): Unit = {
    // Create a "detached" DIDocument with a single child element that the
    // prefix length will be parsed to. This creates a completely new
    // infoset and parses to that, so care is taken to ensure this infoset
    // is only used for the prefix length parsing and is removed afterwards
    val plElem = Infoset.newDetachedElement(state, prefixedLengthERD).asInstanceOf[DISimple]

    // The prefixedLengthUnparser is going to end up creating a suspension
    // because plElem does not have a value yet. We will temporary push the
    // detached element to the stack, the suspension will clone the ustate and
    // keep track of that detached element, then we can pop it off the stack as
    // it's no longer needed.
    state.currentInfosetNodeStack.push(One(plElem))
    prefixedLengthUnparser.unparse1(state)
    state.currentInfosetNodeStack.pop

    // We now need to capture the length of the actual element
    val elem = state.currentInfosetNode.asInstanceOf[DIElement]
    captureValueLengthStart(state, elem)
    eUnparser.unparse1(state)
    captureValueLengthEnd(state, elem)

    if (elem.valueLength.maybeLengthInBits.isDefined) {
      // If we were able to immediately calculate the length of the element,
      // then just set it as the value of the detached element created above so
      // that when the prefixedLengthUnparser suspension resumes it can unparse
      // the value
      assignPrefixLength(state, elem, plElem)
    } else {
      // The length was not able to be calculated, likely because there was a
      // suspension when unparsing the eUnparser. So let's create a new
      // suspension with the only goal to retry until the valueLength of this
      // element is determined. Once determined, it will set the value of the
      // prefix length element, ultimately allowing the prefix length element
      // suspension to resume and unparse the value
      val suspension = new PrefixLengthSuspendableOperation(
        erd,
        elem,
        plElem,
        lengthUnits,
        prefixedLengthAdjustmentInUnits
      )

      // Run the suspension--we know the suspension will not be able to succeed
      // since maybeLengthInBits is not defined, but this performs various
      // actions to suspend the operation and allow it to be run later
      suspension.run(state)
    }
  }

}