// All downloads are free. Search and download functionality uses the official Maven repository.
//
// lame/Gunzip.scala — Maven / Gradle / Ivy
//
// The newest version!
package lame

import akka.util.ByteString
import akka.NotUsed
import akka.stream.scaladsl.Flow
import scala.annotation.tailrec
import java.util.zip.{CRC32, Inflater, ZipException}
import scala.collection.mutable

/** Streaming gzip decompressor built as a small explicit state machine
  * ([[Gunzip.GunzipState]]) driven by an Akka Streams [[Gunzip.GunzipStage]].
  *
  * Input is processed member by member: header, deflate payload, trailer, and
  * then back to the header of the next member, if any.
  */
object Gunzip {

  /** Tuning parameters shared by all steps of the state machine.
    *
    * @param maxBytesPerChunk size of the output buffer [[inflate]] allocates
    *   per call, and therefore the upper bound on the size of an emitted chunk
    * @param expectedCompressionRatio estimate of compressed size divided by
    *   uncompressed size; [[inflate]] clamps it to `[1/1032, 1.0]` and uses it
    *   to decide how many compressed bytes are fed to the inflater at once
    */
  case class Configuration(
      maxBytesPerChunk: Int,
      expectedCompressionRatio: Double
  )

  object GunzipState {

    /** Which part of a gzip member the state machine is currently parsing. */
    sealed trait Phase
    case object ReadHeader extends Phase
    case object Inflate extends Phase
    case object ReadTrailer extends Phase

    /** Creates the initial state: no buffered input, nothing to emit, waiting
      * for a gzip header, with a fresh raw-deflate (`nowrap = true`) inflater.
      */
    def apply(
        maxBytesPerChunk: Int,
        expectedCompressionRatio: Double
    ): GunzipState =
      GunzipState(
        dataHead = ByteString.empty,
        dataTail = ByteString.empty,
        phase = ReadHeader,
        emit = mutable.ArrayBuffer.empty[ByteString],
        inflater = new Inflater(true),
        crc32 = new CRC32,
        config = Configuration(maxBytesPerChunk, expectedCompressionRatio)
      )

  }

  /** Snapshot of the decompressor.
    *
    * Note that `emit`, `inflater` and `crc32` are mutable objects that are
    * shared between successive states; only the remaining fields are
    * replaced when the state advances.
    *
    * @param dataHead slice of compressed input previously cut off by
    *   [[inflate]] that the inflater has not fully consumed yet
    * @param dataTail buffered input that has not been sliced yet
    * @param phase part of the gzip member being parsed
    * @param emit decompressed chunks waiting to be sent downstream
    * @param inflater inflater used for the deflate payload
    * @param crc32 running checksum of the decompressed bytes of the current member
    * @param config tuning parameters
    */
  case class GunzipState(
      dataHead: ByteString,
      dataTail: ByteString,
      phase: GunzipState.Phase,
      emit: mutable.ArrayBuffer[ByteString],
      inflater: Inflater,
      crc32: CRC32,
      config: Configuration
  ) {

    override def toString: String =
      s"GunzipState($dataHead, $dataTail, $phase, ${emit.size}, $inflater, $crc32, $config)"

    /** Returns a state with `bs` appended to the not-yet-sliced input. */
    def update(bs: ByteString): GunzipState =
      copy(dataTail = dataTail ++ bs)

    /** True when no compressed input is buffered at all. */
    def inputIsEmpty: Boolean = dataHead.isEmpty && dataTail.isEmpty

  }

  private def fail(msg: String): Nothing = throw new ZipException(msg)

  /** Lower 16 bits of the CRC32 of `data`, as used by the optional gzip header checksum. */
  private def crc16(data: ByteString): Int = {
    val crc = new CRC32
    data.asByteBuffers.foreach(crc.update)
    crc.getValue.toInt & 0xFFFF
  }

  /** Advances the state machine as far as the buffered input allows.
    *
    * Returns as soon as one of the following holds:
    *  - no input is buffered;
    *  - the phase is `Inflate` and a chunk is already waiting in `emit`
    *    (at most one inflate step is performed per pending emission);
    *  - the header or trailer parser could not make progress because it needs
    *    more bytes. [[readHeader]] and [[readTrailer]] signal this by
    *    returning the very instance they were given; recursing on it again
    *    would loop forever, so it is returned to the caller instead.
    *
    * @throws java.util.zip.ZipException if the header or trailer is invalid
    */
  @tailrec
  def cyclePhases(currentState: GunzipState): GunzipState =
    if (currentState.inputIsEmpty) currentState
    else
      currentState.phase match {
        case GunzipState.ReadHeader =>
          val next = readHeader(currentState)
          if (next eq currentState) currentState // incomplete header: wait for more input
          else cyclePhases(next)
        case GunzipState.Inflate =>
          if (currentState.emit.nonEmpty) currentState
          else cyclePhases(inflate(currentState))
        case GunzipState.ReadTrailer =>
          val next = readTrailer(currentState)
          if (next eq currentState) currentState // incomplete trailer: wait for more input
          else cyclePhases(next)
      }

  /** Performs one inflate step.
    *
    * Feeds the inflater either the leftover `dataHead` or, when that is empty,
    * a new slice cut from `dataTail` whose size is
    * `expectedCompressionRatio * maxBytesPerChunk` (ratio clamped to
    * `[1/1032, 1.0]`, slice at least one byte). Exactly one chunk of at most
    * `maxBytesPerChunk` bytes is appended to `emit`; it may be empty when the
    * inflater produced no output for this slice. The phase of the returned
    * state is `ReadTrailer` once the inflater reports the end of the deflate
    * stream and `Inflate` otherwise.
    */
  def inflate(currentState: GunzipState): GunzipState = {
    import currentState.{config, crc32, inflater}

    val (reader, tail) =
      if (currentState.dataHead.nonEmpty)
        (new ByteReader(currentState.dataHead), currentState.dataTail)
      else {
        val effectiveExpectedCompressionRatio =
          math.max(1 / 1032d, math.min(1.0, config.expectedCompressionRatio))
        val headSize = math.max(
          1,
          (effectiveExpectedCompressionRatio * config.maxBytesPerChunk).toInt
        )
        val (head, rest) = currentState.dataTail.splitAt(headSize)
        (new ByteReader(head), rest)
      }

    /* The following lines feeding the inflater and crc32 bear the copyright of:
     * Copyright (C) 2009-2019 Lightbend Inc.
     */
    inflater.setInput(reader.remainingData.toArray)

    val outputBuffer = new Array[Byte](config.maxBytesPerChunk)
    val read = inflater.inflate(outputBuffer)
    crc32.update(outputBuffer, 0, read)

    // Drop exactly the bytes the inflater consumed; the rest stays in dataHead.
    reader.skip(reader.remainingSize - inflater.getRemaining)

    currentState.emit.append(ByteString.fromArray(outputBuffer, 0, read))
    /* Lightbend copyright end */

    currentState.copy(
      dataHead = reader.remainingData,
      dataTail = tail,
      phase =
        if (inflater.finished) GunzipState.ReadTrailer
        else GunzipState.Inflate
    )
  }

  /** Parses and verifies the 8 byte gzip trailer (CRC32 and ISIZE).
    *
    * On success the inflater and checksum are reset and the returned state
    * waits for the header of the next member. If fewer bytes than needed are
    * buffered, `currentState` itself is returned unchanged (same instance),
    * which [[cyclePhases]] relies on to detect the lack of progress.
    *
    * @throws java.util.zip.ZipException if the checksum or the size does not match
    */
  def readTrailer(currentState: GunzipState): GunzipState = {
    val reader = new ByteReader(currentState.dataHead ++ currentState.dataTail)
    import reader._
    try {
      /* The following lines parsing the gzip trailer bear the copyright of:
       * Copyright (C) 2009-2019 Lightbend Inc.
       */
      if (readIntLE() != currentState.crc32.getValue.toInt)
        fail("Corrupt data (CRC32 checksum error)")
      if (readIntLE() != currentState.inflater.getBytesWritten.toInt /* truncated to 32bit */ )
        fail("Corrupt GZIP trailer ISIZE")

      currentState.inflater.reset()
      currentState.crc32.reset()
      /* Lightbend copyright end */
      currentState.copy(
        dataHead = ByteString.empty,
        dataTail = reader.remainingData,
        phase = GunzipState.ReadHeader
      )
    } catch {
      case ByteReader.NeedMoreData => currentState
    }
  }

  /** Parses the gzip member header, skipping all optional fields.
    *
    * On success the returned state is in the `Inflate` phase with the bytes
    * following the header as its input. If the header is not completely
    * buffered yet, `currentState` itself is returned unchanged (same
    * instance), which [[cyclePhases]] relies on to detect the lack of progress.
    *
    * @throws java.util.zip.ZipException if the magic bytes, the compression
    *   method or the optional header checksum are invalid
    */
  def readHeader(currentState: GunzipState): GunzipState = {
    val reader = new ByteReader(currentState.dataHead ++ currentState.dataTail)
    import reader._
    try {
      /* The following lines parsing the gzip header bear the copyright of:
       * Copyright (C) 2009-2019 Lightbend Inc.
       */
      if (readByte() != 0x1F || readByte() != 0x8B) fail("Not in GZIP format") // check magic header
      if (readByte() != 8) fail("Unsupported GZIP compression method") // check compression method
      val flags = readByte()
      skip(6) // skip MTIME, XFL and OS fields
      if ((flags & 4) > 0) skip(readShortLE()) // skip optional extra fields
      if ((flags & 8) > 0) skipZeroTerminatedString() // skip optional file name
      if ((flags & 16) > 0) skipZeroTerminatedString() // skip optional file comment
      if ((flags & 2) > 0 && crc16(fromStartToHere) != readShortLE())
        fail("Corrupt GZIP header")
      /* Lightbend copyright end */
      currentState.copy(
        dataHead = ByteString.empty,
        dataTail = reader.remainingData,
        phase = GunzipState.Inflate
      )
    } catch {
      case ByteReader.NeedMoreData => currentState
    }
  }

  import akka.stream.stage._
  import akka.stream._

  /** Graph stage that gunzips a stream of `ByteString`s.
    *
    * @param maxBytesPerChunk upper bound on the size of an emitted chunk
    * @param expectedCompressionRatio see [[Configuration]]
    * @param inflater optional factory for the inflater to use; when absent a
    *   raw-deflate `new Inflater(true)` is created. The inflater is released
    *   with `end()` when the stage stops.
    */
  class GunzipStage(
      maxBytesPerChunk: Int = 65536,
      expectedCompressionRatio: Double = 1.0,
      inflater: Option[() => Inflater] = None
  ) extends GraphStage[FlowShape[ByteString, ByteString]] {

    val in: Inlet[ByteString] = Inlet[ByteString]("lame.gunzip.in")
    val out: Outlet[ByteString] = Outlet[ByteString]("lame.gunzip.out")

    val shape: FlowShape[ByteString, ByteString] = FlowShape.of(in, out)

    override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
      new GraphStageLogic(shape) {

        // Built directly with the chosen inflater so that no default Inflater
        // is allocated and then dropped without end() when a custom one is given.
        private var state: GunzipState = GunzipState(
          dataHead = ByteString.empty,
          dataTail = ByteString.empty,
          phase = GunzipState.ReadHeader,
          emit = mutable.ArrayBuffer.empty[ByteString],
          inflater = inflater.map(_.apply()).getOrElse(new Inflater(true)),
          crc32 = new CRC32,
          config = Configuration(maxBytesPerChunk, expectedCompressionRatio)
        )

        override def postStop(): Unit = state.inflater.end()

        /** Sends every pending chunk downstream and empties the buffer. */
        private def flush(): Unit = {
          val chunks = state.emit.toList
          state.emit.clear()
          emitMultiple(out, chunks)
        }

        /** Makes progress on behalf of a waiting downstream. Must only be
          * called while `out` is available.
          *
          * After `cyclePhases` an empty `emit` means the buffered input (if
          * any) is an incomplete header or trailer, i.e. more input is needed.
          */
        private def advance(): Unit = {
          state = cyclePhases(state)
          if (state.emit.nonEmpty) {
            flush()
            if (isClosed(in) && state.inputIsEmpty) complete(out)
          } else if (!isClosed(in)) {
            if (!hasBeenPulled(in)) pull(in)
          } else if (state.inputIsEmpty) {
            complete(out)
          } else {
            // Upstream is gone but a partial header/trailer is left over.
            failStage(new ZipException("Truncated GZIP stream"))
          }
        }

        setHandler(
          in,
          new InHandler {
            override def onPush(): Unit = {
              state = cyclePhases(state.update(grab(in)))
              if (state.emit.isEmpty) pull(in) // nothing to send yet: need more input
              else if (isAvailable(out)) flush()
              // otherwise keep the chunk until downstream pulls
            }

            override def onUpstreamFinish(): Unit =
              if (state.emit.isEmpty && state.inputIsEmpty) complete(out)
              else if (isAvailable(out)) advance() // downstream is waiting: resolve now
            // otherwise the next onPull drains the buffer and completes
          }
        )

        setHandler(
          out,
          new OutHandler {
            override def onPull(): Unit = advance()
          }
        )
      }
  }

  /** Creates a gunzip flow.
    *
    * @param maxBytesPerChunk upper bound on the size of an emitted chunk
    * @param expectedCompressionRatio see [[Configuration]]
    * @param customInflater optional factory for the inflater to use
    */
  def apply(
      maxBytesPerChunk: Int = 65536,
      expectedCompressionRatio: Double = 1.0,
      customInflater: Option[() => Inflater] = None
  ): Flow[ByteString, ByteString, NotUsed] =
    Flow[ByteString].via(
      new GunzipStage(
        maxBytesPerChunk,
        expectedCompressionRatio,
        customInflater
      )
    )

}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy