All Downloads are FREE. Search and download functionalities are using the official Maven repository.

packer.Reader.scala Maven / Gradle / Ivy

// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt

package org.cert.netsa.mothra.packer

import org.cert.netsa.io.ipfix.{
  InfoModel, Record, RecordReader, SessionGroup, StreamMessageReader}

import com.typesafe.scalalogging.StrictLogging
import java.nio.channels.{Channels, ReadableByteChannel}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, Path => HPath}
import org.apache.hadoop.io.compress.{CodecPool,
  CompressionCodecFactory, Decompressor}
import scala.util.{Failure, Try}

/**
  * A class that wraps opening a file for reading: opening the file,
  * enabling decompression, and creating a
  * [[org.cert.netsa.io.ipfix.RecordReader]].
  *
  * The file is opened during construction.  If any step of opening fails
  * with a non-fatal error, the failure is logged, whatever resources were
  * acquired up to that point are released, and the exception is rethrown
  * from the constructor.
  *
  * Call `close()` when finished with the reader.  Once closed, `hasNext`
  * returns `false` and `next()` throws a `NoSuchElementException`.
  *
  * @param path The file to read
  * @param codecFactory The compression factory used to read a compressed file
  * @param infoModel The information model to use while reading
  * @param hadoopConf The hadoop configuration
  */
private[mothra] case class Reader(
  path: HPath,
  codecFactory: CompressionCodecFactory)(
  implicit
    infoModel: InfoModel,
    hadoopConf: Configuration)
    extends Iterator[Record]
    with StrictLogging
{
  // create a SessionGroup for this input
  private[this] val inGroup = SessionGroup(infoModel, path)

  //logger.trace(
  //  s"Reading records from '${path.getName}' in ${path.getParent}/")

  /** The Decompressor borrowed from the CodecPool, if the file is
    * compressed.  Reset to `None` once it has been handed back. */
  private[this] var decompr: Option[Decompressor] = None

  /** The raw input stream; `None` when not open. */
  private[this] var rawDataInStream: Option[FSDataInputStream] = None

  /** The input stream as a channel; `None` when not open. */
  private[this] var channel: Option[ReadableByteChannel] = None

  /** The reader; `None` when not open. */
  private[this] var reader: Option[RecordReader] = None

  open()


  /** Whether another record is available.  Always `false` after `close()`. */
  def hasNext: Boolean = reader.exists(_.hasNext)

  /** Returns the next record.
    *
    * @throws java.util.NoSuchElementException if the reader has been closed
    */
  def next(): Record = reader match {
    case Some(r) => r.next()
    case None =>
      throw new NoSuchElementException(
        s"Reader for '${path.getName}' in ${path.getParent} is closed")
  }

  /** Evaluates `step` and returns its result.  On a non-fatal failure, logs
    * "Failed to `what` for ...", releases everything acquired so far via
    * `close()`, and rethrows the original exception.
    */
  private[this] def attempt[T](what: String)(step: => T): T =
    Try(step) match {
      case scala.util.Success(result) => result
      case Failure(e) =>
        logger.error(
          s"Failed to ${what} for '${path.getName}' in ${path.getParent}: ${e}")
        close()
        throw e
    }

  /** Opens the file, sets up decompression, and creates a reader.  Called
    * automatically during construction. */
  private[this] def open(): Unit = {
    val fileSystem = path.getFileSystem(hadoopConf)

    // get the compression codec based on the pathname; when the lookup
    // fails or finds nothing, the file is read without decompression
    val codec = Try {
      Option(codecFactory.getCodec(path))
    }.getOrElse(None)

    // get a decompressor
    decompr = attempt("get decompressor") {
      codec.map { c => CodecPool.getDecompressor(c) }
    }

    // open the file
    val raw = attempt("open raw input") { fileSystem.open(path) }
    rawDataInStream = Option(raw)

    // create a channel, enabling the decompressor
    val ch = attempt("open channel") {
      val compressed =
        for { c <- codec; d <- decompr } yield c.createInputStream(raw, d)
      compressed match {
        case Some(in) => Channels.newChannel(in)
        case None => Channels.newChannel(raw)
      }
    }
    channel = Option(ch)

    // create the reader
    reader = Option(attempt("create record reader") {
      RecordReader(StreamMessageReader(ch, inGroup))
    })
  }

  /**
    * Close the reader and attempt to release all resources.  This should
    * never error.
    *
    * Each resource reference is dropped before the resource is released,
    * so calling this method again does not close a stream twice or hand
    * the same decompressor back to the CodecPool a second time.
    */
  def close(): Unit = {
    reader = None

    val openChannel = channel
    channel = None
    for (c <- openChannel) {
      Try { c.close() } match {
        case Failure(e) =>
          logger.warn(s"Failed to close channel" +
            s" for '${path.getName}' in ${path.getParent}: ${e}")
        case _ =>
          // closing the channel closed the stream it wraps, so there is
          // nothing left to close directly
          rawDataInStream = None
      }
    }

    // only reached with a stream when there was no channel or closing the
    // channel failed
    val openStream = rawDataInStream
    rawDataInStream = None
    for (s <- openStream) {
      Try { s.close() } match {
        case Failure(e) =>
          logger.error(s"Failed to close raw input stream" +
            s" for '${path.getName}' in ${path.getParent}: ${e}")
        case _ =>
      }
    }

    val borrowed = decompr
    decompr = None
    Try {
      borrowed.foreach { d => CodecPool.returnDecompressor(d) }
    } match {
      case Failure(e) =>
        logger.error(s"Failed to return decompressor to CodecPool" +
          s" for '${path.getName}' in ${path.getParent}: ${e}")
      case _ =>
    }
  }
}

// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.txt
//
// DM20-1143




© 2015 - 2024 Weber Informatics LLC | Privacy Policy