// packer.Reader.scala
// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt
package org.cert.netsa.mothra.packer
import org.cert.netsa.io.ipfix.{
InfoModel, Record, RecordReader, SessionGroup, StreamMessageReader}
import com.typesafe.scalalogging.StrictLogging
import java.nio.channels.{Channels, ReadableByteChannel}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, Path => HPath}
import org.apache.hadoop.io.compress.{CodecPool,
CompressionCodecFactory, Decompressor}
import scala.util.{Failure, Try}
/**
  * A class that wraps opening a file for reading: opening the file,
  * enabling decompression when the file name maps to a compression codec,
  * and creating an [[org.cert.netsa.io.ipfix.RecordReader]].
  *
  * The file is opened during construction.  If any step of opening fails,
  * the failure is logged, the resources acquired so far are released, and
  * the exception is rethrown to the caller.
  *
  * Call `close()` when finished.  After `close()`, `hasNext` returns
  * `false` and `next()` throws a `NoSuchElementException`.
  *
  * @param path The file to read
  * @param codecFactory The compression factory used to read a compressed file
  * @param infoModel The information model to use while reading
  * @param hadoopConf The hadoop configuration
  */
private[mothra] case class Reader(
  path: HPath,
  codecFactory: CompressionCodecFactory)(
  implicit
  infoModel: InfoModel,
  hadoopConf: Configuration)
    extends Iterator[Record]
    with StrictLogging
{
  // create a SessionGroup for this input
  private[this] val inGroup = SessionGroup(infoModel, path)

  //logger.trace(
  //  s"Reading records from '${path.getName}' in ${path.getParent}/")

  // The handles below are assigned by open() and cleared by close(); they
  // must be declared before the call to open() so that the values stored
  // by open() are not overwritten by field initialization.

  /** The Decompressor as an Option; None when the file is not compressed
    * or once the decompressor has been returned to the CodecPool. */
  private[this] var decompr: Option[Decompressor] = None

  /** The raw input stream. */
  private[this] var rawDataInStream: FSDataInputStream = _

  /** The input stream as a channel. */
  private[this] var channel: ReadableByteChannel = _

  /** The reader; null until open() succeeds and again after close(). */
  private[this] var reader: RecordReader = _

  open()

  /** Whether another record is available; `false` once closed. */
  def hasNext: Boolean = reader != null && reader.hasNext

  /**
    * Returns the next record.
    *
    * @throws java.util.NoSuchElementException if this Reader has been closed
    */
  def next(): Record = {
    if (reader == null) {
      throw new NoSuchElementException(
        s"Reader for '${path.getName}' in ${path.getParent} is closed")
    }
    reader.next()
  }

  /**
    * Runs one step of open().  When the step fails, logs the failure,
    * releases whatever has been acquired so far, and rethrows.
    *
    * @param what Describes the step; completes the phrase "Failed to ..."
    * @param body The step to run
    * @return The value produced by `body`
    */
  private[this] def openStep[T](what: String)(body: => T): T =
    Try(body) match {
      case scala.util.Success(value) => value
      case Failure(e) =>
        logger.error(s"Failed to ${what}" +
          s" for '${path.getName}' in ${path.getParent}: ${e}")
        close()
        throw e
    }

  /** Opens the file, sets up decompression, and creates a reader.  Called
    * automatically during construction. */
  private[this] def open(): Unit = {
    val fileSystem = path.getFileSystem(hadoopConf)

    // get the compression codec based on the pathname; a failed lookup is
    // treated the same as "no codec", i.e., the file is read uncompressed
    val codec = Try {
      Option(codecFactory.getCodec(path))
    }.getOrElse(None)

    // get a decompressor
    decompr = openStep("get decompressor") {
      codec.map { c => CodecPool.getDecompressor(c) }
    }

    // open the file
    rawDataInStream = openStep("open raw input") {
      fileSystem.open(path)
    }

    // create a channel, enabling the decompressor
    channel = openStep("open channel") {
      val decompressed = for {
        c <- codec
        d <- decompr
      } yield c.createInputStream(rawDataInStream, d)
      val in: java.io.InputStream = decompressed.getOrElse(rawDataInStream)
      Channels.newChannel(in)
    }

    // create the reader
    reader = openStep("create record reader") {
      RecordReader(StreamMessageReader(channel, inGroup))
    }
  }

  /**
    * Close the reader and attempt to release all resources.  Non-fatal
    * failures are logged rather than thrown.  Calling this more than once
    * is safe: each resource is released at most once.
    */
  def close(): Unit = {
    reader = null

    // Closing the channel is expected to close the stream it wraps, so the
    // raw stream reference is dropped with it when the close succeeds.
    Try {
      Option(channel).foreach { c =>
        c.close()
        channel = null
        rawDataInStream = null
      }
    } match {
      case Failure(e) =>
        logger.warn(s"Failed to close channel" +
          s" for '${path.getName}' in ${path.getParent}: ${e}")
      case _ =>
    }

    // Reached with a non-null stream only when no channel was created or
    // when closing the channel failed.
    Try {
      Option(rawDataInStream).foreach { s =>
        s.close()
        rawDataInStream = null
      }
    } match {
      case Failure(e) =>
        logger.error(s"Failed to close raw input stream" +
          s" for '${path.getName}' in ${path.getParent}: ${e}")
      case _ =>
    }

    // Forget the decompressor before handing it back so that a repeated
    // close() can never return the same instance to the pool twice.
    val pooled = decompr
    decompr = None
    Try {
      pooled.foreach { d => CodecPool.returnDecompressor(d) }
    } match {
      case Failure(e) =>
        logger.error(s"Failed to return decompressor to CodecPool" +
          s" for '${path.getName}' in ${path.getParent}: ${e}")
      case _ =>
    }
  }
}
// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.txt
//
// DM20-1143