// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt

package org.cert.netsa.mothra.packer

import org.cert.netsa.io.ipfix.{
  ExportStream, InfoModel, Record, SessionGroup, StreamSession}

import com.typesafe.scalalogging.StrictLogging
import java.io.DataOutputStream
import java.util.UUID.randomUUID
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataOutputStream, Path => HPath}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.io.compress.{
  CodecPool, Compressor, CompressionCodec}
import scala.util.{Failure, Try}

/**
  * A class that wraps opening a file for writing: generating a file name,
  * opening the file, enabling compression, and creating an
  * [[org.cert.netsa.io.ipfix.ExportStream]].
  *
  * When the `maximumSize` parameter is not `None`, the Writer installs a
  * callback that watches the size of the file.  When the size exceeds this
  * value, the `reachedMaxSize` function returns `true`.  The caller must
  * check this function periodically to notice when the size is exceeded.  The
  * file's size will exceed the maximum size by 64k or so.  When the
  * parameter is `None`, `reachedMaxSize` always returns `false`.
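  *
  * For example, a caller might write records until the limit trips (a
  * minimal sketch; `nextRecord()` is a hypothetical source of records, and
  * an `InfoModel` and Hadoop `Configuration` are assumed to be in implicit
  * scope):
  * {{{
  * val writer = Writer(dir, "packed.", None, Some(1L << 30))
  * while ( !writer.reachedMaxSize ) {
  *   writer.add(nextRecord())
  * }
  * writer.close()
  * }}}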
  *
  * @param dir The directory in HDFS in which to create the file
  * @param basename The basename of the file to create. A UUID is added, and a
  * compression suffix is appended if needed.
  * @param compressCodec The compression codec to use for the file, if any.
  * @param maximumSize The approximate maximum size for the file
  * @param infoModel The information model to use while writing
  * @param hadoopConf The Hadoop configuration
  */
private[mothra] case class Writer(
  dir: HPath,
  basename: String,
  compressCodec: Option[CompressionCodec],
  maximumSize: Option[Long])(
  implicit
    infoModel: InfoModel,
    hadoopConf: Configuration)
    extends ExportStream.DataWrittenCallback
    with StrictLogging
{
  /** The compression suffix */
  private[this] val compressSuffix =
    compressCodec.map { c => c.getDefaultExtension }.getOrElse("")

  /** The SessionGroup used when exporting records */
  private[this] val exportGroup = SessionGroup(infoModel, hadoopConf)

  /** The Session for the ExportStream */
  protected val exportSession = new StreamSession(exportGroup, 0)

  /** Pre-define all YAF templates in the Session. */
  YafTemplates.addToSession(exportSession)

  /** The Path being written to */
  val exportFile = new HPath(
    dir, s"${basename}${randomUUID().toString}${compressSuffix}")

  /** Write-only permission for this user (--w-------).  Scala does not
    * support octal literals, so octal 0200 is written here as (2 << 6).
    *
    * The Writer sets the permission of each file it creates to write-only
    * immediately after opening it.  Once all existing files for the basename
    * are read, the original permission bits on the new file are restored and
    * the existing files are deleted.
    *
    * This is done to reduce the time window during which a reading process
    * could potentially see duplicate records (a record in both an old file
    * and a new file).  In addition, a sudden crash of the program will not
    * leave duplicate records visible in the repository.
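    *
    * A caller-side sketch of restoring the bits (hedged; `writer` and
    * `fileSystem` are assumed to be in scope):
    * {{{
    * for (perm <- writer.originalPermission) {
    *   fileSystem.setPermission(writer.exportFile, perm)
    * }
    * }}}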
    */
  private[this] val writeOnly = new FsPermission((2 << 6).toShort)

  private[this] var compressor: Option[Compressor] = None

  private[this] var rawDataOutStream: FSDataOutputStream = _

  private[this] var dataOutStream: DataOutputStream = _

  private[this] var _originalPermission: FsPermission = _

  /** The ExportStream used to write records */
  protected var exportStream: ExportStream = _

  /** The maximum size for this file, as a number. */
  private[this] val maxSize = maximumSize.getOrElse(Long.MaxValue)

  /** whether the maximum size has been reached. */
  @volatile
  private[this] var _reachedMaxSize = false

  open()


  /** Indicates whether the file's size has reached the maximumSize parameter
    * specified when the class was created. */
  def reachedMaxSize: Boolean = _reachedMaxSize

  /** Returns the permissions the file had when it was initially opened,
    * before being set to write-only.  May be `None` if the filesystem does
    * not support getting the permission of newly created files (e.g. S3). */
  def originalPermission: Option[FsPermission] = Option(_originalPermission)

  /** Gets the basename of the output file. */
  def getName: String = exportFile.getName

  /** Adds `record` to the output file. */
  def add(record: Record): Unit = {
    exportStream.add(record)
    ()
  }

  /** Gets the current position in the output file.  This should be the byte
    * offset into the file, not the number of octets written, which may
    * differ due to compression.  Also, this does not reflect any IPFIX
    * Message being created in memory that has not yet been written. */
  def getPos(): Long = {
    rawDataOutStream.getPos()
  }

  /** Creates and opens the file, enables compression, and makes an export
    * stream.  Called automatically during construction. */
  private[this] def open(): Unit = {
    logger.trace(
      s"Creating new file '${exportFile.getName}' in ${dir}/")
    val fileSystem = dir.getFileSystem(hadoopConf)
    Try {
      compressor = compressCodec map { c => CodecPool.getCompressor(c) }
      rawDataOutStream = fileSystem.create(exportFile, false)
      // S3 doesn't like stat()ing a file that has just been created
      _originalPermission = Try {
        val perm = fileSystem.getFileStatus(exportFile).getPermission
        fileSystem.setPermission(exportFile, writeOnly)
        perm
      }.getOrElse(null)
      dataOutStream = compressor match {
        case None => rawDataOutStream
        case Some(c) => new DataOutputStream(
          compressCodec.get.createOutputStream(rawDataOutStream, c))
      }
      exportStream = ExportStream(dataOutStream, exportSession)
      exportStream.elementDescriptionTID = 0xd006
      exportStream.templateDescriptionTID = 0xd007
      if ( maximumSize.isDefined ) {
        exportStream.dataWrittenCallback = this
      }
    } match {
      case Failure(exception) =>
        logger.error(s"Failed to open '${getName}' in ${dir}: ${exception}")
        Try {
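          // dataOutStream either is, or wraps, rawDataOutStream, so
          // closing it covers both; clear the raw reference to avoid a
          // double close in the next block.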
          for (s <- Option(dataOutStream)) {
            s.close()
            rawDataOutStream = null
          }
        }
        Try {
          for (s <- Option(rawDataOutStream)) {
            s.close()
            rawDataOutStream = null
          }
        }
        Try {
          for (c <- compressor) { CodecPool.returnCompressor(c) }
        }
        throw exception
      case _ =>
    }
  }

  /** Closes the file, and attempts to ensure all resources are released. */
  def close(): Unit = {
    var exception: Option[Throwable] = None
    Try {
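      // Closing the ExportStream also closes the streams beneath it, so
      // clear those references to keep the later blocks from closing
      // them again.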
      for (s <- Option(exportStream)) {
        s.close()
        dataOutStream = null
        rawDataOutStream = null
      }
    } match {
      case Failure(e) =>
        logger.error(s"Failed to close ExportStream" +
          s" for '${getName}' in ${dir}: ${e}")
        if ( exception.isEmpty ) { exception = Option(e) }
      case _ =>
    }
    Try {
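      // dataOutStream either is, or wraps, rawDataOutStream, so closing
      // it covers both; clear the raw reference to avoid a double close.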
      for (s <- Option(dataOutStream)) {
        s.close()
        rawDataOutStream = null
      }
    } match {
      case Failure(e) =>
        logger.error(s"Failed to close DataOutputStream" +
          s" for '${getName}' in ${dir}: ${e}")
        if ( exception.isEmpty ) { exception = Option(e) }
      case _ =>
    }
    Try {
      for (s <- Option(rawDataOutStream)) {
            s.close()
        rawDataOutStream = null
      }
    } match {
      case Failure(e) =>
        logger.warn(s"Failed to close FSDataOutputStream" +
          s" for '${getName}' in ${dir}: ${e}")
        if ( exception.isEmpty ) { exception = Option(e) }
      case _ =>
    }
    Try {
      for (c <- compressor) { CodecPool.returnCompressor(c) }
    } match {
      case Failure(e) =>
        logger.error(s"Failed to return compressor to CodecPool" +
          s" for '${getName}' in ${dir}: ${e}")
        // do not propagate the error
      case _ =>
    }
    // throw if non-empty
    for (e <- exception) { throw e }
  }

  /**
    * Implements a callback that is invoked when an IPFIX Message is written
    * to the [[ExportStream]].
    */
  // Required for this class to extend ExportStream.DataWrittenCallback
  final def wrote(s: ExportStream, bytesWritten: Long): Unit = {
    if ( !_reachedMaxSize && rawDataOutStream.size() >= maxSize ) {
      logger.trace(s"Callback fired and size met for '${getName}'")
      _reachedMaxSize = true
    }
  }

  /**
    * Implements a callback that is invoked when an IPFIX Message is written
    * to the [[ExportStream]] due to the stream being closed.
    */
  // Required for this class to extend ExportStream.DataWrittenCallback
  final def closed(s: ExportStream, bytesWritten: Long): Unit = { }

}

// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact [email protected] for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.txt
//
// DM20-1143



