All Downloads are FREE. Search and download functionalities are using the official Maven repository.

packer.PackFileJob.scala Maven / Gradle / Ivy

// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt

package org.cert.netsa.mothra.packer

import com.typesafe.scalalogging.StrictLogging
import java.io.{PrintWriter, StringWriter}
import java.nio.channels.Channels
import java.util.concurrent.Callable
import java.util.concurrent.{Future, ThreadPoolExecutor}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.compress.{CodecPool, CompressionCodecFactory}
import resource.managed // see http://jsuereth.com/scala-arm/index.html
import scala.util.{Failure, Try}

/**
  * A job that packs the records in `sourcePath` according to the
  * rules in the [[PackingLogic]] that was specified when `packer`
  * was created.
  *
  * The job is a `Callable`; it does no work until it is run, typically
  * by handing it to a thread pool via `submitTo`.  When the records
  * have been packed, the source file is removed.
  *
  * @param sourcePath The file containing IPFIX records to be packed.
  * @param packer The code to determine where to write the records.
  * @param conf The Hadoop configuration used to find the file system
  *     and the compression codec for `sourcePath`.
  */
private[mothra] case class PackFileJob(sourcePath: Path, packer: CorePacker)
  (implicit conf: Configuration)
    extends Callable[Boolean] with StrictLogging
{
  // when this job was created and when call() most recently started;
  // used only to report the wait and work times in the log
  private[this] val tCreate = System.currentTimeMillis()
  private[this] var tStart = 0L

  // number of times this job has been started
  @volatile
  private[this] var count = 0

  // the compression codec for `sourcePath`, if any; any error while
  // looking it up is treated as "no codec" and the file is read as-is
  private[this] val codec = Try {
    val factory = new CompressionCodecFactory(conf)
    Option(factory.getCodec(sourcePath))
  }.getOrElse(None)

  /** Returns the number of times call() has been invoked. */
  def runCount: Int = synchronized { count }

  /** Holds the result of submitting this job to a thread pool. */
  @volatile
  private[this] var _result: Future[Boolean] = _

  /** Submits this PackFileJob to run in a particular ThreadPool.  Returns
    * `this`. */
  def submitTo(pool: ThreadPoolExecutor): PackFileJob = {
    _result = pool.submit(this)
    this
  }

  /** Returns the result of the most recent call to `submitTo`.  Returns null if
    * `submitTo` has never been called. */
  def result: Future[Boolean] = _result

  /** Formats a byte count using binary (1024-based) steps.
    * @return the scaled size with its unit, or "unknown" when `bytes`
    *     is not positive. */
  private[this] def humanSize(bytes: Long): String =
    if (bytes <= 0) {
      "unknown"
    } else {
      val units = Array("B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
      var s = bytes.toDouble
      var u = 0
      // `>=` so that exactly 1024 of a unit rolls over to the next one;
      // the index bound keeps `units(u)` in range whatever the input
      while (s >= 1024.0 && u < units.length - 1) {
        s /= 1024.0
        u += 1
      }
      f"${s}%.3f${units(u)}"
    }

  /** Pack the records in `sourcePath` according to the rules in
    * `packer`, then remove `sourcePath`.
    *
    * Non-fatal errors are logged and reported through the return value
    * rather than thrown.
    *
    * @return `true` on success; `false` on failure. */
  def call(): Boolean = {
    tStart = System.currentTimeMillis()
    // take the same lock that runCount uses so the increment and the
    // read cannot interleave
    val attempt = synchronized {
      count += 1
      count
    }
    logger.debug(s"Attempting to pack '${sourcePath}', try #${attempt}")
    var fileSize: Long = 0L
    val succeeded = Try {
      val sourceFs = sourcePath.getFileSystem(conf)
      // the size is only used for the log message; 0 means "unknown"
      fileSize = Try {sourceFs.getFileStatus(sourcePath).getLen()}.getOrElse(0L)
      // borrow a decompressor inside the Try so that a failure here is
      // reported as a failed attempt like any other error
      val decoder = codec.map {c => (c, CodecPool.getDecompressor(c))}
      try {
        // pack the records found in sourcePath
        for (channel <- managed({
          val stream = sourceFs.open(sourcePath)
          try {
            decoder match {
              case None => Channels.newChannel(stream)
              case Some((c, d)) =>
                Channels.newChannel(c.createInputStream(stream, d))
            }
          } catch {
            // no channel exists yet to own the stream, so close it here
            case scala.util.control.NonFatal(e) =>
              Try { stream.close() }
              throw e
          }
        })) {
          packer.packStream(channel)
        }
      } finally {
        // always hand the decompressor back to the pool
        decoder.foreach { case (_, d) => CodecPool.returnDecompressor(d) }
      }

      // finally, now that all that worked, remove the source file.  The
      // records have already been packed, so a failed removal is only
      // logged; reporting failure could cause the file to be packed again.
      if (!sourceFs.delete(sourcePath, false)) {
        logger.warn(s"Packed '${sourcePath}' but failed to remove it")
      }
    } match {
      case Failure(e) =>
        logger.error(s"Failed to pack '${sourcePath}': ${e.toString}")
        val sw = new StringWriter
        e.printStackTrace(new PrintWriter(sw))
        logger.debug(s"Failed to pack '${sourcePath}' on attempt #${attempt}:"
          + s" ${sw.toString}")
        false
      case _ =>
        true
    }
    if ( succeeded ) {
      val tEnd = System.currentTimeMillis()
      logger.info(
        s"Packed '${sourcePath}'; size ${humanSize(fileSize)};"
          + f" waited ${(tStart-tCreate).toDouble/1000.0}%.3f seconds;"
          + f" worked ${(tEnd-tStart).toDouble/1000.0}%.3f seconds")
    }
    succeeded
  }
}

// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.txt.
//
// DM20-1143




© 2015 - 2024 Weber Informatics LLC | Privacy Policy