// packer.PackFileJob.scala Maven / Gradle / Ivy
// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt
package org.cert.netsa.mothra.packer
import com.typesafe.scalalogging.StrictLogging
import java.io.{PrintWriter, StringWriter}
import java.nio.channels.Channels
import java.util.concurrent.Callable
import java.util.concurrent.{Future, ThreadPoolExecutor}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.compress.{CodecPool, CompressionCodecFactory}
import resource.managed // see http://jsuereth.com/scala-arm/index.html
import scala.util.{Failure, Try}
/**
  * A job that packs the records in `sourcePath` according to the
  * rules specified in the [[PackingLogic]] specified when `packer`
  * was created.
  *
  * The job is a `Callable`; use `submitTo` to run it on a thread pool
  * and `result` to retrieve the resulting `Future`. When packing
  * succeeds the source file is deleted.
  *
  * @param sourcePath The file containing IPFIX records to be packed.
  * @param packer The code to determine where to write the records.
  * @param conf The Hadoop configuration used to find the file system
  *     and the compression codec of `sourcePath`.
  */
private[mothra] case class PackFileJob(sourcePath: Path, packer: CorePacker)
  (implicit conf: Configuration)
    extends Callable[Boolean] with StrictLogging
{
  /** Time at which this job was created; used to report how long the
    * job waited before it was run. */
  private[this] val tCreate = System.currentTimeMillis()

  // number of times this job has been started
  @volatile
  private[this] var count = 0

  /** The compression codec inferred from `sourcePath`, or `None` when
    * there is no matching codec or the lookup fails. */
  private[this] val codec = Try {
    val factory = new CompressionCodecFactory(conf)
    Option(factory.getCodec(sourcePath))
  }.getOrElse(None)

  /** Returns the number of times call() has been invoked. */
  def runCount: Int = synchronized { count }

  /** Holds the result of submitting this job to a thread pool. */
  @volatile
  private[this] var _result: Future[Boolean] = _

  /** Submits this PackFileJob to run in a particular ThreadPool.  Returns
    * `this`. */
  def submitTo(pool: ThreadPoolExecutor): PackFileJob = {
    _result = pool.submit(this)
    this
  }

  /** Returns the result of the most recent call to `submitTo`.  Returns null if
    * `submitTo` has never been called. */
  def result: Future[Boolean] = _result

  /** Formats a byte count using binary (1024-based) units with three
    * decimal places.  Returns "unknown" when `bytes` is not positive. */
  private[this] def humanSize(bytes: Long): String =
    if (bytes <= 0) {
      "unknown"
    } else {
      val units = Array("B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
      var s = bytes.toDouble
      var u = 0
      // `>=` so that exactly 1024 bytes is reported as 1.000kB; the index
      // bound keeps `u` inside `units` regardless of the input
      while (s >= 1024.0 && u < units.length - 1) {
        s /= 1024.0
        u += 1
      }
      f"${s}%.3f${units(u)}"
    }

  /** Pack the files in `sourcePath` according to the rules in
    * `packer`.
    * @return `true` on success; `false` on failure. */
  def call(): Boolean = {
    val tStart = System.currentTimeMillis()
    // take the same lock as runCount so the increment cannot be lost
    val attempt = synchronized { count += 1; count }
    logger.debug(s"Attempting to pack '${sourcePath}', try #${attempt}")

    // On success `packed` holds the size of the source file (0 if unknown).
    val packed = Try {
      // borrow a decompressor (when the file is compressed) and pair it
      // with its codec so it is always handed back in the `finally`
      val decoder = codec.map { c => (c, CodecPool.getDecompressor(c)) }
      try {
        val sourceFs = sourcePath.getFileSystem(conf)
        val fileSize =
          Try { sourceFs.getFileStatus(sourcePath).getLen() }.getOrElse(0L)
        // pack the records found in sourcePath
        for (channel <- managed({
          val stream = sourceFs.open(sourcePath)
          try {
            decoder match {
              case None => Channels.newChannel(stream)
              case Some((c, d)) =>
                Channels.newChannel(c.createInputStream(stream, d))
            }
          } catch {
            // the managed resource was never acquired, so nothing else
            // will close the raw stream; close it here and rethrow
            case e: Throwable =>
              Try(stream.close()).failed.foreach { t => e.addSuppressed(t) }
              throw e
          }
        })) {
          packer.packStream(channel)
        }
        // finally, now that all that worked, remove the source file
        if (!sourceFs.delete(sourcePath, false)) {
          logger.warn(s"Packed '${sourcePath}' but failed to remove it")
        }
        fileSize
      } finally {
        decoder.foreach { case (_, d) => CodecPool.returnDecompressor(d) }
      }
    }

    packed.failed.foreach { e =>
      logger.error(s"Failed to pack '${sourcePath}': ${e.toString}")
      val sw = new StringWriter
      e.printStackTrace(new PrintWriter(sw))
      logger.debug(s"Failed to pack '${sourcePath}' on attempt #${attempt}:"
        + s" ${sw.toString}")
    }
    packed.foreach { fileSize =>
      val tEnd = System.currentTimeMillis()
      logger.info(
        s"Packed '${sourcePath}'; size ${humanSize(fileSize)};"
          + f" waited ${(tStart-tCreate).toDouble/1000.0}%.3f seconds;"
          + f" worked ${(tEnd-tStart).toDouble/1000.0}%.3f seconds")
    }
    packed.isSuccess
  }
}
// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.txt
//
// DM20-1143