// silk_appender.SilkFileJob.scala Maven / Gradle / Ivy
// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt
package org.cert.netsa.mothra.tools.silk_appender
import com.typesafe.scalalogging.StrictLogging
import java.io.IOException
import java.util.UUID.randomUUID
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{
FSDataOutputStream, FileAlreadyExistsException, Path}
import org.apache.hadoop.io.IOUtils
import resource.managed
import scala.util.{Failure, Try}
/**
  * A [[java.lang.Runnable]] task that copies the contents of
  * `sourcePath` to `targetPath` and then removes `sourcePath`.  If
  * `targetPath` already exists, a GUID suffix is added to the file
  * name instead of overwriting the existing file.
  *
  * The task does not start a thread itself; it is meant to be handed
  * to whatever executor or thread the caller chooses.  A job may be
  * run more than once (for example to retry after a failure); use
  * [[runFailed]] and [[runCount]] to inspect the outcome.
  *
  * @param sourcePath The file containing data to be copied.
  * @param targetPath The destination for the data.
  * @param conf The Hadoop configuration used to resolve the file
  *     systems of both paths and to size the copy buffer.
  */
case class SilkFileJob(sourcePath: Path, targetPath: Path)
  (implicit conf: Configuration)
    extends Runnable with StrictLogging
{
  // whether the most recent run failed; guarded by this object's monitor
  private[this] var failed = false

  // number of times this job has been started; guarded by this
  // object's monitor
  private[this] var count = 0

  /** Indicates whether the most recent call to run() resulted in an
    * error. */
  def runFailed: Boolean = synchronized { failed }

  /** Returns the number of times run() has been called. */
  def runCount: Int = synchronized { count }

  /**
    * Move `sourcePath` to `targetPath`.
    *
    * The data is copied, the modification time of the source is
    * applied to the copy, and finally the source is deleted.  Any
    * non-fatal error is logged and recorded so that [[runFailed]]
    * returns `true`; it is not rethrown.  When the copy itself fails,
    * the partially written target file is removed (best effort) so
    * that a retry does not leave a truncated file behind.
    */
  def run(): Unit = {
    // Read the attempt number under the same lock that updates it so
    // the value logged below is the one belonging to this run.
    val attempt = synchronized {
      failed = false
      count += 1
      count
    }
    logger.debug(s"attempting '${sourcePath}', try #${attempt}")

    // the path actually written; differs from targetPath when a GUID
    // suffix had to be added
    var finalPath = targetPath
    // true once this run has created the file at finalPath
    var created = false

    Try {
      val sourceFs = sourcePath.getFileSystem(conf)
      val targetFs = targetPath.getFileSystem(conf)

      // Open 'finalPath' for writing without overwriting; if a file
      // is already there, change 'finalPath' to a GUID-suffixed name
      // and create that instead.
      def openTarget(): FSDataOutputStream = {
        val firstTry: Option[FSDataOutputStream] =
          try {
            logger.trace(s"creating '${finalPath}'...")
            val opened = Option(targetFs.create(targetPath, false))
            logger.trace(s"created '${finalPath}'.")
            opened
          } catch {
            case _: FileAlreadyExistsException =>
              logger.trace("creation failed: file exists; adding a suffix")
              None
          }
        val stream = firstTry.getOrElse {
          // add a GUID suffix
          finalPath = new Path(
            targetPath.toString + s".${randomUUID().toString}")
          logger.trace(s"creating '${finalPath}'...")
          targetFs.create(finalPath, false)
        }
        created = true
        stream
      }

      // ensure the parent directory to the target exists
      targetFs.mkdirs(targetPath.getParent())

      // copy bytes from source to target path
      Try {
        for {
          sourceStream <- managed(sourceFs.open(sourcePath))
          targetStream <- managed(openTarget())
        } {
          IOUtils.copyBytes(sourceStream, targetStream, conf)
        }
      } match {
        case Failure(copyError) =>
          // The source has not been touched yet, so a target created
          // by this run holds at best a truncated copy.  Remove it so
          // a retry does not leave it behind next to a suffixed file.
          if ( created ) {
            val removed =
              Try(targetFs.delete(finalPath, false)).getOrElse(false)
            if ( ! removed ) {
              logger.warn(
                s"Failed to remove partially written file '${finalPath}'")
            }
          }
          throw copyError
        case _ =>
      }

      // set modification time for finalPath
      // The -1 means "don't set the atime".
      val sourceStatus = sourceFs.getFileStatus(sourcePath)
      targetFs.setTimes(finalPath, sourceStatus.getModificationTime(), -1)

      // remove the source file
      if ( ! sourceFs.delete(sourcePath, false) ) {
        throw new IOException(
          s"Failed to remove source file '${sourcePath}' after successful loading into '${finalPath}'")
      }
    } match {
      case Failure(e) =>
        synchronized { failed = true }
        // pass the throwable as the cause so the stack trace is kept
        logger.error(e.toString, e)
      case _ =>
        logger.info(s"'${sourcePath}' => '${finalPath}'")
    }
  }
}
// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact permission@sei.cmu.edu for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.txt.
//
// DM20-1143