Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt
package org.cert.netsa.mothra.packer
import com.typesafe.scalalogging.StrictLogging
import java.util.concurrent.{
LinkedBlockingDeque, ScheduledThreadPoolExecutor, TimeUnit}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, Path => HPath, PathFilter}
import scala.collection.immutable.Set
import scala.collection.mutable.ArrayBuffer
import scala.util.{Failure, Success, Try}
/**
* An abstract class to support iterating over the files in the directory
* `incomingDir` and remembering the list of files between scans of the
* directory. When a file is first seen, it is given to the `handleFile`
* abstract method, which returns a `DirWatcherResult`. What is done with
* the file (for example, adding it to a queue) is up to the subclass.
*
* Filenames in `incomingDir` that begin with a dot are ignored. Files whose
* size is 0 are also ignored.
*
* @param incomingDir The directory to check for incoming files.
* @param conf The Hadoop configuration.
*/
private[mothra] abstract class AbstractDirWatcher(incomingDir: HPath)
  (implicit conf: Configuration)
    extends StrictLogging
{
  /** The file system that owns `incomingDir`, resolved once from `conf`. */
  private[this] val sFs = incomingDir.getFileSystem(conf)

  /**
    * The KnownFiles object is to prevent the creation of multiple jobs
    * for the same file or creating jobs for files that fail after
    * `packAttemps` attempts (a limit named by the original comment; it is
    * not defined in this class).
    *
    * The object maintains two collections of file paths: Those seen during
    * this scan and those seen during the previous scan.  Calling
    * `checkPath(path)` adds `path` to the current collection and
    * returns `true` if `path` was seen the previous scan or `false`
    * otherwise.  Calling `freeze()` indicates that a scan has
    * completed, and it makes the current collection be the previous set
    * and empties the current collection.
    *
    * FIXME: Since we only compare path names, if we process file
    * 'foo' and a new file named 'foo' arrives within the interval,
    * the new 'foo' will be treated as the same file and be ignored.
    */
  private[this] object KnownFiles {
    private[this] val current = ArrayBuffer.empty[HPath]
    private[this] var previous = Set.empty[HPath]

    /** Record the presence of `path` and return `true` if `path` has been
      * seen before; `false` otherwise. */
    def checkPath(path: HPath): Boolean = {
      current += path
      previous.contains(path)
    }

    /** Carry `path` over into the current collection only if it was seen
      * during the previous scan.  Used for directory entries that were not
      * visited because a scan stopped early: files that were already
      * handled stay known, while unseen files remain unknown so that a
      * later scan still hands them to `handleFile`. */
    def retainIfKnown(path: HPath): Unit = {
      if (previous.contains(path)) {
        current += path
      }
      ()
    }

    /** Freeze the set of known files.  The next call to checkPath() will
      * open a new set of known files. */
    def freeze(): Unit = {
      previous = current.toSet
      current.clear()
    }
  }

  /**
    * The task that performs a single scan of `incomingDir`.
    *
    * NOTE(review): `beforeScan()` is called outside the `try` block, so an
    * exception it throws escapes `run()`.  Per the JDK documentation for
    * `scheduleAtFixedRate`, that suppresses later periodic executions --
    * confirm whether that is the desired behavior.
    */
  private[this] class Scanner() extends Runnable {
    /** True for regular files of non-zero length.  A failure of `isFile()`
      * is treated as "not a file"; `getLen()` is deliberately not guarded
      * so that its failures reach the scan's error handler. */
    private[this] def isCandidate(status: FileStatus): Boolean =
      Try(status.isFile()).getOrElse(false) && status.getLen() > 0

    /** Hand one new file to `handleFile`, mapping a non-fatal exception
      * to `CONTINUE` so one bad file does not abort the scan. */
    private[this] def visit(status: FileStatus): DirWatcherResult = {
      val sourcePath = status.getPath()
      logger.trace(s"Found new file '${sourcePath}'")
      Try {
        handleFile(sourcePath, status)
      } match {
        case Success(x) => x
        case Failure(e) =>
          logger.debug(s"Error handling incoming file: ${e.toString}")
          DirWatcherResult.CONTINUE
      }
    }

    def run(): Unit = {
      beforeScan() match {
        case DirWatcherResult.TERMINATE =>
          shutdown()
        case DirWatcherResult.SKIP =>
          // do nothing
        case DirWatcherResult.CONTINUE =>
          logger.debug(s"Scanning '${incomingDir}'")
          var visitResult: DirWatcherResult =
            DirWatcherResult.CONTINUE
          try {
            // Walk the listing lazily with an explicit loop.  A
            // for-comprehension whose guards are followed by a value
            // definition is desugared to `withFilter(...).map(...)`, and
            // `map` on an Array filter is strict: every guard (including
            // the "still CONTINUE" check and the side-effecting
            // checkPath()) would be evaluated for all entries before the
            // first file is handled, so the scan could never stop early.
            val candidates = sFs.listStatus(
              incomingDir, AbstractDirWatcher.excludeDotFiles)
              .iterator.filter(isCandidate)
            while ( DirWatcherResult.CONTINUE == visitResult
              && candidates.hasNext )
            {
              val sourceStatus = candidates.next()
              if ( !KnownFiles.checkPath(sourceStatus.getPath()) ) {
                visitResult = visit(sourceStatus)
              }
            }
            // Only non-empty when the scan stopped early: keep the
            // already-known files known so they are not handled again.
            candidates.foreach { skipped =>
              KnownFiles.retainIfKnown(skipped.getPath())
            }
            if ( DirWatcherResult.TERMINATE != visitResult ) {
              KnownFiles.freeze()
              visitResult = afterScan()
            }
          } catch {
            case ex: Exception =>
              logger.error(
                s"Failure reading source directory '${incomingDir}': $ex")
          }
          if ( DirWatcherResult.TERMINATE == visitResult ) {
            shutdown()
          }
      }
    }
  }

  private[this] val factory = new PackerThreadFactory("DirWatcher-")

  /** Single-threaded executor that runs every scan.  It is configured so
    * that pending periodic and delayed tasks do not run after shutdown. */
  protected val pool = new ScheduledThreadPoolExecutor(1, factory)
  pool.setContinueExistingPeriodicTasksAfterShutdownPolicy(false)
  pool.setExecuteExistingDelayedTasksAfterShutdownPolicy(false)
  pool.setRemoveOnCancelPolicy(true)

  /**
    * Called before performing a scan.  The scan is skipped if this
    * method returns `SKIP`; the watcher is shut down if it returns
    * `TERMINATE`.
    */
  protected def beforeScan(): DirWatcherResult

  /**
    * Called after performing a scan.  This is not invoked if beforeScan()
    * returned `SKIP` or `TERMINATE`, if a call to handleFile() returned
    * `TERMINATE`, or if the scan failed with an exception.  Returning
    * `TERMINATE` shuts the watcher down.
    */
  protected def afterScan(): DirWatcherResult

  /**
    * Called at shutdown, before the thread pool is shut down.
    */
  protected def atShutdown(): Unit = {}

  /**
    * Called on each new, non-empty file in the directory.  A result other
    * than `CONTINUE` stops the current scan: no further files are handed
    * to this method until the next scan.
    */
  protected def handleFile(file: HPath, status: FileStatus): DirWatcherResult

  /**
    * Scans the directory one time, blocking until the scan has finished.
    */
  final def runOnce(): Unit = {
    pool.submit(new Scanner()).get()
    ()
  }

  /**
    * Schedules a scan to occur every `interval` seconds, starting
    * immediately.
    */
  final def runPeriodically(interval: Int): Unit = {
    pool.scheduleAtFixedRate(new Scanner(), 0, interval, TimeUnit.SECONDS)
    ()
  }

  /**
    * Stops the periodic scanning of the directory.  A failure in
    * atShutdown() is logged rather than silently discarded, and the pool
    * is shut down regardless.
    */
  final def shutdown(): Unit = {
    Try { atShutdown() } match {
      case Failure(e) =>
        logger.warn(
          s"Error in shutdown handler for '${incomingDir}': ${e.toString}")
      case Success(_) =>
    }
    pool.shutdown()
  }
}
private[mothra] object AbstractDirWatcher {
  /** A path filter that rejects hidden entries, i.e. those whose final
    * path component starts with a dot, and accepts everything else. */
  private object excludeDotFiles extends PathFilter {
    def accept(p: HPath): Boolean = {
      val name = p.getName()
      val hidden = name.nonEmpty && name.charAt(0) == '.'
      !hidden
    }
  }
}
/**
  * A directory watcher that puts the path of each new file found in
  * `incomingDir` onto `deque`.
  *
  * @param incomingDir The directory to check for incoming files.
  * @param deque Where to store the paths of the new files.
  * @param conf The Hadoop configuration.
  */
private[mothra] final case class DirWatcher(
  incomingDir: HPath,
  deque: LinkedBlockingDeque[HPath])
  (implicit conf: Configuration)
    extends AbstractDirWatcher(incomingDir)(conf)
{
  /** Number of files queued by handleFile() since the last beforeScan(). */
  var newfiles = 0

  /** Resets the new-file counter and logs the start of the scan. */
  def beforeScan(): DirWatcherResult = {
    newfiles = 0
    logger.info(s"Scanning ${incomingDir}...")
    DirWatcherResult.CONTINUE
  }

  /** Logs how many new files were queued by the scan that just ended. */
  def afterScan(): DirWatcherResult = {
    logger.info(s"Scanned ${incomingDir}; found ${newfiles} new files")
    DirWatcherResult.CONTINUE
  }

  /** Queues `file` on `deque`, waiting for space if the deque is full,
    * and counts it as a new file. */
  def handleFile(file: HPath, status: FileStatus): DirWatcherResult = {
    deque.put(file)
    // Count only after the put succeeded; without this increment the
    // summary logged by afterScan() always reported 0 new files.
    newfiles += 1
    DirWatcherResult.CONTINUE
  }
}
// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact [email protected] for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.tx
//
// DM20-1143