// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt


package org.cert.netsa.mothra.tools

import org.cert.netsa.io.ipfix.{CollectedMessage, ExportStream, InfoModel,
  MessageReader, RecordSet, SessionGroup, StreamSession,
  StreamMessageReader, TemplateSet}
import org.cert.netsa.mothra.packer.{
  AbstractDirWatcher, DirWatcherResult, PackerDeque, PackerDequeNode,
  PackerThreadFactory, PackingLogic, PartitionerConfigurator,
  PartitionerPackLogic, RunTimeCodeLoader, Version}

import com.typesafe.scalalogging.StrictLogging
import java.io.{DataOutputStream, PrintWriter, StringWriter}
import java.lang.management.ManagementFactory
import java.nio.channels.{Channels, ReadableByteChannel}
import java.util.UUID.randomUUID
import java.util.concurrent.{LinkedBlockingDeque, ScheduledThreadPoolExecutor,
  ThreadPoolExecutor, TimeUnit}
import java.util.concurrent.locks.ReentrantLock
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FSDataOutputStream,
  FileStatus, Path => HPath}
import org.apache.hadoop.io.compress.{CodecPool, CompressionCodecFactory,
  Compressor, Decompressor}
import org.apache.hadoop.util.ShutdownHookManager
import scala.collection.mutable.{ArrayBuffer, PriorityQueue}
import scala.util.matching.Regex
import scala.util.{Failure, Success, Try}


/**
  * Object to implement the InvariantPacker application.
  *
  * Typical usage in a Spark environment:
  *
  * `spark-submit --class org.cert.netsa.mothra.tools.InvariantPackerMain mothra-tools.jar [--version] [--one-shot] <sourceDir> <destinationDir> <partitionerFile>`
  *
  * Processes files created by super_mediator running in invariant mode and
  * writes them into HDFS.
  */
object InvariantPackerMain extends App with StrictLogging {


  def usage(full: Boolean = false): Unit = {
    print("""
Usage: spark-submit --class org.cert.netsa.mothra.tools.InvariantPackerMain mothra-tools.jar [--version] [--one-shot] <sourceDir> <destinationDir> <partitionerFile>

sourceDir:         Directory to read from, as a Hadoop URI
destinationDir:    Directory to write to, as a Hadoop URI
partitionerFile:   A Scala source file containing Partitioner Packing Logic
""")
    if ( full ) {
      print(s"""
${APP} processes files created by super_mediator running in invariant mode and
writes them into HDFS.  It reads IPFIX records from files in `sourceDir` and
writes the records to files in `destinationDir`.

The `partitionerFile` is a Scala source file that may contain either
PartitionerConfigurator or PartitionerPackLogic.  ${APP} reads, parses,
compiles, and loads the Scala code contained in the file, then extracts the
sequence of partitioners defined in that file.  The partitioners are used by
${APP} when writing flow records to the `destinationDir`.

If "--version" is included on the command line, the application prints its
version number and exits.

If "--one-shot" is included on the command line, the `sourceDir` is only
scanned one time.  Once all files in sourceDir have been packed, the
application exits.  When "--one-shot" is not provided, the application runs
until it is signaled to end.

The Java property values that are used by this application are:

${appProp}compression -- The compression to use for files written to
HDFS.  Values typically supported by Hadoop include `bzip2`, `gzip`,
`lz4`, `lzo`, `lzop`, `snappy`, and `default`.  The empty string indicates
no compression.  The default is `${DEFAULT_COMPRESSION}`.

${appProp}maxThreads -- The maximum number of threads that attempt to write to
output files simultaneously.  The default is ${DEFAULT_MAX_THREADS}.

${appProp}pollingInterval -- How often the `sourceDir` is checked (polled) for
IPFIX files to process.  The default is ${DEFAULT_POLL_INTERVAL} seconds.

${appProp}maximumSize -- The (approximate) maximum size file to create, in
octets.  The default is no maximum.  When a file's size exceeds this value,
the file is closed and a new file is started.  Typically a file's size will
not exceed this value by more than the maximum size of an IPFIX message, 64k.
This value may not be less than ${MINIMUM_MAXIMUM_SIZE}.

${appProp}outputIdleSeconds -- The maximum number of seconds to allow an idle
output file to remain open so additional incoming records may be appended to it.
It defaults to the value ${DEFAULT_OUTPUT_FILE_IDLE_SECONDS}.  This value may
not be less than ${OUTPUT_FILE_IDLE_SECONDS_MINIMUM}.

${appProp}fileCacheSize -- The maximum size of the open file cache.  This is
the maximum number of open files maintained by the file cache for writing to
output files.  Once the cache reaches this number of open files, the
application closes the least-recently-used file when it needs to open another.
This value does not include the file handles required when reading incoming
files.  The default is ${DEFAULT_FILE_CACHE_SIZE}; the minimum permitted is
${MINIMUM_FILE_CACHE_SIZE}.

${appProp}observationDomainId -- The observationDomainId to use in output
files.  The default is ${DEFAULT_OBSERVATION_DOMAIN_ID}.

When an output file has not been opened yet, it is not opened until the oldest
input file reaches a particular age, the number of input files reaches a
count, or the sum of the sizes of the input files reaches a value.  (Reaching
any threshold allows the output file to be opened.)  The next three properties
describe these values:

${appProp}maxInputAgeSeconds -- The age for an input file that causes its
destination output file to be opened.  Here, "age" refers to how long ago the
file was noticed, not the age of the file itself.  The default is
${DEFAULT_MAXIMUM_AGE_SECONDS} seconds.

${appProp}minInputCount -- The number of queued input files that causes their
destination output file to be opened.  The default is
${DEFAULT_MINIMUM_INPUT_COUNT}.

${appProp}minInputSize -- The sum of the sizes of queued input files that
causes their destination output file to be opened, in octets.  The default is
${DEFAULT_MINIMUM_INPUT_SIZE}.
""")
    }
    System.exit(if ( full ) { 0 } else { 1 })
  }
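
  // Illustrative sketch (not part of the original source): the properties
  // described in the help text above are ordinary Java system properties, so
  // under spark-submit they would typically be supplied to the driver JVM via
  // `--driver-java-options`.  The property names are real; the values and
  // file arguments below are made-up examples.
  //
  //   spark-submit \
  //     --class org.cert.netsa.mothra.tools.InvariantPackerMain \
  //     --driver-java-options '-Dmothra.invariantpacker.compression=gzip -Dmothra.invariantpacker.maxThreads=8' \
  //     mothra-tools.jar hdfs:///incoming hdfs:///repo partitioners.scala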

  def version(): Unit = {
    println(Version.get())
    System.exit(0)
  }


  /**
    * Reads the [[CollectedMessage messages]] in an incoming file.
    *
    * @param path     The complete path to the incoming file.
    * @param relPath  The path to the output file, relative to `destinationDir`.
    * @param fileSize The size of the incoming file, in octets.
    * @param modTime  The modification time of the file.
    */
  private[this] case class IncomingFileReader(
    path: HPath, relPath: String, fileSize: Long, modTime: Long)
      extends Iterator[CollectedMessage]
  {
    // create a SessionGroup for this input
    private[this] val inGroup = SessionGroup(infoModel, path)

    /** The Decompressor as an Option. */
    private[this] var decompr: Option[Decompressor] = None

    /** The raw input stream. */
    private[this] var rawDataInStream: FSDataInputStream = _

    /** The input stream as a channel. */
    private[this] var channel: ReadableByteChannel = _

    /** The reader */
    private[this] var reader: MessageReader = _

    /**
      * The time when this object was created.  Has nothing to do with the
      * timestamps of the file at `path`.
      */
    val noticedTime = System.currentTimeMillis()

    /**
      * Checks whether there is another [[CollectedMessage]] to read from the
      * file.  The open() method must have been called before this is used.
      */
    def hasNext: Boolean = reader.hasNext

    /** Returns the next message. */
    def next(): CollectedMessage = reader.next()

    /**
      * Opens the file, sets up decompression, and creates a reader.  Throws
      * an Exception on error.
      */
    def open(): Unit = {
      val fileSystem = path.getFileSystem(hadoopConf)

      // get the compression codec based on the pathname
      val codec = Try {
        Option(codecFactory.getCodec(path))
      }.getOrElse(None)
      // get a decompressor
      Try {
        decompr = codec.map {c => CodecPool.getDecompressor(c)}
      } match {
        case Failure(e) =>
          logger.error(s"Failed to get decompressor" +
            s" for '${path.getName}' in ${path.getParent}: ${e}")
          throw e
        case _ =>
      }
      // open the file
      Try {
        rawDataInStream = fileSystem.open(path)
      } match {
        case Failure(e) =>
          logger.error(s"Failed to open raw input" +
            s" for '${path.getName}' in ${path.getParent}: ${e}")
          close()
          throw e
        case _ =>
      }
      // create a channel, enabling the decompressor
      Try {
        channel = codec match {
          case None => Channels.newChannel(rawDataInStream)
          case Some(c) => Channels.newChannel(
            c.createInputStream(rawDataInStream, decompr.get))
        }
      } match {
        case Failure(e) =>
          logger.error(s"Failed to open channel" +
            s" for '${path.getName}' in ${path.getParent}: ${e}")
          close()
          throw e
        case _ =>
      }
      // create the reader and read first message
      Try {
        reader = StreamMessageReader(channel, inGroup)
      } match {
        case Failure(e) =>
          logger.error(s"Failed to create message reader" +
            s" for '${path.getName}' in ${path.getParent}: ${e}")
          close()
          throw e
        case _ =>
      }
    }

    /**
      * Closes the reader and attempts to release all resources.  This should
      * never error.
      */
    def close(): Unit = {
      reader = null
      Try {
        for (c <- Option(channel)) {
          c.close()
          channel = null
          rawDataInStream = null
        }
      } match {
        case Failure(e) =>
          logger.warn(s"Failed to close channel" +
            s" for '${path.getName}' in ${path.getParent}: ${e}")
        case _ =>
      }
      Try {
        for (s <- Option(rawDataInStream)) {
          s.close()
          rawDataInStream = null
        }
      } match {
        case Failure(e) =>
          logger.error(s"Failed to close raw input stream" +
            s" for '${path.getName}' in ${path.getParent}: ${e}")
        case _ =>
      }
      Try {
        decompr.foreach {d => CodecPool.returnDecompressor(d) }
      } match {
        case Failure(e) =>
          logger.error(s"Failed to return decompressor to CodecPool" +
            s" for '${path.getName}' in ${path.getParent}: ${e}")
        case _ =>
      }
    }

    /**
      * Deletes the file at `path`.
      */
    def deleteFile(): Unit = {
      val fileSystem = path.getFileSystem(hadoopConf)
      fileSystem.delete(path, false)
      ()
    }
  }

  /**
    * Provides a factory method for [[IncomingFileReader]].
    */
  private[this] object IncomingFileReader {
    /**
      * Creates a new [[IncomingFileReader]].
      */
    def apply(path: HPath, relPath: String, status: FileStatus):
        IncomingFileReader =
      new IncomingFileReader(
        path, relPath, status.getLen(), status.getModificationTime())
  }

  /**
    * Provides an Ordering for [[IncomingFileReader]] instances.  The
    * comparison is reversed so that a PriorityQueue using this Ordering
    * dequeues the reader with the earliest `noticedTime` first.
    */
  private[this] object IncomingFileReaderOrdering
      extends scala.math.Ordering[IncomingFileReader]
  {
    def compare(a: IncomingFileReader, b: IncomingFileReader): Int =
      b.noticedTime compare a.noticedTime
  }


  /**
    * Holds a sorted list of [[IncomingFileReader]] objects.
    */
  private[this] case class ReaderQueue() {
    /** The list of input files to be read. */
    private[this] var files =
      PriorityQueue.empty[IncomingFileReader](IncomingFileReaderOrdering)

    /** The sum of the sizes of the input files to be read, in octets. */
    private[this] var fileSizes = 0L

    /** Indicates whether the ReaderQueue is empty. */
    def isEmpty: Boolean = synchronized { 0 == files.size }

    /** Indicates whether the ReaderQueue is not empty. */
    def nonEmpty: Boolean = synchronized { files.size > 0 }

    /** Gets the number of input files. */
    def fileCount: Int = synchronized { files.size }

    /** Gets the sum of the sizes of the files in the queue, in octets */
    def totalOctets: Long = synchronized { fileSizes }

    /** The noticed time of the oldest input file, in epoch milliseconds. */
    def oldestFileTime: Option[Long] = synchronized {
      files.headOption map { _.noticedTime }
    }

    /** Adds an input file to be processed. */
    def push(reader: IncomingFileReader): Unit = {
      push(Iterable(reader))
    }

    /** Adds multiple input files to be processed. */
    def push(readers: Iterable[IncomingFileReader]): Unit = synchronized {
      files ++= readers
      for ( r <- readers ) {
        fileSizes += r.fileSize
      }
    }

    /** Returns the first file as an Option or [[None]] when empty. */
    def pop(): Option[IncomingFileReader] = synchronized {
      if ( files.isEmpty ) {
        None
      } else {
        val result = files.dequeue()
        fileSizes -= result.fileSize
        Option(result)
      }
    }

  }


  /**
    * A RepositoryPath represents an output file in the data repository.  This
    * object includes a [[ReaderQueue]] of input files to be copied to the
    * output file and a [[RepoWriter]] for writing to the output file.
    *
    * This object extends [[PackerDequeNode]] so it may be used in a
    * [[PackerDeque]].
    *
    * This object extends [[Runnable]] so that the files in its
    * [[ReaderQueue]] may be written to the [[RepoWriter]].  The `run()`
    * method may be called multiple times.
    *
    * @param rootDir The root directory of the directory tree where output
    * files should be written.
    * @param relPath The path for the output file relative to `rootDir`.
    */
  private[this] case class RepositoryPath(rootDir: HPath, relPath: String)
      extends PackerDequeNode[RepositoryPath] with Runnable
  {
    /**
      * The set of input files whose records should be copied to the `writer`.
      * Additional input files may be added to this queue over the lifetime of
      * this object.
      */
    val readers = ReaderQueue()

    /**
      * The output file.
      */
    val writer = RepoWriter(rootDir, relPath)

    /** The timestamp of the most recent write to the output file. */
    @volatile
    var lastWrite = 0L

    /**
      * Whether this object has been told to no longer process files.
      */
    @volatile
    var endThread = false

    /**
      * Whether this object has been submitted to a thread pool for execution.
      */
    @volatile
    var runPending = false

    /**
      * Opens the writer, closing the oldest file if the number of open files
      * is at the maximum; returns `true` on success and `false` if unable to
      * get a file handle.
      */
    private[this] def openWriter(): Boolean = repoFilesMutex.synchronized {
      // close files in excess of the maximum
      while ( openFileCount >= fileCacheSize ) {
        val oldest = repoLRU.last
        logger.debug(s"LRU is full; must remove '${oldest.relPath}'")
        //oldest.lock.lock()
        oldest.synchronized {
          if ( oldest.readers.nonEmpty ) {
            logger.debug("Taking no action: Not open, no file handles," +
              " and oldest still has files to process")
            return false
          }
          // remove oldest from the LRU
          if ( repoFiles.contains(oldest.relPath) ) {
            repoLRU.remove(oldest)
            repoFiles = repoFiles - oldest.relPath
            oldest.writer.close()
            oldest.endThread = true
            repoFileCount -= 1
            openFileCount -= 1
          }
        }
        logger.trace(s"LRU was full; removed '${oldest.relPath}'")
      }
      // open the new file
      writer.open()
      openFileCount += 1
      repoLRU.prepend(this)
      true
    }

    /**
      * Processes all the input files on this object.  Opens the output file
      * if needed.
      */
    private[this] def processFiles(): Unit = {
      if ( writer.isOpen ) {
        repoFilesMutex.synchronized {
          repoLRU.moveToHead(this)
        }
      } else if ( !openWriter() ) {
        return
      }
      lastWrite = System.currentTimeMillis()

      var optReader = readers.pop()
      while ( optReader.nonEmpty && (running || oneshot) && !endThread ) {
        val reader = optReader.get
        if ( Try { reader.open() }.isSuccess ) {
          var count = 0L
          val t0 = System.currentTimeMillis()
          for ( message <- reader ) {
            if ( writer.getPos() > repoFileMaxSize ) {
              logger.debug("Maximum size reached;" +
                s" closing and reopening '${relPath}'")
              writer.replaceStream()
            }
            for ( set <- message.iterator ) {
              set match {
                // Ignore template sets, they're added as needed
                case _: TemplateSet => ()
                case recSet: RecordSet =>
                  for ( rec <- recSet.iterator ) {
                    count += 1
                    writer.exportStream.add(rec)
                  }
              }
            }
          }
          val t = System.currentTimeMillis()
          logger.debug(s"Copied ${count} records" +
            s" to '${relPath}'* from '${reader.path}'" +
            s" in ${timediff(t, t0)}," +
            s" waited ${timediff(t, reader.noticedTime)}")
          reader.close()

          // FIXME: At this point the reader file should be moved to the
          // archive directory or deleted from the incoming path.  Haven't
          // done so since I keep testing with the same set of input
          // files.
          if ( !oneshot ) {
            reader.deleteFile()
          }

        }
        optReader = readers.pop()
      }
      writer.exportStream.flush()
      ()
    }

    /** Writes a trace-level log message with the current queue statistics. */
    private[this] def logStats(prefix: String): Unit = {
      val now = System.currentTimeMillis()
      logger.trace(s"${prefix}:" +
        s" fileCount = ${readers.fileCount}," +
        s" oldestFile = ${readers.oldestFileTime}" +
        ( if ( readers.oldestFileTime.isEmpty ) { "," } else {
          s" (${now - readers.oldestFileTime.get})," } ) +
        s" lastWrite = ${lastWrite} (${now - lastWrite})," +
        s" fileSizes = ${readers.totalOctets}," +
        s" exportStream = ${writer.exportStream},")
    }

    /**
      * Processes the input files on this object.
      */
    def run(): Unit = {
      logger.trace(s"RUNNING: Task ${relPath}")
      synchronized {
        if ( !endThread && readers.nonEmpty ) {
          logStats("ENTERED RUN")
          processFiles()
          logStats("AT END")
        }
        runPending = false
      }
      logger.trace(s"PAUSING: Task ${relPath}")
    }
  }


  /**
    * Class to run periodically to find [[RepositoryPath]] objects that have
    * input files that can be processed.  The RepositoryPaths are added to a
    * thread pool.
    *
    * If the RepoWriter of a RepositoryPath is open and files are in the
    * ReaderQueue, the RepositoryPath is allowed to run.
    *
    * Otherwise, the RepositoryPath is only run when its set of input files
    * meets at least one of the following thresholds:
    *
    * -- The number of input files in the ReaderQueue.
    * -- The sum of the sizes of the input files, in octets.
    * -- The age of the oldest file in the ReaderQueue.
    */
  private[this] case class CheckRepositoryPaths() extends Runnable {
    def run(): Unit = {
      logger.trace(
        s"Starting status check of ${repoFileCount} existing repoFiles")
      val t0 = System.currentTimeMillis()
      //val stopped = ArrayBuffer.empty[RepositoryPath]
      //val started = ArrayBuffer.empty[RepositoryPath]
      val repoPaths = repoFilesMutex.synchronized {
        ArrayBuffer.empty[RepositoryPath] ++ repoFiles.values.filter {
          rp => !rp.runPending && !rp.endThread
        }
      }
      logger.trace(s"Found ${repoPaths.size} repoFiles to status check" +
        s" in ${timediff(System.currentTimeMillis(), t0)}")
      val runTask = ArrayBuffer.empty[RepositoryPath]
      val expireTask = ArrayBuffer.empty[RepositoryPath]
      var inactive = 0
      var startThreshAge = 0
      var startThreshFiles = 0
      var startThreshSize = 0
      var continue = 0
      var noInputs = 0
      var belowThresh = 0
      for {
        rp <- repoPaths
        // Workaround scala/bug#11175 -Ywarn-unused:params false positive
        _ = rp
        if running
      } {
        //logger.trace(s"Status checking ${rp.relPath}")
        rp.synchronized {
          if ( rp.runPending || rp.endThread ) {
            inactive += 1
          } else {
            val now = System.currentTimeMillis()
            val count = rp.readers.fileCount
            if ( count == 0 ) {
              if ( now - rp.lastWrite >= (1000 * outputIdleSeconds) ) {
                //logger.trace(s"EXPIRING: Task ${rp.relPath} - max idle")
                expireTask += rp
                rp.endThread = true
                //stopped += rp
              } else {
                noInputs += 1
                //logger.trace(s"EMPTY: Task ${rp.relPath} has no files")
              }
            } else if ( rp.writer.isOpen ) {
              continue += 1
              //logger.trace(
              //  s"CONTINUE: Task ${rp.relPath} is open and has ${count} files")
              rp.runPending = true
              runTask += rp
            } else if ( now - rp.readers.oldestFileTime.get >= maxInputAge ) {
              //logger.trace(s"OPENING: Task ${rp.relPath} - " +
              //  s"Oldest file (${rp.readers.oldestFileTime.get}) has " +
              //  s" age (${now - rp.readers.oldestFileTime.get})" +
              //  s" greater than max (${maxInputAge})")
              //logStats("WORKING")
              runTask += rp
              startThreshAge += 1
            } else if ( rp.readers.totalOctets >= minInputSize ) {
              //logger.trace(s"OPENING: Task ${rp.relPath} - " +
              //  s"Total octets (${rp.readers.totalOctets}) is " +
              //  s" greater than minimum (${minInputSize})")
              //logStats("WORKING")
              runTask += rp
              startThreshSize += 1
            } else if ( rp.readers.fileCount >= minInputCount ) {
              //logger.trace(s"OPENING: Task ${rp.relPath} - " +
              //  s"File count (${count}) is " +
              //  s" greater than min (${minInputCount})")
              //logStats("WORKING")
              runTask += rp
              startThreshFiles += 1
            } else {
              //logger.trace(s"IGNORING: Task ${rp.relPath} has" +
              //  s" ${count} files, ${rp.readers.totalOctets} octets, and" +
              //  s" age (${now - rp.readers.oldestFileTime.get})")
              belowThresh += 1
            }
          }
        }
      }
      val t1 = System.currentTimeMillis()
      if ( repoPaths.isEmpty ) {
        logger.info(
          s"Finished status check of 0 output files in ${timediff(t1, t0)}")
      } else {
        logger.info(s"Finished status check of ${repoPaths.size}" +
          s" output files in ${timediff(t1, t0)}:" +
          s" Continued: ${continue}," +
          s" Started: ${startThreshAge + startThreshFiles + startThreshSize}" +
          s" (age: ${startThreshAge}, size ${startThreshSize}," +
          s" count: ${startThreshFiles})," +
          s" Expired: ${expireTask.size}, Inactive: ${inactive}," +
          s" Ignored: ${belowThresh + noInputs}" +
          s" (below-threshold: ${belowThresh}, no-inputs: ${noInputs})")
        for ( rp <- runTask ) {
          writerPool.execute(rp)
        }
        val t2 = System.currentTimeMillis()
        logger.trace("Submitted " +
          s"${startThreshAge + startThreshFiles + startThreshSize + continue}" +
          s" tasks in ${timediff(t2, t1)}")
        repoFilesMutex.synchronized {
          for ( rp <- expireTask ) {
            rp.synchronized {
              if ( repoFiles.contains(rp.relPath) ) {
                repoLRU.remove(rp)
                repoFiles = repoFiles - rp.relPath
                rp.writer.close()
                repoFileCount -= 1
                openFileCount -= 1
              }
            }
          }
        }
        val t3 = System.currentTimeMillis()
        logger.trace(s"Expired ${expireTask.size} tasks" +
          s" in ${timediff(t3, t2)}")
      }
      logger.trace("Completed status check" +
        s" in ${timediff(System.currentTimeMillis(), t0)}")
    }

  }

  /**
    * A class that wraps opening a file for writing: generating a file name,
    * opening the file, enabling compression, and creating an
    * [[ExportStream]].
    *
    * @param rootDir The base directory in HDFS in which to create files
    * @param relPath The relative path and name of the file to create. A UUID
    * is added, and a compression suffix is appended if needed.
    */
  private[this] final case class RepoWriter(rootDir: HPath, relPath: String) {
    /** The Path being written to */
    private[this] var exportFile: HPath = _

    /** The Session for the ExportStream */
    private[this] var exportSession: StreamSession = _

    /** Compressor to use for the output file. */
    private[this] var compressor: Option[Compressor] = None

    /** Output stream of the output file. */
    private[this] var rawDataOutStream: FSDataOutputStream = _

    /** Data stream of the output file. */
    private[this] var dataOutStream: DataOutputStream = _

    /** The exportStream */
    var exportStream: ExportStream = _

    /** The time when the current file was opened */
    var createTime = Long.MaxValue

    /** Whether the output file is open. */
    var isOpen = false

    /** A lock for accessing the output file. */
    val lock = new ReentrantLock()

    // Set `exportFile` and `exportSession` during construction.
    initStream()

    /** Gets the complete path of the output file. */
    def fullPath: HPath = exportFile

    /** Gets the basename of the output file. */
    def basename: String = exportFile.getName

    /** Gets the complete path (including UUID) relative to `rootDir`. */
    def fullRelativePath: String =
      rootDir.toUri().relativize(exportFile.toUri()).toString()

    /** Gets the current position in the output file.  This should be the byte
      * offset into the file, not the number of octets written, which may
      * differ due to compression.  Also, this does not reflect any IPFIX
      * Message being created in memory that has not been written. */
    def getPos(): Long = {
      rawDataOutStream.getPos()
    }

    private[this] def initStream(): Unit = {
      exportFile = new HPath(
        rootDir, s"${relPath}.${randomUUID().toString}${compressSuffix}")

      exportSession = new StreamSession(
        SessionGroup(infoModel, exportFile), observationDomain)
    }

    /** Creates and opens the file, enables compression, and makes an export
      * stream. */
    private[this] def openStream(): Unit = {
      logger.trace(s"Creating new file '${fullRelativePath}'")
      val fileSystem = rootDir.getFileSystem(hadoopConf)
      Try {
        compressor = compressCodec map { c => CodecPool.getCompressor(c) }
        rawDataOutStream = fileSystem.create(exportFile, false)
        dataOutStream = compressor match {
          case None => rawDataOutStream
          case Some(c) => new DataOutputStream(
            compressCodec.get.createOutputStream(rawDataOutStream, c))
        }
        exportStream = ExportStream(dataOutStream, exportSession)
        //exportStream.elementDescriptionTID = 0xd006
        //exportStream.templateDescriptionTID = 0xd007
        exportStream.describeElements = false
        exportStream.describeTemplates = false
        createTime = System.currentTimeMillis()
        isOpen = true
      } match {
        case Failure(exception) =>
          logger.error(
            s"Failed to open '${fullRelativePath}' in ${rootDir}: ${exception}")
          createTime = Long.MaxValue
          Try {
            for (s <- Option(dataOutStream)) {
              s.close()
              rawDataOutStream = null
            }
          }
          Try {
            for (s <- Option(rawDataOutStream)) {
              s.close
              rawDataOutStream = null
            }
          }
          Try {
            for (c <- compressor) { CodecPool.returnCompressor(c) }
          }
          throw exception
        case _ =>
      }
    }

    /** Opens the output file if it has not been opened yet. */
    def open(): Unit = {
      if ( !isOpen ) {
        assert(Option(exportStream).isEmpty)
        openStream()
      }
    }

    /**
      * Closes the current [[ExportStream]] supporting this RepoWriter and opens
      * another.  This function assumes the writer's lock is held by the
      * caller.
      */
    def replaceStream(): Unit = {
      close()
      initStream()
      openStream()
    }

    /**
      * Closes the file, and attempts to ensure all resources are released.
      * This function assumes the writer's lock is held by the caller.
      */
    def close(): Unit = {
      createTime = Long.MaxValue
      isOpen = false
      var exception: Option[Throwable] = None
      Try {
        for (s <- Option(exportStream)) {
          s.close()
          dataOutStream = null
          rawDataOutStream = null
        }
      } match {
        case Failure(e) =>
          logger.error(s"Failed to close ExportStream" +
            s" for '${fullRelativePath}' in ${rootDir}: ${e}")
          if ( exception.isEmpty ) { exception = Option(e) }
        case _ =>
      }
      Try {
        for (s <- Option(dataOutStream)) {
          s.close()
          rawDataOutStream = null
        }
      } match {
        case Failure(e) =>
          logger.error(s"Failed to close DataOutputStream" +
            s" for '${fullRelativePath}' in ${rootDir}: ${e}")
          if ( exception.isEmpty ) { exception = Option(e) }
        case _ =>
      }
      Try {
        for (s <- Option(rawDataOutStream)) {
          s.close
          rawDataOutStream = null
        }
      } match {
        case Failure(e) =>
          logger.warn(s"Failed to close FSDataOutputStream" +
            s" for '${fullRelativePath}' in ${rootDir}: ${e}")
          if ( exception.isEmpty ) { exception = Option(e) }
        case _ =>
      }
      Try {
        for (c <- compressor) { CodecPool.returnCompressor(c) }
      } match {
        case Failure(e) =>
          logger.error(s"Failed to return compressor to CodecPool" +
            s" for '${fullRelativePath}' in ${rootDir}: ${e}")
        // do not propagate the error
        case _ =>
      }
      // throw if non-empty
      for (e <- exception) { throw e }
    }
  }


  /**
    * Class for flushing and closing open files at shutdown.  Its run() method
    * is invoked by the ShutdownHookManager.
    */
  private[this] class Cleanup extends Runnable {
    def run(): Unit = {
      running = false
      logger.info("Shutting down...")
      writerPool.shutdown()
      watcher.shutdown()
      logger.info("Waiting for threads to end....")
      while ( !writerPool.awaitTermination(1, TimeUnit.SECONDS) ) {
        printTaskCounts()
      }
      logger.info("Waiting for threads to end....done")
      logger.debug("Closed files")
      logger.info("Shutting down...done.")
      logger.info(s"=================  ${APP} has ended  =================")
    }
  }


  /**
    * Periodically scans the incoming directory for new incoming files.  Each
    * newly seen incoming file is added to an existing [[RepositoryPath]], or
    * to a newly created [[RepositoryPath]] when no existing one matches.
    */
  private[this] case class MyDirWatcher(inDir: HPath)
      extends AbstractDirWatcher(inDir)
  {
    // Import from the companion object
    import MyDirWatcher.fileRegex

    /** Number of non-ignored files seen for the most recent scan */
    private[this] var newfiles = 0

    /** Number of files ignored for the most recent scan */
    private[this] var ignored = 0

    /**
      * [[IncomingFileReader]] instances created for each newly seen input
      * file during the most recent scan.
      */
    private[this] val readers = ArrayBuffer.empty[IncomingFileReader]

    /** Called before scanning a directory, it resets the counters */
    def beforeScan(): DirWatcherResult = {
      newfiles = 0
      ignored = 0
      readers.clear()
      logger.info(s"Scanning ${incomingDir}...")
      DirWatcherResult.CONTINUE
    }

    /**
      * Called after the scan, it maps the incoming files to a RepositoryPath,
      * creating new instances as required.
      */
    def afterScan(): DirWatcherResult = {
      if ( ignored == 0 ) {
        logger.info(s"Scanned ${incomingDir}; found ${newfiles} new files")
      } else {
        logger.info(s"Scanned ${incomingDir}; found ${newfiles} new files" +
          s" (ignored ${ignored} files)")
      }
      val grouped = readers.groupBy(x => x.relPath)
      repoFilesMutex.synchronized {
        for ( (relPath, entries) <- grouped ) {
          repoFiles.get(relPath) match {
            case Some(rp) =>
              rp.readers.push(entries)
            case None =>
              val rp = RepositoryPath(destinationDir, relPath)
              rp.readers.push(entries)
              repoFiles = repoFiles.updated(relPath, rp)
              repoFileCount += 1
          }
        }
      }
      DirWatcherResult.CONTINUE
    }

    /**
      * Called for each file found during the scan, this checks whether the
      * file matches the regular expression.  For those that do, an
      * [[IncomingFileReader]] is created and added to `readers`.
      */
    def handleFile(file: HPath, status: FileStatus): DirWatcherResult = {
      // getName() returns the basename
      val filename = file.getName()
      fileRegex.findFirstMatchIn(filename) match {
        case None =>
          logger.info(s"Ignoring file that does not match regex: '${file}'")
          ignored += 1
        case Some(m) =>
          // Compute path to the repository file
          val year = m.group("year").toInt
          val month = m.group("month").toInt
          val day = m.group("day").toInt
          val hour = m.group("hour").toInt

          val basename = f"${year}%04d${month}%02d${day}%02d.${hour}%02d"

          // create the output path
          val relPath = (Seq[String](
            f"${year}%04d", f"${month}%02d", f"${day}%02d",
            s"v${packVersion}") ++ (partitioners collect {
              _.pathForFilename(filename) match { case Some(s) => s } })
          ).mkString("/") + s"/${basename}"

          /*
          ** val fields: Seq[Option[String]] =
          **   for (p <- partitioners) yield (p.pathForFilename(filename))
          ** // create the output path
          ** val relPath = (Seq[String](
          **   f"${year}%04d", f"${month}%02d", f"${day}%02d",
          **   s"v${packVersion}") ++ (for { f <- fields ; f2 <- f } yield f2)
          ** ).mkString("/") + s"/${basename}"
          */
          readers += IncomingFileReader(file, relPath, status)
          newfiles += 1
      }
      DirWatcherResult.CONTINUE
    }
  }

  private[this] object MyDirWatcher {
    // FIXME: Ignores files that do not match this regex.
    /** Regular expression that matches incoming file names */
    private val fileRegex =
      new Regex(
        """-inv-year-(\d{4})-month-(\d+)-day-(\d+)-hour-(\d+)-.*\.med""",
        "year", "month", "day", "hour")
  }
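
  // Illustrative example (file name invented for this note): an incoming file
  // such as "foo-inv-year-2022-month-3-day-9-hour-7-bar.med" matches
  // `fileRegex` above, so handleFile() builds the relative output path
  //   2022/03/09/v<packVersion>/<partitioner components>/20220309.07
  // where each partitioner component is the value returned by a partitioner's
  // pathForFilename() for that file name, and RepoWriter later appends a UUID
  // and any compression suffix to the final path component.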

  /**
    * Returns a string for the time elapsed between the millisecond values
    * `t0` and `t1` as fractional seconds, e.g., "123.456 seconds".
    */
  private[this] def timediff(t1: Long, t0: Long): String = {
    f"${(t1 - t0).toDouble / 1000.0}%.03f seconds"
  }
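
  // Illustrative note (not from the original source): for example,
  // timediff(2500L, 1000L) returns "1.500 seconds".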

  /**
    * Writes a log message with number of tasks that are running, queued, and
    * completed, and the number of known output files.
    */
  private[this] def printTaskCounts(): Unit = {
    val active = writerPool.getActiveCount()
    val total = writerPool.getTaskCount()
    val completed = writerPool.getCompletedTaskCount()
    logger.info(s"Task Counts: Active: ${active}," +
      s" Queued: ${total - completed - active}," +
      s" Completed: ${completed}," +
      s" Total: ${total}")
    logger.info(
      s"Output Files: Open: ${openFileCount}, Total: ${repoFileCount}")
  }

  // /////  Constants & Values Determined from Properties  /////

  /** The name of this application, for log messages. */
  private val APP = "InvariantPacker"

  /** The prefix used when checking properties */
  private val appProp = "mothra.invariantpacker."

  /**
    * Priority at which Cleanup.run() is invoked during shutdown.  For
    * reference, hadoop file system shutdown priority is 10.
    */
  private val SHUTDOWN_PRIORITY = 50

  /**
    * The default compression codec to use for files written to HDFS.  This
    * may be modified by specifying the following property:
    * mothra.invariantpacker.compression.
    *
    * Values typically supported by Hadoop include `bzip2`, `gzip`, `lz4`,
    * `lzo`, `lzop`, `snappy`, and `default`.  The empty string indicates no
    * compression.
    */
  val DEFAULT_COMPRESSION = ""

  /**
    * The compression codec used for files written to HDFS.  This may be set
    * by setting the "mothra.invariantpacker.compression" property.  If that
    * property is not set, DEFAULT_COMPRESSION is used.
    */
  val compressCodec = {
    val compressName = sys.props.get(s"${appProp}compression").
      getOrElse(DEFAULT_COMPRESSION)
    if ( compressName == "" ) {
      None
    } else {
      Try {
        //logger.trace(s"have a name ${compressName}")
        val codec = codecFactory.getCodecByName(compressName)
        //logger.trace(s"have a codec ${codec}")
        // Make sure we can create a compressor, not using it here.
        codec.createCompressor()
        //logger.trace(s"have a compressor ${compressor}")
        codec
      } match {
        case Success(ok) =>
          Option(ok)
        case Failure(e) =>
          logger.error("Unable to initialize compressor" +
            s" '${compressName}': ${e.toString}")
          val sw = new StringWriter
          e.printStackTrace(new PrintWriter(sw))
          logger.info("Unable to initialize compressor" +
            s" '${compressName}': ${sw.toString}")
          logger.warn("Using no compression for IPFIX files")
          None
      }
    }
  }

  /** The compression suffix */
  private[this] val compressSuffix =
    compressCodec.map {c => c.getDefaultExtension}.getOrElse("")


  /**
    * The default maximum number of threads that attempt to write to output
    * files simultaneously.
    *
    * This run-time behavior may be modified by setting the
    * mothra.invariantpacker.maxThreads property.
    */
  val DEFAULT_MAX_THREADS = 6

  /**
    * The maximum number of threads that attempt to write to output files
    * simultaneously.  It defaults to the value `DEFAULT_MAX_THREADS`.
    *
    * This run-time behavior may be modified by setting the
    * mothra.invariantpacker.maxThreads property.
    */
  val maxThreads = (sys.props.get(s"${appProp}maxThreads").
    map { _.toInt }).getOrElse(DEFAULT_MAX_THREADS)
  require(maxThreads >= 1)

  /**
    * The minimum value allowed for the maximum file size as specified by the
    * `mothra.invariantpacker.maximumSize` Java property.
    */
  val MINIMUM_MAXIMUM_SIZE = 1L << 19

  /**
    * The (approximate) maximum size file to create.  The default is no
    * maximum.  When a file's size exceeds this value, the file is closed and
    * a new file is started.  Typically a file's size will not exceed this
    * value by more than the maximum size of an IPFIX message, 64k.
    */
  val maximumSize = (sys.props.get(s"${appProp}maximumSize").
    map { _.toLong })
  require(maximumSize.isEmpty || maximumSize.get >= MINIMUM_MAXIMUM_SIZE)

  /**
    * The default maximum number of seconds to allow an idle output file to
    * remain open so additional incoming records may be appended to it.
    *
    * The run-time behavior may be modified by setting the
    * `mothra.invariantpacker.outputIdleSeconds` Java property.
    */
  val DEFAULT_OUTPUT_FILE_IDLE_SECONDS = 900

  /**
    * The minimum allowed value for the `mothra.invariantpacker.outputIdleSeconds`
    * Java property.
    */
  val OUTPUT_FILE_IDLE_SECONDS_MINIMUM = 60

  /**
    * The maximum number of seconds to allow an idle output file to remain
    * open so additional incoming records may be appended to it.  It defaults to
    * the value `DEFAULT_OUTPUT_FILE_IDLE_SECONDS`.
    *
    * This run-time behavior may be modified by setting the
    * `mothra.invariantpacker.outputIdleSeconds` Java property.
    */
  val outputIdleSeconds = (sys.props.get(s"${appProp}outputIdleSeconds").
    map { _.toInt }).getOrElse(DEFAULT_OUTPUT_FILE_IDLE_SECONDS)
  require(outputIdleSeconds >= OUTPUT_FILE_IDLE_SECONDS_MINIMUM)

  /**
    * When an output file has not been opened yet, it is not opened until the
    * oldest input file reaches a particular age, the number of input files
    * reaches a count, or the sum of the sizes of the input files reaches a
    * value.  This holds the default setting for the age of the oldest input
    * file, in seconds.
    *
    * The run-time behavior may be modified by setting the
    * 'mothra.invariantpacker.maxInputAgeSeconds' Java property.
    */
  val DEFAULT_MAXIMUM_AGE_SECONDS = 900

  /**
    * Have an unopened output file process its input files when there exists
    * an input file whose age exceeds this value (in milliseconds),
    * regardless of the number or sizes of the input files.  It defaults to
    * the value `DEFAULT_MAXIMUM_AGE_SECONDS` (converted to milliseconds).
    *
    * The run-time behavior may be modified by setting the
    * 'mothra.invariantpacker.maxInputAgeSeconds' Java property.
    */
  val maxInputAge = ((sys.props.get(s"${appProp}maxInputAgeSeconds").
    map { _.toInt }).getOrElse(DEFAULT_MAXIMUM_AGE_SECONDS)) * 1000

  /**
    * When an output file has not been opened yet, it is not opened until the
    * oldest input file reaches a particular age, the number of input files
    * reaches a count, or the sum of the sizes of the input files reaches a
    * value.  This holds the default setting for the minimum count of input
    * files.
    *
    * The run-time behavior may be modified by setting the
    * 'mothra.invariantpacker.minInputCount' Java property.
    */
  val DEFAULT_MINIMUM_INPUT_COUNT = 3

  /**
    * Have an unopened output file process its input files when there are at
    * least this many input files.
    *
    * The run-time behavior may be modified by setting the
    * 'mothra.invariantpacker.minInputCount' Java property.
    */
  val minInputCount = (sys.props.get(s"${appProp}minInputCount").
    map { _.toInt }).getOrElse(DEFAULT_MINIMUM_INPUT_COUNT)
  require(minInputCount > 0)

  /**
    * When an output file has not been opened yet, it is not opened until the
    * oldest input file reaches a particular age, the number of input files
    * reaches a count, or the sum of the sizes of the input files reaches a
    * value.  This holds the default setting for sum of the sizes of the input
    * files, in octets.
    *
    * The run-time behavior may be modified by setting the
    * 'mothra.invariantpacker.minInputSize' Java property.
    */
  val DEFAULT_MINIMUM_INPUT_SIZE = 1L << 20

  /**
    * Have an unopened output file process its input files when the sum of the
    * sizes of the input files is at least this number of octets.
    *
    * The run-time behavior may be modified by setting the
    * 'mothra.invariantpacker.minInputSize' Java property.
    */
  val minInputSize = (sys.props.get(s"${appProp}minInputSize").
    map { _.toLong }).getOrElse(DEFAULT_MINIMUM_INPUT_SIZE)
  require(minInputSize > 0)

  /**
    * The default value for the maximum number of output files allowed to be
    * open at once.
    *
    * This value may be set at run time via the
    * `mothra.invariantpacker.fileCacheSize` Java property.
    */
  val DEFAULT_FILE_CACHE_SIZE = 2000

  /**
    * The minimum value allowed for the maximum number of output files that
    * may be opened at once.
    */
  val MINIMUM_FILE_CACHE_SIZE = 128

  /**
    * The maximum number of output files allowed to be open at once.  Defaults
    * to `DEFAULT_FILE_CACHE_SIZE`.
    *
    * This value may be set at run time via the
    * `mothra.invariantpacker.fileCacheSize` Java property.
    */
  val fileCacheSize = sys.props.get(s"${appProp}fileCacheSize").
      map { _.toInt }.getOrElse(DEFAULT_FILE_CACHE_SIZE)
  require( fileCacheSize >= MINIMUM_FILE_CACHE_SIZE )
  require( fileCacheSize >= maxThreads )

  /**
    * Default value for how often the source directory is scanned (polled) for
    * IPFIX files to process.  This value may be specified at run-time by
    * specifying the `mothra.invariantpacker.pollingInterval` Java property.
    */
  val DEFAULT_POLL_INTERVAL = 15

  /**
    * How often the incoming directory is scanned for files to process, in
    * seconds.  Defaults to `DEFAULT_POLL_INTERVAL`.
    *
    * The run-time behavior may be modified by setting the
    * 'mothra.invariantpacker.pollingInterval' Java property.
    */
  val pollingInterval = (sys.props.get(s"${appProp}pollingInterval").
    map { _.toInt }).getOrElse(DEFAULT_POLL_INTERVAL)
  require( pollingInterval > 0 )

  /**
    * The default observationDomainId to use for output files.
    *
    * The run-time behavior may be modified by setting the
    * 'mothra.invariantpacker.observationDomainId' Java property.
    */
  val DEFAULT_OBSERVATION_DOMAIN_ID = 0

  /**
    * The observationDomainId to use for output files.
    *
    * The run-time behavior may be modified by setting the
    * 'mothra.invariantpacker.observationDomainId' Java property.
    */
  val observationDomain = (sys.props.get(s"${appProp}observationDomainId").
    map { _.toInt }).getOrElse(DEFAULT_OBSERVATION_DOMAIN_ID)


  // /////  InvariantPacker procedural code begins here  /////

  var oneshot = false

  System.err.println(s"Args: ${args.mkString(" ")}")

  val (switches, positionalArgs) = args.partition { _.startsWith("-") }

  System.err.println(s"Switches: ${switches.mkString(" ")}")

  switches.collect {
    case "-V" | "--version" => version()
    case "-h" | "--help" => usage(true)
    case "--one-shot" | "--oneshot" => oneshot = true
    case unknown: String =>
      println(s"Unknown argument '${unknown}'")
      usage()
  }

  if ( positionalArgs.length != 3 ) {
    var errmsg = if ( positionalArgs.length == 1 ) {
      "Called with 1 argument"
    } else {
      s"Called with ${positionalArgs.length} arguments"
    }
    errmsg += (
      "; exactly 3 required (incomingDir destinationDir partitionerFile)")
    logger.error(errmsg)
    println(errmsg)
    usage()
  }

  logger.info("\n=============================" +
    s" ${APP} is starting =============================\n")
  logger.info(s"This is ${APP} ${Version.get()}")

  val incomingDir = new HPath(positionalArgs(0))

  val destinationDir = new HPath(positionalArgs(1))

  val packLogicPath = new HPath(positionalArgs(2))

  // Create an object to use as a Mutex
  private[this] val repoFilesMutex = new AnyRef

  /**
    * A mapping from `relPath` (i.e., an output file's path relative to the
    * `destinationDir`) to the RepositoryPath object used to write to that
    * relative path.
    */
  private[this] var repoFiles = Map.empty[String, RepositoryPath]
  //new ConcurrentHashMap[String, RepositoryPath](
  //    fileCacheSize, 0.75, maxThreads)

  /**
    * A Deque used to expire files when the maximum number of file handles has
    * been exhausted.
    */
  private[this] var repoLRU = PackerDeque.empty[RepositoryPath]

  /**
    * The number of repository files.
    */
  @volatile
  private[this] var repoFileCount = 0

  /**
    * The number of open output files.
    */
  @volatile
  private[this] var openFileCount = 0

  /**
    * Set a default maximum output file size if none was provided, and reduce
    * the maximum size by the maximum IPFIX message length.
    */
  private[this] val repoFileMaxSize =
    maximumSize.getOrElse(Long.MaxValue) - 0xffff
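
  // Illustrative note (values invented for this note): with
  // mothra.invariantpacker.maximumSize set to 1073741824 (1 GiB), an output
  // file is closed and reopened once getPos() exceeds
  // 1073741824 - 65535 = 1073676289 octets.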

  // how often to print the task counts and queue sizes (seconds)
  private[this] val taskCountInterval = 3

  /** The information model */
  implicit val infoModel = InfoModel.getCERTStandardInfoModel()

  /** The Hadoop configuration */
  implicit val hadoopConf = new Configuration()

  /** A Compression Codec Factory */
  private[this] val codecFactory = new CompressionCodecFactory(hadoopConf)

  // open, load, parse, compile, and run the packing logic file, then get the
  // file layout version and the partitioner sequence from the packing logic
  private[this] val (packVersion, partitioners) = {
    val stream = packLogicPath.getFileSystem(hadoopConf).open(packLogicPath)
    val loader = Try { RunTimeCodeLoader(stream) } match {
      case Success(x) => x
      case Failure(e) => throw new Exception(
        s"Failed to compile run-time packing logic in '${packLogicPath}': ${e}")
    }
    val result = Try { loader.load() } match {
      case Success(x) => x
      case Failure(e) => throw new Exception(
        s"Failed to load run-time packing logic from '${packLogicPath}': ${e}")
    }
    result match {
      case ppl: PartitionerPackLogic =>
        (ppl.version, ppl.partitioners)
      case pc: PartitionerConfigurator =>
        val ppl = PartitionerPackLogic(pc.partitioners)
        (ppl.version, ppl.partitioners)
      case _: PackingLogic => throw new Exception(
        "partitionerFile must contain partitions, not other PackingLogic")
      case _ => throw new Exception(
        s"Unexpected type returned from compiled code: ${result.getClass}")
    }
  }
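
  // Descriptive note (assumed shape, not from the original source): the
  // compiled partitionerFile is expected to evaluate to either a
  // PartitionerPackLogic or a PartitionerConfigurator.  Based only on the
  // members used in this file, each partitioner in the resulting sequence
  // maps an incoming file name to an optional output path component via
  // pathForFilename(), and the PartitionerPackLogic also supplies the
  // file-layout `version` used in the "v<packVersion>" path component.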


  // log our settings
  logger.info(s"${APP} Settings::")
  logger.info(s"Incoming directory: ${incomingDir}")
  logger.info(s"Destination directory: ${destinationDir}")
  logger.info(s"Partitioner logic file: ${packLogicPath}")
  logger.info(s"Output compression: ${compressCodec.getOrElse("none")}")
  logger.info(s"Maximum simultaneous writing tasks: ${maxThreads}")
  val ignored = if ( oneshot ) {
    logger.info("Will shut down after a single scan of the incoming directory")
    logger.info("Ignoring the following settings:")
    "- "
  } else {
    ""
  }
  logger.info(s"${ignored}Polling interval of the incoming directory: ${pollingInterval}")
  logger.info(s"${ignored}Open an output file when it has an input file noticed this long ago: ${maxInputAge / 1000} seconds")
  logger.info(s"${ignored}Open an output file when it has this number of input files: ${minInputCount}")
  logger.info(s"${ignored}Open an output file when the sum of the sizes of the input files exceeds this value: ${minInputSize} octets")
  logger.info(s"${ignored}Maximum time an output file may remain idle before being closed: ${outputIdleSeconds} seconds")
  logger.info("Approximate maximum size of output files: " +
    maximumSize.map{ _.toString }.getOrElse("unlimited"))
  logger.info(s"Maximum number of open output files: ${fileCacheSize}")
  logger.info(s"Observation domain for output files: ${observationDomain}")
  logger.info(s"""JVM Parameters: ${ManagementFactory.getRuntimeMXBean.getInputArguments.toArray.mkString(",")}""")

  logger.info(s"${APP} threads are starting::")

  ShutdownHookManager.get().addShutdownHook(new Cleanup(), SHUTDOWN_PRIORITY)

  private[this] val writerPool: ThreadPoolExecutor =
    new ThreadPoolExecutor(
      maxThreads, maxThreads, 0L, TimeUnit.SECONDS,
      new LinkedBlockingDeque[Runnable](),
      new PackerThreadFactory("ActiveWriter-"))

  private[this] val watcher = MyDirWatcher(incomingDir)

  var running = !oneshot

  if ( oneshot ) {
    watcher.runOnce()
    watcher.shutdown()

    for ( rp <- repoFiles.values ) {
      writerPool.execute(rp)
    }

    logger.debug("Running with one-shot enabled; waiting for threads to end...")

    // all tasks are queued; shutdown the thread pool and allow the
    // running/queued tasks to complete
    writerPool.shutdown()

    do {
      printTaskCounts()
    } while ( !writerPool.awaitTermination(1, TimeUnit.SECONDS) )

    logger.debug("All tasks have completed")

    logger.info(s"${APP} is done")

  } else {

    logger.debug("Starting the incoming directory poller")
    watcher.runPeriodically(pollingInterval)

    val checkFactory = new PackerThreadFactory("OutputFileCheck-")
    val checkTaskPool = new ScheduledThreadPoolExecutor(1, checkFactory)

    logger.debug("Starting the output file checker")
    checkTaskPool.setContinueExistingPeriodicTasksAfterShutdownPolicy(false)
    checkTaskPool.setExecuteExistingDelayedTasksAfterShutdownPolicy(false)
    checkTaskPool.setRemoveOnCancelPolicy(true)

    // wait half a pollingInterval before starting the checker
    checkTaskPool.scheduleWithFixedDelay(new CheckRepositoryPaths(),
      pollingInterval / 2, pollingInterval, TimeUnit.SECONDS)

    do {
      printTaskCounts()
    } while ( running &&
      !writerPool.awaitTermination(taskCountInterval, TimeUnit.SECONDS) );
  }

}

// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact [email protected] for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.tx
//
// DM20-1143



