// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
// Copyright 2015-2022 by Carnegie Mellon University
// See license information in LICENSE.txt
package org.cert.netsa.mothra.packer
import org.cert.netsa.io.ipfix.{Record, DeepFieldExtractor}
//import org.cert.netsa.data.net.{IPv4Address => NetIPv4, IPv6Address => NetIPv6,
// IPv4Block, IPv6Block}
import com.typesafe.scalalogging.StrictLogging
import java.net.URLEncoder
import scala.collection.immutable.{Vector, VectorBuilder}
import scala.util.matching.Regex
import scala.util.{Failure, Success, Try}
/**
* A method for Mothra packers to determine into which single partition
* (if any) on one level of a partitioning hierarchy a
* [[org.cert.netsa.io.ipfix.Record Record]] should be stored.
* [[Partitioner]]s are used to partition (split) records into various
* directories in the long-term data storage location.
*
* A single [[Partitioner]] typically splits on one attribute of a
* [[org.cert.netsa.io.ipfix.Record Record]], such as its source IP
* address, IP protocol, or destination port.
*
* A sequence of multiple [[Partitioner]]s is used by
* [[PartitionerConfigurator]] to partition on multiple attributes in
* order.
*
* @see [[Partitioner$ the companion object]] for numerous ways to
* define simple [[Partitioner]]s on various data types. These
* allow you to quickly specify ranges or specific values for
* different partitions.
*/
trait Partitioner {
/** In the directory name generated by this partitioner, the delimiter
* between the operator, field name, and argument(s). */
val delim: String = "="
// Should be one of the characters in
// org.cert.netsa.mothra.datasources.ipfix.IPFIXSource.CONSTRAINT_DELIMS_V2
/** The maximum length of a path component that the file system
* supports. */
val max_path_component = 255
/**
* Returns the path component for the Record `rec` based on this
* Partitioner as an [[scala.Option Option]]. Returns `None` when
* either the Record lacks the necessary information for the
* Partitioner or when the Partitioner does not designate a
* specific path component for the Record.
*
* @param rec The record to be placed into a partition
*/
def pathForRecord(rec: Record): Option[String]
/**
* Returns the path component extracted from a String representing a
* filename created by super_mediator running in invariant mode as an
* [[scala.Option Option]]. Returns `None` when either the String does not
* include the field or when the Partitioner does not designate a specific
* path component.
*
* @param filename The name of the file to be placed into a partition
*/
def pathForFilename(filename: String): Option[String]
}
/**
* An object to hold classes that extend the [[Partitioner]] trait.
*/
object Partitioner {
/**
* NumericPartitioner supports partitioning based on the numeric
* field named `key` in a [[org.cert.netsa.io.ipfix.Record Record]],
* where each partition represents records where that field either
* has a specific (single) value or falls with a range of values.
*
* NumericPartitioner is the parent class of other classes for
* specific numeric types (such as 16-bit integers and 32-bit
* floating point numbers). These subclasses must specify a numeric
* type and the allowable range of values for that type.
*
* @param key The path of the Information Element to be partioned on
* @param min The minimum value supported for this numeric type
* @param max The maximum value supported for this numeric type
* @tparam T The type of the number
*/
sealed abstract class NumericPartitioner[T : Ordering] (
key: String,
min: T,
max: T) extends Partitioner with StrictLogging
{
import scala.math.Ordering.Implicits._
/** Whether a missing value in the record is recorded by an
* "is_null=FIELD" directory component. Starts as `false`; both
* `coverRanges` and `addNotIn` set it to `true`. */
var useNullComponent = false
/** When `addNotIn` has been called, this holds the path component that
* `pathForRecord` and `pathForFilename` return for values of `key` that
* do NOT map to any Interval. Empty until `addNotIn` is called. */
private[this] var notInString = Option.empty[String]
/** Whether `coverRanges` has been called */
private[this] var coverRangesUsed = false
/** URL encoded (UTF-8) version of the key, used when building path
* components */
private[this] val encodedKey = URLEncoder.encode(key, "UTF-8")
/** Regex to parse the value of `key` from a super_mediator invariant
 * filename. It matches `-KEY-` followed by one or more decimal digits
 * (capture group 1) and then a single non-digit character.
 *
 * The key is passed through `Regex.quote` so that any regular-expression
 * metacharacters it contains are matched literally instead of being
 * interpreted as part of the pattern. */
private[this] val regex =
  new Regex("-" + Regex.quote(key) + "-" + """(\d+)\D""")
/**
 * One span of values handled by this Partitioner, together with the
 * directory name used for values that fall inside it.
 *
 * @param beg The lower bound of the span
 * @param begIncluded Whether `beg` itself belongs to the span ("<="
 *     rather than "<")
 * @param end The upper bound of the span
 * @param endIncluded Whether `end` itself belongs to the span ("<="
 *     rather than "<")
 * @param pathPart The directory name for
 *     [[org.cert.netsa.io.ipfix.Record Record]]s whose value is in
 *     this span
 * @param contains Predicate that decides whether a value is in this
 *     span
 */
private[this] case class Interval(
  beg: T, begIncluded: Boolean,
  end: T, endIncluded: Boolean,
  pathPart: String,
  contains: (T) => Boolean)
{
  /** Renders the bounds as an inequality such as `1 <= _ < 5`; the
   * comparison on each side is "<=" when that bound is included and
   * "<" when it is not. */
  override def toString: String = {
    val lowerOp = if (begIncluded) "<=" else "<"
    val upperOp = if (endIncluded) "<=" else "<"
    s"${beg} ${lowerOp} _ ${upperOp} ${end}"
  }
}
/** Companion of the Interval class; supplies a factory that derives
 * the path component and membership test from the bounds alone. */
private[this] object Interval {
  /**
   * Creates an Interval running from `beg` to `end`, where either
   * bound may be excluded.
   *
   * When `beg` equals `end` the result is a single-point "eq"
   * Interval, and both bounds must be included (checked by an
   * assertion). Otherwise the path prefix is one of "ge_le", "ge_lt",
   * "gt_le", or "gt_lt" according to which bounds are included.
   */
  def apply(beg: T, begIncluded: Boolean, end: T, endIncluded: Boolean):
      Interval =
  {
    if (beg == end) {
      assert(begIncluded && endIncluded)
      new Interval(beg, true, beg, true,
        s"eq${delim}${encodedKey}${delim}${beg}",
        (x: T) => x == beg)
    } else {
      (begIncluded, endIncluded) match {
        case (true, true) =>
          new Interval(beg, begIncluded, end, endIncluded,
            s"ge_le${delim}${encodedKey}${delim}${beg}${delim}${end}",
            (x: T) => x >= beg && x <= end)
        case (true, false) =>
          new Interval(beg, begIncluded, end, endIncluded,
            s"ge_lt${delim}${encodedKey}${delim}${beg}${delim}${end}",
            (x: T) => x >= beg && x < end)
        case (false, true) =>
          new Interval(beg, begIncluded, end, endIncluded,
            s"gt_le${delim}${encodedKey}${delim}${beg}${delim}${end}",
            (x: T) => x > beg && x <= end)
        case (false, false) =>
          new Interval(beg, begIncluded, end, endIncluded,
            s"gt_lt${delim}${encodedKey}${delim}${beg}${delim}${end}",
            (x: T) => x > beg && x < end)
      }
    }
  }
}
/** The set of Intervals that comprise this partitioner, sorted by
* the beginning of the interval. `findInterval` binary-searches this
* Vector, so the ordering maintained by `addInterval` must be kept.
*/
private[this] var parts = Vector.empty[Interval]
/** The object used to extract the field 'key' from a Record. */
private[this] val extractor = DeepFieldExtractor[T](key)
/**
 * Looks up the Interval that holds `value` and returns its pathPart
 * as an Option; returns None when no Interval holds the value.
 *
 * The lookup is a binary search over `parts`, which is kept sorted by
 * the beginning of each Interval.
 */
private[this] def findInterval(value: T): Option[String] = {
  @scala.annotation.tailrec
  def search(lo: Int, hi: Int): Option[String] = {
    if (lo > hi) {
      // search window is empty: nothing holds the value
      None
    } else {
      val mid = (lo + hi) >> 1
      val candidate = parts(mid)
      if (candidate.contains(value)) {
        Option(candidate.pathPart)
      } else if (value <= candidate.beg) {
        // value lies before this Interval; continue in the lower half
        search(lo, mid - 1)
      } else {
        search(mid + 1, hi)
      }
    }
  }
  search(0, parts.length - 1)
}
/**
 * Returns the path component for the
 * [[org.cert.netsa.io.ipfix.Record Record]] `rec` based on this
 * partition as an Option.
 *
 * When the record has a value for `key`, the result is the path of the
 * Interval holding that value, or the "not_in" path when `addNotIn`
 * has been called and no Interval holds it, or None otherwise. When
 * the record has no value for `key`, the result depends on
 * `useNullComponent`: None when it is `false` and
 * `Some(s"is_null\${delim}\${encodedKey}")` when it is `true`.
 */
def pathForRecord(rec: Record): Option[String] = {
  rec.apply[T](extractor) match {
    case Some(value) =>
      findInterval(value).orElse(notInString)
    case None if useNullComponent =>
      Option(s"is_null${delim}${encodedKey}")
    case None =>
      None
  }
}
/**
 * Returns the path component for the file named `filename` based on
 * this partition as an Option.
 *
 * When the filename contains a value for `key`, the result is the path
 * of the Interval holding that value, falling back to the "not_in"
 * path (if any). If the digits cannot be converted to the partition's
 * numeric type, the failure is logged at trace level and the "not_in"
 * path (if any) is returned. When the filename has no value for `key`,
 * the result depends on `useNullComponent`: None when it is `false`
 * and `Some("is_null\${delim}\${encodedKey}")` when it is `true`.
 */
def pathForFilename(filename: String): Option[String] = {
  // Converts the captured digits to T. The value gets boxed, so the
  // runtime class of `min` selects the right conversion; only the
  // integral types are supported.
  def parse(digits: String): T = min match {
    case _: Long => digits.toLong.asInstanceOf[T]
    case _: Int => digits.toInt.asInstanceOf[T]
    case _: Short => digits.toShort.asInstanceOf[T]
    case _: Byte => digits.toByte.asInstanceOf[T]
    case _ =>
      throw new UnsupportedOperationException(
        "Unsupported partition type")
  }

  regex.findFirstMatchIn(filename) match {
    case None =>
      // the key does not appear in the filename
      if (useNullComponent) Option(s"is_null${delim}${encodedKey}") else None
    case Some(m) =>
      Try(findInterval(parse(m.group(1))).orElse(notInString)) match {
        case Success(found) => found
        case Failure(e) =>
          logger.trace(
            s"Error parsing value for ${key} in '${filename}': ${e}")
          notInString
      }
  }
}
/**
 * Verifies that `v` lies within the range `min` to `max` inclusive.
 * Helper used when adding a partition.
 *
 * @throws java.lang.IllegalArgumentException when `v` is less than
 *     `min` or greater than `max`
 */
private[this] def checkRange(v: T): Unit = {
  val complaint: Option[String] =
    if (v < min) {
      Some(s"Value ${v} is less than the allowed minimum of ${min}")
    } else if (v > max) {
      Some(s"Value ${v} is greater than the allowed maximum of ${max}")
    } else {
      None
    }
  complaint.foreach(msg => throw new IllegalArgumentException(msg))
}
/**
 * Rejects an attempt to add a partition after one of the methods that
 * accounts for all remaining values (`coverRanges` or `addNotIn`) has
 * been called.
 *
 * @throws java.lang.IllegalArgumentException when `coverRanges` or
 *     `addNotIn` has already been called
 */
private[this] def checkCompletelyCovered(): Unit = {
  val reason: Option[String] =
    if (coverRangesUsed) {
      Some("May not add a partition once coverRanges() has been called.")
    } else if (notInString.nonEmpty) {
      Some("May not add a partition once addNotIn() has been called.")
    } else {
      None
    }
  reason.foreach(msg => throw new IllegalArgumentException(msg))
}
/**
 * Builds a copy of the `parts` Vector with the interval `v` inserted
 * at position `pos`; `parts` itself is not modified. A `pos` of -1
 * means `v` is appended at the end.
 */
private[this] def insertIntervalAt(v: Interval, pos: Int):
    Vector[Interval] =
{
  pos match {
    case -1 => parts :+ v
    // replacing zero elements at `pos` is a pure insertion
    case _ => parts.patch(pos, Vector(v), 0)
  }
}
/**
 * Inserts the Interval `v` into `parts`, keeping it sorted by the
 * beginning of each Interval.
 *
 * @throws java.lang.IllegalArgumentException when `v` overlaps the
 *     existing Interval immediately before or after its position
 */
private[this] def addInterval(v: Interval): Unit = {
  // Index of the first existing Interval that must follow `v`: one
  // whose beg is greater than v.beg, or equal to it when v includes
  // its own beg. -1 when `v` belongs at the end.
  val pos = parts.indexWhere { p =>
    if (v.begIncluded) p.beg >= v.beg else p.beg > v.beg
  }
  val prev: Option[Interval] =
    if (pos == -1) parts.lastOption
    else if (pos > 0) Some(parts(pos - 1))
    else None
  val next: Option[Interval] =
    if (pos == -1) None else Some(parts(pos))

  def reject(existing: Interval): Nothing =
    throw new IllegalArgumentException(
      s"New interval (${v}) overlaps with existing interval (${existing})")

  // the end of the preceding Interval must not reach into the start of v
  prev.foreach { p =>
    if (v.beg < p.end
      || (v.beg == p.end && v.begIncluded && p.endIncluded))
    {
      reject(p)
    }
  }
  // the start of the following Interval must not reach into the end of v
  next.foreach { p =>
    if (p.beg < v.end
      || (p.beg == v.end && p.begIncluded && v.endIncluded))
    {
      reject(p)
    }
  }
  parts = insertIntervalAt(v, pos)
}
/** Add a partition for when Record value `x` == `v`. The directory
 * name has the form "eq", key, and `v` joined by `delim`.
 *
 * @throws java.lang.IllegalArgumentException when an existing
 *     interval includes `v`, when `v` is outside the range
 *     `min`–`max`, or when `coverRanges` or `addNotIn` has already
 *     been called.
 */
def addPartitionEquals(v: T): Unit = {
  checkCompletelyCovered()
  checkRange(v)
  // equal bounds make the factory produce the single-point "eq" Interval
  addInterval(Interval(v, true, v, true))
}
/** Former name of [[addMultiplePartitionsEqualsAny]]; simply forwards
 * `s` to that method. */
@deprecated("Replace with addMultiplePartitionsEqualsAny", "1.2.2")
def addPartitionEqualsAny(s: Seq[T]): Unit = {
  addMultiplePartitionsEqualsAny(s)
}
/** Add one partition per value in `s`, each matching when Record value
 * `x` equals that value. This is a convenience function that invokes
 * [[addPartitionEquals]] for each value in `s`, in order.
 *
 * @throws java.lang.IllegalArgumentException when an existing
 *     interval includes any value in `s` or when any value in `s` is
 *     outside the range `min`–`max`.
 *
 * @see [[addSinglePartitionEqualsAny]] for a method that puts
 *     multiple values in a single partition
 */
def addMultiplePartitionsEqualsAny(s: Seq[T]): Unit = {
  checkCompletelyCovered()
  s.foreach(v => addPartitionEquals(v))
}
/** Add a single partition for when Record value `x` equals any value
 * in the list. The directory name has the form "in", key, and every
 * value of `s` (in the given order) joined by `delim`.
 *
 * @throws java.lang.IllegalArgumentException when an existing
 *     interval includes any value in `s` or when any value in `s` is
 *     outside the range `min`–`max`.
 *
 * @see [[addMultiplePartitionsEqualsAny]] for a method that
 *     creates multiple partitions: one for each value in a
 *     sequence
 */
def addSinglePartitionEqualsAny(s: Seq[T]): Unit = {
  checkCompletelyCovered()
  // validate every value before any Interval is created
  s.foreach(v => checkRange(v))
  val path = s.map(v => s"${v}")
    .mkString(s"in${delim}${encodedKey}${delim}", delim, "")
  // one single-point Interval per value, all sharing the same path
  s.foreach { v =>
    addInterval(Interval(v, true, v, true, path, (x: T) => x == v))
  }
}
/**
 * Add a partition for when Record value `x` >= `v`; it extends up to
 * and including `max`. Since partitions may not overlap, this method
 * and [[addPartitionGreaterThan]] may only be called one time for a
 * Partitioner.
 *
 * @throws java.lang.IllegalArgumentException when an existing
 *     interval includes any value greater than or equal to `v` or when
 *     `v` is outside the range `min`–`max`.
 */
def addPartitionGreaterEquals(v: T): Unit = {
  checkCompletelyCovered()
  checkRange(v)
  val path = s"ge${delim}${encodedKey}${delim}${v}"
  val atLeast: T => Boolean = x => x >= v
  addInterval(Interval(v, true, max, true, path, atLeast))
}
/**
 * Add a partition for when Record value `x` > `v`; it extends up to
 * and including `max`. Since partitions may not overlap, this method
 * and [[addPartitionGreaterEquals]] may only be called one time for a
 * Partitioner.
 *
 * @throws java.lang.IllegalArgumentException when an existing
 *     interval includes any value greater than `v`, when `v` is
 *     outside the range `min`–`max`, or when `v` equals `max`.
 */
def addPartitionGreaterThan(v: T): Unit = {
  checkCompletelyCovered()
  checkRange(v)
  // nothing can be greater than the maximum, so refuse an empty partition
  if (v == max) {
    throw new IllegalArgumentException(
      "GreaterThan partition will always be empty when" +
        s" value equals the allowed maximum of ${max}")
  }
  val path = s"gt${delim}${encodedKey}${delim}${v}"
  val above: T => Boolean = x => x > v
  addInterval(Interval(v, false, max, true, path, above))
}
/** Add a partition for when Record value `x` <= `v`; it starts at and
 * includes `min`. Since partitions may not overlap, this method and
 * [[addPartitionLessThan]] may only be called one time for a
 * Partitioner.
 *
 * @throws java.lang.IllegalArgumentException when an existing
 *     interval includes any value less than or equal to `v` or
 *     when `v` is outside the range `min`–`max`.
 */
def addPartitionLessEquals(v: T): Unit = {
  checkCompletelyCovered()
  checkRange(v)
  val path = s"le${delim}${encodedKey}${delim}${v}"
  val atMost: T => Boolean = x => x <= v
  addInterval(Interval(min, true, v, true, path, atMost))
}
/** Add a partition for when Record value `x` < `v`; it starts at and
 * includes `min`. Since partitions may not overlap, this method and
 * [[addPartitionLessEquals]] may only be called one time for a
 * Partitioner.
 *
 * @throws java.lang.IllegalArgumentException when an existing
 *     interval includes any value less than `v`, when `v` is
 *     outside the range `min`–`max`, or when `v` equals `min`.
 */
def addPartitionLessThan(v: T): Unit = {
  checkCompletelyCovered()
  checkRange(v)
  // nothing can be less than the minimum, so refuse an empty partition
  if (v == min) {
    throw new IllegalArgumentException(
      "LessThan partition will always be empty when" +
        s" value equals the allowed minimum of ${min}")
  }
  val path = s"lt${delim}${encodedKey}${delim}${v}"
  val below: T => Boolean = x => x < v
  addInterval(Interval(min, true, v, false, path, below))
}
/**
 * Add a partition for when Record value `x` satisfies `lower` <=
 * `x` <= `upper`. Equivalent to the four-argument form with both
 * bounds included.
 *
 * @throws java.lang.IllegalArgumentException when an existing
 *     interval includes any value between `lower` and `upper`
 *     inclusive or when `lower` or `upper` are outside the range
 *     `min`–`max`.
 */
def addPartitionRange(lower: T, upper: T): Unit = {
  addPartitionRange(lower, true, upper, true)
}
/**
 * Add a partition for when Record value `x` is within the range
 * `lower` to `upper`, where `lower` and/or `upper` may be excluded
 * from the range by passing `false` for `lowerIncluded` and/or
 * `upperIncluded`.
 *
 * @throws java.lang.IllegalArgumentException when an existing
 *     interval includes any value that would also be included in
 *     this interval, when `lower` or `upper` are outside the range
 *     `min`–`max`, when `lower` is greater than `upper`, or when
 *     either Boolean parameter is `false` and `lower` equals
 *     `upper`.
 */
def addPartitionRange(
  lower: T, lowerIncluded: Boolean,
  upper: T, upperIncluded: Boolean): Unit =
{
  checkCompletelyCovered()
  // the first failing check (in this order) determines the error message
  val violation: Option[String] =
    if (lower < min) {
      Some(s"Lower bound ${lower} is less than the allowed minimum of ${min}")
    } else if (upper > max) {
      Some(s"Upper bound ${upper} is greater than the allowed maximum of ${max}")
    } else if (lower > upper) {
      Some(s"Lower bound ${lower} is greater than the upper bound ${upper}")
    } else if (lower == upper && (!lowerIncluded || !upperIncluded)) {
      // a single point with an excluded bound would be empty
      Some(s"When the lower bound and upper bound are equal ${lower} both" +
        " lowerIncluded and upperIncluded must be true")
    } else {
      None
    }
  violation.foreach(msg => throw new IllegalArgumentException(msg))
  addInterval(Interval(lower, lowerIncluded, upper, upperIncluded))
}
/**
 * Adds range rules to this Partitioner for all values that are not
 * currently covered by an existing partition, so that every possible
 * value between `min` and `max` is covered. In addition, causes the
 * Partitioner to create an "is_null=KEY" directory for Records that
 * do not contain the field `key`.
 *
 * If called on an empty Partitioner, creates a single partition from
 * `min` to `max` inclusive.
 *
 * @throws java.lang.IllegalArgumentException when `coverRanges` or
 *     `addNotIn` has already been called
 * @throws java.lang.RuntimeException when the existing partitions
 *     are found to overlap
 *
 * @see [[addNotIn]] for an alternative way to handle remaining
 *     values
 */
def coverRanges(): Unit = {
  checkCompletelyCovered()
  if (parts.isEmpty) {
    addInterval(Interval(min, true, max, true))
  } else {
    val filled = new VectorBuilder[Interval]
    filled.sizeHint(2 + 2 * parts.length)

    // values from `min` up to the first existing Interval
    val first = parts.head
    if (first.beg != min) {
      val edge = first.beg
      val lead = if (first.begIncluded) {
        Interval(min, true, edge, false,
          s"lt${delim}${encodedKey}${delim}${edge}", (x: T) => x < edge)
      } else {
        Interval(min, true, edge, true,
          s"le${delim}${encodedKey}${delim}${edge}", (x: T) => x <= edge)
      }
      filled += lead
    } else if (!first.begIncluded) {
      // only `min` itself is missing
      filled += Interval(min, true, min, true)
    }
    filled += first

    // Walk the remaining Intervals, carrying the end of the region
    // covered so far and whether that end value itself is covered.
    val (lastEnd, lastIncluded) =
      parts.tail.foldLeft((first.end, first.endIncluded)) {
        case ((cur, curIncluded), p) =>
          if (p.beg < cur || (p.beg == cur && curIncluded && p.begIncluded)) {
            // the existing Intervals are not sane
            val upTo = if (curIncluded) { "<=" } else { "<" }
            throw new RuntimeException(s"Interval ${p} overlaps with previously" +
              s" processed values ${min} <= _ ${upTo} ${cur}")
          }
          if (p.beg > cur || (p.beg == cur && !curIncluded && !p.begIncluded)) {
            // fill the hole between the covered region and p
            filled += Interval(cur, !curIncluded, p.beg, !p.begIncluded)
          }
          filled += p
          (p.end, p.endIncluded)
      }

    // values after the final existing Interval, up to `max`
    if (lastEnd < max) {
      val trail = if (lastIncluded) {
        Interval(lastEnd, false, max, true,
          s"gt${delim}${encodedKey}${delim}${lastEnd}", (x: T) => x > lastEnd)
      } else {
        Interval(lastEnd, true, max, true,
          s"ge${delim}${encodedKey}${delim}${lastEnd}", (x: T) => x >= lastEnd)
      }
      filled += trail
    } else if (!lastIncluded) {
      // only `max` itself is missing
      filled += Interval(max, true, max, true)
    }
    parts = filled.result()
  }
  coverRangesUsed = true
  useNullComponent = true
}
/**
 * Adds a single additional rule to this Partitioner that covers all
 * values that are not currently covered by an existing partition.
 * This method may only be used when the Partitioner does not contain
 * any ranges; that is, when only [[addPartitionEquals]],
 * [[addMultiplePartitionsEqualsAny]], and
 * [[addSinglePartitionEqualsAny]] have been used. In addition, causes
 * the Partitioner to create an "is_null=KEY" directory for Records
 * that do not contain the field `key`.
 *
 * @throws java.lang.RuntimeException when the resulting path
 *     component length would be larger than max_path_component, when
 *     the Partitioner contains ranges, or when the Partitioner is
 *     empty.
 * @throws java.lang.IllegalArgumentException when `coverRanges` or
 *     `addNotIn` has already been called
 *
 * @see [[coverRanges]] for an alternative way to handle remaining
 *     values
 */
def addNotIn(): Unit = {
  checkCompletelyCovered()
  if (parts.isEmpty) {
    throw new RuntimeException(
      "May not use addNotIn() when no Partitions exist")
  }
  // every existing Interval must be a single point, not a range
  parts.foreach { p =>
    if (p.beg != p.end) {
      throw new RuntimeException(
        "May not use addNotIn() when Partitioner includes ranges")
    }
    assert(p.begIncluded)
    assert(p.endIncluded)
  }
  // directory name listing every value that is handled elsewhere
  val path = parts.map(p => s"${p.beg}")
    .mkString(s"not_in${delim}${encodedKey}${delim}", delim, "")
  if (path.length > max_path_component) {
    throw new RuntimeException("Resulting path component is too long")
  }
  // Unlike coverRanges(), no Intervals are created for the uncovered
  // values. The path is stored, and the lookup methods fall back to it
  // whenever the search of the Intervals finds nothing.
  notInString = Option(path)
  useNullComponent = true
}
/**
 * Checks that the ranges in the Partition are sane (i.e., that they
 * do not overlap). Returns `true` when no overlap is found and
 * `false` otherwise. Holes between ranges do not affect the result.
 *
 * @param verbose If `true`, print (to standard output) each range as
 *     it is examined, any holes found in the Partitions, and ranges
 *     that overlap.
 */
def checkSanity(verbose: Boolean): Boolean = {
  var sane = true
  // end of the region examined so far, and whether that end value
  // itself has been covered
  var edge = min
  var edgeIncluded = false
  parts.foreach { p =>
    val touching = p.beg == edge
    val overlaps =
      p.beg < edge || (touching && edgeIncluded && p.begIncluded)
    if (overlaps) {
      sane = false
      if (verbose) {
        val upTo = if (edgeIncluded) { "<=" } else { "<" }
        println(s"Overlap ${p} with previous values ${upTo} ${edge}")
      }
    } else if (verbose) {
      if (p.beg > edge || (touching && !edgeIncluded && !p.begIncluded)) {
        // nothing covers the values between the edge and p
        val hole = Interval(edge, !edgeIncluded, p.beg, !p.begIncluded)
        println(s"Missing ${hole}")
      }
      println(s"Covered ${p}")
    }
    edge = p.end
    edgeIncluded = p.endIncluded
  }
  // report anything left uncovered between the last range and `max`
  if (verbose && (edge < max || !edgeIncluded)) {
    val hole = Interval(edge, !edgeIncluded, max, true)
    println(s"Missing ${hole}")
  }
  sane
}
}
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains an unsigned 8-bit value. Values are handled as
* `Short` and partition bounds must lie between 0 and 0xff inclusive. */
final case class UInt8(key: String)
extends NumericPartitioner[Short](key, 0.toShort, 0xff.toShort)
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains an unsigned 16-bit value. Values are handled as
* `Int` and partition bounds must lie between 0 and 0xffff inclusive. */
final case class UInt16(key: String)
extends NumericPartitioner[Int](key, 0, 0xffff)
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains an unsigned 32-bit value. Values are handled as
* `Long` and partition bounds must lie between 0 and 0xffffffff
* inclusive. */
final case class UInt32(key: String)
extends NumericPartitioner[Long](key, 0L, 0xffffffffL)
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains an unsigned 64-bit value. Values are handled as
* `Long`. */
// FIXME: This upper bound is wrong. The maximum passed below is
// 0x7fffffffffffffff, the largest signed Long, not the unsigned 64-bit
// maximum of 2^64 - 1.
final case class UInt64(key: String)
extends NumericPartitioner[Long](key, 0L, 0x7fffffffffffffffL)
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains a signed 8-bit value. Values are handled as
* `Short` and partition bounds must lie between -0x80 and 0x7f
* inclusive. */
final case class Int8(key: String)
extends NumericPartitioner[Short](key, -0x80.toShort, 0x7f.toShort)
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains a signed 16-bit value. Values are handled as
* `Int` and partition bounds must lie between -0x8000 and 0x7fff
* inclusive. */
final case class Int16(key: String)
extends NumericPartitioner[Int](key, -0x8000, 0x7fff)
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains a signed 32-bit value. Values are handled as
* `Long` and partition bounds must lie between -0x80000000 and
* 0x7fffffff inclusive. */
final case class Int32(key: String)
extends NumericPartitioner[Long](key, -0x80000000L, 0x7fffffffL)
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains a signed 64-bit value. Values are handled as
* `Long` and partition bounds must lie between -0x8000000000000000 and
* 0x7fffffffffffffff inclusive. */
final case class Int64(key: String)
extends NumericPartitioner[Long](key,
-0x8000000000000000L, 0x7fffffffffffffffL)
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains a 32-bit floating point value. Values are
* handled as `Float` and partition bounds must lie between
* `Float.MinValue` and `Float.MaxValue` inclusive. */
final case class Float32(key: String)
extends NumericPartitioner[Float](key, Float.MinValue, Float.MaxValue)
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains a 64-bit floating point value. Values are
* handled as `Double` and partition bounds must lie between
* `Double.MinValue` and `Double.MaxValue` inclusive. */
final case class Float64(key: String)
extends NumericPartitioner[Double](key, Double.MinValue, Double.MaxValue)
/*
* ************************************************************************
* These two classes allow partitioning based on IP addresses.
*
* Their logic is the same as that for the numeric types earlier in
* this file, meaning that both classes treat the value as a number.
* This does not work so well in Spark-land where IP addresses are
* represented as strings.
*
* In addition, the IPv6Address class assumes ":" is a valid character
* in a file system path. These addresses may need to be URL encoded
* instead.
*
* ************************************************************************
*/
/*
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
field which contains an IPv4
* Address. */
final case class IPv4Address(key: String)
extends NumericPartitioner[NetIPv4](key,
NetIPv4("0.0.0.0"), NetIPv4("255.255.255.255"))
{
/**
* Add a partition for when Record value `x` is contained in the
* IPv4 Block `cidr`.
*/
def addPartitionRange(cidr: IPv4Block): Unit =
addPartitionRange(cidr.min, true, cidr.max, true)
}
/** Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
* field which contains an IPv6 Address. */
final case class IPv6Address(key: String)
extends NumericPartitioner[NetIPv6](key,
NetIPv6("::"), NetIPv6("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")):
Unit =
{
/**
* Add a partition for when Record value `x` is contained in the
* IPv6 Block `cidr`.
*/
def addPartitionRange(cidr: IPv6Block): Unit =
addPartitionRange(cidr.min, true, cidr.max, true)
}
*/
/**
 * Partition a [[org.cert.netsa.io.ipfix.Record Record]] on its `key`
 * field, creating a separate partition for each unique value seen for
 * that field.
 *
 * This Partitioner should be used for fields that have only a few
 * unique values, such as vlanId or observationDomain. Other fields
 * should use a subclass of the [[NumericPartitioner]]. The
 * [[NumericPartitioner]] ensures that unexpected values go into a
 * relatively small number of files. The [[NumericPartitioner]] is
 * also good for floating point values because of the issues
 * checking equality of floating point values.
 *
 * The type `T` should be an integer type. It is passed to the
 * [[org.cert.netsa.io.ipfix.DeepFieldExtractor]].
 *
 * @param key The name of the Information Element to be partitioned on
 * @tparam T The type of the field which is expected to be an
 *     integer type ([[Short]], [[Int]], or [[Long]]).
 */
case class UniqueValue[T](key: String)
  extends Partitioner
{
  /** Whether a missing value in the record is recorded by an
   * "is_null=FIELD" directory component. Always `true` for this
   * Partitioner. */
  final val useNullComponent = true

  /** The object used to extract the field 'key' from a Record. */
  private[this] val extractor = DeepFieldExtractor[T](key)

  /** URL encoded version of the key */
  private[this] val encodedKey = URLEncoder.encode(key, "UTF-8")

  /** Prefix of every "equals" path component; the value is appended. */
  private[this] val partitionEqual = s"eq${delim}${encodedKey}${delim}"

  /** Path component used when the field is absent. */
  private[this] val partitionNull = Option(s"is_null${delim}${encodedKey}")

  /** Regex to parse the value of `key` from a super_mediator invariant
   * filename: `-KEY-`, one or more decimal digits (capture group 1),
   * then a single non-digit character. The key is quoted so that any
   * regular-expression metacharacters in it are matched literally. */
  private[this] val regex =
    new Regex("-" + Regex.quote(key) + "-" + """(\d+)\D""")

  /**
   * Returns the path component for the Record `rec` based on this
   * partition as an Option.
   *
   * When the record has a value for `key`, the result is "eq", the
   * encoded key, and the value joined by `delim`. When the field is
   * absent, the result is {{{Option("is_null\${delim}\${encodedKey}")}}}
   * because `useNullComponent` is always `true` here.
   */
  final def pathForRecord(rec: Record): Option[String] = {
    rec.apply[T](extractor) match {
      case Some(value) =>
        Option(s"${partitionEqual}${value}")
      case None =>
        if (useNullComponent) partitionNull else None
    }
  }

  /**
   * Returns the path component for the file named `filename` based on
   * this partition as an Option.
   *
   * When the filename contains a value for `key`, the result is "eq",
   * the encoded key, and the digits exactly as they appear in the
   * filename, joined by `delim`. When the key is absent, the result is
   * {{{Option("is_null\${delim}\${encodedKey}")}}} because
   * `useNullComponent` is always `true` here.
   */
  def pathForFilename(filename: String): Option[String] = {
    // WARNING: This grabs whatever numbers follow the key, even if the
    // value is outside the range of the key ("protocolIdentifier-777-")
    regex.findFirstMatchIn(filename) match {
      case Some(m) =>
        Option(s"${partitionEqual}${m.group(1)}")
      case None =>
        if (useNullComponent) partitionNull else None
    }
  }
}
}
// @LICENSE_FOOTER@
//
// Copyright 2015-2022 Carnegie Mellon University. All Rights Reserved.
//
// This material is based upon work funded and supported by the
// Department of Defense and Department of Homeland Security under
// Contract No. FA8702-15-D-0002 with Carnegie Mellon University for the
// operation of the Software Engineering Institute, a federally funded
// research and development center sponsored by the United States
// Department of Defense. The U.S. Government has license rights in this
// software pursuant to DFARS 252.227.7014.
//
// NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE ENGINEERING
// INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS. CARNEGIE MELLON
// UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR
// IMPLIED, AS TO ANY MATTER INCLUDING, BUT NOT LIMITED TO, WARRANTY OF
// FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY, OR RESULTS
// OBTAINED FROM USE OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT
// MAKE ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM PATENT,
// TRADEMARK, OR COPYRIGHT INFRINGEMENT.
//
// Released under a GNU GPL 2.0-style license, please see LICENSE.txt or
// contact [email protected] for full terms.
//
// [DISTRIBUTION STATEMENT A] This material has been approved for public
// release and unlimited distribution. Please see Copyright notice for
// non-US Government use and distribution.
//
// Carnegie Mellon(R) and CERT(R) are registered in the U.S. Patent and
// Trademark Office by Carnegie Mellon University.
//
// This software includes and/or makes use of third party software each
// subject to its own license as detailed in LICENSE-thirdparty.tx
//
// DM20-1143