All downloads are free. The search and download features use the official Maven repository.

com.datastax.data.prepare.spark.dataset.params.OutlierObject.scala Maven / Gradle / Ivy

package com.datastax.data.prepare.spark.dataset.params

/**
 * Mutable per-point record carrying a label, an index and a feature vector, plus
 * an outlier flag and several working values that are filled in through setters.
 *
 * The field names (`kDistances`, `lrd`, `lof`, ...) suggest this backs a
 * Local-Outlier-Factor style computation -- NOTE(review): confirm against the callers.
 *
 * The per-k arrays (`kDistances`, `lrd`, `cardN`, `lof`) always have length
 * [[getKSize]]: they start out empty and are (re)allocated by [[setKSize]].
 * The indexed getters/setters take a zero-based slot into those arrays and perform
 * no checking beyond the array's own bounds check.
 *
 * @param label  label attached to this point
 * @param index  index attached to this point
 * @param vector feature values; its length is reported by [[dimensions]]
 */
private[spark] class OutlierObject(val label: String, val index: Long, val vector: Array[Double]) {

  private var outlierStatus: Boolean = false

  private var preKMean: Double = 0.0

  // densities
  private var proportion: Double = 0.0

  // Original note: paths to the other OutlierObjects, up to the upper bound of k.
  // Stays null until setContainer is called.
  private var container: Array[DistanceContainer] = _

  // kDistance, one slot per k in the configured range
  private var kDistances: Array[Double] = Array.empty[Double]

  private var kSize: Int = 0

  // Original note: lrd = 1 / (sum of reach-dist(this, OutlierObject of kDistanceObjects) / cardinality).
  // The array index is related to the k range (lower bound, upper bound).
  private var lrd: Array[Double] = Array.empty[Double]

  // Original note: cardinality of the kDistanceObjects of the objects in kDistanceObjects.
  private var cardN: Array[Int] = Array.empty[Int]

  // Original note: LOF values within the k range.
  private var lof: Array[Double] = Array.empty[Double]

  private var maxLOF: Double = 0.0

  /** @return the value last stored with [[setPreKMean]] (0.0 initially). */
  def getPreKMean: Double = preKMean

  /** @return the outlier flag (false initially). */
  def getOutlierStatus: Boolean = outlierStatus

  /** @return the value last stored with [[setProportion]] (0.0 initially). */
  def getProportion: Double = proportion

  /** @return the array last stored with [[setContainer]]; null if none was ever set. */
  def getContainer: Array[DistanceContainer] = container

  /** @return the value last stored with [[setMaxLOF]] (0.0 initially). */
  def getMaxLOF: Double = maxLOF

  /**
   * Sizes the per-k arrays for the inclusive range `lower..upper` and resets their
   * contents to zero. Any values stored previously are discarded.
   *
   * The range is validated before any state is touched, so a rejected call leaves
   * the object exactly as it was.
   *
   * @param lower lower bound of the k range (inclusive)
   * @param upper upper bound of the k range (inclusive)
   * @throws IllegalArgumentException if `upper - lower + 1` is negative or does not fit in an Int
   */
  def setKSize(lower: Int, upper: Int): Unit = {
    // Widen to Long so that extreme bounds cannot silently overflow Int arithmetic.
    val size = upper.toLong - lower.toLong + 1L
    require(
      size >= 0L && size <= Int.MaxValue,
      s"invalid k range: lower=$lower, upper=$upper (upper - lower + 1 must be in 0..${Int.MaxValue})"
    )
    kSize = size.toInt
    kDistances = new Array[Double](kSize)
    lrd = new Array[Double](kSize)
    cardN = new Array[Int](kSize)
    lof = new Array[Double](kSize)
  }

  /** @return the number of slots in the per-k arrays (0 until [[setKSize]] is called). */
  def getKSize: Int = kSize

  /** Stores `avg` as the preKMean value. */
  def setPreKMean(avg: Double): Unit = preKMean = avg

  /** @return the k-distance stored in slot `i`. */
  def getKDistance(i: Int): Double = kDistances(i)

  /** Stores k-distance `d` in slot `k`. */
  def setKDistance(d: Double, k: Int): Unit = kDistances(k) = d

  /** @return the length of `vector`. */
  def dimensions: Int = vector.length

  /** Sets the outlier flag to `f`. */
  def setOutlierStatus(f: Boolean): Unit = outlierStatus = f

  /** Stores `kd` as the container array (the reference is kept, not copied). */
  def setContainer(kd: Array[DistanceContainer]): Unit = this.container = kd

  /** Stores `p` as the proportion value. */
  def setProportion(p: Double): Unit = this.proportion = p

  /** Stores lrd value `l` in slot `k`. */
  def setLRD(l: Double, k: Int): Unit = lrd(k) = l

  /** @return the lrd value stored in slot `k`. */
  def getLRD(k: Int): Double = lrd(k)

  /** Stores cardinality `l` in slot `k`. */
  def setCardN(l: Int, k: Int): Unit = cardN(k) = l

  /** @return the cardinality stored in slot `k`. */
  def getCardN(k: Int): Int = cardN(k)

  /** Stores LOF value `l` in slot `k`. */
  def setLOF(l: Double, k: Int): Unit = lof(k) = l

  /** @return the LOF value stored in slot `k`. */
  def getLOF(k: Int): Double = lof(k)

  /** Stores `l` as the maximum LOF value. */
  def setMaxLOF(l: Double): Unit = maxLOF = l

  /**
   * Marks this object as an outlier when its stored proportion is at least `p`.
   * The flag is only ever raised here; a proportion below `p` leaves it untouched.
   *
   * @param p threshold compared against the stored proportion
   */
  def checkProportion(p: Double): Unit = {
    if (proportion >= p) {
      setOutlierStatus(true)
    }
  }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy