
com.tencent.angel.sona.ml.evaluation.ClusteringEvaluator.scala

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.tencent.angel.sona.ml.evaluation

import org.apache.spark.SparkContext
import com.tencent.angel.sona.ml.attribute.AttributeGroup
import org.apache.spark.linalg
import org.apache.spark.linalg.{BLAS, DenseVector, IntSparseVector, LongSparseVector, Vectors}
import com.tencent.angel.sona.ml.param.{Param, ParamMap, ParamValidators}
import com.tencent.angel.sona.ml.param.shared.{HasFeaturesCol, HasPredictionCol}
import org.apache.spark.broadcast.Broadcast
import com.tencent.angel.sona.ml.util._
import org.apache.spark.sql.{Column, DataFrame, Dataset}
import org.apache.spark.sql.functions.{avg, col, udf}
import org.apache.spark.sql.types.DoubleType
import org.apache.spark.sql.util.SONASchemaUtils
import org.apache.spark.util.DatasetUtil


/**
 * :: Experimental ::
 *
 * Evaluator for clustering results.
 * The metric computes the Silhouette measure using the specified distance measure.
 *
 * The Silhouette is a measure for the validation of the consistency within clusters. It ranges
 * between 1 and -1, where a value close to 1 means that the points in a cluster are close to the
 * other points in the same cluster and far from the points of the other clusters.
 */

class ClusteringEvaluator(override val uid: String)
  extends Evaluator with HasPredictionCol with HasFeaturesCol with DefaultParamsWritable {


  def this() = this(Identifiable.randomUID("cluEval"))


  override def copy(pMap: ParamMap): ClusteringEvaluator = this.defaultCopy(pMap)


  override def isLargerBetter: Boolean = true

  /** @group setParam */

  def setPredictionCol(value: String): this.type = set(predictionCol, value)

  /** @group setParam */

  def setFeaturesCol(value: String): this.type = set(featuresCol, value)

  /**
   * param for metric name in evaluation
   * (supports `"silhouette"` (default))
   * @group param
   */

  val metricName: Param[String] = {
    val allowedParams = ParamValidators.inArray(Array("silhouette"))
    new Param(
      this, "metricName", "metric name in evaluation (silhouette)", allowedParams)
  }

  /** @group getParam */

  def getMetricName: String = $(metricName)

  /** @group setParam */

  def setMetricName(value: String): this.type = set(metricName, value)

  /**
   * param for distance measure to be used in evaluation
   * (supports `"squaredEuclidean"` (default), `"cosine"`)
   * @group param
   */

  val distanceMeasure: Param[String] = {
    val availableValues = Array("squaredEuclidean", "cosine")
    val allowedParams = ParamValidators.inArray(availableValues)
    new Param(this, "distanceMeasure", "distance measure in evaluation. Supported options: " +
      availableValues.mkString("'", "', '", "'"), allowedParams)
  }

  /** @group getParam */

  def getDistanceMeasure: String = $(distanceMeasure)

  /** @group setParam */

  def setDistanceMeasure(value: String): this.type = set(distanceMeasure, value)

  setDefault(metricName -> "silhouette", distanceMeasure -> "squaredEuclidean")


  override def evaluate(dataset: Dataset[_]): Double = {
    SONASchemaUtils.validateVectorCompatibleColumn(dataset.schema, $(featuresCol))
    SONASchemaUtils.checkNumericType(dataset.schema, $(predictionCol))

    val vectorCol = DatasetUtil.columnToVector(dataset, $(featuresCol))
    val df = dataset.select(col($(predictionCol)),
      vectorCol.as($(featuresCol), dataset.schema($(featuresCol)).metadata))

    ($(metricName), $(distanceMeasure)) match {
      case ("silhouette", "squaredEuclidean") =>
        SquaredEuclideanSilhouette.computeSilhouetteScore(
          df, $(predictionCol), $(featuresCol))
      case ("silhouette", "cosine") =>
        CosineSilhouette.computeSilhouetteScore(df, $(predictionCol), $(featuresCol))
      case (mn, dm) =>
        throw new IllegalArgumentException(s"No support for metric $mn, distance $dm")
    }
  }
}



object ClusteringEvaluator
  extends DefaultParamsReadable[ClusteringEvaluator] {


  override def load(path: String): ClusteringEvaluator = super.load(path)

}
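
// A minimal usage sketch (illustrative only; `predictions` stands for a DataFrame
// produced by a clustering model, with a vector column "features" and a numeric
// cluster-id column "prediction" -- these names and the data are assumptions, not
// part of this file):
//
//   val evaluator = new ClusteringEvaluator()
//     .setFeaturesCol("features")
//     .setPredictionCol("prediction")
//     .setDistanceMeasure("squaredEuclidean")
//   val silhouette = evaluator.evaluate(predictions)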


private[evaluation] abstract class Silhouette {

  /**
   * It computes the Silhouette coefficient for a point.
   */
  def pointSilhouetteCoefficient(
      clusterIds: Set[Double],
      pointClusterId: Double,
      pointClusterNumOfPoints: Long,
      averageDistanceToCluster: (Double) => Double): Double = {
    // Here we compute the average dissimilarity of the current point to any cluster of which the
    // point is not a member.
    // The cluster with the lowest average dissimilarity - i.e. the nearest cluster to the current
    // point - is said to be the "neighboring cluster".
    val otherClusterIds = clusterIds.filter(_ != pointClusterId)
    val neighboringClusterDissimilarity = otherClusterIds.map(averageDistanceToCluster).min

    // adjustment for excluding the node itself from the computation of the average dissimilarity
    val currentClusterDissimilarity = if (pointClusterNumOfPoints == 1) {
      0.0
    } else {
      averageDistanceToCluster(pointClusterId) * pointClusterNumOfPoints /
        (pointClusterNumOfPoints - 1)
    }

    if (currentClusterDissimilarity < neighboringClusterDissimilarity) {
      1 - (currentClusterDissimilarity / neighboringClusterDissimilarity)
    } else if (currentClusterDissimilarity > neighboringClusterDissimilarity) {
      (neighboringClusterDissimilarity / currentClusterDissimilarity) - 1
    } else {
      0.0
    }
  }

  /**
   * Compute the mean Silhouette values of all samples.
   */
  def overallScore(df: DataFrame, scoreColumn: Column): Double = {
    df.select(avg(scoreColumn)).collect()(0).getDouble(0)
  }

  protected def getNumberOfFeatures(dataFrame: DataFrame, columnName: String): Int = {
    val group = AttributeGroup.fromStructField(dataFrame.schema(columnName))
    if (group.size < 0) {
      dataFrame.select(col(columnName)).first().getAs[linalg.Vector](0).size.toInt
    } else {
      group.size.toInt
    }
  }
}
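
// A tiny worked example of `Silhouette.pointSilhouetteCoefficient` (the cluster ids,
// counts and average distances below are made up for illustration):
//
//   clusterIds = Set(0.0, 1.0), pointClusterId = 0.0, pointClusterNumOfPoints = 3
//   averageDistanceToCluster(0.0) = 2.0   // computed including the point itself
//   averageDistanceToCluster(1.0) = 6.0
//
// The within-cluster dissimilarity is corrected to 2.0 * 3 / (3 - 1) = 3.0, the
// neighboring-cluster dissimilarity is 6.0, and since 3.0 < 6.0 the coefficient is
// 1 - 3.0 / 6.0 = 0.5.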

/**
 * SquaredEuclideanSilhouette computes the average of the
 * Silhouette over all the data of the dataset, which is
 * a measure of how appropriately the data have been clustered.
 *
 * The Silhouette for each point `i` is defined as:
 *
 *   $$
 *   s_{i} = \frac{b_{i}-a_{i}}{max\{a_{i},b_{i}\}}
 *   $$
 *
 * which can be rewritten as
 *
 *   $$
 *   s_{i}= \begin{cases}
 *   1-\frac{a_{i}}{b_{i}} & \text{if } a_{i} \leq b_{i} \\
 *   \frac{b_{i}}{a_{i}}-1 & \text{if } a_{i} \gt b_{i} \end{cases}
 *   $$
 *
 * where `$a_{i}$` is the average dissimilarity of `i` with all other data
 * within the same cluster, `$b_{i}$` is the lowest average dissimilarity
 * of `i` to any other cluster, of which `i` is not a member.
 * `$a_{i}$` can be interpreted as how well `i` is assigned to its cluster
 * (the smaller the value, the better the assignment), while `$b_{i}$` is
 * a measure of how well `i` has not been assigned to its "neighboring cluster",
 * i.e. the nearest cluster to `i`.
 *
 * Unfortunately, the naive implementation of the algorithm requires computing
 * the distance between each pair of points in the dataset. Since computing the
 * distance measure takes `D` operations, where `D` is the number of dimensions
 * of each point, the computational complexity of the algorithm is `O(N^2^*D)`, where
 * `N` is the cardinality of the dataset. Of course this does not scale in `N`,
 * which is the critical number in a Big Data context.
 *
 * The algorithm which is implemented in this object, instead, is an efficient
 * and parallel implementation of the Silhouette using the squared Euclidean
 * distance measure.
 *
 * With this assumption, the total distance of the point `X`
 * to the points `$C_{i}$` belonging to the cluster `$\Gamma$` is:
 *
 *   $$
 *   \sum\limits_{i=1}^N d(X, C_{i} ) =
 *   \sum\limits_{i=1}^N \Big( \sum\limits_{j=1}^D (x_{j}-c_{ij})^2 \Big)
 *   = \sum\limits_{i=1}^N \Big( \sum\limits_{j=1}^D x_{j}^2 +
 *   \sum\limits_{j=1}^D c_{ij}^2 -2\sum\limits_{j=1}^D x_{j}c_{ij} \Big)
 *   = \sum\limits_{i=1}^N \sum\limits_{j=1}^D x_{j}^2 +
 *   \sum\limits_{i=1}^N \sum\limits_{j=1}^D c_{ij}^2
 *   -2 \sum\limits_{i=1}^N \sum\limits_{j=1}^D x_{j}c_{ij}
 *   $$
 *
 * where `$x_{j}$` is the `j`-th dimension of the point `X` and
 * `$c_{ij}$` is the `j`-th dimension of the `i`-th point in cluster `$\Gamma$`.
 *
 * Then, the first term of the equation can be rewritten as:
 *
 *   $$
 *   \sum\limits_{i=1}^N \sum\limits_{j=1}^D x_{j}^2 = N \xi_{X} \text{ ,
 *   with } \xi_{X} = \sum\limits_{j=1}^D x_{j}^2
 *   $$
 *
 * where `$\xi_{X}$` is fixed for each point and it can be precomputed.
 *
 * Moreover, the second term is fixed for each cluster too,
 * thus we can name it `$\Psi_{\Gamma}$`
 *
 *   $$
 *   \sum\limits_{i=1}^N \sum\limits_{j=1}^D c_{ij}^2 =
 *   \sum\limits_{i=1}^N \xi_{C_{i}} = \Psi_{\Gamma}
 *   $$
 *
 * Last, the third element becomes
 *
 *   $$
 *   \sum\limits_{i=1}^N \sum\limits_{j=1}^D x_{j}c_{ij} =
 *   \sum\limits_{j=1}^D \Big(\sum\limits_{i=1}^N c_{ij} \Big) x_{j}
 *   $$
 *
 * thus defining the vector
 *
 *   $$
 *   Y_{\Gamma}:Y_{\Gamma j} = \sum\limits_{i=1}^N c_{ij} , j=1, ..., D
 *   $$
 *
 * which is fixed for each cluster `$\Gamma$`, we have
 *
 *   $$
 *   \sum\limits_{j=1}^D \Big(\sum\limits_{i=1}^N c_{ij} \Big) x_{j} =
 *   \sum\limits_{j=1}^D Y_{\Gamma j} x_{j}
 *   $$
 *
 * In this way, the previous equation becomes
 *
 *   $$
 *   N\xi_{X} + \Psi_{\Gamma} - 2 \sum\limits_{j=1}^D Y_{\Gamma j} x_{j}
 *   $$
 *
 * and the average distance of a point to a cluster can be computed as
 *
 *   $$
 *   \frac{\sum\limits_{i=1}^N d(X, C_{i} )}{N} =
 *   \frac{N\xi_{X} + \Psi_{\Gamma} - 2 \sum\limits_{j=1}^D Y_{\Gamma j} x_{j}}{N} =
 *   \xi_{X} + \frac{\Psi_{\Gamma} }{N} - 2 \frac{\sum\limits_{j=1}^D Y_{\Gamma j} x_{j}}{N}
 *   $$
 *
 * Thus, it is enough to precompute: the constant `$\xi_{X}$` for each point `X`; the
 * constants `$\Psi_{\Gamma}$`, `N` and the vector `$Y_{\Gamma}$` for
 * each cluster `$\Gamma$`.
 *
 * In the implementation, the precomputed values for the clusters
 * are distributed among the worker nodes via broadcasted variables,
 * because we can assume that the clusters are limited in number and
 * anyway they are much fewer than the points.
 *
 * The main strengths of this algorithm are the low computational complexity
 * and the intrinsic parallelism. The precomputed information for each point
 * and for each cluster can be computed with a computational complexity
 * which is `O(N/W)`, where `N` is the number of points in the dataset and
 * `W` is the number of worker nodes. After that, every point can be
 * analyzed independently of the others.
 *
 * For every point we need to compute the average distance to all the clusters.
 * Since the formula above requires `O(D)` operations, this phase has a
 * computational complexity which is `O(C*D*N/W)` where `C` is the number of
 * clusters (which we assume quite low), `D` is the number of dimensions,
 * `N` is the number of points in the dataset and `W` is the number
 * of worker nodes.
 */
private[evaluation] object SquaredEuclideanSilhouette extends Silhouette {

  private[this] var kryoRegistrationPerformed: Boolean = false

  /**
   * This method registers the class
   * [[SquaredEuclideanSilhouette.ClusterStats]]
   * for kryo serialization.
   *
   * @param sc `SparkContext` to be used
   */
  def registerKryoClasses(sc: SparkContext): Unit = {
    if (!kryoRegistrationPerformed) {
      sc.getConf.registerKryoClasses(
        Array(
          classOf[SquaredEuclideanSilhouette.ClusterStats]
        )
      )
      kryoRegistrationPerformed = true
    }
  }

  case class ClusterStats(featureSum: linalg.Vector, squaredNormSum: Double, numOfPoints: Long)

  /**
   * The method takes the input dataset and computes the aggregated values
   * about a cluster which are needed by the algorithm.
   *
   * @param df The DataFrame which contains the input data
   * @param predictionCol The name of the column which contains the predicted cluster id
   *                      for the point.
   * @param featuresCol The name of the column which contains the feature vector of the point.
   * @return A [[scala.collection.immutable.Map]] which associates each cluster id
   *         to a [[ClusterStats]] object (which contains the precomputed values `N`,
   *         `$\Psi_{\Gamma}$` and `$Y_{\Gamma}$` for a cluster).
   */
  def computeClusterStats(
      df: DataFrame,
      predictionCol: String,
      featuresCol: String): Map[Double, ClusterStats] = {
    val numFeatures = getNumberOfFeatures(df, featuresCol)
    val clustersStatsRDD = df.select(
        col(predictionCol).cast(DoubleType), col(featuresCol), col("squaredNorm"))
      .rdd
      .map { row => (row.getDouble(0), (row.getAs[linalg.Vector](1), row.getDouble(2))) }
      .aggregateByKey[(DenseVector, Double, Long)]((Vectors.zeros(numFeatures).toDense, 0.0, 0L))(
        seqOp = {
          case (
              (featureSum: DenseVector, squaredNormSum: Double, numOfPoints: Long),
              (features, squaredNorm)
              ) =>
            BLAS.axpy(1.0, features, featureSum)
            (featureSum, squaredNormSum + squaredNorm, numOfPoints + 1)
        },
        combOp = {
          case (
              (featureSum1, squaredNormSum1, numOfPoints1),
              (featureSum2, squaredNormSum2, numOfPoints2)
              ) =>
            BLAS.axpy(1.0, featureSum2, featureSum1)
            (featureSum1, squaredNormSum1 + squaredNormSum2, numOfPoints1 + numOfPoints2)
        }
      )

    clustersStatsRDD
      .collectAsMap()
      .mapValues {
        case (featureSum: DenseVector, squaredNormSum: Double, numOfPoints: Long) =>
          SquaredEuclideanSilhouette.ClusterStats(featureSum, squaredNormSum, numOfPoints)
      }
      .toMap
  }

  /**
   * It computes the Silhouette coefficient for a point.
   *
   * @param broadcastedClustersMap A map of the precomputed values for each cluster.
   * @param point The [[linalg.Vector]] representing the current point.
   * @param clusterId The id of the cluster the current point belongs to.
   * @param squaredNorm The `$\xi_{X}$` (which is the squared norm) precomputed for the point.
   * @return The Silhouette for the point.
   */
  def computeSilhouetteCoefficient(
      broadcastedClustersMap: Broadcast[Map[Double, ClusterStats]],
      point: linalg.Vector,
      clusterId: Double,
      squaredNorm: Double): Double = {

    def compute(targetClusterId: Double): Double = {
      val clusterStats = broadcastedClustersMap.value(targetClusterId)
      val pointDotClusterFeaturesSum = BLAS.dot(point, clusterStats.featureSum)

      squaredNorm +
        clusterStats.squaredNormSum / clusterStats.numOfPoints -
        2 * pointDotClusterFeaturesSum / clusterStats.numOfPoints
    }

    pointSilhouetteCoefficient(broadcastedClustersMap.value.keySet,
      clusterId,
      broadcastedClustersMap.value(clusterId).numOfPoints,
      compute)
  }
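
  // A brute-force cross-check of the precomputed form used in `compute` above
  // (an illustrative sketch with made-up numbers, not part of the evaluator):
  //
  //   val x       = Array(1.0, 2.0)                       // the point X
  //   val cluster = Seq(Array(0.0, 0.0), Array(2.0, 2.0)) // the points C_i of cluster Gamma
  //
  //   // direct average of the squared Euclidean distances
  //   val direct = cluster.map { c =>
  //     x.zip(c).map { case (a, b) => (a - b) * (a - b) }.sum
  //   }.sum / cluster.size
  //
  //   // precomputed form: xi_X + Psi_Gamma / N - 2 * (Y_Gamma dot X) / N
  //   val xiX      = x.map(v => v * v).sum                  // 5.0
  //   val psiGamma = cluster.map(_.map(v => v * v).sum).sum // 8.0
  //   val yGamma   = cluster.transpose.map(_.sum)           // Seq(2.0, 2.0)
  //   val precomputed = xiX + psiGamma / cluster.size -
  //     2.0 * yGamma.zip(x).map { case (y, v) => y * v }.sum / cluster.size
  //
  //   // both `direct` and `precomputed` evaluate to 3.0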

  /**
   * Compute the Silhouette score of the dataset using the squared Euclidean distance measure.
   *
   * @param dataset The input dataset (previously clustered) on which to compute the Silhouette.
   * @param predictionCol The name of the column which contains the predicted cluster id
   *                      for the point.
   * @param featuresCol The name of the column which contains the feature vector of the point.
   * @return The average of the Silhouette values of the clustered data.
   */
  def computeSilhouetteScore(
      dataset: Dataset[_],
      predictionCol: String,
      featuresCol: String): Double = {
    SquaredEuclideanSilhouette.registerKryoClasses(dataset.sparkSession.sparkContext)

    val squaredNormUDF = udf {
      features: linalg.Vector => math.pow(Vectors.norm(features, 2.0), 2.0)
    }
    val dfWithSquaredNorm = dataset.withColumn("squaredNorm", squaredNormUDF(col(featuresCol)))

    // compute aggregate values for clusters needed by the algorithm
    val clustersStatsMap = SquaredEuclideanSilhouette
      .computeClusterStats(dfWithSquaredNorm, predictionCol, featuresCol)

    // Silhouette is reasonable only when the number of clusters is greater than 1
    assert(clustersStatsMap.size > 1, "Number of clusters must be greater than one.")

    val bClustersStatsMap = dataset.sparkSession.sparkContext.broadcast(clustersStatsMap)

    val computeSilhouetteCoefficientUDF = udf {
      computeSilhouetteCoefficient(bClustersStatsMap, _: linalg.Vector, _: Double, _: Double)
    }

    val silhouetteScore = overallScore(dfWithSquaredNorm,
      computeSilhouetteCoefficientUDF(col(featuresCol), col(predictionCol).cast(DoubleType),
        col("squaredNorm")))

    bClustersStatsMap.destroy()

    silhouetteScore
  }
}

/**
 * The algorithm which is implemented in this object, instead, is an efficient and parallel
 * implementation of the Silhouette using the cosine distance measure. The cosine distance
 * measure is defined as `1 - s` where `s` is the cosine similarity between two points.
 *
 * The total distance of the point `X` to the points `$C_{i}$` belonging to the cluster `$\Gamma$`
 * is:
 *
 *   $$
 *   \sum\limits_{i=1}^N d(X, C_{i} ) =
 *   \sum\limits_{i=1}^N \Big( 1 - \frac{\sum\limits_{j=1}^D x_{j}c_{ij} }{ \|X\|\|C_{i}\|} \Big)
 *   = \sum\limits_{i=1}^N 1 - \sum\limits_{i=1}^N \sum\limits_{j=1}^D \frac{x_{j}}{\|X\|}
 *   \frac{c_{ij}}{\|C_{i}\|}
 *   = N - \sum\limits_{j=1}^D \frac{x_{j}}{\|X\|} \Big( \sum\limits_{i=1}^N
 *   \frac{c_{ij}}{\|C_{i}\|} \Big)
 *   $$
 *
 * where `$x_{j}$` is the `j`-th dimension of the point `X` and `$c_{ij}$` is the `j`-th dimension
 * of the `i`-th point in cluster `$\Gamma$`.
 *
 * Then, we can define the vector:
 *
 *   $$
 *   \xi_{X} : \xi_{X i} = \frac{x_{i}}{\|X\|}, i = 1, ..., D
 *   $$
 *
 * which can be precomputed for each point and the vector
 *
 *   $$
 *   \Omega_{\Gamma} : \Omega_{\Gamma i} = \sum\limits_{j=1}^N \xi_{C_{j}i}, i = 1, ..., D
 *   $$
 *
 * which can be precomputed too for each cluster `$\Gamma$` by its points `$C_{i}$`.
 *
 * With these definitions, the numerator becomes:
 *
 *   $$
 *   N - \sum\limits_{j=1}^D \xi_{X j} \Omega_{\Gamma j}
 *   $$
 *
 * Thus the average distance of a point `X` to the points of the cluster `$\Gamma$` is:
 *
 *   $$
 *   1 - \frac{\sum\limits_{j=1}^D \xi_{X j} \Omega_{\Gamma j}}{N}
 *   $$
 *
 * In the implementation, the precomputed values for the clusters are distributed among the worker
 * nodes via broadcasted variables, because we can assume that the clusters are limited in number.
 *
 * The main strengths of this algorithm are the low computational complexity and the intrinsic
 * parallelism. The precomputed information for each point and for each cluster can be computed
 * with a computational complexity which is `O(N/W)`, where `N` is the number of points in the
 * dataset and `W` is the number of worker nodes. After that, every point can be analyzed
 * independently of the others.
 *
 * For every point we need to compute the average distance to all the clusters. Since the formula
 * above requires `O(D)` operations, this phase has a computational complexity which is
 * `O(C*D*N/W)` where `C` is the number of clusters (which we assume quite low), `D` is the number
 * of dimensions, `N` is the number of points in the dataset and `W` is the number of worker
 * nodes.
 */
private[evaluation] object CosineSilhouette extends Silhouette {

  private[this] val normalizedFeaturesColName = "normalizedFeatures"

  /**
   * The method takes the input dataset and computes the aggregated values
   * about a cluster which are needed by the algorithm.
   *
   * @param df The DataFrame which contains the input data
   * @param predictionCol The name of the column which contains the predicted cluster id
   *                      for the point.
   * @return A [[scala.collection.immutable.Map]] which associates each cluster id to
   *         its statistics (i.e. the precomputed values `N` and `$\Omega_{\Gamma}$`).
   */
  def computeClusterStats(
      df: DataFrame,
      featuresCol: String,
      predictionCol: String): Map[Double, (linalg.Vector, Long)] = {
    val numFeatures = getNumberOfFeatures(df, featuresCol)
    val clustersStatsRDD = df.select(
        col(predictionCol).cast(DoubleType), col(normalizedFeaturesColName))
      .rdd
      .map { row => (row.getDouble(0), row.getAs[linalg.Vector](1)) }
      .aggregateByKey[(DenseVector, Long)]((Vectors.zeros(numFeatures).toDense, 0L))(
        seqOp = {
          case ((normalizedFeaturesSum: DenseVector, numOfPoints: Long), normalizedFeatures) =>
            BLAS.axpy(1.0, normalizedFeatures, normalizedFeaturesSum)
            (normalizedFeaturesSum, numOfPoints + 1)
        },
        combOp = {
          case ((normalizedFeaturesSum1, numOfPoints1), (normalizedFeaturesSum2, numOfPoints2)) =>
            BLAS.axpy(1.0, normalizedFeaturesSum2, normalizedFeaturesSum1)
            (normalizedFeaturesSum1, numOfPoints1 + numOfPoints2)
        }
      )

    clustersStatsRDD
      .collectAsMap()
      .toMap
  }

  /**
   * It computes the Silhouette coefficient for a point.
   *
   * @param broadcastedClustersMap A map of the precomputed values for each cluster.
   * @param normalizedFeatures The [[linalg.Vector]] representing the
   *                           normalized features of the current point.
   * @param clusterId The id of the cluster the current point belongs to.
   */
  def computeSilhouetteCoefficient(
      broadcastedClustersMap: Broadcast[Map[Double, (linalg.Vector, Long)]],
      normalizedFeatures: linalg.Vector,
      clusterId: Double): Double = {

    def compute(targetClusterId: Double): Double = {
      val (normalizedFeatureSum, numOfPoints) = broadcastedClustersMap.value(targetClusterId)
      1 - BLAS.dot(normalizedFeatures, normalizedFeatureSum) / numOfPoints
    }

    pointSilhouetteCoefficient(broadcastedClustersMap.value.keySet,
      clusterId,
      broadcastedClustersMap.value(clusterId)._2,
      compute)
  }
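
  // A brute-force cross-check of the precomputed form used in `compute` above
  // (an illustrative sketch with made-up numbers, not part of the evaluator):
  //
  //   val x       = Array(3.0, 4.0)                       // the point X, norm 5
  //   val cluster = Seq(Array(1.0, 0.0), Array(0.0, 2.0)) // the points C_i of cluster Gamma
  //
  //   def normalize(v: Array[Double]): Array[Double] = {
  //     val n = math.sqrt(v.map(d => d * d).sum)
  //     v.map(_ / n)
  //   }
  //
  //   // direct average of the cosine distances 1 - (x . c) / (||x|| ||c||)
  //   val direct = cluster.map { c =>
  //     1.0 - normalize(x).zip(normalize(c)).map { case (a, b) => a * b }.sum
  //   }.sum / cluster.size
  //
  //   // precomputed form: 1 - (xi_X . Omega_Gamma) / N
  //   val omega = cluster.map(normalize).transpose.map(_.sum)  // Seq(1.0, 1.0)
  //   val precomputed =
  //     1.0 - normalize(x).zip(omega).map { case (a, b) => a * b }.sum / cluster.size
  //
  //   // both `direct` and `precomputed` evaluate to 0.3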

  /**
   * Compute the Silhouette score of the dataset using the cosine distance measure.
   *
   * @param dataset The input dataset (previously clustered) on which to compute the Silhouette.
   * @param predictionCol The name of the column which contains the predicted cluster id
   *                      for the point.
   * @param featuresCol The name of the column which contains the feature vector of the point.
   * @return The average of the Silhouette values of the clustered data.
   */
  def computeSilhouetteScore(
      dataset: Dataset[_],
      predictionCol: String,
      featuresCol: String): Double = {
    val normalizeFeatureUDF = udf {
      features: linalg.Vector => {
        val norm = Vectors.norm(features, 2.0)
        features match {
          case d: DenseVector => Vectors.dense(d.values.map(_ / norm))
          case s: IntSparseVector =>
            Vectors.sparse(s.size.toInt, s.indices, s.values.map(_ / norm))
          case s: LongSparseVector =>
            Vectors.sparse(s.size.toInt, s.indices, s.values.map(_ / norm))
          case v => throw new IllegalArgumentException(
            s"Unsupported vector type: ${v.getClass.getName}")
        }
      }
    }
    val dfWithNormalizedFeatures = dataset.withColumn(normalizedFeaturesColName,
      normalizeFeatureUDF(col(featuresCol)))

    // compute aggregate values for clusters needed by the algorithm
    val clustersStatsMap = computeClusterStats(dfWithNormalizedFeatures,
      featuresCol, predictionCol)

    // Silhouette is reasonable only when the number of clusters is greater than 1
    assert(clustersStatsMap.size > 1, "Number of clusters must be greater than one.")

    val bClustersStatsMap = dataset.sparkSession.sparkContext.broadcast(clustersStatsMap)

    val computeSilhouetteCoefficientUDF = udf {
      computeSilhouetteCoefficient(bClustersStatsMap, _: linalg.Vector, _: Double)
    }

    val silhouetteScore = overallScore(dfWithNormalizedFeatures,
      computeSilhouetteCoefficientUDF(col(normalizedFeaturesColName),
        col(predictionCol).cast(DoubleType)))

    bClustersStatsMap.destroy()

    silhouetteScore
  }
}



