All downloads are free. The search and download features use the official Maven repository.

au.csiro.variantspark.tests.TestWideKMeans.scala Maven / Gradle / Ivy

The newest version!
package au.csiro.variantspark.tests

import au.csiro.pbdava.ssparkle.spark.SparkApp
import au.csiro.variantspark.algo.WideKMeans
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.rdd.RDD

/**
 * Manual smoke test for [[WideKMeans]] on randomly generated synthetic data.
 *
 * The data is laid out "wide": every RDD element is one dimension (row) and holds one
 * value per sample, so the RDD has `dims` elements of length `samples`.
 *
 * NOTE(review): `conf` and `sc` are not defined in this file; presumably they are
 * provided by [[SparkApp]] - confirm.
 */
object TestWideKMeans extends SparkApp {
  conf.setAppName("VCF cluster")

  /**
   * Generates random cluster centres, derives noisy samples from them, runs
   * [[WideKMeans]] and prints the cluster assignment computed against the generating
   * centres and against the result of the run, so the two can be compared by eye.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    println("Testing WideKMeans")

    val dims = 1000
    val centersNo = 5
    val samples = 30
    val clusterVariance = 0.005

    // One row per dimension, each holding that dimension's coordinate for every centre.
    // The rows are built eagerly on the driver, so the centre values are fixed before
    // they are distributed over 10 partitions.
    val centerRows = Seq.fill(dims)(Vectors.dense(Array.fill(centersNo)(Math.random())))
    val centers = sc.parallelize(centerRows, 10)

    // For every sample, the index of the centre it is generated from (uniform in
    // 0 until centersNo).
    val clusterAssignment = List.fill(samples)(Math.floor(Math.random() * centersNo).toInt)
    println(clusterAssignment)

    // For each dimension, a sample's value is its centre's coordinate plus uniform noise
    // in [-clusterVariance / 2, clusterVariance / 2).
    val data: RDD[Vector] = centers.map { centerRow =>
      val sampleValues = clusterAssignment.map { centerIndex =>
        centerRow(centerIndex) + (Math.random() * clusterVariance - clusterVariance / 2)
      }
      Vectors.dense(sampleValues.toArray).toSparse
    }

    // cache() followed by count() forces evaluation, so later passes can reuse the cached
    // rows instead of re-drawing the random noise (as long as the partitions stay cached).
    val rowCount = data.cache().count()
    println(rowCount)

    // NOTE(review): the second constructor argument (30) is presumably the number of
    // iterations; it only coincidentally equals `samples` - confirm against WideKMeans.
    val kmeans = new WideKMeans(centersNo, 30)
    val result = kmeans.run(data)

    // Assignment using the generating centres, then using the output of the run.
    println(kmeans.assignClusters(data, centers).toList)
    println(kmeans.assignClusters(data, result).toList)

  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy