com.datastax.data.prepare.spark.dataset.GraphXBFS.scala Maven / Gradle / Ivy
The newest version!
package com.datastax.data.prepare.spark.dataset
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.graphx.{Edge, Graph}
import scala.collection.mutable.ListBuffer
/**
 * Groups the vertices of an edge list into connected components with Spark GraphX.
 *
 * Note: despite the object name, no breadth-first search is implemented here; the
 * clustering is delegated entirely to GraphX's `connectedComponents`.
 */
object GraphXBFS {

  /**
   * Splits the graph described by `listRemain` into its connected components.
   *
   * @param sc         Java Spark context used to distribute the edge list.
   * @param listRemain edges, one per element, written as `"<srcId>,<dstId>"` where both
   *                   ids parse as `Long`. Whitespace around an id is ignored and any
   *                   fields after the second are ignored. `null` is treated as empty.
   * @return a mutable list holding one string per connected component; each string is
   *         the component's vertex ids joined with `";"`. The order of components, and
   *         of ids inside a component, is whatever `groupByKey` yields and is not
   *         guaranteed to be stable between runs.
   * @throws NumberFormatException          if an id is not a valid `Long`.
   * @throws ArrayIndexOutOfBoundsException if an element has fewer than two fields.
   */
  def devideCluster(
      sc: JavaSparkContext,
      listRemain: java.util.List[String]): java.util.List[String] = {
    import scala.collection.JavaConverters._

    // Parse eagerly on the driver so malformed input fails before any Spark job starts.
    val edges: Vector[Edge[Int]] =
      Option(listRemain).map(_.asScala.map(parseEdge).toVector).getOrElse(Vector.empty)

    if (edges.isEmpty) {
      // No edges means no vertices and therefore no components; skip the Spark job.
      new java.util.ArrayList[String]()
    } else {
      // Use the underlying Scala context explicitly instead of relying on the
      // implicit JavaSparkContext -> SparkContext view.
      val edgeRdd = sc.sc.parallelize(edges)

      // The default vertex attribute is a placeholder: connectedComponents() replaces
      // every vertex attribute with the id of the component the vertex belongs to.
      val components = Graph.fromEdges(edgeRdd, 0.0).connectedComponents()

      val clusters = components.vertices
        .map { case (vertexId, componentId) => (componentId, vertexId) }
        .groupByKey()
        // Join the ids on the executors so only the final strings reach the driver.
        .map { case (_, members) => members.mkString(";") }
        .collect()

      // toBuffer keeps the returned Java list mutable, as callers may append to it.
      clusters.toBuffer.asJava
    }
  }

  /** Parses one `"<srcId>,<dstId>"` element into an edge with the attribute `0`. */
  private def parseEdge(line: String): Edge[Int] = {
    val fields = line.split(",")
    Edge(fields(0).trim.toLong, fields(1).trim.toLong, 0)
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy