
info.debatty.spark.knngraphs.builder.AbstractPartitioningBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of spark-knn-graphs Show documentation
Show all versions of spark-knn-graphs Show documentation
Spark algorithms for building k-nn graphs
The newest version!
package info.debatty.spark.knngraphs.builder;
import info.debatty.java.graphs.Graph;
import info.debatty.java.graphs.NeighborList;
import info.debatty.java.graphs.Node;
import info.debatty.java.graphs.build.Brute;
import info.debatty.java.graphs.build.GraphBuilder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
/**
*
* @author Thibault Debatty
* @param
*/
public abstract class AbstractPartitioningBuilder extends DistributedGraphBuilder {
protected int stages = 3;
protected int buckets = 10;
protected GraphBuilder inner_graph_builder;
public void setStages(int stages) {
this.stages = stages;
}
public void setBuckets(int buckets) {
this.buckets = buckets;
}
public void setInnerGraphBuilder(GraphBuilder inner_graph_builder) {
this.inner_graph_builder = inner_graph_builder;
}
@Override
protected JavaPairRDD, NeighborList> doComputeGraph(JavaRDD> nodes) throws Exception {
JavaPairRDD> bucketsofnodes = _binNodes(nodes);
JavaPairRDD, NeighborList> graph = bucketsofnodes.groupByKey().flatMapToPair(
new PairFlatMapFunction>>, Node, NeighborList>() {
public Iterator, NeighborList>> call(Tuple2>> tuple) throws Exception {
ArrayList> nodes = new ArrayList>();
for (Node n : tuple._2) {
nodes.add(n);
}
if (inner_graph_builder == null) {
inner_graph_builder = new Brute();
}
inner_graph_builder.setK(k);
inner_graph_builder.setSimilarity(similarity);
Graph graph = inner_graph_builder.computeGraph(nodes);
ArrayList, NeighborList>> r = new ArrayList, NeighborList>>();
for (Object e : graph.entrySet()) {
Map.Entry entry = (Map.Entry) e;
r.add(new Tuple2, NeighborList>(entry.getKey(), entry.getValue()));
}
return r.iterator();
}
});
graph = graph.groupByKey().mapToPair(
new PairFunction, Iterable>, Node, NeighborList>() {
public Tuple2, NeighborList> call(Tuple2, Iterable> tuple) throws Exception {
NeighborList nl = new NeighborList(k);
for (NeighborList n : tuple._2) {
nl.addAll(n);
}
return new Tuple2, NeighborList>(tuple._1, nl);
}
});
return graph;
}
protected abstract JavaPairRDD> _binNodes(JavaRDD> nodes) throws Exception;
}