All downloads are free. Search and download functionality uses the official Maven repository.

info.debatty.spark.knngraphs.builder.AbstractPartitioningBuilder Maven / Gradle / Ivy

The newest version!
package info.debatty.spark.knngraphs.builder;

import info.debatty.java.graphs.Graph;
import info.debatty.java.graphs.NeighborList;
import info.debatty.java.graphs.Node;
import info.debatty.java.graphs.build.Brute;
import info.debatty.java.graphs.build.GraphBuilder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

/**
 *
 * @author Thibault Debatty
 * @param 
 */
public abstract class AbstractPartitioningBuilder extends DistributedGraphBuilder {
    protected int stages = 3;
    protected int buckets = 10;

    protected GraphBuilder inner_graph_builder;

    public void setStages(int stages) {
        this.stages = stages;
    }

    public void setBuckets(int buckets) {
        this.buckets = buckets;
    }

    public void setInnerGraphBuilder(GraphBuilder inner_graph_builder) {
        this.inner_graph_builder = inner_graph_builder;
    }

    @Override
    protected JavaPairRDD, NeighborList> doComputeGraph(JavaRDD> nodes) throws Exception {

        JavaPairRDD> bucketsofnodes = _binNodes(nodes);

        JavaPairRDD, NeighborList> graph = bucketsofnodes.groupByKey().flatMapToPair(
                new  PairFlatMapFunction>>, Node, NeighborList>() {

            public Iterator, NeighborList>> call(Tuple2>> tuple) throws Exception {
                ArrayList> nodes = new ArrayList>();
                for (Node n : tuple._2) {
                    nodes.add(n);
                }

                if (inner_graph_builder == null) {
                    inner_graph_builder = new Brute();
                }
                inner_graph_builder.setK(k);
                inner_graph_builder.setSimilarity(similarity);
                Graph graph = inner_graph_builder.computeGraph(nodes);

                ArrayList, NeighborList>> r = new ArrayList, NeighborList>>();
                for (Object e : graph.entrySet()) {
                    Map.Entry entry = (Map.Entry) e;
                    r.add(new Tuple2, NeighborList>(entry.getKey(), entry.getValue()));
                }

                return r.iterator();
            }
        });


        graph = graph.groupByKey().mapToPair(
                new PairFunction, Iterable>, Node, NeighborList>() {

            public Tuple2, NeighborList> call(Tuple2, Iterable> tuple) throws Exception {
                NeighborList nl = new NeighborList(k);
                for (NeighborList n : tuple._2) {
                    nl.addAll(n);
                }
                return new Tuple2, NeighborList>(tuple._1, nl);
            }
        });

        return graph;
    }

    protected abstract JavaPairRDD> _binNodes(JavaRDD> nodes) throws Exception;
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy