All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.vertexium.titan.hadoop.accumulo.AccumuloVertexiumInputFormat Maven / Gradle / Ivy

package org.vertexium.titan.hadoop.accumulo;

import com.thinkaurelius.titan.hadoop.FaunusVertex;
import com.thinkaurelius.titan.hadoop.formats.VertexQueryFilter;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapreduce.AccumuloRowInputFormat;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.vertexium.*;
import org.vertexium.accumulo.AccumuloAuthorizations;
import org.vertexium.accumulo.AccumuloGraph;
import org.vertexium.accumulo.AccumuloGraphConfiguration;
import org.vertexium.accumulo.mapreduce.AccumuloVertexInputFormat;
import org.vertexium.accumulo.mapreduce.VertexiumMRUtils;
import org.vertexium.property.StreamingPropertyValue;
import org.vertexium.util.MapUtils;

import java.io.IOException;
import java.util.List;
import java.util.Map;

public class AccumuloVertexiumInputFormat extends InputFormat implements Configurable {
    private final AccumuloRowInputFormat accumuloInputFormat;
    private Configuration config;
    private VertexQueryFilter vertexQuery;

    public AccumuloVertexiumInputFormat() {
        this.accumuloInputFormat = new AccumuloRowInputFormat();
    }

    @Override
    public List getSplits(JobContext context) throws IOException, InterruptedException {
        return this.accumuloInputFormat.getSplits(context);
    }

    public static void configure(Job job) {
        try {
            AccumuloGraphConfiguration accumuloGraphConfiguration = new AccumuloGraphConfiguration(job.getConfiguration(), "graph.");
            AccumuloGraph graph = AccumuloGraph.create(accumuloGraphConfiguration);
            String principal = accumuloGraphConfiguration.getAccumuloUsername();
            AuthenticationToken token = accumuloGraphConfiguration.getAuthenticationToken();
            String instanceName = accumuloGraphConfiguration.getAccumuloInstanceName();
            String zooKeepers = accumuloGraphConfiguration.getZookeeperServers();
            String[] authorizations = job.getConfiguration().get("titan.hadoop.input.authorizations", "").split(",");
            if (authorizations.length == 1 && authorizations[0].trim().length() == 0) {
                authorizations = new String[0];
            }

            String tableName = graph.getVerticesTableName();
            AccumuloRowInputFormat.setInputTableName(job, tableName);
            AccumuloRowInputFormat.setConnectorInfo(job, principal, token);
            ClientConfiguration clientConfig = new ClientConfiguration()
                    .withInstance(instanceName)
                    .withZkHosts(zooKeepers);
            AccumuloRowInputFormat.setZooKeeperInstance(job, clientConfig);
            AccumuloRowInputFormat.setScanAuthorizations(job, new org.apache.accumulo.core.security.Authorizations(authorizations));
            job.getConfiguration().setStrings(VertexiumMRUtils.CONFIG_AUTHORIZATIONS, authorizations);
        } catch (Exception ex) {
            throw new VertexiumException("Could not configure", ex);
        }
    }

    @Override
    public void setConf(Configuration config) {
        this.config = config;
        this.vertexQuery = VertexQueryFilter.create(config);
    }

    @Override
    public Configuration getConf() {
        return this.config;
    }

    @Override
    public RecordReader createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        final RecordReader>> reader = accumuloInputFormat.createRecordReader(inputSplit, taskAttemptContext);
        return new RecordReader() {
            public FaunusVertex vertex;
            public AccumuloGraph graph;
            public Authorizations authorizations;

            @Override
            public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
                reader.initialize(inputSplit, ctx);

                Map configurationMap = VertexiumMRUtils.toMap(ctx.getConfiguration());
                this.graph = (AccumuloGraph) new GraphFactory().createGraph(MapUtils.getAllWithPrefix(configurationMap, "graph"));
                this.authorizations = new AccumuloAuthorizations(ctx.getConfiguration().getStrings(VertexiumMRUtils.CONFIG_AUTHORIZATIONS));
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                if (!reader.nextKeyValue()) {
                    return false;
                }

                PeekingIterator> row = reader.getCurrentValue();
                vertex = createFaunusVertexFromRow(graph, row, authorizations);
                vertexQuery.defaultFilter(vertex);
                return true;
            }

            @Override
            public NullWritable getCurrentKey() throws IOException, InterruptedException {
                return NullWritable.get();
            }

            @Override
            public FaunusVertex getCurrentValue() throws IOException, InterruptedException {
                return vertex;
            }

            @Override
            public float getProgress() throws IOException, InterruptedException {
                return reader.getProgress();
            }

            @Override
            public void close() throws IOException {
                reader.close();
            }
        };
    }

    private FaunusVertex createFaunusVertexFromRow(AccumuloGraph graph, PeekingIterator> row, Authorizations authorizations) {
        final Vertex v = AccumuloVertexInputFormat.createVertex(graph, row, authorizations);
        final long vertexId = toFaunusVertexId(v.getId());
        FaunusVertex faunusVertex = new FaunusVertex();
        faunusVertex.setId(vertexId);
        faunusVertex.setVertexLabel(v.getId());
        faunusVertex.addProperty("vertexiumId", v.getId());
        for (Property property : v.getProperties()) {
            if (property.getValue() instanceof StreamingPropertyValue) {
                continue;
            }
            faunusVertex.addProperty(property.getName(), property.getValue());
        }
        for (EdgeInfo edgeInfo : v.getEdgeInfos(Direction.OUT, authorizations)) {
            faunusVertex.addEdge(com.tinkerpop.blueprints.Direction.OUT, edgeInfo.getLabel(), toFaunusVertexId(edgeInfo.getVertexId()));
        }
        for (EdgeInfo edgeInfo : v.getEdgeInfos(Direction.IN, authorizations)) {
            faunusVertex.addEdge(com.tinkerpop.blueprints.Direction.IN, edgeInfo.getLabel(), toFaunusVertexId(edgeInfo.getVertexId()));
        }
        return faunusVertex;
    }

    private long toFaunusVertexId(String id) {
        long h = 0;
        for (char ch : id.toCharArray()) {
            h = 31 * h + ch;
        }
        return Math.abs(h);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy