// org.vertexium.titan.hadoop.accumulo.AccumuloVertexiumInputFormat (Maven / Gradle / Ivy)
package org.vertexium.titan.hadoop.accumulo;
import com.thinkaurelius.titan.hadoop.FaunusVertex;
import com.thinkaurelius.titan.hadoop.formats.VertexQueryFilter;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapreduce.AccumuloRowInputFormat;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.vertexium.*;
import org.vertexium.accumulo.AccumuloAuthorizations;
import org.vertexium.accumulo.AccumuloGraph;
import org.vertexium.accumulo.AccumuloGraphConfiguration;
import org.vertexium.accumulo.mapreduce.AccumuloVertexInputFormat;
import org.vertexium.accumulo.mapreduce.VertexiumMRUtils;
import org.vertexium.property.StreamingPropertyValue;
import org.vertexium.util.MapUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class AccumuloVertexiumInputFormat extends InputFormat implements Configurable {
private final AccumuloRowInputFormat accumuloInputFormat;
private Configuration config;
private VertexQueryFilter vertexQuery;
public AccumuloVertexiumInputFormat() {
this.accumuloInputFormat = new AccumuloRowInputFormat();
}
@Override
public List getSplits(JobContext context) throws IOException, InterruptedException {
return this.accumuloInputFormat.getSplits(context);
}
public static void configure(Job job) {
try {
AccumuloGraphConfiguration accumuloGraphConfiguration = new AccumuloGraphConfiguration(job.getConfiguration(), "graph.");
AccumuloGraph graph = AccumuloGraph.create(accumuloGraphConfiguration);
String principal = accumuloGraphConfiguration.getAccumuloUsername();
AuthenticationToken token = accumuloGraphConfiguration.getAuthenticationToken();
String instanceName = accumuloGraphConfiguration.getAccumuloInstanceName();
String zooKeepers = accumuloGraphConfiguration.getZookeeperServers();
String[] authorizations = job.getConfiguration().get("titan.hadoop.input.authorizations", "").split(",");
if (authorizations.length == 1 && authorizations[0].trim().length() == 0) {
authorizations = new String[0];
}
String tableName = graph.getVerticesTableName();
AccumuloRowInputFormat.setInputTableName(job, tableName);
AccumuloRowInputFormat.setConnectorInfo(job, principal, token);
ClientConfiguration clientConfig = new ClientConfiguration()
.withInstance(instanceName)
.withZkHosts(zooKeepers);
AccumuloRowInputFormat.setZooKeeperInstance(job, clientConfig);
AccumuloRowInputFormat.setScanAuthorizations(job, new org.apache.accumulo.core.security.Authorizations(authorizations));
job.getConfiguration().setStrings(VertexiumMRUtils.CONFIG_AUTHORIZATIONS, authorizations);
} catch (Exception ex) {
throw new VertexiumException("Could not configure", ex);
}
}
@Override
public void setConf(Configuration config) {
this.config = config;
this.vertexQuery = VertexQueryFilter.create(config);
}
@Override
public Configuration getConf() {
return this.config;
}
@Override
public RecordReader createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
final RecordReader>> reader = accumuloInputFormat.createRecordReader(inputSplit, taskAttemptContext);
return new RecordReader() {
public FaunusVertex vertex;
public AccumuloGraph graph;
public Authorizations authorizations;
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
reader.initialize(inputSplit, ctx);
Map configurationMap = VertexiumMRUtils.toMap(ctx.getConfiguration());
this.graph = (AccumuloGraph) new GraphFactory().createGraph(MapUtils.getAllWithPrefix(configurationMap, "graph"));
this.authorizations = new AccumuloAuthorizations(ctx.getConfiguration().getStrings(VertexiumMRUtils.CONFIG_AUTHORIZATIONS));
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
if (!reader.nextKeyValue()) {
return false;
}
PeekingIterator> row = reader.getCurrentValue();
vertex = createFaunusVertexFromRow(graph, row, authorizations);
vertexQuery.defaultFilter(vertex);
return true;
}
@Override
public NullWritable getCurrentKey() throws IOException, InterruptedException {
return NullWritable.get();
}
@Override
public FaunusVertex getCurrentValue() throws IOException, InterruptedException {
return vertex;
}
@Override
public float getProgress() throws IOException, InterruptedException {
return reader.getProgress();
}
@Override
public void close() throws IOException {
reader.close();
}
};
}
private FaunusVertex createFaunusVertexFromRow(AccumuloGraph graph, PeekingIterator> row, Authorizations authorizations) {
final Vertex v = AccumuloVertexInputFormat.createVertex(graph, row, authorizations);
final long vertexId = toFaunusVertexId(v.getId());
FaunusVertex faunusVertex = new FaunusVertex();
faunusVertex.setId(vertexId);
faunusVertex.setVertexLabel(v.getId());
faunusVertex.addProperty("vertexiumId", v.getId());
for (Property property : v.getProperties()) {
if (property.getValue() instanceof StreamingPropertyValue) {
continue;
}
faunusVertex.addProperty(property.getName(), property.getValue());
}
for (EdgeInfo edgeInfo : v.getEdgeInfos(Direction.OUT, authorizations)) {
faunusVertex.addEdge(com.tinkerpop.blueprints.Direction.OUT, edgeInfo.getLabel(), toFaunusVertexId(edgeInfo.getVertexId()));
}
for (EdgeInfo edgeInfo : v.getEdgeInfos(Direction.IN, authorizations)) {
faunusVertex.addEdge(com.tinkerpop.blueprints.Direction.IN, edgeInfo.getLabel(), toFaunusVertexId(edgeInfo.getVertexId()));
}
return faunusVertex;
}
private long toFaunusVertexId(String id) {
long h = 0;
for (char ch : id.toCharArray()) {
h = 31 * h + ch;
}
return Math.abs(h);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy