// org.vertexium.accumulo.mapreduce.AccumuloElementInputFormatBase (Maven / Gradle / Ivy)
package org.vertexium.accumulo.mapreduce;
import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.mapreduce.AccumuloRowInputFormat;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.Pair;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.vertexium.*;
import org.vertexium.accumulo.AccumuloAuthorizations;
import org.vertexium.accumulo.AccumuloGraph;
import org.vertexium.accumulo.LazyMutableProperty;
import org.vertexium.accumulo.LazyPropertyMetadata;
import org.vertexium.accumulo.iterator.model.ElementData;
import org.vertexium.accumulo.iterator.model.IteratorFetchHints;
import org.vertexium.util.MapUtils;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.*;
public abstract class AccumuloElementInputFormatBase extends InputFormat {
private final AccumuloRowInputFormat accumuloInputFormat;
public AccumuloElementInputFormatBase() {
accumuloInputFormat = new AccumuloRowInputFormat();
}
public static void setInputInfo(Job job, String instanceName, String zooKeepers, String principal, AuthenticationToken token, String[] authorizations, String tableName) throws AccumuloSecurityException {
AccumuloRowInputFormat.setInputTableName(job, tableName);
AccumuloRowInputFormat.setConnectorInfo(job, principal, token);
ClientConfiguration clientConfig = new ClientConfiguration()
.withInstance(instanceName)
.withZkHosts(zooKeepers);
AccumuloRowInputFormat.setZooKeeperInstance(job, clientConfig);
AccumuloRowInputFormat.setScanAuthorizations(job, new org.apache.accumulo.core.security.Authorizations(authorizations));
job.getConfiguration().setStrings(VertexiumMRUtils.CONFIG_AUTHORIZATIONS, authorizations);
}
public static void setFetchHints(Job job, ElementType elementType, FetchHints fetchHints) {
Iterable columnFamiliesToFetch = AccumuloGraph.getColumnFamiliesToFetch(elementType, fetchHints);
Collection> columnFamilyColumnQualifierPairs = new ArrayList<>();
for (Text columnFamilyToFetch : columnFamiliesToFetch) {
columnFamilyColumnQualifierPairs.add(new Pair<>(columnFamilyToFetch, null));
}
AccumuloInputFormat.fetchColumns(job, columnFamilyColumnQualifierPairs);
}
@Override
public List getSplits(JobContext jobContext) throws IOException {
return accumuloInputFormat.getSplits(jobContext);
}
@Override
public RecordReader createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
final RecordReader>> reader = accumuloInputFormat.createRecordReader(inputSplit, taskAttemptContext);
return new RecordReader() {
public AccumuloGraph graph;
public Authorizations authorizations;
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
reader.initialize(inputSplit, ctx);
Map configurationMap = VertexiumMRUtils.toMap(ctx.getConfiguration());
this.graph = (AccumuloGraph) new GraphFactory().createGraph(MapUtils.getAllWithPrefix(configurationMap, "graph"));
this.authorizations = new AccumuloAuthorizations(ctx.getConfiguration().getStrings(VertexiumMRUtils.CONFIG_AUTHORIZATIONS));
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
return reader.nextKeyValue();
}
@Override
public Text getCurrentKey() throws IOException, InterruptedException {
return reader.getCurrentKey();
}
@Override
public TValue getCurrentValue() throws IOException, InterruptedException {
PeekingIterator> row = reader.getCurrentValue();
return createElementFromRow(graph, row, authorizations);
}
@Override
public float getProgress() throws IOException, InterruptedException {
return reader.getProgress();
}
@Override
public void close() throws IOException {
reader.close();
}
};
}
protected abstract TValue createElementFromRow(AccumuloGraph graph, PeekingIterator> row, Authorizations authorizations);
protected static Iterable makePropertiesFromElementData(final AccumuloGraph graph, ElementData elementData, IteratorFetchHints fetchHints) {
return Iterables.transform(elementData.getProperties(fetchHints), new Function() {
@Nullable
@Override
public Property apply(@Nullable org.vertexium.accumulo.iterator.model.Property property) {
return makePropertyFromIteratorProperty(graph, property);
}
});
}
private static Property makePropertyFromIteratorProperty(AccumuloGraph graph, org.vertexium.accumulo.iterator.model.Property property) {
LazyPropertyMetadata metadata = null;
Set hiddenVisibilities = null;
if (property.hiddenVisibilities != null) {
hiddenVisibilities = Sets.newHashSet(Iterables.transform(property.hiddenVisibilities, new Function() {
@Nullable
@Override
public Visibility apply(Text visibilityText) {
return AccumuloGraph.accumuloVisibilityToVisibility(AccumuloGraph.visibilityToAccumuloVisibility(visibilityText.toString()));
}
}));
}
Visibility visibility = AccumuloGraph.accumuloVisibilityToVisibility(AccumuloGraph.visibilityToAccumuloVisibility(property.visibility));
return new LazyMutableProperty(
graph,
graph.getVertexiumSerializer(),
graph.getNameSubstitutionStrategy().inflate(property.key),
graph.getNameSubstitutionStrategy().inflate(property.name),
property.value,
metadata,
hiddenVisibilities,
visibility,
property.timestamp,
FetchHints.ALL_INCLUDING_HIDDEN
);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy