
// com.thinkaurelius.titan.hadoop.formats.util.TitanGraphOutputMapReduce Maven / Gradle / Ivy
package com.thinkaurelius.titan.hadoop.formats.util;
import static com.thinkaurelius.titan.hadoop.compat.HadoopCompatLoader.DEFAULT_COMPAT;
import static com.thinkaurelius.titan.hadoop.config.TitanHadoopConfiguration.OUTPUT_FORMAT;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.thinkaurelius.titan.core.*;
import com.thinkaurelius.titan.diskstorage.cassandra.AbstractCassandraStoreManager;
import com.thinkaurelius.titan.diskstorage.configuration.ModifiableConfiguration;
import com.thinkaurelius.titan.graphdb.types.system.BaseVertexLabel;
import com.thinkaurelius.titan.hadoop.*;
import com.thinkaurelius.titan.hadoop.config.ModifiableHadoopConfiguration;
import com.tinkerpop.blueprints.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.Reducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import static com.thinkaurelius.titan.hadoop.config.TitanHadoopConfiguration.OUTPUT_LOADER_SCRIPT_FILE;
import static com.tinkerpop.blueprints.Direction.IN;
import static com.tinkerpop.blueprints.Direction.OUT;
/**
* @author Marko A. Rodriguez (http://markorodriguez.com)
*/
public class TitanGraphOutputMapReduce {
/**
 * Job counters that the map and reduce phases of this class increment
 * (via {@code DEFAULT_COMPAT.incrementContextCounter}) as elements are
 * written to, removed from, or skipped in the Titan graph.
 */
public enum Counters {
    // vertices created in / removed from the Titan graph
    VERTICES_ADDED,
    VERTICES_REMOVED,

    // vertex properties created / removed
    VERTEX_PROPERTIES_ADDED,
    VERTEX_PROPERTIES_REMOVED,

    // edges created / removed
    EDGES_ADDED,
    EDGES_REMOVED,

    // properties set on / removed from relations while synchronizing them
    EDGE_PROPERTIES_ADDED,
    EDGE_PROPERTIES_REMOVED,

    // elements skipped because no counterpart could be resolved
    NULL_VERTEX_EDGES_IGNORED,
    NULL_VERTICES_IGNORED,
    NULL_RELATIONS_IGNORED,

    // outcome of the commit attempted in each task's cleanup()
    SUCCESSFUL_TRANSACTIONS,
    FAILED_TRANSACTIONS
}
/** Logger shared by the map and reduce classes nested in this class. */
public static final Logger LOGGER = LoggerFactory.getLogger(TitanGraphOutputMapReduce.class);

// TODO move this out-of-band
// some random property that will 'never' be used by anyone

/** Property key under which the Titan vertex id is stored on a Faunus vertex. */
public static final String TITAN_ID = "_bId0192834";

/** Property key under which the reduce phase stores the Faunus-id to Titan-id map. */
public static final String ID_MAP_KEY = "_iDMaPKeY";

// Names not referenced elsewhere in the visible part of this class.
private static final String HADOOP_VERTEX = "hadoopVertex";
private static final String HADOOP_EDGE = "hadoopEdge";
private static final String TITAN_OUT_VERTEX = "titanOutVertex";
private static final String TITAN_IN_VERTEX = "titanInVertex";
private static final String GRAPH = "graph";
private static final String MAP_CONTEXT = "mapContext";
public static TitanGraph generateGraph(final ModifiableHadoopConfiguration titanConf) {
final Class extends OutputFormat> format = titanConf.getClass(OUTPUT_FORMAT, OutputFormat.class, OutputFormat.class);
if (TitanOutputFormat.class.isAssignableFrom(format)) {
ModifiableConfiguration mc = titanConf.getOutputConf();
boolean present = mc.has(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE);
LOGGER.trace("Keyspace in_config=" + present + " value=" + mc.get(AbstractCassandraStoreManager.CASSANDRA_KEYSPACE));
return TitanFactory.open(mc);
} else {
throw new RuntimeException("The provide graph output format is not a supported TitanOutputFormat: " + format.getName());
}
}
//UTILITY METHODS
/**
 * Resolves the value carried by a relation.
 *
 * @param relation the relation to inspect
 * @param graph    graph used to look up the adjacent vertex of an edge
 * @return the property value when {@code relation} is a property; otherwise the result of
 *         looking up the edge's IN vertex in {@code graph} by its long id
 */
private static Object getValue(TitanRelation relation, TitanGraph graph) {
    if (!relation.isProperty()) {
        final TitanEdge edge = (TitanEdge) relation;
        return graph.getVertex(edge.getVertex(IN).getLongId());
    }
    final TitanProperty property = (TitanProperty) relation;
    return property.getValue();
}
// WRITE ALL THE VERTICES AND THEIR PROPERTIES
public static class VertexMap extends Mapper> {
private TitanGraph graph;
private boolean trackState;
private ModifiableHadoopConfiguration faunusConf;
private LoaderScriptWrapper loaderScript;
private final Holder vertexHolder = new Holder();
private final LongWritable longWritable = new LongWritable();
@Override
public void setup(final Mapper.Context context) throws IOException, InterruptedException {
faunusConf = ModifiableHadoopConfiguration.of(DEFAULT_COMPAT.getContextConfiguration(context));
graph = TitanGraphOutputMapReduce.generateGraph(faunusConf);
trackState = context.getConfiguration().getBoolean(Tokens.TITAN_HADOOP_PIPELINE_TRACK_STATE, false);
// Check whether a script is defined in the config
if (faunusConf.has(OUTPUT_LOADER_SCRIPT_FILE)) {
Path scriptPath = new Path(faunusConf.get(OUTPUT_LOADER_SCRIPT_FILE));
FileSystem scriptFS = FileSystem.get(DEFAULT_COMPAT.getJobContextConfiguration(context));
loaderScript = new LoaderScriptWrapper(scriptFS, scriptPath);
}
}
@Override
public void map(final NullWritable key, final FaunusVertex value, final Mapper>.Context context) throws IOException, InterruptedException {
try {
final TitanVertex titanVertex = this.getCreateOrDeleteVertex(value, context);
if (null != titanVertex) { // the vertex was state != deleted (if it was we know incident edges are deleted too)
// Propagate shell vertices with Titan ids
final FaunusVertex shellVertex = new FaunusVertex(faunusConf, value.getLongId());
shellVertex.setProperty(TITAN_ID, titanVertex.getLongId());
for (final TitanEdge edge : value.query().direction(OUT).titanEdges()) {
if (!trackState || edge.isNew()) { //Only need to propagate ids for new edges
this.longWritable.set(edge.getVertex(IN).getLongId());
context.write(this.longWritable, this.vertexHolder.set('s', shellVertex));
}
}
this.longWritable.set(value.getLongId());
// value.getPropertiesWithState().clear(); // no longer needed in reduce phase
value.setProperty(TITAN_ID, titanVertex.getLongId()); // need this for id resolution in edge-map phase
// value.removeEdges(Tokens.Action.DROP, OUT); // no longer needed in reduce phase
context.write(this.longWritable, this.vertexHolder.set('v', value));
}
} catch (final Exception e) {
graph.rollback();
DEFAULT_COMPAT.incrementContextCounter(context, Counters.FAILED_TRANSACTIONS, 1L);
throw new IOException(e.getMessage(), e);
}
}
@Override
public void cleanup(final Mapper>.Context context) throws IOException, InterruptedException {
try {
graph.commit();
DEFAULT_COMPAT.incrementContextCounter(context, Counters.SUCCESSFUL_TRANSACTIONS, 1L);
} catch (Exception e) {
LOGGER.error("Could not commit transaction during Map.cleanup(): ", e);
graph.rollback();
DEFAULT_COMPAT.incrementContextCounter(context, Counters.FAILED_TRANSACTIONS, 1L);
throw new IOException(e.getMessage(), e);
}
graph.shutdown();
}
public TitanVertex getCreateOrDeleteVertex(final FaunusVertex faunusVertex, final Mapper>.Context context) throws InterruptedException {
if (this.trackState && faunusVertex.isRemoved()) {
final Vertex titanVertex = graph.getVertex(faunusVertex.getLongId());
if (null == titanVertex)
DEFAULT_COMPAT.incrementContextCounter(context, Counters.NULL_VERTICES_IGNORED, 1L);
else {
titanVertex.remove();
DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTICES_REMOVED, 1L);
}
return null;
} else {
final TitanVertex titanVertex;
if (faunusVertex.isNew()) {
// Vertex is new to this faunus run, but might already exist in Titan
titanVertex = getTitanVertex(faunusVertex, context);
} else {
titanVertex = (TitanVertex) graph.getVertex(faunusVertex.getLongId());
if (titanVertex==null) {
DEFAULT_COMPAT.incrementContextCounter(context, Counters.NULL_VERTICES_IGNORED, 1L);
return null;
}
}
if (faunusVertex.isNew() || faunusVertex.isModified()) {
//Synchronize properties
for (final TitanProperty p : faunusVertex.query().queryAll().properties()) {
if (null != loaderScript && loaderScript.hasVPropMethod()) {
loaderScript.getVProp(p, titanVertex, graph, context);
} else {
getCreateOrDeleteRelation(graph, trackState, OUT, faunusVertex, titanVertex,
(StandardFaunusProperty) p, context);
}
}
}
return titanVertex;
}
}
private TitanVertex getTitanVertex(FaunusVertex faunusVertex, Mapper>.Context context) {
if (null != loaderScript && loaderScript.hasVertexMethod()) {
return loaderScript.getVertex(faunusVertex, graph, context);
} else {
VertexLabel titanLabel = BaseVertexLabel.DEFAULT_VERTEXLABEL;
FaunusVertexLabel faunusLabel = faunusVertex.getVertexLabel();
if (!faunusLabel.isDefault()) titanLabel = graph.getVertexLabel(faunusLabel.getName());
TitanVertex tv = graph.addVertexWithLabel(titanLabel);
DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTICES_ADDED, 1L);
return tv;
}
}
}
/**
 * Creates, updates or removes the Titan relation (edge or vertex property) that
 * corresponds to a Faunus relation incident to {@code titanVertex}.
 *
 * @param graph          the Titan graph being written
 * @param trackState     whether element state (new/modified/removed/loaded) is honored
 * @param dir            direction of the relation relative to {@code titanVertex}; IN or OUT
 * @param faunusVertex   the Faunus vertex the relation was read from (source of the id map)
 * @param titanVertex    the Titan counterpart of {@code faunusVertex}
 * @param faunusRelation the relation to write
 * @param context        task context used for counters
 * @return the created or updated Titan relation; {@code null} when the relation was
 *         removed, could not be found, or (with state tracking) is merely loaded
 */
private static TitanRelation getCreateOrDeleteRelation(final TitanGraph graph, final boolean trackState, final Direction dir,
                                                       final FaunusVertex faunusVertex, final TitanVertex titanVertex,
                                                       final StandardFaunusRelation faunusRelation, final Mapper.Context context) {
    assert dir == IN || dir == OUT;

    final TitanRelation titanRelation;
    if (trackState && (faunusRelation.isModified() || faunusRelation.isRemoved())) { //Modify existing
        final Map<Long, Long> idMap = getIdMap(faunusVertex);
        // For edges, translate the adjacent vertex's Faunus id through the id map (may be null).
        final Long otherTitanVertexId = faunusRelation.isEdge()
                ? idMap.get(((FaunusEdge) faunusRelation).getVertexId(dir.opposite()))
                : null;
        titanRelation = getIncidentRelation(graph, dir, titanVertex, faunusRelation, otherTitanVertexId);
        if (null == titanRelation) {
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.NULL_RELATIONS_IGNORED, 1L);
            return null;
        } else if (faunusRelation.isRemoved()) {
            titanRelation.remove();
            DEFAULT_COMPAT.incrementContextCounter(context,
                    faunusRelation.isEdge() ? Counters.EDGES_REMOVED : Counters.VERTEX_PROPERTIES_REMOVED, 1L);
            return null;
        }
        // modified and found: fall through to property synchronization below
    } else if (trackState && faunusRelation.isLoaded()) {
        return null;
    } else { //Create new
        assert faunusRelation.isNew();
        if (faunusRelation.isEdge()) {
            StandardFaunusEdge faunusEdge = (StandardFaunusEdge) faunusRelation;
            TitanVertex otherVertex = getOtherTitanVertex(faunusVertex, faunusEdge, dir.opposite(), graph);
            if (dir == IN) {
                titanRelation = otherVertex.addEdge(faunusEdge.getLabel(), titanVertex);
            } else {
                titanRelation = titanVertex.addEdge(faunusEdge.getLabel(), otherVertex);
            }
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.EDGES_ADDED, 1L);
        } else {
            StandardFaunusProperty faunusProperty = (StandardFaunusProperty) faunusRelation;
            assert dir == OUT;
            titanRelation = titanVertex.addProperty(faunusProperty.getTypeName(), faunusProperty.getValue());
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTEX_PROPERTIES_ADDED, 1L);
        }
    }

    synchronizeRelationProperties(graph, faunusRelation, titanRelation, context);
    return titanRelation;
}
/**
 * Copies the incident properties (and unidirected edges) of a Faunus relation onto its
 * Titan counterpart. Nothing is done unless the Faunus relation is modified or new.
 * All removals are applied before any additions.
 *
 * @param graph          graph used to resolve the vertex referenced by an incident edge
 * @param faunusRelation the relation whose incident relations are read
 * @param titanRelation  the relation that is updated
 * @param context        task context used for counters
 * @return {@code titanRelation}
 */
private static TitanRelation synchronizeRelationProperties(final TitanGraph graph,
                                                           final StandardFaunusRelation faunusRelation,
                                                           final TitanRelation titanRelation,
                                                           final Mapper.Context context) {
    final boolean needsSync = faunusRelation.isModified() || faunusRelation.isNew();
    if (!needsSync) {
        return titanRelation;
    }

    // First pass: drop everything flagged as removed.
    for (TitanRelation incident : faunusRelation.query().queryAll().relations()) {
        if (!incident.isRemoved()) {
            continue;
        }
        titanRelation.removeProperty(incident.getType().getName());
        DEFAULT_COMPAT.incrementContextCounter(context, Counters.EDGE_PROPERTIES_REMOVED, 1L);
    }

    // Second pass: set everything flagged as new.
    for (TitanRelation incident : faunusRelation.query().queryAll().relations()) {
        if (!incident.isNew()) {
            continue;
        }
        final Object newValue;
        if (incident.isProperty()) {
            newValue = ((FaunusProperty) incident).getValue();
        } else {
            //TODO: ensure that the adjacent vertex has been previous assigned an id since ids don't propagate along unidirected edges
            newValue = graph.getVertex(((FaunusEdge) incident).getVertexId(IN));
        }
        titanRelation.setProperty(incident.getType().getName(), newValue);
        DEFAULT_COMPAT.incrementContextCounter(context, Counters.EDGE_PROPERTIES_ADDED, 1L);
    }
    return titanRelation;
}
/**
 * Looks up the Titan vertex at the other end of a Faunus edge.
 * <p>
 * The adjacent vertex's id is taken from the edge and, when the id map stored on
 * {@code faunusVertex} contains it, replaced by the mapped value before the lookup.
 *
 * @param faunusVertex the vertex carrying the id map
 * @param faunusEdge   the edge whose adjacent vertex is wanted
 * @param otherDir     the end of the edge to resolve
 * @param graph        the graph to look the vertex up in
 * @return the result of {@code graph.getVertex} for the resolved id
 */
private static TitanVertex getOtherTitanVertex(final FaunusVertex faunusVertex, final FaunusEdge faunusEdge, final Direction otherDir, final TitanGraph graph) {
    final Map<Long, Long> idMap = getIdMap(faunusVertex);
    Long othervertexid = faunusEdge.getVertexId(otherDir);
    if (null != idMap && idMap.containsKey(othervertexid)) {
        othervertexid = idMap.get(othervertexid);
    }
    TitanVertex otherVertex = (TitanVertex) graph.getVertex(othervertexid);
    //TODO: check that other vertex has valid id assignment for unidirected edges
    return otherVertex;
}
/**
 * Reads the id map stored on a Faunus vertex under {@link #ID_MAP_KEY}.
 *
 * @param faunusVertex the vertex to read the property from
 * @return the stored map, or an empty immutable map when the property is absent; never
 *         {@code null}
 */
private static Map<Long, Long> getIdMap(final FaunusVertex faunusVertex) {
    Map<Long, Long> idMap = faunusVertex.getProperty(ID_MAP_KEY);
    if (null == idMap) {
        idMap = ImmutableMap.of();
    }
    return idMap;
}
/**
 * Finds the relation incident to {@code titanVertex} whose long id equals that of the given
 * Faunus relation.
 * <p>
 * The search is restricted to the given direction and to the relation type named by the
 * Faunus relation. For edges it is further restricted to the adjacent vertex, which is
 * looked up by {@code otherTitanVertexId} when given, otherwise by the id stored on the
 * Faunus edge.
 *
 * @param graph              the graph to query
 * @param dir                direction of the relation relative to {@code titanVertex}
 * @param titanVertex        the vertex whose incident relations are searched
 * @param faunusRelation     the relation to match
 * @param otherTitanVertexId id of the adjacent vertex, or {@code null} to use the edge's own
 * @return the matching relation, or {@code null} if there is none or the adjacent vertex of
 *         an edge cannot be found
 */
private static TitanRelation getIncidentRelation(final TitanGraph graph, final Direction dir,
                                                 final TitanVertex titanVertex, final StandardFaunusRelation faunusRelation, Long otherTitanVertexId) {
    TitanVertexQuery qb = titanVertex.query().direction(dir).types(graph.getRelationType(faunusRelation.getTypeName()));
    if (faunusRelation.isEdge()) {
        TitanVertex otherVertex;
        if (otherTitanVertexId != null) {
            otherVertex = (TitanVertex) graph.getVertex(otherTitanVertexId);
        } else {
            StandardFaunusEdge edge = (StandardFaunusEdge) faunusRelation;
            otherVertex = (TitanVertex) graph.getVertex(edge.getVertexId(dir.opposite()));
        }
        if (otherVertex != null) qb.adjacent(otherVertex);
        else return null;
    }
    // qb.has(ImplicitKey.TITANID.getName(), Cmp.EQUAL, faunusRelation.getLongId()); TODO: must check for multiplicity constraints
    TitanRelation titanRelation = (TitanRelation) Iterables.getFirst(Iterables.filter(faunusRelation.isEdge() ? qb.titanEdges() : qb.properties(), new Predicate<TitanRelation>() {
        @Override
        public boolean apply(@Nullable TitanRelation rel) {
            // The argument is declared nullable, so guard before dereferencing it.
            return rel != null && rel.getLongId() == faunusRelation.getLongId();
        }
    }), null);
    assert titanRelation == null || titanRelation.getLongId() == faunusRelation.getLongId();
    return titanRelation;
}
//MAPS FAUNUS VERTEXIDs to TITAN VERTEXIDs
public static class Reduce extends Reducer, NullWritable, FaunusVertex> {
@Override
public void reduce(final LongWritable key, final Iterable> values, final Reducer, NullWritable, FaunusVertex>.Context context) throws IOException, InterruptedException {
FaunusVertex faunusVertex = null;
// generate a map of the Titan/Hadoop id with the Titan id for all shell vertices (vertices incoming adjacent)
final java.util.Map idMap = new HashMap();
for (final Holder holder : values) {
if (holder.getTag() == 's') {
idMap.put(holder.get().getLongId(), holder.get().getProperty(TITAN_ID));
} else {
faunusVertex = holder.get();
}
}
if (null != faunusVertex) {
faunusVertex.setProperty(ID_MAP_KEY, idMap);
context.write(NullWritable.get(), faunusVertex);
} else {
LOGGER.warn("No source vertex: hadoopVertex[" + key.get() + "]");
DEFAULT_COMPAT.incrementContextCounter(context, Counters.NULL_VERTICES_IGNORED, 1L);
}
}
}
// WRITE ALL THE EDGES CONNECTING THE VERTICES
/**
 * Map phase that writes edges to Titan.
 * <p>
 * For every input vertex, each of its IN edges is created, updated or removed in the graph,
 * either through the configured loader script (when it defines an edge method) or through
 * {@code getCreateOrDeleteRelation}. The graph transaction is committed in {@link #cleanup}.
 */
public static class EdgeMap extends Mapper<NullWritable, FaunusVertex, NullWritable, FaunusVertex> {

    private TitanGraph graph;
    private boolean trackState;
    private ModifiableHadoopConfiguration faunusConf;
    private LoaderScriptWrapper loaderScript;

    /**
     * Opens the output graph, reads the track-state flag and, if a loader script file is
     * configured, loads it.
     */
    @Override
    public void setup(final Mapper.Context context) throws IOException, InterruptedException {
        faunusConf = ModifiableHadoopConfiguration.of(DEFAULT_COMPAT.getContextConfiguration(context));
        graph = TitanGraphOutputMapReduce.generateGraph(faunusConf);
        trackState = context.getConfiguration().getBoolean(Tokens.TITAN_HADOOP_PIPELINE_TRACK_STATE, false);

        // Check whether a script is defined in the config
        if (faunusConf.has(OUTPUT_LOADER_SCRIPT_FILE)) {
            Path scriptPath = new Path(faunusConf.get(OUTPUT_LOADER_SCRIPT_FILE));
            FileSystem scriptFS = FileSystem.get(DEFAULT_COMPAT.getJobContextConfiguration(context));
            loaderScript = new LoaderScriptWrapper(scriptFS, scriptPath);
        }
    }

    /**
     * Processes every IN edge of the given vertex.
     *
     * @throws IOException wrapping any exception raised while processing an edge; the graph
     *                     transaction is rolled back and FAILED_TRANSACTIONS is incremented
     *                     first
     */
    @Override
    public void map(final NullWritable key, final FaunusVertex value, final Mapper<NullWritable, FaunusVertex, NullWritable, FaunusVertex>.Context context) throws IOException, InterruptedException {
        try {
            for (final TitanEdge edge : value.query().queryAll().direction(IN).titanEdges()) {
                this.getCreateOrDeleteEdge(value, (StandardFaunusEdge) edge, context);
            }
        } catch (final Exception e) {
            graph.rollback();
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.FAILED_TRANSACTIONS, 1L);
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Commits the graph transaction and shuts the graph down.
     *
     * @throws IOException if the commit fails; the transaction is rolled back first
     */
    @Override
    public void cleanup(final Mapper<NullWritable, FaunusVertex, NullWritable, FaunusVertex>.Context context) throws IOException, InterruptedException {
        try {
            graph.commit();
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.SUCCESSFUL_TRANSACTIONS, 1L);
        } catch (Exception e) {
            // This class is a Mapper; the message previously named Reduce.cleanup().
            LOGGER.error("Could not commit transaction during EdgeMap.cleanup(): ", e);
            graph.rollback();
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.FAILED_TRANSACTIONS, 1L);
            throw new IOException(e.getMessage(), e);
        } finally {
            // Shut the graph down on the failure path too, so it is not left open.
            graph.shutdown();
        }
    }

    /**
     * Creates, updates or removes the Titan edge corresponding to an IN edge of
     * {@code faunusVertex}.
     * <p>
     * The Titan vertex is looked up by the {@link #TITAN_ID} property stored on
     * {@code faunusVertex}.
     *
     * @param faunusVertex the vertex the edge is incident to
     * @param faunusEdge   the edge to write
     * @param context      task context used for counters and handed to the loader script
     * @return the Titan edge produced by the loader script or by
     *         {@code getCreateOrDeleteRelation}; the latter may return {@code null}
     */
    public TitanEdge getCreateOrDeleteEdge(final FaunusVertex faunusVertex, final StandardFaunusEdge faunusEdge, final Mapper<NullWritable, FaunusVertex, NullWritable, FaunusVertex>.Context context) throws InterruptedException {
        final Direction dir = IN;
        final TitanVertex titanVertex = (TitanVertex) this.graph.getVertex(faunusVertex.getProperty(TITAN_ID));

        if (null != loaderScript && loaderScript.hasEdgeMethod()) {
            TitanEdge te = loaderScript.getEdge(faunusEdge, titanVertex, getOtherTitanVertex(faunusVertex, faunusEdge, dir.opposite(), graph), graph, context);
            synchronizeRelationProperties(graph, faunusEdge, te, context);
            return te;
        } else {
            return (TitanEdge) getCreateOrDeleteRelation(graph, trackState, dir, faunusVertex, titanVertex, faunusEdge, context);
        }
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy