All downloads are FREE. Search and download functionality uses the official Maven repository.

com.thinkaurelius.titan.hadoop.formats.util.SchemaInferencerMapReduce Maven / Gradle / Ivy

package com.thinkaurelius.titan.hadoop.formats.util;

import static com.thinkaurelius.titan.hadoop.compat.HadoopCompatLoader.DEFAULT_COMPAT;

import com.thinkaurelius.titan.core.TitanTransaction;
import com.thinkaurelius.titan.core.VertexLabel;
import com.thinkaurelius.titan.core.schema.DefaultSchemaMaker;
import com.thinkaurelius.titan.core.TitanGraph;
import com.thinkaurelius.titan.diskstorage.configuration.Configuration;
import com.thinkaurelius.titan.graphdb.blueprints.BlueprintsDefaultSchemaMaker;
import com.thinkaurelius.titan.graphdb.types.system.BaseVertexLabel;
import com.thinkaurelius.titan.hadoop.FaunusVertex;
import com.thinkaurelius.titan.hadoop.config.ModifiableHadoopConfiguration;
import com.thinkaurelius.titan.hadoop.mapreduce.util.EmptyConfiguration;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.Edge;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.security.token.Token;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Map;

/**
 * @author Marko A. Rodriguez (http://markorodriguez.com)
 */
public class SchemaInferencerMapReduce {

    /**
     * Hadoop counters incremented by {@link Reduce} each time it creates a schema element
     * that the output transaction did not already know about.
     */
    public enum Counters {
        /** Incremented once for every edge label created by the reducer. */
        EDGE_LABELS_CREATED,
        /** Incremented once for every vertex label created by the reducer. */
        VERTEX_LABELS_CREATED,
        /** Incremented once for every property key created by the reducer. */
        PROPERTY_KEYS_CREATED
    }

    // Reserved id for the synthetic "schema carrier" vertex. Map creates a FaunusVertex with this
    // id, records every discovered schema name on it as a property, and emits it under funnyKey.
    // Reduce treats any key equal to funnyLong as that carrier instead of as graph data.
    private static final long funnyLong = Long.MAX_VALUE; // TODO delete this, move it out-of-band
    // Writable form of funnyLong, used as the map-output key of the carrier vertex.
    private static final LongWritable funnyKey = new LongWritable(funnyLong);
    private static final Logger log = LoggerFactory.getLogger(SchemaInferencerMapReduce.class);

    /**
     * Mapper that forwards every input vertex unchanged, keyed by its long id, while collecting
     * the schema names it sees on a single carrier vertex owned by this mapper instance.
     *
     * <p>Names are stored as property keys on the carrier vertex with a one-character prefix:
     * {@code "v"} for vertex labels, {@code "k"} for property keys (of vertices and of outgoing
     * edges) and {@code "l"} for edge labels. The carrier vertex is emitted once, from
     * {@link #cleanup}, under the reserved key {@code funnyKey}.
     *
     * <p>Type parameters: input is {@code (NullWritable, FaunusVertex)}, output is
     * {@code (LongWritable, FaunusVertex)}.
     */
    public static class Map extends Mapper<NullWritable, FaunusVertex, LongWritable, FaunusVertex> {

        private FaunusVertex funnyVertex;
        private Configuration faunusConf;
        // Reused for every emitted record to avoid allocating a new key per vertex.
        private final LongWritable longWritable = new LongWritable();

        /**
         * Builds the job configuration view and creates the carrier vertex with the reserved id.
         *
         * @param context the Hadoop task context
         */
        @Override
        public void setup(final Context context) throws IOException, InterruptedException {
            faunusConf = ModifiableHadoopConfiguration.of(DEFAULT_COMPAT.getContextConfiguration(context));
            this.funnyVertex = new FaunusVertex(faunusConf, funnyLong);
        }

        /**
         * Records the schema names used by {@code value} on the carrier vertex and then writes
         * {@code value} through, keyed by its long id.
         *
         * @param key     ignored
         * @param value   the vertex to inspect and forward
         * @param context the Hadoop task context the vertex is written to
         */
        @Override
        public void map(final NullWritable key, final FaunusVertex value, final Context context) throws IOException, InterruptedException {
            // Vertex label: the default label is not recorded.
            final VertexLabel vl = value.getVertexLabel();
            if (vl != BaseVertexLabel.DEFAULT_VERTEXLABEL) {
                this.funnyVertex.setProperty("v" + vl.getName(), String.class.getName());
            }

            // Vertex property keys
            for (final String property : value.getPropertyKeys()) {
                this.funnyVertex.setProperty("k" + property, Object.class.getName());
                // TODO: Automated type inference
            }

            // Edge labels: only outgoing edges are inspected.
            for (final Edge edge : value.getEdges(Direction.OUT)) {
                this.funnyVertex.setProperty("l" + edge.getLabel(), String.class.getName());
                // Edge property keys
                for (final String property : edge.getPropertyKeys()) {
                    this.funnyVertex.setProperty("k" + property, Object.class.getName());
                }
            }

            this.longWritable.set(value.getLongId());
            context.write(this.longWritable, value);
        }

        /**
         * Emits the carrier vertex holding all schema names collected by this mapper.
         *
         * @param context the Hadoop task context the carrier vertex is written to
         */
        @Override
        public void cleanup(final Context context) throws IOException, InterruptedException {
            context.write(funnyKey, this.funnyVertex);
        }
    }

    /**
     * Reducer that creates missing schema elements in the output graph and forwards all
     * regular vertices unchanged.
     *
     * <p>Records keyed by {@code funnyLong} are carrier vertices whose property keys encode
     * schema names with a one-character prefix ({@code 'k'} property key, {@code 'l'} edge
     * label, {@code 'v'} vertex label). For every name not yet known to the transaction the
     * corresponding schema element is created and the matching {@link Counters} entry is
     * incremented. Records under any other key are written out with a {@link NullWritable} key.
     *
     * <p>Type parameters: input is {@code (LongWritable, FaunusVertex)}, output is
     * {@code (NullWritable, FaunusVertex)}.
     */
    public static class Reduce extends org.apache.hadoop.mapreduce.Reducer<LongWritable, FaunusVertex, NullWritable, FaunusVertex> {

        private TitanGraph graph;
        private ModifiableHadoopConfiguration faunusConf;
        private TitanTransaction tx;

        /**
         * Opens the output graph and starts a transaction with batch loading disabled. When
         * debug logging is enabled, also dumps the job configuration and credentials.
         *
         * @param context the Hadoop task context
         */
        @Override
        public void setup(final Context context) throws IOException, InterruptedException {
            faunusConf = ModifiableHadoopConfiguration.of(DEFAULT_COMPAT.getContextConfiguration(context));
            graph = TitanGraphOutputMapReduce.generateGraph(faunusConf);
            tx = graph.buildTransaction().disableBatchLoading().start();

            // Skip walking the whole configuration and credential set unless the output is wanted.
            if (log.isDebugEnabled()) {
                log.debug("Dumping configuration");
                for (final java.util.Map.Entry<String, String> ent : faunusConf.getHadoopConfiguration()) {
                    log.debug("[SchemaInferencerMRConfig] {}={}", ent.getKey(), ent.getValue());
                }
                log.debug("Done dumping configuration");

                // NOTE(review): this writes token kinds and identifiers to the log; confirm that
                // is acceptable wherever debug logging may be switched on.
                log.debug("Dumping credentials");
                for (final Token<?> token : context.getCredentials().getAllTokens()) {
                    log.debug("[Credentials] kind={} ident={} token={}", token.getKind(), token.getIdentifier(), token);
                }
                log.debug("Done dumping credentials");
            }
        }

        /**
         * Creates schema elements for carrier vertices, or forwards regular vertices.
         *
         * @param key     the vertex id, or {@code funnyLong} for carrier vertices
         * @param value   the vertices grouped under {@code key}
         * @param context the Hadoop task context used for output and counters
         * @throws IllegalArgumentException if a carrier property key has an unknown prefix
         */
        @Override
        public void reduce(final LongWritable key, final Iterable<FaunusVertex> value, final Context context) throws IOException, InterruptedException {
            if (key.get() != funnyLong) {
                // Regular graph data: pass through untouched.
                for (final FaunusVertex vertex : value) {
                    context.write(NullWritable.get(), vertex);
                }
                return;
            }

            final DefaultSchemaMaker typeMaker = BlueprintsDefaultSchemaMaker.INSTANCE;
            for (final FaunusVertex vertex : value) {
                for (final String property : vertex.getPropertyKeys()) {
                    // First character is the kind prefix, the remainder is the schema name.
                    final char type = property.charAt(0);
                    final String typeName = property.substring(1);

                    switch (type) {
                        case 'k':
                            if (tx.getRelationType(typeName) == null) {
                                // TODO: Automated type inference
                                typeMaker.makePropertyKey(tx.makePropertyKey(typeName));
                                DEFAULT_COMPAT.incrementContextCounter(context, Counters.PROPERTY_KEYS_CREATED, 1L);
                            }
                            break;
                        case 'l':
                            if (tx.getRelationType(typeName) == null) {
                                typeMaker.makeEdgeLabel(tx.makeEdgeLabel(typeName));
                                DEFAULT_COMPAT.incrementContextCounter(context, Counters.EDGE_LABELS_CREATED, 1L);
                            }
                            break;
                        case 'v':
                            if (!tx.containsVertexLabel(typeName)) {
                                typeMaker.makeVertexLabel(tx.makeVertexLabel(typeName));
                                DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTEX_LABELS_CREATED, 1L);
                            }
                            break;
                        default:
                            throw new IllegalArgumentException("Unexpected type: " + type);
                    }
                }
            }
        }

        /**
         * Commits the schema transaction and shuts the graph down.
         *
         * @param context the Hadoop task context
         */
        @Override
        public void cleanup(final Context context) throws IOException, InterruptedException {
            try {
                tx.commit();
            } finally {
                // Release the graph even when the commit fails.
                graph.shutdown();
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy