All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.thinkaurelius.titan.hadoop.mapreduce.sideeffect.LinkMapReduce Maven / Gradle / Ivy

There is a newer version: 1.0.0
Show newest version
package com.thinkaurelius.titan.hadoop.mapreduce.sideeffect;

import static com.thinkaurelius.titan.hadoop.compat.HadoopCompatLoader.DEFAULT_COMPAT;

import com.thinkaurelius.titan.diskstorage.configuration.Configuration;
import com.thinkaurelius.titan.hadoop.*;
import com.thinkaurelius.titan.hadoop.StandardFaunusEdge;
import com.thinkaurelius.titan.hadoop.config.ModifiableHadoopConfiguration;
import com.thinkaurelius.titan.hadoop.mapreduce.util.CounterMap;
import com.tinkerpop.blueprints.Direction;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;

import static com.tinkerpop.blueprints.Direction.IN;
import static com.tinkerpop.blueprints.Direction.OUT;

import static com.thinkaurelius.titan.hadoop.config.TitanHadoopConfiguration.*;

/**
 * @author Marko A. Rodriguez (http://markorodriguez.com)
 */
public class LinkMapReduce {

//    public static final String DIRECTION = Tokens.makeNamespace(LinkMapReduce.class) + ".direction";
//    public static final String LABEL = Tokens.makeNamespace(LinkMapReduce.class) + ".label";
//    public static final String STEP = Tokens.makeNamespace(LinkMapReduce.class) + ".step";
//    public static final String MERGE_DUPLICATES = Tokens.makeNamespace(LinkMapReduce.class) + ".mergeDuplicates";
//    public static final String MERGE_WEIGHT_KEY = Tokens.makeNamespace(LinkMapReduce.class) + ".mergeWeightKey";

    /**
     * Sentinel stored as the merge weight key when no weight property should be written.
     * {@code createConfiguration} stores it when it is given a {@code null} weight key, and
     * the mapper skips setting the weight property whenever the configured key equals this value.
     */
    public static final String NO_WEIGHT_KEY = "_";

    /**
     * Hadoop job counters incremented by the mapper, combiner and reducer of this class
     * with the number of edges each task attached, split by edge direction.
     */
    public enum Counters {
        // Incremented when the edges a task attached were not in the OUT direction.
        IN_EDGES_CREATED,
        // Incremented when the edges a task attached were in the OUT direction.
        OUT_EDGES_CREATED
    }

    /**
     * Builds the Hadoop configuration consumed by the link mapper, combiner and reducer.
     *
     * @param direction      value stored under {@code LINK_DIRECTION}
     * @param label          value stored under {@code LINK_LABEL}
     * @param step           value stored under {@code LINK_STEP}; the mapper uses it as an index into each path
     * @param mergeWeightKey value stored under {@code LINK_MERGE_WEIGHT_KEY}; when {@code null},
     *                       duplicate merging is switched off and {@link #NO_WEIGHT_KEY} is stored instead
     * @return the Hadoop configuration backing the populated options
     */
    public static org.apache.hadoop.conf.Configuration createConfiguration(final Direction direction, final String label, final int step, final String mergeWeightKey) {
        final ModifiableHadoopConfiguration linkConf = ModifiableHadoopConfiguration.withoutResources();

        linkConf.set(LINK_STEP, step);
        linkConf.set(LINK_DIRECTION, direction);
        linkConf.set(LINK_LABEL, label);

        // Duplicate merging is enabled exactly when the caller supplied a weight key.
        final boolean mergeRequested = (mergeWeightKey != null);
        linkConf.set(LINK_MERGE_DUPLICATES, mergeRequested);
        linkConf.set(LINK_MERGE_WEIGHT_KEY, mergeRequested ? mergeWeightKey : NO_WEIGHT_KEY);

        // The mapper refuses to run unless path tracking is switched on.
        linkConf.set(PIPELINE_TRACK_PATHS, true);

        return linkConf.getHadoopConfiguration();
    }

    public static class Map extends Mapper {

        private Direction direction;
        private String label;
        private int step;
        private final Holder holder = new Holder();
        private final LongWritable longWritable = new LongWritable();
        private boolean mergeDuplicates;
        private String mergeWeightKey;
        private Configuration faunusConf;

        @Override
        public void setup(final Mapper.Context context) throws IOException, InterruptedException {
            faunusConf = ModifiableHadoopConfiguration.of(DEFAULT_COMPAT.getContextConfiguration(context));

            if (!faunusConf.get(PIPELINE_TRACK_PATHS))
                throw new IllegalStateException(LinkMapReduce.class.getSimpleName() + " requires that paths be enabled");

            step = faunusConf.get(LINK_STEP);
            direction = faunusConf.get(LINK_DIRECTION);
            label = faunusConf.get(LINK_LABEL);
            mergeDuplicates = faunusConf.get(LINK_MERGE_DUPLICATES);
            mergeWeightKey = faunusConf.get(LINK_MERGE_WEIGHT_KEY);
        }

        @Override
        public void map(final NullWritable key, final FaunusVertex value, final Mapper.Context context) throws IOException, InterruptedException {
            final long valueId = value.getLongId();

            if (value.hasPaths()) {
                long edgesCreated = 0;
                if (mergeDuplicates) {
                    final CounterMap map = new CounterMap();
                    for (final List path : value.getPaths()) {
                        map.incr(path.get(step).getId(), 1);
                    }
                    for (java.util.Map.Entry entry : map.entrySet()) {
                        final long linkElementId = entry.getKey();
                        final StandardFaunusEdge edge;
                        if (direction.equals(IN))
                            edge = new StandardFaunusEdge(faunusConf, linkElementId, valueId, label);
                        else
                            edge = new StandardFaunusEdge(faunusConf, valueId, linkElementId, label);

                        if (!mergeWeightKey.equals(NO_WEIGHT_KEY))
                            edge.setProperty(mergeWeightKey, entry.getValue());

                        value.addEdge(direction, edge);
                        edgesCreated++;
                        longWritable.set(linkElementId);
                        context.write(longWritable, holder.set('e', edge));
                    }
                } else {
                    for (final List path : value.getPaths()) {
                        final long linkElementId = path.get(step).getId();
                        final StandardFaunusEdge edge;
                        if (direction.equals(IN))
                            edge = new StandardFaunusEdge(faunusConf, linkElementId, valueId, label);
                        else
                            edge = new StandardFaunusEdge(faunusConf, valueId, linkElementId, label);

                        value.addEdge(direction, edge);
                        edgesCreated++;
                        longWritable.set(linkElementId);
                        context.write(longWritable, holder.set('e', edge));
                    }
                }
                if (direction.equals(OUT)) {
                    DEFAULT_COMPAT.incrementContextCounter(context, Counters.OUT_EDGES_CREATED, edgesCreated);
                } else {
                    DEFAULT_COMPAT.incrementContextCounter(context, Counters.IN_EDGES_CREATED, edgesCreated);
                }

            }

            longWritable.set(valueId);
            context.write(longWritable, holder.set('v', value));
        }
    }

    public static class Combiner extends Reducer {

        private Direction direction;
        private Configuration faunusConf;

        private static final Logger log =
                LoggerFactory.getLogger(Combiner.class);

        @Override
        public void setup(final Reducer.Context context) throws IOException, InterruptedException {
            faunusConf = ModifiableHadoopConfiguration.of(DEFAULT_COMPAT.getContextConfiguration(context));

            if (!faunusConf.has(LINK_DIRECTION)) {
                Iterator> it = context.getConfiguration().iterator();
                log.error("Broken configuration missing {}", LINK_DIRECTION);
                log.error("---- Start config dump ----");
                while (it.hasNext()) {
                    Entry ent = it.next();
                    log.error("k:{} -> v:{}", ent.getKey(), ent.getValue());
                }
                log.error("---- End config dump   ----");
                throw new NullPointerException();
            }
            direction = faunusConf.get(LINK_DIRECTION).opposite();
        }

        private final Holder holder = new Holder();

        @Override
        public void reduce(final LongWritable key, final Iterable values, final Reducer.Context context) throws IOException, InterruptedException {
            long edgesCreated = 0;
            final FaunusVertex vertex = new FaunusVertex(faunusConf, key.get());
            char outTag = 'x';
            for (final Holder holder : values) {
                final char tag = holder.getTag();
                if (tag == 'v') {
                    vertex.addAll((FaunusVertex) holder.get());
                    outTag = 'v';
                } else if (tag == 'e') {
                    vertex.addEdge(direction, (StandardFaunusEdge) holder.get());
                    edgesCreated++;
                } else {
                    vertex.addEdges(Direction.BOTH, (FaunusVertex) holder.get());
                }
            }

            context.write(key, holder.set(outTag, vertex));

            if (direction.equals(OUT)) {
                DEFAULT_COMPAT.incrementContextCounter(context, Counters.OUT_EDGES_CREATED, edgesCreated);
            } else {
                DEFAULT_COMPAT.incrementContextCounter(context, Counters.IN_EDGES_CREATED, edgesCreated);
            }
        }

    }

    /**
     * Reduce phase of the link job. Everything emitted for one vertex id is folded into a
     * single vertex, which is written out with a {@link NullWritable} key.
     */
    public static class Reduce extends Reducer<LongWritable, Holder, NullWritable, FaunusVertex> {

        private Direction direction;
        private Configuration faunusConf;

        /**
         * Reads the link direction from the task configuration and stores its opposite,
         * which is the direction used when attaching incoming edges in {@code reduce}.
         */
        @Override
        public void setup(final Context context) throws IOException, InterruptedException {
            faunusConf = ModifiableHadoopConfiguration.of(DEFAULT_COMPAT.getContextConfiguration(context));
            direction = faunusConf.get(LINK_DIRECTION).opposite();
        }

        /**
         * Merges all records for one vertex id into a single vertex and writes it out.
         *
         * @param key     the vertex id shared by all values
         * @param values  tagged records: {@code 'v'} is a full vertex, {@code 'e'} is a single edge,
         *                anything else is handled as a vertex whose edges are copied over
         * @param context the Hadoop task context used for output and counters
         */
        @Override
        public void reduce(final LongWritable key, final Iterable<Holder> values, final Context context) throws IOException, InterruptedException {
            long edgesCreated = 0;
            final FaunusVertex vertex = new FaunusVertex(faunusConf, key.get());
            for (final Holder incoming : values) {
                final char tag = incoming.getTag();
                if (tag == 'v') {
                    vertex.addAll((FaunusVertex) incoming.get());
                } else if (tag == 'e') {
                    vertex.addEdge(direction, (StandardFaunusEdge) incoming.get());
                    edgesCreated++;
                } else {
                    vertex.addEdges(Direction.BOTH, (FaunusVertex) incoming.get());
                }
            }
            context.write(NullWritable.get(), vertex);

            if (direction.equals(OUT)) {
                DEFAULT_COMPAT.incrementContextCounter(context, Counters.OUT_EDGES_CREATED, edgesCreated);
            } else {
                DEFAULT_COMPAT.incrementContextCounter(context, Counters.IN_EDGES_CREATED, edgesCreated);
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy