package com.thinkaurelius.titan.hadoop;

import static com.thinkaurelius.titan.hadoop.compat.HadoopCompatLoader.DEFAULT_COMPAT;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Joiner;
import com.thinkaurelius.titan.hadoop.config.ModifiableHadoopConfiguration;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.thinkaurelius.titan.core.PropertyKey;
import com.thinkaurelius.titan.core.RelationType;
import com.thinkaurelius.titan.core.TitanEdge;
import com.thinkaurelius.titan.core.TitanFactory;
import com.thinkaurelius.titan.core.TitanProperty;
import com.thinkaurelius.titan.core.TitanRelation;
import com.thinkaurelius.titan.core.schema.RelationTypeIndex;
import com.thinkaurelius.titan.core.schema.SchemaAction;
import com.thinkaurelius.titan.core.schema.SchemaStatus;
import com.thinkaurelius.titan.core.schema.TitanGraphIndex;
import com.thinkaurelius.titan.core.schema.TitanIndex;
import com.thinkaurelius.titan.diskstorage.BackendTransaction;
import com.thinkaurelius.titan.diskstorage.Entry;
import com.thinkaurelius.titan.diskstorage.StaticBuffer;
import com.thinkaurelius.titan.diskstorage.configuration.BasicConfiguration;
import com.thinkaurelius.titan.diskstorage.indexing.IndexEntry;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.cache.KCVSCache;
import com.thinkaurelius.titan.graphdb.database.EdgeSerializer;
import com.thinkaurelius.titan.graphdb.database.IndexSerializer;
import com.thinkaurelius.titan.graphdb.database.StandardTitanGraph;
import com.thinkaurelius.titan.graphdb.database.management.ManagementSystem;
import com.thinkaurelius.titan.graphdb.database.management.RelationTypeIndexWrapper;
import com.thinkaurelius.titan.graphdb.internal.ElementLifeCycle;
import com.thinkaurelius.titan.graphdb.internal.InternalRelationType;
import com.thinkaurelius.titan.graphdb.internal.InternalVertex;
import com.thinkaurelius.titan.graphdb.relations.EdgeDirection;
import com.thinkaurelius.titan.graphdb.relations.StandardEdge;
import com.thinkaurelius.titan.graphdb.relations.StandardProperty;
import com.thinkaurelius.titan.graphdb.relations.StandardRelation;
import com.thinkaurelius.titan.graphdb.schema.SchemaContainer;
import com.thinkaurelius.titan.graphdb.transaction.StandardTitanTx;
import com.thinkaurelius.titan.graphdb.types.CompositeIndexType;
import com.thinkaurelius.titan.graphdb.types.IndexType;
import com.thinkaurelius.titan.graphdb.types.MixedIndexType;
import com.thinkaurelius.titan.graphdb.types.vertices.TitanSchemaVertex;
import com.thinkaurelius.titan.hadoop.config.TitanHadoopConfiguration;
import com.tinkerpop.blueprints.Direction;

/**
 * Given (1) an InputFormat that iterates a Titan edgestore and converts each
 * row into a FaunusVertex and (2) the name of an already-defined index in
 * either the REGISTERED or ENABLED state, this Mapper examines each
 * FaunusVertex and rebuilds the named index accordingly. The index is written
 * through a
 * TitanGraph instance. The KEYOUT and VALUEOUT type parameters are NullWritable
 * because this Mapper produces no conventional SequenceFile output. There's
 * nothing to combine or reduce since the writes go through TitanGraph and its
 * non-Hadoop backend interface.
 */
public class TitanIndexRepairMapper extends Mapper<NullWritable, FaunusVertex, NullWritable, NullWritable> {

    private static final Logger log =
            LoggerFactory.getLogger(TitanIndexRepairMapper.class);

    private StandardTitanGraph graph;
    private ManagementSystem mgmt;
    private String indexName;
    private String indexType;
    private TitanIndex index;
    private RelationType indexRelationType;

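    /**
     * Hadoop counters reported by this mapper: management transaction
     * commits/rollbacks and graph shutdown successes/failures.
     */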
    public enum Counters {
        SUCCESSFUL_TRANSACTIONS,
        FAILED_TRANSACTIONS,
        SUCCESSFUL_GRAPH_SHUTDOWNS,
        FAILED_GRAPH_SHUTDOWNS,
    }

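    /**
     * Opens the Titan graph and its ManagementSystem from the job's output
     * configuration, reads the target index name and optional relation type
     * from the Faunus configuration, and checks that the index is in a state
     * that permits re-indexing. On failure the management transaction is
     * rolled back, {@link Counters#FAILED_TRANSACTIONS} is incremented, and
     * the exception is rethrown as an IOException.
     */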
    @Override
    public void setup(
            final Mapper<NullWritable, FaunusVertex, NullWritable, NullWritable>.Context context) throws IOException {
        Configuration hadoopConf = DEFAULT_COMPAT.getContextConfiguration(context);
        ModifiableHadoopConfiguration faunusConf = ModifiableHadoopConfiguration.of(hadoopConf);
        BasicConfiguration titanConf = faunusConf.getOutputConf();
        indexName = faunusConf.get(TitanHadoopConfiguration.INDEX_NAME);
        indexType = faunusConf.get(TitanHadoopConfiguration.INDEX_TYPE);

        try {
            Preconditions.checkNotNull(indexName, "Need to provide at least an index name for re-index job");
            log.info("Read index information: name={} type={}", indexName, indexType);
            graph = (StandardTitanGraph)TitanFactory.open(titanConf);
            SchemaContainer schema = new SchemaContainer(graph);
            FaunusSchemaManager typeManager = FaunusSchemaManager.getTypeManager(titanConf);
            typeManager.setSchemaProvider(schema);
            log.info("Opened graph {}", graph);
            mgmt = (ManagementSystem) graph.getManagementSystem();
            validateIndexStatus();
        } catch (final Exception e) {
            if (null != mgmt && mgmt.isOpen())
                mgmt.rollback();
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.FAILED_TRANSACTIONS, 1L);
            throw new IOException(e.getMessage(), e);
        }
    }

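    /**
     * Commits the management transaction and shuts down the graph, recording
     * the outcome of each step in the corresponding Hadoop counters. Runtime
     * exceptions are logged and rethrown so that the task attempt fails.
     */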
    @Override
    public void cleanup(final Mapper<NullWritable, FaunusVertex, NullWritable, NullWritable>.Context context) {
        try {
            if (null != mgmt && mgmt.isOpen())
                mgmt.commit();
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.SUCCESSFUL_TRANSACTIONS, 1L);
        } catch (RuntimeException e) {
            log.error("Transaction commit threw runtime exception:", e);
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.FAILED_TRANSACTIONS, 1L);
            throw e;
        }

        try {
            if (null != graph && graph.isOpen())
                graph.shutdown();
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.SUCCESSFUL_GRAPH_SHUTDOWNS, 1L);
        } catch (RuntimeException e) {
            log.error("Graph shutdown threw runtime exception:", e);
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.FAILED_GRAPH_SHUTDOWNS, 1L);
            throw e;
        }
    }

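    /**
     * Builds a transient in-memory Titan relation (StandardEdge or
     * StandardProperty) equivalent to the given Faunus relation, resolving its
     * incident vertices through the supplied transaction, and copies the
     * Faunus relation's own properties and meta-edges onto it. Returns null if
     * an incident vertex cannot be resolved.
     */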
    private StandardRelation getTitanRelation(StandardFaunusRelation faunusRelation, StandardTitanTx tx) {
        StandardRelation titanRelation;
        if (faunusRelation.isEdge()) {
            StandardFaunusEdge faunusEdge = (StandardFaunusEdge)faunusRelation;
            InternalVertex start = tx.getInternalVertex(faunusEdge.getVertexId(Direction.OUT)),
                    end = tx.getInternalVertex(faunusEdge.getVertexId(Direction.IN));
            if (start==null || end==null) return null;
            titanRelation = new StandardEdge(faunusRelation.getLongId(),tx.getOrCreateEdgeLabel(indexType),start,end, ElementLifeCycle.Loaded);
        } else {
            assert faunusRelation.isProperty();
            StandardFaunusProperty faunusProperty = (StandardFaunusProperty)faunusRelation;
            InternalVertex v = tx.getInternalVertex(faunusProperty.getVertex().getLongId());
            if (v==null) return null;
            titanRelation = new StandardProperty(faunusProperty.getLongId(),tx.getOrCreatePropertyKey(indexType),v,faunusProperty.getValue(), ElementLifeCycle.Loaded);
        }
        //Add properties
        for (TitanRelation rel : faunusRelation.query().relations()) {
            Object value;
            if (rel.isProperty()) value = ((FaunusProperty)rel).getValue();
            else value = tx.getInternalVertex(((FaunusEdge) rel).getVertexId(Direction.IN));
            if (value!=null) titanRelation.setProperty(rel.getType().getName(),value);
        }
        return titanRelation;
    }

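    /**
     * Re-indexes the given FaunusVertex against the target index: for a
     * RelationTypeIndex, matching outgoing relations are re-serialized against
     * the wrapped index type and written to the vertex's edgestore row; for a
     * TitanGraphIndex, the covered elements are re-serialized into composite
     * index entries or mixed index documents and written through the backend
     * transaction.
     */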
    @Override
    public void map(
            final NullWritable key, // ignored
            final FaunusVertex faunusVertex,
            final Mapper<NullWritable, FaunusVertex, NullWritable, NullWritable>.Context context) throws IOException {
        try {
            StandardTitanTx tx = mgmt.getWrappedTx();
            BackendTransaction mutator = tx.getTxHandle();
            if (index instanceof RelationTypeIndex) {
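                // Relation type index: re-serialize each incident relation of the
                // indexed type (outgoing only) against the wrapped index type and
                // append the resulting entries to this vertex's edgestore row.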
                RelationTypeIndexWrapper wrapper = (RelationTypeIndexWrapper)index;
                InternalRelationType wrappedType = wrapper.getWrappedType();
                EdgeSerializer edgeSerializer = ((StandardTitanGraph)graph).getEdgeSerializer();
                List<Entry> additions = new ArrayList<>();

                for (TitanRelation faunusRelation : faunusVertex.query().relations()) {
                    if (!faunusRelation.getType().getName().equals(indexType) ||
                            faunusRelation.getDirection(faunusVertex)!=Direction.OUT) continue; //Isolate relevant relations and only outgoing ones
                    StandardRelation titanRelation = getTitanRelation((StandardFaunusRelation)faunusRelation,tx);
                    for (int pos = 0; pos < titanRelation.getArity(); pos++) {
                        if (!wrappedType.isUnidirected(Direction.BOTH) && !wrappedType.isUnidirected(EdgeDirection.fromPosition(pos)))
                            continue; //Directionality is not covered
                        Entry entry = edgeSerializer.writeRelation(titanRelation, wrappedType, pos, tx);
                        additions.add(entry);
                    }
                }
                StaticBuffer vertexKey = graph.getIDManager().getKey(faunusVertex.getLongId());
                mutator.mutateEdges(vertexKey, additions, KCVSCache.NO_DELETIONS);
            } else if (index instanceof TitanGraphIndex) {
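                // Graph index: collect the elements the index covers (the vertex
                // itself, its properties, or its edges) and re-serialize them below.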
                IndexType indexType = mgmt.getSchemaVertex(index).asIndexType();
                assert indexType!=null;
                IndexSerializer indexSerializer = graph.getIndexSerializer();
                //Gather elements to index
                List<FaunusElement> elements;
                switch (indexType.getElement()) {
                    case VERTEX:
                        elements = ImmutableList.of((FaunusElement)faunusVertex);
                        break;
                    case PROPERTY:
                        elements = Lists.newArrayList();
                        for (TitanProperty p : faunusVertex.query().properties()) {
                            elements.add((StandardFaunusProperty)p);
                        }
                        break;
                    case EDGE:
                        elements = Lists.newArrayList();
                        for (TitanEdge e : faunusVertex.query().titanEdges()) {
                            elements.add((StandardFaunusEdge)e);
                        }
                        break;
                    default: throw new AssertionError("Unexpected category: " + indexType.getElement());
                }
                if (indexType.isCompositeIndex()) {
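                    // Composite index: compute each element's index entry and write it
                    // directly to the backing index store through the mutator.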
                    for (FaunusElement element : elements) {
                        Set<IndexSerializer.IndexUpdate<StaticBuffer,Entry>> updates =
                            indexSerializer.reindexElement(element, (CompositeIndexType) indexType);
                        for (IndexSerializer.IndexUpdate<StaticBuffer,Entry> update : updates) {
                            log.debug("Mutating index {}: {}", indexType, update.getEntry());
                            mutator.mutateIndex(update.getKey(), Lists.newArrayList(update.getEntry()), KCVSCache.NO_DELETIONS);
                        }
                    }
                } else {
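                    // Mixed index: gather documents per external index store, then
                    // restore them through the backing index transaction.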
                    assert indexType.isMixedIndex();
                    Map<String,Map<String,List<IndexEntry>>> documentsPerStore = Maps.newHashMap();
                    for (FaunusElement element : elements) {
                        indexSerializer.reindexElement(element, (MixedIndexType) indexType, documentsPerStore);
                    }
                    mutator.getIndexTransaction(indexType.getBackingIndexName()).restore(documentsPerStore);
                }

            } else throw new UnsupportedOperationException("Unsupported index found: "+index);

//            log.info("Committing mutator {} in mapper {}", mutator, this);
//            mutator.commit();
        } catch (final Exception e) {
            mgmt.rollback();
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.FAILED_TRANSACTIONS, 1L);
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Check that our target index is in either the ENABLED or REGISTERED state.
     */
    private void validateIndexStatus() {
        if (indexType==null || StringUtils.isBlank(indexType)) {
            index = mgmt.getGraphIndex(indexName);
        } else {
            indexRelationType = mgmt.getRelationType(indexType);
            Preconditions.checkArgument(indexRelationType!=null,"Could not find relation type: %s",indexType);
            index = mgmt.getRelationIndex(indexRelationType,indexName);
        }
        Preconditions.checkArgument(index!=null,"Could not find index: %s",indexName);
        log.info("Found index {}", indexName);
        TitanSchemaVertex schemaVertex = mgmt.getSchemaVertex(index);
        Set<SchemaStatus> acceptableStatuses = SchemaAction.REINDEX.getApplicableStatus();
        boolean isValidIndex = true;
        String invalidIndexHint;
        if (index instanceof RelationTypeIndex ||
                (index instanceof TitanGraphIndex && ((TitanGraphIndex)index).isCompositeIndex()) ) {
            SchemaStatus actualStatus = schemaVertex.getStatus();
            isValidIndex = acceptableStatuses.contains(actualStatus);
            invalidIndexHint = String.format(
                    "The index has status %s, but one of %s is required",
                    actualStatus, acceptableStatuses);
        } else {
            Preconditions.checkArgument(index instanceof TitanGraphIndex,"Unexpected index: %s",index);
            TitanGraphIndex gindex = (TitanGraphIndex)index;
            Preconditions.checkArgument(gindex.isMixedIndex());
            Map<String, SchemaStatus> invalidKeyStatuses = Maps.newHashMap();
            for (PropertyKey key : gindex.getFieldKeys()) {
                SchemaStatus status = gindex.getIndexStatus(key);
                if (status!=SchemaStatus.DISABLED && !acceptableStatuses.contains(status)) {
                    isValidIndex=false;
                    invalidKeyStatuses.put(key.getName(), status);
                    log.warn("Index {} has key {} in an invalid status {}",index,key,status);
                }
            }
            invalidIndexHint = String.format(
                    "The following index keys have invalid status: %s (status must be one of %s)",
                    Joiner.on(",").withKeyValueSeparator(" has status ").join(invalidKeyStatuses), acceptableStatuses);
        }
        Preconditions.checkArgument(isValidIndex, "The index %s is in an invalid state and cannot be indexed. %s", indexName, invalidIndexHint);
        // TODO consider retrieving the current Job object and calling killJob() if !isValidIndex -- would be more efficient than throwing an exception on the first pair processed by each mapper
        log.debug("Index {} is valid for re-indexing", indexName);
    }
}



