All downloads are free. Search and download functionality uses the official Maven repository.

com.thinkaurelius.titan.hadoop.formats.util.LoaderScriptWrapper Maven / Gradle / Ivy

package com.thinkaurelius.titan.hadoop.formats.util;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.thinkaurelius.titan.core.TitanEdge;
import com.thinkaurelius.titan.core.TitanGraph;
import com.thinkaurelius.titan.core.TitanProperty;
import com.thinkaurelius.titan.core.TitanVertex;
import com.thinkaurelius.titan.hadoop.FaunusEdge;
import com.thinkaurelius.titan.hadoop.FaunusProperty;
import com.thinkaurelius.titan.hadoop.FaunusVertex;
import com.thinkaurelius.titan.util.system.IOUtils;
import com.tinkerpop.gremlin.groovy.jsr223.DefaultImportCustomizerProvider;
import com.tinkerpop.gremlin.groovy.jsr223.GremlinGroovyScriptEngine;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.script.Bindings;
import javax.script.CompiledScript;
import javax.script.ScriptException;
import javax.script.SimpleBindings;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.util.*;

import static com.thinkaurelius.titan.hadoop.compat.HadoopCompatLoader.DEFAULT_COMPAT;

/**
 * Encapsulates a user-provided Gremlin-Groovy incremental loading script.
 * <p>
 * On construction the script text is read from a Hadoop {@link FileSystem},
 * and for each of the three supported method names
 * ({@value #VERTEX_METHOD_NAME}, {@value #VERTEX_PROP_METHOD_NAME},
 * {@value #EDGE_METHOD_NAME}) the wrapper checks whether the script defines a
 * method with the expected argument types.  Every method that is found is
 * compiled into a {@link CompiledScript} that invokes it using context
 * variable bindings prepared by this class.
 * <p>
 * Callers are expected to consult {@link #hasVertexMethod()},
 * {@link #hasVPropMethod()} and {@link #hasEdgeMethod()} before calling the
 * corresponding invocation method; the compiled script for a method that the
 * user's script does not define is {@code null}.
 */
public class LoaderScriptWrapper {

    private static final Logger LOGGER =
            LoggerFactory.getLogger(LoaderScriptWrapper.class);

    private static final String TASK_IO_CONTEXT =
            "org.apache.hadoop.mapreduce.TaskInputOutputContext";

    /**
     * Charset used to decode the loader script.  Fixed explicitly so that the
     * script is decoded the same way regardless of the JVM's platform default
     * encoding.
     */
    private static final java.nio.charset.Charset SCRIPT_CHARSET =
            java.nio.charset.Charset.forName("UTF-8");

    private static final DefaultImportCustomizerProvider importCustomizer =
            new DefaultImportCustomizerProvider(
                    ImmutableSet.of( /* nonstatic */
                            FaunusVertex.class.getCanonicalName(),
                            FaunusEdge.class.getCanonicalName(),
                            FaunusProperty.class.getCanonicalName(),
                            TitanGraph.class.getCanonicalName(),
                            TitanVertex.class.getCanonicalName(),
                            TitanEdge.class.getCanonicalName(),
                            TitanProperty.class.getCanonicalName(),
                            Logger.class.getCanonicalName(),
                            TASK_IO_CONTEXT),
                    ImmutableSet.of() /* static */);

    /**
     * Counters incremented around each loader-script invocation: one for the
     * call itself, one for a normal return, and one for a
     * {@link ScriptException}.
     */
    public enum Counters {
        VERTEX_LOADER_SCRIPT_CALLS,
        VERTEX_LOADER_SCRIPT_EXCEPTIONS,
        VERTEX_LOADER_SCRIPT_RETURNS,
        EDGE_LOADER_SCRIPT_CALLS,
        EDGE_LOADER_SCRIPT_EXCEPTIONS,
        EDGE_LOADER_SCRIPT_RETURNS,
        VERTEX_PROP_LOADER_SCRIPT_CALLS,
        VERTEX_PROP_LOADER_SCRIPT_EXCEPTIONS,
        VERTEX_PROP_LOADER_SCRIPT_RETURNS,
    }

    static final String EDGE_METHOD_NAME = "getOrCreateEdge";
    static final String VERTEX_METHOD_NAME = "getOrCreateVertex";
    static final String VERTEX_PROP_METHOD_NAME = "getOrCreateVertexProperty";

    private final GremlinGroovyScriptEngine loaderEngine;
    private final CompiledScript vertexMethod;
    private final CompiledScript vpropMethod;
    private final CompiledScript edgeMethod;

    /*
     * Each map below goes from binding name to the canonical name of the
     * argument's type, in the order the arguments are passed to the script
     * method.  The key order and value order are both used by getMethod, so
     * they must stay aligned.
     */

    private static final ImmutableMap<String, String> vertexArguments = ImmutableMap.of(
            "faunusVertex", FaunusVertex.class.getCanonicalName(),
            "graph", TitanGraph.class.getCanonicalName(),
            "context", TASK_IO_CONTEXT,
            "log", Logger.class.getCanonicalName()
    );

    private static final ImmutableMap<String, String> vpropArguments = ImmutableMap.of(
            "titanProperty", TitanProperty.class.getCanonicalName(),
            "vertex", TitanVertex.class.getCanonicalName(),
            "graph", TitanGraph.class.getCanonicalName(),
            "context", TASK_IO_CONTEXT,
            "log", Logger.class.getCanonicalName()
    );

    private static final ImmutableMap<String, String> edgeArguments;

    static {
        // Six entries, so a builder is used instead of ImmutableMap.of(...)
        ImmutableMap.Builder<String, String> b = ImmutableMap.builder();
        b.put("faunusEdge", FaunusEdge.class.getCanonicalName());
        b.put("inVertex", TitanVertex.class.getCanonicalName());
        b.put("outVertex", TitanVertex.class.getCanonicalName());
        b.put("graph", TitanGraph.class.getCanonicalName());
        b.put("context", TASK_IO_CONTEXT);
        b.put("log", Logger.class.getCanonicalName());
        edgeArguments = b.build();
    }

    /**
     * Reads the loader script at {@code scriptPath} from {@code fs} and
     * compiles an invocation script for each supported method it defines.
     *
     * @param fs         filesystem holding the script
     * @param scriptPath location of the script on {@code fs}
     * @throws IOException      if the script cannot be read
     * @throws RuntimeException if none of the supported methods could be
     *                          compiled from the script
     */
    public LoaderScriptWrapper(FileSystem fs, Path scriptPath) throws IOException {
        String scriptString = getScriptString(fs, scriptPath);

        loaderEngine = new GremlinGroovyScriptEngine(importCustomizer);
        vertexMethod = getVertexMethod(scriptString, loaderEngine);
        vpropMethod = getVPropMethod(scriptString, loaderEngine);
        edgeMethod = getEdgeMethod(scriptString, loaderEngine);

        /* Configuring an incremental loading script that contains no usable
         * methods probably indicates a syntax or compile error in the provided
         * script.  Throw an exception.
         */
        if (null == vertexMethod && null == vpropMethod && null == edgeMethod) {
            throw new RuntimeException("No methods could be compiled from the loader script " + scriptPath +
                    ".  See Slf4j log output for debugging info.");
        }
    }

    /**
     * @return true if the script defines a usable {@value #VERTEX_METHOD_NAME} method
     */
    public boolean hasVertexMethod() {
        return null != vertexMethod;
    }

    /**
     * @return true if the script defines a usable {@value #VERTEX_PROP_METHOD_NAME} method
     */
    public boolean hasVPropMethod() {
        return null != vpropMethod;
    }

    /**
     * @return true if the script defines a usable {@value #EDGE_METHOD_NAME} method
     */
    public boolean hasEdgeMethod() {
        return null != edgeMethod;
    }

    /**
     * Invokes the script's vertex method with the given arguments bound as
     * {@code faunusVertex}, {@code graph}, {@code context} and {@code log}.
     * Only call this when {@link #hasVertexMethod()} is true.
     *
     * @return whatever the script method returned, cast to {@link TitanVertex}
     * @throws RuntimeException wrapping any {@link ScriptException} raised
     *                          while evaluating the script
     */
    public TitanVertex getVertex(FaunusVertex faunusVertex, TitanGraph graph, Mapper.Context context) {
        Bindings bindings = new SimpleBindings();
        bindings.put("faunusVertex", faunusVertex);
        bindings.put("graph", graph);
        bindings.put("context", context);
        bindings.put("log", LOGGER);
        DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTEX_LOADER_SCRIPT_CALLS, 1L);
        try {
            TitanVertex tv = (TitanVertex) vertexMethod.eval(bindings);
            LOGGER.debug("Compiled vertex loader script returned {}", tv);
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTEX_LOADER_SCRIPT_RETURNS, 1L);
            return tv;
        } catch (ScriptException e) {
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTEX_LOADER_SCRIPT_EXCEPTIONS, 1L);
            throw new RuntimeException(e);
        }
    }

    /**
     * Invokes the script's vertex-property method with the given arguments
     * bound as {@code titanProperty}, {@code vertex}, {@code graph},
     * {@code context} and {@code log}.  The script's return value is
     * discarded.  Only call this when {@link #hasVPropMethod()} is true.
     *
     * @throws RuntimeException wrapping any {@link ScriptException} raised
     *                          while evaluating the script
     */
    public void getVProp(TitanProperty titanProperty, TitanVertex vertex, TitanGraph graph, Mapper.Context context) {
        Bindings bindings = new SimpleBindings();
        bindings.put("titanProperty", titanProperty);
        bindings.put("vertex", vertex);
        bindings.put("graph", graph);
        bindings.put("context", context);
        bindings.put("log", LOGGER);
        DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTEX_PROP_LOADER_SCRIPT_CALLS, 1L);
        try {
            vpropMethod.eval(bindings);
            LOGGER.debug("Compiled property loader method invoked");
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTEX_PROP_LOADER_SCRIPT_RETURNS, 1L);
        } catch (ScriptException e) {
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.VERTEX_PROP_LOADER_SCRIPT_EXCEPTIONS, 1L);
            throw new RuntimeException(e);
        }
    }

    /**
     * Invokes the script's edge method with the given arguments bound as
     * {@code faunusEdge}, {@code inVertex}, {@code outVertex}, {@code graph},
     * {@code context} and {@code log}.  Only call this when
     * {@link #hasEdgeMethod()} is true.
     *
     * @return whatever the script method returned, cast to {@link TitanEdge}
     * @throws RuntimeException wrapping any {@link ScriptException} raised
     *                          while evaluating the script
     */
    public TitanEdge getEdge(FaunusEdge faunusEdge, TitanVertex in, TitanVertex out, TitanGraph graph, Mapper.Context context) {
        Bindings bindings = new SimpleBindings();
        bindings.put("faunusEdge", faunusEdge);
        bindings.put("inVertex", in);
        bindings.put("outVertex", out);
        bindings.put("graph", graph);
        bindings.put("context", context);
        bindings.put("log", LOGGER);
        DEFAULT_COMPAT.incrementContextCounter(context, Counters.EDGE_LOADER_SCRIPT_CALLS, 1L);
        try {
            TitanEdge edge = (TitanEdge) edgeMethod.eval(bindings);
            LOGGER.debug("Compiled edge method returned {}", edge);
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.EDGE_LOADER_SCRIPT_RETURNS, 1L);
            return edge;
        } catch (ScriptException e) {
            DEFAULT_COMPAT.incrementContextCounter(context, Counters.EDGE_LOADER_SCRIPT_EXCEPTIONS, 1L);
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the whole file at {@code scriptPath} on {@code fs} into a string,
     * decoding it as UTF-8.
     */
    private static String getScriptString(FileSystem fs, Path scriptPath) throws IOException {
        InputStreamReader isr = null;
        try {
            // Explicit charset: the no-charset constructor would fall back to
            // the platform default encoding, which can differ between nodes.
            isr = new InputStreamReader(fs.open(scriptPath), SCRIPT_CHARSET);
            StringWriter wr = new StringWriter();
            org.apache.commons.io.IOUtils.copy(isr, wr);
            return wr.toString();
        } finally {
            IOUtils.closeQuietly(isr);
        }
    }

    private static CompiledScript getVertexMethod(String script, GremlinGroovyScriptEngine loaderEngine) {
        return getMethod(script, loaderEngine, VERTEX_METHOD_NAME, vertexArguments);
    }

    private static CompiledScript getVPropMethod(String script, GremlinGroovyScriptEngine loaderEngine) {
        return getMethod(script, loaderEngine, VERTEX_PROP_METHOD_NAME, vpropArguments);
    }

    private static CompiledScript getEdgeMethod(String script, GremlinGroovyScriptEngine loaderEngine) {
        return getMethod(script, loaderEngine, EDGE_METHOD_NAME, edgeArguments);
    }

    /**
     * Checks whether {@code script} defines {@code methodName} with the
     * argument types given by {@code args}, and if so compiles a script that
     * calls it.
     *
     * @param script       the user's loader script text
     * @param loaderEngine engine used for both the check and the compilation
     * @param methodName   name of the method to look for
     * @param args         binding name to canonical type name, in argument order
     * @return a compiled script consisting of the user's script followed by a
     *         call to the method, or {@code null} if the method is not defined
     *         or the check/compilation failed (the failure is logged)
     */
    private static CompiledScript getMethod(String script, GremlinGroovyScriptEngine loaderEngine, String methodName, Map<String, String> args) {
        CompiledScript compiled = null;

        // Check script: the user's script, a newline, and then an expression
        // that is true iff the method described by methodName and args is
        // defined in the user's script.
        String argTypeString = Joiner.on(",").join(args.values());
        String checkSource = script + "\n"
                + String.format("metaClass.getMetaMethod('%s', %s) != null", methodName, argTypeString);

        // Call script: the user's script, a newline, and then a call to the
        // method, passing the binding names as arguments.
        String argNameString = Joiner.on(",").join(args.keySet());
        String invocation = String.format("%s(%s)", methodName, argNameString);
        String callSource = script + "\n" + invocation;

        try {
            LOGGER.debug("Check script:\n{}", checkSource);
            LOGGER.debug("Loader script:\n{}", callSource);
            CompiledScript checkScript = loaderEngine.compile(checkSource);
            // Check whether the method was defined
            Boolean defined = (Boolean) checkScript.eval();
            if (Boolean.TRUE.equals(defined)) {
                // It is defined: compile a script that calls the method
                compiled = loaderEngine.compile(callSource);
            }
            LOGGER.info("Tested whether script contained method {}: {}/{}", invocation, defined, compiled);
        } catch (RuntimeException e) {
            LOGGER.info("Custom loader script does not define {}", invocation, e);
        } catch (ScriptException e) {
            LOGGER.info("Custom loader script does not define {}", invocation, e);
        }

        return compiled;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy