// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.scripting.jruby;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.FuncSpec;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.scripting.ScriptEngine;
import org.apache.pig.tools.pigstats.PigStats;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyBoolean;
import org.jruby.embed.LocalContextScope;
import org.jruby.embed.LocalVariableBehavior;
import org.jruby.embed.ScriptingContainer;
import org.jruby.javasupport.JavaEmbedUtils.EvalUnit;
import org.jruby.CompatVersion;
import com.google.common.collect.Maps;
/**
* Implementation of the script engine for Jruby, which facilitates the registration
* of scripts as UDFs, and also provides information (via the nested class RubyFunctions)
* on the registered functions.
*/
public class JrubyScriptEngine extends ScriptEngine {
/** Logger for this class. */
private static final Log LOG = LogFactory.getLog(JrubyScriptEngine.class);
//TODO test if it is necessary to have a per script (or even per method) runtime. PRO: avoid collisions CON: a bunch of runtimes, which could be slow
/** The one JRuby container used by this class; it is created when the class is initialized. */
protected static final ScriptingContainer rubyEngine;
/** Becomes true after the first registerFunctions call has added the JRuby jar and pigudf.rb to the PigContext. */
private boolean isInitialized;
static {
    // Configure the container completely before publishing it through the final field.
    ScriptingContainer container =
        new ScriptingContainer(LocalContextScope.SINGLETHREAD, LocalVariableBehavior.PERSISTENT);
    container.setCompatVersion(CompatVersion.RUBY1_9);
    rubyEngine = container;
}
/**
* This is a static class which provides functionality around the functions that are registered with Pig.
*/
static class RubyFunctions {
/**
* This cache maps function type to a map that maps path to a map of function name to the object
* which contains information about that function. In the case of an EvalFunc, there is a special
* function which encapsulates information about the function. In the case of an Accumulator or
* Algebraic, it is just an instance of the Class object that extends AccumulatorPigUdf or
* AlgebraicPigUdf, respectively.
*/
private static Map>> functionsCache = Maps.newHashMap();
private static Map alreadyRunCache = Maps.newHashMap();
private static Map cacheFunction = Maps.newHashMap();
static {
//TODO use an enum instead?
cacheFunction.put("evalfunc", "PigUdf.get_functions_to_register");
cacheFunction.put("accumulator", "AccumulatorPigUdf.classes_to_register");
cacheFunction.put("algebraic", "AlgebraicPigUdf.classes_to_register");
functionsCache.put("evalfunc", new HashMap>());
functionsCache.put("accumulator", new HashMap>());
functionsCache.put("algebraic", new HashMap>());
}
@SuppressWarnings("unchecked")
private static Map getFromCache(String path, Map> cacheToUpdate, String regCommand) {
Boolean runCheck = alreadyRunCache.get(path);
if (runCheck == null || !runCheck.booleanValue()) {
for (Map.Entry>> entry : functionsCache.entrySet())
entry.getValue().remove(path);
rubyEngine.runScriptlet(getScriptAsStream(path), path);
alreadyRunCache.put(path, true);
}
Map funcMap = cacheToUpdate.get(path);
if (funcMap == null) {
funcMap = (Map)rubyEngine.runScriptlet(regCommand);
cacheToUpdate.put(path, funcMap);
}
return funcMap;
}
public static Map getFunctions(String cache, String path) {
return getFromCache(path, functionsCache.get(cache), cacheFunction.get(cache));
}
}
/**
* Evaluates the script containing ruby udfs to determine what udfs are defined as well as
* what libraries and other external resources are necessary. These libraries and resources
* are then packaged with the job jar itself.
*/
@Override
public void registerFunctions(String path, String namespace, PigContext pigContext) throws IOException {
if (!isInitialized) {
pigContext.scriptJars.add(getJarPath(Ruby.class));
pigContext.addScriptFile("pigudf.rb", "pigudf.rb");
isInitialized = true;
}
for (Map.Entry entry : RubyFunctions.getFunctions("evalfunc", path).entrySet()) {
String method = entry.getKey();
String functionType = rubyEngine.callMethod(entry.getValue(), "name", String.class);
FuncSpec funcspec = new FuncSpec(JrubyEvalFunc.class.getCanonicalName() + "('" + path + "','" + method +"')");
pigContext.registerFunction(namespace + "." + method, funcspec);
}
for (Map.Entry entry : RubyFunctions.getFunctions("accumulator", path).entrySet()) {
String method = entry.getKey();
if (rubyEngine.callMethod(entry.getValue(), "check_if_necessary_methods_present", RubyBoolean.class).isFalse())
throw new RuntimeException("Method " + method + " does not have all of the required methods present!");
pigContext.registerFunction(namespace + "." + method, new FuncSpec(JrubyAccumulatorEvalFunc.class.getCanonicalName() + "('" + path + "','" + method +"')"));
}
for (Map.Entry entry : RubyFunctions.getFunctions("algebraic", path).entrySet()) {
String method = entry.getKey();
if (rubyEngine.callMethod(entry.getValue(), "check_if_necessary_methods_present", RubyBoolean.class).isFalse())
throw new RuntimeException("Method " + method + " does not have all of the required methods present!");
Schema schema = PigJrubyLibrary.rubyToPig(rubyEngine.callMethod(entry.getValue(), "get_output_schema", RubySchema.class));
String canonicalName = JrubyAlgebraicEvalFunc.class.getCanonicalName() + "$";
// In the case of an Algebraic UDF, a type specific EvalFunc is necessary (ie not EvalFunc