/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.api;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.sysml.api.mlcontext.ScriptType;
import org.apache.sysml.conf.CompilerConfig;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.debug.DMLDebugger;
import org.apache.sysml.debug.DMLDebuggerException;
import org.apache.sysml.debug.DMLDebuggerProgramInfo;
import org.apache.sysml.hops.HopsException;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.hops.OptimizerUtils.OptimizationLevel;
import org.apache.sysml.hops.codegen.SpoofCompiler;
import org.apache.sysml.hops.codegen.SpoofCompiler.IntegrationType;
import org.apache.sysml.hops.codegen.SpoofCompiler.PlanCachePolicy;
import org.apache.sysml.hops.globalopt.GlobalOptimizerWrapper;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopsException;
import org.apache.sysml.parser.DMLProgram;
import org.apache.sysml.parser.DMLTranslator;
import org.apache.sysml.parser.LanguageException;
import org.apache.sysml.parser.ParseException;
import org.apache.sysml.parser.ParserFactory;
import org.apache.sysml.parser.ParserWrapper;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLScriptException;
import org.apache.sysml.runtime.controlprogram.Program;
import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.matrix.CleanupMR;
import org.apache.sysml.runtime.matrix.data.LibMatrixDNN;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.util.LocalFileUtils;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.utils.Explain;
import org.apache.sysml.utils.Explain.ExplainCounts;
import org.apache.sysml.utils.Explain.ExplainType;
import org.apache.sysml.utils.GPUStatistics;
import org.apache.sysml.utils.Statistics;
import org.apache.sysml.yarn.DMLAppMasterUtils;
import org.apache.sysml.yarn.DMLYarnClientProxy;
public class DMLScript
{
public enum RUNTIME_PLATFORM {
HADOOP, // execute all matrix operations in MR
SINGLE_NODE, // execute all matrix operations in CP
HYBRID, // execute matrix operations in CP or MR
HYBRID_SPARK, // execute matrix operations in CP or Spark
SPARK // execute matrix operations in Spark
}
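// Illustrative mapping from the "-exec" command-line values to these platforms
// (a sketch for orientation; parseRuntimePlatform below is the authoritative mapping):
//   -exec hadoop       -> RUNTIME_PLATFORM.HADOOP
//   -exec singlenode   -> RUNTIME_PLATFORM.SINGLE_NODE
//   -exec hybrid       -> RUNTIME_PLATFORM.HYBRID
//   -exec hybrid_spark -> RUNTIME_PLATFORM.HYBRID_SPARK
//   -exec spark        -> RUNTIME_PLATFORM.SPARK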
public static RUNTIME_PLATFORM rtplatform = OptimizerUtils.getDefaultExecutionMode();
public static boolean STATISTICS = false; //default statistics
public static int STATISTICS_COUNT = 10; //default statistics maximum heavy hitter count
public static boolean ENABLE_DEBUG_MODE = false; //default debug mode
public static boolean USE_LOCAL_SPARK_CONFIG = false; //set default local spark configuration - used for local testing
public static String DML_FILE_PATH_ANTLR_PARSER = null;
public static ExplainType EXPLAIN = ExplainType.NONE; //default explain
/**
* Global variable indicating the script type (DML or PYDML). Can be used
* for DML/PYDML-specific tasks, such as outputting booleans in the correct
* case (TRUE/FALSE for DML and True/False for PYDML).
*/
public static ScriptType SCRIPT_TYPE = ScriptType.DML;
public static boolean USE_ACCELERATOR = false;
public static boolean FORCE_ACCELERATOR = false;
// flag that indicates whether or not to suppress any prints to stdout
public static boolean _suppressPrint2Stdout = false;
public static String _uuid = IDHandler.createDistributedUniqueID();
public static boolean _activeAM = false;
private static final Log LOG = LogFactory.getLog(DMLScript.class.getName());
public static String USAGE =
"Usage is " + DMLScript.class.getCanonicalName() + " -f <filename>"
//+ " (-exec <mode>)?" + " (-explain <type>)?" + " (-stats)?" + " (-clean)?" + " (-config=<config_filename>)?
+ " [-options] ([-args | -nvargs] <args-list>)? \n"
+ "   -f: <filename> will be interpreted as a filename path (if <filename> is prefixed\n"
+ "         with hdfs or gpfs it is read from DFS, otherwise from local file system)\n"
//undocumented feature in beta 08/2014 release
//+ "   -s: <script> will be interpreted as a DML script string \n"
+ "   -python: (optional) parses Python-like DML\n"
+ "   -debug: (optional) run in debug mode\n"
+ "   -gpu: (optional) use acceleration whenever possible. Current version only supports CUDA.\n"
+ "         Supported <flags> for this mode: force=(true|false)\n"
// Later add optional flags to indicate optimizations turned on or off. Currently they are turned off.
//+ "   -debug: (optional) run in debug mode\n"
//+ "   Optional <flags> supported for this mode: optimize=(on|off)\n"
+ "   -exec: (optional) execution mode (hadoop, singlenode, [hybrid], hybrid_spark, spark)\n"
+ "   -explain: (optional) explain plan (hops, [runtime], recompile_hops, recompile_runtime)\n"
+ "   -stats: (optional) monitor and report caching/recompilation statistics, default heavy hitter count is 10\n"
+ "   -clean: (optional) cleanup all SystemML working directories (FS, DFS).\n"
+ "         All other flags are ignored in this mode. \n"
+ "   -config: (optional) use config file <config_filename> (default: use parameter\n"
+ "         values in default SystemML-config.xml config file; if <config_filename> is\n"
+ "         prefixed with hdfs or gpfs it is read from DFS, otherwise from local file system)\n"
+ "   -args: (optional) parameterize DML script with contents of [args list], ALL args\n"
+ "         after -args flag, each argument must be an unnamed argument, where 1st value\n"
+ "         after -args will replace $1 in DML script, 2nd value will replace $2, etc.\n"
+ "   -nvargs: (optional) parameterize DML script with contents of [args list], ALL args\n"
+ "         after -nvargs flag, each argument must be a named argument of the form argName=argValue,\n"
+ "         where value will replace $argName in DML script, argName must be a valid DML variable\n"
+ "         name (start with letter, contain only letters, numbers, or underscores).\n"
+ "   <args-list>: (optional) args to DML script \n"
+ "   -? | -help: (optional) show this help message \n";
///////////////////////////////
// public external interface
////////
public static String getUUID() {
return _uuid;
}
/**
* Used to set the master UUID on all nodes (in parfor remote_mr, where DMLScript is passed)
* in order to simplify cleanup of scratch_space and local working dirs.
*
* @param uuid master UUID to set on all nodes
*/
public static void setUUID(String uuid)
{
_uuid = uuid;
}
public static boolean suppressPrint2Stdout() {
return _suppressPrint2Stdout;
}
public static void setActiveAM(){
_activeAM = true;
}
public static boolean isActiveAM(){
return _activeAM;
}
/**
* Default DML script invocation (e.g., via 'hadoop jar SystemML.jar -f Test.dml')
*
* @param args command-line arguments
* @throws IOException if an IOException occurs
* @throws DMLException if a DMLException occurs
*/
public static void main(String[] args)
throws IOException, DMLException
{
Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
try {
DMLScript.executeScript(conf, otherArgs);
} catch (ParseException pe) {
System.err.println(pe.getMessage());
} catch (DMLScriptException e){
// In case of DMLScriptException, simply print the error message.
System.err.println(e.getMessage());
}
}
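// A minimal sketch of invoking the same entry point programmatically rather than via main()
// (the script path is hypothetical; executeScript throws DMLException):
//   Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());
//   boolean ok = DMLScript.executeScript(conf, new String[]{"-f", "scripts/test.dml", "-stats"});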
/**
* Single entry point for all public invocation alternatives (e.g.,
* main, executeScript, JaqlUdf etc)
*
* @param conf Hadoop configuration
* @param args arguments
* @return true if success, false otherwise
* @throws DMLException if DMLException occurs
* @throws ParseException if ParseException occurs
*/
public static boolean executeScript( Configuration conf, String[] args )
throws DMLException
{
//Step 1: parse arguments
//check for help
if( args.length==0 || (args.length==1 && (args[0].equalsIgnoreCase("-help")|| args[0].equalsIgnoreCase("-?"))) ){
System.err.println( USAGE );
return true;
}
//check for clean
else if( args.length==1 && args[0].equalsIgnoreCase("-clean") ){
cleanSystemMLWorkspace();
return true;
}
//check number of args - print usage if incorrect
if( args.length < 2 ){
System.err.println( "ERROR: Unrecognized invocation arguments." );
System.err.println( USAGE );
return false;
}
//check script arg - print usage if incorrect
if (!(args[0].equals("-f") || args[0].equals("-s"))){
System.err.println("ERROR: First argument must be either -f or -s");
System.err.println( USAGE );
return false;
}
//parse arguments and set execution properties
RUNTIME_PLATFORM oldrtplatform = rtplatform; //keep old rtplatform
ExplainType oldexplain = EXPLAIN; //keep old explain
// Reset global flags to avoid errors in test suite
ENABLE_DEBUG_MODE = false;
boolean parsePyDML = false;
try
{
String fnameOptConfig = null; //optional config filename
String[] scriptArgs = null; //optional script arguments
boolean namedScriptArgs = false;
for( int i=2; i<args.length; i++ )
{
//optional parameters
if( args[i].equalsIgnoreCase("-explain") ) {
EXPLAIN = ExplainType.RUNTIME;
if( args.length > (i+1) && !args[i+1].startsWith("-") )
EXPLAIN = Explain.parseExplainType(args[++i]);
}
else if( args[i].equalsIgnoreCase("-stats") ) {
STATISTICS = true;
if (args.length > (i + 1) && !args[i + 1].startsWith("-"))
STATISTICS_COUNT = Integer.parseInt(args[++i]);
}
else if ( args[i].equalsIgnoreCase("-exec")) {
rtplatform = parseRuntimePlatform(args[++i]);
if( rtplatform==null )
return false;
}
else if (args[i].startsWith("-config=")) // legacy
fnameOptConfig = args[i].substring(8).replaceAll("\"", "");
else if (args[i].equalsIgnoreCase("-config"))
fnameOptConfig = args[++i];
else if( args[i].equalsIgnoreCase("-debug") ) {
ENABLE_DEBUG_MODE = true;
}
else if( args[i].equalsIgnoreCase("-gpu") ) {
USE_ACCELERATOR = true;
if( args.length > (i+1) && !args[i+1].startsWith("-") ) {
String flag = args[++i];
if(flag.startsWith("force=")) {
String [] flagOptions = flag.split("=");
if(flagOptions.length == 2)
FORCE_ACCELERATOR = Boolean.parseBoolean(flagOptions[1]);
else
throw new DMLRuntimeException("Unsupported \"force\" option for -gpu: " + flag);
}
else {
throw new DMLRuntimeException("Unsupported flag for -gpu: " + flag);
}
}
GPUContext.getGPUContext(); // creates the singleton GPU context object. Return value ignored.
}
else if( args[i].equalsIgnoreCase("-python") ) {
parsePyDML = true;
}
else if (args[i].startsWith("-args") || args[i].startsWith("-nvargs")) {
namedScriptArgs = args[i].startsWith("-nvargs"); i++;
scriptArgs = new String[args.length - i];
System.arraycopy(args, i, scriptArgs, 0, scriptArgs.length);
break;
}
else{
System.err.println("ERROR: Unknown argument: " + args[i]);
return false;
}
}
//set log level
if (!ENABLE_DEBUG_MODE)
setLoggingProperties( conf );
//Step 2: prepare script invocation
if (StringUtils.endsWithIgnoreCase(args[1], ".pydml")) {
parsePyDML = true;
}
String dmlScriptStr = readDMLScript(args[0], args[1]);
Map<String, String> argVals = createArgumentsMap(namedScriptArgs, scriptArgs);
DML_FILE_PATH_ANTLR_PARSER = args[1];
//Step 3: invoke dml script
printInvocationInfo(args[1], fnameOptConfig, argVals);
if (ENABLE_DEBUG_MODE) {
// the inner try block isolates debugger exceptions, which allows distinguishing debugger bugs from runtime bugs
launchDebugger(dmlScriptStr, fnameOptConfig, argVals, parsePyDML);
}
else {
execute(dmlScriptStr, fnameOptConfig, argVals, args, parsePyDML);
}
}
catch (ParseException pe) {
throw pe;
}
catch (DMLScriptException e) {
//rethrow DMLScriptException to propagate stop call
throw e;
}
catch(Exception ex)
{
LOG.error("Failed to execute DML script.", ex);
throw new DMLException(ex);
}
finally
{
//reset runtime platform and visualize flag
rtplatform = oldrtplatform;
EXPLAIN = oldexplain;
}
return true;
}
///////////////////////////////
// private internal utils (argument parsing)
////////
protected static Map<String, String> createArgumentsMap(boolean hasNamedArgs, String[] args)
throws LanguageException
{
Map<String, String> argMap = new HashMap<String, String>();
if (args == null)
return argMap;
for(int i=1; i<=args.length; i++)
{
String arg = args[i-1];
if (arg.equalsIgnoreCase("-l") || arg.equalsIgnoreCase("-log") ||
arg.equalsIgnoreCase("-v") || arg.equalsIgnoreCase("-visualize")||
arg.equalsIgnoreCase("-explain") ||
arg.equalsIgnoreCase("-debug") ||
arg.equalsIgnoreCase("-stats") ||
arg.equalsIgnoreCase("-exec") ||
arg.equalsIgnoreCase("-config") ||
arg.startsWith("-config="))
{
throw new LanguageException("-args or -nvargs must be the final argument for DMLScript!");
}
//parse arguments (named args / args by position)
if(hasNamedArgs)
{
// CASE: named argument argName=argValue -- must add pair to _argVals
String[] argPieces = arg.split("=");
if(argPieces.length < 2)
throw new LanguageException("for -nvargs option, elements in arg list must be named and have the form argName=argValue");
String argName = argPieces[0];
StringBuilder sb = new StringBuilder();
for (int jj=1; jj < argPieces.length; jj++){
if( jj > 1 ) //re-insert '=' characters that are part of the argument value
sb.append("=");
sb.append(argPieces[jj]);
}
String varNameRegex = "^[a-zA-Z]([a-zA-Z0-9_])*$";
if (!argName.matches(varNameRegex))
throw new LanguageException("argName " + argName + " must be a valid variable name in DML. Valid variable names in DML start with upper-case or lower-case letter, and contain only letters, digits, or underscores");
argMap.put("$"+argName,sb.toString());
}
else
{
// CASE: unnamed argument -- use position in arg list for name
argMap.put("$"+i ,arg);
}
}
return argMap;
}
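// Illustrative inputs/outputs of createArgumentsMap (file names and values are hypothetical):
//   createArgumentsMap(true,  new String[]{"X=in.csv", "n=10"}) -> {$X=in.csv, $n=10}
//   createArgumentsMap(false, new String[]{"in.csv", "10"})     -> {$1=in.csv, $2=10}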
protected static String readDMLScript( String argname, String script )
throws IOException, LanguageException
{
boolean fromFile = argname.equals("-f");
String dmlScriptStr;
if( fromFile )
{
//read DML script from file
if(script == null)
throw new LanguageException("DML script path was not specified!");
StringBuilder sb = new StringBuilder();
BufferedReader in = null;
try
{
//read from hdfs or gpfs file system
if( script.startsWith("hdfs:")
|| script.startsWith("gpfs:") )
{
if( !LocalFileUtils.validateExternalFilename(script, true) )
throw new LanguageException("Invalid (non-trustworthy) hdfs filename.");
FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
Path scriptPath = new Path(script);
in = new BufferedReader(new InputStreamReader(fs.open(scriptPath)));
}
// from local file system
else
{
if( !LocalFileUtils.validateExternalFilename(script, false) )
throw new LanguageException("Invalid (non-trustworthy) local filename.");
in = new BufferedReader(new FileReader(script));
}
//core script reading
String tmp = null;
while ((tmp = in.readLine()) != null)
{
sb.append( tmp );
sb.append( "\n" );
}
}
catch (IOException ex)
{
LOG.error("Failed to read the script from the file system", ex);
throw ex;
}
finally {
IOUtilFunctions.closeSilently(in);
}
dmlScriptStr = sb.toString();
}
else
{
//parse given script string
if(script == null)
throw new LanguageException("DML script was not specified!");
InputStream is = new ByteArrayInputStream(script.getBytes());
Scanner scan = new Scanner(is);
dmlScriptStr = scan.useDelimiter("\\A").next();
scan.close();
}
return dmlScriptStr;
}
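// Illustrative uses of readDMLScript (paths and script contents are hypothetical):
//   readDMLScript("-f", "hdfs:/user/ml/test.dml"); //read from DFS
//   readDMLScript("-f", "scripts/test.dml");       //read from local file system
//   readDMLScript("-s", "x = 1; print(x);");       //pass the script string through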
private static RUNTIME_PLATFORM parseRuntimePlatform( String platform )
{
RUNTIME_PLATFORM lrtplatform = null;
if ( platform.equalsIgnoreCase("hadoop"))
lrtplatform = RUNTIME_PLATFORM.HADOOP;
else if ( platform.equalsIgnoreCase("singlenode"))
lrtplatform = RUNTIME_PLATFORM.SINGLE_NODE;
else if ( platform.equalsIgnoreCase("hybrid"))
lrtplatform = RUNTIME_PLATFORM.HYBRID;
else if ( platform.equalsIgnoreCase("spark"))
lrtplatform = RUNTIME_PLATFORM.SPARK;
else if ( platform.equalsIgnoreCase("hybrid_spark"))
lrtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
else
System.err.println("ERROR: Unknown runtime platform: " + platform);
return lrtplatform;
}
private static void setLoggingProperties( Configuration conf )
{
String debug = conf.get("systemml.logging");
if (debug == null)
debug = System.getProperty("systemml.logging");
if (debug != null){
if (debug.equalsIgnoreCase("debug")){
Logger.getLogger("org.apache.sysml").setLevel(Level.DEBUG);
}
else if (debug.equalsIgnoreCase("trace")){
Logger.getLogger("org.apache.sysml").setLevel(Level.TRACE);
}
}
}
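// The log level can hence be raised without code changes, e.g. via a JVM system property
// (an illustrative invocation):
//   java -Dsystemml.logging=debug ... org.apache.sysml.api.DMLScript -f test.dml
// or -Dsystemml.logging=trace for even more detailed output.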
///////////////////////////////
// private internal interface
// (core compilation and execute)
////////
/**
* The running body of DMLScript execution. This method should be called after execution properties have been correctly set,
* and customized parameters have been put into _argVals
*
* @param dmlScriptStr DML script string
* @param fnameOptConfig configuration file
* @param argVals map of argument values
* @param allArgs arguments
* @param parsePyDML true if PYDML, false if DML
* @throws ParseException if ParseException occurs
* @throws IOException if IOException occurs
* @throws DMLRuntimeException if DMLRuntimeException occurs
* @throws LanguageException if LanguageException occurs
* @throws HopsException if HopsException occurs
* @throws LopsException if LopsException occurs
*/
private static void execute(String dmlScriptStr, String fnameOptConfig, Map<String, String> argVals, String[] allArgs, boolean parsePyDML)
throws ParseException, IOException, DMLRuntimeException, LanguageException, HopsException, LopsException
{
SCRIPT_TYPE = parsePyDML ? ScriptType.PYDML : ScriptType.DML;
//print basic time and environment info
printStartExecInfo( dmlScriptStr );
//Step 1: parse configuration files
DMLConfig dmlconf = DMLConfig.readConfigurationFile(fnameOptConfig);
ConfigurationManager.setGlobalConfig(dmlconf);
CompilerConfig cconf = OptimizerUtils.constructCompilerConfig(dmlconf);
ConfigurationManager.setGlobalConfig(cconf);
LOG.debug("\nDML config: \n" + dmlconf.getConfigInfo());
//Step 2: set local/remote memory if requested (for compile in AM context)
if( dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER) ){
DMLAppMasterUtils.setupConfigRemoteMaxMemory(dmlconf);
}
//Step 3: parse dml script
Statistics.startCompileTimer();
ParserWrapper parser = ParserFactory.createParser(parsePyDML);
DMLProgram prog = parser.parse(DML_FILE_PATH_ANTLR_PARSER, dmlScriptStr, argVals);
//Step 4: construct HOP DAGs (incl LVA, validate, and setup)
DMLTranslator dmlt = new DMLTranslator(prog);
dmlt.liveVariableAnalysis(prog);
dmlt.validateParseTree(prog);
dmlt.constructHops(prog);
//init working directories (before usage by following compilation steps)
initHadoopExecution( dmlconf );
//Step 5: rewrite HOP DAGs (incl IPA and memory estimates)
dmlt.rewriteHopsDAG(prog);
//Step 5.1: Generate code for the rewritten Hop dags
if( dmlconf.getBooleanValue(DMLConfig.CODEGEN) ){
SpoofCompiler.PLAN_CACHE_POLICY = PlanCachePolicy.get(
dmlconf.getBooleanValue(DMLConfig.CODEGEN_PLANCACHE),
dmlconf.getIntValue(DMLConfig.CODEGEN_LITERALS)==2);
SpoofCompiler.setExecTypeSpecificJavaCompiler();
if( SpoofCompiler.INTEGRATION==IntegrationType.HOPS )
dmlt.codgenHopsDAG(prog);
}
//Step 6: construct lops (incl exec type and op selection)
dmlt.constructLops(prog);
if (LOG.isDebugEnabled()) {
LOG.debug("\n********************** LOPS DAG *******************");
dmlt.printLops(prog);
dmlt.resetLopsDAGVisitStatus(prog);
}
//Step 7: generate runtime program
Program rtprog = prog.getRuntimeProgram(dmlconf);
//Step 7.1: Generate code for the rewritten Hop dags w/o modify
if( dmlconf.getBooleanValue(DMLConfig.CODEGEN)
&& SpoofCompiler.INTEGRATION==IntegrationType.RUNTIME ){
dmlt.codgenHopsDAG(rtprog);
}
//Step 8: [optional global data flow optimization]
if(OptimizerUtils.isOptLevel(OptimizationLevel.O4_GLOBAL_TIME_MEMORY) )
{
LOG.warn("Optimization level '" + OptimizationLevel.O4_GLOBAL_TIME_MEMORY + "' " +
"is still in experimental state and not intended for production use.");
rtprog = GlobalOptimizerWrapper.optimizeProgram(prog, rtprog);
}
//launch SystemML appmaster (if requested and not already in launched AM)
if( dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER) ){
if( !isActiveAM() && DMLYarnClientProxy.launchDMLYarnAppmaster(dmlScriptStr, dmlconf, allArgs, rtprog) )
return; //if AM launch unsuccessful, fall back to normal execute
if( isActiveAM() ) //in AM context (not failed AM launch)
DMLAppMasterUtils.setupProgramMappingRemoteMaxMemory(rtprog);
}
//Step 9: prepare statistics [and optional explain output]
//count number compiled MR jobs / SP instructions
ExplainCounts counts = Explain.countDistributedOperations(rtprog);
Statistics.resetNoOfCompiledJobs( counts.numJobs );
//explain plan of program (hops or runtime)
if( EXPLAIN != ExplainType.NONE ) {
LOG.info("EXPLAIN ("+EXPLAIN.toString()+"):\n"
+ Explain.explainMemoryBudget(counts)+"\n"
+ Explain.explainDegreeOfParallelism(counts)
+ Explain.explain(prog, rtprog, EXPLAIN));
}
Statistics.stopCompileTimer();
//double costs = CostEstimationWrapper.getTimeEstimate(rtprog, ExecutionContextFactory.createContext());
//System.out.println("Estimated costs: "+costs);
// whether to record and display extra statistics that are useful for developers
// and others interested in digging into performance problems
GPUStatistics.DISPLAY_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_GPU_STATS);
LibMatrixDNN.DISPLAY_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS);
//Step 10: execute runtime program
Statistics.startRunTimer();
ExecutionContext ec = null;
try
{
//run execute (w/ exception handling to ensure proper shutdown)
ec = ExecutionContextFactory.createContext(rtprog);
rtprog.execute( ec );
}
finally //ensure cleanup/shutdown
{
if(DMLScript.USE_ACCELERATOR && ec != null)
ec.destroyGPUContext();
if( dmlconf.getBooleanValue(DMLConfig.CODEGEN) )
SpoofCompiler.cleanupCodeGenerator();
if(ec != null && ec instanceof SparkExecutionContext)
((SparkExecutionContext) ec).close();
//display statistics (incl caching stats if enabled)
Statistics.stopRunTimer();
LOG.info(Statistics.display());
LOG.info("END DML run " + getDateTime() );
//cleanup scratch_space and all working dirs
cleanupHadoopExecution( dmlconf );
}
}
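// Codegen (steps 5.1/7.1 above) is driven by the DML configuration; an illustrative
// SystemML-config.xml fragment (element names are assumptions here -- see the DMLConfig
// constants CODEGEN, CODEGEN_PLANCACHE, and CODEGEN_LITERALS for the actual keys):
//   <codegen.enabled>true</codegen.enabled>
//   <codegen.plancache>true</codegen.plancache>
//   <codegen.literals>1</codegen.literals>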
/**
* Launcher for DML debugger. This method should be called after
* execution and debug properties have been correctly set, and customized parameters have been put into argVals.
*
* @param dmlScriptStr DML script contents (including new lines)
* @param fnameOptConfig Full path of configuration file for SystemML
* @param argVals Key-value pairs defining arguments of DML script
* @param parsePyDML true if PYDML, false if DML
* @throws ParseException if ParseException occurs
* @throws IOException if IOException occurs
* @throws DMLRuntimeException if DMLRuntimeException occurs
* @throws DMLDebuggerException if DMLDebuggerException occurs
* @throws LanguageException if LanguageException occurs
* @throws HopsException if HopsException occurs
* @throws LopsException if LopsException occurs
*/
private static void launchDebugger(String dmlScriptStr, String fnameOptConfig, Map<String, String> argVals, boolean parsePyDML)
throws ParseException, IOException, DMLRuntimeException, DMLDebuggerException, LanguageException, HopsException, LopsException
{
DMLDebuggerProgramInfo dbprog = new DMLDebuggerProgramInfo();
//Step 1: parse configuration files
DMLConfig conf = DMLConfig.readConfigurationFile(fnameOptConfig);
ConfigurationManager.setGlobalConfig(conf);
//Step 2: parse dml script
ParserWrapper parser = ParserFactory.createParser(parsePyDML);
DMLProgram prog = parser.parse(DML_FILE_PATH_ANTLR_PARSER, dmlScriptStr, argVals);
//Step 3: construct HOP DAGs (incl LVA and validate)
DMLTranslator dmlt = new DMLTranslator(prog);
dmlt.liveVariableAnalysis(prog);
dmlt.validateParseTree(prog);
dmlt.constructHops(prog);
//Step 4: rewrite HOP DAGs (incl IPA and memory estimates)
dmlt.rewriteHopsDAG(prog);
//Step 5: construct LOP DAGs
dmlt.constructLops(prog);
//Step 6: generate runtime program
dbprog.rtprog = prog.getRuntimeProgram(conf);
try {
//set execution environment
initHadoopExecution(conf);
//initialize an instance of SystemML debugger
DMLDebugger SystemMLdb = new DMLDebugger(dbprog, dmlScriptStr);
//run SystemML debugger
SystemMLdb.runSystemMLDebugger();
}
finally {
//cleanup scratch_space and all working dirs
cleanupHadoopExecution(conf);
}
}
public static void initHadoopExecution( DMLConfig config )
throws IOException, ParseException, DMLRuntimeException
{
//check security aspects
checkSecuritySetup( config );
//create scratch space with appropriate permissions
String scratch = config.getTextValue(DMLConfig.SCRATCH_SPACE);
MapReduceTool.createDirIfNotExistOnHDFS(scratch, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);
//cleanup working dirs from previous aborted runs with same pid in order to prevent conflicts
cleanupHadoopExecution(config);
//init caching (incl set active)
LocalFileUtils.createWorkingDirectory();
CacheableData.initCaching();
//reset statistics (required if multiple scripts executed in one JVM)
Statistics.resetNoOfExecutedJobs();
if( STATISTICS ) {
CacheStatistics.reset();
Statistics.reset();
}
}
private static void checkSecuritySetup(DMLConfig config)
throws IOException, DMLRuntimeException
{
//analyze local configuration
String userName = System.getProperty( "user.name" );
HashSet<String> groupNames = new HashSet<String>();
try{
//check existence, for backwards compatibility to < hadoop 0.21
if( UserGroupInformation.class.getMethod("getCurrentUser") != null ){
String[] groups = UserGroupInformation.getCurrentUser().getGroupNames();
Collections.addAll(groupNames, groups);
}
}catch(Exception ex){}
//analyze hadoop configuration
JobConf job = ConfigurationManager.getCachedJobConf();
boolean localMode = InfrastructureAnalyzer.isLocalMode(job);
String taskController = job.get(MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER, "org.apache.hadoop.mapred.DefaultTaskController");
String ttGroupName = job.get(MRConfigurationNames.MR_TASKTRACKER_GROUP,"null");
String perm = job.get(MRConfigurationNames.DFS_PERMISSIONS_ENABLED,"null"); //note: job.get("dfs.permissions.supergroup",null);
URI fsURI = FileSystem.getDefaultUri(job);
//determine security states
boolean flagDiffUser = !( taskController.equals("org.apache.hadoop.mapred.LinuxTaskController") //runs map/reduce tasks as the current user
|| localMode // run in the same JVM anyway
|| groupNames.contains( ttGroupName) ); //user in task tracker group
boolean flagLocalFS = fsURI==null || fsURI.getScheme().equals("file");
boolean flagSecurity = perm.equals("yes");
LOG.debug("SystemML security check: "
+ "local.user.name = " + userName + ", "
+ "local.user.groups = " + ProgramConverter.serializeStringCollection(groupNames) + ", "
+ MRConfigurationNames.MR_JOBTRACKER_ADDRESS + " = " + job.get(MRConfigurationNames.MR_JOBTRACKER_ADDRESS) + ", "
+ MRConfigurationNames.MR_TASKTRACKER_TASKCONTROLLER + " = " + taskController + ","
+ MRConfigurationNames.MR_TASKTRACKER_GROUP + " = " + ttGroupName + ", "
+ MRConfigurationNames.FS_DEFAULTFS + " = " + ((fsURI!=null) ? fsURI.getScheme() : "null") + ", "
+ MRConfigurationNames.DFS_PERMISSIONS_ENABLED + " = " + perm );
//print warning if permission issues possible
if( flagDiffUser && ( flagLocalFS || flagSecurity ) )
{
LOG.warn("Cannot run map/reduce tasks as user '"+userName+"'. Using tasktracker group '"+ttGroupName+"'.");
}
//validate external filenames of working directories
String localtmpdir = config.getTextValue(DMLConfig.LOCAL_TMP_DIR);
String hdfstmpdir = config.getTextValue(DMLConfig.SCRATCH_SPACE);
if( !LocalFileUtils.validateExternalFilename(localtmpdir, false) )
throw new DMLRuntimeException("Invalid (non-trustworthy) local working directory.");
if( !LocalFileUtils.validateExternalFilename(hdfstmpdir, true) )
throw new DMLRuntimeException("Invalid (non-trustworthy) hdfs working directory.");
}
public static void cleanupHadoopExecution( DMLConfig config )
throws IOException, ParseException
{
//create dml-script-specific suffix
StringBuilder sb = new StringBuilder();
sb.append(Lop.FILE_SEPARATOR);
sb.append(Lop.PROCESS_PREFIX);
sb.append(DMLScript.getUUID());
String dirSuffix = sb.toString();
//1) cleanup scratch space (everything for current uuid)
//(required otherwise export to hdfs would skip assumed unnecessary writes if same name)
MapReduceTool.deleteFileIfExistOnHDFS( config.getTextValue(DMLConfig.SCRATCH_SPACE) + dirSuffix );
//2) cleanup hadoop working dirs (only required for LocalJobRunner (local job tracker), because
//this implementation does not create job specific sub directories)
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
if( InfrastructureAnalyzer.isLocalMode(job) ) {
try
{
LocalFileUtils.deleteFileIfExists( DMLConfig.LOCAL_MR_MODE_STAGING_DIR + //staging dir (for local mode only)
dirSuffix );
LocalFileUtils.deleteFileIfExists( MRJobConfiguration.getLocalWorkingDirPrefix(job) + //local dir
dirSuffix );
MapReduceTool.deleteFileIfExistOnHDFS( MRJobConfiguration.getSystemWorkingDirPrefix(job) + //system dir
dirSuffix );
MapReduceTool.deleteFileIfExistOnHDFS( MRJobConfiguration.getStagingWorkingDirPrefix(job) + //staging dir
dirSuffix );
}
catch(Exception ex)
{
//we give only a warning because those directories are written by the mapred daemon
//and hence, execution can still succeed
LOG.warn("Unable to cleanup hadoop working dirs: "+ex.getMessage());
}
}
//3) cleanup systemml-internal working dirs
CacheableData.cleanupCacheDir(); //might be local/hdfs
LocalFileUtils.cleanupWorkingDirectory();
}
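// Illustrative shape of the per-script directory suffix built above (uuid value is hypothetical):
//   dirSuffix = Lop.FILE_SEPARATOR + Lop.PROCESS_PREFIX + uuid, e.g. "/_p12345_10.0.0.1",
//   so cleanup removes e.g. "<scratch_space>/_p12345_10.0.0.1" for the current process.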
///////////////////////////////
// private internal helper functionalities
////////
private static void printInvocationInfo(String fnameScript, String fnameOptConfig, Map<String, String> argVals)
{
LOG.debug("****** args to DML Script ******\n" + "UUID: " + getUUID() + "\n" + "SCRIPT PATH: " + fnameScript + "\n"
+ "RUNTIME: " + rtplatform + "\n" + "BUILTIN CONFIG: " + DMLConfig.DEFAULT_SYSTEMML_CONFIG_FILEPATH + "\n"
+ "OPTIONAL CONFIG: " + fnameOptConfig + "\n");
if( !argVals.isEmpty() ) {
LOG.debug("Script arguments are: \n");
for (int i=1; i<= argVals.size(); i++)
LOG.debug("Script argument $" + i + " = " + argVals.get("$" + i) );
}
}
private static void printStartExecInfo(String dmlScriptString)
{
LOG.info("BEGIN DML run " + getDateTime());
LOG.debug("DML script: \n" + dmlScriptString);
if (rtplatform == RUNTIME_PLATFORM.HADOOP || rtplatform == RUNTIME_PLATFORM.HYBRID) {
String hadoop_home = System.getenv("HADOOP_HOME");
LOG.info("HADOOP_HOME: " + hadoop_home);
}
}
private static String getDateTime()
{
DateFormat dateFormat = new SimpleDateFormat("MM/dd/yyyy HH:mm:ss");
Date date = new Date();
return dateFormat.format(date);
}
private static void cleanSystemMLWorkspace()
throws DMLException
{
try
{
//read the default config
DMLConfig conf = DMLConfig.readConfigurationFile(null);
//run cleanup job to clean remote local tmp dirs
CleanupMR.runJob(conf);
//cleanup scratch space (on HDFS)
String scratch = conf.getTextValue(DMLConfig.SCRATCH_SPACE);
if( scratch != null )
MapReduceTool.deleteFileIfExistOnHDFS(scratch);
//cleanup local working dir
String localtmp = conf.getTextValue(DMLConfig.LOCAL_TMP_DIR);
if( localtmp != null )
LocalFileUtils.cleanupRcWorkingDirectory(localtmp);
}
catch(Exception ex)
{
throw new DMLException("Failed to run SystemML workspace cleanup.", ex);
}
}
}