/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.api;


import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.Scanner;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;
import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
import org.apache.sysml.api.jmlc.JMLCUtils;
import org.apache.sysml.api.monitoring.SparkMonitoringUtil;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.hops.OptimizerUtils.OptimizationLevel;
import org.apache.sysml.hops.globalopt.GlobalOptimizerWrapper;
import org.apache.sysml.hops.rewrite.ProgramRewriter;
import org.apache.sysml.hops.rewrite.RewriteRemovePersistentReadWrite;
import org.apache.sysml.parser.AParserWrapper;
import org.apache.sysml.parser.DMLProgram;
import org.apache.sysml.parser.DMLTranslator;
import org.apache.sysml.parser.DataExpression;
import org.apache.sysml.parser.Expression;
import org.apache.sysml.parser.IntIdentifier;
import org.apache.sysml.parser.LanguageException;
import org.apache.sysml.parser.StringIdentifier;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.parser.ParseException;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
import org.apache.sysml.runtime.controlprogram.Program;
import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.instructions.Instruction;
import org.apache.sysml.runtime.instructions.cp.Data;
import org.apache.sysml.runtime.instructions.cp.VariableCPInstruction;
import org.apache.sysml.runtime.instructions.spark.data.RDDObject;
import org.apache.sysml.runtime.instructions.spark.data.RDDProperties;
import org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair;
import org.apache.sysml.runtime.instructions.spark.functions.CopyBlockPairFunction;
import org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction;
import org.apache.sysml.runtime.instructions.spark.functions.SparkListener;
import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtilsExt;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.utils.Explain;
import org.apache.sysml.utils.Statistics;
import org.apache.sysml.utils.Explain.ExplainCounts;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

/**
 * MLContext is useful for passing RDDs as input/output to SystemML. This API avoids the need to read/write
 * from HDFS (which is another way to pass inputs to SystemML).
 * <p>
 * Typical usage for MLContext is as follows:
 * <pre>
 * scala> import org.apache.sysml.api.MLContext
 * </pre>
 * <p>
 * Create input DataFrames from CSV files and potentially perform some feature transformation:
 * <pre>
 * scala> val W = sqlContext.load("com.databricks.spark.csv", Map("path" -> "W.csv", "header" -> "false"))
 * scala> val H = sqlContext.load("com.databricks.spark.csv", Map("path" -> "H.csv", "header" -> "false"))
 * scala> val V = sqlContext.load("com.databricks.spark.csv", Map("path" -> "V.csv", "header" -> "false"))
 * </pre>
 * <p>
 * Create the MLContext:
 * <pre>
 * scala> val ml = new MLContext(sc)
 * </pre>
 * <p>
 * Register input and output DataFrames/RDDs. Supported formats:
 * <ol>
 * <li> DataFrame
 * <li> CSV/Text (as JavaRDD&lt;String&gt; or JavaPairRDD&lt;LongWritable, Text&gt;)
 * <li> Binary blocked RDD (JavaPairRDD&lt;MatrixIndexes, MatrixBlock&gt;)
 * </ol>
 * The registerInput methods are also overloaded to support metadata information such as format, rlen, clen, etc.
 * Please note that the variable names given below in quotes correspond to the variables in the DML script.
 * These variables need to have corresponding read/write statements in the DML script.
 * Currently, only matrix variables are supported through the registerInput/registerOutput interface.
 * To pass scalar variables, use named/positional arguments (described later) or wrap them in a matrix variable.
 * <pre>
 * scala> ml.registerInput("V", V)
 * scala> ml.registerInput("W", W)
 * scala> ml.registerInput("H", H)
 * scala> ml.registerOutput("H")
 * scala> ml.registerOutput("W")
 * </pre>
 * <p>
 * Call the script with default arguments:
 * <pre>
 * scala> val outputs = ml.execute("GNMF.dml")
 * </pre>
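 * <p>
 * The registered outputs can then be retrieved from the returned MLOutput, for example as binary blocked RDDs
 * along with their matrix characteristics (a sketch; these accessors are also used internally by the read method of this class):
 * <pre>
 * scala> val hOut = outputs.getBinaryBlockedRDD("H")
 * scala> val hMeta = outputs.getMatrixCharacteristics("H")
 * </pre>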

 * <p>
 * Also supported: calling the script with positional arguments (args) and named arguments (nargs):
 * <pre>
 * scala> val args = Array("V.mtx", "W.mtx", "H.mtx", "2000", "1500", "50", "1", "WOut.mtx", "HOut.mtx")
 * scala> val nargs = Map("maxIter" -> "1", "V" -> "")
 * scala> val outputs = ml.execute("GNMF.dml", args) // or ml.execute("GNMF_namedArgs.dml", nargs)
 * </pre>
 * <p>
 * To run the script again using different (or even the same) arguments, but with the same registered inputs/outputs:
 * <pre>
 * scala> val new_outputs = ml.execute("GNMF.dml", new_args)
 * </pre>
 * <p>
 * However, to register new inputs/outputs, you need to first reset the MLContext:
 * <pre>
 * scala> ml.reset()
 * scala> ml.registerInput("V", newV)
 * </pre>
 * <p>
 * Experimental API: To monitor performance (only supported for Spark 1.4.0 or higher):
 * <pre>
 * scala> val ml = new MLContext(sc, true)
 * </pre>
 * <p>
 * If performance monitoring is enabled:
 * <pre>
 * scala> print(ml.getMonitoringUtil().getExplainOutput())
 * scala> ml.getMonitoringUtil().getRuntimeInfoInHTML("runtime.html")
 * </pre>
 * <p>
 * Note: The execute(...) methods do not support parallel calls from the same or different MLContext objects,
 * because the current SystemML engine does not allow multiple invocations in the same JVM.
 * So, if you plan to create a system which potentially creates multiple MLContext objects,
 * it is recommended to guard the execute(...) calls using:
 * <pre>
 * synchronized(MLContext.class) { ml.execute(...); }
 * </pre>
 */
public class MLContext {

	// ----------------------------------------------------
	// TODO: To make MLContext multi-threaded, track getCurrentMLContext and also all singletons and
	// static variables in the SystemML codebase.
	private static MLContext _activeMLContext = null;

	// Package protected so as to maintain a clean public API for MLContext.
	// Use MLContextProxy.getActiveMLContext() if necessary.
	static MLContext getActiveMLContext() {
		return _activeMLContext;
	}
	// ----------------------------------------------------

	private SparkContext _sc = null; // Read while creating SystemML's spark context

	public SparkContext getSparkContext() {
		if(_sc == null) {
			throw new RuntimeException("No spark context set in MLContext");
		}
		return _sc;
	}

	private ArrayList<String> _inVarnames = null;
	private ArrayList<String> _outVarnames = null;
	private LocalVariableMap _variables = null; // temporary symbol table
	private Program _rtprog = null;
	private HashMap<String, String> _additionalConfigs = new HashMap<String, String>();

	// --------------------------------------------------
	// _monitorUtils is set only when MLContext(sc, true)
	private SparkMonitoringUtil _monitorUtils = null;

	/**
	 * Experimental API. Not supported in Python MLContext API.
	 * @return the SparkMonitoringUtil instance, or null if monitoring is not enabled
	 */
	public SparkMonitoringUtil getMonitoringUtil() {
		return _monitorUtils;
	}
	// --------------------------------------------------

	/**
	 * Create an associated MLContext for the given spark context.
	 * @param sc SparkContext
	 * @throws DMLRuntimeException
	 */
	public MLContext(SparkContext sc) throws DMLRuntimeException {
		initializeSpark(sc, false, false);
	}

	/**
	 * Create an associated MLContext for the given spark context.
	 * @param sc JavaSparkContext
	 * @throws DMLRuntimeException
	 */
	public MLContext(JavaSparkContext sc) throws DMLRuntimeException {
		initializeSpark(sc.sc(), false, false);
	}

	/**
	 * Experimental API: Create an associated MLContext with performance monitoring enabled
	 * (this is the "new MLContext(sc, true)" constructor referenced in the class documentation above).
	 * @param sc SparkContext
	 * @param monitorPerformance whether to enable performance monitoring
	 * @throws DMLRuntimeException
	 */
	public MLContext(SparkContext sc, boolean monitorPerformance) throws DMLRuntimeException {
		initializeSpark(sc, monitorPerformance, false);
	}

	/**
	 * Allow users to provide custom named-value configuration.
	 * @param paramName parameter name
	 * @param paramVal parameter value
	 */
	public void setConfig(String paramName, String paramVal) {
		_additionalConfigs.put(paramName, paramVal);
	}
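
	// Example (sketch): configuration properties correspond to entries in SystemML-config.xml;
	// e.g., assuming the "localtmpdir" property:
	//   scala> ml.setConfig("localtmpdir", "/tmp/systemml")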

	// ====================================================================================
	// Register input APIs
	// 1. DataFrame

	/**
	 * Register DataFrame as input. The DataFrame is assumed to be in row format and each cell can be converted into
	 * a double through Double.parseDouble(cell.toString()). This is suitable for passing dense matrices. For sparse
	 * matrices, consider passing through text format (using JavaRDD&lt;String&gt;, format="text").
	 * <p>
	 * Marks the variable in the DML script as input variable.
	 * Note that this expects a "varName = read(...)" statement in the DML script which, in a non-MLContext invocation,
	 * would have been created by reading an HDFS file.
	 * @param varName variable name in the DML script
	 * @param df input DataFrame
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, DataFrame df) throws DMLRuntimeException {
		registerInput(varName, df, false);
	}

	/**
	 * Register DataFrame as input.
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @param varName variable name in the DML script
	 * @param df input DataFrame
	 * @param containsID true if the DataFrame has a column "ID" which denotes the row ID
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, DataFrame df, boolean containsID) throws DMLRuntimeException {
		MatrixCharacteristics mcOut = new MatrixCharacteristics();
		JavaPairRDD<MatrixIndexes, MatrixBlock> rdd = RDDConverterUtilsExt.dataFrameToBinaryBlock(new JavaSparkContext(_sc), df, mcOut, containsID);
		registerInput(varName, rdd, mcOut);
	}

	/**
	 * Experimental API. Not supported in Python MLContext API.
	 * @param varName variable name in the DML script
	 * @param df input MLMatrix
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, MLMatrix df) throws DMLRuntimeException {
		registerInput(varName, MLMatrix.getRDDLazily(df), df.mc);
	}

	// ------------------------------------------------------------------------------------
	// 2. CSV/Text: Usually JavaRDD<String>, but also supports JavaPairRDD<LongWritable, Text>

	/**
	 * Register CSV/Text as input: Method for supplying csv file format properties, but without dimensions or nnz.
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, JavaRDD<String> rdd, String format, boolean hasHeader,
			String delim, boolean fill, double missingValue) throws DMLRuntimeException {
		registerInput(varName, rdd, format, hasHeader, delim, fill, missingValue, -1, -1, -1);
	}

	/**
	 * Register CSV/Text as input: Method for supplying csv file format properties, but without dimensions or nnz.
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, RDD<String> rdd, String format, boolean hasHeader,
			String delim, boolean fill, double missingValue) throws DMLRuntimeException {
		registerInput(varName, rdd.toJavaRDD(), format, hasHeader, delim, fill, missingValue, -1, -1, -1);
	}

	/**
	 * Register CSV/Text as input: Method for supplying csv file format properties along with dimensions and nnz.
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, RDD<String> rdd, String format, boolean hasHeader,
			String delim, boolean fill, double missingValue, long rlen, long clen, long nnz) throws DMLRuntimeException {
		registerInput(varName, rdd.toJavaRDD(), format, hasHeader, delim, fill, missingValue, rlen, clen, nnz);
	}

	/**
	 * Register CSV/Text as input: Method for supplying csv file format properties along with dimensions and nnz.
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, JavaRDD<String> rdd, String format, boolean hasHeader,
			String delim, boolean fill, double missingValue, long rlen, long clen, long nnz) throws DMLRuntimeException {
		RDDProperties properties = new RDDProperties();
		properties.setHasHeader(hasHeader);
		properties.setFill(fill);
		properties.setDelim(delim);
		properties.setMissingValue(missingValue);
		registerInput(varName, rdd.mapToPair(new ConvertStringToLongTextPair()), format, rlen, clen, nnz, properties);
	}
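
	// Example (sketch): register a CSV JavaRDD[String] with no header, comma delimiter, fill of
	// missing values with 0, and known dimensions (csvLines is an assumed, pre-existing RDD):
	//   scala> ml.registerInput("V", csvLines, "csv", false, ",", true, 0.0, 10000, 1000, -1)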

	/**
	 * Register CSV/Text as input: Convenience method without dimensions and nnz; uses default file properties (delim, fill, ...).
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, RDD<String> rdd, String format) throws DMLRuntimeException {
		registerInput(varName, rdd.toJavaRDD().mapToPair(new ConvertStringToLongTextPair()), format, -1, -1, -1, null);
	}

	/**
	 * Register CSV/Text as input: Convenience method without dimensions and nnz; uses default file properties (delim, fill, ...).
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, JavaRDD<String> rdd, String format) throws DMLRuntimeException {
		registerInput(varName, rdd.mapToPair(new ConvertStringToLongTextPair()), format, -1, -1, -1, null);
	}

	/**
	 * Register CSV/Text as input: Convenience method with dimensions but no nnz; uses default file properties (delim, fill, ...).
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, JavaRDD<String> rdd, String format, long rlen, long clen) throws DMLRuntimeException {
		registerInput(varName, rdd.mapToPair(new ConvertStringToLongTextPair()), format, rlen, clen, -1, null);
	}

	/**
	 * Register CSV/Text as input: Convenience method with dimensions but no nnz; uses default file properties (delim, fill, ...).
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, RDD<String> rdd, String format, long rlen, long clen) throws DMLRuntimeException {
		registerInput(varName, rdd.toJavaRDD().mapToPair(new ConvertStringToLongTextPair()), format, rlen, clen, -1, null);
	}

	/**
	 * Register CSV/Text as input: with dimensions and nnz; uses default file properties (delim, fill, ...).
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, JavaRDD<String> rdd, String format, long rlen, long clen, long nnz) throws DMLRuntimeException {
		registerInput(varName, rdd.mapToPair(new ConvertStringToLongTextPair()), format, rlen, clen, nnz, null);
	}

	/**
	 * Register CSV/Text as input: with dimensions and nnz; uses default file properties (delim, fill, ...).
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, RDD<String> rdd, String format, long rlen, long clen, long nnz) throws DMLRuntimeException {
		registerInput(varName, rdd.toJavaRDD().mapToPair(new ConvertStringToLongTextPair()), format, rlen, clen, nnz, null);
	}

	// All CSV-related methods call this. It provides access to dimensions, nnz and file properties.
	private void registerInput(String varName, JavaPairRDD<LongWritable, Text> textOrCsv_rdd, String format,
			long rlen, long clen, long nnz, RDDProperties properties) throws DMLRuntimeException {
		if(!(DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK || DMLScript.rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)) {
			throw new DMLRuntimeException("The registerInput functionality is only supported for the spark runtime. Please use MLContext(sc) instead of the default constructor.");
		}

		if(_variables == null)
			_variables = new LocalVariableMap();
		if(_inVarnames == null)
			_inVarnames = new ArrayList<String>();

		MatrixObject mo = null;
		if(format.compareTo("csv") == 0) {
			MatrixCharacteristics mc = new MatrixCharacteristics(rlen, clen, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize, nnz);
			mo = new MatrixObject(ValueType.DOUBLE, null, new MatrixFormatMetaData(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
		}
		else if(format.compareTo("text") == 0) {
			if(rlen == -1 || clen == -1) {
				throw new DMLRuntimeException("The metadata is required in registerInput for format:" + format);
			}
			MatrixCharacteristics mc = new MatrixCharacteristics(rlen, clen, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize, nnz);
			mo = new MatrixObject(ValueType.DOUBLE, null, new MatrixFormatMetaData(mc, OutputInfo.TextCellOutputInfo, InputInfo.TextCellInputInfo));
		}
		else if(format.compareTo("mm") == 0) {
			// TODO: Handle matrix market
			throw new DMLRuntimeException("Matrixmarket format is not yet implemented in registerInput: " + format);
		}
		else {
			throw new DMLRuntimeException("Incorrect format in registerInput: " + format);
		}

		JavaPairRDD<LongWritable, Text> rdd = textOrCsv_rdd.mapToPair(new CopyTextInputFunction());
		if(properties != null) {
			mo.setRddProperties(properties);
		}
		mo.setRDDHandle(new RDDObject(rdd, varName));
		_variables.put(varName, mo);
		_inVarnames.add(varName);
		checkIfRegisteringInputAllowed();
	}

	// ------------------------------------------------------------------------------------
	// 3. Binary blocked RDD: Support JavaPairRDD<MatrixIndexes, MatrixBlock>

	/**
	 * Register binary blocked RDD with given dimensions, default block sizes and no nnz.
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @param varName variable name in the DML script
	 * @param rdd input RDD
	 * @param rlen number of rows
	 * @param clen number of columns
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, long rlen, long clen) throws DMLRuntimeException {
		registerInput(varName, rdd, rlen, clen, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize);
	}

	/**
	 * Register binary blocked RDD with given dimensions, given block sizes and no nnz.
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @param varName variable name in the DML script
	 * @param rdd input RDD
	 * @param rlen number of rows
	 * @param clen number of columns
	 * @param brlen number of rows per block
	 * @param bclen number of columns per block
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, long rlen, long clen, int brlen, int bclen) throws DMLRuntimeException {
		registerInput(varName, rdd, rlen, clen, brlen, bclen, -1);
	}

	/**
	 * Register binary blocked RDD with given dimensions, given block sizes and given nnz (preferred).
	 * Marks the variable in the DML script as input variable (expects a corresponding "varName = read(...)" statement).
	 * @param varName variable name in the DML script
	 * @param rdd input RDD
	 * @param rlen number of rows
	 * @param clen number of columns
	 * @param brlen number of rows per block
	 * @param bclen number of columns per block
	 * @param nnz number of non-zero values, or -1 if unknown
	 * @throws DMLRuntimeException
	 */
	public void registerInput(String varName, JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, long rlen, long clen, int brlen, int bclen, long nnz) throws DMLRuntimeException {
		if(rlen == -1 || clen == -1) {
			throw new DMLRuntimeException("The metadata is required in registerInput for binary format");
		}

		MatrixCharacteristics mc = new MatrixCharacteristics(rlen, clen, brlen, bclen, nnz);
		registerInput(varName, rdd, mc);
	}

	// All binary blocked methods call this.
	public void registerInput(String varName, JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, MatrixCharacteristics mc) throws DMLRuntimeException {
		if(_variables == null)
			_variables = new LocalVariableMap();
		if(_inVarnames == null)
			_inVarnames = new ArrayList<String>();

		// A bug in Spark messes up blocks and indexes due to too-eager reuse of data structures,
		// hence create a copy of the input RDD.
		JavaPairRDD<MatrixIndexes, MatrixBlock> copyRDD = rdd.mapToPair( new CopyBlockPairFunction() );

		MatrixObject mo = new MatrixObject(ValueType.DOUBLE, "temp", new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
		mo.setRDDHandle(new RDDObject(copyRDD, varName));
		_variables.put(varName, mo);
		_inVarnames.add(varName);
		checkIfRegisteringInputAllowed();
	}

	// =============================================================================================

	/**
	 * Marks the variable in the DML script as output variable.
	 * Note that this expects a "write(varName, ...)" statement in the DML script which, in a non-MLContext invocation,
	 * would have written the matrix to HDFS.
	 * @param varName variable name in the DML script
	 * @throws DMLRuntimeException
	 */
	public void registerOutput(String varName) throws DMLRuntimeException {
		if(!(DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK || DMLScript.rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)) {
			throw new DMLRuntimeException("The registerOutput functionality is only supported for the spark runtime. Please use MLContext(sc) instead of the default constructor.");
		}
		if(_outVarnames == null)
			_outVarnames = new ArrayList<String>();
		_outVarnames.add(varName);
		if(_variables == null)
			_variables = new LocalVariableMap();
	}
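
	// Example (sketch): register an existing binary blocked RDD (JavaPairRDD[MatrixIndexes, MatrixBlock])
	// with known dimensions, and mark a DML variable as output (binBlocks is an assumed, pre-existing RDD):
	//   scala> ml.registerInput("X", binBlocks, 10000L, 1000L)
	//   scala> ml.registerOutput("Y")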
	// =============================================================================================

	/**
	 * Execute DML script by passing named arguments using specified config file.
	 * @param dmlScriptFilePath the dml script can be in the local filesystem or in HDFS
	 * @param namedArgs named arguments
	 * @param parsePyDML true if PyDML, false if DML
	 * @param configFilePath path to the config file
	 * @throws IOException
	 * @throws DMLException
	 * @throws ParseException
	 */
	public MLOutput execute(String dmlScriptFilePath, HashMap<String, String> namedArgs, boolean parsePyDML, String configFilePath)
			throws IOException, DMLException, ParseException {
		String [] args = new String[namedArgs.size()];
		int i = 0;
		for(Entry<String, String> entry : namedArgs.entrySet()) {
			if(entry.getValue().trim().compareTo("") == 0)
				args[i] = entry.getKey() + "=\"" + entry.getValue() + "\"";
			else
				args[i] = entry.getKey() + "=" + entry.getValue();
			i++;
		}
		return compileAndExecuteScript(dmlScriptFilePath, args, true, parsePyDML, configFilePath);
	}

	/**
	 * Execute DML script by passing named arguments using specified config file.
	 * @param dmlScriptFilePath the dml script can be in the local filesystem or in HDFS
	 */
	public MLOutput execute(String dmlScriptFilePath, HashMap<String, String> namedArgs, String configFilePath)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, namedArgs, false, configFilePath);
	}

	/**
	 * Execute DML script by passing named arguments with default configuration.
	 * @param dmlScriptFilePath the dml script can be in the local filesystem or in HDFS
	 */
	public MLOutput execute(String dmlScriptFilePath, HashMap<String, String> namedArgs)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, namedArgs, false, null);
	}

	/**
	 * Execute DML script by passing named arguments (Scala map variant).
	 */
	public MLOutput execute(String dmlScriptFilePath, scala.collection.immutable.Map<String, String> namedArgs)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, new HashMap<String, String>(scala.collection.JavaConversions.mapAsJavaMap(namedArgs)));
	}

	/**
	 * Experimental: Execute PyDML script by passing named arguments if parsePyDML=true.
	 */
	public MLOutput execute(String dmlScriptFilePath, HashMap<String, String> namedArgs, boolean parsePyDML)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, namedArgs, parsePyDML, null);
	}

	/**
	 * Experimental: Execute PyDML script by passing named arguments if parsePyDML=true (Scala map variant).
	 */
	public MLOutput execute(String dmlScriptFilePath, scala.collection.immutable.Map<String, String> namedArgs, boolean parsePyDML)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, new HashMap<String, String>(scala.collection.JavaConversions.mapAsJavaMap(namedArgs)), parsePyDML);
	}

	/**
	 * Execute DML script by passing positional arguments using specified config file.
	 */
	public MLOutput execute(String dmlScriptFilePath, String [] args, String configFilePath)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, args, false, configFilePath);
	}

	/**
	 * Execute DML script by passing positional arguments using specified config file.
	 * This method is implemented for compatibility with the Python MLContext;
	 * Java/Scala users should use 'MLOutput execute(String dmlScriptFilePath, String [] args, String configFilePath)' instead,
	 * as equivalent scala collections (Seq/ArrayBuffer) are not implemented.
	 */
	public MLOutput execute(String dmlScriptFilePath, ArrayList<String> args, String configFilePath)
			throws IOException, DMLException, ParseException {
		String [] argsArr = args.toArray(new String[args.size()]);
		return execute(dmlScriptFilePath, argsArr, false, configFilePath);
	}

	/**
	 * Execute DML script by passing positional arguments using default configuration.
	 */
	public MLOutput execute(String dmlScriptFilePath, String [] args)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, args, false, null);
	}

	/**
	 * Execute DML script by passing positional arguments using default configuration
	 * (Python-compatibility variant; see above).
	 */
	public MLOutput execute(String dmlScriptFilePath, ArrayList<String> args)
			throws IOException, DMLException, ParseException {
		String [] argsArr = args.toArray(new String[args.size()]);
		return execute(dmlScriptFilePath, argsArr, false, null);
	}

	/**
	 * Experimental: Execute DML script by passing positional arguments if parsePyDML=true, using default configuration
	 * (Python-compatibility variant; see above).
	 */
	public MLOutput execute(String dmlScriptFilePath, ArrayList<String> args, boolean parsePyDML)
			throws IOException, DMLException, ParseException {
		String [] argsArr = args.toArray(new String[args.size()]);
		return execute(dmlScriptFilePath, argsArr, parsePyDML, null);
	}

	/**
	 * Experimental: Execute DML script by passing positional arguments if parsePyDML=true, using specified config file
	 * (Python-compatibility variant; see above).
	 */
	public MLOutput execute(String dmlScriptFilePath, ArrayList<String> args, boolean parsePyDML, String configFilePath)
			throws IOException, DMLException, ParseException {
		String [] argsArr = args.toArray(new String[args.size()]);
		return execute(dmlScriptFilePath, argsArr, parsePyDML, configFilePath);
	}

	/**
	 * Experimental: Execute DML script by passing positional arguments if parsePyDML=true, using specified config file.
	 */
	public MLOutput execute(String dmlScriptFilePath, String [] args, boolean parsePyDML, String configFilePath)
			throws IOException, DMLException, ParseException {
		return compileAndExecuteScript(dmlScriptFilePath, args, false, parsePyDML, configFilePath);
	}

	/**
	 * Experimental: Execute DML script by passing positional arguments if parsePyDML=true, using default configuration.
	 */
	public MLOutput execute(String dmlScriptFilePath, String [] args, boolean parsePyDML)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, args, parsePyDML, null);
	}

	/**
	 * Execute DML script without any arguments using specified config path.
	 */
	public MLOutput execute(String dmlScriptFilePath, String configFilePath)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, false, configFilePath);
	}

	/**
	 * Execute DML script without any arguments using default configuration.
	 */
	public MLOutput execute(String dmlScriptFilePath)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, false, null);
	}

	/**
	 * Experimental: Execute DML script without any arguments if parsePyDML=true, using specified config path.
	 */
	public MLOutput execute(String dmlScriptFilePath, boolean parsePyDML, String configFilePath)
			throws IOException, DMLException, ParseException {
		return compileAndExecuteScript(dmlScriptFilePath, null, false, parsePyDML, configFilePath);
	}

	/**
	 * Experimental: Execute DML script without any arguments if parsePyDML=true, using default configuration.
	 */
	public MLOutput execute(String dmlScriptFilePath, boolean parsePyDML)
			throws IOException, DMLException, ParseException {
		return execute(dmlScriptFilePath, parsePyDML, null);
	}
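
	// Example (sketch): execute with positional arguments and an explicit SystemML config file
	// (assuming a SystemML-config.xml in the working directory):
	//   scala> val outputs = ml.execute("GNMF.dml", args, "SystemML-config.xml")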
	// -------------------------------- Utility methods begin ----------------------------------------------------------

	/**
	 * Call this method if you want to clear any RDDs set via registerInput or registerOutput.
	 * This is required if ml.execute(..) has been called earlier and you want to call a new DML script.
	 * Note: By default this does not clean up the configuration set using the setConfig method.
	 * To clean up the configuration along with the registered inputs/outputs, use reset(true).
	 * @throws DMLRuntimeException
	 */
	public void reset() throws DMLRuntimeException {
		reset(false);
	}

	public void reset(boolean cleanupConfig) throws DMLRuntimeException {
		// Cleanup variables from the buffer pool, including evicted files
		// (otherwise there is a memory leak because the buffer pool holds references)
		CacheableData.cleanupCacheDir();

		// Clear MLContext state
		_inVarnames = null;
		_outVarnames = null;
		_variables = null;
		if(cleanupConfig)
			_additionalConfigs.clear();
	}

	/**
	 * Used internally.
	 * @param source the expression of the read statement being validated
	 * @param target the variable name assigned by the read statement
	 * @throws LanguageException
	 */
	void setAppropriateVarsForRead(Expression source, String target) throws LanguageException {
		boolean isTargetRegistered = isRegisteredAsInput(target);
		boolean isReadExpression = (source instanceof DataExpression && ((DataExpression) source).isRead());
		if(isTargetRegistered && isReadExpression) {
			// Do not check the metadata file for registered reads
			((DataExpression) source).setCheckMetadata(false);

			MatrixObject mo = null;
			try {
				mo = getMatrixObject(target);
				int blp = source.getBeginLine();
				int bcp = source.getBeginColumn();
				int elp = source.getEndLine();
				int ecp = source.getEndColumn();
				((DataExpression) source).addVarParam(DataExpression.READROWPARAM, new IntIdentifier(mo.getNumRows(), source.getFilename(), blp, bcp, elp, ecp));
				((DataExpression) source).addVarParam(DataExpression.READCOLPARAM, new IntIdentifier(mo.getNumColumns(), source.getFilename(), blp, bcp, elp, ecp));
				((DataExpression) source).addVarParam(DataExpression.READNUMNONZEROPARAM, new IntIdentifier(mo.getNnz(), source.getFilename(), blp, bcp, elp, ecp));
				((DataExpression) source).addVarParam(DataExpression.DATATYPEPARAM, new StringIdentifier("matrix", source.getFilename(), blp, bcp, elp, ecp));
				((DataExpression) source).addVarParam(DataExpression.VALUETYPEPARAM, new StringIdentifier("double", source.getFilename(), blp, bcp, elp, ecp));

				if(mo.getMetaData() instanceof MatrixFormatMetaData) {
					MatrixFormatMetaData metaData = (MatrixFormatMetaData) mo.getMetaData();
					if(metaData.getOutputInfo() == OutputInfo.CSVOutputInfo) {
						((DataExpression) source).addVarParam(DataExpression.FORMAT_TYPE, new StringIdentifier(DataExpression.FORMAT_TYPE_VALUE_CSV, source.getFilename(), blp, bcp, elp, ecp));
					}
					else if(metaData.getOutputInfo() == OutputInfo.TextCellOutputInfo) {
						((DataExpression) source).addVarParam(DataExpression.FORMAT_TYPE, new StringIdentifier(DataExpression.FORMAT_TYPE_VALUE_TEXT, source.getFilename(), blp, bcp, elp, ecp));
					}
					else if(metaData.getOutputInfo() == OutputInfo.BinaryBlockOutputInfo) {
						((DataExpression) source).addVarParam(DataExpression.ROWBLOCKCOUNTPARAM, new IntIdentifier(mo.getNumRowsPerBlock(), source.getFilename(), blp, bcp, elp, ecp));
						((DataExpression) source).addVarParam(DataExpression.COLUMNBLOCKCOUNTPARAM, new IntIdentifier(mo.getNumColumnsPerBlock(), source.getFilename(), blp, bcp, elp, ecp));
						((DataExpression) source).addVarParam(DataExpression.FORMAT_TYPE, new StringIdentifier(DataExpression.FORMAT_TYPE_VALUE_BINARY, source.getFilename(), blp, bcp, elp, ecp));
					}
					else {
						throw new LanguageException("Unsupported format through MLContext");
					}
				}
			}
			catch (DMLRuntimeException e) {
				throw new LanguageException(e);
			}
		}
	}

	/**
	 * Used internally.
	 * @param tmp runtime instructions of the last-level program block
	 * @return the instructions with remove-variable instructions for registered outputs removed
	 */
	ArrayList<Instruction> performCleanupAfterRecompilation(ArrayList<Instruction> tmp) {
		String [] outputs = null;
		if(_outVarnames != null) {
			outputs = _outVarnames.toArray(new String[0]);
		}
		else {
			outputs = new String[0];
		}

		// No need to clean up the entire program
		// as this method is only called for the last-level program block:
		// JMLCUtils.cleanupRuntimeProgram(_rtprog, outputs);

		for( int i = 0; i < tmp.size(); i++ ) {
			Instruction linst = tmp.get(i);
			if( linst instanceof VariableCPInstruction && ((VariableCPInstruction)linst).isRemoveVariable() ) {
				VariableCPInstruction varinst = (VariableCPInstruction) linst;
				for( String var : outputs ) {
					if( varinst.isRemoveVariable(var) ) {
						tmp.remove(i);
						i--;
						break;
					}
				}
			}
		}

		return tmp;
	}

	private boolean isRegisteredAsInput(String varName) {
		if(_inVarnames != null) {
			for(String v : _inVarnames) {
				if(v.compareTo(varName) == 0) {
					return true;
				}
			}
		}
		return false;
	}

	private MatrixObject getMatrixObject(String varName) throws DMLRuntimeException {
		if(_variables != null) {
			Data mo = _variables.get(varName);
			if(mo instanceof MatrixObject) {
				return (MatrixObject) mo;
			}
			else {
				throw new DMLRuntimeException("ERROR: Incorrect type");
			}
		}
		throw new DMLRuntimeException("ERROR: getMatrixObject not set for variable:" + varName);
	}

	private int compareVersion(String versionStr1, String versionStr2) {
		Scanner s1 = null;
		Scanner s2 = null;
		try {
			s1 = new Scanner(versionStr1); s1.useDelimiter("\\.");
			s2 = new Scanner(versionStr2); s2.useDelimiter("\\.");
			while(s1.hasNextInt() && s2.hasNextInt()) {
				int version1 = s1.nextInt();
				int version2 = s2.nextInt();
				if(version1 < version2) {
					return -1;
				} else if(version1 > version2) {
					return 1;
				}
			}
			if(s1.hasNextInt()) return 1;
		}
		finally {
			if(s1 != null) s1.close();
			if(s2 != null) s2.close();
		}
		return 0;
	}

	private void initializeSpark(SparkContext sc, boolean monitorPerformance, boolean setForcedSparkExecType) throws DMLRuntimeException {
		MLContextProxy.setActive(true);

		this._sc = sc;

		if(compareVersion(sc.version(), "1.3.0") < 0 ) {
			throw new DMLRuntimeException("Expected spark version >= 1.3.0 for running SystemML");
		}

		if(setForcedSparkExecType)
			DMLScript.rtplatform = RUNTIME_PLATFORM.SPARK;
		else
			DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;

		if(monitorPerformance) {
			initializeSparkListener(sc);
		}
	}

	private void initializeSparkListener(SparkContext sc) throws DMLRuntimeException {
		if(compareVersion(sc.version(), "1.4.0") < 0 ) {
			throw new DMLRuntimeException("Expected spark version >= 1.4.0 for monitoring MLContext performance");
		}
		SparkListener sparkListener = new SparkListener(sc);
		_monitorUtils = new SparkMonitoringUtil(sparkListener);
		sc.addSparkListener(sparkListener);
	}

	/**
	 * Experimental API. Not supported in Python MLContext API.
	 * Execute a DML script given as a string (rather than a file path) using default configuration.
	 * @param dmlScript the DML script as a string
	 * @throws IOException
	 * @throws DMLException
	 * @throws ParseException
	 */
	public MLOutput executeScript(String dmlScript) throws IOException, DMLException, ParseException {
		return compileAndExecuteScript(dmlScript, null, false, false, false, null);
	}

	public MLOutput executeScript(String dmlScript, String configFilePath) throws IOException, DMLException, ParseException {
		return compileAndExecuteScript(dmlScript, null, false, false, false, configFilePath);
	}
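
	// Example (sketch): execute a DML string directly against previously registered inputs/outputs
	// (assumes "A" was registered as input and "B" as output; the empty read/write paths are
	// placeholders that are resolved from the registered variables):
	//   scala> val out = ml.executeScript("A = read(\"\"); B = A * 2; write(B, \"\");")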
	private void checkIfRegisteringInputAllowed() throws DMLRuntimeException {
		if(!(DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK || DMLScript.rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)) {
			throw new DMLRuntimeException("ERROR: registerInput is only allowed for spark execution mode");
		}
	}

	private MLOutput compileAndExecuteScript(String dmlScriptFilePath, String [] args, boolean isNamedArgument, boolean isPyDML, String configFilePath)
			throws IOException, DMLException, ParseException {
		return compileAndExecuteScript(dmlScriptFilePath, args, true, isNamedArgument, isPyDML, configFilePath);
	}

	/**
	 * All the execute() methods call this, which, after setting appropriate input/output variables,
	 * calls executeUsingSimplifiedCompilationChain.
	 * We have explicitly synchronized this function because MLContext/SystemML does not yet support multi-threading.
	 * @throws IOException
	 * @throws DMLException
	 * @throws ParseException
	 */
	private synchronized MLOutput compileAndExecuteScript(String dmlScriptFilePath, String [] args, boolean isFile, boolean isNamedArgument, boolean isPyDML, String configFilePath)
			throws IOException, DMLException, ParseException {
		try {
			if(getActiveMLContext() != null) {
				throw new DMLRuntimeException("SystemML (and hence by definition MLContext) does not support parallel execute() calls from the same or different MLContexts. "
						+ "As a temporary fix, please do explicit synchronization, i.e. synchronized(MLContext.class) { ml.execute(...) } ");
			}

			// Set active MLContext.
			_activeMLContext = this;

			// Setup parser parameters.
			// TODO: In the process of hardening mlcontext, we should also reinvestigate if we
			// could be more restrictive and require known dimensions (rm REJECT_READ_WRITE_UNKNOWNS).
			AParserWrapper.IGNORE_UNSPECIFIED_ARGS = true;
			DataExpression.REJECT_READ_WRITE_UNKNOWNS = false;

			if(_monitorUtils != null) {
				_monitorUtils.resetMonitoringData();
			}

			if(DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK || DMLScript.rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK) {
				HashMap<String, JavaPairRDD<MatrixIndexes, MatrixBlock>> retVal = null;

				// Depending on whether registerInput/registerOutput was called, initialize the variables.
				String[] inputs = null;
				String[] outputs = null;
				if(_inVarnames != null) {
					inputs = _inVarnames.toArray(new String[0]);
				}
				else {
					inputs = new String[0];
				}
				if(_outVarnames != null) {
					outputs = _outVarnames.toArray(new String[0]);
				}
				else {
					outputs = new String[0];
				}
				HashMap<String, MatrixCharacteristics> outMetadata = new HashMap<String, MatrixCharacteristics>();

				HashMap<String, String> argVals = DMLScript.createArgumentsMap(isNamedArgument, args);

				// Run the DML script.
				ExecutionContext ec = executeUsingSimplifiedCompilationChain(dmlScriptFilePath, isFile, argVals, isPyDML, inputs, outputs, _variables, configFilePath);

				// Now collect the output.
				if(_outVarnames != null) {
					if(_variables == null) {
						throw new DMLRuntimeException("The symbol table returned after executing the script is empty");
					}

					for( String ovar : _outVarnames ) {
						if( _variables.keySet().contains(ovar) ) {
							if(retVal == null) {
								retVal = new HashMap<String, JavaPairRDD<MatrixIndexes, MatrixBlock>>();
							}
							retVal.put(ovar, ((SparkExecutionContext) ec).getBinaryBlockRDDHandleForVariable(ovar));
							// For converting output to dataframe
							outMetadata.put(ovar, ((SparkExecutionContext) ec).getMatrixCharacteristics(ovar));
						}
						else {
							throw new DMLException("Error: The variable " + ovar + " is not available as output after the execution of the DMLScript.");
						}
					}
				}

				return new MLOutput(retVal, outMetadata);
			}
			else {
				throw new DMLRuntimeException("Unsupported runtime:" + DMLScript.rtplatform.name());
			}
		}
		finally {
			// Reset active MLContext.
			_activeMLContext = null;

			// Reset parser parameters.
			AParserWrapper.IGNORE_UNSPECIFIED_ARGS = false;
			DataExpression.REJECT_READ_WRITE_UNKNOWNS = true;
		}
	}
	/**
	 * This runs the DML script and returns the ExecutionContext for the caller to extract the output variables.
	 * The caller (which is compileAndExecuteScript) is expected to set inputSymbolTable with an appropriate matrix
	 * representation (RDD, MatrixObject).
	 * @throws IOException
	 * @throws DMLException
	 * @throws ParseException
	 */
	private ExecutionContext executeUsingSimplifiedCompilationChain(String dmlScriptFilePath, boolean isFile, HashMap<String, String> argVals, boolean parsePyDML,
			String[] inputs, String[] outputs, LocalVariableMap inputSymbolTable, String configFilePath)
			throws IOException, DMLException, ParseException {
		DMLConfig config = null;
		if(configFilePath == null) {
			config = new DMLConfig();
		}
		else {
			config = new DMLConfig(configFilePath);
		}

		for(Entry<String, String> param : _additionalConfigs.entrySet()) {
			config.setTextValue(param.getKey(), param.getValue());
		}

		ConfigurationManager.setConfig(config);

		String dmlScriptStr = null;
		if(isFile)
			dmlScriptStr = DMLScript.readDMLScript("-f", dmlScriptFilePath);
		else
			dmlScriptStr = DMLScript.readDMLScript("-s", dmlScriptFilePath);

		if(_monitorUtils != null) {
			_monitorUtils.setDMLString(dmlScriptStr);
		}

		// Simplified compilation chain
		_rtprog = null;

		// Parsing
		AParserWrapper parser = AParserWrapper.createParser(parsePyDML);
		DMLProgram prog = parser.parse(dmlScriptFilePath, dmlScriptStr, argVals);

		// Language validate
		DMLTranslator dmlt = new DMLTranslator(prog);
		dmlt.liveVariableAnalysis(prog);
		dmlt.validateParseTree(prog);

		// HOP construct/rewrite
		dmlt.constructHops(prog);
		dmlt.rewriteHopsDAG(prog);

		Explain.explain(prog);

		// Rewrite persistent reads/writes
		if(inputSymbolTable != null) {
			RewriteRemovePersistentReadWrite rewrite = new RewriteRemovePersistentReadWrite(inputs, outputs);
			ProgramRewriter rewriter2 = new ProgramRewriter(rewrite);
			rewriter2.rewriteProgramHopDAGs(prog);
		}

		// LOP construct and runtime program generation
		dmlt.constructLops(prog);
		_rtprog = prog.getRuntimeProgram(config);

		// Optional global data flow optimization
		if(OptimizerUtils.isOptLevel(OptimizationLevel.O4_GLOBAL_TIME_MEMORY)) {
			_rtprog = GlobalOptimizerWrapper.optimizeProgram(prog, _rtprog);
		}

		// Launching a SystemML appmaster is not required as it is already launched

		// Count number of compiled MR jobs / SP instructions
		ExplainCounts counts = Explain.countDistributedOperations(_rtprog);
		Statistics.resetNoOfCompiledJobs( counts.numJobs );

		// Initialize caching and scratch space
		DMLScript.initHadoopExecution(config);

		// Final cleanup of the runtime program
		JMLCUtils.cleanupRuntimeProgram(_rtprog, outputs);

		// Create and populate the execution context
		ExecutionContext ec = ExecutionContextFactory.createContext(_rtprog);
		if(inputSymbolTable != null) {
			ec.setVariables(inputSymbolTable);
		}

		// Core execution of the runtime program
		_rtprog.execute( ec );

		if(_monitorUtils != null)
			_monitorUtils.setExplainOutput(Explain.explain(_rtprog));

		return ec;
	}

	// -------------------------------- Private methods end ----------------------------------------------------------
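
	// Example (sketch) for the experimental read API defined below:
	//   scala> val X = ml.read(sqlContext, "V.csv", "csv")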
	// TODO: Add an additional create method to provide sep, missing values, etc. for CSV
	/**
	 * Experimental API: Might be discontinued in future release.
	 * @param sqlContext the SQLContext
	 * @param filePath path to the matrix file
	 * @param format file format passed to the DML read() statement (e.g., "csv" or "text")
	 * @throws IOException
	 * @throws DMLException
	 * @throws ParseException
	 */
	public MLMatrix read(SQLContext sqlContext, String filePath, String format) throws IOException, DMLException, ParseException {
		this.reset();
		this.registerOutput("output");
		MLOutput out = this.executeScript("output = read(\"" + filePath + "\", format=\"" + format + "\"); " + MLMatrix.writeStmt);
		JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = out.getBinaryBlockedRDD("output");
		MatrixCharacteristics mcOut = out.getMatrixCharacteristics("output");
		return MLMatrix.createMLMatrix(this, sqlContext, blocks, mcOut);
	}

//	// TODO: Test this in different scenarios: sparse/dense/mixed
//	/**
//	 * Experimental unstable API: Might be discontinued in future release
//	 * @param ml
//	 * @param sqlContext
//	 * @param mllibMatrix
//	 * @return
//	 * @throws DMLRuntimeException
//	 */
//	public MLMatrix read(SQLContext sqlContext, BlockMatrix mllibMatrix) throws DMLRuntimeException {
//		long nnz = -1; // TODO: Find the number of non-zeros from mllibMatrix ... This is important !!
//
//		JavaPairRDD<Tuple2<Object, Object>, Matrix> mllibBlocks = JavaPairRDD.fromJavaRDD(mllibMatrix.blocks().toJavaRDD());
//		long rlen = mllibMatrix.numRows(); long clen = mllibMatrix.numCols();
//		int brlen = mllibMatrix.numRowBlocks();
//		int bclen = mllibMatrix.numColBlocks();
//		if(mllibMatrix.numRowBlocks() != DMLTranslator.DMLBlockSize && mllibMatrix.numColBlocks() != DMLTranslator.DMLBlockSize) {
//			System.err.println("WARNING: Since the block size of the mllib matrix is not " + DMLTranslator.DMLBlockSize + ", it may cause "
//					+ "reblocks");
//		}
//
//		JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = mllibBlocks
//				.mapToPair(new ConvertMLLibBlocksToBinaryBlocks(rlen, clen, brlen, bclen));
//
//		MatrixCharacteristics mc = new MatrixCharacteristics(rlen, clen, brlen, bclen, nnz);
//		return MLMatrix.createMLMatrix(this, sqlContext, blocks, mc);
//	}
}




