org.apache.sysml.yarn.DMLYarnClient

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.yarn;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;

import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.parser.ParseException;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLScriptException;
import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing;
import org.apache.sysml.runtime.util.MapReduceTool;

/**
 * NOTES:
 *   * Security: By default, submitted applications are run as user 'yarn'.
 *     In order to allow for security and relative filenames on hdfs (/user/<username>/.),
 *     we can configure the LinuxContainerExecutor in yarn-site.xml, which runs the
 *     application as the user who submits it.
 *   * SystemML.jar file dependency: We need to submit the SystemML.jar along with the
 *     application. Unfortunately, 'hadoop jar' unpacks the jar such that we don't get a handle
 *     to the original jar filename. We currently parse the environment constant IBM_JAVA_COMMAND_LINE
 *     to get the jar filename. For robustness, we fall back to repackaging the unzipped files
 *     into a jar if this constant does not exist.
 * 
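 *   * Example yarn-site.xml settings for the LinuxContainerExecutor mentioned above
 *     (standard Hadoop YARN property names; the group value is illustrative only):
 *       yarn.nodemanager.container-executor.class =
 *         org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor
 *       yarn.nodemanager.linux-container-executor.group = hadoop
 * 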
 */
public class DMLYarnClient 
{
		
	private static final Log LOG = LogFactory.getLog(DMLYarnClient.class);
	
	//Internal configuration parameters
	// environment variable to obtain the original jar filename
	public static final String JARFILE_ENV_CONST = "IBM_JAVA_COMMAND_LINE"; 
	// environment variable to obtain default jvm arguments
	public static final String JVMOPTS_ENV_CONST = "HADOOP_OPTS";
	// environment variable to obtain mapred home (for robustness only)
	public static final String MAPRED_HOME_ENV_CONST = "HADOOP_MAPRED_HOME";
	public static final String HADOOP_HOME_ENV_CONST = "HADOOP_HOME";
	// default of 1 core since YARN scheduler does not take the number of cores into account yet 
	public static final int NUM_CORES = 1;  
	// factor for compute virtual memory to request based on given max heap size
	// (if the absolute overhead is larger than a threshold, we use this threshold as a constant overhead)
	public static final double MEM_FACTOR = 1.5; 
	public static final int MAX_MEM_OVERHEAD = 2*1024; //2GB
	// default application state report (in milliseconds)
	public static final int APP_STATE_INTERVAL = 200;
	// default application master name
	public static final String APPMASTER_NAME = "SystemML-AM";
	// default dml script file name for hdfs script serialization
	public static final String DML_SCRIPT_NAME = "script.dml";
	// default dml config file name for hdfs config serialization
	public static final String DML_CONFIG_NAME = "config.xml";
	// default SystemML jar file name for hdfs jar copy
	public static final String DML_JAR_NAME = "SystemML.jar";
	// default dml stop message file name for hdfs message serialization
	public static final String DML_STOPMSG_NAME = "stop_msg.txt";
	
	
	private String _dmlScript = null;
	private DMLConfig _dmlConfig = null;
	private String[] _args = null; 
	
	//hdfs file names local resources
	private String _hdfsJarFile   = null;
	private String _hdfsDMLScript = null;
	private String _hdfsDMLConfig = null;	
	
	/**
	 * Protected because this constructor is only supposed to be accessed via the proxy
	 * in the same package. This ensures robustness in case of missing yarn libraries.
	 * 
	 * @param dmlScriptStr dml script as a string
	 * @param conf dml configuration
	 * @param args original command line arguments
	 */
	protected DMLYarnClient(String dmlScriptStr, DMLConfig conf, String[] args)
	{
		_dmlScript = dmlScriptStr;
		_dmlConfig = conf;
		_args = args;
	}
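	
	// Illustrative usage sketch (only the constructor and launch call below are part of
	// this class; obtaining the script string, DMLConfig, and arguments is elided):
	//
	//   String script = ...;    // dml script read into a string
	//   DMLConfig conf = ...;   // parsed SystemML configuration
	//   String[] args = new String[]{"-f", "script.dml"};
	//   DMLYarnClient client = new DMLYarnClient(script, conf, args);
	//   boolean success = client.launchDMLYarnAppmaster();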
	
	
	/**
	 * Method to launch the dml yarn app master and execute the given dml script
	 * with the given configuration and jar file.
	 * 
	 * NOTE: on launching the yarn app master, we do not explicitly probe whether we
	 *    are running on a yarn or MR1 cluster. In case of MR1, the class YarnConfiguration
	 *    will not be found and a ClassNotFoundException is raised. In case of any
	 *    exception we fall back to running CP directly in the client process.
	 * 
	 * @return true if the dml program successfully executed as yarn app master
	 * @throws IOException if an IO error occurs
	 * @throws DMLScriptException if the dml program issued a script-level stop call
	 */
	protected boolean launchDMLYarnAppmaster() 
		throws IOException, DMLScriptException
	{
		boolean ret = false;
		String hdfsWD = null;
		
		try
		{
			Timing time = new Timing(true);
			
			// load yarn configuration
			YarnConfiguration yconf = new YarnConfiguration();
			
			// create yarn client
			YarnClient yarnClient = YarnClient.createYarnClient();
			yarnClient.init(yconf);
			yarnClient.start();
			
			// create application and get the ApplicationID
			YarnClientApplication app = yarnClient.createApplication();
			ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
			ApplicationId appId = appContext.getApplicationId();
			LOG.debug("Created application (applicationID: "+appId+")");
			
			// prepare hdfs working directory via ApplicationID
			// copy script, config, jar file to hdfs
			hdfsWD = DMLAppMasterUtils.constructHDFSWorkingDir(_dmlConfig, appId);
			copyResourcesToHdfsWorkingDir(yconf, hdfsWD);
			
			//construct command line argument
			String command = constructAMCommand(_args, _dmlConfig);
			LOG.debug("Constructed application master command: \n"+command);
			
			// set up the container launch context for the application master
			ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
			amContainer.setCommands( Collections.singletonList(command) );
			amContainer.setLocalResources( constructLocalResourceMap(yconf) );
			amContainer.setEnvironment( constructEnvionmentMap(yconf) );

			// Set up resource type requirements for ApplicationMaster
			int memHeap = _dmlConfig.getIntValue(DMLConfig.YARN_APPMASTERMEM);
			int memAlloc = (int) computeMemoryAllocation(memHeap);
			Resource capability = Records.newRecord(Resource.class);
			capability.setMemory( memAlloc );
			capability.setVirtualCores( NUM_CORES );
			LOG.debug("Requested application resources: memory="+memAlloc+", vcores="+NUM_CORES);

			// Finally, set-up ApplicationSubmissionContext for the application
			String qname = _dmlConfig.getTextValue(DMLConfig.YARN_APPQUEUE);
			appContext.setApplicationName(APPMASTER_NAME); // application name
			appContext.setAMContainerSpec(amContainer);
			appContext.setResource(capability);
			appContext.setQueue(qname); // queue
			LOG.debug("Configured application meta data: name="+APPMASTER_NAME+", queue="+qname);
			
			// submit application (non-blocking)
			yarnClient.submitApplication(appContext);

			// Check application status periodically (and output web ui address)
			ApplicationReport appReport = yarnClient.getApplicationReport(appId);
			LOG.info("Application tracking-URL: "+appReport.getTrackingUrl());
			YarnApplicationState appState = appReport.getYarnApplicationState();
			YarnApplicationState oldState = appState;
			LOG.info("Application state: " + appState);
			while( appState != YarnApplicationState.FINISHED
					&& appState != YarnApplicationState.KILLED
					&& appState != YarnApplicationState.FAILED ) 
			{
				Thread.sleep(APP_STATE_INTERVAL); //wait for 200ms
				appReport = yarnClient.getApplicationReport(appId);
				appState = appReport.getYarnApplicationState();
				if( appState != oldState ) {
					oldState = appState;
					LOG.info("Application state: " + appState);
				}
			}
			//check final status (failed or succeeded)
			FinalApplicationStatus finalState = appReport.getFinalApplicationStatus();
			LOG.info("Application final status: " + finalState);
			
			//show application and total runtime
			double appRuntime = (double)(appReport.getFinishTime() - appReport.getStartTime()) / 1000;
			LOG.info( "Application runtime: " + appRuntime + " sec." );
			LOG.info( "Total runtime: " + String.format("%.3f", time.stop()/1000) + " sec.");
			
			//raise script-level error in case of failed final status
			if( finalState != FinalApplicationStatus.SUCCEEDED )
			{
				//propagate script-level stop call message
				String stop_msg = readMessageToHDFSWorkingDir(_dmlConfig, yconf, appId);
				if( stop_msg != null ) 
					throw new DMLScriptException(stop_msg);
				
				//generic failure message
				throw new DMLRuntimeException("DML yarn app master finished with final status: "+finalState+".");
			}
			
			ret = true;
		}
		catch(DMLScriptException ex) {
			//rethrow DMLScriptException to propagate stop call
			throw ex;
		}
		catch(Exception ex) {
			LOG.error("Failed to run DML yarn app master.", ex);
			ret = false;
		}
		finally
		{
			//cleanup working directory
			if( hdfsWD != null )
				MapReduceTool.deleteFileIfExistOnHDFS(hdfsWD);
		}
		
		return ret;
	}
	
	/**
	 * 
	 * @param yconf yarn configuration
	 * @param hdfsWD hdfs working directory for this application
	 * @throws ParseException
	 * @throws IOException
	 * @throws DMLRuntimeException
	 * @throws InterruptedException 
	 */
	@SuppressWarnings("deprecation")
	private void copyResourcesToHdfsWorkingDir( YarnConfiguration yconf, String hdfsWD ) 
		throws ParseException, IOException, DMLRuntimeException, InterruptedException 
	{
		FileSystem fs = FileSystem.get(yconf);
		
		//create working directory
		MapReduceTool.createDirIfNotExistOnHDFS(hdfsWD, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);
		
		//serialize the dml config to HDFS file 
		//NOTE: we do not modify and ship the absolute scratch space path of the current user
		//because this might result in permission issues if the app master is run with a different user
		//(runtime plan migration during resource reoptimizations now needs to use qualified names
		//for shipping/reading intermediates) TODO modify resource reoptimizer on prototype integration.
		Path confPath = new Path(hdfsWD, DML_CONFIG_NAME);
		FSDataOutputStream fout = fs.create(confPath, true);
		//_dmlConfig.makeQualifiedScratchSpacePath(); 
		fout.writeBytes(_dmlConfig.serializeDMLConfig() + "\n");
		fout.close();
		_hdfsDMLConfig = confPath.makeQualified(fs).toString();
		LOG.debug("DML config written to HDFS file: "+_hdfsDMLConfig+"");

		//serialize the dml script to HDFS file
		Path scriptPath = new Path(hdfsWD, DML_SCRIPT_NAME);
		FSDataOutputStream fout2 = fs.create(scriptPath, true);
		fout2.writeBytes(_dmlScript);
		fout2.close();
		_hdfsDMLScript = scriptPath.makeQualified(fs).toString();
		LOG.debug("DML script written to HDFS file: "+_hdfsDMLScript+"");
		
		// copy local jar file to HDFS (try to get the original jar filename)
		String fname = getLocalJarFileNameFromEnvConst();
		if( fname == null ){
			//get location of unpacked jar classes and repackage (if required)
			String lclassFile = DMLYarnClient.class.getProtectionDomain().getCodeSource().getLocation().getPath().toString();
			File flclassFile = new File( lclassFile );
			if( !flclassFile.isDirectory() ) //called w/ jar 
				fname = lclassFile;
			else //called w/ unpacked jar (need to be repackaged)	
				fname = createJar(lclassFile);
		}
		Path srcPath = new Path(fname);
		Path dstPath = new Path(hdfsWD, srcPath.getName());
		FileUtil.copy(FileSystem.getLocal(yconf), srcPath, fs, dstPath, false, true, yconf);
		_hdfsJarFile = dstPath.makeQualified(fs).toString();	
		LOG.debug("Jar file copied from local file: "+srcPath.toString()+" to HDFS file: "+dstPath.toString());
	}
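	
	// Resulting layout of the hdfs working directory (illustrative):
	//   <hdfsWD>/config.xml     serialized dml configuration
	//   <hdfsWD>/script.dml     serialized dml script
	//   <hdfsWD>/<jar name>     SystemML jar copied from the local file system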
	
	/**
	 * 
	 * @return the local jar file name, or null if the environment constant does not exist
	 */
	private String getLocalJarFileNameFromEnvConst()
	{
		String fname = null;
		
		try
		{
			//parse environment constants
			Map<String, String> env = System.getenv();
			if( env.containsKey(JARFILE_ENV_CONST) ){
				String tmp = env.get(JARFILE_ENV_CONST);
				String[] tmpargs = tmp.split(" ");
				for( int i=0; i<tmpargs.length && fname==null; i++ )
					if( tmpargs[i]!=null && tmpargs[i].endsWith("RunJar") && i+1<tmpargs.length )
						fname = tmpargs[i+1];
			}
		}
		catch(Exception ex) {
			LOG.warn("Failed to parse environment variables ("+ex.getMessage()+")");
			fname = null; //indicate fallback to jar repackaging
		}
		
		return fname;
	}
	
	/**
	 * Fallback strategy for shipping the SystemML jar: repackage the unzipped class
	 * files of the given directory into a temporary jar via the jdk 'jar' tool.
	 * 
	 * @param dir directory of unpacked jar contents
	 * @return file name of the created jar
	 * @throws IOException
	 * @throws InterruptedException
	 */
	private String createJar( String dir ) 
		throws IOException, InterruptedException
	{
		//construct jar command
		String jarname = dir + File.separator + DML_JAR_NAME;
		File fdir = new File(dir);
		StringBuilder flist = new StringBuilder();
		for( File ftmp : fdir.listFiles() ) {
			flist.append(ftmp.getName());
			flist.append(" ");
		}
		
		//get jdk home (property 'java.home' refers to the jre home only)
		String javahome = System.getProperty("java.home");
		File fjdkhome = new File(new File(javahome).getParent() + File.separator + "bin");
		String jarPrefix = "";
		if( fjdkhome.exists() ) //exists for jdk installations
			jarPrefix = fjdkhome.getAbsolutePath() + File.separator;
		if( jarPrefix.isEmpty() )
			LOG.warn("Failed to find jdk home of running jre (java.home="+javahome+").");
		
		//execute jar packaging command
		String command = jarPrefix + "jar cf " + jarname + " " + flist.toString().trim();
		LOG.debug("Packaging jar of unzipped files: "+command);
		Process child = Runtime.getRuntime().exec(command, null, fdir);
		int c = 0;
		while( (c = child.getInputStream().read()) != -1 )
			System.out.print((char) c);
		while( (c = child.getErrorStream().read()) != -1 )
			System.err.print((char) c);
		child.waitFor();
		
		return jarname;
	}
	
	/**
	 * Constructs the command line for launching the dml app master jvm, i.e., the java
	 * command with heap configuration, main class, and rewritten script/config arguments.
	 * 
	 * @param args original command line arguments
	 * @param conf dml configuration
	 * @return app master launch command
	 */
	private String constructAMCommand( String[] args, DMLConfig conf )
	{
		//start command
		StringBuilder command = new StringBuilder();
		command.append(Environment.JAVA_HOME.$$() + "/bin/java");
		
		//add jvm arguments from the client environment (without heap configuration)
		if( System.getenv().containsKey(JVMOPTS_ENV_CONST) ) {
			String externalArgs = System.getenv(JVMOPTS_ENV_CONST);
			if( externalArgs != null ) {
				command.append(" " + externalArgs.replaceAll("-Xmx[0-9]*[kKmMgG]", "")
					.replaceAll("-Xms[0-9]*[kKmMgG]", "").replaceAll("-Xmn[0-9]*[kKmMgG]", ""));
			}
		}
		
		//add jvm heap configuration according to the configured app master memory
		int heapsize = conf.getIntValue(DMLConfig.YARN_APPMASTERMEM);
		command.append(" -Xmx"+heapsize+"m");
		command.append(" -Xms"+heapsize+"m");
		command.append(" -Xmn"+(int)(heapsize/10)+"m");
		command.append(' ');
		command.append(DMLAppMaster.class.getName());
	
		//add command line arguments (replace script and config by their hdfs copies)
		for( int i=0; i<_args.length; i++ )
		{
			String arg = _args[i];
			command.append(' ');
			if( i>0 && _args[i-1].equals("-f") ){
				command.append(_hdfsDMLScript);
				command.append(" -config=" + _hdfsDMLConfig);
			}
			else if( _args[i].startsWith("-config") ){
				//ignore because config added with -f
			}
			else	
				command.append(arg);
		}
	
		//setup stdout and stderr logs 
		command.append(" 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout");
		command.append(" 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr");
	
		return command.toString();
	}
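	
	// Illustrative shape of the constructed command for args {"-f", "script.dml"}
	// (paths abbreviated; jvm and heap flags depend on the configured app master memory):
	//   .../bin/java -Xmx2048m ... org.apache.sysml.yarn.DMLAppMaster
	//     hdfs://.../script.dml -config=hdfs://.../config.xml
	//     1><LOG_DIR>/stdout 2><LOG_DIR>/stderr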
	
	/**
	 * 
	 * @param yconf yarn configuration
	 * @return map of local resources (SystemML jar) for the app master container
	 * @throws IOException
	 */
	private Map<String, LocalResource> constructLocalResourceMap(YarnConfiguration yconf) 
		throws IOException 
	{
		Map<String, LocalResource> rMap = new HashMap<String, LocalResource>();
		Path path = new Path(_hdfsJarFile); 
		
		LocalResource resource = Records.newRecord(LocalResource.class);
		FileStatus jarStat = FileSystem.get(yconf).getFileStatus(path);
		resource.setResource(ConverterUtils.getYarnUrlFromPath(path));
		resource.setSize(jarStat.getLen());
		resource.setTimestamp(jarStat.getModificationTime());
		resource.setType(LocalResourceType.FILE);
		resource.setVisibility(LocalResourceVisibility.PUBLIC);
		
		rMap.put(DML_JAR_NAME, resource);
		return rMap;
	}
	
	/**
	 * 
	 * @param yconf yarn configuration
	 * @return environment map (classpath) for the app master container
	 * @throws IOException
	 */
	private Map<String, String> constructEnvionmentMap(YarnConfiguration yconf) 
		throws IOException
	{
		Map<String, String> eMap = new HashMap<String, String>();
		
		//setup default app master environment
		StringBuilder classpath = new StringBuilder();
		for (String value : yconf.getStrings(
				YarnConfiguration.YARN_APPLICATION_CLASSPATH,
				YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) 
		{
			if( classpath.length() > 0 )
				classpath.append(File.pathSeparator); 
			classpath.append( value.trim() );
		}
		
		//setup mapreduce appmaster environment (for robustness if not included in default environment)
		//for example, by default HDP 2.2 did not include mapred client libraries in this configuration
		//note: we cannot use mapreduce.application.classpath because it refers to HDFS and $PWD that needs to be setup 		
		Map<String, String> env = System.getenv();
		String mapred_home = null;
		//get mapred home via alternative environment variables
		if( env.containsKey(MAPRED_HOME_ENV_CONST) ) {
			mapred_home = env.get(MAPRED_HOME_ENV_CONST);
		} 
		else if ( env.containsKey(HADOOP_HOME_ENV_CONST) ){
			String tmp = env.get(HADOOP_HOME_ENV_CONST);
			mapred_home = tmp + File.separator + ".." + File.separator + "hadoop-mapreduce";
		}
		//concatenate mapred home libs to classpath
		if( mapred_home != null ) {
			if( classpath.length() > 0 )
				classpath.append( File.pathSeparator ); 
			classpath.append( mapred_home + File.separator + "*" );
			classpath.append( File.pathSeparator ); 
			classpath.append( mapred_home + File.separator + "lib" + File.separator + "*" );
		}
		
		eMap.put(Environment.CLASSPATH.name(), classpath.toString());
		MRApps.setClasspath(eMap, yconf);
		
		LOG.debug("Constructed environment classpath: "+classpath.toString());
		
		return eMap;
	}	
	
	/**
	 * 
	 * @param conf dml configuration
	 * @param yconf yarn configuration
	 * @param appId application id (used to reconstruct the hdfs working directory)
	 * @return stop message if present, null otherwise
	 */
	private String readMessageToHDFSWorkingDir(DMLConfig conf, YarnConfiguration yconf, ApplicationId appId)
	{
		String ret = null;
		
		//construct working directory (consistent with client)
		String hdfsWD = DMLAppMasterUtils.constructHDFSWorkingDir(conf, appId);
		Path msgPath = new Path(hdfsWD, DMLYarnClient.DML_STOPMSG_NAME);
		
		//read stop message from hdfs (if existing)
		try {
			FileSystem fs = FileSystem.get(yconf);
			if( fs.exists(msgPath) )
			{
				FSDataInputStream fin = fs.open(msgPath);
				BufferedReader br = new BufferedReader(new InputStreamReader(fin));
				ret = br.readLine();
				fin.close();
				LOG.debug("Stop message read from HDFS file "+msgPath+": "+ret );
			}
		}
		catch(Exception ex) {
			LOG.error("Failed to read stop message from HDFS file: "+msgPath, ex);
		}
		
		return ret;
	}
	
	/**
	 * 
	 * @param heapsize requested jvm heap size in MB
	 * @return memory allocation (heap plus overhead) to request from yarn, in MB
	 */
	public static long computeMemoryAllocation( long heapsize )
	{
		long ret = heapsize;
		
		if( heapsize * MEM_FACTOR - heapsize < MAX_MEM_OVERHEAD )
			ret = (long) (heapsize * MEM_FACTOR);
		else
			ret = heapsize + MAX_MEM_OVERHEAD;
		
		return ret;
	}
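	
	// Worked examples for the allocation above (MEM_FACTOR=1.5, MAX_MEM_OVERHEAD=2048 MB):
	//   heapsize = 2048 MB: overhead 0.5*2048 = 1024 MB < 2048 MB cap -> request 3072 MB
	//   heapsize = 8192 MB: overhead 0.5*8192 = 4096 MB > 2048 MB cap -> request 8192+2048 = 10240 MB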
}