// Source: org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer
// Artifact: systemml ("Declarative Machine Learning"), retrieved from a Maven repository mirror.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.controlprogram.parfor.stat;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
/**
* Central place for analyzing and obtaining static infrastructure properties
* such as memory and number of logical processors.
*
*
*/
public class InfrastructureAnalyzer
{
	public static final long DEFAULT_JVM_SIZE = 512 * 1024 * 1024;

	//static local master node properties
	private static int     _localPar       = -1;
	private static long    _localJVMMaxMem = -1;
	private static boolean _isLtJDK8       = false;

	//static hadoop cluster properties (lazily initialized on first access,
	//because obtaining the cluster status is expensive)
	private static int     _remotePar             = -1;
	private static int     _remoteParMap          = -1;
	private static int     _remoteParReduce       = -1;
	private static long    _remoteJVMMaxMemMap    = -1;
	private static long    _remoteJVMMaxMemReduce = -1;
	private static long    _remoteMRSortMem       = -1;
	private static boolean _localJT               = false;
	private static long    _blocksize             = -1;
	private static boolean _yarnEnabled           = false;

	//static initialization, called once per JVM (on each node)
	static
	{
		//analyze local node properties
		analyzeLocalMachine();

		//analyze remote Hadoop cluster properties
		//analyzeHadoopCluster(); //note: due to overhead - analyze on-demand
	}

	/**
	 * Indicates if the JVM executing this class is older than JDK 8.
	 *
	 * @return true if the runtime java version is 1.x with x &lt; 8
	 */
	public static boolean isJavaVersionLessThanJDK8()
	{
		return _isLtJDK8;
	}

	///////
	//methods for obtaining parallelism properties

	/**
	 * Gets the number of logical processors of the current node,
	 * including hyper-threading if enabled.
	 *
	 * @return number of local logical processors
	 */
	public static int getLocalParallelism()
	{
		return _localPar;
	}

	/**
	 * Gets the number of cluster nodes (number of tasktrackers). If multiple tasktrackers
	 * are started per node, each tasktracker is viewed as an individual node.
	 *
	 * @return number of tasktrackers in the cluster
	 */
	public static int getRemoteParallelNodes()
	{
		if( _remotePar == -1 )
			analyzeHadoopCluster();
		return _remotePar;
	}

	/**
	 * Gets the total number of available map slots.
	 *
	 * @return cluster-wide map slot capacity
	 */
	public static int getRemoteParallelMapTasks()
	{
		if( _remoteParMap == -1 )
			analyzeHadoopCluster();
		return _remoteParMap;
	}

	/**
	 * Overrides the cached number of available map slots.
	 *
	 * @param pmap number of map slots
	 */
	public static void setRemoteParallelMapTasks(int pmap)
	{
		_remoteParMap = pmap;
	}

	/**
	 * Gets the total number of available reduce slots.
	 *
	 * @return cluster-wide reduce slot capacity
	 */
	public static int getRemoteParallelReduceTasks()
	{
		if( _remoteParReduce == -1 )
			analyzeHadoopCluster();
		return _remoteParReduce;
	}

	/**
	 * Overrides the cached number of available reduce slots.
	 *
	 * @param preduce number of reduce slots
	 */
	public static void setRemoteParallelReduceTasks(int preduce)
	{
		_remoteParReduce = preduce;
	}

	/**
	 * Gets the total number of available map and reduce slots.
	 *
	 * @return sum of cluster-wide map and reduce slot capacity
	 */
	public static int getRemoteParallelTasks()
	{
		//FIX: probe both counters (previously only the map count was checked,
		//so a stale reduce count of -1 could corrupt the sum after setRemoteParallelMapTasks)
		if( _remoteParMap == -1 || _remoteParReduce == -1 )
			analyzeHadoopCluster();
		return _remoteParMap + _remoteParReduce;
	}

	///////
	//methods for obtaining memory properties

	/**
	 * Gets the maximum memory [in bytes] of the current JVM.
	 *
	 * @return local JVM max heap size in bytes
	 */
	public static long getLocalMaxMemory()
	{
		return _localJVMMaxMem;
	}

	/**
	 * Overrides the cached local JVM max memory.
	 *
	 * @param localMem memory budget in bytes
	 */
	public static void setLocalMaxMemory( long localMem )
	{
		_localJVMMaxMem = localMem;
	}

	/**
	 * Gets the maximum memory [in bytes] of a hadoop map task JVM.
	 *
	 * @return map task JVM max heap size in bytes
	 */
	public static long getRemoteMaxMemoryMap()
	{
		if( _remoteJVMMaxMemMap == -1 )
			analyzeHadoopConfiguration();
		return _remoteJVMMaxMemMap;
	}

	/**
	 * Overrides the cached map task JVM max memory.
	 *
	 * @param remoteMem memory budget in bytes
	 */
	public static void setRemoteMaxMemoryMap( long remoteMem )
	{
		_remoteJVMMaxMemMap = remoteMem;
	}

	/**
	 * Gets the maximum memory [in bytes] of a hadoop reduce task JVM.
	 *
	 * @return reduce task JVM max heap size in bytes
	 */
	public static long getRemoteMaxMemoryReduce()
	{
		if( _remoteJVMMaxMemReduce == -1 )
			analyzeHadoopConfiguration();
		return _remoteJVMMaxMemReduce;
	}

	/**
	 * Overrides the cached reduce task JVM max memory.
	 *
	 * @param remoteMem memory budget in bytes
	 */
	public static void setRemoteMaxMemoryReduce( long remoteMem )
	{
		_remoteJVMMaxMemReduce = remoteMem;
	}

	/**
	 * Gets the maximum memory requirement [in bytes] of a given hadoop job.
	 *
	 * @param job job configuration to probe
	 * @return max of the configured map/reduce task memory, converted from MB to bytes
	 */
	public static long getRemoteMaxMemory( JobConf job )
	{
		//memory-for-task properties are configured in MB
		return (1024*1024) * Math.max(
			job.getMemoryForMapTask(),
			job.getMemoryForReduceTask() );
	}

	/**
	 * Gets the maximum sort buffer memory requirement [in bytes] of a hadoop task.
	 *
	 * @return configured io.sort.mb in bytes
	 */
	public static long getRemoteMaxMemorySortBuffer( )
	{
		if( _remoteMRSortMem == -1 )
			analyzeHadoopConfiguration();
		return _remoteMRSortMem;
	}

	/**
	 * Indicates if the cached hadoop configuration denotes local execution mode.
	 *
	 * @return true if running against a local jobtracker (and yarn disabled)
	 */
	public static boolean isLocalMode()
	{
		//use map memory as a proxy for "configuration analyzed yet"
		if( _remoteJVMMaxMemMap == -1 )
			analyzeHadoopConfiguration();
		return _localJT;
	}

	/**
	 * Indicates if the given job configuration denotes local execution mode.
	 *
	 * @param job job configuration to probe
	 * @return true if jobtracker is 'local' and yarn is not enabled
	 */
	public static boolean isLocalMode(JobConf job)
	{
		// Due to a bug in HDP related to fetching the "mode" of execution within mappers,
		// we explicitly probe the relevant properties instead of relying on results from
		// analyzeHadoopCluster().
		String jobTracker = job.get("mapred.job.tracker", "local");
		String framework = job.get("mapreduce.framework.name", "local");
		boolean isYarnEnabled = (framework!=null && framework.equals("yarn"));

		//FIX: use short-circuit && instead of bitwise & on booleans
		return ("local".equals(jobTracker) && !isYarnEnabled);
	}

	///////
	//methods for obtaining constraints or respective defaults

	/**
	 * Gets the maximum local parallelism constraint.
	 *
	 * @return default ck max constraint (local parallelism)
	 */
	public static int getCkMaxCP()
	{
		//default value (if not specified)
		return getLocalParallelism();
	}

	/**
	 * Gets the maximum remote parallelism constraint.
	 *
	 * @return default ck max constraint (remote map slots)
	 */
	public static int getCkMaxMR()
	{
		//default value (if not specified)
		return getRemoteParallelMapTasks();
	}

	/**
	 * Gets the maximum memory constraint [in bytes].
	 *
	 * @return min of local and remote map memory budgets
	 */
	public static long getCmMax()
	{
		//default value (if not specified)
		return Math.min( getLocalMaxMemory(), getRemoteMaxMemoryMap() );
	}

	/**
	 * Gets the HDFS blocksize of the used cluster in bytes.
	 *
	 * @return HDFS block size in bytes
	 */
	public static long getHDFSBlockSize()
	{
		if( _blocksize == -1 )
			analyzeHadoopConfiguration();
		return _blocksize;
	}

	/**
	 * Indicates if the cluster runs on yarn (mapreduce.framework.name = 'yarn').
	 *
	 * @return true if yarn is enabled
	 */
	public static boolean isYarnEnabled()
	{
		//use map memory as a proxy for "configuration analyzed yet"
		if( _remoteJVMMaxMemMap == -1 )
			analyzeHadoopConfiguration();
		return _yarnEnabled;
	}

	/**
	 * Extracts the maximum heap size [in bytes] from a JVM argument string.
	 * Falls back to {@link #DEFAULT_JVM_SIZE} if no -Xmx argument exists or
	 * parsing fails.
	 *
	 * @param javaOpts space-separated JVM arguments (e.g., "-Xmx2g -Xms512m")
	 * @return max heap size in bytes, or DEFAULT_JVM_SIZE
	 */
	public static long extractMaxMemoryOpt(String javaOpts)
	{
		long ret = -1; //mem in bytes
		try
		{
			StringTokenizer st = new StringTokenizer( javaOpts, " " );
			while( st.hasMoreTokens() )
			{
				String arg = st.nextToken();
				if( !arg.startsWith("-Xmx") ) //search for max mem
					continue;
				arg = arg.substring(4); //cut off "-Xmx"

				//parse number and unit (JVM accepts k/m/g suffixes, case-insensitive)
				if( arg.endsWith("g") || arg.endsWith("G") )
					ret = Long.parseLong(arg.substring(0,arg.length()-1)) * 1024 * 1024 * 1024;
				else if( arg.endsWith("m") || arg.endsWith("M") )
					ret = Long.parseLong(arg.substring(0,arg.length()-1)) * 1024 * 1024;
				else if( arg.endsWith("k") || arg.endsWith("K") )
					ret = Long.parseLong(arg.substring(0,arg.length()-1)) * 1024;
				else //no unit suffix: plain bytes per JVM -Xmx semantics
					//FIX: parse the entire token (previously substring(0,length-2)
					//silently truncated the last two digits of a byte value)
					ret = Long.parseLong(arg);
			}

			if( ret < 0 ) // no -Xmx argument found
			{
				ret = DEFAULT_JVM_SIZE;
			}
		}
		catch(Exception ex)
		{
			//if anything breaks during parsing (e.g., because args not specified correctly)
			ret = DEFAULT_JVM_SIZE;
		}
		return ret;
	}

	/**
	 * Replaces (or appends) the -Xmx argument of the given configuration key
	 * with the given memory budget.
	 *
	 * @param job   job configuration to modify
	 * @param key   configuration property holding the JVM arguments
	 * @param bytes max heap size in bytes (written as -Xmx&lt;MB&gt;M)
	 */
	public static void setMaxMemoryOpt(JobConf job, String key, long bytes)
	{
		String javaOptsOld = job.get( key );
		String xmx = "-Xmx" + (bytes/(1024*1024)) + "M";

		//FIX: guard against an absent property (previously NPE on split)
		if( javaOptsOld == null ) {
			job.set(key, xmx);
			return;
		}

		String[] tokens = javaOptsOld.split(" "); //account also for no ' '
		StringBuilder sb = new StringBuilder();
		for( String arg : tokens )
		{
			if( arg.startsWith("-Xmx") ) //search for max mem
				sb.append(xmx);
			else
				sb.append(arg);
			sb.append(" ");
		}

		job.set(key, sb.toString().trim());
	}

	/**
	 * Gets the fraction of running map/reduce tasks to existing
	 * map/reduce task slots.
	 *
	 * NOTE: on YARN the number of slots is a spurious indicator
	 * because containers are purely scheduled based on memory.
	 *
	 * @param mapOnly if true, only map slots are considered
	 * @return utilization in [0,1]; 0.0 in local mode or on an empty cluster
	 * @throws IOException if the cluster status cannot be obtained
	 */
	public static double getClusterUtilization(boolean mapOnly)
		throws IOException
	{
		//in local mode, the cluster utilization is always 0.0
		JobConf job = ConfigurationManager.getCachedJobConf();
		JobClient client = new JobClient(job);

		double ret = 0.0;
		try
		{
			ClusterStatus stat = client.getClusterStatus();
			if( stat != null ) //if in cluster mode
			{
				int capacity, current;
				if( mapOnly )
				{
					capacity = stat.getMaxMapTasks();
					current  = stat.getMapTasks();
				}
				else
				{
					capacity = stat.getMaxMapTasks() + stat.getMaxReduceTasks();
					current  = stat.getMapTasks() + stat.getReduceTasks();
				}
				//FIX: guard division by zero (previously NaN on zero-capacity clusters)
				if( capacity > 0 )
					ret = ((double)current) / capacity;
			}
		}
		finally {
			//FIX: close the job client (previously leaked)
			client.close();
		}

		return ret;
	}

	///////
	//internal methods for analysis

	/**
	 * Analyzes properties of local machine and JVM.
	 */
	private static void analyzeLocalMachine()
	{
		//step 1: basic parallelism and memory
		_localPar       = Runtime.getRuntime().availableProcessors();
		_localJVMMaxMem = Runtime.getRuntime().maxMemory();

		//step 2: analyze if used jdk older than jdk8
		//FIX: guard parsing so that single-component versions (e.g., "9" on JDK 9+)
		//do not abort class initialization with ExceptionInInitializerError
		try {
			String version = System.getProperty("java.version");
			int ix1 = version.indexOf('.');
			int ix2 = version.indexOf('.', ix1+1);
			int versionp1 = Integer.parseInt(version.substring(0, ix1));
			int versionp2 = Integer.parseInt(version.substring(ix1+1, ix2));
			_isLtJDK8 = (versionp1 == 1 && versionp2 < 8);
		}
		catch(Exception ex) {
			//unparsable "1.x.y" scheme implies a modern JDK (>= 9)
			_isLtJDK8 = false;
		}
	}

	/**
	 * Analyzes properties of hadoop cluster and configuration.
	 */
	private static void analyzeHadoopCluster()
	{
		try
		{
			JobConf job = ConfigurationManager.getCachedJobConf();
			JobClient client = new JobClient(job);
			ClusterStatus stat = client.getClusterStatus();
			if( stat != null ) //if in cluster mode
			{
				//analyze cluster status
				_remotePar       = stat.getTaskTrackers();
				_remoteParMap    = stat.getMaxMapTasks();
				_remoteParReduce = stat.getMaxReduceTasks();

				//analyze pure configuration properties
				analyzeHadoopConfiguration();
			}
		}
		catch (IOException e)
		{
			throw new RuntimeException("Unable to analyze infrastructure.",e);
		}
	}

	/**
	 * Analyzes only properties of hadoop configuration in order to prevent
	 * an expensive call to the cluster status.
	 */
	private static void analyzeHadoopConfiguration()
	{
		JobConf job = ConfigurationManager.getCachedJobConf();

		//io.sort.mb is configured in MB
		_remoteMRSortMem = (1024*1024) * job.getLong("io.sort.mb",100);

		//handle jvm max mem (map mem budget is relevant for map-side distcache and parfor)
		//(for robustness we probe both: child and map configuration parameters)
		String javaOpts1 = job.get("mapred.child.java.opts"); //internally mapred/mapreduce synonym
		String javaOpts2 = job.get("mapreduce.map.java.opts", null); //internally mapred/mapreduce synonym
		String javaOpts3 = job.get("mapreduce.reduce.java.opts", null); //internally mapred/mapreduce synonym
		if( javaOpts2 != null ) //specific value overrides generic
			_remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts2);
		else
			_remoteJVMMaxMemMap = extractMaxMemoryOpt(javaOpts1);
		if( javaOpts3 != null ) //specific value overrides generic
			_remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts3);
		else
			_remoteJVMMaxMemReduce = extractMaxMemoryOpt(javaOpts1);

		//HDFS blocksize (default 128MB)
		String blocksize = job.get(MRConfigurationNames.DFS_BLOCK_SIZE, "134217728");
		_blocksize = Long.parseLong(blocksize);

		//is yarn enabled
		String framework = job.get("mapreduce.framework.name");
		_yarnEnabled = (framework!=null && framework.equals("yarn"));

		//analyze if local mode (internally requires yarn_enabled)
		_localJT = analyzeLocalMode(job);
	}

	/**
	 * Determines if the given configuration denotes local execution mode.
	 *
	 * @param job job configuration to probe
	 * @return true if jobtracker is 'local' and yarn is not enabled
	 */
	private static boolean analyzeLocalMode(JobConf job)
	{
		//analyze if local mode (if yarn enabled, we always assume cluster mode
		//in order to workaround configuration issues on >=Hadoop 2.6)
		String jobTracker = job.get("mapred.job.tracker", "local");
		//FIX: use short-circuit && instead of bitwise & on booleans
		return "local".equals(jobTracker)
			&& !isYarnEnabled();
	}
}