eu.stratosphere.client.minicluster.NepheleMiniCluster Maven / Gradle / Ivy
/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.client.minicluster;
import java.lang.reflect.Method;
import eu.stratosphere.nephele.instance.HardwareDescriptionFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.stratosphere.api.common.io.FileInputFormat;
import eu.stratosphere.api.common.io.FileOutputFormat;
import eu.stratosphere.configuration.ConfigConstants;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.configuration.GlobalConfiguration;
import eu.stratosphere.nephele.client.JobClient;
import eu.stratosphere.nephele.jobgraph.JobGraph;
import eu.stratosphere.nephele.jobmanager.JobManager;
import eu.stratosphere.nephele.jobmanager.JobManager.ExecutionMode;
public class NepheleMiniCluster {
private static final Log LOG = LogFactory.getLog(NepheleMiniCluster.class);
private static final int DEFAULT_JM_RPC_PORT = 6498;
private static final int DEFAULT_TM_RPC_PORT = 6501;
private static final int DEFAULT_TM_DATA_PORT = 7501;
private static final long DEFAULT_MEMORY_SIZE = -1;
private static final int DEFAULT_NUM_TASK_MANAGER = 1;
private static final boolean DEFAULT_LAZY_MEMORY_ALLOCATION = true;
// --------------------------------------------------------------------------------------------
private final Object startStopLock = new Object();
private int jobManagerRpcPort = DEFAULT_JM_RPC_PORT;
private int taskManagerRpcPort = DEFAULT_TM_RPC_PORT;
private int taskManagerDataPort = DEFAULT_TM_DATA_PORT;
private int numTaskManager = DEFAULT_NUM_TASK_MANAGER;
private long memorySize = DEFAULT_MEMORY_SIZE;
private String configDir;
private String hdfsConfigFile;
private boolean lazyMemoryAllocation = DEFAULT_LAZY_MEMORY_ALLOCATION;
private boolean defaultOverwriteFiles = false;
private boolean defaultAlwaysCreateDirectory = false;
private JobManager jobManager;
// ------------------------------------------------------------------------
// Constructor and feature / properties setup
// ------------------------------------------------------------------------
public int getJobManagerRpcPort() {
return jobManagerRpcPort;
}
public void setJobManagerRpcPort(int jobManagerRpcPort) {
this.jobManagerRpcPort = jobManagerRpcPort;
}
public int getTaskManagerRpcPort() {
return taskManagerRpcPort;
}
public void setTaskManagerRpcPort(int taskManagerRpcPort) {
this.taskManagerRpcPort = taskManagerRpcPort;
}
public int getTaskManagerDataPort() {
return taskManagerDataPort;
}
public void setTaskManagerDataPort(int taskManagerDataPort) {
this.taskManagerDataPort = taskManagerDataPort;
}
public long getMemorySize() {
return memorySize;
}
public void setMemorySize(long memorySize) {
this.memorySize = memorySize;
}
public String getConfigDir() {
return configDir;
}
public void setConfigDir(String configDir) {
this.configDir = configDir;
}
public String getHdfsConfigFile() {
return hdfsConfigFile;
}
public void setHdfsConfigFile(String hdfsConfigFile) {
this.hdfsConfigFile = hdfsConfigFile;
}
public boolean isLazyMemoryAllocation() {
return lazyMemoryAllocation;
}
public void setLazyMemoryAllocation(boolean lazyMemoryAllocation) {
this.lazyMemoryAllocation = lazyMemoryAllocation;
}
public boolean isDefaultOverwriteFiles() {
return defaultOverwriteFiles;
}
public void setDefaultOverwriteFiles(boolean defaultOverwriteFiles) {
this.defaultOverwriteFiles = defaultOverwriteFiles;
}
public boolean isDefaultAlwaysCreateDirectory() {
return defaultAlwaysCreateDirectory;
}
public void setDefaultAlwaysCreateDirectory(boolean defaultAlwaysCreateDirectory) {
this.defaultAlwaysCreateDirectory = defaultAlwaysCreateDirectory;
}
public void setNumTaskManager(int numTaskManager) { this.numTaskManager = numTaskManager; }
public int getNumTaskManager() { return numTaskManager; }
// ------------------------------------------------------------------------
// Life cycle and Job Submission
// ------------------------------------------------------------------------
public JobClient getJobClient(JobGraph jobGraph) throws Exception {
Configuration configuration = jobGraph.getJobConfiguration();
configuration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost");
configuration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerRpcPort);
return new JobClient(jobGraph, configuration);
}
public void start() throws Exception {
synchronized (startStopLock) {
// set up the global configuration
if (this.configDir != null) {
GlobalConfiguration.loadConfiguration(configDir);
} else {
Configuration conf = getMiniclusterDefaultConfig(jobManagerRpcPort, taskManagerRpcPort,
taskManagerDataPort, memorySize, hdfsConfigFile, lazyMemoryAllocation, defaultOverwriteFiles,
defaultAlwaysCreateDirectory, numTaskManager);
GlobalConfiguration.includeConfiguration(conf);
}
// force the input/output format classes to load the default values from the configuration.
// we need to do this here, because the format classes may have been initialized before the mini cluster was started
initializeIOFormatClasses();
// before we start the JobManager, we need to make sure that there are no lingering IPC threads from before
// check that all threads are done before we return
Thread[] allThreads = new Thread[Thread.activeCount()];
int numThreads = Thread.enumerate(allThreads);
for (int i = 0; i < numThreads; i++) {
Thread t = allThreads[i];
String name = t.getName();
if (name.startsWith("IPC")) {
t.join();
}
}
// start the job manager
jobManager = new JobManager(ExecutionMode.LOCAL);
waitForJobManagerToBecomeReady(numTaskManager);
}
}
public void stop() throws Exception {
synchronized (this.startStopLock) {
if (jobManager != null) {
jobManager.shutdown();
jobManager = null;
}
}
}
// ------------------------------------------------------------------------
// Network utility methods
// ------------------------------------------------------------------------
private void waitForJobManagerToBecomeReady(int numTaskManagers) throws InterruptedException {
while (jobManager.getNumberOfTaskTrackers() < numTaskManagers) {
Thread.sleep(50);
}
}
private static void initializeIOFormatClasses() {
try {
Method im = FileInputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration");
im.setAccessible(true);
im.invoke(null);
Method om = FileOutputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration");
om.setAccessible(true);
om.invoke(null);
}
catch (Exception e) {
LOG.error("Cannot (re) initialize the globally loaded defaults. Some classes might mot follow the specified default behavior.");
}
}
public static Configuration getMiniclusterDefaultConfig(int jobManagerRpcPort, int taskManagerRpcPort,
int taskManagerDataPort, long memorySize, String hdfsConfigFile, boolean lazyMemory,
boolean defaultOverwriteFiles, boolean defaultAlwaysCreateDirectory, int numTaskManager)
{
final Configuration config = new Configuration();
// addresses and ports
config.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost");
config.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerRpcPort);
config.setInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, taskManagerRpcPort);
config.setInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, taskManagerDataPort);
// with the low dop, we can use few RPC handlers
config.setInteger(ConfigConstants.JOB_MANAGER_IPC_HANDLERS_KEY, 2);
config.setBoolean(ConfigConstants.TASK_MANAGER_MEMORY_LAZY_ALLOCATION_KEY, lazyMemory);
// polling interval
config.setInteger(ConfigConstants.JOBCLIENT_POLLING_INTERVAL_KEY, 2);
// hdfs
if (hdfsConfigFile != null) {
config.setString(ConfigConstants.HDFS_DEFAULT_CONFIG, hdfsConfigFile);
}
// file system behavior
config.setBoolean(ConfigConstants.FILESYSTEM_DEFAULT_OVERWRITE_KEY, defaultOverwriteFiles);
config.setBoolean(ConfigConstants.FILESYSTEM_OUTPUT_ALWAYS_CREATE_DIRECTORY_KEY, defaultAlwaysCreateDirectory);
if(memorySize < 0){
memorySize = HardwareDescriptionFactory.extractFromSystem().getSizeOfFreeMemory();
// at this time, we need to scale down the memory, because we cannot dedicate all free memory to the
// memory manager. we have to account for the buffer pools as well, and the job manager#s data structures
long bufferMem = GlobalConfiguration.getLong(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS) *
GlobalConfiguration.getLong(ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);
memorySize = (long) (0.8 * (memorySize - bufferMem));
//convert from bytes to mega bytes
memorySize >>>= 20;
}
memorySize /= numTaskManager;
config.setLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, memorySize/numTaskManager);
config.setInteger(ConfigConstants.LOCAL_INSTANCE_MANAGER_NUMBER_TASK_MANAGER, numTaskManager);
return config;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy