// ml.shifu.guagua.yarn.GuaguaAppMaster (artifact page header: Maven / Gradle / Ivy)
/*
* Copyright [2013-2014] PayPal Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ml.shifu.guagua.yarn;
import java.io.IOException;
import java.lang.Thread.UncaughtExceptionHandler;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger;
import ml.shifu.guagua.GuaguaConstants;
import ml.shifu.guagua.GuaguaRuntimeException;
import ml.shifu.guagua.hadoop.io.GuaguaInputSplit;
import ml.shifu.guagua.yarn.util.GsonUtils;
import ml.shifu.guagua.yarn.util.YarnUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.client.api.async.NMClientAsync;
import org.apache.hadoop.yarn.client.api.async.impl.NMClientAsyncImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.jboss.netty.bootstrap.ServerBootstrap;
import org.jboss.netty.channel.ChannelEvent;
import org.jboss.netty.channel.ChannelHandlerContext;
import org.jboss.netty.channel.ChannelPipeline;
import org.jboss.netty.channel.ChannelPipelineFactory;
import org.jboss.netty.channel.ChannelState;
import org.jboss.netty.channel.ChannelStateEvent;
import org.jboss.netty.channel.Channels;
import org.jboss.netty.channel.ExceptionEvent;
import org.jboss.netty.channel.MessageEvent;
import org.jboss.netty.channel.SimpleChannelUpstreamHandler;
import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;
import org.jboss.netty.handler.codec.serialization.ClassResolvers;
import org.jboss.netty.handler.codec.serialization.ObjectDecoder;
import org.jboss.netty.handler.codec.serialization.ObjectEncoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Maps;
/**
* {@link GuaguaAppMaster} is application master to launch master and worker tasks.
*
*
* This app master is used to check and launch all tasks, not to run the master task of distributed training. Master
* task is run on another container.
*
*
* TODO: Web monitor is not supported in current app master.
*
*
* Fail-over is added like mapreduce. If one container is failed, it will be launched until 4 times by default.
*
*
* In each container, use a number start from 1 to mark as the id of the container for fail-over.
*/
public class GuaguaAppMaster {
private static final Logger LOG = LoggerFactory.getLogger(GuaguaAppMaster.class);
/** Exit code for YARN containers that were manually killed/aborted */
private static final int YARN_ABORT_EXIT_STATUS = -100;
/** Exit code for successfully run YARN containers */
private static final int YARN_SUCCESS_EXIT_STATUS = 0;
/** millis to sleep between heartbeats during long loops */
private static final int SLEEP_BETWEEN_HEARTBEATS_MSECS = 900;
/**
* Container id for current master container
*/
private ContainerId masterContainerId;
/**
* App attempt id
*/
private ApplicationAttemptId appAttemptId;
/**
* Whether master is done.
*/
private volatile boolean done;
/**
* Yarn conf
*/
private Configuration yarnConf;
/**
* Number of completed containers.
*/
private AtomicInteger completedCount;
/**
* Number of failed containers.
*/
private AtomicInteger failedCount;
/**
* Number of allocated containers.
*/
private AtomicInteger allocatedCount;
/**
* Number of successful containers.
*/
private AtomicInteger successfulCount;
/**
* Number of completed containers.
*/
private int containersToLaunch;
/**
* executor to launch container.
*/
private ExecutorService executor;
/**
* executor to check whether task is time out.
*/
private ExecutorService taskTimeoutExecutor;
/**
* Like mapred.task.timeout, if no update on this time, container will be killed.
*/
private long taskTimeOut = GuaguaYarnConstants.DEFAULT_TIME_OUT;
/**
* Heap memory setting for worker container.
*/
private int heapPerContainer;
/**
* Handle to communicate with resource manager.
*/
private AMRMClientAsync amRmClient;
/**
* Handle to communicate with the Node Manager
*/
private NMClientAsync nmClientAsync;
/**
* Listen to process the response from the Node Manager
*/
private NMCallbackHandler containerListener;
/**
* A reusable map of resources already in HDFS for each task to copy-to-local env and use to launch each
* GuaguaYarnTask.
*/
private static Map localResources;
/**
* For status update for clients - yet to be implemented.
* Hostname of the container.
*/
private String appMasterHostname;
/** Port on which the app master listens for status updates from clients */
private int appMasterRpcPort = 1234;
/** Tracking url to which app master publishes info for clients to monitor */
private String appMasterTrackingUrl = "";
/**
* Setting container args
*/
private String containerArgs;
private List inputSplits;
private ApplicationId appId;
private Map> partitionContainerMap;
private Map containerPartitionMap;
private static enum PartitionStatus {
INIT, SUCCESSFUL, FAILED, RETRY,
}
private Map partitionStatusMap;
private List failedPartitions;
private AtomicInteger partitionIndex;
private int maxContainerAttempts;
private int totalIterations;
private ByteBuffer allTokens;
private int rpcPort = GuaguaYarnConstants.DEFAULT_STATUS_RPC_PORT;
private String rpcHostName;
private static final Object LOCK = new Object();
private Map partitionProgress;
private ServerBootstrap rpcServer;
static {
// pick up new conf XML file and populate it with stuff exported from client
Configuration.addDefaultResource(GuaguaYarnConstants.GUAGUA_CONF_FILE);
}
/**
* Construct the GuaguappMaster, populate fields using env vars and set up by YARN framework in this execution
* container.
*
* @param cId
* the ContainerId
* @param aId
* the ApplicationAttemptId
*/
public GuaguaAppMaster(ContainerId cId, ApplicationAttemptId aId, Configuration conf) {
// Resolve local host name once; it is used both as the AM hostname reported to the RM and as
// the RPC host that task containers connect back to for progress updates.
try {
this.rpcHostName = this.appMasterHostname = InetAddress.getLocalHost().getHostName();
} catch (UnknownHostException e) {
// Non-fatal here, but rpcHostName/appMasterHostname stay null; registration may misbehave.
LOG.error("Error in getting local host name.", e);
}
this.masterContainerId = cId; // future good stuff will need me to operate.
this.appAttemptId = aId;
this.appId = this.getAppAttemptId().getApplicationId();
this.yarnConf = conf;
// Counters tracking container life-cycle; all updated from async RM/NM callbacks.
this.completedCount = new AtomicInteger(0);
this.failedCount = new AtomicInteger(0);
this.allocatedCount = new AtomicInteger(0);
this.successfulCount = new AtomicInteger(0);
// NOTE(review): generic type parameters in this file appear stripped (e.g. "ConcurrentHashMap>");
// presumably partition -> list of containers, containerId -> partition, partition -> status — confirm
// against the original source.
this.partitionContainerMap = new ConcurrentHashMap>();
this.containerPartitionMap = new ConcurrentHashMap();
this.partitionStatusMap = new ConcurrentHashMap();
// Partition ids are handed out starting from 1 via addAndGet(1).
this.partitionIndex = new AtomicInteger(0);
// CopyOnWriteArrayList allows safe removal while iterating in getCurrentPartition().
this.failedPartitions = new CopyOnWriteArrayList();;
// Fail-over: each partition may be retried up to this many container attempts.
this.maxContainerAttempts = this.getYarnConf().getInt(GuaguaYarnConstants.GUAGUA_YARN_MAX_CONTAINER_ATTEMPTS,
GuaguaYarnConstants.GUAGUA_YARN_DEFAULT_MAX_CONTAINER_ATTEMPTS);
this.heapPerContainer = this.getYarnConf().getInt(GuaguaYarnConstants.GUAGUA_CHILD_MEMORY,
GuaguaYarnConstants.GUAGUA_CHILD_DEFAULT_MEMORY);
this.totalIterations = this.getYarnConf().getInt(GuaguaConstants.GUAGUA_ITERATION_COUNT, 1);
// Prepend default JVM opts; user-supplied container args (if any) are appended after them.
String containerArgs = this.getYarnConf().get(GuaguaYarnConstants.GUAGUA_YARN_CONTAINER_ARGS);
if(containerArgs == null) {
containerArgs = GuaguaYarnConstants.GUAGUA_YARN_DEFAULT_CONTAINER_JAVA_OPTS;
} else {
containerArgs = GuaguaYarnConstants.GUAGUA_YARN_DEFAULT_CONTAINER_JAVA_OPTS + " " + containerArgs;
}
this.containerArgs = containerArgs;
this.rpcPort = getYarnConf().getInt(GuaguaYarnConstants.GUAGUA_YARN_STATUS_RPC_PORT,
GuaguaYarnConstants.DEFAULT_STATUS_RPC_PORT);
this.partitionProgress = new ConcurrentHashMap();
// One launcher thread per available core; a dedicated single thread watches for task timeouts.
this.executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
this.taskTimeoutExecutor = Executors.newSingleThreadExecutor();
this.taskTimeOut = getYarnConf().getLong(GuaguaYarnConstants.GUAGUA_TASK_TIMEOUT,
GuaguaYarnConstants.DEFAULT_TIME_OUT);
LOG.info("{}:{}", taskTimeOut, GuaguaYarnConstants.DEFAULT_TIME_OUT);
LOG.info("GuaguaAppMaster for ContainerId {} ApplicationAttemptId {}", cId, aId);
}
/**
* Coordinates all requests for guagua's worker/master task containers, and manages application liveness heartbeat,
* completion status, teardown, etc.
*
* @return success or failure
*/
public boolean run() throws YarnException, IOException {
    // Overall flow:
    // 1. read input from conf and rebuild the input splits (as GuaguaMapReduce does);
    // 2. collect security tokens so launched tasks can access HDFS;
    // 3. register the async RM/NM callback handlers and this AM with the RM;
    // 4. start the progress RPC server and the task-timeout watchdog;
    // 5. request one container per partition, then block until isDone() flips;
    // 6. a failed container triggers a fresh request, preferring failed partitions first.
    boolean allSucceeded = false;
    try {
        prepareInputSplits();
        // set tokens to make app master and task work well.
        getAllTokens();
        registerRMCallBackHandler();
        registerNMCallbackHandler();
        registerAMToRM();
        startRPCServer();
        startTaskTimeoutExecutor();
        madeAllContainerRequestToRM();
        LOG.info("Wait to finish ..");
        // Heartbeat-style wait; completion is signalled by the RM callback handler via setDone(true).
        while(!isDone()) {
            try {
                Thread.sleep(SLEEP_BETWEEN_HEARTBEATS_MSECS);
            } catch (InterruptedException ex) {
                Thread.currentThread().interrupt();
            }
        }
        LOG.info("Done {}", isDone());
    } catch (Throwable t) {
        LOG.error("Error in AppMaster run.", t);
        throw new GuaguaRuntimeException(t);
    } finally {
        // Always tear down executors/RPC and unregister from the RM, even on failure.
        shutdown();
        allSucceeded = finish();
    }
    return allSucceeded;
}
/**
 * Start the watchdog thread: every {@code taskTimeOut} millis, scan partition progress and ask the
 * NM to stop the latest container of any partition whose last update is older than the timeout.
 * The RM completion callback then treats the killed container as failed and requests a new one.
 */
private void startTaskTimeoutExecutor() {
this.taskTimeoutExecutor.submit(new Runnable() {
@Override
public void run() {
while(true) {
try {
Thread.sleep(GuaguaAppMaster.this.taskTimeOut);
} catch (InterruptedException e) {
// shutdownNow() interrupts us; restore the flag and exit the watchdog loop.
Thread.currentThread().interrupt();
break;
}
LOG.debug(GuaguaAppMaster.this.partitionProgress.toString());
for(Map.Entry entry: GuaguaAppMaster.this.partitionProgress
.entrySet()) {
GuaguaIterationStatus status = entry.getValue();
// doesn't work in the first iteration
// time==0 means no report yet; iteration 1 is exempt since startup can be slow.
if(status.getTime() != 0l && status.getCurrentIteration() != 1
&& (System.currentTimeMillis() - status.getTime()) > GuaguaAppMaster.this.taskTimeOut) {
// The last container in the list is the currently-running attempt for this partition.
List containers = GuaguaAppMaster.this.partitionContainerMap.get(entry.getKey());
Container container = containers.get(containers.size() - 1);
LOG.info(
"Container {} is timeout with timeout period {}, will be killed by node manager {}.",
container.getId(), GuaguaAppMaster.this.taskTimeOut, container.getNodeId());
GuaguaAppMaster.this.getNmClientAsync().stopContainerAsync(container.getId(),
container.getNodeId());
}
}
}
}
});
}
/**
 * Release AM-side resources: the container-launch executor, the Netty RPC server and the
 * task-timeout watchdog. Safe to call when everything already finished normally.
 */
protected void shutdown() {
    // if we get here w/o problems, the executor is already long finished.
    ExecutorService launchExecutor = getExecutor();
    if(launchExecutor != null && !launchExecutor.isTerminated()) {
        LOG.info("Forcefully terminating executors with done ={}", isDone());
        launchExecutor.shutdownNow(); // force kill, especially if got here by throw
    }
    ServerBootstrap server = this.rpcServer;
    if(server != null) {
        server.shutdown();
        server.releaseExternalResources();
    }
    ExecutorService watchdog = this.taskTimeoutExecutor;
    if(watchdog != null) {
        watchdog.shutdownNow();
    }
}
/**
* Start rpc server which is used to update progress.
*/
/**
 * Start the Netty RPC server on {@code rpcPort}; task containers send {@link GuaguaIterationStatus}
 * objects here which {@link ServerHandler} folds into the progress map.
 */
private void startRPCServer() {
    ServerBootstrap bootstrap = new ServerBootstrap(new NioServerSocketChannelFactory(
            Executors.newFixedThreadPool(GuaguaYarnConstants.DEFAULT_STATUS_RPC_SERVER_THREAD_COUNT),
            Executors.newCachedThreadPool(new MasterThreadFactory())));
    this.rpcServer = bootstrap;
    // Pipeline: Java-serialization object codec feeding the status handler.
    bootstrap.setPipelineFactory(new ChannelPipelineFactory() {
        public ChannelPipeline getPipeline() throws Exception {
            ObjectDecoder decoder = new ObjectDecoder(ClassResolvers.cacheDisabled(getClass().getClassLoader()));
            return Channels.pipeline(new ObjectEncoder(), decoder, new ServerHandler());
        }
    });
    // Bind and start to accept incoming connections.
    bootstrap.bind(new InetSocketAddress(rpcPort));
}
/**
* The master thread factory. Main feature is to print error log of worker thread.
*/
private static class MasterThreadFactory implements ThreadFactory {
static final AtomicInteger poolNumber = new AtomicInteger(1);
final ThreadGroup group;
final AtomicInteger threadNumber = new AtomicInteger(1);
final String namePrefix;
MasterThreadFactory() {
SecurityManager s = System.getSecurityManager();
group = (s != null) ? s.getThreadGroup() : Thread.currentThread().getThreadGroup();
namePrefix = "pool-" + poolNumber.getAndIncrement() + "-thread-";
}
public Thread newThread(Runnable r) {
Thread t = new Thread(group, r, namePrefix + threadNumber.getAndIncrement(), 0);
if(t.isDaemon()) {
t.setDaemon(false);
}
if(t.getPriority() != Thread.NORM_PRIORITY) {
t.setPriority(Thread.NORM_PRIORITY);
}
t.setUncaughtExceptionHandler(new UncaughtExceptionHandler() {
@Override
public void uncaughtException(Thread t, Throwable e) {
LOG.warn("Error message in thread {} with error message {}, error root cause {}.", t, e,
e.getCause());
// print stack???
}
});
return t;
}
}
/**
* {@link ServerHandler} is used to receive message and update progress for this yarn app.
*/
/**
 * {@link ServerHandler} is used to receive message and update progress for this yarn app.
 *
 * Messages are JSON-encoded {@link GuaguaIterationStatus} strings sent by task containers; each
 * one refreshes the partition's progress entry, and a "kill container" flag triggers stopping the
 * latest container of that partition (straggler mitigation).
 */
private class ServerHandler extends SimpleChannelUpstreamHandler {
@Override
public void handleUpstream(ChannelHandlerContext ctx, ChannelEvent e) throws Exception {
// Log channel life-cycle events at debug, but skip noisy INTEREST_OPS changes.
if(e instanceof ChannelStateEvent && ((ChannelStateEvent) e).getState() != ChannelState.INTEREST_OPS) {
LOG.debug(e.toString());
}
super.handleUpstream(ctx, e);
}
@Override
public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) {
GuaguaIterationStatus status = GsonUtils.fromJson(e.getMessage().toString(), GuaguaIterationStatus.class);
LOG.info("Receive RPC status:{}", status);
// LOCK guards partitionProgress/partitionContainerMap against the RM progress callback.
synchronized(LOCK) {
GuaguaAppMaster.this.partitionProgress.put(status.getPartition(), status);
}
if(status.isKillContainer()) {
List containers;
synchronized(LOCK) {
containers = GuaguaAppMaster.this.partitionContainerMap.get(status.getPartition());
}
LOG.info("containers:{}", containers);
// Last element is the currently-running attempt for this partition.
Container container = containers.get(containers.size() - 1);
LOG.info("Container {} in node {} is killed because of straggler condition.", container.getId(),
container.getNodeId());
GuaguaAppMaster.this.getNmClientAsync().stopContainerAsync(container.getId(), container.getNodeId());
}
}
@Override
public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) {
// Any channel error simply drops the connection; tasks will reconnect/resend.
e.getChannel().close();
}
}
/**
* Prepare input splits for containers
*/
/**
 * Rebuild input splits from configuration and size the container request count to match:
 * one container per split (masters included).
 *
 * @throws IOException if split deserialization fails
 */
private void prepareInputSplits() throws IOException {
    this.inputSplits = getNewSplits(getYarnConf());
    int splitCount = this.inputSplits.size();
    this.setContainersToLaunch(splitCount);
    LOG.info("Input split size including master: {}", splitCount);
}
/**
 * Deserialize the input splits the client exported into configuration.
 *
 * Split count = master count + worker count; partition ids are 1-based, and each partition also
 * gets a fresh (empty) progress entry in {@code partitionProgress}.
 *
 * @param conf
 *            configuration holding the master count
 * @return the deserialized splits, in partition order
 * @throws IOException declared for callers; JSON decoding itself is done via GsonUtils
 */
public List getNewSplits(Configuration conf) throws IOException {
    int masters = conf.getInt(GuaguaConstants.GUAGUA_MASTER_NUMBER, GuaguaConstants.DEFAULT_MASTER_NUMBER);
    int size = getYarnConf().getInt(GuaguaConstants.GUAGUA_WORKER_NUMBER, 0) + masters;
    List newSplits = new ArrayList(size);
    int partition = 1;
    while(partition <= size) {
        // Splits were serialized one per key, suffixed with the 1-based partition id.
        String json = getYarnConf().get(GuaguaYarnConstants.GUAGUA_YARN_INPUT_SPLIT_PREFIX + partition);
        newSplits.add(GsonUtils.fromJson(json, GuaguaInputSplit.class));
        this.partitionProgress.put(partition, new GuaguaIterationStatus());
        partition += 1;
    }
    return newSplits;
}
/**
* Populate allTokens with the tokens received
*/
/**
 * Populate allTokens with the tokens received.
 *
 * Serializes the current user's credentials for injection into task containers, after removing
 * the AM->RM token so tasks cannot talk to the ResourceManager directly.
 */
private void getAllTokens() throws IOException {
Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
DataOutputBuffer dob = new DataOutputBuffer();
credentials.writeTokenStorageToStream(dob);
// Now remove the AM->RM token so that containers cannot access it.
Iterator> iter = credentials.getAllTokens().iterator();
while(iter.hasNext()) {
Token> token = iter.next();
if(LOG.isDebugEnabled()) {
LOG.debug("Token type : {}", token.getKind());
}
if(token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
iter.remove();
}
}
// NOTE: removal above only edits the live Credentials; the buffer was already written, so the
// serialized bytes below still contain all tokens as written before iteration — TODO confirm
// against upstream whether writeTokenStorageToStream should happen after the removal loop.
this.allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
}
/**
* Register RM callback and start listening
*/
/**
 * Create, initialize and start the async AM-RM client with our {@link RMCallbackHandler};
 * heartbeat interval is 1000ms.
 */
private void registerRMCallBackHandler() {
    RMCallbackHandler allocListener = new RMCallbackHandler();
    AMRMClientAsync rmClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    setAmRMClient(rmClient);
    rmClient.init(getYarnConf());
    rmClient.start();
}
/**
* Register NM callback and start listening
*/
/**
 * Create, initialize and start the async NM client wired to our {@link NMCallbackHandler}.
 */
private void registerNMCallbackHandler() {
    NMCallbackHandler listener = new NMCallbackHandler();
    setContainerListener(listener);
    NMClientAsync nmClient = new NMClientAsyncImpl(listener);
    setNmClientAsync(nmClient);
    nmClient.init(getYarnConf());
    nmClient.start();
}
/**
* Register AM to RM
*
* @return AM register response
*/
/**
 * Register AM to RM.
 *
 * Registers this Application Master with the YARN ResourceManager so container requests can be
 * issued afterwards.
 *
 * @return AM register response
 * @throws YarnException on RM-side registration failure
 * @throws IllegalStateException wrapping any IOException during registration
 */
private RegisterApplicationMasterResponse registerAMToRM() throws YarnException {
    if(UserGroupInformation.isSecurityEnabled()) {
        LOG.info("SECURITY ENABLED ");
    }
    try {
        return getAmRMClient().registerApplicationMaster(this.appMasterHostname, this.appMasterRpcPort,
                this.appMasterTrackingUrl);
    } catch (IOException ioe) {
        throw new IllegalStateException("GuaguaAppMaster failed to register with RM.", ioe);
    }
}
/**
* Add all containers' request
*/
/**
 * Add all containers' request: one container ask per task to launch. The RM fulfils these
 * asynchronously, possibly across several allocation callbacks.
 */
private void madeAllContainerRequestToRM() {
    for(int i = 0; i < getContainersToLaunch(); i++) {
        getAmRMClient().addContainerRequest(setupContainerAskForRM());
    }
}
/**
 * Ask the RM for one more container — used to replace a failed container attempt.
 */
private void madeOneContainerRequestToRM() {
    getAmRMClient().addContainerRequest(setupContainerAskForRM());
}
/**
* Setup the request that will be sent to the RM for the container ask.
*
* @return the setup ResourceRequest to be sent to RM
*/
/**
 * Setup the request that will be sent to the RM for the container ask.
 *
 * No host/rack constraints are set; data locality is approximated later when allocated containers
 * are matched against split hosts. TODO, better here to request according to hosts of splits.
 *
 * @return the setup ResourceRequest to be sent to RM
 */
private ContainerRequest setupContainerAskForRM() {
    Priority priority = Records.newRecord(Priority.class);
    priority.setPriority(GuaguaYarnConstants.GUAGUA_YARN_DEFAULT_PRIORITY);
    Resource resource = Records.newRecord(Resource.class);
    resource.setMemory(getHeapPerContainer());
    int vCores = getYarnConf().getInt(GuaguaYarnConstants.GUAGUA_YARN_TASK_VCORES,
            GuaguaYarnConstants.GUAGUA_YARN_TASK_DEFAULT_VCORES);
    resource.setVirtualCores(vCores);
    ContainerRequest request = new ContainerRequest(resource, null, null, priority);
    LOG.info("Requested container ask: {}", request.toString());
    return request;
}
/**
* Call when the application is done
*
* @return if all containers succeed
*/
/**
 * Call when the application is done.
 *
 * Stops the NM client, unregisters from the RM with SUCCEEDED/FAILED depending on whether every
 * container completed successfully, then stops the RM client.
 *
 * @return if all containers succeed
 */
private boolean finish() {
    // When the application completes, it should stop all running containers
    LOG.info("Application completed. Stopping running containers");
    getNmClientAsync().stop();
    // When the application completes, it should send a finish application
    // signal to the RM
    LOG.info("Application completed. Signalling finish to RM");
    boolean success = getSuccessfulCount().get() == getContainersToLaunch();
    FinalApplicationStatus appStatus = success ? FinalApplicationStatus.SUCCEEDED : FinalApplicationStatus.FAILED;
    String appMessage = null;
    if(!success) {
        appMessage = String.format("Diagnostics total=%s, completed=%s, failed=%s.", getContainersToLaunch(),
                getCompletedCount().get(), getFailedCount().get());
    }
    try {
        getAmRMClient().unregisterApplicationMaster(appStatus, appMessage, this.appMasterTrackingUrl);
    } catch (YarnException ex) {
        LOG.error("Failed to unregister application", ex);
    } catch (IOException e) {
        LOG.error("Failed to unregister application", e);
    }
    getAmRMClient().stop();
    return success;
}
/**
* CallbackHandler to process RM async calls
*/
/**
 * CallbackHandler to process RM async calls.
 *
 * Drives the fail-over logic: successful containers bump successfulCount; failed ones put their
 * partition back on the failedPartitions list and trigger a replacement container request.
 */
private class RMCallbackHandler implements AMRMClientAsync.CallbackHandler {
/**
 * Classify each completed container by exit status, update per-partition bookkeeping, and flip
 * the done flag when all partitions succeeded or a partition exhausted its attempts.
 */
@Override
public void onContainersCompleted(List completedContainers) {
LOG.info("Got response from RM for container ask, completedCnt={}", completedContainers.size());
for(ContainerStatus containerStatus: completedContainers) {
LOG.info("Got container status for containerID={}, state={}, exitStatus={}, diagnostics={}.",
containerStatus.getContainerId(), containerStatus.getState(), containerStatus.getExitStatus(),
containerStatus.getDiagnostics());
// A completion for a container we never mapped to a partition — count it and move on.
if(!GuaguaAppMaster.this.containerPartitionMap.containsKey(containerStatus.getContainerId().toString())) {
getCompletedCount().incrementAndGet();
LOG.info("Why such container {} is started, no partition. Exited with status:{}",
containerStatus.getContainerId(), containerStatus.getExitStatus());
continue;
}
int partition = GuaguaAppMaster.this.containerPartitionMap.get(containerStatus.getContainerId()
.toString());
// Fail-over cap: the attempt count for a partition is the size of its container list.
if(GuaguaAppMaster.this.partitionContainerMap.get(partition).size() >= GuaguaAppMaster.this.maxContainerAttempts) {
setDone(true);
LOG.info("One partition {} has more than max attempt {} ", partition,
GuaguaAppMaster.this.maxContainerAttempts);
return;
}
switch(containerStatus.getExitStatus()) {
case YARN_SUCCESS_EXIT_STATUS:
GuaguaAppMaster.this.partitionStatusMap.put(partition, PartitionStatus.SUCCESSFUL);
getSuccessfulCount().incrementAndGet();
break;
case YARN_ABORT_EXIT_STATUS:
// Container was killed/aborted by the framework; counted as completed but neither
// success nor failure, and no replacement is requested here.
LOG.info("YARN_ABORT_EXIT_STATUS: Container id {} exits with {}",
containerStatus.getContainerId(), YARN_ABORT_EXIT_STATUS);
break; // not success or fail
default:
// Any other exit code is a task failure: mark it, queue the partition for retry and
// ask the RM for a replacement container.
LOG.info("default: Container id {} exits with {}", containerStatus.getContainerId(),
containerStatus.getExitStatus());
GuaguaAppMaster.this.partitionStatusMap.put(partition, PartitionStatus.FAILED);
GuaguaAppMaster.this.failedPartitions.add(partition);
GuaguaAppMaster.this.madeOneContainerRequestToRM();
getFailedCount().incrementAndGet();
break;
}
getCompletedCount().incrementAndGet();
}
if(getSuccessfulCount().get() == getContainersToLaunch()) {
setDone(true);
LOG.info("All container compeleted. done = {} ", isDone());
} else {
LOG.info(
"After completion of one conatiner. current status is: completedCount:{} containersToLaunch:{} successfulCount:{} failedCount:{}.",
getCompletedCount().get(), getContainersToLaunch(), getSuccessfulCount().get(),
getFailedCount().get());
}
}
/** Hand newly allocated containers to the partition-assignment/launch logic. */
@Override
public void onContainersAllocated(List allocatedContainers) {
LOG.info("Got response from RM for container ask, allocatedCnt={}", allocatedContainers.size());
getAllocatedCount().addAndGet(allocatedContainers.size());
LOG.info("Total allocated # of container so far {} : allocated out of required {}.", getAllocatedCount()
.get(), getContainersToLaunch());
startContainerLaunchingThreads(allocatedContainers);
}
@Override
public void onShutdownRequest() {
setDone(true);
getAmRMClient().stop();
}
@Override
public void onNodesUpdated(List updatedNodes) {
}
/**
 * Aggregate progress = sum of per-partition current iterations over (partitions * totalIterations).
 */
@Override
public float getProgress() {
// set progress to deliver to RM on next heartbeat
int sum = 0, totalSum = 0;
synchronized(LOCK) {
for(Map.Entry entry: GuaguaAppMaster.this.partitionProgress.entrySet()) {
sum += entry.getValue().getCurrentIteration();
totalSum += GuaguaAppMaster.this.totalIterations;
}
return (sum * 1.0f) / totalSum;
}
}
@Override
public void onError(Throwable e) {
setDone(true);
getAmRMClient().stop();
}
}
/**
* For each container successfully allocated, attempt to set up and launch a Guagua worker/master task.
*
* @param allocatedContainers
* the containers we have currently allocated.
*/
/**
 * For each container successfully allocated, attempt to set up and launch a Guagua worker/master task.
 *
 * Assigns a partition to each allocated container (failed partitions first, then the next fresh
 * index), preferring a container whose host matches the partition's split location; surplus
 * containers are released back to the RM.
 *
 * @param allocatedContainers
 *            the containers we have currently allocated.
 */
private void startContainerLaunchingThreads(final List allocatedContainers) {
Map> hostContainterMap = getHostContainersMap(allocatedContainers);
int size = allocatedContainers.size();
while(size > 0) {
int currentPartition = getCurrentPartition();
// -1 means no pending partition: all work is assigned, so release the surplus containers.
if(currentPartition == -1) {
LOG.warn("Request too many resources. TODO, remove containers no needed.");
for(Container container: allocatedContainers) {
GuaguaAppMaster.this.getAmRMClient().releaseAssignedContainer(container.getId());
}
break;
}
// Prefer a container co-located with the partition's split; otherwise take any container.
Container container = getDataLocalityContainer(hostContainterMap, currentPartition);
if(container == null) {
container = allocatedContainers.get(0);
}
allocatedContainers.remove(container);
LOG.info(
"Launching command on a new container., containerId={}, containerNode={}, containerPort={}, containerNodeURI={}, containerResourceMemory={}",
container.getId(), container.getNodeId().getHost(), container.getNodeId().getPort(),
container.getNodeHttpAddress(), container.getResource().getMemory());
// Append this attempt to the partition's container history (list size = attempt count).
List list = this.partitionContainerMap.get(currentPartition);
if(list == null) {
list = new ArrayList();
}
list.add(container);
this.partitionContainerMap.put(currentPartition, list);
this.containerPartitionMap.put(container.getId().toString(), currentPartition);
this.partitionStatusMap.put(currentPartition, PartitionStatus.INIT);
// Launch asynchronously so this RM callback thread is not blocked on NM round-trips.
LaunchContainerRunnable runnableLaunchContainer = new LaunchContainerRunnable(container,
getContainerListener(), currentPartition);
getExecutor().execute(runnableLaunchContainer);
size = allocatedContainers.size();
}
}
/**
 * Group the allocated containers by the host of their node, for the data-locality lookup in
 * {@link #getDataLocalityContainer}.
 *
 * @param allocatedContainers
 *            containers just handed to us by the RM
 * @return map of host name to the containers running on that host
 */
private Map> getHostContainersMap(final List allocatedContainers) {
Map> hostContainterMap = new HashMap>();
for(Container container: allocatedContainers) {
String host = container.getNodeId().getHost();
List containers = hostContainterMap.get(host);
if(containers == null) {
containers = new ArrayList();
}
containers.add(container);
hostContainterMap.put(host, containers);
}
return hostContainterMap;
}
/**
* Find a container with the same host for input split. Not a good implementation for data locality. Check
* map-reduce implementation.
*
* TODO RACK-LOCAL implementation
*/
/**
 * Find a container with the same host for input split. Not a good implementation for data locality. Check
 * map-reduce implementation.
 *
 * Looks up the first location of the partition's first file split and tries to take a container
 * on that host; failing that, takes the first container found in the host map. The chosen
 * container is removed from its host list so it cannot be picked twice.
 *
 * TODO RACK-LOCAL implementation
 */
private Container getDataLocalityContainer(Map> hostContainterMap, int currentPartition) {
// Partition ids are 1-based; the splits list is 0-based.
GuaguaInputSplit inputSplit = (GuaguaInputSplit) (this.inputSplits.get(currentPartition - 1));
String host = null;
FileSplit[] fileSplits = inputSplit.getFileSplits();
if(fileSplits != null) {
try {
// Only the first location of the first file split is considered for locality.
host = fileSplits[0].getLocations()[0];
} catch (Exception mayNotHappen) {
host = null;
}
}
List containers = hostContainterMap.get(host);
Container container = null;
if(containers != null && !containers.isEmpty()) {
container = containers.remove(0);
hostContainterMap.put(host, containers);
LOG.info("find a container {} with host {} for partition {} and split {}.", container, host,
currentPartition, inputSplit);
return container;
}
// if not find a container, try to choose the first one.
Set>> entrySet = hostContainterMap.entrySet();
String firstHost = null;
List firstContainers = null;
for(Entry> entry: entrySet) {
firstHost = entry.getKey();
firstContainers = entry.getValue();
if(firstContainers != null && !firstContainers.isEmpty()) {
container = firstContainers.remove(0);
break;
}
}
// NOTE(review): if hostContainterMap is empty, firstHost stays null and a null->null entry is
// put here while null is returned; caller falls back to allocatedContainers.get(0) — confirm
// this path is intended.
hostContainterMap.put(firstHost, firstContainers);
LOG.info("find a container {} with host {} for partition {} and split {}.", container, host, currentPartition,
inputSplit);
return container;
}
/**
 * Pick the next partition to assign to a container: a previously failed partition if any exists
 * (retry first), otherwise the next fresh 1-based partition index.
 *
 * @return the partition id to launch, or -1 when all partitions are already assigned
 */
private int getCurrentPartition() {
LOG.info("failed container request size:{} {}", this.failedPartitions.size(), this.failedPartitions);
Iterator it = this.failedPartitions.iterator();
// Launch and start the container on a separate thread to keep the main thread unblocked as all containers
// may not be allocated at one go.
int currentPartition = 0;
if(it.hasNext()) {
currentPartition = it.next();
// because we use CopyOnWriteArrayList, we can remove object in iteration
this.failedPartitions.remove(Integer.valueOf(currentPartition));
LOG.info("failed container request size after remove:{} {}", this.failedPartitions.size(),
this.failedPartitions);
} else {
LOG.info("partitionIndex{} containersToLaunch {}", this.partitionIndex.get(), this.containersToLaunch);
// All fresh partitions handed out already: signal the caller to release the container.
if(this.partitionIndex.get() >= this.containersToLaunch) {
return -1;
}
// addAndGet(1) yields 1 for the first partition, matching the 1-based split keys.
currentPartition = this.partitionIndex.addAndGet(1);
}
return currentPartition;
}
/**
* Thread to connect to the {@link ContainerManager} and launch the container that will house one of our Guagua
* worker (or master) tasks.
*/
/**
 * Thread to connect to the {@link ContainerManager} and launch the container that will house one of our Guagua
 * worker (or master) tasks.
 */
private class LaunchContainerRunnable implements Runnable {
    /** Allocated container */
    private Container container;
    /** NM listener */
    private NMCallbackHandler containerListener;
    /** 1-based partition id this container attempt will execute. */
    private final int partition;
    /**
     * Constructor.
     *
     * @param container
     *            Allocated container
     * @param containerListener
     *            container listener.
     * @param partition
     *            the partition id assigned to this container
     */
    public LaunchContainerRunnable(final Container container, NMCallbackHandler containerListener, int partition) {
        this.container = container;
        this.containerListener = containerListener;
        this.partition = partition;
    }
    /**
     * Connects to CM, sets up container launch context for shell command and eventually dispatches the container
     * start request to the CM.
     */
    @Override
    public void run() {
        // configure the launcher for the guagua task it will host
        ContainerLaunchContext ctx = buildContainerLaunchContext();
        // register the container with the NM listener, then request the CM to start it as spec'd
        this.containerListener.addContainer(this.container.getId(), this.container);
        getNmClientAsync().startContainerAsync(this.container, ctx);
    }
    /**
     * Boilerplate to set up the ContainerLaunchContext to tell the Container Manager how to launch our guagua task
     * in the execution container we have already allocated.
     *
     * @return a populated ContainerLaunchContext object.
     */
    private ContainerLaunchContext buildContainerLaunchContext() {
        LOG.info("Setting up container launch container for containerid={}", container.getId());
        ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
        // args inject the CLASSPATH, heap MB, and TaskAttemptID for launched task
        final List commands = generateShellExecCommand();
        // Fix: use SLF4J parameter substitution; the original concatenated the commands onto the
        // format string, leaving the "{}" placeholder literally in the logged message.
        LOG.info("Conatain launch Commands :{}", commands);
        launchContext.setCommands(commands);
        // Set up tokens for the container too. We are populating them mainly for NodeManagers to be able to
        // download any files in the distributed file-system. The tokens are otherwise also useful in cases, for
        // e.g., when one is running a "hadoop dfs" like command
        launchContext.setTokens(allTokens.slice());
        // Set the environment variables to inject into remote task's container
        buildEnvironment(launchContext);
        // Set the local resources: just send the copies already in HDFS
        launchContext.setLocalResources(getTaskResourceMap());
        return launchContext;
    }
    /**
     * Generates our command line string used to launch our guagua tasks.
     *
     * Program args (space separated): cluster timestamp, app id, container id, attempt id,
     * partition, RPC host, RPC port — consumed by GuaguaYarnTask.main.
     *
     * @return the BASH shell commands to launch the job.
     */
    private List generateShellExecCommand() {
        String programArgs = new StringBuilder(300)
                .append(getAppAttemptId().getApplicationId().getClusterTimestamp()).append(" ")
                .append(getAppAttemptId().getApplicationId().getId()).append(" ")
                .append(this.container.getId().getId()).append(" ").append(getAppAttemptId().getAttemptId())
                .append(" ").append(this.partition).append(" ").append(GuaguaAppMaster.this.rpcHostName)
                .append(" ").append(GuaguaAppMaster.this.rpcPort).toString();
        return YarnUtils.getCommand(GuaguaYarnTask.class.getName(), GuaguaAppMaster.this.containerArgs,
                programArgs, getHeapPerContainer() + "");
    }
    /**
     * Utility to populate the environment vars we wish to inject into the new containter's env when the guagua BSP
     * task is executed.
     *
     * @param launchContext
     *            the launch context which will set our environment vars in the app master's execution container.
     */
    private void buildEnvironment(final ContainerLaunchContext launchContext) {
        Map classPathForEnv = Maps.newHashMap();
        // pick up the local classpath so when we instantiate a Configuration remotely.
        YarnUtils.addLocalClasspathToEnv(classPathForEnv, getYarnConf());
        // set this map of env vars into the launch context.
        launchContext.setEnvironment(classPathForEnv);
    }
}
/**
 * CallbackHandler to process NM async calls.
 */
private class NMCallbackHandler implements NMClientAsync.CallbackHandler {
    /** Containers we have asked the NodeManager to start, keyed by container id. */
    private ConcurrentMap<ContainerId, Container> containers = new ConcurrentHashMap<ContainerId, Container>();

    /**
     * Add a container to track; no-op if the id is already present.
     *
     * @param containerId
     *            id of container
     * @param container
     *            container object
     */
    public void addContainer(ContainerId containerId, Container container) {
        this.containers.putIfAbsent(containerId, container);
    }

    @Override
    public void onContainerStopped(ContainerId containerId) {
        LOG.info("Succeeded to stop Container {}", containerId);
        this.containers.remove(containerId);
    }

    @Override
    public void onContainerStatusReceived(ContainerId containerId, ContainerStatus containerStatus) {
        LOG.info("Container Status: id={}, status={}", containerId, containerStatus);
    }

    @Override
    public void onContainerStarted(ContainerId containerId, Map<String, ByteBuffer> allServiceResponse) {
        LOG.info("Succeeded to start Container {}", containerId);
        Container container = this.containers.get(containerId);
        if(container != null) {
            // Kick off an async status query so onContainerStatusReceived can log the state.
            getNmClientAsync().getContainerStatusAsync(containerId, container.getNodeId());
        }
    }

    @Override
    public void onStartContainerError(ContainerId containerId, Throwable t) {
        LOG.error(String.format("Failed to start Container %s", containerId), t);
        this.containers.remove(containerId);
    }

    @Override
    public void onGetContainerStatusError(ContainerId containerId, Throwable t) {
        LOG.error(String.format("Failed to query the status of Container %s", containerId), t);
    }

    @Override
    public void onStopContainerError(ContainerId containerId, Throwable t) {
        LOG.error(String.format("Failed to stop Container %s", containerId), t);
        this.containers.remove(containerId);
    }
}
/**
 * Lazily compose the map of jar and file names to LocalResource records for inclusion in GuaguaYarnTask container
 * requests. Can re-use the same map as guagua tasks need identical HDFS-based resources (jars etc.) to run.
 *
 * @return the resource map for a ContainerLaunchContext
 * @throws IllegalStateException
 *             if the local resource map cannot be built; fail fast, the container would never launch.
 */
private synchronized Map<String, LocalResource> getTaskResourceMap() {
    // Set the local resources: just send the copies already in HDFS
    if(null == localResources) {
        localResources = Maps.newHashMap();
        try {
            // if you have to update the Conf for export to tasks, do it now
            // updateGuaguaConfForExport();
            localResources = YarnUtils.getLocalResourceMap(getYarnConf(), getAppId());
        } catch (IOException ioe) {
            // fail fast, this container will never launch.
            throw new IllegalStateException("Could not configure the container launch context for GuaguaYarnTask.",
                    ioe);
        }
    }
    // else, return the prepopulated copy to reuse for each GuaguaYarnTask
    return localResources;
}
/** @return the container id this application master itself runs in */
public ContainerId getContainerId() {
return masterContainerId;
}
/** Sets the container id this application master runs in. */
public void setContainerId(ContainerId containerId) {
this.masterContainerId = containerId;
}
/** @return the application attempt id of this AM attempt */
public ApplicationAttemptId getAppAttemptId() {
return appAttemptId;
}
/** Sets the application attempt id of this AM attempt. */
public void setAppAttemptId(ApplicationAttemptId appAttemptId) {
this.appAttemptId = appAttemptId;
}
/** @return true once the application has finished (success or failure) */
public boolean isDone() {
return done;
}
/** Marks the application as finished. */
public void setDone(boolean done) {
this.done = done;
}
// NOTE(review): getter returns Configuration while the setter takes YarnConfiguration —
// presumably the field is declared as Configuration; confirm against the field declaration.
/** @return the YARN configuration used by this AM */
public Configuration getYarnConf() {
return yarnConf;
}
/** Sets the YARN configuration used by this AM. */
public void setYarnConf(YarnConfiguration yarnConf) {
this.yarnConf = yarnConf;
}
/** @return counter of containers that have completed (success or failure) */
public AtomicInteger getCompletedCount() {
return completedCount;
}
/** Replaces the completed-container counter. */
public void setCompletedCount(AtomicInteger completedCount) {
this.completedCount = completedCount;
}
/** @return counter of containers that failed */
public AtomicInteger getFailedCount() {
return failedCount;
}
/** Replaces the failed-container counter. */
public void setFailedCount(AtomicInteger failedCount) {
this.failedCount = failedCount;
}
/** @return counter of containers allocated by the RM */
public AtomicInteger getAllocatedCount() {
return allocatedCount;
}
/** Replaces the allocated-container counter. */
public void setAllocatedCount(AtomicInteger allocatedCount) {
this.allocatedCount = allocatedCount;
}
/** @return counter of containers that completed successfully */
public AtomicInteger getSuccessfulCount() {
return successfulCount;
}
/** Replaces the successful-container counter. */
public void setSuccessfulCount(AtomicInteger successfulCount) {
this.successfulCount = successfulCount;
}
/** @return total number of task containers this AM must launch */
public int getContainersToLaunch() {
return containersToLaunch;
}
/** Sets the total number of task containers this AM must launch. */
public void setContainersToLaunch(int containersToLaunch) {
this.containersToLaunch = containersToLaunch;
}
/** @return executor service used to launch containers asynchronously */
public ExecutorService getExecutor() {
return executor;
}
/** Sets the executor service used to launch containers asynchronously. */
public void setExecutor(ExecutorService executor) {
this.executor = executor;
}
/** @return JVM heap size (MB) to request for each task container */
public int getHeapPerContainer() {
return heapPerContainer;
}
/** Sets the JVM heap size (MB) to request for each task container. */
public void setHeapPerContainer(int heapPerContainer) {
this.heapPerContainer = heapPerContainer;
}
/** @return async client used to talk to the ResourceManager */
public AMRMClientAsync getAmRMClient() {
return amRmClient;
}
/** Sets the async client used to talk to the ResourceManager. */
public void setAmRMClient(AMRMClientAsync amRMClient) {
this.amRmClient = amRMClient;
}
/** @return async client used to talk to NodeManagers */
public NMClientAsync getNmClientAsync() {
return nmClientAsync;
}
/** Sets the async client used to talk to NodeManagers. */
public void setNmClientAsync(NMClientAsync nmClientAsync) {
this.nmClientAsync = nmClientAsync;
}
/** @return callback handler receiving NodeManager container events */
public NMCallbackHandler getContainerListener() {
return containerListener;
}
/** Sets the callback handler receiving NodeManager container events. */
public void setContainerListener(NMCallbackHandler containerListener) {
this.containerListener = containerListener;
}
/** @return extra command-line args passed through to each GuaguaYarnTask */
public String getContainerArgs() {
return containerArgs;
}
/** Sets the extra command-line args passed through to each GuaguaYarnTask. */
public void setContainerArgs(String containerArgs) {
this.containerArgs = containerArgs;
}
/** @return the YARN application id of this job */
public ApplicationId getAppId() {
return appId;
}
/** Sets the YARN application id of this job. */
public void setAppId(ApplicationId appId) {
this.appId = appId;
}
/**
* Application entry point
*
* @param args
* command-line args (set by GuaguaYarnClient, if any)
*/
public static void main(final String[] args) {
LOG.info("Starting GuaguaAppMaster. ");
String containerIdString = System.getenv().get(Environment.CONTAINER_ID.name());
if(containerIdString == null) {
// container id should always be set in the env by the framework
throw new IllegalArgumentException("ContainerId not found in env vars.");
}
ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
ApplicationAttemptId appAttemptId = containerId.getApplicationAttemptId();
Configuration conf = new YarnConfiguration();
String jobUserName = System.getenv(ApplicationConstants.Environment.USER.name());
conf.set(MRJobConfig.USER_NAME, jobUserName);
try {
UserGroupInformation.setConfiguration(conf);
// Security framework already loaded the tokens into current UGI, just use them
Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
LOG.info("Executing with tokens:");
for(Token> token: credentials.getAllTokens()) {
LOG.info(token.toString());
}
UserGroupInformation appMasterUgi = UserGroupInformation.createRemoteUser(jobUserName);
appMasterUgi.addCredentials(credentials);
// Now remove the AM->RM token so tasks don't have it
Iterator> iter = credentials.getAllTokens().iterator();
while(iter.hasNext()) {
Token> token = iter.next();
if(token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
iter.remove();
}
}
final GuaguaAppMaster appMaster = new GuaguaAppMaster(containerId, appAttemptId, conf);
appMasterUgi.doAs(new PrivilegedAction() {
@Override
public Void run() {
boolean result = false;
try {
result = appMaster.run();
} catch (Throwable t) {
LOG.error("GuaguaAppMaster caught a top-level exception in main.", t);
System.exit(1);
}
if(result) {
LOG.info("Guagua Application Master completed successfully. exiting");
System.exit(0);
} else {
LOG.info("Guagua Application Master failed. exiting");
System.exit(2);
}
return null;
}
});
} catch (Throwable t) {
LOG.error("GuaguaAppMaster caught a top-level exception in main.", t);
System.exit(1);
}
}
}