// org.elasticsearch.hadoop.yarn.am.EsCluster Maven / Gradle / Ivy
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.hadoop.yarn.am;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.elasticsearch.hadoop.yarn.cfg.Config;
import org.elasticsearch.hadoop.yarn.compat.YarnCompat;
import org.elasticsearch.hadoop.yarn.util.StringUtils;
import org.elasticsearch.hadoop.yarn.util.YarnUtils;
/**
 * Logical cluster managing the global lifecycle for its multiple containers.
 *
 * <p>{@link #start()} registers the configured number of container requests, then polls
 * {@code amRpc.allocate(...)} in a loop: every newly allocated container is launched right away
 * and every completed container is recorded. The loop ends when all requested containers have
 * completed or as soon as one container completes with a non-successful exit status; in the
 * latter case {@link #hasFailed()} returns {@code true}.
 */
class EsCluster implements AutoCloseable {

    private static final Log log = LogFactory.getLog(EsCluster.class);

    /** Pause between two consecutive {@code allocate} calls (status update every 5 sec). */
    private static final long HEARTBEAT_MILLIS = TimeUnit.SECONDS.toMillis(5);

    /** Pause between the end of the allocation loop and the call to {@link #close()}. */
    private static final long SHUTDOWN_GRACE_MILLIS = TimeUnit.SECONDS.toMillis(15);

    private final AppMasterRpc amRpc;
    private final NodeMasterRpc nmRpc;
    private final Configuration cfg;
    private final Config appConfig;
    // only stored for now; its propagation into the container environment is disabled in setupEnv
    private final Map<String, String> masterEnv;

    private volatile boolean running = false;
    private volatile boolean clusterHasFailed = false;

    private final Set<ContainerId> allocatedContainers = new LinkedHashSet<>();
    private final Set<ContainerId> completedContainers = new LinkedHashSet<>();

    /**
     * Creates the cluster on top of an existing application-master RPC.
     *
     * @param rpc application-master RPC; also supplies the Hadoop configuration and the
     *            value handed to the {@link NodeMasterRpc} created here
     * @param appConfig application configuration (container count, memory, vcores, ES zip, ...)
     * @param masterEnv environment of the application master
     */
    public EsCluster(final AppMasterRpc rpc, Config appConfig, Map<String, String> masterEnv) {
        this.amRpc = rpc;
        this.cfg = rpc.getConfiguration();
        this.nmRpc = new NodeMasterRpc(cfg, rpc.getNMToCache());
        this.appConfig = appConfig;
        this.masterEnv = masterEnv;
    }

    /**
     * Requests the containers, then blocks in the allocation loop until the cluster stops.
     *
     * <p>Whatever the outcome, the method waits {@link #SHUTDOWN_GRACE_MILLIS} and then calls
     * {@link #close()}. If the calling thread is interrupted during that wait, the wait is cut
     * short, the interrupt status is restored and {@link #close()} is still invoked, so the
     * node-manager client is never leaked and an exception already propagating from the loop is
     * not replaced by another one.
     *
     * @throws EsYarnNmException if the thread is interrupted between two allocation rounds
     *         (the interrupt status is restored before throwing)
     * @throws EsYarnAmException if security is enabled and the keytab login fails
     */
    public void start() {
        running = true;
        nmRpc.start();

        UserGroupInformation.setConfiguration(cfg);
        attemptKeytabLogin();

        log.info(String.format("Allocating Elasticsearch cluster with %d nodes", appConfig.containersToAllocate()));
        requestContainers();

        // start the allocation loop
        // when a new container is allocated, launch it right away
        int responseId = 0;
        try {
            do {
                AllocateResponse alloc = amRpc.allocate(responseId++);
                launchAllocated(alloc.getAllocatedContainers());
                recordCompleted(alloc.getCompletedContainersStatuses());

                if (completedContainers.size() == appConfig.containersToAllocate()) {
                    running = false;
                }

                if (running) {
                    try {
                        Thread.sleep(HEARTBEAT_MILLIS);
                    } catch (InterruptedException ex) {
                        // keep the interrupt visible to the code further up the stack
                        Thread.currentThread().interrupt();
                        throw new EsYarnNmException("Cluster interrupted");
                    }
                }
            } while (running);
        } finally {
            log.info("Cluster has completed running...");
            try {
                Thread.sleep(SHUTDOWN_GRACE_MILLIS);
            } catch (InterruptedException ex) {
                // never throw from here: it would skip close() and hide the original failure
                Thread.currentThread().interrupt();
                log.warn("Interrupted while waiting before shutdown; closing right away");
            } finally {
                close();
            }
        }
    }

    /** Registers one container request per configured node, all with the same capability and priority. */
    private void requestContainers() {
        Resource capability = YarnCompat.resource(cfg, appConfig.containerMem(), appConfig.containerVCores());
        Priority prio = Priority.newInstance(appConfig.amPriority());

        for (int i = 0; i < appConfig.containersToAllocate(); i++) {
            // TODO: Add allocation (host/rack rules) - and disable location constraints
            ContainerRequest req = new ContainerRequest(capability, null, null, prio);
            amRpc.addContainerRequest(req);
        }
    }

    /** Launches every container of the given allocation round and logs the allocation progress. */
    private void launchAllocated(List<Container> currentlyAllocated) {
        for (Container container : currentlyAllocated) {
            launchContainer(container);
            allocatedContainers.add(container.getId());
        }

        if (currentlyAllocated.isEmpty()) {
            return;
        }

        int needed = appConfig.containersToAllocate() - allocatedContainers.size();
        if (needed > 0) {
            log.info(String.format("%s containers allocated, %s remaining", allocatedContainers.size(), needed));
        }
        else {
            log.info(String.format("Fully allocated %s containers", allocatedContainers.size()));
        }
    }

    /**
     * Records the containers reported as completed; the first non-successful one marks the
     * cluster as failed and stops the allocation loop.
     */
    private void recordCompleted(List<ContainerStatus> completed) {
        for (ContainerStatus status : completed) {
            ContainerId containerId = status.getContainerId();
            // Set.add returns false for a container that was already recorded
            if (!completedContainers.add(containerId)) {
                continue;
            }
            if (!exitedSuccessfully(containerId, status.getExitStatus())) {
                log.warn("Cluster has not completed succesfully...");
                clusterHasFailed = true;
                running = false;
            }
        }
    }

    /**
     * Logs the outcome of a completed container.
     *
     * @return {@code true} only for {@link ContainerExitStatus#SUCCESS}
     */
    private boolean exitedSuccessfully(ContainerId containerId, int exitStatus) {
        switch (exitStatus) {
        case ContainerExitStatus.SUCCESS:
            log.info(String.format("Container %s finished succesfully...", containerId));
            return true;
        case ContainerExitStatus.ABORTED:
            log.warn(String.format("Container %s aborted...", containerId));
            return false;
        case ContainerExitStatus.DISKS_FAILED:
            log.warn(String.format("Container %s ran out of disk...", containerId));
            return false;
        case ContainerExitStatus.PREEMPTED:
            log.warn(String.format("Container %s preempted...", containerId));
            return false;
        default:
            log.warn(String.format("Container %s exited with an invalid/unknown exit code...", containerId));
            return false;
        }
    }

    /**
     * When Hadoop security is enabled, logs in from the configured keytab using the configured
     * principal resolved against the canonical name of the local host. Does nothing otherwise.
     *
     * @throws EsYarnAmException if no keytab is configured, the local host cannot be resolved
     *         or the login itself fails
     */
    private void attemptKeytabLogin() {
        if (!UserGroupInformation.isSecurityEnabled()) {
            return;
        }

        // validate the configuration first so a missing keytab is reported without a host lookup
        String keytabFilename = appConfig.kerberosKeytab();
        if (keytabFilename == null || keytabFilename.length() == 0) {
            throw new EsYarnAmException("Security is enabled, but we could not find a configured keytab; Bailing out...");
        }

        try {
            String localhost = InetAddress.getLocalHost().getCanonicalHostName();
            String configuredPrincipal = appConfig.kerberosPrincipal();
            String principal = SecurityUtil.getServerPrincipal(configuredPrincipal, localhost);
            UserGroupInformation.loginUserFromKeytab(principal, keytabFilename);
        } catch (UnknownHostException e) {
            throw new EsYarnAmException("Could not read localhost information for server principal construction; Bailing out...", e);
        } catch (IOException e) {
            throw new EsYarnAmException("Could not log in.", e);
        }
    }

    /** Builds the launch context (environment, ES zip resource, start command) and starts the container. */
    private void launchContainer(Container container) {
        ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);

        ctx.setEnvironment(setupEnv(appConfig));
        ctx.setLocalResources(setupEsZipResource(appConfig));
        ctx.setCommands(setupEsScript(appConfig));

        log.info("About to launch container for command: " + ctx.getCommands());

        // setup container (the value returned by the node-manager RPC is not used)
        nmRpc.startContainer(container, ctx);
        log.info("Started container " + container);
    }

    /**
     * Builds the container environment: the standard Hadoop setup, the configured environment
     * variables and, when system properties are configured, an {@code ES_JAVA_OPTS} entry
     * holding them as {@code -Dkey=value} options.
     */
    private Map<String, String> setupEnv(Config appConfig) {
        // standard Hadoop env setup
        Map<String, String> env = YarnUtils.setupEnv(cfg);
        // copy esYarn Config (currently disabled)
        //env.put(EsYarnConstants.CFG_PROPS, masterEnv.get(EsYarnConstants.CFG_PROPS));
        // plus expand its vars into the env
        YarnUtils.addToEnv(env, appConfig.envVars());

        // add system properties (to ES_JAVA_OPTS for ES to pick them up)
        Map<?, ?> sysProps = appConfig.systemProps();
        if (!sysProps.isEmpty()) {
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<?, ?> prop : sysProps.entrySet()) {
                sb.append(String.format(Locale.ROOT, " -D%s=%s", prop.getKey(), prop.getValue()));
            }
            YarnUtils.addToEnv(env, "ES_JAVA_OPTS", sb.toString());
        }

        return env;
    }

    /**
     * Describes the Elasticsearch zip as a public archive local resource, keyed by the
     * configured zip name.
     *
     * @throws IllegalArgumentException if the status of the zip cannot be read from the file system
     */
    private Map<String, LocalResource> setupEsZipResource(Config conf) {
        // elasticsearch.zip
        Map<String, LocalResource> resources = new LinkedHashMap<>();

        LocalResource esZip = Records.newRecord(LocalResource.class);
        String esZipHdfsPath = conf.esZipHdfsPath();
        Path p = new Path(esZipHdfsPath);
        FileStatus fsStat;
        try {
            fsStat = FileSystem.get(cfg).getFileStatus(p);
        } catch (IOException ex) {
            throw new IllegalArgumentException(
                    String.format("Cannot find Elasticsearch zip at [%s]; make sure the artifacts have been properly provisioned and the correct permissions are in place.", esZipHdfsPath), ex);
        }
        // use the normalized path as otherwise YARN chokes down the line
        esZip.setResource(ConverterUtils.getYarnUrlFromPath(fsStat.getPath()));
        esZip.setSize(fsStat.getLen());
        esZip.setTimestamp(fsStat.getModificationTime());
        esZip.setType(LocalResourceType.ARCHIVE);
        esZip.setVisibility(LocalResourceVisibility.PUBLIC);

        resources.put(conf.esZipName(), esZip);
        return resources;
    }

    /**
     * Builds the single command line starting Elasticsearch: the shell, the script inside the
     * unpacked zip folder and the redirection of stdout/stderr to the container log directory.
     */
    private List<String> setupEsScript(Config conf) {
        List<String> cmds = new ArrayList<>();
        // don't use -jar since it overrides the classpath
        cmds.add(YarnCompat.$$(ApplicationConstants.Environment.SHELL));
        // make sure to include the ES.ZIP archive name used in the local resource setup above (since it's the folder where it got unpacked)
        cmds.add(conf.esZipName() + "/" + conf.esScript());
        cmds.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/" + ApplicationConstants.STDOUT);
        cmds.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/" + ApplicationConstants.STDERR);
        return Collections.singletonList(StringUtils.concatenate(cmds, " "));
    }

    /**
     * @return {@code true} once a container has completed with a non-successful exit status
     */
    public boolean hasFailed() {
        return clusterHasFailed;
    }

    /** Stops the allocation loop and closes the node-manager RPC. */
    @Override
    public void close() {
        running = false;
        nmRpc.close();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy