All downloads are free. Search and download functionality uses the official Maven repository.

org.elasticsearch.hadoop.yarn.am.EsCluster Maven / Gradle / Ivy

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.yarn.am;

import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.elasticsearch.hadoop.yarn.cfg.Config;
import org.elasticsearch.hadoop.yarn.compat.YarnCompat;
import org.elasticsearch.hadoop.yarn.util.StringUtils;
import org.elasticsearch.hadoop.yarn.util.YarnUtils;

/**
 * Logical cluster managing the global lifecycle for its multiple containers.
 * <p>
 * {@link #start()} requests the configured number of containers from the
 * application-master RPC, launches each container as soon as it is allocated and
 * keeps polling until either every container has completed or one of them
 * finished with a non-successful exit status (see {@link #hasFailed()}).
 */
class EsCluster implements AutoCloseable {

    private static final Log log = LogFactory.getLog(EsCluster.class);

    /** Pause between two consecutive allocate calls (status update every 5 sec). */
    private static final long HEARTBEAT_MILLIS = TimeUnit.SECONDS.toMillis(5);

    /**
     * Pause between the end of the allocation loop and closing the node-manager RPC.
     * NOTE(review): the reason for this delay is not visible in this file - confirm before tuning.
     */
    private static final long SHUTDOWN_DELAY_MILLIS = TimeUnit.SECONDS.toMillis(15);

    private final AppMasterRpc amRpc;
    private final NodeMasterRpc nmRpc;
    private final Configuration cfg;
    private final Config appConfig;
    // only referenced by the disabled config propagation in setupEnv
    private final Map<String, String> masterEnv;

    private volatile boolean running = false;
    private volatile boolean clusterHasFailed = false;

    // insertion-ordered ids of the containers launched / reported as completed so far
    private final Set<ContainerId> allocatedContainers = new LinkedHashSet<>();
    private final Set<ContainerId> completedContainers = new LinkedHashSet<>();

    /**
     * Creates the cluster on top of an application-master RPC.
     *
     * @param rpc       application-master RPC; also supplies the Hadoop configuration and the
     *                  node-manager token cache used to build the node-manager RPC
     * @param appConfig application settings (container count, memory, cores, ES archive location...)
     * @param masterEnv environment of the application master
     */
    public EsCluster(final AppMasterRpc rpc, Config appConfig, Map<String, String> masterEnv) {
        this.amRpc = rpc;
        this.cfg = rpc.getConfiguration();
        this.nmRpc = new NodeMasterRpc(cfg, rpc.getNMToCache());
        this.appConfig = appConfig;
        this.masterEnv = masterEnv;
    }

    /**
     * Runs the cluster: performs the keytab login (when security is enabled), registers the
     * container requests and then blocks in the allocation loop until the cluster stops.
     * The node-manager RPC is always closed before this method returns or throws from the loop.
     *
     * @throws EsYarnNmException if the thread is interrupted while waiting between heartbeats
     * @throws EsYarnAmException if security is enabled and the keytab login fails
     */
    public void start() {
        running = true;
        nmRpc.start();

        UserGroupInformation.setConfiguration(cfg);
        attemptKeytabLogin();

        log.info(String.format("Allocating Elasticsearch cluster with %d nodes", appConfig.containersToAllocate()));

        requestContainers();

        // start the allocation loop
        // when a new container is allocated, launch it right away
        int responseId = 0;

        try {
            do {
                AllocateResponse alloc = amRpc.allocate(responseId++);
                launchAllocated(alloc.getAllocatedContainers());
                processCompleted(alloc.getCompletedContainersStatuses());

                if (completedContainers.size() == appConfig.containersToAllocate()) {
                    running = false;
                }

                if (running) {
                    try {
                        Thread.sleep(HEARTBEAT_MILLIS);
                    } catch (InterruptedException ex) {
                        // keep the interrupt visible to the code further up the stack
                        Thread.currentThread().interrupt();
                        throw new EsYarnNmException("Cluster interrupted");
                    }
                }
            } while (running);
        } finally {
            log.info("Cluster has completed running...");
            try {
                Thread.sleep(SHUTDOWN_DELAY_MILLIS);
            } catch (InterruptedException e) {
                // do not throw from finally: that would skip close() and hide the loop's own exception
                Thread.currentThread().interrupt();
                log.warn("Interrupted while waiting before shutdown; closing right away");
            } finally {
                close();
            }
        }
    }

    /** Registers one container request per configured node, all with the same capability and priority. */
    private void requestContainers() {
        Resource capability = YarnCompat.resource(cfg, appConfig.containerMem(), appConfig.containerVCores());
        Priority prio = Priority.newInstance(appConfig.amPriority());

        for (int i = 0; i < appConfig.containersToAllocate(); i++) {
            // TODO: Add allocation (host/rack rules) - and disable location constraints
            ContainerRequest req = new ContainerRequest(capability, null, null, prio);
            amRpc.addContainerRequest(req);
        }
    }

    /** Launches every newly allocated container, records its id and logs the allocation progress. */
    private void launchAllocated(List<Container> currentlyAllocated) {
        for (Container container : currentlyAllocated) {
            launchContainer(container);
            allocatedContainers.add(container.getId());
        }

        if (currentlyAllocated.isEmpty()) {
            return;
        }

        int needed = appConfig.containersToAllocate() - allocatedContainers.size();
        if (needed > 0) {
            log.info(String.format("%s containers allocated, %s remaining", allocatedContainers.size(),
                    needed));
        }
        else {
            log.info(String.format("Fully allocated %s containers", allocatedContainers.size()));
        }
    }

    /**
     * Records the containers reported as completed (each id is handled once) and stops the
     * cluster, marking it as failed, as soon as one of them did not exit successfully.
     */
    private void processCompleted(List<ContainerStatus> completed) {
        for (ContainerStatus status : completed) {
            ContainerId containerId = status.getContainerId();
            // add() returns false when the id was already recorded by an earlier response
            if (!completedContainers.add(containerId)) {
                continue;
            }

            boolean containerSuccessful = false;

            switch (status.getExitStatus()) {
            case ContainerExitStatus.SUCCESS:
                log.info(String.format("Container %s finished succesfully...", containerId));
                containerSuccessful = true;
                break;
            case ContainerExitStatus.ABORTED:
                log.warn(String.format("Container %s aborted...", containerId));
                break;
            case ContainerExitStatus.DISKS_FAILED:
                log.warn(String.format("Container %s ran out of disk...", containerId));
                break;
            case ContainerExitStatus.PREEMPTED:
                log.warn(String.format("Container %s preempted...", containerId));
                break;
            default:
                log.warn(String.format("Container %s exited with an invalid/unknown exit code...", containerId));
                break;
            }

            if (!containerSuccessful) {
                log.warn("Cluster has not completed succesfully...");
                clusterHasFailed = true;
                running = false;
            }
        }
    }

    /**
     * Logs in from the configured keytab when Hadoop security is enabled; does nothing otherwise.
     *
     * @throws EsYarnAmException if no keytab is configured, the local host cannot be resolved
     *                           or the login itself fails
     */
    private void attemptKeytabLogin() {
        if (!UserGroupInformation.isSecurityEnabled()) {
            return;
        }

        try {
            String localhost = InetAddress.getLocalHost().getCanonicalHostName();
            String keytabFilename = appConfig.kerberosKeytab();
            if (keytabFilename == null || keytabFilename.length() == 0) {
                throw new EsYarnAmException("Security is enabled, but we could not find a configured keytab; Bailing out...");
            }
            String configuredPrincipal = appConfig.kerberosPrincipal();
            String principal = SecurityUtil.getServerPrincipal(configuredPrincipal, localhost);
            UserGroupInformation.loginUserFromKeytab(principal, keytabFilename);
        } catch (UnknownHostException e) {
            throw new EsYarnAmException("Could not read localhost information for server principal construction; Bailing out...", e);
        } catch (IOException e) {
            throw new EsYarnAmException("Could not log in.", e);
        }
    }

    /** Builds the launch context (environment, ES archive, start command) and starts the container. */
    private void launchContainer(Container container) {
        ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);

        ctx.setEnvironment(setupEnv(appConfig));
        ctx.setLocalResources(setupEsZipResource(appConfig));
        ctx.setCommands(setupEsScript(appConfig));

        log.info("About to launch container for command: " + ctx.getCommands());

        // setup container (the value returned by the RPC is not needed here)
        nmRpc.startContainer(container, ctx);
        log.info("Started container " + container);
    }

    /**
     * Assembles the container environment: the standard Hadoop variables, the variables from the
     * application config and, when system properties are configured, an {@code ES_JAVA_OPTS}
     * entry carrying them as {@code -Dkey=value} options.
     */
    private Map<String, String> setupEnv(Config appConfig) {
        // standard Hadoop env setup
        Map<String, String> env = YarnUtils.setupEnv(cfg);
        // copy esYarn Config
        //env.put(EsYarnConstants.CFG_PROPS, masterEnv.get(EsYarnConstants.CFG_PROPS));
        // plus expand its vars into the env
        YarnUtils.addToEnv(env, appConfig.envVars());

        // add system properties (to ES_JAVA_OPTS for ES to pick them up)
        Map<?, ?> sysProps = appConfig.systemProps();
        if (!sysProps.isEmpty()) {
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<?, ?> prop : sysProps.entrySet()) {
                sb.append(String.format(Locale.ROOT, " -D%s=%s", prop.getKey(), prop.getValue()));
            }
            YarnUtils.addToEnv(env, "ES_JAVA_OPTS", sb.toString());
        }

        return env;
    }


    /**
     * Describes the Elasticsearch zip as a public archive resource, keyed by the archive name.
     *
     * @throws IllegalArgumentException if the file status of the configured path cannot be read
     */
    private Map<String, LocalResource> setupEsZipResource(Config conf) {
        // elasticsearch.zip
        Map<String, LocalResource> resources = new LinkedHashMap<>();

        LocalResource esZip = Records.newRecord(LocalResource.class);
        String esZipHdfsPath = conf.esZipHdfsPath();
        Path p = new Path(esZipHdfsPath);
        FileStatus fsStat;
        try {
            fsStat = FileSystem.get(cfg).getFileStatus(p);
        } catch (IOException ex) {
            throw new IllegalArgumentException(
                    String.format("Cannot find Elasticsearch zip at [%s]; make sure the artifacts have been properly provisioned and the correct permissions are in place.", esZipHdfsPath), ex);
        }
        // use the normalized path as otherwise YARN chokes down the line
        esZip.setResource(ConverterUtils.getYarnUrlFromPath(fsStat.getPath()));
        esZip.setSize(fsStat.getLen());
        esZip.setTimestamp(fsStat.getModificationTime());
        esZip.setType(LocalResourceType.ARCHIVE);
        esZip.setVisibility(LocalResourceVisibility.PUBLIC);

        resources.put(conf.esZipName(), esZip);
        return resources;
    }

    /**
     * Builds the single shell command line that runs the ES script from the unpacked archive
     * folder and redirects stdout/stderr into the container log directory.
     */
    private List<String> setupEsScript(Config conf) {
        List<String> cmds = new ArrayList<>();
        // don't use -jar since it overrides the classpath
        cmds.add(YarnCompat.$$(ApplicationConstants.Environment.SHELL));
        // make sure to include the ES.ZIP archive name used in the local resource setup above (since it's the folder where it got unpacked)
        cmds.add(conf.esZipName() + "/" + conf.esScript());
        cmds.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/" + ApplicationConstants.STDOUT);
        cmds.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/" + ApplicationConstants.STDERR);
        return Collections.singletonList(StringUtils.concatenate(cmds, " "));
    }

    /**
     * @return {@code true} once a container has been reported with a non-successful exit status
     */
    public boolean hasFailed() {
        return clusterHasFailed;
    }

    /** Stops the allocation loop (after its current iteration) and closes the node-manager RPC. */
    @Override
    public void close() {
        running = false;
        nmRpc.close();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy