All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.spotify.helios.master.MasterService Maven / Gradle / Ivy

There is a newer version: 0.9.9
Show newest version
/*
 * Copyright (c) 2014 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.helios.master;

import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.io.Resources;
import com.google.common.util.concurrent.AbstractIdleService;

import com.codahale.metrics.MetricRegistry;
import com.spotify.helios.common.HeliosRuntimeException;
import com.spotify.helios.master.http.VersionResponseFilter;
import com.spotify.helios.master.metrics.HealthCheckGauge;
import com.spotify.helios.master.metrics.ReportingResourceMethodDispatchAdapter;
import com.spotify.helios.master.resources.DeploymentGroupResource;
import com.spotify.helios.master.resources.HistoryResource;
import com.spotify.helios.master.resources.HostsResource;
import com.spotify.helios.master.resources.JobsResource;
import com.spotify.helios.master.resources.MastersResource;
import com.spotify.helios.master.resources.VersionResource;
import com.spotify.helios.rollingupdate.RollingUpdateService;
import com.spotify.helios.serviceregistration.ServiceRegistrar;
import com.spotify.helios.serviceregistration.ServiceRegistration;
import com.spotify.helios.servicescommon.FastForwardConfig;
import com.spotify.helios.servicescommon.KafkaClientProvider;
import com.spotify.helios.servicescommon.KafkaSender;
import com.spotify.helios.servicescommon.ManagedStatsdReporter;
import com.spotify.helios.servicescommon.ReactorFactory;
import com.spotify.helios.servicescommon.RiemannFacade;
import com.spotify.helios.servicescommon.RiemannHeartBeat;
import com.spotify.helios.servicescommon.RiemannSupport;
import com.spotify.helios.servicescommon.ServiceUtil;
import com.spotify.helios.servicescommon.ZooKeeperRegistrarService;
import com.spotify.helios.servicescommon.coordination.CuratorClientFactory;
import com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient;
import com.spotify.helios.servicescommon.coordination.Paths;
import com.spotify.helios.servicescommon.coordination.ZooKeeperClient;
import com.spotify.helios.servicescommon.coordination.ZooKeeperClientProvider;
import com.spotify.helios.servicescommon.coordination.ZooKeeperHealthChecker;
import com.spotify.helios.servicescommon.coordination.ZooKeeperModelReporter;
import com.spotify.helios.servicescommon.statistics.FastForwardReporter;
import com.spotify.helios.servicescommon.statistics.Metrics;
import com.spotify.helios.servicescommon.statistics.MetricsImpl;
import com.spotify.helios.servicescommon.statistics.NoopMetrics;

import org.apache.curator.RetryPolicy;
import org.apache.curator.framework.AuthInfo;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.api.ACLProvider;
import org.apache.curator.retry.ExponentialBackoffRetry;
import org.apache.zookeeper.data.ACL;
import org.eclipse.jetty.server.Handler;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.server.handler.HandlerCollection;
import org.eclipse.jetty.server.handler.RequestLogHandler;
import org.eclipse.jetty.servlets.CrossOriginFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.TimeUnit;

import javax.servlet.DispatcherType;
import javax.servlet.FilterRegistration;

import ch.qos.logback.access.jetty.RequestLogImpl;
import io.dropwizard.configuration.ConfigurationException;
import io.dropwizard.jetty.GzipFilterFactory;
import io.dropwizard.jetty.RequestLogFactory;
import io.dropwizard.logging.AppenderFactory;
import io.dropwizard.server.DefaultServerFactory;
import io.dropwizard.setup.Environment;

import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.spotify.helios.servicescommon.ServiceRegistrars.createServiceRegistrar;
import static com.spotify.helios.servicescommon.ZooKeeperAclProviders.digest;
import static com.spotify.helios.servicescommon.ZooKeeperAclProviders.heliosAclProvider;

/**
 * The Helios master service.
 */
public class MasterService extends AbstractIdleService {

  private static final Logger log = LoggerFactory.getLogger(MasterService.class);

  private static final String LOGBACK_ACCESS_CONFIG = "logback-access.xml";
  private static final String LOGBACK_ACCESS_RESOURCE = "/" + LOGBACK_ACCESS_CONFIG;

  private final Server server;
  private final MasterConfig config;
  private final ServiceRegistrar registrar;
  private final ZooKeeperClient zooKeeperClient;
  private final ExpiredJobReaper expiredJobReaper;
  private final CuratorClientFactory curatorClientFactory;
  private final RollingUpdateService rollingUpdateService;
  private final Map environmentVariables;
  private final Optional agentReaper;
  private final Optional oldJobReaper;

  private ZooKeeperRegistrarService zkRegistrar;

  /**
   * Create a new service instance. Initializes the control interface and the worker.
   *
   * @param config The service configuration.
   * @param environment The DropWizard environment.
   * @param curatorClientFactory The zookeeper curator factory.
   * @param environmentVariables Env vars
   * @throws ConfigurationException If there is a problem with the DropWizard configuration.
   * @throws IOException IOException
   * @throws InterruptedException InterruptedException
   */
  public MasterService(final MasterConfig config,
                       final Environment environment,
                       final CuratorClientFactory curatorClientFactory,
                       final Map environmentVariables)
      throws ConfigurationException, IOException, InterruptedException {
    this.config = config;
    this.curatorClientFactory = curatorClientFactory;
    this.environmentVariables = environmentVariables;

    // Configure metrics
    final MetricRegistry metricsRegistry = environment.metrics();
    final RiemannSupport riemannSupport = new RiemannSupport(metricsRegistry,
        config.getRiemannHostPort(), config.getName(), "helios-master");
    final RiemannFacade riemannFacade = riemannSupport.getFacade();
    log.info("Starting metrics");
    final Metrics metrics;
    if (config.isInhibitMetrics()) {
      metrics = new NoopMetrics();
    } else {
      metrics = new MetricsImpl(metricsRegistry, MetricsImpl.Type.MASTER);
      metrics.start();
      environment.lifecycle().manage(riemannSupport);
      if (!Strings.isNullOrEmpty(config.getStatsdHostPort())) {
        environment.lifecycle().manage(new ManagedStatsdReporter(config.getStatsdHostPort(),
                                                                 metricsRegistry));
      }

      final FastForwardConfig ffwdConfig = config.getFfwdConfig();
      if (ffwdConfig != null) {
        environment.lifecycle().manage(FastForwardReporter.create(
            metricsRegistry,
            ffwdConfig.getAddress(),
            ffwdConfig.getMetricKey(),
            ffwdConfig.getReportingIntervalSeconds())
        );
      }
    }

    // Set up the master model
    this.zooKeeperClient = setupZookeeperClient(config);
    final ZooKeeperModelReporter modelReporter = new ZooKeeperModelReporter(
        riemannFacade, metrics.getZooKeeperMetrics());
    final ZooKeeperClientProvider zkClientProvider = new ZooKeeperClientProvider(
        zooKeeperClient, modelReporter);
    final KafkaClientProvider kafkaClientProvider = new KafkaClientProvider(
        config.getKafkaBrokers());

    // Create state directory, if necessary
    final Path stateDirectory = config.getStateDirectory().toAbsolutePath().normalize();
    if (!Files.exists(stateDirectory)) {
      try {
        Files.createDirectories(stateDirectory);
      } catch (IOException e) {
        log.error("Failed to create state directory: {}", stateDirectory, e);
        throw Throwables.propagate(e);
      }
    }

    // Make a KafkaProducer for events that can be serialized to an array of bytes,
    // and wrap it in our KafkaSender.
    final KafkaSender kafkaSender = new KafkaSender(kafkaClientProvider.getDefaultProducer());

    final ZooKeeperMasterModel model =
        new ZooKeeperMasterModel(zkClientProvider, config.getName(), kafkaSender);

    final ZooKeeperHealthChecker zooKeeperHealthChecker = new ZooKeeperHealthChecker(
        zooKeeperClient, Paths.statusMasters(), riemannFacade, TimeUnit.MINUTES, 2);

    environment.lifecycle().manage(zooKeeperHealthChecker);
    environment.healthChecks().register("zookeeper", zooKeeperHealthChecker);

    // Report health checks as a gauge metric
    environment.healthChecks().getNames().forEach(
        name -> environment.metrics().register(
            "helios." + name + ".ok", new HealthCheckGauge(environment.healthChecks(), name)));

    environment.lifecycle().manage(new RiemannHeartBeat(TimeUnit.MINUTES, 2, riemannFacade));

    // Set up service registrar
    this.registrar = createServiceRegistrar(config.getServiceRegistrarPlugin(),
                                            config.getServiceRegistryAddress(),
                                            config.getDomain());

    // Set up reaping of expired jobs
    this.expiredJobReaper = ExpiredJobReaper.newBuilder()
        .setMasterModel(model)
        .build();

    // Set up rolling update service
    final ReactorFactory reactorFactory = new ReactorFactory();
    this.rollingUpdateService = new RollingUpdateService(model, reactorFactory);

    // Set up agent reaper (de-registering hosts that have been DOWN for more than X hours)
    if (config.getAgentReapingTimeout() > 0) {
      this.agentReaper = Optional.of(new DeadAgentReaper(model, config.getAgentReapingTimeout()));
    } else {
      log.info("Reaping of dead agents disabled");
      this.agentReaper = Optional.empty();
    }

    // Set up old job reaper (removes jobs not deployed anywhere and created more than X days ago)
    if (config.getJobRetention() > 0) {
      this.oldJobReaper = Optional.of(new OldJobReaper(model, config.getJobRetention()));
    } else {
      log.info("Reaping of old jobs disabled");
      this.oldJobReaper = Optional.empty();
    }

    // Set up http server
    environment.servlets()
        .addFilter("VersionResponseFilter", new VersionResponseFilter(metrics.getMasterMetrics()))
        .addMappingForUrlPatterns(EnumSet.of(DispatcherType.REQUEST), true, "/*");
    environment.jersey().register(
        new ReportingResourceMethodDispatchAdapter(metrics.getMasterMetrics()));
    environment.jersey().register(new JobsResource(
        model, metrics.getMasterMetrics(), config.getWhitelistedCapabilities()));
    environment.jersey().register(new HistoryResource(model, metrics.getMasterMetrics()));
    environment.jersey().register(new HostsResource(model));
    environment.jersey().register(new MastersResource(model));
    environment.jersey().register(new VersionResource());
    environment.jersey().register(new UserProvider());
    environment.jersey().register(new DeploymentGroupResource(model));

    final DefaultServerFactory serverFactory = ServiceUtil.createServerFactory(
        config.getHttpEndpoint(), config.getAdminEndpoint(), false);

    final RequestLogFactory requestLog = new RequestLogFactory();
    requestLog.setAppenders(ImmutableList.of());
    serverFactory.setRequestLogFactory(requestLog);

    // Enable CORS headers
    final FilterRegistration.Dynamic cors = environment.servlets()
        .addFilter("CORS", CrossOriginFilter.class);

    // Configure CORS parameters
    cors.setInitParameter("allowedOrigins", "*");
    cors.setInitParameter("allowedHeaders", "X-Requested-With,Content-Type,Accept,Origin");
    cors.setInitParameter("allowedMethods", "OPTIONS,GET,PUT,POST,DELETE,HEAD");

    // Add URL mapping
    cors.addMappingForUrlPatterns(EnumSet.allOf(DispatcherType.class), true, "/*");

    // Enable gzip compression for POST and GET requests. Default is GET only.
    final GzipFilterFactory gzip = new GzipFilterFactory();
    gzip.setIncludedMethods(ImmutableSet.of("GET", "POST"));
    serverFactory.setGzipFilterFactory(gzip);

    this.server = serverFactory.build(environment);

    setUpRequestLogging(stateDirectory);
  }

  private void setUpRequestLogging(final Path stateDirectory) {
    // Set up request logging
    final Handler originalHandler = server.getHandler();
    final HandlerCollection handlerCollection;
    if (originalHandler instanceof HandlerCollection) {
      handlerCollection = (HandlerCollection) originalHandler;
    } else {
      handlerCollection = new HandlerCollection();
      handlerCollection.addHandler(originalHandler);
    }

    final RequestLogHandler requestLogHandler = new RequestLogHandler();
    final RequestLogImpl requestLog = new RequestLogImpl();
    requestLog.setQuiet(true);

    if (stateDirectory.resolve(LOGBACK_ACCESS_CONFIG).toFile().exists()) {
      requestLog.setFileName(stateDirectory.resolve(LOGBACK_ACCESS_CONFIG).toString());
    } else if (this.getClass().getResource(LOGBACK_ACCESS_RESOURCE) != null) {
      requestLog.setResource(LOGBACK_ACCESS_RESOURCE);
    }

    requestLogHandler.setRequestLog(requestLog);
    handlerCollection.addHandler(requestLogHandler);
    server.setHandler(handlerCollection);
  }

  @Override
  protected void startUp() throws Exception {
    logBanner();
    if (!config.getNoZooKeeperMasterRegistration()) {
      zkRegistrar.startAsync().awaitRunning();
    }
    expiredJobReaper.startAsync().awaitRunning();
    rollingUpdateService.startAsync().awaitRunning();

    agentReaper.ifPresent(reaper -> reaper.startAsync().awaitRunning());
    oldJobReaper.ifPresent(reaper -> reaper.startAsync().awaitRunning());

    try {
      server.start();
    } catch (Exception e) {
      log.error("Unable to start server, shutting down", e);
      server.stop();
    }

    final ServiceRegistration serviceRegistration = ServiceRegistration.newBuilder()
        .endpoint("helios", "http", config.getHttpEndpoint().getPort(),
            config.getDomain(), config.getName())
        .build();
    registrar.register(serviceRegistration);
  }

  @Override
  protected void shutDown() throws Exception {
    server.stop();
    server.join();
    registrar.close();

    agentReaper.ifPresent(reaper -> reaper.stopAsync().awaitTerminated());
    oldJobReaper.ifPresent(reaper -> reaper.stopAsync().awaitTerminated());

    rollingUpdateService.stopAsync().awaitTerminated();
    expiredJobReaper.stopAsync().awaitTerminated();
    zkRegistrar.stopAsync().awaitTerminated();
    zooKeeperClient.close();
  }

  private void logBanner() {
    try {
      final String banner = Resources.toString(Resources.getResource("master-banner.txt"), UTF_8);
      log.info("\n{}", banner);
    } catch (IllegalArgumentException | IOException ignored) {
    }
  }

  /**
   * Create a Zookeeper client and create the control and state nodes if needed.
   *
   * @param config The service configuration.
   * @return A zookeeper client.
   */
  private ZooKeeperClient setupZookeeperClient(final MasterConfig config) {
    ACLProvider aclProvider = null;
    List authorization = null;

    final String masterUser = config.getZookeeperAclMasterUser();
    final String masterPassword = config.getZooKeeperAclMasterPassword();
    final String agentUser = config.getZookeeperAclAgentUser();
    final String agentDigest = config.getZooKeeperAclAgentDigest();

    if (!isNullOrEmpty(masterPassword)) {
      if (isNullOrEmpty(masterUser)) {
        throw new HeliosRuntimeException(
            "Master username must be set if a password is set");
      }

      authorization = Lists.newArrayList(new AuthInfo(
          "digest", String.format("%s:%s", masterUser, masterPassword).getBytes()));
    }

    if (config.isZooKeeperEnableAcls()) {
      if (isNullOrEmpty(masterUser) || isNullOrEmpty(masterPassword)) {
        throw new HeliosRuntimeException(
            "ZooKeeper ACLs enabled but master username and/or password not set");
      }

      if (isNullOrEmpty(agentUser) || isNullOrEmpty(agentDigest)) {
        throw new HeliosRuntimeException(
            "ZooKeeper ACLs enabled but agent username and/or digest not set");
      }

      aclProvider = heliosAclProvider(
          masterUser, digest(masterUser, masterPassword),
          agentUser, agentDigest);
    }

    final RetryPolicy zooKeeperRetryPolicy = new ExponentialBackoffRetry(1000, 3);
    final CuratorFramework curator = curatorClientFactory.newClient(
        config.getZooKeeperConnectionString(),
        config.getZooKeeperSessionTimeoutMillis(),
        config.getZooKeeperConnectionTimeoutMillis(),
        zooKeeperRetryPolicy,
        aclProvider,
        authorization);
    final ZooKeeperClient client =
        new DefaultZooKeeperClient(curator, config.getZooKeeperClusterId());
    client.start();
    zkRegistrar = ZooKeeperRegistrarService.newBuilder()
        .setZooKeeperClient(client)
        .setZooKeeperRegistrar(new MasterZooKeeperRegistrar(config.getName()))
        .build();

    // TODO: This is perhaps not the correct place to do this - but at present it's the only
    // place where we have access to the ACL provider.
    if (aclProvider != null) {
      // Set ACLs on the ZK root, if they aren't already set correctly.
      // This is handy since it avoids having to manually do this operation when setting up
      // a new ZK cluster.
      // Note that this is slightly racey -- if two masters start at the same time both might
      // attempt to update the ACLs but only one will succeed. That said, it's unlikely and the
      // effects are limited to a spurious log line.
      try {
        final List curAcls = client.getAcl("/");
        final List wantedAcls = aclProvider.getAclForPath("/");
        if (!Sets.newHashSet(curAcls).equals(Sets.newHashSet(wantedAcls))) {
          log.info(
              "Current ACL's on the zookeeper root node differ from desired, updating: {} -> {}",
              curAcls, wantedAcls);
          client.getCuratorFramework().setACL().withACL(wantedAcls).forPath("/");
        }
      } catch (Exception e) {
        log.error("Failed to get/set ACLs on the zookeeper root node", e);
      }
    }

    return client;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy