All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.spotify.helios.cli.command.RollingUpdateCommand Maven / Gradle / Ivy

There is a newer version: 0.9.283
Show newest version
/*-
 * -\-\-
 * Helios Tools
 * --
 * Copyright (C) 2016 Spotify AB
 * --
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * -/-/-
 */

package com.spotify.helios.cli.command;

import static com.google.common.base.Preconditions.checkArgument;
import static com.spotify.helios.common.descriptors.Job.EMPTY_TOKEN;
import static java.lang.String.format;
import static net.sourceforge.argparse4j.impl.Arguments.storeTrue;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Supplier;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.spotify.helios.client.HeliosClient;
import com.spotify.helios.common.Json;
import com.spotify.helios.common.descriptors.JobId;
import com.spotify.helios.common.descriptors.RolloutOptions;
import com.spotify.helios.common.descriptors.TaskStatus;
import com.spotify.helios.common.protocol.DeploymentGroupStatusResponse;
import com.spotify.helios.common.protocol.RollingUpdateResponse;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import net.sourceforge.argparse4j.inf.Argument;
import net.sourceforge.argparse4j.inf.Namespace;
import net.sourceforge.argparse4j.inf.Subparser;

/**
 * CLI command that initiates a rolling update of a deployment group.
 *
 * <p>After the rolling update has been accepted by the master, the command either returns
 * immediately ({@code --async}) or polls the deployment-group status once every
 * {@link #POLL_INTERVAL_MILLIS} milliseconds until the group leaves the {@code ROLLING_OUT}
 * state, the group's job id changes, or the client-side rollout timeout elapses.
 */
public class RollingUpdateCommand extends WildcardJobCommand {

  /** Delay between two consecutive deployment-group status polls. */
  private static final long POLL_INTERVAL_MILLIS = 1000;

  /** Injected so the polling loop can be exercised without actually sleeping. */
  private final SleepFunction sleepFunction;
  /** Source of the current time in milliseconds; injected so elapsed time is controllable. */
  private final Supplier<Long> timeSupplier;

  private final Argument nameArg;
  private final Argument timeoutArg;
  private final Argument parallelismArg;
  private final Argument asyncArg;
  private final Argument rolloutTimeoutArg;
  private final Argument migrateArg;
  private final Argument overlapArg;
  private final Argument tokenArg;
  private final Argument ignoreFailuresArg;

  /**
   * Creates the command using {@link Thread#sleep(long)} for waiting and
   * {@link System#currentTimeMillis()} as the time source.
   *
   * @param parser the subparser on which this command's arguments are registered
   */
  public RollingUpdateCommand(final Subparser parser) {
    this(parser, new SleepFunction() {
      @Override
      public void sleep(final long millis) throws InterruptedException {
        Thread.sleep(millis);
      }
    }, new Supplier<Long>() {
      @Override
      public Long get() {
        return System.currentTimeMillis();
      }
    });
  }

  /**
   * Creates the command with explicit sleep and time sources.
   *
   * @param parser        the subparser on which this command's arguments are registered
   * @param sleepFunction invoked between status polls
   * @param timeSupplier  supplies the current time in milliseconds; used to measure the
   *                      duration of the rollout and to enforce the rollout timeout
   */
  @VisibleForTesting
  RollingUpdateCommand(final Subparser parser, final SleepFunction sleepFunction,
                       final Supplier<Long> timeSupplier) {
    super(parser, true);

    this.sleepFunction = sleepFunction;
    this.timeSupplier = timeSupplier;

    parser.help("Initiate a rolling update");

    nameArg = parser.addArgument("deployment-group-name")
        .required(true)
        .help("Deployment group name");

    timeoutArg = parser.addArgument("-t", "--timeout")
        .setDefault(RolloutOptions.DEFAULT_TIMEOUT)
        .type(Long.class)
        .help("Fail rollout if a job takes longer than this to reach RUNNING (seconds)");

    parallelismArg = parser.addArgument("-p", "--par")
        .dest("parallelism")
        .setDefault(RolloutOptions.DEFAULT_PARALLELISM)
        .type(Integer.class)
        .help("Number of hosts to deploy to concurrently");

    asyncArg = parser.addArgument("--async")
        .action(storeTrue())
        .help("Don't block until rolling-update is complete");

    rolloutTimeoutArg = parser.addArgument("-T", "--rollout-timeout")
        .setDefault(60L)
        .type(Long.class)
        .help("Exit if rolling-update takes longer than the given value (minutes). Note that "
              + "this will NOT abort the rolling update, it will just cause this command to exit.");

    migrateArg = parser.addArgument("--migrate")
        .setDefault(false)
        .action(storeTrue())
        .help("When specified a rolling-update will undeploy not only jobs previously deployed "
              + "by the deployment-group but also jobs with the same job id. Use it ONCE when "
              + "migrating a service to using deployment-groups");

    overlapArg = parser.addArgument("--overlap")
        .setDefault(false)
        .action(storeTrue())
        .help("When specified a rolling-update will, for every host, first deploy the new "
              + "version of a job before undeploying the old one. Note that the command will fail "
              + "if the job contains static port assignments.");

    tokenArg = parser.addArgument("--token")
        .nargs("?")
        .setDefault(EMPTY_TOKEN)
        .help("Insecure access token meant to prevent accidental changes to your job "
              + "(e.g. undeploys).");

    ignoreFailuresArg = parser.addArgument("--ignore-failures")
        .setDefault(false)
        .action(storeTrue())
        .help("When specified, the rolling-update will ignore *all* failures and will proceed "
              + "to deploying the job to all hosts in the deployment group. The rolling-update "
              + "will go through the normal rollout plan (respecting the --par and --overlap "
              + "settings), and will wait for the job to reach RUNNING on each host as normal; "
              + "however, any failure that would otherwise cause the rolling-update to abort and "
              + "set the deployment group's status to FAILED is *ignored*. Be *VERY* careful "
              + "about using this option, as it has the potential to completely take down your "
              + "service by rolling out a broken job to all of the hosts in your group.");
  }

  /**
   * Starts a rolling update of the named deployment group to {@code jobId} and, unless
   * {@code --async} was given, waits for it to finish.
   *
   * @param options parsed command-line options
   * @param client  client used to start the rolling update and poll the group status
   * @param out     stream that receives human-readable or JSON output
   * @param json    whether to emit JSON instead of human-readable text
   * @param jobId   the job to roll out
   * @param stdin   not used by this command
   * @return {@code 0} if the rolling update was started in async mode or finished without
   *         failing; {@code 1} if the request was rejected, the rollout failed, or the
   *         client-side rollout timeout elapsed
   * @throws IllegalArgumentException if timeout, parallelism or rollout timeout is not positive
   */
  @Override
  protected int runWithJobId(final Namespace options, final HeliosClient client,
                             final PrintStream out, final boolean json, final JobId jobId,
                             final BufferedReader stdin)
      throws ExecutionException, InterruptedException, IOException {
    final String name = options.getString(nameArg.getDest());
    final long timeout = options.getLong(timeoutArg.getDest());
    final int parallelism = options.getInt(parallelismArg.getDest());
    final boolean async = options.getBoolean(asyncArg.getDest());
    final long rolloutTimeout = options.getLong(rolloutTimeoutArg.getDest());
    final boolean migrate = options.getBoolean(migrateArg.getDest());
    final boolean overlap = options.getBoolean(overlapArg.getDest());
    final String token = options.getString(tokenArg.getDest());
    final boolean ignoreFailures = options.getBoolean(ignoreFailuresArg.getDest());

    checkArgument(timeout > 0, "Timeout must be greater than 0");
    checkArgument(parallelism > 0, "Parallelism must be greater than 0");
    checkArgument(rolloutTimeout > 0, "Rollout timeout must be greater than 0");

    // Taken before the request is sent, so the reported duration includes the initial call.
    final long startTime = timeSupplier.get();

    final RolloutOptions rolloutOptions = RolloutOptions.newBuilder()
        .setTimeout(timeout)
        .setParallelism(parallelism)
        .setMigrate(migrate)
        .setOverlap(overlap)
        .setToken(token)
        .setIgnoreFailures(ignoreFailures)
        .build();
    final RollingUpdateResponse response = client.rollingUpdate(name, jobId, rolloutOptions).get();

    if (response.getStatus() != RollingUpdateResponse.Status.OK) {
      if (!json) {
        out.println("Failed: " + response);
      } else {
        out.println(response.toJsonString());
      }
      return 1;
    }

    if (!json) {
      out.println(format("Rolling update%s started: %s -> %s "
                         + "(parallelism=%d, timeout=%d, overlap=%b, token=%s, "
                         + "ignoreFailures=%b)%s",
          async ? " (async)" : "",
          name,
          jobId.toShortString(),
          parallelism,
          timeout,
          overlap,
          token,
          ignoreFailures,
          async ? "" : "\n"));
    }

    // Values are heterogeneous (numbers, booleans, strings, enum), hence Object.
    final Map<String, Object> jsonOutput = Maps.newHashMap();
    jsonOutput.put("parallelism", parallelism);
    jsonOutput.put("timeout", timeout);
    jsonOutput.put("overlap", overlap);
    jsonOutput.put("token", token);
    jsonOutput.put("ignoreFailures", ignoreFailures);

    if (async) {
      // Async mode: report that the update was accepted and return without polling.
      if (json) {
        jsonOutput.put("status", response.getStatus());
        out.println(Json.asStringUnchecked(jsonOutput));
      }
      return 0;
    }

    String error = "";
    boolean failed = false;
    boolean timedOut = false;
    // Hosts already printed as done, so each host is reported at most once.
    final Set<String> reported = Sets.newHashSet();
    while (true) {
      final DeploymentGroupStatusResponse status = client.deploymentGroupStatus(name).get();

      if (status == null) {
        failed = true;
        error = "Failed to fetch deployment-group status";
        break;
      }

      if (!jobId.equals(status.getDeploymentGroup().getJobId())) {
        // Another rolling-update was started, overriding this one -- exit
        failed = true;
        error = "Deployment-group job id changed during rolling-update";
        break;
      }

      if (!json) {
        for (final DeploymentGroupStatusResponse.HostStatus hostStatus : status.getHostStatuses()) {
          final JobId hostJobId = hostStatus.getJobId();
          final String host = hostStatus.getHost();
          final TaskStatus.State state = hostStatus.getState();
          // A host counts as done once it runs the job being rolled out.
          final boolean done = hostJobId != null
                               && hostJobId.equals(jobId)
                               && state == TaskStatus.State.RUNNING;

          if (done && reported.add(host)) {
            out.println(format("%s -> %s (%d/%d)", host, state,
                reported.size(), status.getHostStatuses().size()));
          }
        }
      }

      if (status.getStatus() != DeploymentGroupStatusResponse.Status.ROLLING_OUT) {
        // Any state other than ROLLING_OUT ends the loop; only FAILED is treated as an error.
        if (status.getStatus() == DeploymentGroupStatusResponse.Status.FAILED) {
          failed = true;
          error = status.getError();
        }
        break;
      }

      if (timeSupplier.get() - startTime > TimeUnit.MINUTES.toMillis(rolloutTimeout)) {
        // Rollout timed out
        timedOut = true;
        break;
      }

      sleepFunction.sleep(POLL_INTERVAL_MILLIS);
    }

    // Elapsed time in seconds.
    final double duration = (timeSupplier.get() - startTime) / 1000.0;

    if (json) {
      if (failed) {
        jsonOutput.put("status", "FAILED");
        jsonOutput.put("error", error);
      } else if (timedOut) {
        jsonOutput.put("status", "TIMEOUT");
      } else {
        jsonOutput.put("status", "DONE");
      }
      jsonOutput.put("duration", duration);
      out.println(Json.asStringUnchecked(jsonOutput));
    } else {
      out.println();
      if (failed) {
        out.println(format("Failed: %s", error));
      } else if (timedOut) {
        out.println("Timed out! (rolling-update still in progress)");
      } else {
        out.println("Done.");
      }
      out.println(format("Duration: %.2f s", duration));
    }

    return (failed || timedOut) ? 1 : 0;
  }

  /** Abstraction over sleeping, so callers can substitute a non-blocking implementation. */
  interface SleepFunction {
    /**
     * Pauses for the given amount of time.
     *
     * @param millis how long to sleep, in milliseconds
     * @throws InterruptedException if the sleep is interrupted
     */
    void sleep(long millis) throws InterruptedException;
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy