// Listing: com.spotify.helios.cli.command.RollingUpdateCommand (Maven / Gradle / Ivy)
/*-
* -\-\-
* Helios Tools
* --
* Copyright (C) 2016 Spotify AB
* --
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* -/-/-
*/
package com.spotify.helios.cli.command;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.String.format;
import static net.sourceforge.argparse4j.impl.Arguments.storeTrue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.MoreObjects;
import com.google.common.base.Supplier;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.spotify.helios.client.HeliosClient;
import com.spotify.helios.common.Json;
import com.spotify.helios.common.descriptors.Job;
import com.spotify.helios.common.descriptors.JobId;
import com.spotify.helios.common.descriptors.RolloutOptions;
import com.spotify.helios.common.descriptors.TaskStatus;
import com.spotify.helios.common.protocol.DeploymentGroupStatusResponse;
import com.spotify.helios.common.protocol.RollingUpdateResponse;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import net.sourceforge.argparse4j.inf.Argument;
import net.sourceforge.argparse4j.inf.Namespace;
import net.sourceforge.argparse4j.inf.Subparser;
public class RollingUpdateCommand extends WildcardJobCommand {
private static final long POLL_INTERVAL_MILLIS = 1000;
private final SleepFunction sleepFunction;
private final Supplier timeSupplier;
private final Argument nameArg;
private final Argument timeoutArg;
private final Argument parallelismArg;
private final Argument asyncArg;
private final Argument rolloutTimeoutArg;
private final Argument migrateArg;
private final Argument overlapArg;
private final Argument tokenArg;
private final Argument ignoreFailuresArg;
public RollingUpdateCommand(final Subparser parser) {
this(parser, new SleepFunction() {
@Override
public void sleep(final long millis) throws InterruptedException {
Thread.sleep(millis);
}
}, new Supplier() {
@Override
public Long get() {
return System.currentTimeMillis();
}
});
}
@VisibleForTesting
RollingUpdateCommand(final Subparser parser, final SleepFunction sleepFunction,
final Supplier timeSupplier) {
super(parser, true);
this.sleepFunction = sleepFunction;
this.timeSupplier = timeSupplier;
parser.help("Initiate a rolling update");
nameArg = parser.addArgument("deployment-group-name")
.required(true)
.help("Deployment group name");
timeoutArg = parser.addArgument("-t", "--timeout")
.type(Long.class)
.help("Fail rollout if a job takes longer than this to reach RUNNING (seconds)");
parallelismArg = parser.addArgument("-p", "--par")
.dest("parallelism")
.type(Integer.class)
.help("Number of hosts to deploy to concurrently");
asyncArg = parser.addArgument("--async")
.action(storeTrue())
.help("Don't block until rolling-update is complete");
rolloutTimeoutArg = parser.addArgument("-T", "--rollout-timeout")
.setDefault(60L)
.type(Long.class)
.help("Exit if rolling-update takes longer than the given value (minutes). Note that "
+ "this will NOT abort the rolling update, it will just cause this command to exit.");
migrateArg = parser.addArgument("--migrate")
.action(storeTrue())
.setDefault((Object) null)
.help("When specified a rolling-update will undeploy not only jobs previously deployed "
+ "by the deployment-group but also jobs with the same job id. Use it ONCE when "
+ "migrating a service to using deployment-groups");
overlapArg = parser.addArgument("--overlap")
.action(storeTrue())
.setDefault((Object) null)
.help("When specified a rolling-update will, for every host, first deploy the new "
+ "version of a job before undeploying the old one. Note that the command will fail "
+ "if the job contains static port assignments.");
tokenArg = parser.addArgument("--token")
.nargs("?")
.help("Insecure access token meant to prevent accidental changes to your job "
+ "(e.g. undeploys).");
ignoreFailuresArg = parser.addArgument("--ignore-failures")
.action(storeTrue())
.setDefault((Object) null)
.help("When specified, the rolling-update will ignore *all* failures and will proceed "
+ "to deploying the job to all hosts in the deployment group. The rolling-update "
+ "will go through the normal rollout plan (respecting the --par and --overlap "
+ "settings), and will wait for the job to reach RUNNING on each host as normal; "
+ "however, any failure that would otherwise cause the rolling-update to abort and "
+ "set the deployment group's status to FAILED is *ignored*. Be *VERY* careful "
+ "about using this option, as it has the potential to completely take down your "
+ "service by rolling out a broken job to all of the hosts in your group.");
}
@Override
protected int runWithJob(final Namespace options, final HeliosClient client,
final PrintStream out, final boolean json, final Job job,
final BufferedReader stdin)
throws ExecutionException, InterruptedException, IOException {
final JobId jobId = job.getId();
final String name = options.getString(nameArg.getDest());
final Long timeout = options.getLong(timeoutArg.getDest());
final Integer parallelism = options.getInt(parallelismArg.getDest());
final boolean async = options.getBoolean(asyncArg.getDest());
final long rolloutTimeout = options.getLong(rolloutTimeoutArg.getDest());
final Boolean migrate = options.getBoolean(migrateArg.getDest());
final Boolean overlap = options.getBoolean(overlapArg.getDest());
final String token = options.getString(tokenArg.getDest());
final Boolean ignoreFailures = options.getBoolean(ignoreFailuresArg.getDest());
checkArgument(timeout == null || timeout > 0, "Timeout must be greater than 0");
checkArgument(parallelism == null || parallelism > 0, "Parallelism must be greater than 0");
checkArgument(rolloutTimeout > 0, "Rollout timeout must be greater than 0");
final long startTime = timeSupplier.get();
final RolloutOptions rolloutOptions = RolloutOptions.newBuilder()
.setTimeout(timeout)
.setParallelism(parallelism)
.setMigrate(migrate)
.setOverlap(overlap)
.setToken(token)
.setIgnoreFailures(ignoreFailures)
.build();
final RollingUpdateResponse response = client.rollingUpdate(name, jobId, rolloutOptions).get();
if (response.getStatus() != RollingUpdateResponse.Status.OK) {
if (!json) {
out.println("Failed: " + response);
} else {
out.println(response.toJsonString());
}
return 1;
}
final RolloutOptions optionsFromJob =
MoreObjects.firstNonNull(job.getRolloutOptions(), RolloutOptions.getDefault());
final Integer actualParallelism =
nullableWithFallback(parallelism, optionsFromJob.getParallelism());
final Long actualTimeout = nullableWithFallback(timeout, optionsFromJob.getTimeout());
final Boolean actualOverlap = nullableWithFallback(overlap, optionsFromJob.getOverlap());
final String actualToken = nullableWithFallback(token, optionsFromJob.getToken());
final Boolean actualIgnoreFailures =
nullableWithFallback(ignoreFailures, optionsFromJob.getIgnoreFailures());
if (!json) {
out.println(format("Rolling update%s started: %s -> %s "
+ "(parallelism=%d, timeout=%d, overlap=%b, token=%s, "
+ "ignoreFailures=%b)%s",
async ? " (async)" : "",
name,
jobId.toShortString(),
actualParallelism,
actualTimeout,
actualOverlap,
actualToken,
actualIgnoreFailures,
async ? "" : "\n"));
}
final Map jsonOutput = Maps.newHashMap();
jsonOutput.put("parallelism", actualParallelism);
jsonOutput.put("timeout", actualTimeout);
jsonOutput.put("overlap", actualOverlap);
jsonOutput.put("token", actualToken);
jsonOutput.put("ignoreFailures", actualIgnoreFailures);
if (async) {
if (json) {
jsonOutput.put("status", response.getStatus());
out.println(Json.asStringUnchecked(jsonOutput));
}
return 0;
}
String error = "";
boolean failed = false;
boolean timedOut = false;
final Set reported = Sets.newHashSet();
while (true) {
final DeploymentGroupStatusResponse status = client.deploymentGroupStatus(name).get();
if (status == null) {
failed = true;
error = "Failed to fetch deployment-group status";
break;
}
if (!jobId.equals(status.getDeploymentGroup().getJobId())) {
// Another rolling-update was started, overriding this one -- exit
failed = true;
error = "Deployment-group job id changed during rolling-update";
break;
}
if (!json) {
for (final DeploymentGroupStatusResponse.HostStatus hostStatus : status.getHostStatuses()) {
final JobId hostJobId = hostStatus.getJobId();
final String host = hostStatus.getHost();
final TaskStatus.State state = hostStatus.getState();
final boolean done = hostJobId != null
&& hostJobId.equals(jobId)
&& state == TaskStatus.State.RUNNING;
if (done && reported.add(host)) {
out.println(format("%s -> %s (%d/%d)", host, state,
reported.size(), status.getHostStatuses().size()));
}
}
}
if (status.getStatus() != DeploymentGroupStatusResponse.Status.ROLLING_OUT) {
if (status.getStatus() == DeploymentGroupStatusResponse.Status.FAILED) {
failed = true;
error = status.getError();
}
break;
}
if (timeSupplier.get() - startTime > TimeUnit.MINUTES.toMillis(rolloutTimeout)) {
// Rollout timed out
timedOut = true;
break;
}
sleepFunction.sleep(POLL_INTERVAL_MILLIS);
}
final double duration = (timeSupplier.get() - startTime) / 1000.0;
if (json) {
if (failed) {
jsonOutput.put("status", "FAILED");
jsonOutput.put("error", error);
} else if (timedOut) {
jsonOutput.put("status", "TIMEOUT");
} else {
jsonOutput.put("status", "DONE");
}
jsonOutput.put("duration", duration);
out.println(Json.asStringUnchecked(jsonOutput));
} else {
out.println();
if (failed) {
out.println(format("Failed: %s", error));
} else if (timedOut) {
out.println("Timed out! (rolling-update still in progress)");
} else {
out.println("Done.");
}
out.println(format("Duration: %.2f s", duration));
}
return (failed || timedOut) ? 1 : 0;
}
interface SleepFunction {
void sleep(long millis) throws InterruptedException;
}
/**
* Return first argument if not null. Otherwise return second argument.
*/
private T nullableWithFallback(final T first, final T second) {
return first != null ? first : second;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy