package io.hyperfoil.clustering;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.ServiceLoader;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.infinispan.commons.api.BasicCacheContainer;
import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.hyperfoil.api.BenchmarkExecutionException;
import io.hyperfoil.api.config.Agent;
import io.hyperfoil.api.config.Benchmark;
import io.hyperfoil.api.config.BenchmarkSource;
import io.hyperfoil.api.config.Model;
import io.hyperfoil.api.config.Phase;
import io.hyperfoil.api.config.RunHook;
import io.hyperfoil.api.config.SessionLimitPolicy;
import io.hyperfoil.api.deployment.DeployedAgent;
import io.hyperfoil.api.deployment.Deployer;
import io.hyperfoil.api.session.GlobalData;
import io.hyperfoil.api.session.PhaseInstance;
import io.hyperfoil.clustering.messages.AgentControlMessage;
import io.hyperfoil.clustering.messages.AgentHello;
import io.hyperfoil.clustering.messages.AgentReadyMessage;
import io.hyperfoil.clustering.messages.AgentStatusMessage;
import io.hyperfoil.clustering.messages.AuxiliaryHello;
import io.hyperfoil.clustering.messages.ConnectionStatsMessage;
import io.hyperfoil.clustering.messages.DelayStatsCompletionMessage;
import io.hyperfoil.clustering.messages.ErrorMessage;
import io.hyperfoil.clustering.messages.PhaseChangeMessage;
import io.hyperfoil.clustering.messages.PhaseControlMessage;
import io.hyperfoil.clustering.messages.PhaseStatsCompleteMessage;
import io.hyperfoil.clustering.messages.RequestStatsMessage;
import io.hyperfoil.clustering.messages.SessionStatsMessage;
import io.hyperfoil.clustering.messages.StatsMessage;
import io.hyperfoil.clustering.util.PersistenceUtil;
import io.hyperfoil.controller.CsvWriter;
import io.hyperfoil.controller.JsonLoader;
import io.hyperfoil.controller.JsonWriter;
import io.hyperfoil.controller.StatisticsStore;
import io.hyperfoil.core.hooks.ExecRunHook;
import io.hyperfoil.core.parser.BenchmarkParser;
import io.hyperfoil.core.parser.ParserException;
import io.hyperfoil.core.util.CountDown;
import io.hyperfoil.core.util.LowHigh;
import io.hyperfoil.internal.Controller;
import io.hyperfoil.internal.Properties;
import io.vertx.core.AbstractVerticle;
import io.vertx.core.AsyncResult;
import io.vertx.core.DeploymentOptions;
import io.vertx.core.Future;
import io.vertx.core.Handler;
import io.vertx.core.Promise;
import io.vertx.core.eventbus.EventBus;
import io.vertx.core.eventbus.Message;
import io.vertx.core.eventbus.ReplyException;
import io.vertx.core.impl.VertxInternal;
import io.vertx.core.json.JsonArray;
import io.vertx.core.json.JsonObject;
import io.vertx.core.spi.cluster.ClusterManager;
import io.vertx.core.spi.cluster.NodeListener;
import io.vertx.ext.cluster.infinispan.InfinispanClusterManager;
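// Controller verticle that coordinates benchmark runs: it tracks agents over the event bus
// (discovery, response and stats feeds), schedules phase transitions, aggregates statistics
// and persists completed runs to disk.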
public class ControllerVerticle extends AbstractVerticle implements NodeListener {
private static final Logger log = LogManager.getLogger(ControllerVerticle.class);
private static final int MAX_IN_MEMORY_RUNS = Properties.getInt(Properties.MAX_IN_MEMORY_RUNS, 20);
static final String DEFAULT_STATS_JSON = "all.json";
private EventBus eb;
private ControllerServer server;
private Deployer deployer;
private final AtomicInteger runIds = new AtomicInteger();
private final Map<String, Benchmark> benchmarks = new HashMap<>();
private final Map<String, BenchmarkSource> templates = new HashMap<>();
private long timerId = -1;
Map<String, Run> runs = new HashMap<>();
@Override
public void start(Promise<Void> future) {
log.info("Starting in directory {}...", Controller.ROOT_DIR);
CountDown startCountDown = new CountDown(future, 2);
server = new ControllerServer(this, startCountDown);
vertx.exceptionHandler(throwable -> log.error("Uncaught error: ", throwable));
if (Files.exists(Controller.RUN_DIR)) {
try {
Files.list(Controller.RUN_DIR).forEach(this::updateRuns);
} catch (IOException e) {
log.error("Could not list run dir contents", e);
} catch (Exception e) {
log.error("Cannot load previous runs from {}", Controller.RUN_DIR, e);
}
}
//noinspection ResultOfMethodCallIgnored
Controller.HOOKS_DIR.resolve("pre").toFile().mkdirs();
//noinspection ResultOfMethodCallIgnored
Controller.HOOKS_DIR.resolve("post").toFile().mkdirs();
eb = vertx.eventBus();
eb.consumer(Feeds.DISCOVERY, message -> {
if (message.body() instanceof AgentHello) {
handleAgentHello(message, (AgentHello) message.body());
} else if (message.body() instanceof AuxiliaryHello) {
AuxiliaryHello hello = (AuxiliaryHello) message.body();
log.info("Noticed auxiliary {} (node {}, {})", hello.name(), hello.nodeId(), hello.deploymentId());
String nodeId = ((VertxInternal) vertx).getClusterManager().getNodeId();
message.reply(nodeId);
} else {
log.error("Unknown message on discovery feed! {}", message.body());
}
});
eb.consumer(Feeds.RESPONSE, message -> {
AgentStatusMessage msg = (AgentStatusMessage) message.body();
Run run = runs.get(msg.runId());
if (run == null) {
log.error("No run {}", msg.runId());
return;
}
AgentInfo agent = run.agents.stream().filter(a -> a.deploymentId.equals(msg.senderId())).findAny().orElse(null);
if (agent == null) {
log.error("No agent {} in run {}", msg.senderId(), run.id);
return;
}
if (msg instanceof PhaseChangeMessage) {
handlePhaseChange(run, agent, (PhaseChangeMessage) msg);
} else if (msg instanceof ErrorMessage) {
ErrorMessage errorMessage = (ErrorMessage) msg;
run.errors.add(new Run.Error(agent, errorMessage.error()));
if (errorMessage.isFatal()) {
agent.status = AgentInfo.Status.FAILED;
stopSimulation(run);
}
} else if (msg instanceof AgentReadyMessage) {
if (!run.validation) {
agent.status = AgentInfo.Status.READY;
if (run.agents.stream().allMatch(a -> a.status == AgentInfo.Status.READY)) {
startSimulation(run);
}
} else { //stop simulation from running if a validation run
agent.status = AgentInfo.Status.STOPPED;
stopSimulation(run);
}
} else {
log.error("Unexpected type of message: {}", msg);
}
});
eb.consumer(Feeds.STATS, message -> {
if (!(message.body() instanceof StatsMessage)) {
log.error("Unknown message type: {}", message.body());
return;
}
StatsMessage statsMessage = (StatsMessage) message.body();
Run run = runs.get(statsMessage.runId);
if (run != null) {
String agentName = run.agents.stream()
.filter(ai -> ai.deploymentId.equals(statsMessage.address))
.map(ai -> ai.name).findFirst().orElse("");
if (statsMessage instanceof RequestStatsMessage) {
RequestStatsMessage rsm = (RequestStatsMessage) statsMessage;
String phase = run.phase(rsm.phaseId);
if (rsm.statistics != null) {
log.debug("Run {}: Received stats from {}({}): {}/{}/{}:{} ({} requests)",
rsm.runId, agentName, rsm.address, phase, rsm.stepId, rsm.metric,
rsm.statistics.sequenceId, rsm.statistics.requestCount);
boolean added = run.statisticsStore().record(agentName, rsm.phaseId, rsm.stepId, rsm.metric, rsm.statistics);
if (!added) {
// warning already logged
String errorMessage = String.format(
"Received statistics for %s/%d/%s:%d with %d requests but the statistics are already completed; these statistics won't be reported.",
phase, rsm.stepId, rsm.metric, rsm.statistics.sequenceId, rsm.statistics.requestCount);
run.errors.add(new Run.Error(null, new BenchmarkExecutionException(errorMessage)));
}
}
} else if (statsMessage instanceof PhaseStatsCompleteMessage) {
PhaseStatsCompleteMessage pscm = (PhaseStatsCompleteMessage) statsMessage;
log.debug("Run {}: Received stats completion for phase {} from {}", run.id, pscm.phase, pscm.address);
AgentInfo agent = run.agents.stream().filter(a -> a.deploymentId.equals(pscm.address)).findFirst().orElse(null);
if (agent == null) {
log.error("Run {}: Cannot find agent {}", run.id, pscm.address);
} else {
PhaseInstance.Status prevStatus = agent.phases.put(pscm.phase, PhaseInstance.Status.STATS_COMPLETE);
if (prevStatus == PhaseInstance.Status.STATS_COMPLETE) {
// TODO: the stats might be completed both regularly and when the agent receives STOP
log.info("Run {}: stats for phase {} are already completed, ignoring.", run.id, pscm.phase);
} else if (run.agents.stream().map(a -> a.phases.get(pscm.phase))
.allMatch(s -> s == PhaseInstance.Status.STATS_COMPLETE)) {
ControllerPhase controllerPhase = run.phases.get(pscm.phase);
if (controllerPhase != null) {
tryCompletePhase(run, pscm.phase, controllerPhase);
} else if (!run.validation) {
// if run.validation is true, startSimulation is not executed and the phases are never
// added to run.phases, so it is expected that the phase is not found;
// log the error only when the phase is missing and this is not just a validation run
log.error("Run {}: Cannot find phase {}!", run.id, pscm.phase);
}
}
}
} else if (statsMessage instanceof SessionStatsMessage) {
SessionStatsMessage sessionStatsMessage = (SessionStatsMessage) statsMessage;
log.trace("Run {}: Received session pool stats from {}", sessionStatsMessage.runId, sessionStatsMessage.address);
for (Map.Entry<String, LowHigh> entry : sessionStatsMessage.sessionStats.entrySet()) {
run.statisticsStore().recordSessionStats(agentName, sessionStatsMessage.timestamp,
entry.getKey(), entry.getValue().low, entry.getValue().high);
}
} else if (statsMessage instanceof ConnectionStatsMessage) {
ConnectionStatsMessage connectionStatsMessage = (ConnectionStatsMessage) statsMessage;
log.trace("Run {}: Received connection stats from {}", connectionStatsMessage.runId,
connectionStatsMessage.address);
run.statisticsStore().recordConnectionStats(agentName, connectionStatsMessage.timestamp,
connectionStatsMessage.stats);
} else if (statsMessage instanceof DelayStatsCompletionMessage) {
DelayStatsCompletionMessage delayStatsCompletionMessage = (DelayStatsCompletionMessage) statsMessage;
String phase = run.phase(delayStatsCompletionMessage.phaseId);
log.trace("Run {}: Received request for extension from {} for phase {} by {} ms",
delayStatsCompletionMessage.runId, delayStatsCompletionMessage.address,
phase, delayStatsCompletionMessage.delay);
ControllerPhase controllerPhase = run.phases.get(phase);
controllerPhase.delayStatsCompletionUntil(System.currentTimeMillis() + delayStatsCompletionMessage.delay);
}
} else {
log.error("Unknown run {}", statsMessage.runId);
}
message.reply("OK");
});
if (vertx.isClustered()) {
for (Deployer.Factory deployerFactory : ServiceLoader.load(Deployer.Factory.class)) {
log.debug("Found deployer {}", deployerFactory.name());
if (Controller.DEPLOYER.equals(deployerFactory.name())) {
deployer = deployerFactory.create();
break;
}
}
if (deployer == null) {
throw new IllegalStateException(
"Hyperfoil is running in clustered mode but it couldn't load deployer '" + Controller.DEPLOYER + "'");
}
if (vertx instanceof VertxInternal) {
ClusterManager clusterManager = ((VertxInternal) vertx).getClusterManager();
clusterManager.nodeListener(this);
}
}
if (!Controller.BENCHMARK_DIR.toFile().exists() && !Controller.BENCHMARK_DIR.toFile().mkdirs()) {
log.error("Failed to create benchmark directory: {}", Controller.BENCHMARK_DIR);
}
startCountDown.increment();
loadBenchmarks(startCountDown);
startCountDown.countDown();
}
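// Completes statistics for a phase once all agents reported STATS_COMPLETE, honoring any
// requested delay; on SLA failure the phase is marked failed and, depending on the failure
// policy, the remaining phases are cancelled.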
private void tryCompletePhase(Run run, String phase, ControllerPhase controllerPhase) {
long delay = controllerPhase.delayStatsCompletionUntil() == null ? -1
: controllerPhase.delayStatsCompletionUntil() - System.currentTimeMillis();
if (delay <= 0) {
log.info("Run {}: completing stats for phase {}", run.id, phase);
run.statisticsStore().completePhase(phase);
if (!run.statisticsStore().validateSlas()) {
log.info("SLA validation failed for {}", phase);
controllerPhase.setFailed();
if (run.benchmark.failurePolicy() == Benchmark.FailurePolicy.CANCEL) {
failNotStartedPhases(run, controllerPhase);
}
}
} else {
log.info("Run {}: all agents completed stats for phase {} but delaying for {} ms", run.id, phase, delay);
vertx.setTimer(delay, ignored -> tryCompletePhase(run, phase, controllerPhase));
}
}
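// Registers an agent that announced itself for a run; once all expected agents have
// registered, initialization is sent to them.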
private void handleAgentHello(Message<Object> message, AgentHello hello) {
String runId = hello.runId();
Run run = runs.get(runId);
if (run == null) {
log.error("Unknown run ID {}", runId);
message.fail(1, "Unknown run ID");
return;
}
AgentInfo agentInfo = run.agents.stream().filter(a -> a.name.equals(hello.name())).findAny().orElse(null);
if (agentInfo == null) {
log.error("Unknown agent {} ({}/{})", hello.name(), hello.nodeId(), hello.deploymentId());
message.fail(1, "Unknown agent");
return;
}
if (agentInfo.status != AgentInfo.Status.STARTING) {
log.info("Ignoring message, {} is not starting", agentInfo.name);
message.reply("Ignoring");
return;
}
log.debug("Registering agent {} ({}/{})", hello.name(), hello.nodeId(), hello.deploymentId());
agentInfo.nodeId = hello.nodeId();
agentInfo.deploymentId = hello.deploymentId();
agentInfo.status = AgentInfo.Status.REGISTERED;
message.reply("Registered");
if (run.agents.stream().allMatch(a -> a.status != AgentInfo.Status.STARTING)) {
handleAgentsStarted(run);
} else {
log.debug("Waiting for registration from agents {}",
run.agents.stream().filter(a -> a.status == AgentInfo.Status.STARTING).collect(Collectors.toList()));
}
}
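// Records a phase status change reported by an agent, including CPU usage, session-limit
// violations and errors, and then advances the simulation.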
private void handlePhaseChange(Run run, AgentInfo agent, PhaseChangeMessage phaseChange) {
String phase = phaseChange.phase();
log.debug("{} Received phase change from {}: {} is {} (session limit exceeded={}, CPU usage={} errors={})", run.id,
phaseChange.senderId(), phase, phaseChange.status(), phaseChange.sessionLimitExceeded(),
phaseChange.cpuUsage(), phaseChange.getError());
agent.phases.put(phase, phaseChange.status());
ControllerPhase controllerPhase = run.phases.get(phase);
if (phaseChange.cpuUsage() != null) {
run.statisticsStore().recordCpuUsage(phaseChange.phase(), agent.name, phaseChange.cpuUsage());
}
if (phaseChange.sessionLimitExceeded()) {
Phase def = controllerPhase.definition();
SessionLimitPolicy sessionLimitPolicy = def.model instanceof Model.OpenModel
? ((Model.OpenModel) def.model).sessionLimitPolicy
: null;
if (sessionLimitPolicy == SessionLimitPolicy.CONTINUE) {
log.warn("{} Phase {} session limit exceeded, continuing due to policy {}", run.id, def.name, sessionLimitPolicy);
// We must not record this as a failure as StatisticsStore.validateSlas() would cancel the benchmark
} else {
run.statisticsStore().addFailure(def.name, null, controllerPhase.absoluteStartTime(), System.currentTimeMillis(),
"Exceeded session limit");
log.info("{} Failing phase due to exceeded session limit.", run.id);
controllerPhase.setFailed();
}
}
if (phaseChange.getError() != null) {
log.error("{} Failing phase {} as agent {} reports error: {}", run.id,
controllerPhase.definition().name, agent.name, phaseChange.getError().getMessage());
controllerPhase.setFailed();
run.errors.add(new Run.Error(agent, phaseChange.getError()));
}
controllerPhase.addGlobalData(phaseChange.globalData());
tryProgressStatus(run, phase);
runSimulation(run);
}
@Override
public void nodeAdded(String nodeID) {
}
@Override
public void nodeLeft(String nodeID) {
for (Run run : runs.values()) {
if (run.terminateTime.future().isComplete()) {
continue;
}
for (AgentInfo agent : run.agents) {
if (Objects.equals(agent.nodeId, nodeID)) {
agent.status = AgentInfo.Status.FAILED;
run.errors.add(new Run.Error(agent, new BenchmarkExecutionException("Agent unexpectedly left the cluster.")));
kill(run, result -> {
/* used version of checkstyle does not implement allowEmptyLambdas */
});
stopSimulation(run);
break;
}
}
}
}
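// Restores metadata of a past run from its directory (info.json) and registers it with a
// lazy loader for its persisted statistics.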
private void updateRuns(Path runDir) {
File file = runDir.toFile();
if (!file.getName().matches("[0-9A-F][0-9A-F][0-9A-F][0-9A-F]")) {
return;
}
String runId = file.getName();
int id = Integer.parseInt(runId, 16);
if (id >= runIds.get()) {
runIds.set(id + 1);
}
Path infoFile = runDir.resolve("info.json");
JsonObject info = new JsonObject();
if (infoFile.toFile().exists() && infoFile.toFile().isFile()) {
try {
info = new JsonObject(Files.readString(infoFile));
} catch (Exception e) {
log.error("Cannot read info for run {}", runId);
return;
}
}
String name = info.getString("benchmark", "");
JsonObject paramsObject = info.getJsonObject("params");
Map<String, String> templateParams = paramsObject == null ? Collections.emptyMap()
: paramsObject.getMap().entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey, entry -> String.valueOf(entry.getValue())));
Benchmark benchmark = Benchmark.empty(name, templateParams);
Run run = new Run(runId, runDir, benchmark);
run.statsSupplier = () -> loadStats(runDir.resolve(DEFAULT_STATS_JSON), benchmark);
run.completed = true;
run.startTime = info.getLong("startTime", 0L);
run.terminateTime.complete(info.getLong("terminateTime", 0L));
run.description = info.getString("description");
JsonArray errors = info.getJsonArray("errors");
if (errors != null) {
run.errors.addAll(errors.stream()
.map(JsonObject.class::cast)
.map(e -> new Run.Error(new AgentInfo(e.getString("agent"), -1), new Throwable(e.getString("msg"))))
.collect(Collectors.toList()));
}
run.cancelled = info.getBoolean("cancelled", Boolean.FALSE);
runs.put(runId, run);
}
private StatisticsStore loadStats(Path jsonPath, Benchmark benchmark) {
File statsJson = jsonPath.toFile();
if (!statsJson.exists() || !statsJson.isFile() || !statsJson.canRead()) {
log.error("Cannot load stats from {}", jsonPath);
return null;
}
log.info("Loading stats from {}", jsonPath);
StatisticsStore store = new StatisticsStore(benchmark, f -> {
});
try {
JsonLoader.read(Files.readString(jsonPath, StandardCharsets.UTF_8), store);
} catch (Exception e) {
log.error("Cannot load stats from {}", jsonPath, e);
return null;
}
return store;
}
@Override
public void stop(Promise<Void> stopFuture) throws Exception {
if (deployer != null) {
deployer.close();
}
server.stop(stopFuture);
}
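// Computes the lowest phase status across all agents and propagates it to the controller's
// view of the phase; a failed phase cancels all not-yet-started phases.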
private void tryProgressStatus(Run run, String phase) {
PhaseInstance.Status minStatus = PhaseInstance.Status.TERMINATED;
for (AgentInfo a : run.agents) {
PhaseInstance.Status status = a.phases.get(phase);
if (status == null) {
// The status is not defined on one of the nodes, so we can't progress it.
return;
} else if (status.ordinal() < minStatus.ordinal()) {
minStatus = status;
}
}
ControllerPhase controllerPhase = run.phases.get(phase);
if (controllerPhase == null) {
log.error("Cannot find phase {} in run {}", phase, run.id);
return;
}
switch (minStatus) {
case RUNNING:
controllerPhase.status(run.id, ControllerPhase.Status.RUNNING);
break;
case FINISHED:
controllerPhase.status(run.id, ControllerPhase.Status.FINISHED);
break;
case TERMINATED:
case STATS_COMPLETE: // not sure if this can happen
controllerPhase.status(run.id, ControllerPhase.Status.TERMINATED);
controllerPhase.absoluteCompletionTime(System.currentTimeMillis());
run.newGlobalData.putAll(run.phases.get(phase).completeGlobalData());
break;
}
if (controllerPhase.isFailed()) {
failNotStartedPhases(run, controllerPhase);
}
}
private void failNotStartedPhases(Run run, ControllerPhase controllerPhase) {
log.info("Phase {} failed, cancelling other phases...", controllerPhase.definition().name());
for (ControllerPhase p : run.phases.values()) {
if (p.status() == ControllerPhase.Status.NOT_STARTED) {
p.status(run.id, ControllerPhase.Status.CANCELLED);
}
}
}
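// Allocates a new run ID, creates its directory, initializes the statistics store and
// persists the benchmark source.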
Run createRun(Benchmark benchmark, String description, Boolean validate) {
ensureMaxInMemoryRuns();
String runId = String.format("%04X", runIds.getAndIncrement());
Path runDir = Controller.RUN_DIR.resolve(runId);
//noinspection ResultOfMethodCallIgnored
runDir.toFile().mkdirs();
Run run = new Run(runId, runDir, benchmark, validate);
run.initStore(new StatisticsStore(benchmark, failure -> log.warn("Failed verify SLA(s) for {}/{}: {}",
failure.phase(), failure.metric(), failure.message())));
run.description = description;
runs.put(run.id, run);
if (run.benchmark.source() != null) {
PersistenceUtil.store(run.benchmark.source(), run.dir);
}
return run;
}
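// Unloads the oldest in-memory runs when the configured limit would be exceeded, replacing
// their statistics with an on-demand loader.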
private void ensureMaxInMemoryRuns() {
List<Run> loadedRuns = runs.values().stream().filter(Run::isLoaded)
.sorted(Comparator.comparing(r -> r.id)).collect(Collectors.toList());
if (loadedRuns.size() + 1 > MAX_IN_MEMORY_RUNS) {
loadedRuns.stream().limit(loadedRuns.size() + 1 - MAX_IN_MEMORY_RUNS).forEach(r -> {
log.info("Unloading run {}", r.id);
r.unload();
r.statsSupplier = () -> {
Path statsPath = Controller.RUN_DIR.resolve(r.id).resolve(DEFAULT_STATS_JSON);
return loadStats(statsPath, r.benchmark);
};
});
}
}
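// Deploys the agents for a run (in-VM when not clustered, via the deployer otherwise),
// rejecting the start if any required agent is already in use by an active run, and arms the
// deployment timeout. Returns an error message, or null on success.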
@SuppressWarnings("deprecation") // Uses a deprecated executeBlocking call that should be addressed later. This is tracked in https://github.com/Hyperfoil/Hyperfoil/issues/493
String startBenchmark(Run run, Boolean validation) {
Set<String> activeAgents = new HashSet<>();
for (Run r : runs.values()) {
if (!r.terminateTime.future().isComplete()) {
for (AgentInfo agent : r.agents) {
activeAgents.add(agent.name);
}
}
}
for (Agent agent : run.benchmark.agents()) {
if (activeAgents.contains(agent.name)) {
long currentTime = System.currentTimeMillis();
run.startTime = currentTime;
run.terminateTime.complete(currentTime);
run.completed = true;
return "Agent " + agent + " is already used; try starting the benchmark later";
}
}
if (run.benchmark.agents().length == 0) {
if (vertx.isClustered()) {
long currentTime = System.currentTimeMillis();
run.startTime = currentTime;
run.terminateTime.complete(currentTime);
run.completed = true;
return "Server is started in clustered mode; benchmarks must define agents.";
} else {
run.agents.add(new AgentInfo("in-vm", 0));
JsonObject config = new JsonObject().put("runId", run.id).put("name", "in-vm");
vertx.deployVerticle(AgentVerticle.class, new DeploymentOptions().setConfig(config));
}
} else {
if (!vertx.isClustered()) {
return "Server is not started as clustered and does not accept benchmarks with agents defined.";
}
log.info("Starting agents for run {}", run.id);
int agentCounter = 0;
for (Agent agent : run.benchmark.agents()) {
AgentInfo agentInfo = new AgentInfo(agent.name, agentCounter++);
run.agents.add(agentInfo);
log.debug("Starting agent {}", agent.name);
vertx.executeBlocking(
future -> agentInfo.deployedAgent = deployer.start(agent, run.id, run.benchmark, exception -> {
if (agentInfo.status.ordinal() < AgentInfo.Status.STOPPING.ordinal()) {
run.errors.add(
new Run.Error(agentInfo, new BenchmarkExecutionException("Failed to deploy agent", exception)));
log.error("Failed to deploy agent {}", agent.name, exception);
vertx.runOnContext(nil -> stopSimulation(run));
}
}), false, result -> {
if (result.failed()) {
run.errors.add(new Run.Error(agentInfo,
new BenchmarkExecutionException("Failed to start agent", result.cause())));
log.error("Failed to start agent {}", agent.name, result.cause());
vertx.runOnContext(nil -> stopSimulation(run));
}
});
}
}
run.deployTimerId = vertx.setTimer(Controller.DEPLOY_TIMEOUT, id -> {
log.error("{} Deployment timed out.", run.id);
run.errors.add(new Run.Error(null, new BenchmarkExecutionException("Deployment timed out.")));
stopSimulation(run);
});
return null;
}
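// Called when all agents have registered: cancels the deployment timeout and sends the
// INITIALIZE command with the benchmark to every agent.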
private void handleAgentsStarted(Run run) {
vertx.cancelTimer(run.deployTimerId);
log.info("Starting benchmark {} - run {}", run.benchmark.name(), run.id);
for (AgentInfo agent : run.agents) {
if (agent.status != AgentInfo.Status.REGISTERED) {
log.error("{} Agent {}({}) already initializing, status is {}!", run.id, agent.name, agent.deploymentId,
agent.status);
} else {
eb.request(agent.deploymentId,
new AgentControlMessage(AgentControlMessage.Command.INITIALIZE, agent.id, run.benchmark), reply -> {
Throwable cause;
if (reply.failed()) {
cause = reply.cause();
log.error("{} Agent {}({}) failed to initialize", run.id, agent.name, agent.deploymentId);
log.error("Failure thrown on the controller (this node): ", cause);
} else {
Message<Object> message = reply.result();
if (message.body() instanceof ReplyException) {
String msg = ((ReplyException) message.body()).getMessage();
log.error("{} Agent {}({}) failed to initialize", run.id, agent.name, agent.deploymentId);
log.error("Failure thrown on the agent node (see agent log for details): {}", msg);
cause = new BenchmarkExecutionException(msg);
} else {
log.debug("{} Agent {}({}) was initialized.", run.id, agent.name, agent.deploymentId);
return;
}
}
agent.status = AgentInfo.Status.FAILED;
run.errors.add(new Run.Error(agent, cause));
stopSimulation(run);
});
}
}
}
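// Executes pre-hooks on a worker thread and, if they succeed, creates the controller phases
// and enters the scheduling loop.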
@SuppressWarnings("deprecation") // Uses a deprecated executeBlocking call that should be addressed later. This is tracked in https://github.com/Hyperfoil/Hyperfoil/issues/493
private void startSimulation(Run run) {
vertx.executeBlocking(future -> {
// combine shared and benchmark-private hooks
List<RunHook> hooks = loadHooks("pre");
hooks.addAll(run.benchmark.preHooks());
Collections.sort(hooks);
for (RunHook hook : hooks) {
StringBuilder sb = new StringBuilder();
boolean success = hook.run(getRunProperties(run), sb::append);
run.hookResults.add(new Run.RunHookOutput(hook.name(), sb.toString()));
if (!success) {
run.errors.add(
new Run.Error(null, new BenchmarkExecutionException("Execution of run hook " + hook.name() + " failed.")));
future.fail("Execution of pre-hook " + hook.name() + " failed.");
break;
}
}
future.complete();
}, result -> {
if (result.succeeded()) {
vertx.runOnContext(nil -> {
assert run.startTime == Long.MIN_VALUE;
run.startTime = System.currentTimeMillis();
for (Phase phase : run.benchmark.phases()) {
run.phases.put(phase.name(), new ControllerPhase(phase));
}
runSimulation(run);
});
} else {
log.error("{} Failed to start the simulation", run.id, result.cause());
stopSimulation(run);
}
});
}
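// Main scheduling loop: finishes and terminates phases whose (max) duration elapsed, starts
// phases that became available and re-schedules itself until all phases are terminated.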
private void runSimulation(Run run) {
if (timerId >= 0) {
vertx.cancelTimer(timerId);
timerId = -1;
}
long now = System.currentTimeMillis();
for (ControllerPhase phase : run.phases.values()) {
if (phase.status() == ControllerPhase.Status.RUNNING
&& phase.absoluteStartTime() + phase.definition().duration() <= now) {
eb.publish(Feeds.CONTROL,
new PhaseControlMessage(PhaseControlMessage.Command.FINISH, phase.definition().name, null));
phase.status(run.id, ControllerPhase.Status.FINISHING);
}
if (phase.status() == ControllerPhase.Status.FINISHED) {
if (phase.definition().maxDuration() >= 0 && phase.absoluteStartTime() + phase.definition().maxDuration() <= now) {
eb.publish(Feeds.CONTROL,
new PhaseControlMessage(PhaseControlMessage.Command.TERMINATE, phase.definition().name, null));
phase.status(run.id, ControllerPhase.Status.TERMINATING);
} else if (phase.definition().terminateAfterStrict().stream().map(run.phases::get)
.allMatch(p -> p.status().isTerminated())) {
eb.publish(Feeds.CONTROL,
new PhaseControlMessage(PhaseControlMessage.Command.TRY_TERMINATE, phase.definition().name, null));
}
}
}
ControllerPhase[] availablePhases = run.getAvailablePhases();
for (ControllerPhase phase : availablePhases) {
Map<String, GlobalData.Element> newGlobalData = run.newGlobalData;
if (!newGlobalData.isEmpty()) {
run.newGlobalData = new HashMap<>();
}
eb.publish(Feeds.CONTROL,
new PhaseControlMessage(PhaseControlMessage.Command.RUN, phase.definition().name, newGlobalData));
phase.absoluteStartTime(now);
phase.status(run.id, ControllerPhase.Status.STARTING);
}
if (run.phases.values().stream().allMatch(phase -> phase.status().isTerminated())) {
log.info("{} All phases are terminated.", run.id);
stopSimulation(run);
return;
}
long nextTimestamp = run.nextTimestamp();
long delay = nextTimestamp - System.currentTimeMillis();
delay = Math.min(delay, 1000);
log.debug("Wait {} ms", delay);
if (delay > 0) {
if (timerId >= 0) {
vertx.cancelTimer(timerId);
}
timerId = vertx.setTimer(delay, timerId -> runSimulation(run));
} else {
vertx.runOnContext(nil -> runSimulation(run));
}
}
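// Marks the run as terminated and asks every agent to stop; deployed agents are undeployed
// shortly afterwards.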
private void stopSimulation(Run run) {
if (run.terminateTime.future().isComplete()) {
log.warn("Run {} already completed.", run.id);
return;
}
run.terminateTime.complete(System.currentTimeMillis());
run.completed = true;
for (AgentInfo agent : run.agents) {
if (agent.deploymentId == null) {
assert agent.status == AgentInfo.Status.STARTING;
if (agent.deployedAgent != null) {
agent.deployedAgent.stop();
}
continue;
}
agent.status = AgentInfo.Status.STOPPING;
eb.request(agent.deploymentId, new AgentControlMessage(AgentControlMessage.Command.STOP, agent.id, null), reply -> {
if (reply.succeeded() && !(reply.result() instanceof Throwable)) {
agent.status = AgentInfo.Status.STOPPED;
checkAgentsStopped(run);
log.debug("Agent {}/{} stopped.", agent.name, agent.deploymentId);
} else {
agent.status = AgentInfo.Status.FAILED;
log.error("Agent {}/{} failed to stop", agent.name, agent.deploymentId);
if (reply.result() instanceof Throwable) {
log.error("Failure thrown on the agent node (see agent log for details): ", (Throwable) reply.result());
} else {
log.error("Failure thrown on the controller (this node): ", reply.cause());
}
}
if (agent.deployedAgent != null) {
// Give agents 3 seconds to leave the cluster
vertx.setTimer(3000, timerId -> agent.deployedAgent.stop());
}
});
}
checkAgentsStopped(run);
}
private void checkAgentsStopped(Run run) {
if (run.agents.stream().allMatch(a -> a.status.ordinal() >= AgentInfo.Status.STOPPED.ordinal())) {
for (var phase : run.phases.values()) {
run.statisticsStore().adjustPhaseTimestamps(phase.definition().name(), phase.absoluteStartTime(),
phase.absoluteCompletionTime());
}
run.statisticsStore().completeAll(error -> {
log.warn("Run {}: {}", run.id, error);
run.errors.add(new Run.Error(null, new BenchmarkExecutionException(error)));
});
persistRun(run);
log.info("Run {} completed", run.id);
}
}
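// Persists the run to disk: CSV statistics, info.json, the aggregated JSON stats and the
// results of post-hooks.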
@SuppressWarnings("deprecation") // Uses a deprecated executeBlocking call that should be addressed later. This is tracked in https://github.com/Hyperfoil/Hyperfoil/issues/493
private void persistRun(Run run) {
vertx.executeBlocking(future -> {
try {
CsvWriter.writeCsv(run.dir.resolve("stats"), run.statisticsStore());
} catch (IOException e) {
log.error("Failed to persist statistics", e);
future.fail(e);
}
JsonObject info = new JsonObject()
.put("id", run.id)
.put("benchmark", run.benchmark.name())
.put("params", new JsonObject(run.benchmark.params().entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))))
.put("startTime", run.startTime)
.put("terminateTime", run.terminateTime.future().result())
.put("cancelled", run.cancelled)
.put("description", run.description)
.put("errors", new JsonArray(run.errors.stream()
.map(e -> {
JsonObject json = new JsonObject();
if (e.agent != null) {
json.put("agent", e.agent.name);
}
return json.put("msg", e.error.getMessage());
})
.collect(Collectors.toList())));
try {
Files.writeString(run.dir.resolve("info.json"), info.encodePrettily());
} catch (IOException e) {
log.error("Cannot write info file", e);
future.fail(e);
}
try (FileOutputStream stream = new FileOutputStream(run.dir.resolve(DEFAULT_STATS_JSON).toFile())) {
JsonFactory jfactory = new JsonFactory();
jfactory.setCodec(new ObjectMapper());
JsonGenerator jGenerator = jfactory.createGenerator(stream, JsonEncoding.UTF8);
jGenerator.setCodec(new ObjectMapper());
JsonWriter.writeArrayJsons(run.statisticsStore(), jGenerator, info);
jGenerator.flush();
jGenerator.close();
} catch (IOException e) {
log.error("Cannot write to {}", DEFAULT_STATS_JSON, e);
future.fail(e);
}
// combine shared and benchmark-private hooks
List<RunHook> hooks = loadHooks("post");
hooks.addAll(run.benchmark.postHooks());
Collections.sort(hooks);
for (RunHook hook : hooks) {
StringBuilder sb = new StringBuilder();
boolean success = hook.run(getRunProperties(run), sb::append);
run.hookResults.add(new Run.RunHookOutput(hook.name(), sb.toString()));
if (!success) {
log.error("Execution of post-hook {} failed.", hook.name());
// stop executing further hooks but persist info
break;
}
}
JsonArray hookResults = new JsonArray(run.hookResults.stream()
.map(r -> new JsonObject().put("name", r.name).put("output", r.output))
.collect(Collectors.toList()));
try {
Files.writeString(run.dir.resolve("hooks.json"), hookResults.encodePrettily());
} catch (IOException e) {
log.error("Cannot write hook results", e);
future.fail(e);
}
future.tryComplete();
}, result -> {
run.completed = true;
run.persisted = true;
if (result.failed()) {
log.error("Failed to persist run {}", run.id, result.cause());
} else {
log.info("Successfully persisted run {}", run.id);
}
});
}
private Map<String, String> getRunProperties(Run run) {
Map<String, String> properties = new HashMap<>();
properties.put("RUN_ID", run.id);
properties.put("RUN_DIR", Controller.RUN_DIR.resolve(run.id).toAbsolutePath().toString());
if (run.description != null) {
properties.put("RUN_DESCRIPTION", run.description);
}
properties.put("BENCHMARK", run.benchmark.name());
File benchmarkFile = Controller.BENCHMARK_DIR.resolve(run.benchmark.name() + ".yaml").toFile();
if (benchmarkFile.exists()) {
properties.put("BENCHMARK_PATH", benchmarkFile.getAbsolutePath());
}
return properties;
}
public Run run(String runId) {
return runs.get(runId);
}
public Collection<Run> runs() {
return runs.values();
}
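// Cancels a run: not-yet-started phases are cancelled and running phases are asked to
// terminate; the handler is invoked once the run completes.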
public void kill(Run run, Handler<AsyncResult<Void>> handler) {
log.info("{} Killing run", run.id);
try {
run.cancelled = true;
for (Map.Entry<String, ControllerPhase> entry : run.phases.entrySet()) {
ControllerPhase.Status status = entry.getValue().status();
if (!status.isTerminated()) {
if (status == ControllerPhase.Status.NOT_STARTED) {
entry.getValue().status(run.id, ControllerPhase.Status.CANCELLED);
} else {
entry.getValue().status(run.id, ControllerPhase.Status.TERMINATING);
eb.publish(Feeds.CONTROL,
new PhaseControlMessage(PhaseControlMessage.Command.TERMINATE, entry.getKey(), null));
}
}
}
run.terminateTime.future().onComplete(result -> handler.handle(result.mapEmpty()));
} catch (Throwable e) {
handler.handle(Future.failedFuture(e));
}
}
@SuppressWarnings("deprecation") // Uses a deprecated executeBlocking call that should be addressed later. This is tracked in https://github.com/Hyperfoil/Hyperfoil/issues/493
public Future<Void> addBenchmark(Benchmark benchmark, String prevVersion) {
if (prevVersion != null) {
Benchmark prev = benchmarks.get(benchmark.name());
String currentVersion;
if (prev == null) {
BenchmarkSource template = templates.get(benchmark.name());
currentVersion = template != null ? template.version : null;
} else {
currentVersion = prev.version();
}
if (!prevVersion.equals(currentVersion)) {
log.info("Updating benchmark {}, version {} but current version is {}",
benchmark.name(), prevVersion, currentVersion != null ? currentVersion : "");
return Future.failedFuture(new VersionConflictException());
}
}
benchmarks.put(benchmark.name(), benchmark);
templates.remove(benchmark.name());
return vertx.executeBlocking(promise -> {
if (benchmark.source() != null) {
PersistenceUtil.store(benchmark.source(), Controller.BENCHMARK_DIR);
}
promise.complete();
});
}
@SuppressWarnings("deprecation") // Uses a deprecated executeBlocking call that should be addressed later. This is tracked in https://github.com/Hyperfoil/Hyperfoil/issues/493
public Future<Void> addTemplate(BenchmarkSource template, String prevVersion) {
if (prevVersion != null) {
BenchmarkSource prev = templates.get(template.name);
String currentVersion;
if (prev == null) {
Benchmark benchmark = benchmarks.get(template.name);
currentVersion = benchmark != null ? benchmark.version() : null;
} else {
currentVersion = prev.version;
}
if (!prevVersion.equals(currentVersion)) {
log.info("Updating template {}, version {} but current version is {}",
template.name, prevVersion, currentVersion != null ? currentVersion : "");
return Future.failedFuture(new VersionConflictException());
}
}
templates.put(template.name, template);
benchmarks.remove(template.name);
return vertx.executeBlocking(promise -> {
PersistenceUtil.store(template, Controller.BENCHMARK_DIR);
promise.complete();
});
}
public Collection<String> getBenchmarks() {
return benchmarks.keySet();
}
public Collection<String> getTemplates() {
return templates.keySet();
}
public Benchmark getBenchmark(String name) {
return benchmarks.get(name);
}
public BenchmarkSource getTemplate(String name) {
return templates.get(name);
}
@SuppressWarnings("deprecation") // Uses a deprecated executeBlocking call that should be addressed later. This is tracked in https://github.com/Hyperfoil/Hyperfoil/issues/493
private void loadBenchmarks(Handler<AsyncResult<Void>> handler) {
vertx.executeBlocking(future -> {
try {
Files.list(Controller.BENCHMARK_DIR).forEach(file -> {
try {
BenchmarkSource source = PersistenceUtil.load(file);
if (source != null) {
if (source.isTemplate()) {
templates.put(source.name, source);
} else {
Benchmark benchmark = BenchmarkParser.instance().buildBenchmark(source, Collections.emptyMap());
benchmarks.put(benchmark.name(), benchmark);
}
}
} catch (Exception e) {
log.error("Failed to load a benchmark from {}", file, e);
}
});
} catch (IOException e) {
log.error("Failed to list benchmark dir {}", Controller.BENCHMARK_DIR, e);
}
future.complete();
}, handler);
}
private List<RunHook> loadHooks(String subdir) {
try {
File hookDir = Controller.HOOKS_DIR.resolve(subdir).toFile();
if (hookDir.exists() && hookDir.isDirectory()) {
return Files.list(hookDir.toPath())
.map(Path::toFile)
.filter(file -> !file.isDirectory() && !file.isHidden())
.map(file -> new ExecRunHook(file.getName(), file.getAbsolutePath()))
.collect(Collectors.toList());
}
} catch (IOException e) {
log.error("Failed to list hooks.", e);
}
return Collections.emptyList();
}
public void listSessions(Run run, boolean includeInactive, BiConsumer<AgentInfo, String> sessionStateHandler,
Handler<AsyncResult<Void>> completionHandler) {
invokeOnAgents(run, AgentControlMessage.Command.LIST_SESSIONS, includeInactive, completionHandler, (agent, result) -> {
if (result.failed()) {
log.error("Agent {} failed listing sessions", agent, result.cause());
sessionStateHandler.accept(agent, "");
} else if (result.result() instanceof Throwable) {
log.error("Agent {} has thrown an error while listing sessions", agent, (Throwable) result.result());
sessionStateHandler.accept(agent, "");
} else {
@SuppressWarnings("unchecked")
List<String> sessions = (List<String>) result.result().body();
for (String state : sessions) {
sessionStateHandler.accept(agent, state);
}
}
});
}
public void listConnections(Run run, BiConsumer<AgentInfo, String> connectionHandler,
Handler<AsyncResult<Void>> completionHandler) {
invokeOnAgents(run, AgentControlMessage.Command.LIST_CONNECTIONS, null, completionHandler, (agent, result) -> {
if (result.failed()) {
log.error("Agent {} failed listing connections", agent, result.cause());
connectionHandler.accept(agent, "");
} else if (result.result() instanceof Throwable) {
log.error("Agent {} has thrown an error while listing connections", agent, (Throwable) result.result());
connectionHandler.accept(agent, "");
} else {
@SuppressWarnings("unchecked")
List<String> connections = (List<String>) result.result().body();
for (String state : connections) {
connectionHandler.accept(agent, state);
}
}
});
}
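// Sends a control command to every agent that is not stopped yet and invokes the completion
// handler once all agents replied, or with a failure when a request fails.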
private void invokeOnAgents(Run run, AgentControlMessage.Command command, Object param,
Handler<AsyncResult<Void>> completionHandler, BiConsumer<AgentInfo, AsyncResult<Message<Object>>> handler) {
AtomicInteger agentCounter = new AtomicInteger(1);
for (AgentInfo agent : run.agents) {
if (agent.status.ordinal() >= AgentInfo.Status.STOPPED.ordinal()) {
log.debug("Cannot invoke command on {}, status: {}", agent.name, agent.status);
continue;
}
agentCounter.incrementAndGet();
eb.request(agent.deploymentId, new AgentControlMessage(command, agent.id, param), result -> {
if (result.failed()) {
log.error("Failed to connect to agent {}", agent.name, result.cause());
completionHandler.handle(Future.failedFuture(result.cause()));
} else {
handler.accept(agent, result);
if (agentCounter.decrementAndGet() == 0) {
completionHandler.handle(Future.succeededFuture());
}
}
});
}
if (agentCounter.decrementAndGet() == 0) {
completionHandler.handle(Future.succeededFuture());
}
}
public boolean hasControllerLog() {
return deployer != null && deployer.hasControllerLog();
}
@SuppressWarnings("deprecation") // Uses a deprecated executeBlocking call that should be addressed later. This is tracked in https://github.com/Hyperfoil/Hyperfoil/issues/493
public void downloadControllerLog(long offset, File tempFile, Handler<AsyncResult<Void>> handler) {
vertx.executeBlocking(future -> deployer.downloadControllerLog(offset, tempFile.toString(), handler), result -> {
if (result.failed()) {
handler.handle(Future.failedFuture(result.cause()));
}
});
}
@SuppressWarnings("deprecation") // Uses a deprecated executeBlocking call that should be addressed later. This is tracked in https://github.com/Hyperfoil/Hyperfoil/issues/493
public void downloadAgentLog(DeployedAgent deployedAgent, long offset, File tempFile, Handler<AsyncResult<Void>> handler) {
vertx.executeBlocking(future -> deployer.downloadAgentLog(deployedAgent, offset, tempFile.toString(), handler),
result -> {
if (result.failed()) {
handler.handle(Future.failedFuture(result.cause()));
}
});
}
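// Reloads the full benchmark definition from the run directory when the run only holds an
// empty placeholder (e.g. after a controller restart).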
public Benchmark ensureBenchmark(Run run) throws ParserException {
if (run.benchmark.source() == null) {
File yamlSource = Controller.RUN_DIR.resolve(run.id).resolve(run.benchmark.name() + ".yaml").toFile();
if (yamlSource.exists() && yamlSource.isFile()) {
BenchmarkSource source = PersistenceUtil.load(yamlSource.toPath());
if (source != null) {
run.benchmark = BenchmarkParser.instance().buildBenchmark(source, run.benchmark.params());
return run.benchmark;
}
}
log.warn("Cannot find benchmark source for run {}, benchmark {}", run.id, run.benchmark.name());
}
return run.benchmark;
}
public void shutdown() {
InfinispanClusterManager clusterManager = (InfinispanClusterManager) ((VertxInternal) vertx).getClusterManager();
if (clusterManager != null) {
BasicCacheContainer cacheManager = clusterManager.getCacheContainer();
vertx.close(ar -> cacheManager.stop());
} else {
vertx.close();
}
}
public int actualPort() {
return server.httpServer.actualPort();
}
public Path getRunDir(Run run) {
return Controller.RUN_DIR.resolve(run.id);
}
public JsonObject getConfig() {
return context.config();
}
public boolean deleteBenchmark(String name) {
Benchmark oldBenchmark = benchmarks.remove(name);
BenchmarkSource oldTemplate = templates.remove(name);
if (oldBenchmark != null || oldTemplate != null) {
if (!PersistenceUtil.delete(name, Controller.BENCHMARK_DIR)) {
throw new RuntimeException("Cannot delete benchmark " + name);
}
return true;
}
return false;
}
}