com.yahoo.vespa.hosted.provision.maintenance.HostFlavorUpgrader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of node-repository Show documentation
Show all versions of node-repository Show documentation
Keeps track of node assignment in a multi-application setup.
The newest version!
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.NodeAllocationException;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
import java.time.Duration;
import java.util.HashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.function.Predicate;
import java.util.logging.Level;
/**
* This maintainer attempts to upgrade a single host running on an older flavor generation. The upgrade happens by
* marking and retiring the host on the old generation, and redeploying to provision a replacement host on a newer
* generation.
*
* If the cloud provider reports a lack of capacity for the newer generation, retirement of the host is
* cancelled, and upgrade is attempted of the next host on an old flavor, if any.
*
* Once a host has been marked for upgrade, {@link HostResumeProvisioner} will complete provisioning of the replacement
* host.
*
* @author mpolden
*/
public class HostFlavorUpgrader extends NodeRepositoryMaintainer {

    /** Consulted to decide whether a given host/child pair is eligible for a flavor upgrade. */
    private final HostProvisioner hostProvisioner;

    /** Source of randomness for shuffling candidates; seeded from the node repository clock at construction. */
    private final Random random;

    /** Used to redeploy the owning application once its host has been marked for upgrade. */
    private final Deployer deployer;

    /** Passed on to each {@link MaintenanceDeployment} created by this maintainer. */
    private final Metric metric;

    /**
     * Creates a new flavor upgrader.
     *
     * @param nodeRepository  the node repository to operate on
     * @param interval        the maintenance interval, passed to the superclass
     * @param metric          metric sink, must be non-null
     * @param deployer        deployer used for redeployments, must be non-null
     * @param hostProvisioner provisioner consulted for upgrade eligibility, must be non-null
     * @throws NullPointerException if {@code metric}, {@code deployer} or {@code hostProvisioner} is null
     */
    public HostFlavorUpgrader(NodeRepository nodeRepository, Duration interval, Metric metric, Deployer deployer, HostProvisioner hostProvisioner) {
        super(nodeRepository, interval, metric);
        this.hostProvisioner = Objects.requireNonNull(hostProvisioner);
        this.deployer = Objects.requireNonNull(deployer);
        this.metric = Objects.requireNonNull(metric);
        this.random = new Random(nodeRepository.clock().millis());
    }

    /**
     * Runs one maintenance pass. The pass is skipped (returning 1.0) in zones without dynamic provisioning,
     * in test environments, and when the zone is not considered stable by {@link NodeMover#zoneIsStable}.
     * Returns 0.0 when the node repository reports that it is not working.
     *
     * @return the value reported to the superclass for this run (presumably a success factor; confirm against
     *         {@link NodeRepositoryMaintainer})
     */
    @Override
    protected double maintain() {
        if (!nodeRepository().zone().cloud().dynamicProvisioning()) return 1.0; // Not relevant in zones with static capacity
        if (nodeRepository().zone().environment().isTest()) return 1.0; // Short-lived deployments
        if (!nodeRepository().nodes().isWorking()) return 0.0;

        NodeList allNodes = nodeRepository().nodes().list();
        if (!NodeMover.zoneIsStable(allNodes)) return 1.0;
        return upgradeHostFlavor(allNodes);
    }

    /**
     * Walks active tenant nodes in random order and attempts to upgrade the flavor of the first eligible parent
     * host. At most one host is upgraded per invocation: the method returns as soon as a redeployment has been
     * activated.
     *
     * @param allNodes a snapshot of all nodes in the repository
     * @return always 1.0
     */
    private double upgradeHostFlavor(NodeList allNodes) {
        NodeList activeNodes = allNodes.nodeType(NodeType.tenant)
                                       .state(Node.State.active)
                                       .shuffle(random); // Shuffle to avoid getting stuck trying to upgrade the same host
        // Names of flavors for which a redeployment failed with NodeAllocationException during this pass
        Set<String> exhaustedFlavors = new HashSet<>();
        for (var node : activeNodes) {
            Optional<Node> parent = allNodes.parentOf(node);
            if (parent.isEmpty()) continue;
            Node host = parent.get();

            // Limit upgrades to exclusive hosts because it is hard to predict which flavor will be provisioned when
            // retiring nodes on shared hosts
            if (host.exclusiveToApplicationId().isEmpty()) continue;
            if (exhaustedFlavors.contains(host.flavor().name())) continue;

            Allocation allocation = node.allocation().get();
            Predicate<NodeResources> realHostResourcesWithinLimits =
                    resources -> nodeRepository().nodeResourceLimits().isWithinRealLimits(resources, allocation.membership().cluster());
            if (!hostProvisioner.canUpgradeFlavor(host, node, realHostResourcesWithinLimits)) continue;
            if (host.status().wantToUpgradeFlavor() && allocation.membership().retired()) continue; // Already upgrading

            boolean redeployed = false;
            boolean deploymentValid = false;
            try (MaintenanceDeployment deployment = new MaintenanceDeployment(allocation.owner(), deployer, metric, nodeRepository(), true)) {
                deploymentValid = deployment.isValid();
                if (!deploymentValid) continue;

                log.log(Level.INFO, () -> "Redeploying " + allocation.owner() + " to upgrade flavor (" +
                                          host.flavor().name() + ") of " + host);
                // Mark the host first so that the redeployment below sees the upgrade request
                upgradeFlavor(host, true);
                deployment.activate();
                redeployed = true;
                return 1.0;
            } catch (NodeAllocationException e) {
                // Fine, no capacity for upgrade. Remember the flavor so other hosts on it are skipped in this pass
                exhaustedFlavors.add(host.flavor().name());
            } finally {
                // The host is only marked when the deployment was valid, so only then is there anything to undo
                if (deploymentValid && !redeployed) { // Cancel upgrade if redeploy failed
                    upgradeFlavor(host, false);
                }
            }
        }
        return 1.0;
    }

    /**
     * Sets or clears the flavor upgrade request on the given host, attributed to this maintainer.
     *
     * @param host    the host to update
     * @param upgrade {@code true} to request an upgrade, {@code false} to cancel a previous request
     */
    private void upgradeFlavor(Node host, boolean upgrade) {
        nodeRepository().nodes().upgradeFlavor(host.hostname(),
                                               Agent.HostFlavorUpgrader,
                                               nodeRepository().clock().instant(),
                                               upgrade);
    }

}