All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.vespa.hosted.provision.maintenance.HostFlavorUpgrader Maven / Gradle / Ivy

There is a newer version: 8.441.21
Show newest version
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.NodeAllocationException;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;

import java.time.Duration;
import java.util.HashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.function.Predicate;
import java.util.logging.Level;

/**
 * This maintainer attempts to upgrade a single host running on an older flavor generation. The upgrade happens by
 * marking and retiring the host on the old generation, and redeploying to provision a replacement host on a newer
 * generation.
 *
 * If the cloud provider reports a lack of capacity for the newer generation, retirement of the host is
 * cancelled, and upgrade is attempted of the next host on an old flavor, if any.
 *
 * Once a host has been marked for upgrade, {@link HostResumeProvisioner} will complete provisioning of the replacement
 * host.
 *
 * @author mpolden
 */
public class HostFlavorUpgrader extends NodeRepositoryMaintainer {

    private final HostProvisioner hostProvisioner;
    private final Random random;
    private final Deployer deployer;
    private final Metric metric;

    public HostFlavorUpgrader(NodeRepository nodeRepository, Duration interval, Metric metric, Deployer deployer, HostProvisioner hostProvisioner) {
        super(nodeRepository, interval, metric);
        this.hostProvisioner = Objects.requireNonNull(hostProvisioner);
        this.deployer = Objects.requireNonNull(deployer);
        this.metric = Objects.requireNonNull(metric);
        this.random = new Random(nodeRepository.clock().millis());
    }

    @Override
    protected double maintain() {
        if (!nodeRepository().zone().cloud().dynamicProvisioning()) return 1.0; // Not relevant in zones with static capacity
        if (nodeRepository().zone().environment().isTest()) return 1.0; // Short-lived deployments
        if (!nodeRepository().nodes().isWorking()) return 0.0;

        NodeList allNodes = nodeRepository().nodes().list();
        if (!NodeMover.zoneIsStable(allNodes)) return 1.0;
        return upgradeHostFlavor(allNodes);
    }

    private double upgradeHostFlavor(NodeList allNodes) {
        NodeList activeNodes = allNodes.nodeType(NodeType.tenant)
                                       .state(Node.State.active)
                                       .shuffle(random); // Shuffle to avoid getting stuck trying to upgrade the same host
        Set exhaustedFlavors = new HashSet<>();
        for (var node : activeNodes) {
            Optional parent = allNodes.parentOf(node);
            if (parent.isEmpty()) continue;

            // Limit upgrades to exclusive hosts because it is hard to predict which flavor will be provisioned when
            // retiring nodes on shared hosts
            if (parent.get().exclusiveToApplicationId().isEmpty()) continue;

            if (exhaustedFlavors.contains(parent.get().flavor().name())) continue;
            Allocation allocation = node.allocation().get();
            Predicate realHostResourcesWithinLimits =
                    resources -> nodeRepository().nodeResourceLimits().isWithinRealLimits(resources, allocation.membership().cluster());
            if (!hostProvisioner.canUpgradeFlavor(parent.get(), node, realHostResourcesWithinLimits)) continue;
            if (parent.get().status().wantToUpgradeFlavor() && allocation.membership().retired()) continue; // Already upgrading

            boolean redeployed = false;
            boolean deploymentValid = false;
            try (MaintenanceDeployment deployment = new MaintenanceDeployment(allocation.owner(), deployer, metric, nodeRepository(), true)) {
                deploymentValid = deployment.isValid();
                if (!deploymentValid) continue;

                log.log(Level.INFO, () -> "Redeploying " + allocation.owner() + " to upgrade flavor (" +
                                          parent.get().flavor().name() + ") of " + parent.get());
                upgradeFlavor(parent.get(), true);
                deployment.activate();
                redeployed = true;
                return 1.0;
            } catch (NodeAllocationException e) {
               // Fine, no capacity for upgrade
                exhaustedFlavors.add(parent.get().flavor().name());
            } finally {
                if (deploymentValid && !redeployed) { // Cancel upgrade if redeploy failed
                    upgradeFlavor(parent.get(), false);
                }
            }
        }
        return 1.0;
    }

    private void upgradeFlavor(Node host, boolean upgrade) {
        nodeRepository().nodes().upgradeFlavor(host.hostname(),
                                               Agent.HostFlavorUpgrader,
                                               nodeRepository().clock().instant(),
                                               upgrade);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy