All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.vespa.hosted.provision.maintenance.SpareCapacityMaintainer Maven / Gradle / Ivy

The newest version!
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import ai.vespa.metrics.ConfigServerMetrics;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeMutex;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.maintenance.MaintenanceDeployment.Move;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.HostCapacity;
import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceComparator;

import java.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.logging.Level;
import java.util.stream.Collectors;

/**
 * A maintainer which attempts to ensure there is spare capacity available in chunks which can fit
 * all node resource configuration in use, such that the system is able to quickly replace a failed node
 * if necessary.
 *
 * This also emits the following metrics:
 * - Overcommitted hosts: Hosts whose capacity is less than the sum of its children's
 * - Spare host capacity, or how many hosts the repository can stand to lose without ending up in a situation where it's
 *   unable to find a new home for orphaned tenants.
 *
 * @author mgimle
 * @author bratseth
 */
public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {

    private final int maxIterations;
    private final Deployer deployer;
    private final Metric metric;

    public SpareCapacityMaintainer(Deployer deployer,
                                   NodeRepository nodeRepository,
                                   Metric metric,
                                   Duration interval) {
        this(deployer, nodeRepository, metric, interval, 10_000 /* Should take less than a few minutes */);
    }

    public SpareCapacityMaintainer(Deployer deployer,
                                   NodeRepository nodeRepository,
                                   Metric metric,
                                   Duration interval,
                                   int maxIterations) {
        super(nodeRepository, interval, metric);
        this.deployer = deployer;
        this.metric = metric;
        this.maxIterations = maxIterations;
    }

    @Override
    protected double maintain() {
        if ( ! nodeRepository().nodes().isWorking()) return 0.0;

        // Don't need to maintain spare capacity in dynamically provisioned zones; can provision more on demand.
        if (nodeRepository().zone().cloud().dynamicProvisioning()) return 1.0;

        NodeList allNodes = nodeRepository().nodes().list();
        CapacityChecker capacityChecker = new CapacityChecker(allNodes);

        List overcommittedHosts = capacityChecker.findOvercommittedHosts();
        metric.set(ConfigServerMetrics.OVERCOMMITTED_HOSTS.baseName(), overcommittedHosts.size(), null);
        retireOvercommitedHosts(allNodes, overcommittedHosts);

        boolean success = true;
        Optional failurePath = capacityChecker.worstCaseHostLossLeadingToFailure();
        if (failurePath.isPresent()) {
            int spareHostCapacity = failurePath.get().hostsCausingFailure.size() - 1;
            if (spareHostCapacity == 0) {
                List mitigation = findMitigation(failurePath.get());
                if (execute(mitigation, failurePath.get())) {
                    // We succeeded or are in the process of taking a step to mitigate.
                    // Report with the assumption this will eventually succeed to avoid alerting before we're stuck
                    spareHostCapacity++;
                }
                else {
                    success = false;
                }
            }
            metric.set(ConfigServerMetrics.SPARE_HOST_CAPACITY.baseName(), spareHostCapacity, null);
        }
        return success ? 1.0 : 0.0;
    }

    private boolean execute(List mitigation, CapacityChecker.HostFailurePath failurePath) {
        if (mitigation.isEmpty()) {
            log.warning("Out of spare capacity and no mitigation possible: " + failurePath);
            return false;
        }
        Move firstMove = mitigation.get(0);
        if (firstMove.node().allocation().get().membership().retired()) return true; // Already in progress
        boolean success = firstMove.execute(false, Agent.SpareCapacityMaintainer, deployer, metric, nodeRepository());
        log.info("Out of spare capacity. Mitigation plan: " + mitigation + ". First move successful: " + success);
        return success;
    }

    private List findMitigation(CapacityChecker.HostFailurePath failurePath) {
        Optional nodeWhichCantMove = failurePath.failureReason.tenant;
        if (nodeWhichCantMove.isEmpty()) return List.of();

        Node node = nodeWhichCantMove.get();
        NodeList allNodes = nodeRepository().nodes().list();
        // Allocation will assign the spareCount most empty nodes as "spares", which will not be allocated on
        // unless needed for node failing. Our goal here is to make room on these spares for the given node
        HostCapacity hostCapacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
        Set spareHosts = hostCapacity.findSpareHosts(allNodes.hosts().satisfies(node.resources()).asList(),
                                                           nodeRepository().spareCount());
        List hosts = allNodes.hosts().except(spareHosts).asList();

        CapacitySolver capacitySolver = new CapacitySolver(hostCapacity, maxIterations);
        List shortestMitigation = null;
        for (Node spareHost : spareHosts) {
            List mitigation = capacitySolver.makeRoomFor(node, spareHost, hosts, List.of(), List.of());
            if (mitigation == null) continue;
            if (shortestMitigation == null || shortestMitigation.size() > mitigation.size())
                shortestMitigation = mitigation;
        }
        if (shortestMitigation == null || shortestMitigation.isEmpty()) return List.of();
        return shortestMitigation;
    }

    private int retireOvercomittedComparator(Node n1, Node n2) {
        ClusterSpec.Type t1 = n1.allocation().get().membership().cluster().type();
        ClusterSpec.Type t2 = n2.allocation().get().membership().cluster().type();

        // Prefer to container nodes for faster retirement
        if (t1 == ClusterSpec.Type.container && t2 != ClusterSpec.Type.container) return -1;
        if (t1 != ClusterSpec.Type.container && t2 == ClusterSpec.Type.container) return 1;

        return NodeResourceComparator.memoryDiskCpuOrder().compare(n1.resources(), n2.resources());
    }

    private void retireOvercommitedHosts(NodeList allNodes, List overcommittedHosts) {
        if (overcommittedHosts.isEmpty()) return;
        log.log(Level.WARNING, String.format("%d hosts are overcommitted: %s",
                overcommittedHosts.size(),
                overcommittedHosts.stream().map(Node::hostname).collect(Collectors.joining(", "))));

        if (!NodeMover.zoneIsStable(allNodes)) return;

        // Find an active node on a overcommitted host and retire it
        Optional nodeToRetire = overcommittedHosts.stream().flatMap(parent -> allNodes.childrenOf(parent).stream())
                .filter(node -> node.state() == Node.State.active)
                .min(this::retireOvercomittedComparator);
        if (nodeToRetire.isEmpty()) return;
        retire(nodeToRetire.get());
    }

    /** Mark node for retirement and redeploy its application */
    private void retire(Node node) {
        ApplicationId owner = node.allocation().get().owner();
        try (MaintenanceDeployment deployment = new MaintenanceDeployment(owner, deployer, metric, nodeRepository())) {
            if (!deployment.isValid()) return;
            if (!markWantToRetire(node.hostname())) return;
            log.log(Level.INFO, String.format("Redeploying %s to move %s from over-committed host",
                                              owner, node.hostname()));
            deployment.activate();
        }
    }

    private boolean markWantToRetire(String hostname) {
        Optional optionalNodeMutex = nodeRepository().nodes().lockAndGet(hostname);
        if (optionalNodeMutex.isEmpty()) return false;
        try (var nodeMutex = optionalNodeMutex.get()) {
            Node retiredNode = nodeMutex.node().withWantToRetire(true, Agent.SpareCapacityMaintainer,
                                                                 nodeRepository().clock().instant());
            nodeRepository().nodes().write(retiredNode, nodeMutex);
            return true;
        }
    }

    private static class CapacitySolver {

        private final HostCapacity hostCapacity;
        private final int maxIterations;

        private int iterations = 0;

        CapacitySolver(HostCapacity hostCapacity, int maxIterations) {
            this.hostCapacity = hostCapacity;
            this.maxIterations = maxIterations;
        }

        /** The map of subproblem solutions already found. The value is null when there is no solution. */
        private Map> solutions = new HashMap<>();

        /**
         * Finds the shortest sequence of moves which makes room for the given node on the given host,
         * assuming the given moves already made over the given hosts' current allocation.
         *
         * @param node the node to make room for
         * @param host the target host to make room on
         * @param hosts the hosts onto which we can move nodes
         * @param movesConsidered the moves already being considered to add as part of this scenario
         *                        (after any moves made by this)
         * @param movesMade the moves already made in this scenario
         * @return the list of movesMade with the moves needed for this appended, in the order they should be performed,
         *         or null if no sequence could be found
         */
        List makeRoomFor(Node node, Node host, List hosts, List movesConsidered, List movesMade) {
            SolutionKey solutionKey = new SolutionKey(node, host, movesConsidered, movesMade);
            List solution = solutions.get(solutionKey);
            if (solution == null) {
                solution = findRoomFor(node, host, hosts, movesConsidered, movesMade);
                solutions.put(solutionKey, solution);
            }
            return solution;
        }

        private List findRoomFor(Node node, Node host, List hosts,
                                       List movesConsidered, List movesMade) {
            if (iterations++ > maxIterations)
                return null;

            if ( ! host.resources().satisfies(node.resources())) return null;
            NodeResources freeCapacity = availableCapacityWith(movesMade, host);
            if (freeCapacity.satisfies(node.resources())) return List.of();

            List shortest = null;
            for (var i = subsets(hostCapacity.allNodes().childrenOf(host), 5); i.hasNext(); ) {
                List childrenToMove = i.next();
                if ( ! addResourcesOf(childrenToMove, freeCapacity).satisfies(node.resources())) continue;
                List moves = move(childrenToMove, host, hosts, movesConsidered, movesMade);
                if (moves == null) continue;

                if (shortest == null || moves.size() < shortest.size())
                    shortest = moves;
            }
            if (shortest == null) return null;
            return append(movesMade, shortest);
        }

        private List move(List nodes, Node host, List hosts, List movesConsidered, List movesMade) {
            List moves = new ArrayList<>();
            for (Node childToMove : nodes) {
                List childMoves = move(childToMove, host, hosts, movesConsidered, append(movesMade, moves));
                if (childMoves == null) return null;
                moves.addAll(childMoves);
            }
            return moves;
        }

        private List move(Node node, Node host, List hosts, List movesConsidered, List movesMade) {
            if (contains(node, movesConsidered)) return null;
            if (contains(node, movesMade)) return null;
            List shortest = null;
            for (Node target : hosts) {
                if (target.equals(host)) continue;
                Move move = new Move(node, host, target);
                List childMoves = makeRoomFor(node, target, hosts, append(movesConsidered, move), movesMade);
                if (childMoves == null) continue;
                if (shortest == null || shortest.size() > childMoves.size() + 1) {
                    shortest = new ArrayList<>(childMoves);
                    shortest.add(move);
                }
            }
            return shortest;
        }

        private boolean contains(Node node, List moves) {
            return moves.stream().anyMatch(move -> move.node().equals(node));
        }

        private NodeResources addResourcesOf(List nodes, NodeResources resources) {
            for (Node node : nodes)
                resources = resources.add(node.resources());
            return resources;
        }

        private Iterator> subsets(NodeList nodes, int maxSize) {
            return new SubsetIterator(nodes.asList(), maxSize);
        }

        private List append(List a, List b) {
            List list = new ArrayList<>();
            list.addAll(a);
            list.addAll(b);
            return list;
        }

        private List append(List moves, Move move) {
            List list = new ArrayList<>(moves);
            list.add(move);
            return list;
        }

        private NodeResources availableCapacityWith(List moves, Node host) {
            NodeResources resources = hostCapacity.availableCapacityOf(host);
            for (Move move : moves) {
                if ( ! move.toHost().equals(host)) continue;
                resources = resources.subtract(move.node().resources()).justNumbers();
            }
            for (Move move : moves) {
                if ( ! move.fromHost().equals(host)) continue;
                resources = resources.add(move.node().resources()).justNumbers();
            }
            return resources;
        }

    }

    private static class SolutionKey {

        private final Node node;
        private final Node host;
        private final List movesConsidered;
        private final List movesMade;

        private final int hash;

        public SolutionKey(Node node, Node host, List movesConsidered, List movesMade) {
            this.node = node;
            this.host = host;
            this.movesConsidered = movesConsidered;
            this.movesMade = movesMade;

            hash = Objects.hash(node, host, movesConsidered, movesMade);
        }

        @Override
        public int hashCode() { return hash; }

        @Override
        public boolean equals(Object o) {
            if (o == this) return true;
            if (o == null || o.getClass() != this.getClass()) return false;

            SolutionKey other = (SolutionKey)o;
            if ( ! other.node.equals(this.node)) return false;
            if ( ! other.host.equals(this.host)) return false;
            if ( ! other.movesConsidered.equals(this.movesConsidered)) return false;
            if ( ! other.movesMade.equals(this.movesMade)) return false;
            return true;
        }

    }

    private static class SubsetIterator implements Iterator> {

        private final List nodes;
        private final int maxLength;

        // A number whose binary representation determines which items of list we'll include
        private int i = 0; // first "previous" = 0 -> skip the empty set
        private List next = null;

        public SubsetIterator(List nodes, int maxLength) {
            this.nodes = new ArrayList<>(nodes.subList(0, Math.min(nodes.size(), 31)));
            this.maxLength = maxLength;
        }

        @Override
        public boolean hasNext() {
            if (next != null) return true;

            // find next
            while (++i < 1< maxLength) continue;

                next = new ArrayList<>(ones);
                for (int position = 0; position < nodes.size(); position++) {
                    if (hasOneAtPosition(position, i))
                        next.add(nodes.get(position));
                }
                return true;
            }
            return false;
        }

        @Override
        public List next() {
            if ( ! hasNext()) throw new IllegalStateException("No more elements");
            var current = next;
            next = null;
            return current;
        }

        private boolean hasOneAtPosition(int position, int number) {
            return (number & (1 << position)) > 0;
        }

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy