com.yahoo.vespa.hosted.provision.provisioning.NodeAllocation Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of node-repository Show documentation
Show all versions of node-repository Show documentation
Keeps track of node assignment in a multi-application setup.
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.provisioning;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterMembership;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.TenantName;
import com.yahoo.config.provision.Zone;
import com.yahoo.lang.MutableInteger;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import java.time.Clock;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
/**
* Used to manage a list of nodes during the node reservation process
* in order to fulfill the nodespec.
*
* @author bratseth
*/
class NodeAllocation {
/** List of all nodes in node-repository */
private final NodeList allNodes;
/** The application this list is for */
private final ApplicationId application;
/** The cluster this list is for */
private final ClusterSpec cluster;
/** The requested nodes of this list */
private final NodeSpec requestedNodes;
/** The nodes this has accepted so far */
private final Set nodes = new LinkedHashSet<>();
/** The number of nodes in the accepted nodes which are of the requested flavor */
private int acceptedOfRequestedFlavor = 0;
/** The number of nodes rejected because of clashing parentHostname */
private int rejectedWithClashingParentHost = 0;
/** The number of nodes rejected due to exclusivity constraints */
private int rejectedDueToExclusivity = 0;
/** The number of nodes that just now was changed to retired */
private int wasRetiredJustNow = 0;
/** The node indexes to verify uniqueness of each members index */
private final Set indexes = new HashSet<>();
/** The next membership index to assign to a new node */
private final MutableInteger highestIndex;
private final NodeFlavors flavors;
private final Zone zone;
private final Clock clock;
NodeAllocation(NodeList allNodes, ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes,
MutableInteger highestIndex, NodeFlavors flavors, Zone zone, Clock clock) {
this.allNodes = allNodes;
this.application = application;
this.cluster = cluster;
this.requestedNodes = requestedNodes;
this.highestIndex = highestIndex;
this.flavors = flavors;
this.zone = zone;
this.clock = clock;
}
/**
* Offer some nodes to this. The nodes may have an allocation to a different application or cluster,
* an allocation to this cluster, or no current allocation (in which case one is assigned).
*
* Note that if unallocated nodes are offered before allocated nodes, this will unnecessarily
* reject allocated nodes due to index duplicates.
*
* @param nodesPrioritized the nodes which are potentially on offer. These may belong to a different application etc.
* @return the subset of offeredNodes which was accepted, with the correct allocation assigned
*/
List offer(List nodesPrioritized) {
List accepted = new ArrayList<>();
for (PrioritizableNode node : nodesPrioritized) {
Node offered = node.node;
if (offered.allocation().isPresent()) {
ClusterMembership membership = offered.allocation().get().membership();
if ( ! offered.allocation().get().owner().equals(application)) continue; // wrong application
if ( ! membership.cluster().satisfies(cluster)) continue; // wrong cluster id/type
if ((! node.isSurplusNode || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group and we can't or have no reason to change it
if ( offered.allocation().get().isRemovable()) continue; // don't accept; causes removal
if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure)
if (requestedNodes.considerRetiring()) {
boolean wantToRetireNode = false;
if (violatesParentHostPolicy(this.nodes, offered)) wantToRetireNode = true;
if ( ! hasCompatibleFlavor(node)) wantToRetireNode = true;
if (offered.status().wantToRetire()) wantToRetireNode = true;
if (requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), offered.parentHostname()))
wantToRetireNode = true;
if (( ! saturated() && hasCompatibleFlavor(node)) || acceptToRetire(node))
accepted.add(acceptNode(node, wantToRetireNode, node.isResizable));
}
else {
accepted.add(acceptNode(node, false, false));
}
}
else if ( ! saturated() && hasCompatibleFlavor(node)) {
if ( violatesParentHostPolicy(this.nodes, offered)) {
++rejectedWithClashingParentHost;
continue;
}
if ( ! exclusiveTo(application.tenant(), offered.parentHostname())) {
++rejectedDueToExclusivity;
continue;
}
if ( requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), offered.parentHostname())) {
++rejectedDueToExclusivity;
continue;
}
if (offered.status().wantToRetire()) {
continue;
}
node.node = offered.allocate(application,
ClusterMembership.from(cluster, highestIndex.add(1)),
requestedNodes.resources().orElse(node.node.flavor().resources()),
clock.instant());
accepted.add(acceptNode(node, false, false));
}
}
return accepted;
}
private boolean violatesParentHostPolicy(Collection accepted, Node offered) {
return checkForClashingParentHost() && offeredNodeHasParentHostnameAlreadyAccepted(accepted, offered);
}
private boolean checkForClashingParentHost() {
return zone.system() == SystemName.main && zone.environment().isProduction() && ! application.instance().isTester();
}
private boolean offeredNodeHasParentHostnameAlreadyAccepted(Collection accepted, Node offered) {
for (PrioritizableNode acceptedNode : accepted) {
if (acceptedNode.node.parentHostname().isPresent() && offered.parentHostname().isPresent() &&
acceptedNode.node.parentHostname().get().equals(offered.parentHostname().get())) {
return true;
}
}
return false;
}
/**
* If a parent host is given, and it hosts another tenant with an application which requires exclusive access
* to the physical host, then we cannot host this application on it.
*/
private boolean exclusiveTo(TenantName tenant, Optional parentHostname) {
if (parentHostname.isEmpty()) return true;
for (Node nodeOnHost : allNodes.childrenOf(parentHostname.get())) {
if (nodeOnHost.allocation().isEmpty()) continue;
if ( nodeOnHost.allocation().get().membership().cluster().isExclusive() &&
! nodeOnHost.allocation().get().owner().tenant().equals(tenant))
return false;
}
return true;
}
private boolean hostsOnly(TenantName tenant, Optional parentHostname) {
if (parentHostname.isEmpty()) return true; // yes, as host is exclusive
for (Node nodeOnHost : allNodes.childrenOf(parentHostname.get())) {
if (nodeOnHost.allocation().isEmpty()) continue;
if ( ! nodeOnHost.allocation().get().owner().tenant().equals(tenant))
return false;
}
return true;
}
/**
* Returns whether this node should be accepted into the cluster even if it is not currently desired
* (already enough nodes, or wrong flavor).
* Such nodes will be marked retired during finalization of the list of accepted nodes.
* The conditions for this are:
*
* This is a content or combined node. These must always be retired before being removed to allow the cluster to
* migrate away data.
*
* This is a container node and it is not desired due to having the wrong flavor. In this case this
* will (normally) obtain for all the current nodes in the cluster and so retiring before removing must
* be used to avoid removing all the current nodes at once, before the newly allocated replacements are
* initialized. (In the other case, where a container node is not desired because we have enough nodes we
* do want to remove it immediately to get immediate feedback on how the size reduction works out.)
*/
private boolean acceptToRetire(PrioritizableNode node) {
if (node.node.state() != Node.State.active) return false;
if (! node.node.allocation().get().membership().cluster().group().equals(cluster.group())) return false;
return cluster.type().isContent() ||
(cluster.type() == ClusterSpec.Type.container && !hasCompatibleFlavor(node));
}
private boolean hasCompatibleFlavor(PrioritizableNode node) {
return requestedNodes.isCompatible(node.node.flavor(), flavors) || node.isResizable;
}
private Node acceptNode(PrioritizableNode prioritizableNode, boolean wantToRetire, boolean resize) {
Node node = prioritizableNode.node;
if (node.allocation().isPresent()) // Record the currently requested resources
node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.flavor().resources())));
if (! wantToRetire) {
if (resize) {
NodeResources hostResources = allNodes.parentOf(node).get().flavor().resources();
node = node.with(new Flavor(requestedNodes.resources().get()
.with(hostResources.diskSpeed())
.with(hostResources.storageType())));
}
if (node.state() != Node.State.active) // reactivated node - make sure its not retired
node = node.unretire();
acceptedOfRequestedFlavor++;
} else {
++wasRetiredJustNow;
// Retire nodes which are of an unwanted flavor, retired flavor or have an overlapping parent host
node = node.retire(clock.instant());
}
if ( ! node.allocation().get().membership().cluster().equals(cluster)) {
// group may be different
node = setCluster(cluster, node);
}
prioritizableNode.node = node;
indexes.add(node.allocation().get().membership().index());
highestIndex.set(Math.max(highestIndex.get(), node.allocation().get().membership().index()));
nodes.add(prioritizableNode);
return node;
}
private Node setCluster(ClusterSpec cluster, Node node) {
ClusterMembership membership = node.allocation().get().membership().with(cluster);
return node.with(node.allocation().get().with(membership));
}
/** Returns true if no more nodes are needed in this list */
private boolean saturated() {
return requestedNodes.saturatedBy(acceptedOfRequestedFlavor);
}
/** Returns true if the content of this list is sufficient to meet the request */
boolean fulfilled() {
return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor);
}
boolean wouldBeFulfilledWithRetiredNodes() {
return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor + wasRetiredJustNow);
}
boolean wouldBeFulfilledWithClashingParentHost() {
return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor + rejectedWithClashingParentHost);
}
boolean wouldBeFulfilledWithoutExclusivity() {
return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor + rejectedDueToExclusivity);
}
/**
* Returns {@link FlavorCount} describing the docker node deficit for the given {@link NodeSpec}.
*
* @return empty if the requested spec is not count based or the requested flavor type is not docker or
* the request is already fulfilled. Otherwise returns {@link FlavorCount} containing the required flavor
* and node count to cover the deficit.
*/
Optional getFulfilledDockerDeficit() {
return Optional.of(requestedNodes)
.filter(NodeSpec.CountNodeSpec.class::isInstance)
.map(spec -> new FlavorCount(spec.resources().get(), spec.fulfilledDeficitCount(acceptedOfRequestedFlavor)))
.filter(flavorCount -> flavorCount.getCount() > 0);
}
/**
* Make the number of non-retired nodes in the list equal to the requested number
* of nodes, and retire the rest of the list. Only retire currently active nodes.
* Prefer to retire nodes of the wrong flavor.
* Make as few changes to the retired set as possible.
*
* @param surplusNodes this will add nodes not any longer needed by this group to this list
* @return the final list of nodes
*/
List finalNodes(List surplusNodes) {
int currentRetiredCount = (int) nodes.stream().filter(node -> node.node.allocation().get().membership().retired()).count();
int deltaRetiredCount = requestedNodes.idealRetiredCount(nodes.size(), currentRetiredCount) - currentRetiredCount;
if (deltaRetiredCount > 0) { // retire until deltaRetiredCount is 0, prefer to retire higher indexes to minimize redistribution
for (PrioritizableNode node : byDecreasingIndex(nodes)) {
if ( ! node.node.allocation().get().membership().retired() && node.node.state() == Node.State.active) {
node.node = node.node.retire(Agent.application, clock.instant());
surplusNodes.add(node.node); // offer this node to other groups
if (--deltaRetiredCount == 0) break;
}
}
}
else if (deltaRetiredCount < 0) { // unretire until deltaRetiredCount is 0
for (PrioritizableNode node : byIncreasingIndex(nodes)) {
if ( node.node.allocation().get().membership().retired() && hasCompatibleFlavor(node)) {
node.node = node.node.unretire();
if (++deltaRetiredCount == 0) break;
}
}
}
for (PrioritizableNode node : nodes) {
// Set whether the node is exclusive
Allocation allocation = node.node.allocation().get();
node.node = node.node.with(allocation.with(allocation.membership()
.with(allocation.membership().cluster().exclusive(requestedNodes.isExclusive()))));
}
return nodes.stream().map(n -> n.node).collect(Collectors.toList());
}
List reservableNodes() {
// Include already reserved nodes to extend reservation period and to potentially update their cluster spec.
EnumSet reservableStates = EnumSet.of(Node.State.inactive, Node.State.ready, Node.State.reserved);
return nodesFilter(n -> !n.isNewNode && reservableStates.contains(n.node.state()));
}
List surplusNodes() {
return nodesFilter(n -> n.isSurplusNode);
}
List newNodes() {
return nodesFilter(n -> n.isNewNode);
}
private List nodesFilter(Predicate predicate) {
return nodes.stream()
.filter(predicate)
.map(n -> n.node)
.collect(Collectors.toList());
}
private List byDecreasingIndex(Set nodes) {
return nodes.stream().sorted(nodeIndexComparator().reversed()).collect(Collectors.toList());
}
private List byIncreasingIndex(Set nodes) {
return nodes.stream().sorted(nodeIndexComparator()).collect(Collectors.toList());
}
private Comparator nodeIndexComparator() {
return Comparator.comparing((PrioritizableNode n) -> n.node.allocation().get().membership().index());
}
static class FlavorCount {
private final NodeResources flavor;
private final int count;
private FlavorCount(NodeResources flavor, int count) {
this.flavor = flavor;
this.count = count;
}
NodeResources getFlavor() {
return flavor;
}
int getCount() {
return count;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy