com.yahoo.vespa.hosted.provision.maintenance.LoadBalancerExpirer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of node-repository Show documentation
Show all versions of node-repository Show documentation
Keeps track of node assignment in a multi-application setup.
The newest version!
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
import com.google.common.collect.Sets;
import com.yahoo.jdisc.Metric;
import com.yahoo.lang.MutableInteger;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer.State;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerService;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerSpec;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDb;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
* Periodically expire load balancers and de-provision inactive ones.
*
* Load balancers expire from the following states:
*
* {@link LoadBalancer.State#inactive}: An application is removed and load balancers are deactivated.
* {@link LoadBalancer.State#reserved}: A prepared application is never successfully activated, thus never activating
* any prepared load balancers.
*
* @author mpolden
*/
public class LoadBalancerExpirer extends NodeRepositoryMaintainer {

    // NOTE(review): this class logs through both this static LOG and a `log` member that is not declared here
    // (presumably inherited from the superclass) - confirm whether they can be unified.
    private static final Logger LOG = Logger.getLogger(LoadBalancerExpirer.class.getName());

    /** A reserved load balancer whose changedAt() is older than this is moved to inactive */
    private static final Duration reservedExpiry = Duration.ofHours(1);

    /** An inactive load balancer whose changedAt() is older than this may be removed, if it has no allocated nodes */
    private static final Duration inactiveExpiry = Duration.ofHours(1);

    private final LoadBalancerService service;
    private final CuratorDb db;

    /**
     * Creates a new expirer.
     *
     * @param nodeRepository the node repository, also providing the database load balancers are read from and written to
     * @param interval       passed on to the superclass
     * @param service        the service used to remove and re-configure load balancers, must be non-null
     * @param metric         passed on to the superclass
     * @throws NullPointerException if service is null
     */
    public LoadBalancerExpirer(NodeRepository nodeRepository, Duration interval, LoadBalancerService service, Metric metric) {
        super(nodeRepository, interval, metric);
        this.service = Objects.requireNonNull(service, "service must be non-null");
        this.db = nodeRepository.database();
    }

    /**
     * Expires reserved load balancers, removes removable ones and prunes reals from inactive ones.
     *
     * @return the mean of the values reported by the remove and prune steps
     */
    @Override
    protected double maintain() {
        expireReserved();
        return (deprovisionRemovable() + pruneReals()) / 2;
    }

    /** Move reserved load balancers that have expired to inactive */
    private void expireReserved() {
        Instant now = nodeRepository().clock().instant();
        Instant expiry = now.minus(reservedExpiry);
        patchLoadBalancers(lb -> canDeactivate(lb, expiry),
                           lb -> db.writeLoadBalancer(lb.with(State.inactive, now), lb.state()));
    }

    /**
     * Deprovision removable load balancers. A failure to remove one load balancer does not prevent the
     * remaining ones from being attempted; failures are collected and logged once at the end.
     *
     * @return the result of asSuccessFactorDeviation for the number of attempted and failed removals
     */
    private double deprovisionRemovable() {
        MutableInteger attempts = new MutableInteger(0);
        var failed = new ArrayList<LoadBalancerId>();
        // Typed as Exception so that it can be passed to the Throwable-accepting Logger.log overload below
        var lastException = new AtomicReference<Exception>();
        var expiry = nodeRepository().clock().instant().minus(inactiveExpiry);
        patchLoadBalancers(lb -> canRemove(lb, expiry), lb -> {
            try {
                attempts.add(1);
                log.log(Level.INFO, () -> "Removing expired inactive " + lb.id());
                // Remove from the service first: if that throws, the database entry is kept and the removal
                // is attempted again on the next run
                service.remove(lb);
                db.removeLoadBalancer(lb.id());
            } catch (Exception e) {
                failed.add(lb.id());
                lastException.set(e); // Only the most recent failure is kept for logging
            }
        });
        if (!failed.isEmpty()) {
            log.log(Level.WARNING, lastException.get(), () -> String.format("Failed to remove %d load balancers: %s, retrying in %s",
                                                                            failed.size(),
                                                                            failed.stream()
                                                                                  .map(LoadBalancerId::serializedForm)
                                                                                  .collect(Collectors.joining(", ")),
                                                                            interval()));
        }
        return asSuccessFactorDeviation(attempts.get(), failed.size());
    }

    /**
     * Remove reals from inactive load balancers. Only reals whose hostname no longer belongs to a node allocated
     * to the load balancer's cluster are removed, and load balancers without an instance are skipped.
     *
     * @return the result of asSuccessFactorDeviation for the number of attempted and failed re-configurations
     */
    private double pruneReals() {
        var attempts = new MutableInteger(0);
        var failed = new ArrayList<LoadBalancerId>();
        // Typed as Exception so that the (Level, String, Throwable) overload of Logger.log is selected below,
        // rather than the (Level, String, Object) overload which treats the argument as a message parameter
        var lastException = new AtomicReference<Exception>();
        patchLoadBalancers(lb -> lb.state() == State.inactive, lb -> {
            if (lb.instance().isEmpty()) return;
            var current = lb.instance().get();
            var allocatedNodes = allocatedNodes(lb.id()).stream().map(Node::hostname).collect(Collectors.toSet());
            var reals = new LinkedHashSet<>(current.reals());
            // Remove any real no longer allocated to this application
            boolean removedAny = reals.removeIf(real -> !allocatedNodes.contains(real.hostname().value()));
            if (!removedAny) return; // Nothing to remove
            try {
                attempts.add(1);
                LOG.log(Level.INFO, () -> "Removing reals from inactive load balancer " + lb.id() + ": " + Sets.difference(current.reals(), reals));
                LoadBalancerInstance instance = service.configure(current,
                                                                  new LoadBalancerSpec(lb.id().application(), lb.id().cluster(), reals,
                                                                                       current.settings(),
                                                                                       current.cloudAccount(), lb.idSeed()),
                                                                  true);
                db.writeLoadBalancer(lb.with(instance), lb.state());
            } catch (Exception e) {
                failed.add(lb.id());
                lastException.set(e); // Only the most recent failure is kept for logging
            }
        });
        if (!failed.isEmpty()) {
            log.log(Level.WARNING, String.format("Failed to remove reals from %d load balancers: %s, retrying in %s",
                                                 failed.size(),
                                                 failed.stream()
                                                       .map(LoadBalancerId::serializedForm)
                                                       .collect(Collectors.joining(", ")),
                                                 interval()),
                    lastException.get());
        }
        return asSuccessFactorDeviation(attempts.get(), failed.size());
    }

    /**
     * Patch load balancers matching given filter, while holding lock.
     *
     * Each load balancer is first checked against the filter without a lock, so that non-matching load balancers
     * never take the lock. Matching ones are read again and re-checked after the lock is acquired, and the patcher
     * is only invoked with that second read.
     *
     * @param filter  selects the load balancers to patch
     * @param patcher the action to apply to each selected load balancer, invoked while the lock is held
     */
    private void patchLoadBalancers(Predicate<LoadBalancer> filter, Consumer<LoadBalancer> patcher) {
        for (var id : db.readLoadBalancerIds()) {
            // Unlocked pre-check
            if (db.readLoadBalancer(id).filter(filter).isEmpty()) continue;
            try (var lock = db.lock(id.application(), Duration.ofSeconds(1))) {
                // The load balancer may have changed or disappeared since the pre-check, so read it again
                db.readLoadBalancer(id).filter(filter).ifPresent(patcher);
            }
        }
    }

    /**
     * Returns whether given load balancer can be removed: it is either removable, or it is inactive, was last
     * changed before expiry and no nodes are allocated to its cluster.
     */
    private boolean canRemove(LoadBalancer lb, Instant expiry) {
        return lb.state() == State.removable || (lb.state() == State.inactive &&
                                                 lb.changedAt().isBefore(expiry) &&
                                                 allocatedNodes(lb.id()).isEmpty()); // Node lookup is evaluated last
    }

    /** Returns whether given load balancer is reserved and was last changed before expiry */
    private boolean canDeactivate(LoadBalancer lb, Instant expiry) {
        return lb.state() == State.reserved && lb.changedAt().isBefore(expiry);
    }

    /**
     * Returns the active, inactive and reserved nodes owned by the application of given load balancer whose
     * cluster, mapped through {@link LoadBalancer#containerId}, equals the cluster of the load balancer.
     */
    private List<Node> allocatedNodes(LoadBalancerId loadBalancer) {
        return nodeRepository().nodes()
                               .list(Node.State.active, Node.State.inactive, Node.State.reserved)
                               .owner(loadBalancer.application())
                               // Always match the cluster by the effective container ID
                               // TODO(mpolden): Remove this and use NodeList::cluster once combined disappears in Vespa 9
                               .matching((node) -> node.allocation().isPresent() &&
                                                   LoadBalancer.containerId(node.allocation().get().membership().cluster())
                                                               .equals(loadBalancer.cluster()))
                               .asList();
    }

}