com.yahoo.vespa.hosted.provision.maintenance.DiskReplacer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of node-repository Show documentation
Keeps track of node assignment in a multi-application setup.
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeMutex;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.RebuildResult;
import com.yahoo.yolean.Exceptions;
import java.time.Duration;
import java.util.List;
import java.util.Optional;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Rebuilds hosts by replacing the root disk (only supports hosts with remote storage).
 *
 * <p>Each run selects candidate nodes with {@code rebuilding(true)}, locks them, and hands
 * them to the {@link HostProvisioner}. Nodes reported as rebuilt are written back to the node
 * repository once they no longer want to rebuild; nodes reported as failed are logged and
 * counted as failures for this run.
 *
 * @author mpolden
 */
public class DiskReplacer extends NodeRepositoryMaintainer {

    private static final Logger log = Logger.getLogger(DiskReplacer.class.getName());

    /** Performs the actual root disk replacement. */
    private final HostProvisioner hostProvisioner;

    /**
     * Creates a new disk replacer.
     *
     * @param nodeRepository  the node repository to read candidates from and write results to
     * @param interval        the maintenance interval, also reported in the retry log message
     * @param metric          the metric instance passed on to the superclass
     * @param hostProvisioner the provisioner used to replace root disks
     */
    DiskReplacer(NodeRepository nodeRepository, Duration interval, Metric metric, HostProvisioner hostProvisioner) {
        super(nodeRepository, interval, metric);
        this.hostProvisioner = hostProvisioner;
    }

    /**
     * Attempts to replace the root disk of all candidate nodes.
     *
     * @return 0 when there are no candidates, otherwise the value computed by
     *         {@code asSuccessFactorDeviation} from the number of nodes attempted and the
     *         number of nodes that failed
     */
    @Override
    protected double maintain() {
        NodeList candidates = nodeRepository().nodes().list().rebuilding(true);
        if (candidates.isEmpty()) {
            return 0;
        }

        int failures = 0;
        // Declared outside the try block because its size is needed after the locks are released.
        List<Node> rebuilding;
        // NOTE(review): the Optional<Duration> is presumably the lock acquisition timeout - confirm
        // against lockAndGetAll.
        try (var locked = nodeRepository().nodes().lockAndGetAll(candidates.asList(), Optional.of(Duration.ofSeconds(10)))) {
            // Use the nodes returned together with the locks rather than the candidate list.
            rebuilding = locked.nodes().stream().map(NodeMutex::node).toList();
            RebuildResult result = hostProvisioner.replaceRootDisk(rebuilding);

            for (Node updated : result.rebuilt()) {
                // Only persist nodes that no longer want to rebuild.
                if (!updated.status().wantToRebuild()) {
                    // NOTE(review): the no-op second argument is presumably acceptable because the
                    // node is already locked by the enclosing try - confirm against write().
                    nodeRepository().nodes().write(updated, () -> { });
                }
            }

            for (var entry : result.failed().entrySet()) {
                ++failures;
                log.log(Level.WARNING, "Failed to rebuild " + entry.getKey() + ", will retry in " +
                                       interval() + ": " + Exceptions.toMessageString(entry.getValue()));
            }
        }
        return asSuccessFactorDeviation(rebuilding.size(), failures);
    }

}