All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.vespa.hosted.provision.maintenance.NodeMetricsDbMaintainer Maven / Gradle / Ivy

There is a newer version: 8.458.13
Show newest version
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Environment;
import com.yahoo.jdisc.Metric;
import com.yahoo.lang.MutableInteger;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.autoscale.MetricsFetcher;
import com.yahoo.vespa.hosted.provision.autoscale.MetricsResponse;
import com.yahoo.yolean.Exceptions;

import java.time.Duration;
import java.util.Set;
import java.util.logging.Level;

/**
 * Maintainer which keeps the node metric db up to date by periodically fetching metrics from all active nodes.
 *
 * <p>Each run requests metrics once per application that has active nodes, pausing between requests so they
 * are spread over the maintenance interval, and finally asks the metrics db to garbage collect.</p>
 *
 * @author bratseth
 */
public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {

    /** Failures beyond this count within a single {@link #maintain()} invocation are counted but not logged. */
    private static final int maxWarningsPerInvocation = 2;

    private final MetricsFetcher metricsFetcher;

    /**
     * Creates a maintainer which runs without taking the maintenance lock.
     *
     * @param nodeRepository the node repository whose metrics db is updated
     * @param metricsFetcher the fetcher used to request metrics for each application
     * @param interval the maintenance interval; also used to pace the requests within a run
     * @param metric passed on to the superclass
     */
    public NodeMetricsDbMaintainer(NodeRepository nodeRepository,
                                   MetricsFetcher metricsFetcher,
                                   Duration interval,
                                   Metric metric) {
        super(nodeRepository, interval, metric, false); // No locking because this does not modify shared state
        this.metricsFetcher = metricsFetcher;
    }

    /**
     * Requests metrics for every application with active nodes and then garbage collects the metrics db.
     *
     * <p>The run ends early, without garbage collecting, if the maintainer is shutting down or the thread
     * is interrupted while pausing between requests.</p>
     *
     * @return 1.0 if there are no applications, otherwise the result of
     *         {@code asSuccessFactorDeviation} for the attempts made and the failures counted so far
     */
    @Override
    protected double maintain() {
        int attempts = 0;
        // Shared with the completion callbacks registered below.
        // NOTE(review): if fetchMetrics completes on another thread, failures arriving after this method
        // returns are not reflected in the returned value, and MutableInteger may need to be replaced by
        // a thread-safe counter - confirm against MetricsFetcher.
        var failures = new MutableInteger(0);
        try {
            Set<ApplicationId> applications = activeNodesByApplication().keySet();
            if (applications.isEmpty()) return 1.0;

            // Spread requests over the interval; with fewer than 4 applications the pause is a quarter interval
            long pauseMs = interval().toMillis() / Math.max(4, applications.size());
            int done = 0;
            for (ApplicationId application : applications) {
                if (shuttingDown()) return asSuccessFactorDeviation(attempts, failures.get());

                attempts++;
                metricsFetcher.fetchMetrics(application)
                              .whenComplete((metricsResponse, exception) -> handleResponse(metricsResponse,
                                                                                           exception,
                                                                                           failures,
                                                                                           application));
                if (++done < applications.size()) // no need to pause after the last request
                    Thread.sleep(pauseMs);
            }

            nodeRepository().metricsDb().gc();

            return asSuccessFactorDeviation(attempts, failures.get());
        }
        catch (InterruptedException e) {
            // Restore the interrupt status so code further up the stack can observe the interruption
            Thread.currentThread().interrupt();
            return asSuccessFactorDeviation(attempts, failures.get());
        }
    }

    /** Shuts down this maintainer and then deconstructs the metrics fetcher. */
    @Override
    public void shutdown() {
        super.shutdown();
        metricsFetcher.deconstruct();
    }

    /**
     * Handles the outcome of a single metrics request.
     *
     * <p>On failure the failure counter is incremented, and a warning is logged if this is the prod
     * environment and fewer than {@link #maxWarningsPerInvocation} failures have been counted so far.
     * On success with a non-null response, the node and cluster metrics are added to the metrics db.
     * If both the response and the exception are null, nothing is done.</p>
     *
     * @param response the metrics response, or null
     * @param exception the failure, or null if the request succeeded
     * @param failures the failure counter of the current maintain() invocation
     * @param application the application the metrics were requested for
     */
    private void handleResponse(MetricsResponse response,
                                Throwable exception,
                                MutableInteger failures,
                                ApplicationId application) {
        if (exception != null) {
            if (nodeRepository().zone().environment() == Environment.prod
                    && failures.get() < maxWarningsPerInvocation)
                log.log(Level.WARNING, "Could not update metrics for " + application + ": " +
                        Exceptions.toMessageString(exception));
            failures.add(1);
        }
        else if (response != null) {
            nodeRepository().metricsDb().addNodeMetrics(response.nodeMetrics());
            nodeRepository().metricsDb().addClusterMetrics(application, response.clusterMetrics());
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy