All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.vespa.hosted.provision.autoscale.MetricsResponse Maven / Gradle / Ivy

// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;

import com.yahoo.collections.ListMap;
import com.yahoo.collections.Pair;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.slime.ArrayTraverser;
import com.yahoo.slime.Inspector;
import com.yahoo.slime.ObjectTraverser;
import com.yahoo.slime.Slime;
import com.yahoo.slime.SlimeUtils;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;

import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

/**
 * A response containing metrics for a collection of nodes.
 *
 * @author bratseth
 */
public class MetricsResponse {

    /** Node level metrics */
    private final Collection> nodeMetrics;

    /**
     * Cluster level metrics.
     * Must be aggregated at fetch time to avoid issues with nodes and nodes joining/leaving the cluster over time.
     */
    private final Map clusterMetrics = new HashMap<>();

    /** Creates this from a metrics/V2 response */
    public MetricsResponse(String response, NodeList applicationNodes, NodeRepository nodeRepository) {
        this(SlimeUtils.jsonToSlime(response), applicationNodes, nodeRepository);
    }

    public MetricsResponse(Collection> metrics) {
        this.nodeMetrics = metrics;
    }

    private MetricsResponse(Slime response, NodeList applicationNodes, NodeRepository nodeRepository) {
        nodeMetrics = new ArrayList<>();
        Inspector root = response.get();
        Inspector nodes = root.field("nodes");
        nodes.traverse((ArrayTraverser)(__, node) -> consumeNode(node, applicationNodes, nodeRepository));
    }

    public Collection> nodeMetrics() { return nodeMetrics; }

    public Map clusterMetrics() { return clusterMetrics; }

    private void consumeNode(Inspector nodeObject, NodeList applicationNodes, NodeRepository nodeRepository) {
        String hostname = nodeObject.field("hostname").asString();
        Optional node = applicationNodes.node(hostname);
        if (node.isEmpty()) return; // Node is not part of this cluster any longer

        ListMap nodeValues = new ListMap<>();
        Instant at = consumeNodeMetrics(nodeObject.field("node"), nodeValues);
        consumeServiceMetrics(nodeObject.field("services"), nodeValues);

        nodeMetrics.add(new Pair<>(hostname, new NodeMetricSnapshot(at,
                                                                    new Load(Metric.cpu.from(nodeValues),
                                                                             Metric.memory.from(nodeValues),
                                                                             Metric.disk.from(nodeValues)),
                                                                    (long)Metric.generation.from(nodeValues),
                                                                    Metric.inService.from(nodeValues) > 0,
                                                                    clusterIsStable(node.get(), applicationNodes, nodeRepository),
                                                                    Metric.queryRate.from(nodeValues))));

        var cluster = node.get().allocation().get().membership().cluster().id();
        var metrics = clusterMetrics.getOrDefault(cluster, ClusterMetricSnapshot.empty(at));
        metrics = metrics.withQueryRate(metrics.queryRate() + Metric.queryRate.from(nodeValues));
        metrics = metrics.withWriteRate(metrics.queryRate() + Metric.writeRate.from(nodeValues));
        clusterMetrics.put(cluster, metrics);
    }

    private Instant consumeNodeMetrics(Inspector nodeObject, ListMap nodeValues) {
        long timestampSecond = nodeObject.field("timestamp").asLong();
        Instant at = Instant.ofEpochMilli(timestampSecond * 1000);
        nodeObject.field("metrics").traverse((ArrayTraverser) (__, item) -> consumeMetricsItem(item, nodeValues));
        return at;
    }

    private void consumeServiceMetrics(Inspector servicesObject, ListMap nodeValues) {
        servicesObject.traverse((ArrayTraverser) (__, item) -> consumeServiceItem(item, nodeValues));
    }

    private void consumeServiceItem(Inspector serviceObject, ListMap nodeValues) {
        serviceObject.field("metrics").traverse((ArrayTraverser) (__, item) -> consumeMetricsItem(item, nodeValues));
    }

    private void consumeMetricsItem(Inspector item, ListMap values) {
        item.field("values").traverse((ObjectTraverser)(name, value) -> values.put(name, value.asDouble()));
    }

    private boolean clusterIsStable(Node node, NodeList applicationNodes, NodeRepository nodeRepository) {
        ClusterSpec cluster = node.allocation().get().membership().cluster();
        return Autoscaler.clusterIsStable(applicationNodes.cluster(cluster.id()), nodeRepository);
    }

    public static MetricsResponse empty() { return new MetricsResponse(List.of()); }

    /** The metrics this can read */
    private enum Metric {

        cpu { // a node resource

            @Override
            public List metricResponseNames() { return List.of("cpu.util"); }

            @Override
            double computeFinal(ListMap values) {
                return values.values().stream().flatMap(List::stream).mapToDouble(v -> v).average().orElse(0) / 100; // % to ratio
            }

        },
        memory { // a node resource

            @Override
            public List metricResponseNames() {
                return List.of("content.proton.resource_usage.memory.average", "mem.util");
            }

            @Override
            double computeFinal(ListMap values) {
                var valueList = values.get("content.proton.resource_usage.memory.average"); // prefer over mem.util
                if ( ! valueList.isEmpty()) return valueList.get(0);

                valueList = values.get("mem.util");
                if ( ! valueList.isEmpty()) return valueList.get(0) / 100; // % to ratio

                return 0;
            }

        },
        disk { // a node resource

            @Override
            public List metricResponseNames() {
                return List.of("content.proton.resource_usage.disk.average", "disk.util");
            }

            @Override
            double computeFinal(ListMap values) {
                var valueList = values.get("content.proton.resource_usage.disk.average"); // prefer over mem.util
                if ( ! valueList.isEmpty()) return valueList.get(0);

                valueList = values.get("disk.util");
                if ( ! valueList.isEmpty()) return valueList.get(0) / 100; // % to ratio

                return 0;
            }

        },
        generation { // application config generation active on the node

            @Override
            public List metricResponseNames() { return List.of("application_generation"); }

            @Override
            double computeFinal(ListMap values) {
                return values.values().stream().flatMap(List::stream).mapToDouble(v -> v).min().orElse(-1);
            }

        },
        inService {

            @Override
            public List metricResponseNames() { return List.of("in_service"); }

            @Override
            double computeFinal(ListMap values) {
                // Really a boolean. Default true. If any is oos -> oos.
                return values.values().stream().flatMap(List::stream).anyMatch(v -> v == 0) ? 0 : 1;
            }

        },
        queryRate { // queries per second

            @Override
            public List metricResponseNames() {
                return List.of("queries.rate",
                               "content.proton.documentdb.matching.queries.rate");
            }

        },
        writeRate { // writes per second

            @Override
            public List metricResponseNames() {
                return List.of("feed.http-requests.rate",
                               "vds.filestor.allthreads.put.count.rate",
                               "vds.filestor.allthreads.remove.count.rate",
                               "vds.filestor.allthreads.update.count.rate"); }

        };

        /**
         * The names of this metric as emitted from its source.
         * A map of the values of these names which were present in the response will
         * be provided to computeFinal to decide on a single value.
         */
        public abstract List metricResponseNames();

        /** Computes the final metric value */
        double computeFinal(ListMap values) {
            return values.values().stream().flatMap(List::stream).mapToDouble(v -> v).sum();
        }

        public double from(ListMap metricValues) {
            ListMap values = new ListMap<>(metricValues);
            values.keySet().retainAll(metricResponseNames());
            return computeFinal(values);
        }

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy