All downloads are free. Search and download functionality uses the official Maven repository.

com.yahoo.vespa.hosted.controller.maintenance.ChangeManagementAssessor Maven / Gradle / Ivy

// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;

import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

/**
 * @author smorgrav
 */
public class ChangeManagementAssessor {

    private final NodeRepository nodeRepository;

    public ChangeManagementAssessor(NodeRepository nodeRepository) {
        this.nodeRepository = nodeRepository;
    }

    public Assessment assessment(List impactedHostnames, ZoneId zone) {
        return assessmentInner(impactedHostnames, nodeRepository.list(zone, NodeFilter.all()), zone);
    }

    Assessment assessmentInner(List impactedHostnames, List allNodes, ZoneId zone) {

        List impactedParentHosts = toParentHosts(impactedHostnames, allNodes);
        // Group impacted application nodes by parent host
        Map> prParentHost = allNodes.stream()
                .filter(node -> node.state() == Node.State.active) //TODO look at more states?
                .filter(node -> impactedParentHosts.contains(node.parentHostname().map(HostName::value).orElse("")))
                .collect(Collectors.groupingBy(node ->
                    allNodes.stream()
                            .filter(parent -> parent.hostname().equals(node.parentHostname().get()))
                            .findFirst().orElseThrow()
                ));

        // Group nodes pr cluster
        Map> prCluster = prParentHost.values()
                .stream()
                .flatMap(Collection::stream)
                .collect(Collectors.groupingBy(ChangeManagementAssessor::clusterKey));

        var tenantHosts = prParentHost.keySet().stream()
                .filter(node -> node.type() == NodeType.host)
                .map(node -> node.hostname())
                .collect(Collectors.toList());

        boolean allHostsReplacable = tenantHosts.isEmpty() || nodeRepository.isReplaceable(
                zone,
                tenantHosts
        );

        // Report assessment pr cluster
        var clusterAssessments = prCluster.entrySet().stream().map((entry) -> {
            Cluster cluster = entry.getKey();
            List nodes = entry.getValue();

            long[] totalStats = clusterStats(cluster, allNodes);
            long[] impactedStats = clusterStats(cluster, nodes);

            ClusterAssessment assessment = new ClusterAssessment();
            assessment.app = cluster.getApp();
            assessment.zone = zone.value();
            assessment.cluster = cluster.getClusterType() + ":" + cluster.getClusterId();
            assessment.clusterSize = totalStats[0];
            assessment.clusterImpact = impactedStats[0];
            assessment.groupsTotal = totalStats[1];
            assessment.groupsImpact = impactedStats[1];


            // TODO check upgrade policy
            assessment.upgradePolicy = "na";
            // TODO do some heuristic on suggestion action
            assessment.suggestedAction = allHostsReplacable ? "Retire all hosts" : "nothing";
            // TODO do some heuristic on impact
            assessment.impact = getImpact(cluster, impactedStats, totalStats);

            return assessment;
        }).collect(Collectors.toList());

        var hostAssessments = prParentHost.entrySet().stream().map((entry) -> {
            HostAssessment hostAssessment = new HostAssessment();
            hostAssessment.hostName = entry.getKey().hostname().value();
            hostAssessment.switchName = entry.getKey().switchHostname().orElse(null);
            hostAssessment.numberOfChildren = entry.getValue().size();

            //TODO: Some better heuristic for what's considered problematic
            hostAssessment.numberOfProblematicChildren = (int) entry.getValue().stream()
                    .mapToInt(node -> prCluster.get(clusterKey(node)).size())
                    .filter(i -> i > 1)
                    .count();

            return hostAssessment;
        }).collect(Collectors.toList());

        return new Assessment(clusterAssessments, hostAssessments);
    }

    private List toParentHosts(List impactedHostnames, List allNodes) {
        return impactedHostnames.stream()
                .flatMap(hostname ->
                    allNodes.stream()
                            .filter(node -> List.of(NodeType.config, NodeType.proxy, NodeType.host).contains(node.type()))
                            .filter(node -> hostname.equals(node.hostname().value()) || hostname.equals(node.parentHostname().map(HostName::value).orElse("")))
                            .map(node -> {
                                if (node.type() == NodeType.host)
                                    return node.hostname().value();
                                return node.parentHostname().get().value();
                            }).findFirst().stream()
                )
                .collect(Collectors.toList());
    }

    private static Cluster clusterKey(Node node) {
        if (node.owner().isEmpty())
            return Cluster.EMPTY;
        String appId = node.owner().get().serializedForm();
        return new Cluster(node.clusterType(), node.clusterId(), appId, node.type());
    }

    private static long[] clusterStats(Cluster cluster, List containerNodes) {
        List clusterNodes = containerNodes.stream().filter(node -> cluster.equals(clusterKey(node))).collect(Collectors.toList());
        long groups = clusterNodes.stream().map(Node::group).distinct().count();
        return new long[] { clusterNodes.size(), groups};
    }

    private String getImpact(Cluster cluster, long[] impactedStats, long[] totalStats) {
        switch (cluster.getNodeType()) {
            case tenant:
                return getTenantImpact(cluster, impactedStats, totalStats);
            case proxy:
                return getProxyImpact(impactedStats[0], totalStats[0]);
            case config:
                return getConfigServerImpact(impactedStats[0]);
            default:
                return "Unkown impact";
        }
    }

    private String getTenantImpact(Cluster cluster, long[] impactedStats, long[] totalStats) {
        switch (cluster.getClusterType()) {
            case container:
                return getContainerImpact(impactedStats[0], totalStats[0]);
            case content:
            case combined:
                return getContentImpact(totalStats[1] > 1, impactedStats[0], impactedStats[1]);
            default:
                return "Unknown impact";
        }
    }

    private String getProxyImpact(long impactedNodes, long totalNodes) {
        int impact = (int) (100.0 * impactedNodes / totalNodes);
        return impact + "% of routing nodes impacted. Consider reprovisioning if too many";
    }

    private String getConfigServerImpact(long impactedNodes) {
        if (impactedNodes == 1) {
            return "Acceptable impact";
        }
        return "Large impact. Consider reprovisioning one or more config servers";
    }

    private String getContainerImpact(long impactedNodes, long totalNodes) {
        if ((double) impactedNodes / totalNodes  <= 0.1) {
            return "Impact not larger than upgrade policy";
        }
        return "Impact larger than upgrade policy";
    }

    private String getContentImpact(boolean isGrouped, long impactedNodes, long impactedGroups) {
        if ((isGrouped && impactedGroups == 1) || impactedNodes == 1)
            return "Impact not larger than upgrade policy";
        return "Impact larger than upgrade policy";
    }


    public static class Assessment {
        List clusterAssessments;
        List hostAssessments;

        Assessment(List clusterAssessments, List hostAssessments) {
            this.clusterAssessments = clusterAssessments;
            this.hostAssessments = hostAssessments;
        }

        public List getClusterAssessments() {
            return clusterAssessments;
        }

        public List getHostAssessments() {
            return hostAssessments;
        }
    }

    public static class ClusterAssessment {
        public String app;
        public String zone;
        public String cluster;
        public long clusterImpact;
        public long clusterSize;
        public long groupsImpact;
        public long groupsTotal;
        public String upgradePolicy;
        public String suggestedAction;
        public String impact;
    }

    public static class HostAssessment {
        public String hostName;
        public String switchName;
        public int numberOfChildren;
        public int numberOfProblematicChildren;
    }

    private static class Cluster {
        private Node.ClusterType clusterType;
        private String clusterId;
        private String app;
        private NodeType nodeType;

        public final static Cluster EMPTY = new Cluster(Node.ClusterType.unknown, "na", "na", NodeType.tenant);

        public Cluster(Node.ClusterType clusterType, String clusterId, String app, NodeType nodeType) {
            this.clusterType = clusterType;
            this.clusterId = clusterId;
            this.app = app;
            this.nodeType = nodeType;
        }

        public Node.ClusterType getClusterType() {
            return clusterType;
        }

        public String getClusterId() {
            return clusterId;
        }

        public String getApp() {
            return app;
        }

        public NodeType getNodeType() {
            return nodeType;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            Cluster cluster = (Cluster) o;
            return Objects.equals(clusterType, cluster.clusterType) &&
                    Objects.equals(clusterId, cluster.clusterId) &&
                    Objects.equals(app, cluster.app);
        }

        @Override
        public int hashCode() {
            return Objects.hash(clusterType, clusterId, app);
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy