All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.vespa.model.builder.xml.dom.NodesSpecification Maven / Gradle / Ivy

There is a newer version: 8.458.13
Show newest version
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.builder.xml.dom;

import com.yahoo.collections.Pair;
import com.yahoo.component.Version;
import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.ConfigModelContext;
import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.CloudAccount;
import com.yahoo.config.provision.ClusterInfo;
import com.yahoo.config.provision.ClusterMembership;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.IntRange;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.ZoneEndpoint;
import com.yahoo.text.XML;
import com.yahoo.vespa.model.HostResource;
import com.yahoo.vespa.model.HostSystem;
import com.yahoo.vespa.model.container.xml.ContainerModelBuilder;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.ToDoubleFunction;

/**
 * A common utility class to represent a requirement for nodes during model building.
 * Such a requirement is commonly specified in services.xml as a nodes element.
 *
 * @author bratseth
 */
public class NodesSpecification {

    private final ClusterResources min, max;

    private final IntRange groupSize;

    private final boolean dedicated;

    /** The Vespa version we want the nodes to run */
    private final Version version;

    /** 
     * Whether the capacity amount specified is required or can be relaxed
     * at the discretion of the component fulfilling it
     */
    private final boolean required;

    private final boolean canFail;

    private final boolean exclusive;

    /** The repo part of a docker image (without tag), optional */
    private final Optional dockerImageRepo;

    /** The ID of the cluster referencing this node specification, if any */
    private final Optional combinedId;

    /** The cloud account to use for nodes in this spec, if any */
    private final Optional cloudAccount;

    /* Whether the count attribute was present on the nodes element. */
    private final boolean hasCountAttribute;

    private NodesSpecification(ClusterResources min,
                               ClusterResources max,
                               IntRange groupSize,
                               boolean dedicated, Version version,
                               boolean required, boolean canFail, boolean exclusive,
                               Optional dockerImageRepo,
                               Optional combinedId,
                               Optional cloudAccount,
                               boolean hasCountAttribute) {
        if (max.smallerThan(min))
            throw new IllegalArgumentException("Max resources must be larger or equal to min resources, but " +
                                               max + " is smaller than " + min);
        if (min.nodes() < 1)
            throw new IllegalArgumentException("Min node count cannot be less than 1, but is " + min.nodes());

        // Non-scaled resources must be equal
        if ( ! min.nodeResources().justNonNumbers().equals(max.nodeResources().justNonNumbers()))
            throw new IllegalArgumentException("Min and max resources must have the same non-numeric settings, but " +
                                               "min is " + min + " and max " + max);
        if (min.nodeResources().bandwidthGbps() != max.nodeResources().bandwidthGbps())
            throw new IllegalArgumentException("Min and max resources must have the same bandwidth, but " +
                                               "min is " + min + " and max " + max);

        this.min = min;
        this.max = max;
        this.groupSize = groupSize;
        this.dedicated = dedicated;
        this.version = version;
        this.required = required;
        this.canFail = canFail;
        this.exclusive = exclusive;
        this.dockerImageRepo = dockerImageRepo;
        this.combinedId = combinedId;
        this.cloudAccount = cloudAccount;
        this.hasCountAttribute = hasCountAttribute;
    }

    static NodesSpecification create(boolean dedicated, boolean canFail, Version version,
                                     ModelElement nodesElement, Optional dockerImageRepo,
                                     Optional cloudAccount) {
        var resolvedElement = resolveElement(nodesElement);
        var combinedId = findCombinedId(nodesElement, resolvedElement);
        var resourceConstraints = toResourceConstraints(resolvedElement);
        boolean hasCountAttribute = resolvedElement.stringAttribute("count") != null;
        return new NodesSpecification(resourceConstraints.min,
                                      resourceConstraints.max,
                                      resourceConstraints.groupSize,
                                      dedicated,
                                      version,
                                      resolvedElement.booleanAttribute("required", false),
                                      canFail,
                                      resolvedElement.booleanAttribute("exclusive", false),
                                      dockerImageToUse(resolvedElement, dockerImageRepo),
                                      combinedId,
                                      cloudAccount,
                                      hasCountAttribute);
    }

    private static ResourceConstraints toResourceConstraints(ModelElement nodesElement) {
        var nodes =  rangeFrom(nodesElement, "count");
        var groups =  rangeFrom(nodesElement, "groups");
        var groupSize =  rangeFrom(nodesElement, "group-size");

        if (nodes.from().orElse(1) < 1)
            throw new IllegalArgumentException("Min node resources cannot be less than 1, but is " + nodes.from().getAsInt());

        // Find the tightest possible limits for groups to avoid falsely concluding we are autoscaling
        // when only specifying group size
        int defaultMinGroups =                           nodes.from().orElse(1) / groupSize.to().orElse(nodes.from().orElse(1));
        int defaultMaxGroups = groupSize.isEmpty() ? 1 : nodes.to().orElse(1) / groupSize.from().orElse(1);

        var min = new ClusterResources(nodes.from().orElse(1), groups.from().orElse(defaultMinGroups), nodeResources(nodesElement).getFirst());
        var max = new ClusterResources(nodes.to().orElse(1), groups.to().orElse(defaultMaxGroups), nodeResources(nodesElement).getSecond());
        return new ResourceConstraints(min, max, groupSize);
    }

    private static IntRange rangeFrom(ModelElement element, String name) {
        try {
            return IntRange.from(element.stringAttribute(name, ""));
        }
        catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Illegal " + name + " value", e);
        }
    }

    private record ResourceConstraints(ClusterResources min, ClusterResources max, IntRange groupSize) {}

    /** Returns the ID of the cluster referencing this node specification, if any */
    private static Optional findCombinedId(ModelElement nodesElement, ModelElement resolvedElement) {
        if (resolvedElement != nodesElement) {
            // Specification for a container cluster referencing nodes in a content cluster
            return containerIdOf(nodesElement);
        }
        // Specification for a content cluster that is referenced by a container cluster
        return containerIdReferencing(nodesElement);
    }

    /** Returns a requirement for dedicated nodes taken from the given nodes element */
    public static NodesSpecification from(ModelElement nodesElement, ConfigModelContext context) {
        return create(true,
                      ! context.getDeployState().getProperties().isBootstrap(),
                      context.getDeployState().getWantedNodeVespaVersion(),
                      nodesElement,
                      context.getDeployState().getWantedDockerImageRepo(),
                      context.getDeployState().getProperties().cloudAccount());
    }

    /**
     * Returns a requirement for non-dedicated or dedicated nodes taken from the nodes element
     * contained in the given parent element, or empty if the parent element is null, or the nodes elements
     * is not present.
     */
    public static Optional optionalDedicatedFromParent(ModelElement parentElement,
                                                                           ConfigModelContext context) {
        if (parentElement == null) return Optional.empty();
        ModelElement nodesElement = parentElement.child("nodes");
        if (nodesElement == null) return Optional.empty();
        return Optional.of(create(nodesElement.booleanAttribute("dedicated", false),
                                  ! context.getDeployState().getProperties().isBootstrap(),
                                  context.getDeployState().getWantedNodeVespaVersion(),
                                  nodesElement,
                                  context.getDeployState().getWantedDockerImageRepo(),
                                  context.getDeployState().getProperties().cloudAccount()));
    }

    /**
     * Returns a requirement from count non-dedicated nodes in one group
     */
    public static NodesSpecification nonDedicated(int count, ConfigModelContext context) {
        return new NodesSpecification(new ClusterResources(count, 1, NodeResources.unspecified()),
                                      new ClusterResources(count, 1, NodeResources.unspecified()),
                                      IntRange.empty(),
                                      false,
                                      context.getDeployState().getWantedNodeVespaVersion(),
                                      false,
                                      ! context.getDeployState().getProperties().isBootstrap(),
                                      false,
                                      context.getDeployState().getWantedDockerImageRepo(),
                                      Optional.empty(),
                                      context.getDeployState().getProperties().cloudAccount(),
                                      false);
    }

    /** Returns a requirement from count dedicated nodes in one group */
    public static NodesSpecification dedicated(int count, ConfigModelContext context) {
        return new NodesSpecification(new ClusterResources(count, 1, NodeResources.unspecified()),
                                      new ClusterResources(count, 1, NodeResources.unspecified()),
                                      IntRange.empty(),
                                      true,
                                      context.getDeployState().getWantedNodeVespaVersion(),
                                      false,
                                      ! context.getDeployState().getProperties().isBootstrap(),
                                      false,
                                      context.getDeployState().getWantedDockerImageRepo(),
                                      Optional.empty(),
                                      context.getDeployState().getProperties().cloudAccount(),
                                      false);
    }

    /**
     * Returns a requirement for {@code count} shared nodes with {@code required} taken as
     * the OR over all content clusters, and with the given resources.
     */
    public static NodesSpecification requiredFromSharedParents(int count, NodeResources resources,
                                                               ModelElement element, ConfigModelContext context) {
        List allContent = findParentByTag("services", element.getXml()).map(services -> XML.getChildren(services, "content"))
                                                                                           .orElse(List.of())
                                                                                           .stream()
                                                                                           .map(content -> new ModelElement(content).child("nodes"))
                                                                                           .filter(nodes -> nodes != null && nodes.stringAttribute("count") != null)
                                                                                           .map(nodes -> from(nodes, context))
                                                                                           .toList();
        return new NodesSpecification(new ClusterResources(count, 1, resources),
                                      new ClusterResources(count, 1, resources),
                                      IntRange.empty(),
                                      true,
                                      context.getDeployState().getWantedNodeVespaVersion(),
                                      allContent.stream().anyMatch(content -> content.required),
                                      ! context.getDeployState().getProperties().isBootstrap(),
                                      false,
                                      context.getDeployState().getWantedDockerImageRepo(),
                                      Optional.empty(),
                                      context.getDeployState().getProperties().cloudAccount(),
                                      false);
    }

    public ClusterResources minResources() { return min; }
    public ClusterResources maxResources() { return max; }
    public IntRange groupSize() { return groupSize; }

    /**
     * Returns whether this requires dedicated nodes.
     * Otherwise the model encountering this request should reuse nodes requested for other purposes whenever possible.
     */
    public boolean isDedicated() { return dedicated; }

    /**
     * Returns whether the physical hosts running the nodes of this application can
     * also run nodes of other applications. Using exclusive nodes for containers increases security
     * and increases cost.
     */
    public boolean isExclusive() { return exclusive; }

    /** Returns whether the count attribute was present on the {@code } element. */
    public boolean hasCountAttribute() {
        return hasCountAttribute;
    }

    public Map provision(HostSystem hostSystem,
                                                          ClusterSpec.Type clusterType,
                                                          ClusterSpec.Id clusterId,
                                                          DeployLogger logger,
                                                          boolean stateful,
                                                          ClusterInfo clusterInfo) {
        return provision(hostSystem, clusterType, clusterId, ZoneEndpoint.defaultEndpoint, logger, stateful, clusterInfo);
    }

    public Map provision(HostSystem hostSystem,
                                                          ClusterSpec.Type clusterType,
                                                          ClusterSpec.Id clusterId,
                                                          ZoneEndpoint zoneEndpoint,
                                                          DeployLogger logger,
                                                          boolean stateful,
                                                          ClusterInfo info) {
        if (combinedId.isPresent())
            clusterType = ClusterSpec.Type.combined;
        ClusterSpec cluster = ClusterSpec.request(clusterType, clusterId)
                                         .vespaVersion(version)
                                         .exclusive(exclusive)
                                         .combinedId(combinedId.map(ClusterSpec.Id::from))
                                         .dockerImageRepository(dockerImageRepo)
                                         .loadBalancerSettings(zoneEndpoint)
                                         .stateful(stateful)
                                         .build();
        return hostSystem.allocateHosts(cluster, Capacity.from(min, max, groupSize, required, canFail, cloudAccount, info), logger);
    }

    private static Pair nodeResources(ModelElement nodesElement) {
        ModelElement resources = nodesElement.child("resources");
        if (resources != null) {
            return nodeResourcesFromResourcesElement(resources);
        }
        else if (nodesElement.stringAttribute("flavor") != null) { // legacy fallback
            var flavorResources = NodeResources.fromLegacyName(nodesElement.stringAttribute("flavor"));
            return new Pair<>(flavorResources, flavorResources);
        }
        else {
            return new Pair<>(NodeResources.unspecified(), NodeResources.unspecified());
        }
    }

    private static Pair nodeResourcesFromResourcesElement(ModelElement element) {
        Pair vcpu       = toRange("vcpu", element,      .0, Double::parseDouble);
        Pair memory     = toRange("memory", element,    .0, s -> parseGbAmount(s, "B"));
        Pair disk       = toRange("disk", element,      .0, s -> parseGbAmount(s, "B"));
        Pair bandwith   = toRange("bandwidth", element, .3, s -> parseGbAmount(s, "BPS"));
        NodeResources.DiskSpeed   diskSpeed     = parseOptionalDiskSpeed(element.stringAttribute("disk-speed"));
        NodeResources.StorageType storageType   = parseOptionalStorageType(element.stringAttribute("storage-type"));
        NodeResources.Architecture architecture = parseOptionalArchitecture(element.stringAttribute("architecture"));
        NodeResources.GpuResources gpuResources = parseOptionalGpuResources(element.child("gpu"));

        var min = new NodeResources(vcpu.getFirst(),  memory.getFirst(),  disk.getFirst(),  bandwith.getFirst(),
                                    diskSpeed, storageType, architecture, gpuResources);
        var max = new NodeResources(vcpu.getSecond(), memory.getSecond(), disk.getSecond(), bandwith.getSecond(),
                                    diskSpeed, storageType, architecture, gpuResources);
        return new Pair<>(min, max);
    }

    private static NodeResources.GpuResources parseOptionalGpuResources(ModelElement element) {
        if (element == null) return NodeResources.GpuResources.getDefault();
        int count = element.requiredIntegerAttribute("count");
        double memory = parseGbAmount(element.requiredStringAttribute("memory"), "B");
        return new NodeResources.GpuResources(count, memory);
    }

    private static double parseGbAmount(String byteAmount, String unit) {
        byteAmount = byteAmount.strip();
        byteAmount = byteAmount.toUpperCase();
        if (byteAmount.endsWith(unit))
            byteAmount = byteAmount.substring(0, byteAmount.length() - unit.length());

        double multiplier = -1;
        if (byteAmount.endsWith("K"))
            multiplier = Math.pow(1000, -2);
        else if (byteAmount.endsWith("M"))
            multiplier = Math.pow(1000, -1);
        else if (byteAmount.endsWith("G"))
            multiplier = 1;
        else if (byteAmount.endsWith("T"))
            multiplier = 1000;
        else if (byteAmount.endsWith("P"))
            multiplier = Math.pow(1000, 2);
        else if (byteAmount.endsWith("E"))
            multiplier = Math.pow(1000, 3);
        else if (byteAmount.endsWith("Z"))
            multiplier = Math.pow(1000, 4);
        else if (byteAmount.endsWith("Y"))
            multiplier = Math.pow(1000, 5);

        if (multiplier == -1)
            multiplier = Math.pow(1000, -3);
        else
            byteAmount = byteAmount.substring(0, byteAmount.length() -1).strip();

        try {
            return Double.parseDouble(byteAmount) * multiplier;
        }
        catch (NumberFormatException e) {
            throw new IllegalArgumentException("Invalid byte amount '" + byteAmount +
                                               "': Must be a floating point number " +
                                               "optionally followed by k, M, G, T, P, E, Z or Y");
        }
    }

    private static NodeResources.DiskSpeed parseOptionalDiskSpeed(String diskSpeedString) {
        if (diskSpeedString == null) return NodeResources.DiskSpeed.getDefault();
        return switch (diskSpeedString) {
            case "fast" -> NodeResources.DiskSpeed.fast;
            case "slow" -> NodeResources.DiskSpeed.slow;
            case "any" -> NodeResources.DiskSpeed.any;
            default -> throw new IllegalArgumentException("Illegal disk-speed value '" + diskSpeedString +
                                                          "': Legal values are 'fast', 'slow' and 'any')");
        };
    }

    private static NodeResources.StorageType parseOptionalStorageType(String storageTypeString) {
        if (storageTypeString == null) return NodeResources.StorageType.getDefault();
        return switch (storageTypeString) {
            case "remote" -> NodeResources.StorageType.remote;
            case "local" -> NodeResources.StorageType.local;
            case "any" -> NodeResources.StorageType.any;
            default -> throw new IllegalArgumentException("Illegal storage-type value '" + storageTypeString +
                                                          "': Legal values are 'remote', 'local' and 'any')");
        };
    }

    private static NodeResources.Architecture parseOptionalArchitecture(String architecture) {
        if (architecture == null) return NodeResources.Architecture.getDefault();
        return switch (architecture) {
            case "x86_64" -> NodeResources.Architecture.x86_64;
            case "arm64" -> NodeResources.Architecture.arm64;
            case "any" -> NodeResources.Architecture.any;
            default -> throw new IllegalArgumentException("Illegal architecture value '" + architecture +
                                                          "': Legal values are 'x86_64', 'arm64' and 'any')");
        };
    }

    /**
     * Resolve any reference in nodesElement and return the referred element.
     *
     * If nodesElement does not refer to a different element, this method behaves as the identity function.
     */
    private static ModelElement resolveElement(ModelElement nodesElement) {
        var element = nodesElement.getXml();
        var referenceId = element.getAttribute("of");
        if (referenceId.isEmpty()) return nodesElement;

        var services = findParentByTag("services", element).orElseThrow(() -> clusterReferenceNotFoundException(referenceId));
        var referencedService = findChildById(services, referenceId).orElseThrow(() -> clusterReferenceNotFoundException(referenceId));
        if ( ! referencedService.getTagName().equals("content"))
            throw new IllegalArgumentException("service '" + referenceId + "' is not a content service");
        var referencedNodesElement = XML.getChild(referencedService, "nodes");
        if (referencedNodesElement == null)
            throw new IllegalArgumentException("expected reference to service '" + referenceId + "' to supply nodes, " +
                                               "but that service has no  element");

        return new ModelElement(referencedNodesElement);
    }

    /** Returns the ID of the parent container element of nodesElement, if any  */
    private static Optional containerIdOf(ModelElement nodesElement) {
        var element = nodesElement.getXml();
        var container = findParentByTag("container", element);
        return container.map(el -> el.getAttribute("id"));
    }

    /** Returns the ID of the container element referencing nodesElement, if any */
    private static Optional containerIdReferencing(ModelElement nodesElement) {
        var element = nodesElement.getXml();
        var services = findParentByTag("services", element);
        if (services.isEmpty()) return Optional.empty();

        var content = findParentByTag("content", element);
        if (content.isEmpty()) return Optional.empty();
        var contentClusterId = content.get().getAttribute("id");
        if (contentClusterId.isEmpty()) return Optional.empty();
        for (var rootChild : XML.getChildren(services.get())) {
            if ( ! ContainerModelBuilder.isContainerTag(rootChild)) continue;
            var nodes = XML.getChild(rootChild, "nodes");
            if (nodes == null) continue;
            if (!contentClusterId.equals(nodes.getAttribute("of"))) continue;
            return Optional.of(rootChild.getAttribute("id"));
        }
        return Optional.empty();
    }

    private static Optional findChildById(Element parent, String id) {
        for (Element child : XML.getChildren(parent))
            if (id.equals(child.getAttribute("id"))) return Optional.of(child);
        return Optional.empty();
    }

    private static Optional findParentByTag(String tag, Element element) {
        Node parent = element.getParentNode();
        if (parent == null) return Optional.empty();
        if ( ! (parent instanceof Element parentElement)) return Optional.empty();
        if (parentElement.getTagName().equals(tag)) return Optional.of(parentElement);
        return findParentByTag(tag, parentElement);
    }

    private static IllegalArgumentException clusterReferenceNotFoundException(String referenceId) {
        return new IllegalArgumentException("referenced service '" + referenceId + "' is not defined");
    }

    private static Optional dockerImageToUse(ModelElement nodesElement, Optional dockerImage) {
        String dockerImageFromElement = nodesElement.stringAttribute("docker-image");
        return dockerImageFromElement == null ? dockerImage : Optional.of(DockerImage.fromString(dockerImageFromElement));
    }

    /** Parses a value ("value") or value range ("[min-value, max-value]") */
    private static Pair toRange(String name, ModelElement element, double defaultValue, ToDoubleFunction valueParser) {
        String s = element.stringAttribute(name);
        try {
            Pair pair;
            if (s == null) return new Pair<>(defaultValue, defaultValue);
            s = s.trim();
            if (s.startsWith("[") && s.endsWith("]")) {
                String[] numbers = s.substring(1, s.length() - 1).split(",");
                if (numbers.length != 2) throw new IllegalArgumentException();
                pair = new Pair<>(valueParser.applyAsDouble(numbers[0].trim()), valueParser.applyAsDouble(numbers[1].trim()));
                if (pair.getFirst() > pair.getSecond())
                    throw new IllegalArgumentException("first value cannot be larger than second value");
            } else {
                pair = new Pair<>(valueParser.applyAsDouble(s), valueParser.applyAsDouble(s));
            }
            if (pair.getFirst() < 0 || pair.getSecond() < 0)
                throw new IllegalArgumentException("values cannot be negative");
            return pair;
        }
        catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Expected a number or range on the form [min, max] for node resource '" + name + "', but got '" + s + "'", e);
        }
    }

    @Override
    public String toString() {
        return "specification of " + (dedicated ? "dedicated " : "") +
               (min.equals(max) ? min : "min " + min + " max " + max);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy