com.yahoo.vespa.model.content.StorageGroup Maven / Gradle / Ivy
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.content;
import com.yahoo.config.model.ConfigModelContext;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.config.provision.ClusterMembership;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.vespa.config.content.StorDistributionConfig;
import com.yahoo.vespa.model.HostResource;
import com.yahoo.vespa.model.HostSystem;
import com.yahoo.vespa.model.builder.xml.dom.ModelElement;
import com.yahoo.vespa.model.builder.xml.dom.NodesSpecification;
import com.yahoo.vespa.model.builder.xml.dom.VespaDomBuilder;
import com.yahoo.vespa.model.container.Container;
import com.yahoo.vespa.model.content.cluster.ContentCluster;
import com.yahoo.vespa.model.content.cluster.RedundancyBuilder;
import com.yahoo.vespa.model.content.engines.PersistenceEngine;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.logging.Level;
/**
* A group of storage nodes/distributors.
*
* @author unknown, probably thomasg
* @author bratseth has done things here recently
*/
public class StorageGroup {
private final boolean useCpuSocketAffinity;
private final String index;
private Optional partitions;
String name;
private final boolean isHosted;
private final Optional mmapNoCoreLimit;
private final Optional coreOnOOM;
private final Optional noVespaMalloc;
private final Optional vespaMalloc;
private final Optional vespaMallocDebug;
private final Optional vespaMallocDebugStackTrace;
private final List subgroups = new ArrayList<>();
private final List nodes = new ArrayList<>();
/**
* Creates a storage group
*
* @param isHosted true if this is in a hosted setup
* @param name the name of this group
* @param index the distribution-key index of this group
* @param partitions the distribution strategy to use to distribute content to subgroups or empty
* (meaning that the "*" distribution will be used) only if this is a leaf group
* (having nodes, not subgroups as children).
* @param useCpuSocketAffinity whether processes should be started with socket affinity
*/
private StorageGroup(boolean isHosted, String name, String index, Optional partitions,
boolean useCpuSocketAffinity, Optional mmapNoCoreLimit, Optional coreOnOOM,
Optional noVespaMalloc, Optional vespaMalloc,
Optional vespaMallocDebug, Optional vespaMallocDebugStackTrace)
{
this.isHosted = isHosted;
this.index = index;
this.name = name;
this.partitions = partitions;
this.useCpuSocketAffinity = useCpuSocketAffinity;
this.mmapNoCoreLimit = mmapNoCoreLimit;
this.coreOnOOM = coreOnOOM;
this.noVespaMalloc = noVespaMalloc;
this.vespaMalloc = vespaMalloc;
this.vespaMallocDebug = vespaMallocDebug;
this.vespaMallocDebugStackTrace = vespaMallocDebugStackTrace;
}
private StorageGroup(boolean isHosted, String name, String index) {
this(isHosted, name, index, Optional.empty(), false, Optional.empty(),Optional.empty(), Optional.empty(),
Optional.empty(), Optional.empty(), Optional.empty());
}
/** Returns the name of this group, or null if it is the root group */
public String getName() { return name; }
/** Returns the subgroups of this, or an empty list if it is a leaf group */
public List getSubgroups() { return subgroups; }
/** Returns the nodes of this, or an empty list of it is not a leaf group */
public List getNodes() { return nodes; }
public boolean isHosted() { return isHosted; }
/** Returns the index of this group, or null if it is the root group */
public String getIndex() { return index; }
public Optional getPartitions() { return partitions; }
public boolean useCpuSocketAffinity() { return useCpuSocketAffinity; }
public Optional getMmapNoCoreLimit() { return mmapNoCoreLimit; }
public Optional getCoreOnOOM() { return coreOnOOM; }
public Optional getNoVespaMalloc() { return noVespaMalloc; }
public Optional getVespaMalloc() { return vespaMalloc; }
public Optional getVespaMallocDebug() { return vespaMallocDebug; }
public Optional getVespaMallocDebugStackTrace() { return vespaMallocDebugStackTrace; }
/** Returns all the nodes below this group */
public List recursiveGetNodes() {
if ( ! nodes.isEmpty()) return nodes;
List nodes = new ArrayList<>();
for (StorageGroup subgroup : subgroups)
nodes.addAll(subgroup.recursiveGetNodes());
return nodes;
}
public Collection getGroupStructureConfig() {
List groups = new ArrayList<>();
StorDistributionConfig.Group.Builder myGroup = new StorDistributionConfig.Group.Builder();
getConfig(myGroup);
groups.add(myGroup);
for (StorageGroup g : subgroups) {
groups.addAll(g.getGroupStructureConfig());
}
return groups;
}
public void getConfig(StorDistributionConfig.Group.Builder builder) {
builder.index(index == null ? "invalid" : index);
builder.name(name == null ? "invalid" : name);
partitions.ifPresent(builder::partitions);
for (StorageNode node : nodes) {
StorDistributionConfig.Group.Nodes.Builder nb = new StorDistributionConfig.Group.Nodes.Builder();
nb.index(node.getDistributionKey());
nb.retired(node.isRetired());
builder.nodes.add(nb);
}
builder.capacity(getCapacity());
}
public int getNumberOfLeafGroups() {
if (subgroups.isEmpty()) return 1;
int count = 0;
for (StorageGroup g : subgroups)
count += g.getNumberOfLeafGroups();
return count;
}
public double getCapacity() {
double capacity = 0;
for (StorageNode node : nodes) {
capacity += node.getCapacity();
}
for (StorageGroup group : subgroups) {
capacity += group.getCapacity();
}
return capacity;
}
/** Returns the total number of nodes below this group */
public int countNodes(boolean includeRetired) {
int nodeCount = (int)nodes.stream().filter(node -> includeRetired || ! node.isRetired()).count();
for (StorageGroup group : subgroups)
nodeCount += group.countNodes(includeRetired);
return nodeCount;
}
@Override
public boolean equals(Object o) {
if (o instanceof StorageGroup other) {
return this.index.equals(other.index) &&
this.name.equals(other.name) &&
this.partitions.equals(other.partitions);
}
return false;
}
@Override public int hashCode() {
return java.util.Objects.hash(index, name, partitions);
}
public static Map provisionHosts(NodesSpecification nodesSpecification,
String clusterIdString,
HostSystem hostSystem,
ConfigModelContext context) {
ClusterSpec.Id clusterId = ClusterSpec.Id.from(clusterIdString);
return nodesSpecification.provision(hostSystem,
ClusterSpec.Type.content,
clusterId,
context.getDeployLogger(),
true,
context.clusterInfo().build());
}
public static class Builder {
private final ModelElement clusterElement;
private final ConfigModelContext context;
public Builder(ModelElement clusterElement, ConfigModelContext context) {
this.clusterElement = clusterElement;
this.context = context;
}
public StorageGroup buildRootGroup(DeployState deployState, ContentCluster owner, Boolean isStreaming) {
try {
if (owner.isHosted())
validateRedundancyAndGroups(deployState.zone().environment());
Optional group = Optional.ofNullable(clusterElement.child("group"));
Optional nodes = getNodes(clusterElement);
if (group.isPresent() && nodes.isPresent())
throw new IllegalArgumentException("Both and is specified: Only one of these tags can be used in the same configuration");
if (group.isPresent() && (group.get().integerAttribute("distribution-key") != null)) {
deployState.getDeployLogger().logApplicationPackage(Level.INFO, "'distribution-key' attribute on a content cluster's root group is ignored");
}
GroupBuilder groupBuilder = collectGroup(owner.isHosted(), group, nodes, null, null);
StorageGroup storageGroup = owner.isHosted()
? groupBuilder.buildHosted(deployState, owner, Optional.empty(), context)
: groupBuilder.buildNonHosted(deployState, owner, Optional.empty());
RedundancyBuilder redundancyBuilder = new RedundancyBuilder(clusterElement);
Redundancy redundancy = redundancyBuilder.build(owner.isHosted(), isStreaming, storageGroup.subgroups.size(),
storageGroup.getNumberOfLeafGroups(), storageGroup.countNodes(false));
owner.setRedundancy(redundancy);
if (storageGroup.partitions.isEmpty() && (redundancy.groups() > 1)) {
storageGroup.partitions = Optional.of(computePartitions(redundancy.finalRedundancy(), redundancy.groups()));
}
return storageGroup;
}
catch (IllegalArgumentException e) {
throw new IllegalArgumentException("In " + owner, e);
}
}
private void validateRedundancyAndGroups(Environment environment) {
var redundancyElement = clusterElement.child("redundancy");
if (redundancyElement == null) return;
long redundancy = redundancyElement.asLong();
var nodesElement = clusterElement.child("nodes");
if (nodesElement == null) return;
var nodesSpec = NodesSpecification.from(nodesElement, context);
// Allow dev deployment of self-hosted app (w/o count attribute): absent count => 1 node
if (!nodesSpec.hasCountAttribute() && environment == Environment.dev) return;
int minNodesPerGroup = (int) Math.ceil((double) nodesSpec.minResources().nodes() / nodesSpec.minResources().groups());
if (minNodesPerGroup < redundancy) {
throw new IllegalArgumentException("This cluster specifies redundancy " + redundancy +
", but this cannot be higher than " +
"the minimum nodes per group, which is " + minNodesPerGroup);
}
}
/** This returns a partition string which specifies equal distribution between all groups */
// TODO: Make a partitions object
static private String computePartitions(int redundancyPerGroup, int numGroups) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < numGroups - 1; ++i) {
sb.append(redundancyPerGroup);
sb.append("|");
}
sb.append("*");
return sb.toString();
}
/**
* Represents a storage group and can build storage nodes in both hosted and non-hosted environments.
*/
private static class GroupBuilder {
private final StorageGroup storageGroup;
/* The explicitly defined subgroups of this */
private final List subGroups;
private final List nodeBuilders;
/** The nodes explicitly specified as a nodes tag in this group, or empty if none */
private final Optional nodeRequirement;
private GroupBuilder(StorageGroup storageGroup, List subGroups, List nodeBuilders,
Optional nodeRequirement) {
this.storageGroup = storageGroup;
this.subGroups = subGroups;
this.nodeBuilders = nodeBuilders;
this.nodeRequirement = nodeRequirement;
}
/**
* Builds a storage group for a nonhosted environment
*
* @param owner the cluster owning this
* @param parent the parent storage group, or empty if this is the root group
* @return the storage group build by this
*/
public StorageGroup buildNonHosted(DeployState deployState, ContentCluster owner, Optional parent) {
for (GroupBuilder subGroup : subGroups) {
storageGroup.subgroups.add(subGroup.buildNonHosted(deployState, owner, Optional.of(this)));
}
for (XmlNodeBuilder nodeBuilder : nodeBuilders) {
storageGroup.nodes.add(nodeBuilder.build(deployState, owner, storageGroup));
}
if (parent.isEmpty() && subGroups.isEmpty() && nodeBuilders.isEmpty()) { // no nodes or groups: create single node
storageGroup.nodes.add(buildSingleNode(deployState, owner));
}
return storageGroup;
}
private StorageNode buildSingleNode(DeployState deployState, ContentCluster parent) {
int distributionKey = 0;
StorageNode searchNode = new StorageNode(deployState.getProperties(), parent.getStorageCluster(), 1.0, distributionKey , false);
searchNode.setHostResource(parent.hostSystem().getHost(Container.SINGLENODE_CONTAINER_SERVICESPEC));
PersistenceEngine provider = parent.getPersistence().create(deployState, searchNode, storageGroup, null);
searchNode.initService(deployState);
Distributor distributor = new Distributor(deployState.getProperties(), parent.getDistributorNodes(), distributionKey, null, provider);
distributor.setHostResource(searchNode.getHostResource());
distributor.initService(deployState);
return searchNode;
}
/**
* Builds a storage group for a hosted environment
*
* @param owner the cluster owning this
* @param parent the parent storage group, or empty if this is the root group
* @return the storage group build by this
*/
public StorageGroup buildHosted(DeployState deployState,
ContentCluster owner,
Optional parent,
ConfigModelContext context) {
if (storageGroup.getIndex() != null)
throw new IllegalArgumentException("Specifying individual groups is not supported on hosted applications");
Map hostMapping =
nodeRequirement.isPresent() ?
provisionHosts(nodeRequirement.get(),
owner.getStorageCluster().getClusterName(),
owner.getRoot().hostSystem(),
context) :
Map.of();
Map, Map> hostGroups = collectAllocatedSubgroups(hostMapping);
if (hostGroups.size() > 1) {
if (parent.isPresent())
throw new IllegalArgumentException("Cannot specify groups using the groups attribute in nested content groups");
// create subgroups as returned from allocation
for (Map.Entry, Map> hostGroup : hostGroups.entrySet()) {
String groupIndex = String.valueOf(hostGroup.getKey().get().index());
StorageGroup subgroup = new StorageGroup(true, groupIndex, groupIndex);
for (Map.Entry host : hostGroup.getValue().entrySet()) {
subgroup.nodes.add(createStorageNode(deployState, owner, host.getKey(), subgroup, host.getValue()));
}
storageGroup.subgroups.add(subgroup);
}
}
else { // or otherwise just create the nodes directly on this group, or the explicitly enumerated subgroups
for (Map.Entry host : hostMapping.entrySet()) {
storageGroup.nodes.add(createStorageNode(deployState, owner, host.getKey(), storageGroup, host.getValue()));
}
for (GroupBuilder subGroup : subGroups) {
storageGroup.subgroups.add(subGroup.buildHosted(deployState, owner, Optional.of(this), context));
}
}
return storageGroup;
}
/** Collect hosts per group */
private Map, Map> collectAllocatedSubgroups(Map hostMapping) {
Map, Map> hostsPerGroup = new LinkedHashMap<>();
for (Map.Entry entry : hostMapping.entrySet()) {
Optional group = entry.getValue().cluster().group();
Map hostsInGroup = hostsPerGroup.computeIfAbsent(group, k -> new LinkedHashMap<>());
hostsInGroup.put(entry.getKey(), entry.getValue());
}
return hostsPerGroup;
}
}
private record XmlNodeBuilder(ModelElement clusterElement, ModelElement element) {
public StorageNode build(DeployState deployState, ContentCluster parent, StorageGroup storageGroup) {
StorageNode sNode = new StorageNode.Builder().build(deployState, parent.getStorageCluster(), element.getXml());
PersistenceEngine provider = parent.getPersistence().create(deployState, sNode, storageGroup, element);
new Distributor.Builder(clusterElement, provider).build(deployState, parent.getDistributorNodes(), element.getXml());
return sNode;
}
}
/**
* Creates a content group builder from a group and/or nodes element.
* These are the possibilities:
*
* - group and nodes is present: This is a leaf group specifying a set of nodes
* - only group is present: This is a nonleaf group
* - only nodes is present: This is the implicitly specified toplevel leaf group, or a set of groups
* specified using a group count attribute.
*
- Neither element is present: Create a single node.
*
*
* Note: DO NOT change allocation behaviour to allow version X and Y of the config-model to allocate a different
* set of nodes. Such changes must be guarded by a common condition (e.g. feature flag) so the behaviour can be
* changed simultaneously for all active config models.
*/
private GroupBuilder collectGroup(boolean isHosted, Optional groupElement, Optional nodesElement, String name, String index) {
StorageGroup group = new StorageGroup(
isHosted, name, index,
childAsString(groupElement, "distribution.partitions"),
booleanAttributeOr(groupElement, VespaDomBuilder.CPU_SOCKET_AFFINITY_ATTRIB_NAME, false),
childAsLong(groupElement, VespaDomBuilder.MMAP_NOCORE_LIMIT),
childAsBoolean(groupElement, VespaDomBuilder.CORE_ON_OOM),
childAsString(groupElement, VespaDomBuilder.NO_VESPAMALLOC),
childAsString(groupElement, VespaDomBuilder.VESPAMALLOC),
childAsString(groupElement, VespaDomBuilder.VESPAMALLOC_DEBUG),
childAsString(groupElement, VespaDomBuilder.VESPAMALLOC_DEBUG_STACKTRACE));
List subGroups = groupElement.map(modelElement -> collectSubGroups(isHosted, group, modelElement)).orElseGet(List::of);
List explicitNodes = new ArrayList<>();
explicitNodes.addAll(collectExplicitNodes(groupElement));
explicitNodes.addAll(collectExplicitNodes(nodesElement));
if (!subGroups.isEmpty() && nodesElement.isPresent())
throw new IllegalArgumentException("A group can contain either explicit subgroups or a nodes specification, but not both.");
Optional nodeRequirement;
if (nodesElement.isPresent() && nodesElement.get().stringAttribute("count") != null ) // request these nodes
nodeRequirement = Optional.of(NodesSpecification.from(nodesElement.get(), context));
else if (nodesElement.isPresent() && context.getDeployState().isHosted() && context.getDeployState().zone().environment().isManuallyDeployed() ) // default to 1 node
nodeRequirement = Optional.of(NodesSpecification.from(nodesElement.get(), context));
else if (nodesElement.isEmpty() && subGroups.isEmpty() && context.getDeployState().isHosted()) // request one node
nodeRequirement = Optional.of(NodesSpecification.nonDedicated(1, context));
else if (nodesElement.isPresent() && nodesElement.get().stringAttribute("count") == null && context.getDeployState().isHosted())
throw new IllegalArgumentException("""
Clusters in hosted environments must have a tag
matching all zones, and having no subtags,
see https://cloud.vespa.ai/en/reference/services""");
else // Nodes or groups explicitly listed - resolve in GroupBuilder
nodeRequirement = Optional.empty();
return new GroupBuilder(group, subGroups, explicitNodes, nodeRequirement);
}
private Optional childAsString(Optional element, String childTagName) {
if (element.isEmpty()) return Optional.empty();
return Optional.ofNullable(element.get().childAsString(childTagName));
}
private Optional childAsLong(Optional element, String childTagName) {
return element.map(modelElement -> modelElement.childAsLong(childTagName));
}
private Optional childAsBoolean(Optional element, String childTagName) {
return element.map(modelElement -> modelElement.childAsBoolean(childTagName));
}
private boolean booleanAttributeOr(Optional element, String attributeName, boolean defaultValue) {
return element.map(modelElement -> modelElement.booleanAttribute(attributeName, defaultValue)).orElse(defaultValue);
}
private Optional getNodes(ModelElement groupOrNodesElement) {
if (groupOrNodesElement.getXml().getTagName().equals("nodes")) return Optional.of(groupOrNodesElement);
return Optional.ofNullable(groupOrNodesElement.child("nodes"));
}
private List collectExplicitNodes(Optional groupOrNodesElement) {
if (groupOrNodesElement.isEmpty()) return List.of();
List nodes = new ArrayList<>();
for (ModelElement n : groupOrNodesElement.get().subElements("node"))
nodes.add(new XmlNodeBuilder(clusterElement, n));
return nodes;
}
private List collectSubGroups(boolean isHosted, StorageGroup parentGroup, ModelElement parentGroupElement) {
List subGroupElements = parentGroupElement.subElements("group");
if (subGroupElements.size() > 1 && parentGroup.getPartitions().isEmpty())
throw new IllegalArgumentException("'distribution' attribute is required with multiple subgroups");
List subGroups = new ArrayList<>();
String indexPrefix = "";
if (parentGroup.index != null) {
indexPrefix = parentGroup.index + ".";
}
for (ModelElement g : subGroupElements) {
subGroups.add(collectGroup(isHosted, Optional.of(g), Optional.ofNullable(g.child("nodes")), g.stringAttribute("name"),
indexPrefix + g.integerAttribute("distribution-key")));
}
return subGroups;
}
private static StorageNode createStorageNode(DeployState deployState, ContentCluster parent, HostResource hostResource, StorageGroup parentGroup, ClusterMembership clusterMembership) {
StorageNode sNode = new StorageNode(deployState.getProperties(), parent.getStorageCluster(), null, clusterMembership.index(), clusterMembership.retired());
sNode.setHostResource(hostResource);
sNode.initService(deployState);
// TODO: Supplying null as XML is not very nice
PersistenceEngine provider = parent.getPersistence().create(deployState, sNode, parentGroup, null);
Distributor d = new Distributor(deployState.getProperties(), parent.getDistributorNodes(), clusterMembership.index(), null, provider);
d.setHostResource(sNode.getHostResource());
d.initService(deployState);
return sNode;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy