All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.cluster.metadata.DesiredNodes Maven / Gradle / Ivy

There is a newer version: 8.13.4
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.cluster.metadata;

import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.node.Node;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;

import static java.lang.String.format;
import static org.elasticsearch.node.Node.NODE_EXTERNAL_ID_SETTING;

/**
 * <p>
 * Desired nodes represents the cluster topology that the operator of the cluster is aiming for.
 * Therefore, it is possible that the desired nodes contain nodes that are not part of the
 * cluster in contrast to {@link DiscoveryNodes} that contains only nodes that are part of the cluster.
 * </p>
 *
 * <p>
 * This concept is useful as it provides more context about future topology changes to the system
 * as well as the desired set of nodes in the cluster, allowing it to make better decisions
 * about allocation, autoscaling, auto-expand replicas, etc.
 * </p>
 *
 * <p>
 * Additionally, settings validation is done during desired nodes updates avoiding boot-looping
 * when an invalid setting is provided before the node is started.
 * </p>
 *
 * <p>
 * To modify the desired nodes it is necessary to provide the entire collection of nodes that will
 * be part of the proposed cluster topology.
 * </p>
 *
 * <p>
 * Desired nodes are expected to be part of a lineage defined by the provided {@code historyId}.
 * The {@code historyId} is provided by the orchestrator taking care of managing the cluster.
 * In order to identify the different proposed desired nodes within the same history, it is
 * also expected that the orchestrator provides a monotonically increasing {@code version}
 * when it communicates about future topology changes.
 * The cluster rejects desired nodes updated with a {@code version} less than or equal to
 * the current {@code version} for the same {@code historyId}.
 * </p>
 *
 * <p>
 * The {@code historyId} is expected to remain stable during the cluster lifecycle, but it is
 * possible that the orchestrator loses its own state and needs to be restored to a
 * previous point in time with an older desired nodes {@code version}. In those cases it is
 * expected to use new {@code historyId} that would allow starting from a different version.
 * </p>
 *
 * <p>
 * Each {@link DesiredNode} part of {@link DesiredNodes} has a {@link DesiredNodeWithStatus.Status}
 * depending on whether or not the node has been part of the cluster at some point.
 * </p>
 *
 * The two possible statuses {@link DesiredNodeWithStatus.Status} are:
 * <ul>
 *     <li>{@code PENDING}: The {@link DesiredNode} is not part of the cluster yet</li>
 *     <li>{@code ACTUALIZED}: The {@link DesiredNode} is or has been part of the cluster.
 *     Notice that it is possible that a node has {@code ACTUALIZED} status but it is not part of {@link DiscoveryNodes},
 *     this is a conscious decision as it is expected that nodes can leave the cluster momentarily due to network issues,
 *     gc pressure, restarts, hardware failures etc, but are expected to still be part of the cluster.
 *     </li>
 * </ul>
 *
 * <p>
 * See {@code JoinTaskExecutor} and {@code TransportUpdateDesiredNodesAction} for more details about
 * desired nodes status tracking.
 * </p>
 *
 * <p>
 * Finally, each {@link DesiredNode} is expected to provide a way of identifying the node when it joins,
 * {@link Node#NODE_EXTERNAL_ID_SETTING} allows providing that identity through settings.
 * </p>
 */
public class DesiredNodes implements Writeable, ToXContentObject, Iterable<DesiredNodeWithStatus> {
    public static final String CONTEXT_MODE_PARAM = "desired_nodes_x_content_context";
    public static final String CONTEXT_MODE_API = SerializationContext.GET_DESIRED_NODES_API.toString();
    public static final String CONTEXT_MODE_CLUSTER_STATE = SerializationContext.CLUSTER_STATE.toString();

    /** The contexts whose names are exposed through {@link #CONTEXT_MODE_API} and {@link #CONTEXT_MODE_CLUSTER_STATE}. */
    public enum SerializationContext {
        GET_DESIRED_NODES_API,
        CLUSTER_STATE
    }

    private static final ParseField HISTORY_ID_FIELD = new ParseField("history_id");
    private static final ParseField VERSION_FIELD = new ParseField("version");
    private static final ParseField NODES_FIELD = new ParseField("nodes");

    // The cast of args[2] is unchecked; the element type is guaranteed by the object-array
    // declaration in the static initializer below, which parses every element with
    // DesiredNodeWithStatus.fromXContent.
    @SuppressWarnings("unchecked")
    public static final ConstructingObjectParser<DesiredNodes, Void> PARSER = new ConstructingObjectParser<>(
        "desired_nodes",
        false,
        (args, unused) -> create((String) args[0], (long) args[1], (List<DesiredNodeWithStatus>) args[2])
    );

    static {
        PARSER.declareString(ConstructingObjectParser.constructorArg(), HISTORY_ID_FIELD);
        PARSER.declareLong(ConstructingObjectParser.constructorArg(), VERSION_FIELD);
        PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), (p, c) -> DesiredNodeWithStatus.fromXContent(p), NODES_FIELD);
    }

    private final String historyID;
    private final long version;
    /** Desired nodes (with status) keyed by external id; never modified after construction. */
    private final Map<String, DesiredNodeWithStatus> nodes;
    /** Desired nodes whose status wrapper reports {@code actualized()}. */
    private final Set<DesiredNode> actualized;
    /** Desired nodes whose status wrapper reports {@code pending()}. */
    private final Set<DesiredNode> pending;

    /**
     * Builds an instance from an already de-duplicated map and pre-computes the
     * {@link #actualized()} and {@link #pending()} views.
     *
     * @param historyID the history the nodes belong to; asserted to be non-null and non-blank
     * @param version   the version within the history; asserted not to be {@link Long#MIN_VALUE}
     * @param nodes     desired nodes keyed by external id; wrapped in an unmodifiable view, not copied
     */
    private DesiredNodes(String historyID, long version, Map<String, DesiredNodeWithStatus> nodes) {
        assert historyID != null && historyID.isBlank() == false;
        assert version != Long.MIN_VALUE;
        this.historyID = historyID;
        this.version = version;
        this.nodes = Collections.unmodifiableMap(nodes);
        this.actualized = nodes.values()
            .stream()
            .filter(DesiredNodeWithStatus::actualized)
            .map(DesiredNodeWithStatus::desiredNode)
            .collect(Collectors.toUnmodifiableSet());
        this.pending = nodes.values()
            .stream()
            .filter(DesiredNodeWithStatus::pending)
            .map(DesiredNodeWithStatus::desiredNode)
            .collect(Collectors.toUnmodifiableSet());
    }

    /**
     * Reads an instance from the wire. The layout mirrors {@link #writeTo(StreamOutput)}:
     * history id, version, then the list of nodes with status.
     *
     * @throws IllegalArgumentException if two of the read nodes share an external id (see {@link #create})
     */
    public static DesiredNodes readFrom(StreamInput in) throws IOException {
        final var historyId = in.readString();
        final var version = in.readLong();
        final var nodesWithStatus = in.readList(DesiredNodeWithStatus::readFrom);
        return create(historyId, version, nodesWithStatus);
    }

    /** Writes the history id, the version and the collection of nodes with status, in that order. */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(historyID);
        out.writeLong(version);
        out.writeCollection(nodes.values());
    }

    /** Parses an instance using {@link #PARSER}. */
    static DesiredNodes fromXContent(XContentParser parser) throws IOException {
        return PARSER.parse(parser, null);
    }

    /**
     * Renders this instance as an object with the {@code history_id}, {@code version} and
     * {@code nodes} fields; {@code params} is forwarded to the rendering of each node.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.field(HISTORY_ID_FIELD.getPreferredName(), historyID);
        builder.field(VERSION_FIELD.getPreferredName(), version);
        builder.xContentList(NODES_FIELD.getPreferredName(), nodes.values(), params);
        builder.endObject();
        return builder;
    }

    /**
     * Creates desired nodes for the proposed {@code nodes}, carrying over the status of every node
     * that was already present (same external id) in {@code previousDesiredNodes}.
     * <p>
     * When there is no previous instance, or it belongs to a different history, every node starts
     * as {@code PENDING}.
     *
     * @param historyId            the history id of the new desired nodes
     * @param version              the version of the new desired nodes
     * @param nodes                the complete proposed set of desired nodes
     * @param previousDesiredNodes the desired nodes being replaced, or {@code null} if there are none
     * @throws IllegalArgumentException if two of the proposed nodes share an external id
     */
    public static DesiredNodes createIncludingStatusFromPreviousVersion(
        String historyId,
        long version,
        List<DesiredNode> nodes,
        @Nullable DesiredNodes previousDesiredNodes
    ) {
        if (previousDesiredNodes == null || previousDesiredNodes.historyID.equals(historyId) == false) {
            return create(
                historyId,
                version,
                nodes.stream().map(desiredNode -> new DesiredNodeWithStatus(desiredNode, DesiredNodeWithStatus.Status.PENDING)).toList()
            );
        }
        return create(historyId, version, previousDesiredNodes.transferStatusInformation(nodes));
    }

    /**
     * Creates desired nodes from nodes that already carry a status.
     *
     * @throws IllegalArgumentException if two nodes share an external id
     */
    public static DesiredNodes create(String historyID, long version, List<DesiredNodeWithStatus> nodes) {
        checkForDuplicatedExternalIDs(nodes);
        return new DesiredNodes(historyID, version, toMap(nodes));
    }

    /** Returns the latest desired nodes recorded in the {@link DesiredNodesMetadata} of the given cluster state. */
    @Nullable
    public static DesiredNodes latestFromClusterState(ClusterState clusterState) {
        return DesiredNodesMetadata.fromClusterState(clusterState).getLatestDesiredNodes();
    }

    /**
     * Returns {@code true} if {@code otherDesiredNodes} belongs to a different history, or belongs to
     * the same history and has a strictly greater version than this instance.
     */
    public boolean isSupersededBy(DesiredNodes otherDesiredNodes) {
        return historyID.equals(otherDesiredNodes.historyID) == false || version < otherDesiredNodes.version;
    }

    /** Returns {@code true} if {@code other} has the same history id and the same version. */
    public boolean hasSameVersion(DesiredNodes other) {
        return historyID.equals(other.historyID) && version == other.version;
    }

    /** Returns {@code true} if {@code other} is non-null and has the same history id. */
    public boolean hasSameHistoryId(DesiredNodes other) {
        return other != null && historyID.equals(other.historyID);
    }

    /**
     * Rejects node lists in which an external id appears more than once.
     *
     * @throws IllegalArgumentException listing every duplicated external id
     */
    private static void checkForDuplicatedExternalIDs(List<DesiredNodeWithStatus> nodes) {
        Set<String> nodeIDs = Sets.newHashSetWithExpectedSize(nodes.size());
        Set<String> duplicatedIDs = new HashSet<>();
        for (DesiredNodeWithStatus node : nodes) {
            String externalID = node.desiredNode().externalId();
            assert externalID != null;
            if (nodeIDs.add(externalID) == false) {
                duplicatedIDs.add(externalID);
            }
        }
        if (duplicatedIDs.isEmpty() == false) {
            throw new IllegalArgumentException(
                format(
                    Locale.ROOT,
                    "Some nodes contain the same setting value %s for [%s]",
                    duplicatedIDs,
                    NODE_EXTERNAL_ID_SETTING.getKey()
                )
            );
        }
    }

    /**
     * Like {@link #equals(Object)}, but compares the nodes with
     * {@link #equalsNodesWithProcessorsCloseTo(DesiredNodes)} instead of strict equality.
     */
    public boolean equalsWithProcessorsCloseTo(DesiredNodes that) {
        return that != null
            && version == that.version
            && Objects.equals(historyID, that.historyID)
            && equalsNodesWithProcessorsCloseTo(that);
    }

    /**
     * Returns {@code true} if both instances hold the same number of nodes and every node of this
     * instance matches the node with the same external id in {@code that} according to
     * {@code DesiredNodeWithStatus#equalsWithProcessorsCloseTo}. History id and version are ignored.
     */
    public boolean equalsNodesWithProcessorsCloseTo(DesiredNodes that) {
        if (that == null || nodes.size() != that.nodes.size()) {
            return false;
        }

        for (Map.Entry<String, DesiredNodeWithStatus> desiredNodeEntry : nodes.entrySet()) {
            final DesiredNodeWithStatus desiredNodeWithStatus = desiredNodeEntry.getValue();
            // null when `that` has no node with this external id; handling of null is left to
            // DesiredNodeWithStatus#equalsWithProcessorsCloseTo
            final DesiredNodeWithStatus otherDesiredNodeWithStatus = that.nodes.get(desiredNodeEntry.getKey());
            if (desiredNodeWithStatus.equalsWithProcessorsCloseTo(otherDesiredNodeWithStatus) == false) {
                return false;
            }
        }
        return true;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        DesiredNodes that = (DesiredNodes) o;
        return version == that.version && Objects.equals(historyID, that.historyID) && Objects.equals(nodes, that.nodes);
    }

    @Override
    public int hashCode() {
        return Objects.hash(historyID, version, nodes);
    }

    @Override
    public String toString() {
        return "DesiredNodes{" + "historyID='" + historyID + '\'' + ", version=" + version + ", nodes=" + nodes + '}';
    }

    public String historyID() {
        return historyID;
    }

    public long version() {
        return version;
    }

    /** Returns an unmodifiable snapshot of all desired nodes together with their status. */
    public Set<DesiredNodeWithStatus> nodes() {
        return Set.copyOf(nodes.values());
    }

    /** Returns the desired nodes whose status is actualized (unmodifiable). */
    public Set<DesiredNode> actualized() {
        return actualized;
    }

    /** Returns the desired nodes whose status is pending (unmodifiable). */
    public Set<DesiredNode> pending() {
        return pending;
    }

    @Override
    public Iterator<DesiredNodeWithStatus> iterator() {
        return nodes.values().iterator();
    }

    /** Returns the desired node registered under {@code externalId}, or {@code null} if there is none. */
    @Nullable
    public DesiredNodeWithStatus find(String externalId) {
        return nodes.get(externalId);
    }

    /**
     * Indexes the nodes by external id in an unmodifiable {@link TreeMap}, i.e. sorted by external id.
     *
     * @throws IllegalStateException if two nodes share an external id; callers are expected to have
     *                               run {@link #checkForDuplicatedExternalIDs(List)} first, hence the assertion
     */
    private static Map<String, DesiredNodeWithStatus> toMap(final List<DesiredNodeWithStatus> desiredNodes) {
        return Collections.unmodifiableMap(
            desiredNodes.stream().collect(Collectors.toMap(DesiredNodeWithStatus::externalId, Function.identity(), (left, right) -> {
                assert left.desiredNode().externalId().equals(right.externalId()) == false;
                throw new IllegalStateException("duplicate desired node external id [" + left.externalId() + "]");
            }, TreeMap::new))
        );
    }

    /**
     * Pairs every proposed node with the status recorded in this instance for the same external id,
     * or with {@code PENDING} when this instance does not know the node.
     *
     * @return an unmodifiable list in the same order as {@code proposedDesiredNodes}
     */
    private List<DesiredNodeWithStatus> transferStatusInformation(List<DesiredNode> proposedDesiredNodes) {
        List<DesiredNodeWithStatus> desiredNodesWithStatus = new ArrayList<>(proposedDesiredNodes.size());
        for (final var desiredNode : proposedDesiredNodes) {
            final var desiredNodeWithStatus = nodes.get(desiredNode.externalId());
            if (desiredNodeWithStatus != null) {
                desiredNodesWithStatus.add(new DesiredNodeWithStatus(desiredNode, desiredNodeWithStatus.status()));
            } else {
                desiredNodesWithStatus.add(new DesiredNodeWithStatus(desiredNode, DesiredNodeWithStatus.Status.PENDING));
            }
        }
        return Collections.unmodifiableList(desiredNodesWithStatus);
    }

    /**
     * Marks as {@code ACTUALIZED} every pending desired node that matches a node of the cluster state.
     *
     * @return the same {@code clusterState} instance when nothing changed, otherwise a copy whose
     *         {@link DesiredNodesMetadata} holds the updated desired nodes
     */
    public static ClusterState updateDesiredNodesStatusIfNeeded(ClusterState clusterState) {
        final var desiredNodes = latestFromClusterState(clusterState);
        final var updatedDesiredNodes = updateDesiredNodesStatusIfNeeded(clusterState.nodes(), desiredNodes);
        // identity comparison is intentional: the overload below returns its argument when nothing changed
        return desiredNodes == updatedDesiredNodes
            ? clusterState
            : clusterState.copyAndUpdateMetadata(
                metadata -> metadata.putCustom(DesiredNodesMetadata.TYPE, new DesiredNodesMetadata(updatedDesiredNodes))
            );
    }

    /**
     * Marks as {@code ACTUALIZED} every pending desired node whose external id matches the external id
     * of one of the given discovery nodes.
     *
     * @param discoveryNodes the nodes to match against
     * @param desiredNodes   the current desired nodes, may be {@code null}
     * @return {@code null} if {@code desiredNodes} is {@code null}; the same {@code desiredNodes}
     *         instance if no status changed; otherwise a new instance with the same history id and version
     */
    public static DesiredNodes updateDesiredNodesStatusIfNeeded(DiscoveryNodes discoveryNodes, DesiredNodes desiredNodes) {
        if (desiredNodes == null) {
            return null;
        }

        // allocated lazily so that the common "nothing changed" case does not copy the map
        Map<String, DesiredNodeWithStatus> desiredNodesWithUpdatedStatus = null;
        for (DiscoveryNode discoveryNode : discoveryNodes) {
            final var desiredNode = desiredNodes.find(discoveryNode.getExternalId());
            if (desiredNode != null && desiredNode.pending()) {
                if (desiredNodesWithUpdatedStatus == null) {
                    // TreeMap, like toMap(), so that iteration/serialization order stays sorted by
                    // external id instead of depending on hash order after a status update
                    desiredNodesWithUpdatedStatus = new TreeMap<>(desiredNodes.nodes);
                }
                desiredNodesWithUpdatedStatus.put(
                    desiredNode.externalId(),
                    new DesiredNodeWithStatus(desiredNode.desiredNode(), DesiredNodeWithStatus.Status.ACTUALIZED)
                );
            }
        }

        return desiredNodesWithUpdatedStatus == null
            ? desiredNodes
            : new DesiredNodes(desiredNodes.historyID(), desiredNodes.version(), desiredNodesWithUpdatedStatus);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy