All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.cluster.ClusterState Maven / Gradle / Ivy

There is a newer version: 8.16.0
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.cluster;

import org.elasticsearch.TransportVersion;
import org.elasticsearch.Version;
import org.elasticsearch.action.support.master.TransportMasterNodeAction;
import org.elasticsearch.cluster.block.ClusterBlock;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.coordination.ClusterStatePublisher;
import org.elasticsearch.cluster.coordination.CoordinationMetadata;
import org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion;
import org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfiguration;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.service.ClusterApplierService;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.cluster.service.MasterService;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.VersionedNamedWriteable;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ChunkedToXContent;
import org.elasticsearch.common.xcontent.ChunkedToXContentHelper;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContent;

import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.function.Consumer;
import java.util.function.Function;

import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;

/**
 * Represents the state of the cluster, held in memory on all nodes in the cluster with updates coordinated by the elected master.
 * 

* Conceptually immutable, but in practice it has a few components like {@link RoutingNodes} which are pure functions of the immutable state * but are expensive to compute so they are built on-demand if needed. *

* The {@link Metadata} portion is written to disk on each update so it persists across full-cluster restarts. The rest of this data is * maintained only in-memory and resets back to its initial state on a full-cluster restart, but it is held on all nodes so it persists * across master elections (and therefore is preserved in a rolling restart). *

* Updates are triggered by submitting tasks to the {@link MasterService} on the elected master, typically using a {@link * TransportMasterNodeAction} to route a request to the master on which the task is submitted via a queue obtained with {@link * ClusterService#createTaskQueue}, which has an associated priority. Submitted tasks have an associated * timeout. Tasks are processed in priority order, so a flood of higher-priority tasks can starve lower-priority ones from running. * Therefore, avoid priorities other than {@link Priority#NORMAL} where possible. Tasks associated with client actions should typically have * a timeout, or otherwise be sensitive to client cancellations, to avoid surprises caused by the execution of stale tasks long after they * are submitted (since clients themselves tend to time out). In contrast, internal tasks can reasonably have an infinite timeout, * especially if a timeout would simply trigger a retry. *

* Tasks that share the same {@link ClusterStateTaskExecutor} instance are processed as a batch. Each batch of tasks yields a new {@link * ClusterState} which is published to the cluster by {@link ClusterStatePublisher#publish}. Publication usually works by sending a diff, * computed via the {@link Diffable} interface, rather than the full state, although it will fall back to sending the full state if the * receiving node is new or it has missed out on an intermediate state for some reason. States and diffs are published using the transport * protocol, i.e. the {@link Writeable} interface and friends. *

* When committed, the new state is applied which exposes it to the node via {@link ClusterStateApplier} and {@link * ClusterStateListener} callbacks registered with the {@link ClusterApplierService}. The new state is also made available via {@link * ClusterService#state()}. The appliers are notified (in no particular order) before {@link ClusterService#state()} is updated, and the * listeners are notified (in no particular order) afterwards. Cluster state updates run in sequence, one-by-one, so they can be a * performance bottleneck. See the JavaDocs on the linked classes and methods for more details. *

* Cluster state updates can be used to trigger various actions via a {@link ClusterStateListener} rather than using a timer. *

* Implements {@link ChunkedToXContent} to be exposed in REST APIs (e.g. {@code GET _cluster/state} and {@code POST _cluster/reroute}) and * to be indexed by monitoring, mostly just for diagnostics purposes. The {@link XContent} representation does not need to be 100% faithful * since we never reconstruct a cluster state from its XContent representation, but the more faithful it is the more useful it is for * diagnostics. Note that the {@link XContent} representation of the {@link Metadata} portion does have to be faithful (in {@link * Metadata.XContentContext#GATEWAY} context) since this is how it persists across full cluster restarts. *

* Security-sensitive data such as passwords or private keys should not be stored in the cluster state, since the contents of the cluster * state are exposed in various APIs. */ public class ClusterState implements ChunkedToXContent, Diffable { public static final ClusterState EMPTY_STATE = builder(ClusterName.DEFAULT).build(); public interface Custom extends NamedDiffable, ChunkedToXContent { /** * Returns true iff this {@link Custom} is private to the cluster and should never be sent to a client. * The default is false; */ default boolean isPrivate() { return false; } /** * Serialize this {@link Custom} for diagnostic purposes, exposed by the

GET _cluster/state
API etc. The XContent * representation does not need to be 100% faithful since we never reconstruct a cluster state from its XContent representation, but * the more faithful it is the more useful it is for diagnostics. */ @Override Iterator toXContentChunked(ToXContent.Params params); } private static final NamedDiffableValueSerializer CUSTOM_VALUE_SERIALIZER = new NamedDiffableValueSerializer<>(Custom.class); private static final DiffableUtils.ValueSerializer TRANSPORT_VERSION_VALUE_SERIALIZER = new DiffableUtils.NonDiffableValueSerializer<>() { @Override public void write(TransportVersion value, StreamOutput out) throws IOException { TransportVersion.writeVersion(value, out); } @Override public TransportVersion read(StreamInput in, String key) throws IOException { return TransportVersion.readVersion(in); } }; public static final String UNKNOWN_UUID = "_na_"; public static final long UNKNOWN_VERSION = -1; /** * Monotonically increasing on (and therefore uniquely identifies) committed states. However sometimes a state is created/applied * without committing it, for instance to add a {@link NoMasterBlockService#getNoMasterBlock}. */ private final long version; /** * Uniquely identifies this state, even if the state is not committed. */ private final String stateUUID; /** * Describes the location (and state) of all shards, used for routing actions such as searches to the relevant shards. */ private final RoutingTable routingTable; private final DiscoveryNodes nodes; private final Map transportVersions; private final TransportVersion minTransportVersion; private final Metadata metadata; private final ClusterBlocks blocks; private final Map customs; private final ClusterName clusterName; private final boolean wasReadFromDiff; // built on demand private volatile RoutingNodes routingNodes; public ClusterState(long version, String stateUUID, ClusterState state) { this( state.clusterName, version, stateUUID, state.metadata(), state.routingTable(), state.nodes(), state.transportVersions(), state.blocks(), state.customs(), false, state.routingNodes ); } public ClusterState( ClusterName clusterName, long version, String stateUUID, Metadata metadata, RoutingTable routingTable, DiscoveryNodes nodes, Map transportVersions, ClusterBlocks blocks, Map customs, boolean wasReadFromDiff, @Nullable RoutingNodes routingNodes ) { this.version = version; this.stateUUID = stateUUID; this.clusterName = clusterName; this.metadata = metadata; this.routingTable = routingTable; this.nodes = nodes; this.transportVersions = Map.copyOf(transportVersions); this.blocks = blocks; this.customs = customs; this.wasReadFromDiff = wasReadFromDiff; this.routingNodes = routingNodes; assert assertConsistentRoutingNodes(routingTable, nodes, routingNodes); this.minTransportVersion = transportVersions.values().stream().min(Comparator.naturalOrder()).orElse(TransportVersion.current()); } private static boolean assertConsistentRoutingNodes( RoutingTable routingTable, DiscoveryNodes nodes, @Nullable RoutingNodes routingNodes ) { if (routingNodes == null) { return true; } final RoutingNodes expected = RoutingNodes.immutable(routingTable, nodes); assert routingNodes.equals(expected) : "RoutingNodes [" + routingNodes + "] are not consistent with this cluster state [" + expected + "]"; return true; } public long term() { return coordinationMetadata().term(); } public long version() { return this.version; } public long getVersion() { return version(); } /** * This stateUUID is automatically generated for for each version of cluster state. It is used to make sure that * we are applying diffs to the right previous state. */ public String stateUUID() { return this.stateUUID; } public DiscoveryNodes nodes() { return this.nodes; } public DiscoveryNodes getNodes() { return nodes(); } /** * Returns the set of nodes that should be exposed to things like REST handlers that behave differently depending on the nodes in the * cluster and their versions. Specifically, if the cluster has properly formed then this is the nodes in the last-applied cluster * state, but if the cluster has not properly formed then no nodes are returned. * * @return the nodes in the cluster if the cluster has properly formed, otherwise an empty set of nodes. */ public DiscoveryNodes nodesIfRecovered() { return blocks.hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) ? DiscoveryNodes.EMPTY_NODES : nodes; } public Map transportVersions() { return this.transportVersions; } public TransportVersion getMinTransportVersion() { return this.minTransportVersion; } public Metadata metadata() { return this.metadata; } public Metadata getMetadata() { return metadata(); } public CoordinationMetadata coordinationMetadata() { return metadata.coordinationMetadata(); } public RoutingTable routingTable() { return routingTable; } public RoutingTable getRoutingTable() { return routingTable(); } public ClusterBlocks blocks() { return this.blocks; } public ClusterBlocks getBlocks() { return blocks; } public Map customs() { return this.customs; } public Map getCustoms() { return this.customs; } @SuppressWarnings("unchecked") public T custom(String type) { return (T) customs.get(type); } @SuppressWarnings("unchecked") public T custom(String type, T defaultValue) { return (T) customs.getOrDefault(type, defaultValue); } public ClusterName getClusterName() { return this.clusterName; } public VotingConfiguration getLastAcceptedConfiguration() { return coordinationMetadata().getLastAcceptedConfiguration(); } public VotingConfiguration getLastCommittedConfiguration() { return coordinationMetadata().getLastCommittedConfiguration(); } public Set getVotingConfigExclusions() { return coordinationMetadata().getVotingConfigExclusions(); } /** * Returns a built (on demand) routing nodes view of the routing table. */ public RoutingNodes getRoutingNodes() { RoutingNodes r = routingNodes; if (r != null) { return r; } r = buildRoutingNodes(); return r; } private synchronized RoutingNodes buildRoutingNodes() { RoutingNodes r = routingNodes; if (r != null) { return r; } r = RoutingNodes.immutable(routingTable, nodes); routingNodes = r; return r; } /** * Returns a fresh mutable copy of the routing nodes view. */ public RoutingNodes mutableRoutingNodes() { final RoutingNodes nodes = this.routingNodes; // use the cheaper copy constructor if we already computed the routing nodes for this state. if (nodes != null) { return nodes.mutableCopy(); } // we don't have any routing nodes for this state, likely because it's a temporary state in the reroute logic, don't compute an // immutable copy that will never be used and instead directly build a mutable copy return RoutingNodes.mutable(routingTable, this.nodes); } /** * Initialize data structures that lazy computed for this instance in the background by using the giving executor. * @param executor executor to run initialization tasks on */ public void initializeAsync(Executor executor) { if (routingNodes == null) { executor.execute(new Runnable() { @Override public void run() { getRoutingNodes(); } @Override public String toString() { return "async initialization of routing nodes for cluster state " + version(); } }); } if (metadata.indicesLookupInitialized() == false) { executor.execute(new Runnable() { @Override public void run() { metadata.getIndicesLookup(); } @Override public String toString() { return "async initialization of indices lookup for cluster state " + version(); } }); } } @Override public String toString() { StringBuilder sb = new StringBuilder(); final String TAB = " "; sb.append("cluster uuid: ") .append(metadata.clusterUUID()) .append(" [committed: ") .append(metadata.clusterUUIDCommitted()) .append("]") .append("\n"); sb.append("version: ").append(version).append("\n"); sb.append("state uuid: ").append(stateUUID).append("\n"); sb.append("from_diff: ").append(wasReadFromDiff).append("\n"); sb.append("meta data version: ").append(metadata.version()).append("\n"); sb.append(TAB).append("coordination_metadata:\n"); sb.append(TAB).append(TAB).append("term: ").append(coordinationMetadata().term()).append("\n"); sb.append(TAB) .append(TAB) .append("last_committed_config: ") .append(coordinationMetadata().getLastCommittedConfiguration()) .append("\n"); sb.append(TAB) .append(TAB) .append("last_accepted_config: ") .append(coordinationMetadata().getLastAcceptedConfiguration()) .append("\n"); sb.append(TAB).append(TAB).append("voting tombstones: ").append(coordinationMetadata().getVotingConfigExclusions()).append("\n"); for (IndexMetadata indexMetadata : metadata) { sb.append(TAB).append(indexMetadata.getIndex()); sb.append(": v[") .append(indexMetadata.getVersion()) .append("], mv[") .append(indexMetadata.getMappingVersion()) .append("], sv[") .append(indexMetadata.getSettingsVersion()) .append("], av[") .append(indexMetadata.getAliasesVersion()) .append("]\n"); for (int shard = 0; shard < indexMetadata.getNumberOfShards(); shard++) { sb.append(TAB).append(TAB).append(shard).append(": "); sb.append("p_term [").append(indexMetadata.primaryTerm(shard)).append("], "); sb.append("isa_ids ").append(indexMetadata.inSyncAllocationIds(shard)).append("\n"); } } if (metadata.customs().isEmpty() == false) { sb.append("metadata customs:\n"); for (final Map.Entry cursor : metadata.customs().entrySet()) { final String type = cursor.getKey(); final Metadata.Custom custom = cursor.getValue(); sb.append(TAB).append(type).append(": ").append(custom); } sb.append("\n"); } sb.append(blocks()); sb.append(nodes()); if (transportVersions.isEmpty() == false) { sb.append("transport versions:\n"); for (var tv : transportVersions.entrySet()) { sb.append(TAB).append(tv.getKey()).append(": ").append(tv.getValue()).append("\n"); } } sb.append(routingTable()); sb.append(getRoutingNodes()); if (customs.isEmpty() == false) { sb.append("customs:\n"); for (Map.Entry cursor : customs.entrySet()) { final String type = cursor.getKey(); final Custom custom = cursor.getValue(); sb.append(TAB).append(type).append(": ").append(custom); } } return sb.toString(); } /** * a cluster state supersedes another state if they are from the same master and the version of this state is higher than that of the * other state. *

* In essence that means that all the changes from the other cluster state are also reflected by the current one */ public boolean supersedes(ClusterState other) { return this.nodes().getMasterNodeId() != null && this.nodes().getMasterNodeId().equals(other.nodes().getMasterNodeId()) && this.version() > other.version(); } public enum Metric { VERSION("version"), MASTER_NODE("master_node"), BLOCKS("blocks"), NODES("nodes"), METADATA("metadata"), ROUTING_TABLE("routing_table"), ROUTING_NODES("routing_nodes"), CUSTOMS("customs"); private static final Map valueToEnum; static { valueToEnum = new HashMap<>(); for (Metric metric : Metric.values()) { valueToEnum.put(metric.value, metric); } } private final String value; Metric(String value) { this.value = value; } public static EnumSet parseString(String param, boolean ignoreUnknown) { String[] metrics = Strings.splitStringByCommaToArray(param); EnumSet result = EnumSet.noneOf(Metric.class); for (String metric : metrics) { if ("_all".equals(metric)) { result = EnumSet.allOf(Metric.class); break; } Metric m = valueToEnum.get(metric); if (m == null) { if (ignoreUnknown == false) { throw new IllegalArgumentException("Unknown metric [" + metric + "]"); } } else { result.add(m); } } return result; } @Override public String toString() { return value; } } private static Iterator chunkedSection( boolean condition, ToXContent before, Iterator items, Function> fn, ToXContent after ) { return condition ? Iterators.concat(Iterators.single(before), Iterators.flatMap(items, fn::apply), Iterators.single(after)) : Collections.emptyIterator(); } @Override public Iterator toXContentChunked(ToXContent.Params outerParams) { final var metrics = Metric.parseString(outerParams.param("metric", "_all"), true); return Iterators.concat( // header chunk Iterators.single(((builder, params) -> { // always provide the cluster_uuid as part of the top-level response (also part of the metadata response) builder.field("cluster_uuid", metadata().clusterUUID()); // state version info if (metrics.contains(Metric.VERSION)) { builder.field("version", version); builder.field("state_uuid", stateUUID); } // master node if (metrics.contains(Metric.MASTER_NODE)) { builder.field("master_node", nodes().getMasterNodeId()); } return builder; })), // blocks chunkedSection(metrics.contains(Metric.BLOCKS), (builder, params) -> { builder.startObject("blocks"); if (blocks().global().isEmpty() == false) { builder.startObject("global"); for (ClusterBlock block : blocks().global()) { block.toXContent(builder, params); } builder.endObject(); } if (blocks().indices().isEmpty() == false) { builder.startObject("indices"); } return builder; }, blocks.indices().entrySet().iterator(), entry -> Iterators.single((builder, params) -> { builder.startObject(entry.getKey()); for (ClusterBlock block : entry.getValue()) { block.toXContent(builder, params); } return builder.endObject(); }), (builder, params) -> { if (blocks().indices().isEmpty() == false) { builder.endObject(); } return builder.endObject(); }), // nodes chunkedSection( metrics.contains(Metric.NODES), (builder, params) -> builder.startObject("nodes"), nodes.iterator(), Iterators::single, (builder, params) -> builder.endObject() ), // transportVersions // just use NODES again, its node-related information chunkedSection( metrics.contains(Metric.NODES), (builder, params) -> builder.startArray("transport_versions"), transportVersions.entrySet().iterator(), e -> Iterators.single( (builder, params) -> builder.startObject() .field("node_id", e.getKey()) .field("transport_version", e.getValue().toString()) .endObject() ), (builder, params) -> builder.endArray() ), // metadata metrics.contains(Metric.METADATA) ? metadata.toXContentChunked(outerParams) : Collections.emptyIterator(), // routing table chunkedSection( metrics.contains(Metric.ROUTING_TABLE), (builder, params) -> builder.startObject("routing_table").startObject("indices"), routingTable().iterator(), indexRoutingTable -> Iterators.single((builder, params) -> { builder.startObject(indexRoutingTable.getIndex().getName()); builder.startObject("shards"); for (int shardId = 0; shardId < indexRoutingTable.size(); shardId++) { IndexShardRoutingTable indexShardRoutingTable = indexRoutingTable.shard(shardId); builder.startArray(Integer.toString(indexShardRoutingTable.shardId().id())); for (int copy = 0; copy < indexShardRoutingTable.size(); copy++) { indexShardRoutingTable.shard(copy).toXContent(builder, params); } builder.endArray(); } return builder.endObject().endObject(); }), (builder, params) -> builder.endObject().endObject() ), // routing nodes chunkedSection( metrics.contains(Metric.ROUTING_NODES), (builder, params) -> builder.startObject("routing_nodes").startArray("unassigned"), getRoutingNodes().unassigned().iterator(), Iterators::single, (builder, params) -> builder.endArray() // no endObject() here, continued in next chunkedSection() ), chunkedSection( metrics.contains(Metric.ROUTING_NODES), (builder, params) -> builder.startObject("nodes"), getRoutingNodes().iterator(), routingNode -> Iterators.concat( ChunkedToXContentHelper.startArray(routingNode.nodeId() == null ? "null" : routingNode.nodeId()), routingNode.iterator(), ChunkedToXContentHelper.endArray() ), (builder, params) -> builder.endObject().endObject() ), // customs metrics.contains(Metric.CUSTOMS) ? Iterators.flatMap( customs.entrySet().iterator(), cursor -> ChunkedToXContentHelper.wrapWithObject(cursor.getKey(), cursor.getValue().toXContentChunked(outerParams)) ) : Collections.emptyIterator() ); } public static Builder builder(ClusterName clusterName) { return new Builder(clusterName); } public static Builder builder(ClusterState state) { return new Builder(state); } public ClusterState copyAndUpdate(Consumer updater) { var builder = builder(this); updater.accept(builder); return builder.build(); } public ClusterState copyAndUpdateMetadata(Consumer updater) { return copyAndUpdate(builder -> builder.metadata(metadata().copyAndUpdate(updater))); } public static class Builder { private ClusterState previous; private final ClusterName clusterName; private long version = 0; private String uuid = UNKNOWN_UUID; private Metadata metadata = Metadata.EMPTY_METADATA; private RoutingTable routingTable = RoutingTable.EMPTY_ROUTING_TABLE; private DiscoveryNodes nodes = DiscoveryNodes.EMPTY_NODES; private final Map transportVersions; private ClusterBlocks blocks = ClusterBlocks.EMPTY_CLUSTER_BLOCK; private final ImmutableOpenMap.Builder customs; private boolean fromDiff; public Builder(ClusterState state) { this.previous = state; this.clusterName = state.clusterName; this.version = state.version(); this.uuid = state.stateUUID(); this.nodes = state.nodes(); this.transportVersions = new HashMap<>(state.transportVersions()); this.routingTable = state.routingTable(); this.metadata = state.metadata(); this.blocks = state.blocks(); this.customs = ImmutableOpenMap.builder(state.customs()); this.fromDiff = false; } public Builder(ClusterName clusterName) { this.transportVersions = new HashMap<>(); customs = ImmutableOpenMap.builder(); this.clusterName = clusterName; } public Builder nodes(DiscoveryNodes.Builder nodesBuilder) { return nodes(nodesBuilder.build()); } public Builder nodes(DiscoveryNodes nodes) { this.nodes = nodes; return this; } public DiscoveryNodes nodes() { return nodes; } public Builder putTransportVersion(String node, TransportVersion version) { transportVersions.put(node, Objects.requireNonNull(version, node)); return this; } public Builder transportVersions(Map versions) { versions.forEach((key, value) -> Objects.requireNonNull(value, key)); // remove all versions not present in the new map this.transportVersions.keySet().retainAll(versions.keySet()); this.transportVersions.putAll(versions); return this; } public Map transportVersions() { return Collections.unmodifiableMap(this.transportVersions); } public Builder routingTable(RoutingTable.Builder routingTableBuilder) { return routingTable(routingTableBuilder.build()); } public Builder routingTable(RoutingTable routingTable) { this.routingTable = routingTable; return this; } public Builder metadata(Metadata.Builder metadataBuilder) { return metadata(metadataBuilder.build()); } public Builder metadata(Metadata metadata) { this.metadata = metadata; return this; } public Builder blocks(ClusterBlocks.Builder blocksBuilder) { return blocks(blocksBuilder.build()); } public Builder blocks(ClusterBlocks blocks) { this.blocks = blocks; return this; } public Builder version(long version) { this.version = version; return this; } public Builder incrementVersion() { this.version = version + 1; this.uuid = UNKNOWN_UUID; return this; } public Builder stateUUID(String uuid) { this.uuid = uuid; return this; } public Builder putCustom(String type, Custom custom) { customs.put(type, Objects.requireNonNull(custom, type)); return this; } public Builder removeCustom(String type) { customs.remove(type); return this; } public Builder customs(Map customs) { customs.forEach((key, value) -> Objects.requireNonNull(value, key)); this.customs.putAllFromMap(customs); return this; } // set previous cluster state that this builder is created from during diff application private Builder fromDiff(ClusterState previous) { this.fromDiff = true; this.previous = previous; return this; } public ClusterState build() { if (UNKNOWN_UUID.equals(uuid)) { uuid = UUIDs.randomBase64UUID(); } final RoutingNodes routingNodes; if (previous != null && routingTable.indicesRouting() == previous.routingTable.indicesRouting() && nodes == previous.nodes) { // routing table contents and nodes haven't changed so we can try to reuse the previous state's routing nodes which are // expensive to compute routingNodes = previous.routingNodes; } else { routingNodes = null; } return new ClusterState( clusterName, version, uuid, metadata, routingTable, nodes, transportVersions, blocks, customs.build(), fromDiff, routingNodes ); } public static byte[] toBytes(ClusterState state) throws IOException { BytesStreamOutput os = new BytesStreamOutput(); state.writeTo(os); return BytesReference.toBytes(os.bytes()); } /** * @param data input bytes * @param localNode used to set the local node in the cluster state. */ public static ClusterState fromBytes(byte[] data, DiscoveryNode localNode, NamedWriteableRegistry registry) throws IOException { StreamInput in = new NamedWriteableAwareStreamInput(StreamInput.wrap(data), registry); return readFrom(in, localNode); } } @Override public Diff diff(ClusterState previousState) { return new ClusterStateDiff(previousState, this); } public static Diff readDiffFrom(StreamInput in, DiscoveryNode localNode) throws IOException { return new ClusterStateDiff(in, localNode); } public static ClusterState readFrom(StreamInput in, DiscoveryNode localNode) throws IOException { ClusterName clusterName = new ClusterName(in); Builder builder = new Builder(clusterName); builder.version = in.readLong(); builder.uuid = in.readString(); builder.metadata = Metadata.readFrom(in); builder.routingTable = RoutingTable.readFrom(in); builder.nodes = DiscoveryNodes.readFrom(in, localNode); if (in.getTransportVersion().onOrAfter(TransportVersion.V_8_8_0)) { builder.transportVersions(in.readMap(TransportVersion::readVersion)); } else { // this clusterstate is from a pre-8.8.0 node // infer the versions from discoverynodes for now builder.nodes().getNodes().values().forEach(n -> builder.putTransportVersion(n.getId(), inferTransportVersion(n))); } builder.blocks = ClusterBlocks.readFrom(in); int customSize = in.readVInt(); for (int i = 0; i < customSize; i++) { Custom customIndexMetadata = in.readNamedWriteable(Custom.class); builder.putCustom(customIndexMetadata.getWriteableName(), customIndexMetadata); } if (in.getTransportVersion().before(TransportVersion.V_8_0_0)) { in.readVInt(); // used to be minimumMasterNodesOnPublishingMaster, which was used in 7.x for BWC with 6.x } return builder.build(); } /** * If the cluster state does not contain transport version information, this is the version * that is inferred for all nodes on version 8.8.0 or above. */ public static final TransportVersion INFERRED_TRANSPORT_VERSION = TransportVersion.V_8_8_0; private static TransportVersion inferTransportVersion(DiscoveryNode node) { TransportVersion tv; if (node.getVersion().before(Version.V_8_8_0)) { // 1-to-1 mapping between Version and TransportVersion tv = TransportVersion.fromId(node.getVersion().id); } else { // use the lowest value it could be for now tv = INFERRED_TRANSPORT_VERSION; } return tv; } @Override public void writeTo(StreamOutput out) throws IOException { clusterName.writeTo(out); out.writeLong(version); out.writeString(stateUUID); metadata.writeTo(out); routingTable.writeTo(out); nodes.writeTo(out); if (out.getTransportVersion().onOrAfter(TransportVersion.V_8_8_0)) { out.writeMap(transportVersions, StreamOutput::writeString, (o, v) -> TransportVersion.writeVersion(v, o)); } blocks.writeTo(out); VersionedNamedWriteable.writeVersionedWritables(out, customs); if (out.getTransportVersion().before(TransportVersion.V_8_0_0)) { out.writeVInt(-1); // used to be minimumMasterNodesOnPublishingMaster, which was used in 7.x for BWC with 6.x } } private static class ClusterStateDiff implements Diff { private final long toVersion; private final String fromUuid; private final String toUuid; private final ClusterName clusterName; private final Diff routingTable; private final Diff nodes; @Nullable private final Diff> transportVersions; private final Diff metadata; private final Diff blocks; private final Diff> customs; ClusterStateDiff(ClusterState before, ClusterState after) { fromUuid = before.stateUUID; toUuid = after.stateUUID; toVersion = after.version; clusterName = after.clusterName; routingTable = after.routingTable.diff(before.routingTable); nodes = after.nodes.diff(before.nodes); transportVersions = DiffableUtils.diff( before.transportVersions, after.transportVersions, DiffableUtils.getStringKeySerializer(), TRANSPORT_VERSION_VALUE_SERIALIZER ); metadata = after.metadata.diff(before.metadata); blocks = after.blocks.diff(before.blocks); customs = DiffableUtils.diff(before.customs, after.customs, DiffableUtils.getStringKeySerializer(), CUSTOM_VALUE_SERIALIZER); } ClusterStateDiff(StreamInput in, DiscoveryNode localNode) throws IOException { clusterName = new ClusterName(in); fromUuid = in.readString(); toUuid = in.readString(); toVersion = in.readLong(); routingTable = RoutingTable.readDiffFrom(in); nodes = DiscoveryNodes.readDiffFrom(in, localNode); if (in.getTransportVersion().onOrAfter(TransportVersion.V_8_8_0) && in.readBoolean()) { transportVersions = DiffableUtils.readJdkMapDiff( in, DiffableUtils.getStringKeySerializer(), TRANSPORT_VERSION_VALUE_SERIALIZER ); } else { transportVersions = null; // infer at application time } metadata = Metadata.readDiffFrom(in); blocks = ClusterBlocks.readDiffFrom(in); customs = DiffableUtils.readJdkMapDiff(in, DiffableUtils.getStringKeySerializer(), CUSTOM_VALUE_SERIALIZER); if (in.getTransportVersion().before(TransportVersion.V_8_0_0)) { in.readVInt(); // used to be minimumMasterNodesOnPublishingMaster, which was used in 7.x for BWC with 6.x } } @Override public void writeTo(StreamOutput out) throws IOException { clusterName.writeTo(out); out.writeString(fromUuid); out.writeString(toUuid); out.writeLong(toVersion); routingTable.writeTo(out); nodes.writeTo(out); if (out.getTransportVersion().onOrAfter(TransportVersion.V_8_8_0)) { out.writeOptionalWriteable(transportVersions); } metadata.writeTo(out); blocks.writeTo(out); customs.writeTo(out); if (out.getTransportVersion().before(TransportVersion.V_8_0_0)) { out.writeVInt(-1); // used to be minimumMasterNodesOnPublishingMaster, which was used in 7.x for BWC with 6.x } } @Override public ClusterState apply(ClusterState state) { Builder builder = new Builder(clusterName); if (toUuid.equals(state.stateUUID)) { // no need to read the rest - cluster state didn't change return state; } if (fromUuid.equals(state.stateUUID) == false) { throw new IncompatibleClusterStateVersionException(state.version, state.stateUUID, toVersion, fromUuid); } builder.stateUUID(toUuid); builder.version(toVersion); builder.routingTable(routingTable.apply(state.routingTable)); builder.nodes(nodes.apply(state.nodes)); if (transportVersions != null) { builder.transportVersions(transportVersions.apply(state.transportVersions)); } else { // infer the versions from discoverynodes for now builder.nodes().getNodes().values().forEach(n -> builder.putTransportVersion(n.getId(), inferTransportVersion(n))); } builder.metadata(metadata.apply(state.metadata)); builder.blocks(blocks.apply(state.blocks)); builder.customs(customs.apply(state.customs)); builder.fromDiff(state); return builder.build(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy