org.elasticsearch.cluster.ClusterState Maven / Gradle / Ivy
Show all versions of elasticsearch Show documentation
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.cluster;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.Version;
import org.elasticsearch.action.support.master.TransportMasterNodeAction;
import org.elasticsearch.cluster.block.ClusterBlock;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.coordination.ClusterStatePublisher;
import org.elasticsearch.cluster.coordination.CoordinationMetadata;
import org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion;
import org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfiguration;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.service.ClusterApplierService;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.cluster.service.MasterService;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.VersionedNamedWriteable;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ChunkedToXContent;
import org.elasticsearch.common.xcontent.ChunkedToXContentHelper;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContent;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.function.Consumer;
import java.util.function.Function;
import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;
/**
* Represents the state of the cluster, held in memory on all nodes in the cluster with updates coordinated by the elected master.
*
* Conceptually immutable, but in practice it has a few components like {@link RoutingNodes} which are pure functions of the immutable state
* but are expensive to compute so they are built on-demand if needed.
*
* The {@link Metadata} portion is written to disk on each update so it persists across full-cluster restarts. The rest of this data is
* maintained only in-memory and resets back to its initial state on a full-cluster restart, but it is held on all nodes so it persists
* across master elections (and therefore is preserved in a rolling restart).
*
* Updates are triggered by submitting tasks to the {@link MasterService} on the elected master, typically using a {@link
* TransportMasterNodeAction} to route a request to the master on which the task is submitted via a queue obtained with {@link
* ClusterService#createTaskQueue}, which has an associated priority. Submitted tasks have an associated
* timeout. Tasks are processed in priority order, so a flood of higher-priority tasks can starve lower-priority ones from running.
* Therefore, avoid priorities other than {@link Priority#NORMAL} where possible. Tasks associated with client actions should typically have
* a timeout, or otherwise be sensitive to client cancellations, to avoid surprises caused by the execution of stale tasks long after they
* are submitted (since clients themselves tend to time out). In contrast, internal tasks can reasonably have an infinite timeout,
* especially if a timeout would simply trigger a retry.
*
* Tasks that share the same {@link ClusterStateTaskExecutor} instance are processed as a batch. Each batch of tasks yields a new {@link
* ClusterState} which is published to the cluster by {@link ClusterStatePublisher#publish}. Publication usually works by sending a diff,
* computed via the {@link Diffable} interface, rather than the full state, although it will fall back to sending the full state if the
* receiving node is new or it has missed out on an intermediate state for some reason. States and diffs are published using the transport
* protocol, i.e. the {@link Writeable} interface and friends.
*
* When committed, the new state is applied which exposes it to the node via {@link ClusterStateApplier} and {@link
* ClusterStateListener} callbacks registered with the {@link ClusterApplierService}. The new state is also made available via {@link
* ClusterService#state()}. The appliers are notified (in no particular order) before {@link ClusterService#state()} is updated, and the
* listeners are notified (in no particular order) afterwards. Cluster state updates run in sequence, one-by-one, so they can be a
* performance bottleneck. See the JavaDocs on the linked classes and methods for more details.
*
* Cluster state updates can be used to trigger various actions via a {@link ClusterStateListener} rather than using a timer.
*
* Implements {@link ChunkedToXContent} to be exposed in REST APIs (e.g. {@code GET _cluster/state} and {@code POST _cluster/reroute}) and
* to be indexed by monitoring, mostly just for diagnostics purposes. The {@link XContent} representation does not need to be 100% faithful
* since we never reconstruct a cluster state from its XContent representation, but the more faithful it is the more useful it is for
* diagnostics. Note that the {@link XContent} representation of the {@link Metadata} portion does have to be faithful (in {@link
* Metadata.XContentContext#GATEWAY} context) since this is how it persists across full cluster restarts.
*
* Security-sensitive data such as passwords or private keys should not be stored in the cluster state, since the contents of the cluster
* state are exposed in various APIs.
*/
public class ClusterState implements ChunkedToXContent, Diffable {
public static final ClusterState EMPTY_STATE = builder(ClusterName.DEFAULT).build();
public interface Custom extends NamedDiffable, ChunkedToXContent {
/**
* Returns true
iff this {@link Custom} is private to the cluster and should never be sent to a client.
* The default is false
;
*/
default boolean isPrivate() {
return false;
}
/**
* Serialize this {@link Custom} for diagnostic purposes, exposed by the GET _cluster/state
API etc. The XContent
* representation does not need to be 100% faithful since we never reconstruct a cluster state from its XContent representation, but
* the more faithful it is the more useful it is for diagnostics.
*/
@Override
Iterator extends ToXContent> toXContentChunked(ToXContent.Params params);
}
// Value serializer constructed for the Custom type. Its use is not visible in this part of the file
// (presumably the diffing of the customs map in ClusterStateDiff - TODO confirm).
private static final NamedDiffableValueSerializer CUSTOM_VALUE_SERIALIZER = new NamedDiffableValueSerializer<>(Custom.class);
// Value serializer for TransportVersion map values, declared as a NonDiffableValueSerializer: each value is written and read
// whole via TransportVersion.writeVersion / TransportVersion.readVersion.
private static final DiffableUtils.ValueSerializer TRANSPORT_VERSION_VALUE_SERIALIZER =
new DiffableUtils.NonDiffableValueSerializer<>() {
@Override
public void write(TransportVersion value, StreamOutput out) throws IOException {
TransportVersion.writeVersion(value, out);
}
@Override
public TransportVersion read(StreamInput in, String key) throws IOException {
// the map key is not needed to decode the value
return TransportVersion.readVersion(in);
}
};
/**
 * Placeholder state UUID; {@link Builder#build()} replaces it with a freshly generated random UUID.
 */
public static final String UNKNOWN_UUID = "_na_";
/**
 * Sentinel version value; presumably denotes "no known version" - its uses are outside this part of the file.
 */
public static final long UNKNOWN_VERSION = -1;
/**
* Monotonically increasing on (and therefore uniquely identifies) committed states. However sometimes a state is created/applied
* without committing it, for instance to add a {@link NoMasterBlockService#getNoMasterBlock}.
*/
private final long version;
/**
* Uniquely identifies this state, even if the state is not committed.
*/
private final String stateUUID;
/**
* Describes the location (and state) of all shards, used for routing actions such as searches to the relevant shards.
*/
private final RoutingTable routingTable;
// the nodes recorded in this state
private final DiscoveryNodes nodes;
// transport version per node id; an immutable copy (Map.copyOf) of the map passed to the constructor
private final Map transportVersions;
// smallest value in transportVersions, or TransportVersion.current() when that map is empty (computed in the constructor)
private final TransportVersion minTransportVersion;
private final Metadata metadata;
private final ClusterBlocks blocks;
// custom components keyed by their type name
private final Map customs;
private final ClusterName clusterName;
// true when this instance was built by a Builder on which fromDiff(...) had been called; reported by toString() as "from_diff"
private final boolean wasReadFromDiff;
// built on demand
private volatile RoutingNodes routingNodes;
/**
 * Creates a copy of {@code state} that carries the given version and state UUID. All other components, including any routing
 * nodes already computed for {@code state}, are taken from {@code state}, and the copy is marked as not read from a diff.
 *
 * @param version   the version of the new state
 * @param stateUUID the UUID of the new state
 * @param state     the state to take every other component from
 */
public ClusterState(long version, String stateUUID, ClusterState state) {
this(
state.clusterName,
version,
stateUUID,
state.metadata(),
state.routingTable(),
state.nodes(),
state.transportVersions(),
state.blocks(),
state.customs(),
false,
state.routingNodes
);
}
/**
 * Creates a cluster state from its individual components.
 *
 * @param transportVersions transport version per node id; copied with {@link Map#copyOf}, so later changes to the argument are
 *                          not reflected in this state
 * @param wasReadFromDiff   whether this state was produced by applying a diff
 * @param routingNodes      routing nodes already computed for {@code routingTable} and {@code nodes}, or {@code null} to have them
 *                          built on demand; when assertions are enabled a non-null value is checked for consistency
 */
public ClusterState(
    ClusterName clusterName,
    long version,
    String stateUUID,
    Metadata metadata,
    RoutingTable routingTable,
    DiscoveryNodes nodes,
    Map transportVersions,
    ClusterBlocks blocks,
    Map customs,
    boolean wasReadFromDiff,
    @Nullable RoutingNodes routingNodes
) {
    // identity of this state
    this.clusterName = clusterName;
    this.version = version;
    this.stateUUID = stateUUID;
    this.wasReadFromDiff = wasReadFromDiff;

    // content of this state
    this.metadata = metadata;
    this.blocks = blocks;
    this.customs = customs;
    this.nodes = nodes;
    this.routingTable = routingTable;
    this.routingNodes = routingNodes;
    this.transportVersions = Map.copyOf(transportVersions);

    assert assertConsistentRoutingNodes(routingTable, nodes, routingNodes);

    // the lowest transport version of any node, falling back to the current version when none are recorded
    this.minTransportVersion = transportVersions.values()
        .stream()
        .min(Comparator.naturalOrder())
        .orElse(TransportVersion.current());
}
/**
 * Assertion helper: checks that pre-computed routing nodes, if supplied, equal the ones freshly derived from the given routing
 * table and nodes. Always returns {@code true} so that it can be invoked from an {@code assert} statement.
 */
private static boolean assertConsistentRoutingNodes(
    RoutingTable routingTable,
    DiscoveryNodes nodes,
    @Nullable RoutingNodes routingNodes
) {
    if (routingNodes != null) {
        final RoutingNodes recomputed = RoutingNodes.immutable(routingTable, nodes);
        assert routingNodes.equals(recomputed)
            : "RoutingNodes [" + routingNodes + "] are not consistent with this cluster state [" + recomputed + "]";
    }
    // nothing to verify when no routing nodes were supplied
    return true;
}
/**
 * Returns the term recorded in this state's coordination metadata.
 */
public long term() {
return coordinationMetadata().term();
}
/**
 * Returns the version of this state.
 */
public long version() {
return this.version;
}
/**
 * Same as {@link #version()}.
 */
public long getVersion() {
return version();
}
/**
 * This stateUUID is automatically generated for each version of cluster state. It is used to make sure that
 * we are applying diffs to the right previous state.
 */
public String stateUUID() {
return this.stateUUID;
}
/**
 * Returns the nodes recorded in this state.
 */
public DiscoveryNodes nodes() {
return this.nodes;
}
/**
 * Same as {@link #nodes()}.
 */
public DiscoveryNodes getNodes() {
return nodes();
}
/**
 * Returns the nodes that should be visible to components, such as REST handlers, whose behaviour depends on the nodes in the
 * cluster and their versions. While the {@code STATE_NOT_RECOVERED_BLOCK} global block is present the cluster has not properly
 * formed, and no nodes are exposed; otherwise the nodes of this state are returned.
 *
 * @return the nodes in the cluster if the cluster has properly formed, otherwise an empty set of nodes.
 */
public DiscoveryNodes nodesIfRecovered() {
    if (blocks.hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) {
        return DiscoveryNodes.EMPTY_NODES;
    }
    return nodes;
}
/**
 * Returns the transport version of each node, keyed by node id. The map is the immutable copy made when this state was constructed.
 */
public Map transportVersions() {
return this.transportVersions;
}
/**
 * Returns the lowest transport version in {@link #transportVersions()}, or {@code TransportVersion.current()} if that map is empty.
 */
public TransportVersion getMinTransportVersion() {
return this.minTransportVersion;
}
/**
 * Returns the metadata of this state.
 */
public Metadata metadata() {
return this.metadata;
}
/**
 * Same as {@link #metadata()}.
 */
public Metadata getMetadata() {
return metadata();
}
/**
 * Returns the coordination metadata held by this state's {@link Metadata}.
 */
public CoordinationMetadata coordinationMetadata() {
return metadata.coordinationMetadata();
}
/**
 * Returns the routing table of this state.
 */
public RoutingTable routingTable() {
return routingTable;
}
/**
 * Same as {@link #routingTable()}.
 */
public RoutingTable getRoutingTable() {
return routingTable();
}
/**
 * Returns the cluster blocks of this state.
 */
public ClusterBlocks blocks() {
return this.blocks;
}
/**
 * Returns the same value as {@link #blocks()}.
 */
public ClusterBlocks getBlocks() {
return blocks;
}
/**
 * Returns the custom components of this state, keyed by type name.
 */
public Map customs() {
return this.customs;
}
/**
 * Returns the same value as {@link #customs()}.
 */
public Map getCustoms() {
return this.customs;
}
/**
 * Returns the custom stored under the given type name, cast to the type expected by the caller. The cast is unchecked, so a
 * mismatch between the stored custom and {@code T} only surfaces at the call site.
 *
 * @param type the name the custom is stored under
 * @return the stored custom, or {@code null} if the customs map has no entry for {@code type}
 */
@SuppressWarnings("unchecked")
public <T extends Custom> T custom(String type) {
    return (T) customs.get(type);
}

/**
 * Returns the custom stored under the given type name, cast to the type expected by the caller, or {@code defaultValue} if the
 * customs map has no entry for {@code type}. The cast is unchecked.
 *
 * @param type         the name the custom is stored under
 * @param defaultValue the value to return when nothing is stored under {@code type}
 */
@SuppressWarnings("unchecked")
public <T extends Custom> T custom(String type, T defaultValue) {
    return (T) customs.getOrDefault(type, defaultValue);
}
/**
 * Returns the name of the cluster this state belongs to.
 */
public ClusterName getClusterName() {
return this.clusterName;
}
/**
 * Returns the last-accepted voting configuration from the coordination metadata.
 */
public VotingConfiguration getLastAcceptedConfiguration() {
return coordinationMetadata().getLastAcceptedConfiguration();
}
/**
 * Returns the last-committed voting configuration from the coordination metadata.
 */
public VotingConfiguration getLastCommittedConfiguration() {
return coordinationMetadata().getLastCommittedConfiguration();
}
/**
 * Returns the voting configuration exclusions from the coordination metadata.
 */
public Set getVotingConfigExclusions() {
return coordinationMetadata().getVotingConfigExclusions();
}
/**
 * Returns the routing nodes view of the routing table, building it first if it has not been computed yet.
 */
public RoutingNodes getRoutingNodes() {
    // single volatile read; only fall through to the synchronized builder when nothing has been computed yet
    final RoutingNodes cached = routingNodes;
    return cached != null ? cached : buildRoutingNodes();
}
/**
 * Computes and stores the immutable routing nodes for this state unless they are already present. Synchronized, and re-reads
 * the field after acquiring the monitor, so the computation is not repeated once a value has been stored.
 */
private synchronized RoutingNodes buildRoutingNodes() {
    RoutingNodes built = routingNodes;
    if (built == null) {
        built = RoutingNodes.immutable(routingTable, nodes);
        routingNodes = built;
    }
    return built;
}
/**
 * Returns a fresh mutable copy of the routing nodes view.
 */
public RoutingNodes mutableRoutingNodes() {
    final RoutingNodes computed = this.routingNodes;
    if (computed == null) {
        // No routing nodes exist for this state yet, likely because it is a temporary state in the reroute logic. Build the
        // mutable instance directly rather than computing an immutable copy that will never be used.
        return RoutingNodes.mutable(routingTable, this.nodes);
    }
    // the routing nodes were already computed for this state, so take the cheaper copy path
    return computed.mutableCopy();
}
/**
 * Initializes, in the background on the given executor, the data structures that are otherwise lazily computed for this
 * instance: the routing nodes and the metadata's indices lookup. A task is only submitted for a structure that has not been
 * initialized yet.
 *
 * @param executor executor to run initialization tasks on
 */
public void initializeAsync(Executor executor) {
if (routingNodes == null) {
// anonymous classes rather than lambdas so that each task has a descriptive toString()
executor.execute(new Runnable() {
@Override
public void run() {
getRoutingNodes();
}
@Override
public String toString() {
return "async initialization of routing nodes for cluster state " + version();
}
});
}
if (metadata.indicesLookupInitialized() == false) {
executor.execute(new Runnable() {
@Override
public void run() {
metadata.getIndicesLookup();
}
@Override
public String toString() {
return "async initialization of indices lookup for cluster state " + version();
}
});
}
}
/**
 * Builds a multi-line, human-readable description of this state: identifiers and versions, coordination metadata, per-index and
 * per-shard metadata, metadata customs, blocks, nodes, transport versions, routing table, routing nodes and customs. Note that
 * this calls {@link #getRoutingNodes()}, which builds the routing nodes if they have not been computed yet.
 */
@Override
public String toString() {
    final String TAB = " ";
    final StringBuilder out = new StringBuilder();

    // identifiers and versions
    out.append("cluster uuid: ").append(metadata.clusterUUID());
    out.append(" [committed: ").append(metadata.clusterUUIDCommitted()).append("]").append("\n");
    out.append("version: ").append(version).append("\n");
    out.append("state uuid: ").append(stateUUID).append("\n");
    out.append("from_diff: ").append(wasReadFromDiff).append("\n");
    out.append("meta data version: ").append(metadata.version()).append("\n");

    // coordination metadata
    final CoordinationMetadata coordination = coordinationMetadata();
    out.append(TAB).append("coordination_metadata:\n");
    out.append(TAB).append(TAB).append("term: ").append(coordination.term()).append("\n");
    out.append(TAB).append(TAB).append("last_committed_config: ").append(coordination.getLastCommittedConfiguration()).append("\n");
    out.append(TAB).append(TAB).append("last_accepted_config: ").append(coordination.getLastAcceptedConfiguration()).append("\n");
    out.append(TAB).append(TAB).append("voting tombstones: ").append(coordination.getVotingConfigExclusions()).append("\n");

    // one line per index, followed by one line per shard of that index
    for (IndexMetadata index : metadata) {
        out.append(TAB).append(index.getIndex());
        out.append(": v[").append(index.getVersion());
        out.append("], mv[").append(index.getMappingVersion());
        out.append("], sv[").append(index.getSettingsVersion());
        out.append("], av[").append(index.getAliasesVersion());
        out.append("]\n");
        for (int shardNumber = 0; shardNumber < index.getNumberOfShards(); shardNumber++) {
            out.append(TAB).append(TAB).append(shardNumber).append(": ");
            out.append("p_term [").append(index.primaryTerm(shardNumber)).append("], ");
            out.append("isa_ids ").append(index.inSyncAllocationIds(shardNumber)).append("\n");
        }
    }

    // metadata customs: entries are appended back to back, with a single newline after the last one
    if (metadata.customs().isEmpty() == false) {
        out.append("metadata customs:\n");
        for (var entry : metadata.customs().entrySet()) {
            out.append(TAB).append(entry.getKey()).append(": ").append(entry.getValue());
        }
        out.append("\n");
    }

    out.append(blocks());
    out.append(nodes());

    if (transportVersions.isEmpty() == false) {
        out.append("transport versions:\n");
        for (var entry : transportVersions.entrySet()) {
            out.append(TAB).append(entry.getKey()).append(": ").append(entry.getValue()).append("\n");
        }
    }

    out.append(routingTable());
    out.append(getRoutingNodes());

    // cluster state customs: entries are appended back to back without a trailing newline
    if (customs.isEmpty() == false) {
        out.append("customs:\n");
        for (var entry : customs.entrySet()) {
            out.append(TAB).append(entry.getKey()).append(": ").append(entry.getValue());
        }
    }
    return out.toString();
}
/**
 * A cluster state supersedes another state if both are from the same master and the version of this state is higher than that
 * of the other state.
 *
 * In essence that means that all the changes from the other cluster state are also reflected by the current one.
 */
public boolean supersedes(ClusterState other) {
    final var masterNodeId = this.nodes().getMasterNodeId();
    if (masterNodeId == null) {
        // without a known master the two states cannot be attributed to the same master
        return false;
    }
    if (masterNodeId.equals(other.nodes().getMasterNodeId()) == false) {
        return false;
    }
    return this.version() > other.version();
}
public enum Metric {
VERSION("version"),
MASTER_NODE("master_node"),
BLOCKS("blocks"),
NODES("nodes"),
METADATA("metadata"),
ROUTING_TABLE("routing_table"),
ROUTING_NODES("routing_nodes"),
CUSTOMS("customs");
private static final Map valueToEnum;
static {
valueToEnum = new HashMap<>();
for (Metric metric : Metric.values()) {
valueToEnum.put(metric.value, metric);
}
}
private final String value;
Metric(String value) {
this.value = value;
}
public static EnumSet parseString(String param, boolean ignoreUnknown) {
String[] metrics = Strings.splitStringByCommaToArray(param);
EnumSet result = EnumSet.noneOf(Metric.class);
for (String metric : metrics) {
if ("_all".equals(metric)) {
result = EnumSet.allOf(Metric.class);
break;
}
Metric m = valueToEnum.get(metric);
if (m == null) {
if (ignoreUnknown == false) {
throw new IllegalArgumentException("Unknown metric [" + metric + "]");
}
} else {
result.add(m);
}
}
return result;
}
@Override
public String toString() {
return value;
}
}
private static Iterator chunkedSection(
boolean condition,
ToXContent before,
Iterator items,
Function> fn,
ToXContent after
) {
return condition
? Iterators.concat(Iterators.single(before), Iterators.flatMap(items, fn::apply), Iterators.single(after))
: Collections.emptyIterator();
}
@Override
public Iterator extends ToXContent> toXContentChunked(ToXContent.Params outerParams) {
final var metrics = Metric.parseString(outerParams.param("metric", "_all"), true);
return Iterators.concat(
// header chunk
Iterators.single(((builder, params) -> {
// always provide the cluster_uuid as part of the top-level response (also part of the metadata response)
builder.field("cluster_uuid", metadata().clusterUUID());
// state version info
if (metrics.contains(Metric.VERSION)) {
builder.field("version", version);
builder.field("state_uuid", stateUUID);
}
// master node
if (metrics.contains(Metric.MASTER_NODE)) {
builder.field("master_node", nodes().getMasterNodeId());
}
return builder;
})),
// blocks
chunkedSection(metrics.contains(Metric.BLOCKS), (builder, params) -> {
builder.startObject("blocks");
if (blocks().global().isEmpty() == false) {
builder.startObject("global");
for (ClusterBlock block : blocks().global()) {
block.toXContent(builder, params);
}
builder.endObject();
}
if (blocks().indices().isEmpty() == false) {
builder.startObject("indices");
}
return builder;
}, blocks.indices().entrySet().iterator(), entry -> Iterators.single((builder, params) -> {
builder.startObject(entry.getKey());
for (ClusterBlock block : entry.getValue()) {
block.toXContent(builder, params);
}
return builder.endObject();
}), (builder, params) -> {
if (blocks().indices().isEmpty() == false) {
builder.endObject();
}
return builder.endObject();
}),
// nodes
chunkedSection(
metrics.contains(Metric.NODES),
(builder, params) -> builder.startObject("nodes"),
nodes.iterator(),
Iterators::single,
(builder, params) -> builder.endObject()
),
// transportVersions
// just use NODES again, its node-related information
chunkedSection(
metrics.contains(Metric.NODES),
(builder, params) -> builder.startArray("transport_versions"),
transportVersions.entrySet().iterator(),
e -> Iterators.single(
(builder, params) -> builder.startObject()
.field("node_id", e.getKey())
.field("transport_version", e.getValue().toString())
.endObject()
),
(builder, params) -> builder.endArray()
),
// metadata
metrics.contains(Metric.METADATA) ? metadata.toXContentChunked(outerParams) : Collections.emptyIterator(),
// routing table
chunkedSection(
metrics.contains(Metric.ROUTING_TABLE),
(builder, params) -> builder.startObject("routing_table").startObject("indices"),
routingTable().iterator(),
indexRoutingTable -> Iterators.single((builder, params) -> {
builder.startObject(indexRoutingTable.getIndex().getName());
builder.startObject("shards");
for (int shardId = 0; shardId < indexRoutingTable.size(); shardId++) {
IndexShardRoutingTable indexShardRoutingTable = indexRoutingTable.shard(shardId);
builder.startArray(Integer.toString(indexShardRoutingTable.shardId().id()));
for (int copy = 0; copy < indexShardRoutingTable.size(); copy++) {
indexShardRoutingTable.shard(copy).toXContent(builder, params);
}
builder.endArray();
}
return builder.endObject().endObject();
}),
(builder, params) -> builder.endObject().endObject()
),
// routing nodes
chunkedSection(
metrics.contains(Metric.ROUTING_NODES),
(builder, params) -> builder.startObject("routing_nodes").startArray("unassigned"),
getRoutingNodes().unassigned().iterator(),
Iterators::single,
(builder, params) -> builder.endArray() // no endObject() here, continued in next chunkedSection()
),
chunkedSection(
metrics.contains(Metric.ROUTING_NODES),
(builder, params) -> builder.startObject("nodes"),
getRoutingNodes().iterator(),
routingNode -> Iterators.concat(
ChunkedToXContentHelper.startArray(routingNode.nodeId() == null ? "null" : routingNode.nodeId()),
routingNode.iterator(),
ChunkedToXContentHelper.endArray()
),
(builder, params) -> builder.endObject().endObject()
),
// customs
metrics.contains(Metric.CUSTOMS)
? Iterators.flatMap(
customs.entrySet().iterator(),
cursor -> ChunkedToXContentHelper.wrapWithObject(cursor.getKey(), cursor.getValue().toXContentChunked(outerParams))
)
: Collections.emptyIterator()
);
}
/**
 * Creates a builder for a new cluster state with the given cluster name and default values for everything else.
 */
public static Builder builder(ClusterName clusterName) {
return new Builder(clusterName);
}
/**
 * Creates a builder that is initialized from the given cluster state.
 */
public static Builder builder(ClusterState state) {
return new Builder(state);
}
/**
 * Returns a new cluster state built from a {@link Builder} that is initialized from this state and then modified by
 * {@code updater}.
 *
 * @param updater callback that applies the desired changes to the builder
 */
public ClusterState copyAndUpdate(Consumer updater) {
var builder = builder(this);
updater.accept(builder);
return builder.build();
}
/**
 * Returns a new cluster state whose metadata is the result of {@code metadata().copyAndUpdate(updater)}; everything else is
 * carried over through {@link #copyAndUpdate}.
 *
 * @param updater callback handed to the metadata's own {@code copyAndUpdate}
 */
public ClusterState copyAndUpdateMetadata(Consumer updater) {
return copyAndUpdate(builder -> builder.metadata(metadata().copyAndUpdate(updater)));
}
public static class Builder {
// the state this builder was created from (or set via fromDiff), null when created from a cluster name only;
// build() consults it to decide whether already-computed routing nodes can be reused
private ClusterState previous;
private final ClusterName clusterName;
private long version = 0;
// UNKNOWN_UUID means "generate a random UUID in build()"
private String uuid = UNKNOWN_UUID;
private Metadata metadata = Metadata.EMPTY_METADATA;
private RoutingTable routingTable = RoutingTable.EMPTY_ROUTING_TABLE;
private DiscoveryNodes nodes = DiscoveryNodes.EMPTY_NODES;
// transport version per node id; mutable here, copied by the ClusterState constructor
private final Map transportVersions;
private ClusterBlocks blocks = ClusterBlocks.EMPTY_CLUSTER_BLOCK;
private final ImmutableOpenMap.Builder customs;
// set by fromDiff(...); passed to the ClusterState constructor as wasReadFromDiff
private boolean fromDiff;
/**
 * Creates a builder initialized with every component of the given state. The state is also remembered as the previous state,
 * and the transport versions are copied into a new mutable map.
 */
public Builder(ClusterState state) {
this.previous = state;
this.clusterName = state.clusterName;
this.version = state.version();
this.uuid = state.stateUUID();
this.nodes = state.nodes();
this.transportVersions = new HashMap<>(state.transportVersions());
this.routingTable = state.routingTable();
this.metadata = state.metadata();
this.blocks = state.blocks();
this.customs = ImmutableOpenMap.builder(state.customs());
this.fromDiff = false;
}
/**
 * Creates a builder for the given cluster name with no transport versions, no customs, and the field defaults for everything
 * else.
 */
public Builder(ClusterName clusterName) {
this.transportVersions = new HashMap<>();
customs = ImmutableOpenMap.builder();
this.clusterName = clusterName;
}
/**
 * Sets the nodes to the result of building the given nodes builder.
 */
public Builder nodes(DiscoveryNodes.Builder nodesBuilder) {
return nodes(nodesBuilder.build());
}
/**
 * Sets the nodes.
 */
public Builder nodes(DiscoveryNodes nodes) {
this.nodes = nodes;
return this;
}
/**
 * Returns the nodes currently set on this builder.
 */
public DiscoveryNodes nodes() {
return nodes;
}
/**
 * Records the transport version of a single node.
 *
 * @param node    the id of the node
 * @param version the node's transport version, must not be null (a null fails with a NullPointerException whose message is the
 *                node id)
 */
public Builder putTransportVersion(String node, TransportVersion version) {
transportVersions.put(node, Objects.requireNonNull(version, node));
return this;
}
/**
 * Replaces the recorded transport versions with the contents of the given map: entries for keys missing from {@code versions}
 * are dropped and all entries of {@code versions} are put. Null values are rejected before anything is modified.
 */
public Builder transportVersions(Map versions) {
versions.forEach((key, value) -> Objects.requireNonNull(value, key));
// remove all versions not present in the new map
this.transportVersions.keySet().retainAll(versions.keySet());
this.transportVersions.putAll(versions);
return this;
}
/**
 * Returns an unmodifiable view of the transport versions currently recorded on this builder.
 */
public Map transportVersions() {
return Collections.unmodifiableMap(this.transportVersions);
}
/**
 * Sets the routing table to the result of building the given routing table builder.
 */
public Builder routingTable(RoutingTable.Builder routingTableBuilder) {
return routingTable(routingTableBuilder.build());
}
/**
 * Sets the routing table.
 */
public Builder routingTable(RoutingTable routingTable) {
this.routingTable = routingTable;
return this;
}
/**
 * Sets the metadata to the result of building the given metadata builder.
 */
public Builder metadata(Metadata.Builder metadataBuilder) {
return metadata(metadataBuilder.build());
}
/**
 * Sets the metadata.
 */
public Builder metadata(Metadata metadata) {
this.metadata = metadata;
return this;
}
/**
 * Sets the cluster blocks to the result of building the given blocks builder.
 */
public Builder blocks(ClusterBlocks.Builder blocksBuilder) {
return blocks(blocksBuilder.build());
}
/**
 * Sets the cluster blocks.
 */
public Builder blocks(ClusterBlocks blocks) {
this.blocks = blocks;
return this;
}
/**
 * Sets the version. Unlike {@link #incrementVersion()}, this leaves the state UUID untouched.
 */
public Builder version(long version) {
this.version = version;
return this;
}
/**
 * Increments the version by one and resets the state UUID to {@link #UNKNOWN_UUID}, so that {@link #build()} generates a new
 * random UUID for the resulting state.
 */
public Builder incrementVersion() {
this.version = version + 1;
this.uuid = UNKNOWN_UUID;
return this;
}
/**
 * Sets the state UUID.
 */
public Builder stateUUID(String uuid) {
this.uuid = uuid;
return this;
}
/**
 * Stores a custom under the given type name.
 *
 * @param type   the name to store the custom under
 * @param custom the custom, must not be null (a null fails with a NullPointerException whose message is the type name)
 */
public Builder putCustom(String type, Custom custom) {
customs.put(type, Objects.requireNonNull(custom, type));
return this;
}
/**
 * Removes the custom stored under the given type name.
 */
public Builder removeCustom(String type) {
customs.remove(type);
return this;
}
/**
 * Adds all entries of the given map to the customs of this builder, after rejecting null values. Entries already present on the
 * builder are not removed by this call.
 */
public Builder customs(Map customs) {
customs.forEach((key, value) -> Objects.requireNonNull(value, key));
this.customs.putAllFromMap(customs);
return this;
}
// set previous cluster state that this builder is created from during diff application
private Builder fromDiff(ClusterState previous) {
this.fromDiff = true;
this.previous = previous;
return this;
}
/**
 * Builds the cluster state from the values set on this builder. If the state UUID is still {@link #UNKNOWN_UUID} a random UUID
 * is generated and kept on the builder. Routing nodes already computed for the previous state are carried over when they are
 * still valid.
 */
public ClusterState build() {
    if (UNKNOWN_UUID.equals(uuid)) {
        uuid = UUIDs.randomBase64UUID();
    }
    // Routing nodes are expensive to compute. If neither the routing table contents nor the nodes have changed (compared by
    // identity) relative to the previous state, try to reuse that state's routing nodes; otherwise leave them to be built on demand.
    final boolean routingUnchanged = previous != null
        && routingTable.indicesRouting() == previous.routingTable.indicesRouting()
        && nodes == previous.nodes;
    final RoutingNodes reusableRoutingNodes = routingUnchanged ? previous.routingNodes : null;
    return new ClusterState(
        clusterName,
        version,
        uuid,
        metadata,
        routingTable,
        nodes,
        transportVersions,
        blocks,
        customs.build(),
        fromDiff,
        reusableRoutingNodes
    );
}
/**
 * Serializes the given cluster state with {@link ClusterState#writeTo} into a byte array.
 *
 * @param state the state to serialize
 * @return the serialized bytes
 */
public static byte[] toBytes(ClusterState state) throws IOException {
BytesStreamOutput os = new BytesStreamOutput();
state.writeTo(os);
return BytesReference.toBytes(os.bytes());
}
/**
 * Deserializes a cluster state from a byte array using {@link ClusterState#readFrom}.
 *
 * @param data input bytes
 * @param localNode used to set the local node in the cluster state.
 * @param registry registry the wrapped stream uses to resolve named writeables
 */
public static ClusterState fromBytes(byte[] data, DiscoveryNode localNode, NamedWriteableRegistry registry) throws IOException {
StreamInput in = new NamedWriteableAwareStreamInput(StreamInput.wrap(data), registry);
return readFrom(in, localNode);
}
}
/**
 * Creates a diff that describes the changes from {@code previousState} to this state.
 */
@Override
public Diff diff(ClusterState previousState) {
return new ClusterStateDiff(previousState, this);
}
/**
 * Reads a cluster state diff from the given stream.
 *
 * @param in        the stream to read from
 * @param localNode handed to the diff's stream constructor
 */
public static Diff readDiffFrom(StreamInput in, DiscoveryNode localNode) throws IOException {
return new ClusterStateDiff(in, localNode);
}
/**
 * Reads a complete cluster state from the given stream. The components are read in the same order in which {@link #writeTo}
 * writes them, so the two methods must be kept in step.
 *
 * @param in        the stream to read from; its transport version decides which optional parts are present
 * @param localNode handed to {@code DiscoveryNodes.readFrom}
 */
public static ClusterState readFrom(StreamInput in, DiscoveryNode localNode) throws IOException {
ClusterName clusterName = new ClusterName(in);
Builder builder = new Builder(clusterName);
builder.version = in.readLong();
builder.uuid = in.readString();
builder.metadata = Metadata.readFrom(in);
builder.routingTable = RoutingTable.readFrom(in);
builder.nodes = DiscoveryNodes.readFrom(in, localNode);
// the transport versions map is only on the wire from 8.8.0 onwards
if (in.getTransportVersion().onOrAfter(TransportVersion.V_8_8_0)) {
builder.transportVersions(in.readMap(TransportVersion::readVersion));
} else {
// this clusterstate is from a pre-8.8.0 node
// infer the versions from discoverynodes for now
builder.nodes().getNodes().values().forEach(n -> builder.putTransportVersion(n.getId(), inferTransportVersion(n)));
}
builder.blocks = ClusterBlocks.readFrom(in);
// customs are stored as a count followed by that many named writeables, each registered under its own writeable name
int customSize = in.readVInt();
for (int i = 0; i < customSize; i++) {
Custom customIndexMetadata = in.readNamedWriteable(Custom.class);
builder.putCustom(customIndexMetadata.getWriteableName(), customIndexMetadata);
}
if (in.getTransportVersion().before(TransportVersion.V_8_0_0)) {
in.readVInt(); // used to be minimumMasterNodesOnPublishingMaster, which was used in 7.x for BWC with 6.x
}
return builder.build();
}
/**
* If the cluster state does not contain transport version information, this is the version
* that is inferred for all nodes on version 8.8.0 or above.
*/
public static final TransportVersion INFERRED_TRANSPORT_VERSION = TransportVersion.V_8_8_0;
/**
 * Infers the transport version of a node from its release version, for cluster states that carry no transport version
 * information.
 *
 * @param node the node whose transport version is inferred
 * @return the transport version with the same id as the node's version if the node is older than 8.8.0, otherwise
 *         {@link #INFERRED_TRANSPORT_VERSION}
 */
private static TransportVersion inferTransportVersion(DiscoveryNode node) {
    final Version nodeVersion = node.getVersion();
    if (nodeVersion.before(Version.V_8_8_0)) {
        // 1-to-1 mapping between Version and TransportVersion
        return TransportVersion.fromId(nodeVersion.id);
    }
    // use the lowest value it could be for now
    return INFERRED_TRANSPORT_VERSION;
}
/**
 * Writes this complete cluster state to the given stream. The components are written in the same order in which
 * {@link #readFrom} reads them, so the two methods must be kept in step.
 *
 * @param out the stream to write to; its transport version decides which optional parts are written
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
clusterName.writeTo(out);
out.writeLong(version);
out.writeString(stateUUID);
metadata.writeTo(out);
routingTable.writeTo(out);
nodes.writeTo(out);
// the transport versions map is only sent to receivers on 8.8.0 or later
if (out.getTransportVersion().onOrAfter(TransportVersion.V_8_8_0)) {
out.writeMap(transportVersions, StreamOutput::writeString, (o, v) -> TransportVersion.writeVersion(v, o));
}
blocks.writeTo(out);
VersionedNamedWriteable.writeVersionedWritables(out, customs);
if (out.getTransportVersion().before(TransportVersion.V_8_0_0)) {
out.writeVInt(-1); // used to be minimumMasterNodesOnPublishingMaster, which was used in 7.x for BWC with 6.x
}
}
private static class ClusterStateDiff implements Diff {
private final long toVersion;
private final String fromUuid;
private final String toUuid;
private final ClusterName clusterName;
private final Diff routingTable;
private final Diff nodes;
@Nullable
private final Diff