org.elasticsearch.gateway.GatewayMetaState Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch - Open Source, Distributed, RESTful Search Engine
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.gateway;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.coordination.CoordinationMetadata;
import org.elasticsearch.cluster.coordination.CoordinationState.PersistedState;
import org.elasticsearch.cluster.coordination.InMemoryPersistedState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.IndexMetadataVerifier;
import org.elasticsearch.cluster.metadata.IndexTemplateMetadata;
import org.elasticsearch.cluster.metadata.Manifest;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.env.NodeMetadata;
import org.elasticsearch.node.Node;
import org.elasticsearch.plugins.ClusterCoordinationPlugin;
import org.elasticsearch.plugins.MetadataUpgrader;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import static org.elasticsearch.common.util.concurrent.EsExecutors.daemonThreadFactory;
/**
* Loads (and maybe upgrades) cluster metadata at startup, and persistently stores cluster metadata for future restarts.
*
* When started, ensures that this version is compatible with the state stored on disk, and performs a state upgrade if necessary. Note that
* the state being loaded when constructing the instance of this class is not necessarily the state that will be used as {@link
* ClusterState#metadata()} because it might be stale or incomplete. Master-eligible nodes must perform an election to find a complete and
* non-stale state, and master-ineligible nodes receive the real cluster state from the elected master after joining the cluster.
*/
public class GatewayMetaState implements Closeable {
/**
 * Fake node ID for a voting configuration written by a master-ineligible data node to indicate that its on-disk state is potentially
 * stale (since it is written asynchronously after application, rather than before acceptance). This node ID means that if the node is
 * restarted as a master-eligible node then it does not win any elections until it has received a fresh cluster state.
 */
public static final String STALE_STATE_CONFIG_NODE_ID = "STALE_STATE_CONFIG";

// Set by calling start(); empty until then. Parameterized so that get() yields a PersistedState rather than Object.
private final SetOnce<PersistedState> persistedState = new SetOnce<>();
/**
 * Returns the persisted state that was installed by {@code start}.
 *
 * @return the current persisted state; with assertions enabled, fails with "not started" if none has been set yet
 */
public PersistedState getPersistedState() {
    final PersistedState current = persistedState.get();
    assert current != null : "not started";
    return current;
}
/**
 * Returns the metadata of the last-accepted cluster state held by the persisted state.
 */
public Metadata getMetadata() {
    final ClusterState lastAccepted = getPersistedState().getLastAcceptedState();
    return lastAccepted.metadata();
}
/**
 * Creates the persisted state for this node and stores it so that {@code getPersistedState()} can return it. Must be called at most
 * once; an assertion trips if a persisted state has already been set.
 *
 * All arguments are forwarded unchanged to {@code createPersistedState}.
 *
 * @param clusterCoordinationPlugins plugins that may each supply a persisted-state factory
 * @throws ElasticsearchException wrapping any {@link IOException} raised while creating the persisted state
 */
public void start(
    Settings settings,
    TransportService transportService,
    ClusterService clusterService,
    MetaStateService metaStateService,
    IndexMetadataVerifier indexMetadataVerifier,
    MetadataUpgrader metadataUpgrader,
    PersistedClusterStateService persistedClusterStateService,
    List<ClusterCoordinationPlugin> clusterCoordinationPlugins
) {
    assert persistedState.get() == null : "should only start once, but already have " + persistedState.get();
    try {
        persistedState.set(
            createPersistedState(
                settings,
                transportService,
                clusterService,
                metaStateService,
                indexMetadataVerifier,
                metadataUpgrader,
                persistedClusterStateService,
                clusterCoordinationPlugins
            )
        );
    } catch (IOException e) {
        // surface load failures as an unchecked exception, keeping the original cause
        throw new ElasticsearchException("failed to load metadata", e);
    }
}
/**
 * Chooses and builds the {@link PersistedState} implementation for this node.
 *
 * Selection order:
 * 1. if exactly one plugin supplies a persisted-state factory, that factory creates the state;
 * 2. otherwise, if the node is master-eligible or can contain data, an on-disk persisted state is created;
 * 3. otherwise an in-memory persisted state is created.
 *
 * @param clusterCoordinationPlugins plugins that may each supply a persisted-state factory
 * @throws IllegalStateException if more than one plugin supplies a persisted-state factory
 * @throws IOException if creating the chosen persisted state fails with an I/O error
 */
private PersistedState createPersistedState(
    Settings settings,
    TransportService transportService,
    ClusterService clusterService,
    MetaStateService metaStateService,
    IndexMetadataVerifier indexMetadataVerifier,
    MetadataUpgrader metadataUpgrader,
    PersistedClusterStateService persistedClusterStateService,
    List<ClusterCoordinationPlugin> clusterCoordinationPlugins
) throws IOException {
    // collect only the factories that plugins actually provide (empty Optionals are dropped)
    final var persistedStateFactories = clusterCoordinationPlugins.stream()
        .map(ClusterCoordinationPlugin::getPersistedStateFactory)
        .flatMap(Optional::stream)
        .toList();

    if (persistedStateFactories.size() > 1) {
        throw new IllegalStateException("multiple persisted-state factories found: " + persistedStateFactories);
    }

    if (persistedStateFactories.size() == 1) {
        return persistedStateFactories.get(0).createPersistedState(settings, transportService, persistedClusterStateService);
    }

    if (DiscoveryNode.isMasterNode(settings) || DiscoveryNode.canContainData(settings)) {
        return createOnDiskPersistedState(
            settings,
            transportService,
            clusterService,
            metaStateService,
            indexMetadataVerifier,
            metadataUpgrader,
            persistedClusterStateService
        );
    }

    return createInMemoryPersistedState(settings, transportService, clusterService, metaStateService, persistedClusterStateService);
}
/**
 * Builds a Lucene-backed {@link PersistedState} from the best cluster state found on disk.
 *
 * If the on-disk state is empty, the legacy state is loaded through the {@link MetaStateService}; when its manifest is not empty, its
 * metadata, cluster state version and current term replace the (empty) on-disk values. The metadata is upgraded for this node before
 * the initial cluster state is built.
 *
 * Master-eligible nodes use the {@code LucenePersistedState} directly; other nodes wrap it in an {@code AsyncPersistedState}. After the
 * persisted state is created, legacy metadata files are unreferenced (data nodes) or deleted (other nodes), and the legacy node
 * metadata is rewritten. If any step inside the {@code try} fails, whatever persisted state was created is closed before the exception
 * propagates.
 *
 * @throws IOException if loading or writing state fails
 */
private PersistedState createOnDiskPersistedState(
    Settings settings,
    TransportService transportService,
    ClusterService clusterService,
    MetaStateService metaStateService,
    IndexMetadataVerifier indexMetadataVerifier,
    MetadataUpgrader metadataUpgrader,
    PersistedClusterStateService persistedClusterStateService
) throws IOException {
    final PersistedClusterStateService.OnDiskState onDiskState = persistedClusterStateService.loadBestOnDiskState();

    Metadata metadata = onDiskState.metadata;
    long lastAcceptedVersion = onDiskState.lastAcceptedVersion;
    long currentTerm = onDiskState.currentTerm;

    if (onDiskState.empty()) {
        assert Version.CURRENT.major <= Version.V_7_0_0.major + 1 : "legacy metadata loader is not needed anymore from v9 onwards";
        // v1 is the manifest, v2 the metadata loaded alongside it
        final Tuple<Manifest, Metadata> legacyState = metaStateService.loadFullState();
        if (legacyState.v1().isEmpty() == false) {
            metadata = legacyState.v2();
            lastAcceptedVersion = legacyState.v1().clusterStateVersion();
            currentTerm = legacyState.v1().currentTerm();
        }
    }

    PersistedState persistedState = null;
    boolean success = false;
    try {
        final ClusterState clusterState = prepareInitialClusterState(
            transportService,
            clusterService,
            ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings))
                .version(lastAcceptedVersion)
                .metadata(upgradeMetadataForNode(metadata, indexMetadataVerifier, metadataUpgrader))
                .build()
        );

        if (DiscoveryNode.isMasterNode(settings)) {
            persistedState = new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState);
        } else {
            persistedState = new AsyncPersistedState(
                settings,
                transportService.getThreadPool(),
                new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState)
            );
        }
        if (DiscoveryNode.canContainData(settings)) {
            metaStateService.unreferenceAll(); // unreference legacy files (only keep them for dangling indices functionality)
        } else {
            metaStateService.deleteAll(); // delete legacy files
        }
        // write legacy node metadata to prevent accidental downgrades from spawning empty cluster state
        NodeMetadata.FORMAT.writeAndCleanup(
            new NodeMetadata(persistedClusterStateService.getNodeId(), Version.CURRENT, clusterState.metadata().oldestIndexVersion()),
            persistedClusterStateService.getDataPaths()
        );
        success = true;
    } finally {
        if (success == false) {
            // do not leak the persisted state if a later step threw
            IOUtils.closeWhileHandlingException(persistedState);
        }
    }

    return persistedState;
}
/**
 * Builds an {@link InMemoryPersistedState} at term 0 from an otherwise empty cluster state carrying only the configured cluster name
 * and the adjustments made by {@code prepareInitialClusterState}.
 *
 * When the node has at least one data path, the initial state is also written to disk, the legacy cluster state files are deleted and
 * the legacy node metadata is rewritten.
 *
 * @throws IOException if any of the on-disk writes or deletions fails
 */
private PersistedState createInMemoryPersistedState(
    Settings settings,
    TransportService transportService,
    ClusterService clusterService,
    MetaStateService metaStateService,
    PersistedClusterStateService persistedClusterStateService
) throws IOException {
    final long initialTerm = 0L;
    final ClusterState emptyState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)).build();
    final ClusterState initialState = prepareInitialClusterState(transportService, clusterService, emptyState);

    final boolean hasDataPaths = persistedClusterStateService.getDataPaths().length > 0;
    if (hasDataPaths) {
        // write empty cluster state just so that we have a persistent node id. There is no need to write out global metadata with
        // cluster uuid as coordinating-only nodes do not snap into a cluster as they carry no state
        try (PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter()) {
            writer.writeFullStateAndCommit(initialTerm, initialState);
        }

        // delete legacy cluster state files
        metaStateService.deleteAll();

        // write legacy node metadata to prevent downgrades from spawning empty cluster state
        final NodeMetadata nodeMetadata = new NodeMetadata(
            persistedClusterStateService.getNodeId(),
            Version.CURRENT,
            initialState.metadata().oldestIndexVersion()
        );
        NodeMetadata.FORMAT.writeAndCleanup(nodeMetadata, persistedClusterStateService.getDataPaths());
    }

    return new InMemoryPersistedState(initialTerm, initialState);
}
/**
 * Applies the startup adjustments to a freshly built cluster state, in this order: add the state-not-recovered block, set the local
 * node (taken from the transport service, with the current transport version), upgrade and archive unknown or invalid settings
 * against the cluster settings, and recover cluster blocks.
 *
 * Asserts that the given state has no local node yet and that the transport service already has one.
 *
 * @return the cluster state produced by running all of the above updaters over {@code clusterState}
 */
// exposed so it can be overridden by tests
ClusterState prepareInitialClusterState(TransportService transportService, ClusterService clusterService, ClusterState clusterState) {
    assert clusterState.nodes().getLocalNode() == null : "prepareInitialClusterState must only be called once";
    assert transportService.getLocalNode() != null : "transport service is not yet started";
    // the explicit type witness is required: without it identity() is inferred as Function<Object, Object> and the
    // ClusterState-typed updaters below cannot be chained onto it
    return Function.<ClusterState>identity()
        .andThen(ClusterStateUpdaters::addStateNotRecoveredBlock)
        .andThen(state -> ClusterStateUpdaters.setLocalNode(state, transportService.getLocalNode(), TransportVersion.current()))
        .andThen(state -> ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings(state, clusterService.getClusterSettings()))
        .andThen(ClusterStateUpdaters::recoverClusterBlocks)
        .apply(clusterState);
}
// Instance-level hook around the static upgradeMetadata; exposed so it can be overridden by tests
Metadata upgradeMetadataForNode(Metadata metadata, IndexMetadataVerifier indexMetadataVerifier, MetadataUpgrader metadataUpgrader) {
    final Metadata upgraded = upgradeMetadata(metadata, indexMetadataVerifier, metadataUpgrader);
    return upgraded;
}
/**
 * Runs every index in the given metadata through the {@link IndexMetadataVerifier}, using the minimum index compatibility version of
 * the current version, and then lets the plugin-supplied index template upgraders update the templates.
 *
 * @return the input {@code metadata} instance if no upgrade was needed, otherwise the upgraded metadata
 */
static Metadata upgradeMetadata(Metadata metadata, IndexMetadataVerifier indexMetadataVerifier, MetadataUpgrader metadataUpgrader) {
    final Metadata.Builder builder = Metadata.builder(metadata);

    boolean indicesChanged = false;
    for (IndexMetadata original : metadata) {
        final IndexMetadata verified = indexMetadataVerifier.verifyIndexMetadata(
            original,
            Version.CURRENT.minimumIndexCompatibilityVersion()
        );
        // identity comparison: the verifier hands back the same instance when it changed nothing
        if (verified != original) {
            indicesChanged = true;
        }
        builder.put(verified, false);
    }

    // upgrade current templates (always executed, regardless of whether any index changed)
    final boolean templatesChanged = applyPluginUpgraders(
        metadata.getTemplates(),
        metadataUpgrader.indexTemplateMetadataUpgraders,
        builder::removeTemplate,
        (name, template) -> builder.put(template)
    );

    if (indicesChanged || templatesChanged) {
        return builder.build();
    }
    return metadata;
}
private static boolean applyPluginUpgraders(
Map existingData,
UnaryOperator
© 2015 - 2025 Weber Informatics LLC | Privacy Policy