// org.elasticsearch.gateway.GatewayMetaState (Elasticsearch subproject :server)
// Note: artifact-browser navigation text from the hosting page was removed here; it is not part of the Java source.
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.gateway;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.coordination.CoordinationMetadata;
import org.elasticsearch.cluster.coordination.CoordinationState.PersistedState;
import org.elasticsearch.cluster.coordination.InMemoryPersistedState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.IndexMetadataVerifier;
import org.elasticsearch.cluster.metadata.IndexTemplateMetadata;
import org.elasticsearch.cluster.metadata.Manifest;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.cluster.version.CompatibilityVersions;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.core.UpdateForV9;
import org.elasticsearch.env.BuildVersion;
import org.elasticsearch.env.NodeMetadata;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.node.Node;
import org.elasticsearch.plugins.ClusterCoordinationPlugin;
import org.elasticsearch.plugins.MetadataUpgrader;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import static org.elasticsearch.common.util.concurrent.EsExecutors.daemonThreadFactory;
/**
* Loads (and maybe upgrades) cluster metadata at startup, and persistently stores cluster metadata for future restarts.
*
* When started, ensures that this version is compatible with the state stored on disk, and performs a state upgrade if necessary. Note that
* the state being loaded when constructing the instance of this class is not necessarily the state that will be used as {@link
* ClusterState#metadata()} because it might be stale or incomplete. Master-eligible nodes must perform an election to find a complete and
* non-stale state, and master-ineligible nodes receive the real cluster state from the elected master after joining the cluster.
*/
public class GatewayMetaState implements Closeable {
/**
 * Fake node ID used in a voting configuration to mark on-disk state as potentially stale.
 * <p>
 * It is written by a master-ineligible data node, whose on-disk state is potentially stale since it is written asynchronously after
 * application, rather than before acceptance. This node ID means that if the node is restarted as a master-eligible node then it does
 * not win any elections until it has received a fresh cluster state.
 */
public static final String STALE_STATE_CONFIG_NODE_ID = "STALE_STATE_CONFIG";
// Set by calling start(). Accessors in this class treat a null value from get() as "not started".
// The element type must be spelled out: with a raw SetOnce, get() yields Object and cannot be assigned to PersistedState.
private final SetOnce<PersistedState> persistedState = new SetOnce<>();
/**
 * Returns the persisted state that was stored by {@code start(...)}.
 * When assertions are enabled, fails with "not started" if no state has been stored yet.
 */
public PersistedState getPersistedState() {
    final PersistedState current = persistedState.get();
    assert current != null : "not started";
    return current;
}
/**
 * Returns the metadata of the last-accepted cluster state held by the persisted state.
 */
public Metadata getMetadata() {
    final var lastAcceptedState = getPersistedState().getLastAcceptedState();
    return lastAcceptedState.metadata();
}
/**
 * Creates the {@link PersistedState} for this node and stores it so that {@link #getPersistedState()} can return it.
 * Intended to be called once only: an assertion fails if a persisted state has already been stored.
 *
 * @param clusterCoordinationPlugins plugins that may each contribute a persisted-state factory
 * @throws ElasticsearchException wrapping any {@link IOException} raised while creating the persisted state
 */
public void start(
    Settings settings,
    TransportService transportService,
    ClusterService clusterService,
    MetaStateService metaStateService,
    IndexMetadataVerifier indexMetadataVerifier,
    MetadataUpgrader metadataUpgrader,
    PersistedClusterStateService persistedClusterStateService,
    List<ClusterCoordinationPlugin> clusterCoordinationPlugins,
    CompatibilityVersions compatibilityVersions
) {
    assert persistedState.get() == null : "should only start once, but already have " + persistedState.get();
    try {
        persistedState.set(
            createPersistedState(
                settings,
                transportService,
                clusterService,
                metaStateService,
                indexMetadataVerifier,
                metadataUpgrader,
                persistedClusterStateService,
                clusterCoordinationPlugins,
                compatibilityVersions
            )
        );
    } catch (IOException e) {
        // surface I/O failures as an unchecked exception, keeping the original cause
        throw new ElasticsearchException("failed to load metadata", e);
    }
}
/**
 * Chooses how this node persists its cluster state.
 * <ul>
 * <li>If exactly one plugin supplies a persisted-state factory, that factory creates the state.</li>
 * <li>Otherwise, a master-eligible node or a node that can contain data gets an on-disk persisted state.</li>
 * <li>Any other node gets an in-memory persisted state.</li>
 * </ul>
 *
 * @throws IllegalStateException if more than one plugin supplies a persisted-state factory
 * @throws IOException if creating the persisted state fails with an I/O error
 */
private PersistedState createPersistedState(
    Settings settings,
    TransportService transportService,
    ClusterService clusterService,
    MetaStateService metaStateService,
    IndexMetadataVerifier indexMetadataVerifier,
    MetadataUpgrader metadataUpgrader,
    PersistedClusterStateService persistedClusterStateService,
    List<ClusterCoordinationPlugin> clusterCoordinationPlugins,
    CompatibilityVersions compatibilityVersions
) throws IOException {
    // collect only the factories that plugins actually provide (empty Optionals are dropped)
    final var persistedStateFactories = clusterCoordinationPlugins.stream()
        .map(ClusterCoordinationPlugin::getPersistedStateFactory)
        .flatMap(Optional::stream)
        .toList();

    if (persistedStateFactories.size() > 1) {
        throw new IllegalStateException("multiple persisted-state factories found: " + persistedStateFactories);
    }

    if (persistedStateFactories.size() == 1) {
        return persistedStateFactories.get(0).createPersistedState(settings, transportService, persistedClusterStateService);
    }

    if (DiscoveryNode.isMasterNode(settings) || DiscoveryNode.canContainData(settings)) {
        return createOnDiskPersistedState(
            settings,
            transportService,
            clusterService,
            metaStateService,
            indexMetadataVerifier,
            metadataUpgrader,
            persistedClusterStateService,
            compatibilityVersions
        );
    }

    return createInMemoryPersistedState(
        settings,
        transportService,
        clusterService,
        metaStateService,
        persistedClusterStateService,
        compatibilityVersions
    );
}
/**
 * Builds a persisted state backed by the on-disk cluster state store.
 * <p>
 * Loads the best on-disk state; if that is empty, falls back to the legacy state loaded via {@code metaStateService} when its
 * manifest is not empty. The loaded metadata is upgraded, turned into an initial cluster state, and wrapped in a
 * {@code LucenePersistedState}, which is additionally wrapped in an {@code AsyncPersistedState} on nodes that are not
 * master-eligible. Legacy files are then unreferenced or deleted and the legacy node metadata is rewritten.
 * <p>
 * If any step after creating the persisted state fails, the persisted state is closed before the exception propagates.
 *
 * @throws IOException if reading or writing on-disk state fails
 */
private PersistedState createOnDiskPersistedState(
    Settings settings,
    TransportService transportService,
    ClusterService clusterService,
    MetaStateService metaStateService,
    IndexMetadataVerifier indexMetadataVerifier,
    MetadataUpgrader metadataUpgrader,
    PersistedClusterStateService persistedClusterStateService,
    CompatibilityVersions compatibilityVersions
) throws IOException {
    final PersistedClusterStateService.OnDiskState onDiskState = persistedClusterStateService.loadBestOnDiskState();

    Metadata metadata = onDiskState.metadata;
    long lastAcceptedVersion = onDiskState.lastAcceptedVersion;
    long currentTerm = onDiskState.currentTerm;

    if (onDiskState.empty()) {
        @UpdateForV9 // legacy metadata loader is not needed anymore from v9 onwards
        final Tuple<Manifest, Metadata> legacyState = metaStateService.loadFullState();
        // only adopt the legacy state when its manifest actually carries something
        if (legacyState.v1().isEmpty() == false) {
            metadata = legacyState.v2();
            lastAcceptedVersion = legacyState.v1().clusterStateVersion();
            currentTerm = legacyState.v1().currentTerm();
        }
    }

    PersistedState persistedState = null;
    boolean success = false;
    try {
        final ClusterState clusterState = prepareInitialClusterState(
            transportService,
            clusterService,
            ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings))
                .version(lastAcceptedVersion)
                .metadata(upgradeMetadataForNode(metadata, indexMetadataVerifier, metadataUpgrader))
                .build(),
            compatibilityVersions
        );

        if (DiscoveryNode.isMasterNode(settings)) {
            persistedState = new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState);
        } else {
            // nodes that are not master-eligible wrap the Lucene-backed state in an AsyncPersistedState
            persistedState = new AsyncPersistedState(
                settings,
                transportService.getThreadPool(),
                new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState)
            );
        }
        if (DiscoveryNode.canContainData(settings)) {
            metaStateService.unreferenceAll(); // unreference legacy files (only keep them for dangling indices functionality)
        } else {
            metaStateService.deleteAll(); // delete legacy files
        }
        // write legacy node metadata to prevent accidental downgrades from spawning empty cluster state
        NodeMetadata.FORMAT.writeAndCleanup(
            new NodeMetadata(
                persistedClusterStateService.getNodeId(),
                BuildVersion.current(),
                clusterState.metadata().oldestIndexVersion()
            ),
            persistedClusterStateService.getDataPaths()
        );
        success = true;
    } finally {
        // on failure, release whatever persisted state was already created (may still be null)
        if (success == false) {
            IOUtils.closeWhileHandlingException(persistedState);
        }
    }

    return persistedState;
}
/**
 * Builds an in-memory persisted state that starts at term 0 with an otherwise empty initial cluster state.
 * <p>
 * When the node has at least one data path, the empty state is also written to disk, legacy cluster state files are deleted and the
 * legacy node metadata is rewritten; the returned persisted state itself is in-memory either way.
 *
 * @throws IOException if writing to the data paths fails
 */
private PersistedState createInMemoryPersistedState(
    Settings settings,
    TransportService transportService,
    ClusterService clusterService,
    MetaStateService metaStateService,
    PersistedClusterStateService persistedClusterStateService,
    CompatibilityVersions compatibilityVersions
) throws IOException {
    final long initialTerm = 0L;
    final ClusterState initialState = prepareInitialClusterState(
        transportService,
        clusterService,
        ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)).build(),
        compatibilityVersions
    );

    final boolean hasDataPaths = persistedClusterStateService.getDataPaths().length > 0;
    if (hasDataPaths == false) {
        // nothing to persist without a data path
        return new InMemoryPersistedState(initialTerm, initialState);
    }

    // write empty cluster state just so that we have a persistent node id. There is no need to write out global metadata with
    // cluster uuid as coordinating-only nodes do not snap into a cluster as they carry no state
    try (PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter()) {
        writer.writeFullStateAndCommit(initialTerm, initialState);
    }

    // delete legacy cluster state files
    metaStateService.deleteAll();

    // write legacy node metadata to prevent downgrades from spawning empty cluster state
    final NodeMetadata nodeMetadata = new NodeMetadata(
        persistedClusterStateService.getNodeId(),
        BuildVersion.current(),
        initialState.metadata().oldestIndexVersion()
    );
    NodeMetadata.FORMAT.writeAndCleanup(nodeMetadata, persistedClusterStateService.getDataPaths());

    return new InMemoryPersistedState(initialTerm, initialState);
}
/**
 * Applies the startup transformations to the given cluster state, in this order: add the state-not-recovered block, set the
 * local node (with its compatibility versions), upgrade and archive unknown or invalid settings, and recover cluster blocks.
 * <p>
 * Asserts that the given state has no local node yet and that the transport service already has one.
 */
// exposed so it can be overridden by tests
ClusterState prepareInitialClusterState(
    TransportService transportService,
    ClusterService clusterService,
    ClusterState clusterState,
    CompatibilityVersions compatibilityVersions
) {
    assert clusterState.nodes().getLocalNode() == null : "prepareInitialClusterState must only be called once";
    assert transportService.getLocalNode() != null : "transport service is not yet started";
    // The explicit type witness is required: identity() is used as a method receiver, so there is no target type to infer from.
    return Function.<ClusterState>identity()
        .andThen(ClusterStateUpdaters::addStateNotRecoveredBlock)
        .andThen(state -> ClusterStateUpdaters.setLocalNode(state, transportService.getLocalNode(), compatibilityVersions))
        .andThen(state -> ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings(state, clusterService.getClusterSettings()))
        .andThen(ClusterStateUpdaters::recoverClusterBlocks)
        .apply(clusterState);
}
/**
 * Instance-level hook around the static {@code upgradeMetadata}; simply delegates to it.
 */
// exposed so it can be overridden by tests
Metadata upgradeMetadataForNode(Metadata metadata, IndexMetadataVerifier indexMetadataVerifier, MetadataUpgrader metadataUpgrader) {
    final Metadata upgraded = upgradeMetadata(metadata, indexMetadataVerifier, metadataUpgrader);
    return upgraded;
}
/**
 * Runs every index through {@link IndexMetadataVerifier} to ensure that indices are compatible
 * with the current version, and calls into plugins to update their index templates.
 *
 * @return the input {@code metadata} instance if no upgrade is needed, otherwise an upgraded metadata
 */
static Metadata upgradeMetadata(Metadata metadata, IndexMetadataVerifier indexMetadataVerifier, MetadataUpgrader metadataUpgrader) {
    final Metadata.Builder builder = Metadata.builder(metadata);

    // verify each index; a different instance coming back means the verifier changed it
    boolean indicesChanged = false;
    for (IndexMetadata original : metadata) {
        final IndexMetadata verified = indexMetadataVerifier.verifyIndexMetadata(original, IndexVersions.MINIMUM_COMPATIBLE);
        if (verified != original) {
            indicesChanged = true;
        }
        builder.put(verified, false);
    }

    // upgrade current templates
    final boolean templatesChanged = applyPluginUpgraders(
        metadata.getTemplates(),
        metadataUpgrader.indexTemplateMetadataUpgraders,
        builder::removeTemplate,
        (name, template) -> builder.put(template)
    );

    if (indicesChanged || templatesChanged) {
        return builder.build();
    }
    return metadata;
}
private static boolean applyPluginUpgraders(
Map existingData,
UnaryOperator