org.opensearch.gateway.GatewayMetaState Maven / Gradle / Ivy
Show all versions of opensearch Show documentation
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.gateway;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.AlreadyClosedException;
import org.opensearch.ExceptionsHelper;
import org.opensearch.LegacyESVersion;
import org.opensearch.OpenSearchException;
import org.opensearch.Version;
import org.opensearch.cluster.ClusterChangedEvent;
import org.opensearch.cluster.ClusterName;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.ClusterStateApplier;
import org.opensearch.cluster.coordination.CoordinationMetadata;
import org.opensearch.cluster.coordination.CoordinationState.PersistedState;
import org.opensearch.cluster.coordination.InMemoryPersistedState;
import org.opensearch.cluster.coordination.PersistedStateRegistry;
import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType;
import org.opensearch.cluster.coordination.PersistedStateStats;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.metadata.IndexTemplateMetadata;
import org.opensearch.cluster.metadata.Manifest;
import org.opensearch.cluster.metadata.Metadata;
import org.opensearch.cluster.metadata.MetadataIndexUpgradeService;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.collect.Tuple;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.concurrent.AbstractRunnable;
import org.opensearch.common.util.concurrent.OpenSearchExecutors;
import org.opensearch.common.util.concurrent.OpenSearchThreadPoolExecutor;
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.env.NodeMetadata;
import org.opensearch.gateway.remote.ClusterMetadataManifest;
import org.opensearch.gateway.remote.RemoteClusterStateService;
import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo;
import org.opensearch.index.recovery.RemoteStoreRestoreService;
import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult;
import org.opensearch.node.Node;
import org.opensearch.plugins.MetadataUpgrader;
import org.opensearch.threadpool.ThreadPool;
import org.opensearch.transport.TransportService;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import static org.opensearch.common.util.concurrent.OpenSearchExecutors.daemonThreadFactory;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled;
/**
* Loads (and maybe upgrades) cluster metadata at startup, and persistently stores cluster metadata for future restarts.
*
* When started, ensures that this version is compatible with the state stored on disk, and performs a state upgrade if necessary. Note that the state being
* loaded when constructing the instance of this class is not necessarily the state that will be used as {@link ClusterState#metadata()} because it might be
* stale or incomplete. Cluster-manager-eligible nodes must perform an election to find a complete and non-stale state, and cluster-manager-ineligible nodes
* receive the real cluster state from the elected cluster-manager after joining the cluster.
*
* @opensearch.internal
*/
public class GatewayMetaState implements Closeable {
/**
* Fake node ID for a voting configuration written by a cluster-manager-ineligible data node to indicate that its on-disk state is potentially stale (since
* it is written asynchronously after application, rather than before acceptance). This node ID means that if the node is restarted as a
* cluster-manager-eligible node then it does not win any elections until it has received a fresh cluster state.
*/
public static final String STALE_STATE_CONFIG_NODE_ID = "STALE_STATE_CONFIG";
private PersistedStateRegistry persistedStateRegistry;
public PersistedState getPersistedState() {
final PersistedState persistedState = persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL);
assert persistedState != null : "not started";
return persistedState;
}
public Metadata getMetadata() {
return persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL).getLastAcceptedState().metadata();
}
public void start(
Settings settings,
TransportService transportService,
ClusterService clusterService,
MetaStateService metaStateService,
MetadataIndexUpgradeService metadataIndexUpgradeService,
MetadataUpgrader metadataUpgrader,
PersistedClusterStateService persistedClusterStateService,
RemoteClusterStateService remoteClusterStateService,
PersistedStateRegistry persistedStateRegistry,
RemoteStoreRestoreService remoteStoreRestoreService
) {
assert this.persistedStateRegistry == null : "Persisted state registry should only be set once";
this.persistedStateRegistry = persistedStateRegistry;
if (DiscoveryNode.isClusterManagerNode(settings) || DiscoveryNode.isDataNode(settings)) {
try {
final PersistedClusterStateService.OnDiskState onDiskState = persistedClusterStateService.loadBestOnDiskState();
Metadata metadata = onDiskState.metadata;
long lastAcceptedVersion = onDiskState.lastAcceptedVersion;
long currentTerm = onDiskState.currentTerm;
if (onDiskState.empty()) {
assert Version.CURRENT.major <= LegacyESVersion.V_7_0_0.major + 1
: "legacy metadata loader is not needed anymore from v9 onwards";
final Tuple legacyState = metaStateService.loadFullState();
if (legacyState.v1().isEmpty() == false) {
metadata = legacyState.v2();
lastAcceptedVersion = legacyState.v1().getClusterStateVersion();
currentTerm = legacyState.v1().getCurrentTerm();
}
}
PersistedState persistedState = null;
PersistedState remotePersistedState = null;
boolean success = false;
try {
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings))
.version(lastAcceptedVersion)
.metadata(metadata)
.build();
if (DiscoveryNode.isClusterManagerNode(settings) && isRemoteStoreClusterStateEnabled(settings)) {
// If the cluster UUID loaded from local is unknown (_na_) then fetch the best state from remote
// If there is no valid state on remote, continue with initial empty state
// If there is a valid state, then restore index metadata using this state
String lastKnownClusterUUID = ClusterState.UNKNOWN_UUID;
if (ClusterState.UNKNOWN_UUID.equals(clusterState.metadata().clusterUUID())) {
lastKnownClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote(
clusterState.getClusterName().value()
);
if (ClusterState.UNKNOWN_UUID.equals(lastKnownClusterUUID) == false) {
// Load state from remote
final RemoteRestoreResult remoteRestoreResult = remoteStoreRestoreService.restore(
// Remote Metadata should always override local disk Metadata
// if local disk Metadata's cluster uuid is UNKNOWN_UUID
ClusterState.builder(clusterState).metadata(Metadata.EMPTY_METADATA).build(),
lastKnownClusterUUID,
false,
new String[] {}
);
clusterState = remoteRestoreResult.getClusterState();
}
}
remotePersistedState = new RemotePersistedState(remoteClusterStateService, lastKnownClusterUUID);
}
// Recovers Cluster and Index level blocks
clusterState = prepareInitialClusterState(
transportService,
clusterService,
ClusterState.builder(clusterState)
.metadata(upgradeMetadataForNode(clusterState.metadata(), metadataIndexUpgradeService, metadataUpgrader))
.build()
);
if (DiscoveryNode.isClusterManagerNode(settings)) {
persistedState = new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState);
} else {
persistedState = new AsyncLucenePersistedState(
settings,
transportService.getThreadPool(),
new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState)
);
}
if (DiscoveryNode.isDataNode(settings)) {
metaStateService.unreferenceAll(); // unreference legacy files (only keep them for dangling indices functionality)
} else {
metaStateService.deleteAll(); // delete legacy files
}
// write legacy node metadata to prevent accidental downgrades from spawning empty cluster state
NodeMetadata.FORMAT.writeAndCleanup(
new NodeMetadata(persistedClusterStateService.getNodeId(), Version.CURRENT),
persistedClusterStateService.getDataPaths()
);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(persistedStateRegistry);
}
}
persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, persistedState);
if (remotePersistedState != null) {
persistedStateRegistry.addPersistedState(PersistedStateType.REMOTE, remotePersistedState);
}
} catch (IOException e) {
throw new OpenSearchException("failed to load metadata", e);
}
} else {
final long currentTerm = 0L;
final ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)).build();
if (persistedClusterStateService.getDataPaths().length > 0) {
// write empty cluster state just so that we have a persistent node id. There is no need to write out global metadata with
// cluster uuid as coordinating-only nodes do not snap into a cluster as they carry no state
try (PersistedClusterStateService.Writer persistenceWriter = persistedClusterStateService.createWriter()) {
persistenceWriter.writeFullStateAndCommit(currentTerm, clusterState);
} catch (IOException e) {
throw new OpenSearchException("failed to load metadata", e);
}
try {
// delete legacy cluster state files
metaStateService.deleteAll();
// write legacy node metadata to prevent downgrades from spawning empty cluster state
NodeMetadata.FORMAT.writeAndCleanup(
new NodeMetadata(persistedClusterStateService.getNodeId(), Version.CURRENT),
persistedClusterStateService.getDataPaths()
);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(currentTerm, clusterState));
}
}
// exposed so it can be overridden by tests
ClusterState prepareInitialClusterState(TransportService transportService, ClusterService clusterService, ClusterState clusterState) {
assert clusterState.nodes().getLocalNode() == null : "prepareInitialClusterState must only be called once";
assert transportService.getLocalNode() != null : "transport service is not yet started";
return Function.identity()
.andThen(ClusterStateUpdaters::addStateNotRecoveredBlock)
.andThen(state -> ClusterStateUpdaters.setLocalNode(state, transportService.getLocalNode()))
.andThen(state -> ClusterStateUpdaters.upgradeAndArchiveUnknownOrInvalidSettings(state, clusterService.getClusterSettings()))
.andThen(ClusterStateUpdaters::recoverClusterBlocks)
.apply(clusterState);
}
// exposed so it can be overridden by tests
Metadata upgradeMetadataForNode(
Metadata metadata,
MetadataIndexUpgradeService metadataIndexUpgradeService,
MetadataUpgrader metadataUpgrader
) {
return upgradeMetadata(metadata, metadataIndexUpgradeService, metadataUpgrader);
}
/**
* This method calls {@link MetadataIndexUpgradeService} to makes sure that indices are compatible with the current version. The MetadataIndexUpgradeService
* might also update obsolete settings if needed.
*
* @return input metadata
if no upgrade is needed or an upgraded metadata
*/
static Metadata upgradeMetadata(
Metadata metadata,
MetadataIndexUpgradeService metadataIndexUpgradeService,
MetadataUpgrader metadataUpgrader
) {
// upgrade index meta data
boolean changed = false;
final Metadata.Builder upgradedMetadata = Metadata.builder(metadata);
for (IndexMetadata indexMetadata : metadata) {
IndexMetadata newMetadata = metadataIndexUpgradeService.upgradeIndexMetadata(
indexMetadata,
Version.CURRENT.minimumIndexCompatibilityVersion()
);
changed |= indexMetadata != newMetadata;
upgradedMetadata.put(newMetadata, false);
}
// upgrade current templates
if (applyPluginUpgraders(
metadata.getTemplates(),
metadataUpgrader.indexTemplateMetadataUpgraders,
upgradedMetadata::removeTemplate,
(s, indexTemplateMetadata) -> upgradedMetadata.put(indexTemplateMetadata)
)) {
changed = true;
}
return changed ? upgradedMetadata.build() : metadata;
}
private static boolean applyPluginUpgraders(
final Map existingData,
UnaryOperator