org.opensearch.gateway.remote.RemoteClusterStateService Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearch Show documentation
Show all versions of opensearch Show documentation
OpenSearch subproject :server
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
package org.opensearch.gateway.remote;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.opensearch.Version;
import org.opensearch.action.LatchedActionListener;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.Nullable;
import org.opensearch.common.blobstore.BlobContainer;
import org.opensearch.common.blobstore.BlobMetadata;
import org.opensearch.common.blobstore.BlobPath;
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Setting.Property;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.index.Index;
import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata;
import org.opensearch.index.remote.RemoteStoreUtils;
import org.opensearch.index.translog.transfer.BlobStoreTransferService;
import org.opensearch.node.Node;
import org.opensearch.node.remotestore.RemoteStoreNodeAttribute;
import org.opensearch.repositories.RepositoriesService;
import org.opensearch.repositories.Repository;
import org.opensearch.repositories.blobstore.BlobStoreRepository;
import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat;
import org.opensearch.threadpool.ThreadPool;
import java.io.Closeable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;
import java.util.function.LongSupplier;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import static org.opensearch.gateway.PersistedClusterStateService.SLOW_WRITE_LOGGING_THRESHOLD;
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled;
/**
* A Service which provides APIs to upload and download cluster metadata from remote store.
*
* @opensearch.internal
*/
public class RemoteClusterStateService implements Closeable {
// Blob name format handed to INDEX_METADATA_FORMAT for index metadata files.
public static final String METADATA_NAME_FORMAT = "%s.dat";
// Blob name format handed to CLUSTER_METADATA_MANIFEST_FORMAT for manifest files.
public static final String METADATA_MANIFEST_NAME_FORMAT = "%s";
// Passed as manifestsToRetain when stale metadata is cleaned up after an incremental write.
public static final int RETAINED_MANIFESTS = 10;
// Separator placed between the parts of manifest and index metadata file names.
public static final String DELIMITER = "__";
private static final Logger logger = LogManager.getLogger(RemoteClusterStateService.class);
// Upper bound, in milliseconds, on how long writeIndexMetadataParallel waits for all uploads to report back.
public static final int INDEX_METADATA_UPLOAD_WAIT_MILLIS = 20000;
// Reader/writer for index metadata blobs; parses content with IndexMetadata::fromXContent.
public static final ChecksumBlobStoreFormat INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat<>(
"index-metadata",
METADATA_NAME_FORMAT,
IndexMetadata::fromXContent
);
// Reader/writer for manifest blobs; parses content with ClusterMetadataManifest::fromXContent.
public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT = new ChecksumBlobStoreFormat<>(
"cluster-metadata-manifest",
METADATA_MANIFEST_NAME_FORMAT,
ClusterMetadataManifest::fromXContent
);
/**
* Used to specify if cluster state metadata should be published to remote store
*/
public static final Setting REMOTE_CLUSTER_STATE_ENABLED_SETTING = Setting.boolSetting(
"cluster.remote_store.state.enabled",
false,
Property.NodeScope,
Property.Final
);
// Path segment appended after the encoded cluster name.
public static final String CLUSTER_STATE_PATH_TOKEN = "cluster-state";
// Path segment under a cluster UUID that holds per-index metadata folders.
public static final String INDEX_PATH_TOKEN = "index";
// Path segment under a cluster UUID that holds manifest files.
public static final String MANIFEST_PATH_TOKEN = "manifest";
// Prefix used when listing manifest files.
public static final String MANIFEST_FILE_PREFIX = "manifest";
// Leading part of every index metadata file name.
public static final String INDEX_METADATA_FILE_PREFIX = "metadata";
// Recorded in every manifest this service uploads.
private final String nodeId;
// Queried in start() to look up the configured repository.
private final Supplier repositoriesService;
private final Settings settings;
// Time source used to measure how long metadata writes take.
private final LongSupplier relativeTimeNanosSupplier;
private final ThreadPool threadpool;
// Assigned in start(); null until then.
private BlobStoreRepository blobStoreRepository;
// Created on first use by getBlobStoreTransferService().
private BlobStoreTransferService blobStoreTransferService;
// Updated through a cluster settings consumer registered in the constructor.
private volatile TimeValue slowWriteLoggingThreshold;
// Set while a stale-metadata cleanup is running so that overlapping cleanups are skipped.
private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false);
/**
 * Builds the service and subscribes to changes of the slow-write logging threshold.
 * The blob store repository is not resolved here; that happens in {@link #start()}.
 *
 * @param nodeId id of the local node, recorded in uploaded manifests
 * @param repositoriesService supplier of the repositories service used to resolve the repository
 * @param settings node settings; remote cluster state must be enabled in them
 * @param clusterSettings cluster settings providing the slow-write logging threshold
 * @param relativeTimeNanosSupplier time source used to measure write durations
 * @param threadPool thread pool handed to the blob store transfer service
 */
public RemoteClusterStateService(
    String nodeId,
    Supplier repositoriesService,
    Settings settings,
    ClusterSettings clusterSettings,
    LongSupplier relativeTimeNanosSupplier,
    ThreadPool threadPool
) {
    assert isRemoteStoreClusterStateEnabled(settings) : "Remote cluster state is not enabled";

    this.threadpool = threadPool;
    this.relativeTimeNanosSupplier = relativeTimeNanosSupplier;
    this.settings = settings;
    this.repositoriesService = repositoriesService;
    this.nodeId = nodeId;

    // Take the current threshold first, then follow later updates.
    setSlowWriteLoggingThreshold(clusterSettings.get(SLOW_WRITE_LOGGING_THRESHOLD));
    clusterSettings.addSettingsUpdateConsumer(SLOW_WRITE_LOGGING_THRESHOLD, threshold -> setSlowWriteLoggingThreshold(threshold));
}
/**
 * Returns the transfer service, creating it from the repository's blob store on the first call.
 *
 * @return the shared {@link BlobStoreTransferService}
 */
private BlobStoreTransferService getBlobStoreTransferService() {
    if (blobStoreTransferService != null) {
        return blobStoreTransferService;
    }
    blobStoreTransferService = new BlobStoreTransferService(blobStoreRepository.blobStore(), threadpool);
    return blobStoreTransferService;
}
/**
 * Uploads the metadata of every index in the given cluster state and then a manifest that references the uploaded
 * files. Only index metadata is uploaded for now. Intended to be called by the elected cluster manager when remote
 * cluster state is enabled.
 *
 * @param clusterState cluster state whose index metadata is uploaded
 * @param previousClusterUUID previous cluster UUID to record in the manifest
 * @return the uploaded manifest, or {@code null} when the local node is not the elected cluster manager
 */
@Nullable
public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, String previousClusterUUID) throws IOException {
    final long startTimeNanos = relativeTimeNanosSupplier.getAsLong();
    if (!clusterState.nodes().isLocalNodeElectedClusterManager()) {
        logger.error("Local node is not elected cluster manager. Exiting");
        return null;
    }

    // any validations before/after upload ?
    final List indicesToUpload = new ArrayList<>(clusterState.metadata().indices().values());
    final List allUploadedIndexMetadata = writeIndexMetadataParallel(clusterState, indicesToUpload);
    final ClusterMetadataManifest manifest = uploadManifest(clusterState, allUploadedIndexMetadata, previousClusterUUID, false);

    final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos);
    if (durationMillis < slowWriteLoggingThreshold.getMillis()) {
        // todo change to debug
        logger.info(
            "writing cluster state took [{}ms]; wrote full state with [{}] indices",
            durationMillis,
            allUploadedIndexMetadata.size()
        );
        return manifest;
    }

    logger.warn(
        "writing cluster state took [{}ms] which is above the warn threshold of [{}]; wrote full state with [{}] indices",
        durationMillis,
        slowWriteLoggingThreshold,
        allUploadedIndexMetadata.size()
    );
    return manifest;
}
/**
* This method uploads the diff between the previous cluster state and the current cluster state. The previous manifest file is needed to create the new
* manifest. The new manifest file is created by using the unchanged metadata from the previous manifest and the new metadata changes from the current
* cluster state.
*
* @param previousClusterState cluster state that the previous manifest was written for
* @param clusterState current cluster state
* @param previousManifest manifest uploaded for the previous cluster state
* @return The uploaded ClusterMetadataManifest file, or {@code null} when the local node is not the elected cluster manager
*/
@Nullable
public ClusterMetadataManifest writeIncrementalMetadata(
ClusterState previousClusterState,
ClusterState clusterState,
ClusterMetadataManifest previousManifest
) throws IOException {
final long startTimeNanos = relativeTimeNanosSupplier.getAsLong();
if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) {
logger.error("Local node is not elected cluster manager. Exiting");
return null;
}
// Incremental writes are only expected between states of the same term.
assert previousClusterState.metadata().coordinationMetadata().term() == clusterState.metadata().coordinationMetadata().term();
// Index name -> metadata version in the previous state. Entries are removed as current indices are visited,
// so whatever is left afterwards belongs to indices that no longer exist.
final Map previousStateIndexMetadataVersionByName = new HashMap<>();
for (final IndexMetadata indexMetadata : previousClusterState.metadata().indices().values()) {
previousStateIndexMetadataVersionByName.put(indexMetadata.getIndex().getName(), indexMetadata.getVersion());
}
int numIndicesUpdated = 0;
int numIndicesUnchanged = 0;
// Start from the previous manifest's entries; new uploads overwrite them and removed indices are dropped below.
final Map allUploadedIndexMetadata = previousManifest.getIndices()
.stream()
.collect(Collectors.toMap(UploadedIndexMetadata::getIndexName, Function.identity()));
List toUpload = new ArrayList<>();
for (final IndexMetadata indexMetadata : clusterState.metadata().indices().values()) {
final Long previousVersion = previousStateIndexMetadataVersionByName.get(indexMetadata.getIndex().getName());
// A missing previous version means the index is new; a different version means it changed.
if (previousVersion == null || indexMetadata.getVersion() != previousVersion) {
logger.trace(
"updating metadata for [{}], changing version from [{}] to [{}]",
indexMetadata.getIndex(),
previousVersion,
indexMetadata.getVersion()
);
numIndicesUpdated++;
toUpload.add(indexMetadata);
} else {
numIndicesUnchanged++;
}
previousStateIndexMetadataVersionByName.remove(indexMetadata.getIndex().getName());
}
List uploadedIndexMetadataList = writeIndexMetadataParallel(clusterState, toUpload);
uploadedIndexMetadataList.forEach(
uploadedIndexMetadata -> allUploadedIndexMetadata.put(uploadedIndexMetadata.getIndexName(), uploadedIndexMetadata)
);
// Names still present here were in the previous state but not in the current one.
for (String removedIndexName : previousStateIndexMetadataVersionByName.keySet()) {
allUploadedIndexMetadata.remove(removedIndexName);
}
final ClusterMetadataManifest manifest = uploadManifest(
clusterState,
new ArrayList<>(allUploadedIndexMetadata.values()),
previousManifest.getPreviousClusterUUID(),
false
);
// Cleanup is triggered only after the new manifest has been uploaded.
deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS);
final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos);
if (durationMillis >= slowWriteLoggingThreshold.getMillis()) {
logger.warn(
"writing cluster state took [{}ms] which is above the warn threshold of [{}]; "
+ "wrote metadata for [{}] indices and skipped [{}] unchanged indices",
durationMillis,
slowWriteLoggingThreshold,
numIndicesUpdated,
numIndicesUnchanged
);
} else {
logger.trace(
"writing cluster state took [{}ms]; " + "wrote metadata for [{}] indices and skipped [{}] unchanged indices",
durationMillis,
numIndicesUpdated,
numIndicesUnchanged
);
}
return manifest;
}
/**
 * Uploads the provided IndexMetadata to remote store in parallel. The call is blocking: it returns once every upload
 * has reported back, or fails once {@link #INDEX_METADATA_UPLOAD_WAIT_MILLIS} has elapsed.
 *
 * @param clusterState current ClusterState
 * @param toUpload list of IndexMetadata to upload
 * @return list of {@link UploadedIndexMetadata} describing the files uploaded to remote
 * @throws IndexMetadataTransferException if any upload fails, the wait times out or the waiting thread is interrupted
 */
private List<UploadedIndexMetadata> writeIndexMetadataParallel(ClusterState clusterState, List<IndexMetadata> toUpload)
    throws IOException {
    final List<Exception> exceptionList = Collections.synchronizedList(new ArrayList<>(toUpload.size()));
    // Upload callbacks are delivered asynchronously, so successes are collected in a synchronized list
    // exactly like the failures above.
    final List<UploadedIndexMetadata> result = Collections.synchronizedList(new ArrayList<>(toUpload.size()));
    final CountDownLatch latch = new CountDownLatch(toUpload.size());
    // Built lazily: the index list is only needed for failure messages.
    final Supplier<String> indexNames = () -> toUpload.stream()
        .map(IndexMetadata::getIndex)
        .map(Index::toString)
        .collect(Collectors.joining(", "));

    final LatchedActionListener<UploadedIndexMetadata> latchedActionListener = new LatchedActionListener<>(
        ActionListener.wrap((UploadedIndexMetadata uploadedIndexMetadata) -> {
            logger.trace("IndexMetadata uploaded successfully for {}", uploadedIndexMetadata.getIndexName());
            result.add(uploadedIndexMetadata);
        }, ex -> {
            assert ex instanceof IndexMetadataTransferException;
            logger.error(
                () -> new ParameterizedMessage("Exception during transfer of IndexMetadata to Remote {}", ex.getMessage()),
                ex
            );
            exceptionList.add(ex);
        }),
        latch
    );

    for (IndexMetadata indexMetadata : toUpload) {
        // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index/ftqsCnn9TgOX/metadata__4__1690947200
        writeIndexMetadataAsync(clusterState, indexMetadata, latchedActionListener);
    }

    try {
        if (latch.await(INDEX_METADATA_UPLOAD_WAIT_MILLIS, TimeUnit.MILLISECONDS) == false) {
            IndexMetadataTransferException ex = new IndexMetadataTransferException(
                String.format(
                    Locale.ROOT,
                    "Timed out waiting for transfer of index metadata to complete - %s",
                    indexNames.get()
                )
            );
            exceptionList.forEach(ex::addSuppressed);
            throw ex;
        }
    } catch (InterruptedException ex) {
        exceptionList.forEach(ex::addSuppressed);
        IndexMetadataTransferException exception = new IndexMetadataTransferException(
            String.format(
                Locale.ROOT,
                "Interrupted while waiting for transfer of index metadata to complete - %s",
                indexNames.get()
            ),
            ex
        );
        // Restore the interrupt flag so callers further up can observe the interruption.
        Thread.currentThread().interrupt();
        throw exception;
    }

    if (exceptionList.isEmpty() == false) {
        IndexMetadataTransferException exception = new IndexMetadataTransferException(
            String.format(Locale.ROOT, "Exception during transfer of IndexMetadata to Remote %s", indexNames.get())
        );
        exceptionList.forEach(exception::addSuppressed);
        throw exception;
    }
    // Hand back a plain list; all callbacks have completed once the latch has opened.
    return new ArrayList<>(result);
}
/**
 * Starts an asynchronous upload of one IndexMetadata and reports the outcome to the given listener.
 *
 * @param clusterState current ClusterState
 * @param indexMetadata {@link IndexMetadata} to upload
 * @param latchedActionListener listener notified with the uploaded file details, or with an
 *                              {@code IndexMetadataTransferException} wrapping the failure
 */
private void writeIndexMetadataAsync(
    ClusterState clusterState,
    IndexMetadata indexMetadata,
    LatchedActionListener latchedActionListener
) throws IOException {
    final String clusterName = clusterState.getClusterName().value();
    final String clusterUUID = clusterState.metadata().clusterUUID();
    final BlobContainer container = indexMetadataContainer(clusterName, clusterUUID, indexMetadata.getIndexUUID());
    final String fileName = indexMetadataFileName(indexMetadata);

    ActionListener completionListener = ActionListener.wrap(resp -> {
        final String uploadedFilename = container.path().buildAsString() + fileName;
        latchedActionListener.onResponse(
            new UploadedIndexMetadata(indexMetadata.getIndex().getName(), indexMetadata.getIndexUUID(), uploadedFilename)
        );
    }, ex -> {
        latchedActionListener.onFailure(new IndexMetadataTransferException(indexMetadata.getIndex().toString(), ex));
    });

    INDEX_METADATA_FORMAT.writeAsync(indexMetadata, container, fileName, blobStoreRepository.getCompressor(), completionListener);
}
/**
 * Uploads a manifest flagged as committed that reuses the index entries and previous cluster UUID of the
 * given manifest, then triggers deletion of stale cluster UUIDs.
 *
 * @param clusterState last accepted cluster state
 * @param previousManifest last uploaded manifest
 * @return the committed manifest, or {@code null} when the local node is not the elected cluster manager
 */
@Nullable
public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest)
    throws IOException {
    assert clusterState != null : "Last accepted cluster state is not set";
    final boolean electedClusterManager = clusterState.nodes().isLocalNodeElectedClusterManager();
    if (!electedClusterManager) {
        logger.error("Local node is not elected cluster manager. Exiting");
        return null;
    }
    assert previousManifest != null : "Last cluster metadata manifest is not set";

    final List uploadedIndices = previousManifest.getIndices();
    final String previousClusterUUID = previousManifest.getPreviousClusterUUID();
    final ClusterMetadataManifest committedManifest = uploadManifest(clusterState, uploadedIndices, previousClusterUUID, true);
    deleteStaleClusterUUIDs(clusterState, committedManifest);
    return committedManifest;
}
/**
 * Closes the blob store repository if one has been resolved; otherwise does nothing.
 */
@Override
public void close() throws IOException {
    if (blobStoreRepository == null) {
        return;
    }
    IOUtils.close(blobStoreRepository);
}
/**
 * Resolves the repository named by the remote cluster state node attribute and stores it as the
 * blob store repository used by this service.
 */
public void start() {
    assert isRemoteStoreClusterStateEnabled(settings) == true : "Remote cluster state is not enabled";

    final String repositoryNameKey = Node.NODE_ATTRIBUTES.getKey()
        + RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY;
    final String remoteStoreRepo = settings.get(repositoryNameKey);
    assert remoteStoreRepo != null : "Remote Cluster State repository is not configured";

    final Repository repository = repositoriesService.get().repository(remoteStoreRepo);
    assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository";
    this.blobStoreRepository = (BlobStoreRepository) repository;
}
/**
 * Builds a manifest for the given cluster state and writes it to the manifest folder while holding this
 * instance's monitor.
 *
 * @param clusterState cluster state the manifest describes
 * @param uploadedIndexMetadata entries for the index metadata files the manifest references
 * @param previousClusterUUID previous cluster UUID to record
 * @param committed whether the manifest is flagged as committed
 * @return the manifest that was written
 */
private ClusterMetadataManifest uploadManifest(
    ClusterState clusterState,
    List uploadedIndexMetadata,
    String previousClusterUUID,
    boolean committed
) throws IOException {
    synchronized (this) {
        final String manifestFileName = getManifestFileName(clusterState.term(), clusterState.version());
        final String clusterUUID = clusterState.metadata().clusterUUID();
        final String clusterName = clusterState.getClusterName().value();
        final ClusterMetadataManifest manifest = new ClusterMetadataManifest(
            clusterState.term(),
            clusterState.getVersion(),
            clusterUUID,
            clusterState.stateUUID(),
            Version.CURRENT,
            nodeId,
            committed,
            uploadedIndexMetadata,
            previousClusterUUID,
            clusterState.metadata().clusterUUIDCommitted()
        );
        writeMetadataManifest(clusterName, clusterUUID, manifest, manifestFileName);
        return manifest;
    }
}
/**
 * Writes a manifest under the given file name into the manifest folder of the cluster UUID.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of the cluster state the manifest belongs to
 * @param uploadManifest manifest to write
 * @param fileName blob name for the manifest
 */
private void writeMetadataManifest(String clusterName, String clusterUUID, ClusterMetadataManifest uploadManifest, String fileName)
    throws IOException {
    CLUSTER_METADATA_MANIFEST_FORMAT.write(
        uploadManifest,
        manifestContainer(clusterName, clusterUUID),
        fileName,
        blobStoreRepository.getCompressor()
    );
}
/**
 * Returns the previous cluster UUID recorded in the latest manifest of the given cluster UUID, or, when no
 * manifest exists for it, the last known cluster UUID found in remote store.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of the current cluster state
 * @return the previous cluster UUID
 */
private String fetchPreviousClusterUUID(String clusterName, String clusterUUID) {
    final Optional latestManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID);
    if (latestManifest.isPresent()) {
        return latestManifest.get().getPreviousClusterUUID();
    }
    final String previousClusterUUID = getLastKnownUUIDFromRemote(clusterName);
    assert !clusterUUID.equals(previousClusterUUID) : "Last cluster UUID is same current cluster UUID";
    return previousClusterUUID;
}
/**
 * Returns the blob container that holds the metadata files of one index.
 */
private BlobContainer indexMetadataContainer(String clusterName, String clusterUUID, String indexUUID) {
    // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index/ftqsCnn9TgOX
    final BlobPath indexPath = getCusterMetadataBasePath(clusterName, clusterUUID).add(INDEX_PATH_TOKEN).add(indexUUID);
    return blobStoreRepository.blobStore().blobContainer(indexPath);
}
/**
 * Returns the blob container that holds the manifest files of one cluster UUID.
 */
private BlobContainer manifestContainer(String clusterName, String clusterUUID) {
    // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest
    final BlobPath manifestFolder = getManifestFolderPath(clusterName, clusterUUID);
    return blobStoreRepository.blobStore().blobContainer(manifestFolder);
}
/**
 * Builds the base path of one cluster UUID: repository base path, encoded cluster name,
 * the cluster-state token and finally the cluster UUID.
 */
private BlobPath getCusterMetadataBasePath(String clusterName, String clusterUUID) {
    final String encodedClusterName = encodeString(clusterName);
    final BlobPath clusterStatePath = blobStoreRepository.basePath().add(encodedClusterName).add(CLUSTER_STATE_PATH_TOKEN);
    return clusterStatePath.add(clusterUUID);
}
/**
 * Returns the blob container whose children are the cluster UUID folders of the given cluster.
 * The cluster name is encoded with {@link #encodeString(String)}, the same helper used when the
 * per-UUID base path is built, so both always resolve to the same folder.
 *
 * @param clusterName name of the cluster
 * @return container located at base path / encoded cluster name / cluster-state
 */
private BlobContainer clusterUUIDContainer(String clusterName) {
    final BlobPath clusterStatePath = blobStoreRepository.basePath().add(encodeString(clusterName)).add(CLUSTER_STATE_PATH_TOKEN);
    return blobStoreRepository.blobStore().blobContainer(clusterStatePath);
}
/**
* Replaces the threshold above which a metadata write is logged as slow.
* Registered in the constructor as the update consumer for the threshold setting.
*
* @param slowWriteLoggingThreshold new threshold
*/
private void setSlowWriteLoggingThreshold(TimeValue slowWriteLoggingThreshold) {
this.slowWriteLoggingThreshold = slowWriteLoggingThreshold;
}
/**
 * Builds a manifest file name from the term/version prefix followed by the inverted current time in milliseconds.
 */
private static String getManifestFileName(long term, long version) {
    // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest_2147483642_2147483637_456536447
    final String prefix = getManifestFileNamePrefix(term, version);
    final String invertedTimestamp = RemoteStoreUtils.invertLong(System.currentTimeMillis());
    return String.join(DELIMITER, prefix, invertedTimestamp);
}
/**
 * Builds the leading part of a manifest file name: the manifest file prefix, the inverted term and the
 * inverted version, joined by {@link #DELIMITER}.
 * <p>
 * The name starts with {@link #MANIFEST_FILE_PREFIX}, the same constant used to filter manifest files when
 * they are listed, so file naming and listing cannot drift apart.
 *
 * @param term cluster state term
 * @param version cluster state version
 * @return manifest file name prefix
 */
private static String getManifestFileNamePrefix(long term, long version) {
    // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest__<inverted term>__<inverted version>
    return String.join(DELIMITER, MANIFEST_FILE_PREFIX, RemoteStoreUtils.invertLong(term), RemoteStoreUtils.invertLong(version));
}
/**
 * Builds an index metadata file name from the file prefix, the metadata version and the current time
 * in milliseconds.
 */
private static String indexMetadataFileName(IndexMetadata indexMetadata) {
    final String version = String.valueOf(indexMetadata.getVersion());
    final String timestamp = String.valueOf(System.currentTimeMillis());
    return String.join(DELIMITER, INDEX_METADATA_FILE_PREFIX, version, timestamp);
}
/**
 * Returns the path of the manifest folder below the base path of the given cluster UUID.
 */
private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) {
    final BlobPath clusterBasePath = getCusterMetadataBasePath(clusterName, clusterUUID);
    return clusterBasePath.add(MANIFEST_PATH_TOKEN);
}
/**
 * Downloads the index metadata referenced by the latest manifest of the given cluster UUID.
 * Calls {@link #start()} before reading from remote store.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of cluster state to refer to in remote
 * @return map from index UUID to the downloaded IndexMetadata
 * @throws IllegalStateException if no manifest exists for the cluster UUID
 */
public Map getLatestIndexMetadata(String clusterName, String clusterUUID) throws IOException {
    start();
    final Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID);
    if (clusterMetadataManifest.isPresent() == false) {
        throw new IllegalStateException("Latest index metadata is not present for the provided clusterUUID");
    }
    assert Objects.equals(clusterUUID, clusterMetadataManifest.get().getClusterUUID())
        : "Corrupt ClusterMetadataManifest found. Cluster UUID mismatch.";

    final Map remoteIndexMetadata = new HashMap<>();
    for (UploadedIndexMetadata uploaded : clusterMetadataManifest.get().getIndices()) {
        remoteIndexMetadata.put(uploaded.getIndexUUID(), getIndexMetadata(clusterName, clusterUUID, uploaded));
    }
    return remoteIndexMetadata;
}
/**
 * Downloads one index metadata file. The blob name is the last {@code /}-separated segment of the
 * uploaded file name stored in the manifest entry.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of cluster state to refer to in remote
 * @param uploadedIndexMetadata {@link UploadedIndexMetadata} contains details about remote location of index metadata
 * @return {@link IndexMetadata}
 * @throws IllegalStateException if reading the blob fails with an {@link IOException}
 */
private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, UploadedIndexMetadata uploadedIndexMetadata) {
    try {
        final String[] pathSegments = uploadedIndexMetadata.getUploadedFilename().split("/");
        final String blobName = pathSegments[pathSegments.length - 1];
        final BlobContainer container = indexMetadataContainer(clusterName, clusterUUID, uploadedIndexMetadata.getIndexUUID());
        return INDEX_METADATA_FORMAT.read(container, blobName, blobStoreRepository.getNamedXContentRegistry());
    } catch (IOException e) {
        final String message = String.format(
            Locale.ROOT,
            "Error while downloading IndexMetadata - %s",
            uploadedIndexMetadata.getUploadedFilename()
        );
        throw new IllegalStateException(message, e);
    }
}
/**
 * Looks up the newest manifest file of the given cluster UUID and downloads it.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of cluster state to refer to in remote
 * @return the latest ClusterMetadataManifest, or an empty Optional when no manifest file exists
 */
public Optional getLatestClusterMetadataManifest(String clusterName, String clusterUUID) {
    final Optional latestManifestFileName = getLatestManifestFileName(clusterName, clusterUUID);
    if (!latestManifestFileName.isPresent()) {
        return Optional.empty();
    }
    return Optional.of(fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, latestManifestFileName.get()));
}
/**
 * Fetch the previous cluster UUIDs from remote state store and return the most recent valid cluster UUID
 *
 * @param clusterName The cluster name for which previous cluster UUID is to be fetched
 * @return Last valid cluster UUID, or {@code ClusterState.UNKNOWN_UUID} when no valid chain exists
 * @throws IllegalStateException if the cluster UUIDs cannot be listed; the underlying {@link IOException} is the cause
 */
public String getLastKnownUUIDFromRemote(String clusterName) {
    try {
        final Set<String> clusterUUIDs = getAllClusterUUIDs(clusterName);
        final Map<String, ClusterMetadataManifest> latestManifests = getLatestManifestForAllClusterUUIDs(clusterName, clusterUUIDs);
        final List<String> validChain = createClusterChain(latestManifests, clusterName);
        if (validChain.isEmpty()) {
            return ClusterState.UNKNOWN_UUID;
        }
        // The chain is ordered newest first.
        return validChain.get(0);
    } catch (IOException e) {
        // Keep the original failure as the cause so the root problem stays visible in logs.
        throw new IllegalStateException(
            String.format(Locale.ROOT, "Error while fetching previous UUIDs from remote store for cluster name: %s", clusterName),
            e
        );
    }
}
/**
 * Lists the names of the child containers below the cluster's cluster-state folder.
 *
 * @param clusterName name of the cluster
 * @return unmodifiable set of child names, or an empty set when the listing is {@code null}
 */
private Set getAllClusterUUIDs(String clusterName) throws IOException {
    final Map clusterUUIDMetadata = clusterUUIDContainer(clusterName).children();
    if (clusterUUIDMetadata != null) {
        return Collections.unmodifiableSet(clusterUUIDMetadata.keySet());
    }
    return Collections.emptySet();
}
/**
 * Downloads the latest manifest of every given cluster UUID. Cluster UUIDs without any manifest are left out
 * of the result.
 *
 * @param clusterName name of the cluster
 * @param clusterUUIDs cluster UUIDs to look up
 * @return map from cluster UUID to its latest manifest
 * @throws IllegalStateException if fetching a manifest fails; the original exception is attached as the cause
 */
private Map<String, ClusterMetadataManifest> getLatestManifestForAllClusterUUIDs(String clusterName, Set<String> clusterUUIDs) {
    final Map<String, ClusterMetadataManifest> manifestsByClusterUUID = new HashMap<>();
    for (String clusterUUID : clusterUUIDs) {
        try {
            final Optional<ClusterMetadataManifest> manifest = getLatestClusterMetadataManifest(clusterName, clusterUUID);
            manifest.ifPresent(clusterMetadataManifest -> manifestsByClusterUUID.put(clusterUUID, clusterMetadataManifest));
        } catch (Exception e) {
            // Chain the original exception; without it the reason for the failure is lost.
            throw new IllegalStateException(
                String.format(Locale.ROOT, "Exception in fetching manifest for clusterUUID: %s", clusterUUID),
                e
            );
        }
    }
    return manifestsByClusterUUID;
}
/**
* This method creates a valid cluster UUID chain.
*
* @param manifestsByClusterUUID Map of latest ClusterMetadataManifest for every cluster UUID
* @param clusterName name of the cluster, forwarded to {@code trimClusterUUIDs} when several candidates exist
* @return List of cluster UUIDs. The first element is the most recent cluster UUID in the chain
* @throws IllegalStateException if more than one valid cluster UUID exists and trimming removes none of them
*/
private List createClusterChain(final Map manifestsByClusterUUID, final String clusterName) {
// Maps each cluster UUID to the previous cluster UUID recorded in its latest manifest.
final Map clusterUUIDGraph = manifestsByClusterUUID.values()
.stream()
.collect(Collectors.toMap(ClusterMetadataManifest::getClusterUUID, ClusterMetadataManifest::getPreviousClusterUUID));
// Candidates for the head of the chain: committed cluster UUIDs that no other manifest names as its previous UUID.
final List validClusterUUIDs = manifestsByClusterUUID.values()
.stream()
.filter(m -> !isInvalidClusterUUID(m) && !clusterUUIDGraph.containsValue(m.getClusterUUID()))
.map(ClusterMetadataManifest::getClusterUUID)
.collect(Collectors.toList());
if (validClusterUUIDs.isEmpty()) {
logger.info("There is no valid previous cluster UUID");
return Collections.emptyList();
}
if (validClusterUUIDs.size() > 1) {
// If there is more than one valid cluster UUID, it means there was some race condition where
// two or more cluster manager nodes tried to become active cluster manager and published
// cluster UUIDs which followed the same previous UUID.
final Map manifestsByClusterUUIDTrimmed = trimClusterUUIDs(
manifestsByClusterUUID,
validClusterUUIDs,
clusterName
);
// Trimming removed nothing, so recursing would never terminate; report the ambiguity instead.
if (manifestsByClusterUUID.size() == manifestsByClusterUUIDTrimmed.size()) {
throw new IllegalStateException(
String.format(
Locale.ROOT,
"The system has ended into multiple valid cluster states in the remote store. "
+ "Please check their latest manifest to decide which one you want to keep. Valid Cluster UUIDs: - %s",
validClusterUUIDs
)
);
}
return createClusterChain(manifestsByClusterUUIDTrimmed, clusterName);
}
final List validChain = new ArrayList<>();
String currentUUID = validClusterUUIDs.get(0);
// Follow previous-UUID links until there is no entry for the UUID or the unknown UUID marker is reached.
while (currentUUID != null && !ClusterState.UNKNOWN_UUID.equals(currentUUID)) {
validChain.add(currentUUID);
// Getting the previous cluster UUID of a cluster UUID from the clusterUUID Graph
currentUUID = clusterUUIDGraph.get(currentUUID);
}
return validChain;
}
/**
* This method takes a map of manifests for different cluster UUIDs and removes the
* manifest of a cluster UUID if the latest metadata for that cluster UUID is equivalent
* to the latest metadata of its previous UUID.
*
* @param latestManifestsByClusterUUID latest manifest for every cluster UUID; not modified, a copy is trimmed
* @param validClusterUUIDs cluster UUIDs to examine
* @param clusterName name of the cluster, needed to download index metadata for the comparison
* @return Trimmed map of manifests
*/
private Map trimClusterUUIDs(
final Map latestManifestsByClusterUUID,
final List validClusterUUIDs,
final String clusterName
) {
final Map trimmedUUIDs = new HashMap<>(latestManifestsByClusterUUID);
for (String clusterUUID : validClusterUUIDs) {
ClusterMetadataManifest currentManifest = trimmedUUIDs.get(clusterUUID);
// Here we compare the manifest of current UUID to that of previous UUID
// In case currentUUID's latest manifest is same as previous UUIDs latest manifest,
// that means it was restored from previousUUID and no IndexMetadata update was performed on it.
if (ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) {
// No previous UUID to compare against: the UUID is dropped only when it references no indices.
if (currentManifest.getIndices().isEmpty()) {
trimmedUUIDs.remove(clusterUUID);
}
} else {
// NOTE(review): previousManifest is null when the previous UUID has no entry in the map, and
// isMetadataEqual dereferences it unconditionally - confirm callers guarantee the entry exists.
ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID());
if (isMetadataEqual(currentManifest, previousManifest, clusterName)) {
trimmedUUIDs.remove(clusterUUID);
}
}
}
return trimmedUUIDs;
}
/**
 * Checks whether two manifests reference equal index metadata. The manifests are equal when they list the
 * same number of indices and, for every index name in {@code first}, {@code second} has an entry with the
 * same name whose downloaded IndexMetadata equals the one downloaded for {@code first}.
 *
 * @param first manifest to compare
 * @param second manifest to compare against
 * @param clusterName name of the cluster, needed to locate the index metadata files
 * @return {@code true} when both manifests reference equal index metadata
 */
private boolean isMetadataEqual(ClusterMetadataManifest first, ClusterMetadataManifest second, String clusterName) {
    // todo clusterName can be set as final in the constructor
    if (first.getIndices().size() != second.getIndices().size()) {
        return false;
    }
    final Map<String, UploadedIndexMetadata> secondIndices = second.getIndices()
        .stream()
        .collect(Collectors.toMap(UploadedIndexMetadata::getIndexName, Function.identity()));
    for (UploadedIndexMetadata uploadedIndexMetadata : first.getIndices()) {
        final UploadedIndexMetadata secondUploadedIndexMetadata = secondIndices.get(uploadedIndexMetadata.getIndexName());
        if (secondUploadedIndexMetadata == null) {
            // Decide on the manifest entries alone before downloading anything from remote store.
            return false;
        }
        final IndexMetadata firstIndexMetadata = getIndexMetadata(clusterName, first.getClusterUUID(), uploadedIndexMetadata);
        final IndexMetadata secondIndexMetadata = getIndexMetadata(clusterName, second.getClusterUUID(), secondUploadedIndexMetadata);
        if (firstIndexMetadata.equals(secondIndexMetadata) == false) {
            return false;
        }
    }
    return true;
}
/**
 * A cluster UUID counts as invalid when its manifest is not flagged as cluster-UUID-committed.
 */
private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) {
    return manifest.isClusterUUIDCommitted() == false;
}
/**
 * Lists manifest files of a cluster UUID in lexicographic order, newest first.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of cluster state to refer to in remote
 * @param limit max no of files to fetch
 * @return all manifest file names
 * @throws IllegalStateException if the listing fails with an {@link IOException}
 */
private List getManifestFileNames(String clusterName, String clusterUUID, int limit) throws IllegalStateException {
    final String manifestPrefix = MANIFEST_FILE_PREFIX + DELIMITER;
    try {
        /**
         * {@link BlobContainer#listBlobsByPrefixInSortedOrder} will list the latest manifest file first
         * as the manifest file name generated via {@link RemoteClusterStateService#getManifestFileName} ensures
         * when sorted in LEXICOGRAPHIC order the latest uploaded manifest file comes on top.
         */
        final BlobContainer container = manifestContainer(clusterName, clusterUUID);
        return container.listBlobsByPrefixInSortedOrder(manifestPrefix, limit, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC);
    } catch (IOException e) {
        throw new IllegalStateException("Error while fetching latest manifest file for remote cluster state", e);
    }
}
/**
 * Returns the name of the newest manifest file of a cluster UUID.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of cluster state to refer to in remote
 * @return latest ClusterMetadataManifest filename, or an empty Optional when no manifest file exists
 */
private Optional getLatestManifestFileName(String clusterName, String clusterUUID) throws IllegalStateException {
    final List manifestFilesMetadata = getManifestFileNames(clusterName, clusterUUID, 1);
    if (manifestFilesMetadata == null || manifestFilesMetadata.isEmpty()) {
        logger.info("No manifest file present in remote store for cluster name: {}, cluster UUID: {}", clusterName, clusterUUID);
        return Optional.empty();
    }
    return Optional.of(manifestFilesMetadata.get(0).name());
}
/**
 * Downloads and parses one manifest file.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of cluster state to refer to in remote
 * @param filename name of the manifest blob to read
 * @return ClusterMetadataManifest
 * @throws IllegalStateException if reading the blob fails with an {@link IOException}
 */
private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename)
    throws IllegalStateException {
    try {
        final BlobContainer container = manifestContainer(clusterName, clusterUUID);
        return CLUSTER_METADATA_MANIFEST_FORMAT.read(container, filename, blobStoreRepository.getNamedXContentRegistry());
    } catch (IOException e) {
        final String message = String.format(Locale.ROOT, "Error while downloading cluster metadata - %s", filename);
        throw new IllegalStateException(message, e);
    }
}
/**
 * Encodes text as URL-safe Base64 without padding, using the UTF-8 bytes of the input.
 *
 * @param content text to encode
 * @return URL-safe, unpadded Base64 form of {@code content}
 */
public static String encodeString(String content) {
    final Base64.Encoder urlSafeEncoder = Base64.getUrlEncoder().withoutPadding();
    final byte[] utf8Bytes = content.getBytes(StandardCharsets.UTF_8);
    return urlSafeEncoder.encodeToString(utf8Bytes);
}
/**
* Exception for IndexMetadata transfer failures to remote
*/
static class IndexMetadataTransferException extends RuntimeException {
/**
* @param errorDesc description of the failure
*/
public IndexMetadataTransferException(String errorDesc) {
super(errorDesc);
}
/**
* @param errorDesc description of the failure
* @param cause underlying exception that made the transfer fail
*/
public IndexMetadataTransferException(String errorDesc, Throwable cause) {
super(errorDesc, cause);
}
}
/**
* Purges all remote cluster state against provided cluster UUIDs.
* One asynchronous delete is submitted per cluster UUID; the outcome of each delete is only logged.
*
* @param clusterName name of the cluster
* @param clusterUUIDs cluster UUIDs for which the remote state needs to be purged
*/
private void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) {
clusterUUIDs.forEach(clusterUUID -> {
// Deletes everything below the base path of this cluster UUID.
getBlobStoreTransferService().deleteAsync(
ThreadPool.Names.REMOTE_PURGE,
getCusterMetadataBasePath(clusterName, clusterUUID),
new ActionListener<>() {
@Override
public void onResponse(Void unused) {
logger.info("Deleted all remote cluster metadata for cluster UUID - {}", clusterUUID);
}
@Override
public void onFailure(Exception e) {
// A failed delete is logged and not rethrown.
logger.error(
new ParameterizedMessage(
"Exception occurred while deleting all remote cluster metadata for cluster UUID {}",
clusterUUID
),
e
);
}
}
);
});
}
/**
 * Deletes manifests older than the last {@code manifestsToRetain} ones. Also cleans up unreferenced IndexMetadata
 * associated with the older manifests.
 * <p>
 * Only one cleanup runs at a time: {@code deleteStaleMetadataRunning} is claimed before the manifest listing is
 * submitted and released when the asynchronous listener finishes, or immediately if submitting the listing fails.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of cluster state to refer to in remote
 * @param manifestsToRetain no of latest manifest files to keep in remote
 */
private void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) {
    if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) {
        logger.info("Delete stale cluster metadata task is already in progress.");
        return;
    }
    try {
        getBlobStoreTransferService().listAllInSortedOrderAsync(
            ThreadPool.Names.REMOTE_PURGE,
            getManifestFolderPath(clusterName, clusterUUID),
            "manifest",
            Integer.MAX_VALUE,
            new ActionListener<>() {
                @Override
                public void onResponse(List blobMetadata) {
                    try {
                        if (blobMetadata.size() > manifestsToRetain) {
                            // The first manifestsToRetain entries of the listing are kept, the rest are stale.
                            // NOTE(review): assumes the listing is ordered newest-first - confirm against
                            // listAllInSortedOrderAsync.
                            deleteClusterMetadata(
                                clusterName,
                                clusterUUID,
                                blobMetadata.subList(0, manifestsToRetain),
                                blobMetadata.subList(manifestsToRetain, blobMetadata.size())
                            );
                        }
                    } finally {
                        // Release the guard even if the cleanup above throws unexpectedly.
                        deleteStaleMetadataRunning.set(false);
                    }
                }

                @Override
                public void onFailure(Exception e) {
                    logger.error(
                        new ParameterizedMessage(
                            "Exception occurred while deleting Remote Cluster Metadata for clusterUUIDs {}",
                            clusterUUID
                        ),
                        e
                    );
                    deleteStaleMetadataRunning.set(false);
                }
            }
        );
    } catch (Exception e) {
        // The listing was never submitted, so the listener will not run; release the guard here.
        // On success the guard must stay set until the listener completes.
        deleteStaleMetadataRunning.set(false);
        throw e;
    }
}
/**
 * Deletes the given stale manifests together with the index metadata files that none of the active manifests
 * reference. Failures are logged and not propagated to the caller.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of cluster state to refer to in remote
 * @param activeManifestBlobMetadata blobs of the manifests that are kept
 * @param staleManifestBlobMetadata blobs of the manifests that are removed
 */
private void deleteClusterMetadata(
    String clusterName,
    String clusterUUID,
    List activeManifestBlobMetadata,
    List staleManifestBlobMetadata
) {
    try {
        final Set<String> referencedIndexFiles = new HashSet<>();
        final Set<String> manifestPathsToDelete = new HashSet<>();
        final Set<String> indexMetadataPathsToDelete = new HashSet<>();

        // Collect every index metadata file that a retained manifest still points to.
        activeManifestBlobMetadata.forEach(activeBlob -> {
            ClusterMetadataManifest activeManifest = fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, activeBlob.name());
            activeManifest.getIndices().forEach(indexEntry -> referencedIndexFiles.add(indexEntry.getUploadedFilename()));
        });

        // Each stale manifest is scheduled for deletion, along with its index files that are no longer referenced.
        staleManifestBlobMetadata.forEach(staleBlob -> {
            ClusterMetadataManifest staleManifest = fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, staleBlob.name());
            String manifestDir = new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString();
            manifestPathsToDelete.add(manifestDir + staleBlob.name());
            staleManifest.getIndices().forEach(indexEntry -> {
                String uploadedFilename = indexEntry.getUploadedFilename();
                if (referencedIndexFiles.contains(uploadedFilename)) {
                    return;
                }
                String indexDir = new BlobPath().add(INDEX_PATH_TOKEN).add(indexEntry.getIndexUUID()).buildAsString();
                indexMetadataPathsToDelete.add(indexDir + uploadedFilename + ".dat");
            });
        });

        if (manifestPathsToDelete.isEmpty()) {
            logger.info("No stale Remote Cluster Metadata files found");
            return;
        }

        // Index metadata files go first, then the manifests that referenced them.
        deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(indexMetadataPathsToDelete));
        deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(manifestPathsToDelete));
    } catch (IllegalStateException e) {
        logger.error("Error while fetching Remote Cluster Metadata manifests", e);
    } catch (IOException e) {
        logger.error("Error while deleting stale Remote Cluster Metadata files", e);
    } catch (Exception e) {
        logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e);
    }
}
/**
 * Deletes the given blobs under the cluster metadata base path of the given cluster UUID.
 *
 * @param clusterName name of the cluster
 * @param clusterUUID uuid of cluster state to refer to in remote
 * @param stalePaths paths of the blobs to delete, passed to the transfer service together with the base path
 * @throws IOException if the transfer service fails to delete the blobs
 */
private void deleteStalePaths(String clusterName, String clusterUUID, List<String> stalePaths) throws IOException {
    // Parameterized logging: the (possibly long) path list is only rendered when debug is enabled.
    logger.debug("Deleting stale files from remote - {}", stalePaths);
    getBlobStoreTransferService().deleteBlobs(getCusterMetadataBasePath(clusterName, clusterUUID), stalePaths);
}
/**
 * Purges remote cluster state of every cluster UUID found in remote for this cluster name, except the UUID of the
 * committed manifest and its previous cluster UUID. The work is submitted to the {@code REMOTE_PURGE} executor.
 *
 * @param clusterState current state of the cluster
 * @param committedManifest last committed ClusterMetadataManifest
 */
public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataManifest committedManifest) {
    threadpool.executor(ThreadPool.Names.REMOTE_PURGE).execute(() -> {
        String clusterName = clusterState.getClusterName().value();
        logger.info("Deleting stale cluster UUIDs data from remote [{}]", clusterName);
        Set<String> allClustersUUIDsInRemote;
        try {
            allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterName));
        } catch (IOException e) {
            // Nothing can be purged without the listing; log the cause instead of dropping it.
            logger.error(String.format(Locale.ROOT, "Error while fetching all cluster UUIDs for [%s]", clusterName), e);
            return;
        }
        // Retain last 2 cluster uuids data
        allClustersUUIDsInRemote.remove(committedManifest.getClusterUUID());
        allClustersUUIDsInRemote.remove(committedManifest.getPreviousClusterUUID());
        deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote));
    });
}
}