All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.gateway.AsyncShardFetch Maven / Gradle / Ivy

There is a newer version: 8.16.0
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */
package org.elasticsearch.gateway;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.ElasticsearchTimeoutException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.FailedNodeException;
import org.elasticsearch.action.support.nodes.BaseNodeResponse;
import org.elasticsearch.action.support.nodes.BaseNodesResponse;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.transport.ReceiveTimeoutTransportException;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import java.util.stream.Collectors;

import static java.util.Collections.emptySet;
import static org.elasticsearch.core.Strings.format;

/**
 * Allows to asynchronously fetch shard related data from other nodes for allocation, without blocking
 * the cluster update thread.
 * 

* The async fetch logic maintains a map of which nodes are being fetched from in an async manner, * and once the results are back, it makes sure to schedule a reroute to make sure those results will * be taken into account. */ public abstract class AsyncShardFetch implements Releasable { protected final Logger logger; protected final String type; protected final ShardId shardId; protected final String customDataPath; private final Map> cache; private final Set nodesToIgnore = new HashSet<>(); private final AtomicLong round = new AtomicLong(); private boolean closed; private volatile int fetchingCount; @SuppressWarnings("unchecked") protected AsyncShardFetch(Logger logger, String type, ShardId shardId, String customDataPath, int expectedSize) { this.logger = logger; this.type = type; this.shardId = Objects.requireNonNull(shardId); this.customDataPath = Objects.requireNonNull(customDataPath); this.fetchingCount = 0; this.cache = Maps.newHashMapWithExpectedSize(expectedSize); } @Override public synchronized void close() { this.closed = true; } /** * Returns the number of async fetches that are currently ongoing. */ public int getNumberOfInFlightFetches() { return fetchingCount; } /** * Fetches the data for the relevant shard. If there any ongoing async fetches going on, or new ones have * been initiated by this call, the result will have no data. *

* The ignoreNodes are nodes that are supposed to be ignored for this round, since fetching is async, we need * to keep them around and make sure we add them back when all the responses are fetched and returned. */ public synchronized FetchResult fetchData(DiscoveryNodes nodes, Set ignoreNodes) { if (closed) { throw new IllegalStateException(shardId + ": can't fetch data on closed async fetch"); } nodesToIgnore.addAll(ignoreNodes); fillShardCacheWithDataNodes(cache, nodes); List> nodesToFetch = findNodesToFetch(cache); if (nodesToFetch.isEmpty() == false) { // mark all node as fetching and go ahead and async fetch them // use a unique round id to detect stale responses in processAsyncFetch final long fetchingRound = round.incrementAndGet(); for (NodeEntry nodeEntry : nodesToFetch) { nodeEntry.markAsFetching(fetchingRound); fetchingCount++; } DiscoveryNode[] discoNodesToFetch = nodesToFetch.stream() .map(NodeEntry::getNodeId) .map(nodes::get) .toArray(DiscoveryNode[]::new); asyncFetch(discoNodesToFetch, fetchingRound); } assert assertFetchingCountConsistent(); // if we are still fetching, return null to indicate it if (hasAnyNodeFetching()) { return new FetchResult<>(shardId, null, emptySet()); } else { // nothing to fetch, yay, build the return value Map fetchData = new HashMap<>(); Set failedNodes = new HashSet<>(); for (Iterator>> it = cache.entrySet().iterator(); it.hasNext();) { Map.Entry> entry = it.next(); String nodeId = entry.getKey(); NodeEntry nodeEntry = entry.getValue(); DiscoveryNode node = nodes.get(nodeId); if (node != null) { if (nodeEntry.isFailed()) { // if its failed, remove it from the list of nodes, so if this run doesn't work // we try again next round to fetch it again it.remove(); failedNodes.add(nodeEntry.getNodeId()); } else { if (nodeEntry.getValue() != null) { fetchData.put(node, nodeEntry.getValue()); } } } } Set allIgnoreNodes = Set.copyOf(nodesToIgnore); // clear the nodes to ignore, we had a successful run in fetching everything we can // we need to try them if another full run is needed nodesToIgnore.clear(); // if at least one node failed, make sure to have a protective reroute // here, just case this round won't find anything, and we need to retry fetching data if (failedNodes.isEmpty() == false || allIgnoreNodes.isEmpty() == false) { reroute(shardId, "nodes failed [" + failedNodes.size() + "], ignored [" + allIgnoreNodes.size() + "]"); } return new FetchResult<>(shardId, fetchData, allIgnoreNodes); } } /** * Called by the response handler of the async action to fetch data. Verifies that its still working * on the same cache generation, otherwise the results are discarded. It then goes and fills the relevant data for * the shard (response + failures), issuing a reroute at the end of it to make sure there will be another round * of allocations taking this new data into account. */ protected synchronized void processAsyncFetch(List responses, List failures, long fetchingRound) { if (closed) { // we are closed, no need to process this async fetch at all logger.trace("{} ignoring fetched [{}] results, already closed", shardId, type); return; } logger.trace("{} processing fetched [{}] results", shardId, type); if (responses != null) { for (T response : responses) { NodeEntry nodeEntry = cache.get(response.getNode().getId()); if (nodeEntry != null) { if (nodeEntry.getFetchingRound() != fetchingRound) { assert nodeEntry.getFetchingRound() > fetchingRound : "node entries only replaced by newer rounds"; logger.trace( "{} received response for [{}] from node {} for an older fetching round (expected: {} but was: {})", shardId, nodeEntry.getNodeId(), type, nodeEntry.getFetchingRound(), fetchingRound ); } else if (nodeEntry.isFailed()) { logger.trace( "{} node {} has failed for [{}] (failure [{}])", shardId, nodeEntry.getNodeId(), type, nodeEntry.getFailure() ); } else { // if the entry is there, for the right fetching round and not marked as failed already, process it logger.trace("{} marking {} as done for [{}], result is [{}]", shardId, nodeEntry.getNodeId(), type, response); nodeEntry.doneFetching(response); fetchingCount--; } } } } if (failures != null) { for (FailedNodeException failure : failures) { logger.trace("{} processing failure {} for [{}]", shardId, failure, type); NodeEntry nodeEntry = cache.get(failure.nodeId()); if (nodeEntry != null) { if (nodeEntry.getFetchingRound() != fetchingRound) { assert nodeEntry.getFetchingRound() > fetchingRound : "node entries only replaced by newer rounds"; logger.trace( "{} received failure for [{}] from node {} for an older fetching round (expected: {} but was: {})", shardId, nodeEntry.getNodeId(), type, nodeEntry.getFetchingRound(), fetchingRound ); } else if (nodeEntry.isFailed() == false) { // if the entry is there, for the right fetching round and not marked as failed already, process it Throwable unwrappedCause = ExceptionsHelper.unwrapCause(failure.getCause()); // if the request got rejected or timed out, we need to try it again next time... if (unwrappedCause instanceof EsRejectedExecutionException || unwrappedCause instanceof ReceiveTimeoutTransportException || unwrappedCause instanceof ElasticsearchTimeoutException) { nodeEntry.restartFetching(); fetchingCount--; } else { logger.warn( () -> format("%s: failed to list shard for %s on node [%s]", shardId, type, failure.nodeId()), failure ); nodeEntry.doneFetching(failure.getCause()); fetchingCount--; } } } } } reroute(shardId, "post_response"); assert assertFetchingCountConsistent(); } /** * Implement this in order to scheduled another round that causes a call to fetch data. */ protected abstract void reroute(ShardId shardId, String reason); /** * Clear cache for node, ensuring next fetch will fetch a fresh copy. */ synchronized void clearCacheForNode(String nodeId) { NodeEntry nodeEntry = cache.remove(nodeId); if (nodeEntry != null && nodeEntry.fetching) { fetchingCount--; } assert assertFetchingCountConsistent(); } /** * Fills the shard fetched data with new (data) nodes and a fresh NodeEntry, and removes from * it nodes that are no longer part of the state. */ private void fillShardCacheWithDataNodes(Map> shardCache, DiscoveryNodes nodes) { // verify that all current data nodes are there for (Map.Entry cursor : nodes.getDataNodes().entrySet()) { DiscoveryNode node = cursor.getValue(); if (shardCache.containsKey(node.getId()) == false) { shardCache.put(node.getId(), new NodeEntry(node.getId())); } } // remove nodes that are not longer part of the data nodes set for (Iterator>> iterator = shardCache.entrySet().iterator(); iterator.hasNext();) { Map.Entry> entry = iterator.next(); String nodeId = entry.getKey(); NodeEntry nodeEntry = entry.getValue(); if (nodes.nodeExists(nodeId) == false) { if (nodeEntry.fetching) { fetchingCount--; } iterator.remove(); } } assert assertFetchingCountConsistent(); } /** * Finds all the nodes that need to be fetched. Those are nodes that have no * data, and are not in fetch mode. */ private List> findNodesToFetch(Map> shardCache) { List> nodesToFetch = new ArrayList<>(); for (NodeEntry nodeEntry : shardCache.values()) { if (nodeEntry.hasData() == false && nodeEntry.isFetching() == false) { nodesToFetch.add(nodeEntry); } } return nodesToFetch; } /** * Are there any nodes that are fetching data? */ private boolean hasAnyNodeFetching() { return fetchingCount != 0; } /** * Async fetches data for the provided shard with the set of nodes that need to be fetched from. */ // visible for testing void asyncFetch(final DiscoveryNode[] nodes, long fetchingRound) { logger.trace("{} fetching [{}] from {}", shardId, type, nodes); list(shardId, customDataPath, nodes, new ActionListener>() { @Override public void onResponse(BaseNodesResponse response) { assert assertSameNodes(response); processAsyncFetch(response.getNodes(), response.failures(), fetchingRound); } @Override public void onFailure(Exception e) { List failures = new ArrayList<>(nodes.length); for (final DiscoveryNode node : nodes) { failures.add(new FailedNodeException(node.getId(), "total failure in fetching", e)); } processAsyncFetch(null, failures, fetchingRound); } private boolean assertSameNodes(BaseNodesResponse response) { final Map nodesById = Arrays.stream(nodes) .collect(Collectors.toMap(DiscoveryNode::getEphemeralId, Function.identity())); for (T nodeResponse : response.getNodes()) { final DiscoveryNode responseNode = nodeResponse.getNode(); final DiscoveryNode localNode = nodesById.get(responseNode.getEphemeralId()); assert localNode == responseNode : "not reference equal: " + localNode + " vs " + responseNode; } return true; } }); } protected abstract void list( ShardId shardId, @Nullable String customDataPath, DiscoveryNode[] nodes, ActionListener> listener ); /** * The result of a fetch operation. Make sure to first check {@link #hasData()} before * fetching the actual data. */ public static class FetchResult { private final ShardId shardId; private final Map data; private final Set ignoreNodes; public FetchResult(ShardId shardId, Map data, Set ignoreNodes) { this.shardId = shardId; this.data = data; this.ignoreNodes = ignoreNodes; } /** * Does the result actually contain data? If not, then there are on going fetch * operations happening, and it should wait for it. */ public boolean hasData() { return data != null; } /** * Returns the actual data, note, make sure to check {@link #hasData()} first and * only use this when there is an actual data. */ public Map getData() { assert data != null : "getData should only be called if there is data to be fetched, please check hasData first"; return this.data; } /** * Process any changes needed to the allocation based on this fetch result. */ public void processAllocation(RoutingAllocation allocation) { for (String ignoreNode : ignoreNodes) { allocation.addIgnoreShardForNode(shardId, ignoreNode); } } } /** * A node entry, holding the state of the fetched data for a specific shard * for a giving node. */ static class NodeEntry { private final String nodeId; private boolean fetching; @Nullable private T value; private boolean valueSet; private Throwable failure; private long fetchingRound; NodeEntry(String nodeId) { this.nodeId = nodeId; } String getNodeId() { return this.nodeId; } boolean isFetching() { return fetching; } void markAsFetching(long fetchingRound) { assert fetching == false : "double marking a node as fetching"; this.fetching = true; this.fetchingRound = fetchingRound; } void doneFetching(T value) { assert fetching : "setting value but not in fetching mode"; assert failure == null : "setting value when failure already set"; this.valueSet = true; this.value = value; this.fetching = false; } void doneFetching(Throwable failure) { assert fetching : "setting value but not in fetching mode"; assert valueSet == false : "setting failure when already set value"; assert failure != null : "setting failure can't be null"; this.failure = failure; this.fetching = false; } void restartFetching() { assert fetching : "restarting fetching, but not in fetching mode"; assert valueSet == false : "value can't be set when restarting fetching"; assert failure == null : "failure can't be set when restarting fetching"; this.fetching = false; } boolean isFailed() { return failure != null; } boolean hasData() { return valueSet || failure != null; } Throwable getFailure() { assert hasData() : "getting failure when data has not been fetched"; return failure; } @Nullable T getValue() { assert failure == null : "trying to fetch value, but its marked as failed, check isFailed"; assert valueSet : "value is not set, hasn't been fetched yet"; return value; } long getFetchingRound() { return fetchingRound; } } private boolean assertFetchingCountConsistent() { assert Thread.holdsLock(this); assert fetchingCount == cache.values().stream().filter(NodeEntry::isFetching).count(); return true; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy