org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.servicemix.bundles.elasticsearch
Show all versions of org.apache.servicemix.bundles.elasticsearch
This OSGi bundle wraps the ${pkgArtifactId} ${pkgVersion} jar file.
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.action.support.replication;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.Version;
import org.elasticsearch.action.*;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.action.support.TransportAction;
import org.elasticsearch.action.support.TransportActions;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateObserver;
import org.elasticsearch.cluster.action.shard.ShardStateAction;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.*;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.common.util.concurrent.RefCounted;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.DocumentAlreadyExistsException;
import org.elasticsearch.index.engine.VersionConflictEngineException;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.node.NodeClosedException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.*;
import java.io.Closeable;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
/**
*/
public abstract class TransportShardReplicationOperationAction extends TransportAction {
protected final TransportService transportService;
protected final ClusterService clusterService;
protected final IndicesService indicesService;
protected final ShardStateAction shardStateAction;
protected final ReplicationType defaultReplicationType;
protected final WriteConsistencyLevel defaultWriteConsistencyLevel;
protected final TransportRequestOptions transportOptions;
final String transportReplicaAction;
final String executor;
final boolean checkWriteConsistency;
protected TransportShardReplicationOperationAction(Settings settings, String actionName, TransportService transportService,
ClusterService clusterService, IndicesService indicesService,
ThreadPool threadPool, ShardStateAction shardStateAction, ActionFilters actionFilters) {
super(settings, actionName, threadPool, actionFilters);
this.transportService = transportService;
this.clusterService = clusterService;
this.indicesService = indicesService;
this.shardStateAction = shardStateAction;
this.transportReplicaAction = actionName + "[r]";
this.executor = executor();
this.checkWriteConsistency = checkWriteConsistency();
transportService.registerHandler(actionName, new OperationTransportHandler());
transportService.registerHandler(transportReplicaAction, new ReplicaOperationTransportHandler());
this.transportOptions = transportOptions();
this.defaultReplicationType = ReplicationType.fromString(settings.get("action.replication_type", "sync"));
this.defaultWriteConsistencyLevel = WriteConsistencyLevel.fromString(settings.get("action.write_consistency", "quorum"));
}
@Override
protected void doExecute(Request request, ActionListener listener) {
new PrimaryPhase(request, listener).run();
}
protected abstract Request newRequestInstance();
protected abstract ReplicaRequest newReplicaRequestInstance();
protected abstract Response newResponseInstance();
protected abstract String executor();
protected abstract void shardOperationOnReplica(ReplicaOperationRequest shardRequest);
/**
* @return A tuple containing not null values, as first value the result of the primary operation and as second value
* the request to be executed on the replica shards.
*/
protected abstract Tuple shardOperationOnPrimary(ClusterState clusterState, PrimaryOperationRequest shardRequest) throws Throwable;
protected abstract ShardIterator shards(ClusterState clusterState, InternalRequest request) throws ElasticsearchException;
protected abstract boolean checkWriteConsistency();
protected ClusterBlockException checkGlobalBlock(ClusterState state) {
return state.blocks().globalBlockedException(ClusterBlockLevel.WRITE);
}
protected ClusterBlockException checkRequestBlock(ClusterState state, InternalRequest request) {
return state.blocks().indexBlockedException(ClusterBlockLevel.WRITE, request.concreteIndex());
}
protected abstract boolean resolveIndex();
/**
* Resolves the request, by default doing nothing. If the resolve
* means a different execution, then return false here to indicate not to continue and execute this request.
*/
protected boolean resolveRequest(ClusterState state, InternalRequest request, ActionListener listener) {
return true;
}
protected TransportRequestOptions transportOptions() {
return TransportRequestOptions.EMPTY;
}
protected boolean retryPrimaryException(Throwable e) {
return TransportActions.isShardNotAvailableException(e);
}
/**
* Should an exception be ignored when the operation is performed on the replica.
*/
protected boolean ignoreReplicaException(Throwable e) {
if (TransportActions.isShardNotAvailableException(e)) {
return true;
}
// on version conflict or document missing, it means
// that a new change has crept into the replica, and it's fine
if (isConflictException(e)) {
return true;
}
return false;
}
protected boolean isConflictException(Throwable e) {
Throwable cause = ExceptionsHelper.unwrapCause(e);
// on version conflict or document missing, it means
// that a new change has crept into the replica, and it's fine
if (cause instanceof VersionConflictEngineException) {
return true;
}
if (cause instanceof DocumentAlreadyExistsException) {
return true;
}
return false;
}
class OperationTransportHandler extends BaseTransportRequestHandler {
@Override
public Request newInstance() {
return newRequestInstance();
}
@Override
public String executor() {
return ThreadPool.Names.SAME;
}
@Override
public void messageReceived(final Request request, final TransportChannel channel) throws Exception {
// no need to have a threaded listener since we just send back a response
request.listenerThreaded(false);
// if we have a local operation, execute it on a thread since we don't spawn
request.operationThreaded(true);
execute(request, new ActionListener() {
@Override
public void onResponse(Response result) {
try {
channel.sendResponse(result);
} catch (Throwable e) {
onFailure(e);
}
}
@Override
public void onFailure(Throwable e) {
try {
channel.sendResponse(e);
} catch (Throwable e1) {
logger.warn("Failed to send response for " + actionName, e1);
}
}
});
}
}
class ReplicaOperationTransportHandler extends BaseTransportRequestHandler {
@Override
public ReplicaOperationRequest newInstance() {
return new ReplicaOperationRequest();
}
@Override
public String executor() {
return executor;
}
// we must never reject on because of thread pool capacity on replicas
@Override
public boolean isForceExecution() {
return true;
}
@Override
public void messageReceived(final ReplicaOperationRequest request, final TransportChannel channel) throws Exception {
try (Releasable shardReference = getIndexShardOperationsCounter(request.shardId)) {
shardOperationOnReplica(request);
} catch (Throwable t) {
failReplicaIfNeeded(request.shardId.getIndex(), request.shardId.id(), t);
throw t;
}
channel.sendResponse(TransportResponse.Empty.INSTANCE);
}
}
protected class PrimaryOperationRequest {
public ShardId shardId;
public Request request;
public PrimaryOperationRequest(int shardId, String index, Request request) {
this.shardId = new ShardId(index, shardId);
this.request = request;
}
}
protected class ReplicaOperationRequest extends TransportRequest implements IndicesRequest {
public ShardId shardId;
public ReplicaRequest request;
ReplicaOperationRequest() {
}
ReplicaOperationRequest(ShardId shardId, ReplicaRequest request) {
super(request);
this.shardId = shardId;
this.request = request;
}
public String[] indices() {
return request.indices();
}
@Override
public IndicesOptions indicesOptions() {
return request.indicesOptions();
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
int shard = -1;
if (in.getVersion().onOrAfter(Version.V_1_4_0_Beta1)) {
shardId = ShardId.readShardId(in);
} else {
shard = in.readVInt();
}
request = newReplicaRequestInstance();
request.readFrom(in);
if (in.getVersion().before(Version.V_1_4_0_Beta1)) {
assert shard >= 0;
//older nodes will send the concrete index as part of the request
shardId = new ShardId(request.index(), shard);
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
if (out.getVersion().onOrAfter(Version.V_1_4_0_Beta1)) {
shardId.writeTo(out);
} else {
out.writeVInt(shardId.id());
//older nodes expect the concrete index as part of the request
request.index(shardId.getIndex());
}
request.writeTo(out);
}
}
/**
* Responsible for performing all operations up to the point we start starting sending requests to replica shards.
* Including forwarding the request to another node if the primary is not assigned locally.
*
* Note that as soon as we start sending request to replicas, state responsibility is transferred to {@link ReplicationPhase}
*/
final class PrimaryPhase extends AbstractRunnable {
private final ActionListener listener;
private final InternalRequest internalRequest;
private final ClusterStateObserver observer;
private final AtomicBoolean finished = new AtomicBoolean(false);
private volatile Releasable indexShardReference;
PrimaryPhase(Request request, ActionListener listener) {
this.internalRequest = new InternalRequest(request);
this.listener = listener;
this.observer = new ClusterStateObserver(clusterService, internalRequest.request().timeout(), logger);
}
@Override
public void onFailure(Throwable e) {
finishWithUnexpectedFailure(e);
}
protected void doRun() {
if (checkBlocks() == false) {
return;
}
final ShardIterator shardIt = shards(observer.observedState(), internalRequest);
final ShardRouting primary = resolvePrimary(shardIt);
if (primary == null) {
retryBecauseUnavailable(shardIt.shardId(), "No active shards.");
return;
}
if (primary.active() == false) {
logger.trace("primary shard [{}] is not yet active, scheduling a retry.", primary.shardId());
retryBecauseUnavailable(shardIt.shardId(), "Primary shard is not active or isn't assigned to a known node.");
return;
}
if (observer.observedState().nodes().nodeExists(primary.currentNodeId()) == false) {
logger.trace("primary shard [{}] is assigned to anode we do not know the node, scheduling a retry.", primary.shardId(), primary.currentNodeId());
retryBecauseUnavailable(shardIt.shardId(), "Primary shard is not active or isn't assigned to a known node.");
return;
}
routeRequestOrPerformLocally(primary, shardIt);
}
/**
* checks for any cluster state blocks. Returns true if operation is OK to proceeded.
* if false is return, no further action is needed. The method takes care of any continuation, by either
* responding to the listener or scheduling a retry
*/
protected boolean checkBlocks() {
ClusterBlockException blockException = checkGlobalBlock(observer.observedState());
if (blockException != null) {
if (blockException.retryable()) {
logger.trace("cluster is blocked ({}), scheduling a retry", blockException.getMessage());
retry(blockException);
} else {
finishAsFailed(blockException);
}
return false;
}
if (resolveIndex()) {
internalRequest.concreteIndex(observer.observedState().metaData().concreteSingleIndex(internalRequest.request().index(), internalRequest.request().indicesOptions()));
} else {
internalRequest.concreteIndex(internalRequest.request().index());
}
// check if we need to execute, and if not, return
if (resolveRequest(observer.observedState(), internalRequest, listener) == false) {
return false;
}
blockException = checkRequestBlock(observer.observedState(), internalRequest);
if (blockException != null) {
if (blockException.retryable()) {
logger.trace("cluster is blocked ({}), scheduling a retry", blockException.getMessage());
retry(blockException);
} else {
finishAsFailed(blockException);
}
return false;
}
return true;
}
protected ShardRouting resolvePrimary(ShardIterator shardIt) {
// no shardIt, might be in the case between index gateway recovery and shardIt initialization
ShardRouting shard;
while ((shard = shardIt.nextOrNull()) != null) {
// we only deal with primary shardIt here...
if (shard.primary()) {
return shard;
}
}
return null;
}
/**
* send the request to the node holding the primary or execute if local
*/
protected void routeRequestOrPerformLocally(final ShardRouting primary, final ShardIterator shardsIt) {
if (primary.currentNodeId().equals(observer.observedState().nodes().localNodeId())) {
try {
if (internalRequest.request().operationThreaded()) {
threadPool.executor(executor).execute(new AbstractRunnable() {
@Override
public void onFailure(Throwable t) {
finishAsFailed(t);
}
@Override
protected void doRun() throws Exception {
performOnPrimary(primary, shardsIt);
}
});
} else {
performOnPrimary(primary, shardsIt);
}
} catch (Throwable t) {
// no commit: check threadpool rejection.
finishAsFailed(t);
}
} else {
DiscoveryNode node = observer.observedState().nodes().get(primary.currentNodeId());
transportService.sendRequest(node, actionName, internalRequest.request(), transportOptions, new BaseTransportResponseHandler() {
@Override
public Response newInstance() {
return newResponseInstance();
}
@Override
public String executor() {
return ThreadPool.Names.SAME;
}
@Override
public void handleResponse(Response response) {
finishOnRemoteSuccess(response);
}
@Override
public void handleException(TransportException exp) {
try {
// if we got disconnected from the node, or the node / shard is not in the right state (being closed)
if (exp.unwrapCause() instanceof ConnectTransportException || exp.unwrapCause() instanceof NodeClosedException ||
retryPrimaryException(exp)) {
internalRequest.request().setCanHaveDuplicates();
// we already marked it as started when we executed it (removed the listener) so pass false
// to re-add to the cluster listener
logger.trace("received an error from node the primary was assigned to ({}), scheduling a retry", exp.getMessage());
retry(exp);
} else {
finishAsFailed(exp);
}
} catch (Throwable t) {
finishWithUnexpectedFailure(t);
}
}
});
}
}
void retry(Throwable failure) {
assert failure != null;
if (observer.isTimedOut()) {
// we running as a last attempt after a timeout has happened. don't retry
finishAsFailed(failure);
return;
}
// make it threaded operation so we fork on the discovery listener thread
internalRequest.request().operationThreaded(true);
observer.waitForNextChange(new ClusterStateObserver.Listener() {
@Override
public void onNewClusterState(ClusterState state) {
run();
}
@Override
public void onClusterServiceClose() {
finishAsFailed(new NodeClosedException(clusterService.localNode()));
}
@Override
public void onTimeout(TimeValue timeout) {
// Try one more time...
run();
}
});
}
/**
* upon success, finish the first phase and transfer responsibility to the {@link ReplicationPhase}
*/
void finishAndMoveToReplication(ReplicationPhase replicationPhase) {
if (finished.compareAndSet(false, true)) {
replicationPhase.run();
} else {
assert false : "finishAndMoveToReplication called but operation is already finished";
}
}
void finishAsFailed(Throwable failure) {
if (finished.compareAndSet(false, true)) {
Releasables.close(indexShardReference);
logger.trace("operation failed", failure);
listener.onFailure(failure);
} else {
assert false : "finishAsFailed called but operation is already finished";
}
}
void finishWithUnexpectedFailure(Throwable failure) {
logger.warn("unexpected error during the primary phase for action [{}]", failure, actionName);
if (finished.compareAndSet(false, true)) {
Releasables.close(indexShardReference);
listener.onFailure(failure);
} else {
assert false : "finishWithUnexpectedFailure called but operation is already finished";
}
}
void finishOnRemoteSuccess(Response response) {
if (finished.compareAndSet(false, true)) {
logger.trace("operation succeeded");
listener.onResponse(response);
} else {
assert false : "finishOnRemoteSuccess called but operation is already finished";
}
}
/**
* perform the operation on the node holding the primary
*/
void performOnPrimary(final ShardRouting primary, final ShardIterator shardsIt) {
final String writeConsistencyFailure = checkWriteConsistency(primary);
if (writeConsistencyFailure != null) {
retryBecauseUnavailable(primary.shardId(), writeConsistencyFailure);
return;
}
final ReplicationPhase replicationPhase;
try {
indexShardReference = getIndexShardOperationsCounter(primary.shardId());
PrimaryOperationRequest por = new PrimaryOperationRequest(primary.id(), internalRequest.concreteIndex(), internalRequest.request());
Tuple primaryResponse = shardOperationOnPrimary(observer.observedState(), por);
logger.trace("operation completed on primary [{}]", primary);
replicationPhase = new ReplicationPhase(shardsIt, primaryResponse.v2(), primaryResponse.v1(), observer, primary, internalRequest, listener, indexShardReference);
} catch (Throwable e) {
internalRequest.request.setCanHaveDuplicates();
// shard has not been allocated yet, retry it here
if (retryPrimaryException(e)) {
logger.trace("had an error while performing operation on primary ({}), scheduling a retry.", e.getMessage());
// We have to close here because when we retry we will increment get a new reference on index shard again and we do not want to
// increment twice.
Releasables.close(indexShardReference);
// We have to reset to null here because whe we retry it might be that we never get to the point where we assign a new reference
// (for example, in case the operation was rejected because queue is full). In this case we would release again once one of the finish methods is called.
indexShardReference = null;
retry(e);
return;
}
if (e instanceof ElasticsearchException && ((ElasticsearchException) e).status() == RestStatus.CONFLICT) {
if (logger.isTraceEnabled()) {
logger.trace(primary.shortSummary() + ": Failed to execute [" + internalRequest.request() + "]", e);
}
} else {
if (logger.isDebugEnabled()) {
logger.debug(primary.shortSummary() + ": Failed to execute [" + internalRequest.request() + "]", e);
}
}
finishAsFailed(e);
return;
}
finishAndMoveToReplication(replicationPhase);
}
/**
* checks whether we can perform a write based on the write consistency setting
* returns **null* if OK to proceed, or a string describing the reason to stop
*/
String checkWriteConsistency(ShardRouting shard) {
if (checkWriteConsistency == false) {
return null;
}
final WriteConsistencyLevel consistencyLevel;
if (internalRequest.request().consistencyLevel() != WriteConsistencyLevel.DEFAULT) {
consistencyLevel = internalRequest.request().consistencyLevel();
} else {
consistencyLevel = defaultWriteConsistencyLevel;
}
final int sizeActive;
final int requiredNumber;
IndexRoutingTable indexRoutingTable = observer.observedState().getRoutingTable().index(shard.index());
if (indexRoutingTable != null) {
IndexShardRoutingTable shardRoutingTable = indexRoutingTable.shard(shard.getId());
if (shardRoutingTable != null) {
sizeActive = shardRoutingTable.activeShards().size();
if (consistencyLevel == WriteConsistencyLevel.QUORUM && shardRoutingTable.getSize() > 2) {
// only for more than 2 in the number of shardIt it makes sense, otherwise its 1 shard with 1 replica, quorum is 1 (which is what it is initialized to)
requiredNumber = (shardRoutingTable.getSize() / 2) + 1;
} else if (consistencyLevel == WriteConsistencyLevel.ALL) {
requiredNumber = shardRoutingTable.getSize();
} else {
requiredNumber = 1;
}
} else {
sizeActive = 0;
requiredNumber = 1;
}
} else {
sizeActive = 0;
requiredNumber = 1;
}
if (sizeActive < requiredNumber) {
logger.trace("not enough active copies of shard [{}] to meet write consistency of [{}] (have {}, needed {}), scheduling a retry.",
shard.shardId(), consistencyLevel, sizeActive, requiredNumber);
return "Not enough active copies to meet write consistency of [" + consistencyLevel + "] (have " + sizeActive + ", needed " + requiredNumber + ").";
} else {
return null;
}
}
void retryBecauseUnavailable(ShardId shardId, String message) {
retry(new UnavailableShardsException(shardId, message + " Timeout: [" + internalRequest.request().timeout() + "], request: " + internalRequest.request().toString()));
}
}
protected Releasable getIndexShardOperationsCounter(ShardId shardId) {
IndexService indexService = indicesService.indexServiceSafe(shardId.index().getName());
IndexShard indexShard = indexService.shardSafe(shardId.id());
return new IndexShardReference(indexShard);
}
private void failReplicaIfNeeded(String index, int shardId, Throwable t) {
logger.trace("failure on replica [{}][{}]", t, index, shardId);
if (ignoreReplicaException(t) == false) {
IndexService indexService = indicesService.indexService(index);
if (indexService == null) {
logger.debug("ignoring failed replica [{}][{}] because index was already removed.", index, shardId);
return;
}
IndexShard indexShard = indexService.shard(shardId);
if (indexShard == null) {
logger.debug("ignoring failed replica [{}][{}] because index was already removed.", index, shardId);
return;
}
indexShard.failShard(actionName + " failed on replica", t);
}
}
/**
* inner class is responsible for send the requests to all replica shards and manage the responses
*/
final class ReplicationPhase extends AbstractRunnable {
private final ReplicaRequest replicaRequest;
private final Response finalResponse;
private final ShardIterator shardIt;
private final ActionListener listener;
private final AtomicBoolean finished = new AtomicBoolean(false);
private final AtomicInteger success = new AtomicInteger(1); // We already wrote into the primary shard
private final IndexMetaData indexMetaData;
private final ShardRouting originalPrimaryShard;
private final AtomicInteger pending;
private final int totalShards;
private final ClusterStateObserver observer;
private final Releasable indexShardReference;
/**
* the constructor doesn't take any action, just calculates state. Call {@link #run()} to start
* replicating.
*/
public ReplicationPhase(ShardIterator originalShardIt, ReplicaRequest replicaRequest, Response finalResponse,
ClusterStateObserver observer, ShardRouting originalPrimaryShard,
InternalRequest internalRequest, ActionListener listener, Releasable indexShardReference) {
this.replicaRequest = replicaRequest;
this.listener = listener;
this.finalResponse = finalResponse;
this.originalPrimaryShard = originalPrimaryShard;
this.observer = observer;
indexMetaData = observer.observedState().metaData().index(internalRequest.concreteIndex());
this.indexShardReference = indexShardReference;
ShardRouting shard;
// we double check on the state, if it got changed we need to make sure we take the latest one cause
// maybe a replica shard started its recovery process and we need to apply it there...
// we also need to make sure if the new state has a new primary shard (that we indexed to before) started
// and assigned to another node (while the indexing happened). In that case, we want to apply it on the
// new primary shard as well...
ClusterState newState = clusterService.state();
int numberOfUnassignedOrShadowReplicas = 0;
int numberOfPendingShardInstances = 0;
if (observer.observedState() != newState) {
observer.reset(newState);
shardIt = shards(newState, internalRequest);
while ((shard = shardIt.nextOrNull()) != null) {
if (shard.primary()) {
if (originalPrimaryShard.currentNodeId().equals(shard.currentNodeId()) == false) {
// there is a new primary, we'll have to replicate to it.
numberOfPendingShardInstances++;
}
if (shard.relocating()) {
numberOfPendingShardInstances++;
}
} else if (IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.settings())) {
// If the replicas use shadow replicas, there is no reason to
// perform the action on the replica, so skip it and
// immediately return
// this delays mapping updates on replicas because they have
// to wait until they get the new mapping through the cluster
// state, which is why we recommend pre-defined mappings for
// indices using shadow replicas
numberOfUnassignedOrShadowReplicas++;
} else if (shard.unassigned()) {
numberOfUnassignedOrShadowReplicas++;
} else if (shard.relocating()) {
// we need to send to two copies
numberOfPendingShardInstances += 2;
} else {
numberOfPendingShardInstances++;
}
}
internalRequest.request().setCanHaveDuplicates(); // safe side, cluster state changed, we might have dups
} else {
shardIt = originalShardIt;
shardIt.reset();
while ((shard = shardIt.nextOrNull()) != null) {
if (shard.state() != ShardRoutingState.STARTED) {
replicaRequest.setCanHaveDuplicates();
}
if (shard.unassigned()) {
numberOfUnassignedOrShadowReplicas++;
} else if (shard.primary()) {
if (shard.relocating()) {
// we have to replicate to the other copy
numberOfPendingShardInstances += 1;
}
} else if (IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.settings())) {
// If the replicas use shadow replicas, there is no reason to
// perform the action on the replica, so skip it and
// immediately return
// this delays mapping updates on replicas because they have
// to wait until they get the new mapping through the cluster
// state, which is why we recommend pre-defined mappings for
// indices using shadow replicas
numberOfUnassignedOrShadowReplicas++;
} else if (shard.relocating()) {
// we need to send to two copies
numberOfPendingShardInstances += 2;
} else {
numberOfPendingShardInstances++;
}
}
}
// one for the primary already done
this.totalShards = 1 + numberOfPendingShardInstances + numberOfUnassignedOrShadowReplicas;
this.pending = new AtomicInteger(numberOfPendingShardInstances);
}
/**
* total shard copies
*/
int totalShards() {
return totalShards;
}
/**
* total successful operations so far
*/
int successful() {
return success.get();
}
/**
* number of pending operations
*/
int pending() {
return pending.get();
}
@Override
public void onFailure(Throwable t) {
logger.error("unexpected error while replicating for action [{}]. shard [{}]. ", t, actionName, shardIt.shardId());
forceFinishAsFailed(t);
}
/**
* start sending current requests to replicas
*/
@Override
protected void doRun() {
if (pending.get() == 0) {
doFinish();
return;
}
ShardRouting shard;
shardIt.reset(); // reset the iterator
while ((shard = shardIt.nextOrNull()) != null) {
// if its unassigned, nothing to do here...
if (shard.unassigned()) {
continue;
}
// we index on a replica that is initializing as well since we might not have got the event
// yet that it was started. We will get an exception IllegalShardState exception if its not started
// and that's fine, we will ignore it
if (shard.primary()) {
if (originalPrimaryShard.currentNodeId().equals(shard.currentNodeId()) == false) {
// there is a new primary, we'll have to replicate to it.
performOnReplica(shard, shard.currentNodeId());
}
if (shard.relocating()) {
performOnReplica(shard, shard.relocatingNodeId());
}
} else if (IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.settings()) == false) {
performOnReplica(shard, shard.currentNodeId());
if (shard.relocating()) {
performOnReplica(shard, shard.relocatingNodeId());
}
}
}
}
/**
* send operation to the given node or perform it if local
*/
void performOnReplica(final ShardRouting shard, final String nodeId) {
// if we don't have that node, it means that it might have failed and will be created again, in
// this case, we don't have to do the operation, and just let it failover
if (!observer.observedState().nodes().nodeExists(nodeId)) {
onReplicaFailure(nodeId, null);
return;
}
final ReplicaOperationRequest shardRequest = new ReplicaOperationRequest(shardIt.shardId(), replicaRequest);
if (!nodeId.equals(observer.observedState().nodes().localNodeId())) {
final DiscoveryNode node = observer.observedState().nodes().get(nodeId);
transportService.sendRequest(node, transportReplicaAction, shardRequest,
transportOptions, new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
@Override
public void handleResponse(TransportResponse.Empty vResponse) {
onReplicaSuccess();
}
@Override
public void handleException(TransportException exp) {
onReplicaFailure(nodeId, exp);
logger.trace("[{}] transport failure during replica request [{}] ", exp, node, replicaRequest);
if (ignoreReplicaException(exp) == false) {
logger.warn("failed to perform " + actionName + " on remote replica " + node + shardIt.shardId(), exp);
shardStateAction.shardFailed(shard, indexMetaData.getUUID(),
"Failed to perform [" + actionName + "] on replica, message [" + ExceptionsHelper.detailedMessage(exp) + "]");
}
}
});
} else {
if (replicaRequest.operationThreaded()) {
try {
threadPool.executor(executor).execute(new AbstractRunnable() {
@Override
protected void doRun() {
try {
shardOperationOnReplica(shardRequest);
onReplicaSuccess();
} catch (Throwable e) {
onReplicaFailure(nodeId, e);
failReplicaIfNeeded(shard.index(), shard.id(), e);
}
}
// we must never reject on because of thread pool capacity on replicas
@Override
public boolean isForceExecution() {
return true;
}
@Override
public void onFailure(Throwable t) {
onReplicaFailure(nodeId, t);
}
});
} catch (Throwable e) {
failReplicaIfNeeded(shard.index(), shard.id(), e);
onReplicaFailure(nodeId, e);
}
} else {
try {
shardOperationOnReplica(shardRequest);
onReplicaSuccess();
} catch (Throwable e) {
failReplicaIfNeeded(shard.index(), shard.id(), e);
onReplicaFailure(nodeId, e);
}
}
}
}
void onReplicaFailure(String nodeId, @Nullable Throwable e) {
decPendingAndFinishIfNeeded();
}
void onReplicaSuccess() {
success.incrementAndGet();
decPendingAndFinishIfNeeded();
}
private void decPendingAndFinishIfNeeded() {
if (pending.decrementAndGet() <= 0) {
doFinish();
}
}
private void forceFinishAsFailed(Throwable t) {
if (finished.compareAndSet(false, true)) {
Releasables.close(indexShardReference);
listener.onFailure(t);
}
}
private void doFinish() {
if (finished.compareAndSet(false, true)) {
Releasables.close(indexShardReference);
listener.onResponse(finalResponse);
}
}
}
/**
* Internal request class that gets built on each node. Holds the original request plus additional info.
*/
protected class InternalRequest {
final Request request;
String concreteIndex;
InternalRequest(Request request) {
this.request = request;
}
public Request request() {
return request;
}
void concreteIndex(String concreteIndex) {
this.concreteIndex = concreteIndex;
}
public String concreteIndex() {
return concreteIndex;
}
}
static class IndexShardReference implements Releasable {
final private IndexShard counter;
private final AtomicBoolean closed = new AtomicBoolean(false);
IndexShardReference(IndexShard counter) {
counter.incrementOperationCounter();
this.counter = counter;
}
@Override
public void close() {
if (closed.compareAndSet(false, true)) {
counter.decrementOperationCounter();
}
}
}
}