org.elasticsearch.cluster.routing.RoutingNodes Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.routing;
import com.carrotsearch.hppc.ObjectIntHashMap;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Randomness;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.shard.ShardId;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.Set;
import java.util.function.Predicate;
/**
* {@link RoutingNodes} represents a copy the routing information contained in the {@link ClusterState cluster state}.
* It can be either initialized as mutable or immutable (see {@link #RoutingNodes(ClusterState, boolean)}), allowing
* or disallowing changes to its elements.
*
* The main methods used to update routing entries are:
*
* - {@link #initializeShard} initializes an unassigned shard.
*
- {@link #startShard} starts an initializing shard / completes relocation of a shard.
*
- {@link #relocateShard} starts relocation of a started shard.
*
- {@link #failShard} fails/cancels an assigned shard.
*
*/
public class RoutingNodes implements Iterable {
private final Map nodesToShards = new HashMap<>();
private final UnassignedShards unassignedShards = new UnassignedShards(this);
private final Map> assignedShards = new HashMap<>();
private final boolean readOnly;
private int inactivePrimaryCount = 0;
private int inactiveShardCount = 0;
private int relocatingShards = 0;
private final Map> nodesPerAttributeNames = new HashMap<>();
private final Map recoveriesPerNode = new HashMap<>();
public RoutingNodes(ClusterState clusterState) {
this(clusterState, true);
}
public RoutingNodes(ClusterState clusterState, boolean readOnly) {
this.readOnly = readOnly;
final RoutingTable routingTable = clusterState.routingTable();
Map> nodesToShards = new HashMap<>();
// fill in the nodeToShards with the "live" nodes
for (ObjectCursor cursor : clusterState.nodes().getDataNodes().values()) {
nodesToShards.put(cursor.value.getId(), new LinkedHashMap<>()); // LinkedHashMap to preserve order
}
// fill in the inverse of node -> shards allocated
// also fill replicaSet information
for (ObjectCursor indexRoutingTable : routingTable.indicesRouting().values()) {
for (IndexShardRoutingTable indexShard : indexRoutingTable.value) {
assert indexShard.primary != null;
for (ShardRouting shard : indexShard) {
// to get all the shards belonging to an index, including the replicas,
// we define a replica set and keep track of it. A replica set is identified
// by the ShardId, as this is common for primary and replicas.
// A replica Set might have one (and not more) replicas with the state of RELOCATING.
if (shard.assignedToNode()) {
Map entries = nodesToShards.computeIfAbsent(shard.currentNodeId(),
k -> new LinkedHashMap<>()); // LinkedHashMap to preserve order
ShardRouting previousValue = entries.put(shard.shardId(), shard);
if (previousValue != null) {
throw new IllegalArgumentException("Cannot have two different shards with same shard id on same node");
}
assignedShardsAdd(shard);
if (shard.relocating()) {
relocatingShards++;
entries = nodesToShards.computeIfAbsent(shard.relocatingNodeId(),
k -> new LinkedHashMap<>()); // LinkedHashMap to preserve order
// add the counterpart shard with relocatingNodeId reflecting the source from which
// it's relocating from.
ShardRouting targetShardRouting = shard.getTargetRelocatingShard();
addInitialRecovery(targetShardRouting, indexShard.primary);
previousValue = entries.put(targetShardRouting.shardId(), targetShardRouting);
if (previousValue != null) {
throw new IllegalArgumentException("Cannot have two different shards with same shard id on same node");
}
assignedShardsAdd(targetShardRouting);
} else if (shard.initializing()) {
if (shard.primary()) {
inactivePrimaryCount++;
}
inactiveShardCount++;
addInitialRecovery(shard, indexShard.primary);
}
} else {
unassignedShards.add(shard);
}
}
}
}
for (Map.Entry> entry : nodesToShards.entrySet()) {
String nodeId = entry.getKey();
this.nodesToShards.put(nodeId, new RoutingNode(nodeId, clusterState.nodes().get(nodeId), entry.getValue()));
}
}
private void addRecovery(ShardRouting routing) {
updateRecoveryCounts(routing, true, findAssignedPrimaryIfPeerRecovery(routing));
}
private void removeRecovery(ShardRouting routing) {
updateRecoveryCounts(routing, false, findAssignedPrimaryIfPeerRecovery(routing));
}
private void addInitialRecovery(ShardRouting routing, ShardRouting initialPrimaryShard) {
updateRecoveryCounts(routing, true, initialPrimaryShard);
}
private void updateRecoveryCounts(final ShardRouting routing, final boolean increment, @Nullable final ShardRouting primary) {
final int howMany = increment ? 1 : -1;
assert routing.initializing() : "routing must be initializing: " + routing;
// TODO: check primary == null || primary.active() after all tests properly add ReplicaAfterPrimaryActiveAllocationDecider
assert primary == null || primary.assignedToNode() :
"shard is initializing but its primary is not assigned to a node";
Recoveries.getOrAdd(recoveriesPerNode, routing.currentNodeId()).addIncoming(howMany);
if (routing.recoverySource().getType() == RecoverySource.Type.PEER) {
// add/remove corresponding outgoing recovery on node with primary shard
if (primary == null) {
throw new IllegalStateException("shard is peer recovering but primary is unassigned");
}
Recoveries.getOrAdd(recoveriesPerNode, primary.currentNodeId()).addOutgoing(howMany);
if (increment == false && routing.primary() && routing.relocatingNodeId() != null) {
// primary is done relocating, move non-primary recoveries from old primary to new primary
int numRecoveringReplicas = 0;
for (ShardRouting assigned : assignedShards(routing.shardId())) {
if (assigned.primary() == false && assigned.initializing() &&
assigned.recoverySource().getType() == RecoverySource.Type.PEER) {
numRecoveringReplicas++;
}
}
recoveriesPerNode.get(routing.relocatingNodeId()).addOutgoing(-numRecoveringReplicas);
recoveriesPerNode.get(routing.currentNodeId()).addOutgoing(numRecoveringReplicas);
}
}
}
public int getIncomingRecoveries(String nodeId) {
return recoveriesPerNode.getOrDefault(nodeId, Recoveries.EMPTY).getIncoming();
}
public int getOutgoingRecoveries(String nodeId) {
return recoveriesPerNode.getOrDefault(nodeId, Recoveries.EMPTY).getOutgoing();
}
@Nullable
private ShardRouting findAssignedPrimaryIfPeerRecovery(ShardRouting routing) {
ShardRouting primary = null;
if (routing.recoverySource() != null && routing.recoverySource().getType() == RecoverySource.Type.PEER) {
List shardRoutings = assignedShards.get(routing.shardId());
if (shardRoutings != null) {
for (ShardRouting shardRouting : shardRoutings) {
if (shardRouting.primary()) {
if (shardRouting.active()) {
return shardRouting;
} else if (primary == null) {
primary = shardRouting;
} else if (primary.relocatingNodeId() != null) {
primary = shardRouting;
}
}
}
}
}
return primary;
}
@Override
public Iterator iterator() {
return Collections.unmodifiableCollection(nodesToShards.values()).iterator();
}
public Iterator mutableIterator() {
ensureMutable();
return nodesToShards.values().iterator();
}
public UnassignedShards unassigned() {
return this.unassignedShards;
}
public RoutingNode node(String nodeId) {
return nodesToShards.get(nodeId);
}
public ObjectIntHashMap nodesPerAttributesCounts(String attributeName) {
ObjectIntHashMap nodesPerAttributesCounts = nodesPerAttributeNames.get(attributeName);
if (nodesPerAttributesCounts != null) {
return nodesPerAttributesCounts;
}
nodesPerAttributesCounts = new ObjectIntHashMap<>();
for (RoutingNode routingNode : this) {
String attrValue = routingNode.node().getAttributes().get(attributeName);
nodesPerAttributesCounts.addTo(attrValue, 1);
}
nodesPerAttributeNames.put(attributeName, nodesPerAttributesCounts);
return nodesPerAttributesCounts;
}
/**
* Returns true
iff this {@link RoutingNodes} instance has any unassigned primaries even if the
* primaries are marked as temporarily ignored.
*/
public boolean hasUnassignedPrimaries() {
return unassignedShards.getNumPrimaries() + unassignedShards.getNumIgnoredPrimaries() > 0;
}
/**
* Returns true
iff this {@link RoutingNodes} instance has any unassigned shards even if the
* shards are marked as temporarily ignored.
* @see UnassignedShards#isEmpty()
* @see UnassignedShards#isIgnoredEmpty()
*/
public boolean hasUnassignedShards() {
return unassignedShards.isEmpty() == false || unassignedShards.isIgnoredEmpty() == false;
}
public boolean hasInactivePrimaries() {
return inactivePrimaryCount > 0;
}
public boolean hasInactiveShards() {
return inactiveShardCount > 0;
}
public int getRelocatingShardCount() {
return relocatingShards;
}
/**
* Returns all shards that are not in the state UNASSIGNED with the same shard
* ID as the given shard.
*/
public List assignedShards(ShardId shardId) {
final List replicaSet = assignedShards.get(shardId);
return replicaSet == null ? EMPTY : Collections.unmodifiableList(replicaSet);
}
@Nullable
public ShardRouting getByAllocationId(ShardId shardId, String allocationId) {
final List replicaSet = assignedShards.get(shardId);
if (replicaSet == null) {
return null;
}
for (ShardRouting shardRouting : replicaSet) {
if (shardRouting.allocationId().getId().equals(allocationId)) {
return shardRouting;
}
}
return null;
}
/**
* Returns the active primary shard for the given shard id or null
if
* no primary is found or the primary is not active.
*/
public ShardRouting activePrimary(ShardId shardId) {
for (ShardRouting shardRouting : assignedShards(shardId)) {
if (shardRouting.primary() && shardRouting.active()) {
return shardRouting;
}
}
return null;
}
/**
* Returns one active replica shard for the given shard id or null
if
* no active replica is found.
*/
public ShardRouting activeReplica(ShardId shardId) {
for (ShardRouting shardRouting : assignedShards(shardId)) {
if (!shardRouting.primary() && shardRouting.active()) {
return shardRouting;
}
}
return null;
}
/**
* Returns true
iff all replicas are active for the given shard routing. Otherwise false
*/
public boolean allReplicasActive(ShardId shardId, MetaData metaData) {
final List shards = assignedShards(shardId);
if (shards.isEmpty() || shards.size() < metaData.getIndexSafe(shardId.getIndex()).getNumberOfReplicas() + 1) {
return false; // if we are empty nothing is active if we have less than total at least one is unassigned
}
for (ShardRouting shard : shards) {
if (!shard.active()) {
return false;
}
}
return true;
}
public List shards(Predicate predicate) {
List shards = new ArrayList<>();
for (RoutingNode routingNode : this) {
for (ShardRouting shardRouting : routingNode) {
if (predicate.test(shardRouting)) {
shards.add(shardRouting);
}
}
}
return shards;
}
public List shardsWithState(ShardRoutingState... state) {
// TODO these are used on tests only - move into utils class
List shards = new ArrayList<>();
for (RoutingNode routingNode : this) {
shards.addAll(routingNode.shardsWithState(state));
}
for (ShardRoutingState s : state) {
if (s == ShardRoutingState.UNASSIGNED) {
unassigned().forEach(shards::add);
break;
}
}
return shards;
}
public List shardsWithState(String index, ShardRoutingState... state) {
// TODO these are used on tests only - move into utils class
List shards = new ArrayList<>();
for (RoutingNode routingNode : this) {
shards.addAll(routingNode.shardsWithState(index, state));
}
for (ShardRoutingState s : state) {
if (s == ShardRoutingState.UNASSIGNED) {
for (ShardRouting unassignedShard : unassignedShards) {
if (unassignedShard.index().getName().equals(index)) {
shards.add(unassignedShard);
}
}
break;
}
}
return shards;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("routing_nodes:\n");
for (RoutingNode routingNode : this) {
sb.append(routingNode.prettyPrint());
}
sb.append("---- unassigned\n");
for (ShardRouting shardEntry : unassignedShards) {
sb.append("--------").append(shardEntry.shortSummary()).append('\n');
}
return sb.toString();
}
/**
* Moves a shard from unassigned to initialize state
*
* @param existingAllocationId allocation id to use. If null, a fresh allocation id is generated.
* @return the initialized shard
*/
public ShardRouting initializeShard(ShardRouting unassignedShard, String nodeId, @Nullable String existingAllocationId,
long expectedSize, RoutingChangesObserver routingChangesObserver) {
ensureMutable();
assert unassignedShard.unassigned() : "expected an unassigned shard " + unassignedShard;
ShardRouting initializedShard = unassignedShard.initialize(nodeId, existingAllocationId, expectedSize);
node(nodeId).add(initializedShard);
inactiveShardCount++;
if (initializedShard.primary()) {
inactivePrimaryCount++;
}
addRecovery(initializedShard);
assignedShardsAdd(initializedShard);
routingChangesObserver.shardInitialized(unassignedShard, initializedShard);
return initializedShard;
}
/**
* Relocate a shard to another node, adding the target initializing
* shard as well as assigning it.
*
* @return pair of source relocating and target initializing shards.
*/
public Tuple relocateShard(ShardRouting startedShard, String nodeId, long expectedShardSize,
RoutingChangesObserver changes) {
ensureMutable();
relocatingShards++;
ShardRouting source = startedShard.relocate(nodeId, expectedShardSize);
ShardRouting target = source.getTargetRelocatingShard();
updateAssigned(startedShard, source);
node(target.currentNodeId()).add(target);
assignedShardsAdd(target);
addRecovery(target);
changes.relocationStarted(startedShard, target);
return Tuple.tuple(source, target);
}
/**
* Applies the relevant logic to start an initializing shard.
*
* Moves the initializing shard to started. If the shard is a relocation target, also removes the relocation source.
*
* @return the started shard
*/
public ShardRouting startShard(Logger logger, ShardRouting initializingShard, RoutingChangesObserver routingChangesObserver) {
ensureMutable();
ShardRouting startedShard = started(initializingShard);
logger.trace("{} marked shard as started (routing: {})", initializingShard.shardId(), initializingShard);
routingChangesObserver.shardStarted(initializingShard, startedShard);
if (initializingShard.relocatingNodeId() != null) {
// relocation target has been started, remove relocation source
RoutingNode relocationSourceNode = node(initializingShard.relocatingNodeId());
ShardRouting relocationSourceShard = relocationSourceNode.getByShardId(initializingShard.shardId());
assert relocationSourceShard.isRelocationSourceOf(initializingShard);
assert relocationSourceShard.getTargetRelocatingShard() == initializingShard : "relocation target mismatch, expected: "
+ initializingShard + " but was: " + relocationSourceShard.getTargetRelocatingShard();
remove(relocationSourceShard);
routingChangesObserver.relocationCompleted(relocationSourceShard);
}
return startedShard;
}
/**
* Applies the relevant logic to handle a cancelled or failed shard.
*
* Moves the shard to unassigned or completely removes the shard (if relocation target).
*
* - If shard is a primary, this also fails initializing replicas.
* - If shard is an active primary, this also promotes an active replica to primary (if such a replica exists).
* - If shard is a relocating primary, this also removes the primary relocation target shard.
* - If shard is a relocating replica, this promotes the replica relocation target to a full initializing replica, removing the
* relocation source information. This is possible as peer recovery is always done from the primary.
* - If shard is a (primary or replica) relocation target, this also clears the relocation information on the source shard.
*
*/
public void failShard(Logger logger, ShardRouting failedShard, UnassignedInfo unassignedInfo, IndexMetaData indexMetaData,
RoutingChangesObserver routingChangesObserver) {
ensureMutable();
assert failedShard.assignedToNode() : "only assigned shards can be failed";
assert indexMetaData.getIndex().equals(failedShard.index()) :
"shard failed for unknown index (shard entry: " + failedShard + ")";
assert getByAllocationId(failedShard.shardId(), failedShard.allocationId().getId()) == failedShard :
"shard routing to fail does not exist in routing table, expected: " + failedShard + " but was: " +
getByAllocationId(failedShard.shardId(), failedShard.allocationId().getId());
logger.debug("{} failing shard {} with unassigned info ({})", failedShard.shardId(), failedShard, unassignedInfo.shortSummary());
// if this is a primary, fail initializing replicas first (otherwise we move RoutingNodes into an inconsistent state)
if (failedShard.primary()) {
List assignedShards = assignedShards(failedShard.shardId());
if (assignedShards.isEmpty() == false) {
// copy list to prevent ConcurrentModificationException
for (ShardRouting routing : new ArrayList<>(assignedShards)) {
if (!routing.primary() && routing.initializing()) {
// re-resolve replica as earlier iteration could have changed source/target of replica relocation
ShardRouting replicaShard = getByAllocationId(routing.shardId(), routing.allocationId().getId());
assert replicaShard != null : "failed to re-resolve " + routing + " when failing replicas";
UnassignedInfo primaryFailedUnassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.PRIMARY_FAILED,
"primary failed while replica initializing", null, 0, unassignedInfo.getUnassignedTimeInNanos(),
unassignedInfo.getUnassignedTimeInMillis(), false, AllocationStatus.NO_ATTEMPT);
failShard(logger, replicaShard, primaryFailedUnassignedInfo, indexMetaData, routingChangesObserver);
}
}
}
}
if (failedShard.relocating()) {
// find the shard that is initializing on the target node
ShardRouting targetShard = getByAllocationId(failedShard.shardId(), failedShard.allocationId().getRelocationId());
assert targetShard.isRelocationTargetOf(failedShard);
if (failedShard.primary()) {
logger.trace("{} is removed due to the failure/cancellation of the source shard", targetShard);
// cancel and remove target shard
remove(targetShard);
routingChangesObserver.shardFailed(targetShard, unassignedInfo);
} else {
logger.trace("{}, relocation source failed / cancelled, mark as initializing without relocation source", targetShard);
// promote to initializing shard without relocation source and ensure that removed relocation source
// is not added back as unassigned shard
removeRelocationSource(targetShard);
routingChangesObserver.relocationSourceRemoved(targetShard);
}
}
// fail actual shard
if (failedShard.initializing()) {
if (failedShard.relocatingNodeId() == null) {
if (failedShard.primary()) {
// promote active replica to primary if active replica exists (only the case for shadow replicas)
ShardRouting activeReplica = activeReplica(failedShard.shardId());
assert activeReplica == null || IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.getSettings()) :
"initializing primary [" + failedShard + "] with active replicas [" + activeReplica + "] only expected when " +
"using shadow replicas";
if (activeReplica == null) {
moveToUnassigned(failedShard, unassignedInfo);
} else {
movePrimaryToUnassignedAndDemoteToReplica(failedShard, unassignedInfo);
promoteReplicaToPrimary(activeReplica, indexMetaData, routingChangesObserver);
}
} else {
// initializing shard that is not relocation target, just move to unassigned
moveToUnassigned(failedShard, unassignedInfo);
}
} else {
// The shard is a target of a relocating shard. In that case we only need to remove the target shard and cancel the source
// relocation. No shard is left unassigned
logger.trace("{} is a relocation target, resolving source to cancel relocation ({})", failedShard,
unassignedInfo.shortSummary());
ShardRouting sourceShard = getByAllocationId(failedShard.shardId(),
failedShard.allocationId().getRelocationId());
assert sourceShard.isRelocationSourceOf(failedShard);
logger.trace("{}, resolved source to [{}]. canceling relocation ... ({})", failedShard.shardId(), sourceShard,
unassignedInfo.shortSummary());
cancelRelocation(sourceShard);
remove(failedShard);
}
routingChangesObserver.shardFailed(failedShard, unassignedInfo);
} else {
assert failedShard.active();
if (failedShard.primary()) {
// promote active replica to primary if active replica exists
ShardRouting activeReplica = activeReplica(failedShard.shardId());
if (activeReplica == null) {
moveToUnassigned(failedShard, unassignedInfo);
} else {
movePrimaryToUnassignedAndDemoteToReplica(failedShard, unassignedInfo);
promoteReplicaToPrimary(activeReplica, indexMetaData, routingChangesObserver);
}
} else {
assert failedShard.primary() == false;
if (failedShard.relocating()) {
remove(failedShard);
} else {
moveToUnassigned(failedShard, unassignedInfo);
}
}
routingChangesObserver.shardFailed(failedShard, unassignedInfo);
}
assert node(failedShard.currentNodeId()).getByShardId(failedShard.shardId()) == null : "failedShard " + failedShard +
" was matched but wasn't removed";
}
private void promoteReplicaToPrimary(ShardRouting activeReplica, IndexMetaData indexMetaData,
RoutingChangesObserver routingChangesObserver) {
// if the activeReplica was relocating before this call to failShard, its relocation was cancelled earlier when we
// failed initializing replica shards (and moved replica relocation source back to started)
assert activeReplica.started() : "replica relocation should have been cancelled: " + activeReplica;
ShardRouting primarySwappedCandidate = promoteActiveReplicaShardToPrimary(activeReplica);
routingChangesObserver.replicaPromoted(activeReplica);
if (IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.getSettings())) {
ShardRouting initializedShard = reinitShadowPrimary(primarySwappedCandidate);
routingChangesObserver.startedPrimaryReinitialized(primarySwappedCandidate, initializedShard);
}
}
/**
* Mark a shard as started and adjusts internal statistics.
*
* @return the started shard
*/
private ShardRouting started(ShardRouting shard) {
assert shard.initializing() : "expected an initializing shard " + shard;
if (shard.relocatingNodeId() == null) {
// if this is not a target shard for relocation, we need to update statistics
inactiveShardCount--;
if (shard.primary()) {
inactivePrimaryCount--;
}
}
removeRecovery(shard);
ShardRouting startedShard = shard.moveToStarted();
updateAssigned(shard, startedShard);
return startedShard;
}
/**
* Cancels a relocation of a shard that shard must relocating.
*
* @return the shard after cancelling relocation
*/
private ShardRouting cancelRelocation(ShardRouting shard) {
relocatingShards--;
ShardRouting cancelledShard = shard.cancelRelocation();
updateAssigned(shard, cancelledShard);
return cancelledShard;
}
/**
* moves the assigned replica shard to primary.
*
* @param replicaShard the replica shard to be promoted to primary
* @return the resulting primary shard
*/
private ShardRouting promoteActiveReplicaShardToPrimary(ShardRouting replicaShard) {
assert replicaShard.active() : "non-active shard cannot be promoted to primary: " + replicaShard;
assert replicaShard.primary() == false : "primary shard cannot be promoted to primary: " + replicaShard;
ShardRouting primaryShard = replicaShard.moveActiveReplicaToPrimary();
updateAssigned(replicaShard, primaryShard);
return primaryShard;
}
private static final List EMPTY = Collections.emptyList();
/**
* Cancels the give shard from the Routing nodes internal statistics and cancels
* the relocation if the shard is relocating.
*/
private void remove(ShardRouting shard) {
assert shard.unassigned() == false : "only assigned shards can be removed here (" + shard + ")";
node(shard.currentNodeId()).remove(shard);
if (shard.initializing() && shard.relocatingNodeId() == null) {
inactiveShardCount--;
assert inactiveShardCount >= 0;
if (shard.primary()) {
inactivePrimaryCount--;
}
} else if (shard.relocating()) {
shard = cancelRelocation(shard);
}
assignedShardsRemove(shard);
if (shard.initializing()) {
removeRecovery(shard);
}
}
/**
* Removes relocation source of an initializing non-primary shard. This allows the replica shard to continue recovery from
* the primary even though its non-primary relocation source has failed.
*/
private ShardRouting removeRelocationSource(ShardRouting shard) {
assert shard.isRelocationTarget() : "only relocation target shards can have their relocation source removed (" + shard + ")";
ShardRouting relocationMarkerRemoved = shard.removeRelocationSource();
updateAssigned(shard, relocationMarkerRemoved);
inactiveShardCount++; // relocation targets are not counted as inactive shards whereas initializing shards are
return relocationMarkerRemoved;
}
private void assignedShardsAdd(ShardRouting shard) {
assert shard.unassigned() == false : "unassigned shard " + shard + " cannot be added to list of assigned shards";
List shards = assignedShards.computeIfAbsent(shard.shardId(), k -> new ArrayList<>());
assert assertInstanceNotInList(shard, shards) : "shard " + shard + " cannot appear twice in list of assigned shards";
shards.add(shard);
}
private boolean assertInstanceNotInList(ShardRouting shard, List shards) {
for (ShardRouting s : shards) {
assert s != shard;
}
return true;
}
private void assignedShardsRemove(ShardRouting shard) {
final List replicaSet = assignedShards.get(shard.shardId());
if (replicaSet != null) {
final Iterator iterator = replicaSet.iterator();
while(iterator.hasNext()) {
// yes we check identity here
if (shard == iterator.next()) {
iterator.remove();
return;
}
}
}
assert false : "No shard found to remove";
}
private ShardRouting reinitShadowPrimary(ShardRouting candidate) {
if (candidate.relocating()) {
cancelRelocation(candidate);
}
ShardRouting reinitializedShard = candidate.reinitializePrimaryShard();
updateAssigned(candidate, reinitializedShard);
inactivePrimaryCount++;
inactiveShardCount++;
addRecovery(reinitializedShard);
return reinitializedShard;
}
private void updateAssigned(ShardRouting oldShard, ShardRouting newShard) {
assert oldShard.shardId().equals(newShard.shardId()) :
"can only update " + oldShard + " by shard with same shard id but was " + newShard;
assert oldShard.unassigned() == false && newShard.unassigned() == false :
"only assigned shards can be updated in list of assigned shards (prev: " + oldShard + ", new: " + newShard + ")";
assert oldShard.currentNodeId().equals(newShard.currentNodeId()) : "shard to update " + oldShard +
" can only update " + oldShard + " by shard assigned to same node but was " + newShard;
node(oldShard.currentNodeId()).update(oldShard, newShard);
List shardsWithMatchingShardId = assignedShards.computeIfAbsent(oldShard.shardId(), k -> new ArrayList<>());
int previousShardIndex = shardsWithMatchingShardId.indexOf(oldShard);
assert previousShardIndex >= 0 : "shard to update " + oldShard + " does not exist in list of assigned shards";
shardsWithMatchingShardId.set(previousShardIndex, newShard);
}
private ShardRouting moveToUnassigned(ShardRouting shard, UnassignedInfo unassignedInfo) {
assert shard.unassigned() == false : "only assigned shards can be moved to unassigned (" + shard + ")";
remove(shard);
ShardRouting unassigned = shard.moveToUnassigned(unassignedInfo);
unassignedShards.add(unassigned);
return unassigned;
}
/**
* Moves assigned primary to unassigned and demotes it to a replica.
* Used in conjunction with {@link #promoteActiveReplicaShardToPrimary} when an active replica is promoted to primary.
*/
private ShardRouting movePrimaryToUnassignedAndDemoteToReplica(ShardRouting shard, UnassignedInfo unassignedInfo) {
assert shard.unassigned() == false : "only assigned shards can be moved to unassigned (" + shard + ")";
assert shard.primary() : "only primary can be demoted to replica (" + shard + ")";
remove(shard);
ShardRouting unassigned = shard.moveToUnassigned(unassignedInfo).moveUnassignedFromPrimary();
unassignedShards.add(unassigned);
return unassigned;
}
/**
* Returns the number of routing nodes
*/
public int size() {
return nodesToShards.size();
}
public static final class UnassignedShards implements Iterable {
private final RoutingNodes nodes;
private final List unassigned;
private final List ignored;
private int primaries = 0;
private int ignoredPrimaries = 0;
public UnassignedShards(RoutingNodes nodes) {
this.nodes = nodes;
unassigned = new ArrayList<>();
ignored = new ArrayList<>();
}
public void add(ShardRouting shardRouting) {
if(shardRouting.primary()) {
primaries++;
}
unassigned.add(shardRouting);
}
public void sort(Comparator comparator) {
nodes.ensureMutable();
CollectionUtil.timSort(unassigned, comparator);
}
/**
* Returns the size of the non-ignored unassigned shards
*/
public int size() { return unassigned.size(); }
/**
* Returns the size of the temporarily marked as ignored unassigned shards
*/
public int ignoredSize() { return ignored.size(); }
/**
* Returns the number of non-ignored unassigned primaries
*/
public int getNumPrimaries() {
return primaries;
}
/**
* Returns the number of temporarily marked as ignored unassigned primaries
*/
public int getNumIgnoredPrimaries() { return ignoredPrimaries; }
@Override
public UnassignedIterator iterator() {
return new UnassignedIterator();
}
/**
* The list of ignored unassigned shards (read only). The ignored unassigned shards
* are not part of the formal unassigned list, but are kept around and used to build
* back the list of unassigned shards as part of the routing table.
*/
public List ignored() {
return Collections.unmodifiableList(ignored);
}
/**
* Marks a shard as temporarily ignored and adds it to the ignore unassigned list.
* Should be used with caution, typically,
* the correct usage is to removeAndIgnore from the iterator.
* @see #ignored()
* @see UnassignedIterator#removeAndIgnore(AllocationStatus, RoutingChangesObserver)
* @see #isIgnoredEmpty()
*/
public void ignoreShard(ShardRouting shard, AllocationStatus allocationStatus, RoutingChangesObserver changes) {
nodes.ensureMutable();
if (shard.primary()) {
ignoredPrimaries++;
UnassignedInfo currInfo = shard.unassignedInfo();
assert currInfo != null;
if (allocationStatus.equals(currInfo.getLastAllocationStatus()) == false) {
UnassignedInfo newInfo = new UnassignedInfo(currInfo.getReason(), currInfo.getMessage(), currInfo.getFailure(),
currInfo.getNumFailedAllocations(), currInfo.getUnassignedTimeInNanos(),
currInfo.getUnassignedTimeInMillis(), currInfo.isDelayed(),
allocationStatus);
ShardRouting updatedShard = shard.updateUnassigned(newInfo, shard.recoverySource());
changes.unassignedInfoUpdated(shard, newInfo);
shard = updatedShard;
}
}
ignored.add(shard);
}
public class UnassignedIterator implements Iterator {
private final ListIterator iterator;
private ShardRouting current;
public UnassignedIterator() {
this.iterator = unassigned.listIterator();
}
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public ShardRouting next() {
return current = iterator.next();
}
/**
* Initializes the current unassigned shard and moves it from the unassigned list.
*
* @param existingAllocationId allocation id to use. If null, a fresh allocation id is generated.
*/
public ShardRouting initialize(String nodeId, @Nullable String existingAllocationId, long expectedShardSize,
RoutingChangesObserver routingChangesObserver) {
nodes.ensureMutable();
innerRemove();
return nodes.initializeShard(current, nodeId, existingAllocationId, expectedShardSize, routingChangesObserver);
}
/**
* Removes and ignores the unassigned shard (will be ignored for this run, but
* will be added back to unassigned once the metadata is constructed again).
* Typically this is used when an allocation decision prevents a shard from being allocated such
* that subsequent consumers of this API won't try to allocate this shard again.
*
* @param attempt the result of the allocation attempt
*/
public void removeAndIgnore(AllocationStatus attempt, RoutingChangesObserver changes) {
nodes.ensureMutable();
innerRemove();
ignoreShard(current, attempt, changes);
}
private void updateShardRouting(ShardRouting shardRouting) {
current = shardRouting;
iterator.set(shardRouting);
}
/**
* updates the unassigned info and recovery source on the current unassigned shard
*
* @param unassignedInfo the new unassigned info to use
* @param recoverySource the new recovery source to use
* @return the shard with unassigned info updated
*/
public ShardRouting updateUnassigned(UnassignedInfo unassignedInfo, RecoverySource recoverySource,
RoutingChangesObserver changes) {
nodes.ensureMutable();
ShardRouting updatedShardRouting = current.updateUnassigned(unassignedInfo, recoverySource);
changes.unassignedInfoUpdated(current, unassignedInfo);
updateShardRouting(updatedShardRouting);
return updatedShardRouting;
}
/**
* Unsupported operation, just there for the interface. Use {@link #removeAndIgnore(AllocationStatus, RoutingChangesObserver)} or
* {@link #initialize(String, String, long, RoutingChangesObserver)}.
*/
@Override
public void remove() {
throw new UnsupportedOperationException("remove is not supported in unassigned iterator, use removeAndIgnore or initialize");
}
private void innerRemove() {
iterator.remove();
if (current.primary()) {
primaries--;
}
}
}
/**
* Returns true
iff this collection contains one or more non-ignored unassigned shards.
*/
public boolean isEmpty() {
return unassigned.isEmpty();
}
/**
* Returns true
iff any unassigned shards are marked as temporarily ignored.
* @see UnassignedShards#ignoreShard(ShardRouting, AllocationStatus, RoutingChangesObserver)
* @see UnassignedIterator#removeAndIgnore(AllocationStatus, RoutingChangesObserver)
*/
public boolean isIgnoredEmpty() {
return ignored.isEmpty();
}
public void shuffle() {
nodes.ensureMutable();
Randomness.shuffle(unassigned);
}
/**
* Drains all unassigned shards and returns it.
* This method will not drain ignored shards.
*/
public ShardRouting[] drain() {
nodes.ensureMutable();
ShardRouting[] mutableShardRoutings = unassigned.toArray(new ShardRouting[unassigned.size()]);
unassigned.clear();
primaries = 0;
return mutableShardRoutings;
}
}
/**
* Calculates RoutingNodes statistics by iterating over all {@link ShardRouting}s
* in the cluster to ensure the book-keeping is correct.
* For performance reasons, this should only be called from asserts
*
* @return this method always returns true
or throws an assertion error. If assertion are not enabled
* this method does nothing.
*/
public static boolean assertShardStats(RoutingNodes routingNodes) {
boolean run = false;
assert (run = true); // only run if assertions are enabled!
if (!run) {
return true;
}
int unassignedPrimaryCount = 0;
int unassignedIgnoredPrimaryCount = 0;
int inactivePrimaryCount = 0;
int inactiveShardCount = 0;
int relocating = 0;
Map indicesAndShards = new HashMap<>();
for (RoutingNode node : routingNodes) {
for (ShardRouting shard : node) {
if (shard.initializing() && shard.relocatingNodeId() == null) {
inactiveShardCount++;
if (shard.primary()) {
inactivePrimaryCount++;
}
}
if (shard.relocating()) {
relocating++;
}
Integer i = indicesAndShards.get(shard.index());
if (i == null) {
i = shard.id();
}
indicesAndShards.put(shard.index(), Math.max(i, shard.id()));
}
}
// Assert that the active shard routing are identical.
Set> entries = indicesAndShards.entrySet();
final List shards = new ArrayList<>();
for (Map.Entry e : entries) {
Index index = e.getKey();
for (int i = 0; i < e.getValue(); i++) {
for (RoutingNode routingNode : routingNodes) {
for (ShardRouting shardRouting : routingNode) {
if (shardRouting.index().equals(index) && shardRouting.id() == i) {
shards.add(shardRouting);
}
}
}
List mutableShardRoutings = routingNodes.assignedShards(new ShardId(index, i));
assert mutableShardRoutings.size() == shards.size();
for (ShardRouting r : mutableShardRoutings) {
assert shards.contains(r);
shards.remove(r);
}
assert shards.isEmpty();
}
}
for (ShardRouting shard : routingNodes.unassigned()) {
if (shard.primary()) {
unassignedPrimaryCount++;
}
}
for (ShardRouting shard : routingNodes.unassigned().ignored()) {
if (shard.primary()) {
unassignedIgnoredPrimaryCount++;
}
}
for (Map.Entry recoveries : routingNodes.recoveriesPerNode.entrySet()) {
String node = recoveries.getKey();
final Recoveries value = recoveries.getValue();
int incoming = 0;
int outgoing = 0;
RoutingNode routingNode = routingNodes.nodesToShards.get(node);
if (routingNode != null) { // node might have dropped out of the cluster
for (ShardRouting routing : routingNode) {
if (routing.initializing()) {
incoming++;
}
if (routing.primary() && routing.isRelocationTarget() == false) {
for (ShardRouting assigned : routingNodes.assignedShards.get(routing.shardId())) {
if (assigned.initializing() && assigned.recoverySource().getType() == RecoverySource.Type.PEER) {
outgoing++;
}
}
}
}
}
assert incoming == value.incoming : incoming + " != " + value.incoming + " node: " + routingNode;
assert outgoing == value.outgoing : outgoing + " != " + value.outgoing + " node: " + routingNode;
}
assert unassignedPrimaryCount == routingNodes.unassignedShards.getNumPrimaries() :
"Unassigned primaries is [" + unassignedPrimaryCount + "] but RoutingNodes returned unassigned primaries [" + routingNodes.unassigned().getNumPrimaries() + "]";
assert unassignedIgnoredPrimaryCount == routingNodes.unassignedShards.getNumIgnoredPrimaries() :
"Unassigned ignored primaries is [" + unassignedIgnoredPrimaryCount + "] but RoutingNodes returned unassigned ignored primaries [" + routingNodes.unassigned().getNumIgnoredPrimaries() + "]";
assert inactivePrimaryCount == routingNodes.inactivePrimaryCount :
"Inactive Primary count [" + inactivePrimaryCount + "] but RoutingNodes returned inactive primaries [" + routingNodes.inactivePrimaryCount + "]";
assert inactiveShardCount == routingNodes.inactiveShardCount :
"Inactive Shard count [" + inactiveShardCount + "] but RoutingNodes returned inactive shards [" + routingNodes.inactiveShardCount + "]";
assert routingNodes.getRelocatingShardCount() == relocating : "Relocating shards mismatch [" + routingNodes.getRelocatingShardCount() + "] but expected [" + relocating + "]";
return true;
}
private void ensureMutable() {
if (readOnly) {
throw new IllegalStateException("can't modify RoutingNodes - readonly");
}
}
/**
* Creates an iterator over shards interleaving between nodes: The iterator returns the first shard from
* the first node, then the first shard of the second node, etc. until one shard from each node has been returned.
* The iterator then resumes on the first node by returning the second shard and continues until all shards from
* all the nodes have been returned.
*/
public Iterator nodeInterleavedShardIterator() {
final Queue> queue = new ArrayDeque<>();
for (Map.Entry entry : nodesToShards.entrySet()) {
queue.add(entry.getValue().copyShards().iterator());
}
return new Iterator() {
public boolean hasNext() {
while (!queue.isEmpty()) {
if (queue.peek().hasNext()) {
return true;
}
queue.poll();
}
return false;
}
public ShardRouting next() {
if (hasNext() == false) {
throw new NoSuchElementException();
}
Iterator iter = queue.poll();
ShardRouting result = iter.next();
queue.offer(iter);
return result;
}
public void remove() {
throw new UnsupportedOperationException();
}
};
}
private static final class Recoveries {
private static final Recoveries EMPTY = new Recoveries();
private int incoming = 0;
private int outgoing = 0;
int getTotal() {
return incoming + outgoing;
}
void addOutgoing(int howMany) {
assert outgoing + howMany >= 0 : outgoing + howMany+ " must be >= 0";
outgoing += howMany;
}
void addIncoming(int howMany) {
assert incoming + howMany >= 0 : incoming + howMany+ " must be >= 0";
incoming += howMany;
}
int getOutgoing() {
return outgoing;
}
int getIncoming() {
return incoming;
}
public static Recoveries getOrAdd(Map map, String key) {
Recoveries recoveries = map.get(key);
if (recoveries == null) {
recoveries = new Recoveries();
map.put(key, recoveries);
}
return recoveries;
}
}
}