/*-
* Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle NoSQL
* Database made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle NoSQL Database for a copy of the license and
* additional information.
*/
package oracle.kv.impl.sna.masterBalance;
import java.rmi.NotBoundException;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
import java.util.logging.Logger;
import oracle.kv.impl.fault.InternalFaultException;
import oracle.kv.impl.rep.admin.RepNodeAdminAPI;
import oracle.kv.impl.sna.StorageNodeAgentAPI;
import oracle.kv.impl.sna.masterBalance.MasterBalanceManager.SNInfo;
import oracle.kv.impl.sna.masterBalance.MasterBalancingInterface.MDInfo;
import oracle.kv.impl.sna.masterBalance.MasterBalancingInterface.MasterLeaseInfo;
import oracle.kv.impl.sna.masterBalance.MasterBalancingInterface.StateInfo;
import oracle.kv.impl.sna.masterBalance.ReplicaLeaseManager.ReplicaLease;
import oracle.kv.impl.topo.Datacenter;
import oracle.kv.impl.topo.RepGroupId;
import oracle.kv.impl.topo.RepNode;
import oracle.kv.impl.topo.RepNodeId;
import oracle.kv.impl.topo.StorageNode;
import oracle.kv.impl.topo.StorageNodeId;
import oracle.kv.impl.topo.Topology;
import oracle.kv.impl.util.PollCondition;
import oracle.kv.impl.util.RateLimitingLogger;
import oracle.kv.impl.util.registry.RegistryUtils;
import com.sleepycat.je.rep.ReplicatedEnvironment;
import com.sleepycat.je.utilint.StoppableThread;
/**
* The thread that does the rebalancing when one is necessary. The thread
* reacts to changes that are posted in the stateTransitions queue and
* initiates master transfer operations in response to the state changes.
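 *
 * Definitions used throughout: the Master Density (MD) of an SN is the
 * percentage of its resident RNs that are masters; the Balanced Master
 * Density (BMD) is the MD at which the SN is considered balanced; and the
 * Post-Transfer Master Density (PTMD) of an SN is the MD it would have
 * after one more master was transferred to it.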
*/
class RebalanceThread extends StoppableThread {
private static final int THREAD_SOFT_SHUTDOWN_MS = 10000;
/**
* The time associated with a remote master lease. This is also the
 * amount of time associated with the master transfer request.
*/
private static final int MASTER_LEASE_MS = 5 * 60 * 1000;
/**
* The poll period used to poll for a rebalance in the absence of any
* HA state transitions by the nodes on the SN. This permits an imbalanced
 * SN to check whether other SNs that can help ameliorate the local
* imbalance have come online.
*/
private static final int POLL_PERIOD_DEFAULT_MS = 1 * 60 * 1000;
private static int pollPeriodMs = POLL_PERIOD_DEFAULT_MS;
/**
* The SN that is being brought into balance, by this rebalance thread.
*/
private final SNInfo snInfo;
/**
* The set of replica RNs at the SN.
*/
    private final Set<RepNodeId> activeReplicas;
/**
* The set of master RNs at the SN. Some subset of the masters may have
* replicaLeases associated with them, if they are in the midst of a master
* transfer.
*/
    private final Set<RepNodeId> activeMasters;
/**
* The shared topo cache at the SN
*/
private final TopoCache topoCache;
/**
* The leases on an RN as it transitions from master to replica as part of
* a master transfer operation.
*/
private final ReplicaLeaseManager replicaLeases;
/*
* Determines if this SN has been contacted by overcommitted SNs that
* could potentially transfer masters to it.
*/
private final AtomicBoolean overloadedNeighbor = new AtomicBoolean(false);
/**
* The queue used to post state changes to the thread that attempts to
* correct a Master Density (MD) imbalance.
*/
    private final BlockingQueue<StateInfo> stateTransitions =
        new ArrayBlockingQueue<>(100);
/**
* The master balance manager associated with this thread. The
* RebalanceThread is a component of the MasterBalanceManager.
*/
private final MasterBalanceManager manager;
private final Logger logger;
    private final RateLimitingLogger<String> limitingLogger;
/*
* Boolean variable used to indicate whether this SN will be shutting down
* its RNs or not. Used to prevent gaining master leases from this SN when
* the RNs in this SN will be shutting down.
*/
private final AtomicBoolean shutdownServices = new AtomicBoolean(false);
RebalanceThread(MasterBalanceManager manager) {
super("MasterRebalanceThread");
snInfo = manager.getSnInfo();
topoCache = manager.getTopoCache();
this.manager = manager;
        activeReplicas = Collections.synchronizedSet(new HashSet<RepNodeId>());
        activeMasters = Collections.synchronizedSet(new HashSet<RepNodeId>());
this.logger = manager.logger;
limitingLogger = new RateLimitingLogger<>(60 * 10 * 1000, 10, logger);
replicaLeases = new ReplicaLeaseManager(logger);
}
@Override
protected Logger getLogger() {
return logger;
}
/**
* Used to vary the poll period in unit tests.
*/
static void setPollPeriodMs(int pollPeriodMs) {
RebalanceThread.pollPeriodMs = pollPeriodMs;
}
/**
* Take note of an RN that has exited by simulating a state change to
* DETACHED
*/
void noteExit(RepNodeId rnId)
throws InterruptedException {
logger.info("Rebalance thread notes " + rnId + " exited");
noteState(new StateInfo(rnId, ReplicatedEnvironment.State.DETACHED, 0));
}
/**
* Take note of the new state placing it in the stateTransitions queue.
*/
void noteState(StateInfo stateInfo)
throws InterruptedException {
while (!stateTransitions.offer(stateInfo, 60, TimeUnit.SECONDS)) {
logger.info("State transition queue is full retrying. " +
"Capacity:" + stateTransitions.size());
}
logger.info("added:" + stateInfo);
}
/**
* Part of the thread shutdown protocol.
*/
@Override
protected int initiateSoftShutdown() {
if (!manager.shutdown.get()) {
throw new IllegalStateException("Expected manager to be shutdown");
}
/* Provoke the thread into looking at the shutdown state. */
stateTransitions.offer(new StateInfo(null, null, 0));
return RebalanceThread.THREAD_SOFT_SHUTDOWN_MS;
}
/**
* Returns true if this SN could potentially be rebalanced.
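     * For example, an SN hosting 3 RNs, 2 of them masters, has a
     * lease-adjusted MD of 66; with a primary replication factor of 3 the
     * BMD is 33, so the SN needs rebalancing.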
*/
private boolean needsRebalancing(boolean overload) {
if (activeMasters.size() == 0) {
return false;
}
final int leaseAdjustedMD = getLeaseAdjustedMD();
final int BMD = getBMD();
boolean needsRebalancing = (leaseAdjustedMD > BMD)/* Imbalance */;
/*
         * When BMD > 0, the SN is allowed to host at least one master, so
         * there is no need to rebalance when the SN hosts only a single
         * master.
*/
if (BMD > 0) {
needsRebalancing = needsRebalancing &&
(activeMasters.size() > 1) /* More than one master. */;
}
if (needsRebalancing) {
limitingLogger.log("Needs Rebalancing " +
topoCache.getTopology().getSequenceNumber(),
Level.INFO,
snInfo.snId + " masters: " + activeMasters +
" replica leases: " + replicaLeases.leaseCount()+
" resident RNs: " + topoCache.getRnCount() +
" needs rebalancing. lease adjusted MD: " +
leaseAdjustedMD + " > BMD:" + BMD);
return true;
}
/**
         * If one of this SN's neighbors is imbalanced, try harder and seek to
         * move a master off this SN, so that the imbalanced neighbor can in
         * turn move a master to this SN. Note that, as currently implemented,
         * the imbalance is only propagated to immediate neighbors as a
         * simplification. In the future, SNs may propagate imbalance to their
         * neighbors in turn, if they cannot move a master RN off the SN based
         * on local considerations.
*/
final int incrementalMD = (100 / topoCache.getRnCount());
if (overload &&
((leaseAdjustedMD + incrementalMD) > BMD)) {
limitingLogger.log("Overload " +
topoCache.getTopology().getSequenceNumber(),
Level.INFO,
snInfo.snId + " masters: " + activeMasters +
" replica leases: " + replicaLeases.leaseCount()+
" resident RNs: " + topoCache.getRnCount() +
" Unbalanced neighbors initiated rebalancing. " +
"lease adjusted MD: " + leaseAdjustedMD +
" + " + incrementalMD + "> BMD:"
+ BMD);
return true;
}
return false;
}
/**
* The run thread. It reacts to state changes posted to the
* stateTransitions queue.
*/
@Override
public void run() {
logger.info("Started " + this.getName());
try {
while (true) {
/*
* Wait for RN state transitions
*/
final StateInfo stateInfo =
stateTransitions.poll(pollPeriodMs, TimeUnit.MILLISECONDS);
/* Check if "released" for shutdown. */
if (manager.shutdown.get()) {
return;
}
if (stateInfo != null) {
processStateInfo(stateInfo);
}
if (!topoCache.ensureTopology()) {
continue;
}
final boolean overload = overloadedNeighbor.getAndSet(false);
if (!needsRebalancing(overload)) {
continue;
}
                final List<Transfer> choice = candidateTransfers(overload);
if (choice.size() > 0) {
if (transferMaster(choice) != null) {
continue;
}
}
/* needs balancing, but no master transfer. */
final String masters =
                    new ArrayList<>(activeMasters).toString();
limitingLogger.log("No RNs " +
topoCache.getTopology().getSequenceNumber(),
Level.INFO,
"No suitable RNs: " + masters +
" for master transfer.");
if ((replicaLeases.leaseCount() == 0) &&
(getRawMD() > getCurrentAffinityBMD())) {
/*
                     * Unbalanced, but no progress; inform our MCSN neighbors
* of this predicament in case they can help by
* vacating a master RN
*/
informNeighborsOfOverload();
}
}
} catch (InterruptedException e) {
if (manager.shutdown.get()) {
return;
}
logger.info("Lease expiration task interrupted.");
} catch (Exception e) {
logger.log(Level.SEVERE, this.getName() +
" thread exiting due to exception.", e);
/* Shutdown the manager. */
manager.shutdown();
} finally {
logger.info(this.getName() + " thread exited.");
}
}
private void processStateInfo(final StateInfo stateInfo) {
/* Process the state change. */
final RepNodeId rnId = stateInfo.rnId;
switch (stateInfo.state) {
case MASTER:
activeReplicas.remove(rnId);
activeMasters.add(rnId);
break;
case REPLICA:
activeMasters.remove(rnId);
activeReplicas.add(rnId);
replicaLeases.cancel(rnId);
break;
case UNKNOWN:
case DETACHED:
activeMasters.remove(rnId);
activeReplicas.remove(rnId);
replicaLeases.cancel(rnId);
break;
}
logger.info("sn: " + snInfo.snId +
" state transition: " + stateInfo +
" active masters:" + activeMasters +
" active replicas:" + activeReplicas.size() +
" replica leases:" + replicaLeases.leaseCount());
}
/**
* Returns true if the node is known to be the master
*/
@SuppressWarnings("unused")
private boolean isMaster(RepNodeId repNodeId) {
return activeMasters.contains(repNodeId);
}
/**
* Returns true if the node is known to be a replica
*/
boolean isReplica(RepNodeId repNodeId) {
return activeReplicas.contains(repNodeId);
}
/**
* Request a single master transfer from the ordered list of transfer
* candidates.
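     * For each candidate, in order: (1) acquire a master lease at the
     * target SN, (2) take a local replica lease on the source RN, and (3)
     * ask the source RN to initiate the master transfer. If the transfer
     * is not initiated, both leases are cancelled before moving on to the
     * next candidate.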
*/
    private Transfer transferMaster(List<Transfer> candidateTransfers) {
final Topology topo = topoCache.getTopology();
final RegistryUtils regUtils =
new RegistryUtils(topo, manager.getLoginManager());
final StorageNode localSN = topo.get(snInfo.snId);
for (Transfer transfer : candidateTransfers) {
/* Contact the local RN and request the transfer. */
logger.info("Requesting master RN transfer : " + transfer);
StorageNodeAgentAPI sna = null;
RepNodeId sourceRNId = null;
boolean masterTransferInitiated = false;
try {
sna = regUtils.getStorageNodeAgent
(transfer.targetSN.getResourceId());
final MasterLeaseInfo masterLease =
new MasterLeaseInfo(localSN,
transfer.targetRN,
transfer.ptmd,
MASTER_LEASE_MS);
if (!sna.getMasterLease(masterLease)) {
continue;
}
sourceRNId = transfer.sourceRN.getResourceId();
                final ReplicaLease replicaLease =
                    new ReplicaLeaseManager.ReplicaLease(sourceRNId,
                                                         MASTER_LEASE_MS);
replicaLeases.getReplicaLease(replicaLease);
RepNodeAdminAPI rna = regUtils.getRepNodeAdmin(sourceRNId);
masterTransferInitiated =
rna.initiateMasterTransfer(transfer.targetRN.getResourceId(),
MASTER_LEASE_MS,
TimeUnit.MILLISECONDS);
if (masterTransferInitiated) {
return transfer;
}
} catch (RemoteException e) {
continue;
} catch (NotBoundException e) {
continue;
} catch (InternalFaultException re) {
/* An internal fault in a remote request, log and proceed. */
logger.log(Level.WARNING,
"Unexpected fault during transfer:" + transfer, re);
continue;
} finally {
if (!masterTransferInitiated) {
/* Clean up leases. */
if (sna != null) {
try {
sna.cancelMasterLease(localSN, transfer.targetRN);
} catch (RemoteException e) {
logger.info("Failed master lease cleanup: " +
e.getMessage());
} catch (InternalFaultException re) {
/*
* An internal fault in a remote request, log and
* proceed.
*/
logger.log(Level.WARNING,
"Failed master lease cleanup:" +
transfer, re);
continue;
}
}
if (sourceRNId != null) {
replicaLeases.cancel(sourceRNId);
}
}
}
}
return null;
}
/**
* Selects suitable target SNs for the master transfer, if doing so will
* reduce the imbalance across the KVS.
*
* It contacts all the SNs in Master Candidate SNs (MCSNs) to get a
* snapshot of their current post-transfer MDs (the MD after an additional
* master has been transferred to it). If this candidate SN has a lower
* post-transfer MD than the post-transfer MD (the MD with one less master)
* of the overloaded SN, the SN is selected as a potential target for the
* master transfer. In the case of a tie, the selection process may use
* other criteria, like switch load balancing, as a tie breaker.
*
     * Note that if a neighbor SN is overloaded, the MD associated with this
     * SN is boosted by one master's worth in order to consider a master
     * transfer in situations where it otherwise might not.
*
* @param overload used to indicate whether a neighbor SN is overloaded
*
* @return an ordered list of potential transfers. The transfers are
* ordered by increasing Post-Transfer Master Density (PTMD)
*/
    private List<Transfer> candidateTransfers(boolean overload) {
        final Topology topology = topoCache.getTopology();
        final Set<StorageNode> mscns = getMCSNs();
        limitingLogger.log("MSCNS " + topology.getSequenceNumber(),
                           Level.INFO, "MSCNS:" + mscns);
        final TreeMap<Boolean, TreeMap<Integer, List<StorageNode>>>
            znAffinities = orderMSCNs(mscns);
        final List<Transfer> transfers = new LinkedList<>();
if (znAffinities.size() == 0) {
/* None of the SNs were available. */
return transfers;
}
/* Select an RN to move */
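        /*
         * loweredMD is this SN's MD after one master has moved off, with
         * the master count boosted by one under neighbor overload: e.g.,
         * 2 masters, no replica leases, no overload, and 3 resident RNs
         * yield ((2 - 1) * 100) / 3 = 33.
         */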
final int loweredMD =
((activeMasters.size() +
(overload ? 1 : 0) -
replicaLeases.leaseCount() - 1) * 100) /
topoCache.getRnCount();
        final boolean znAffinity =
            topology.getDatacenter(snInfo.snId).getMasterAffinity();
        for (Entry<Boolean, TreeMap<Integer, List<StorageNode>>>
                 znAffinityEntry : znAffinities.entrySet()) {
final boolean entryAffinity = znAffinityEntry.getKey();
/*
* No further candidates when the current SN has master affinity
* and the only SN candidates left don't have master affinity
*/
if (znAffinity && !entryAffinity) {
return transfers;
}
if (znAffinity == entryAffinity) {
if (loweredMD <= 0) {
/*
* No further candidates when transferring to SNs with the
* same master affinity and the transfer would move all
* masters from this SN. Note that we do allow such
* transfers when moving SNs from no master affinity to
* master affinity zones.
*/
return transfers;
}
/* Transfer to zone with the same master affinity */
                final TreeMap<Integer, List<StorageNode>> ptmds =
znAffinityEntry.getValue();
findCurrentAffinityCandidateTransfer(
ptmds, transfers, loweredMD, overload);
} else {
/* Transfer from non-affinity to affinity */
                final TreeMap<Integer, List<StorageNode>> ptmds =
znAffinityEntry.getValue();
findHigherAffinityCandidateTransfer(ptmds, transfers);
}
}
return transfers;
}
/**
     * Adds candidate transfers whose target zones have the same master
     * affinity as the current one.
*/
private void findCurrentAffinityCandidateTransfer(
        TreeMap<Integer, List<StorageNode>> ptmds,
        List<Transfer> transfers,
int loweredMD,
boolean overload) {
final Topology topology = topoCache.getTopology();
        for (final Entry<Integer, List<StorageNode>> entry :
ptmds.entrySet()) {
final int ptmd = entry.getKey();
limitingLogger.log("Lowered " + topology.getSequenceNumber() +
" " + ptmd,
Level.INFO,
"loweredMD:" + loweredMD + " ptmd:" + ptmd +
" SNS:" + entry.getValue());
/*
* If the PTMD of the target SN reaches a balanced state,
* allow a transfer to it, even if it results in a target with a
* lower MD than the one on this node. See SR 22888 for details.
*/
if ((ptmd > getCurrentAffinityBMD()) && (ptmd > loweredMD)) {
/* No further candidates. */
break;
}
if (overload && (ptmd > getCurrentAffinityBMD())) {
/*
* Don't push under neighbor overload if it will result in the
* target SN becoming overloaded.
*/
break;
}
findSNCandidateTransfers(entry.getValue(), transfers, ptmd);
}
}
/**
* Find the transfer candidates in the list of target SNs.
*/
    private void findSNCandidateTransfers(List<StorageNode> targetSNs,
                                          List<Transfer> transfers,
int ptmd) {
final Topology topology = topoCache.getTopology();
for (final StorageNode targetSN : targetSNs) {
final StorageNodeId targetSnId = targetSN.getResourceId();
/* Pick a master RN to transfer. */
for (RepNodeId mRnId : activeMasters) {
if (replicaLeases.hasLease(mRnId)) {
/* A MT for this RN is already in progress. */
continue;
}
final RepNode sourceRN = topology.get(mRnId);
if (sourceRN == null) {
/*
* The SNA heard about the master state transition before
* it had a chance to update its cached topology.
*/
continue;
}
/* Pick the target for the source. */
                final Collection<RepNode> groupNodes =
topology.get(sourceRN.getRepGroupId()).getRepNodes();
for (RepNode targetRN : groupNodes) {
if (targetRN.getStorageNodeId().equals(targetSnId)) {
final Transfer transfer =
new Transfer(sourceRN, targetRN, targetSN, ptmd);
transfers.add(transfer);
}
}
}
}
}
/**
     * Adds candidate transfers whose target zones have master affinity
     * when the current zone has no master affinity.
*/
private void findHigherAffinityCandidateTransfer(
        TreeMap<Integer, List<StorageNode>> ptmds,
        List<Transfer> transfers) {
final Topology topology = topoCache.getTopology();
        for (final Entry<Integer, List<StorageNode>> entry :
ptmds.entrySet()) {
final int ptmd = entry.getKey();
limitingLogger.log("Lowered " + topology.getSequenceNumber() +
" " + ptmd,
Level.INFO,
" ptmd:" + ptmd + " SNS:" + entry.getValue());
findSNCandidateTransfers(entry.getValue(), transfers, ptmd);
}
}
/**
* A comparator that sorts Boolean values in the opposite order, with True
* first.
*/
    private class TrueFirstComparator implements Comparator<Boolean> {
@Override
public int compare(Boolean o1, Boolean o2) {
return o2.compareTo(o1);
}
}
/**
* Returns the MSCNs ordered by their current zone master affinity/PTMDs.
* The main map is indexed by zone master affinity, with master affinity
* sorted first, and no affinity second. The map stored for each affinity
* is indexed by PTMD with lower values first. As a result, iterating
* through the nested maps will visit the most desirable candidates first.
* SNs that cannot be contacted are eliminated from this computation.
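     * For example, {true={50=[sn3]}, false={33=[sn1, sn2]}} visits sn3
     * first: its zone has master affinity, so it is preferred despite its
     * higher PTMD.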
*/
    private TreeMap<Boolean, TreeMap<Integer, List<StorageNode>>>
        orderMSCNs(Set<StorageNode> mscns) {
        final TreeMap<Boolean, TreeMap<Integer, List<StorageNode>>>
            znAffinities = new TreeMap<>(new TrueFirstComparator());
Topology topo = topoCache.getTopology();
for (StorageNode sn : mscns) {
try {
final StorageNodeAgentAPI sna =
RegistryUtils.getStorageNodeAgent(
snInfo.storename, sn, manager.getLoginManager());
MDInfo mdInfo = sna.getMDInfo();
if (mdInfo == null) {
continue;
}
final boolean znAffinity =
topo.getDatacenter(sn.getStorageNodeId()).
getMasterAffinity();
                TreeMap<Integer, List<StorageNode>> ptmds =
znAffinities.get(znAffinity);
if (ptmds == null) {
ptmds = new TreeMap<>();
znAffinities.put(znAffinity, ptmds);
}
final int ptmd = mdInfo.getPTMD();
                List<StorageNode> sns = ptmds.get(ptmd);
if (sns == null) {
                    sns = new LinkedList<>();
ptmds.put(ptmd, sns);
}
sns.add(sn);
} catch (RemoteException e) {
logger.info("Could not contact SN to compute PTMD: " + sn +
" reason:" + e.getMessage());
continue;
} catch (NotBoundException e) {
/* Should not happen. */
logger.warning(sn + " missing from registry.");
continue;
} catch (InternalFaultException re) {
/* An internal fault in a remote request, log and proceed. */
logger.log(Level.WARNING,
"Unexpected fault at remote SN:" + sn, re);
continue;
}
}
return znAffinities;
}
/**
* Returns the set of Master Candidate SNs (MCSNs) that are candidates for
* a MasterTransfer. That is, they host replica RNs that could trade state
* with master RNs at this SN.
*/
    private Set<StorageNode> getMCSNs() {
        final Set<StorageNode> mscns = new HashSet<>();
final Topology topology = topoCache.getTopology();
for (RepNodeId mRnId : activeMasters) {
final RepNode mrn = topology.get(mRnId);
if (mrn == null) {
/*
* A topology race, have heard of master but don't yet have
* an updated topology.
*/
continue;
}
final RepGroupId mRgId = mrn.getRepGroupId();
if (replicaLeases.hasLease(mRnId)) {
/* Already a master transfer candidate. */
continue;
}
for (RepNode rn : topology.get(mRgId).getRepNodes()) {
final StorageNodeId targetSNId = rn.getStorageNodeId();
if (snInfo.snId.equals(targetSNId)) {
continue;
}
final StorageNode targetSN = topology.get(targetSNId);
if (!topology.get(targetSN.getDatacenterId()).
getDatacenterType().isPrimary()) {
/* Non-primary datacenters can't host masters. */
continue;
}
mscns.add(targetSN);
}
}
return mscns;
}
/**
* Returns the raw (unadjusted for any in-flight master transfers) Master
* Density (MD).
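     * For example, an SN hosting 3 RNs, 2 of which are masters, has a raw
     * MD of (2 * 100) / 3 = 66.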
*/
private int getRawMD() {
return (!topoCache.isInitialized() || manager.shutdown.get()) ?
Integer.MAX_VALUE :
((activeMasters.size() * 100) / topoCache.getRnCount());
}
/**
* Subtract replica leases to account for master transfers that are in
* play.
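     * For example, with 2 masters, 1 replica lease, and 3 resident RNs,
     * the lease-adjusted MD is ((2 - 1) * 100) / 3 = 33.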
*/
private int getLeaseAdjustedMD() {
return ((activeMasters.size() - replicaLeases.leaseCount()) * 100) /
topoCache.getRnCount();
}
/**
* Returns the Balanced Master Density (BMD) for nodes on this SN, which
* represents the percentage of the total masters in a shard that should
* appear on a given SN, as a value between 0 (none) and 100 (all). If
* there are zones with master affinity, then the value only takes into
* account nodes that are in master affinity zones.
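     * For example, with a primary replication factor of 3 and no master
     * affinity zones, the BMD is 100 / 3 = 33: each SN should host at most
     * a third of the masters of the shards it participates in.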
*/
int getBMD() {
Topology topo = topoCache.getTopology();
boolean hasMasterAffinity = false;
/* Find the master affinity in topology */
for (Datacenter dc : topo.getDatacenterMap().getAll()) {
if (dc.getMasterAffinity()) {
hasMasterAffinity = true;
break;
}
}
if (!hasMasterAffinity) {
return 100 / topoCache.getPrimaryRF();
}
/*
* Should be no masters if this SN's zone does not have master affinity
* and other zones do.
*/
if (!topo.getDatacenter(snInfo.snId).getMasterAffinity()) {
return 0;
}
/* Find information about zones with master affinity */
int totalRFAffinityZone = 0;
for (Datacenter dc : topo.getDatacenterMap().getAll()) {
if (dc.getMasterAffinity()) {
totalRFAffinityZone += dc.getRepFactor();
}
}
return 100 / totalRFAffinityZone;
}
/**
* Returns the balanced master density when masters should be transferred
     * to the zones that have the same master affinity as the zone hosting the
* current SN.
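     * For example, if the zones sharing this SN's master affinity setting
     * have a total replication factor of 2, the result is 100 / 2 = 50.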
*/
int getCurrentAffinityBMD() {
Topology topo = topoCache.getTopology();
boolean targetMasterAffinity =
topo.getDatacenter(snInfo.snId).getMasterAffinity();
int totalRFAffinityZone = 0;
for (Datacenter dc : topo.getDatacenterMap().getAll()) {
boolean masterAffinity = dc.getMasterAffinity();
if (masterAffinity == targetMasterAffinity) {
totalRFAffinityZone += dc.getRepFactor();
}
}
/*
* Calculate the BMD when all masters should be transferred to the
         * zones whose affinity equals that of the current one.
*/
int currentAffinityBMD = 100 / totalRFAffinityZone;
return currentAffinityBMD;
}
/**
* Returns a count of the current masters.
*/
int getMasterCount() {
return activeMasters.size();
}
/**
* Returns a count of the current replicas.
*/
int getReplicaCount() {
return activeReplicas.size();
}
/**
     * Returns the masters and replicas that are active at the SN.
*/
    Set<RepNodeId> getActiveRNs() {
        final Set<RepNodeId> activeRNs = new HashSet<>();
activeRNs.addAll(activeMasters);
activeRNs.addAll(activeReplicas);
return activeRNs;
}
/**
     * Contact all MCSNs, exhorting them to try harder to shed a master,
* permitting this SN in turn to move a master over.
*/
private void informNeighborsOfOverload() {
for (StorageNode sn : getMCSNs()) {
try {
/* Should we request a push to just one SN and then exit? */
final StorageNodeAgentAPI sna =
RegistryUtils.getStorageNodeAgent(
snInfo.storename, sn, manager.getLoginManager());
sna.overloadedNeighbor(snInfo.snId);
limitingLogger.log("Push " +
topoCache.getTopology().getSequenceNumber() +
" " + sn.getStorageNodeId(),
Level.INFO,
"master rebalance push requested: " + sn);
} catch (RemoteException e) {
logger.info("Could not contact SN to request master " +
"rebalance push: " + sn + " reason:" +
e.getMessage());
continue;
} catch (NotBoundException e) {
/* Should almost never happen. */
logger.warning(sn + " missing from registry.");
continue;
} catch (InternalFaultException re) {
/* An internal fault in a remote request, log and proceed. */
logger.log(Level.WARNING,
"Unexpected fault at remote SN:" + sn, re);
continue;
}
}
}
/**
     * Notes an unbalanced neighbor SN.
*/
void overloadedNeighbor(StorageNodeId storageNodeId) {
limitingLogger.log("Neighbor " +
storageNodeId, Level.INFO,
"Master unbalanced neighbor sn:" +
storageNodeId);
overloadedNeighbor.set(true);
}
public void setShutDownServices(boolean shutdownServices) {
this.shutdownServices.set(shutdownServices);
}
public boolean willShutDownServices() {
return shutdownServices.get();
}
/**
* Initiate master transfers for RNs that have masters on the current SN in
* preparation for shutting down the SN. This method also causes the master
* balance manager to reject requests to transfer masters to this SNA.
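     * Transfers are initiated one at a time, recomputing neighbor PTMDs
     * after each one, and the method then waits, bounded by a fixed
     * two-minute timeout, for the initiated transfers to complete.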
*/
public void transferMastersForShutdown() {
logger.info("Initiating master transfers for RNs " +
"in preparation for shutting down the SN");
if (!topoCache.ensureTopology()) {
return;
}
/*
* Mark the boolean flag true indicating that this SN will be shutting
* down its RNs. On noting this flag, other SNs will not be able to
         * lease masters on this SN.
*/
if (!shutdownServices.compareAndSet(false, true)) {
return;
}
Topology topology = topoCache.getTopology();
        Set<RepNodeId> activeMasterSet = new HashSet<>();
        activeMasterSet.addAll(activeMasters);
        final List<RepNodeId> allMasterTransferSourceRNs =
            new LinkedList<>();
boolean znAffinity =
topology.getDatacenter(snInfo.snId).getMasterAffinity();
/*
* Perform transfers one at a time so that each transfer can take into
* account the results of the previous one, to avoid overloading the
* target SNs.
*/
for (RepNodeId rnId : activeMasterSet) {
final int remainingMasters =
activeMasters.size() - replicaLeases.leaseCount();
if (remainingMasters <= 0) {
/*
* No more masters or all masters already master transfer
* candidates.
*/
break;
}
RepNode masterRN = topology.get(rnId);
if (masterRN == null) {
/*
                 * A topology race: the SNA has heard of the master but
                 * does not yet have an updated topology.
*/
continue;
}
if (replicaLeases.hasLease(rnId)) {
/*
* Already a master transfer candidate.
*/
continue;
}
            final List<Transfer> transfers = new LinkedList<>();
            Collection<RepNode> groupNodes =
                topology.get(masterRN.getRepGroupId()).getRepNodes();
            Set<StorageNode> targetSNs = new HashSet<>();
for (RepNode rn : groupNodes) {
StorageNodeId targetSNId = rn.getStorageNodeId();
if (targetSNId.equals(snInfo.snId)) {
continue;
}
StorageNode targetSN = topology.get(targetSNId);
if (!topology.get(targetSN.getDatacenterId())
.getDatacenterType().isPrimary()) {
/*
                     * Non-PRIMARY datacenters can't host master RNs.
*/
continue;
}
targetSNs.add(targetSN);
}
if (targetSNs.isEmpty()) {
continue;
}
/*
* Calculate the PTMD for all the neighbors after each Master
* transfer to take into account the results of the previous one.
*/
            TreeMap<Boolean, TreeMap<Integer, List<StorageNode>>>
                znAffinities = orderMSCNs(targetSNs);
            for (final Entry<Boolean, TreeMap<Integer, List<StorageNode>>>
                     znAffinityEntry : znAffinities.entrySet()) {
/*
* No further candidates when the current SN has master
* affinity and the only SN candidates left don't have master
* affinity
*/
if (znAffinity && !znAffinityEntry.getKey()) {
break;
}
                final TreeMap<Integer, List<StorageNode>> ptmds =
                    znAffinityEntry.getValue();
                for (final Entry<Integer, List<StorageNode>> entry :
                         ptmds.entrySet()) {
final int ptmd = entry.getKey();
/*
                     * The target SN hosts only masters. Don't consider this SN
* or any of the remaining targets as transfer candidates.
*/
if (ptmd > 100) {
break;
}
for (final StorageNode targetSN : entry.getValue()) {
final StorageNodeId targetSNId =
targetSN.getStorageNodeId();
for (RepNode targetRN : groupNodes) {
if (targetRN.getStorageNodeId().equals(
targetSNId)) {
Transfer transfer = new Transfer(
masterRN, targetRN, targetSN, ptmd);
transfers.add(transfer);
}
}
}
}
/*
                 * All the candidate transfers are listed in increasing
                 * order of PTMD, so the transfer goes to the candidate
                 * with the lowest PTMD.
*/
if (!transfers.isEmpty()) {
Transfer masterTransfer = transferMaster(transfers);
if (masterTransfer != null) {
allMasterTransferSourceRNs
.add(masterTransfer.sourceRN.getResourceId());
logger.info("Master transfer of RN: " +
masterRN.getResourceId() + " initiated");
} else {
logger.info("Master transfer of RN: " +
masterRN.getResourceId() + " was not initiated");
}
}
}
}
/*
* Time interval for all the initiated master transfers to complete.
* We use the same time interval to wait for all the RepNodeServices
         * to shut down.
*/
final int transferWaitMillis = 120000;
waitForRNTransfer(allMasterTransferSourceRNs, transferWaitMillis);
}
/*
* Wait for all the initiated master transfers to complete.
*/
    private void waitForRNTransfer(List<RepNodeId> allMasterTransferSourceRNs,
                                   int transferWaitMillis) {
        final List<RepNodeId> currentMasterTransferSourceRNs =
            new LinkedList<>();
        final List<RepNodeId> nextMasterTransferSourceRNs =
            new LinkedList<>();
nextMasterTransferSourceRNs.addAll(allMasterTransferSourceRNs);
new PollCondition(1000, transferWaitMillis) {
@Override
protected boolean condition() {
currentMasterTransferSourceRNs.clear();
currentMasterTransferSourceRNs.
addAll(nextMasterTransferSourceRNs);
for (RepNodeId sourceRNId :
currentMasterTransferSourceRNs) {
/*
                     * If replicaLeases no longer holds a lease for the
                     * sourceRNId, the master transfer has completed
                     * successfully.
*/
if (!replicaLeases.hasLease(sourceRNId)) {
nextMasterTransferSourceRNs.remove(sourceRNId);
}
}
                return nextMasterTransferSourceRNs.isEmpty();
}
}.await();
for (RepNodeId rnId : allMasterTransferSourceRNs) {
if (nextMasterTransferSourceRNs.contains(rnId)) {
logger.info("Master transfer of RepNode: " + rnId +
" timed out");
} else {
logger.info("Master transfer of RepNode: " + rnId +
" completed");
}
}
}
/**
* The struct containing the RNs and the target SN involved in a master
* transfer.
*/
private static final class Transfer {
final RepNode sourceRN;
final RepNode targetRN;
final StorageNode targetSN;
        private final int ptmd;
Transfer(RepNode sourceRN, RepNode targetRN,
StorageNode targetSN, int ptmd) {
super();
this.sourceRN = sourceRN;
this.targetRN = targetRN;
this.targetSN = targetSN;
this.ptmd = ptmd;
}
@Override
public String toString() {
return "";
}
}
}