
// com.tangosol.net.ConfigurableQuorumPolicy Maven / Gradle / Ivy
/*
* Copyright (c) 2000, 2020, Oracle and/or its affiliates.
*
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
package com.tangosol.net;
import com.tangosol.internal.util.GridComponent;
import com.tangosol.net.CacheService.CacheAction;
import com.tangosol.net.Cluster.MemberTimeoutAction;
import com.tangosol.net.ConfigurableQuorumPolicy.MembershipQuorumPolicy.QuorumRule;
import com.tangosol.net.PartitionedService.PartitionRecoveryAction;
import com.tangosol.net.PartitionedService.PartitionedAction;
import com.tangosol.net.ProxyService.ProxyAction;
import com.tangosol.net.internal.QuorumInfo;
import com.tangosol.net.management.Registry;
import com.tangosol.net.partition.PartitionSet;
import com.tangosol.persistence.CachePersistenceHelper;
import com.tangosol.persistence.GUIDHelper;
import com.tangosol.persistence.GUIDHelper.GUIDResolver;
import com.tangosol.persistence.PersistenceManagerMBean;
import com.tangosol.util.Base;
import com.tangosol.util.LongArray;
import com.tangosol.util.NullImplementation;
import com.tangosol.util.SynchronousListener;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
/**
* ConfigurableQuorumPolicy provides a Quorum-based {@link ActionPolicy} for
* services based on the cluster-configuration.
*
* @author rhl 2009.05.07
* @since Coherence 3.6
*/
public abstract class ConfigurableQuorumPolicy
extends Base
implements ActionPolicy
{
// ----- constructors --------------------------------------------------
/**
 * Default constructor.
 * <p>
 * Performs no initialization beyond constructing the superclass; it is
 * protected, so it is intended to be invoked by subclasses.
 */
protected ConfigurableQuorumPolicy()
{
    super();
}
// ----- accessors ----------------------------------------------------
/**
* Return a String that describes the current Quorum state.
* <p>
* The method is abstract; each concrete policy supplies its own description.
* Within this file the returned text is embedded in the output of
* {@code MembershipQuorumPolicy.toString()}.
*
* @return a String describing the allowed actions in the current state
*/
public abstract String getStatusDescription();
// ----- factory methods ----------------------------------------------
/**
 * Create the quorum-based action policy used by a PartitionedCache service.
 *
 * @param aRule     the quorum rules to be used for this policy
 * @param provider  the address-provider for recovery addresses
 *
 * @return a new {@link PartitionedCacheQuorumPolicy} built from the arguments
 */
public static PartitionedCacheQuorumPolicy instantiatePartitionedCachePolicy(
        QuorumRule[] aRule, AddressProvider provider)
{
    final PartitionedCacheQuorumPolicy policy =
            new PartitionedCacheQuorumPolicy(aRule, provider);
    return policy;
}
/**
 * Create the quorum-based action policy used by a proxy service.
 *
 * @param aRule  the quorum rules to be used for this policy
 *
 * @return a new {@link ProxyQuorumPolicy} built from the supplied rules
 */
public static ProxyQuorumPolicy instantiateProxyPolicy(QuorumRule[] aRule)
{
    final ProxyQuorumPolicy policy = new ProxyQuorumPolicy(aRule);
    return policy;
}
/**
 * Create the quorum-based action policy used by the cluster service.
 *
 * @param mapQuorum  the map of quorum counts keyed by member role
 *
 * @return a new {@link ClusterQuorumPolicy} built from the supplied map
 */
public static ClusterQuorumPolicy instantiateClusterPolicy(Map mapQuorum)
{
    final ClusterQuorumPolicy policy = new ClusterQuorumPolicy(mapQuorum);
    return policy;
}
// ----- inner class: MembershipQuorumPolicy --------------------------
/**
* MembershipQuorumPolicy is a quorum policy that is stateless and based
* solely on service membership sizes. MembershipQuorumPolicy uses a
* state-machine that encodes the allowable Actions, and uses MemberEvents to
* maintain the state-machine as the service membership changes.
*/
public abstract static class MembershipQuorumPolicy
extends ConfigurableQuorumPolicy
{
// ----- constructors ---------------------------------------------
/**
 * Default constructor.
 * <p>
 * Performs no initialization of its own; the rules are installed later via
 * {@link #configure(QuorumRule[])} and the service via {@link #init(Service)}.
 */
protected MembershipQuorumPolicy()
{
    super();
}
// ----- accessors ------------------------------------------------
/**
 * Obtain the Service governed by this policy.
 *
 * @return the Service which this policy applies to; null until
 *         {@link #setService(Service)} has been called
 */
public Service getService()
{
    return this.m_service;
}
/**
 * Record the Service governed by this policy.
 *
 * @param service  the Service that this policy applies to
 */
protected void setService(Service service)
{
    this.m_service = service;
}
/**
 * Obtain the quorum rule that is currently in effect.
 *
 * @return the current quorum rule used by the policy
 */
protected QuorumRule getCurrentRule()
{
    return this.m_ruleCurrent;
}
/**
 * Replace the quorum rule that is currently in effect.
 *
 * @param ruleCurrent  the current quorum rule used by the policy
 */
protected void setCurrentRule(QuorumRule ruleCurrent)
{
    this.m_ruleCurrent = ruleCurrent;
}
/**
 * Install the array of quorum rules used by this policy.
 * <p>
 * The array reference is stored as given; no copy is made.
 *
 * @param aRule  the quorum rules used by this policy
 */
protected void setQuorumRules(QuorumRule[] aRule)
{
    this.m_aRules = aRule;
}
/**
 * Obtain the array of quorum rules used by this policy.
 * <p>
 * The internal array itself is returned, not a copy.
 *
 * @return the quorum rules used by this policy
 */
protected QuorumRule[] getQuorumRules()
{
    return this.m_aRules;
}
/**
 * Obtain the set of members that are in the process of leaving the
 * associated service.
 * <p>
 * The live internal set is returned (not a copy); the member listener of
 * this policy adds to and removes from it directly.
 *
 * @return the set of members that are leaving the associated service
 */
protected Set getLeavingMembers()
{
    return this.m_setLeaving;
}
/**
 * Compute the number of service members that currently count towards the
 * quorum: the registered service members minus those known to be leaving.
 *
 * @return the current size of the member set contributing to the quorum
 */
protected int getPolicyPopulation()
{
    final Set setActive  = getService().getInfo().getServiceMembers();
    final Set setLeaving = getLeavingMembers();

    // NOTE(review): removeAll mutates the set handed back by
    // getServiceMembers(); this presumes that set is a private copy -- confirm
    setActive.removeAll(setLeaving);

    return setActive.size();
}
// ----- internal -------------------------------------------------
/**
 * Configure and initialize this policy with the specified quorum rules.
 * <p>
 * An empty array installs the single {@link QuorumRule#ALL_ALLOWED} rule and
 * returns without touching the current rule. Otherwise the rules are sorted
 * by ascending threshold and folded into cumulative "union" rules, so that
 * each installed rule allows every action allowed by the rules at or below
 * its threshold. Rules sharing a threshold collapse into a single entry.
 * The resulting array starts from {@link QuorumRule#NONE_ALLOWED} when the
 * lowest threshold is positive and always ends with
 * {@link QuorumRule#ALL_ALLOWED}; the current rule is reset to
 * {@link QuorumRule#NONE_ALLOWED}.
 *
 * @param aRule  the array of quorum rules to configure for this policy;
 *               note that the array is sorted in place
 */
protected void configure(QuorumRule[] aRule)
{
    int cRules = aRule.length;
    if (cRules == 0)
    {
        // degenerate case; configure the ALL_ALLOWED rule
        setQuorumRules(new QuorumRule[] {QuorumRule.ALL_ALLOWED});
        return;
    }

    // sort the rules in the ascending order by their threshold
    Arrays.sort(aRule);

    // compose the "union" rules starting with NONE_ALLOWED
    // and discard redundant rules with the same threshold;
    // the list must be typed so that toArray() yields a QuorumRule[]
    List<QuorumRule> listNewRules = new ArrayList<>(cRules + 2);
    QuorumRule       rulePrevious = QuorumRule.NONE_ALLOWED;
    QuorumRule       ruleNext     = rulePrevious;
    for (QuorumRule rule : aRule)
    {
        ruleNext = rule.union(rulePrevious);
        if (ruleNext.getThreshold() > rulePrevious.getThreshold())
        {
            // the threshold has grown, so the previous union is final
            listNewRules.add(rulePrevious);
        }
        rulePrevious = ruleNext;
    }
    listNewRules.add(ruleNext);

    // set the last rule to be ALL_ALLOWED
    listNewRules.add(QuorumRule.ALL_ALLOWED);

    setQuorumRules(listNewRules.toArray(new QuorumRule[listNewRules.size()]));
    setCurrentRule(QuorumRule.NONE_ALLOWED);
}
/**
 * Re-evaluate which quorum rule applies to the present membership size and
 * switch the current rule if it has changed.
 * <p>
 * The rules are scanned in array order; the last rule whose threshold does
 * not exceed the policy population wins, and the scan stops at the first
 * rule whose threshold is larger. If even the first rule's threshold is
 * larger than the population, the current rule becomes null.
 */
protected void updateCurrentRule()
{
    QuorumRule ruleApplicable = null;
    final int  cMembers       = getPolicyPopulation();

    for (QuorumRule rule : getQuorumRules())
    {
        if (cMembers < rule.getThreshold())
        {
            break;
        }
        ruleApplicable = rule;
    }

    // identity comparison: only replace the rule when it is a different object
    if (ruleApplicable != getCurrentRule())
    {
        setCurrentRule(ruleApplicable);
    }
}
// ----- ActionPolicy interface -----------------------------------
/**
 * Bind this policy to the specified service: remember the service,
 * subscribe a member listener to it and compute the initially applicable
 * quorum rule.
 *
 * @param service  the service this policy applies to
 */
public void init(Service service)
{
    setService(service);

    final MemberListener listener = instantiateMemberListener();
    service.addMemberListener(listener);

    updateCurrentRule();
}
// ----- Object methods -------------------------------------------
/**
 * Produce a human-readable description of this policy consisting of the
 * runtime class name and the current status description, wrapped in braces.
 *
 * @return a String representation of this policy
 */
public String toString()
{
    final StringBuilder sb = new StringBuilder();
    sb.append("{")
      .append(getClass().getName())
      .append(" ")
      .append(getStatusDescription())
      .append("}");
    return sb.toString();
}
// ----- inner class: QuorumListener ------------------------------
/**
 * Factory method for the listener that keeps this policy informed about
 * service membership changes.
 *
 * @return a new {@link QuorumListener}
 */
protected MemberListener instantiateMemberListener()
{
    final MemberListener listener = new QuorumListener();
    return listener;
}
/**
 * QuorumListener subscribes the enclosing quorum policy to service
 * membership events. Every event causes the current quorum rule to be
 * re-evaluated; "leaving" and "left" events additionally maintain the set
 * of leaving members.
 */
protected class QuorumListener
        implements MemberListener, SynchronousListener
{
    /**
     * A member joined the service: re-evaluate the current rule.
     *
     * @param evt  the membership event
     */
    public void memberJoined(MemberEvent evt)
    {
        updateCurrentRule();
    }

    /**
     * A member announced its departure: record it as leaving and
     * re-evaluate the current rule.
     *
     * @param evt  the membership event
     */
    public void memberLeaving(MemberEvent evt)
    {
        final Set setLeaving = getLeavingMembers();
        setLeaving.add(evt.getMember());
        updateCurrentRule();
    }

    /**
     * A member has left: drop it from the leaving set and re-evaluate the
     * current rule.
     *
     * @param evt  the membership event
     */
    public void memberLeft(MemberEvent evt)
    {
        final Set setLeaving = getLeavingMembers();
        setLeaving.remove(evt.getMember());
        updateCurrentRule();
    }
}
/**
* A quorum rule defines a set of allowable actions beyond the rule's
* threshold size.
*/
public static class QuorumRule
implements Comparable
{
/**
* Construct a state with the specified threshold and numeric
* representation.
*
* @param nRuleMask numeric representation of the state
* @param nThreshold the size threshold of the state
*/
public QuorumRule(int nRuleMask, int nThreshold)
{
setRuleMask(nRuleMask);
setThreshold(nThreshold);
}
/**
* Compare this Rule to another one based on the {@link #getThreshold()
* threshold}.
*/
@Override
public int compareTo(QuorumRule that)
{
return this.getThreshold() - that.getThreshold();
}
// ----- Object methods ---------------------------------------
@Override
public String toString()
{
return "QuorumRule {threshold=" + getThreshold() + ", rule mask=" + getRuleMask() + "}";
}
// ----- internal ---------------------------------------------
/**
* Return true if the current rule contains the specified action mask.
*
* @param nMask the action bitmask to test for
*
* @return true if the current rule contains the specified action mask
*/
protected boolean contains(int nMask)
{
return (getRuleMask() & nMask) != 0;
}
/**
* Return a quorum rule composed from this and the specified rule that
* reflects the "union" of the two rules. The union of two rules A
* and B requires a membership threshold that is
* max(A.getThreshold(), A.getThreshold()) and allows all actions
* allowed by A or B.
*
* @param rule the rule to compute the union of this with
*
* @return a quorum rule representing the union with the specified rule
*/
protected QuorumRule union(QuorumRule rule)
{
return new QuorumRule(getRuleMask() | rule.getRuleMask(),
Math.max(getThreshold(), rule.getThreshold()));
}
// ----- accessors --------------------------------------------
/**
* Return the numeric representation of the actions allowed by this rule.
*
* @return the numeric representation of this rule
*/
protected int getRuleMask()
{
return m_nRuleMask;
}
/**
* Set the numeric representation of the actions allowed by this rule.
*
* @param nRuleMask the numeric representation of this rule
*/
protected void setRuleMask(int nRuleMask)
{
m_nRuleMask = nRuleMask;
}
/**
* Return the size threshold for this rule.
*
* @return the size threshold for this rule
*/
protected int getThreshold()
{
return m_nThreshold;
}
/**
* Set the size threshold for this rule.
*
* @param nThreshold the size threshold for this rule
*/
protected void setThreshold(int nThreshold)
{
m_nThreshold = nThreshold;
}
// ----- data members -----------------------------------------
/**
* The size threshold for this state.
*/
private int m_nThreshold;
/**
* A numeric representation of the actions allowed by this state.
*/
private int m_nRuleMask;
/**
* A QuorumRule that rejects all actions.
*/
protected static final QuorumRule NONE_ALLOWED = new QuorumRule(0, 0);
/**
* A QuorumRule that permits all actions.
*/
protected static final QuorumRule ALL_ALLOWED = new QuorumRule(0xFFFFFFFF, 0);
}
// ----- data members ---------------------------------------------
/**
* The set of leaving members. Populated on "member leaving" events and
* cleaned up on "member left" events by the QuorumListener; subtracted from
* the service member set when the policy population is calculated.
*/
protected Set m_setLeaving = new HashSet();
/**
* The Service that this policy applies to. Assigned via setService(),
* which is called from init().
*/
protected Service m_service;
/**
* The current state, i.e. the quorum rule that presently applies. It is
* compared by identity when the rule is re-evaluated.
*/
protected QuorumRule m_ruleCurrent;
/**
* The array of quorum rules, as installed by configure().
*/
protected QuorumRule[] m_aRules;
}
// ----- inner class: PartitionedCacheQuorumPolicy --------------------
/**
* PartitionedCacheQuorumPolicy defines a configurable quorum policy that is
* applicable to a DistributedCacheService.
*/
public static class PartitionedCacheQuorumPolicy
extends MembershipQuorumPolicy
{
// ----- constructors ---------------------------------------------
/**
* Construct a PartitionedCacheQuorumPolicy with the specified rule.
*
* @param aRule the quorum rule
* @param provider the recovery address-provider
*/
public PartitionedCacheQuorumPolicy(QuorumRule[] aRule, AddressProvider provider)
{
configure(aRule);
// all the rules are "unions" now and sorted by the threshold;
// if the first one doesn't have the RECOVER limitations
// then none of them do
m_apRecovery = provider;
m_fDynamic = provider == null && m_aRules[0].contains(MASK_RECOVER);
}
// ----- helpers --------------------------------------------------
/**
 * Obtain the ownership-enabled members of the associated
 * PartitionedService.
 *
 * @return the set of ownership-enabled members
 */
protected Set getOwnershipMemberSet()
{
    final PartitionedService service = getService();
    return service.getOwnershipEnabledMembers();
}
/**
 * Obtain the associated service, narrowed to a PartitionedService.
 *
 * @return the associated PartitionedService
 *
 * @throws ClassCastException if the configured service is not a
 *         PartitionedService
 */
@Override
public PartitionedService getService()
{
    final Service service = super.getService();
    return (PartitionedService) service;
}
/**
* Check if the recovery is allowed for the current membership.
* <p>
* If the address provider is specified, it means that all the recovery
* addresses are represented by the ownership-enabled members.
* <p>
* For dynamic active persistence strategy it means that:
* <ul>
* <li>Global partition (partition 0) is recoverable.</li>
* <li>All machines, derived from the global partition, are present.</li>
* <li>All other partitions are accessible / recoverable across the service members.</li>
* <li>The number of members is larger than the threshold, which is calculated as
* a factor of the "last well-formed" member set size</li>
* </ul>
* Note: the implementation *MUST NOT* mutate the action's state
* (including the resolver's)
*
* @param action the recovery action that supplies the quorum info, the GUID
* resolver and the set of orphaned partitions to be recovered
*
* @return null if the recovery is allowed; otherwise a list of rejection reasons
*/
protected List checkRecoveryMembership(PartitionRecoveryAction action)
{
List listReasons = null;
if (m_fDynamic)
{
// dynamic strategy: decide based on the "last good" quorum info and on
// what the resolver can currently reach
QuorumInfo info = action.getQuorumInfo();
GUIDResolver resolver = action.getResolver();
PartitionSet partsRecover = action.getOrphanedPartitions();
Set setLast = info == null ? null : info.getMembers();
PartitionSet partsMissing = resolver.getUnresolvedPartitions();
if (setLast == null)
{
// there is no info available; this could be due to
// a) completely fresh deployment, or on-demand persistence
// b) missing global partition(s)
// c) an old persistence version
// d) global partition being transferred while experiencing partition loss
if (partsMissing.isFull())
{
// everything is missing; must be the case (a) - allow
return null;
}
if (partsMissing.isEmpty())
{
// everything is recoverable; must be the case (c) - allow
return null;
}
if (!partsRecover.isFull() && !partsMissing.intersects(partsRecover))
{
// everything is recoverable; must be case (d) - allow
return null;
}
// none of the benign explanations apply; reject the recovery
return addReason(listReasons, "Unreachable quorum info " + partsMissing,
"recovery of " + partsRecover + " is disallowed");
}
// from here on setLast is non-null, which implies that info is non-null
if (partsMissing.intersects(partsRecover))
{
// narrow the report to the partitions being recovered; work on a copy
// so that the set obtained from the resolver is not modified
partsMissing = new PartitionSet(partsMissing);
partsMissing.retain(partsRecover);
listReasons = addReason(listReasons, "Unreachable " + partsMissing,
reportLastOwnership(partsMissing, info));
}
// check the existing storage versions
// (start from the reachable partitions to recover and remove every
// partition that turns out to be recoverable; what remains is stale)
PartitionSet partsStale = new PartitionSet(partsRecover);
partsStale.remove(partsMissing);
for (int iPart = partsStale.next(0); iPart >= 0;
iPart = partsStale.next(iPart + 1))
{
int nVersionLast = info.getVersions()[iPart];
if (nVersionLast > 0)
{
int nVersionPresent = (int) GUIDHelper.getVersion(
resolver.getNewestGUID(iPart));
if (nVersionLast > nVersionPresent)
{
// the existing version is stale
continue;
}
}
// the partition is recoverable
partsStale.remove(iPart);
}
if (!partsStale.isEmpty())
{
listReasons = addReason(listReasons,
"Stale storage versions for " + partsStale,
reportLastOwnership(partsStale, info));
}
// all the partitions are recoverable; make sure we have
// enough capacity based on the "last good" membership;
// (we definitely don't need more nodes than there are partitions)
int cLast = setLast.size();
int cParts = partsRecover.getPartitionCount();
int cMinimum = Math.min(calculateMinThreshold(cLast), cParts);
int cCurrent = getOwnershipMemberSet().size();
if (cCurrent < cMinimum)
{
listReasons = addReason(listReasons, "Insufficient capacity",
"the last known ownership size was " + cLast +
", need at least " + cMinimum + " nodes to recover");
}
else if (!resolver.isSharedStorage())
{
// to make sure all the machines have enough nodes to recover
// we simply calculate the number of nodes on a minimally
// loaded machine; it has to follow the same "two-thirds" rule
// the total number of nodes
// calculate the minimum node count per machine for the
// "last good" distribution
int cMinLast = calculateMinimumNodeCount(info.getMembers());
// calculate the minimum node count per machine now
int cMinCurrent = calculateMinimumNodeCount(
getService().getOwnershipEnabledMembers());
if (cMinCurrent < calculateMinThreshold(cMinLast))
{
listReasons = addReason(listReasons, "Insufficient minimum capacity",
"the last known distribution had " + cMinLast +
" nodes on the least loaded machine, current minimum is " + cMinCurrent);
}
}
}
else
{
// static strategy: every address produced by the recovery address
// provider must be the host address of an ownership-enabled member
AddressProvider provider = m_apRecovery;
if (provider != null)
{
// collect the host addresses of the current ownership-enabled members
Set setAddresses = new HashSet();
for (Iterator iter = getOwnershipMemberSet().iterator(); iter.hasNext(); )
{
Member member = (Member) iter.next();
setAddresses.add(member.getAddress().getHostAddress());
}
// walk the provider until it returns null, reporting every address
// that is not represented by a member
for (InetSocketAddress address = provider.getNextAddress();
address != null; address = provider.getNextAddress())
{
String sAddress = address.getAddress().getHostAddress();
if (!setAddresses.contains(sAddress))
{
// NOTE(review): the message text "Address in is not present" reads as
// if a word is missing after "in" - confirm the intended wording
if (m_fLogged)
{
// the verbose guidance has already been attached once; keep it short
listReasons = addReason(listReasons, "Address in is not present: " + sAddress, "");
}
else
{
// first missing address seen: attach the operator guidance and
// remember that it has been issued
listReasons = addReason(listReasons, "Address in is not present: " + sAddress,
"Persistence recovery will be deferred until a member from the missing host(s) joins the service.\n" +
"To commence recovery regardless of the missing hosts use the forceRecovery operation on the PersistenceManagerMBean.");
m_fLogged = true;
}
}
}
}
}
return listReasons;
}
/**
 * Calculate the minimum number of storage enabled nodes necessary to
 * proceed with the service recovery.
 *
 * @param cLast  the "last well-formed" member set size
 *
 * @return the number of nodes necessary for the recovery to commence;
 *         the default implementation calculates it as "2/3" of the last
 *         "well-formed" member set size, rounded down
 */
protected int calculateMinThreshold(int cLast)
{
    // multiply as a long so that cLast * 2 cannot overflow the int range;
    // the quotient always fits back into an int
    return (int) (cLast * 2L / 3);
}
/**
 * Append a rejection reason to the provided list, creating the list on
 * demand.
 *
 * @param list      the list to append to; could be null
 * @param sMessage  the reason message
 * @param sData     additional data for the reason
 *
 * @return the list that was passed in, or a newly created list if it was
 *         null, with the new reason appended
 */
private static List addReason(
        List list, String sMessage, String sData)
{
    final List listTarget = list == null ? new LinkedList<>() : list;
    listTarget.add(new Notification(sMessage, sData));
    return listTarget;
}
/**
 * Report the machine names in the "last good" membership that owned
 * the specified partitions.
 * <p>
 * Partitions whose recorded owner id is not positive, or whose owner id has
 * no entry in the member array, are skipped. Each machine is reported once;
 * when a member has no machine name its address is used instead.
 *
 * @param parts  the partitions to report
 * @param info   the QuorumInfo containing the "last good" membership data
 *
 * @return the human readable string with the machine names, or an empty
 *         string if no owner could be determined
 */
protected static String reportLastOwnership(PartitionSet parts, QuorumInfo info)
{
    LongArray laMembers = info.getMemberArray();
    int[]     anOwner   = info.getOwners();

    // collect the distinct last-known owners of the specified partitions;
    // the set must be typed for the iteration below to yield Member objects
    Set<Member> setMembers = new HashSet<>();
    for (int iPart = parts.next(0); iPart >= 0; iPart = parts.next(iPart + 1))
    {
        int nOwner = anOwner[iPart];
        if (nOwner > 0)
        {
            Member member = (Member) laMembers.get(nOwner);
            if (member != null)
            {
                // an owner id without a member entry cannot be reported
                setMembers.add(member);
            }
        }
    }

    Set<String>   setReported = new HashSet<>(); // reported machines
    StringBuilder sb          = new StringBuilder("last known locations:");
    for (Member member : setMembers)
    {
        String sMachine = member.getMachineName();
        if (sMachine == null)
        {
            sMachine = member.getAddress().toString();
        }

        if (setReported.add(sMachine))
        {
            sb.append(' ');
            String sAddress = member.getAddress().toString();
            if (sAddress.contains(sMachine))
            {
                // the address already mentions the machine; avoid repeating it
                sb.append(sAddress);
            }
            else
            {
                sb.append(sMachine).append(" at ")
                  .append(sAddress);
            }
            sb.append(',');
        }
    }

    // drop the trailing comma left behind by the last reported machine
    return setReported.isEmpty() ? "" : sb.substring(0, sb.length() - 1);
}
/**
* Given a set of Member objects, calculate the minimum number of nodes
* on a single machine.
*
* @param setMembers the member set
*
* @return the minimum number of nodes for a machine
*/
protected static int calculateMinimumNodeCount(Set setMembers)
{
// it's best to use the "machine-name" attribute to split members
// across different machines, but we have to rely on the manually
// configured or automatic generation of that attribute.
// If any of the machine-name attributes are missing, we'll revert
// to using the "address" instead.
Map
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy