com.gemstone.gemfire.internal.cache.PRHARedundancyProvider (gemfire-core: SnappyData store based off Pivotal GemFireXD)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.gemstone.gemfire.internal.cache;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
import com.gemstone.gemfire.CancelException;
import com.gemstone.gemfire.SystemFailure;
import com.gemstone.gemfire.cache.CacheClosedException;
import com.gemstone.gemfire.cache.PartitionedRegionStorageException;
import com.gemstone.gemfire.cache.Region;
import com.gemstone.gemfire.cache.RegionDestroyedException;
import com.gemstone.gemfire.cache.persistence.PartitionOfflineException;
import com.gemstone.gemfire.distributed.DistributedMember;
import com.gemstone.gemfire.distributed.internal.DM;
import com.gemstone.gemfire.distributed.internal.LonerDistributionManager;
import com.gemstone.gemfire.distributed.internal.MembershipListener;
import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember;
import com.gemstone.gemfire.i18n.LogWriterI18n;
import com.gemstone.gemfire.i18n.StringIdImpl;
import com.gemstone.gemfire.internal.Assert;
import com.gemstone.gemfire.internal.DebugLogWriter;
import com.gemstone.gemfire.internal.LogWriterImpl;
import com.gemstone.gemfire.internal.NanoTimer;
import com.gemstone.gemfire.internal.OneTaskOnlyExecutor;
import com.gemstone.gemfire.internal.cache.PartitionedRegion.RetryTimeKeeper;
import com.gemstone.gemfire.internal.cache.PartitionedRegionDataStore.CreateBucketResult;
import com.gemstone.gemfire.internal.cache.control.InternalResourceManager;
import com.gemstone.gemfire.internal.cache.partitioned.*;
import com.gemstone.gemfire.internal.cache.partitioned.FetchPartitionDetailsMessage.FetchPartitionDetailsResponse;
import com.gemstone.gemfire.internal.cache.partitioned.ManageBucketMessage.NodeResponse;
import com.gemstone.gemfire.internal.cache.partitioned.RegionAdvisor.PartitionProfile;
import com.gemstone.gemfire.internal.cache.partitioned.rebalance.CompositeDirector;
import com.gemstone.gemfire.internal.cache.partitioned.rebalance.FPRDirector;
import com.gemstone.gemfire.internal.cache.partitioned.rebalance.RebalanceDirector;
import com.gemstone.gemfire.internal.cache.persistence.MembershipFlushRequest;
import com.gemstone.gemfire.internal.cache.persistence.PersistentMemberID;
import com.gemstone.gemfire.internal.cache.persistence.PersistentStateListener;
import com.gemstone.gemfire.internal.i18n.LocalizedStrings;
import io.snappydata.collection.OpenHashSet;
import com.gemstone.org.jgroups.util.StringId;
/**
* This class provides redundancy management for a partitioned region. It
* provides the following to the PartitionedRegion:
* (1) Redundancy management at the time of bucket creation.
* (2) Redundancy management at new node arrival.
* (3) Redundancy management when a node leaves the partitioned region
* distributed system gracefully, i.e. Cache.close().
* (4) Redundancy management at random node failure.
*
* @author tnegi, Mitch Thomas
*/
public class PRHARedundancyProvider
{
private static final boolean DISABLE_CREATE_BUCKET_RANDOMNESS
= Boolean.getBoolean("gemfire.DISABLE_CREATE_BUCKET_RANDOMNESS");
public static class ArrayListWithClearState<T> extends ArrayList<T> {
private static final long serialVersionUID = 1L;
private boolean wasCleared = false;
public boolean wasCleared() {
return this.wasCleared;
}
@Override
public void clear() {
super.clear();
this.wasCleared = true;
}
}
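// Note: createBucketInstance() clears this list when the candidate set runs dry
// (see failedMembers.clear() below), and wasCleared() then tells callers that
// the failed set has gone through a full reset: createBucketOnMember() receives
// it as the forceCreation hint, and createBucketAtomically() consults it when
// deciding whether the potential candidates are truly exhausted.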
public static final String DATASTORE_DISCOVERY_TIMEOUT_PROPERTY_NAME =
"gemfire.partitionedRegionDatastoreDiscoveryTimeout";
static volatile Long DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS =
Long.getLong(DATASTORE_DISCOVERY_TIMEOUT_PROPERTY_NAME);
public final PartitionedRegion prRegion;
private static AtomicLong insufficientLogTimeStamp = new AtomicLong(0);
private final AtomicBoolean firstInsufficentStoresLogged =
new AtomicBoolean(false);
/**
* An executor to submit tasks for redundancy recovery to. It makes sure
* that there will only be one redundancy recovery task in the queue at a time.
*/
protected final OneTaskOnlyExecutor recoveryExecutor;
private volatile ScheduledFuture<?> recoveryFuture;
private final Object shutdownLock = new Object();
private boolean shutdown = false;
volatile CountDownLatch allBucketsRecoveredFromDisk;
/**
* Used to consolidate logging for bucket regions waiting on other
* members to come online.
*/
private RedundancyLogger redundancyLogger = null;
/**
* Constructor for PRHARedundancyProvider.
*
* @param region
* The PartitionedRegion for which the HA redundancy is required to
* be managed.
*/
public PRHARedundancyProvider(final PartitionedRegion region) {
this.prRegion = region;
final InternalResourceManager resourceManager = region.getGemFireCache()
.getResourceManager();
recoveryExecutor = new OneTaskOnlyExecutor(resourceManager.getRecoveryExecutor(),
new OneTaskOnlyExecutor.ConflatedTaskListener() {
public void taskDropped() {
InternalResourceManager.getResourceObserver().recoveryConflated(region);
}
});
}
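// Illustrative sketch (assumed usage, not from the original source): each
// PartitionedRegion owns one provider instance, roughly
//   PRHARedundancyProvider provider = new PRHARedundancyProvider(partitionedRegion);
//   provider.startRedundancyRecovery(); // listen for departures, schedule recovery
// The OneTaskOnlyExecutor above conflates queued recovery tasks so that at most
// one redundancy recovery task is pending at a time.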
public static final String PRLOG_PREFIX = "Partitioned Region name = ";
/**
* Display bucket allocation status
* @param prRegion the given region
* @param allStores the list of available stores. If null, unknown.
* @param alreadyUsed stores allocated; only used if allStores != null
* @param forLog true if the generated string is for a log message
* @return the description string
*/
public static String regionStatus(PartitionedRegion prRegion,
Set allStores, Collection alreadyUsed, boolean forLog) {
StringBuilder sb = new StringBuilder();
sb.append(PRLOG_PREFIX + prRegion.getFullPath());
final char newLine;
final String spaces;
if (forLog) {
newLine = ' ';
spaces = "";
} else {
newLine = '\n';
spaces = " ";
}
if (allStores != null) {
sb.append(newLine + spaces + "Redundancy level set to "
+ prRegion.getRedundantCopies());
sb.append(newLine + ". Number of available data stores: " +
allStores.size());
sb.append(newLine + spaces + ". Number successfully allocated = " +
alreadyUsed.size());
sb.append(newLine + ". Data stores: "
+ PartitionedRegionHelper.printCollection(allStores));
sb.append(newLine + ". Data stores successfully allocated: "
+ PartitionedRegionHelper.printCollection(alreadyUsed));
sb.append(newLine + ". Equivalent members: "
+ PartitionedRegionHelper.printCollection(prRegion.getDistributionManager().getMembersInThisZone()));
}
return sb.toString();
}
static public final StringId TIMEOUT_MSG
= LocalizedStrings.PRHARedundancyProvider_IF_YOUR_SYSTEM_HAS_SUFFICIENT_SPACE_PERHAPS_IT_IS_UNDER_MEMBERSHIP_OR_REGION_CREATION_STRESS;
/**
* Indicate a timeout due to excessive retries among available peers
* @param allStores all feasible stores. If null, we don't know.
* @param alreadyUsed those that have already accepted, only used if allStores != null
* @param opString description of the operation which timed out
*/
public static void timedOut(PartitionedRegion prRegion, Set allStores,
Collection alreadyUsed, String opString, long timeOut) {
final String tooManyRetries =
LocalizedStrings.PRHARedundancyProvider_TIMED_OUT_ATTEMPTING_TO_0_IN_THE_PARTITIONED_REGION__1_WAITED_FOR_2_MS.toLocalizedString(new Object[] {opString, regionStatus(prRegion, allStores, alreadyUsed, true), Long.valueOf(timeOut)}) + TIMEOUT_MSG;
throw new PartitionedRegionStorageException(tooManyRetries);
}
private Set getAllStores(String partitionName) {
if(partitionName != null){
return getFixedPartitionStores(partitionName);
}
final Set allStores = this.prRegion.getRegionAdvisor().adviseDataStore(true);
PartitionedRegionDataStore myDS = this.prRegion.getDataStore();
if (myDS != null) {
allStores.add(this.prRegion.getDistributionManager().getId());
}
return allStores;
}
/**
* This is for FPR: for the given partition, return the set of
* datastores on which that partition is defined.
*
* @param partitionName
* name of the partition for which datastores need to be found
*/
private Set getFixedPartitionStores(
String partitionName) {
Set members = this.prRegion.getRegionAdvisor()
.adviseFixedPartitionDataStores(partitionName);
List FPAs = this.prRegion
.getFixedPartitionAttributesImpl();
if (FPAs != null) {
for (FixedPartitionAttributesImpl fpa : FPAs) {
if (fpa.getPartitionName().equals(partitionName)) {
members.add(this.prRegion.getMyId());
}
}
}
return members;
}
/**
* Signature string indicating that not enough stores are
* available.
*/
static public final StringId INSUFFICIENT_STORES_MSG
= LocalizedStrings.PRHARedundancyProvider_CONSIDER_STARTING_ANOTHER_MEMBER;
/**
* Signature string indicating that there are enough stores
* available.
*/
static public final StringId SUFFICIENT_STORES_MSG
= LocalizedStrings.PRHARRedundancyProvider_FOUND_A_MEMBER_TO_HOST_A_BUCKET;
/**
* string indicating the attempt to allocate a bucket
*/
private static final StringId ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET
= LocalizedStrings.PRHARRedundancyProvider_ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET;
/**
* Indicate that we are unable to allocate sufficient stores and
* the timeout period has passed
* @param allStores stores we know about
* @param alreadyUsed ones already committed
* @param onlyLog true if only a warning log message should be generated.
*/
private void insufficientStores(Set allStores, Collection alreadyUsed,
boolean onlyLog) {
final String regionStat = regionStatus(this.prRegion, allStores,
alreadyUsed, onlyLog);
final char newLine;
if (onlyLog) {
newLine = ' ';
} else {
newLine = '\n';
}
final StringId notEnoughValidNodes;
if(alreadyUsed.isEmpty()) {
notEnoughValidNodes = LocalizedStrings.PRHARRedundancyProvider_UNABLE_TO_FIND_ANY_MEMBERS_TO_HOST_A_BUCKET_IN_THE_PARTITIONED_REGION_0;
} else {
notEnoughValidNodes = LocalizedStrings.PRHARRedundancyProvider_CONFIGURED_REDUNDANCY_LEVEL_COULD_NOT_BE_SATISFIED_0;
}
final Object[] notEnoughValidNodesArgs = new Object[] {PRHARedundancyProvider.INSUFFICIENT_STORES_MSG, newLine + regionStat + newLine};
if (onlyLog) {
getLogger().warning(notEnoughValidNodes, notEnoughValidNodesArgs);
}
else {
throw new PartitionedRegionStorageException(notEnoughValidNodes.toLocalizedString(notEnoughValidNodesArgs));
}
}
/**
* Create a single copy of this bucket on one node. The bucket must
* already be locked.
*
* @param bucketId The bucket we are working on
* @param newBucketSize size of the first entry, used when creating the bucket
* @param excludedMembers
* @param alreadyUsed members who already seem to have the bucket
* @param timeOut point at which to fail
* @param allStores the set of data stores to choose from
* @return the new member, null if it fails.
* @throws PartitionedRegionStorageException if there are not enough data stores
*/
private InternalDistributedMember createBucketInstance(int bucketId,
final int newBucketSize,
final Set excludedMembers,
Collection alreadyUsed,
ArrayListWithClearState failedMembers, final long timeOut,
final Set allStores) {
LogWriterI18n lw = getLogger();
// this.prRegion.getCache().getLogger().config("DEBUG createBucketInstance: "
// + " bucketId=" + this.prRegion.getBucketName(bucketId) +
// " alreadyUsed: " + alreadyUsed +
// " failedMembers: " + failedMembers);
// Recalculate list of candidates
HashSet candidateMembers = new HashSet(allStores);
candidateMembers.removeAll(alreadyUsed);
candidateMembers.removeAll(excludedMembers);
candidateMembers.removeAll(failedMembers);
if (lw.fineEnabled()) {
lw.fine("AllStores=" +
allStores);
lw.fine("AlreadyUsed=" +
alreadyUsed);
lw.fine("excluded=" +
excludedMembers);
lw.fine("failed=" +
failedMembers);
}
if (candidateMembers.size() == 0) {
this.prRegion.checkReadiness(); // fix for bug #37207
// Run out of candidates. Refetch?
if (System.currentTimeMillis() > timeOut) {
if (lw.fineEnabled()) {
lw.fine("createBucketInstance: ran out of candidates and timed out");
}
return null; // fail, let caller signal error
}
// Recalculate
candidateMembers = new HashSet(allStores);
candidateMembers.removeAll(alreadyUsed);
candidateMembers.removeAll(excludedMembers);
failedMembers.clear();
}
if (lw.fineEnabled()) {
lw.fine("createBucketInstance: candidateMembers = " + candidateMembers);
}
InternalDistributedMember candidate = null;
// If there are no candidates, early out.
if (candidateMembers.size() == 0) { // no options
if (lw.fineEnabled()) {
lw.fine("createBucketInstance: no valid candidates");
}
return null; // failure
} // no options
else {
// In case of FPR, candidateMembers is the set of members on which
// required fixed partition is defined.
if (this.prRegion.isFixedPartitionedRegion()) {
candidate = candidateMembers.iterator().next();
}
else {
String prName = this.prRegion.getAttributes().getPartitionAttributes()
.getColocatedWith();
if (prName != null) {
candidate = getColocatedDataStore(candidateMembers, alreadyUsed, bucketId, prName);
}
else {
final ArrayList orderedCandidates =
new ArrayList(candidateMembers);
candidate = getPreferredDataStore(orderedCandidates, alreadyUsed);
}
}
}
if (candidate == null) {
failedMembers.addAll(candidateMembers);
return null;
}
if(!this.prRegion.isShadowPR() && !ColocationHelper.checkMembersColocation(this.prRegion, candidate)) {
if (lw.fineEnabled()) {
lw.fine("createBucketInstances - Member does not have all of the regions colocated with "
+ prRegion + ", " + candidate);
}
failedMembers.add(candidate);
return null;
}
if (! (candidate.equals(this.prRegion.getMyId()))) { // myself
PartitionProfile pp = this.prRegion.getRegionAdvisor()
.getPartitionProfile(candidate);
if (pp == null) {
if (lw.fineEnabled()) {
lw.fine("createBucketInstance: " + this.prRegion.getFullPath()
+ ": no partition profile for " + candidate);
}
failedMembers.add(candidate);
return null;
}
} // myself
// Coordinate with any remote close occurring, causing it to wait until
// this create bucket attempt has been made.
final ManageBucketRsp response = createBucketOnMember(bucketId,
candidate, newBucketSize, failedMembers.wasCleared());
// Add targetNode to bucketNodes if successful, else to failedNodeList
if (response.isAcceptance()) {
return candidate; // success!
}
if (lw.fineEnabled()) {
lw.fine("createBucketInstance: " + this.prRegion.getFullPath()
+ ": candidate " + candidate + " declined to manage bucketId="
+ this.prRegion.bucketStringForLogs(bucketId) + ": " + response);
}
if(response.equals(ManageBucketRsp.CLOSED)) {
excludedMembers.add(candidate);
} else {
failedMembers.add(candidate);
}
candidate = null; // failure
return null;
}
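// Note: a null return above is not a permanent failure; createBucketAtomically()
// keeps looping, and a declining candidate has been recorded either in
// failedMembers (retried after the list is cleared) or, when its cache was
// closing (ManageBucketRsp.CLOSED), in excludedMembers (not retried).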
final private static boolean DEBUG_LOGGING_ENABLED =
Boolean.getBoolean(PRHARedundancyProvider.class.getName() + "-logging");
public static final long INSUFFICIENT_LOGGING_THROTTLE_TIME =
TimeUnit.SECONDS.toNanos(Integer.getInteger("gemfire.InsufficientLoggingThrottleTime", 2).intValue());
public volatile static boolean TEST_MODE = false;
//since 6.6, please use the distributed system property enforce-unique-host instead.
// public static final boolean ENFORCE_UNIQUE_HOST_STORAGE_ALLOCATION = DistributionConfig.DEFAULT_ENFORCE_UNIQUE_HOST;
public LogWriterI18n getLogger() {
if (DEBUG_LOGGING_ENABLED) {
return new DebugLogWriter((LogWriterImpl)this.prRegion.getCache().getLogger(),
getClass());
} else {
return this.prRegion.getCache().getLoggerI18n();
}
}
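// Per DEBUG_LOGGING_ENABLED above, per-class debug logging can be enabled with a
// JVM system property named after this class, for example:
//   -Dcom.gemstone.gemfire.internal.cache.PRHARedundancyProvider-logging=true
// which wraps the cache logger in a DebugLogWriter scoped to this class.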
public InternalDistributedMember createBucketOnDataStore(int bucketId,
int size, long startTime, RetryTimeKeeper snoozer) {
Set attempted = new HashSet();
InternalDistributedMember ret;
InternalDistributedMember primaryForFixedPartition = null;
if (this.prRegion.isFixedPartitionedRegion()) {
primaryForFixedPartition = this.prRegion.getRegionAdvisor()
.adviseFixedPrimaryPartitionDataStore(bucketId);
}
do {
this.prRegion.checkReadiness();
Set available = this.prRegion
.getRegionAdvisor().adviseInitializedDataStore();
// remove uninitialized members for bucket creation
this.prRegion.getCache().removeUnInitializedMembers(available);
InternalDistributedMember target = null;
available.removeAll(attempted);
for (InternalDistributedMember member : available) {
if (primaryForFixedPartition != null
&& available.contains(primaryForFixedPartition)) {
target = primaryForFixedPartition;
}
else {
target = member;
}
break;
}
if (target == null) {
if (shouldLogInsufficientStores()) {
insufficientStores(available, Collections.emptySet(), true);
}
// this will always throw an exception
insufficientStores(available, Collections.emptySet(), false);
}
try {
if(getLogger().fineEnabled()) {
getLogger().fine("Attempting to get data store " + target
+ " to create the bucket "
+ this.prRegion.bucketStringForLogs(bucketId) + " for us");
}
CreateBucketMessage.NodeResponse response = CreateBucketMessage.send(target, this.prRegion, bucketId, size);
ret = response.waitForResponse();
if(ret != null) {
return ret;
}
} catch(ForceReattemptException e) {
//do nothing; we will check again for a primary.
}
attempted.add(target);
}
while((ret = this.prRegion.getNodeForBucketWrite(bucketId, snoozer)) == null);
return ret;
}
/**
* Creates the bucket atomically by creating all the copies needed to satisfy
* redundancy. In case all copies cannot be created, a
* PartitionedRegionStorageException is thrown to the user and a
* BucketBackupMessage is sent to the nodes to make copies of a bucket that was
* only partially created. Other VMs are informed of bucket creation through
* updates to their {@link BucketAdvisor.BucketProfile}s.
*
*
* This method is synchronized to enforce a single threaded ordering, allowing
* for a more accurate picture of bucket distribution in the face of concurrency.
* See bug 37275.
*
*
* This method is now slightly misnamed. Another member could be in the process
* of creating this same bucket at the same time.
*
* @param bucketId
* Id of the bucket to be created.
* @param newBucketSize
* size of the first entry.
* @param startTime a time stamp prior to calling the method, used to update bucket creation stats
* @return the primary member for the newly created bucket
* @throws PartitionedRegionStorageException
* if required # of buckets can not be created to satisfy
* redundancy.
* @throws PartitionedRegionException
* if d-lock can not be acquired to create bucket.
*
*/
public InternalDistributedMember
createBucketAtomically(final int bucketId,
final int newBucketSize,
final long startTime,
final boolean finishIncompleteCreation, String partitionName) throws PartitionedRegionStorageException,
PartitionedRegionException
{
// If there are insufficient stores throw *before* we try acquiring the
// (very expensive) bucket lock or the (somewhat expensive) monitor on this
earlySufficientStoresCheck(partitionName);
if (this.prRegion.getCache().isCacheAtShutdownAll()) {
throw new CacheClosedException("Cache is shutting down");
}
final LogWriterI18n log = getLogger();
if (log.fineEnabled()) {
log.fine("Starting atomic creation of bucketId=" +
this.prRegion.bucketStringForLogs(bucketId));
}
Collection acceptedMembers = new ArrayList(); // ArrayList
OpenHashSet excludedMembers = new OpenHashSet<>();
ArrayListWithClearState failedMembers = new ArrayListWithClearState();
final long timeOut = System.currentTimeMillis() + computeTimeout();
BucketMembershipObserver observer = null;
boolean needToElectPrimary = true;
InternalDistributedMember bucketPrimary = null;
try {
this.prRegion.checkReadiness();
Bucket toCreate = this.prRegion.getRegionAdvisor().getBucket(bucketId);
final ReentrantLock redundancyLock = toCreate.getBucketAdvisor().redundancyLock;
redundancyLock.lock();
try {
if(!finishIncompleteCreation) {
bucketPrimary =
this.prRegion.getBucketPrimary(bucketId);
if (bucketPrimary != null) {
if(log.fineEnabled()) {
log.fine("during atomic creation, discovered that the primary already exists "
+ bucketPrimary + " returning early");
}
needToElectPrimary = false;
return bucketPrimary;
}
}
observer = new BucketMembershipObserver(toCreate).beginMonitoring();
boolean loggedInsufficentStores = false; // track if insufficient data stores have been detected
final LogWriterI18n logger = getLogger();
for (;;) {
this.prRegion.checkReadiness();
if (this.prRegion.getCache().isCacheAtShutdownAll()) {
log.info(LocalizedStrings.DEBUG, "Aborted createBucketAtomically due to ShutdownAll");
throw new CacheClosedException("Cache is shutting down");
}
// this.prRegion.getCache().getLogger().config(
// "DEBUG createBucketAtomically: "
// + " bucketId=" + this.prRegion.getBucketName(bucketId) +
// " accepted: " + acceptedMembers +
// " failed: " + failedMembers);
long timeLeft = timeOut - System.currentTimeMillis();
if (timeLeft < 0) {
// It took too long.
timedOut(this.prRegion, getAllStores(partitionName),
acceptedMembers, ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET
.toLocalizedString(), computeTimeout());
// NOTREACHED
}
if (logger.fineEnabled()) {
logger.fine(
"createBucketAtomically: have " + timeLeft
+ " ms left to finish this");
}
// Always go back to the advisor, see if any fresh data stores are
// present.
Set allStores = getAllStores(partitionName);
// remove nodes that are not fully initialized
this.prRegion.getCache().removeUnInitializedMembers(allStores);
loggedInsufficentStores = checkSufficientStores(allStores,
loggedInsufficentStores);
InternalDistributedMember candidate = createBucketInstance(bucketId,
newBucketSize, excludedMembers, acceptedMembers, failedMembers, timeOut, allStores);
if (candidate != null) {
if (this.prRegion.getDistributionManager().enforceUniqueZone()) {
//enforceUniqueZone property has no effect for a loner. Fix for defect #47181
if (!(this.prRegion.getDistributionManager() instanceof LonerDistributionManager)) {
Set exm = getBuddyMembersInZone(candidate, allStores);
exm.remove(candidate);
exm.removeAll(acceptedMembers);
excludedMembers.addAll(exm);
} else {
//log a warning if Loner
if(logger.warningEnabled()) {
logger.warning(LocalizedStrings.GemFireCache_ENFORCE_UNIQUE_HOST_NOT_APPLICABLE_FOR_LONER);
}
}
}
}
// Get an updated list of bucket owners, which should include
// buckets created concurrently with this createBucketAtomically call
acceptedMembers = prRegion.getRegionAdvisor().getBucketOwners(bucketId);
if (logger.fineEnabled()) {
logger.fine("Accepted members: " + acceptedMembers);
}
// [sumedh] set the primary as the candidate in the first iteration if
// the candidate has accepted
if (bucketPrimary == null && acceptedMembers.contains(candidate)) {
bucketPrimary = candidate;
}
// prune out the stores that have left
verifyBucketNodes(excludedMembers, partitionName);
//Note - we used to wait for the created bucket to become primary here
//if this is a colocated region. We no longer need to do that, because
//the EndBucketCreationMessage is sent out after bucket creation completes to
//select the primary.
// Have we exhausted all candidates?
final int potentialCandidateCount = (allStores.size() - (excludedMembers
.size() + acceptedMembers.size() + failedMembers.size()));
// Determining exhausted members competes with bucket balancing; it's
// important to re-visit all failed members since "failed" set may
// contain datastores which at the moment are imbalanced, but yet could
// be candidates. If the failed members list is empty, its expected
// that the next iteration clears the (already empty) list.
final boolean exhaustedPotentialCandidates = failedMembers.wasCleared() && potentialCandidateCount <= 0;
final boolean redundancySatisfied = acceptedMembers.size() > this.prRegion.getRedundantCopies();
final boolean bucketNotCreated = acceptedMembers.size() == 0;
if (logger.fineEnabled()) {
logger.fine("potentialCandidateCount=" + potentialCandidateCount
+ ", exhaustedPotentialCandidates="
+ exhaustedPotentialCandidates + ", redundancySatisfied="
+ redundancySatisfied + ", bucketNotCreated="
+ bucketNotCreated);
}
if (bucketNotCreated) {
// if we haven't managed to create the bucket on any nodes, retry.
continue;
}
if (exhaustedPotentialCandidates && ! redundancySatisfied) {
insufficientStores(allStores, acceptedMembers, true);
}
// Allow the thread to potentially finish bucket creation even if redundancy was not met.
// Fix for bug 39283
if (redundancySatisfied || exhaustedPotentialCandidates) {
//Tell one of the members to become primary.
//The rest of the members will be allowed to
//volunteer for primary.
endBucketCreation(bucketId, acceptedMembers, bucketPrimary, partitionName);
final int expectedRemoteHosts = acceptedMembers.size()
- (acceptedMembers.contains(this.prRegion.getMyId()) ? 1: 0);
boolean interrupted = Thread.interrupted();
try {
BucketMembershipObserverResults results =
observer.waitForOwnersGetPrimary(expectedRemoteHosts,
acceptedMembers, partitionName);
if (results.problematicDeparture) {
// BZZZT! Member left. Start over.
continue;
}
bucketPrimary = results.primary;
}
catch (InterruptedException e) {
interrupted = true;
this.prRegion.getCancelCriterion().checkCancelInProgress(e);
}
finally {
if (interrupted) {
Thread.currentThread().interrupt();
}
}
needToElectPrimary = false;
return bucketPrimary;
} // almost done
} // for
} finally {
redundancyLock.unlock();
}
}
catch (CancelException e) {
//Fix for 43544 - We don't need to elect a primary
//if the cache was closed. The other members will
//take care of it. This ensures we don't compromise
//redundancy.
needToElectPrimary = false;
// log.warning(
// "PRHARedundancyProvider:createBucketAtomically():Got Cache closed
// exception while creating new bucket.",
// e);
throw e;
}
catch (RegionDestroyedException e) {
//Fix for 43544 - We don't need to elect a primary
//if the region was destroyed. The other members will
//take care of it. This ensures we don't compromise
//redundancy.
needToElectPrimary = false;
// log.warning(
// "PRHARedundancyProvider:createBucketAtomically():Got Region Destroyed
// exception while creating new bucket.",
// e);
throw e;
}
catch (PartitionOfflineException e) {
// log.warning(
// "PRHARedundancyProvider:createBucketAtomically():Got Region Destroyed
// exception while creating new bucket.",
// e);
throw e;
}
catch (RuntimeException e) {
if(log.fineEnabled()) {
log.fine("Unable to create new bucket " + bucketId + " :"
+ e.getMessage());
}
//If we're finishing an incomplete bucket creation, don't blast out
//another message to peers to do so.
//TODO - should we ignore a PartitionRegionStorageException, rather
//than reattempting on other nodes?
if(!finishIncompleteCreation) {
cleanUpBucket(bucketId);
}
throw e;
} finally {
if (observer != null) {
observer.stopMonitoring();
}
//Try to make sure everyone that created the bucket can volunteer for primary
if(needToElectPrimary) {
try {
endBucketCreation(bucketId, prRegion.getRegionAdvisor()
.getBucketOwners(bucketId), bucketPrimary, partitionName);
} catch (Exception e) {
// if region is going down, then no warning level logs
if (e instanceof CancelException || e instanceof CacheClosedException
|| (prRegion.getCancelCriterion().cancelInProgress() != null)) {
getLogger().fine("Exception trying choose a primary after "
+ "bucket creation failure", e);
}
else {
getLogger().warning(StringIdImpl.LITERAL, "Exception trying choose a "
+ "primary after bucket creation failure", e);
}
}
}
}
}
/**
* Figure out which member should be primary for a bucket
* among the members that have created the bucket, and tell
* that member to become the primary.
* @param acceptedMembers The members that now host the bucket
*/
private void endBucketCreation(int bucketId,
Collection acceptedMembers,
InternalDistributedMember targetPrimary, String partitionName) {
if(acceptedMembers.isEmpty()) {
return;
}
acceptedMembers = new HashSet(acceptedMembers);
//TODO prpersist - we need to factor out a method that just chooses
//the primary. But this will do the trick for the moment.
// This is for FPR: for a given bucket id, make sure that only the datastore
// on which the primary partition is defined for this bucket becomes the
// primary. If the primary partition is not available then a secondary
// partition will become primary.
if (partitionName != null) {
if (isLocalPrimary(partitionName)) {
targetPrimary = this.prRegion.getMyId();
}
else {
targetPrimary = this.prRegion.getRegionAdvisor()
.adviseFixedPrimaryPartitionDataStore(bucketId);
if (targetPrimary == null) {
Set fpDataStores = getFixedPartitionStores(partitionName);
targetPrimary = fpDataStores.iterator().next();
}
}
}
if (targetPrimary == null) {
// [sumedh] we need to select the same primary as chosen earlier (e.g.
// the parent's in case of colocation) so it is now passed
//InternalDistributedMember targetPrimary = getPreferredDataStore(
// acceptedMembers, Collections. emptySet());
this.prRegion.getCache().removeUnInitializedMembers(acceptedMembers);
if (acceptedMembers.isEmpty()) {
return;
}
targetPrimary = getPreferredDataStore(acceptedMembers,
Collections.<InternalDistributedMember>emptySet());
}
boolean isHosting = acceptedMembers.remove(prRegion
.getDistributionManager().getId());
EndBucketCreationMessage.send(acceptedMembers, targetPrimary,
this.prRegion, bucketId);
// Observer for testing purpose
final EndBucketCreationObserver observer = testEndObserverInstance;
if (observer != null) {
observer.afterEndBucketCreationMessageSend(this.prRegion, bucketId);
}
if (isHosting) {
endBucketCreationLocally(bucketId, targetPrimary);
}
if (observer != null) {
observer.afterEndBucketCreation(this.prRegion, bucketId);
}
}
private boolean isLocalPrimary(String partitionName) {
List FPAs = this.prRegion
.getFixedPartitionAttributesImpl();
if (FPAs != null) {
for (FixedPartitionAttributesImpl fpa : FPAs) {
if (fpa.getPartitionName().equals(partitionName) && fpa.isPrimary()) {
return true;
}
}
}
return false;
}
private static volatile EndBucketCreationObserver testEndObserverInstance;
// Observer for testing purpose
public static void setTestEndBucketCreationObserver(
EndBucketCreationObserver observer) {
testEndObserverInstance = observer;
}
/**
* Test observer to help reproduce #42429.
*/
public static interface EndBucketCreationObserver {
public void afterEndBucketCreationMessageSend(PartitionedRegion pr,
int bucketId);
public void afterEndBucketCreation(PartitionedRegion pr, int bucketId);
}
public void endBucketCreationLocally(int bucketId,
InternalDistributedMember newPrimary) {
//Don't elect ourselves as primary or tell others to persist our ID if this member
//has been destroyed.
if (prRegion.getCancelCriterion().cancelInProgress() != null
|| prRegion.isDestroyed()) {
return;
}
final BucketAdvisor bucketAdvisor = this.prRegion.getRegionAdvisor()
.getBucketAdvisor(bucketId);
final ProxyBucketRegion proxyBucketRegion = bucketAdvisor
.getProxyBucketRegion();
final LogWriterI18n logger = this.getLogger();
if (logger.fineEnabled()) {
logger.fine("endBucketCreationLocally: for region "
+ this.prRegion.getFullPath() + " bucketId=" + bucketId
+ " bucketAdvisor=" + bucketAdvisor + ", new primary: " + newPrimary);
}
BucketPersistenceAdvisor persistentAdvisor = proxyBucketRegion
.getPersistenceAdvisor();
//prevent multiple threads from ending bucket creation at the same time.
//This fixes an issue with 41336, where multiple threads were calling endBucketCreation
//on the persistent advisor and marking a bucket as initialized twice.
synchronized(proxyBucketRegion) {
if(persistentAdvisor != null) {
BucketRegion realBucket = proxyBucketRegion.getCreatedBucketRegion();
if(realBucket != null) {
PersistentMemberID persistentID = realBucket.getPersistentID();
persistentAdvisor.endBucketCreation(persistentID);
}
}
//We've received an endBucketCreationMessage, but the primary
//may not have. So now we wait for the chosen member to become
//primary.
bucketAdvisor.setPrimaryElector(newPrimary);
if(prRegion.getGemFireCache().getMyId().equals(newPrimary)) {
//If we're the chosen primary, volunteer for primary now
if (bucketAdvisor.isHosting()) {
bucketAdvisor.clearPrimaryElector();
bucketAdvisor.volunteerForPrimary();
}
} else {
//It's possible the chosen primary has already left. In
//that case, volunteer for primary now.
if(!bucketAdvisor.adviseInitialized().contains(newPrimary)) {
bucketAdvisor.clearPrimaryElector();
bucketAdvisor.volunteerForPrimary();
}
//If the bucket has had a primary, that means the
//chosen bucket was primary for a while. Go ahead and
//clear the primary elector field.
if(bucketAdvisor.getHadPrimary()) {
bucketAdvisor.clearPrimaryElector();
bucketAdvisor.volunteerForPrimary();
}
}
}
//send out a profile update to indicate the persistence is initialized, if needed.
if(persistentAdvisor != null) {
bucketAdvisor.endBucketCreation();
}
List colocatedWithList = ColocationHelper.getColocatedChildRegions(prRegion);
for(PartitionedRegion child : colocatedWithList) {
if(child.getRegionAdvisor().isBucketLocal(bucketId)) {
child.getRedundancyProvider().endBucketCreationLocally(bucketId, newPrimary);
}
}
}
/**
* Get buddy data stores on the same Host as the accepted member
* @return set of members on the same host, not including accepted member
* @since gemfire59poc
*
*/
private Set getBuddyMembersInZone(
final InternalDistributedMember acceptedMember,
final Set allStores)
{
HashSet allMembersOnSystem = new HashSet();
DM dm = this.prRegion.getDistributionManager();
Set buddies = dm.getMembersInSameZone(acceptedMember);
//TODO Dan - I'm not sure this retain all is necessary, but there may have been a reason we were
//passing this set in before.
buddies.retainAll(allStores);
return buddies;
}
/**
* Early check for resources. This code may be executed for every put operation if
* there are no datastores present, so limit excessive logging.
* @since gemfire5.8
*/
private void earlySufficientStoresCheck(String partitionName) {
assert Assert.assertHoldsLock(this,false);
Set currentStores = getAllStores(partitionName);
if (currentStores.isEmpty()) {
if (shouldLogInsufficientStores()) {
insufficientStores(currentStores, Collections.EMPTY_LIST, true);
}
insufficientStores(currentStores, Collections.EMPTY_LIST, false);
}
}
/**
* Limit the frequency of logging the {@link #INSUFFICIENT_STORES_MSG} message
* to once per PR, and thereafter to at most once every
* {@link #INSUFFICIENT_LOGGING_THROTTLE_TIME} nanoseconds
* @return true if it's time to log
* @since gemfire5.8
*/
private boolean shouldLogInsufficientStores() {
long now = NanoTimer.getTime();
long delta = now - insufficientLogTimeStamp.get();
if (this.firstInsufficentStoresLogged.compareAndSet(false, true) ||
delta >= INSUFFICIENT_LOGGING_THROTTLE_TIME) {
insufficientLogTimeStamp.set(now);
return true;
} else {
return false;
}
}
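// Example of the throttle above for a single region (assuming the default of 2
// seconds, configurable via -Dgemfire.InsufficientLoggingThrottleTime=<seconds>,
// and no other region touching the shared timestamp):
//   t=0.0s first call  -> true  (first warning for this PR, timestamp recorded)
//   t=0.5s second call -> false (throttle interval not yet elapsed)
//   t=2.5s third call  -> true  (interval elapsed, timestamp reset)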
/**
* Compute timeout for waiting for a bucket. Prefer {@link #DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS}
* over {@link PartitionedRegion#getRetryTimeout()}
* @return the milliseconds to wait for a bucket creation operation
*/
private long computeTimeout() {
if (DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS != null) {
long millis = DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS.longValue();
if (millis > 0) { // only positive values allowed
return millis;
}
}
return this.prRegion.getRetryTimeout();
}
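// For example (illustrative), starting the JVM with
//   -Dgemfire.partitionedRegionDatastoreDiscoveryTimeout=60000
// makes bucket creation wait up to 60 seconds for datastores to appear; if the
// property is unset or non-positive, the region's retry timeout is used instead.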
/**
* Check to determine that there are enough datastore VMs to start the bucket
* creation processes. Log a warning or throw an exception indicating
* when there are not enough datastore VMs.
* @param allStores All known data store instances (including local)
* @param loggedInsufficentStores indicates whether a warning has already been logged
* @return true while the insufficient-stores warning remains in effect,
* false once sufficient stores have been found
*/
private boolean checkSufficientStores(final Set allStores,
final boolean loggedInsufficentStores) {
// Report (only once) if insufficient data store have been detected.
if (! loggedInsufficentStores) {
if (allStores.size() == 0) {
insufficientStores(allStores, Collections.EMPTY_LIST, true);
return true;
}
} else {
if (allStores.size() > 0) {
// Excellent, sufficient resources were found!
final StringId logStr =
LocalizedStrings.PRHARRedundancyProvider_0_IN_THE_PARTITIONED_REGION_REGION_NAME_1;
final Object[] logArgs = new Object[] {SUFFICIENT_STORES_MSG.toLocalizedString(), prRegion.getFullPath()};
if (TEST_MODE) {
getLogger().severe(logStr, logArgs);
} else {
getLogger().info(logStr, logArgs);
}
return false;
} else {
// Already logged warning, there are no datastores
insufficientStores(allStores, Collections.EMPTY_LIST, false);
// UNREACHABLE
}
}
return loggedInsufficentStores;
}
/**
* Clean up locally created bucket and tell other VMs to
* attempt recovering redundancy
* @param buck the bucket identifier
*/
private void cleanUpBucket(int buck)
{
Set dataStores = this.prRegion.getRegionAdvisor().adviseDataStore();
BucketBackupMessage.send(dataStores, this.prRegion, buck);
}
public void finishIncompleteBucketCreation(int bucketId) {
String partitionName = null;
final long startTime = PartitionedRegionStats.startTime();
if (this.prRegion.isFixedPartitionedRegion()) {
FixedPartitionAttributesImpl fpa = PartitionedRegionHelper
.getFixedPartitionAttributesForBucket(this.prRegion, bucketId);
partitionName = fpa.getPartitionName();
}
createBucketAtomically(bucketId, 0, startTime, true, partitionName);
}
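// Note: this path re-runs createBucketAtomically() with finishIncompleteCreation
// set to true, so a failure here does not trigger cleanUpBucket() again (see the
// RuntimeException handler above), and newBucketSize is 0 since no new entry is
// driving the creation.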
/**
* Creates bucket with ID bucketId on targetNode. This method
* will also create the bucket for all of the child colocated PRs.
*
* @param bucketId
* @param targetNMember
* @param isRebalance true if bucket creation is directed by rebalancing
* @param replaceOfflineData
* @return true if the bucket was successfully created
*/
public boolean createBackupBucketOnMember(final int bucketId,
final InternalDistributedMember targetNMember, final boolean isRebalance,
boolean replaceOfflineData, InternalDistributedMember moveSource,
boolean forceCreation) {
if (getLogger().fineEnabled()) {
getLogger().fine("createBackupBucketOnMember for bucketId="
+ this.prRegion.bucketStringForLogs(bucketId)
+ " member: " + targetNMember);
}
if (! (targetNMember.equals(this.prRegion.getMyId()))) {
// final StoppableReentrantReadWriteLock.StoppableReadLock isClosingReadLock;
PartitionProfile pp = this.prRegion.getRegionAdvisor()
.getPartitionProfile(targetNMember);
if (pp != null) {
// isClosingReadLock = pp.getIsClosingReadLock(
// this.prRegion.getCancelCriterion());
} else {
return false;
}
try {
ManageBackupBucketMessage.NodeResponse response = ManageBackupBucketMessage
.send(targetNMember, this.prRegion, bucketId, isRebalance,
replaceOfflineData, moveSource, forceCreation);
if (response.waitForAcceptance()) {
getLogger().fine("createBackupBucketOnMember: "
+ "Bucket creation succeed for bucketId="
+ this.prRegion.bucketStringForLogs(bucketId)
+ " on member = " + targetNMember);
return true;
}
else {
getLogger().fine("createBackupBucketOnMember: "
+ "Bucket creation failed for bucketId="
+ this.prRegion.bucketStringForLogs(bucketId)
+ " on member = " + targetNMember);
return false;
}
}
catch (Throwable e) {
Error err;
if (e instanceof Error && SystemFailure.isJVMFailureError(
err = (Error)e)) {
SystemFailure.initiateFailure(err);
// If this ever returns, rethrow the error. We're poisoned
// now, so don't let this thread continue.
throw err;
}
// Whenever you catch Error or Throwable, you must also
// check for fatal JVM error (see above). However, there is
// _still_ a possibility that you are dealing with a cascading
// error condition, so you also need to check to see if the JVM
// is still usable:
SystemFailure.checkFailure();
if (e instanceof ForceReattemptException) {
// no log needed see bug 37569
}
else if (e instanceof CancelException
|| (e.getCause() != null
&& (e.getCause() instanceof CancelException))) {
// no need to log exceptions caused by cache closure
}
else {
LogWriterI18n log = getLogger();
if (log.warningEnabled()) {
log.warning(LocalizedStrings.PRHARedundancyProvider_EXCEPTION_CREATING_PARTITION_ON__0, targetNMember, e);
}
}
return false;
}
}
else {
final PartitionedRegionDataStore prDS = this.prRegion.getDataStore();
boolean bucketManaged = prDS!=null &&
prDS.grabBucket(bucketId, moveSource, forceCreation,
replaceOfflineData, isRebalance, null, false).equals(
CreateBucketResult.CREATED);
if (! bucketManaged) {
getLogger().fine("createBackupBucketOnMember:"
+ " Local data store refused to accommodate the data for bucketId="
+ this.prRegion.bucketStringForLogs(bucketId) + " prDS=" + prDS);
}
return bucketManaged;
}
}
private static final ThreadLocal forceLocalPrimaries = new ThreadLocal();
public static void setForceLocalPrimaries(boolean v) {
forceLocalPrimaries.set(Boolean.valueOf(v));
}
private boolean getForceLocalPrimaries() {
boolean result = false;
Boolean v = (Boolean)forceLocalPrimaries.get();
if (v != null) {
result = v.booleanValue();
}
return result;
}
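// Illustrative use of the thread-local above: a thread that calls
//   PRHARedundancyProvider.setForceLocalPrimaries(true);
// before driving bucket creation will have getPreferredDataStore() (below) pick
// this member as the primary candidate whenever it hosts a local data store.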
/**
* Creates bucket with ID bucketId on targetNode.
*
* @param bucketId
* @param targetNMember
* @param newBucketSize
* @param forceCreation inform the targetMember it must attempt to host the bucket,
* appropriately ignoring its maximums
* @return a response object
*/
public ManageBucketRsp createBucketOnMember(final int bucketId,
final InternalDistributedMember targetNMember,
final int newBucketSize, boolean forceCreation)
{
if (getLogger().fineEnabled()) {
getLogger().fine("createBucketOnMember for bucketId="
+ this.prRegion.bucketStringForLogs(bucketId)
+ " member: " + targetNMember + (forceCreation ? " forced" : ""));
}
if (! (targetNMember.equals(this.prRegion.getMyId()))) {
// final StoppableReentrantReadWriteLock.StoppableReadLock isClosingReadLock;
PartitionProfile pp = this.prRegion.getRegionAdvisor()
.getPartitionProfile(targetNMember);
if (pp != null) {
// isClosingReadLock = pp.getIsClosingReadLock(
// this.prRegion.getCancelCriterion());
} else {
return ManageBucketRsp.NO;
}
try {
// isClosingReadLock.lock(); // Grab the read lock, preventing any region closures
// on this remote Node until this bucket is fully published, forcing the closing
// Node to recognize any pre-natal buckets.
NodeResponse response = ManageBucketMessage.send(targetNMember,
this.prRegion, bucketId, newBucketSize, forceCreation);
if (response.waitForAcceptance()) {
getLogger().fine("createBucketOnMember: "
+ "Bucket creation succeed for bucketId="
+ this.prRegion.bucketStringForLogs(bucketId)
+ " on member = " + targetNMember);
// lockList.add(isClosingReadLock);
return ManageBucketRsp.YES;
}
else {
getLogger().fine("createBucketOnMember: "
+ "Bucket creation failed for bucketId="
+ this.prRegion.bucketStringForLogs(bucketId)
+ " on member = " + targetNMember);
// isClosingReadLock.unlock();
return response.rejectedDueToInitialization()
? ManageBucketRsp.NO_INITIALIZING : ManageBucketRsp.NO;
}
} catch(PartitionOfflineException e) {
throw e;
}
catch (Throwable e) {
Error err;
if (e instanceof Error && SystemFailure.isJVMFailureError(
err = (Error)e)) {
SystemFailure.initiateFailure(err);
// If this ever returns, rethrow the error. We're poisoned
// now, so don't let this thread continue.
throw err;
}
// Whenever you catch Error or Throwable, you must also
// check for fatal JVM error (see above). However, there is
// _still_ a possibility that you are dealing with a cascading
// error condition, so you also need to check to see if the JVM
// is still usable:
SystemFailure.checkFailure();
if (e instanceof CancelException
|| (e.getCause() != null
&& (e.getCause() instanceof CancelException))) {
// no need to log exceptions caused by cache closure
return ManageBucketRsp.CLOSED;
} else if (e instanceof ForceReattemptException) {
// no log needed see bug 37569
}
else {
LogWriterI18n log = getLogger();
if (log.warningEnabled()) {
log.warning(LocalizedStrings.PRHARedundancyProvider_EXCEPTION_CREATING_PARTITION_ON__0, targetNMember, e);
}
}
// isClosingReadLock.unlock();
return ManageBucketRsp.NO;
}
}
else {
final PartitionedRegionDataStore prDS = this.prRegion.getDataStore();
boolean bucketManaged = prDS!=null &&
prDS.handleManageBucketRequest(bucketId, newBucketSize,
this.prRegion.getMyId(), forceCreation);
if (! bucketManaged) {
getLogger().fine("createBucketOnMember:"
+ " Local data store not able to accommodate the data for bucketId="
+ this.prRegion.bucketStringForLogs(bucketId));
}
return ManageBucketRsp.valueOf(bucketManaged);
}
}
/**
* Select the member which is hosting the same bucket id for the PR this region
* is colocated with. In case of the primary it returns the same node, whereas
* in case of a secondary it returns the least loaded datastore that is hosting
* the bucket id.
*
* @param alreadyUsed
* @param bucketId
* @param prName
* @return InternalDistributedMember colocated data store
* @since 5.8Beta
*/
private InternalDistributedMember getColocatedDataStore(
Collection candidates,
Collection alreadyUsed, int bucketId,
String prName) {
Assert.assertTrue(prName != null); // precondition1
PartitionedRegion colocatedRegion = ColocationHelper.getColocatedRegion(this.prRegion);
Region prRoot = PartitionedRegionHelper.getPRRoot(prRegion
.getCache());
PartitionRegionConfig config = (PartitionRegionConfig)prRoot.get(prRegion
.getRegionIdentifier());
if (!config.isColocationComplete()) {
throw new IllegalStateException(
"Cannot create buckets, as colocated regions are not "
+ "configured to be at the same nodes.");
}
RegionAdvisor advisor = colocatedRegion.getRegionAdvisor();
if (alreadyUsed.isEmpty()) {
InternalDistributedMember primary = advisor.getPrimaryMemberForBucket(bucketId);
if (!candidates.contains(primary)) {
return null;
}
return primary;
}
Set bucketOwnersSet = advisor.getBucketOwners(bucketId);
bucketOwnersSet.retainAll(candidates);
ArrayList members = new ArrayList(bucketOwnersSet);
if(members.isEmpty()){
return null;
}
return getPreferredDataStore(members, alreadyUsed);
}
/**
* Select the member with the fewest buckets; among those tied for the fewest,
* randomly select one.
*
* Under concurrent access, the data that this method uses may be somewhat
* volatile; note that createBucketAtomically synchronizes
* to enhance the consistency of the data used in this method.
*
* @param candidates ArrayList of InternalDistributedMember, potential datastores
* @param alreadyUsed data stores already in use
* @return a member with the fewest buckets or null if no datastores
*/
private InternalDistributedMember getPreferredDataStore(
Collection candidates,
final Collection alreadyUsed) {
/* has a primary already been chosen? */
final boolean forPrimary = alreadyUsed.size() == 0;
if (forPrimary && getForceLocalPrimaries()) {
PartitionedRegionDataStore myDS = this.prRegion.getDataStore();
if (myDS != null) {
return this.prRegion.getMyId();
}
}
if (candidates.size() == 1) {
return candidates.iterator().next();
}
Assert.assertTrue(candidates.size() > 1);
final LogWriterI18n lw = getLogger();
// Convert peers to DataStoreBuckets
ArrayList stores = this.prRegion.getRegionAdvisor()
.adviseFilteredDataStores(new OpenHashSet<>(candidates));
final DM dm = this.prRegion.getDistributionManager();
// Add ourself as a candidate, if appropriate
InternalDistributedMember moi = dm.getId();
PartitionedRegionDataStore myDS = this.prRegion.getDataStore();
if (myDS != null && candidates.contains(moi)) {
int bucketCount = myDS.getBucketsManaged();
int priCount = myDS.getNumberOfPrimaryBucketsManaged();
int localMaxMemory = this.prRegion.getLocalMaxMemory();
stores.add(new DataStoreBuckets(moi, bucketCount, priCount, localMaxMemory));
}
if (stores.isEmpty()) {
return null;
}
// ---------------------------------------------
// Calculate all hosts who already have this bucket
final HashSet existingHosts = new HashSet();
Iterator it = alreadyUsed.iterator();
while (it.hasNext()) {
InternalDistributedMember mem = it.next();
existingHosts.addAll(dm.getMembersInSameZone(mem));
}
Comparator comparator = new Comparator() {
public int compare(DataStoreBuckets d1, DataStoreBuckets d2)
{
boolean host1Used = existingHosts.contains(d1.memberId);
boolean host2Used = existingHosts.contains(d2.memberId);
if (!host1Used && host2Used) {
return -1; // host1 preferred
}
if (host1Used && !host2Used) {
return 1; // host2 preferred
}
// Six eggs, half a dozen. Look for least loaded.
float metric1, metric2;
if (forPrimary) {
metric1 = d1.numPrimaries / (float) d1.localMaxMemoryMB;
metric2 = d2.numPrimaries / (float) d2.localMaxMemoryMB;
}
else {
metric1 = d1.numBuckets / (float) d1.localMaxMemoryMB;
metric2 = d2.numBuckets / (float) d2.localMaxMemoryMB;
}
int result = Float.compare(metric1, metric2);
if(result == 0) {
//if they have the same load, choose the member with the
//higher localMaxMemory
result = d2.localMaxMemoryMB - d1.localMaxMemoryMB;
}
return result;
}
};
// ---------------------------------------------
// First step is to sort datastores first by those whose hosts don't
// hold this bucket, and then secondarily by loading.
Collections.sort(stores, comparator);
if (lw.fineEnabled()) {
lw.fine(fancyFormatBucketAllocation("Sorted ", stores, existingHosts));
}
// ---------------------------------------------
// Always add the first datastore and note just how good it is.
DataStoreBuckets bestDataStore = stores.get(0);
ArrayList bestStores = new ArrayList();
bestStores.add(bestDataStore);
final boolean allStoresInUse = alreadyUsed.contains(bestDataStore.memberId);
// ---------------------------------------------
// Collect all of the other hosts in this sorted list that are as good
// as the very first one.
for (int i = 1; i < stores.size(); i ++) {
DataStoreBuckets aDataStore = stores.get(i);
if (!allStoresInUse &&
alreadyUsed.contains(aDataStore.memberId)) {
// Only choose between the ones not in use.
break;
}
if (comparator.compare(bestDataStore, aDataStore) != 0) {
break;
}
bestStores.add(aDataStore);
}
if (lw.fineEnabled()) {
lw.fine(fancyFormatBucketAllocation("Best Stores ", bestStores,
existingHosts));
}
// ---------------------------------------------
int chosen;
if(DISABLE_CREATE_BUCKET_RANDOMNESS) {
chosen = 0;
}
else {
// Pick one (at random)
chosen = PartitionedRegion.rand.nextInt(bestStores.size());
}
DataStoreBuckets aDataStore = bestStores.get(chosen);
return aDataStore.memberId;
}
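// Worked example of the comparator's load metric (hypothetical numbers): when
// choosing a primary, a store with 3 primaries and 512 MB localMaxMemory scores
// 3/512 = 0.0059 while one with 2 primaries and 1024 MB scores 2/1024 = 0.0020,
// so the second store sorts first; members whose zone already hosts the bucket
// always sort after members whose zone does not.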
/**
* Adds a membership listener to watch for member departures,
* and schedules a task to recover redundancy of existing buckets
*/
public void startRedundancyRecovery() {
prRegion.getRegionAdvisor().addMembershipListener(new PRMembershipListener());
scheduleRedundancyRecovery(null);
}
/**
* Log bucket allocation in the log files in this format:
*
* member1: +5/20
* member2: -10/5
*
* After the member name, the +/- indicates whether or not this bucket is
* already hosted on the given member. This is followed by the number of
* hosted primaries followed by the number of hosted non-primary buckets.
*
* @param prefix first part of message to print
* @param dataStores list of stores
* @param existingStores to mark those already in use
*/
private String fancyFormatBucketAllocation(String prefix, List dataStores,
Set existingStores) {
StringBuilder logStr = new StringBuilder();
if (prefix != null) {
logStr.append(prefix);
}
logStr.append("Bucket Allocation for prId=" +
this.prRegion.getPRId() + ":\n");
for (Iterator i = dataStores.iterator(); i.hasNext(); ) {
DataStoreBuckets dsb = (DataStoreBuckets)i.next();
logStr.append(dsb.memberId).append(": ");
if (existingStores.contains(dsb.memberId)) {
logStr.append("+");
}
else {
logStr.append("-");
}
logStr.append(Integer.toString(dsb.numPrimaries));
logStr.append("/");
logStr.append(Integer.toString(dsb.numBuckets - dsb.numPrimaries));
// for (int j = 0; j < dsb.numPrimaries; j++) {
// logStr.append('#');
// }
// int nonPrimary = dsb.numBuckets - dsb.numPrimaries;
// for (int j = 0; j < nonPrimary; j++) {
// logStr.append('*');
// }
logStr.append('\n');
}
return logStr.toString();
}
public static class DataStoreBuckets {
public final InternalDistributedMember memberId;
public final int numBuckets;
public final int numPrimaries;
private final int localMaxMemoryMB;
public DataStoreBuckets(InternalDistributedMember mem, int buckets,
int primaryBuckets, int localMaxMemory) {
this.memberId = mem;
this.numBuckets = buckets;
this.numPrimaries = primaryBuckets;
this.localMaxMemoryMB = localMaxMemory;
}
@Override
public boolean equals(Object obj)
{
if ((obj == null) || !(obj instanceof DataStoreBuckets)) {
return false;
}
DataStoreBuckets other = (DataStoreBuckets) obj;
return this.numBuckets == other.numBuckets &&
this.memberId.equals(other.memberId);
}
@Override
public int hashCode()
{
return this.memberId.hashCode();
}
@Override
public String toString()
{
return "DataStoreBuckets memberId=" + this.memberId
+ "; numBuckets=" + this.numBuckets + "; numPrimaries=" + this.numPrimaries;
}
}
/**
* Verifies the members and removes those that are either not present in the
* DistributedSystem or are no longer part of the PartitionedRegion
* (close/localDestroy has been performed).
*
* @param members collection of members to scan and modify
*/
void verifyBucketNodes(Collection members, String partitionName) {
if (members == null || members.isEmpty()) {
return;
}
// Revisit region advisor, get current bucket stores.
final Set availableMembers = getAllStores(partitionName);
// boolean debugAnyRemoved = false;
for (Iterator itr = members.iterator(); itr.hasNext();) {
InternalDistributedMember node = itr.next();
if ( ! availableMembers.contains(node)) {
if (getLogger().fineEnabled()) {
getLogger().fine(
"verifyBucketNodes: removing member " + node);
// debugAnyRemoved = true;
}
itr.remove();
Assert.assertTrue(!members.contains(node),
"return value does not contain " + node);
}
} // for
// if (debugAnyRemoved) {
// this.prRegion.getRegionAdvisor().dumpProfiles(getLogger(), "DEBUG verifyBucketNodes removed a profile from the 'accepted' list");
// Set members = ((InternalDistributedSystem)this.prRegion.getCache().getDistributedSystem())
// .getDistributionManager().getDistributionManagerIds();
// getLogger().info("Distributed members in view: " + PartitionedRegionHelper.printCollection(members));
// }
}
/**
* Schedule a task to perform redundancy recovery for a new node or for
* a departed node.
*/
public void scheduleRedundancyRecovery(Object failedMemId) {
final boolean isStartup = (failedMemId == null);
final LogWriterI18n logger = getLogger();
final GemFireCacheImpl cache = this.prRegion.getCache();
final int redundantCopies = PRHARedundancyProvider.this.prRegion.getRedundantCopies();
final long delay;
final boolean movePrimaries;
if (isStartup) {
delay = this.prRegion.getPartitionAttributes().getStartupRecoveryDelay();
movePrimaries = !Boolean.getBoolean("gemfire.DISABLE_MOVE_PRIMARIES_ON_STARTUP");
} else {
delay = this.prRegion.getPartitionAttributes().getRecoveryDelay();
movePrimaries = false;
}
final boolean requiresRedundancyRecovery = delay >= 0 && redundantCopies > 0;
if(!requiresRedundancyRecovery) {
return;
}
if (!PRHARedundancyProvider.this.prRegion.isDataStore()) {
return;
}
if (cache.isUnInitializedMember(cache.getMyId())) {
return;
}
Runnable task = new RecoveryRunnable(this) {
@Override
public void run2()
{
try {
final boolean isFixedPartitionedRegion
= PRHARedundancyProvider.this.prRegion.isFixedPartitionedRegion();
//Fix for 43582 - always replace offline data for fixed partitioned
//regions - this guarantees we create the buckets we are supposed to
//create on this node.
boolean replaceOfflineData = isFixedPartitionedRegion || !isStartup;
RebalanceDirector director;
if(isFixedPartitionedRegion) {
director = new FPRDirector(true, movePrimaries);
} else {
director= new CompositeDirector(true, true, false,
movePrimaries);
}
final PartitionedRegionRebalanceOp rebalance = new PartitionedRegionRebalanceOp(
PRHARedundancyProvider.this.prRegion, false, director, replaceOfflineData,false);
long start = PRHARedundancyProvider.this.prRegion.getPrStats()
.startRecovery();
if (isFixedPartitionedRegion) {
rebalance.executeFPA();
} else {
rebalance.execute();
}
PRHARedundancyProvider.this.prRegion.getPrStats().endRecovery(start);
PRHARedundancyProvider.this.recoveryFuture = null;
} catch(CancelException e) {
logger.fine("Cache closed while recovery in progress");
} catch(RegionDestroyedException e) {
logger.fine("Region destroyed while recovery in progress");
} catch (Exception e) {
logger.error(LocalizedStrings.PRHARedundancyProvider_UNEXPECTED_EXCEPTION_DURING_BUCKET_RECOVERY, e);
}
}
};
synchronized (this.shutdownLock) { // possible fix for bug 41094
if (!this.shutdown) {
try {
if(logger.fineEnabled()) {
if (isStartup) {
logger.fine(this.prRegion + " scheduling redundancy recovery in "
+ delay + " ms");
} else {
logger
.fine(prRegion
+ " scheduling redundancy recovery after departure/crash/error in "
+ failedMemId + " in " + delay + " ms");
}
}
recoveryFuture = this.recoveryExecutor.schedule(task, delay, TimeUnit.MILLISECONDS);
} catch(RejectedExecutionException e) {
//ok, the executor is shutting down.
}
}
}
}
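/*
* Illustrative sketch (not part of this class): the delay and redundantCopies
* values consulted above come from the region's PartitionAttributes. Assuming
* a plain GemFire cache, a region whose redundancy is recovered 30s after a
* member departs (and immediately at startup) could be configured roughly as:
*
*   PartitionAttributesFactory paf = new PartitionAttributesFactory();
*   paf.setRedundantCopies(1);          // redundantCopies > 0 enables recovery
*   paf.setRecoveryDelay(30000);        // ms after a departure; -1 disables
*   paf.setStartupRecoveryDelay(0);     // ms after startup; -1 disables
*   RegionFactory rf = cache.createRegionFactory(RegionShortcut.PARTITION);
*   rf.setPartitionAttributes(paf.create());
*   Region region = rf.create("example");
*
* Primaries are additionally rebalanced at startup unless the
* gemfire.DISABLE_MOVE_PRIMARIES_ON_STARTUP system property is set.
*/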
public boolean isRedundancyImpaired() {
int numBuckets = this.prRegion.getPartitionAttributes().getTotalNumBuckets();
int targetRedundancy = this.prRegion.getPartitionAttributes().getRedundantCopies();
for (int i = 0; i < numBuckets; i++) {
int redundancy = this.prRegion.getRegionAdvisor().getBucketRedundancy(i);
if ((redundancy < targetRedundancy && redundancy != -1)
|| redundancy > targetRedundancy) {
return true;
}
}
return false;
}
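/*
* Illustrative sketch (assumed public-API equivalent, not part of this class):
* outside this package, impaired redundancy is usually detected through
* PartitionRegionHelper rather than the region advisor, e.g.:
*
*   PartitionRegionInfo info =
*       PartitionRegionHelper.getPartitionRegionInfo(region);
*   boolean impaired = info.getLowRedundancyBucketCount() > 0
*       || info.getActualRedundantCopies() != info.getConfiguredRedundantCopies();
*
* In the check above, a redundancy of -1 appears to denote a bucket that has
* no copies yet, so it is excluded from the "too low" branch.
*/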
public boolean recoverPersistentBuckets() {
/*
* Handles the case where the ParallelGatewaySender is persistent but the
* user PR is not: first recover the GatewaySender buckets for the
* ParallelGatewaySender, irrespective of whether colocation is complete.
*/
PartitionedRegion leaderRegion = ColocationHelper.getLeaderRegion(this.prRegion);
if(getLogger().fineEnabled()) {
getLogger().fine(
"recoverPersistentBuckets for " + this.prRegion.getFullPath()
+ " isShadowPR " + this.prRegion.isShadowPR() + " I am persistent : " + this.prRegion.getDataPolicy().withPersistence()
+ " leaderRegion "
+ leaderRegion + " leaderRegion is persistent: " + leaderRegion.getDataPolicy().withPersistence());
}
//Check if the leader region or some child shadow PR region is persistent
//and return the first persistent region found
PartitionedRegion persistentLeader = getPersistentLeader();
//If there is no persistent region in the colocation chain, no need to recover.
if(persistentLeader == null) {
return true;
}
if (!ColocationHelper.checkMembersColocation(leaderRegion,
leaderRegion.getDistributionManager().getDistributionManagerId())) {
if(getLogger().fineEnabled()) {
getLogger().fine(
"Skipping persistent recovery of " + prRegion
+ " because colocation is not complete for " + leaderRegion);
}
return false;
}
//TODO prpersist - It would make sense to hold the lock here in some cases
//to prevent confusing members that are trying to rebalance. BUT, these persistent regions
//need to wait for other members to recover during initialization.
// RecoveryLock lock = leaderRegion.getRecoveryLock();
// lock.lock();
// try {
final ProxyBucketRegion[] proxyBucketArray = persistentLeader.getRegionAdvisor().getProxyBucketArray();
for(ProxyBucketRegion proxyBucket : proxyBucketArray) {
proxyBucket.initializePersistenceAdvisor();
}
Set<InternalDistributedMember> peers = this.prRegion.getRegionAdvisor().adviseGeneric();
//TODO prpersist - make sure we don't run into this race condition:
//1) We get a membership view from member A.
//2) Member B removes itself and distributes that to us and A; we don't remove B.
//3) We apply the membership view from A, which still includes B.
//That would add B back into the set.
//The state flush below ensures that any membership changes
//that are in progress on the peers are finished first.
MembershipFlushRequest.send(peers, this.prRegion.getDistributionManager(), this.prRegion.getFullPath());
ArrayList<ProxyBucketRegion> bucketsNotHostedLocally
= new ArrayList<ProxyBucketRegion>(proxyBucketArray.length);
ArrayList<ProxyBucketRegion> bucketsHostedLocally
= new ArrayList<ProxyBucketRegion>(proxyBucketArray.length);
/*
* Start the redundancy logger before recovering any proxy buckets.
*/
allBucketsRecoveredFromDisk = new CountDownLatch(proxyBucketArray.length);
try {
if(proxyBucketArray.length > 0) {
this.redundancyLogger = new RedundancyLogger(this);
Thread loggingThread = new Thread(this.redundancyLogger,"RedundancyLogger for region " + this.prRegion.getName());
loggingThread.start();
}
} catch(RuntimeException e) {
allBucketsRecoveredFromDisk = null;
throw e;
}
/*
* Spawn a separate thread for each bucket that we previously hosted
* to recover that bucket.
*
* Each thread runs until it has determined that at least one member
* (possibly the local member) has fully initialized the bucket, at which
* point it counts down the someMemberRecoveredLatch on the bucket.
*
* Once at least one copy of each bucket has been created in the distributed
* system, the initPRInternals method will exit. Some of the threads
* spawned here will still be doing GIIs in the background. This
* allows the system to become usable as fast as possible.
*
* If we used a bounded thread pool here, we would end up waiting for
* some buckets to finish their GII before returning from initPRInternals.
* In the future we could perhaps let the bucket creation return and pass
* the GII task to a separate thread pool.
*/
for(final ProxyBucketRegion proxyBucket : proxyBucketArray) {
if(proxyBucket.getPersistenceAdvisor().wasHosting()) {
final RecoveryRunnable recoveryRunnable = new RecoveryRunnable(this) {
@Override
public void run() {
//Fix for 44551 - make sure that we always count down
//this latch, even if the region was destroyed.
try {
super.run();
} finally {
allBucketsRecoveredFromDisk.countDown();
}
}
@Override
public void run2() {
proxyBucket.recoverFromDiskRecursively();
}
};
Thread recoveryThread = new Thread(recoveryRunnable, "Recovery thread for bucket " + proxyBucket.getName());
recoveryThread.start();
bucketsHostedLocally.add(proxyBucket);
} else {
bucketsNotHostedLocally.add(proxyBucket);
}
}
try {
//Partial fix for 44045, try to recover the local
//buckets before the proxy buckets. This will allow us
//to detect any ConflictingDataException before the proxy
//buckets update their membership view.
for(final ProxyBucketRegion proxyBucket : bucketsHostedLocally) {
proxyBucket.waitForPrimaryPersistentRecovery();
}
for(final ProxyBucketRegion proxyBucket : bucketsNotHostedLocally) {
proxyBucket.recoverFromDiskRecursively();
}
} finally {
for(final ProxyBucketRegion proxyBucket : bucketsNotHostedLocally) {
allBucketsRecoveredFromDisk.countDown();
}
}
return true;
// } finally {
// lock.unlock();
// }
}
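/*
* Minimal sketch of the latch discipline used above (illustrative, assumed
* names; Java 8 syntax for brevity): the latch is sized to the total number
* of buckets, each previously hosted bucket is recovered on its own thread,
* and every path counts the latch down exactly once, even on failure.
*
*   CountDownLatch allRecovered = new CountDownLatch(buckets.size());
*   for (final Bucket b : buckets) {
*     if (b.wasHostedLocally()) {
*       new Thread(() -> {
*         try {
*           b.recoverFromDisk();        // GII may continue in the background
*         } finally {
*           allRecovered.countDown();   // always count down
*         }
*       }, "Recovery thread for " + b).start();
*     } else {
*       try {
*         b.recoverFromDisk();          // not previously hosted: recover inline
*       } finally {
*         allRecovered.countDown();
*       }
*     }
*   }
*/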
/**
* Check to see if any colocated region of the current region is persistent.
* It's not enough to check just the leader region, because a child region might
* be a persistent parallel WAN queue, which is allowed.
*
* @return the most senior region in the colocation chain (closest to the leader)
* that is persistent.
*/
protected PartitionedRegion getPersistentLeader() {
PartitionedRegion leader = ColocationHelper.getLeaderRegion(this.prRegion);
return findPersistentRegionRecursively(leader);
}
private PartitionedRegion findPersistentRegionRecursively(
PartitionedRegion pr) {
if(pr.getDataPolicy().withPersistence()) {
return pr;
}
for(PartitionedRegion child : ColocationHelper.getColocatedChildRegions(pr)) {
PartitionedRegion leader = findPersistentRegionRecursively(child);
if(leader != null) {
return leader;
}
}
return null;
}
public void scheduleCreateMissingBuckets() {
if (this.prRegion.getColocatedWith() != null
&& ColocationHelper
.isColocationComplete(this.prRegion)) {
Runnable task = new CreateMissingBucketsTask(this);
final InternalResourceManager resourceManager = this.prRegion
.getGemFireCache().getResourceManager();
resourceManager.getRecoveryExecutor().execute(task);
}
}
public void shutdown() {
synchronized (this.shutdownLock) { // possible fix for bug 41094
this.shutdown = true;
ScheduledFuture<?> recoveryFuture = this.recoveryFuture;
if (recoveryFuture != null) {
recoveryFuture.cancel(false/*mayInterruptIfRunning*/);
this.recoveryExecutor.purge();
}
}
}
/**
* Creates and fills in the PartitionRegionInfo for the partitioned region.
*
* @param internal true if internal-only details should be included
* @param loadProbe the LoadProbe to use
* @return PartitionRegionInfo for the partitioned region
*/
public InternalPRInfo buildPartitionedRegionInfo(
final boolean internal,
final LoadProbe loadProbe) {
final PartitionedRegion pr = this.prRegion;
if (pr == null) {
return null;
}
PartitionedRegionStats prStats = pr.getPrStats();
int configuredBucketCount = pr.getTotalNumberOfBuckets();
int createdBucketCount = pr.getRegionAdvisor().getCreatedBucketsCount();
int lowRedundancyBucketCount = prStats.getLowRedundancyBucketCount();
int configuredRedundantCopies = pr.getRedundantCopies();
int actualRedundantCopies = prStats.getActualRedundantCopies();
final PartitionedRegionDataStore ds = pr.getDataStore();
Set<InternalDistributedMember> datastores =
pr.getRegionAdvisor().adviseDataStore();
//int size = datastores.size() + (ds == null ? 0 : 1);
Set<InternalPartitionDetails> memberDetails =
new TreeSet<InternalPartitionDetails>();
OfflineMemberDetails offlineMembers = null;
boolean fetchOfflineMembers = false;
if (ds != null) {
memberDetails.add(buildPartitionMemberDetails(internal, loadProbe));
offlineMembers = fetchOfflineMembers();
} else {
fetchOfflineMembers = true;
}
// Get remote results
if (!datastores.isEmpty()) {
FetchPartitionDetailsResponse response =
FetchPartitionDetailsMessage.send(datastores, pr, internal, fetchOfflineMembers, loadProbe);
memberDetails.addAll(response.waitForResponse());
if(fetchOfflineMembers) {
offlineMembers = response.getOfflineMembers();
}
}
String colocatedWithPath = pr.getColocatedWith();
InternalPRInfo details = new PartitionRegionInfoImpl(
pr.getFullPath(),
configuredBucketCount,
createdBucketCount,
lowRedundancyBucketCount,
configuredRedundantCopies,
actualRedundantCopies,
memberDetails,
colocatedWithPath,
offlineMembers);
return details;
}
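/*
* Illustrative usage (assumed public-API path, not part of this class):
* details like those assembled here are exposed to applications through
* PartitionRegionHelper, e.g.:
*
*   PartitionRegionInfo prInfo =
*       PartitionRegionHelper.getPartitionRegionInfo(region);
*   for (PartitionMemberInfo member : prInfo.getPartitionMemberInfo()) {
*     System.out.println(member.getDistributedMember()
*         + " hosts " + member.getBucketCount() + " buckets ("
*         + member.getPrimaryCount() + " primaries, "
*         + member.getSize() + " bytes)");
*   }
*/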
/**
* Retrieve the set of members which are currently offline
* for all buckets.
*/
public OfflineMemberDetailsImpl fetchOfflineMembers() {
ProxyBucketRegion[] proxyBuckets = prRegion.getRegionAdvisor().getProxyBucketArray();
@SuppressWarnings("unchecked")
Set<PersistentMemberID>[] offlineMembers = new Set[proxyBuckets.length];
for (int i = 0; i < proxyBuckets.length; i++) {
ProxyBucketRegion proxy = proxyBuckets[i];
if (this.prRegion.getDataPolicy().withPersistence()) {
Set<PersistentMemberID> persistedMembers = proxy.getPersistenceAdvisor().getMissingMembers();
if(persistedMembers == null) {
persistedMembers = Collections.emptySet();
}
offlineMembers[i] = persistedMembers;
} else {
offlineMembers[i] = Collections.emptySet();
}
}
return new OfflineMemberDetailsImpl(offlineMembers);
}
/**
* Creates and fills in a PartitionMemberDetails for the local member.
*
* @param internal true if internal-only details should be included
* @param loadProbe the LoadProbe to use
* @return PartitionMemberDetails for the local member
*/
public InternalPartitionDetails buildPartitionMemberDetails(
final boolean internal,
final LoadProbe loadProbe) {
final PartitionedRegion pr = this.prRegion;
PartitionedRegionDataStore ds = pr.getDataStore();
if (ds == null) {
return null;
}
InternalPartitionDetails localDetails = null;
long size = 0;
InternalDistributedMember localMember = (InternalDistributedMember)
pr.getMyId();
int configuredBucketCount = pr.getTotalNumberOfBuckets();
long[] bucketSizes = new long[configuredBucketCount];
// key: bid, value: size
Map<Integer, Integer> bucketSizeMap = ds.getSizeLocally();
for (Iterator<Map.Entry<Integer, Integer>> iter =
bucketSizeMap.entrySet().iterator(); iter.hasNext();) {
Map.Entry<Integer, Integer> me = iter.next();
int bid = me.getKey().intValue();
long bucketSize = ds.getBucketSize(bid);
bucketSizes[bid] = bucketSize;
size += bucketSize;
}
if (internal) {
waitForPersistentBucketRecoveryOrClose();
PRLoad prLoad = loadProbe.getLoad(pr);
localDetails = new PartitionMemberInfoImpl(
localMember,
pr.getLocalMaxMemory() * (1024L * 1024L),
size,
ds.getBucketsManaged(),
ds.getNumberOfPrimaryBucketsManaged(),
prLoad,
bucketSizes);
}
else {
localDetails = new PartitionMemberInfoImpl(
localMember,
pr.getLocalMaxMemory() * (1024L * 1024L),
size,
ds.getBucketsManaged(),
ds.getNumberOfPrimaryBucketsManaged());
}
return localDetails;
}
/**
* Wait for all persistent buckets to be recovered from disk,
* or for the region to be closed, whichever happens first.
*/
protected void waitForPersistentBucketRecoveryOrClose() {
CountDownLatch recoveryLatch = allBucketsRecoveredFromDisk;
if(recoveryLatch != null) {
boolean interrupted = false;
while (true) {
try {
this.prRegion.getCancelCriterion().checkCancelInProgress(null);
boolean done = recoveryLatch.await(
PartitionedRegionHelper.DEFAULT_WAIT_PER_RETRY_ITERATION,
TimeUnit.MILLISECONDS);
if (done) {
break;
}
} catch(InterruptedException e) {
interrupted = true;
}
}
if(interrupted) {
Thread.currentThread().interrupt();
}
}
List<PartitionedRegion> colocatedRegions = ColocationHelper.getColocatedChildRegions(this.prRegion);
for(PartitionedRegion child : colocatedRegions) {
child.getRedundancyProvider().waitForPersistentBucketRecoveryOrClose();
}
}
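/*
* Minimal sketch of the wait discipline above (illustrative, assumed names):
* check the cancel criterion on every iteration, await the latch with a
* bounded timeout so a cache close or region destroy is noticed promptly,
* keep waiting across interrupts, and restore the interrupt flag at the end.
*
*   boolean interrupted = false;
*   while (true) {
*     try {
*       cancelCriterion.checkCancelInProgress(null); // throws if shutting down
*       if (latch.await(PartitionedRegionHelper.DEFAULT_WAIT_PER_RETRY_ITERATION,
*           TimeUnit.MILLISECONDS)) {
*         break;
*       }
*     } catch (InterruptedException ie) {
*       interrupted = true;                          // remember, keep waiting
*     }
*   }
*   if (interrupted) {
*     Thread.currentThread().interrupt();            // restore the flag
*   }
*/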
/**
* Wait for all persistent buckets to be recovered from disk,
* regardless of whether the region is currently being closed.
*/
protected void waitForPersistentBucketRecovery() {
CountDownLatch recoveryLatch = allBucketsRecoveredFromDisk;
if(recoveryLatch != null) {
boolean interrupted = false;
while (true) {
try {
recoveryLatch.await();
break;
} catch(InterruptedException e) {
interrupted = true;
}
}
if(interrupted) {
Thread.currentThread().interrupt();
}
}
}
public boolean isPersistentRecoveryComplete() {
if(!ColocationHelper.checkMembersColocation(this.prRegion, this.prRegion.getMyId())) {
return false;
}
if(allBucketsRecoveredFromDisk != null
&& allBucketsRecoveredFromDisk.getCount() > 0) {
return false;
}
Map<String, PartitionedRegion> colocatedRegions = ColocationHelper.getAllColocationRegions(this.prRegion);
for(PartitionedRegion region : colocatedRegions.values()) {
PRHARedundancyProvider redundancyProvider = region.getRedundancyProvider();
if(redundancyProvider.allBucketsRecoveredFromDisk != null
&& redundancyProvider.allBucketsRecoveredFromDisk.getCount() > 0) {
return false;
}
}
return true;
}
private static class ManageBucketRsp {
final static ManageBucketRsp NO = new ManageBucketRsp("NO");
final static ManageBucketRsp YES = new ManageBucketRsp("YES");
final static ManageBucketRsp NO_INITIALIZING =
new ManageBucketRsp("NO_INITIALIZING");
public static final ManageBucketRsp CLOSED = new ManageBucketRsp("CLOSED");
private final String name;
private ManageBucketRsp(String name) {
this.name = name;
}
boolean isRejection() {
return this == NO || this == NO_INITIALIZING || this==CLOSED;
}
boolean isAcceptance() {
return this == YES;
}
boolean isInitializing() {
return this == NO_INITIALIZING;
}
@Override
public String toString() {
return "ManageBucketRsp(" + this.name + ")";
}
/** return YES if the argument is true, NO if not */
static ManageBucketRsp valueOf( boolean managed ) {
return managed? YES : NO;
}
}
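/*
* Design note (illustrative only, not part of this class): ManageBucketRsp is
* the pre-Java-5 typesafe-enum idiom. A minimal equivalent with a Java enum:
*
*   enum ManageBucketRsp {
*     NO, YES, NO_INITIALIZING, CLOSED;
*     boolean isRejection()    { return this != YES; }
*     boolean isAcceptance()   { return this == YES; }
*     boolean isInitializing() { return this == NO_INITIALIZING; }
*     static ManageBucketRsp valueOf(boolean managed) { return managed ? YES : NO; }
*   }
*/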
private static class BucketMembershipObserverResults {
final boolean problematicDeparture;
final InternalDistributedMember primary;
BucketMembershipObserverResults(boolean re, InternalDistributedMember p) {
problematicDeparture = re;
primary = p;
}
@Override
public String toString() {
return "pDepart:"+problematicDeparture+" primary:"+primary;
}
}
/**
* Monitors distributed membership for a given bucket
* @author mthomas
*
*/
private class BucketMembershipObserver implements MembershipListener {
final Bucket bucketToMonitor;
final AtomicInteger arrivals = new AtomicInteger(0);
final AtomicBoolean departures = new AtomicBoolean(false);
public BucketMembershipObserver(Bucket b) {
this.bucketToMonitor = b;
}
public BucketMembershipObserver beginMonitoring() {
int profilesPresent =
this.bucketToMonitor.getBucketAdvisor()
.addMembershipListenerAndAdviseGeneric(this).size();
arrivals.addAndGet(profilesPresent);
return this;
}
public void stopMonitoring() {
this.bucketToMonitor.getBucketAdvisor().removeMembershipListener(this);
}
public void memberJoined(InternalDistributedMember id) {
if (getLogger().fineEnabled()) {
getLogger().fine("Observer for bucket " + this.bucketToMonitor
+ " member joined " + id);
}
synchronized(this) {
// TODO manipulate failedNodes and verifiedNodeList directly
arrivals.addAndGet(1);
notify();
}
}
public void memberSuspect(InternalDistributedMember id,
InternalDistributedMember whoSuspected) {
}
public void memberDeparted(InternalDistributedMember id, boolean crashed) {
if (getLogger().fineEnabled()) {
getLogger().fine("Observer for bucket " + this.bucketToMonitor
+ " member departed " + id);
}
synchronized(this) {
// TODO manipulate failedNodes and verifiedNodeList directly
departures.getAndSet(true);
notify();
}
}
/**
* Wait for the expected number of owners to be recognized. Once the expected
* number have been seen, fetch the primary and report it. If, while waiting
* for the owners to be recognized, a departure leaves none of the expected
* owners available, a problematic departure is reported instead.
* @param expectedCount the number of bucket owners to wait for
* @param expectedOwners the owners, re-verified when a departure is detected
* @param partitionName fixed-partition name, or null for a non fixed-partition region
* @return the primary if no problematic departure is detected, otherwise a
*         result flagging the problematic departure
* @throws InterruptedException
*/
public BucketMembershipObserverResults waitForOwnersGetPrimary(
final int expectedCount, final Collection<InternalDistributedMember> expectedOwners,
String partitionName) throws InterruptedException {
boolean problematicDeparture = false;
synchronized(this) {
for (;;) {
this.bucketToMonitor.getCancelCriterion().checkCancelInProgress(null);
// If any departures, need to rethink much...
boolean oldDepartures = departures.get();
if (oldDepartures) {
verifyBucketNodes(expectedOwners, partitionName);
if ( expectedOwners.isEmpty() ) {
problematicDeparture = true; // need to pick new victims
}
// reselect = true; // need to pick new victims
arrivals.set(expectedOwners.size());
departures.set(false);
if(problematicDeparture) {
if (getLogger().fineEnabled()) {
getLogger().fine("Bucket observer found departed members - retrying");
}
}
break;
}
// Look for success...
int oldArrivals = arrivals.get();
if (oldArrivals >= expectedCount) {
// success!
break;
}
if (getLogger().fineEnabled()) {
getLogger().fine("Waiting for bucket " +
prRegion.bucketStringForLogs(this.bucketToMonitor.getId()) +
" to finish being created");
}
prRegion.checkReadiness();
final int creationWaitMillis = 5 * 1000;
wait(creationWaitMillis);
if (oldArrivals == arrivals.get() &&
oldDepartures == departures.get()) {
getLogger().warning(
LocalizedStrings.PRHARedundancyProvider_TIME_OUT_WAITING_0_MS_FOR_CREATION_OF_BUCKET_FOR_PARTITIONED_REGION_1_MEMBERS_REQUESTED_TO_CREATE_THE_BUCKET_ARE_2,
new Object[] {Integer.valueOf(creationWaitMillis), prRegion.getFullPath(), expectedOwners});
}
} // for (;;)
} // synchronized
if (problematicDeparture) {
return new BucketMembershipObserverResults(true, null);
}
InternalDistributedMember primmy = bucketToMonitor.getBucketAdvisor().getPrimary();
if(primmy==null) {
/*
* Handle a race where nobody has the bucket. We can't return a null member here because we haven't created the bucket, need to let
* the higher level code loop.
*/
return new BucketMembershipObserverResults(true, null);
} else {
return new BucketMembershipObserverResults(false,
primmy);
}
}
@Override
public void quorumLost(Set<InternalDistributedMember> failures,
List<InternalDistributedMember> remaining) {
}
}
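/*
* Minimal sketch of the monitor discipline used by waitForOwnersGetPrimary
* above (illustrative, assumed names): state is mutated and notify() called
* only while holding the observer's monitor, and the waiter re-checks its
* condition in a loop so spurious wakeups and missed signals are handled.
*
*   synchronized (observer) {
*     while (observer.arrivalCount() < expectedCount && !observer.sawDeparture()) {
*       observer.wait(5000);   // bounded wait; time out and log as needed
*     }
*   }
*/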
/**
* This class implements MembershipListener to perform cleanup when a node
* leaves the DistributedSystem.
*/
protected class PRMembershipListener implements MembershipListener {
public void memberDeparted(final InternalDistributedMember id,
final boolean crashed)
{
try {
DistributedMember dmem = prRegion.getSystem().getDistributedMember();
if(getLogger().fineEnabled()) {
getLogger().fine(
"MembershipListener invoked on DistributedMember = " + dmem
+ " for failed memberId = " + id);
}
if (! prRegion.isCacheClosing() && !prRegion.isDestroyed() &&
! dmem.equals(id)) {
Runnable postRecoveryTask = null;
//Only schedule redundancy recovery if this is not a fixed PR.
if (!PRHARedundancyProvider.this.prRegion.isFixedPartitionedRegion()) {
postRecoveryTask = new Runnable() {
public void run() {
//After the metadata has been cleaned, recover redundancy.
scheduleRedundancyRecovery(id);
}
};
}
//Schedule clean up the metadata for the failed member.
PartitionedRegionHelper.cleanUpMetaDataForRegion(prRegion.getCache(),
prRegion.getRegionIdentifier(), id, postRecoveryTask);
}
} catch(CancelException e) {
//ignore
}
}
@Override
public void memberSuspect(InternalDistributedMember id, InternalDistributedMember whoSuspected) {
}
public void memberJoined(InternalDistributedMember id)
{
// no action required
}
public void quorumLost(Set<InternalDistributedMember> failures, List<InternalDistributedMember> remaining) {
}
}
/**
* This class extends PersistentStateAdapter to start redundancy recovery
* when a persistent member is revoked.
*/
protected class PRPersistenceListener extends PersistentStateListener.PersistentStateAdapter {
//TODO prpersist It seems like this might trigger recovery too often. For example, a rebalance
//can end up removing a bucket, which would trigger recovery here. We really need to only
//trigger this thing when a PR region is destroyed. And isn't that code already in there?
@Override
public void memberRemoved(PersistentMemberID persistentID, boolean revoked) {
if(!revoked) {
return;
}
DistributedMember dmem = prRegion.getSystem().getDistributedMember();
if(getLogger().fineEnabled()) {
getLogger().fine(
"Persistent Membership Listener invoked on DistributedMember = " + dmem
+ " for removed memberId = " + persistentID);
}
if (! prRegion.isCacheClosing() && !prRegion.isDestroyed() && !prRegion.isFixedPartitionedRegion()) {
scheduleRedundancyRecovery(persistentID);
}
}
}
public CountDownLatch getAllBucketsRecoveredFromDiskLatch() {
return allBucketsRecoveredFromDisk;
}
}