/*
 * Copyright (c) 2010-2015 Pivotal Software, Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License. See accompanying
 * LICENSE file.
 */

package com.gemstone.gemfire.internal.cache;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

import com.gemstone.gemfire.CancelException;
import com.gemstone.gemfire.SystemFailure;
import com.gemstone.gemfire.cache.CacheClosedException;
import com.gemstone.gemfire.cache.PartitionedRegionStorageException;
import com.gemstone.gemfire.cache.Region;
import com.gemstone.gemfire.cache.RegionDestroyedException;
import com.gemstone.gemfire.cache.persistence.PartitionOfflineException;
import com.gemstone.gemfire.distributed.DistributedMember;
import com.gemstone.gemfire.distributed.internal.DM;
import com.gemstone.gemfire.distributed.internal.MembershipListener;
import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember;
import com.gemstone.gemfire.i18n.LogWriterI18n;
import com.gemstone.gemfire.i18n.StringIdImpl;
import com.gemstone.gemfire.internal.Assert;
import com.gemstone.gemfire.internal.DebugLogWriter;
import com.gemstone.gemfire.internal.LogWriterImpl;
import com.gemstone.gemfire.internal.NanoTimer;
import com.gemstone.gemfire.internal.OneTaskOnlyExecutor;
import com.gemstone.gemfire.internal.cache.PartitionedRegion.RetryTimeKeeper;
import com.gemstone.gemfire.internal.cache.PartitionedRegionDataStore.CreateBucketResult;
import com.gemstone.gemfire.internal.cache.control.InternalResourceManager;
import com.gemstone.gemfire.internal.cache.partitioned.Bucket;
import com.gemstone.gemfire.internal.cache.partitioned.BucketBackupMessage;
import com.gemstone.gemfire.internal.cache.partitioned.CreateBucketMessage;
import com.gemstone.gemfire.internal.cache.partitioned.EndBucketCreationMessage;
import com.gemstone.gemfire.internal.cache.partitioned.FetchPartitionDetailsMessage;
import com.gemstone.gemfire.internal.cache.partitioned.FetchPartitionDetailsMessage.FetchPartitionDetailsResponse;
import com.gemstone.gemfire.internal.cache.partitioned.InternalPRInfo;
import com.gemstone.gemfire.internal.cache.partitioned.InternalPartitionDetails;
import com.gemstone.gemfire.internal.cache.partitioned.LoadProbe;
import com.gemstone.gemfire.internal.cache.partitioned.ManageBackupBucketMessage;
import com.gemstone.gemfire.internal.cache.partitioned.ManageBucketMessage;
import com.gemstone.gemfire.internal.cache.partitioned.ManageBucketMessage.NodeResponse;
import com.gemstone.gemfire.internal.cache.partitioned.CreateMissingBucketsTask;
import com.gemstone.gemfire.internal.cache.partitioned.OfflineMemberDetails;
import com.gemstone.gemfire.internal.cache.partitioned.OfflineMemberDetailsImpl;
import com.gemstone.gemfire.internal.cache.partitioned.PRLoad;
import com.gemstone.gemfire.internal.cache.partitioned.PartitionMemberInfoImpl;
import com.gemstone.gemfire.internal.cache.partitioned.PartitionRegionInfoImpl;
import com.gemstone.gemfire.internal.cache.partitioned.PartitionedRegionRebalanceOp;
import com.gemstone.gemfire.internal.cache.partitioned.RecoveryRunnable;
import com.gemstone.gemfire.internal.cache.partitioned.RedundancyLogger;
import com.gemstone.gemfire.internal.cache.partitioned.RegionAdvisor;
import com.gemstone.gemfire.internal.cache.partitioned.RegionAdvisor.PartitionProfile;
import com.gemstone.gemfire.internal.cache.persistence.MembershipFlushRequest;
import com.gemstone.gemfire.internal.cache.persistence.PersistentMemberID;
import com.gemstone.gemfire.internal.cache.persistence.PersistentStateListener;
import com.gemstone.gemfire.internal.concurrent.AB;
import com.gemstone.gemfire.internal.concurrent.AI;
import com.gemstone.gemfire.internal.concurrent.AL;
import com.gemstone.gemfire.internal.concurrent.CFactory;
import com.gemstone.gemfire.internal.i18n.LocalizedStrings;
import com.gemstone.gemfire.internal.tools.gfsh.app.commands.pr;
import com.gemstone.org.jgroups.util.StringId;

/**
 * This class provides the redundancy management for a partitioned region. It
 * provides the following to the PartitionedRegion:
 * (1) Redundancy management at the time of bucket creation.
 * (2) Redundancy management at the arrival of a new node.
 * (3) Redundancy management when a node leaves the partitioned region
 *     distributed system gracefully, i.e. Cache.close().
 * (4) Redundancy management at random node failure.
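 * <p>
 * A minimal usage sketch (illustrative only; {@code pr} stands in for an
 * already-initialized PartitionedRegion, and the bucket id and entry size are
 * placeholder values):
 * <pre>
 * PRHARedundancyProvider provider = pr.getRedundancyProvider();
 * // watch for member departures and schedule recovery of existing buckets
 * provider.startRedundancyRecovery();
 * // atomically create bucket 42 and its redundant copies; no fixed-partition
 * // name is passed because pr is not a fixed-partitioned region
 * InternalDistributedMember primary = provider.createBucketAtomically(
 *     42, 0, PartitionedRegionStats.startTime(), false, null);
 * </pre>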
* * @author tnegi, Mitch Thomas */ public class PRHARedundancyProvider { private static final boolean DISABLE_CREATE_BUCKET_RANDOMNESS = Boolean.getBoolean("gemfire.DISABLE_CREATE_BUCKET_RANDOMNESS"); public static class ArrayListWithClearState extends ArrayList { private static final long serialVersionUID = 1L; private boolean wasCleared = false; public boolean wasCleared() { return this.wasCleared; } @Override public void clear() { super.clear(); this.wasCleared = true; } } public static final String DATASTORE_DISCOVERY_TIMEOUT_PROPERTY_NAME = "gemfire.partitionedRegionDatastoreDiscoveryTimeout"; static volatile Long DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS = Long.getLong(DATASTORE_DISCOVERY_TIMEOUT_PROPERTY_NAME); public final PartitionedRegion prRegion; private static AL insufficientLogTimeStamp = CFactory.createAL(0); private final AB firstInsufficentStoresLogged = CFactory.createAB(false); /** * An executor to submit tasks for redundancy recovery too. It makes sure * that there will only be one redundancy recovery task in the queue at a time. */ protected final OneTaskOnlyExecutor recoveryExecutor; private volatile ScheduledFuture recoveryFuture; private final Object shutdownLock = new Object(); private boolean shutdown = false; volatile CountDownLatch allBucketsRecoveredFromDisk; /** * Used to consolidate logging for bucket regions waiting on other * members to come online. */ private RedundancyLogger redundancyLogger = null; /** * Constructor for PRHARedundancyProvider. * * @param region * The PartitionedRegion for which the HA redundancy is required to * be managed. */ public PRHARedundancyProvider(final PartitionedRegion region) { this.prRegion = region; final InternalResourceManager resourceManager = region.getGemFireCache() .getResourceManager(); recoveryExecutor = new OneTaskOnlyExecutor(resourceManager.getExecutor(), new OneTaskOnlyExecutor.ConflatedTaskListener() { public void taskDropped() { InternalResourceManager.getResourceObserver().recoveryConflated(region); } }); } public static final String PRLOG_PREFIX = "Partitioned Region name = "; /** * Display bucket allocation status * @param prRegion the given region * @param allStores the list of available stores. If null, unknown. * @param alreadyUsed stores allocated; only used if allStores != null * @param forLog true if the generated string is for a log message * @return the description string */ public static String regionStatus(PartitionedRegion prRegion, Set allStores, Collection alreadyUsed, boolean forLog) { StringBuilder sb = new StringBuilder(); sb.append(PRLOG_PREFIX + prRegion.getFullPath()); final char newLine; final String spaces; if (forLog) { newLine = ' '; spaces = ""; } else { newLine = '\n'; spaces = " "; } if (allStores != null) { sb.append(newLine + spaces + "Redundancy level set to " + prRegion.getRedundantCopies()); sb.append(newLine + ". Number of available data stores: " + allStores.size()); sb.append(newLine + spaces + ". Number successfully allocated = " + alreadyUsed.size()); sb.append(newLine + ". Data stores: " + PartitionedRegionHelper.printCollection(allStores)); sb.append(newLine + ". Data stores successfully allocated: " + PartitionedRegionHelper.printCollection(alreadyUsed)); sb.append(newLine + ". 
Equivalent members: " + PartitionedRegionHelper.printCollection(prRegion.getDistributionManager().getMembersInThisZone())); } return sb.toString(); } static public final StringId TIMEOUT_MSG = LocalizedStrings.PRHARedundancyProvider_IF_YOUR_SYSTEM_HAS_SUFFICIENT_SPACE_PERHAPS_IT_IS_UNDER_MEMBERSHIP_OR_REGION_CREATION_STRESS; /** * Indicate a timeout due to excessive retries among available peers * @param allStores all feasible stores. If null, we don't know. * @param alreadyUsed those that have already accepted, only used if allStores != null * @param opString description of the operation which timed out */ public static void timedOut(PartitionedRegion prRegion, Set allStores, Collection alreadyUsed, String opString, long timeOut) { final String tooManyRetries = LocalizedStrings.PRHARedundancyProvider_TIMED_OUT_ATTEMPTING_TO_0_IN_THE_PARTITIONED_REGION__1_WAITED_FOR_2_MS.toLocalizedString(new Object[] {opString, regionStatus(prRegion, allStores, alreadyUsed, true), Long.valueOf(timeOut)}) + TIMEOUT_MSG; throw new PartitionedRegionStorageException(tooManyRetries); } private Set getAllStores(String partitionName) { if(partitionName != null){ return getFixedPartitionStores(partitionName); } final Set allStores = this.prRegion.getRegionAdvisor().adviseDataStore(true); PartitionedRegionDataStore myDS = this.prRegion.getDataStore(); if (myDS != null) { allStores.add(this.prRegion.getDistributionManager().getId()); } return allStores; } /** * This is for FPR, for given partition, we have to return the set of * datastores on which the given partition is defined * * @param partitionName * name of the partition for which datastores need to be found out */ private Set getFixedPartitionStores( String partitionName) { Set members = this.prRegion.getRegionAdvisor() .adviseFixedPartitionDataStores(partitionName); List FPAs = this.prRegion .getFixedPartitionAttributesImpl(); if (FPAs != null) { for (FixedPartitionAttributesImpl fpa : FPAs) { if (fpa.getPartitionName().equals(partitionName)) { members.add(this.prRegion.getMyId()); } } } return members; } /** * Signature string indicating that not enough stores are * available. */ static public final StringId INSUFFICIENT_STORES_MSG = LocalizedStrings.PRHARedundancyProvider_CONSIDER_STARTING_ANOTHER_MEMBER; /** * Signature string indicating that there are enough stores * available. */ static public final StringId SUFFICIENT_STORES_MSG = LocalizedStrings.PRHARRedundancyProvider_FOUND_A_MEMBER_TO_HOST_A_BUCKET; /** * string indicating the attempt to allocate a bucket */ private static final StringId ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET = LocalizedStrings.PRHARRedundancyProvider_ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET; /** * Indicate that we are unable to allocate sufficient stores and * the timeout period has passed * @param allStores stores we know about * @param alreadyUsed ones already committed * @param onlyLog true if only a warning log messages should be generated. 
*/ private void insufficientStores(Set allStores, Collection alreadyUsed, boolean onlyLog) { final String regionStat = regionStatus(this.prRegion, allStores, alreadyUsed, onlyLog); final char newLine; if (onlyLog) { newLine = ' '; } else { newLine = '\n'; } final StringId notEnoughValidNodes; if(alreadyUsed.isEmpty()) { notEnoughValidNodes = LocalizedStrings.PRHARRedundancyProvider_UNABLE_TO_FIND_ANY_MEMBERS_TO_HOST_A_BUCKET_IN_THE_PARTITIONED_REGION_0; } else { notEnoughValidNodes = LocalizedStrings.PRHARRedundancyProvider_CONFIGURED_REDUNDANCY_LEVEL_COULD_NOT_BE_SATISFIED_0; } final Object[] notEnoughValidNodesArgs = new Object[] {PRHARedundancyProvider.INSUFFICIENT_STORES_MSG, newLine + regionStat + newLine}; if (onlyLog) { getLogger().warning(notEnoughValidNodes, notEnoughValidNodesArgs); } else { throw new PartitionedRegionStorageException(notEnoughValidNodes.toLocalizedString(notEnoughValidNodesArgs)); } } /** * Create a single copy of this bucket on one node. The bucket must * already be locked. * * @param bucketId The bucket we are working on * @param newBucketSize size to create it * @param excludedMembers * @param alreadyUsed members who already seem to have the bucket * @param timeOut point at which to fail * @param allStores the set of data stores to choose from * @return the new member, null if it fails. * @throws PartitionedRegionStorageException if there are not enough data stores */ private InternalDistributedMember createBucketInstance(int bucketId, final int newBucketSize, final Set excludedMembers, Collection alreadyUsed, ArrayListWithClearState failedMembers, final long timeOut, final Set allStores) { LogWriterI18n lw = getLogger(); // this.prRegion.getCache().getLogger().config("DEBUG createBucketInstance: " // + " bucketId=" + this.prRegion.getBucketName(bucketId) + // " alreadyUsed: " + alreadyUsed + // " failedMembers: " + failedMembers); // Recalculate list of candidates HashSet candidateMembers = new HashSet(allStores); candidateMembers.removeAll(alreadyUsed); candidateMembers.removeAll(excludedMembers); candidateMembers.removeAll(failedMembers); if (lw.fineEnabled()) { lw.fine("AllStores=" + allStores); lw.fine("AlreadyUsed=" + alreadyUsed); lw.fine("excluded=" + excludedMembers); lw.fine("failed=" + failedMembers); } if (candidateMembers.size() == 0) { this.prRegion.checkReadiness(); // fix for bug #37207 // Run out of candidates. Refetch? if (System.currentTimeMillis() > timeOut) { if (lw.fineEnabled()) { lw.fine("createBucketInstance: ran out of candidates and timed out"); } return null; // fail, let caller signal error } // Recalculate candidateMembers = new HashSet(allStores); candidateMembers.removeAll(alreadyUsed); candidateMembers.removeAll(excludedMembers); failedMembers.clear(); } if (lw.fineEnabled()) { lw.fine("createBucketInstance: candidateMembers = " + candidateMembers); } InternalDistributedMember candidate = null; // If there are no candidates, early out. if (candidateMembers.size() == 0) { // no options if (lw.fineEnabled()) { lw.fine("createBucketInstance: no valid candidates"); } return null; // failure } // no options else { // In case of FPR, candidateMembers is the set of members on which // required fixed partition is defined. 
if (this.prRegion.isFixedPartitionedRegion()) { candidate = candidateMembers.iterator().next(); } else { String prName = this.prRegion.getAttributes().getPartitionAttributes() .getColocatedWith(); if (prName != null) { candidate = getColocatedDataStore(candidateMembers, alreadyUsed, bucketId, prName); } else { final ArrayList orderedCandidates = new ArrayList(candidateMembers); candidate = getPreferredDataStore(orderedCandidates, alreadyUsed); } } } if (candidate == null) { failedMembers.addAll(candidateMembers); return null; } if(!this.prRegion.isShadowPR() && !ColocationHelper.checkMembersColocation(this.prRegion, candidate)) { if (lw.fineEnabled()) { lw.fine("createBucketInstances - Member does not have all of the regions colocated with " + prRegion + ", " + candidate); } failedMembers.add(candidate); return null; } if (! (candidate.equals(this.prRegion.getMyId()))) { // myself PartitionProfile pp = this.prRegion.getRegionAdvisor() .getPartitionProfile(candidate); if (pp == null) { if (lw.fineEnabled()) { lw.fine("createBucketInstance: " + this.prRegion.getFullPath() + ": no partition profile for " + candidate); } failedMembers.add(candidate); return null; } } // myself // Coordinate with any remote close occurring, causing it to wait until // this create bucket attempt has been made. final ManageBucketRsp response = createBucketOnMember(bucketId, candidate, newBucketSize, failedMembers.wasCleared()); // Add targetNode to bucketNodes if successful, else to failedNodeList if (response.isAcceptance()) { return candidate; // success! } if (lw.fineEnabled()) { lw.fine("createBucketInstance: " + this.prRegion.getFullPath() + ": candidate " + candidate + " declined to manage bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + ": " + response); } if(response.equals(ManageBucketRsp.CLOSED)) { excludedMembers.add(candidate); } else { failedMembers.add(candidate); } candidate = null; // failure return null; } final private static boolean DEBUG_LOGGING_ENABLED = Boolean.getBoolean(PRHARedundancyProvider.class.getName() + "-logging"); public static final long INSUFFICIENT_LOGGING_THROTTLE_TIME = TimeUnit.SECONDS.toNanos(Integer.getInteger("gemfire.InsufficientLoggingThrottleTime", 2).intValue()); public volatile static boolean TEST_MODE = false; //since 6.6, please use the distributed system property enforce-unique-host instead. 
// public static final boolean ENFORCE_UNIQUE_HOST_STORAGE_ALLOCATION = DistributionConfig.DEFAULT_ENFORCE_UNIQUE_HOST; public LogWriterI18n getLogger() { if (DEBUG_LOGGING_ENABLED) { return new DebugLogWriter((LogWriterImpl)this.prRegion.getCache().getLogger(), getClass()); } else { return this.prRegion.getCache().getLoggerI18n(); } } public InternalDistributedMember createBucketOnDataStore(int bucketId, int size, long startTime, RetryTimeKeeper snoozer) { Set attempted = new HashSet(); InternalDistributedMember ret; InternalDistributedMember primaryForFixedPartition = null; if (this.prRegion.isFixedPartitionedRegion()) { primaryForFixedPartition = this.prRegion.getRegionAdvisor() .adviseFixedPrimaryPartitionDataStore(bucketId); } do { this.prRegion.checkReadiness(); Set available = this.prRegion .getRegionAdvisor().adviseInitializedDataStore(); // remove uninitialized members for bucket creation this.prRegion.getCache().removeUnInitializedMembers(available); InternalDistributedMember target = null; available.removeAll(attempted); for (InternalDistributedMember member : available) { if (primaryForFixedPartition != null && available.contains(primaryForFixedPartition)) { target = primaryForFixedPartition; } else { target = member; } break; } if (target == null) { if (shouldLogInsufficientStores()) { insufficientStores(available, Collections.emptySet(), true); } // this will always throw an exception insufficientStores(available, Collections.emptySet(), false); } try { if(getLogger().fineEnabled()) { getLogger().fine("Attempting to get data store " + target + " to create the bucket " + this.prRegion.bucketStringForLogs(bucketId) + " for us"); } CreateBucketMessage.NodeResponse response = CreateBucketMessage.send(target, this.prRegion, bucketId, size); ret = response.waitForResponse(); if(ret != null) { return ret; } } catch(ForceReattemptException e) { //do nothing, we will already check again for a primary. } attempted.add(target); } while((ret = this.prRegion.getNodeForBucketWrite(bucketId, snoozer)) == null); return ret; } /** * Creates bucket atomically by creating all the copies to satisfy redundancy. In case all * copies can not be created, a PartitionedRegionStorageException is thrown to * the user and BucketBackupMessage is sent to the nodes to make copies of a bucket * that was only partially created. Other VMs are informed * of bucket creation through updates through their {@link BucketAdvisor.BucketProfile}s. * *

 * This method is synchronized to enforce a single threaded ordering, allowing
 * for a more accurate picture of bucket distribution in the face of concurrency.
 * See bug 37275.
 *

* * This method is now slightly misnamed. Another member could be in the process * of creating this same bucket at the same time. * * @param bucketId * Id of the bucket to be created. * @param newBucketSize * size of the first entry. * @param startTime a time stamp prior to calling the method, used to update bucket creation stats * @return the primary member for the newly created bucket * @throws PartitionedRegionStorageException * if required # of buckets can not be created to satisfy * redundancy. * @throws PartitionedRegionException * if d-lock can not be acquired to create bucket. * */ public InternalDistributedMember createBucketAtomically(final int bucketId, final int newBucketSize, final long startTime, final boolean finishIncompleteCreation, String partitionName) throws PartitionedRegionStorageException, PartitionedRegionException { // If there are insufficient stores throw *before* we try acquiring the // (very expensive) bucket lock or the (somewhat expensive) monitor on this earlySufficientStoresCheck(partitionName); synchronized(this) { if (this.prRegion.getCache().isCacheAtShutdownAll()) { throw new CacheClosedException("Cache is shutting down"); } final LogWriterI18n log = getLogger(); if (log.fineEnabled()) { log.fine("Starting atomic creation of bucketId=" + this.prRegion.bucketStringForLogs(bucketId)); } Collection acceptedMembers = new ArrayList(); // ArrayList Set excludedMembers = new HashSet(); ArrayListWithClearState failedMembers = new ArrayListWithClearState(); final long timeOut = System.currentTimeMillis() + computeTimeout(); BucketMembershipObserver observer = null; boolean needToElectPrimary = true; InternalDistributedMember bucketPrimary = null; try { this.prRegion.checkReadiness(); Bucket toCreate = this.prRegion.getRegionAdvisor().getBucket(bucketId); if(!finishIncompleteCreation) { bucketPrimary = this.prRegion.getBucketPrimary(bucketId); if (bucketPrimary != null) { if(log.fineEnabled()) { log.fine("during atomic creation, discovered that the primary already exists " + bucketPrimary + " returning early"); } needToElectPrimary = false; return bucketPrimary; } } observer = new BucketMembershipObserver(toCreate).beginMonitoring(); boolean loggedInsufficentStores = false; // track if insufficient data stores have been detected final LogWriterI18n logger = getLogger(); for (;;) { this.prRegion.checkReadiness(); if (this.prRegion.getCache().isCacheAtShutdownAll()) { log.info(LocalizedStrings.DEBUG, "Aborted createBucketAtomically due to ShutdownAll"); throw new CacheClosedException("Cache is shutting down"); } // this.prRegion.getCache().getLogger().config( // "DEBUG createBucketAtomically: " // + " bucketId=" + this.prRegion.getBucketName(bucketId) + // " accepted: " + acceptedMembers + // " failed: " + failedMembers); long timeLeft = timeOut - System.currentTimeMillis(); if (timeLeft < 0) { // It took too long. timedOut(this.prRegion, getAllStores(partitionName), acceptedMembers, ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET .toLocalizedString(), computeTimeout()); // NOTREACHED } if (logger.fineEnabled()) { logger.fine( "createBucketAtomically: have " + timeLeft + " ms left to finish this"); } // Always go back to the advisor, see if any fresh data stores are // present. 
Set allStores = getAllStores(partitionName); // remove nodes that are not fully initialized this.prRegion.getCache().removeUnInitializedMembers(allStores); loggedInsufficentStores = checkSufficientStores(allStores, loggedInsufficentStores); InternalDistributedMember candidate = createBucketInstance(bucketId, newBucketSize, excludedMembers, acceptedMembers, failedMembers, timeOut, allStores); if (candidate != null) { if (this.prRegion.getDistributionManager().enforceUniqueZone()) { Set exm = getBuddyMembersInZone(candidate, allStores); exm.remove(candidate); exm.removeAll(acceptedMembers); excludedMembers.addAll(exm); } } // Get an updated list of bucket owners, which should include // buckets created concurrently with this createBucketAtomically call acceptedMembers = prRegion.getRegionAdvisor().getBucketOwners(bucketId); if (logger.fineEnabled()) { logger.fine("Accepted members: " + acceptedMembers); } // [sumedh] set the primary as the candidate in the first iteration if // the candidate has accepted if (bucketPrimary == null && acceptedMembers.contains(candidate)) { bucketPrimary = candidate; } // prune out the stores that have left verifyBucketNodes(excludedMembers, partitionName); //Note - we used to wait for the created bucket to become primary here //if this is a colocated region. We no longer need to do that, because //the EndBucketMessage is sent out after bucket creation completes to //select the primary. // Have we exhausted all candidates? final int potentialCandidateCount = (allStores.size() - (excludedMembers .size() + acceptedMembers.size() + failedMembers.size())); // Determining exhausted members competes with bucket balancing; it's // important to re-visit all failed members since "failed" set may // contain datastores which at the moment are imbalanced, but yet could // be candidates. If the failed members list is empty, its expected // that the next iteration clears the (already empty) list. final boolean exhaustedPotentialCandidates = failedMembers.wasCleared() && potentialCandidateCount <= 0; final boolean redundancySatisfied = acceptedMembers.size() > this.prRegion.getRedundantCopies(); final boolean bucketNotCreated = acceptedMembers.size() == 0; if (logger.fineEnabled()) { logger.fine("potentialCandidateCount=" + potentialCandidateCount + ", exhaustedPotentialCandidates=" + exhaustedPotentialCandidates + ", redundancySatisfied=" + redundancySatisfied + ", bucketNotCreated=" + bucketNotCreated); } if (bucketNotCreated) { // if we haven't managed to create the bucket on any nodes, retry. continue; } if (exhaustedPotentialCandidates && ! redundancySatisfied) { insufficientStores(allStores, acceptedMembers, true); } // Allow the thread to potentially finish bucket creation even if redundancy was not met. // Fix for bug 39283 if (redundancySatisfied || exhaustedPotentialCandidates) { //Tell one of the members to become primary. //The rest of the members will be allowed to //volunteer for primary. endBucketCreation(bucketId, acceptedMembers, bucketPrimary, partitionName); final int expectedRemoteHosts = acceptedMembers.size() - (acceptedMembers.contains(this.prRegion.getMyId()) ? 1: 0); boolean interrupted = Thread.interrupted(); try { BucketMembershipObserverResults results = observer.waitForOwnersGetPrimary(expectedRemoteHosts, acceptedMembers, partitionName); if (results.problematicDeparture) { // BZZZT! Member left. Start over. 
continue; } bucketPrimary = results.primary; } catch (InterruptedException e) { interrupted = true; this.prRegion.getCancelCriterion().checkCancelInProgress(e); } finally { if (interrupted) { Thread.currentThread().interrupt(); } } needToElectPrimary = false; return bucketPrimary; } // almost done } // for } catch (CancelException e) { //Fix for 43544 - We don't need to elect a primary //if the cache was closed. The other members will //take care of it. This ensures we don't compromise //redundancy. needToElectPrimary = false; // log.warning( // "PRHARedundancyProvider:createBucketAtomically():Got Cache closed // exception while creating new bucket.", // e); throw e; } catch (RegionDestroyedException e) { //Fix for 43544 - We don't need to elect a primary //if the region was destroyed. The other members will //take care of it. This ensures we don't compromise //redundancy. needToElectPrimary = false; // log.warning( // "PRHARedundancyProvider:createBucketAtomically():Got Region Destroyed // exception while creating new bucket.", // e); throw e; } catch (PartitionOfflineException e) { // log.warning( // "PRHARedundancyProvider:createBucketAtomically():Got Region Destroyed // exception while creating new bucket.", // e); throw e; } catch (RuntimeException e) { if(log.fineEnabled()) { log.fine("Unable to create new bucket " + bucketId + " :" + e.getMessage()); } //If we're finishing an incomplete bucket creation, don't blast out //another message to peers to do so. //TODO - should we ignore a PartitionRegionStorageException, rather //than reattempting on other nodes? if(!finishIncompleteCreation) { cleanUpBucket(bucketId); } throw e; } finally { if (observer != null) { observer.stopMonitoring(); } //Try to make sure everyone that created the bucket can volunteer for primary if(needToElectPrimary) { try { endBucketCreation(bucketId, prRegion.getRegionAdvisor() .getBucketOwners(bucketId), bucketPrimary, partitionName); } catch (Exception e) { // if region is going down, then no warning level logs if (e instanceof CancelException || e instanceof CacheClosedException || (prRegion.getCancelCriterion().cancelInProgress() != null)) { getLogger().fine("Exception trying choose a primary after " + "bucket creation failure", e); } else { getLogger().warning(StringIdImpl.LITERAL, "Exception trying choose a " + "primary after bucket creation failure", e); } } } } } // synchronized(this) } /** * Figure out which member should be primary for a bucket * among the members that have created the bucket, and tell * that member to become the primary. * @param acceptedMembers The members that now host the bucket */ private void endBucketCreation(int bucketId, Collection acceptedMembers, InternalDistributedMember targetPrimary, String partitionName) { if(acceptedMembers.isEmpty()) { return; } acceptedMembers = new HashSet(acceptedMembers); //TODO prpersist - we need to factor out a method that just chooses //the primary. But this will do the trick for the moment. // This is for FPR, for a given bucket id , make sure that for given bucket // id , only the datastore on which primary partition is defined for this // bucket becomes the primary. 
If primary partition is not available then // secondary partition will become primary if (partitionName != null) { if (isLocalPrimary(partitionName)) { targetPrimary = this.prRegion.getMyId(); } else { targetPrimary = this.prRegion.getRegionAdvisor() .adviseFixedPrimaryPartitionDataStore(bucketId); if (targetPrimary == null) { Set fpDataStores = getFixedPartitionStores(partitionName); targetPrimary = fpDataStores.iterator().next(); } } } if (targetPrimary == null) { // [sumedh] we need to select the same primary as chosen earlier (e.g. // the parent's in case of colocation) so it is now passed //InternalDistributedMember targetPrimary = getPreferredDataStore( // acceptedMembers, Collections. emptySet()); this.prRegion.getCache().removeUnInitializedMembers(acceptedMembers); if (acceptedMembers.isEmpty()) { return; } targetPrimary = getPreferredDataStore(acceptedMembers, Collections. emptySet()); } boolean isHosting = acceptedMembers.remove(prRegion .getDistributionManager().getId()); EndBucketCreationMessage.send(acceptedMembers, targetPrimary, this.prRegion, bucketId); // Observer for testing purpose final EndBucketCreationObserver observer = testEndObserverInstance; if (observer != null) { observer.afterEndBucketCreationMessageSend(this.prRegion, bucketId); } if (isHosting) { endBucketCreationLocally(bucketId, targetPrimary); } if (observer != null) { observer.afterEndBucketCreation(this.prRegion, bucketId); } } private boolean isLocalPrimary(String partitionName) { List FPAs = this.prRegion .getFixedPartitionAttributesImpl(); if (FPAs != null) { for (FixedPartitionAttributesImpl fpa : FPAs) { if (fpa.getPartitionName().equals(partitionName) && fpa.isPrimary()) { return true; } } } return false; } private static volatile EndBucketCreationObserver testEndObserverInstance; // Observer for testing purpose public static void setTestEndBucketCreationObserver( EndBucketCreationObserver observer) { testEndObserverInstance = observer; } /** * Test observer to help reproduce #42429. */ public static interface EndBucketCreationObserver { public void afterEndBucketCreationMessageSend(PartitionedRegion pr, int bucketId); public void afterEndBucketCreation(PartitionedRegion pr, int bucketId); } public void endBucketCreationLocally(int bucketId, InternalDistributedMember newPrimary) { //Don't elect ourselves as primary or tell others to persist our ID if this member //has been destroyed. if (prRegion.getCancelCriterion().cancelInProgress() != null || prRegion.isDestroyed()) { return; } final BucketAdvisor bucketAdvisor = this.prRegion.getRegionAdvisor() .getBucketAdvisor(bucketId); final ProxyBucketRegion proxyBucketRegion = bucketAdvisor .getProxyBucketRegion(); final LogWriterI18n logger = this.getLogger(); if (logger.fineEnabled()) { logger.fine("endBucketCreationLocally: for region " + this.prRegion.getFullPath() + " bucketId=" + bucketId + " bucketAdvisor=" + bucketAdvisor + ", new primary: " + newPrimary); } BucketPersistenceAdvisor persistentAdvisor = proxyBucketRegion .getPersistenceAdvisor(); //prevent multiple threads from ending bucket creation at the same time. //This fixes an issue with 41336, where multiple threads were calling endBucketCreation //on the persistent advisor and marking a bucket as initialized twice. 
synchronized(proxyBucketRegion) { if(persistentAdvisor != null) { BucketRegion realBucket = proxyBucketRegion.getCreatedBucketRegion(); if(realBucket != null) { PersistentMemberID persistentID = realBucket.getPersistentID(); persistentAdvisor.endBucketCreation(persistentID); } } //We've received an endBucketCreationMessage, but the primary //may not have. So now we wait for the chosen member to become //primary. bucketAdvisor.setPrimaryElector(newPrimary); if(prRegion.getGemFireCache().getMyId().equals(newPrimary)) { //If we're the choosen primary, volunteer for primary now if (bucketAdvisor.isHosting()) { bucketAdvisor.clearPrimaryElector(); bucketAdvisor.volunteerForPrimary(); } } else { //It's possible the chosen primary has already left. In //that case, volunteer for primary now. if(!bucketAdvisor.adviseInitialized().contains(newPrimary)) { bucketAdvisor.clearPrimaryElector(); bucketAdvisor.volunteerForPrimary(); } //If the bucket has had a primary, that means the //chosen bucket was primary for a while. Go ahead and //clear the primary elector field. if(bucketAdvisor.getHadPrimary()) { bucketAdvisor.clearPrimaryElector(); bucketAdvisor.volunteerForPrimary(); } } } //send out a profile update to indicate the persistence is initialized, if needed. if(persistentAdvisor != null) { bucketAdvisor.endBucketCreation(); } List colocatedWithList = ColocationHelper.getColocatedChildRegions(prRegion); for(PartitionedRegion child : colocatedWithList) { if(child.getRegionAdvisor().isBucketLocal(bucketId)) { child.getRedundancyProvider().endBucketCreationLocally(bucketId, newPrimary); } } } /** * Get buddy data stores on the same Host as the accepted member * @return set of members on the same host, not including accepted member * @since gemfire59poc * */ private Set getBuddyMembersInZone( final InternalDistributedMember acceptedMember, final Set allStores) { DM dm = this.prRegion.getDistributionManager(); Set buddies = dm.getMembersInSameZone(acceptedMember); //TODO Dan - I'm not sure this retain all is necessary, but there may have been a reason we were //passing this set in before. buddies.retainAll(allStores); return buddies; } /** * Early check for resources. This code may be executed for every put operation if * there are no datastores present, limit excessive logging. * @since gemfire5.8 */ private void earlySufficientStoresCheck(String partitionName) { assert Assert.assertHoldsLock(this,false); Set currentStores = getAllStores(partitionName); if (currentStores.isEmpty()) { if (shouldLogInsufficientStores()) { insufficientStores(currentStores, Collections.EMPTY_LIST, true); } insufficientStores(currentStores, Collections.EMPTY_LIST, false); } } /** * Limit the frequency for logging the {@link #INSUFFICIENT_STORES_MSG} message * to once per PR after which once every {@link #INSUFFICIENT_LOGGING_THROTTLE_TIME} * second * @return true if it's time to log * @since gemfire5.8 */ private boolean shouldLogInsufficientStores() { long now = NanoTimer.getTime(); long delta = now - insufficientLogTimeStamp.get(); if (this.firstInsufficentStoresLogged.compareAndSet(false, true) || delta >= INSUFFICIENT_LOGGING_THROTTLE_TIME) { insufficientLogTimeStamp.set(now); return true; } else { return false; } } /** * Compute timeout for waiting for a bucket. 
Prefer {@link #DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS} * over {@link PartitionedRegion#getRetryTimeout()} * @return the milliseconds to wait for a bucket creation operation */ private long computeTimeout() { if (DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS != null) { long millis = DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS.longValue(); if (millis > 0) { // only positive values allowed return millis; } } return this.prRegion.getRetryTimeout(); } /** * Check to determine that there are enough datastore VMs to start the bucket * creation processes. Log a warning or throw an exception indicating * when there are not enough datastore VMs. * @param allStores All known data store instances (including local) * @param loggedInsufficentStores indicates whether a warning has been logged * @return true when a warning has been logged, false if a warning should be logged. */ private boolean checkSufficientStores(final Set allStores, final boolean loggedInsufficentStores) { // Report (only once) if insufficient data store have been detected. if (! loggedInsufficentStores) { if (allStores.size() == 0) { insufficientStores(allStores, Collections.EMPTY_LIST, true); return true; } } else { if (allStores.size() > 0) { // Excellent, sufficient resources were found! final StringId logStr = LocalizedStrings.PRHARRedundancyProvider_0_IN_THE_PARTITIONED_REGION_REGION_NAME_1; final Object[] logArgs = new Object[] {SUFFICIENT_STORES_MSG.toLocalizedString(), prRegion.getFullPath()}; if (TEST_MODE) { getLogger().severe(logStr, logArgs); } else { getLogger().info(logStr, logArgs); } return false; } else { // Already logged warning, there are no datastores insufficientStores(allStores, Collections.EMPTY_LIST, false); // UNREACHABLE } } return loggedInsufficentStores; } /** * Clean up locally created bucket and tell other VMs to * attempt recovering redundancy * @param buck the bucket identifier */ private void cleanUpBucket(int buck) { Set dataStores = this.prRegion.getRegionAdvisor().adviseDataStore(); BucketBackupMessage.send(dataStores, this.prRegion, buck); } public void finishIncompleteBucketCreation(int bucketId) { String partitionName = null; final long startTime = PartitionedRegionStats.startTime(); if (this.prRegion.isFixedPartitionedRegion()) { FixedPartitionAttributesImpl fpa = PartitionedRegionHelper .getFixedPartitionAttributesForBucket(this.prRegion, bucketId); partitionName = fpa.getPartitionName(); } createBucketAtomically(bucketId, 0, startTime, true, partitionName); } /** * Creates bucket with ID bucketId on targetNode. This method * will also create the bucket for all of the child colocated PRs. * * @param bucketId * @param targetNMember * @param isRebalance true if bucket creation is directed by rebalancing * @param replaceOfflineData * @return true if the bucket was sucessfully created */ public boolean createBackupBucketOnMember(final int bucketId, final InternalDistributedMember targetNMember, final boolean isRebalance, boolean replaceOfflineData, InternalDistributedMember moveSource, boolean forceCreation) { if (getLogger().fineEnabled()) { getLogger().fine("createBackupBucketOnMember for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " member: " + targetNMember); } if (! 
(targetNMember.equals(this.prRegion.getMyId()))) { // final StoppableReentrantReadWriteLock.StoppableReadLock isClosingReadLock; PartitionProfile pp = this.prRegion.getRegionAdvisor() .getPartitionProfile(targetNMember); if (pp != null) { // isClosingReadLock = pp.getIsClosingReadLock( // this.prRegion.getCancelCriterion()); } else { return false; } try { ManageBackupBucketMessage.NodeResponse response = ManageBackupBucketMessage .send(targetNMember, this.prRegion, bucketId, isRebalance, replaceOfflineData, moveSource, forceCreation); if (response.waitForAcceptance()) { getLogger().fine("createBackupBucketOnMember: " + "Bucket creation succeed for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " on member = " + targetNMember); return true; } else { getLogger().fine("createBackupBucketOnMember: " + "Bucket creation failed for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " on member = " + targetNMember); return false; } } catch (Throwable e) { Error err; if (e instanceof Error && SystemFailure.isJVMFailureError( err = (Error)e)) { SystemFailure.initiateFailure(err); // If this ever returns, rethrow the error. We're poisoned // now, so don't let this thread continue. throw err; } // Whenever you catch Error or Throwable, you must also // check for fatal JVM error (see above). However, there is // _still_ a possibility that you are dealing with a cascading // error condition, so you also need to check to see if the JVM // is still usable: SystemFailure.checkFailure(); if (e instanceof ForceReattemptException) { // no log needed see bug 37569 } else if (e instanceof CancelException || (e.getCause() != null && (e.getCause() instanceof CancelException))) { // no need to log exceptions caused by cache closure } else { LogWriterI18n log = getLogger(); if (log.warningEnabled()) { log.warning(LocalizedStrings.PRHARedundancyProvider_EXCEPTION_CREATING_PARTITION_ON__0, targetNMember, e); } } return false; } } else { final PartitionedRegionDataStore prDS = this.prRegion.getDataStore(); boolean bucketManaged = prDS!=null && prDS.grabBucket(bucketId, moveSource, forceCreation, replaceOfflineData, isRebalance, null, false).equals( CreateBucketResult.CREATED); if (! bucketManaged) { getLogger().fine("createBackupBucketOnMember:" + " Local data store refused to accommodate the data for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " prDS=" + prDS); } return bucketManaged; } } private static final ThreadLocal forceLocalPrimaries = new ThreadLocal(); public static void setForceLocalPrimaries(boolean v) { forceLocalPrimaries.set(Boolean.valueOf(v)); } private boolean getForceLocalPrimaries() { boolean result = false; Boolean v = (Boolean)forceLocalPrimaries.get(); if (v != null) { result = v.booleanValue(); } return result; } /** * Creates bucket with ID bucketId on targetNode. * * @param bucketId * @param targetNMember * @param newBucketSize * @param forceCreation inform the targetMember it must attempt host the bucket, * appropriately ignoring it's maximums * @return a response object */ public ManageBucketRsp createBucketOnMember(final int bucketId, final InternalDistributedMember targetNMember, final int newBucketSize, boolean forceCreation) { if (getLogger().fineEnabled()) { getLogger().fine("createBucketOnMember for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " member: " + targetNMember + (forceCreation ? " forced" : "")); } if (! 
(targetNMember.equals(this.prRegion.getMyId()))) { // final StoppableReentrantReadWriteLock.StoppableReadLock isClosingReadLock; PartitionProfile pp = this.prRegion.getRegionAdvisor() .getPartitionProfile(targetNMember); if (pp != null) { // isClosingReadLock = pp.getIsClosingReadLock( // this.prRegion.getCancelCriterion()); } else { return ManageBucketRsp.NO; } try { // isClosingReadLock.lock(); // Grab the read lock, preventing any region closures // on this remote Node until this bucket is fully published, forcing the closing // Node to recognize any pre-natal buckets. NodeResponse response = ManageBucketMessage.send(targetNMember, this.prRegion, bucketId, newBucketSize, forceCreation); if (response.waitForAcceptance()) { getLogger().fine("createBucketOnMember: " + "Bucket creation succeed for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " on member = " + targetNMember); // lockList.add(isClosingReadLock); return ManageBucketRsp.YES; } else { getLogger().fine("createBucketOnMember: " + "Bucket creation failed for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " on member = " + targetNMember); // isClosingReadLock.unlock(); return response.rejectedDueToInitialization() ? ManageBucketRsp.NO_INITIALIZING : ManageBucketRsp.NO; } } catch(PartitionOfflineException e) { throw e; } catch (Throwable e) { Error err; if (e instanceof Error && SystemFailure.isJVMFailureError( err = (Error)e)) { SystemFailure.initiateFailure(err); // If this ever returns, rethrow the error. We're poisoned // now, so don't let this thread continue. throw err; } // Whenever you catch Error or Throwable, you must also // check for fatal JVM error (see above). However, there is // _still_ a possibility that you are dealing with a cascading // error condition, so you also need to check to see if the JVM // is still usable: SystemFailure.checkFailure(); if (e instanceof CancelException || (e.getCause() != null && (e.getCause() instanceof CancelException))) { // no need to log exceptions caused by cache closure return ManageBucketRsp.CLOSED; } else if (e instanceof ForceReattemptException) { // no log needed see bug 37569 } else { LogWriterI18n log = getLogger(); if (log.warningEnabled()) { log.warning(LocalizedStrings.PRHARedundancyProvider_EXCEPTION_CREATING_PARTITION_ON__0, targetNMember, e); } } // isClosingReadLock.unlock(); return ManageBucketRsp.NO; } } else { final PartitionedRegionDataStore prDS = this.prRegion.getDataStore(); boolean bucketManaged = prDS!=null && prDS.handleManageBucketRequest(bucketId, newBucketSize, this.prRegion.getMyId(), forceCreation); if (! bucketManaged) { getLogger().fine("createBucketOnMember:" + " Local data store not able to accommodate the data for bucketId=" + this.prRegion.bucketStringForLogs(bucketId)); } return ManageBucketRsp.valueOf(bucketManaged); } } /** * Select the member with which is hosting the same bucketid for the PR it is * colocated with In case of primary it returns the same node whereas in case * of secondary it will return the least loaded datastore which is hosting the * bucketid. 
* * @param alreadyUsed * @param bucketId * @param prName * @return InternalDistributedMember colocated data store * @since 5.8Beta */ private InternalDistributedMember getColocatedDataStore( Collection candidates, Collection alreadyUsed, int bucketId, String prName) { Assert.assertTrue(prName != null); // precondition1 PartitionedRegion colocatedRegion = ColocationHelper.getColocatedRegion(this.prRegion); Region prRoot = PartitionedRegionHelper.getPRRoot(prRegion .getCache()); PartitionRegionConfig config = (PartitionRegionConfig)prRoot.get(prRegion .getRegionIdentifier()); if (!config.isColocationComplete()) { throw new IllegalStateException( "Cannot create buckets, as colocated regions are not " + "configured to be at the same nodes."); } RegionAdvisor advisor = colocatedRegion.getRegionAdvisor(); if (alreadyUsed.isEmpty()) { InternalDistributedMember primary = advisor.getPrimaryMemberForBucket(bucketId); if (!candidates.contains(primary)) { return null; } return primary; } Set bucketOwnersSet = advisor.getBucketOwners(bucketId); bucketOwnersSet.retainAll(candidates); ArrayList members = new ArrayList(bucketOwnersSet); if(members.isEmpty()){ return null; } return getPreferredDataStore(members, alreadyUsed); } /** * Select the member with the fewest buckets, among those with the fewest * randomly select one. * * Under concurrent access, the data that this method uses, may be somewhat * volatile, note that createBucketAtomically synchronizes * to enhance the consistency of the data used in this method. * * @param candidates ArrayList of InternalDistributedMember, potential datastores * @param alreadyUsed data stores already in use * @return a member with the fewest buckets or null if no datastores */ private InternalDistributedMember getPreferredDataStore( Collection candidates, final Collection alreadyUsed) { /* has a primary already been chosen? 
*/ final boolean forPrimary = alreadyUsed.size() == 0; if (forPrimary && getForceLocalPrimaries()) { PartitionedRegionDataStore myDS = this.prRegion.getDataStore(); if (myDS != null) { return this.prRegion.getMyId(); } } if (candidates.size() == 1) { return candidates.iterator().next(); } Assert.assertTrue(candidates.size() > 1); final LogWriterI18n lw = getLogger(); // Convert peers to DataStoreBuckets ArrayList stores = this.prRegion.getRegionAdvisor() .adviseFilteredDataStores(new HashSet(candidates)); final DM dm = this.prRegion.getDistributionManager(); // Add ourself as a candidate, if appropriate InternalDistributedMember moi = dm.getId(); PartitionedRegionDataStore myDS = this.prRegion.getDataStore(); if (myDS != null && candidates.contains(moi)) { int bucketCount = myDS.getBucketsManaged(); int priCount = myDS.getNumberOfPrimaryBucketsManaged(); int localMaxMemory = this.prRegion.getLocalMaxMemory(); stores.add(new DataStoreBuckets(moi, bucketCount, priCount, localMaxMemory)); } if (stores.isEmpty()) { return null; } // --------------------------------------------- // Calculate all hosts who already have this bucket final HashSet existingHosts = new HashSet(); Iterator it = alreadyUsed.iterator(); while (it.hasNext()) { InternalDistributedMember mem = it.next(); existingHosts.addAll(dm.getMembersInSameZone(mem)); } Comparator comparator = new Comparator() { public int compare(DataStoreBuckets d1, DataStoreBuckets d2) { boolean host1Used = existingHosts.contains(d1.memberId); boolean host2Used = existingHosts.contains(d2.memberId); if (!host1Used && host2Used) { return -1; // host1 preferred } if (host1Used && !host2Used) { return 1; // host2 preferred } // Six eggs, half a dozen. Look for least loaded. float metric1, metric2; if (forPrimary) { metric1 = d1.numPrimaries / (float) d1.localMaxMemoryMB; metric2 = d2.numPrimaries / (float) d2.localMaxMemoryMB; } else { metric1 = d1.numBuckets / (float) d1.localMaxMemoryMB; metric2 = d2.numBuckets / (float) d2.localMaxMemoryMB; } int result = Float.compare(metric1, metric2); if(result == 0) { //if they have the same load, choose the member with the //higher localMaxMemory result = d2.localMaxMemoryMB - d1.localMaxMemoryMB; } return result; } }; // --------------------------------------------- // First step is to sort datastores first by those whose hosts don't // hold this bucket, and then secondarily by loading. Collections.sort(stores, comparator); if (lw.fineEnabled()) { lw.fine(fancyFormatBucketAllocation("Sorted ", stores, existingHosts)); } // --------------------------------------------- // Always add the first datastore and note just how good it is. DataStoreBuckets bestDataStore = stores.get(0); ArrayList bestStores = new ArrayList(); bestStores.add(bestDataStore); final boolean allStoresInUse = alreadyUsed.contains(bestDataStore.memberId); // --------------------------------------------- // Collect all of the other hosts in this sorted list that are as good // as the very first one. for (int i = 1; i < stores.size(); i ++) { DataStoreBuckets aDataStore = stores.get(i); if (!allStoresInUse && alreadyUsed.contains(aDataStore.memberId)) { // Only choose between the ones not in use. 
break; } if (comparator.compare(bestDataStore, aDataStore) != 0) { break; } bestStores.add(aDataStore); } if (lw.fineEnabled()) { lw.fine(fancyFormatBucketAllocation("Best Stores ", bestStores, existingHosts)); } // --------------------------------------------- int chosen; if(DISABLE_CREATE_BUCKET_RANDOMNESS) { chosen = 0; } else { // Pick one (at random) chosen = PartitionedRegion.rand.nextInt(bestStores.size()); } DataStoreBuckets aDataStore = bestStores.get(chosen); return aDataStore.memberId; } /** * Adds a membership listener to watch for member departures, * and schedules a task to recover redundancy of existing buckets */ public void startRedundancyRecovery() { prRegion.getRegionAdvisor().addMembershipListener(new PRMembershipListener()); scheduleRedundancyRecovery(null); } /** * Log bucket allocation in the log files in this format: *
   * member1: +5/20
   * member2: -10/5
   * 
* After the member name, the +/- indicates whether or not this bucket is * already hosted on the given member. This is followed by the number of * hosted primaries followed by the number of hosted non-primary buckets. * * @param prefix first part of message to print * @param dataStores list of stores * @param existingStores to mark those already in use */ private String fancyFormatBucketAllocation(String prefix, List dataStores, Set existingStores) { StringBuilder logStr = new StringBuilder(); if (prefix != null) { logStr.append(prefix); } logStr.append("Bucket Allocation for prId=" + this.prRegion.getPRId() + ":\n"); for (Iterator i = dataStores.iterator(); i.hasNext(); ) { DataStoreBuckets dsb = (DataStoreBuckets)i.next(); logStr.append(dsb.memberId).append(": "); if (existingStores.contains(dsb.memberId)) { logStr.append("+"); } else { logStr.append("-"); } logStr.append(Integer.toString(dsb.numPrimaries)); logStr.append("/"); logStr.append(Integer.toString(dsb.numBuckets - dsb.numPrimaries)); // for (int j = 0; j < dsb.numPrimaries; j++) { // logStr.append('#'); // } // int nonPrimary = dsb.numBuckets - dsb.numPrimaries; // for (int j = 0; j < nonPrimary; j++) { // logStr.append('*'); // } logStr.append('\n'); } return logStr.toString(); } public static class DataStoreBuckets { public final InternalDistributedMember memberId; public final int numBuckets; public final int numPrimaries; private final int localMaxMemoryMB; public DataStoreBuckets(InternalDistributedMember mem, int buckets, int primaryBuckets, int localMaxMemory) { this.memberId = mem; this.numBuckets = buckets; this.numPrimaries = primaryBuckets; this.localMaxMemoryMB = localMaxMemory; } @Override public boolean equals(Object obj) { if ((obj == null) || !(obj instanceof DataStoreBuckets)) { return false; } DataStoreBuckets other = (DataStoreBuckets) obj; return this.numBuckets == other.numBuckets && this.memberId.equals(other.memberId); } @Override public int hashCode() { return this.memberId.hashCode(); } @Override public String toString() { return "DataStoreBuckets memberId=" + this.memberId + "; numBuckets=" + this.numBuckets + "; numPrimaries=" + this.numPrimaries; } } /** * Verifies the members and removes the members that are either not present in the * DistributedSystem or are no longer part of the PartitionedRegion * (close/localDestroy has been performed.) . * * @param members collection of members to scan and modify */ void verifyBucketNodes(Collection members, String partitionName) { if (members == null || members.isEmpty()) { return; } // Revisit region advisor, get current bucket stores. final Set availableMembers = getAllStores(partitionName); // boolean debugAnyRemoved = false; for (Iterator itr = members.iterator(); itr.hasNext();) { InternalDistributedMember node = itr.next(); if ( ! 
availableMembers.contains(node)) { if (getLogger().fineEnabled()) { getLogger().fine( "verifyBucketNodes: removing member " + node); // debugAnyRemoved = true; } itr.remove(); Assert.assertTrue(!members.contains(node), "return value does not contain " + node); } } // for // if (debugAnyRemoved) { // this.prRegion.getRegionAdvisor().dumpProfiles(getLogger(), "DEBUG verifyBucketNodes removed a profile from the 'accepted' list"); // Set members = ((InternalDistributedSystem)this.prRegion.getCache().getDistributedSystem()) // .getDistributionManager().getDistributionManagerIds(); // getLogger().info("Distributed members in view: " + PartitionedRegionHelper.printCollection(members)); // } } /** * Schedule a task to perform redundancy recovery for a new node or for * the node departed. */ public void scheduleRedundancyRecovery(Object failedMemId) { final boolean isStartup = (failedMemId == null); final LogWriterI18n logger = getLogger(); final GemFireCacheImpl cache = this.prRegion.getCache(); final int redundantCopies = PRHARedundancyProvider.this.prRegion.getRedundantCopies(); final long delay; final boolean movePrimaries; if (isStartup) { delay = this.prRegion.getPartitionAttributes().getStartupRecoveryDelay(); movePrimaries = !Boolean.getBoolean("gemfire.DISABLE_MOVE_PRIMARIES_ON_STARTUP"); } else { delay = this.prRegion.getPartitionAttributes().getRecoveryDelay(); movePrimaries = false; } final boolean requiresRedundancyRecovery = delay >= 0 && redundantCopies > 0; if(!requiresRedundancyRecovery) { return; } if (!PRHARedundancyProvider.this.prRegion.isDataStore()) { return; } if (cache.isUnInitializedMember(cache.getMyId())) { return; } Runnable task = new RecoveryRunnable(this) { @Override public void run2() { try { final boolean isFixedPartitionedRegion = PRHARedundancyProvider.this.prRegion.isFixedPartitionedRegion(); final PartitionedRegionRebalanceOp rebalance; //Fix for 43582 - always replace offline data for fixed partitioned //regions - this guarantees we create the buckets we are supposed to //create on this node. boolean replaceOfflineData = isFixedPartitionedRegion || !isStartup; rebalance = new PartitionedRegionRebalanceOp( PRHARedundancyProvider.this.prRegion, false, true, false, movePrimaries, replaceOfflineData,false); long start = PRHARedundancyProvider.this.prRegion.getPrStats() .startRecovery(); if (isFixedPartitionedRegion) { rebalance.executeFPA(); } else { rebalance.execute(); } PRHARedundancyProvider.this.prRegion.getPrStats().endRecovery(start); PRHARedundancyProvider.this.recoveryFuture = null; } catch(CancelException e) { logger.fine("Cache closed while recovery in progress"); } catch(RegionDestroyedException e) { logger.fine("Region destroyed while recovery in progress"); } catch (Exception e) { logger.error(LocalizedStrings.PRHARedundancyProvider_UNEXPECTED_EXCEPTION_DURING_BUCKET_RECOVERY, e); } } }; synchronized (this.shutdownLock) { // possible fix for bug 41094 if (!this.shutdown) { try { if(logger.fineEnabled()) { if (isStartup) { logger.fine(this.prRegion + " scheduling redundancy recovery in " + delay + " ms"); } else { logger .fine(prRegion + " scheduling redundancy recovery after departure/crash/error in " + failedMemId + " in " + delay + " ms"); } } recoveryFuture = this.recoveryExecutor.schedule(task, delay, TimeUnit.MILLISECONDS); } catch(RejectedExecutionException e) { //ok, the executor is shutting down. 
  public boolean isRedundancyImpaired() {
    int numBuckets = this.prRegion.getPartitionAttributes().getTotalNumBuckets();
    int targetRedundancy = this.prRegion.getPartitionAttributes().getRedundantCopies();

    for (int i = 0; i < numBuckets; i++) {
      int redundancy = this.prRegion.getRegionAdvisor().getBucketRedundancy(i);
      if (redundancy < targetRedundancy && redundancy != -1
          || redundancy > targetRedundancy) {
        return true;
      }
    }
    return false;
  }
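  // Note on the check above: a bucket whose redundancy is reported as -1 (no
  // copies of that bucket exist yet) is deliberately not treated as impairment;
  // redundancy is considered impaired only when a created bucket has fewer or
  // more copies than the configured redundant-copies target.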
  public boolean recoverPersistentBuckets() {

    /**
     * To handle a case where a ParallelGatewaySender is persistent but the user
     * PR is not: first recover the GatewaySender buckets for the
     * ParallelGatewaySender, irrespective of whether colocation is complete.
     */
    PartitionedRegion leaderRegion = ColocationHelper.getLeaderRegion(this.prRegion);

    if (getLogger().fineEnabled()) {
      getLogger().fine(
          "recoverPersistentBuckets for " + this.prRegion.getFullPath()
              + " isShadowPR " + this.prRegion.isShadowPR()
              + " I am persistent : "
              + this.prRegion.getDataPolicy().withPersistence()
              + " leaderRegion " + leaderRegion
              + " leaderRegion is persistent: "
              + leaderRegion.getDataPolicy().withPersistence());
    }

    // Check if the leader region or some child shadow PR region is persistent
    // and return the first persistent region found.
    PartitionedRegion persistentLeader = getPersistentLeader();

    // If there is no persistent region in the colocation chain, no need to recover.
    if (persistentLeader == null) {
      return true;
    }

    if (!ColocationHelper.checkMembersColocation(leaderRegion,
        leaderRegion.getDistributionManager().getDistributionManagerId())) {
      if (getLogger().fineEnabled()) {
        getLogger().fine(
            "Skipping persistent recovery of " + prRegion
                + " because colocation is not complete for " + leaderRegion);
      }
      return false;
    }

    // TODO prpersist - It would make sense to hold the lock here in some cases
    // to prevent confusing members that are trying to rebalance. BUT, these
    // persistent regions need to wait for other members to recover during
    // initialization.
//    RecoveryLock lock = leaderRegion.getRecoveryLock();
//    lock.lock();
//    try {
    final ProxyBucketRegion[] proxyBucketArray =
        persistentLeader.getRegionAdvisor().getProxyBucketArray();

    for (ProxyBucketRegion proxyBucket : proxyBucketArray) {
      proxyBucket.initializePersistenceAdvisor();
    }
    Set peers = this.prRegion.getRegionAdvisor().adviseGeneric();

    // TODO prpersist - Ok, this is super lame. We need to make sure here that
    // we don't run into this race condition:
    // 1) We get a membership view from member A.
    // 2) Member B removes itself, and distributes to us and A. We don't remove B.
    // 3) We apply the membership view from A, which includes B.
    // That will add B back into the set.

    // This state flush will make sure that any membership changes
    // that are in progress on the peers are finished.
    MembershipFlushRequest.send(peers, this.prRegion.getDistributionManager(),
        this.prRegion.getFullPath());

    ArrayList<ProxyBucketRegion> bucketsNotHostedLocally =
        new ArrayList<ProxyBucketRegion>(proxyBucketArray.length);
    ArrayList<ProxyBucketRegion> bucketsHostedLocally =
        new ArrayList<ProxyBucketRegion>(proxyBucketArray.length);

    /*
     * Start the redundancy logger before recovering any proxy buckets.
     */
    allBucketsRecoveredFromDisk = new CountDownLatch(proxyBucketArray.length);
    try {
      if (proxyBucketArray.length > 0) {
        this.redundancyLogger = new RedundancyLogger(this);
        Thread loggingThread = new Thread(this.redundancyLogger,
            "RedundancyLogger for region " + this.prRegion.getName());
        loggingThread.start();
      }
    } catch (RuntimeException e) {
      allBucketsRecoveredFromDisk = null;
      throw e;
    }

    /*
     * Spawn a separate thread for each bucket that we previously hosted
     * to recover that bucket.
     *
     * That thread will get to the point at which it has determined that
     * at least one member (possibly the local member) has fully initialized
     * the bucket, at which point it will count down the someMemberRecoveredLatch
     * on the bucket.
     *
     * Once at least one copy of each bucket has been created in the distributed
     * system, the initPRInternals method will exit. Some of the threads
     * spawned here will still be doing GIIs in the background. This
     * allows the system to become usable as fast as possible.
     *
     * If we used a bounded thread pool here, we would end up waiting for
     * some buckets to finish their GII before returning from initPRInternals.
     * In the future maybe we could let the create-bucket call return and pass
     * the GII task to a separate thread pool.
     */
    for (final ProxyBucketRegion proxyBucket : proxyBucketArray) {
      if (proxyBucket.getPersistenceAdvisor().wasHosting()) {
        final RecoveryRunnable recoveryRunnable = new RecoveryRunnable(this) {
          @Override
          public void run() {
            // Fix for 44551 - make sure that we always count down
            // this latch, even if the region was destroyed.
            try {
              super.run();
            } finally {
              allBucketsRecoveredFromDisk.countDown();
            }
          }

          @Override
          public void run2() {
            proxyBucket.recoverFromDiskRecursively();
          }
        };
        Thread recoveryThread = new Thread(recoveryRunnable,
            "Recovery thread for bucket " + proxyBucket.getName());
        recoveryThread.start();
        bucketsHostedLocally.add(proxyBucket);
      } else {
        bucketsNotHostedLocally.add(proxyBucket);
      }
    }

    try {
      // Partial fix for 44045: try to recover the local
      // buckets before the proxy buckets. This will allow us
      // to detect any ConflictingDataException before the proxy
      // buckets update their membership view.
      for (final ProxyBucketRegion proxyBucket : bucketsHostedLocally) {
        proxyBucket.waitForPrimaryPersistentRecovery();
      }
      for (final ProxyBucketRegion proxyBucket : bucketsNotHostedLocally) {
        proxyBucket.recoverFromDiskRecursively();
      }
    } finally {
      for (final ProxyBucketRegion proxyBucket : bucketsNotHostedLocally) {
        allBucketsRecoveredFromDisk.countDown();
      }
    }

    return true;
//    } finally {
//      lock.unlock();
//    }
  }
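  // Coordination note: allBucketsRecoveredFromDisk is sized to the number of
  // proxy buckets and counted down exactly once per bucket - by the per-bucket
  // recovery threads for locally hosted buckets, and in the finally block above
  // for the rest. The waitForPersistentBucketRecoveryOrClose() and
  // waitForPersistentBucketRecovery() methods below block on this latch, so
  // they return only once every bucket has been accounted for.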
  /**
   * Check to see if any colocated region of the current region is persistent.
   * It's not enough to check just the leader region, because a child region
   * might be a persistent parallel WAN queue, which is allowed.
   *
   * @return the most senior region in the colocation chain (closest to the
   *         leader) that is persistent.
   */
  protected PartitionedRegion getPersistentLeader() {
    PartitionedRegion leader = ColocationHelper.getLeaderRegion(this.prRegion);

    return findPersistentRegionRecursively(leader);
  }

  private PartitionedRegion findPersistentRegionRecursively(
      PartitionedRegion pr) {
    if (pr.getDataPolicy().withPersistence()) {
      return pr;
    }
    for (PartitionedRegion child : ColocationHelper.getColocatedChildRegions(pr)) {
      PartitionedRegion leader = findPersistentRegionRecursively(child);
      if (leader != null) {
        return leader;
      }
    }
    return null;
  }

  public void scheduleCreateMissingBuckets() {
    if (this.prRegion.getColocatedWith() != null
        && ColocationHelper.isColocationComplete(this.prRegion)) {
      Runnable task = new CreateMissingBucketsTask(this);
      final InternalResourceManager resourceManager = this.prRegion
          .getGemFireCache().getResourceManager();
      resourceManager.getExecutor().submit(task);
    }
  }

  public void shutdown() {
    synchronized (this.shutdownLock) { // possible fix for bug 41094
      this.shutdown = true;
      ScheduledFuture recoveryFuture = this.recoveryFuture;
      if (recoveryFuture != null) {
        recoveryFuture.cancel(false/*mayInterruptIfRunning*/);
        this.recoveryExecutor.purge();
      }
    }
  }

  /**
   * Creates and fills in a PartitionRegionInfo for the partitioned region.
   *
   * @param internal true if internal-only details should be included
   * @param loadProbe the LoadProbe to use
   * @return PartitionRegionInfo for the partitioned region
   */
  public InternalPRInfo buildPartitionedRegionInfo(
      final boolean internal,
      final LoadProbe loadProbe) {
    final PartitionedRegion pr = this.prRegion;

    if (pr == null) {
      return null;
    }

    PartitionedRegionStats prStats = pr.getPrStats();

    int configuredBucketCount = pr.getTotalNumberOfBuckets();
    int createdBucketCount = pr.getRegionAdvisor().getCreatedBucketsCount();
    int lowRedundancyBucketCount = prStats.getLowRedundancyBucketCount();
    int configuredRedundantCopies = pr.getRedundantCopies();
    int actualRedundantCopies = prStats.getActualRedundantCopies();

    final PartitionedRegionDataStore ds = pr.getDataStore();

    Set datastores = pr.getRegionAdvisor().adviseDataStore();

    //int size = datastores.size() + (ds == null ? 0 : 1);

    Set memberDetails = new TreeSet();

    OfflineMemberDetails offlineMembers = null;
    boolean fetchOfflineMembers = false;
    if (ds != null) {
      memberDetails.add(buildPartitionMemberDetails(internal, loadProbe));
      offlineMembers = fetchOfflineMembers();
    } else {
      fetchOfflineMembers = true;
    }

    // Get remote results
    if (!datastores.isEmpty()) {
      FetchPartitionDetailsResponse response = FetchPartitionDetailsMessage
          .send(datastores, pr, internal, fetchOfflineMembers, loadProbe);
      memberDetails.addAll(response.waitForResponse());
      if (fetchOfflineMembers) {
        offlineMembers = response.getOfflineMembers();
      }
    }

    String colocatedWithPath = pr.getColocatedWith();

    InternalPRInfo details = new PartitionRegionInfoImpl(
        pr.getFullPath(),
        configuredBucketCount,
        createdBucketCount,
        lowRedundancyBucketCount,
        configuredRedundantCopies,
        actualRedundantCopies,
        memberDetails,
        colocatedWithPath,
        offlineMembers);

    return details;
  }
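  // For orientation (hedged, not part of this class): the details assembled
  // above are what ultimately back the public PartitionRegionHelper
  // introspection API, roughly along these lines:
  //
  //   Region<?, ?> region = cache.getRegion("myPartitionedRegion"); // hypothetical region name
  //   PartitionRegionInfo info = PartitionRegionHelper.getPartitionRegionInfo(region);
  //   int missing = info.getConfiguredBucketCount() - info.getCreatedBucketCount();
  //
  // Method names are from the public com.gemstone.gemfire.cache.partition
  // package as best recalled here; consult that package for the authoritative
  // signatures.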
  /**
   * Retrieve the set of members which are currently offline
   * for all buckets.
   */
  public OfflineMemberDetailsImpl fetchOfflineMembers() {
    ProxyBucketRegion[] proxyBuckets = prRegion.getRegionAdvisor().getProxyBucketArray();
    Set[] offlineMembers = new Set[proxyBuckets.length];
    for (int i = 0; i < proxyBuckets.length; i++) {
      ProxyBucketRegion proxy = proxyBuckets[i];
      if (this.prRegion.getDataPolicy().withPersistence()) {
        Set persistedMembers = proxy.getPersistenceAdvisor().getMissingMembers();
        if (persistedMembers == null) {
          persistedMembers = Collections.emptySet();
        }
        offlineMembers[i] = persistedMembers;
      } else {
        offlineMembers[i] = Collections.emptySet();
      }
    }
    return new OfflineMemberDetailsImpl(offlineMembers);
  }

  /**
   * Creates and fills in a PartitionMemberDetails for the local member.
   *
   * @param internal true if internal-only details should be included
   * @param loadProbe the LoadProbe to use
   * @return PartitionMemberDetails for the local member
   */
  public InternalPartitionDetails buildPartitionMemberDetails(
      final boolean internal,
      final LoadProbe loadProbe) {
    final PartitionedRegion pr = this.prRegion;

    PartitionedRegionDataStore ds = pr.getDataStore();
    if (ds == null) {
      return null;
    }

    InternalPartitionDetails localDetails = null;

    long size = 0;

    InternalDistributedMember localMember = pr.getMyId();

    int configuredBucketCount = pr.getTotalNumberOfBuckets();
    long[] bucketSizes = new long[configuredBucketCount];
    // key: bid, value: size
    Map<Integer, Integer> bucketSizeMap = ds.getSizeLocally();
    for (Iterator<Map.Entry<Integer, Integer>> iter =
        bucketSizeMap.entrySet().iterator(); iter.hasNext();) {
      Map.Entry<Integer, Integer> me = iter.next();
      int bid = me.getKey().intValue();
      long bucketSize = ds.getBucketSize(bid);
      bucketSizes[bid] = bucketSize;
      size += bucketSize;
    }

    if (internal) {
      waitForPersistentBucketRecoveryOrClose();

      PRLoad prLoad = loadProbe.getLoad(pr);
      localDetails = new PartitionMemberInfoImpl(
          localMember,
          pr.getLocalMaxMemory() * (1024L * 1024L),
          size,
          ds.getBucketsManaged(),
          ds.getNumberOfPrimaryBucketsManaged(),
          prLoad,
          bucketSizes);
    } else {
      localDetails = new PartitionMemberInfoImpl(
          localMember,
          pr.getLocalMaxMemory() * (1024L * 1024L),
          size,
          ds.getBucketsManaged(),
          ds.getNumberOfPrimaryBucketsManaged());
    }
    return localDetails;
  }

  /**
   * Wait for all persistent buckets to be recovered from disk,
   * or for the region to be closed, whichever happens first.
   */
  protected void waitForPersistentBucketRecoveryOrClose() {
    CountDownLatch recoveryLatch = allBucketsRecoveredFromDisk;
    if (recoveryLatch != null) {
      boolean interrupted = false;
      while (true) {
        try {
          this.prRegion.getCancelCriterion().checkCancelInProgress(null);
          boolean done = recoveryLatch.await(
              PartitionedRegionHelper.DEFAULT_WAIT_PER_RETRY_ITERATION,
              TimeUnit.MILLISECONDS);
          if (done) {
            break;
          }
        } catch (InterruptedException e) {
          interrupted = true;
        }
      }
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
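  // Implementation note on the wait loop above: InterruptedException is
  // recorded rather than rethrown so the method keeps waiting for recovery (or
  // for cancellation via checkCancelInProgress), and the interrupt flag is
  // restored on exit so callers can still observe that the thread was
  // interrupted.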
  /**
   * Wait for all persistent buckets to be recovered from disk,
   * regardless of whether the region is currently being closed.
   */
  protected void waitForPersistentBucketRecovery() {
    CountDownLatch recoveryLatch = allBucketsRecoveredFromDisk;
    if (recoveryLatch != null) {
      boolean interrupted = false;
      while (true) {
        try {
          recoveryLatch.await();
          break;
        } catch (InterruptedException e) {
          interrupted = true;
        }
      }
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }

  private static class ManageBucketRsp {
    final static ManageBucketRsp NO = new ManageBucketRsp("NO");
    final static ManageBucketRsp YES = new ManageBucketRsp("YES");
    final static ManageBucketRsp NO_INITIALIZING =
        new ManageBucketRsp("NO_INITIALIZING");
    public static final ManageBucketRsp CLOSED = new ManageBucketRsp("CLOSED");

    private final String name;

    private ManageBucketRsp(String name) {
      this.name = name;
    }

    boolean isRejection() {
      return this == NO || this == NO_INITIALIZING || this == CLOSED;
    }

    boolean isAcceptance() {
      return this == YES;
    }

    boolean isInitializing() {
      return this == NO_INITIALIZING;
    }

    @Override
    public String toString() {
      return "ManageBucketRsp(" + this.name + ")";
    }

    /** return YES if the argument is true, NO if not */
    static ManageBucketRsp valueOf(boolean managed) {
      return managed ? YES : NO;
    }
  }
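  // ManageBucketRsp is a pre-Java-5-style typesafe-constant holder for the
  // possible replies to a manage-bucket request: only YES counts as acceptance,
  // while NO, NO_INITIALIZING and CLOSED are all rejections, with
  // NO_INITIALIZING additionally recording that the remote member was still
  // initializing when it declined.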
  static private class BucketMembershipObserverResults {
    final boolean problematicDeparture;
    final InternalDistributedMember primary;

    BucketMembershipObserverResults(boolean re, InternalDistributedMember p) {
      problematicDeparture = re;
      primary = p;
    }

    @Override
    public String toString() {
      return "pDepart:" + problematicDeparture + " primary:" + primary;
    }
  }

  /**
   * Monitors distributed membership for a given bucket.
   * @author mthomas
   */
  private class BucketMembershipObserver implements MembershipListener {
    final Bucket bucketToMonitor;
    final AI arrivals = CFactory.createAI(0);
    final AB departures = CFactory.createAB(false);

    public BucketMembershipObserver(Bucket b) {
      this.bucketToMonitor = b;
    }

    public BucketMembershipObserver beginMonitoring() {
      int profilesPresent = this.bucketToMonitor.getBucketAdvisor()
          .addMembershipListenerAndAdviseGeneric(this).size();
      arrivals.addAndGet(profilesPresent);
      return this;
    }

    public void stopMonitoring() {
      this.bucketToMonitor.getBucketAdvisor().removeMembershipListener(this);
    }

    public void memberJoined(InternalDistributedMember id) {
      if (getLogger().fineEnabled()) {
        getLogger().fine("Observer for bucket " + this.bucketToMonitor
            + " member joined " + id);
      }
      synchronized (this) {
        // TODO manipulate failedNodes and verifiedNodeList directly
        arrivals.addAndGet(1);
        notify();
      }
    }

    public void memberSuspect(InternalDistributedMember id,
        InternalDistributedMember whoSuspected) {
    }

    public void memberDeparted(InternalDistributedMember id, boolean crashed) {
      if (getLogger().fineEnabled()) {
        getLogger().fine("Observer for bucket " + this.bucketToMonitor
            + " member departed " + id);
      }
      synchronized (this) {
        // TODO manipulate failedNodes and verifiedNodeList directly
        departures.getAndSet(true);
        notify();
      }
    }

    /**
     * Wait for the expected number of owners to be recognized. When the
     * expected number have been seen, fetch the primary and report it. If,
     * while waiting for the owners to be recognized, there is a departure
     * which compromises redundancy, report that instead so the caller can
     * select new targets.
     * @param expectedCount the number of bucket owners to wait for
     * @param expectedOwners the list of owners used when a departure is detected
     * @return if no problematic departures are detected, the primary
     * @throws InterruptedException
     */
    public BucketMembershipObserverResults waitForOwnersGetPrimary(
        final int expectedCount,
        final Collection<InternalDistributedMember> expectedOwners,
        String partitionName) throws InterruptedException {
      boolean problematicDeparture = false;
      synchronized (this) {
        for (;;) {
          this.bucketToMonitor.getCancelCriterion().checkCancelInProgress(null);

          // If any departures, need to rethink much...
          boolean oldDepartures = departures.get();
          if (oldDepartures) {
            verifyBucketNodes(expectedOwners, partitionName);
            if (expectedOwners.isEmpty()) {
              problematicDeparture = true; // need to pick new victims
            }
            // reselect = true; // need to pick new victims
            arrivals.set(expectedOwners.size());
            departures.set(false);
            if (problematicDeparture) {
              if (getLogger().fineEnabled()) {
                getLogger().fine("Bucket observer found departed members - retrying");
              }
            }
            break;
          }

          // Look for success...
          int oldArrivals = arrivals.get();
          if (oldArrivals >= expectedCount) {
            // success!
            break;
          }
          if (getLogger().fineEnabled()) {
            getLogger().fine("Waiting for bucket "
                + prRegion.bucketStringForLogs(this.bucketToMonitor.getId())
                + " to finish being created");
          }

          prRegion.checkReadiness();

          final int creationWaitMillis = 5 * 1000;
          wait(creationWaitMillis);

          if (oldArrivals == arrivals.get() && oldDepartures == departures.get()) {
            getLogger().warning(
                LocalizedStrings.PRHARedundancyProvider_TIME_OUT_WAITING_0_MS_FOR_CREATION_OF_BUCKET_FOR_PARTITIONED_REGION_1_MEMBERS_REQUESTED_TO_CREATE_THE_BUCKET_ARE_2,
                new Object[] {Integer.valueOf(creationWaitMillis),
                    prRegion.getFullPath(), expectedOwners});
          }
        } // for (;;)
      } // synchronized

      if (problematicDeparture) {
        return new BucketMembershipObserverResults(true, null);
      }
      InternalDistributedMember primmy = bucketToMonitor.getBucketAdvisor()
          .getPrimary();
      if (primmy == null) {
        /*
         * Handle a race where nobody has the bucket. We can't return a null
         * member here because we haven't created the bucket; we need to let
         * the higher level code loop.
         */
        return new BucketMembershipObserverResults(true, null);
      } else {
        return new BucketMembershipObserverResults(false, primmy);
      }
    }

    @Override
    public void quorumLost(Set failures, List remaining) {
    }
  }
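  // Hedged sketch of how this observer is meant to be driven (the actual call
  // site lives elsewhere in this class and is not reproduced here):
  //
  //   BucketMembershipObserver observer =
  //       new BucketMembershipObserver(bucket).beginMonitoring();
  //   try {
  //     BucketMembershipObserverResults results =
  //         observer.waitForOwnersGetPrimary(expectedCount, acceptedMembers, partitionName);
  //     // results.problematicDeparture => reselect targets and retry
  //   } finally {
  //     observer.stopMonitoring();
  //   }
  //
  // The names bucket, expectedCount, acceptedMembers and partitionName are
  // placeholders for whatever the caller has in scope.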
  /**
   * Membership listener that performs cleanup when a node leaves the
   * DistributedSystem.
   */
  protected class PRMembershipListener implements MembershipListener {
    public void memberDeparted(final InternalDistributedMember id,
        final boolean crashed) {
      try {
        DistributedMember dmem = prRegion.getSystem().getDistributedMember();
        if (getLogger().fineEnabled()) {
          getLogger().fine("MembershipListener invoked on DistributedMember = "
              + dmem + " for failed memberId = " + id);
        }

        if (!prRegion.isCacheClosing() && !prRegion.isDestroyed()
            && !dmem.equals(id)) {

          Runnable postRecoveryTask = null;

          // Only schedule redundancy recovery if this is not a fixed PR.
          if (!PRHARedundancyProvider.this.prRegion.isFixedPartitionedRegion()) {
            postRecoveryTask = new Runnable() {
              public void run() {
                // After the metadata has been cleaned, recover redundancy.
                scheduleRedundancyRecovery(id);
              }
            };
          }
          // Schedule cleanup of the metadata for the failed member.
          PartitionedRegionHelper.cleanUpMetaDataForRegion(prRegion.getCache(),
              prRegion.getRegionIdentifier(), id, postRecoveryTask);
        }
      } catch (CancelException e) {
        // ignore
      }
    }

    public void memberSuspect(InternalDistributedMember id,
        InternalDistributedMember whoSuspected) {
    }

    public void memberJoined(InternalDistributedMember id) {
      // no action required
    }

    public void quorumLost(Set failures, List remaining) {
    }
  }

  /**
   * Persistent state listener that starts redundancy recovery when a
   * persistent member is revoked.
   */
  protected class PRPersistenceListener extends
      PersistentStateListener.PersistentStateAdapter {
    // TODO prpersist - It seems like this might trigger recovery too often. For
    // example, a rebalance can end up removing a bucket, which would trigger
    // recovery here. We really need to only trigger this thing when a PR region
    // is destroyed. And isn't that code already in there?
    @Override
    public void memberRemoved(PersistentMemberID persistentID, boolean revoked) {
      if (!revoked) {
        return;
      }

      DistributedMember dmem = prRegion.getSystem().getDistributedMember();
      if (getLogger().fineEnabled()) {
        getLogger().fine(
            "Persistent Membership Listener invoked on DistributedMember = "
                + dmem + " for removed memberId = " + persistentID);
      }

      if (!prRegion.isCacheClosing() && !prRegion.isDestroyed()
          && !prRegion.isFixedPartitionedRegion()) {
        scheduleRedundancyRecovery(persistentID);
      }
    }
  }

  public CountDownLatch getAllBucketsRecoveredFromDiskLatch() {
    return allBucketsRecoveredFromDisk;
  }
}



