/*
 * Copyright (c) 2010-2015 Pivotal Software, Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License. See accompanying
 * LICENSE file.
 */

package com.gemstone.gemfire.internal.cache;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

import com.gemstone.gemfire.CancelException;
import com.gemstone.gemfire.SystemFailure;
import com.gemstone.gemfire.cache.CacheClosedException;
import com.gemstone.gemfire.cache.PartitionedRegionStorageException;
import com.gemstone.gemfire.cache.Region;
import com.gemstone.gemfire.cache.RegionDestroyedException;
import com.gemstone.gemfire.cache.persistence.PartitionOfflineException;
import com.gemstone.gemfire.distributed.DistributedMember;
import com.gemstone.gemfire.distributed.internal.DM;
import com.gemstone.gemfire.distributed.internal.MembershipListener;
import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember;
import com.gemstone.gemfire.i18n.LogWriterI18n;
import com.gemstone.gemfire.i18n.StringIdImpl;
import com.gemstone.gemfire.internal.Assert;
import com.gemstone.gemfire.internal.DebugLogWriter;
import com.gemstone.gemfire.internal.LogWriterImpl;
import com.gemstone.gemfire.internal.NanoTimer;
import com.gemstone.gemfire.internal.OneTaskOnlyExecutor;
import com.gemstone.gemfire.internal.cache.PartitionedRegion.RetryTimeKeeper;
import com.gemstone.gemfire.internal.cache.PartitionedRegionDataStore.CreateBucketResult;
import com.gemstone.gemfire.internal.cache.control.InternalResourceManager;
import com.gemstone.gemfire.internal.cache.partitioned.Bucket;
import com.gemstone.gemfire.internal.cache.partitioned.BucketBackupMessage;
import com.gemstone.gemfire.internal.cache.partitioned.CreateBucketMessage;
import com.gemstone.gemfire.internal.cache.partitioned.EndBucketCreationMessage;
import com.gemstone.gemfire.internal.cache.partitioned.FetchPartitionDetailsMessage;
import com.gemstone.gemfire.internal.cache.partitioned.FetchPartitionDetailsMessage.FetchPartitionDetailsResponse;
import com.gemstone.gemfire.internal.cache.partitioned.InternalPRInfo;
import com.gemstone.gemfire.internal.cache.partitioned.InternalPartitionDetails;
import com.gemstone.gemfire.internal.cache.partitioned.LoadProbe;
import com.gemstone.gemfire.internal.cache.partitioned.ManageBackupBucketMessage;
import com.gemstone.gemfire.internal.cache.partitioned.ManageBucketMessage;
import com.gemstone.gemfire.internal.cache.partitioned.ManageBucketMessage.NodeResponse;
import com.gemstone.gemfire.internal.cache.partitioned.CreateMissingBucketsTask;
import com.gemstone.gemfire.internal.cache.partitioned.OfflineMemberDetails;
import com.gemstone.gemfire.internal.cache.partitioned.OfflineMemberDetailsImpl;
import com.gemstone.gemfire.internal.cache.partitioned.PRLoad;
import com.gemstone.gemfire.internal.cache.partitioned.PartitionMemberInfoImpl;
import com.gemstone.gemfire.internal.cache.partitioned.PartitionRegionInfoImpl;
import com.gemstone.gemfire.internal.cache.partitioned.PartitionedRegionRebalanceOp;
import com.gemstone.gemfire.internal.cache.partitioned.RecoveryRunnable;
import com.gemstone.gemfire.internal.cache.partitioned.RedundancyLogger;
import com.gemstone.gemfire.internal.cache.partitioned.RegionAdvisor;
import com.gemstone.gemfire.internal.cache.partitioned.RegionAdvisor.PartitionProfile;
import com.gemstone.gemfire.internal.cache.persistence.MembershipFlushRequest;
import com.gemstone.gemfire.internal.cache.persistence.PersistentMemberID;
import com.gemstone.gemfire.internal.cache.persistence.PersistentStateListener;
import com.gemstone.gemfire.internal.concurrent.AB;
import com.gemstone.gemfire.internal.concurrent.AI;
import com.gemstone.gemfire.internal.concurrent.AL;
import com.gemstone.gemfire.internal.concurrent.CFactory;
import com.gemstone.gemfire.internal.i18n.LocalizedStrings;
import com.gemstone.gemfire.internal.tools.gfsh.app.commands.pr;
import com.gemstone.org.jgroups.util.StringId;

/**
 * This class provides the redundancy management for a partitioned region. It
 * provides the following to the PartitionedRegion:
 * (1) Redundancy management at the time of bucket creation.
 * (2) Redundancy management at the arrival of a new node.
 * (3) Redundancy management when a node leaves the partitioned region
 *     distributed system gracefully, i.e. Cache.close().
 * (4) Redundancy management at random node failure.
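 * <p>
 * A minimal usage sketch (illustrative only; {@code pr} stands in for an
 * already-initialized PartitionedRegion, and the bucket id and entry size are
 * placeholder values):
 * <pre>
 * PRHARedundancyProvider provider = pr.getRedundancyProvider();
 * // watch for member departures and schedule recovery of existing buckets
 * provider.startRedundancyRecovery();
 * // atomically create bucket 42 and its redundant copies; no fixed-partition
 * // name is passed because pr is not a fixed-partitioned region
 * InternalDistributedMember primary = provider.createBucketAtomically(
 *     42, 0, PartitionedRegionStats.startTime(), false, null);
 * </pre>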
* * @author tnegi, Mitch Thomas */ public class PRHARedundancyProvider { private static final boolean DISABLE_CREATE_BUCKET_RANDOMNESS = Boolean.getBoolean("gemfire.DISABLE_CREATE_BUCKET_RANDOMNESS"); public static class ArrayListWithClearState extends ArrayList { private static final long serialVersionUID = 1L; private boolean wasCleared = false; public boolean wasCleared() { return this.wasCleared; } @Override public void clear() { super.clear(); this.wasCleared = true; } } public static final String DATASTORE_DISCOVERY_TIMEOUT_PROPERTY_NAME = "gemfire.partitionedRegionDatastoreDiscoveryTimeout"; static volatile Long DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS = Long.getLong(DATASTORE_DISCOVERY_TIMEOUT_PROPERTY_NAME); public final PartitionedRegion prRegion; private static AL insufficientLogTimeStamp = CFactory.createAL(0); private final AB firstInsufficentStoresLogged = CFactory.createAB(false); /** * An executor to submit tasks for redundancy recovery too. It makes sure * that there will only be one redundancy recovery task in the queue at a time. */ protected final OneTaskOnlyExecutor recoveryExecutor; private volatile ScheduledFuture recoveryFuture; private final Object shutdownLock = new Object(); private boolean shutdown = false; volatile CountDownLatch allBucketsRecoveredFromDisk; /** * Used to consolidate logging for bucket regions waiting on other * members to come online. */ private RedundancyLogger redundancyLogger = null; /** * Constructor for PRHARedundancyProvider. * * @param region * The PartitionedRegion for which the HA redundancy is required to * be managed. */ public PRHARedundancyProvider(final PartitionedRegion region) { this.prRegion = region; final InternalResourceManager resourceManager = region.getGemFireCache() .getResourceManager(); recoveryExecutor = new OneTaskOnlyExecutor(resourceManager.getExecutor(), new OneTaskOnlyExecutor.ConflatedTaskListener() { public void taskDropped() { InternalResourceManager.getResourceObserver().recoveryConflated(region); } }); } public static final String PRLOG_PREFIX = "Partitioned Region name = "; /** * Display bucket allocation status * @param prRegion the given region * @param allStores the list of available stores. If null, unknown. * @param alreadyUsed stores allocated; only used if allStores != null * @param forLog true if the generated string is for a log message * @return the description string */ public static String regionStatus(PartitionedRegion prRegion, Set allStores, Collection alreadyUsed, boolean forLog) { StringBuilder sb = new StringBuilder(); sb.append(PRLOG_PREFIX + prRegion.getFullPath()); final char newLine; final String spaces; if (forLog) { newLine = ' '; spaces = ""; } else { newLine = '\n'; spaces = " "; } if (allStores != null) { sb.append(newLine + spaces + "Redundancy level set to " + prRegion.getRedundantCopies()); sb.append(newLine + ". Number of available data stores: " + allStores.size()); sb.append(newLine + spaces + ". Number successfully allocated = " + alreadyUsed.size()); sb.append(newLine + ". Data stores: " + PartitionedRegionHelper.printCollection(allStores)); sb.append(newLine + ". Data stores successfully allocated: " + PartitionedRegionHelper.printCollection(alreadyUsed)); sb.append(newLine + ". 
Equivalent members: " + PartitionedRegionHelper.printCollection(prRegion.getDistributionManager().getMembersInThisZone())); } return sb.toString(); } static public final StringId TIMEOUT_MSG = LocalizedStrings.PRHARedundancyProvider_IF_YOUR_SYSTEM_HAS_SUFFICIENT_SPACE_PERHAPS_IT_IS_UNDER_MEMBERSHIP_OR_REGION_CREATION_STRESS; /** * Indicate a timeout due to excessive retries among available peers * @param allStores all feasible stores. If null, we don't know. * @param alreadyUsed those that have already accepted, only used if allStores != null * @param opString description of the operation which timed out */ public static void timedOut(PartitionedRegion prRegion, Set allStores, Collection alreadyUsed, String opString, long timeOut) { final String tooManyRetries = LocalizedStrings.PRHARedundancyProvider_TIMED_OUT_ATTEMPTING_TO_0_IN_THE_PARTITIONED_REGION__1_WAITED_FOR_2_MS.toLocalizedString(new Object[] {opString, regionStatus(prRegion, allStores, alreadyUsed, true), Long.valueOf(timeOut)}) + TIMEOUT_MSG; throw new PartitionedRegionStorageException(tooManyRetries); } private Set getAllStores(String partitionName) { if(partitionName != null){ return getFixedPartitionStores(partitionName); } final Set allStores = this.prRegion.getRegionAdvisor().adviseDataStore(true); PartitionedRegionDataStore myDS = this.prRegion.getDataStore(); if (myDS != null) { allStores.add(this.prRegion.getDistributionManager().getId()); } return allStores; } /** * This is for FPR, for given partition, we have to return the set of * datastores on which the given partition is defined * * @param partitionName * name of the partition for which datastores need to be found out */ private Set getFixedPartitionStores( String partitionName) { Set members = this.prRegion.getRegionAdvisor() .adviseFixedPartitionDataStores(partitionName); List FPAs = this.prRegion .getFixedPartitionAttributesImpl(); if (FPAs != null) { for (FixedPartitionAttributesImpl fpa : FPAs) { if (fpa.getPartitionName().equals(partitionName)) { members.add(this.prRegion.getMyId()); } } } return members; } /** * Signature string indicating that not enough stores are * available. */ static public final StringId INSUFFICIENT_STORES_MSG = LocalizedStrings.PRHARedundancyProvider_CONSIDER_STARTING_ANOTHER_MEMBER; /** * Signature string indicating that there are enough stores * available. */ static public final StringId SUFFICIENT_STORES_MSG = LocalizedStrings.PRHARRedundancyProvider_FOUND_A_MEMBER_TO_HOST_A_BUCKET; /** * string indicating the attempt to allocate a bucket */ private static final StringId ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET = LocalizedStrings.PRHARRedundancyProvider_ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET; /** * Indicate that we are unable to allocate sufficient stores and * the timeout period has passed * @param allStores stores we know about * @param alreadyUsed ones already committed * @param onlyLog true if only a warning log messages should be generated. 
*/ private void insufficientStores(Set allStores, Collection alreadyUsed, boolean onlyLog) { final String regionStat = regionStatus(this.prRegion, allStores, alreadyUsed, onlyLog); final char newLine; if (onlyLog) { newLine = ' '; } else { newLine = '\n'; } final StringId notEnoughValidNodes; if(alreadyUsed.isEmpty()) { notEnoughValidNodes = LocalizedStrings.PRHARRedundancyProvider_UNABLE_TO_FIND_ANY_MEMBERS_TO_HOST_A_BUCKET_IN_THE_PARTITIONED_REGION_0; } else { notEnoughValidNodes = LocalizedStrings.PRHARRedundancyProvider_CONFIGURED_REDUNDANCY_LEVEL_COULD_NOT_BE_SATISFIED_0; } final Object[] notEnoughValidNodesArgs = new Object[] {PRHARedundancyProvider.INSUFFICIENT_STORES_MSG, newLine + regionStat + newLine}; if (onlyLog) { getLogger().warning(notEnoughValidNodes, notEnoughValidNodesArgs); } else { throw new PartitionedRegionStorageException(notEnoughValidNodes.toLocalizedString(notEnoughValidNodesArgs)); } } /** * Create a single copy of this bucket on one node. The bucket must * already be locked. * * @param bucketId The bucket we are working on * @param newBucketSize size to create it * @param excludedMembers * @param alreadyUsed members who already seem to have the bucket * @param timeOut point at which to fail * @param allStores the set of data stores to choose from * @return the new member, null if it fails. * @throws PartitionedRegionStorageException if there are not enough data stores */ private InternalDistributedMember createBucketInstance(int bucketId, final int newBucketSize, final Set excludedMembers, Collection alreadyUsed, ArrayListWithClearState failedMembers, final long timeOut, final Set allStores) { LogWriterI18n lw = getLogger(); // this.prRegion.getCache().getLogger().config("DEBUG createBucketInstance: " // + " bucketId=" + this.prRegion.getBucketName(bucketId) + // " alreadyUsed: " + alreadyUsed + // " failedMembers: " + failedMembers); // Recalculate list of candidates HashSet candidateMembers = new HashSet(allStores); candidateMembers.removeAll(alreadyUsed); candidateMembers.removeAll(excludedMembers); candidateMembers.removeAll(failedMembers); if (lw.fineEnabled()) { lw.fine("AllStores=" + allStores); lw.fine("AlreadyUsed=" + alreadyUsed); lw.fine("excluded=" + excludedMembers); lw.fine("failed=" + failedMembers); } if (candidateMembers.size() == 0) { this.prRegion.checkReadiness(); // fix for bug #37207 // Run out of candidates. Refetch? if (System.currentTimeMillis() > timeOut) { if (lw.fineEnabled()) { lw.fine("createBucketInstance: ran out of candidates and timed out"); } return null; // fail, let caller signal error } // Recalculate candidateMembers = new HashSet(allStores); candidateMembers.removeAll(alreadyUsed); candidateMembers.removeAll(excludedMembers); failedMembers.clear(); } if (lw.fineEnabled()) { lw.fine("createBucketInstance: candidateMembers = " + candidateMembers); } InternalDistributedMember candidate = null; // If there are no candidates, early out. if (candidateMembers.size() == 0) { // no options if (lw.fineEnabled()) { lw.fine("createBucketInstance: no valid candidates"); } return null; // failure } // no options else { // In case of FPR, candidateMembers is the set of members on which // required fixed partition is defined. 
if (this.prRegion.isFixedPartitionedRegion()) { candidate = candidateMembers.iterator().next(); } else { String prName = this.prRegion.getAttributes().getPartitionAttributes() .getColocatedWith(); if (prName != null) { candidate = getColocatedDataStore(candidateMembers, alreadyUsed, bucketId, prName); } else { final ArrayList orderedCandidates = new ArrayList(candidateMembers); candidate = getPreferredDataStore(orderedCandidates, alreadyUsed); } } } if (candidate == null) { failedMembers.addAll(candidateMembers); return null; } if(!this.prRegion.isShadowPR() && !ColocationHelper.checkMembersColocation(this.prRegion, candidate)) { if (lw.fineEnabled()) { lw.fine("createBucketInstances - Member does not have all of the regions colocated with " + prRegion + ", " + candidate); } failedMembers.add(candidate); return null; } if (! (candidate.equals(this.prRegion.getMyId()))) { // myself PartitionProfile pp = this.prRegion.getRegionAdvisor() .getPartitionProfile(candidate); if (pp == null) { if (lw.fineEnabled()) { lw.fine("createBucketInstance: " + this.prRegion.getFullPath() + ": no partition profile for " + candidate); } failedMembers.add(candidate); return null; } } // myself // Coordinate with any remote close occurring, causing it to wait until // this create bucket attempt has been made. final ManageBucketRsp response = createBucketOnMember(bucketId, candidate, newBucketSize, failedMembers.wasCleared()); // Add targetNode to bucketNodes if successful, else to failedNodeList if (response.isAcceptance()) { return candidate; // success! } if (lw.fineEnabled()) { lw.fine("createBucketInstance: " + this.prRegion.getFullPath() + ": candidate " + candidate + " declined to manage bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + ": " + response); } if(response.equals(ManageBucketRsp.CLOSED)) { excludedMembers.add(candidate); } else { failedMembers.add(candidate); } candidate = null; // failure return null; } final private static boolean DEBUG_LOGGING_ENABLED = Boolean.getBoolean(PRHARedundancyProvider.class.getName() + "-logging"); public static final long INSUFFICIENT_LOGGING_THROTTLE_TIME = TimeUnit.SECONDS.toNanos(Integer.getInteger("gemfire.InsufficientLoggingThrottleTime", 2).intValue()); public volatile static boolean TEST_MODE = false; //since 6.6, please use the distributed system property enforce-unique-host instead. 
// public static final boolean ENFORCE_UNIQUE_HOST_STORAGE_ALLOCATION = DistributionConfig.DEFAULT_ENFORCE_UNIQUE_HOST; public LogWriterI18n getLogger() { if (DEBUG_LOGGING_ENABLED) { return new DebugLogWriter((LogWriterImpl)this.prRegion.getCache().getLogger(), getClass()); } else { return this.prRegion.getCache().getLoggerI18n(); } } public InternalDistributedMember createBucketOnDataStore(int bucketId, int size, long startTime, RetryTimeKeeper snoozer) { Set attempted = new HashSet(); InternalDistributedMember ret; InternalDistributedMember primaryForFixedPartition = null; if (this.prRegion.isFixedPartitionedRegion()) { primaryForFixedPartition = this.prRegion.getRegionAdvisor() .adviseFixedPrimaryPartitionDataStore(bucketId); } do { this.prRegion.checkReadiness(); Set available = this.prRegion .getRegionAdvisor().adviseInitializedDataStore(); // remove uninitialized members for bucket creation this.prRegion.getCache().removeUnInitializedMembers(available); InternalDistributedMember target = null; available.removeAll(attempted); for (InternalDistributedMember member : available) { if (primaryForFixedPartition != null && available.contains(primaryForFixedPartition)) { target = primaryForFixedPartition; } else { target = member; } break; } if (target == null) { if (shouldLogInsufficientStores()) { insufficientStores(available, Collections.emptySet(), true); } // this will always throw an exception insufficientStores(available, Collections.emptySet(), false); } try { if(getLogger().fineEnabled()) { getLogger().fine("Attempting to get data store " + target + " to create the bucket " + this.prRegion.bucketStringForLogs(bucketId) + " for us"); } CreateBucketMessage.NodeResponse response = CreateBucketMessage.send(target, this.prRegion, bucketId, size); ret = response.waitForResponse(); if(ret != null) { return ret; } } catch(ForceReattemptException e) { //do nothing, we will already check again for a primary. } attempted.add(target); } while((ret = this.prRegion.getNodeForBucketWrite(bucketId, snoozer)) == null); return ret; } /** * Creates bucket atomically by creating all the copies to satisfy redundancy. In case all * copies can not be created, a PartitionedRegionStorageException is thrown to * the user and BucketBackupMessage is sent to the nodes to make copies of a bucket * that was only partially created. Other VMs are informed * of bucket creation through updates through their {@link BucketAdvisor.BucketProfile}s. * *

 * This method is synchronized to enforce a single threaded ordering, allowing
 * for a more accurate picture of bucket distribution in the face of concurrency.
 * See bug 37275.
 *

* * This method is now slightly misnamed. Another member could be in the process * of creating this same bucket at the same time. * * @param bucketId * Id of the bucket to be created. * @param newBucketSize * size of the first entry. * @param startTime a time stamp prior to calling the method, used to update bucket creation stats * @return the primary member for the newly created bucket * @throws PartitionedRegionStorageException * if required # of buckets can not be created to satisfy * redundancy. * @throws PartitionedRegionException * if d-lock can not be acquired to create bucket. * */ public InternalDistributedMember createBucketAtomically(final int bucketId, final int newBucketSize, final long startTime, final boolean finishIncompleteCreation, String partitionName) throws PartitionedRegionStorageException, PartitionedRegionException { // If there are insufficient stores throw *before* we try acquiring the // (very expensive) bucket lock or the (somewhat expensive) monitor on this earlySufficientStoresCheck(partitionName); synchronized(this) { if (this.prRegion.getCache().isCacheAtShutdownAll()) { throw new CacheClosedException("Cache is shutting down"); } final LogWriterI18n log = getLogger(); if (log.fineEnabled()) { log.fine("Starting atomic creation of bucketId=" + this.prRegion.bucketStringForLogs(bucketId)); } Collection acceptedMembers = new ArrayList(); // ArrayList Set excludedMembers = new HashSet(); ArrayListWithClearState failedMembers = new ArrayListWithClearState(); final long timeOut = System.currentTimeMillis() + computeTimeout(); BucketMembershipObserver observer = null; boolean needToElectPrimary = true; InternalDistributedMember bucketPrimary = null; try { this.prRegion.checkReadiness(); Bucket toCreate = this.prRegion.getRegionAdvisor().getBucket(bucketId); if(!finishIncompleteCreation) { bucketPrimary = this.prRegion.getBucketPrimary(bucketId); if (bucketPrimary != null) { if(log.fineEnabled()) { log.fine("during atomic creation, discovered that the primary already exists " + bucketPrimary + " returning early"); } needToElectPrimary = false; return bucketPrimary; } } observer = new BucketMembershipObserver(toCreate).beginMonitoring(); boolean loggedInsufficentStores = false; // track if insufficient data stores have been detected final LogWriterI18n logger = getLogger(); for (;;) { this.prRegion.checkReadiness(); if (this.prRegion.getCache().isCacheAtShutdownAll()) { log.info(LocalizedStrings.DEBUG, "Aborted createBucketAtomically due to ShutdownAll"); throw new CacheClosedException("Cache is shutting down"); } // this.prRegion.getCache().getLogger().config( // "DEBUG createBucketAtomically: " // + " bucketId=" + this.prRegion.getBucketName(bucketId) + // " accepted: " + acceptedMembers + // " failed: " + failedMembers); long timeLeft = timeOut - System.currentTimeMillis(); if (timeLeft < 0) { // It took too long. timedOut(this.prRegion, getAllStores(partitionName), acceptedMembers, ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET .toLocalizedString(), computeTimeout()); // NOTREACHED } if (logger.fineEnabled()) { logger.fine( "createBucketAtomically: have " + timeLeft + " ms left to finish this"); } // Always go back to the advisor, see if any fresh data stores are // present. 
Set allStores = getAllStores(partitionName); // remove nodes that are not fully initialized this.prRegion.getCache().removeUnInitializedMembers(allStores); loggedInsufficentStores = checkSufficientStores(allStores, loggedInsufficentStores); InternalDistributedMember candidate = createBucketInstance(bucketId, newBucketSize, excludedMembers, acceptedMembers, failedMembers, timeOut, allStores); if (candidate != null) { if (this.prRegion.getDistributionManager().enforceUniqueZone()) { Set exm = getBuddyMembersInZone(candidate, allStores); exm.remove(candidate); exm.removeAll(acceptedMembers); excludedMembers.addAll(exm); } } // Get an updated list of bucket owners, which should include // buckets created concurrently with this createBucketAtomically call acceptedMembers = prRegion.getRegionAdvisor().getBucketOwners(bucketId); if (logger.fineEnabled()) { logger.fine("Accepted members: " + acceptedMembers); } // [sumedh] set the primary as the candidate in the first iteration if // the candidate has accepted if (bucketPrimary == null && acceptedMembers.contains(candidate)) { bucketPrimary = candidate; } // prune out the stores that have left verifyBucketNodes(excludedMembers, partitionName); //Note - we used to wait for the created bucket to become primary here //if this is a colocated region. We no longer need to do that, because //the EndBucketMessage is sent out after bucket creation completes to //select the primary. // Have we exhausted all candidates? final int potentialCandidateCount = (allStores.size() - (excludedMembers .size() + acceptedMembers.size() + failedMembers.size())); // Determining exhausted members competes with bucket balancing; it's // important to re-visit all failed members since "failed" set may // contain datastores which at the moment are imbalanced, but yet could // be candidates. If the failed members list is empty, its expected // that the next iteration clears the (already empty) list. final boolean exhaustedPotentialCandidates = failedMembers.wasCleared() && potentialCandidateCount <= 0; final boolean redundancySatisfied = acceptedMembers.size() > this.prRegion.getRedundantCopies(); final boolean bucketNotCreated = acceptedMembers.size() == 0; if (logger.fineEnabled()) { logger.fine("potentialCandidateCount=" + potentialCandidateCount + ", exhaustedPotentialCandidates=" + exhaustedPotentialCandidates + ", redundancySatisfied=" + redundancySatisfied + ", bucketNotCreated=" + bucketNotCreated); } if (bucketNotCreated) { // if we haven't managed to create the bucket on any nodes, retry. continue; } if (exhaustedPotentialCandidates && ! redundancySatisfied) { insufficientStores(allStores, acceptedMembers, true); } // Allow the thread to potentially finish bucket creation even if redundancy was not met. // Fix for bug 39283 if (redundancySatisfied || exhaustedPotentialCandidates) { //Tell one of the members to become primary. //The rest of the members will be allowed to //volunteer for primary. endBucketCreation(bucketId, acceptedMembers, bucketPrimary, partitionName); final int expectedRemoteHosts = acceptedMembers.size() - (acceptedMembers.contains(this.prRegion.getMyId()) ? 1: 0); boolean interrupted = Thread.interrupted(); try { BucketMembershipObserverResults results = observer.waitForOwnersGetPrimary(expectedRemoteHosts, acceptedMembers, partitionName); if (results.problematicDeparture) { // BZZZT! Member left. Start over. 
continue; } bucketPrimary = results.primary; } catch (InterruptedException e) { interrupted = true; this.prRegion.getCancelCriterion().checkCancelInProgress(e); } finally { if (interrupted) { Thread.currentThread().interrupt(); } } needToElectPrimary = false; return bucketPrimary; } // almost done } // for } catch (CancelException e) { //Fix for 43544 - We don't need to elect a primary //if the cache was closed. The other members will //take care of it. This ensures we don't compromise //redundancy. needToElectPrimary = false; // log.warning( // "PRHARedundancyProvider:createBucketAtomically():Got Cache closed // exception while creating new bucket.", // e); throw e; } catch (RegionDestroyedException e) { //Fix for 43544 - We don't need to elect a primary //if the region was destroyed. The other members will //take care of it. This ensures we don't compromise //redundancy. needToElectPrimary = false; // log.warning( // "PRHARedundancyProvider:createBucketAtomically():Got Region Destroyed // exception while creating new bucket.", // e); throw e; } catch (PartitionOfflineException e) { // log.warning( // "PRHARedundancyProvider:createBucketAtomically():Got Region Destroyed // exception while creating new bucket.", // e); throw e; } catch (RuntimeException e) { if(log.fineEnabled()) { log.fine("Unable to create new bucket " + bucketId + " :" + e.getMessage()); } //If we're finishing an incomplete bucket creation, don't blast out //another message to peers to do so. //TODO - should we ignore a PartitionRegionStorageException, rather //than reattempting on other nodes? if(!finishIncompleteCreation) { cleanUpBucket(bucketId); } throw e; } finally { if (observer != null) { observer.stopMonitoring(); } //Try to make sure everyone that created the bucket can volunteer for primary if(needToElectPrimary) { try { endBucketCreation(bucketId, prRegion.getRegionAdvisor() .getBucketOwners(bucketId), bucketPrimary, partitionName); } catch (Exception e) { // if region is going down, then no warning level logs if (e instanceof CancelException || e instanceof CacheClosedException || (prRegion.getCancelCriterion().cancelInProgress() != null)) { getLogger().fine("Exception trying choose a primary after " + "bucket creation failure", e); } else { getLogger().warning(StringIdImpl.LITERAL, "Exception trying choose a " + "primary after bucket creation failure", e); } } } } } // synchronized(this) } /** * Figure out which member should be primary for a bucket * among the members that have created the bucket, and tell * that member to become the primary. * @param acceptedMembers The members that now host the bucket */ private void endBucketCreation(int bucketId, Collection acceptedMembers, InternalDistributedMember targetPrimary, String partitionName) { if(acceptedMembers.isEmpty()) { return; } acceptedMembers = new HashSet(acceptedMembers); //TODO prpersist - we need to factor out a method that just chooses //the primary. But this will do the trick for the moment. // This is for FPR, for a given bucket id , make sure that for given bucket // id , only the datastore on which primary partition is defined for this // bucket becomes the primary. 
If primary partition is not available then // secondary partition will become primary if (partitionName != null) { if (isLocalPrimary(partitionName)) { targetPrimary = this.prRegion.getMyId(); } else { targetPrimary = this.prRegion.getRegionAdvisor() .adviseFixedPrimaryPartitionDataStore(bucketId); if (targetPrimary == null) { Set fpDataStores = getFixedPartitionStores(partitionName); targetPrimary = fpDataStores.iterator().next(); } } } if (targetPrimary == null) { // [sumedh] we need to select the same primary as chosen earlier (e.g. // the parent's in case of colocation) so it is now passed //InternalDistributedMember targetPrimary = getPreferredDataStore( // acceptedMembers, Collections. emptySet()); this.prRegion.getCache().removeUnInitializedMembers(acceptedMembers); if (acceptedMembers.isEmpty()) { return; } targetPrimary = getPreferredDataStore(acceptedMembers, Collections. emptySet()); } boolean isHosting = acceptedMembers.remove(prRegion .getDistributionManager().getId()); EndBucketCreationMessage.send(acceptedMembers, targetPrimary, this.prRegion, bucketId); // Observer for testing purpose final EndBucketCreationObserver observer = testEndObserverInstance; if (observer != null) { observer.afterEndBucketCreationMessageSend(this.prRegion, bucketId); } if (isHosting) { endBucketCreationLocally(bucketId, targetPrimary); } if (observer != null) { observer.afterEndBucketCreation(this.prRegion, bucketId); } } private boolean isLocalPrimary(String partitionName) { List FPAs = this.prRegion .getFixedPartitionAttributesImpl(); if (FPAs != null) { for (FixedPartitionAttributesImpl fpa : FPAs) { if (fpa.getPartitionName().equals(partitionName) && fpa.isPrimary()) { return true; } } } return false; } private static volatile EndBucketCreationObserver testEndObserverInstance; // Observer for testing purpose public static void setTestEndBucketCreationObserver( EndBucketCreationObserver observer) { testEndObserverInstance = observer; } /** * Test observer to help reproduce #42429. */ public static interface EndBucketCreationObserver { public void afterEndBucketCreationMessageSend(PartitionedRegion pr, int bucketId); public void afterEndBucketCreation(PartitionedRegion pr, int bucketId); } public void endBucketCreationLocally(int bucketId, InternalDistributedMember newPrimary) { //Don't elect ourselves as primary or tell others to persist our ID if this member //has been destroyed. if (prRegion.getCancelCriterion().cancelInProgress() != null || prRegion.isDestroyed()) { return; } final BucketAdvisor bucketAdvisor = this.prRegion.getRegionAdvisor() .getBucketAdvisor(bucketId); final ProxyBucketRegion proxyBucketRegion = bucketAdvisor .getProxyBucketRegion(); final LogWriterI18n logger = this.getLogger(); if (logger.fineEnabled()) { logger.fine("endBucketCreationLocally: for region " + this.prRegion.getFullPath() + " bucketId=" + bucketId + " bucketAdvisor=" + bucketAdvisor + ", new primary: " + newPrimary); } BucketPersistenceAdvisor persistentAdvisor = proxyBucketRegion .getPersistenceAdvisor(); //prevent multiple threads from ending bucket creation at the same time. //This fixes an issue with 41336, where multiple threads were calling endBucketCreation //on the persistent advisor and marking a bucket as initialized twice. 
synchronized(proxyBucketRegion) { if(persistentAdvisor != null) { BucketRegion realBucket = proxyBucketRegion.getCreatedBucketRegion(); if(realBucket != null) { PersistentMemberID persistentID = realBucket.getPersistentID(); persistentAdvisor.endBucketCreation(persistentID); } } //We've received an endBucketCreationMessage, but the primary //may not have. So now we wait for the chosen member to become //primary. bucketAdvisor.setPrimaryElector(newPrimary); if(prRegion.getGemFireCache().getMyId().equals(newPrimary)) { //If we're the choosen primary, volunteer for primary now if (bucketAdvisor.isHosting()) { bucketAdvisor.clearPrimaryElector(); bucketAdvisor.volunteerForPrimary(); } } else { //It's possible the chosen primary has already left. In //that case, volunteer for primary now. if(!bucketAdvisor.adviseInitialized().contains(newPrimary)) { bucketAdvisor.clearPrimaryElector(); bucketAdvisor.volunteerForPrimary(); } //If the bucket has had a primary, that means the //chosen bucket was primary for a while. Go ahead and //clear the primary elector field. if(bucketAdvisor.getHadPrimary()) { bucketAdvisor.clearPrimaryElector(); bucketAdvisor.volunteerForPrimary(); } } } //send out a profile update to indicate the persistence is initialized, if needed. if(persistentAdvisor != null) { bucketAdvisor.endBucketCreation(); } List colocatedWithList = ColocationHelper.getColocatedChildRegions(prRegion); for(PartitionedRegion child : colocatedWithList) { if(child.getRegionAdvisor().isBucketLocal(bucketId)) { child.getRedundancyProvider().endBucketCreationLocally(bucketId, newPrimary); } } } /** * Get buddy data stores on the same Host as the accepted member * @return set of members on the same host, not including accepted member * @since gemfire59poc * */ private Set getBuddyMembersInZone( final InternalDistributedMember acceptedMember, final Set allStores) { DM dm = this.prRegion.getDistributionManager(); Set buddies = dm.getMembersInSameZone(acceptedMember); //TODO Dan - I'm not sure this retain all is necessary, but there may have been a reason we were //passing this set in before. buddies.retainAll(allStores); return buddies; } /** * Early check for resources. This code may be executed for every put operation if * there are no datastores present, limit excessive logging. * @since gemfire5.8 */ private void earlySufficientStoresCheck(String partitionName) { assert Assert.assertHoldsLock(this,false); Set currentStores = getAllStores(partitionName); if (currentStores.isEmpty()) { if (shouldLogInsufficientStores()) { insufficientStores(currentStores, Collections.EMPTY_LIST, true); } insufficientStores(currentStores, Collections.EMPTY_LIST, false); } } /** * Limit the frequency for logging the {@link #INSUFFICIENT_STORES_MSG} message * to once per PR after which once every {@link #INSUFFICIENT_LOGGING_THROTTLE_TIME} * second * @return true if it's time to log * @since gemfire5.8 */ private boolean shouldLogInsufficientStores() { long now = NanoTimer.getTime(); long delta = now - insufficientLogTimeStamp.get(); if (this.firstInsufficentStoresLogged.compareAndSet(false, true) || delta >= INSUFFICIENT_LOGGING_THROTTLE_TIME) { insufficientLogTimeStamp.set(now); return true; } else { return false; } } /** * Compute timeout for waiting for a bucket. 
Prefer {@link #DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS} * over {@link PartitionedRegion#getRetryTimeout()} * @return the milliseconds to wait for a bucket creation operation */ private long computeTimeout() { if (DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS != null) { long millis = DATASTORE_DISCOVERY_TIMEOUT_MILLISECONDS.longValue(); if (millis > 0) { // only positive values allowed return millis; } } return this.prRegion.getRetryTimeout(); } /** * Check to determine that there are enough datastore VMs to start the bucket * creation processes. Log a warning or throw an exception indicating * when there are not enough datastore VMs. * @param allStores All known data store instances (including local) * @param loggedInsufficentStores indicates whether a warning has been logged * @return true when a warning has been logged, false if a warning should be logged. */ private boolean checkSufficientStores(final Set allStores, final boolean loggedInsufficentStores) { // Report (only once) if insufficient data store have been detected. if (! loggedInsufficentStores) { if (allStores.size() == 0) { insufficientStores(allStores, Collections.EMPTY_LIST, true); return true; } } else { if (allStores.size() > 0) { // Excellent, sufficient resources were found! final StringId logStr = LocalizedStrings.PRHARRedundancyProvider_0_IN_THE_PARTITIONED_REGION_REGION_NAME_1; final Object[] logArgs = new Object[] {SUFFICIENT_STORES_MSG.toLocalizedString(), prRegion.getFullPath()}; if (TEST_MODE) { getLogger().severe(logStr, logArgs); } else { getLogger().info(logStr, logArgs); } return false; } else { // Already logged warning, there are no datastores insufficientStores(allStores, Collections.EMPTY_LIST, false); // UNREACHABLE } } return loggedInsufficentStores; } /** * Clean up locally created bucket and tell other VMs to * attempt recovering redundancy * @param buck the bucket identifier */ private void cleanUpBucket(int buck) { Set dataStores = this.prRegion.getRegionAdvisor().adviseDataStore(); BucketBackupMessage.send(dataStores, this.prRegion, buck); } public void finishIncompleteBucketCreation(int bucketId) { String partitionName = null; final long startTime = PartitionedRegionStats.startTime(); if (this.prRegion.isFixedPartitionedRegion()) { FixedPartitionAttributesImpl fpa = PartitionedRegionHelper .getFixedPartitionAttributesForBucket(this.prRegion, bucketId); partitionName = fpa.getPartitionName(); } createBucketAtomically(bucketId, 0, startTime, true, partitionName); } /** * Creates bucket with ID bucketId on targetNode. This method * will also create the bucket for all of the child colocated PRs. * * @param bucketId * @param targetNMember * @param isRebalance true if bucket creation is directed by rebalancing * @param replaceOfflineData * @return true if the bucket was sucessfully created */ public boolean createBackupBucketOnMember(final int bucketId, final InternalDistributedMember targetNMember, final boolean isRebalance, boolean replaceOfflineData, InternalDistributedMember moveSource, boolean forceCreation) { if (getLogger().fineEnabled()) { getLogger().fine("createBackupBucketOnMember for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " member: " + targetNMember); } if (! 
(targetNMember.equals(this.prRegion.getMyId()))) { // final StoppableReentrantReadWriteLock.StoppableReadLock isClosingReadLock; PartitionProfile pp = this.prRegion.getRegionAdvisor() .getPartitionProfile(targetNMember); if (pp != null) { // isClosingReadLock = pp.getIsClosingReadLock( // this.prRegion.getCancelCriterion()); } else { return false; } try { ManageBackupBucketMessage.NodeResponse response = ManageBackupBucketMessage .send(targetNMember, this.prRegion, bucketId, isRebalance, replaceOfflineData, moveSource, forceCreation); if (response.waitForAcceptance()) { getLogger().fine("createBackupBucketOnMember: " + "Bucket creation succeed for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " on member = " + targetNMember); return true; } else { getLogger().fine("createBackupBucketOnMember: " + "Bucket creation failed for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " on member = " + targetNMember); return false; } } catch (Throwable e) { Error err; if (e instanceof Error && SystemFailure.isJVMFailureError( err = (Error)e)) { SystemFailure.initiateFailure(err); // If this ever returns, rethrow the error. We're poisoned // now, so don't let this thread continue. throw err; } // Whenever you catch Error or Throwable, you must also // check for fatal JVM error (see above). However, there is // _still_ a possibility that you are dealing with a cascading // error condition, so you also need to check to see if the JVM // is still usable: SystemFailure.checkFailure(); if (e instanceof ForceReattemptException) { // no log needed see bug 37569 } else if (e instanceof CancelException || (e.getCause() != null && (e.getCause() instanceof CancelException))) { // no need to log exceptions caused by cache closure } else { LogWriterI18n log = getLogger(); if (log.warningEnabled()) { log.warning(LocalizedStrings.PRHARedundancyProvider_EXCEPTION_CREATING_PARTITION_ON__0, targetNMember, e); } } return false; } } else { final PartitionedRegionDataStore prDS = this.prRegion.getDataStore(); boolean bucketManaged = prDS!=null && prDS.grabBucket(bucketId, moveSource, forceCreation, replaceOfflineData, isRebalance, null, false).equals( CreateBucketResult.CREATED); if (! bucketManaged) { getLogger().fine("createBackupBucketOnMember:" + " Local data store refused to accommodate the data for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " prDS=" + prDS); } return bucketManaged; } } private static final ThreadLocal forceLocalPrimaries = new ThreadLocal(); public static void setForceLocalPrimaries(boolean v) { forceLocalPrimaries.set(Boolean.valueOf(v)); } private boolean getForceLocalPrimaries() { boolean result = false; Boolean v = (Boolean)forceLocalPrimaries.get(); if (v != null) { result = v.booleanValue(); } return result; } /** * Creates bucket with ID bucketId on targetNode. * * @param bucketId * @param targetNMember * @param newBucketSize * @param forceCreation inform the targetMember it must attempt host the bucket, * appropriately ignoring it's maximums * @return a response object */ public ManageBucketRsp createBucketOnMember(final int bucketId, final InternalDistributedMember targetNMember, final int newBucketSize, boolean forceCreation) { if (getLogger().fineEnabled()) { getLogger().fine("createBucketOnMember for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " member: " + targetNMember + (forceCreation ? " forced" : "")); } if (! 
(targetNMember.equals(this.prRegion.getMyId()))) { // final StoppableReentrantReadWriteLock.StoppableReadLock isClosingReadLock; PartitionProfile pp = this.prRegion.getRegionAdvisor() .getPartitionProfile(targetNMember); if (pp != null) { // isClosingReadLock = pp.getIsClosingReadLock( // this.prRegion.getCancelCriterion()); } else { return ManageBucketRsp.NO; } try { // isClosingReadLock.lock(); // Grab the read lock, preventing any region closures // on this remote Node until this bucket is fully published, forcing the closing // Node to recognize any pre-natal buckets. NodeResponse response = ManageBucketMessage.send(targetNMember, this.prRegion, bucketId, newBucketSize, forceCreation); if (response.waitForAcceptance()) { getLogger().fine("createBucketOnMember: " + "Bucket creation succeed for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " on member = " + targetNMember); // lockList.add(isClosingReadLock); return ManageBucketRsp.YES; } else { getLogger().fine("createBucketOnMember: " + "Bucket creation failed for bucketId=" + this.prRegion.bucketStringForLogs(bucketId) + " on member = " + targetNMember); // isClosingReadLock.unlock(); return response.rejectedDueToInitialization() ? ManageBucketRsp.NO_INITIALIZING : ManageBucketRsp.NO; } } catch(PartitionOfflineException e) { throw e; } catch (Throwable e) { Error err; if (e instanceof Error && SystemFailure.isJVMFailureError( err = (Error)e)) { SystemFailure.initiateFailure(err); // If this ever returns, rethrow the error. We're poisoned // now, so don't let this thread continue. throw err; } // Whenever you catch Error or Throwable, you must also // check for fatal JVM error (see above). However, there is // _still_ a possibility that you are dealing with a cascading // error condition, so you also need to check to see if the JVM // is still usable: SystemFailure.checkFailure(); if (e instanceof CancelException || (e.getCause() != null && (e.getCause() instanceof CancelException))) { // no need to log exceptions caused by cache closure return ManageBucketRsp.CLOSED; } else if (e instanceof ForceReattemptException) { // no log needed see bug 37569 } else { LogWriterI18n log = getLogger(); if (log.warningEnabled()) { log.warning(LocalizedStrings.PRHARedundancyProvider_EXCEPTION_CREATING_PARTITION_ON__0, targetNMember, e); } } // isClosingReadLock.unlock(); return ManageBucketRsp.NO; } } else { final PartitionedRegionDataStore prDS = this.prRegion.getDataStore(); boolean bucketManaged = prDS!=null && prDS.handleManageBucketRequest(bucketId, newBucketSize, this.prRegion.getMyId(), forceCreation); if (! bucketManaged) { getLogger().fine("createBucketOnMember:" + " Local data store not able to accommodate the data for bucketId=" + this.prRegion.bucketStringForLogs(bucketId)); } return ManageBucketRsp.valueOf(bucketManaged); } } /** * Select the member with which is hosting the same bucketid for the PR it is * colocated with In case of primary it returns the same node whereas in case * of secondary it will return the least loaded datastore which is hosting the * bucketid. 
* * @param alreadyUsed * @param bucketId * @param prName * @return InternalDistributedMember colocated data store * @since 5.8Beta */ private InternalDistributedMember getColocatedDataStore( Collection candidates, Collection alreadyUsed, int bucketId, String prName) { Assert.assertTrue(prName != null); // precondition1 PartitionedRegion colocatedRegion = ColocationHelper.getColocatedRegion(this.prRegion); Region prRoot = PartitionedRegionHelper.getPRRoot(prRegion .getCache()); PartitionRegionConfig config = (PartitionRegionConfig)prRoot.get(prRegion .getRegionIdentifier()); if (!config.isColocationComplete()) { throw new IllegalStateException( "Cannot create buckets, as colocated regions are not " + "configured to be at the same nodes."); } RegionAdvisor advisor = colocatedRegion.getRegionAdvisor(); if (alreadyUsed.isEmpty()) { InternalDistributedMember primary = advisor.getPrimaryMemberForBucket(bucketId); if (!candidates.contains(primary)) { return null; } return primary; } Set bucketOwnersSet = advisor.getBucketOwners(bucketId); bucketOwnersSet.retainAll(candidates); ArrayList members = new ArrayList(bucketOwnersSet); if(members.isEmpty()){ return null; } return getPreferredDataStore(members, alreadyUsed); } /** * Select the member with the fewest buckets, among those with the fewest * randomly select one. * * Under concurrent access, the data that this method uses, may be somewhat * volatile, note that createBucketAtomically synchronizes * to enhance the consistency of the data used in this method. * * @param candidates ArrayList of InternalDistributedMember, potential datastores * @param alreadyUsed data stores already in use * @return a member with the fewest buckets or null if no datastores */ private InternalDistributedMember getPreferredDataStore( Collection candidates, final Collection alreadyUsed) { /* has a primary already been chosen? 
*/ final boolean forPrimary = alreadyUsed.size() == 0; if (forPrimary && getForceLocalPrimaries()) { PartitionedRegionDataStore myDS = this.prRegion.getDataStore(); if (myDS != null) { return this.prRegion.getMyId(); } } if (candidates.size() == 1) { return candidates.iterator().next(); } Assert.assertTrue(candidates.size() > 1); final LogWriterI18n lw = getLogger(); // Convert peers to DataStoreBuckets ArrayList stores = this.prRegion.getRegionAdvisor() .adviseFilteredDataStores(new HashSet(candidates)); final DM dm = this.prRegion.getDistributionManager(); // Add ourself as a candidate, if appropriate InternalDistributedMember moi = dm.getId(); PartitionedRegionDataStore myDS = this.prRegion.getDataStore(); if (myDS != null && candidates.contains(moi)) { int bucketCount = myDS.getBucketsManaged(); int priCount = myDS.getNumberOfPrimaryBucketsManaged(); int localMaxMemory = this.prRegion.getLocalMaxMemory(); stores.add(new DataStoreBuckets(moi, bucketCount, priCount, localMaxMemory)); } if (stores.isEmpty()) { return null; } // --------------------------------------------- // Calculate all hosts who already have this bucket final HashSet existingHosts = new HashSet(); Iterator it = alreadyUsed.iterator(); while (it.hasNext()) { InternalDistributedMember mem = it.next(); existingHosts.addAll(dm.getMembersInSameZone(mem)); } Comparator comparator = new Comparator() { public int compare(DataStoreBuckets d1, DataStoreBuckets d2) { boolean host1Used = existingHosts.contains(d1.memberId); boolean host2Used = existingHosts.contains(d2.memberId); if (!host1Used && host2Used) { return -1; // host1 preferred } if (host1Used && !host2Used) { return 1; // host2 preferred } // Six eggs, half a dozen. Look for least loaded. float metric1, metric2; if (forPrimary) { metric1 = d1.numPrimaries / (float) d1.localMaxMemoryMB; metric2 = d2.numPrimaries / (float) d2.localMaxMemoryMB; } else { metric1 = d1.numBuckets / (float) d1.localMaxMemoryMB; metric2 = d2.numBuckets / (float) d2.localMaxMemoryMB; } int result = Float.compare(metric1, metric2); if(result == 0) { //if they have the same load, choose the member with the //higher localMaxMemory result = d2.localMaxMemoryMB - d1.localMaxMemoryMB; } return result; } }; // --------------------------------------------- // First step is to sort datastores first by those whose hosts don't // hold this bucket, and then secondarily by loading. Collections.sort(stores, comparator); if (lw.fineEnabled()) { lw.fine(fancyFormatBucketAllocation("Sorted ", stores, existingHosts)); } // --------------------------------------------- // Always add the first datastore and note just how good it is. DataStoreBuckets bestDataStore = stores.get(0); ArrayList bestStores = new ArrayList(); bestStores.add(bestDataStore); final boolean allStoresInUse = alreadyUsed.contains(bestDataStore.memberId); // --------------------------------------------- // Collect all of the other hosts in this sorted list that are as good // as the very first one. for (int i = 1; i < stores.size(); i ++) { DataStoreBuckets aDataStore = stores.get(i); if (!allStoresInUse && alreadyUsed.contains(aDataStore.memberId)) { // Only choose between the ones not in use. 
break; } if (comparator.compare(bestDataStore, aDataStore) != 0) { break; } bestStores.add(aDataStore); } if (lw.fineEnabled()) { lw.fine(fancyFormatBucketAllocation("Best Stores ", bestStores, existingHosts)); } // --------------------------------------------- int chosen; if(DISABLE_CREATE_BUCKET_RANDOMNESS) { chosen = 0; } else { // Pick one (at random) chosen = PartitionedRegion.rand.nextInt(bestStores.size()); } DataStoreBuckets aDataStore = bestStores.get(chosen); return aDataStore.memberId; } /** * Adds a membership listener to watch for member departures, * and schedules a task to recover redundancy of existing buckets */ public void startRedundancyRecovery() { prRegion.getRegionAdvisor().addMembershipListener(new PRMembershipListener()); scheduleRedundancyRecovery(null); } /** * Log bucket allocation in the log files in this format: *
   * member1: +5/20
   * member2: -10/5
   * 
* After the member name, the +/- indicates whether or not this bucket is * already hosted on the given member. This is followed by the number of * hosted primaries followed by the number of hosted non-primary buckets. * * @param prefix first part of message to print * @param dataStores list of stores * @param existingStores to mark those already in use */ private String fancyFormatBucketAllocation(String prefix, List dataStores, Set existingStores) { StringBuilder logStr = new StringBuilder(); if (prefix != null) { logStr.append(prefix); } logStr.append("Bucket Allocation for prId=" + this.prRegion.getPRId() + ":\n"); for (Iterator i = dataStores.iterator(); i.hasNext(); ) { DataStoreBuckets dsb = (DataStoreBuckets)i.next(); logStr.append(dsb.memberId).append(": "); if (existingStores.contains(dsb.memberId)) { logStr.append("+"); } else { logStr.append("-"); } logStr.append(Integer.toString(dsb.numPrimaries)); logStr.append("/"); logStr.append(Integer.toString(dsb.numBuckets - dsb.numPrimaries)); // for (int j = 0; j < dsb.numPrimaries; j++) { // logStr.append('#'); // } // int nonPrimary = dsb.numBuckets - dsb.numPrimaries; // for (int j = 0; j < nonPrimary; j++) { // logStr.append('*'); // } logStr.append('\n'); } return logStr.toString(); } public static class DataStoreBuckets { public final InternalDistributedMember memberId; public final int numBuckets; public final int numPrimaries; private final int localMaxMemoryMB; public DataStoreBuckets(InternalDistributedMember mem, int buckets, int primaryBuckets, int localMaxMemory) { this.memberId = mem; this.numBuckets = buckets; this.numPrimaries = primaryBuckets; this.localMaxMemoryMB = localMaxMemory; } @Override public boolean equals(Object obj) { if ((obj == null) || !(obj instanceof DataStoreBuckets)) { return false; } DataStoreBuckets other = (DataStoreBuckets) obj; return this.numBuckets == other.numBuckets && this.memberId.equals(other.memberId); } @Override public int hashCode() { return this.memberId.hashCode(); } @Override public String toString() { return "DataStoreBuckets memberId=" + this.memberId + "; numBuckets=" + this.numBuckets + "; numPrimaries=" + this.numPrimaries; } } /** * Verifies the members and removes the members that are either not present in the * DistributedSystem or are no longer part of the PartitionedRegion * (close/localDestroy has been performed.) . * * @param members collection of members to scan and modify */ void verifyBucketNodes(Collection members, String partitionName) { if (members == null || members.isEmpty()) { return; } // Revisit region advisor, get current bucket stores. final Set availableMembers = getAllStores(partitionName); // boolean debugAnyRemoved = false; for (Iterator itr = members.iterator(); itr.hasNext();) { InternalDistributedMember node = itr.next(); if ( ! 
availableMembers.contains(node)) { if (getLogger().fineEnabled()) { getLogger().fine( "verifyBucketNodes: removing member " + node); // debugAnyRemoved = true; } itr.remove(); Assert.assertTrue(!members.contains(node), "return value does not contain " + node); } } // for // if (debugAnyRemoved) { // this.prRegion.getRegionAdvisor().dumpProfiles(getLogger(), "DEBUG verifyBucketNodes removed a profile from the 'accepted' list"); // Set members = ((InternalDistributedSystem)this.prRegion.getCache().getDistributedSystem()) // .getDistributionManager().getDistributionManagerIds(); // getLogger().info("Distributed members in view: " + PartitionedRegionHelper.printCollection(members)); // } } /** * Schedule a task to perform redundancy recovery for a new node or for * the node departed. */ public void scheduleRedundancyRecovery(Object failedMemId) { final boolean isStartup = (failedMemId == null); final LogWriterI18n logger = getLogger(); final GemFireCacheImpl cache = this.prRegion.getCache(); final int redundantCopies = PRHARedundancyProvider.this.prRegion.getRedundantCopies(); final long delay; final boolean movePrimaries; if (isStartup) { delay = this.prRegion.getPartitionAttributes().getStartupRecoveryDelay(); movePrimaries = !Boolean.getBoolean("gemfire.DISABLE_MOVE_PRIMARIES_ON_STARTUP"); } else { delay = this.prRegion.getPartitionAttributes().getRecoveryDelay(); movePrimaries = false; } final boolean requiresRedundancyRecovery = delay >= 0 && redundantCopies > 0; if(!requiresRedundancyRecovery) { return; } if (!PRHARedundancyProvider.this.prRegion.isDataStore()) { return; } if (cache.isUnInitializedMember(cache.getMyId())) { return; } Runnable task = new RecoveryRunnable(this) { @Override public void run2() { try { final boolean isFixedPartitionedRegion = PRHARedundancyProvider.this.prRegion.isFixedPartitionedRegion(); final PartitionedRegionRebalanceOp rebalance; //Fix for 43582 - always replace offline data for fixed partitioned //regions - this guarantees we create the buckets we are supposed to //create on this node. boolean replaceOfflineData = isFixedPartitionedRegion || !isStartup; rebalance = new PartitionedRegionRebalanceOp( PRHARedundancyProvider.this.prRegion, false, true, false, movePrimaries, replaceOfflineData,false); long start = PRHARedundancyProvider.this.prRegion.getPrStats() .startRecovery(); if (isFixedPartitionedRegion) { rebalance.executeFPA(); } else { rebalance.execute(); } PRHARedundancyProvider.this.prRegion.getPrStats().endRecovery(start); PRHARedundancyProvider.this.recoveryFuture = null; } catch(CancelException e) { logger.fine("Cache closed while recovery in progress"); } catch(RegionDestroyedException e) { logger.fine("Region destroyed while recovery in progress"); } catch (Exception e) { logger.error(LocalizedStrings.PRHARedundancyProvider_UNEXPECTED_EXCEPTION_DURING_BUCKET_RECOVERY, e); } } }; synchronized (this.shutdownLock) { // possible fix for bug 41094 if (!this.shutdown) { try { if(logger.fineEnabled()) { if (isStartup) { logger.fine(this.prRegion + " scheduling redundancy recovery in " + delay + " ms"); } else { logger .fine(prRegion + " scheduling redundancy recovery after departure/crash/error in " + failedMemId + " in " + delay + " ms"); } } recoveryFuture = this.recoveryExecutor.schedule(task, delay, TimeUnit.MILLISECONDS); } catch(RejectedExecutionException e) { //ok, the executor is shutting down. 
  public boolean isRedundancyImpaired() {
    int numBuckets = this.prRegion.getPartitionAttributes().getTotalNumBuckets();
    int targetRedundancy = this.prRegion.getPartitionAttributes().getRedundantCopies();

    for (int i = 0; i < numBuckets; i++) {
      int redundancy = this.prRegion.getRegionAdvisor().getBucketRedundancy(i);
      if (redundancy < targetRedundancy && redundancy != -1
          || redundancy > targetRedundancy) {
        return true;
      }
    }
    return false;
  }
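  // Note on the check above: a bucket whose redundancy is reported as -1 (no
  // copies of that bucket exist yet) is deliberately not treated as impairment;
  // redundancy is considered impaired only when a created bucket has fewer or
  // more copies than the configured redundant-copies target.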
  public boolean recoverPersistentBuckets() {

    /**
     * To handle a case where a ParallelGatewaySender is persistent but the user
     * PR is not: first recover the GatewaySender buckets for the
     * ParallelGatewaySender, irrespective of whether colocation is complete.
     */
    PartitionedRegion leaderRegion = ColocationHelper.getLeaderRegion(this.prRegion);

    if (getLogger().fineEnabled()) {
      getLogger().fine(
          "recoverPersistentBuckets for " + this.prRegion.getFullPath()
              + " isShadowPR " + this.prRegion.isShadowPR()
              + " I am persistent : "
              + this.prRegion.getDataPolicy().withPersistence()
              + " leaderRegion " + leaderRegion
              + " leaderRegion is persistent: "
              + leaderRegion.getDataPolicy().withPersistence());
    }

    // Check if the leader region or some child shadow PR region is persistent
    // and return the first persistent region found.
    PartitionedRegion persistentLeader = getPersistentLeader();

    // If there is no persistent region in the colocation chain, no need to recover.
    if (persistentLeader == null) {
      return true;
    }

    if (!ColocationHelper.checkMembersColocation(leaderRegion,
        leaderRegion.getDistributionManager().getDistributionManagerId())) {
      if (getLogger().fineEnabled()) {
        getLogger().fine(
            "Skipping persistent recovery of " + prRegion
                + " because colocation is not complete for " + leaderRegion);
      }
      return false;
    }

    // TODO prpersist - It would make sense to hold the lock here in some cases
    // to prevent confusing members that are trying to rebalance. BUT, these
    // persistent regions need to wait for other members to recover during
    // initialization.
//    RecoveryLock lock = leaderRegion.getRecoveryLock();
//    lock.lock();
//    try {
    final ProxyBucketRegion[] proxyBucketArray =
        persistentLeader.getRegionAdvisor().getProxyBucketArray();

    for (ProxyBucketRegion proxyBucket : proxyBucketArray) {
      proxyBucket.initializePersistenceAdvisor();
    }
    Set peers = this.prRegion.getRegionAdvisor().adviseGeneric();

    // TODO prpersist - Ok, this is super lame. We need to make sure here that
    // we don't run into this race condition:
    // 1) We get a membership view from member A.
    // 2) Member B removes itself, and distributes to us and A. We don't remove B.
    // 3) We apply the membership view from A, which includes B.
    // That will add B back into the set.

    // This state flush will make sure that any membership changes
    // that are in progress on the peers are finished.
    MembershipFlushRequest.send(peers, this.prRegion.getDistributionManager(),
        this.prRegion.getFullPath());

    ArrayList<ProxyBucketRegion> bucketsNotHostedLocally =
        new ArrayList<ProxyBucketRegion>(proxyBucketArray.length);
    ArrayList<ProxyBucketRegion> bucketsHostedLocally =
        new ArrayList<ProxyBucketRegion>(proxyBucketArray.length);

    /*
     * Start the redundancy logger before recovering any proxy buckets.
     */
    allBucketsRecoveredFromDisk = new CountDownLatch(proxyBucketArray.length);
    try {
      if (proxyBucketArray.length > 0) {
        this.redundancyLogger = new RedundancyLogger(this);
        Thread loggingThread = new Thread(this.redundancyLogger,
            "RedundancyLogger for region " + this.prRegion.getName());
        loggingThread.start();
      }
    } catch (RuntimeException e) {
      allBucketsRecoveredFromDisk = null;
      throw e;
    }

    /*
     * Spawn a separate thread for each bucket that we previously hosted
     * to recover that bucket.
     *
     * That thread will get to the point at which it has determined that
     * at least one member (possibly the local member) has fully initialized
     * the bucket, at which point it will count down the someMemberRecoveredLatch
     * on the bucket.
     *
     * Once at least one copy of each bucket has been created in the distributed
     * system, the initPRInternals method will exit. Some of the threads
     * spawned here will still be doing GIIs in the background. This
     * allows the system to become usable as fast as possible.
     *
     * If we used a bounded thread pool here, we would end up waiting for
     * some buckets to finish their GII before returning from initPRInternals.
     * In the future maybe we could let the create-bucket call return and pass
     * the GII task to a separate thread pool.
     */
    for (final ProxyBucketRegion proxyBucket : proxyBucketArray) {
      if (proxyBucket.getPersistenceAdvisor().wasHosting()) {
        final RecoveryRunnable recoveryRunnable = new RecoveryRunnable(this) {
          @Override
          public void run() {
            // Fix for 44551 - make sure that we always count down
            // this latch, even if the region was destroyed.
            try {
              super.run();
            } finally {
              allBucketsRecoveredFromDisk.countDown();
            }
          }

          @Override
          public void run2() {
            proxyBucket.recoverFromDiskRecursively();
          }
        };
        Thread recoveryThread = new Thread(recoveryRunnable,
            "Recovery thread for bucket " + proxyBucket.getName());
        recoveryThread.start();
        bucketsHostedLocally.add(proxyBucket);
      } else {
        bucketsNotHostedLocally.add(proxyBucket);
      }
    }

    try {
      // Partial fix for 44045: try to recover the local
      // buckets before the proxy buckets. This will allow us
      // to detect any ConflictingDataException before the proxy
      // buckets update their membership view.
      for (final ProxyBucketRegion proxyBucket : bucketsHostedLocally) {
        proxyBucket.waitForPrimaryPersistentRecovery();
      }
      for (final ProxyBucketRegion proxyBucket : bucketsNotHostedLocally) {
        proxyBucket.recoverFromDiskRecursively();
      }
    } finally {
      for (final ProxyBucketRegion proxyBucket : bucketsNotHostedLocally) {
        allBucketsRecoveredFromDisk.countDown();
      }
    }

    return true;
//    } finally {
//      lock.unlock();
//    }
  }
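  // Coordination note: allBucketsRecoveredFromDisk is sized to the number of
  // proxy buckets and counted down exactly once per bucket - by the per-bucket
  // recovery threads for locally hosted buckets, and in the finally block above
  // for the rest. The waitForPersistentBucketRecoveryOrClose() and
  // waitForPersistentBucketRecovery() methods below block on this latch, so
  // they return only once every bucket has been accounted for.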
  /**
   * Check to see if any colocated region of the current region is persistent.
   * It's not enough to check just the leader region, because a child region
   * might be a persistent parallel WAN queue, which is allowed.
   *
   * @return the most senior region in the colocation chain (closest to the
   *         leader) that is persistent.
   */
  protected PartitionedRegion getPersistentLeader() {
    PartitionedRegion leader = ColocationHelper.getLeaderRegion(this.prRegion);

    return findPersistentRegionRecursively(leader);
  }

  private PartitionedRegion findPersistentRegionRecursively(
      PartitionedRegion pr) {
    if (pr.getDataPolicy().withPersistence()) {
      return pr;
    }
    for (PartitionedRegion child : ColocationHelper.getColocatedChildRegions(pr)) {
      PartitionedRegion leader = findPersistentRegionRecursively(child);
      if (leader != null) {
        return leader;
      }
    }
    return null;
  }

  public void scheduleCreateMissingBuckets() {
    if (this.prRegion.getColocatedWith() != null
        && ColocationHelper.isColocationComplete(this.prRegion)) {
      Runnable task = new CreateMissingBucketsTask(this);
      final InternalResourceManager resourceManager = this.prRegion
          .getGemFireCache().getResourceManager();
      resourceManager.getExecutor().submit(task);
    }
  }

  public void shutdown() {
    synchronized (this.shutdownLock) { // possible fix for bug 41094
      this.shutdown = true;
      ScheduledFuture recoveryFuture = this.recoveryFuture;
      if (recoveryFuture != null) {
        recoveryFuture.cancel(false/*mayInterruptIfRunning*/);
        this.recoveryExecutor.purge();
      }
    }
  }

  /**
   * Creates and fills in a PartitionRegionInfo for the partitioned region.
   *
   * @param internal true if internal-only details should be included
   * @param loadProbe the LoadProbe to use
   * @return PartitionRegionInfo for the partitioned region
   */
  public InternalPRInfo buildPartitionedRegionInfo(
      final boolean internal,
      final LoadProbe loadProbe) {
    final PartitionedRegion pr = this.prRegion;

    if (pr == null) {
      return null;
    }

    PartitionedRegionStats prStats = pr.getPrStats();

    int configuredBucketCount = pr.getTotalNumberOfBuckets();
    int createdBucketCount = pr.getRegionAdvisor().getCreatedBucketsCount();
    int lowRedundancyBucketCount = prStats.getLowRedundancyBucketCount();
    int configuredRedundantCopies = pr.getRedundantCopies();
    int actualRedundantCopies = prStats.getActualRedundantCopies();

    final PartitionedRegionDataStore ds = pr.getDataStore();

    Set datastores = pr.getRegionAdvisor().adviseDataStore();

    //int size = datastores.size() + (ds == null ? 0 : 1);

    Set memberDetails = new TreeSet();

    OfflineMemberDetails offlineMembers = null;
    boolean fetchOfflineMembers = false;
    if (ds != null) {
      memberDetails.add(buildPartitionMemberDetails(internal, loadProbe));
      offlineMembers = fetchOfflineMembers();
    } else {
      fetchOfflineMembers = true;
    }

    // Get remote results
    if (!datastores.isEmpty()) {
      FetchPartitionDetailsResponse response = FetchPartitionDetailsMessage
          .send(datastores, pr, internal, fetchOfflineMembers, loadProbe);
      memberDetails.addAll(response.waitForResponse());
      if (fetchOfflineMembers) {
        offlineMembers = response.getOfflineMembers();
      }
    }

    String colocatedWithPath = pr.getColocatedWith();

    InternalPRInfo details = new PartitionRegionInfoImpl(
        pr.getFullPath(),
        configuredBucketCount,
        createdBucketCount,
        lowRedundancyBucketCount,
        configuredRedundantCopies,
        actualRedundantCopies,
        memberDetails,
        colocatedWithPath,
        offlineMembers);

    return details;
  }
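  // For orientation (hedged, not part of this class): the details assembled
  // above are what ultimately back the public PartitionRegionHelper
  // introspection API, roughly along these lines:
  //
  //   Region<?, ?> region = cache.getRegion("myPartitionedRegion"); // hypothetical region name
  //   PartitionRegionInfo info = PartitionRegionHelper.getPartitionRegionInfo(region);
  //   int missing = info.getConfiguredBucketCount() - info.getCreatedBucketCount();
  //
  // Method names are from the public com.gemstone.gemfire.cache.partition
  // package as best recalled here; consult that package for the authoritative
  // signatures.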
  /**
   * Retrieve the set of members which are currently offline
   * for all buckets.
   */
  public OfflineMemberDetailsImpl fetchOfflineMembers() {
    ProxyBucketRegion[] proxyBuckets = prRegion.getRegionAdvisor().getProxyBucketArray();
    Set[] offlineMembers = new Set[proxyBuckets.length];
    for (int i = 0; i < proxyBuckets.length; i++) {
      ProxyBucketRegion proxy = proxyBuckets[i];
      if (this.prRegion.getDataPolicy().withPersistence()) {
        Set persistedMembers = proxy.getPersistenceAdvisor().getMissingMembers();
        if (persistedMembers == null) {
          persistedMembers = Collections.emptySet();
        }
        offlineMembers[i] = persistedMembers;
      } else {
        offlineMembers[i] = Collections.emptySet();
      }
    }
    return new OfflineMemberDetailsImpl(offlineMembers);
  }

  /**
   * Creates and fills in a PartitionMemberDetails for the local member.
   *
   * @param internal true if internal-only details should be included
   * @param loadProbe the LoadProbe to use
   * @return PartitionMemberDetails for the local member
   */
  public InternalPartitionDetails buildPartitionMemberDetails(
      final boolean internal,
      final LoadProbe loadProbe) {
    final PartitionedRegion pr = this.prRegion;

    PartitionedRegionDataStore ds = pr.getDataStore();
    if (ds == null) {
      return null;
    }

    InternalPartitionDetails localDetails = null;

    long size = 0;

    InternalDistributedMember localMember = pr.getMyId();

    int configuredBucketCount = pr.getTotalNumberOfBuckets();
    long[] bucketSizes = new long[configuredBucketCount];
    // key: bid, value: size
    Map<Integer, Integer> bucketSizeMap = ds.getSizeLocally();
    for (Iterator<Map.Entry<Integer, Integer>> iter =
        bucketSizeMap.entrySet().iterator(); iter.hasNext();) {
      Map.Entry<Integer, Integer> me = iter.next();
      int bid = me.getKey().intValue();
      long bucketSize = ds.getBucketSize(bid);
      bucketSizes[bid] = bucketSize;
      size += bucketSize;
    }

    if (internal) {
      waitForPersistentBucketRecoveryOrClose();

      PRLoad prLoad = loadProbe.getLoad(pr);
      localDetails = new PartitionMemberInfoImpl(
          localMember,
          pr.getLocalMaxMemory() * (1024L * 1024L),
          size,
          ds.getBucketsManaged(),
          ds.getNumberOfPrimaryBucketsManaged(),
          prLoad,
          bucketSizes);
    } else {
      localDetails = new PartitionMemberInfoImpl(
          localMember,
          pr.getLocalMaxMemory() * (1024L * 1024L),
          size,
          ds.getBucketsManaged(),
          ds.getNumberOfPrimaryBucketsManaged());
    }
    return localDetails;
  }

  /**
   * Wait for all persistent buckets to be recovered from disk,
   * or for the region to be closed, whichever happens first.
   */
  protected void waitForPersistentBucketRecoveryOrClose() {
    CountDownLatch recoveryLatch = allBucketsRecoveredFromDisk;
    if (recoveryLatch != null) {
      boolean interrupted = false;
      while (true) {
        try {
          this.prRegion.getCancelCriterion().checkCancelInProgress(null);
          boolean done = recoveryLatch.await(
              PartitionedRegionHelper.DEFAULT_WAIT_PER_RETRY_ITERATION,
              TimeUnit.MILLISECONDS);
          if (done) {
            break;
          }
        } catch (InterruptedException e) {
          interrupted = true;
        }
      }
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
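  // Implementation note on the wait loop above: InterruptedException is
  // recorded rather than rethrown so the method keeps waiting for recovery (or
  // for cancellation via checkCancelInProgress), and the interrupt flag is
  // restored on exit so callers can still observe that the thread was
  // interrupted.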
  /**
   * Wait for all persistent buckets to be recovered from disk,
   * regardless of whether the region is currently being closed.
   */
  protected void waitForPersistentBucketRecovery() {
    CountDownLatch recoveryLatch = allBucketsRecoveredFromDisk;
    if (recoveryLatch != null) {
      boolean interrupted = false;
      while (true) {
        try {
          recoveryLatch.await();
          break;
        } catch (InterruptedException e) {
          interrupted = true;
        }
      }
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }

  private static class ManageBucketRsp {
    final static ManageBucketRsp NO = new ManageBucketRsp("NO");
    final static ManageBucketRsp YES = new ManageBucketRsp("YES");
    final static ManageBucketRsp NO_INITIALIZING =
        new ManageBucketRsp("NO_INITIALIZING");
    public static final ManageBucketRsp CLOSED = new ManageBucketRsp("CLOSED");

    private final String name;

    private ManageBucketRsp(String name) {
      this.name = name;
    }

    boolean isRejection() {
      return this == NO || this == NO_INITIALIZING || this == CLOSED;
    }

    boolean isAcceptance() {
      return this == YES;
    }

    boolean isInitializing() {
      return this == NO_INITIALIZING;
    }

    @Override
    public String toString() {
      return "ManageBucketRsp(" + this.name + ")";
    }

    /** return YES if the argument is true, NO if not */
    static ManageBucketRsp valueOf(boolean managed) {
      return managed ? YES : NO;
    }
  }
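  // ManageBucketRsp is a pre-Java-5-style typesafe-constant holder for the
  // possible replies to a manage-bucket request: only YES counts as acceptance,
  // while NO, NO_INITIALIZING and CLOSED are all rejections, with
  // NO_INITIALIZING additionally recording that the remote member was still
  // initializing when it declined.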
  static private class BucketMembershipObserverResults {
    final boolean problematicDeparture;
    final InternalDistributedMember primary;

    BucketMembershipObserverResults(boolean re, InternalDistributedMember p) {
      problematicDeparture = re;
      primary = p;
    }

    @Override
    public String toString() {
      return "pDepart:" + problematicDeparture + " primary:" + primary;
    }
  }

  /**
   * Monitors distributed membership for a given bucket.
   * @author mthomas
   */
  private class BucketMembershipObserver implements MembershipListener {
    final Bucket bucketToMonitor;
    final AI arrivals = CFactory.createAI(0);
    final AB departures = CFactory.createAB(false);

    public BucketMembershipObserver(Bucket b) {
      this.bucketToMonitor = b;
    }

    public BucketMembershipObserver beginMonitoring() {
      int profilesPresent = this.bucketToMonitor.getBucketAdvisor()
          .addMembershipListenerAndAdviseGeneric(this).size();
      arrivals.addAndGet(profilesPresent);
      return this;
    }

    public void stopMonitoring() {
      this.bucketToMonitor.getBucketAdvisor().removeMembershipListener(this);
    }

    public void memberJoined(InternalDistributedMember id) {
      if (getLogger().fineEnabled()) {
        getLogger().fine("Observer for bucket " + this.bucketToMonitor
            + " member joined " + id);
      }
      synchronized (this) {
        // TODO manipulate failedNodes and verifiedNodeList directly
        arrivals.addAndGet(1);
        notify();
      }
    }

    public void memberSuspect(InternalDistributedMember id,
        InternalDistributedMember whoSuspected) {
    }

    public void memberDeparted(InternalDistributedMember id, boolean crashed) {
      if (getLogger().fineEnabled()) {
        getLogger().fine("Observer for bucket " + this.bucketToMonitor
            + " member departed " + id);
      }
      synchronized (this) {
        // TODO manipulate failedNodes and verifiedNodeList directly
        departures.getAndSet(true);
        notify();
      }
    }

    /**
     * Wait for the expected number of owners to be recognized. When the
     * expected number have been seen, fetch the primary and report it. If,
     * while waiting for the owners to be recognized, there is a departure
     * which compromises redundancy, report that instead so the caller can
     * select new targets.
     * @param expectedCount the number of bucket owners to wait for
     * @param expectedOwners the list of owners used when a departure is detected
     * @return if no problematic departures are detected, the primary
     * @throws InterruptedException
     */
    public BucketMembershipObserverResults waitForOwnersGetPrimary(
        final int expectedCount,
        final Collection<InternalDistributedMember> expectedOwners,
        String partitionName) throws InterruptedException {
      boolean problematicDeparture = false;
      synchronized (this) {
        for (;;) {
          this.bucketToMonitor.getCancelCriterion().checkCancelInProgress(null);

          // If any departures, need to rethink much...
          boolean oldDepartures = departures.get();
          if (oldDepartures) {
            verifyBucketNodes(expectedOwners, partitionName);
            if (expectedOwners.isEmpty()) {
              problematicDeparture = true; // need to pick new victims
            }
            // reselect = true; // need to pick new victims
            arrivals.set(expectedOwners.size());
            departures.set(false);
            if (problematicDeparture) {
              if (getLogger().fineEnabled()) {
                getLogger().fine("Bucket observer found departed members - retrying");
              }
            }
            break;
          }

          // Look for success...
          int oldArrivals = arrivals.get();
          if (oldArrivals >= expectedCount) {
            // success!
            break;
          }
          if (getLogger().fineEnabled()) {
            getLogger().fine("Waiting for bucket "
                + prRegion.bucketStringForLogs(this.bucketToMonitor.getId())
                + " to finish being created");
          }

          prRegion.checkReadiness();

          final int creationWaitMillis = 5 * 1000;
          wait(creationWaitMillis);

          if (oldArrivals == arrivals.get() && oldDepartures == departures.get()) {
            getLogger().warning(
                LocalizedStrings.PRHARedundancyProvider_TIME_OUT_WAITING_0_MS_FOR_CREATION_OF_BUCKET_FOR_PARTITIONED_REGION_1_MEMBERS_REQUESTED_TO_CREATE_THE_BUCKET_ARE_2,
                new Object[] {Integer.valueOf(creationWaitMillis),
                    prRegion.getFullPath(), expectedOwners});
          }
        } // for (;;)
      } // synchronized

      if (problematicDeparture) {
        return new BucketMembershipObserverResults(true, null);
      }
      InternalDistributedMember primmy = bucketToMonitor.getBucketAdvisor()
          .getPrimary();
      if (primmy == null) {
        /*
         * Handle a race where nobody has the bucket. We can't return a null
         * member here because we haven't created the bucket; we need to let
         * the higher level code loop.
         */
        return new BucketMembershipObserverResults(true, null);
      } else {
        return new BucketMembershipObserverResults(false, primmy);
      }
    }

    @Override
    public void quorumLost(Set failures, List remaining) {
    }
  }
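  // Hedged sketch of how this observer is meant to be driven (the actual call
  // site lives elsewhere in this class and is not reproduced here):
  //
  //   BucketMembershipObserver observer =
  //       new BucketMembershipObserver(bucket).beginMonitoring();
  //   try {
  //     BucketMembershipObserverResults results =
  //         observer.waitForOwnersGetPrimary(expectedCount, acceptedMembers, partitionName);
  //     // results.problematicDeparture => reselect targets and retry
  //   } finally {
  //     observer.stopMonitoring();
  //   }
  //
  // The names bucket, expectedCount, acceptedMembers and partitionName are
  // placeholders for whatever the caller has in scope.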
  /**
   * Membership listener that performs cleanup when a node leaves the
   * DistributedSystem.
   */
  protected class PRMembershipListener implements MembershipListener {
    public void memberDeparted(final InternalDistributedMember id,
        final boolean crashed) {
      try {
        DistributedMember dmem = prRegion.getSystem().getDistributedMember();
        if (getLogger().fineEnabled()) {
          getLogger().fine("MembershipListener invoked on DistributedMember = "
              + dmem + " for failed memberId = " + id);
        }

        if (!prRegion.isCacheClosing() && !prRegion.isDestroyed()
            && !dmem.equals(id)) {

          Runnable postRecoveryTask = null;

          // Only schedule redundancy recovery if this is not a fixed PR.
          if (!PRHARedundancyProvider.this.prRegion.isFixedPartitionedRegion()) {
            postRecoveryTask = new Runnable() {
              public void run() {
                // After the metadata has been cleaned, recover redundancy.
                scheduleRedundancyRecovery(id);
              }
            };
          }
          // Schedule cleanup of the metadata for the failed member.
          PartitionedRegionHelper.cleanUpMetaDataForRegion(prRegion.getCache(),
              prRegion.getRegionIdentifier(), id, postRecoveryTask);
        }
      } catch (CancelException e) {
        // ignore
      }
    }

    public void memberSuspect(InternalDistributedMember id,
        InternalDistributedMember whoSuspected) {
    }

    public void memberJoined(InternalDistributedMember id) {
      // no action required
    }

    public void quorumLost(Set failures, List remaining) {
    }
  }

  /**
   * Persistent state listener that starts redundancy recovery when a
   * persistent member is revoked.
   */
  protected class PRPersistenceListener extends
      PersistentStateListener.PersistentStateAdapter {
    // TODO prpersist - It seems like this might trigger recovery too often. For
    // example, a rebalance can end up removing a bucket, which would trigger
    // recovery here. We really need to only trigger this thing when a PR region
    // is destroyed. And isn't that code already in there?
    @Override
    public void memberRemoved(PersistentMemberID persistentID, boolean revoked) {
      if (!revoked) {
        return;
      }

      DistributedMember dmem = prRegion.getSystem().getDistributedMember();
      if (getLogger().fineEnabled()) {
        getLogger().fine(
            "Persistent Membership Listener invoked on DistributedMember = "
                + dmem + " for removed memberId = " + persistentID);
      }

      if (!prRegion.isCacheClosing() && !prRegion.isDestroyed()
          && !prRegion.isFixedPartitionedRegion()) {
        scheduleRedundancyRecovery(persistentID);
      }
    }
  }

  public CountDownLatch getAllBucketsRecoveredFromDiskLatch() {
    return allBucketsRecoveredFromDisk;
  }
}



