org.apache.geode.internal.cache.wan.parallel.ParallelGatewaySenderQueue Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of geode-core Show documentation
Apache Geode provides a database-like consistency model, reliable transaction processing and a shared-nothing architecture to maintain very low latency performance with high concurrency processing
/*
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.geode.internal.cache.wan.parallel;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import org.apache.logging.log4j.Logger;
import org.apache.geode.CancelException;
import org.apache.geode.SystemFailure;
import org.apache.geode.cache.AttributesFactory;
import org.apache.geode.cache.AttributesMutator;
import org.apache.geode.cache.Cache;
import org.apache.geode.cache.CacheException;
import org.apache.geode.cache.CacheListener;
import org.apache.geode.cache.DataPolicy;
import org.apache.geode.cache.EntryNotFoundException;
import org.apache.geode.cache.EvictionAction;
import org.apache.geode.cache.EvictionAttributes;
import org.apache.geode.cache.PartitionAttributesFactory;
import org.apache.geode.cache.Region;
import org.apache.geode.cache.RegionAttributes;
import org.apache.geode.cache.RegionDestroyedException;
import org.apache.geode.cache.asyncqueue.internal.AsyncEventQueueImpl;
import org.apache.geode.distributed.internal.DM;
import org.apache.geode.distributed.internal.InternalDistributedSystem;
import org.apache.geode.distributed.internal.membership.InternalDistributedMember;
import org.apache.geode.internal.Version;
import org.apache.geode.internal.cache.AbstractBucketRegionQueue;
import org.apache.geode.internal.cache.BucketNotFoundException;
import org.apache.geode.internal.cache.BucketRegion;
import org.apache.geode.internal.cache.BucketRegionQueue;
import org.apache.geode.internal.cache.ColocationHelper;
import org.apache.geode.internal.cache.Conflatable;
import org.apache.geode.internal.cache.DiskRegionStats;
import org.apache.geode.internal.cache.DistributedRegion;
import org.apache.geode.internal.cache.EntryEventImpl;
import org.apache.geode.internal.cache.ForceReattemptException;
import org.apache.geode.internal.cache.GemFireCacheImpl;
import org.apache.geode.internal.cache.InternalRegionArguments;
import org.apache.geode.internal.cache.LocalRegion;
import org.apache.geode.internal.cache.PartitionedRegion;
import org.apache.geode.internal.cache.PartitionedRegionDataStore;
import org.apache.geode.internal.cache.PartitionedRegionHelper;
import org.apache.geode.internal.cache.PrimaryBucketException;
import org.apache.geode.internal.cache.RegionQueue;
import org.apache.geode.internal.cache.wan.AbstractGatewaySender;
import org.apache.geode.internal.cache.wan.AsyncEventQueueConfigurationException;
import org.apache.geode.internal.cache.wan.GatewaySenderConfigurationException;
import org.apache.geode.internal.cache.wan.GatewaySenderEventImpl;
import org.apache.geode.internal.cache.wan.GatewaySenderException;
import org.apache.geode.internal.cache.wan.GatewaySenderStats;
import org.apache.geode.internal.cache.wan.parallel.ParallelQueueBatchRemovalMessage.ParallelQueueBatchRemovalResponse;
import org.apache.geode.internal.i18n.LocalizedStrings;
import org.apache.geode.internal.logging.LogService;
import org.apache.geode.internal.logging.LoggingThreadGroup;
import org.apache.geode.internal.logging.log4j.LocalizedMessage;
import org.apache.geode.internal.size.SingleObjectSizer;
import org.apache.geode.internal.util.concurrent.StoppableCondition;
import org.apache.geode.internal.util.concurrent.StoppableReentrantLock;
public class ParallelGatewaySenderQueue implements RegionQueue {
protected static final Logger logger = LogService.getLogger();
protected final Map userRegionNameToshadowPRMap =
new ConcurrentHashMap();
// >>
private final Map regionToDispatchedKeysMap = new ConcurrentHashMap();
protected final StoppableReentrantLock buckToDispatchLock;
private final StoppableCondition regionToDispatchedKeysMapEmpty;
protected final StoppableReentrantLock queueEmptyLock;
private volatile boolean isQueueEmpty = true;
/**
* False signal is fine on this condition. As processor will loop again and find out if it was a
* false signal. However, make sure that whatever scenario can cause an entry to be peeked shoudld
* signal the processor to unblock.
*/
private StoppableCondition queueEmptyCondition;
protected final GatewaySenderStats stats;
protected volatile boolean resetLastPeeked = false;
/**
* There will be one shadow pr for each of the the PartitionedRegion which has added the
* GatewaySender Fix for Bug#45917 We maintain a tempQueue to queue events when buckets are not
* available locally.
*/
private final ConcurrentMap> bucketToTempQueueMap =
new ConcurrentHashMap>();
/**
* The default frequency (in milliseconds) at which a message will be sent by the primary to all
* the secondary nodes to remove the events which have already been dispatched from the queue.
*/
public static final int DEFAULT_MESSAGE_SYNC_INTERVAL = 10;
// TODO:REF: how to change the message sync interval ? should it be common for serial and parallel
protected static volatile int messageSyncInterval = DEFAULT_MESSAGE_SYNC_INTERVAL;
// TODO:REF: name change for thread, as it appears in the log
private BatchRemovalThread removalThread = null;
protected BlockingQueue peekedEvents =
new LinkedBlockingQueue();
/**
* The peekedEventsProcessing queue is used when the batch size is reduced due to a
* MessageTooLargeException
*/
private BlockingQueue peekedEventsProcessing =
new LinkedBlockingQueue();
/**
* The peekedEventsProcessingInProgress boolean denotes that processing existing peeked events is
* in progress
*/
private boolean peekedEventsProcessingInProgress = false;
public final AbstractGatewaySender sender;
public static final int WAIT_CYCLE_SHADOW_BUCKET_LOAD = 10;
public static final String QSTRING = "_PARALLEL_GATEWAY_SENDER_QUEUE";
/**
* Fixed size Thread pool for conflating the events in the queue. The size of the thread pool is
* set to the number of processors available to the JVM. There will be one thread pool per
* ParallelGatewaySender on a node.
*/
private ExecutorService conflationExecutor;
/**
 * This class carries out the actual removal of the previousTailKey from QPR. The class implements
 * Runnable and the destroy operation is done in the run method. The Runnable is executed by the
 * one of the threads in the conflation thread pool configured above.
 */
private class ConflationHandler implements Runnable {
  Conflatable conflatableObject;
  Long previousTailKeyTobeRemoved;
  int bucketId;

  public ConflationHandler(Conflatable conflatableObject, int bId, Long previousTailKey) {
    this.conflatableObject = conflatableObject;
    this.previousTailKeyTobeRemoved = previousTailKey;
    this.bucketId = bId;
  }

  @Override
  public void run() {
    PartitionedRegion prQ = null;
    GatewaySenderEventImpl event = (GatewaySenderEventImpl) conflatableObject;
    try {
      // Conflation is keyed off the leader region of the colocation chain.
      String regionPath =
          ColocationHelper.getLeaderRegion((PartitionedRegion) event.getRegion()).getFullPath();
      prQ = (PartitionedRegion) userRegionNameToshadowPRMap.get(regionPath);
      destroyEventFromQueue(prQ, bucketId, previousTailKeyTobeRemoved);
    } catch (EntryNotFoundException e) {
      // The previous tail key was already removed; nothing to conflate.
      if (logger.isDebugEnabled()) {
        logger.debug("{}: Not conflating {} due to EntryNotFoundException", this,
            conflatableObject.getKeyToConflate());
      }
    }
    // Guard prQ: the shadow PR lookup may have found nothing for this region path,
    // and the original unconditional prQ.getName() would then NPE in debug logging.
    if (logger.isDebugEnabled() && prQ != null) {
      logger.debug("{}: Conflated {} for key={} in queue for region={}", this,
          conflatableObject.getValueToConflate(), conflatableObject.getKeyToConflate(),
          prQ.getName());
    }
  }

  /**
   * Debugging helper: best-effort deserialization of a byte[] payload. Returns the input
   * unchanged when it is not a byte[] or when deserialization fails.
   */
  private Object deserialize(Object serializedBytes) {
    Object deserializedObject = serializedBytes;
    if (serializedBytes instanceof byte[]) {
      byte[] serializedBytesCast = (byte[]) serializedBytes;
      // This is a debugging method so deliberately ignore all exceptions like
      // ClassNotFoundException and fall back to the raw bytes.
      try {
        deserializedObject = EntryEventImpl.deserialize(serializedBytesCast);
      } catch (Exception ignored) {
      }
    }
    return deserializedObject;
  }
}
// Index of this queue among the sender's dispatcher queues; several one-time actions
// (e.g. enqueueTempEvents, shadow-PR cleanup) are keyed to index 0 or nDispatcher - 1.
final protected int index;
// Total number of dispatchers configured for the owning sender.
final protected int nDispatcher;
// Factory used to create the shadow queue meta-region (injectable for tests).
private MetaRegionFactory metaRegionFactory;
/**
 * A transient queue to maintain the eventSeqNum of the events that are to be sent to remote site.
 * It is cleared when the queue is cleared.
 */
// private final BlockingQueue eventSeqNumQueue;
/**
 * Creates a parallel gateway sender queue over the given user regions, using the default
 * MetaRegionFactory to build the shadow queue regions.
 *
 * @param sender the owning gateway sender
 * @param userRegions the user regions whose events this queue carries (must be partitioned)
 * @param idx the index of this queue among the sender's dispatchers
 * @param nDispatcher the total number of dispatchers for the sender
 */
public ParallelGatewaySenderQueue(AbstractGatewaySender sender, Set<Region> userRegions, int idx,
    int nDispatcher) {
  this(sender, userRegions, idx, nDispatcher, new MetaRegionFactory());
}
/**
 * Creates a parallel gateway sender queue. Restores the generic type parameters that were
 * stripped from the original text (the raw Comparator with an {@code @Override} on
 * {@code compare(Region, Region)} cannot compile otherwise).
 *
 * @param sender the owning gateway sender
 * @param userRegions the user regions whose events this queue carries
 * @param idx the index of this queue among the sender's dispatchers
 * @param nDispatcher the total number of dispatchers for the sender
 * @param metaRegionFactory factory for the shadow queue meta-region
 * @throws AsyncEventQueueConfigurationException if an AEQ is attached to a replicated region
 * @throws GatewaySenderConfigurationException if a sender is attached to a replicated region
 */
ParallelGatewaySenderQueue(AbstractGatewaySender sender, Set<Region> userRegions, int idx,
    int nDispatcher, MetaRegionFactory metaRegionFactory) {
  this.metaRegionFactory = metaRegionFactory;
  this.index = idx;
  this.nDispatcher = nDispatcher;
  this.stats = sender.getStatistics();
  this.sender = sender;
  // Sort by full path so every member creates the shadow PRs in the same order.
  List<Region> listOfRegions = new ArrayList<Region>(userRegions);
  Collections.sort(listOfRegions, new Comparator<Region>() {
    @Override
    public int compare(Region o1, Region o2) {
      return o1.getFullPath().compareTo(o2.getFullPath());
    }
  });
  for (Region userRegion : listOfRegions) {
    if (userRegion instanceof PartitionedRegion) {
      addShadowPartitionedRegionForUserPR((PartitionedRegion) userRegion);
    } else {
      // Fix for Bug#51491. Once decided to support this configuration we have call
      // addShadowPartitionedRegionForUserRR
      if (this.sender.getId().contains(AsyncEventQueueImpl.ASYNC_EVENT_QUEUE_PREFIX)) {
        throw new AsyncEventQueueConfigurationException(
            LocalizedStrings.ParallelAsyncEventQueue_0_CAN_NOT_BE_USED_WITH_REPLICATED_REGION_1
                .toLocalizedString(new Object[] {
                    AsyncEventQueueImpl.getAsyncEventQueueIdFromSenderId(this.sender.getId()),
                    userRegion.getFullPath()}));
      }
      throw new GatewaySenderConfigurationException(
          LocalizedStrings.ParallelGatewaySender_0_CAN_NOT_BE_USED_WITH_REPLICATED_REGION_1
              .toLocalizedString(new Object[] {this.sender.getId(), userRegion.getFullPath()}));
    }
  }
  buckToDispatchLock = new StoppableReentrantLock(sender.getCancelCriterion());
  regionToDispatchedKeysMapEmpty = buckToDispatchLock.newCondition();
  queueEmptyLock = new StoppableReentrantLock(sender.getCancelCriterion());
  queueEmptyCondition = queueEmptyLock.newCondition();
  // initialize the conflation thread pool if conflation is enabled
  if (sender.isBatchConflationEnabled()) {
    initializeConflationThreadPool();
  }
}
/**
 * Starts the background batch-removal thread, creating it on the first call only; subsequent
 * calls are no-ops.
 */
public void start() {
  // removalThread is per-instance state, so an instance-level lock suffices; the previous
  // class-level lock needlessly serialized start() across unrelated queue instances.
  synchronized (this) {
    if (removalThread == null) {
      removalThread = new BatchRemovalThread((GemFireCacheImpl) this.sender.getCache(), this);
      removalThread.start();
    }
  }
}
/**
 * Creates (or reuses) the shadow PartitionedRegion backing this queue for a REPLICATED user
 * region. The shadow region is partitioned even though the user region is not, so dispatch can
 * be parallel; the bucket count comes from sender.getMaxParallelismForReplicatedRegion().
 *
 * NOTE(review): the constructor currently rejects replicated regions outright, so this path
 * appears reachable only from future/other callers — confirm before relying on it.
 *
 * @param userRegion the replicated user region to attach a shadow queue region to
 */
public void addShadowPartitionedRegionForUserRR(DistributedRegion userRegion) {
  this.sender.getLifeCycleLock().writeLock().lock();
  PartitionedRegion prQ = null;
  if (logger.isDebugEnabled()) {
    logger.debug(
        "addShadowPartitionedRegionForUserRR: Going to create shadowpr for userRegion {}",
        userRegion.getFullPath());
  }
  try {
    String regionName = userRegion.getFullPath();
    // Already attached; nothing to do.
    if (this.userRegionNameToshadowPRMap.containsKey(regionName))
      return;
    GemFireCacheImpl cache = (GemFireCacheImpl) sender.getCache();
    final String prQName = getQueueName(sender.getId(), userRegion.getFullPath());
    prQ = (PartitionedRegion) cache.getRegion(prQName);
    if (prQ == null) {
      // TODO:REF:Avoid deprecated apis
      AttributesFactory fact = new AttributesFactory();
      // Fix for 48621 - don't enable concurrency checks
      // for queue buckets., event with persistence
      fact.setConcurrencyChecksEnabled(false);
      PartitionAttributesFactory pfact = new PartitionAttributesFactory();
      pfact.setTotalNumBuckets(sender.getMaxParallelismForReplicatedRegion());
      // Accessor-style user regions (no storage) get a zero-memory shadow region too.
      int localMaxMemory =
          userRegion.getDataPolicy().withStorage() ? sender.getMaximumQueueMemory() : 0;
      pfact.setLocalMaxMemory(localMaxMemory);
      pfact.setRedundantCopies(3); // TODO:Kishor : THis need to be handled nicely
      pfact.setPartitionResolver(new RREventIDResolver());
      if (sender.isPersistenceEnabled()) {
        fact.setDataPolicy(DataPolicy.PERSISTENT_PARTITION);
      }
      fact.setDiskStoreName(sender.getDiskStoreName());
      // if persistence is enabled, set the diskSyncronous to whatever user
      // has set
      // else set it to false
      // optimize with above check of enable persistence
      if (sender.isPersistenceEnabled())
        fact.setDiskSynchronous(sender.isDiskSynchronous());
      else {
        fact.setDiskSynchronous(false);
      }
      // allow for no overflow directory
      EvictionAttributes ea = EvictionAttributes.createLIFOMemoryAttributes(
          sender.getMaximumQueueMemory(), EvictionAction.OVERFLOW_TO_DISK);
      fact.setEvictionAttributes(ea);
      fact.setPartitionAttributes(pfact.create());
      final RegionAttributes ra = fact.create();
      if (logger.isDebugEnabled()) {
        logger.debug("{}: Attempting to create queue region: {}", this, prQName);
      }
      ParallelGatewaySenderQueueMetaRegion meta =
          new ParallelGatewaySenderQueueMetaRegion(prQName, ra, null, cache, sender);
      try {
        prQ = (PartitionedRegion) cache.createVMRegion(prQName, ra,
            new InternalRegionArguments().setInternalMetaRegion(meta).setDestroyLockFlag(true)
                .setSnapshotInputStream(null).setImageTarget(null));
        if (logger.isDebugEnabled()) {
          logger.debug("Region created : {} partition Attributes : {}", prQ,
              prQ.getPartitionAttributes());
        }
        // Suranjan: TODO This should not be set on the PR but on the
        // GatewaySender
        prQ.enableConflation(sender.isBatchConflationEnabled());
        // Before going ahead, make sure all the buckets of shadowPR are
        // loaded
        // and primary nodes have been decided.
        // This is required in case of persistent PR and sender.
        // Iterating the bucket set forces bucket creation/recovery as a side effect.
        if (prQ.getLocalMaxMemory() != 0) {
          Iterator itr = prQ.getRegionAdvisor().getBucketSet().iterator();
          while (itr.hasNext()) {
            itr.next();
          }
        }
        // In case of Replicated Region it may not be necessary.
        // if (sender.isPersistenceEnabled()) {
        // //Kishor: I need to write a test for this code.
        // Set allBucketsClone = new HashSet();
        // // allBucketsClone.addAll(allBuckets);*/
        // for (int i = 0; i < sender.getMaxParallelismForReplicatedRegion(); i++)
        // allBucketsClone.add(i);
        //
        // while (!(allBucketsClone.size() == 0)) {
        // Iterator itr = allBucketsClone.iterator();
        // while (itr.hasNext()) {
        // InternalDistributedMember node = prQ.getNodeForBucketWrite(
        // itr.next(), null);
        // if (node != null) {
        // itr.remove();
        // }
        // }
        // // after the iteration is over, sleep for sometime before trying
        // // again
        // try {
        // Thread.sleep(WAIT_CYCLE_SHADOW_BUCKET_LOAD);
        // }
        // catch (InterruptedException e) {
        // logger.error(e);
        // }
        // }
        // }
      } catch (IOException veryUnLikely) {
        logger.fatal(LocalizedMessage.create(
            LocalizedStrings.SingleWriteSingleReadRegionQueue_UNEXPECTED_EXCEPTION_DURING_INIT_OF_0,
            this.getClass()), veryUnLikely);
      } catch (ClassNotFoundException alsoUnlikely) {
        logger.fatal(LocalizedMessage.create(
            LocalizedStrings.SingleWriteSingleReadRegionQueue_UNEXPECTED_EXCEPTION_DURING_INIT_OF_0,
            this.getClass()), alsoUnlikely);
      }
      if (logger.isDebugEnabled()) {
        logger.debug("{}: Created queue region: {}", this, prQ);
      }
    } else {
      // in case shadowPR exists already (can be possible when sender is
      // started from stop operation)
      if (this.index == 0) // HItesh: for first processor only
        handleShadowPRExistsScenario(cache, prQ);
    }
    /*
     * Here, enqueueTempEvents need to be invoked when a sender is already running and userPR is
     * created later. When the flow comes here through start() method of sender i.e. userPR
     * already exists and sender is started later, the enqueueTempEvents is done in the start()
     * method of ParallelGatewaySender
     */
    if ((this.index == this.nDispatcher - 1) && this.sender.isRunning()) {
      ((AbstractGatewaySender) sender).enqueueTempEvents();
    }
  } finally {
    // Register the mapping even on partial failure so later lookups find the region.
    if (prQ != null) {
      this.userRegionNameToshadowPRMap.put(userRegion.getFullPath(), prQ);
    }
    this.sender.getLifeCycleLock().writeLock().unlock();
  }
}
/**
 * Converts a user region path into the suffix used for the shadow queue region name.
 *
 * NOTE(review): this deliberately returns the empty string for every path, so each sender gets
 * a single queue region named senderId + QSTRING regardless of the user region. An earlier
 * revision returned fullPath.replaceAll("/", "_") — confirm the collapse is intended.
 */
private static String convertPathToName(String fullPath) {
  return "";
}
/**
 * Creates (or reuses) the shadow PartitionedRegion backing this queue for a partitioned user
 * region. The shadow PR is colocated with the leader region of the user PR's colocation chain
 * and mirrors its bucket count, redundancy and recovery delays.
 *
 * @param userPR the user partitioned region to attach a shadow queue region to
 * @throws GatewaySenderException if a non-persistent sender is attached to a persistent region
 */
public void addShadowPartitionedRegionForUserPR(PartitionedRegion userPR) {
  if (logger.isDebugEnabled()) {
    logger.debug("{} addShadowPartitionedRegionForUserPR: Attempting to create queue region: {}",
        this, userPR.getDisplayName());
  }
  this.sender.getLifeCycleLock().writeLock().lock();
  PartitionedRegion prQ = null;
  try {
    String regionName = userPR.getFullPath();
    // Find if there is any parent region for this userPR
    // if there is then no need to add another q for the same
    String leaderRegionName = ColocationHelper.getLeaderRegion(userPR).getFullPath();
    if (!regionName.equals(leaderRegionName)) {
      // Fix for defect #50364. Allow user to attach GatewaySender to child PR (without attaching
      // to leader PR)
      // though, internally, colocate the GatewaySender's shadowPR with the leader PR in
      // colocation chain
      if (!this.userRegionNameToshadowPRMap.containsKey(leaderRegionName)) {
        addShadowPartitionedRegionForUserPR(ColocationHelper.getLeaderRegion(userPR));
      }
      return;
    }
    if (this.userRegionNameToshadowPRMap.containsKey(regionName))
      return;
    // A persistent user region demands a persistent queue, otherwise events could be lost.
    if (userPR.getDataPolicy().withPersistence() && !sender.isPersistenceEnabled()) {
      throw new GatewaySenderException(
          LocalizedStrings.ParallelGatewaySenderQueue_NON_PERSISTENT_GATEWAY_SENDER_0_CAN_NOT_BE_ATTACHED_TO_PERSISTENT_REGION_1
              .toLocalizedString(new Object[] {this.sender.getId(), userPR.getFullPath()}));
    }
    GemFireCacheImpl cache = (GemFireCacheImpl) sender.getCache();
    boolean isAccessor = (userPR.getLocalMaxMemory() == 0);
    final String prQName = sender.getId() + QSTRING + convertPathToName(userPR.getFullPath());
    prQ = (PartitionedRegion) cache.getRegion(prQName);
    if (prQ == null) {
      // TODO:REF:Avoid deprecated apis
      AttributesFactory fact = new AttributesFactory();
      fact.setConcurrencyChecksEnabled(false);
      PartitionAttributesFactory pfact = new PartitionAttributesFactory();
      // Mirror the user PR's partitioning so shadow buckets line up with user buckets.
      pfact.setTotalNumBuckets(userPR.getTotalNumberOfBuckets());
      pfact.setRedundantCopies(userPR.getRedundantCopies());
      pfact.setColocatedWith(regionName);
      // EITHER set localMaxMemory to 0 for accessor node
      // OR override shadowPRs default local max memory with the sender's max
      // queue memory (Fix for bug#44254)
      int localMaxMemory = isAccessor ? 0 : sender.getMaximumQueueMemory();
      pfact.setLocalMaxMemory(localMaxMemory);
      pfact.setStartupRecoveryDelay(userPR.getPartitionAttributes().getStartupRecoveryDelay());
      pfact.setRecoveryDelay(userPR.getPartitionAttributes().getRecoveryDelay());
      if (sender.isPersistenceEnabled() && !isAccessor) {
        fact.setDataPolicy(DataPolicy.PERSISTENT_PARTITION);
      }
      fact.setDiskStoreName(sender.getDiskStoreName());
      // if persistence is enabled, set the diskSyncronous to whatever user has set
      // else set it to false
      if (sender.isPersistenceEnabled())
        fact.setDiskSynchronous(sender.isDiskSynchronous());
      else {
        fact.setDiskSynchronous(false);
      }
      // allow for no overflow directory
      EvictionAttributes ea = EvictionAttributes.createLIFOMemoryAttributes(
          sender.getMaximumQueueMemory(), EvictionAction.OVERFLOW_TO_DISK);
      fact.setEvictionAttributes(ea);
      fact.setPartitionAttributes(pfact.create());
      final RegionAttributes ra = fact.create();
      if (logger.isDebugEnabled()) {
        logger.debug("{}: Attempting to create queue region: {}", this, prQName);
      }
      ParallelGatewaySenderQueueMetaRegion meta =
          metaRegionFactory.newMetataRegion(cache, prQName, ra, sender);
      try {
        prQ = (PartitionedRegion) cache.createVMRegion(prQName, ra,
            new InternalRegionArguments().setInternalMetaRegion(meta).setDestroyLockFlag(true)
                .setInternalRegion(true).setSnapshotInputStream(null).setImageTarget(null));
        // at this point we should be able to assert prQ == meta;
        // Suranjan: TODO This should not be set on the PR but on the GatewaySender
        prQ.enableConflation(sender.isBatchConflationEnabled());
        if (isAccessor)
          return; // return from here if accessor node
        // Wait for buckets to be recovered.
        prQ.shadowPRWaitForBucketRecovery();
      } catch (IOException | ClassNotFoundException veryUnLikely) {
        logger.fatal(LocalizedMessage.create(
            LocalizedStrings.SingleWriteSingleReadRegionQueue_UNEXPECTED_EXCEPTION_DURING_INIT_OF_0,
            this.getClass()), veryUnLikely);
      }
      if (logger.isDebugEnabled()) {
        logger.debug("{}: Created queue region: {}", this, prQ);
      }
    } else {
      if (isAccessor)
        return; // return from here if accessor node
      // in case shadowPR exists already (can be possible when sender is
      // started from stop operation)
      if (this.index == 0) // HItesh:for first parallelGatewaySenderQueue only
        handleShadowPRExistsScenario(cache, prQ);
    }
  } finally {
    // Runs on every exit path, including the early accessor returns above.
    if (prQ != null) {
      this.userRegionNameToshadowPRMap.put(userPR.getFullPath(), prQ);
    }
    /*
     * Here, enqueueTempEvents need to be invoked when a sender is already running and userPR is
     * created later. When the flow comes here through start() method of sender i.e. userPR
     * already exists and sender is started later, the enqueueTempEvents is done in the start()
     * method of ParallelGatewaySender
     */
    if ((this.index == this.nDispatcher - 1) && this.sender.isRunning()) {
      ((AbstractGatewaySender) sender).enqueueTempEvents();
    }
    afterRegionAdd(userPR);
    this.sender.getLifeCycleLock().writeLock().unlock();
  }
}
/**
 * This will be the case when the sender is started again after a stop operation: the shadow PR
 * already exists, so instead of creating it we clear all its local buckets (primary and
 * secondary) for a fresh start.
 *
 * @param cache the cache (currently unused, kept for interface stability)
 * @param prQ the pre-existing shadow partitioned region
 */
private void handleShadowPRExistsScenario(Cache cache, PartitionedRegion prQ) {
  // Note: The region will not be null if the sender is started again after stop operation
  if (logger.isDebugEnabled()) {
    logger.debug("{}: No need to create the region as the region has been retrieved: {}", this,
        prQ);
  }
  // Restored the stripped generic parameter; iterating a raw Set as BucketRegion
  // does not compile.
  Set<BucketRegion> localBucketRegions = prQ.getDataStore().getAllLocalBucketRegions();
  for (BucketRegion bucketRegion : localBucketRegions) {
    bucketRegion.clear();
  }
}
// Hook invoked after a user PR has been attached; intentionally empty here,
// intended for subclasses to override.
protected void afterRegionAdd(PartitionedRegion userPR) {
}
/**
 * Initialize the conflation thread pool: a fixed-size executor with as many daemon threads as
 * processors available to the JVM, all created inside a logging thread group.
 */
private void initializeConflationThreadPool() {
  final LoggingThreadGroup conflationThreadGroup =
      LoggingThreadGroup.createThreadGroup("WAN Queue Conflation Logger Group", logger);
  final ThreadFactory conflationThreadFactory = new ThreadFactory() {
    @Override
    public Thread newThread(final Runnable command) {
      // Daemon threads so a pending conflation never blocks JVM shutdown.
      Thread conflationThread =
          new Thread(conflationThreadGroup, command, "WAN Queue Conflation Thread");
      conflationThread.setDaemon(true);
      return conflationThread;
    }
  };
  final int poolSize = Runtime.getRuntime().availableProcessors();
  this.conflationExecutor = Executors.newFixedThreadPool(poolSize, conflationThreadFactory);
}
/**
 * Cleans up the conflation thread pool in two phases: an orderly shutdown first (no new tasks,
 * wait briefly for in-flight ones), then a forced shutdownNow with one more grace period, and a
 * warning if tasks still refuse to terminate.
 *
 * @param sender the sender being stopped, or null when stopping all senders (used in the log)
 */
private void cleanupConflationThreadPool(AbstractGatewaySender sender) {
  final ExecutorService executor = conflationExecutor;
  if (executor == null) {
    return;
  }
  // Phase 1: stop accepting new tasks.
  executor.shutdown();
  try {
    boolean terminated = executor.awaitTermination(1, TimeUnit.SECONDS);
    if (!terminated) {
      // Phase 2: interrupt running tasks, then give them a final grace period.
      executor.shutdownNow();
      if (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
        logger.warn(LocalizedMessage.create(
            LocalizedStrings.ParallelGatewaySenderQueue_COULD_NOT_TERMINATE_CONFLATION_THREADPOOL,
            (sender == null ? "all" : sender)));
      }
    }
  } catch (InterruptedException ie) {
    // Force shutdown and preserve the interrupt status for callers.
    conflationExecutor.shutdownNow();
    Thread.currentThread().interrupt();
  }
}
/**
 * Queues the given event into the shadow PR bucket matching the event's bucket id.
 *
 * @param object a GatewaySenderEventImpl (cast unconditionally)
 * @return true if the event was stored in a bucket queue or the temp queue; false if it was
 *         intentionally dropped (user PR not yet configured, shadow key unset for a PR event,
 *         or the shadow bucket is destroyed / not hosted here for a DR event)
 */
public boolean put(Object object) throws InterruptedException, CacheException {
  final boolean isDebugEnabled = logger.isDebugEnabled();
  boolean putDone = false;
  // Suranjan : Can this region ever be null? Should we work with regionName and not with region
  // instance.
  // It can't be as put is happeing on the region and its still under process
  GatewaySenderEventImpl value = (GatewaySenderEventImpl) object;
  boolean isDREvent = isDREvent(value);
  // if (isDREvent(value)) {
  // putInShadowPRForReplicatedRegion(object);
  // value.freeOffHeapValue();
  // return;
  // }
  Region region = value.getRegion();
  String regionPath = null;
  // PR events are keyed by the leader region of the colocation chain; DR events by their own path.
  if (isDREvent) {
    regionPath = region.getFullPath();
  } else {
    regionPath = ColocationHelper.getLeaderRegion((PartitionedRegion) region).getFullPath();
  }
  if (isDebugEnabled) {
    logger.debug("Put is for the region {}", region);
  }
  if (!this.userRegionNameToshadowPRMap.containsKey(regionPath)) {
    if (isDebugEnabled) {
      logger.debug("The userRegionNameToshadowPRMap is {}", userRegionNameToshadowPRMap);
    }
    logger.warn(LocalizedMessage
        .create(LocalizedStrings.NOT_QUEUING_AS_USERPR_IS_NOT_YET_CONFIGURED, value));
    // does not put into queue
    return false;
  }
  PartitionedRegion prQ = this.userRegionNameToshadowPRMap.get(regionPath);
  int bucketId = value.getBucketId();
  Object key = null;
  if (!isDREvent) {
    key = value.getShadowKey();
    if ((Long) key == -1) {
      // In case of parallel we don't expect
      // the key to be not set. If it is the case then the event must be coming
      // through listener, so return.
      if (isDebugEnabled) {
        logger.debug("ParallelGatewaySenderOrderedQueue not putting key {} : Value : {}", key,
            value);
      }
      // does not put into queue
      return false;
    }
  } else {
    key = value.getEventId();
  }
  if (isDebugEnabled) {
    logger.debug("ParallelGatewaySenderOrderedQueue putting key {} : Value : {}", key, value);
  }
  AbstractBucketRegionQueue brq =
      (AbstractBucketRegionQueue) prQ.getDataStore().getLocalBucketById(bucketId);
  try {
    if (brq == null) {
      // Bucket not yet initialized locally; look it up below initialization level
      // and fall back to the temp queue if it is still being created.
      // Set the threadInitLevel to BEFORE_INITIAL_IMAGE.
      int oldLevel = LocalRegion.setThreadInitLevelRequirement(LocalRegion.BEFORE_INITIAL_IMAGE);
      try {
        // Full path of the bucket:
        final String bucketFullPath =
            Region.SEPARATOR + PartitionedRegionHelper.PR_ROOT_REGION_NAME + Region.SEPARATOR
                + prQ.getBucketName(bucketId);
        brq = (AbstractBucketRegionQueue) prQ.getCache().getRegionByPath(bucketFullPath);
        if (isDebugEnabled) {
          logger.debug(
              "ParallelGatewaySenderOrderedQueue : The bucket in the cache is bucketRegionName : {} bucket : {}",
              bucketFullPath, brq);
        }
        if (brq != null) {
          brq.getInitializationLock().readLock().lock();
          try {
            putIntoBucketRegionQueue(brq, key, value);
            putDone = true;
          } finally {
            brq.getInitializationLock().readLock().unlock();
          }
        } else if (isDREvent) {
          // in case of DR with PGS, if shadow bucket is not found event after
          // above search then it means that bucket is not intended for this
          // node. So lets not add this event in temp queue event as we are
          // doing it for PRevent
          // does not put onto the queue
        } else {
          // We have to handle the case where brq is null because the
          // colocation
          // chain is getting destroyed one by one starting from child region
          // i.e this bucket due to moveBucket operation
          // In that case we don't want to store this event.
          if (((PartitionedRegion) prQ.getColocatedWithRegion()).getRegionAdvisor()
              .getBucketAdvisor(bucketId).getShadowBucketDestroyed()) {
            if (isDebugEnabled) {
              logger.debug(
                  "ParallelGatewaySenderOrderedQueue not putting key {} : Value : {} as shadowPR bucket is destroyed.",
                  key, value);
            }
            // does not put onto the queue
          } else {
            /*
             * This is to prevent data loss, in the scenario when bucket is not available in the
             * cache but we know that it will be created.
             */
            BlockingQueue tempQueue = null;
            synchronized (this.bucketToTempQueueMap) {
              tempQueue = this.bucketToTempQueueMap.get(bucketId);
              if (tempQueue == null) {
                tempQueue = new LinkedBlockingQueue();
                this.bucketToTempQueueMap.put(bucketId, tempQueue);
              }
            }
            // Lock the temp queue, then re-check for the bucket: prevents the race where the
            // bucket becomes available between the first lookup and the temp-queue add.
            synchronized (tempQueue) {
              brq = (AbstractBucketRegionQueue) prQ.getCache().getRegionByPath(bucketFullPath);
              if (brq != null) {
                brq.getInitializationLock().readLock().lock();
                try {
                  putIntoBucketRegionQueue(brq, key, value);
                  putDone = true;
                } finally {
                  brq.getInitializationLock().readLock().unlock();
                }
              } else {
                // tempQueue = this.bucketToTempQueueMap.get(bucketId);
                // if (tempQueue == null) {
                // tempQueue = new LinkedBlockingQueue();
                // this.bucketToTempQueueMap.put(bucketId, tempQueue);
                // }
                tempQueue.add(value);
                putDone = true;
                // For debugging purpose.
                if (isDebugEnabled) {
                  logger.debug(
                      "The value {} is enqueued to the tempQueue for the BucketRegionQueue.",
                      value);
                }
              }
            }
          }
          // }
        }
      } finally {
        LocalRegion.setThreadInitLevelRequirement(oldLevel);
      }
    } else {
      boolean thisbucketDestroyed = false;
      if (!isDREvent) {
        thisbucketDestroyed =
            ((PartitionedRegion) prQ.getColocatedWithRegion()).getRegionAdvisor()
                .getBucketAdvisor(bucketId).getShadowBucketDestroyed() || brq.isDestroyed();
      } else {
        thisbucketDestroyed = brq.isDestroyed();
      }
      if (!thisbucketDestroyed) {
        putIntoBucketRegionQueue(brq, key, value);
        putDone = true;
      } else {
        if (isDebugEnabled) {
          logger.debug(
              "ParallelGatewaySenderOrderedQueue not putting key {} : Value : {} as shadowPR bucket is destroyed.",
              key, value);
        }
        // does not put onto the queue
      }
    }
  } finally {
    // Always wake the processor: an entry may now be peekable even on a drop path.
    notifyEventProcessorIfRequired();
  }
  return putDone;
}
/**
 * Wakes the dispatcher if it is blocked on an empty queue. Putter threads take the cheap
 * volatile-read fast path first so they do not contend on the lock for every put.
 */
public void notifyEventProcessorIfRequired() {
  // putter thread should not take lock every time
  if (!isQueueEmpty) {
    return;
  }
  queueEmptyLock.lock();
  try {
    if (logger.isDebugEnabled()) {
      logger.debug("Going to notify, isQueueEmpty {}", isQueueEmpty);
    }
    // Re-check under the lock; another putter may have signalled in the meantime.
    if (isQueueEmpty) {
      isQueueEmpty = false;
      queueEmptyCondition.signal();
    }
  } finally {
    if (logger.isDebugEnabled()) {
      logger.debug("Notified!, isQueueEmpty {}", isQueueEmpty);
    }
    queueEmptyLock.unlock();
  }
}
/**
 * Adds the key/value pair to the given bucket region queue, releasing the event's off-heap
 * resources whenever it was not actually queued (null bucket, addToQueue returned false, or an
 * exception was raised).
 */
private void putIntoBucketRegionQueue(AbstractBucketRegionQueue brq, Object key,
    GatewaySenderEventImpl value) {
  boolean queued = false;
  try {
    if (brq != null) {
      queued = brq.addToQueue(key, value);
      // Historical note: an incQueueSize() call for primary buckets lived here once but was
      // removed because it broke ParallelWANstats tests after a merge.
    }
  } catch (BucketNotFoundException e) {
    if (logger.isDebugEnabled()) {
      logger.debug("For bucket {} the current bucket redundancy is {}", brq.getId(),
          brq.getPartitionedRegion().getRegionAdvisor().getBucketAdvisor(brq.getId())
              .getBucketRedundancy());
    }
  } catch (ForceReattemptException e) {
    if (logger.isDebugEnabled()) {
      logger.debug(
          "getInitializedBucketForId: Got ForceReattemptException for {} for bucket = {}", this,
          brq.getId());
    }
  } finally {
    if (!queued) {
      value.release();
    }
  }
}
/**
 * This returns the queue region if there is only one PartitionedRegion using the GatewaySender.
 * Otherwise it returns null.
 */
public Region getRegion() {
  Object[] shadowPRs = this.userRegionNameToshadowPRMap.values().toArray();
  return shadowPRs.length == 1 ? (Region) shadowPRs[0] : null;
}
/**
 * Looks up the shadow PartitionedRegion registered for the given user region
 * full path, or null when none is registered.
 */
public PartitionedRegion getRegion(String fullpath) {
  return userRegionNameToshadowPRMap.get(fullpath);
}
/**
 * Unregisters and returns the shadow PR for the given user region path while
 * holding the sender's lifecycle write lock. Also resets the
 * enqueued-all-temp-queue-events flag.
 *
 * @param fullpath full path of the user region whose shadow PR is removed
 * @return the removed shadow PR, or null if none was registered
 */
public PartitionedRegion removeShadowPR(String fullpath) {
  // Acquire the lock BEFORE entering the try block: if lock() itself throws,
  // the old code would hit unlock() in finally on a lock it never held and
  // mask the original failure with an IllegalMonitorStateException.
  this.sender.getLifeCycleLock().writeLock().lock();
  try {
    this.sender.setEnqueuedAllTempQueueEvents(false);
    return this.userRegionNameToshadowPRMap.remove(fullpath);
  } finally {
    this.sender.getLifeCycleLock().writeLock().unlock();
  }
}
/** Returns the executor service used to run conflation tasks for this queue. */
public ExecutorService getConflationExecutor() {
  return conflationExecutor;
}
/**
 * Returns a snapshot of the shadow PartitionedRegions backing this queue.
 */
public Set getRegions() {
  return new HashSet(userRegionNameToshadowPRMap.values());
}
// TODO: Suranjan Find optimal way to get Random shadow pr as this will be called in each put and
// peek.
/**
 * Returns a randomly chosen shadow PR, or null when none are registered.
 *
 * <p>The values are snapshotted once via toArray(): the old code called
 * size() and then toArray() separately, and a concurrent removal between the
 * two calls could yield an index past the end of the array.
 */
protected PartitionedRegion getRandomShadowPR() {
  Object[] shadowPRs = this.userRegionNameToshadowPRMap.values().toArray();
  if (shadowPRs.length == 0) {
    return null;
  }
  // ThreadLocalRandom avoids allocating a new Random instance on every call.
  int randomIndex = java.util.concurrent.ThreadLocalRandom.current().nextInt(shadowPRs.length);
  return (PartitionedRegion) shadowPRs[randomIndex];
}
/**
 * Returns true when the event originated from a DistributedRegion (replicated
 * region) rather than a PartitionedRegion.
 */
private boolean isDREvent(GatewaySenderEventImpl event) {
  // The instanceof result is already a boolean; the old "? true : false" was redundant.
  return event.getRegion() instanceof DistributedRegion;
}
/**
 * Take will choose a random BucketRegionQueue which is primary and will take the head element
 * from it.
 *
 * <p>Not implemented for the parallel queue: always throws
 * {@link UnsupportedOperationException}. Consumers drain via peek()/remove()
 * instead.
 */
@Override
public Object take() throws CacheException, InterruptedException {
// merge42180.
throw new UnsupportedOperationException();
}
/**
 * Picks a random local primary bucket of a random shadow PR and returns it if
 * it is ready for a peek.
 *
 * <p>TODO: Optimization needed. We are creating 1 array list for each peek!!
 *
 * @return a peek-ready BucketRegionQueue, or null when none is available
 */
private final BucketRegionQueue getRandomBucketRegionQueue() {
  PartitionedRegion prQ = getRandomShadowPR();
  if (prQ == null) {
    return null;
  }
  final PartitionedRegionDataStore ds = prQ.getDataStore();
  // Restored the <Integer> type parameter lost in the published source; the raw
  // List made "int brqId = buckets.get(index)" uncompilable.
  final List<Integer> buckets = new ArrayList<Integer>(ds.getAllLocalPrimaryBucketIds());
  if (buckets.isEmpty()) {
    return null;
  }
  // ThreadLocalRandom avoids allocating a fresh Random on every call.
  final int index = java.util.concurrent.ThreadLocalRandom.current().nextInt(buckets.size());
  final int brqId = buckets.get(index);
  final BucketRegionQueue brq = (BucketRegionQueue) ds.getLocalBucketById(brqId);
  return brq.isReadyForPeek() ? brq : null;
}
/**
 * Returns true when at least one shadow PR hosts a local bucket region on this
 * member, false otherwise.
 */
protected boolean areLocalBucketQueueRegionsPresent() {
  // The old unused local "bucketsAvailable" was removed; the loop returns directly.
  for (PartitionedRegion prQ : this.userRegionNameToshadowPRMap.values()) {
    if (prQ.getDataStore().getAllLocalBucketRegions().size() > 0) {
      return true;
    }
  }
  return false;
}
// Round-robin cursor over this dispatcher's primary buckets; advanced by
// getRandomPrimaryBucket() so successive calls rotate through the buckets.
private int pickBucketId;

/**
 * Returns the id of a local primary bucket serviced by this dispatcher that is
 * ready for a peek, or -1 when none qualifies. Buckets are visited round-robin
 * starting from {@link #pickBucketId}.
 *
 * <p>Generic type parameters ({@code Set<Map.Entry<Integer, BucketRegion>>},
 * {@code List<Integer>}) were restored; they had been stripped from the
 * published source, which made the block uncompilable.
 *
 * @param prQ the shadow partitioned region to choose a bucket from; may be null
 */
protected int getRandomPrimaryBucket(PartitionedRegion prQ) {
  if (prQ != null) {
    Set<Map.Entry<Integer, BucketRegion>> allBuckets = prQ.getDataStore().getAllLocalBuckets();
    List<Integer> thisProcessorBuckets = new ArrayList<Integer>();
    for (Map.Entry<Integer, BucketRegion> bucketEntry : allBuckets) {
      BucketRegion bucket = bucketEntry.getValue();
      if (bucket.getBucketAdvisor().isPrimary()) {
        int bId = bucket.getId();
        // Each dispatcher only services the buckets mapped to its index.
        if (bId % this.nDispatcher == this.index) {
          thisProcessorBuckets.add(bId);
        }
      }
    }
    if (logger.isDebugEnabled()) {
      logger.debug("getRandomPrimaryBucket: total {} for this processor: {}", allBuckets.size(),
          thisProcessorBuckets.size());
    }
    // Try each candidate at most once, continuing from where the last call stopped.
    int nTry = thisProcessorBuckets.size();
    while (nTry-- > 0) {
      if (pickBucketId >= thisProcessorBuckets.size()) {
        pickBucketId = 0;
      }
      BucketRegionQueue br =
          getBucketRegionQueueByBucketId(prQ, thisProcessorBuckets.get(pickBucketId++));
      if (br != null && br.isReadyForPeek()) {
        return br.getId();
      }
    }
    // TODO:REF: return the BucketRegionQueue itself instead of its id so callers
    // do not have to look the bucket up a second time.
  }
  return -1;
}
/**
 * Batch take is not implemented for the parallel queue: always throws
 * {@link UnsupportedOperationException}. Consumers drain via peek()/remove()
 * instead.
 */
@Override
public List take(int batchSize) throws CacheException, InterruptedException {
// merge42180
throw new UnsupportedOperationException();
}
/**
 * Removes the oldest peeked event from its shadow bucket region queue and
 * releases its (possibly off-heap) payload. A no-op when nothing was peeked.
 *
 * <p>The shadow PR, bucket id, and shadow key are resolved from the event's
 * region when available, otherwise from the event's recorded region path.
 */
@Override
public void remove() throws CacheException {
  if (this.peekedEvents.isEmpty()) {
    return;
  }
  GatewaySenderEventImpl event = this.peekedEvents.remove();
  try {
    PartitionedRegion prQ = null;
    int bucketId = -1;
    Object key = null;
    if (event.getRegion() != null) {
      if (isDREvent(event)) {
        // DR events are keyed by their event id; bucket comes from the event id too.
        prQ = this.userRegionNameToshadowPRMap.get(event.getRegion().getFullPath());
        bucketId = event.getEventId().getBucketID();
        key = event.getEventId();
      } else {
        // PR events map through the colocation leader region's shadow PR.
        prQ = this.userRegionNameToshadowPRMap.get(ColocationHelper
            .getLeaderRegion((PartitionedRegion) event.getRegion()).getFullPath());
        bucketId = event.getBucketId();
        key = event.getShadowKey();
      }
    } else {
      String regionPath = event.getRegionPath();
      GemFireCacheImpl cache = (GemFireCacheImpl) this.sender.getCache();
      // BUGFIX: no (PartitionedRegion) cast here -- the region may be a
      // DistributedRegion (checked just below); the old cast threw
      // ClassCastException for DR events.
      Region region = cache.getRegion(regionPath);
      if (region != null && !region.isDestroyed()) {
        // TODO: Suranjan We have to get colocated parent region for this region
        if (region instanceof DistributedRegion) {
          prQ = this.userRegionNameToshadowPRMap.get(region.getFullPath());
          // BUGFIX: the bucket id was previously computed and discarded
          // ("event.getBucketId();"), leaving bucketId == -1.
          bucketId = event.getEventId().getBucketID();
          key = event.getEventId();
        } else {
          prQ = this.userRegionNameToshadowPRMap
              .get(ColocationHelper.getLeaderRegion((PartitionedRegion) region).getFullPath());
          bucketId = event.getBucketId();
          key = event.getShadowKey();
        }
      }
    }
    if (prQ != null) {
      destroyEventFromQueue(prQ, bucketId, key);
    }
  } finally {
    // Always release the event so its off-heap memory is freed, even on failure.
    try {
      event.release();
    } catch (IllegalStateException e) {
      logger.error("Exception caught and logged. The thread will continue running", e);
    }
  }
}
/**
 * Destroys the given key from the shadow bucket region queue, provided this
 * member currently hosts the primary for that bucket, then records the key via
 * addRemovedEvent() so the removal is propagated to secondaries in the next
 * batch. All bucket-movement/destruction exceptions are tolerated: the key is
 * still recorded for batch removal.
 *
 * @param prQ the shadow partitioned region holding the bucket
 * @param bucketId id of the bucket the key lives in
 * @param key the shadow key (or event id for DR events) to destroy
 */
private void destroyEventFromQueue(PartitionedRegion prQ, int bucketId, Object key) {
boolean isPrimary = prQ.getRegionAdvisor().getBucketAdvisor(bucketId).isPrimary();
if (isPrimary) {
BucketRegionQueue brq = getBucketRegionQueueByBucketId(prQ, bucketId);
// TODO : Kishor : Make sure we dont need to initalize a bucket
// before destroying a key from it
try {
if (brq != null) {
brq.destroyKey(key);
}
// NOTE(review): the stat is decremented even when brq is null -- presumably
// to stay consistent with the increment done at enqueue time; confirm.
stats.decQueueSize();
} catch (EntryNotFoundException e) {
// Expected with conflation: the entry may already have been conflated away.
if (!this.sender.isBatchConflationEnabled() && logger.isDebugEnabled()) {
logger.debug(
"ParallelGatewaySenderQueue#remove: Got EntryNotFoundException while removing key {} for {} for bucket = {} for GatewaySender {}",
key, this, bucketId, this.sender);
}
} catch (ForceReattemptException e) {
if (logger.isDebugEnabled()) {
logger.debug("Bucket :{} moved to other member", bucketId);
}
} catch (PrimaryBucketException e) {
if (logger.isDebugEnabled()) {
logger.debug("Primary bucket :{} moved to other member", bucketId);
}
} catch (RegionDestroyedException e) {
if (logger.isDebugEnabled()) {
logger.debug(
"Caught RegionDestroyedException attempting to remove key {} from bucket {} in {}",
key, bucketId, prQ.getFullPath());
}
}
// Record the removal regardless of the outcome above so secondaries are told
// to drop the key in the next batch-removal cycle.
addRemovedEvent(prQ, bucketId, key);
}
}
/**
 * Marks the last peek as invalid so the next peek starts from scratch, and
 * discards the bookkeeping for peeked events whose processing was in progress.
 */
public void resetLastPeeked() {
  this.resetLastPeeked = true;
  // Forget partially-processed peeked events; they will be peeked again.
  this.peekedEventsProcessingInProgress = false;
  this.peekedEventsProcessing.clear();
}
// Need to improve here. If the first peek returns NULL then look in another bucket.
/**
 * Peeks the head event of a randomly chosen local primary bucket queue.
 *
 * @return the head event, or null when no shadow PR / primary bucket is ready
 */
@Override
public Object peek() throws InterruptedException, CacheException {
  Object object = null;
  int bucketId = -1;
  PartitionedRegion prQ = getRandomShadowPR();
  if (prQ != null && prQ.getDataStore().getAllLocalBucketRegions().size() > 0
      && ((bucketId = getRandomPrimaryBucket(prQ)) != -1)) {
    BucketRegionQueue brq;
    try {
      brq = ((BucketRegionQueue) prQ.getDataStore().getInitializedBucketForId(null, bucketId));
      object = brq.peek();
    } catch (BucketRegionQueueUnavailableException e) {
      // Bucket became unavailable mid-peek; object is still null.
      return object;
    } catch (ForceReattemptException e) {
      // BUGFIX: the old message said "Remove: ... bucke" -- wrong operation
      // name (copied from remove()) and a typo.
      if (logger.isDebugEnabled()) {
        logger.debug("Peek: Got ForceReattemptException for {} for bucket = {}", this, bucketId);
      }
    }
  }
  return object; // OFFHEAP: ok since only callers uses it to check for empty queue
}
// This method may need synchronization in case it is used by
// ConcurrentParallelGatewaySender
/**
 * Records a dispatched (removed) event's key in the per-region map of
 * dispatched keys, so a background thread can tell secondaries to remove it.
 * Signals the batch-removal thread when the map transitions from empty to
 * non-empty.
 *
 * @param prQ shadow PR the key was removed from
 * @param bucketId bucket the key belonged to
 * @param key the dispatched shadow key (or event id for DR events)
 */
protected void addRemovedEvent(PartitionedRegion prQ, int bucketId, Object key) {
// NOTE(review): buckToDispatchLock can apparently be null (no-op then) --
// presumably when the batch-removal machinery is not running; confirm.
StoppableReentrantLock lock = buckToDispatchLock;
if (lock != null) {
lock.lock();
// Capture emptiness under the lock so the empty->non-empty transition is
// detected exactly once and the waiting thread is signalled below.
boolean wasEmpty = regionToDispatchedKeysMap.isEmpty();
try {
Map bucketIdToDispatchedKeys = (Map) regionToDispatchedKeysMap.get(prQ.getFullPath());
if (bucketIdToDispatchedKeys == null) {
bucketIdToDispatchedKeys = new ConcurrentHashMap();
regionToDispatchedKeysMap.put(prQ.getFullPath(), bucketIdToDispatchedKeys);
}
addRemovedEventToMap(bucketIdToDispatchedKeys, bucketId, key);
if (wasEmpty) {
regionToDispatchedKeysMapEmpty.signal();
}
} finally {
lock.unlock();
}
}
}
private void addRemovedEventToMap(Map bucketIdToDispatchedKeys, int bucketId, Object key) {
List dispatchedKeys = (List) bucketIdToDispatchedKeys.get(bucketId);
if (dispatchedKeys == null) {
dispatchedKeys = new ArrayList