/*
 * NOTE: The lines below are residue from the web page this file was scraped
 * from — they are not part of the original source. They are preserved inside
 * this comment so the file remains valid Java:
 *
 *   Please wait. This can take some minutes ...
 *   Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
 *   Project price only 1 $
 *   You can buy this project and download/modify it how often you want.
 *   com.gemstone.gemfire.internal.cache.DiskStoreImpl Maven / Gradle / Ivy
 */
/*
* Copyright (c) 2010-2015 Pivotal Software, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License. See accompanying
* LICENSE file.
*/
package com.gemstone.gemfire.internal.cache;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.PrintStream;
import java.net.InetAddress;
import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
import com.gemstone.gemfire.CancelCriterion;
import com.gemstone.gemfire.CancelException;
import com.gemstone.gemfire.StatisticsFactory;
import com.gemstone.gemfire.SystemFailure;
import com.gemstone.gemfire.cache.Cache;
import com.gemstone.gemfire.cache.CacheClosedException;
import com.gemstone.gemfire.cache.DiskAccessException;
import com.gemstone.gemfire.cache.DiskStore;
import com.gemstone.gemfire.cache.DiskStoreFactory;
import com.gemstone.gemfire.cache.RegionDestroyedException;
import com.gemstone.gemfire.cache.persistence.PersistentID;
import com.gemstone.gemfire.cache.query.IndexMaintenanceException;
import com.gemstone.gemfire.distributed.DistributedSystem;
import com.gemstone.gemfire.distributed.internal.InternalDistributedSystem;
import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember;
import com.gemstone.gemfire.i18n.LogWriterI18n;
import com.gemstone.gemfire.internal.ByteArrayDataInput;
import com.gemstone.gemfire.internal.FileUtil;
import com.gemstone.gemfire.internal.InsufficientDiskSpaceException;
import com.gemstone.gemfire.internal.LogWriterImpl;
import com.gemstone.gemfire.internal.NanoTimer;
import com.gemstone.gemfire.internal.cache.GemFireCacheImpl.StaticSystemCallbacks;
import com.gemstone.gemfire.internal.cache.Oplog.DiskRegionInfo;
import com.gemstone.gemfire.internal.cache.Oplog.KRFEntry;
import com.gemstone.gemfire.internal.cache.control.InternalResourceManager;
import com.gemstone.gemfire.internal.cache.control.InternalResourceManager.ResourceType;
import com.gemstone.gemfire.internal.cache.control.MemoryEvent;
import com.gemstone.gemfire.internal.cache.control.MemoryThresholds.MemoryState;
import com.gemstone.gemfire.internal.cache.control.ResourceListener;
import com.gemstone.gemfire.internal.cache.lru.LRUAlgorithm;
import com.gemstone.gemfire.internal.cache.lru.LRUStatistics;
import com.gemstone.gemfire.internal.cache.persistence.BackupInspector;
import com.gemstone.gemfire.internal.cache.persistence.BackupManager;
import com.gemstone.gemfire.internal.cache.persistence.BytesAndBits;
import com.gemstone.gemfire.internal.cache.persistence.DiskExceptionHandler;
import com.gemstone.gemfire.internal.cache.persistence.DiskRecoveryStore;
import com.gemstone.gemfire.internal.cache.persistence.DiskRegionView;
import com.gemstone.gemfire.internal.cache.persistence.DiskStoreFilter;
import com.gemstone.gemfire.internal.cache.persistence.DiskStoreID;
import com.gemstone.gemfire.internal.cache.persistence.OplogType;
import com.gemstone.gemfire.internal.cache.persistence.PRPersistentConfig;
import com.gemstone.gemfire.internal.cache.persistence.PersistentMemberID;
import com.gemstone.gemfire.internal.cache.persistence.PersistentMemberPattern;
import com.gemstone.gemfire.internal.cache.persistence.RestoreScript;
import com.gemstone.gemfire.internal.cache.snapshot.GFSnapshot;
import com.gemstone.gemfire.internal.cache.snapshot.GFSnapshot.SnapshotWriter;
import com.gemstone.gemfire.internal.cache.snapshot.SnapshotPacket.SnapshotRecord;
import com.gemstone.gemfire.internal.cache.versions.RegionVersionVector;
import com.gemstone.gemfire.internal.cache.versions.VersionSource;
import com.gemstone.gemfire.internal.cache.versions.VersionStamp;
import com.gemstone.gemfire.internal.cache.versions.VersionTag;
import com.gemstone.gemfire.internal.concurrent.ConcurrentHashSet;
import com.gemstone.gemfire.internal.i18n.LocalizedStrings;
import com.gemstone.gemfire.internal.offheap.OffHeapHelper;
import com.gemstone.gemfire.internal.offheap.annotations.Released;
import com.gemstone.gemfire.internal.offheap.annotations.Retained;
import com.gemstone.gemfire.internal.shared.SystemProperties;
import com.gemstone.gemfire.internal.shared.Version;
import com.gemstone.gnu.trove.THashMap;
import com.gemstone.gnu.trove.THashSet;
/**
* Represents a (disk-based) persistent store for region data. Used for both
* persistent recoverable regions and overflow-only regions.
*
* @author David Whitlock
* @author Darrel Schneider
* @author Mitul Bid
* @author Asif
*
* @since 3.2
*/
@SuppressWarnings("synthetic-access")
public class DiskStoreImpl implements DiskStore, ResourceListener {
// Prefix used for per-directory subdirectories created during online backup.
private static final String BACKUP_DIR_PREFIX = "dir";
// Product wrapper over system properties; adds the product name prefix on lookup.
private static final SystemProperties sysProps = SystemProperties
.getServerInstance();
// Debug/trace switches, fixed at class-load time from system properties.
public static final boolean TRACE_RECOVERY = sysProps.getBoolean(
"disk.TRACE_RECOVERY", false);
public static final boolean TRACE_WRITES = sysProps.getBoolean(
"disk.TRACE_WRITES", false);
public static final boolean KRF_DEBUG = sysProps.getBoolean(
"disk.KRF_DEBUG", false);
// Cap on the number of inactive oplogs kept open (bounds file-handle usage).
public static final int MAX_OPEN_INACTIVE_OPLOGS = sysProps.getInteger(
"MAX_OPEN_INACTIVE_OPLOGS", 7);
/*
* If less than 20MB (default - configurable through this property) of the
* available space is left for logging and other misc stuff then it
* is better to bail out.
*/
public static final int MIN_DISK_SPACE_FOR_LOGS = sysProps.getInteger(
"MIN_DISK_SPACE_FOR_LOGS", 20);
// Index-load debug flags form a hierarchy: FINER implies DEBUG implies PERF.
public static boolean INDEX_LOAD_DEBUG_FINER = sysProps.getBoolean(
"IndexLoadDebugFiner", false);
public static boolean INDEX_LOAD_DEBUG = INDEX_LOAD_DEBUG_FINER
|| sysProps.getBoolean("IndexLoadDebug", false);
public static boolean INDEX_LOAD_PERF_DEBUG = INDEX_LOAD_DEBUG
|| sysProps.getBoolean("IndexLoadPerfDebug", false);
/** Represents an invalid id of a key/value on disk */
public static final long INVALID_ID = 0L; // must be zero
private static final String COMPLETE_COMPACTION_BEFORE_TERMINATION_PROPERTY_BASE_NAME =
"disk.completeCompactionBeforeTermination";
public static final String COMPLETE_COMPACTION_BEFORE_TERMINATION_PROPERTY_NAME =
sysProps.getSystemPropertyNamePrefix()
+ COMPLETE_COMPACTION_BEFORE_TERMINATION_PROPERTY_BASE_NAME;
// Smallest allowed per-directory size, in megabytes.
static final int MINIMUM_DIR_SIZE = 1024;
/**
* The static field delays the joining of the close/clear/destroy & forceFlush
* operation, with the compactor thread. This joining occurs after the
* compactor thread is notified to exit. This was added to reproduce deadlock
* caused by concurrent destroy & clear operation where clear operation is
* restarting the compactor thread ( a new thread object different from the
* one for which destroy operation issued notification for release). The delay
* occurs iff the flag used for enabling callbacks to CacheObserver is enabled
* true
*/
static volatile long DEBUG_DELAY_JOINING_WITH_COMPACTOR = 500;
/**
* Kept for backwards compat. Should use allowForceCompaction api/dtd instead.
*/
private final static boolean ENABLE_NOTIFY_TO_ROLL = sysProps.getBoolean(
"ENABLE_NOTIFY_TO_ROLL", false);
private static final String RECOVER_VALUE_PROPERTY_BASE_NAME =
"disk.recoverValues";
public static final String RECOVER_VALUE_PROPERTY_NAME = sysProps
.getSystemPropertyNamePrefix() + RECOVER_VALUE_PROPERTY_BASE_NAME;
private static final String RECOVER_VALUES_SYNC_PROPERTY_BASE_NAME =
"disk.recoverValuesSync";
public static final String RECOVER_VALUES_SYNC_PROPERTY_NAME = sysProps
.getSystemPropertyNamePrefix() + RECOVER_VALUES_SYNC_PROPERTY_BASE_NAME;
/***
* Flag to determine if KRF recovery is to be done during data extraction.
*/
protected boolean dataExtractionKrfRecovery = false;
/*****
* Flag to determine if the offline disk-store is used for data extraction
*/
protected boolean dataExtraction = false;
// Value recovery switches (non-final so tests can flip them per instance).
boolean RECOVER_VALUES = sysProps.getBoolean(
RECOVER_VALUE_PROPERTY_BASE_NAME, true);
boolean RECOVER_VALUES_SYNC = sysProps.getBoolean(
RECOVER_VALUES_SYNC_PROPERTY_BASE_NAME, false);
boolean FORCE_KRF_RECOVERY = sysProps.getBoolean(
"disk.FORCE_KRF_RECOVERY", false);
public static final int MAX_SOPLOGS_PER_LEVEL = sysProps.getInteger(
"disk.MAX_SOPLOGS_PER_LEVEL", 4);
//TODO soplogs - need to be able to default this to an unlimited number of levels
//The SizeTieredCompactor currently creates all levels up front
public static final int MAX_SOPLOG_LEVELS = sysProps.getInteger(
"disk.MAX_SOPLOG_LEVELS", 10);
// Disk-region id space: ids 1..8 are reserved; user regions start at MIN_DRID.
public static final long MIN_RESERVED_DRID = 1;
public static final long MAX_RESERVED_DRID = 8;
static final long MIN_DRID = MAX_RESERVED_DRID + 1;
/**
* Estimated number of bytes written to disk for each new disk id.
*/
static final int BYTES_PER_ID = 8;
/**
* Maximum number of oplogs to compact per compaction operations. Defaults to
* 1 to allows oplogs to be deleted quickly, to reduce amount of memory used
* during a compaction and to be fair to other regions waiting for a compactor
* thread from the pool. Ignored if set to <= 0. Made non static so tests can
* set it.
*/
private final int MAX_OPLOGS_PER_COMPACTION = sysProps.getInteger(
"MAX_OPLOGS_PER_COMPACTION",
sysProps.getInteger("MAX_OPLOGS_PER_ROLL", 1));
// "MAX_CONCURRENT_ROLLS" is the legacy name for the same setting.
public static final int MAX_CONCURRENT_COMPACTIONS = sysProps.getInteger(
"MAX_CONCURRENT_COMPACTIONS",
sysProps.getInteger("MAX_CONCURRENT_ROLLS", 1));
/**
* This system property indicates that maximum number of delayed write
* tasks that can be pending before submitting the tasks start blocking.
* These tasks are things like unpreblow oplogs, delete oplogs, etc.
*/
public static final int MAX_PENDING_TASKS = sysProps.getInteger("disk.MAX_PENDING_TASKS", 6);
/**
* This system property indicates that IF should also be preallocated. This property
* will be used in conjunction with the PREALLOCATE_OPLOGS property. If PREALLOCATE_OPLOGS
* is ON the below will by default be ON but in order to switch it off you need to explicitly
*/
static final boolean PREALLOCATE_IF = sysProps.getBoolean(
"preAllocateIF", true);
/**
* This system property indicates that Oplogs should be preallocated till the
* maxOplogSize as specified for the disk store.
*/
static final boolean PREALLOCATE_OPLOGS = sysProps.getBoolean(
"preAllocateDisk", true);
/** For some testing purposes we would not consider top property if this flag is set to true **/
public static boolean SET_IGNORE_PREALLOCATE = false;
/**
* This system property turns on synchronous writes just the the init file.
*/
static final boolean SYNC_IF_WRITES = sysProps.getBoolean(
"syncMetaDataWrites", false);
/**
* Property to disable fsync behavior to speed up precheckin runs.
*/
public static boolean DISABLE_SYNC_WRITES_FOR_TESTS = sysProps
.getBoolean("DISABLE_SYNC_WRITES_FOR_TESTS", false);
// /** delay for slowing down recovery, for testing purposes only */
// public static volatile int recoverDelay = 0;
// //////////////////// Instance Fields ///////////////////////
// The cache that owns this disk store.
private final GemFireCacheImpl cache;
/** The stats for this store */
private final DiskStoreStats stats;
final LogWriterI18n logger;
/**
* Asif:Added as stop gap arrangement to fix bug 39380. It is not a clean fix
* as keeping track of the threads acquiring read lock, etc is not a good idea
* to solve the issue
*/
private final AtomicInteger entryOpsCount = new AtomicInteger(0);
/**
* Do not want to take chance with any object like DiskRegion etc as lock
*/
private final Object closeRegionGuard = new Object();
/** Number of dirs* */
final int dirLength;
/** Disk directory holders* */
DirectoryHolder[] directories;
/** max of all the dir sizes given stored in bytes* */
private final long maxDirSize;
/** disk dir to be used by info file * */
private int infoFileDirIndex;
// Percentage threshold that triggers compaction; see calcCompactionThreshold.
private final int compactionThreshold;
/**
* The limit of how many items can be in the async queue before async starts
* blocking and a flush is forced. If this value is 0 then no limit.
*/
private final int maxAsyncItems;
private final AtomicInteger forceFlushCount;
private final Object asyncMonitor;
// complex vars
/** Compactor task which does the compaction. Null if compaction not possible. */
private final OplogCompactor oplogCompactor;
private DiskInitFile initFile = null;
private volatile DiskStoreBackup diskStoreBackup = null;
// Read/write lock guarding compactor lifecycle transitions.
private final ReentrantReadWriteLock compactorLock = new ReentrantReadWriteLock();
private final WriteLock compactorWriteLock = compactorLock.writeLock();
private final ReadLock compactorReadLock = compactorLock.readLock();
/**
* Set if we have encountered a disk exception causing us to shutdown this
* disk store. This is currently used only to prevent trying to shutdown the
* disk store from multiple threads, but I think at some point we should use
* this to prevent any other ops from completing during the close operation.
*/
private final AtomicReference diskException = new AtomicReference();
private boolean isForInternalUse;
// Oplog sets: persistent for backup regions, overflow for overflow-only ones.
PersistentOplogSet persistentOplogs = new PersistentOplogSet(this);
OverflowOplogSet overflowOplogs = new OverflowOplogSet(this);
/** For testing purpose **/
public THashMap TEST_INDEX_ACCOUNTING_MAP;
public static boolean TEST_NEW_CONTAINER = false;
public List TEST_NEW_CONTAINER_LIST;
// index recovery related flags
// State machine UNINIT -> INIT -> DONE; current state kept in indexRecoveryState[0].
private static final int INDEXRECOVERY_UNINIT = 1;
private static final int INDEXRECOVERY_INIT = 2;
private static final int INDEXRECOVERY_DONE = 3;
private final int[] indexRecoveryState;
private final AtomicReference indexRecoveryFailure;
// private boolean isThreadWaitingForSpace = false;
/**
* Get the next available dir
*/
// /**
// * Max timed wait for disk space to become available for an entry operation
// ,
// * in milliseconds. This will be the maximum time for which a
// * create/modify/remove operation will wait so as to allow switch over & get
// a
// * new Oplog for writing. If no space is available in that time,
// * DiskAccessException will be thrown. The default wait will be for 120
// * seconds
// */
// private static final long MAX_WAIT_FOR_SPACE = SystemProperties.getInteger(
// "MAX_WAIT_FOR_SPACE", 20) * 1000;
// Generator for new disk region ids; starts above the reserved id range.
private final AtomicLong regionIdCtr = new AtomicLong(MIN_DRID);
/**
* Only contains backup DiskRegions. The Value could be a RecoveredDiskRegion
* or a DiskRegion
*/
private final ConcurrentMap drMap = new ConcurrentHashMap();
/**
* A set of overflow only regions that are using this disk store.
*/
private final Set overflowMap = new ConcurrentHashSet();
/**
* Contains all of the disk recovery stores for which we are recovering values
* asnynchronously.
*/
private final Map currentAsyncValueRecoveryMap = new HashMap();
private final Object asyncValueRecoveryLock = new Object();
/**
* The unique id for this disk store.
*
* Either set during recovery of an existing disk store when the
* IFREC_DISKSTORE_ID record is read or when a new init file is created.
*
*/
private DiskStoreID diskStoreID;
// Pools for background disk-store tasks and delayed writes (delete/unpreblow).
private final ThreadPoolExecutor diskStoreTaskPool;
private final ThreadPoolExecutor delayedWritePool;
private volatile Future lastDelayedWrite;
// ///////////////////// Constructors /////////////////////////
/**
 * Computes the effective compaction threshold percentage, honoring the
 * legacy "OVERFLOW_ROLL_PERCENTAGE" system property for backwards
 * compatibility when the caller left the threshold at its default.
 *
 * @param ct the configured compaction threshold
 * @return the threshold to use; if {@code ct} is the factory default and the
 *         legacy roll-percentage property is set, that percentage wins
 */
private static int calcCompactionThreshold(int ct) {
  if (ct == DiskStoreFactory.DEFAULT_COMPACTION_THRESHOLD) {
    // allow the old sys prop for backwards compat.
    // BUG FIX: the original checked "OVERFLOW_ROLL_PERCENTAGE" but then read
    // back "gemfire.OVERFLOW_ROLL_PERCENTAGE" through the same prefixing
    // wrapper, a different key, so the configured value was ignored and the
    // hard-coded 0.50 default always applied. Read the property once instead.
    String rollPercentage = sysProps.getString("OVERFLOW_ROLL_PERCENTAGE", null);
    if (rollPercentage != null) {
      ct = (int) (Double.parseDouble(rollPercentage) * 100.0);
    }
  }
  return ct;
}
/**
 * Creates a new DiskStoreImpl that accesses disk on behalf of the given
 * cache, with all offline/validating/compacting flags defaulted to false.
 */
DiskStoreImpl(Cache cache, DiskStoreAttributes props) {
this(cache, props, false, null);
}
// Delegates to the full constructor: online (not offline), not upgrading,
// not validating, not offline-compacting, with oplogs enabled.
DiskStoreImpl(Cache cache, DiskStoreAttributes props, boolean ownedByRegion,
InternalRegionArguments internalRegionArgs) {
this(cache, props.getName(), props, ownedByRegion, internalRegionArgs,
false, false/* upgradeVersionOnly */, false, false, true);
}
// Delegates to the master constructor with dataExtraction defaulted to false.
DiskStoreImpl(Cache cache, String name, DiskStoreAttributes props,
boolean ownedByRegion, InternalRegionArguments internalRegionArgs,
boolean offline, boolean upgradeVersionOnly, boolean offlineValidating,
boolean offlineCompacting, boolean needsOplogs) {
this(cache, name, props, ownedByRegion, internalRegionArgs, offline, upgradeVersionOnly, offlineValidating, offlineCompacting, needsOplogs, false);
}
/**
 * Master constructor. Copies the (validated) attributes, builds the per-directory
 * holders and async flush queue, loads the init/oplog files from disk, recovers
 * index metadata, and starts the compactor and background task pools.
 *
 * Note: initialization order matters here (e.g. properties are validated before
 * being read, and loadFiles runs before index-id recovery); do not reorder.
 */
DiskStoreImpl(Cache cache, String name, DiskStoreAttributes props,
boolean ownedByRegion, InternalRegionArguments internalRegionArgs,
boolean offline, boolean upgradeVersionOnly, boolean offlineValidating,
boolean offlineCompacting, boolean needsOplogs, boolean dataExtraction) {
// Mode flags first; several accessors below (isOffline etc.) depend on them.
this.dataExtraction = dataExtraction;
this.offline = offline;
this.upgradeVersionOnly = upgradeVersionOnly;
this.validating = offlineValidating;
this.offlineCompacting = offlineCompacting;
assert internalRegionArgs == null || ownedByRegion : "internalRegionArgs "
+ "should be non-null only if the DiskStore is owned by region";
this.ownedByRegion = ownedByRegion;
this.internalRegionArgs = internalRegionArgs;
// validate properties before reading from it
props.validateAndAdjust();
this.name = name;
this.autoCompact = props.getAutoCompact();
this.allowForceCompaction = props.getAllowForceCompaction();
this.compactionThreshold = calcCompactionThreshold(props
.getCompactionThreshold());
this.maxOplogSizeInBytes = props.getMaxOplogSizeInBytes();
this.timeInterval = props.getTimeInterval();
this.queueSize = props.getQueueSize();
this.writeBufferSize = props.getWriteBufferSize();
this.diskDirs = props.getDiskDirs();
this.diskDirSizes = props.getDiskDirSizes();
// DISABLE_SYNC_WRITES_FOR_TESTS globally overrides the configured sync mode.
this.syncWrites = props.getSyncWrites() && !DISABLE_SYNC_WRITES_FOR_TESTS;
this.cache = (GemFireCacheImpl) cache;
logger = cache.getLoggerI18n();
StatisticsFactory factory = cache.getDistributedSystem();
this.stats = new DiskStoreStats(factory, getName());
// start simple init
this.isCompactionPossible = isOfflineCompacting()
|| (!isOffline() && (getAutoCompact() || getAllowForceCompaction() || ENABLE_NOTIFY_TO_ROLL));
this.maxAsyncItems = getQueueSize();
this.forceFlushCount = new AtomicInteger();
this.asyncMonitor = new Object();
// always use LinkedBlockingQueue to work around bug 41470
// if (this.maxAsyncItems > 0 && this.maxAsyncItems < 1000000) {
// // we compare to 1,000,000 so that very large maxItems will
// // not cause us to consume too much memory in our queue.
// // Removed the +13 since it made the queue bigger than was configured.
// // The +13 is to give us a bit of headroom during the drain.
// this.asyncQueue = new
// ArrayBlockingQueue(this.maxAsyncItems/*+13*/);
// } else {
if (this.maxAsyncItems > 0) {
this.asyncQueue = new ForceableLinkedBlockingQueue(
this.maxAsyncItems); // fix for bug 41310
} else {
this.asyncQueue = new ForceableLinkedBlockingQueue();
}
// Offline validation/compaction runs single-threaded; no flusher needed.
if (!isValidating() && !isOfflineCompacting()) {
startAsyncFlusher();
}
File[] dirs = getDiskDirs();
int[] dirSizes = getDiskDirSizes();
int length = dirs.length;
this.directories = new DirectoryHolder[length];
long tempMaxDirSize = 0;
for (int i = 0; i < length; i++) {
directories[i] = new DirectoryHolder(getName() + "_DIR#" + i, factory,
dirs[i], dirSizes[i], i);
// logger.info(LocalizedStrings.DEBUG, "DEBUG ds=" + name + " dir#" + i +
// "=" + directories[i]);
if (tempMaxDirSize < dirSizes[i]) {
tempMaxDirSize = dirSizes[i];
}
}
// stored in bytes
this.maxDirSize = tempMaxDirSize * 1024 * 1024;
this.infoFileDirIndex = 0;
// Now that we no longer have db files, use all directories for oplogs
/**
* The infoFileDir contains the lock file and the init file. It will be
* directories[0] on a brand new disk store. On an existing disk store it
* will be the directory the init file is found in.
*/
this.dirLength = length;
loadFiles(needsOplogs);
// Store all the ddl ids which this vm has already seen
final GemFireCacheImpl.StaticSystemCallbacks sysCb = GemFireCacheImpl
.getInternalProductCallbacks();
if (sysCb != null) {
this.persistIndexes = sysCb.persistIndexes(this);
}
else {
this.persistIndexes = false;
}
// Recovered index ids = created minus deleted, as recorded in the init file.
this.recoveredIndexIds = new HashSet(getDiskInitFile()
.getCreatedIndexIds());
Set deletedIndexIds = getDiskInitFile().getDeletedIndexIds();
if (!deletedIndexIds.isEmpty()) {
this.recoveredIndexIds.removeAll(deletedIndexIds);
}
this.indexRecoveryState = new int[] { INDEXRECOVERY_UNINIT };
this.indexRecoveryFailure = new AtomicReference(null);
// setFirstChild(getSortedOplogs());
// complex init
if (isCompactionPossible() && !isOfflineCompacting()) {
this.oplogCompactor = new OplogCompactor();
this.oplogCompactor.startCompactor();
} else {
this.oplogCompactor = null;
}
int MAXT = DiskStoreImpl.MAX_CONCURRENT_COMPACTIONS;
final ThreadGroup compactThreadGroup = LogWriterImpl.createThreadGroup("Oplog Compactor Thread Group", this.logger);
final ThreadFactory compactThreadFactory = GemfireCacheHelper.CreateThreadFactory(compactThreadGroup, "Idle OplogCompactor");
this.diskStoreTaskPool = new ThreadPoolExecutor(MAXT, MAXT, 10, TimeUnit.SECONDS,
new LinkedBlockingQueue(),
compactThreadFactory);
this.diskStoreTaskPool.allowCoreThreadTimeOut(true);
final ThreadGroup deleteThreadGroup = LogWriterImpl.createThreadGroup("Oplog Delete Thread Group", this.logger);
final ThreadFactory deleteThreadFactory = GemfireCacheHelper.CreateThreadFactory(deleteThreadGroup, "Oplog Delete Task");
// Bounded queue + CallerRunsPolicy: submitters block (by running the task
// themselves) once MAX_PENDING_TASKS delayed writes are pending.
this.delayedWritePool = new ThreadPoolExecutor(1, 1, 10, TimeUnit.SECONDS,
new LinkedBlockingQueue(MAX_PENDING_TASKS),
deleteThreadFactory, new ThreadPoolExecutor.CallerRunsPolicy());
this.delayedWritePool.allowCoreThreadTimeOut(true);
// register with ResourceManager to adjust async queue size
InternalResourceManager irm = this.cache.getResourceManager();
if (irm != null) {
irm.addResourceListener(ResourceType.HEAP_MEMORY, this);
}
}
////////////////////// Instance Methods //////////////////////
/**
 * Marks this disk store as one created for internal product use rather than
 * by user configuration.
 */
public void setUsedForInternalUse() {
  isForInternalUse = true;
}
/**
 * @return true if this disk store was flagged for internal product use via
 *         {@link #setUsedForInternalUse()}
 */
public boolean isUsedForInternalUse() {
  return isForInternalUse;
}
/**
 * @return true if indexes are persisted to this disk store (decided at
 *         construction time from the product callbacks)
 */
public final boolean isPersistIndexes() {
  return persistIndexes;
}
/**
 * Pins the async queue's capacity at whatever its current size is, for the
 * case where no capacity was explicitly configured.
 */
public final void setAsyncQueueCapacityToCurrent() {
  asyncQueue.setCurrentSizeAsCapacity();
}
/**
 * Restores the async queue's original capacity, undoing a prior
 * {@link #setAsyncQueueCapacityToCurrent()}.
 */
public final void resetAsyncQueueCapacity() {
  asyncQueue.resetCapacity();
}
/**
 * Compares this disk store's live configuration against the given
 * attributes. Every mismatching attribute is logged at DEBUG level (all of
 * them, not just the first) before the combined result is returned.
 *
 * @param props the attributes to compare against
 * @return true only if every compared attribute matches
 */
public boolean sameAs(DiskStoreAttributes props) {
  // Non-short-circuiting &= so every mismatch is logged, as before.
  boolean same = true;
  same &= compareAttr("allowForceCompaction",
      getAllowForceCompaction() == props.getAllowForceCompaction(),
      getAllowForceCompaction(), props.getAllowForceCompaction());
  same &= compareAttr("AutoCompact",
      getAutoCompact() == props.getAutoCompact(),
      getAutoCompact(), props.getAutoCompact());
  same &= compareAttr("CompactionThreshold",
      getCompactionThreshold() == props.getCompactionThreshold(),
      getCompactionThreshold(), props.getCompactionThreshold());
  same &= compareAttr("MaxOplogSizeInBytes",
      getMaxOplogSizeInBytes() == props.getMaxOplogSizeInBytes(),
      getMaxOplogSizeInBytes(), props.getMaxOplogSizeInBytes());
  same &= compareAttr("Name", getName().equals(props.getName()),
      getName(), props.getName());
  same &= compareAttr("QueueSize", getQueueSize() == props.getQueueSize(),
      getQueueSize(), props.getQueueSize());
  same &= compareAttr("TimeInterval",
      getTimeInterval() == props.getTimeInterval(),
      getTimeInterval(), props.getTimeInterval());
  same &= compareAttr("WriteBufferSize",
      getWriteBufferSize() == props.getWriteBufferSize(),
      getWriteBufferSize(), props.getWriteBufferSize());
  same &= compareAttr("DiskDirs",
      Arrays.equals(getDiskDirs(), props.getDiskDirs()),
      Arrays.toString(getDiskDirs()), Arrays.toString(props.getDiskDirs()));
  same &= compareAttr("DiskDirSizes",
      Arrays.equals(getDiskDirSizes(), props.getDiskDirSizes()),
      Arrays.toString(getDiskDirSizes()),
      Arrays.toString(props.getDiskDirSizes()));
  return same;
}

/**
 * Logs a DEBUG-level mismatch message for a single attribute comparison and
 * passes the comparison result through. Extracted so each attribute is
 * evaluated and logged exactly once (the original duplicated every
 * comparison: once for logging, once in the returned conjunction).
 */
private boolean compareAttr(String what, boolean equal, Object mine,
    Object theirs) {
  if (!equal) {
    this.logger.info(LocalizedStrings.DEBUG,
        "DEBUG " + what + " " + mine + "!=" + theirs);
  }
  return equal;
}
/**
 * Returns the {@link DiskStoreStats} that track activity on this store.
 */
public DiskStoreStats getStats() {
  return stats;
}
/**
 * Collects every disk region known to this store: the live entries in the
 * region map plus those recorded in the init file.
 */
public Map getAllDiskRegions() {
  Map all = new HashMap(drMap);
  all.putAll(initFile.getDRMap());
  return all;
}
/**
 * Queues the given recovery store with the persistent oplog set so its data
 * will be recovered.
 */
void scheduleForRecovery(DiskRecoveryStore drs) {
  final DiskRegionView view = drs.getDiskRegionView();
  getPersistentOplogSet(view).scheduleForRecovery(drs);
}
/**
 * Initializes the contents of any regions on this DiskStore that have been
 * registered but are not yet initialized.
 *
 * Only persistent, recreated regions need work here: either the recovered
 * entry map is handed over to the new owner under the recovery lock, or a
 * fresh recovery is scheduled and run.
 */
final void initializeOwner(LocalRegion lr,
InternalRegionArguments internalRegionArgs) {
DiskRegion dr = lr.getDiskRegion();
//We don't need to do recovery for overflow regions.
if(!lr.getDataPolicy().withPersistence() || !dr.isRecreated()) {
return;
}
DiskRegionView drv = lr.getDiskRegionView();
// Lock prevents async value recovery from touching the entry map while
// ownership is transferred to the new LocalRegion.
synchronized (currentAsyncValueRecoveryMap) {
dr.changeOwnerForExistingRegionMap(lr, internalRegionArgs);
if (drv.getRecoveredEntryMap() != null) {
PersistentOplogSet oplogSet = getPersistentOplogSet(drv);
// prevent async recovery from recovering a value
// while we are copying the entry map.
drv.copyExistingRegionMap(lr, internalRegionArgs);
getStats().incUncreatedRecoveredRegions(-1);
for (Oplog oplog : oplogSet.getAllOplogs()) {
if (oplog != null) {
oplog.updateDiskRegion(lr.getDiskRegionView());
}
}
// Repoint any in-flight async value recovery at the new owner.
if (currentAsyncValueRecoveryMap.containsKey(drv.getId())) {
currentAsyncValueRecoveryMap.put(drv.getId(), lr);
}
return;
}
}
// No recovered entry map yet: schedule and run recovery now.
scheduleForRecovery(lr);
// boolean gotLock = false;
try {
// acquireReadLock(dr);
// gotLock = true;
recoverRegionsThatAreReady(false);
} catch (DiskAccessException dae) {
// Asif:Just rethrow t
throw dae;
} catch (RuntimeException re) {
// @todo: if re is caused by a RegionDestroyedException
// (or CacheClosed...) then don't we want to throw that instead
// of a DiskAccessException?
// Asif :wrap it in DiskAccessException
// IOException is alerady wrappped by DiskRegion correctly.
// Howvever EntryEventImpl .deserialize is converting IOException
// into IllegalArgumentExcepption, so handle only run time exception
// here
throw new DiskAccessException(
"RuntimeException in initializing the disk store from the disk", re,
this);
}
// finally {
// if(gotLock) {
// releaseReadLock(dr);
// }
// }
}
/**
 * Chooses the oplog set for the given region view: the persistent oplogs
 * for backup (persistent) regions, the overflow oplogs otherwise.
 */
public final OplogSet getOplogSet(DiskRegionView drv) {
  return drv.isBackup() ? persistentOplogs : overflowOplogs;
}
/**
 * Returns the persistent oplog set. A non-null view must be a backup
 * (persistent) region; this is asserted rather than enforced.
 */
public final PersistentOplogSet getPersistentOplogSet(DiskRegionView drv) {
  assert drv == null || drv.isBackup();
  return persistentOplogs;
}
/**
 * Stores a key/value pair from a region entry on disk. Updates all of the
 * necessary {@linkplain DiskRegionStats statistics} and invokes
 * {@link Oplog#create} or {@link Oplog#modify}.
 *
 * @param region the region the entry belongs to
 * @param entry the entry which is going to be written to disk
 * @param value the bytes to write
 * @param isSerializedObject do the bytes in {@code value} contain a
 *          serialized object (as opposed to a plain byte array)?
 * @param async true when called from the async flusher (no read lock or
 *          write stats are taken in that case)
 * @throws RegionClearedException if a clear operation completed before the
 *           put operation completed successfully, causing the put to abort
 * @throws IllegalArgumentException if the entry's disk id is less than zero
 */
final void put(LocalRegion region, DiskEntry entry, byte[] value,
boolean isSerializedObject, boolean async) throws RegionClearedException {
DiskRegion dr = region.getDiskRegion();
DiskId id = entry.getDiskId();
if (dr.isBackup() && id.getKeyId() < 0) {
throw new IllegalArgumentException(
LocalizedStrings.DiskRegion_CANT_PUT_A_KEYVALUE_PAIR_WITH_ID_0
.toLocalizedString(id));
}
// Flush stats for async writes, write stats for synchronous ones.
long start = async ? this.stats.startFlush() : this.stats.startWrite();
if (!async) {
dr.getStats().startWrite();
}
try {
if (!async) {
acquireReadLock(dr);
}
try {
if (dr.isRegionClosed()) {
region.getCancelCriterion().checkCancelInProgress(null);
throw new RegionDestroyedException(
LocalizedStrings.DiskRegion_THE_DISKREGION_HAS_BEEN_CLOSED_OR_DESTROYED
.toLocalizedString(), dr.getName());
}
// Asif TODO: Should the htree reference in
// DiskRegion/DiskRegion be made
// volatile.Will theacquireReadLock ensure variable update?
boolean doingCreate = false;
if (dr.isBackup() && id.getKeyId() == INVALID_ID) {
doingCreate = true;
// the call to newOplogEntryId moved down into Oplog.basicCreate
}
boolean goahead = true;
if (dr.didClearCountChange()) {
// mbid: if the reference has changed (by a clear)
// after a put has been made in the region
// then we need to confirm if this key still exists in the region
// before writing to disk
goahead = region.basicGetEntry(entry.getKey()) == entry;
}
if (goahead) {
// in overflow only mode, no need to write the key and the
// extra data, hence if it is overflow only mode then use
// modify and not create
OplogSet oplogSet = getOplogSet(dr);
if (doingCreate) {
oplogSet.create(region, entry, value, isSerializedObject, async);
} else {
oplogSet.modify(region, entry, value, isSerializedObject, async);
}
} else {
throw new RegionClearedException(
LocalizedStrings.DiskRegion_CLEAR_OPERATION_ABORTING_THE_ONGOING_ENTRY_0_OPERATION_FOR_ENTRY_WITH_DISKID_1
.toLocalizedString(new Object[] {
((doingCreate) ? "creation" : "modification"), id }));
}
} finally {
if (!async) {
releaseReadLock(dr);
}
}
} finally {
if (async) {
this.stats.endFlush(start);
} else {
dr.getStats().endWrite(start, this.stats.endWrite(start));
dr.getStats().incWrittenBytes(id.getValueLength());
}
}
}
/**
 * Persists a conflicting version tag (without a key/value) to the current
 * backup oplog, unless the on-disk region version vector already contains
 * it. Takes the disk-region read lock for synchronous calls only.
 */
final void putVersionTagOnly(LocalRegion region, VersionTag tag, boolean async) {
DiskRegion dr = region.getDiskRegion();
// this method will only be called by backup oplog
assert dr.isBackup();
if (!async) {
acquireReadLock(dr);
}
try {
if (dr.isRegionClosed()) {
region.getCancelCriterion().checkCancelInProgress(null);
throw new RegionDestroyedException(
LocalizedStrings.DiskRegion_THE_DISKREGION_HAS_BEEN_CLOSED_OR_DESTROYED
.toLocalizedString(), dr.getName());
}
if (dr.getRegionVersionVector().contains(tag.getMemberID(),
tag.getRegionVersion())) {
// No need to write the conflicting tag to disk if the disk RVV already
// contains this tag.
return;
}
PersistentOplogSet oplogSet = getPersistentOplogSet(dr);
oplogSet.getChild().saveConflictVersionTag(region, tag, async);
} finally {
if (!async) {
releaseReadLock(dr);
}
}
}
/**
 * Returns the value of the key/value pair with the given diskId. Updates all
 * of the necessary {@linkplain DiskRegionStats statistics}.
 *
 * <p>Retries up to 3 times on IllegalArgumentException (observed when an
 * oplog is corrupted during HA), then rethrows the last failure.
 *
 * @return the recovered value, or Token.REMOVED_PHASE1 if a concurrent
 *         region clear removed the entry
 */
final Object get(DiskRegion dr, DiskId id) {
  acquireReadLock(dr);
  try {
    int count = 0;
    RuntimeException ex = null;
    while (count < 3) {
      // retry at most 3 times
      BytesAndBits bb = null;
      try {
        bb = getBytesAndBitsWithoutLock(dr, id, true/* fault -in */, false /*
             * Get
             * only
             * the
             * userbit
             */);
        if (bb == CLEAR_BB) {
          return Token.REMOVED_PHASE1;
        }
        return convertBytesAndBitsIntoObject(bb);
      } catch (IllegalArgumentException e) {
        count++;
        // FIX: bb is still null when getBytesAndBitsWithoutLock itself threw,
        // so guard the log message against a secondary NullPointerException.
        this.logger.info(LocalizedStrings.DEBUG,
            "DiskRegion: Tried " + count
                + ", getBytesAndBitsWithoutLock returns wrong byte array: "
                + (bb != null ? Arrays.toString(bb.getBytes()) : "null"));
        ex = e;
      }
    } // while
    this.logger
        .info(
            LocalizedStrings.DEBUG,
            "Retried 3 times, getting entry from DiskRegion still failed. It must be Oplog file corruption due to HA");
    throw ex;
  } finally {
    releaseReadLock(dr);
  }
}
// private static String baToString(byte[] ba) {
// StringBuffer sb = new StringBuffer();
// for (int i=0; i < ba.length; i++) {
// sb.append(ba[i]).append(", ");
// }
// return sb.toString();
// }
/**
 * This method was added to fix bug 40192. It behaves like getBytesAndBits
 * except that it returns Token.REMOVED_PHASE1 when the htree reference has
 * changed (meaning a clear was done).
 *
 * @return an instance of BytesAndBits or Token.REMOVED_PHASE1
 */
final Object getRaw(DiskRegionView dr, DiskId id) {
  final BytesAndBits result = dr.getDiskStore().getBytesAndBitsWithoutLock(
      dr, id, true /* fault -in */, false /* Get only the userbit */);
  return (result == CLEAR_BB) ? Token.REMOVED_PHASE1 : result;
}
/**
 * Given a BytesAndBits object convert it to the relevant Object (deserialize
 * if necessary) and return the object.
 *
 * @param bb the raw bytes plus user bits read from disk
 * @return the converted object (a Token for invalid/tombstone states)
 */
static Object convertBytesAndBitsIntoObject(BytesAndBits bb) {
  final byte[] bytes = bb.getBytes();
  final byte bits = bb.getBits();
  // The bit checks below mirror convertBytesAndBitsToSerializedForm and must
  // stay in this order.
  if (EntryBits.isInvalid(bits)) {
    return Token.INVALID;
  }
  if (EntryBits.isSerialized(bits)) {
    // true => deserialize into the live object form
    return DiskEntry.Helper.readSerializedValue(bytes, bb.getVersion(), null,
        true);
  }
  if (EntryBits.isLocalInvalid(bits)) {
    return Token.LOCAL_INVALID;
  }
  if (EntryBits.isTombstone(bits)) {
    return Token.TOMBSTONE;
  }
  return DiskEntry.Helper.readRawValue(bytes, bb.getVersion(), null);
}
/**
 * Given a BytesAndBits object get the serialized blob.
 *
 * @param bb the raw bytes plus user bits read from disk
 * @return the value in serialized (blob) form, or a Token for
 *         invalid/tombstone states
 */
static Object convertBytesAndBitsToSerializedForm(BytesAndBits bb) {
  final byte[] bytes = bb.getBytes();
  final byte bits = bb.getBits();
  // Same dispatch order as convertBytesAndBitsIntoObject; the only difference
  // is that serialized values are kept in blob form (last arg false).
  if (EntryBits.isInvalid(bits)) {
    return Token.INVALID;
  }
  if (EntryBits.isSerialized(bits)) {
    return DiskEntry.Helper.readSerializedValue(bytes, bb.getVersion(), null,
        false);
  }
  if (EntryBits.isLocalInvalid(bits)) {
    return Token.LOCAL_INVALID;
  }
  if (EntryBits.isTombstone(bits)) {
    return Token.TOMBSTONE;
  }
  return DiskEntry.Helper.readRawValue(bytes, bb.getVersion(), null);
}
// CLEAR_BB was added in reaction to bug 41306.
// Sentinel returned by getBytesAndBitsWithoutLock when a concurrent region
// clear is detected; callers compare against it by identity (==).
private final BytesAndBits CLEAR_BB = new BytesAndBits(null, (byte) 0);
/**
 * Gets the Object from the OpLog . It can be invoked from OpLog , if by the
 * time a get operation reaches the OpLog, the entry gets compacted or if we
 * allow concurrent put & get operations. It will also minimize the synch lock
 * on DiskId
 *
 * @param id
 *          DiskId object for the entry
 * @return value of the entry or CLEAR_BB if it is detected that the entry was
 *         removed by a concurrent region clear.
 * @throws RegionDestroyedException if the disk region is closed
 * @throws DiskAccessException if the oplog holding the value no longer exists
 *         and no clear is in progress
 */
final BytesAndBits getBytesAndBitsWithoutLock(DiskRegionView dr, DiskId id,
    boolean faultIn, boolean bitOnly) {
  if (dr.isRegionClosed()) {
    throw new RegionDestroyedException(
        LocalizedStrings.DiskRegion_THE_DISKREGION_HAS_BEEN_CLOSED_OR_DESTROYED
            .toLocalizedString(), dr.getName());
  }
  // First clear check: bail out early before touching the oplog.
  if (dr.didClearCountChange()) {
    return CLEAR_BB;
  }
  long oplogId = id.getOplogId();
  OplogSet oplogSet = getOplogSet(dr);
  CompactableOplog oplog = oplogSet.getChild(oplogId);
  if (oplog == null) {
    // Second clear check: a clear may have deleted the oplog between the
    // first check and the lookup; only then is a missing oplog expected.
    if (dr.didClearCountChange()) {
      return CLEAR_BB;
    }
    throw new DiskAccessException(
        LocalizedStrings.DiskRegion_DATA_FOR_DISKENTRY_HAVING_DISKID_AS_0_COULD_NOT_BE_OBTAINED_FROM_DISK_A_CLEAR_OPERATION_MAY_HAVE_DELETED_THE_OPLOGS
            .toLocalizedString(id), dr.getName());
  }
  return oplog.getBytesAndBits(dr, id, faultIn, bitOnly);
}
/**
 * Reads the value bytes and user bits for the given disk id while holding the
 * region read lock. Unlike getRaw, a concurrent clear is surfaced as a
 * DiskAccessException rather than a token.
 *
 * @param faultingIn true to fault the value into memory
 * @throws DiskAccessException if a concurrent region clear removed the entry
 */
final BytesAndBits getBytesAndBits(DiskRegion dr, DiskId id,
    boolean faultingIn) {
  acquireReadLock(dr);
  try {
    BytesAndBits bb = getBytesAndBitsWithoutLock(dr, id, faultingIn, false /*
         * Get
         * only
         * user
         * bit
         */);
    if (bb == CLEAR_BB) {
      throw new DiskAccessException(
          LocalizedStrings.DiskRegion_ENTRY_HAS_BEEN_CLEARED_AND_IS_NOT_PRESENT_ON_DISK
              .toLocalizedString(), dr.getName());
    }
    return bb;
  } finally {
    releaseReadLock(dr);
  }
}
/**
 * Returns only the user bits stored on disk for the given disk id; a
 * concurrent clear is reported as the INVALID bit.
 *
 * @since 3.2.1
 */
final byte getBits(DiskRegion dr, DiskId id) {
  acquireReadLock(dr);
  try {
    // TODO:Asif : Fault In?
    BytesAndBits bb = getBytesAndBitsWithoutLock(dr, id, true, true /*
         * Get
         * only
         * user
         * bit
         */);
    if (bb == CLEAR_BB) {
      // entry cleared concurrently: report it as invalid
      return EntryBits.setInvalid((byte) 0, true);
    }
    return bb.getBits();
  } finally {
    releaseReadLock(dr);
  }
}
/**
 * Asif: THIS SHOULD ONLY BE USED FOR TESTING PURPOSES AS IT IS NOT THREAD
 * SAFE
 *
 * Returns the object stored on disk with the given id. This method is used
 * for testing purposes only. As such, it bypasses the buffer and goes
 * directly to the disk. This is not a thread safe function , in the sense, it
 * is possible that by the time the OpLog is queried , data might move HTree
 * with the oplog being destroyed
 *
 * @return null if entry has nothing stored on disk (id == INVALID_ID)
 * @throws IllegalArgumentException
 *           If id is less than zero, no action is taken.
 */
public final Object getNoBuffer(DiskRegion dr, DiskId id) {
  BytesAndBits bb = null;
  acquireReadLock(dr);
  try {
    long opId = id.getOplogId();
    if (opId != -1) {
      // -1 means nothing has been written to disk for this id
      OplogSet oplogSet = getOplogSet(dr);
      bb = oplogSet.getChild(opId).getNoBuffer(dr, id);
      return convertBytesAndBitsIntoObject(bb);
    } else {
      return null;
    }
  } finally {
    releaseReadLock(dr);
  }
}
// Test hook: delegates to the overflow oplog set; closes all overflow
// file channels.
void testHookCloseAllOverflowChannels() {
  overflowOplogs.testHookCloseAllOverflowChannels();
}

// Test hook: returns the overflow oplog set's current list of oplogs.
ArrayList testHookGetAllOverflowOplogs() {
  return overflowOplogs.testHookGetAllOverflowOplogs();
}

// Test hook: delegates to the overflow oplog set; closes all overflow oplogs.
void testHookCloseAllOverflowOplogs() {
  overflowOplogs.testHookCloseAllOverflowOplogs();
}
/**
 * Removes the key/value pair with the given id on disk.
 *
 * @param async
 *          true if called by the async flusher thread
 * @param isClear
 *          true if this removal is part of a region clear
 *
 * @throws RegionClearedException
 *           If a clear operation completed before the put operation completed
 *           successfully, resulting in the put operation to abort.
 * @throws IllegalArgumentException
 *           If id is {@linkplain #INVALID_ID invalid} or is less than zero,
 *           no action is taken.
 * @throws RegionDestroyedException if the disk region has been closed
 */
final void remove(LocalRegion region, DiskEntry entry, boolean async,
    boolean isClear) throws RegionClearedException {
  DiskRegion dr = region.getDiskRegion();
  if (!async) {
    acquireReadLock(dr);
  }
  try {
    if (dr.isRegionClosed()) {
      throw new RegionDestroyedException(
          LocalizedStrings.DiskRegion_THE_DISKREGION_HAS_BEEN_CLOSED_OR_DESTROYED
              .toLocalizedString(), dr.getName());
    }
    // mbid: if reference has changed (only clear
    // can change the reference) then we should not try to remove again.
    // Entry will not be found in diskRegion.
    // So if reference has changed, do nothing.
    if (!dr.didClearCountChange()) {
      long start = this.stats.startRemove();
      OplogSet oplogSet = getOplogSet(dr);
      oplogSet.remove(region, entry, async, isClear);
      dr.getStats().endRemove(start, this.stats.endRemove(start));
    } else {
      throw new RegionClearedException(
          LocalizedStrings.DiskRegion_CLEAR_OPERATION_ABORTING_THE_ONGOING_ENTRY_DESTRUCTION_OPERATION_FOR_ENTRY_WITH_DISKID_0
              .toLocalizedString(entry.getDiskId()));
    }
  } finally {
    if (!async) {
      releaseReadLock(dr);
    }
  }
}
// Outstanding pauser queued by pauseFlusherForTesting; null when not paused.
private FlushPauser fp = null;

/**
 * After tests call this method they must call flushForTesting.
 */
public void pauseFlusherForTesting() {
  assert this.fp == null;
  this.fp = new FlushPauser();
  try {
    // force the pauser onto the queue; the flusher will block in its doFlush
    addAsyncItem(this.fp, true);
  } catch (InterruptedException ex) {
    Thread.currentThread().interrupt();
    throw new IllegalStateException("unexpected interrupt in test code", ex);
  }
}

/**
 * Releases a pauser installed by pauseFlusherForTesting (if any) and then
 * forces a full synchronous flush.
 */
public void flushForTesting() {
  if (this.fp != null) {
    this.fp.unpause();
    this.fp = null;
  }
  forceFlush();
}
// //////////////////// Implementation Methods //////////////////////
/**
 * This function is having a default visiblity as it is used in the
 * OplogJUnitTest for a bug verification of Bug # 35012
 *
 * All callers must have {@link #releaseWriteLock(DiskRegion)} in a matching
 * finally block.
 *
 * Note that this is no longer implemented by getting a write lock but instead
 * locks the same lock that acquireReadLock does.
 *
 * @since 5.1
 */
private void acquireWriteLock(DiskRegion dr) {
  // @todo darrel: this is no longer a write lock need to change method name
  dr.acquireWriteLock();
}

/**
 * Counterpart of {@link #acquireWriteLock(DiskRegion)}.
 *
 * This function is having a default visiblity as it is used in the
 * OplogJUnitTest for a bug verification of Bug # 35012
 *
 * @since 5.1
 */
private void releaseWriteLock(DiskRegion dr) {
  // @todo darrel: this is no longer a write lock need to change method name
  dr.releaseWriteLock();
}
/**
 * All callers must have {@link #releaseReadLock(DiskRegion)} in a matching
 * finally block. Note that this is no longer implemented by getting a read
 * lock but instead locks the same lock that acquireWriteLock does.
 *
 * @throws RegionDestroyedException if the region is closed; the read lock is
 *         released before throwing
 * @since 5.1
 */
void acquireReadLock(DiskRegion dr) {
  dr.basicAcquireReadLock();
  synchronized (this.closeRegionGuard) {
    // Track in-progress entry ops under closeRegionGuard so region close can
    // wait for them to drain (see releaseReadLock).
    entryOpsCount.incrementAndGet();
    if (dr.isRegionClosed()) {
      // NOTE(review): this releases via dr.releaseReadLock() (not
      // basicReleaseReadLock) and does not decrement entryOpsCount before
      // throwing — confirm against DiskRegion whether that is intentional.
      dr.releaseReadLock();
      throw new RegionDestroyedException(
          "The DiskRegion has been closed or destroyed", dr.getName());
    }
  }
}
/**
 * Releases the per-region entry lock and, when the last in-progress entry
 * operation finishes on a closed region, notifies the thread waiting on
 * closeRegionGuard.
 *
 * @since 5.1
 */
void releaseReadLock(DiskRegion dr) {
  dr.basicReleaseReadLock();
  int currentOpsInProgress = entryOpsCount.decrementAndGet();
  // Potential candiate for notifying in case of disconnect
  if (currentOpsInProgress == 0) {
    synchronized (this.closeRegionGuard) {
      // re-check under the guard: another op may have started meanwhile
      if (dr.isRegionClosed() && entryOpsCount.get() == 0) {
        this.closeRegionGuard.notify();
      }
    }
  }
}
// Forces the persistent oplog set to roll to a new child oplog.
public void forceRoll() {
  persistentOplogs.forceRoll(null);
}

// Forces the current child oplog (if any) to roll; blocks until the roll
// completes when blocking is true.
public void forceRoll(boolean blocking) {
  Oplog child = persistentOplogs.getChild();
  if (child != null) {
    child.forceRolling(null, blocking);
  }
}
/**
 * Forces an oplog roll for the given disk region. No-op for non-backup
 * (overflow-only) regions. For async regions with no item/time trigger a
 * flush is forced first so queued writes land before the roll.
 *
 * @since 5.1
 */
public void forceRolling(DiskRegion dr) {
  if (!dr.isBackup())
    return;
  if (!dr.isSync() && this.maxAsyncItems == 0 && getTimeInterval() == 0) {
    forceFlush();
  }
  acquireReadLock(dr);
  try {
    PersistentOplogSet oplogSet = getPersistentOplogSet(dr);
    oplogSet.forceRoll(dr);
  } finally {
    releaseReadLock(dr);
  }
}
// Forces a compaction across the whole disk store.
public boolean forceCompaction() {
  return basicForceCompaction(null);
}

// Forces a compaction for a single backup region; returns false for
// overflow-only regions.
public boolean forceCompaction(DiskRegion dr) {
  if (!dr.isBackup())
    return false;
  acquireReadLock(dr);
  try {
    return basicForceCompaction(dr);
  } finally {
    releaseReadLock(dr);
  }
}
/**
 * Get serialized form of data off the disk
 *
 * @param id disk id of the entry to read
 * @since gemfire5.7_hotfix
 */
public Object getSerializedData(DiskRegion dr, DiskId id) {
  return convertBytesAndBitsToSerializedForm(getBytesAndBits(dr, id, true));
}

/**
 * Same as {@link #getSerializedData} but without taking the region read
 * lock; callers are responsible for any required synchronization.
 */
public Object getSerializedDataWithoutLock(DiskRegionView dr, DiskId id,
    boolean faultIn) {
  return convertBytesAndBitsToSerializedForm(getBytesAndBitsWithoutLock(dr,
      id, faultIn, false));
}
/**
 * Fails fast when the async flusher thread has already terminated: during a
 * close this surfaces as a CacheClosedException (bug 41305), otherwise as a
 * DiskAccessException. No-op while the flusher is alive.
 */
private void checkForFlusherThreadTermination() {
  if (!this.flusherThreadTerminated) {
    return;
  }
  final String message = "Could not schedule asynchronous write because the flusher thread had been terminated.";
  if (this.isClosing()) {
    // for bug 41305
    throw this.cache.getCacheClosedException(message, null);
  }
  throw new DiskAccessException(message, this);
}
/**
 * Handles an item that could not be queued because the async queue is full
 * by performing the write synchronously on the calling thread (prevents
 * deadlock with the flusher).
 */
private void handleFullAsyncQueue(Object o) {
  AsyncDiskEntry ade = (AsyncDiskEntry) o;
  LocalRegion region = ade.region;
  try {
    VersionTag tag = ade.tag;
    if (ade.versionOnly) {
      // version-only items carry no DiskEntry; just flush the tag
      if (tag != null) {
        DiskEntry.Helper.doAsyncFlush(tag, region);
      }
    } else {
      DiskEntry entry = ade.de;
      DiskEntry.Helper.handleFullAsyncQueue(entry, region, tag);
    }
  } catch (RegionDestroyedException ex) {
    // Normally we flush before closing or destroying a region
    // but in some cases it is closed w/o flushing.
    // So just ignore it; see bug 41305.
  }
}
/**
 * Queues the given region on the async queue so the flusher will persist its
 * RVV state (LocalRegion items are handled specially by FlusherThread).
 * Best-effort: an interrupted enqueue is tolerated.
 */
public void addDiskRegionToQueue(LocalRegion lr) {
  try {
    addAsyncItem(lr, true);
  } catch (InterruptedException ignore) {
    // Restore the interrupt status (matches the rest of this class) instead
    // of swallowing it entirely. The enqueue failure itself is tolerable:
    // it means the RVVTrusted is not written, and the region will
    // automatically do full-GII.
    Thread.currentThread().interrupt();
  }
}
/**
 * Adds an item (AsyncDiskEntry, LocalRegion, or FlushNotifier) to the async
 * queue. With forceAsync the put blocks until space is available; otherwise
 * a full queue causes a synchronous write on the caller's thread. Wakes the
 * flusher when the item limit is reached.
 *
 * @throws InterruptedException if blocked while force-putting
 * @throws RegionDestroyedException if the item's region was destroyed
 */
private void addAsyncItem(Object item, boolean forceAsync)
    throws InterruptedException {
  synchronized (this.lock) { // fix for bug 41390
    // 43312: since this thread has gained dsi.lock, dsi.clear() should have
    // finished. We check if clear() has happened after ARM.putEntryIfAbsent()
    if (item instanceof AsyncDiskEntry) {
      AsyncDiskEntry ade = (AsyncDiskEntry) item;
      DiskRegion dr = ade.region.getDiskRegion();
      if (dr.didClearCountChange() && !ade.versionOnly) {
        // a clear raced this op; the entry no longer needs to be written
        return;
      }
      if (ade.region.isDestroyed) {
        throw new RegionDestroyedException(ade.region.toString(), ade.region.getFullPath());
      }
    }
    checkForFlusherThreadTermination();
    if (forceAsync) {
      this.asyncQueue.forcePut(item);
    } else {
      if (!this.asyncQueue.offer(item)) {
        // queue is full so do a sync write to prevent deadlock
        handleFullAsyncQueue(item);
        // return early since we didn't add it to the queue
        return;
      }
    }
    this.stats.incQueueSize(1);
  }
  // this.logger.info(LocalizedStrings.DEBUG, "DEBUG addAsyncItem=" + item);
  if (this.maxAsyncItems > 0) {
    // wake the flusher once enough items have accumulated
    if (checkAsyncItemLimit()) {
      synchronized (this.asyncMonitor) {
        this.asyncMonitor.notifyAll();
      }
    }
  }
}
/**
 * Removes a previously queued item from the async queue, keeping the queue
 * size statistic in step.
 */
private void rmAsyncItem(Object item) {
  final boolean removed = this.asyncQueue.remove(item);
  if (removed) {
    this.stats.incQueueSize(-1);
  }
}
/**
 * Marks the start of an async write: rejects the write if the flusher is
 * stopping (CancelException when the store is closed, DiskAccessException
 * otherwise — typically no space left on device), then bumps the pending
 * enqueue count and the write statistics.
 *
 * @return the start timestamp to pass to endAsyncWrite
 */
private long startAsyncWrite(DiskRegion dr) {
  if (this.stoppingFlusher) {
    if (isClosed()) {
      throw (new Stopper()).generateCancelledException(null); // fix for bug
      // 41141
    } else {
      throw new DiskAccessException(
          "The disk store is still open, but flusher is stopped, probably no space left on device",
          this);
    }
  } else {
    this.pendingAsyncEnqueue.incrementAndGet();
  }
  // logger.info(LocalizedStrings.DEBUG, "DEBUG startAsyncWrite");
  dr.getStats().startWrite();
  return this.stats.startWrite();
}
/**
 * Marks the end of an async write started by startAsyncWrite: drops the
 * pending enqueue count and records write statistics (bytes written only for
 * items that carry an actual DiskEntry).
 */
private void endAsyncWrite(AsyncDiskEntry ade, DiskRegion dr, long start) {
  this.pendingAsyncEnqueue.decrementAndGet();
  dr.getStats().endWrite(start, this.stats.endWrite(start));
  if (ade.versionOnly) {
    // for versionOnly = true ade.de is null and no value bytes were written
    return;
  }
  final long valueBytes = ade.de.getDiskId().getValueLength();
  dr.getStats().incWrittenBytes(valueBytes);
}
/**
 * Schedules an async disk entry on the flusher's queue, wrapping the enqueue
 * with start/end write accounting. An interrupted enqueue clears the entry's
 * pending-async bit so it is not considered queued.
 *
 * @since prPersistSprint1
 */
public void scheduleAsyncWrite(AsyncDiskEntry ade) {
  DiskRegion dr = ade.region.getDiskRegion();
  long start = startAsyncWrite(dr);
  try {
    try {
      addAsyncItem(ade, false);
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
      ade.region.getCancelCriterion().checkCancelInProgress(ie);
      // @todo: I'm not sure we need an error here
      if (!ade.versionOnly)
        ade.de.getDiskId().setPendingAsync(false);
    }
  } finally {
    endAsyncWrite(ade, dr, start);
  }
}
/**
 * Marks the given disk id as no longer pending an async write. The item is
 * left in the async buffer; currently we just wait for the flusher to
 * discover the cleared bit and drop it.
 *
 * @since prPersistSprint1
 */
public void unscheduleAsyncWrite(DiskId did) {
  if (did == null) {
    return;
  }
  did.setPendingAsync(false);
}
/**
 * This queue can contain DiskEntry or FlushNotifier items (and LocalRegions
 * queued via addDiskRegionToQueue).
 */
private final ForceableLinkedBlockingQueue asyncQueue;
// Guards drainList: fillDrainList and clearDrainList synchronize on it.
private final Object drainSync = new Object();
// Items most recently drained from asyncQueue by the flusher.
private ArrayList drainList = null;

// Drains the async queue into a fresh drainList; returns how many items
// were drained.
private int fillDrainList() {
  synchronized (this.drainSync) {
    this.drainList = new ArrayList(asyncQueue.size());
    int drainCount = asyncQueue.drainTo(this.drainList);
    return drainCount;
  }
}

// Returns the flusher's current drain list (deliberately unsynchronized;
// see clearDrainList for how concurrent clears are handled).
private ArrayList getDrainList() {
  return this.drainList;
}
/**
 * To fix bug 41770 clear the list in a way that will not break a concurrent
 * iterator that is not synced on drainSync. Only clear from it entries on the
 * given region. Currently we do this by clearing the isPendingAsync bit on
 * each entry in this list.
 *
 * @param rvv when non-null, only entries whose version is contained in this
 *        RVV are cleared (see shouldClear)
 */
void clearDrainList(LocalRegion r, RegionVersionVector rvv) {
  synchronized (this.drainSync) {
    if (this.drainList == null)
      return;
    Iterator it = this.drainList.iterator();
    while (it.hasNext()) {
      Object o = it.next();
      if (o instanceof AsyncDiskEntry) {
        AsyncDiskEntry ade = (AsyncDiskEntry) o;
        // note: items are not removed from the list, only unmarked, so a
        // concurrent iterator stays valid
        if (shouldClear(r, rvv, ade) && ade.de != null) {
          unsetPendingAsync(ade);
        }
      }
    }
  }
}
/**
 * Decides whether a queued async item belongs to the region being cleared
 * and, when an RVV is supplied, whether its version falls inside that RVV.
 */
private boolean shouldClear(LocalRegion r, RegionVersionVector rvv,
    AsyncDiskEntry ade) {
  if (ade.region != r) {
    return false;
  }
  if (rvv == null) {
    // No RVV: remove all of the async items for this region.
    return true;
  }
  // Clearing based on an RVV: only remove entries contained in the RVV.
  if (ade.versionOnly) {
    return rvv.contains(ade.tag.getMemberID(), ade.tag.getRegionVersion());
  }
  final VersionStamp stamp = ade.de.getVersionStamp();
  VersionSource member = stamp.getMemberID();
  if (member == null) {
    // For overflow only regions, the version member may be null because that
    // represents the local internal distributed member.
    member = r.getVersionMember();
  }
  return rvv.contains(member, stamp.getRegionVersion());
}
/**
 * Clear the pending async bit on a disk entry.
 */
private void unsetPendingAsync(AsyncDiskEntry ade) {
  final DiskId did = ade.de.getDiskId();
  if (did == null || !did.isPendingAsync()) {
    return;
  }
  synchronized (did) {
    did.setPendingAsync(false);
  }
}
// The async flusher thread; created by startAsyncFlusher.
private Thread flusherThread;
/**
 * How many threads are waiting to do a put on asyncQueue?
 */
private final AtomicInteger pendingAsyncEnqueue = new AtomicInteger();
// Set when shutdown begins: new writes are rejected (see startAsyncWrite).
private volatile boolean stoppingFlusher;
// Tells the flusher loop to exit.
private volatile boolean stopFlusher;
// Set by the flusher thread itself on exit.
private volatile boolean flusherThreadTerminated;
/**
 * Creates and starts the daemon flusher thread that drains the async queue
 * (see FlusherThread).
 */
private void startAsyncFlusher() {
  final String thName = LocalizedStrings.DiskRegion_ASYNCHRONOUS_DISK_WRITER_0
      .toLocalizedString(new Object[] { getName() });
  this.flusherThread = new Thread(LogWriterImpl.createThreadGroup(
      LocalizedStrings.DiskRegion_DISK_WRITERS.toLocalizedString(),
      getCache().getDistributedSystem().getLogWriter()
          .convertToLogWriterI18n()), new FlusherThread(), thName);
  this.flusherThread.setDaemon(true);
  this.flusherThread.start();
}
/**
 * Stops the async flusher: rejects new enqueues, drains pending ones, signals
 * the flusher loop to exit, and joins the flusher thread until it reports
 * termination.
 */
protected void stopAsyncFlusher() {
  this.stoppingFlusher = true;
  do {
    // Need to keep looping as long as we have more threads
    // that are already pending a put on the asyncQueue.
    // New threads will fail because stoppingFlusher has been set.
    // See bug 41141.
    forceFlush();
  } while (this.pendingAsyncEnqueue.get() > 0);
  synchronized (asyncMonitor) {
    this.stopFlusher = true;
    this.asyncMonitor.notifyAll();
  }
  // join in short slices so an interrupt is noticed and re-asserted while we
  // keep waiting for the flusher to finish
  while (!this.flusherThreadTerminated) {
    try {
      this.flusherThread.join(100);
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
      getCache().getCancelCriterion().checkCancelInProgress(ie);
    }
  }
}
/**
 * Test helper: waits up to waitMs for the flusher thread to finish.
 *
 * @return true if the join completed without interruption, false if the
 *         calling thread was interrupted (interrupt status is restored)
 */
public boolean testWaitForAsyncFlusherThread(int waitMs) {
  try {
    this.flusherThread.join(waitMs);
  } catch (InterruptedException ie) {
    Thread.currentThread().interrupt();
    return false;
  }
  return true;
}
/**
 * force a flush but do it async (don't wait for the flush to complete).
 */
public void asynchForceFlush() {
  try {
    flushFlusher(true);
  } catch (InterruptedException ignore) {
    // Restore the interrupt status rather than discarding it, matching
    // forceFlush(); the flush request itself remains best-effort.
    Thread.currentThread().interrupt();
  }
}
// Returns the cache this disk store belongs to.
public GemFireCacheImpl getCache() {
  return this.cache;
}

// Public flush API: synonym for forceFlush().
public void flush() {
  forceFlush();
}
/**
 * Flush all async queue data, and fsync all oplogs to disk.
 * Holds the compactor write lock so the oplog list is stable while syncing.
 */
public final void flushAndSync() {
  forceFlush();
  acquireCompactorWriteLock();
  try {
    for (Oplog oplog : getPersistentOplogSet(null).getAllOplogs()) {
      oplog.flushAllAndSync();
    }
  } finally {
    releaseCompactorWriteLock();
  }
}
/**
 * Variant of {@link #flushAndSync()} that does not take the compactor write
 * lock.
 *
 * NOTE(review): the noCompactorLock parameter is never consulted — this
 * overload always skips the lock and always passes true to flushAllAndSync.
 * Confirm whether that is intentional.
 */
public final void flushAndSync(boolean noCompactorLock) {
  forceFlush();
  for (Oplog oplog : getPersistentOplogSet(null).getAllOplogs()) {
    oplog.flushAllAndSync(true);
  }
}
/**
 * Synchronously flushes the async queue: queues a FlushNotifier and waits
 * for the flusher to process it. An interrupt is re-asserted and translated
 * via the cancel criterion.
 */
public void forceFlush() {
  try {
    flushFlusher(false);
  } catch (InterruptedException ie) {
    Thread.currentThread().interrupt();
    getCache().getCancelCriterion().checkCancelInProgress(ie);
  }
}
/**
 * Returns true when the flusher has stopped, has been asked to stop, was
 * never started, or its thread is no longer alive.
 */
private boolean isFlusherTerminated() {
  if (this.stopFlusher || this.flusherThreadTerminated) {
    return true;
  }
  final Thread t = this.flusherThread;
  return t == null || !t.isAlive();
}
/**
 * Queues a FlushNotifier and (unless async) waits for the flusher to reach
 * it. If the flusher terminated after the notifier was queued, the notifier
 * is removed again instead of waiting.
 *
 * @param async true to return without waiting for the flush to complete
 */
private void flushFlusher(boolean async) throws InterruptedException {
  if (!isFlusherTerminated()) {
    FlushNotifier fn = new FlushNotifier();
    addAsyncItem(fn, true);
    // re-check: the flusher may have died while we were enqueueing
    if (isFlusherTerminated()) {
      rmAsyncItem(fn);
    } else {
      incForceFlush();
      if (!async) {
        fn.waitForFlush();
      }
    }
  }
}
/**
 * Registers a force-flush request and wakes the flusher. The increment is
 * done inside the monitor so the flusher cannot miss it (bug 41654).
 */
private void incForceFlush() {
  synchronized (this.asyncMonitor) {
    this.forceFlushCount.incrementAndGet(); // moved inside sync to fix bug
    // 41654
    this.asyncMonitor.notifyAll();
  }
}
/**
 * Return true if a non-zero value is found and the decrement was done.
 * Always returns true once the flusher has been told to stop.
 */
private boolean checkAndClearForceFlush() {
  if (stopFlusher) {
    return true;
  }
  for (;;) {
    final int pending = this.forceFlushCount.get();
    if (pending <= 0) {
      return false;
    }
    if (this.forceFlushCount.compareAndSet(pending, 0)) {
      return true;
    }
    // CAS lost a race with a concurrent incForceFlush; re-read and retry.
  }
}
/**
 * Test-only notifier that, when the flusher dequeues it, makes the flusher
 * block inside doFlush until unpause() is called (see
 * pauseFlusherForTesting / flushForTesting).
 */
private class FlushPauser extends FlushNotifier {
  @Override
  public synchronized void doFlush() {
    // this is called by flusher thread so have it wait
    try {
      super.waitForFlush();
    } catch (InterruptedException ignore) {
      Thread.currentThread().interrupt();
    }
  }

  // Releases the flusher thread blocked in doFlush above.
  public synchronized void unpause() {
    super.doFlush();
  }

  @Override
  protected boolean isStoppingFlusher() {
    // also unblock when the disk store starts shutting the flusher down
    return stoppingFlusher;
  }
}
/**
 * Marker item placed on the async queue so a caller can wait until the
 * flusher has drained everything queued ahead of it; the flusher invokes
 * doFlush() when it reaches the notifier.
 */
private class FlushNotifier {
  // set once the flusher has processed this notifier
  private boolean flushed;

  protected boolean isStoppingFlusher() {
    return false;
  }

  public synchronized void waitForFlush() throws InterruptedException {
    // poll with a timeout so flusher termination/stop is noticed even if
    // doFlush is never called
    while (!flushed && !isFlusherTerminated() && !isStoppingFlusher()) {
      wait(333);
    }
  }

  public synchronized void doFlush() {
    this.flushed = true;
    notifyAll();
  }
}
/**
 * Return true if we have enough async items to do a flush
 * (only meaningful when maxAsyncItems &gt; 0; callers check that).
 */
private boolean checkAsyncItemLimit() {
  return this.asyncQueue.size() >= this.maxAsyncItems;
}
/**
 * The async flusher: repeatedly waits until a flush is due (item limit,
 * time interval, or an explicit force-flush), drains the async queue, and
 * writes the drained items to the oplogs. A fatal DiskAccessException from
 * the loop is forwarded to handleDiskAccessException on exit.
 */
private class FlusherThread implements Runnable {
  /**
   * Blocks until a flush should run. Returns false when the flusher has been
   * told to stop. The wait strategy depends on configuration:
   * item-limit and/or time-interval triggered, or force-flush only.
   */
  private boolean waitUntilFlushIsReady() throws InterruptedException {
    if (maxAsyncItems > 0) {
      final long time = getTimeInterval();
      synchronized (asyncMonitor) {
        if (time > 0) {
          // wake on force-flush, item limit, or interval expiry
          long nanosRemaining = TimeUnit.MILLISECONDS.toNanos(time);
          final long endTime = System.nanoTime() + nanosRemaining;
          boolean done = checkAndClearForceFlush() || checkAsyncItemLimit();
          while (!done && nanosRemaining > 0) {
            TimeUnit.NANOSECONDS.timedWait(asyncMonitor, nanosRemaining);
            done = checkAndClearForceFlush() || checkAsyncItemLimit();
            if (!done) {
              nanosRemaining = endTime - System.nanoTime();
            }
          }
        } else {
          // no interval: wake on force-flush or item limit only
          boolean done = checkAndClearForceFlush() || checkAsyncItemLimit();
          while (!done) {
            asyncMonitor.wait();
            done = checkAndClearForceFlush() || checkAsyncItemLimit();
          }
        }
      }
    } else {
      long time = getTimeInterval();
      if (time > 0) {
        // interval only: wake on force-flush or interval expiry
        long nanosRemaining = TimeUnit.MILLISECONDS.toNanos(time);
        final long endTime = System.nanoTime() + nanosRemaining;
        synchronized (asyncMonitor) {
          boolean done = checkAndClearForceFlush();
          while (!done && nanosRemaining > 0) {
            TimeUnit.NANOSECONDS.timedWait(asyncMonitor, nanosRemaining);
            done = checkAndClearForceFlush();
            if (!done) {
              nanosRemaining = endTime - System.nanoTime();
            }
          }
        }
      } else {
        // wait for a forceFlush
        synchronized (asyncMonitor) {
          boolean done = checkAndClearForceFlush();
          while (!done) {
            asyncMonitor.wait();
            done = checkAndClearForceFlush();
          }
        }
      }
    }
    return !stopFlusher;
  }

  // Flushes the current child oplog's buffered writes.
  private void flushChild() {
    persistentOplogs.flushChild();
  }

  public void run() {
    DiskAccessException fatalDae = null;
    if (logger.fineEnabled()) {
      logger.fine("Async writer thread started");
    }
    boolean doingFlush = false;
    try {
      while (waitUntilFlushIsReady()) {
        int drainCount = fillDrainList();
        if (drainCount > 0) {
          stats.incQueueSize(-drainCount);
          Iterator it = getDrainList().iterator();
          while (it.hasNext()) {
            Object o = it.next();
            if (o instanceof FlushNotifier) {
              // a waiter wants everything before this point on disk
              flushChild();
              if (LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER) {
                if (!it.hasNext()) {
                  doingFlush = false;
                  CacheObserverHolder.getInstance().afterWritingBytes();
                }
              }
              ((FlushNotifier) o).doFlush();
            } else {
              try {
                if (o!=null && o instanceof LocalRegion) {
                  // region queued by addDiskRegionToQueue: persist its RVVs
                  LocalRegion lr = (LocalRegion)o;
                  lr.getDiskRegion().writeRVV(null, true);
                  lr.getDiskRegion().writeRVVGC(lr);
                } else {
                  AsyncDiskEntry ade = (AsyncDiskEntry) o;
                  LocalRegion region = ade.region;
                  VersionTag tag = ade.tag;
                  if (ade.versionOnly) {
                    DiskEntry.Helper.doAsyncFlush(tag, region);
                  } else {
                    DiskEntry entry = ade.de;
                    // We check isPendingAsync
                    if (entry.getDiskId().isPendingAsync()) {
                      if (LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER) {
                        if (!doingFlush) {
                          doingFlush = true;
                          CacheObserverHolder.getInstance().goingToFlush();
                        }
                      }
                      DiskEntry.Helper.doAsyncFlush(entry, region, tag);
                    } else {
                      // If it is no longer pending someone called
                      // unscheduleAsyncWrite
                      // so we don't need to write the entry, but
                      // if we have a version tag we need to record the
                      // operation
                      // to update the RVV
                      if (tag != null) {
                        DiskEntry.Helper.doAsyncFlush(tag, region);
                      }
                    }
                  }
                } // else
              } catch (RegionDestroyedException ex) {
                // Normally we flush before closing or destroying a region
                // but in some cases it is closed w/o flushing.
                // So just ignore it; see bug 41305.
              }
            }
          }
          flushChild();
          if (doingFlush) {
            doingFlush = false;
            if (LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER) {
              CacheObserverHolder.getInstance().afterWritingBytes();
            }
          }
        }
      }
    } catch (InterruptedException ie) {
      flushChild();
      Thread.currentThread().interrupt();
      getCache().getCancelCriterion().checkCancelInProgress(ie);
      throw new IllegalStateException(
          "Async writer thread stopping due to unexpected interrupt");
    } catch (DiskAccessException dae) {
      // a ClosedByInterruptException cause during shutdown is expected
      boolean okToIgnore = dae.getCause() instanceof ClosedByInterruptException;
      if (!okToIgnore || !stopFlusher) {
        fatalDae = dae;
      }
    } catch (CancelException ignore) {
      // the above checkCancelInProgress will throw a CancelException
      // when we are being shutdown
    } catch(Throwable t) {
      logger.severe(LocalizedStrings.DiskStoreImpl_FATAL_ERROR_ON_FLUSH, t);
      fatalDae = new DiskAccessException(LocalizedStrings.DiskStoreImpl_FATAL_ERROR_ON_FLUSH.toLocalizedString(), t, DiskStoreImpl.this);
    } finally {
      if (logger.fineEnabled()) {
        logger.fine("Async writer thread stopped. Pending opcount="
            + asyncQueue.size());
      }
      flusherThreadTerminated = true;
      stopFlusher = true; // set this before calling handleDiskAccessException
      // or it will hang
      if (fatalDae != null) {
        handleDiskAccessException(fatalDae, true);
      }
    }
  }
}
// simple code
/** Extension of the oplog lock file * */
private static final String LOCK_FILE_EXT = ".lk";
// Exclusive OS-level lock held on lockFile while this store is open.
private FileLock fl;
// The "DRLK_IF<name>.lk" file created by createLockFile.
private File lockFile;
/**
 * Creates and exclusively locks this disk store's lock file
 * ("DRLK_IF&lt;name&gt;.lk") in the info file directory, so no other process
 * can open the same disk store concurrently.
 *
 * @throws DiskAccessException if the file could not be created or locked
 *         after all retries
 */
private void createLockFile(String name) throws DiskAccessException {
  File f = new File(getInfoFileDir().getDir(), "DRLK_IF" + name
      + LOCK_FILE_EXT);
  if (logger.fineEnabled()) {
    logger.fine("Creating lock file " + f.getAbsolutePath()/*, new RuntimeException("STACK")*/);
  }
  FileOutputStream fs = null;
  // 41734: A known NFS issue on Redhat. The thread created the directory,
  // but when it tries to lock, it can fail with permission denied or
  // input/output error. To work around it, retry up to 100 times with a
  // 50ms pause between attempts.
  int cnt = 0;
  DiskAccessException dae = null;
  do {
    try {
      fs = new FileOutputStream(f);
      this.lockFile = f;
      this.fl = fs.getChannel().tryLock();
      if (fl == null) {
        // another process already holds the lock; surface as IOException so
        // the retry/translation logic below applies
        try {
          fs.close();
        } catch (IOException ignore) {
        }
        throw new IOException(
            LocalizedStrings.Oplog_THE_FILE_0_IS_BEING_USED_BY_ANOTHER_PROCESS
                .toLocalizedString(f));
      }
      f.deleteOnExit();
      dae = null;
      break;
    } catch (IOException ex) {
      if (fs != null) {
        try {
          fs.close();
        } catch (IOException ignore) {
        }
      }
      dae = new DiskAccessException(
          LocalizedStrings.Oplog_COULD_NOT_LOCK_0.toLocalizedString(f
              .getPath()), ex, this);
    } catch (IllegalStateException ex2) {
      // OverlappingFileLockException (an IllegalStateException) needs to be
      // caught here; see bug 41290
      if (fs != null) {
        try {
          fs.close();
        } catch (IOException ignore) {
        }
      }
      dae = new DiskAccessException(
          LocalizedStrings.Oplog_COULD_NOT_LOCK_0.toLocalizedString(f
              .getPath()), ex2, this);
    }
    cnt++;
    try {
      Thread.sleep(50);
    } catch (InterruptedException e) {
      // FIX: restore the interrupt status instead of printing a stack trace
      // and discarding it; the retry loop still terminates via the counter.
      Thread.currentThread().interrupt();
    }
  } while (cnt < 100);
  if (dae != null) {
    throw dae;
  }
  // logged to help debug 41734
  logger.info(LocalizedStrings.DEBUG, "Locked disk store " + name
      + " for exclusive access in directory: " + getInfoFileDir().getDir());
}
/**
 * Releases the exclusive file lock taken by createLockFile and deletes the
 * lock file. Safe to call when no lock is held.
 */
void closeLockFile() {
  FileLock myfl = this.fl;
  if (myfl != null) {
    try {
      FileChannel fc = myfl.channel();
      if (myfl.isValid()) {
        myfl.release();
      }
      fc.close();
    } catch (IOException ignore) {
    }
    this.fl = null;
  }
  File f = this.lockFile;
  if (f != null) {
    if (f.delete()) {
      if (logger.fineEnabled()) {
        logger.fine("Deleted lock file " + f);
      }
    } else if (f.exists()) {
      if (logger.fineEnabled()) {
        logger.fine("Could not delete lock file " + f);
      }
    }
  }
  // logged to help debug 41734
  logger.info(LocalizedStrings.DEBUG, "Unlocked disk store " + name);
}
/**
 * Returns a printable name for the product version recovered from disk;
 * files with no recorded version are reported as "GFE pre-7.0".
 */
private String getRecoveredGFVersionName() {
  final Version version = getRecoveredGFVersion();
  if (version == null) {
    return "GFE pre-7.0";
  }
  return version.toString();
}
/**
 * Searches the given disk dirs for the files and creates the Oplog objects
 * wrapping those files. Also creates the lock file and the init file, and
 * rejects stores that need (or already had) a version conversion.
 *
 * @param needsOplogs passed through to the persistent oplog set when
 *          creating oplogs from the discovered files
 */
private void loadFiles(boolean needsOplogs) {
  String partialFileName = getName();
  boolean foundIfFile = false;
  {
    // Figure out what directory the init file is in (if we even have one).
    // Also detect multiple if files and fail (see bug 41883).
    int ifDirIdx = 0;
    int idx = 0;
    String ifName = "BACKUP" + name + DiskInitFile.IF_FILE_EXT;
    for (DirectoryHolder dh : this.directories) {
      File f = new File(dh.getDir(), ifName);
      if (f.exists()) {
        if (foundIfFile) {
          throw new IllegalStateException(
              "Detected multiple disk store initialization files named \""
                  + ifName
                  + "\". This disk store directories must only contain one initialization file.");
        } else {
          foundIfFile = true;
          ifDirIdx = idx;
        }
      }
      idx++;
    }
    this.infoFileDirIndex = ifDirIdx;
  }
  // get a high level lock file first; if we can't get this then
  // this disk store is already open by someone else
  createLockFile(partialFileName);
  boolean finished = false;
  try {
    Map persistentBackupFiles = persistentOplogs
        .findFiles(partialFileName);
    {
      boolean backupFilesExist = !persistentBackupFiles.isEmpty();
      boolean ifRequired = backupFilesExist || isOffline();
      // If this offline disk-store is used by the data extractor tool, we
      // still need the cache around to process other diskstores, so we must
      // not close the cache on an IllegalStateException caused by a
      // corrupted IF file.
      this.initFile = new DiskInitFile(partialFileName, this, ifRequired,
          persistentBackupFiles.keySet());
      if (this.upgradeVersionOnly) {
        // upgrade-only mode: fail if the store is already at (or beyond)
        // the current product version
        if (Version.CURRENT.compareTo(getRecoveredGFVersion()) <= 0 && !dataExtraction) {
          if (getCache() != null) {
            getCache().close();
          }
          throw new IllegalStateException(
              LocalizedStrings.DiskStoreAlreadyInVersion_0
                  .toLocalizedString(getRecoveredGFVersionName()));
        }
      } else {
        if (Version.GFE_70.compareTo(getRecoveredGFVersion()) > 0) {
          // TODO: In each new version, need to modify the highest version
          // that needs conversion.
          if (getCache() != null && !dataExtraction) {
            getCache().close();
          }
          throw new IllegalStateException(
              LocalizedStrings.DiskStoreStillAtVersion_0
                  .toLocalizedString(getRecoveredGFVersionName()));
        }
      }
    }
    {
      // overflow data is worthless after a restart, so remove all of it
      FilenameFilter overflowFileFilter = new DiskStoreFilter(OplogType.OVERFLOW, true,
          partialFileName);
      for (DirectoryHolder dh : this.directories) {
        File dir = dh.getDir();
        // delete all overflow files
        File[] files = FileUtil.listFiles(dir, overflowFileFilter);
        for (File file : files) {
          boolean deleted = file.delete();
          if (!deleted && file.exists()) {
            if (logger.fineEnabled()) {
              logger.fine("Could not delete file " + file);
            }
          }
        }
      }
    }
    persistentOplogs.createOplogs(needsOplogs, persistentBackupFiles);
    finished = true;
    // Log a message with the disk store id, indicating whether we recovered
    // or created this disk store.
    if (foundIfFile) {
      logger.info(
          LocalizedStrings.DiskStoreImpl_RecoveredDiskStore_0_With_Id_1,
          new Object[] { getName(), getDiskStoreID() });
    } else {
      logger.info(
          LocalizedStrings.DiskStoreImpl_CreatedDiskStore_0_With_Id_1,
          new Object[] { getName(), getDiskStoreID() });
    }
  } finally {
    // on any failure release the lock file and close the init file so the
    // store can be opened again later
    if (!finished) {
      closeLockFile();
      if (getDiskInitFile() != null) {
        getDiskInitFile().close();
      }
    }
  }
}
/**
 * Closes this disk store's statistics and every directory holder. The disk
 * stats are kept at PR level, so for a bucket region the stats object is
 * not closed elsewhere; this is the store-wide teardown.
 */
private void statsClose() {
  this.stats.close();
  if (this.directories != null) {
    for (DirectoryHolder holder : this.directories) {
      holder.close();
    }
  }
}
/** Runs region recovery unless the oplogs have already been recovered once. */
void initializeIfNeeded(boolean initialRecovery) {
  if (persistentOplogs.alreadyRecoveredOnce.get()) {
    return; // recovery already happened; nothing to do
  }
  recoverRegionsThatAreReady(initialRecovery);
}
/** Performs the first-time recovery pass for this disk store. */
void doInitialRecovery() {
  initializeIfNeeded(true);
}
/**
 * Reads the oplog files and loads their contents into every region that is
 * ready to be recovered; delegates to the persistent oplog set.
 */
public final void recoverRegionsThatAreReady(boolean initialRecovery) {
  persistentOplogs.recoverRegionsThatAreReady(initialRecovery);
}
/** Schedules index recovery over the given oplogs, at most once per store. */
void scheduleIndexRecovery(Set allOplogs) {
  // only ever schedule index recovery once
  if (!markIndexRecoveryScheduled()) {
    return;
  }
  executeDiskStoreTask(new IndexRecoveryTask(allOplogs));
}
/**
 * Registers the recovered stores for async value recovery and schedules the
 * recovery task on the disk store task pool.
 */
void scheduleValueRecovery(Set oplogsNeedingValueRecovery,
    Map recoveredStores) {
  final ValueRecoveryTask recoveryTask = new ValueRecoveryTask(
      oplogsNeedingValueRecovery, recoveredStores);
  synchronized (currentAsyncValueRecoveryMap) {
    this.currentAsyncValueRecoveryMap.putAll(recoveredStores);
  }
  executeDiskStoreTask(recoveryTask);
}
/**
 * get the directory which has the info file
 *
 * @return directory holder which has the info file
 */
DirectoryHolder getInfoFileDir() {
  return this.directories[this.infoFileDirIndex];
}
/** For Testing * */
// void addToOplogSet(int oplogID, File opFile, DirectoryHolder dirHolder) {
// Oplog oplog = new Oplog(oplogID, this);
// oplog.addRecoveredFile(opFile, dirHolder);
// // @todo check callers to see if they need drf support
// this.oplogSet.add(oplog);
// }
/** For Testing * */
/**
 * Returns the size of the biggest directory available to the region.
 */
public long getMaxDirSize() {
  return this.maxDirSize;
}
/**
 * @return boolean indicating whether automatic disk region compaction is on
 */
boolean isCompactionEnabled() {
  return getAutoCompact();
}

/** Returns the configured compaction threshold. */
public int getCompactionThreshold() {
  return this.compactionThreshold;
}
// true when this store's configuration allows compaction at all; final, so
// assigned once at construction (NOTE(review): the assignment is outside
// this chunk — confirm in the constructor).
private final boolean isCompactionPossible;

final boolean isCompactionPossible() {
  return this.isCompactionPossible;
}
/**
 * Schedules a compaction run when auto-compaction is enabled and we are not
 * doing an offline compaction.
 */
void scheduleCompaction() {
  if (!isCompactionEnabled() || isOfflineCompacting()) {
    return;
  }
  this.oplogCompactor.scheduleIfNeeded(getOplogToBeCompacted());
}
/**
 * All the oplogs except the current one are destroyed.
 *
 * @param rvv
 *          if not null, clear the region using a version vector. Clearing
 *          with a version vector only removes entries less than the version
 *          vector, which allows for a consistent clear across members.
 */
private void basicClear(LocalRegion region, DiskRegion dr,
    RegionVersionVector rvv) {
  if (LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER) {
    CacheObserverHolder.getInstance().beforeDiskClear();
  }
  if (region != null) {
    clearAsyncQueue(region, false, rvv);
    // To fix bug 41770 we need to wait for the async flusher thread to
    // finish any work it is currently doing, since it might be operating on
    // this region. Calling forceFlush here could wait forever because we
    // hold the writelock, which prevents the async flush from finishing.
    // Bug 41770 is caused by the regionMap.clear at the end of this method:
    // any entry mod for this region that the async flusher still holds a
    // ref to would be written as a create, and then written again as a
    // create because the first one is not in the actual region map.
    clearDrainList(region, rvv);
  }
  if (rvv == null) {
    // if we have an RVV, the stats are updated by AbstractRegionMap.clear
    // removing each entry.
    dr.statsClear(region);
  }
  if (dr.isBackup()) {
    PersistentOplogSet oplogSet = getPersistentOplogSet(dr);
    oplogSet.clear(dr, rvv);
  } else if (rvv == null) {
    // For an RVV based clear on an overflow region, freeing entries is
    // handled in AbstractRegionMap.clear
    dr.freeAllEntriesOnDisk(region);
  }
}
/**
 * Removes anything found in the async queue for the given region,
 * optionally restricted by the supplied version vector.
 *
 * @param rvv passed to shouldClear to decide which entries to remove
 */
private void clearAsyncQueue(LocalRegion region, boolean needsWriteLock,
    RegionVersionVector rvv) {
  DiskRegion dr = region.getDiskRegion();
  if (needsWriteLock) {
    acquireWriteLock(dr);
  }
  try {
    // While holding the write lock, drop queued items for this region.
    for (Object item : this.asyncQueue) {
      if (item instanceof AsyncDiskEntry
          && shouldClear(region, rvv, (AsyncDiskEntry) item)) {
        rmAsyncItem(item);
      }
    }
  } finally {
    if (needsWriteLock) {
      releaseWriteLock(dr);
    }
  }
}
/**
 * Obtained and held by clear/destroyRegion/close. Also obtained when adding
 * to the async queue, serializing those operations against each other.
 */
private final Object lock = new Object();
/**
 * Clears the region's on-disk state. Lock order: compactor write lock, then
 * the region's sizeGuard (taken first to avoid the deadlock from bug
 * #46133), then this.lock, then the disk region's write lock.
 *
 * @param rvv
 *          if not null, clear the region using the version vector; in that
 *          case the in-memory entries are cleared after all locks are
 *          released (see the tail of this method).
 */
void clear(LocalRegion region, DiskRegion dr, RegionVersionVector rvv) {
  acquireCompactorWriteLock();
  // get lock on sizeGuard first to avoid deadlock that occurred in bug #46133
  final ReentrantLock regionLock = region != null ? region.getSizeGuard()
      : null;
  if (regionLock != null) {
    regionLock.lock();
  }
  try {
    synchronized (this.lock) {
      acquireWriteLock(dr);
      try {
        if (dr.isRegionClosed()) {
          throw new RegionDestroyedException(
              LocalizedStrings.DiskRegion_THE_DISKREGION_HAS_BEEN_CLOSED_OR_DESTROYED
                  .toLocalizedString(), dr.getName());
        }
        basicClear(region, dr, rvv);
        if (rvv == null && region != null) {
          // If we have no RVV, clear the region under lock
          region.txClearRegion();
          region.clearEntries(null);
          dr.incClearCount();
        }
      } finally {
        releaseWriteLock(dr);
      }
    }
  } finally {
    if (regionLock != null) {
      regionLock.unlock();
    }
    releaseCompactorWriteLock();
  }
  if (rvv != null && region != null) {
    // If we have an RVV, we need to clear the region
    // without holding a lock.
    region.txClearRegion();
    region.clearEntries(rvv);
    // Note, do not increment the clear count in this case.
  }
}
/** Releases the compactor write lock. */
private void releaseCompactorWriteLock() {
  compactorWriteLock.unlock();
}

/** Acquires the compactor write lock; blocks until it is available. */
private void acquireCompactorWriteLock() {
  compactorWriteLock.lock();
}

/** Releases the compactor read lock. */
public void releaseCompactorReadLock() {
  compactorReadLock.unlock();
}

/** Acquires the compactor read lock; blocks until it is available. */
public void acquireCompactorReadLock() {
  compactorReadLock.lock();
}
// set when close() begins; checked by compaction/flush paths so they can
// bail out early. NOTE(review): not volatile — other threads may observe a
// stale value; confirm whether that is acceptable here.
private boolean closing = false;
// set in close()'s finally block once shutdown has completed
private boolean closed = false;

boolean isClosing() {
  return this.closing;
}

boolean isClosed() {
  return this.closed;
}
/** Closes this disk store without destroying its data. */
void close() {
  close(false);
}
/**
 * Closes this disk store: stops the compactor and async flusher, closes
 * overflow and persistent oplogs (or destroys them when {@code destroy} is
 * true and no live regions remain), closes stats, and releases the lock
 * file. The FIRST RuntimeException raised by any step is remembered and
 * rethrown after cleanup; later failures are intentionally dropped so the
 * original cause is not masked.
 *
 * BUG FIX: the secondary catch blocks previously used
 * {@code if (rte != null) rte = e;}, which swallowed an exception when no
 * earlier one had occurred and overwrote the first exception when one had.
 * They now use {@code rte == null} to preserve the first failure.
 *
 * @param destroy when true, destroy all oplogs instead of closing them
 */
void close(boolean destroy) {
  this.closing = true;
  RuntimeException rte = null;
  try {
    // at this point all regions should already be closed
    try {
      closeCompactor(false);
    } catch (RuntimeException e) {
      rte = e;
    }
    if (!isOffline()) {
      try {
        // do this before write lock
        stopAsyncFlusher();
      } catch (RuntimeException e) {
        if (rte == null) { // keep the first failure
          rte = e;
        }
      }
    }
    // Wakeup any threads waiting for the async disk store recovery.
    synchronized (currentAsyncValueRecoveryMap) {
      currentAsyncValueRecoveryMap.notifyAll();
    }
    // don't block the shutdown hook
    if (Thread.currentThread() != InternalDistributedSystem.shutdownHook) {
      waitForBackgroundTasks();
    }
    try {
      overflowOplogs.closeOverflow();
    } catch (RuntimeException e) {
      if (rte == null) {
        rte = e;
      }
    }
    if ((!destroy && getDiskInitFile().hasLiveRegions()) || isValidating()) {
      RuntimeException exception = persistentOplogs.close();
      if (exception != null && rte == null) {
        rte = exception;
      }
      getDiskInitFile().close();
    } else {
      try {
        destroyAllOplogs();
      } catch (RuntimeException e) {
        if (rte == null) {
          rte = e;
        }
      }
      getDiskInitFile().close();
    }
    try {
      statsClose();
    } catch (RuntimeException e) {
      if (rte == null) {
        rte = e;
      }
    }
    closeLockFile();
    if (rte != null) {
      throw rte;
    }
    stopDiskStoreTaskPool();
  } finally {
    this.closed = true;
  }
}
/**
 * Returns true if krf files may still be created. The compactor might have
 * been stopped by a cache close; in that case no krf should be written.
 */
boolean allowKrfCreation() {
  final OplogCompactor compactor = this.oplogCompactor;
  return compactor == null || compactor.keepCompactorRunning();
}
/**
 * Stops the oplog compactor, if one exists. When {@code isPrepare} is true
 * the compactor write lock is held across the stop to fence out concurrent
 * clear/close operations.
 */
void closeCompactor(boolean isPrepare) {
  if (this.oplogCompactor == null) {
    return;
  }
  if (isPrepare) {
    acquireCompactorWriteLock();
  }
  try {
    synchronized (this.lock) {
      try {
        // to fix bug 40473 don't wait for the compactor to complete.
        this.oplogCompactor.stopCompactor();
      } catch (CancelException ignore) {
        // To fix bug 39380, ignore the cache-closed exception here so the
        // caller can still close the oplogs. stopCompactor is not expected
        // to throw this, but don't take the chance.
      } catch (RuntimeException e) {
        if (logger.warningEnabled()) {
          logger
              .warning(
                  LocalizedStrings.DiskRegion_COMPLEXDISKREGION_CLOSE_EXCEPTION_IN_STOPPING_COMPACTOR,
                  e);
        }
        throw e;
      }
    }
  } finally {
    if (isPrepare) {
      releaseCompactorWriteLock();
    }
  }
}
/**
 * Closes the given disk region's on-disk state. Backup regions close their
 * init-file entry (unless closeDataOnly) and their oplogs; overflow-only
 * regions drain any queued writes, free their disk entries, and are removed
 * from the overflow map.
 */
private void basicClose(LocalRegion region, DiskRegion dr, boolean closeDataOnly) {
  if (dr.isBackup()) {
    if (region != null) {
      region.closeEntries();
    }
    if (!closeDataOnly) {
      getDiskInitFile().closeRegion(dr);
    }
    // call close(dr) on each oplog
    PersistentOplogSet oplogSet = getPersistentOplogSet(dr);
    oplogSet.basicClose(dr);
  } else {
    if (region != null) {
      // OVERFLOW ONLY
      clearAsyncQueue(region, true, null); // no need to try to write these to
                                           // disk any longer
      dr.freeAllEntriesOnDisk(region);
      region.closeEntries();
      this.overflowMap.remove(dr);
    }
  }
}
/**
 * Called before LocalRegion clears the contents of its entries map. For a
 * backup region any pending async ops on it must reach disk first; the
 * easiest way to guarantee that is to flush the entire async queue.
 */
void prepareForClose(LocalRegion region, DiskRegion dr) {
  if (!dr.isBackup()) {
    return; // overflow-only regions have nothing pending to persist
  }
  forceFlush();
}
/** Flushes pending writes and quiesces oplogs and compactor ahead of a close. */
public void prepareForClose() {
  forceFlush();
  persistentOplogs.prepareForClose();
  closeCompactor(true);
}
/** Deregisters this store from the resource manager and from the cache. */
private void onClose() {
  final InternalResourceManager resourceManager = this.cache.getResourceManager(false);
  if (resourceManager != null) {
    resourceManager.removeResourceListener(ResourceType.HEAP_MEMORY, this);
  }
  this.cache.removeDiskStore(this);
}
/**
 * Closes the given disk region, flushing pending async writes and waiting
 * for in-flight entry operations first. If this store is owned by regions
 * and this was the last owner, the whole disk store is closed afterwards
 * (outside the compactor write lock, per bug 44538).
 *
 * @param closeDataOnly when true, only close the data files: the region
 *          stays registered and is not marked closed
 */
void close(LocalRegion region, DiskRegion dr, boolean closeDataOnly) {
  if (logger.fineEnabled()) {
    logger
        .fine("DiskRegion::close:Attempting to close DiskRegion. Region name ="
            + dr.getName());
  }
  boolean closeDiskStore = false;
  acquireCompactorWriteLock();
  // Fix for 46284 - we must obtain the size guard lock before getting the
  // disk store lock
  final ReentrantLock regionLock = region != null ? region.getSizeGuard()
      : null;
  if (regionLock != null) {
    regionLock.lock();
  }
  try {
    synchronized (this.lock) {
      // Fix 45104, wait here for addAsyncItem to finish adding into queue.
      // prepareForClose() must be outside synchronized (this.lock) to avoid
      // deadlock.
      if (dr.isRegionClosed()) {
        return;
      }
    }
    prepareForClose(region, dr);
    synchronized (this.lock) {
      boolean gotLock = false;
      try {
        acquireWriteLock(dr);
        if (!closeDataOnly) {
          dr.setRegionClosed(true);
        }
        gotLock = true;
      } catch (CancelException e) {
        // cache is closing: mark the region closed anyway, then wait for
        // outstanding entry operations to drain before proceeding
        synchronized (this.closeRegionGuard) {
          if (!dr.isRegionClosed()) {
            if (!closeDataOnly) {
              dr.setRegionClosed(true);
            }
            // Any acquireReadLock thread holding the lock is bound to see
            // isRegionClosed as true and release it (dropping the count to
            // zero) before releasing closeRegionGuard — but wait anyway,
            // to be safe.
            while (this.entryOpsCount.get() > 0) {
              try {
                this.closeRegionGuard.wait(20000);
              } catch (InterruptedException ie) {
                // Exit without closing the region; we do not know what else
                // can be done
                Thread.currentThread().interrupt();
                dr.setRegionClosed(false);
                return;
              }
            }
          } else {
            return;
          }
        }
      }
      try {
        if (logger.fineEnabled()) {
          logger
              .fine("DiskRegion::close:Before invoking basic Close. Region name ="
                  + dr.getName());
        }
        basicClose(region, dr, closeDataOnly);
      } finally {
        if (gotLock) {
          releaseWriteLock(dr);
        }
      }
    }
    if (getOwnedByRegion() && !closeDataOnly) {
      // last owning region gone -> close the disk store itself
      if (this.ownCount.decrementAndGet() <= 0) {
        closeDiskStore = true;
      }
    }
  } finally {
    if (regionLock != null) {
      regionLock.unlock();
    }
    releaseCompactorWriteLock();
  }
  // Fix for 44538 - close the disk store without holding
  // the compactor write lock.
  if (closeDiskStore) {
    onClose();
    close();
  }
}
/**
 * First phase of region destruction: for backup (persistent) regions the
 * pending destroy is recorded in the init file. Overflow-only regions need
 * no on-disk bookkeeping here.
 *
 * @param dr the disk region being destroyed
 */
void beginDestroyRegion(LocalRegion region, DiskRegion dr) {
  if (!dr.isBackup()) {
    return;
  }
  getDiskInitFile().beginDestroyRegion(dr);
}
// count of in-flight background disk tasks (compaction, krf creation, ...);
// waitForBackgroundTasks() blocks until this drains to zero
private final AtomicInteger backgroundTasks = new AtomicInteger();

// Records the start of a background disk task; pairs with decBackgroundTasks().
int incBackgroundTasks() {
  getCache().getCachePerfStats().incDiskTasksWaiting();
  int v = this.backgroundTasks.incrementAndGet();
  return v;
}
// Records completion of a background disk task; wakes any threads blocked in
// waitForBackgroundTasks() when the count reaches zero.
void decBackgroundTasks() {
  int v = this.backgroundTasks.decrementAndGet();
  if (v == 0) {
    synchronized (this.backgroundTasks) {
      this.backgroundTasks.notifyAll();
    }
  }
  getCache().getCachePerfStats().decDiskTasksWaiting();
}
/**
 * Blocks until all background disk tasks have completed. Returns
 * immediately when called from a background task thread itself (fixes bug
 * 42775). Preserves the caller's interrupt status.
 */
public void waitForBackgroundTasks() {
  if (isBackgroundTaskThread()) {
    return; // fixes bug 42775
  }
  if (this.backgroundTasks.get() > 0) {
    boolean interrupted = Thread.interrupted();
    try {
      synchronized (this.backgroundTasks) {
        // standard guarded wait loop; decBackgroundTasks() notifies on
        // zero, and the 500ms timeout guards against missed notifications
        while (this.backgroundTasks.get() > 0) {
          try {
            this.backgroundTasks.wait(500L);
          } catch (InterruptedException ex) {
            interrupted = true;
          }
        }
      }
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
}
/**
 * Synchronously compacts whatever is currently compactable: first forces a
 * roll of the active oplog if it qualifies, then schedules all compactable
 * oplogs and waits for the run to complete.
 *
 * @return true if a compaction run was performed; false when nothing was
 *         compactable or the run could not be scheduled
 */
boolean basicForceCompaction(DiskRegion dr) {
  PersistentOplogSet oplogSet = persistentOplogs;
  // see if the current active oplog is compactable; if so, roll it so it
  // becomes eligible
  {
    Oplog active = oplogSet.getChild();
    if (active != null) {
      if (active.hadLiveEntries() && active.needsCompaction()) {
        active.forceRolling(dr, false);
      }
    }
  }
  // Compact the oplogs
  CompactableOplog[] oplogs = getOplogsToBeCompacted(true/* fixes 41143 */);
  // schedule a compaction if at this point there are oplogs to be compacted
  if (oplogs != null) {
    if (this.oplogCompactor != null) {
      if (this.oplogCompactor.scheduleIfNeeded(oplogs)) {
        this.oplogCompactor.waitForRunToComplete();
      } else {
        oplogs = null;
        // @todo darrel: still need to schedule oplogs and wait for them to
        // compact.
      }
    }
  }
  return oplogs != null;
}
/**
 * Destroys the given region's on-disk artifacts. Backup regions delegate to
 * the persistent oplog set; overflow-only regions simply free their entries
 * on disk.
 */
private void basicDestroy(LocalRegion region, DiskRegion dr) {
  if (dr.isBackup()) {
    if (region != null) {
      region.closeEntries();
    }
    getPersistentOplogSet(dr).basicDestroy(dr);
  } else {
    dr.freeAllEntriesOnDisk(region);
    if (region != null) {
      region.closeEntries();
    }
  }
}
/**
 * Destroys all oplogs belonging to this disk store, including overflow and
 * backup files that logically belong to it even if they were never in use.
 */
private void destroyAllOplogs() {
  persistentOplogs.destroyAllOplogs();
  // Also remove files owned by this DiskStore that were never opened by it.
  deleteFiles(new DiskStoreFilter(OplogType.OVERFLOW, true, getName()));
  deleteFiles(new DiskStoreFilter(OplogType.BACKUP, true, getName()));
}
/**
 * Deletes, from every disk-store directory, each file accepted by the given
 * filter. Failed deletions are logged at fine level and otherwise ignored.
 */
private void deleteFiles(FilenameFilter filter) {
  for (DirectoryHolder holder : this.directories) {
    final File[] matches = FileUtil.listFiles(holder.getDir(), filter);
    for (File candidate : matches) {
      final boolean removed = candidate.delete();
      if (!removed && candidate.exists() && logger.fineEnabled()) {
        logger.fine("Could not delete file " + candidate);
      }
    }
  }
}
/**
 * Destroys this disk store and all of its data.
 *
 * @throws IllegalStateException if any region (persistent or overflow)
 *           still uses this store
 */
public void destroy() {
  Set inUse = new TreeSet();
  for (AbstractDiskRegion region : getDiskRegions()) {
    inUse.add(region.getName());
  }
  for (AbstractDiskRegion region : overflowMap) {
    inUse.add(region.getName());
  }
  if (!inUse.isEmpty()) {
    throw new IllegalStateException("Disk store is currently in use by these regions " + inUse);
  }
  close(true);
  getDiskInitFile().destroy();
  onClose();
}
/**
 * gets the available oplogs to be compacted from the LinkedHashMap,
 * applying the per-run cap (unlike the test hook).
 *
 * @return the array of oplogs to be compacted if present, else null
 */
CompactableOplog[] getOplogToBeCompacted() {
  return getOplogsToBeCompacted(false);
}
/**
 * Test hook: the number of oplogs currently eligible for compaction.
 */
public int numCompactableOplogs() {
  final CompactableOplog[] candidates = getOplogsToBeCompacted(true);
  return candidates == null ? 0 : candidates.length;
}
/**
 * Collects the oplogs eligible for compaction — persistent first, then
 * overflow.
 *
 * NOTE(review): the extracted source had a raw {@code ArrayList} (generic
 * parameters stripped), which makes {@code toArray(new CompactableOplog[0])}
 * untypable; restored as {@code ArrayList<CompactableOplog>}.
 *
 * @param all when true, ignore the MAX_OPLOGS_PER_COMPACTION cap
 * @return the eligible oplogs, or null when there are none
 */
private CompactableOplog[] getOplogsToBeCompacted(boolean all) {
  // cap the number of oplogs per compaction run unless 'all' was requested
  int max = Integer.MAX_VALUE;
  if (!all && max > MAX_OPLOGS_PER_COMPACTION
      && MAX_OPLOGS_PER_COMPACTION > 0) {
    max = MAX_OPLOGS_PER_COMPACTION;
  }
  ArrayList<CompactableOplog> l = new ArrayList<CompactableOplog>();
  persistentOplogs.getCompactableOplogs(l, max);
  // Note this always puts overflow oplogs on the end of the list.
  // They may get starved.
  overflowOplogs.getCompactableOplogs(l, max);
  if (l.isEmpty()) {
    return null;
  }
  return l.toArray(new CompactableOplog[0]);
}
/**
 * Returns the dir name used to back up this DiskStore's directories under:
 * the store name (or the default name) joined with the disk store id.
 */
public String getBackupDirName() {
  String dirName = getName();
  if (dirName == null) {
    dirName = GemFireCacheImpl.DEFAULT_DS_NAME;
  }
  return dirName + "_" + getDiskStoreID().toString();
}
/**
* Filters and returns the current set of oplogs that aren't already in the
* baseline for incremental backup
*
* @param baselineInspector
* the inspector for the previous backup.
* @param baselineCopyMap
* this will be populated with baseline oplogs Files that will be
* used in the restore script.
* @return an map of Oplogs to be copied for an incremental backup. The map is from
* the oplog to the set of files that still need to be backed up for that oplog
* @throws IOException
*/
private Map> filterBaselineOplogs(BackupInspector baselineInspector,
Map baselineCopyMap) throws IOException {
File baselineDir = new File(baselineInspector.getBackupDir(),
BackupManager.DATA_STORES);
baselineDir = new File(baselineDir, getBackupDirName());
// Find all of the member's diskstore oplogs in the member's baseline
// diskstore directory structure (*.crf,*.krf,*.drf)
List baselineOplogFiles = FileUtil.findAll(baselineDir,
".*\\.(idx)?[kdc]rf$");
// Our list of oplogs to copy (those not already in the baseline)
Map> oplogList = new LinkedHashMap>();
// Total list of member oplogs
Map> allOplogs = getAllOplogsForBackup();
/*
* Loop through operation logs and see if they are already part of the
* baseline backup.
*/
for (Map.Entry> entry: allOplogs.entrySet()) {
Oplog log = entry.getKey();
Set filesNeedingBackup = entry.getValue();
// See if they are backed up in the current baseline
Map oplogMap = log.mapBaseline(baselineOplogFiles, filesNeedingBackup);
// No? Then see if they were backed up in previous baselines
if (!filesNeedingBackup.isEmpty() && baselineInspector.isIncremental()) {
Set matchingOplogs = log
.gatherMatchingOplogFiles(baselineInspector
.getIncrementalOplogFileNames(), filesNeedingBackup);
if (!matchingOplogs.isEmpty()) {
for (String matchingOplog : matchingOplogs) {
oplogMap.put(
new File(baselineInspector
.getCopyFromForOplogFile(matchingOplog)), new File(
baselineInspector.getCopyToForOplogFile(matchingOplog)));
}
}
}
if (!filesNeedingBackup.isEmpty()) {
/*
* These are fresh operation log files so lets back them up.
*/
oplogList.put(log, filesNeedingBackup);
}
/*
* These have been backed up before so lets just add their entries from
* the previous backup or restore script into the current one.
*/
baselineCopyMap.putAll(oplogMap);
}
return oplogList;
}
/**
* Get all of the oplogs
*/
private Map> getAllOplogsForBackup() {
Oplog[] oplogs = persistentOplogs.getAllOplogs();
Map> results = new LinkedHashMap>();
for(Oplog oplog: oplogs) {
results.put(oplog, oplog.getAllFiles());
}
return results;
}
// @todo perhaps a better thing for the tests would be to give them a listener
// hook that notifies them every time an oplog is created.
/**
 * Waits, before an async disk task (e.g. compaction, krf creation) runs,
 * for basic system initialization to complete.
 *
 * @return true if {@link #endAsyncDiskTask()} should be invoked afterwards;
 *         false when offline, when no system callbacks are installed, or
 *         when the store started closing while waiting
 */
final boolean waitBeforeAsyncDiskTask() {
  if (isOffline()) {
    return false;
  }
  final GemFireCacheImpl.StaticSystemCallbacks sysCb = GemFireCacheImpl
      .getInternalProductCallbacks();
  if (sysCb == null) {
    return false;
  }
  final long waitMillis = 500L;
  while (!sysCb.waitBeforeAsyncDiskTask(waitMillis, this)) {
    if (isClosing()) {
      // break early if disk store is closing
      return false;
    }
  }
  if (logger != null && logger.fineEnabled()) {
    logger.fine("Proceeding after waiting for basic "
        + "system initialization to complete");
  }
  return true;
}
/**
 * Signals the system callbacks that an async disk task has finished.
 * A no-op for offline stores or when no callbacks are installed.
 */
final void endAsyncDiskTask() {
  if (isOffline()) {
    return;
  }
  final GemFireCacheImpl.StaticSystemCallbacks callbacks = GemFireCacheImpl
      .getInternalProductCallbacks();
  if (callbacks != null) {
    callbacks.endAsyncDiskTask(this);
  }
}
// @todo perhaps a better thing for the tests would be to give them a listener
// hook that notifies them every time an oplog is created.
/**
 * Used by tests to confirm stat size. NOTE(review): appears to accumulate
 * the size of oplogs not yet deleted — confirm against its updaters in Oplog.
 */
final AtomicLong undeletedOplogSize = new AtomicLong();
/**
* Compacts oplogs
*
* @author Mitul Bid
* @author Asif
* @since 5.1
*
*/
class OplogCompactor implements Runnable {
/** boolean for the thread to continue compaction* */
private volatile boolean compactorEnabled;
private volatile boolean scheduled;
private CompactableOplog[] scheduledOplogs;
/**
* used to keep track of the Thread currently invoking run on this compactor
*/
private volatile Thread me;
// private LogWriterI18n logger = null;
// Boolean which decides if the compactor can terminate early i.e midway
// between compaction.
// If this boolean is true ,( default is false), then the compactor thread
// if entered the
// compaction phase will exit only after it has compacted the oplogs & also
// deleted the compacted
// oplogs
private final boolean compactionCompletionRequired;
OplogCompactor() {
this.compactionCompletionRequired = sysProps.getBoolean(
COMPLETE_COMPACTION_BEFORE_TERMINATION_PROPERTY_BASE_NAME, false);
}
/**
* Creates a new compactor and starts a new thread
*
* private OplogCompactor() { logger =
* DiskRegion.this.owner.getCache().getLogger(); }
*/
/** Creates a new thread and starts the thread* */
private void startCompactor() {
this.compactorEnabled = true;
}
/**
* Stops the thread from compaction and the compactor thread joins with the
* calling thread
*/
private void stopCompactor() {
synchronized(this) {
if (LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER) {
CacheObserverHolder.getInstance().beforeStoppingCompactor();
}
this.compactorEnabled = false;
if (LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER) {
CacheObserverHolder.getInstance().afterSignallingCompactor();
}
}
if (LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER) {
CacheObserverHolder.getInstance().afterStoppingCompactor();
}
}
/**
* @return true if compaction done; false if it was not
*/
private synchronized boolean scheduleIfNeeded(CompactableOplog[] opLogs) {
if (!this.scheduled) {
return schedule(opLogs);
} else {
return false;
}
}
/**
* @return true if compaction done; false if it was not
*/
private synchronized boolean schedule(CompactableOplog[] opLogs) {
assert !this.scheduled;
if (!this.compactorEnabled)
return false;
if (opLogs != null) {
for (int i = 0; i < opLogs.length; i++) {
// logger.info(LocalizedStrings.DEBUG,
// "schedule oplog#" + opLogs[i].getOplogId(),
// new RuntimeException("STACK"));
opLogs[i].prepareForCompact();
}
this.scheduled = true;
this.scheduledOplogs = opLogs;
boolean result = executeDiskStoreTask(this);
if (!result) {
reschedule(false, new CompactableOplog[0]);
return false;
} else {
return true;
}
} else {
return false;
}
}
/**
* A non-backup just needs values that are written to one of the oplogs
* being compacted that are still alive (have not been deleted or modified
* in a future oplog) to be copied forward to the current active oplog
*/
private boolean compact() {
int totalCount = 0;
long compactionStart = 0;
long start = 0;
// wait for basic GemFireXD initialization to complete first
boolean signalEnd = waitBeforeAsyncDiskTask();
try {
// return if diskstore is closing
if (DiskStoreImpl.this.isClosing()) {
// pretend success; higher level will check for this anyway
// and end at some point
return true;
}
CompactableOplog[] oplogs = this.scheduledOplogs;
// continue if nothing to be compacted
if (oplogs.length == 0) {
return true;
}
compactionStart = getStats().startCompaction();
start = NanoTimer.getTime();
// logger.info(LocalizedStrings.DEBUG, "DEBUG keepCompactorRunning="
// + keepCompactorRunning());
for (int i = 0; i < oplogs.length && keepCompactorRunning() /*
* @todo &&
* !owner.
* isDestroyed
*/; i++) {
int compacted = oplogs[i].compact(this);
totalCount += compacted;
if (DiskStoreImpl.this.testoplogcompact != null) {
if (compacted > 0 && (oplogs[i] == DiskStoreImpl.this.testoplogcompact)) {
DiskStoreImpl.this.testOplogCompacted = true;
}
}
}
// TODO:Asif : DiskRegion: How do we tackle
} finally {
if (compactionStart != 0) {
getStats().endCompaction(compactionStart);
}
if (signalEnd) {
endAsyncDiskTask();
}
}
long endTime = NanoTimer.getTime();
logger.info(LocalizedStrings.DiskRegion_COMPACTION_SUMMARY, new Object[] {
totalCount, ((endTime - start) / 1000000) });
return true /* @todo !owner.isDestroyed */;
}
private boolean isClosing() {
if (getCache().isClosed()) {
return true;
}
CancelCriterion stopper = getCache().getCancelCriterion();
if (stopper.cancelInProgress() != null) {
return true;
}
return false;
}
/**
* Just do compaction and then check to see if another needs to be done and
* if so schedule it. Asif:The compactor thread checks for an oplog in the
* LinkedHasMap in a synchronization on the oplogIdToOplog object. This will
* ensure that an addition of an Oplog to the Map does not get missed.
* Notifications need not be sent if the thread is already compaction
*/
public void run() {
getCache().getCachePerfStats().decDiskTasksWaiting();
if (!this.scheduled)
return;
boolean compactedSuccessfully = false;
try {
SystemFailure.checkFailure();
if (isClosing()) {
return;
}
if (!this.compactorEnabled)
return;
final CompactableOplog[] oplogs = this.scheduledOplogs;
this.me = Thread.currentThread();
try {
// set our thread's name
String tName = "OplogCompactor " + getName() + " for oplog "
+ oplogs[0].toString();
Thread.currentThread().setName(tName);
StringBuilder buffer = new StringBuilder();
for (int j = 0; j < oplogs.length; ++j) {
buffer.append(oplogs[j].toString());
if (j + 1 < oplogs.length) {
buffer.append(", ");
}
}
String ids = buffer.toString();
logger.info(LocalizedStrings.DiskRegion_COMPACTION_OPLOGIDS,
new Object[] { getName(), ids });
if (LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER) {
CacheObserverHolder.getInstance().beforeGoingToCompact();
}
compactedSuccessfully = compact();
if (compactedSuccessfully) {
// logger.info(LocalizedStrings.DiskRegion_COMPACTION_SUCCESS,
// new Object[] {getName(), ids});
if (LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER) {
CacheObserverHolder.getInstance().afterHavingCompacted();
}
} else {
logger.warning(LocalizedStrings.DiskRegion_COMPACTION_FAILURE,
new Object[] { getName(), ids });
}
} catch (DiskAccessException dae) {
handleDiskAccessException(dae, true);
throw dae;
} catch (KillCompactorException ex) {
if (logger.fineEnabled()) {
logger.fine("compactor thread terminated by test");
}
throw ex;
} finally {
if (compactedSuccessfully) {
this.me.setName("Idle OplogCompactor");
}
this.me = null;
}
} catch (CancelException ignore) {
// if cache is closed, just about the compaction
}
finally {
reschedule(compactedSuccessfully, scheduledOplogs);
}
}
synchronized void waitForRunToComplete() {
if (this.me == Thread.currentThread()) {
// no need to wait since we are the compactor to fix bug 40630
return;
}
while (this.scheduled) {
try {
wait();
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
}
}
}
/**
 * Called at the end of a compaction run to clear the "scheduled" state and,
 * when the run succeeded and compaction is still enabled, immediately
 * schedule the next batch of compactable oplogs.
 *
 * @param success whether the just-finished run compacted successfully
 * @param previousList the oplogs that were just processed; used to detect
 *        and prevent an endless compaction loop over the same set
 */
private synchronized void reschedule(boolean success, CompactableOplog[] previousList) {
  this.scheduled = false;
  this.scheduledOplogs = null;
  // wake anyone blocked in waitForRunToComplete()
  notifyAll();
  if (!success)
    return;
  if (!this.compactorEnabled)
    return;
  if (isClosing())
    return;
  SystemFailure.checkFailure();
  //TODO griddb - is sync this necessary? For what?
  //synchronized (DiskStoreImpl.this.oplogIdToOplog) {
  if (this.compactorEnabled) {
    if (isCompactionEnabled()) {
      CompactableOplog[] newList = getOplogToBeCompacted();
      if(Arrays.equals(newList, previousList)) {
        //If the list of oplogs to be compacted didn't change,
        //don't loop and compact again.
        logger.warning(LocalizedStrings.DiskStoreImpl_PREVENTING_COMPACTION_LOOP, Arrays.asList(newList));
        return;
      }
      schedule(newList);
    }
  }
  //}
}
/**
 * Returns true while the compactor thread should stay alive: either
 * compaction is still enabled, or a full completion pass was requested.
 */
boolean keepCompactorRunning() {
  return this.compactionCompletionRequired || this.compactorEnabled;
}
}
// for test. test needs to wait after successful compaction happened
// for the given oplog set via this setter method below.
private Oplog testoplogcompact = null;

/** Test hook: registers the oplog whose compaction the test is waiting on. */
public void TEST_oplogCompact(Oplog oplog) {
  this.testoplogcompact = oplog;
}

// set once the registered test oplog has been compacted
private boolean testOplogCompacted;

/** Test hook: whether the oplog registered via {@link #TEST_oplogCompact} has been compacted. */
public boolean isTestOplogCompacted() {
  return this.testOplogCompacted;
}
/**
 * Used by unit tests to kill the compactor operation.
 */
public static class KillCompactorException extends RuntimeException {
  // RuntimeException is Serializable; declare an explicit version id
  private static final long serialVersionUID = 1L;
}
/** Returns the init file (".if") that records this store's persistent metadata. */
public DiskInitFile getDiskInitFile() {
  return this.initFile;
}
/** Records in the init file that the given member went offline for this region. */
public void memberOffline(DiskRegionView dr, PersistentMemberID persistentID) {
  final DiskInitFile dif = this.initFile;
  if (dif != null) {
    dif.addOfflinePMID(dr, persistentID);
  }
}

/** Records that the member is offline but its data is equal to ours. */
public void memberOfflineAndEqual(DiskRegionView dr,
    PersistentMemberID persistentID) {
  final DiskInitFile dif = this.initFile;
  if (dif != null) {
    dif.addOfflineAndEqualPMID(dr, persistentID);
  }
}

/** Records in the init file that the given member came online for this region. */
public void memberOnline(DiskRegionView dr, PersistentMemberID persistentID) {
  final DiskInitFile dif = this.initFile;
  if (dif != null) {
    dif.addOnlinePMID(dr, persistentID);
  }
}

/** Removes the given member's id from the init file for this region. */
public void memberRemoved(DiskRegionView dr, PersistentMemberID persistentID) {
  final DiskInitFile dif = this.initFile;
  if (dif != null) {
    dif.rmPMID(dr, persistentID);
  }
}

/** Records a revoked member pattern in the init file. */
public void memberRevoked(PersistentMemberPattern revokedPattern) {
  final DiskInitFile dif = this.initFile;
  if (dif != null) {
    dif.revokeMember(revokedPattern);
  }
}

/** Records our own id as "initializing" for the given region. */
public void setInitializing(DiskRegionView dr, PersistentMemberID newId) {
  final DiskInitFile dif = this.initFile;
  if (dif != null) {
    dif.addMyInitializingPMID(dr, newId);
  }
}

/** Marks the given region's initialization as complete in the init file. */
public void setInitialized(DiskRegionView dr) {
  final DiskInitFile dif = this.initFile;
  if (dif != null) {
    dif.markInitialized(dr);
  }
}

/** Returns the revoked member patterns, or an empty set when there is no init file. */
public Set getRevokedMembers() {
  final DiskInitFile dif = this.initFile;
  return dif == null ? Collections.emptySet() : dif.getRevokedIDs();
}
/**
 * Completes destruction of the given disk region: marks it closed, performs
 * the basic destroy, updates the init file (or the in-memory maps for
 * overflow-only regions), and self-destructs the disk store when it is
 * region-owned and this was its last region.
 *
 * Lock ordering: the region's size-guard lock must be taken BEFORE this
 * store's lock (fix for 46284), and a write lock on the disk region is taken
 * inside that; a CancelException while acquiring the write lock falls back to
 * the closeRegionGuard workaround for bug 39380.
 */
public void endDestroyRegion(LocalRegion region, DiskRegion dr) {
  // logger.info(LocalizedStrings.DEBUG, "DEBUG endDestroyRegion dr=" +
  // dr.getName());
  // CancelCriterion stopper = dr.getOwner().getCancelCriterion();
  // Fix for 46284 - we must obtain the size guard lock before getting the
  // disk store lock
  final ReentrantLock regionLock = region != null ? region.getSizeGuard()
      : null;
  if (regionLock != null) {
    regionLock.lock();
  }
  try {
    synchronized (this.lock) {
      if (dr.isRegionClosed()) {
        // already destroyed/closed by another thread
        return;
      }
      // // Stop the compactor if running, without taking lock.
      // if (this.oplogCompactor != null) {
      // try {
      // this.oplogCompactor.stopCompactor();
      // }
      // catch (CancelException ignore) {
      // // Asif:To fix Bug 39380 , ignore the cache closed exception here.
      // // allow it to call super .close so that it would be able to close
      // the
      // // oplogs
      // // Though I do not think this exception will be thrown by
      // // the stopCompactor. Still not taking chance and ignoring it
      // }
      // }
      // // if (!isSync()) {
      // stopAsyncFlusher(true); // do this before writeLock
      // // }
      boolean gotLock = false;
      try {
        try {
          acquireWriteLock(dr);
          gotLock = true;
        } catch (CancelException e) {
          // see workaround below.
        }
        if (!gotLock) { // workaround for bug39380
          // Allow only one thread to proceed
          synchronized (this.closeRegionGuard) {
            if (dr.isRegionClosed()) {
              return;
            }
            dr.setRegionClosed(true);
            // Asif: I am quite sure that it should also be Ok if instead
            // while it is a If Check below. Because if acquireReadLock thread
            // has acquired the lock, it is bound to see the isRegionClose as
            // true
            // and so will release the lock causing decrement to zeo , before
            // releasing the closeRegionGuard. But still...not to take any
            // chance
            // Bounded wait (loopCount seconds) for in-flight entry ops to drain.
            final int loopCount = 10;
            for (int i = 0; i < loopCount; i++) {
              if (this.entryOpsCount.get() == 0) {
                break;
              }
              boolean interrupted = Thread.interrupted();
              try {
                this.closeRegionGuard.wait(1000);
              } catch (InterruptedException ie) {
                interrupted = true;
              } finally {
                if (interrupted) {
                  Thread.currentThread().interrupt();
                }
              }
            } // for
            if (this.entryOpsCount.get() > 0) {
              // still not drained: warn, then wait unbounded
              logger
                  .warning(
                      LocalizedStrings.DisKRegion_OUTSTANDING_OPS_REMAIN_AFTER_0_SECONDS_FOR_DISK_REGION_1,
                      new Object[] { Integer.valueOf(loopCount), dr.getName() });
              for (;;) {
                if (this.entryOpsCount.get() == 0) {
                  break;
                }
                boolean interrupted = Thread.interrupted();
                try {
                  this.closeRegionGuard.wait(1000);
                } catch (InterruptedException ie) {
                  interrupted = true;
                } finally {
                  if (interrupted) {
                    Thread.currentThread().interrupt();
                  }
                }
              } // for
              logger
                  .info(
                      LocalizedStrings.DisKRegion_OUTSTANDING_OPS_CLEARED_FOR_DISK_REGION_0,
                      dr.getName());
            }
          } // synchronized
        }
        dr.setRegionClosed(true);
        basicDestroy(region, dr);
      } finally {
        if (gotLock) {
          releaseWriteLock(dr);
        }
      }
    }
  } finally {
    if (regionLock != null) {
      regionLock.unlock();
    }
  }
  // persistent (backup) regions record the destroy in the init file;
  // overflow-only regions are just dropped from the in-memory maps
  if (this.initFile != null && dr.isBackup()) {
    this.initFile.endDestroyRegion(dr);
  } else {
    rmById(dr.getId());
    this.overflowMap.remove(dr);
  }
  if (getOwnedByRegion()) {
    // logger.info(LocalizedStrings.DEBUG, "DEBUG: ds=" + getName()
    // + "destroy ownCount=" + getOwnCount());
    if (this.ownCount.decrementAndGet() <= 0) {
      // last owning region gone: destroy the store itself
      destroy();
    }
  }
}
/** Records in the init file that destruction of the region's data (not the region itself) is starting. */
public void beginDestroyDataStorage(DiskRegion dr) {
  if (this.initFile != null && dr.isBackup()/* fixes bug 41389 */) {
    this.initFile.beginDestroyDataStorage(dr);
  }
}

/**
 * Completes destruction of the region's data: clears its entries, resets and
 * persists an empty untrusted RVV, and records completion in the init file.
 */
public void endDestroyDataStorage(LocalRegion region, DiskRegion dr) {
  // logger.info(LocalizedStrings.DEBUG, "DEBUG endPartialDestroyRegion dr=" +
  // dr.getName());
  try {
    clear(region, dr, null);
    dr.resetRVV();
    dr.setRVVTrusted(false);
    dr.writeRVV(null, null); // just persist the empty rvv with trust=false
  } catch (RegionDestroyedException rde) {
    // ignore a RegionDestroyedException at this stage
  }
  if (this.initFile != null && dr.isBackup()) {
    this.initFile.endDestroyDataStorage(dr);
  }
}
/**
 * Generates a fresh PersistentMemberID for this member/disk store, stamped
 * with the current wall-clock time.
 */
public PersistentMemberID generatePersistentID(DiskRegionView dr) {
  File firstDir = getInfoFileDir().getDir();
  InternalDistributedSystem ids = getCache().getDistributedSystem();
  InternalDistributedMember memberId = ids.getDistributionManager()
      .getDistributionManagerId();
  //NOTE - do NOT use DM.cacheTimeMillis here. See bug #49920
  long timestamp = System.currentTimeMillis();
  PersistentMemberID id = new PersistentMemberID(getDiskStoreID(), memberId.getIpAddress(),
      firstDir.getAbsolutePath(), memberId.getName(),
      timestamp, (short) 0);
  return id;
}

/** Returns this store's identity as a PersistentMemberPattern (host, first dir, UUID). */
public PersistentID getPersistentID() {
  InetAddress host = cache.getDistributedSystem().getDistributedMember()
      .getIpAddress();
  String dir = getDiskDirs()[0].getAbsolutePath();
  return new PersistentMemberPattern(host, dir, this.diskStoreID.toUUID(), 0);
}

// test hook
/** Forces compaction of the init file (test hook). */
public void forceIFCompaction() {
  if (this.initFile != null) {
    this.initFile.forceCompaction();
  }
}
// @todo DiskStore it
/**
 * Cancellation criterion that trips only when this disk store itself is
 * closed. We deliberately do not use the LocalRegion's stopper: the async
 * writer must still be able to finish flushing during a cache close.
 */
private class Stopper extends CancelCriterion {
  @Override
  public String cancelInProgress() {
    // a non-null reason string signals cancellation
    return isClosed() ? "The disk store is closed." : null;
  }

  @Override
  public RuntimeException generateCancelledException(Throwable e) {
    if (!isClosed()) {
      return null;
    }
    return new CacheClosedException("The disk store is closed", e);
  }
}
// single shared stopper instance for this disk store
private final CancelCriterion stopper = new Stopper();

/** Returns the cancellation criterion that trips when this store closes. */
public CancelCriterion getCancelCriterion() {
  return this.stopper;
}
/**
 * Called when we are doing recovery and we find a new id. Advances the
 * region id counter to at least {@code drId + 1} so a newly created region
 * cannot collide with a recovered id (fixes bug 41421).
 */
void recoverRegionId(long drId) {
  long newVal = drId + 1;
  // Advance atomically: the original get()+set() pair could lose a racing
  // update between the check and the set. CAS-loop until the counter is at
  // least newVal or someone else pushed it past us.
  long cur;
  while ((cur = this.regionIdCtr.get()) < newVal) {
    if (this.regionIdCtr.compareAndSet(cur, newVal)) {
      break;
    }
  }
}
/**
 * Called when creating a new disk region (not a recovered one).
 * Returns the next region id, skipping over the reserved id range
 * [MIN_RESERVED_DRID, MAX_RESERVED_DRID].
 */
long generateRegionId() {
  long result;
  do {
    result = this.regionIdCtr.getAndIncrement();
  } while (result <= MAX_RESERVED_DRID && result >= MIN_RESERVED_DRID);
  return result;
}
/**
 * Returns a read-only, live view of the disk regions using this disk store
 * (its contents change as regions are added to or removed from the store).
 */
public Collection getDiskRegions() {
  Collection live = this.drMap.values();
  return Collections.unmodifiableCollection(live);
}
/**
 * Linear scan for the disk region with the given name; returns null when no
 * match exists. Slow — O(number of regions) — and should be optimized if it
 * is ever used for anything important (currently only diagnostic use).
 */
public DiskRegion getByName(String name) {
  for (DiskRegion candidate : getDiskRegions()) {
    if (candidate.getName().equals(name)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Registers a disk region with this store: persistent (backup) regions go
 * into the id map and the init file, overflow-only regions into the overflow
 * set. Also bumps the owner count for region-owned stores.
 *
 * @throws IllegalStateException if a region with the same id already exists
 */
void addDiskRegion(DiskRegion dr) {
  if (dr.isBackup()) {
    PersistentOplogSet oplogSet = getPersistentOplogSet(dr);
    // offline stores must not create new oplog children
    if(!isOffline()) {
      oplogSet.initChild();
    }
    DiskRegion old = this.drMap.putIfAbsent(dr.getId(), dr);
    if (old != null) {
      throw new IllegalStateException("DiskRegion already exists with id "
          + dr.getId() + " and name " + old.getName());
    }
    getDiskInitFile().createRegion(dr);
  } else {
    this.overflowMap.add(dr);
  }
  if (getOwnedByRegion()) {
    this.ownCount.incrementAndGet();
    // logger.info(LocalizedStrings.DEBUG, "DEBUG: ds=" + getName()
    // + "addDiskRegion ownCount=" + getOwnCount(), new
    // RuntimeException("STACK"));
  }
}
/** Records a persistent partitioned-region configuration in the init file. */
void addPersistentPR(String name, PRPersistentConfig config) {
  getDiskInitFile().createPersistentPR(name, config);
}

/** Removes a persistent partitioned-region configuration from the init file. */
void removePersistentPR(String name) {
  if(isClosed() && getOwnedByRegion()) {
    //A region owned disk store will destroy
    //itself when all buckets are removed, resulting
    //in an exception when this method is called.
    //Do nothing if the disk store is already
    //closed
    return;
  }
  getDiskInitFile().destroyPersistentPR(name);
}

/** Returns the persisted configuration for the named partitioned region, if any. */
PRPersistentConfig getPersistentPRConfig(String name) {
  return getDiskInitFile().getPersistentPR(name);
}

/** Returns all persisted partitioned-region configurations known to the init file. */
Map getAllPRs() {
  return getDiskInitFile().getAllPRs();
}

/** Looks up a disk region by id; null when unknown. */
DiskRegion getById(Long regionId) {
  return this.drMap.get(regionId);
}

/** Removes a disk region from the id map. */
void rmById(Long regionId) {
  this.drMap.remove(regionId);
}
/**
 * Handles a fatal DiskAccessException: asks every region using this store to
 * handle it, optionally stops bridge/network servers to induce client
 * failover, then closes this disk store. The cleanup runs on a separate
 * thread to avoid deadlocking on locks the calling thread may hold; only the
 * first DAE wins (later ones are ignored via the CAS below).
 *
 * @param dae the disk exception that occurred
 * @param stopBridgeServers whether to also stop bridge/network servers
 */
void handleDiskAccessException(final DiskAccessException dae,
    final boolean stopBridgeServers) {
  boolean causedByRDE = LocalRegion.causedByRDE(dae);
  // @todo is it ok for flusher and compactor to call this method if RDE?
  // I think they need to keep working (for other regions) in this case.
  if (causedByRDE) {
    return;
  }
  // If another thread has already hit a DAE and is cleaning up, do nothing
  if (!diskException.compareAndSet(null, dae)) {
    return;
  }
  final ThreadGroup exceptionHandlingGroup = LogWriterImpl.createThreadGroup(
      "Disk Store Exception Handling Group", cache.getLoggerI18n());
  // Shutdown the regions and bridge servers in another thread, to make sure
  // that we don't cause a deadlock because this thread is holding some lock
  Thread thread = new Thread(exceptionHandlingGroup,
      "Disk store exception handler") {
    @Override
    public void run() {
      // first ask each region to handle the exception.
      for (DiskRegion dr : DiskStoreImpl.this.drMap.values()) {
        DiskExceptionHandler lr = dr.getExceptionHandler();
        lr.handleDiskAccessException(dae, false);
      }
      // then stop the bridge server if needed
      if (stopBridgeServers) {
        LogWriterI18n logger = getCache().getLoggerI18n();
        logger
            .info(LocalizedStrings.LocalRegion_ATTEMPTING_TO_CLOSE_THE_BRIDGESERVERS_TO_INDUCE_FAILOVER_OF_THE_CLIENTS);
        try {
          getCache().stopServers();
          // also close GemFireXD network servers to induce failover (#45651)
          final StaticSystemCallbacks sysCb =
              GemFireCacheImpl.FactoryStatics.systemCallbacks;
          if (sysCb != null) {
            sysCb.stopNetworkServers();
          }
          logger.info(LocalizedStrings
              .LocalRegion_BRIDGESERVERS_STOPPED_SUCCESSFULLY);
        } catch (Exception e) {
          logger.error(LocalizedStrings
              .LocalRegion_THE_WAS_A_PROBLEM_IN_STOPPING_BRIDGESERVERS_FAILOVER_OF_CLIENTS_IS_SUSPECT, e);
        }
      }
      logger.error(LocalizedStrings
          .LocalRegion_A_DISKACCESSEXCEPTION_HAS_OCCURED_WHILE_WRITING_TO_THE_DISK_FOR_DISKSTORE_0_THE_DISKSTORE_WILL_BE_CLOSED,
          DiskStoreImpl.this.getName(), dae);
      // then close this disk store
      onClose();
      close();
    }
  };
  thread.start();
}
// immutable configuration attributes, fixed at construction time
private final String name;
private final boolean autoCompact;
private final boolean allowForceCompaction;
private final long maxOplogSizeInBytes;
private final long timeInterval;
private final int queueSize;
private final int writeBufferSize;
private final File[] diskDirs;
private final int[] diskDirSizes;
private final boolean syncWrites;

// DiskStore interface methods
/** Returns this disk store's name. */
public String getName() {
  return this.name;
}

/** Whether oplogs are compacted automatically. */
public boolean getAutoCompact() {
  return this.autoCompact;
}

/** Whether manual (forced) compaction is permitted. */
public boolean getAllowForceCompaction() {
  return this.allowForceCompaction;
}

/** Maximum oplog size in megabytes (the byte value divided by 1024*1024). */
public long getMaxOplogSize() {
  return this.maxOplogSizeInBytes / (1024 * 1024);
}

/** Maximum oplog size in bytes. */
public long getMaxOplogSizeInBytes() {
  return this.maxOplogSizeInBytes;
}

/** Async-flush time interval. */
public long getTimeInterval() {
  return this.timeInterval;
}

/** Async queue size. */
public int getQueueSize() {
  return this.queueSize;
}

/** Write buffer size in bytes. */
public int getWriteBufferSize() {
  return this.writeBufferSize;
}

/** The directories this store writes files to. NOTE: returns the internal array. */
public File[] getDiskDirs() {
  return this.diskDirs;
}

/** Per-directory size limits. NOTE: returns the internal array. */
public int[] getDiskDirSizes() {
  return this.diskDirSizes;
}

/**
 * {@inheritDoc}
 */
@Override
public final boolean getSyncWrites() {
  return this.syncWrites;
}
// public String toString() {
// StringBuffer sb = new StringBuffer();
// sb.append("<");
// sb.append(getName());
// if (getOwnedByRegion()) {
// sb.append(" OWNED_BY_REGION");
// }
// sb.append(">");
// return sb.toString();
// }
/**
 * A unit of work queued to the async disk flusher: either a full entry
 * (region + DiskEntry + version tag) or a version-only record carrying just
 * the tag for a region.
 */
public static class AsyncDiskEntry {
  public final LocalRegion region;
  public final DiskEntry de;
  public final boolean versionOnly;
  public final VersionTag tag;

  /** Full entry to be flushed for the given region. */
  public AsyncDiskEntry(LocalRegion region, DiskEntry de, VersionTag tag) {
    this.region = region;
    this.de = de;
    this.tag = tag;
    this.versionOnly = false;
  }

  /** Version-only record: no DiskEntry payload, just the tag. */
  public AsyncDiskEntry(LocalRegion region, VersionTag tag) {
    this.region = region;
    this.de = null;
    this.tag = tag;
    this.versionOnly = true;
    // if versionOnly, only de.getDiskId() is used for synchronize
  }

  @Override
  public String toString() {
    StringBuilder buf = new StringBuilder("dr=");
    buf.append(region.getDiskRegion().getId());
    buf.append(" versionOnly=").append(this.versionOnly);
    if (this.versionOnly) {
      buf.append(" versionTag=").append(this.tag);
    }
    if (de != null) {
      buf.append(" key=").append(de.getKeyCopy());
    } else {
      buf.append(' ');
    }
    return buf.toString();
  }
}
/**
 * Set of OplogEntryIds (longs). Memory is optimized by keeping ids that fit
 * in the unsigned-int range in an int-based set; only larger (or negative)
 * ids pay for a full long entry.
 */
public static class OplogEntryIdSet {
  private final TStatelessIntHashSet ints = new TStatelessIntHashSet(
      (int) INVALID_ID);
  private final TStatelessLongHashSet longs = new TStatelessLongHashSet(
      INVALID_ID);

  /** true when the id can be stored losslessly as an unsigned 32-bit value */
  private static boolean isUnsignedIntRange(long id) {
    return id >= 0 && id <= 0x00000000FFFFFFFFL;
  }

  public void add(long id) {
    if (isUnsignedIntRange(id)) {
      this.ints.add((int) id);
    } else {
      this.longs.add(id);
    }
  }

  public boolean contains(long id) {
    return isUnsignedIntRange(id) ? this.ints.contains((int) id)
        : this.longs.contains(id);
  }

  public int size() {
    return this.longs.size() + this.ints.size();
  }

  public void addAll(OplogEntryIdSet toAdd) {
    this.ints.addAll(toAdd.ints.toArray());
    this.longs.addAll(toAdd.longs.toArray());
  }
}
/**
 * Set to true if this diskStore is owned by a single region. This only
 * happens in backwardsCompat mode.
 */
private final boolean ownedByRegion;

/**
 * Set to the region's {@link InternalRegionArguments} when the diskStore is
 * owned by a single region in backwardsCompat mode ({@link #ownedByRegion}
 * must be true).
 */
private final InternalRegionArguments internalRegionArgs;

/**
 * Number of current owners. Only valid if ownedByRegion is true.
 */
private final AtomicInteger ownCount = new AtomicInteger();

/** Whether this store is owned by a single region (backwardsCompat mode). */
public boolean getOwnedByRegion() {
  return this.ownedByRegion;
}

/** The owning region's arguments; only meaningful when {@link #getOwnedByRegion()} is true. */
public InternalRegionArguments getInternalRegionArguments() {
  return this.internalRegionArgs;
}

/** Current owner count; only meaningful when {@link #getOwnedByRegion()} is true. */
public int getOwnCount() {
  return this.ownCount.get();
}

// true when this store was opened only to validate its contents
private final boolean validating;

boolean isValidating() {
  return this.validating;
}

// true when this store was opened offline (no distributed system)
private final boolean offline;

public boolean isOffline() {
  return this.offline;
}

// true when this offline store should only upgrade the on-disk version
public final boolean upgradeVersionOnly;

/** Upgrade-only mode applies only when the recovered data predates GFE 7.0. */
boolean isUpgradeVersionOnly() {
  return this.upgradeVersionOnly
      && Version.GFE_70.compareTo(this.getRecoveredGFVersion()) > 0;
}

/** Same as {@link #isUpgradeVersionOnly()} but reads the version from the given init file. */
boolean isUpgradeVersionOnly(DiskInitFile initFile) {
  return this.upgradeVersionOnly
      && Version.GFE_70.compareTo(this.getRecoveredGFVersion(initFile)) > 0;
}

// true when this offline store was opened to compact its oplogs
private final boolean offlineCompacting;

boolean isOfflineCompacting() {
  return this.offlineCompacting;
}
/**
 * Destroy a region which has not been created.
 *
 * @param regName
 *          the name of the region to destroy
 * @param throwIfNotExists throw an {@link IllegalArgumentException} if the
 *        given region does not exist, else return false
 * @return true when a region (plain or PR) was found and destroyed
 */
public boolean destroyRegion(String regName, boolean throwIfNotExists) {
  DiskRegionView drv = getDiskInitFile().getDiskRegionByName(regName);
  if (drv != null) {
    // plain region: complete its destroy in the init file
    getDiskInitFile().endDestroyRegion(drv);
    return true;
  }
  drv = getDiskInitFile().getDiskRegionByPrName(regName);
  if (drv != null) {
    // partitioned region: destroy via the PR path
    getDiskInitFile().destroyPRRegion(regName);
    return true;
  }
  if (throwIfNotExists) {
    throw new IllegalArgumentException(
        "The disk store does not contain a region named: " + regName);
  }
  return false;
}
/**
 * Offline-mode modification of a persisted region's attributes (eviction,
 * capacity, compressor, statistics). Delegates to the init file; tries the
 * plain-region name first, then the partitioned-region name.
 *
 * @return a printable description of the modification result
 * @throws IllegalArgumentException if no region with the given name exists
 */
public String modifyRegion(String regName, String lruOption,
    String lruActionOption, String lruLimitOption,
    String concurrencyLevelOption, String initialCapacityOption,
    String loadFactorOption, String compressorClassNameOption,
    String statisticsEnabledOption, boolean printToConsole) {
  assert isOffline();
  DiskRegionView drv = getDiskInitFile().getDiskRegionByName(regName);
  if (drv == null) {
    drv = getDiskInitFile().getDiskRegionByPrName(regName);
    if (drv == null) {
      throw new IllegalArgumentException(
          "The disk store does not contain a region named: " + regName);
    } else {
      return getDiskInitFile().modifyPRRegion(regName, lruOption,
          lruActionOption, lruLimitOption, concurrencyLevelOption,
          initialCapacityOption, loadFactorOption, compressorClassNameOption,
          statisticsEnabledOption, printToConsole);
    }
  } else {
    return getDiskInitFile().modifyRegion(drv, lruOption, lruActionOption,
        lruLimitOption, concurrencyLevelOption, initialCapacityOption,
        loadFactorOption, compressorClassNameOption,
        statisticsEnabledOption, printToConsole);
  }
}
/** Offline-mode dump of a region's recorded info to the given stream. */
private void dumpInfo(PrintStream printStream, String regName) {
  assert isOffline();
  getDiskInitFile().dumpRegionInfo(printStream, regName);
}

/** Offline-mode dump of all region metadata; optionally includes buckets. */
private void dumpMetadata(boolean showBuckets) {
  assert isOffline();
  getDiskInitFile().dumpRegionMetadata(showBuckets);
}
private void exportSnapshot(String name, File out) throws IOException {
// Since we are recovering a disk store, the cast from DiskRegionView -->
// PlaceHolderDiskRegion
// and from RegionEntry --> DiskEntry should be ok.
// In offline mode, we need to schedule the regions to be recovered
// explicitly.
for (DiskRegionView drv : getKnown()) {
scheduleForRecovery((PlaceHolderDiskRegion) drv);
}
recoverRegionsThatAreReady(false);
// coelesce disk regions so that partitioned buckets from a member end up in
// the same file
Map> regions = new HashMap>();
for (DiskRegionView drv : getKnown()) {
PlaceHolderDiskRegion ph = (PlaceHolderDiskRegion) drv;
String regionName = (drv.isBucket() ? ph.getPrName() : drv.getName());
List views = regions.get(regionName);
if (views == null) {
views = new ArrayList();
regions.put(regionName, views);
}
views.add(ph);
}
final ByteArrayDataInput in = new ByteArrayDataInput();
for (Map.Entry> entry : regions
.entrySet()) {
String fname = entry.getKey().substring(1).replace('/', '-');
File f = new File(out, "snapshot-" + name + "-" + fname);
SnapshotWriter writer = GFSnapshot.create(f, entry.getKey());
try {
for (DiskRegionView drv : entry.getValue()) {
// skip regions that have no entries
if (drv.getRecoveredEntryCount() == 0) {
continue;
}
// TODO: [sumedh] for best efficiency this should use equivalent of
// DiskSavyIterator or Oplog.getSortedLiveEntries for recovered
// entry map else random reads will kill performance
Collection entries = drv.getRecoveredEntryMap()
.regionEntries();
for (RegionEntry re : entries) {
Object key = re.getKeyCopy();
// TODO:KIRK:OK Rusty's code was value = de.getValueWithContext(drv);
@Retained @Released Object value = re._getValueRetain(drv, true); // OFFHEAP: passed to SnapshotRecord who copies into byte[]; so for now copy to heap CD
if (Token.isRemoved(value)) {
continue;
}
// some entries may have overflowed to disk
if (value == null && re instanceof DiskEntry) {
DiskEntry de = (DiskEntry) re;
DiskEntry.Helper.recoverValue(de, de.getDiskId().getOplogId(),
((DiskRecoveryStore) drv), in);
// TODO:KIRK:OK Rusty's code was value = de.getValueWithContext(drv);
value = de._getValueRetain(drv, true); // OFFHEAP: passed to SnapshotRecord who copies into byte[]; so for now copy to heap CD
}
try {
writer.snapshotEntry(new SnapshotRecord(key, value));
} finally {
OffHeapHelper.release(value);
}
}
}
} finally {
writer.snapshotComplete();
}
}
}
/**
 * Offline validation: recovers every known region through a
 * ValidatingDiskRegion (values are not recovered, to save memory) and prints
 * the live-entry and compactable-record counts to stdout.
 */
private void validate() {
  assert isValidating();
  this.RECOVER_VALUES = false; // save memory @todo should Oplog make sure
  // value is deserializable?
  this.liveEntryCount = 0;
  this.deadRecordCount = 0;
  for (DiskRegionView drv : getKnown()) {
    scheduleForRecovery(ValidatingDiskRegion.create(this, drv));
  }
  recoverRegionsThatAreReady(false);
  if (getDeadRecordCount() > 0) {
    System.out.println("Disk store contains " + getDeadRecordCount()
        + " compactable records.");
  }
  System.out.println("Total number of region entries in this disk store is: "
      + getLiveEntryCount());
}
// number of live entries seen during offline validation/compaction
private int liveEntryCount;

void incLiveEntryCount(int count) {
  this.liveEntryCount += count;
}

public int getLiveEntryCount() {
  return this.liveEntryCount;
}

// number of dead (compactable) records seen during offline validation/compaction
private int deadRecordCount;

void incDeadRecordCount(int count) {
  this.deadRecordCount += count;
}

public int getDeadRecordCount() {
  return this.deadRecordCount;
}
/**
 * Offline compaction: recovers all known regions through
 * OfflineCompactionDiskRegion instances, compacts the oplogs and the init
 * file, and reports the result (or the upgrade result in upgrade-only mode)
 * to stdout.
 */
private void offlineCompact() {
  assert isOfflineCompacting();
  this.RECOVER_VALUES = false;
  this.deadRecordCount = 0;
  for (DiskRegionView drv : getKnown()) {
    scheduleForRecovery(OfflineCompactionDiskRegion.create(this, drv));
  }
  persistentOplogs.recoverRegionsThatAreReady(false);
  persistentOplogs.offlineCompact();
  getDiskInitFile().forceCompaction();
  if (this.upgradeVersionOnly) {
    System.out.println("Upgrade disk store " + this.name + " to version "
        + getRecoveredGFVersionName() + " finished.");
  } else {
    if (getDeadRecordCount() == 0) {
      System.out
          .println("Offline compaction did not find anything to compact.");
    } else {
      System.out.println("Offline compaction removed " + getDeadRecordCount()
          + " records.");
    }
    // If we have more than one oplog then the liveEntryCount may not be the
    // total
    // number of live entries in the disk store. So do not log the live entry
    // count
  }
}
// PR name -> shared LRUStatistics for all of that PR's recovered buckets;
// guarded by synchronizing on the map itself
private final HashMap prlruStatMap = new HashMap();

/**
 * Returns the LRU statistics shared by all buckets of the given placeholder
 * region's PR, creating (and caching) them on first use.
 */
LRUStatistics getOrCreatePRLRUStats(PlaceHolderDiskRegion dr) {
  String prName = dr.getPrName();
  LRUStatistics result = null;
  synchronized (this.prlruStatMap) {
    result = this.prlruStatMap.get(prName);
    if (result == null) {
      EvictionAttributesImpl ea = dr.getEvictionAttributes();
      LRUAlgorithm ec = ea.createEvictionController(null, dr.getEnableOffHeapMemory());
      StatisticsFactory sf = cache.getDistributedSystem();
      result = ec.getLRUHelper().initStats(dr, sf);
      this.prlruStatMap.put(prName, result);
    }
  }
  return result;
}
/**
 * If we have recovered a bucket earlier for the given pr then we will have an
 * LRUStatistics to return for it. Otherwise return null.
 */
LRUStatistics getPRLRUStats(PartitionedRegion pr) {
  final String prName = pr.getFullPath();
  synchronized (this.prlruStatMap) {
    return this.prlruStatMap.get(prName);
  }
}
/**
 * Lock the disk store to prevent updates. This is the first step of the
 * backup process. Once all disk stores on all members are locked, we still
 * move on to startBackup.
 */
public void lockStoreBeforeBackup() {
  // This will prevent any region level operations like
  // create/destroy region, and region view changes.
  // We might want to consider preventing any entry level
  // operations as well. We should at least prevent transactions
  // when we support persistent transactions.
  //
  // When we do start caring about blocking entry
  // level operations, we will need to be careful
  // to block them *before* they are put in the async
  // queue
  getDiskInitFile().lockForBackup();
}

/**
 * Release the lock that is preventing operations on this disk store during
 * the backup process.
 */
public void releaseBackupLock() {
  getDiskInitFile().unlockForBackup();
}
/**
 * Start the backup process. This is the second step of the backup process.
 * Defines the data being backed up by copying the init file and rolling to
 * the next oplog. After this method returns, operations proceed as normal
 * except that oplogs in the captured set are not removed.
 *
 * Fixes relative to the original: restored the stripped generic type on the
 * oplog map ({@code Map<Oplog, Set<File>>} matches the iteration in
 * finishBackup), removed the dead {@code childOplog == null} ternary (the
 * loop already breaks above when the child is null) and its stray double
 * semicolon, and corrected the "snaphotting" log message.
 *
 * @param targetDir directory that receives the backup
 * @param baselineInspector non-null for an incremental backup; used to skip
 *        oplogs that a previous backup already captured
 * @param restoreScript accumulates the file mappings for the restore script
 * @throws IOException if a backup directory cannot be created
 */
public void startBackup(File targetDir, BackupInspector baselineInspector,
    RestoreScript restoreScript) throws IOException {
  getDiskInitFile().setBackupThread(Thread.currentThread());
  boolean done = false;
  try {
    for (;;) {
      Oplog childOplog = persistentOplogs.getChild();
      if (childOplog == null) {
        // no oplogs yet: nothing to capture beyond the init file
        this.diskStoreBackup = new DiskStoreBackup(Collections.EMPTY_MAP, targetDir);
        break;
      }
      // childOplog is known non-null here, so lock on it directly.
      Object childLock = childOplog.lock;
      // TODO - We really should move this lock into the disk store, but
      // until then we need to do this magic to make sure we're actually
      // locking the latest child for both types of oplogs
      //This ensures that all writing to disk is blocked while we are
      //creating the snapshot
      synchronized (childLock) {
        if (persistentOplogs.getChild() != childOplog) {
          // the child rolled while we were acquiring the lock; retry
          continue;
        }
        logger.fine("snapshotting oplogs for disk store " + getName());
        // Create the directories for this disk store
        for (int i = 0; i < directories.length; i++) {
          File dir = getBackupDir(targetDir, i);
          if (!FileUtil.mkdirs(dir)) {
            throw new IOException("Could not create directory " + dir);
          }
          restoreScript.addFile(directories[i].getDir(), dir);
        }
        restoreScript.addExistenceTest(this.initFile.getIFFile());
        // Contains all oplogs that will be backed up
        Map<Oplog, Set<File>> allOplogs = null;
        // Incremental backup so filter out oplogs that have already been
        // backed up
        if (null != baselineInspector) {
          Map baselineCopyMap = new LinkedHashMap();
          allOplogs = filterBaselineOplogs(baselineInspector, baselineCopyMap);
          restoreScript.addBaselineFiles(baselineCopyMap);
        } else {
          allOplogs = getAllOplogsForBackup();
        }
        // mark all oplogs as being backed up. This will
        // prevent the oplogs from being deleted
        this.diskStoreBackup = new DiskStoreBackup(allOplogs, targetDir);
        // copy the init file
        File firstDir = getBackupDir(targetDir, infoFileDirIndex);
        initFile.copyTo(firstDir);
        persistentOplogs.forceRoll(null);
        logger.fine("done snapshotting for disk store " + getName());
        break;
      }
    }
    done = true;
  } finally {
    if (!done) {
      // failed part-way: release the captured-oplog set
      clearBackup();
    }
  }
}
/** Returns the per-disk-dir backup subdirectory (BACKUP_DIR_PREFIX + index) under targetDir. */
private File getBackupDir(File targetDir, int index) {
  return new File(targetDir, BACKUP_DIR_PREFIX + index);
}
/**
* Copy the oplogs to the backup directory. This is the final step of the
* backup process. The oplogs we copy are defined in the startBackup method.
*
* @param backupManager
* @throws IOException
*/
public void finishBackup(BackupManager backupManager) throws IOException {
if (diskStoreBackup == null) {
return;
}
try {
//Wait for oplogs to be unpreblown before backing them up.
waitForDelayedWrites();
//Backup all of the oplogs
for (Map.Entry> entry: this.diskStoreBackup.getPendingBackup().entrySet()) {
if (backupManager.isCancelled()) {
break;
}
Oplog oplog = entry.getKey();
Set filesToBackup = entry.getValue();
// Copy theoplog to the destination directory
int index = oplog.getDirectoryHolder().getArrayIndex();
File backupDir = getBackupDir(this.diskStoreBackup.getTargetDir(),
index);
//Backup just the set of files we previously captured
for(File file : filesToBackup) {
FileUtil.copy(file, backupDir);
}
// Allow the oplog to be deleted, and process any pending delete
this.diskStoreBackup.backupFinished(oplog);
}
} finally {
clearBackup();
}
}
/**
 * Returns the array index of the directory holder whose directory equals
 * searchDir, or 0 when no holder matches.
 */
private int getArrayIndexOfDirectory(File searchDir) {
  for (DirectoryHolder dh : directories) {
    if (dh.getDir().equals(searchDir)) {
      return dh.getArrayIndex();
    }
  }
  return 0;
}
/** Returns the directory holders backing this store. NOTE: returns the internal array. */
public DirectoryHolder[] getDirectoryHolders(){
  return this.directories;
}

/** Drops the in-progress backup state (if any) and cleans up its resources. */
private void clearBackup() {
  DiskStoreBackup backup = this.diskStoreBackup;
  if (backup != null) {
    this.diskStoreBackup = null;
    backup.cleanup();
  }
}

/** Returns the in-progress backup, or null when no backup is running. */
public DiskStoreBackup getInProgressBackup() {
  return diskStoreBackup;
}

// intentionally empty placeholder; see the comments for the intended behavior
private void createRegionsForValidation(String name) {
  // first create any parent regions as non-persistent with local scope.
  // then create the last region itself as PERSISTENT_REPLICATE with local
  // scope
}

/** Returns all disk region views recorded in the init file. */
protected Collection getKnown() {
  return this.initFile.getKnown();
}
/** Convenience overload: offline store with oplogs, no compaction/validation. */
private static DiskStoreImpl createForOffline(String dsName, File[] dsDirs)
    throws Exception {
  return createForOffline(dsName, dsDirs, false, false,
      false/* upgradeVersionOnly */, 0, true);
}

/** Convenience overload: offline store, caller chooses whether oplogs are needed. */
private static DiskStoreImpl createForOffline(String dsName, File[] dsDirs,
    boolean needsOplogs) throws Exception {
  return createForOffline(dsName, dsDirs, false, false,
      false/* upgradeVersionOnly */, 0, needsOplogs);
}

/** Convenience overload: offline store opened in validation mode. */
private static DiskStoreImpl createForOfflineValidate(String dsName,
    File[] dsDirs) throws Exception {
  return createForOffline(dsName, dsDirs, false, true,
      false/* upgradeVersionOnly */, 0, true);
}
// loner cache/DS created for offline operations; torn down by cleanupOffline()
protected static Cache offlineCache = null;
protected static DistributedSystem offlineDS = null;

// whether indexes are persisted for this store
private final boolean persistIndexes;
// ids of indexes recovered from disk
private final HashSet recoveredIndexIds;

/** Closes and clears the loner cache and distributed system used for offline mode. */
private static void cleanupOffline() {
  if (offlineCache != null) {
    offlineCache.close();
    offlineCache = null;
  }
  if (offlineDS != null) {
    offlineDS.disconnect();
    offlineDS = null;
  }
}
/**
 * Creates a DiskStoreImpl for offline operations (validate, compact, dump,
 * export) by standing up a loner distributed system and cache. The created
 * cache/DS are stored in {@link #offlineCache}/{@link #offlineDS} and must be
 * torn down with {@link #cleanupOffline()}.
 *
 * @param dsName the disk store name
 * @param dsDirs directories to read; defaults to the current directory when null
 * @param offlineCompacting open in offline-compaction mode
 * @param offlineValidate open in validation mode
 * @param upgradeVersionOnly only upgrade the on-disk version
 * @param maxOplogSize max oplog size used during offline compaction; -1 keeps the default
 * @param needsOplogs whether the operation needs the oplogs (not just the init file)
 */
private static DiskStoreImpl createForOffline(String dsName, File[] dsDirs,
    boolean offlineCompacting, boolean offlineValidate,
    boolean upgradeVersionOnly, long maxOplogSize, boolean needsOplogs)
    throws Exception {
  if (dsDirs == null) {
    dsDirs = new File[] { new File("") };
  }
  // need a cache so create a loner ds
  Properties props = new Properties();
  props.setProperty("locators", "");
  props.setProperty("mcast-port", "0");
  props.setProperty("cache-xml-file", "");
  if (!TRACE_RECOVERY) {
    props.setProperty("log-level", "warning");
  }
  DistributedSystem ds = DistributedSystem.connect(props);
  offlineDS = ds;
  Cache c = com.gemstone.gemfire.cache.CacheFactory.create(ds);
  offlineCache = c;
  com.gemstone.gemfire.cache.DiskStoreFactory dsf = c
      .createDiskStoreFactory();
  dsf.setDiskDirs(dsDirs);
  if (offlineCompacting && maxOplogSize != -1L) {
    dsf.setMaxOplogSize(maxOplogSize);
  }
  DiskStoreImpl dsi = new DiskStoreImpl(c, dsName,
      ((DiskStoreFactoryImpl) dsf).getDiskStoreAttributes(), false, null,
      true, upgradeVersionOnly, offlineValidate, offlineCompacting,
      needsOplogs);
  ((GemFireCacheImpl) c).addDiskStore(dsi);
  return dsi;
}
/**
* Use this method to destroy a region in an offline disk store.
*
* @param dsName
* the name of the disk store
* @param dsDirs
* the directories that that the disk store wrote files to
* @param regName
* the name of the region to destroy
*/
public static void destroyRegion(String dsName, File[] dsDirs, String regName)
throws Exception {
DiskStoreImpl dsi = createForOffline(dsName, dsDirs);
try {
dsi.destroyRegion(regName, true);
} finally {
cleanupOffline();
}
}
public static String modifyRegion(String dsName, File[] dsDirs,
String regName, String lruOption, String lruActionOption,
String lruLimitOption, String concurrencyLevelOption,
String initialCapacityOption, String loadFactorOption,
String compressorClassNameOption, String statisticsEnabledOption,
boolean printToConsole) throws Exception {
DiskStoreImpl dsi = createForOffline(dsName, dsDirs);
try {
return dsi.modifyRegion(regName, lruOption, lruActionOption,
lruLimitOption, concurrencyLevelOption, initialCapacityOption,
loadFactorOption, compressorClassNameOption,
statisticsEnabledOption, printToConsole);
} finally {
cleanupOffline();
}
}
/**
 * Print information about a region of an offline disk store to the
 * supplied stream. The store is opened without compaction support.
 */
public static void dumpInfo(PrintStream printStream, String dsName,
    File[] dsDirs, String regName) throws Exception {
  DiskStoreImpl store = createForOffline(dsName, dsDirs, false);
  try {
    store.dumpInfo(printStream, regName);
  } finally {
    cleanupOffline();
  }
}
/**
 * Dump the metadata of an offline disk store, optionally including
 * bucket information.
 */
public static void dumpMetadata(String dsName, File[] dsDirs,
    boolean showBuckets) throws Exception {
  DiskStoreImpl store = createForOffline(dsName, dsDirs, false);
  try {
    store.dumpMetadata(showBuckets);
  } finally {
    cleanupOffline();
  }
}
/**
 * Export a snapshot of an offline disk store's contents to the given
 * output file.
 */
public static void exportOfflineSnapshot(String dsName, File[] dsDirs,
    File out) throws Exception {
  DiskStoreImpl store = createForOffline(dsName, dsDirs);
  try {
    store.exportSnapshot(dsName, out);
  } finally {
    cleanupOffline();
  }
}
/**
 * Run offline validation of the named disk store located in the given
 * directories.
 */
public static void validate(String name, File[] dirs) throws Exception {
  DiskStoreImpl store = createForOfflineValidate(name, dirs);
  try {
    store.validate();
  } finally {
    cleanupOffline();
  }
}
/**
 * Compact an offline disk store in place, then close it.
 *
 * @param upgradeVersionOnly when true only the version is upgraded
 * @param maxOplogSize max oplog size to use while compacting, or -1 to
 *          keep the store's configured size
 * @return the (already closed) DiskStoreImpl that performed the compaction
 */
public static DiskStoreImpl offlineCompact(String name, File[] dirs,
    boolean upgradeVersionOnly, long maxOplogSize) throws Exception {
  try {
    // give the embedding product a chance to set up offline statics
    final GemFireCacheImpl.StaticSystemCallbacks sysCb =
        GemFireCacheImpl.FactoryStatics.systemCallbacks;
    if (sysCb != null) {
      sysCb.initializeForOffline();
    }
    final DiskStoreImpl store = createForOffline(name, dirs, true, false,
        upgradeVersionOnly, maxOplogSize, true);
    store.offlineCompact();
    store.close();
    return store;
  } finally {
    cleanupOffline();
  }
}
/**
 * Command line entry point: the first argument is the disk store name,
 * any remaining arguments are the directories containing its files.
 * Runs an offline compaction with a 1024 max oplog size.
 */
public static void main(String args[]) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: diskStoreName [dirs]");
    return;
  }
  final String dsName = args[0];
  File[] dirs = null;
  if (args.length > 1) {
    dirs = new File[args.length - 1];
    for (int i = 0; i < dirs.length; i++) {
      dirs[i] = new File(args[i + 1]);
    }
  }
  offlineCompact(dsName, dirs, false, 1024);
}
/** Returns true if this disk store has written at least one oplog. */
public boolean hasPersistedData() {
  return (persistentOplogs.getChild() != null);
}

/** Returns the unique id of this disk store as a JDK UUID. */
public UUID getDiskStoreUUID() {
  return this.diskStoreID.toUUID();
}

/** Returns the unique id of this disk store. */
public DiskStoreID getDiskStoreID() {
  return this.diskStoreID;
}

void setDiskStoreID(DiskStoreID diskStoreID) {
  this.diskStoreID = diskStoreID;
}

/** Returns the file backing this disk store's init file. */
File getInitFile() {
  return getDiskInitFile().getIFFile();
}

/** Returns true if the linked-list form of live entries is required. */
public boolean needsLinkedList() {
  // note: keep evaluation order (compaction check first)
  return isCompactionPossible() || couldHaveKrf();
}

/**
 * @return true if KRF files are used on this disk store's oplogs
 */
boolean couldHaveKrf() {
  return !isOffline();
}

@Override
public String toString() {
  return "DiskStore[" + this.name + "]";
}
/**
 * Background task that rebuilds/loads this disk store's sorted indexes from
 * its oplogs (using existing irf index files where valid, otherwise from a
 * full value scan) and then marks index recovery as done. Any failure is
 * recorded in {@code indexRecoveryFailure} for
 * {@code checkIndexRecoveryFailure()} to rethrow in waiting threads.
 */
private class IndexRecoveryTask implements Runnable {

  // the oplogs whose entries are scanned for index recovery
  private final Set allOplogs;

  public IndexRecoveryTask(Set allOplogs) {
    this.allOplogs = allOplogs;
  }

  @Override
  public void run() {
    // clear any failure recorded by a previous attempt
    indexRecoveryFailure.set(null);
    final GemFireCacheImpl.StaticSystemCallbacks cb = GemFireCacheImpl
        .getInternalProductCallbacks();
    // wait for async recovery if required
    Set indexes = null;
    final DiskStoreImpl dsi = DiskStoreImpl.this;
    if (cb != null) {
      cb.waitForAsyncIndexRecovery(dsi);
      indexes = cb.getAllLocalIndexes(dsi);
    }
    // need to recover indexes if index recovery map is non-null
    if (indexes != null && !indexes.isEmpty()) {
      try {
        // if there are newly created indexes then do populate all indexes
        // from full values since we have to read full values in any case
        @SuppressWarnings("unchecked")
        final Map allIndexes =
            new THashMap();
        @SuppressWarnings("unchecked")
        final Set newIndexes = new THashSet(4);
        for (SortedIndexContainer index : indexes) {
          // an index whose id was never persisted is new and must be
          // freshly populated
          if (!recoveredIndexIds.contains(index.getUUID())) {
            if (TEST_NEW_CONTAINER) {
              if (TEST_NEW_CONTAINER_LIST == null) {
                TEST_NEW_CONTAINER_LIST = new ArrayList();
              }
              TEST_NEW_CONTAINER_LIST.add(index);
            }
            newIndexes.add(index);
          }
          // one parallel recovery job per index
          allIndexes.put(index, new SortedIndexRecoveryJob(dsi.getCache(),
              dsi, dsi.getCancelCriterion(), index));
        }
        for (Oplog oplog : this.allOplogs) {
          // check if we have started closing
          getCancelCriterion().checkCancelInProgress(null);
          // recover for indexes if there was no krf (hence value
          // recovery already done inline)
          // fallback to full recovery if failed to recover from *irf
          File indexFile = oplog.getIndexFileIfValid();
          boolean hasKrf = !oplog.needsKrf();
          if (!hasKrf || !newIndexes.isEmpty() || indexFile == null) {
            // for missing krf case, the irf will be created in createKrf
            if (persistIndexes && hasKrf) {
              Collection targetRegions = oplog
                  .getTargetRegionsForIndexes(indexes);
              List sortedLiveEntries = oplog
                  .getSortedLiveEntries(targetRegions);
              if (indexFile == null) {
                // create the full irf files
                oplog.writeIRF(sortedLiveEntries, null, indexes, allIndexes);
                // check if we have started closing
                getCancelCriterion().checkCancelInProgress(null);
                getDiskInitFile().irfCreate(oplog.oplogId);
              }
              else {
                // append to IRF for new indexes only but load all indexes
                oplog.writeIRF(sortedLiveEntries, null, newIndexes,
                    allIndexes);
              }
            }
            else {
              oplog.recoverIndexes(allIndexes);
            }
          }
          else {
            // krf present and irf valid: load indexes from the irf
            oplog.getOplogIndex().recoverIndexes(allIndexes);
          }
        }
        // submit last jobs for all in parallel and then wait for all
        Collection allJobs = allIndexes.values();
        for (SortedIndexRecoveryJob indexRecoveryJob : allJobs) {
          indexRecoveryJob.submitLastJob();
        }
        for (SortedIndexRecoveryJob indexRecoveryJob : allJobs) {
          indexRecoveryJob.waitForJobs(0);
        }
        if (!newIndexes.isEmpty()) {
          // persist the ids of freshly populated indexes so they are not
          // treated as new on the next recovery
          for (SortedIndexContainer index : newIndexes) {
            writeIndexCreate(index.getUUID());
          }
        }
      } catch (IOException ioe) {
        // record only the first failure; waiters rethrow it
        indexRecoveryFailure
            .compareAndSet(null, new DiskAccessException(ioe));
      } catch (RuntimeException re) {
        indexRecoveryFailure.compareAndSet(null, re);
      } catch (Error err) {
        if (SystemFailure.isJVMFailureError(err)) {
          SystemFailure.initiateFailure(err);
          // If this ever returns, rethrow the error. We're poisoned
          // now, so don't let this thread continue.
          throw err;
        }
        // Whenever you catch Error or Throwable, you must also
        // check for fatal JVM error (see above). However, there is
        // _still_ a possibility that you are dealing with a cascading
        // error condition, so you also need to check to see if the JVM
        // is still usable:
        SystemFailure.checkFailure();
        indexRecoveryFailure.compareAndSet(null, err);
      } finally {
        // always release per-oplog recovery maps and wake any waiters
        for (Oplog oplog : this.allOplogs) {
          oplog.clearInitRecoveryMap();
        }
        markIndexRecoveryDone();
      }
    }
    else {
      // nothing to recover; still release recovery maps and wake waiters
      for (Oplog oplog : this.allOplogs) {
        oplog.clearInitRecoveryMap();
      }
      markIndexRecoveryDone();
    }
  }
}
/**
 * Background task that recovers entry values from the given oplogs for the
 * stores it was handed, after index recovery completes. Regions flagged for
 * deferral during the first pass are retried in a second pass (used by
 * GemFireXD to wait for DDL replay first). Progress is published through
 * {@code currentAsyncValueRecoveryMap}, whose monitor is used to notify
 * {@code waitForAsyncRecovery()} callers.
 */
private class ValueRecoveryTask implements Runnable {

  // the oplogs to read values from
  private final Set oplogSet;

  // defensive snapshot of the stores this task is responsible for
  private final Map recoveredStores;

  public ValueRecoveryTask(Set oplogSet,
      Map recoveredStores) {
    this.oplogSet = oplogSet;
    this.recoveredStores = new HashMap(
        recoveredStores);
  }

  public void run() {
    // store any regions whose initializations have to be deferred
    final HashMap deferredRegions =
        new HashMap();
    synchronized (asyncValueRecoveryLock) {
      try {
        // wait for index recovery to complete first to avoid interference
        waitForIndexRecoveryEnd(-1);
        DiskStoreObserver.startAsyncValueRecovery(DiskStoreImpl.this);
        // defer regions marked in first pass
        for (Oplog oplog : oplogSet) {
          oplog.recoverValuesIfNeeded(currentAsyncValueRecoveryMap,
              deferredRegions, currentAsyncValueRecoveryMap);
        }
      } catch (CancelException ignore) {
        // do nothing
      } finally {
        synchronized (currentAsyncValueRecoveryMap) {
          // the stores handled by this task are no longer pending
          currentAsyncValueRecoveryMap.keySet().removeAll(
              recoveredStores.keySet());
          if (deferredRegions.size() > 0) {
            // put deferred regions back so waiters keep waiting for them
            currentAsyncValueRecoveryMap.putAll(deferredRegions);
            if (logger.fineEnabled()) {
              logger.fine("DiskStoreImpl: deferred recovery stores: "
                  + currentAsyncValueRecoveryMap.values());
            }
          }
          else {
            DiskStoreObserver.endAsyncValueRecovery(DiskStoreImpl.this);
          }
          // wake up waitForAsyncRecovery() callers
          currentAsyncValueRecoveryMap.notifyAll();
        }
      }
      if (deferredRegions.size() > 0) {
        // second round to recover the deferred regions/TXStates, but first
        // do any initialization (used by GemFireXD to wait for DDL replay);
        // break the wait if new tasks have been added to the recovery
        // list for recovery to avoid blocking them (#43048)
        try {
          final GemFireCacheImpl.StaticSystemCallbacks cb = GemFireCacheImpl
              .getInternalProductCallbacks();
          final long waitMillis = 100L;
          if (cb != null) {
            while (!cb.initializeForDeferredRegionsRecovery(waitMillis)) {
              synchronized (currentAsyncValueRecoveryMap) {
                // newer recovery requests arrived; bail out so they are
                // not blocked behind this deferred pass (#43048)
                if (currentAsyncValueRecoveryMap.size() > deferredRegions
                    .size()) {
                  DiskStoreObserver.endAsyncValueRecovery(DiskStoreImpl.this);
                  return;
                }
              }
            }
          }
          for (Oplog oplog : oplogSet) {
            oplog.recoverValuesIfNeeded(deferredRegions, null,
                currentAsyncValueRecoveryMap);
          }
        } catch (CancelException ignore) {
          // do nothing
        } finally {
          synchronized (currentAsyncValueRecoveryMap) {
            currentAsyncValueRecoveryMap.keySet().removeAll(
                recoveredStores.keySet());
            currentAsyncValueRecoveryMap.notifyAll();
          }
          DiskStoreObserver.endAsyncValueRecovery(DiskStoreImpl.this);
        }
      }
    }
  }
}
/**
 * Block until async value recovery for the given disk region has completed
 * (its id is removed from currentAsyncValueRecoveryMap) or this disk store
 * starts closing. A pending interrupt is remembered and re-asserted on
 * return so the caller can observe it.
 */
public void waitForAsyncRecovery(DiskRegion diskRegion) {
  synchronized (currentAsyncValueRecoveryMap) {
    boolean interrupted = false;
    // bounded wait so isClosing() is rechecked periodically even if a
    // notifyAll is missed
    while (!isClosing()
        && currentAsyncValueRecoveryMap.containsKey(diskRegion.getId())) {
      try {
        currentAsyncValueRecoveryMap.wait(500);
      } catch (InterruptedException e) {
        interrupted = true;
      }
    }
    if (interrupted) {
      // restore the interrupt status for the caller
      Thread.currentThread().interrupt();
    }
  }
}
/**
 * Set to Boolean.TRUE in threads that are executing a background disk
 * store task (see executeDiskStoreTask, bug 42775). Typed as
 * ThreadLocal&lt;Boolean&gt; (was a raw ThreadLocal) so no casts are needed.
 */
private static final ThreadLocal<Boolean> backgroundTaskThread = new ThreadLocal<Boolean>();

/**
 * Returns true if the calling thread has been marked as a background disk
 * store task thread; an unset thread-local (null) counts as false.
 */
private static boolean isBackgroundTaskThread() {
  return Boolean.TRUE.equals(backgroundTaskThread.get());
}

/** Marks the calling thread as a background disk store task thread. */
private static void markBackgroundTaskThread() {
  backgroundTaskThread.set(Boolean.TRUE);
}
/**
 * Execute a task which must be performed asynchronously, but has no
 * requirement for timely execution. This task pool is used for compactions,
 * creating KRFs, etc. So some of the queued tasks may take a while.
 *
 * @return true if the task was accepted by the disk store task pool
 */
public boolean executeDiskStoreTask(final Runnable runnable) {
  return executeDiskStoreTask(runnable, this.diskStoreTaskPool) != null;
}
/**
 * Execute a task asynchronously, or in the calling thread if the bound
 * is reached. This pool is used for write operations which can be delayed,
 * but we have a limit on how many write operations we delay so that
 * we don't run out of disk space. Used for deletes, unpreblow, RAF close, etc.
 *
 * @return true if a Future was obtained for the task
 */
public boolean executeDelayedExpensiveWrite(Runnable task) {
  Future f = executeDiskStoreTask(task, this.delayedWritePool);
  // remember the latest submission so waitForDelayedWrites() can wait on
  // it; NOTE(review): this write is not synchronized -- presumably callers
  // tolerate reading a slightly stale Future -- confirm
  lastDelayedWrite = f;
  return f != null;
}
/**
 * Wait for any current operations in the delayed write pool. Completion
 * of this method ensures that the writes have completed or the pool was
 * shutdown.
 */
protected void waitForDelayedWrites() {
  Future lastWriteTask = lastDelayedWrite;
  if (lastWriteTask != null) {
    try {
      lastWriteTask.get();
    } catch (InterruptedException e) {
      // stop waiting but preserve the interrupt status for the caller
      Thread.currentThread().interrupt();
    } catch (Exception e) {
      // do nothing, an exception from the write task was already logged.
    }
  }
}
/**
 * Submit the runnable to the given pool wrapped in a DiskStoreTask that
 * maintains the background-task count and marks its executing thread as a
 * background task thread.
 *
 * @return the Future for the submitted task, or null if the pool rejected
 *         it (the background-task count is rolled back in that case)
 */
private Future executeDiskStoreTask(final Runnable runnable, ThreadPoolExecutor executor) {
  // schedule another thread to do it
  incBackgroundTasks();
  Future result = executeDiskStoreTask(new DiskStoreTask() {
    public void run() {
      try {
        markBackgroundTaskThread(); // for bug 42775
        //getCache().getCachePerfStats().decDiskTasksWaiting();
        runnable.run();
      } finally {
        // always balance incBackgroundTasks() above
        decBackgroundTasks();
      }
    }
    public void taskCancelled() {
      // task never ran; still balance the count
      decBackgroundTasks();
    }
  }, executor);
  if (result == null) {
    // submission rejected: neither run() nor taskCancelled() will fire
    decBackgroundTasks();
  }
  return result;
}
/**
 * Submit the task to the given pool, translating a
 * RejectedExecutionException (pool shutting down) into a null return.
 *
 * @return the Future for the submitted task, or null if it was rejected
 */
private Future executeDiskStoreTask(DiskStoreTask r, ThreadPoolExecutor executor) {
  try {
    return executor.submit(r);
  } catch (RejectedExecutionException ex) {
    if (this.logger.fineEnabled()) {
      this.logger.fine("Ignored compact schedule during shutdown", ex);
    }
  }
  return null;
}
/**
 * Stop both task pools: the disk store task pool is shut down immediately
 * via shutdownPool(), while the delayed write pool is given up to one
 * second to drain before this method returns.
 */
private void stopDiskStoreTaskPool() {
  if (this.logger.infoEnabled()) {
    this.logger.convertToLogWriter().info("Stopping DiskStoreTaskPool");
  }
  shutdownPool(diskStoreTaskPool);
  //Allow the delayed writes to complete
  delayedWritePool.shutdown();
  try {
    delayedWritePool.awaitTermination(1, TimeUnit.SECONDS);
  } catch (InterruptedException e) {
    // give up waiting but preserve the interrupt status for the caller
    Thread.currentThread().interrupt();
  }
}
/**
 * Shut the given pool down immediately and invoke
 * {@code DiskStoreTask.taskCancelled()} on any queued DiskStoreTask that
 * never got to run, so its cleanup (e.g. background-task accounting) still
 * happens.
 */
private void shutdownPool(ThreadPoolExecutor pool) {
  // All the regions have already been closed
  // so this pool shouldn't be doing anything.
  List<Runnable> tasks = pool.shutdownNow();
  for (Runnable runnable : tasks) {
    // BUGFIX: the original tested/cast the List itself ("l") instead of
    // each queued task, so taskCancelled() was never invoked
    if (runnable instanceof DiskStoreTask) {
      ((DiskStoreTask) runnable).taskCancelled();
    }
  }
}
/**
 * Persist the garbage-collection RegionVersionVector of the given region to
 * the disk region's current oplog, recording for each member only GC
 * versions already dominated by what has been persisted. No-op when
 * concurrency checks are disabled.
 *
 * NOTE(review): unlike writeRVV(), a null region is not tolerated here --
 * region.getVersionVector() below would NPE; presumably callers always pass
 * a non-null region -- confirm.
 */
public void writeRVVGC(DiskRegion dr, LocalRegion region) {
  if (region != null && !region.getConcurrencyChecksEnabled()) {
    return;
  }
  acquireReadLock(dr);
  try {
    if (dr.isRegionClosed()) {
      dr.getCancelCriterion().checkCancelInProgress(null);
      throw new RegionDestroyedException(
          LocalizedStrings.DiskRegion_THE_DISKREGION_HAS_BEEN_CLOSED_OR_DESTROYED
              .toLocalizedString(), dr.getName());
    }
    // Update on the on disk region version vector.
    // TODO - RVV - For async regions, it's possible that
    // the on disk RVV is actually less than the GC RVV we're trying record
    // it might make sense to push the RVV through the async queue?
    // What we're doing here is only recording the GC RVV if it is dominated
    // by the RVV of what we have persisted.
    RegionVersionVector inMemoryRVV = region.getVersionVector();
    RegionVersionVector diskRVV = dr.getRegionVersionVector();
    // Update the GC version for each member in our on disk version map
    updateDiskGCRVV(diskRVV, inMemoryRVV, diskRVV.getOwnerId());
    for (VersionSource member : (Collection) inMemoryRVV
        .getMemberToGCVersion().keySet()) {
      updateDiskGCRVV(diskRVV, inMemoryRVV, member);
    }
    // Remove any exceptions from the disk RVV that are are dominated
    // by the GC RVV.
    diskRVV.pruneOldExceptions();
    PersistentOplogSet oplogSet = getPersistentOplogSet(dr);
    // persist the new GC RVV information for this region to the DRF
    oplogSet.getChild().writeGCRVV(dr);
  } finally {
    releaseReadLock(dr);
  }
}
/**
 * Persist the RegionVersionVector of the given region to the disk region's
 * current oplog. No-op when concurrency checks are disabled. A null region
 * is tolerated (a null in-memory RVV is then written).
 *
 * @param isRVVTrusted whether the written RVV should be marked trusted;
 *          per the comment below, the current dr.rvvTrust is used --
 *          presumably when this is null -- confirm
 */
public void writeRVV(DiskRegion dr, LocalRegion region, Boolean isRVVTrusted) {
  if (region != null && !region.getConcurrencyChecksEnabled()) {
    return;
  }
  acquireReadLock(dr);
  try {
    if (dr.isRegionClosed()) {
      dr.getCancelCriterion().checkCancelInProgress(null);
      throw new RegionDestroyedException(
          LocalizedStrings.DiskRegion_THE_DISKREGION_HAS_BEEN_CLOSED_OR_DESTROYED
              .toLocalizedString(), dr.getName());
    }
    RegionVersionVector inMemoryRVV = (region==null)?null:region.getVersionVector();
    // persist the new GC RVV information for this region to the CRF
    PersistentOplogSet oplogSet = getPersistentOplogSet(dr);
    // use current dr.rvvTrust
    oplogSet.getChild().writeRVV(dr, inMemoryRVV, isRVVTrusted);
  } finally {
    releaseReadLock(dr);
  }
}
/**
 * Update the on disk GC version for the given member, only if the disk has
 * actually recorded all of the updates including that member.
 *
 * @param diskRVV
 *          the RVV for what has been persisted
 * @param inMemoryRVV
 *          the RVV of what is in memory
 * @param member
 *          The member we're trying to update
 */
private void updateDiskGCRVV(RegionVersionVector diskRVV,
    RegionVersionVector inMemoryRVV, VersionSource member) {
  final long persistedVersion = diskRVV.getVersionForMember(member);
  final long gcVersion = inMemoryRVV.getGCVersion(member);
  // skip recording when the GC version runs ahead of what has actually
  // been persisted for this member
  if (gcVersion > persistedVersion) {
    return;
  }
  diskRVV.recordGCVersion(member, gcVersion);
}
/** Returns the product version recovered from this store's init file. */
public final Version getRecoveredGFVersion() {
  return getRecoveredGFVersion(this.initFile);
}

/** Returns the product version recovered from the given init file. */
final Version getRecoveredGFVersion(DiskInitFile initFile) {
  return initFile.currentRecoveredGFVersion();
}
/** Returns the directory holders this disk store writes to. */
public DirectoryHolder[] getDirectories() {
  return this.directories;
}

/** Forward an update of the given disk region to its owning oplog set. */
public void updateDiskRegion(AbstractDiskRegion dr) {
  getPersistentOplogSet(dr).updateDiskRegion(dr);
}
/** Record creation of the index with the given id in the init file. */
public void writeIndexCreate(String indexId) {
  this.initFile.indexCreate(indexId);
}

/** Record deletion of the index with the given id in the init file. */
public void writeIndexDelete(String indexId) {
  this.initFile.indexDelete(indexId);
}
/**
 * Atomically move index recovery from UNINIT to INIT.
 *
 * @return true if this call performed the transition, false if recovery
 *         was already scheduled or done
 */
public boolean markIndexRecoveryScheduled() {
  synchronized (this.indexRecoveryState) {
    if (this.indexRecoveryState[0] != INDEXRECOVERY_UNINIT) {
      return false;
    }
    markIndexRecovery(INDEXRECOVERY_INIT);
    return true;
  }
}
/** Mark index recovery as finished and wake up any waiters. */
public void markIndexRecoveryDone() {
  synchronized (this.indexRecoveryState) {
    markIndexRecovery(INDEXRECOVERY_DONE);
  }
}

/** should be invoked under synchronized (this.indexRecoveryState) */
private void markIndexRecovery(int state) {
  assert Thread.holdsLock(this.indexRecoveryState);
  if (logger.fineEnabled()) {
    logger.fine("DSI: marking indexRecovery=" + state + " for: "
        + toString());
  }
  // publish the new state and wake every thread blocked in
  // waitForIndexRecovery()
  this.indexRecoveryState[0] = state;
  this.indexRecoveryState.notifyAll();
}
/**
 * Wait until index recovery reaches the DONE state.
 *
 * @param waitMillis maximum time to wait, or negative to wait forever
 * @return true if recovery finished (or the store is closing)
 */
public boolean waitForIndexRecoveryEnd(long waitMillis) {
  return waitForIndexRecovery(INDEXRECOVERY_DONE, waitMillis);
}
/**
 * Wait until the index recovery state reaches {@code expected} or the store
 * starts closing, for at most {@code waitMillis} milliseconds (negative
 * means wait forever). Rethrows any failure recorded by the recovery task
 * (via checkIndexRecoveryFailure) and honors cancellation.
 *
 * @return true when no timeout occurred; on timeout, whether the expected
 *         state was reached or the store is closing
 */
private boolean waitForIndexRecovery(int expected, long waitMillis) {
  long endMillis;
  if (waitMillis < 0) {
    // negative means wait "forever"
    endMillis = waitMillis = Long.MAX_VALUE;
  }
  else {
    long currentTime = System.currentTimeMillis();
    endMillis = currentTime + waitMillis;
    if (endMillis < currentTime) {
      // addition overflowed: clamp to "forever"
      endMillis = Long.MAX_VALUE;
    }
  }
  // wake up at least once a second to recheck cancellation and failure
  final long loopMillis = Math.min(1000L, waitMillis);
  synchronized (this.indexRecoveryState) {
    while (this.indexRecoveryState[0] < expected && !isClosing()) {
      Throwable t = null;
      try {
        if (logger.fineEnabled()) {
          logger.fine("DSI: waiting for indexRecovery=" + expected + " for: "
              + toString());
        }
        this.indexRecoveryState.wait(loopMillis);
      } catch (InterruptedException ie) {
        // re-assert the interrupt; checkCancelInProgress below decides
        // how to surface it
        Thread.currentThread().interrupt();
        t = ie;
      }
      getCancelCriterion().checkCancelInProgress(t);
      checkIndexRecoveryFailure();
      if (System.currentTimeMillis() >= endMillis) {
        return (this.indexRecoveryState[0] >= expected || isClosing());
      }
    }
  }
  checkIndexRecoveryFailure();
  return true;
}
/**
 * Rethrow any failure recorded by the index recovery task:
 * RuntimeExceptions and Errors as-is, anything else wrapped in an
 * IndexMaintenanceException.
 */
private void checkIndexRecoveryFailure() {
  final Throwable t = this.indexRecoveryFailure.get();
  if (t == null) {
    return;
  }
  if (t instanceof RuntimeException) {
    throw (RuntimeException)t;
  }
  if (t instanceof Error) {
    throw (Error)t;
  }
  throw new IndexMaintenanceException(t);
}
/**
 * {@inheritDoc}
 *
 * Caps the async queue at its current size while memory is critical
 * (unless the cache is closing) or under eviction pressure, and restores
 * the configured capacity otherwise.
 */
@Override
public void onEvent(MemoryEvent event) {
  final MemoryState memoryState = event.getState();
  if (this.logger.fineEnabled()) {
    this.logger.fine("DiskStoreImpl " + getName()
        + ": received memory event " + event + " with queueSize="
        + this.asyncQueue.size());
  }
  if (memoryState.isCritical()) {
    if (!this.cache.isClosing) {
      setAsyncQueueCapacityToCurrent();
    }
  } else if (memoryState.isEviction()) {
    setAsyncQueueCapacityToCurrent();
  } else {
    resetAsyncQueueCapacity();
  }
}
}