com.sleepycat.je.recovery.DirtyINMap Maven / Gradle / Ivy
The newest version!
/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.recovery;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.logging.Level;
import com.sleepycat.je.CacheMode;
import com.sleepycat.je.dbi.DatabaseId;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.INList;
import com.sleepycat.je.dbi.MemoryBudget;
import com.sleepycat.je.log.Provisional;
import com.sleepycat.je.recovery.Checkpointer.CheckpointReference;
import com.sleepycat.je.tree.IN;
import com.sleepycat.je.tree.MapLN;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.Pair;
import com.sleepycat.je.utilint.TestHookExecute;
/**
* Manages the by-level map of checkpoint references that are to be flushed by
* a checkpoint or Database.sync, the MapLNs to be flushed, the highest level
* by database to be flushed, and the state of the checkpoint.
*
* A single instance of this class is used for checkpoints and has the same
* lifetime as the checkpointer and environment. An instance per Database.sync
* is created as needed. Only one checkpoint can occur at a time, but multiple
* syncs may occur concurrently with each other and with the checkpoint.
*
* The methods in this class are synchronized to protect internal state from
* concurrent access by the checkpointer and eviction, and to coordinate state
* changes between the two. Eviction must participate in the checkpoint so
* that INs cascade up properly; see coordinateEvictionWithCheckpoint.
*
* When INs are latched along with synchronization on a DirtyINMap, the order
* must be: 1) IN latches and 2) synchronize on DirtyINMap. For example,
* the evictor latches the parent and child IN before calling the synchronized
* method coordinateEvictionWithCheckpoint, and selectDirtyINsForCheckpoint
* latches the IN before calling the synchronized method selectForCheckpoint.
*/
class DirtyINMap {
static final boolean DIRTY_SET_DEBUG_TRACE = false;
private final EnvironmentImpl envImpl;
private final SortedMap,
Map>> levelMap;
private int numEntries;
private final Set mapLNsToFlush;
private final Map highestFlushLevels;
/**
 * Phases of dirty map construction for a checkpoint.
 */
enum CkptState {

    /** Idle: no checkpoint is running. Also the state used by Database.sync. */
    NONE,

    /** A checkpoint has begun, but the dirty map is still being built. */
    DIRTY_MAP_INCOMPLETE,

    /** A checkpoint is running and the dirty map has been fully built. */
    DIRTY_MAP_COMPLETE
}
/*
* Current phase of dirty map construction. Set to NONE by the constructor
* and reset, to DIRTY_MAP_INCOMPLETE by beginCheckpoint, and to
* DIRTY_MAP_COMPLETE at the end of selectDirtyINsForCheckpoint.
*/
private CkptState ckptState;
/* Copy of the flushAll argument passed to beginCheckpoint. */
private boolean ckptFlushAll;
/* Copy of the flushExtraLevel argument passed to beginCheckpoint. */
private boolean ckptFlushExtraLevel;
/**
 * Creates an empty dirty map with no checkpoint in progress.
 *
 * @param envImpl the environment whose INList, DbTree and memory budget are
 * used by this map.
 */
DirtyINMap(EnvironmentImpl envImpl) {

    /* Idle until beginCheckpoint is called. */
    ckptState = CkptState.NONE;
    numEntries = 0;

    /* All collections start out empty. */
    highestFlushLevels = new IdentityHashMap<>();
    mapLNsToFlush = new HashSet<>();
    levelMap = new TreeMap<>();

    this.envImpl = envImpl;
}
/**
 * Lets an eviction take part in an in-progress checkpoint and decides how
 * the eviction target must be logged.
 *
 * @param db the database of the eviction target.
 * @param targetLevel the tree level of the eviction target.
 * @param parent the latched parent of the target, or null when the root is
 * being evicted.
 *
 * @return the provisional status to use for logging the target.
 */
synchronized Provisional coordinateEvictionWithCheckpoint(
    final DatabaseImpl db,
    final int targetLevel,
    final IN parent) {

    /*
     * While the dirty map is still being built, the parent must be put into
     * the map so that dirtiness and logging cascade upward exactly as they
     * would if the target had stayed in cache and been found by the dirty
     * map scan. Eviction must never place an IN in the log that has not
     * cascaded up properly.
     *
     * The parent is added whether or not it is dirty right now; it becomes
     * dirty once the target child is logged, which has not happened yet.
     *
     * A null parent means the root is being evicted; nothing is added.
     */
    if (parent != null && ckptState == CkptState.DIRTY_MAP_INCOMPLETE) {
        /* Add latched parent IN to dirty map. */
        selectForCheckpoint(parent, -1 /*index*/);
        /* Save dirty/temp DBs for later. */
        saveMapLNsToFlush(parent);
    }

    /*
     * Provisional logging is required in any of three cases, tested in this
     * order:
     *
     * 1 - The target belongs to a deferred write database.
     *
     * 2 - The dirty map is still being built and the target is not the
     *     root. Its parent was added to the dirty map above, so the target
     *     is known to be below the max flush level.
     *
     * 3 - The dirty map is complete and the checkpoint flushes to a level
     *     above the eviction target.
     *
     * In every other case the target is logged non-provisionally.
     */
    final boolean provisional =
        db.isDeferredWriteMode() ||
        (ckptState == CkptState.DIRTY_MAP_INCOMPLETE && parent != null) ||
        (ckptState == CkptState.DIRTY_MAP_COMPLETE &&
         targetLevel < getHighestFlushLevel(db));

    return provisional ? Provisional.YES : Provisional.NO;
}
/**
* Coordinates a split with an in-progress checkpoint by offering the dirty
* BIN children of the new sibling to the dirty map.
*
* TODO:
* Is it necessary to perform MapLN flushing for nodes logged by a split
* (and not just the new sibling)?
*
* @param newSibling the sibling IN created by the split. The caller must
* hold its latch exclusively; this is asserted.
*/
void coordinateSplitWithCheckpoint(final IN newSibling) {
assert newSibling.isLatchExclusiveOwner();
/*
* If the checkpoint is in-progress and has not finished dirty map
* construction, we must add the BIN children of the new sibling to the
* dirty map. The new sibling will be added to the INList but it may or
* may not be seen by the in-progress INList iteration, and we must
* ensure that its dirty BIN children are logged by the checkpoint.
*
* Note that we cannot synchronize on 'this' before calling
* selectDirtyBINChildrenForCheckpoint, since it latches BIN children.
* IN latching must come before synchronization on 'this'. Eventually,
* after latching the IN, selectForCheckpoint is called, which is
* synchronized and checks for ckptState == DIRTY_MAP_INCOMPLETE. So no
* state check is needed here.
*/
selectDirtyBINChildrenForCheckpoint(newSibling);
}
/**
 * Starts a checkpoint: records the flush options and switches the state to
 * DIRTY_MAP_INCOMPLETE. Must be called before starting a checkpoint, and
 * must not be called for Database.sync.
 *
 * On entry the map must be empty and idle, as left by the constructor or by
 * reset; this is asserted.
 *
 * @param flushAll saved in ckptFlushAll; when set, updateFlushLevels uses
 * the maximum level for each database.
 * @param flushExtraLevel saved in ckptFlushExtraLevel; when set,
 * updateFlushLevels uses one level higher for non-root INs.
 */
synchronized void beginCheckpoint(boolean flushAll,
                                  boolean flushExtraLevel) {

    /* Nothing may be left over from an earlier checkpoint. */
    assert levelMap.isEmpty();
    assert mapLNsToFlush.isEmpty();
    assert highestFlushLevels.isEmpty();
    assert numEntries == 0;
    assert ckptState == CkptState.NONE;

    ckptFlushExtraLevel = flushExtraLevel;
    ckptFlushAll = flushAll;
    ckptState = CkptState.DIRTY_MAP_INCOMPLETE;
}
/**
 * Returns this map to its empty, idle condition. Must be called after a
 * checkpoint or Database.sync is complete. Gives back the memory budget
 * cost of the map, discards all collected entries and sets the state to
 * NONE.
 */
synchronized void reset() {

    /* Must run first: the cost is derived from numEntries. */
    removeCostFromMemoryBudget();

    ckptState = CkptState.NONE;
    numEntries = 0;

    highestFlushLevels.clear();
    mapLNsToFlush.clear();
    levelMap.clear();
}
/**
 * Scans the INList for all dirty INs, excluding temp DB INs, and saves them
 * in a tree-level ordered map for level ordered flushing. When the scan is
 * done, the highest flush levels are completed, the cost of the map is
 * added to the memory budget and the state becomes DIRTY_MAP_COMPLETE.
 *
 * Takes this opportunity to recalculate the memory budget tree usage.
 *
 * This method itself is not synchronized to allow concurrent eviction.
 * Synchronization is performed on a per-IN basis to protect the data
 * structures here, and eviction can occur in between INs.
 *
 * Must only be called after beginCheckpoint; the state is asserted to be
 * DIRTY_MAP_INCOMPLETE.
 */
void selectDirtyINsForCheckpoint() {

    assert ckptState == CkptState.DIRTY_MAP_INCOMPLETE;

    /*
     * Opportunistically recalculate the INList memory budget while
     * traversing the entire INList.
     */
    final INList inMemINs = envImpl.getInMemoryINs();
    inMemINs.memRecalcBegin();

    boolean completed = false;
    try {
        for (IN in : inMemINs) {
            in.latchShared(CacheMode.UNCHANGED);
            try {
                if (!in.getInListResident()) {
                    /* Skips the test hook below as well. */
                    continue;
                }

                inMemINs.memRecalcIterate(in);

                /* Add dirty UIN to dirty map. */
                if (in.getDirty() && !in.isBIN()) {
                    selectForCheckpoint(in, -1 /*index*/);
                }

                /* Add dirty level 2 children to dirty map. */
                selectDirtyBINChildrenForCheckpoint(in);

                /* Save dirty/temp DBs for later. */
                saveMapLNsToFlush(in);
            } finally {
                in.releaseLatch();
            }

            /* Call test hook after releasing latch. */
            TestHookExecute.doHookIfSet(
                Checkpointer.examineINForCheckpointHook, in);
        }
        completed = true;
    } finally {
        /* Report whether the traversal ran to the end. */
        inMemINs.memRecalcEnd(completed);
    }

    /*
     * Finish filling out the highestFlushLevels map. For each entry in
     * highestFlushLevels that has a null level Integer value (set by
     * updateFlushLevels), we call DbTree.getHighestLevel and replace the
     * null level. We must call DbTree.getHighestLevel, which latches the
     * root, only when not synchronized, to avoid breaking the rule that IN
     * latches are taken before synchronizing on this object. This must be
     * done in several steps to follow that rule, yet protect
     * highestFlushLevels using synchronization.
     */
    final Map<DatabaseImpl, Integer> maxFlushDbs = new HashMap<>();

    /* Copy entries with a null level. */
    synchronized (this) {
        for (Map.Entry<DatabaseImpl, Integer> entry :
             highestFlushLevels.entrySet()) {
            if (entry.getValue() == null) {
                maxFlushDbs.put(entry.getKey(), null);
            }
        }
    }

    /* Call getHighestLevel without synchronization. */
    final DbTree dbTree = envImpl.getDbTree();
    for (Map.Entry<DatabaseImpl, Integer> entry : maxFlushDbs.entrySet()) {
        entry.setValue(dbTree.getHighestLevel(entry.getKey()));
    }

    /* Fill in levels in highestFlushLevels. */
    synchronized (this) {
        for (Map.Entry<DatabaseImpl, Integer> entry :
             maxFlushDbs.entrySet()) {
            highestFlushLevels.put(entry.getKey(), entry.getValue());
        }
    }

    /* Complete this phase of the checkpoint. */
    synchronized (this) {
        addCostToMemoryBudget();
        ckptState = CkptState.DIRTY_MAP_COMPLETE;
    }

    if (DIRTY_SET_DEBUG_TRACE) {
        traceDirtySet();
    }
}
/**
 * Puts the IN into the dirty map, but only while the dirty map is being
 * built and only when the IN does not belong to a temp DB. Otherwise does
 * nothing. An added IN also updates the highest flush level map.
 *
 * @param in the IN to add, or the parent of the off-heap child to add when
 * index >= 0.
 * @param index the slot of the off-heap child to add, or -1 to add 'in'
 * itself.
 */
private synchronized void selectForCheckpoint(final IN in,
                                              final int index) {
    /*
     * The state has to be examined while synchronized. Callers such as the
     * split path do not check it themselves, so it may be something other
     * than DIRTY_MAP_INCOMPLETE here.
     */
    final boolean buildingDirtyMap =
        (ckptState == CkptState.DIRTY_MAP_INCOMPLETE);

    if (buildingDirtyMap && !in.getDatabase().isTemporary()) {
        addIN(in, index,
              true /*updateFlushLevels*/,
              false /*updateMemoryBudget*/);
    }
}
/**
 * Offers the dirty child BINs of 'in' to the dirty map. Does nothing unless
 * 'in' is at normalized level 2. Whether a child is actually added is
 * decided by selectForCheckpoint (dirty map construction must be in
 * progress and the DB must not be a temp DB).
 *
 * BINs resident in the main cache are added when their parent is seen in
 * the INList iteration, not when the BIN itself is seen. A BIN can move
 * between the main and off-heap caches while the dirty map is being built.
 * When it is loaded from off-heap it is added to the INList at that moment,
 * and the iteration may never reach it (ConcurrentHashMap iteration only
 * guarantees that nodes present when the iterator was created are
 * encountered). Relying on meeting BINs in the iteration could therefore
 * miss some.
 *
 * This method is deliberately not synchronized because it latches the BIN
 * children, and IN latching must come before synchronizing on 'this'.
 * selectForCheckpoint, called once the BIN is latched, is synchronized.
 */
private void selectDirtyBINChildrenForCheckpoint(final IN in) {

    if (in.getNormalizedLevel() != 2) {
        return;
    }

    for (int slot = 0; slot < in.getNEntries(); slot++) {

        final IN child = (IN) in.getTarget(slot);

        if (child == null) {
            /* No resident child: select the slot if its off-heap BIN is dirty. */
            if (in.isOffHeapBINDirty(slot)) {
                selectForCheckpoint(in, slot);
            }
            continue;
        }

        /* When called via split a child may already be latched. */
        final boolean alreadyLatched = child.isLatchOwner();
        if (!alreadyLatched) {
            child.latchShared(CacheMode.UNCHANGED);
        }
        try {
            if (child.getDirty()) {
                selectForCheckpoint(child, -1);
            }
        } finally {
            /* Only release a latch that was acquired here. */
            if (!alreadyLatched) {
                child.releaseLatch();
            }
        }
    }
}
/**
 * Records, for the database of an IN that was added to the dirty map, the
 * highest level that must be flushed.
 *
 * When ckptFlushAll is set, the maximum level of the database is used.
 * Durable deferred-write databases must be synced, so the maximum level is
 * used for them too. Otherwise the level of the IN is used, or one level
 * higher when ckptFlushExtraLevel is set.
 *
 * At least one level above the bottom-most BIN level is always flushed, so
 * that the BIN level is logged provisionally and the expense of processing
 * BINs during recovery is avoided.
 *
 * @param level the level of the IN that was added.
 * @param db the database of the IN.
 * @param isBIN whether the added node is a BIN.
 * @param isRoot whether the added node is the root; the level is never
 * raised for a root.
 */
private void updateFlushLevels(Integer level,
                               final DatabaseImpl db,
                               final boolean isBIN,
                               final boolean isRoot) {

    if (ckptFlushAll || db.isDurableDeferredWrite()) {
        /*
         * A null value is a marker: selectDirtyINsForCheckpoint replaces it
         * with the result of getHighestLevel, which has to be called when
         * not synchronized. An existing entry is left alone.
         */
        if (!highestFlushLevels.containsKey(db)) {
            highestFlushLevels.put(db, null);
        }
        return;
    }

    final boolean flushParentLevel =
        !isRoot && (ckptFlushExtraLevel || isBIN);
    if (flushParentLevel) {
        /* Next level up in the same tree. */
        level = level + 1;
    }

    /* Keep the existing entry when it is already at least as high. */
    final Integer previous = highestFlushLevels.get(db);
    if (previous != null && level <= previous) {
        return;
    }
    highestFlushLevels.put(db, level);
}
/**
 * Collects every dirty, INList-resident IN of one database into the level
 * sorted map for level ordered flushing, then records the highest level of
 * that database as its flush level and adds the cost of the map to the
 * memory budget.
 *
 * This method is not synchronized to allow concurrent eviction.
 * Coordination between eviction and Database.sync is not required.
 *
 * @param dbImpl the database being synced.
 */
void selectDirtyINsForDbSync(DatabaseImpl dbImpl) {

    assert ckptState == CkptState.NONE;

    final DatabaseId targetId = dbImpl.getId();

    for (IN in : envImpl.getInMemoryINs()) {

        /* Only INs of the database being synced are of interest. */
        if (!in.getDatabaseId().equals(targetId)) {
            continue;
        }

        in.latch(CacheMode.UNCHANGED);
        try {
            final boolean selected =
                in.getInListResident() && in.getDirty();
            if (selected) {
                addIN(in, -1 /*index*/,
                      false /*updateFlushLevels*/,
                      false /*updateMemoryBudget*/);
            }
        } finally {
            in.releaseLatch();
        }
    }

    /*
     * A single entry in the flush level map forces all levels of this DB
     * to be flushed.
     */
    final DbTree dbTree = envImpl.getDbTree();
    highestFlushLevels.put(dbImpl, dbTree.getHighestLevel(dbImpl));

    /* Add the dirty map to the memory budget. */
    addCostToMemoryBudget();
}
/**
 * Returns the highest level to be flushed for the given database, or
 * IN.MIN_LEVEL when no level is recorded for it. Must not be called while
 * the dirty map is still being built; this is asserted.
 *
 * @param db the database to look up.
 */
synchronized int getHighestFlushLevel(DatabaseImpl db) {

    assert ckptState != CkptState.DIRTY_MAP_INCOMPLETE;

    /*
     * An entry for the database should normally exist. It can be missing
     * if the DB root (and DatabaseImpl) have been evicted since
     * highestFlushLevels was built, because the new DatabaseImpl instance
     * is not a key in the map. In that case the IN does not need to be
     * checkpointed and eviction should be non-provisional, which the
     * minimum level achieves.
     */
    final Integer recorded = highestFlushLevels.get(db);
    if (recorded == null) {
        return IN.MIN_LEVEL;
    }
    return recorded;
}
/**
* Returns the number of entries in levelMap, which is keyed by tree level.
*/
synchronized int getNumLevels() {
return levelMap.size();
}
/**
 * Charges the admin memory usage of the environment's memory budget with
 * the cost of this map: numEntries times
 * MemoryBudget.CHECKPOINT_REFERENCE_SIZE.
 */
private synchronized void addCostToMemoryBudget() {
    envImpl.getMemoryBudget().updateAdminMemoryUsage(
        MemoryBudget.CHECKPOINT_REFERENCE_SIZE * (long) numEntries);
}
/**
 * Credits the admin memory usage of the environment's memory budget with
 * the cost of this map: numEntries times
 * MemoryBudget.CHECKPOINT_REFERENCE_SIZE, negated. Must be called while
 * numEntries still holds the charged count.
 */
private synchronized void removeCostFromMemoryBudget() {
    envImpl.getMemoryBudget().updateAdminMemoryUsage(
        -(MemoryBudget.CHECKPOINT_REFERENCE_SIZE * (long) numEntries));
}
/**
* Add a node unconditionally to the dirty map.
*
* @param in is the IN to add, or the parent of an off-heap IN to add when
* index >= 0.
*
* @param index is the index of the off-heap child to add, or -1 to add the
* 'in' itself.
*
* @param updateMemoryBudget if true then update the memory budget as the
* map is changed; if false then addCostToMemoryBudget must be called
* later.
*/
synchronized void addIN(final IN in,
final int index,
final boolean updateFlushLevels,
final boolean updateMemoryBudget) {
final Integer level;
final long lsn;
final long nodeId;
final boolean isRoot;
final byte[] idKey;
final boolean isBin;
if (index >= 0) {
level = in.getLevel() - 1;
lsn = in.getLsn(index);
nodeId = -1;
isRoot = false;
idKey = in.getKey(index);
isBin = true;
} else {
level = in.getLevel();
lsn = in.getLastLoggedLsn();
nodeId = in.getNodeId();
isRoot = in.isRoot();
idKey = in.getIdentifierKey();
isBin = in.isBIN();
}
final Map lsnMap;
final Map nodeMap;
Pair
© 2015 - 2024 Weber Informatics LLC | Privacy Policy