/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.rep.vlsn;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import com.sleepycat.bind.tuple.LongBinding;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.bind.tuple.TupleInput;
import com.sleepycat.bind.tuple.TupleOutput;
import com.sleepycat.je.Cursor;
import com.sleepycat.je.CursorConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.OperationStatus;
import com.sleepycat.je.config.EnvironmentParams;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.log.FileReader;
import com.sleepycat.je.txn.Txn;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.VLSN;
/**
* {@literal
* A VLSNBucket instance represents a set of VLSN->LSN mappings. Buckets are
* usually not updated, except at times when the replication stream may have
* been reduced in size, by log cleaning or syncup. The VLSNBuckets in the
* VLSNIndex's VLSNTracker are written to disk and are persistent. There are
* also VLSNBuckets in the temporary recovery-time tracker that are used for
* collecting mappings found in the log during recovery.
*
* VLSNBuckets only hold mappings from a single log file. A single log file
* may be mapped by multiple VLSNBuckets though.
*
* As a tradeoff in space vs time, a VLSNBucket only stores a sparse set of
* mappings and the caller must use a VLSNReader to scan the log file and
* find any log entries not mapped directly by the bucket. In addition,
* the VLSN is not actually stored. Only the offset portion of the LSN is
* stored, and the VLSN is intuited by a stride field. Each VLSNBucket
* only maps a single file, though a single file may be mapped by several
* VLSNBuckets.
*
* For example, suppose a node had these VLSN->LSN mappings:
*
* VLSN    LSN (file/offset)
*  9      10/100
* 10      10/110
* 11      10/120
* 12      10/130
* 13      10/140
* 14      10/150
* 15      10/160
* 16      11/100
* 17      11/110
*
* The mappings in file 10 could be represented by a VLSNBucket with
* a stride of 4. That means the bucket would hold the mappings for
* 9 10/100,
* 13 10/140
*
* And since the target log file number and the stride are known, the mappings
* can be represented by the offsets alone in this array: {100, 140}, rather
* than storing the whole lsn.
*
* Each bucket can also provide the mapping for the first and last VLSN it
* covers, even if the lastVLSN is not divisible by the stride. This is done to
* support forward and backward scanning. From the example above, the completed
* bucket can provide 9->10/100, 13->10/140, 15 -> 10/160 even though 15 is not
* a stride's worth away from 13.
*
* Because registering a VLSN->LSN mapping is done outside the log write latch,
* any inserts into the VLSNBucket may not be in order. However, when any
* VLSN is registered, we can assume that all VLSNs < that value do exist in
* the log. It's just an accident of timing that they haven't yet been
* registered. Note that out of order inserts into the buckets can create holes
* in the bucket's offset array, or cause the array to be shorter than
* anticipated.
*
* For example, if the insertion order into the bucket is vlsns 9, 15, we'll
* actually only keep an offset array of size 1. We have to be able to handle
* holes in the bucket, and can't count on filling them in when the lagging
* vlsn arrives, because it is possible that a reading thread will access the
* bucket before the laggard inserter arrives, or that the bucket might be
* flushed to disk, and become immutable.
* }
*/
public class VLSNBucket {
/* On-disk version. */
private static final int VERSION = 1;
/* File number for target file. */
private final long fileNumber;
/* Interval between VLSN values that are mapped. */
private final int stride;
protected VLSN firstVLSN = VLSN.NULL_VLSN;
protected VLSN lastVLSN = VLSN.NULL_VLSN;
private long lastLsn = DbLsn.NULL_LSN;
/*
* The file offsets are really unsigned ints. The calls to put() are
* implemented to let us assume that the list is fully populated. A
* subclass of ArrayList (TruncateableList) is used in order to provide
* access to the protected ArrayList.removeRange method.
*/
private TruncateableList<Integer> fileOffsets;
/*
* The max number of offsets and maxDistance help guide when to close the
* bucket and start a new one. Not persistent.
*/
private int maxMappings;
private int maxDistance;
private static final int NO_OFFSET = 0;
/* True if there are changes to the bucket that are not on disk. */
boolean dirty;
/*
* True if the VLSNBucket will not accept any more modifications; used to
* safeguard the bucket while the index is being written to disk.
*/
private boolean closed = false;
VLSNBucket(long fileNumber,
int stride,
int maxMappings,
int maxDistance,
VLSN firstVLSN) {
this.fileNumber = fileNumber;
this.stride = stride;
this.maxMappings = maxMappings;
this.maxDistance = maxDistance;
/*
* The VLSNs in the bucket are initialized to indicate what range
* should be covered by this bucket. But there may not be any offsets
* recorded either in the lastLsn or the fileOffset.
*/
this.firstVLSN = firstVLSN;
this.lastVLSN = firstVLSN;
/*
* Initialize the file offsets with a NO_OFFSET placeholder to correspond
* to firstVLSN.
*/
fileOffsets = new TruncateableList<Integer>();
fileOffsets.add(0, NO_OFFSET);
}
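/*
* Illustrative sketch (editor's note, not part of the original class):
* using the example from the class javadoc, a bucket covering file 10
* with stride 4 and firstVLSN 9 would reach roughly this state once
* VLSNs 9, 13 and 15 have been registered. The maxMappings/maxDistance
* values are stand-ins; any limits large enough to accept these three
* mappings behave the same way.
*
*   VLSNBucket b = new VLSNBucket(10L, 4, maxMappings, maxDistance,
*                                 new VLSN(9));
*   b.put(new VLSN(9),  DbLsn.makeLsn(10, 100)); // fileOffsets[0] = 100
*   b.put(new VLSN(13), DbLsn.makeLsn(10, 140)); // fileOffsets[1] = 140
*   b.put(new VLSN(15), DbLsn.makeLsn(10, 160)); // lastVLSN=15, lastLsn=10/160
*/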
/* For reading from disk. */
private VLSNBucket(TupleInput ti) {
fileNumber = ti.readPackedLong();
stride = ti.readPackedInt();
firstVLSN = new VLSN(ti.readPackedLong());
lastVLSN = new VLSN(ti.readPackedLong());
lastLsn = ti.readPackedLong();
int size = ti.readPackedInt();
fileOffsets = new TruncateableList<Integer>(size);
for (int i = 0; i < size; i++) {
fileOffsets.add(i, DbLsn.getFileOffsetAsInt(ti.readUnsignedInt()));
}
}
/**
* Record the LSN location for this VLSN.
*
* One key issue is that mappings are registered outside the log write
* latch, so the VLSNs may arrive out of order. If an out of order VLSN
* does arrive, we can still assume that the earlier VLSNs have been
* successfully logged. If a VLSN
* arrives that is divisible by the stride, and should be recorded in the
* fileOffsets, but is not the next VLSN that should be recorded, we'll pad
* out the fileOffsets list with placeholders.
*
* For example, suppose the stride is 3, and the first VLSN is 2. Then this
* bucket should record VLSN 2, 5, 8, ... etc. If VLSN 8 arrives before
* VLSN 5, VLSN 8 will be recorded, and VLSN 5 will have an offset
* placeholder of NO_OFFSET. It is a non-issue if VLSNs 3, 4, 6, 7 arrive
* out of order, because they would not have been recorded anyway. This
* should not happen often, because the stride should be fairly large, and
* the calls to put() should be close together. If the insertion order is
* vlsn 2, 5, 9, then the file offsets array will be a little short, and
* will only have 2 elements, instead of 3.
*
* We follow this policy because we must always have a valid begin and end
* point for the range. We must handle placeholders in all cases, and can't
* count on later vlsn inserts, because a bucket can become immutable at
* any time if it is flushed to disk.
*
* @return false if this bucket will not accept this VLSN. Generally, a
* refusal might happen because the bucket was full or the mapping is too
* large a distance away from the previous mapping. In that case, the
* tracker will start another bucket.
*/
synchronized boolean put(VLSN vlsn, long lsn) {
if (closed) {
return false;
}
if (!belongs(vlsn, lsn)) {
return false;
}
/*
* Add it to the fileOffset if it's on a stride boundary and is the
* next mapping in the fileOffset list.
*/
if (isModulo(vlsn)) {
int index = getIndex(vlsn);
int listLen = fileOffsets.size();
if (index < listLen) {
fileOffsets.set(index, DbLsn.getFileOffsetAsInt(lsn));
} else if (index == listLen) {
fileOffsets.add(DbLsn.getFileOffsetAsInt(lsn));
} else {
for (int i = listLen; i < index; i++) {
fileOffsets.add(NO_OFFSET);
}
fileOffsets.add(DbLsn.getFileOffsetAsInt(lsn));
}
dirty = true;
}
/* Update lastVLSN/lastLsn if this VLSN is later, or if they're not initialized. */
if ((lastVLSN.compareTo(vlsn) < 0) ||
(lastLsn == DbLsn.NULL_LSN)) {
lastVLSN = vlsn;
lastLsn = lsn;
dirty = true;
}
return true;
}
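/*
* Worked example (editor's note): with firstVLSN=2 and stride=3, suppose
* VLSN 8 is registered before VLSN 5, as described in the javadoc above.
* The file number (7) and offsets are made up for illustration.
*
*   put(new VLSN(2), DbLsn.makeLsn(7, 100)); // fileOffsets = {100}
*   put(new VLSN(8), DbLsn.makeLsn(7, 300)); // fileOffsets = {100, NO_OFFSET, 300}
*   put(new VLSN(5), DbLsn.makeLsn(7, 200)); // fills the hole, if it arrives
*                                            // before the bucket is flushed
*/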
/*
* Return true if this VLSN is on a stride boundary. Assumes
* !firstVLSN.isNull()
*/
private boolean isModulo(VLSN vlsn) {
return (((vlsn.getSequence() - firstVLSN.getSequence()) % stride) ==
0);
}
private int getIndex(VLSN vlsn) {
assert isModulo(vlsn) : "Don't call getIndex on non-modulo VLSN " +
vlsn + " bucket=" + this;
return (int) ((vlsn.getSequence() - firstVLSN.getSequence()) / stride);
}
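/*
* Worked example (editor's note): with firstVLSN=9 and stride=4 (the
* class javadoc example), the two helpers above evaluate as:
*
*   isModulo(new VLSN(13)) -> (13 - 9) % 4 == 0 -> true
*   isModulo(new VLSN(15)) -> (15 - 9) % 4 == 2 -> false
*   getIndex(new VLSN(13)) -> (13 - 9) / 4      -> 1, i.e. fileOffsets[1]
*/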
/**
* @return true if this VLSN->LSN mapping should go into this bucket.
*/
private boolean belongs(VLSN vlsn, long lsn) {
assert vlsn.compareTo(firstVLSN) >= 0 :
"firstVLSN = " + firstVLSN + " should not be greater than " + vlsn;
if (DbLsn.getFileNumber(lsn) != fileNumber) {
/* Mappings must be for same file. */
return false;
}
if (emptyInternal()) {
return true;
}
/*
* Some other thread beat us to the put() call and inserted a later
* mapping, so we know for sure that we fit in this bucket
*/
if (lastVLSN.compareTo(vlsn) > 0) {
return true;
}
boolean onStrideBoundary = isModulo(vlsn);
if (onStrideBoundary && (fileOffsets.size() >= maxMappings)) {
/* Too full. */
return false;
}
/*
* Will this VLSN be next one recorded in the fileOffsets? If so,
* calculate the scan distance.
*/
if ((onStrideBoundary && (getIndex(vlsn) == fileOffsets.size())) ||
lastVLSN.compareTo(vlsn) < 0) {
/* This VLSN is going in at the tail of the bucket. */
int lastOffset = fileOffsets.get(fileOffsets.size() - 1);
if ((DbLsn.getFileOffset(lsn) -
DbLsn.convertIntFileOffsetToLong(lastOffset)) >
maxDistance) {
/* The scan distance is exceeded. */
return false;
}
}
return true;
}
/**
* @return true if this bucket contains this mapping.
*/
synchronized boolean owns(VLSN vlsn) {
if (vlsn.equals(VLSN.NULL_VLSN)) {
return false;
} else if (firstVLSN.equals(VLSN.NULL_VLSN)) {
return false;
} else {
return (firstVLSN.compareTo(vlsn) <= 0) &&
(lastVLSN.compareTo(vlsn) >= 0);
}
}
synchronized VLSN getFirst() {
return firstVLSN;
}
synchronized VLSN getLast() {
return lastVLSN;
}
/**
* Return a file number that is less than or equal to the file number of
* the first lsn mapped by this bucket. In standard VLSNBuckets, only one
* file is covered, so there is only one possible value. In GhostBuckets,
* multiple files could be covered.
*/
long getLTEFileNumber() {
return fileNumber;
}
/*
* Similar to getLTEFileNumber, for this implementation there's only one
* possible file.
*/
long getGTEFileNumber() {
return fileNumber;
}
synchronized boolean empty() {
return emptyInternal();
}
private boolean emptyInternal() {
return (firstVLSN.equals(lastVLSN) &&
(lastLsn == DbLsn.NULL_LSN));
}
boolean follows(VLSN vlsn) {
return (firstVLSN.compareTo(vlsn) > 0);
}
boolean precedes(VLSN vlsn) {
return (!lastVLSN.equals(VLSN.NULL_VLSN) &&
(lastVLSN.compareTo(vlsn) < 0));
}
/**
* Returns the mapping whose VLSN is {@literal >=} the VLSN parameter. For
* example, if the bucket holds mappings for vlsn 10, 13, 16,
*
* - the greater than or equal mapping for VLSN 10 is 10/lsn
* - the greater than or equal mapping for VLSN 11 is 13/lsn
* - the greater than or equal mapping for VLSN 13 is 13/lsn
*
* File offsets may be null in the middle of the file offsets array because
* of out of order mappings. This method must return a non-null lsn, and
* must account for null offsets.
*
* @return the mapping whose VLSN is {@literal >=} the VLSN parameter. Will
* never return NULL_LSN, because the VLSNRange begin and end point are
* always mapped.
*/
public synchronized long getGTELsn(VLSN vlsn) {
if (lastVLSN.equals(vlsn)) {
return lastLsn;
}
int index;
if (firstVLSN.compareTo(vlsn) >= 0) {
/*
* It's possible for vlsn to be < the firstVLSN if vlsn
* falls between two buckets. For example, if the buckets are:
* bucketA = vlsn 10-> 20
* bucketB = vlsn 22->30
* then vlsn 21 will fall between two buckets, and will get bucketB
*/
index = 0;
} else {
index = getGTEIndex(vlsn);
}
/*
* This should never happen. Throw this exception to make debugging
* info available.
*/
if (index < 0) {
throw EnvironmentFailureException.unexpectedState
("index=" + index +
" vlsn=" + vlsn +
" bucket=" + this);
}
if (index >= fileOffsets.size()) {
return lastLsn;
}
int useIndex = findPopulatedIndex(index, true /* forward */);
int offset = fileOffsets.get(useIndex);
return offset == NO_OFFSET ?
lastLsn : DbLsn.makeLsn(fileNumber, offset);
}
/**
* Return the index for the mapping >= this VLSN. Note that this is just
* a stride calculation, and a non-existent file offset index might be
* returned.
*/
private int getGTEIndex(VLSN vlsn) {
long diff = vlsn.getSequence() - firstVLSN.getSequence();
return (int) ((diff + (stride - 1)) / stride);
}
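/*
* Worked example (editor's note): the expression above is a ceiling
* division. With firstVLSN=10 and stride=3 (mappings at 10, 13, 16, as in
* the getGTELsn javadoc):
*
*   getGTEIndex(new VLSN(10)) -> (0 + 2) / 3 -> 0  // exact hit on 10
*   getGTEIndex(new VLSN(11)) -> (1 + 2) / 3 -> 1  // rounds up to 13
*   getGTEIndex(new VLSN(13)) -> (3 + 2) / 3 -> 1  // exact hit on 13
*/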
/**
* We'd like to return the mapping at startIndex for the get{LTE,GTE}Lsn
* methods, but the offsets may not be populated if put() calls
* have come out of order. Search for the next populated offset.
*/
private int findPopulatedIndex(int startIndex, boolean forward) {
if (forward) {
for (int i = startIndex; i < fileOffsets.size(); i++) {
if (fileOffsets.get(i) != NO_OFFSET) {
return i;
}
}
} else {
for (int i = startIndex; i >= 0; i--) {
if (fileOffsets.get(i) != NO_OFFSET) {
return i;
}
}
}
return startIndex;
}
/**
* Returns the lsn whose VLSN is {@literal <=} the VLSN parameter. For example, if
* the bucket holds mappings for vlsn 10, 13, 16,
*
* - the less than or equal mapping for VLSN 10 is 10/lsn
* - the less than or equal mapping for VLSN 11 is 10/lsn
* - the less than or equal mapping for VLSN 13 is 13/lsn
*
* File offsets may be null in the middle of the file offsets array because
* of out of order mappings. This method must return a non-null lsn, and
* must account for null offsets.
*
* @return the lsn whose VLSN is {@literal <=} the VLSN parameter. Will never return
* NULL_LSN, because the VLSNRange begin and end points are always mapped.
*/
synchronized long getLTELsn(VLSN vlsn) {
/*
* It's possible for vlsn to be greater than lastVLSN if vlsn falls
* between two buckets.
* For example, if the buckets are:
* bucketA = vlsn 10-> 20
* bucketB = vlsn 22->30
* then vlsn 21 will fall between two buckets, and will get bucketA
*/
if (lastVLSN.compareTo(vlsn) <= 0) {
return lastLsn;
}
long diff = vlsn.getSequence() - firstVLSN.getSequence();
/*
* Make sure that the file offset array isn't unexpectedly short due to
* out of order inserts.
*/
int index = (int)(diff / stride);
if (index >= fileOffsets.size()) {
index = fileOffsets.size() - 1;
}
int useIndex = findPopulatedIndex(index, false /* forward */);
int offset = fileOffsets.get(useIndex);
assert offset != NO_OFFSET : "bucket should always have a non-null " +
"first offset. vlsn= " + vlsn + " bucket=" + this;
return (DbLsn.makeLsn(fileNumber, offset));
}
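/*
* Worked example (editor's note): for a bucket with firstVLSN=10,
* stride=3 and mappings at 10, 13, 16 (as in the javadoc above):
*
*   getLTELsn(new VLSN(11)) -> index (11 - 10) / 3 = 0 -> lsn of VLSN 10
*   getLTELsn(new VLSN(14)) -> index (14 - 10) / 3 = 1 -> lsn of VLSN 13
*
* If the computed slot holds NO_OFFSET because of an out of order put,
* findPopulatedIndex() walks backward to the nearest populated slot.
*/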
/**
* @return the lsn whose VLSN is == the VLSN parameter or DbLsn.NULL_LSN if
* there is no mapping. Note that because of out of order puts, there may
* be missing mappings that appear later on.
*/
public synchronized long getLsn(VLSN vlsn) {
assert owns(vlsn) : "vlsn=" + vlsn + " " + this;
if (lastVLSN.equals(vlsn)) {
return lastLsn;
}
if (!isModulo(vlsn)) {
return DbLsn.NULL_LSN;
}
int index = getIndex(vlsn);
if (index >= fileOffsets.size()) {
return DbLsn.NULL_LSN;
}
int offset = fileOffsets.get(index);
if (offset == NO_OFFSET) {
return DbLsn.NULL_LSN;
}
return DbLsn.makeLsn(fileNumber, offset);
}
synchronized long getLastLsn() {
return lastLsn;
}
/**
* Remove the mappings from this bucket that are for VLSNs <=
* lastDuplicate. If this results in a broken stride interval, package all
* those mappings into their own bucket and return it as a remainder
* bucket.
*
* For example, suppose this bucket has a stride of 5 and maps VLSN 10-23.
* Then it has mappings for 10, 15, 20, 23.
*
* If we need to remove mappings <= 16, we'll end up without a bucket that
* serves as a home base for vlsns 17,18,19. Those will be spun out into
* their own bucket, and this bucket will be adjusted to start at VLSN 20.
* This bucket should end up with
*
* - firstVLSN = 20
* - fileOffset is an array of size 1, for the LSN for VLSN 20
* - lastVLSN = 23
* - lastLsn = the same as before
*
* The spun-off bucket should be:
* - firstVLSN = 17
* - fileOffset is an array of size 1, for the LSN for VLSN 17
* - lastVLSN = 19
* - lastLsn = lsn for 19
*
* @return the newly created bucket that holds mappings from a broken
* stride interval, or null if there was no need to create such a bucket.
*/
VLSNBucket removeFromHead(EnvironmentImpl envImpl, VLSN lastDuplicate) {
if (empty()) {
return null;
}
/*
* No overlap; this bucket owns mappings that follow the duplicate
* range.
*/
if (lastDuplicate.compareTo(firstVLSN) < 0) {
return null;
}
/*
* This whole bucket is to be deleted, all its mappings are <= the
* lastDuplicate.
*/
if (lastVLSN.compareTo(lastDuplicate) <= 0) {
fileOffsets = null;
firstVLSN = VLSN.NULL_VLSN;
lastVLSN = VLSN.NULL_VLSN;
lastLsn = DbLsn.NULL_LSN;
return null;
}
VLSN indexVLSN = firstVLSN;
int newFirstIndex = -1;
/*
* Find the mappings that still belong. Using the example above, we
* should find that we can delete fileOffset[0] and fileOffset[1] and
* preserve fileOffset[2]
*/
for (int i = 0; i < fileOffsets.size(); i++) {
if ((indexVLSN.compareTo(lastDuplicate) > 0) &&
(fileOffsets.get(i) != NO_OFFSET)) {
newFirstIndex = i;
break;
}
indexVLSN = new VLSN(indexVLSN.getSequence() + stride);
}
VLSNBucket remainder = null;
int lastOffset;
if (newFirstIndex == -1) {
/*
* None of the VLSNs represented by the strided file offsets are
* needed anymore. This bucket consists solely of the last
* VLSN->LSN pair.
*/
lastOffset = fileOffsets.get(fileOffsets.size() - 1);
fileOffsets = new TruncateableList<Integer>();
fileOffsets.add(DbLsn.getFileOffsetAsInt(lastLsn));
firstVLSN = lastVLSN;
} else {
/* Move the still-valid mappings to a new list. */
assert (newFirstIndex > 0);
lastOffset = fileOffsets.get(newFirstIndex - 1);
TruncateableList<Integer> newFileOffsets =
new TruncateableList<Integer>
(fileOffsets.subList(newFirstIndex, fileOffsets.size()));
fileOffsets = newFileOffsets;
firstVLSN = new VLSN((newFirstIndex * stride) +
firstVLSN.getSequence());
}
if (!firstVLSN.equals(lastDuplicate.getNext())) {
/*
* If lastDuplicate was not on the same stride boundary as our old
* bucket, we may have a broken bucket of mappings to preserve.
* Using our example numbers above, we still need to make sure
* there's a bucket that matches VLSNs 17, 18, 19.
*/
long scanStart = DbLsn.makeLsn(fileNumber, lastOffset);
remainder = scanForNewBucket(envImpl,
lastDuplicate.getNext(),
firstVLSN.getPrev(),
scanStart);
}
dirty = true;
return remainder;
}
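/*
* Illustrative sketch (editor's note): using the numbers from the javadoc
* above (stride 5, VLSNs 10-23), removing mappings <= 16 reshapes the
* bucket and spins off a remainder; envImpl here is whatever environment
* the caller already holds.
*
*   VLSNBucket remainder = bucket.removeFromHead(envImpl, new VLSN(16));
*   // this bucket: firstVLSN=20, fileOffsets={offset of VLSN 20},
*   //              lastVLSN=23, lastLsn unchanged
*   // remainder:   firstVLSN=17, lastVLSN=19, built by scanning the log
*   //              forward from the lsn recorded for VLSN 15
*/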
/**
* Scan the log file for VLSN->LSN mappings to create a new bucket.
*/
private VLSNBucket scanForNewBucket(EnvironmentImpl envImpl,
VLSN first,
VLSN last,
long startLsn) {
VLSNBucket newBucket = new VLSNBucket(fileNumber, stride,
maxMappings, maxDistance,
first);
int readBufferSize =
envImpl.getConfigManager().getInt
(EnvironmentParams.LOG_ITERATOR_MAX_SIZE);
NewBucketReader scanner =
new NewBucketReader(newBucket, envImpl, readBufferSize, first,
last, startLsn);
while (!scanner.isDone() && (scanner.readNextEntry())) {
}
assert scanner.isDone();
return newBucket;
}
/**
* Remove the mappings from this bucket that are for VLSNs >=
* startOfDelete. Unlike removing from the head, we need not worry about
* breaking a bucket stride interval.
*
* If prevLsn is NULL_LSN, we don't have a good value to cap the bucket.
* Instead, we'll have to delete the bucket back to whatever was the next
* available lsn. For example, suppose the bucket has these mappings. This
* strange bucket (stride 25 is missing) is possible if vlsn 26 arrived
* early, out of order.
*
* in fileOffset: 10 -> 101
* in fileOffset: 15 -> no offset
* in fileOffset: 20 -> 201
* lastVLSN->lastLsn mapping 26 -> 250
*
* If we have a prevLsn and the startOfDelete is 17, then we can cap the
* bucket with the mapping for VLSN 16 (startOfDelete.getPrev()):
* in fileOffset: 10 -> 101
* in fileOffset: 15 -> no offset
* lastVLSN->lastLsn mapping 16 -> 190 (the prevLsn)
*
* If we don't have a prevLsn, then we know that we have to cut the bucket
* back to the largest known mapping, losing many mappings along the way.
* in fileOffset: 10 -> 101
* lastVLSN->lastLsn mapping 10 -> 101
*
* If we are deleting in the vlsn area between the last strided offset and
* the lastVLSN (i.e. vlsn 23 is the startOfDelete), the with and without
* prevLsn cases would look like this:
*
* (there is a prevLsn, and 23 is startOfDelete. No need to truncate
* anything)
* in fileOffset: 10 -> 101
* in fileOffset: 15 -> no offset
* in fileOffset: 20 -> 201
* lastVLSN->lastLsn mapping 22 -> prevLsn
*
* (there is no prevLsn, and 23 is startOfDelete)
* in fileOffset: 10 -> 101
* in fileOffset: 15 -> no offset
* in fileOffset: 20 -> 201
* lastVLSN->lastLsn mapping 20 -> 201
*
* @param startOfDelete is the VLSN that begins the range to delete,
* inclusive
* @param prevLsn is the lsn of startOfDelete.getPrev(). We'll be using it
* to cap off the end of the bucket, by assigning it to the lastLsn field.
*/
void removeFromTail(VLSN startOfDelete, long prevLsn) {
if (empty()) {
return;
}
if (lastVLSN.compareTo(startOfDelete) < 0) {
return;
}
/* Delete all the mappings. */
if (firstVLSN.compareTo(startOfDelete) >= 0) {
lastVLSN = firstVLSN;
lastLsn = DbLsn.NULL_LSN;
fileOffsets.clear();
return;
}
/* Delete some of the mappings. */
int deleteIndex = getGTEIndex(startOfDelete);
/*
* This should never happen, because at this point startOfDelete must be
* a vlsn that is > the first vlsn (we handled the case where
* startOfDelete <= firstVLSN above). Throw this exception to make
* debugging info available.
*/
if (deleteIndex <= 0) {
throw EnvironmentFailureException.unexpectedState
("deleteIndex=" + deleteIndex +
" startOfDelete=" + startOfDelete +
" bucket=" + this);
}
/* See if there are any fileoffsets to prune off. */
if (deleteIndex < fileOffsets.size()) {
/*
* The startOfDeleteVLSN is a value between the firstVLSN and
* the last file offset.
*/
if (prevLsn == DbLsn.NULL_LSN) {
int lastPopulatedIndex =
findPopulatedIndex(deleteIndex-1, false);
if (lastPopulatedIndex != (deleteIndex -1)) {
deleteIndex = lastPopulatedIndex + 1;
}
}
fileOffsets.truncate(deleteIndex);
} else {
/*
* The startOfDelete vlsn is somewhere between the last file offset
* and the lastVLSN.
*/
if (prevLsn == DbLsn.NULL_LSN) {
int lastIndex = fileOffsets.size() - 1;
int lastPopulatedIndex = findPopulatedIndex(lastIndex, false);
if (lastPopulatedIndex < lastIndex) {
fileOffsets.truncate(lastPopulatedIndex);
}
}
}
/* Now set the lastVLSN -> lastLSN mapping. */
if (prevLsn == DbLsn.NULL_LSN) {
lastVLSN = new VLSN(((fileOffsets.size()-1) * stride) +
firstVLSN.getSequence());
Integer lastOffset = fileOffsets.get(fileOffsets.size() - 1);
assert lastOffset != null;
lastLsn = DbLsn.makeLsn(fileNumber, lastOffset);
} else {
lastVLSN = startOfDelete.getPrev();
lastLsn = prevLsn;
}
dirty = true;
}
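/*
* Illustrative sketch (editor's note): for the bucket described in the
* javadoc above (offsets for 10, 15, 20 plus lastVLSN 26), truncating at
* VLSN 17 when the lsn of VLSN 16 is known looks like this; lsnOfVlsn16
* is a hypothetical value supplied by the caller.
*
*   bucket.removeFromTail(new VLSN(17), lsnOfVlsn16);
*   // keeps the offsets for 10 and 15, sets lastVLSN=16, lastLsn=lsnOfVlsn16
*
* Passing DbLsn.NULL_LSN instead cuts the bucket back to its largest
* surviving strided mapping (10 -> 101 in that example).
*/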
/* For unit tests */
int getNumOffsets() {
return fileOffsets.size();
}
void close() {
closed = true;
}
/**
* Write this bucket to the mapping database.
*/
void writeToDatabase(EnvironmentImpl envImpl,
DatabaseImpl bucketDbImpl,
Txn txn) {
if (!dirty) {
return;
}
Cursor c = null;
try {
c = DbInternal.makeCursor(bucketDbImpl,
txn,
CursorConfig.DEFAULT);
writeToDatabase(envImpl, c);
} finally {
if (c != null) {
c.close();
}
}
}
/**
* Write this bucket to the mapping database using a cursor. Note that
* this method must disable critical eviction. Critical eviction makes the
* calling thread search for a target IN node to evict. That target IN node
* may or may not be in the internal VLSN db.
*
* For example, when a new, replicated LN is inserted or modified, a
* new VLSN is allocated. To do so, the app thread that is executing the
* operation
* A1. Takes a BIN latch on a BIN in a replicated db
* A2. Takes the VLSNIndex mutex
*
* Anyone calling writeToDatabase() has to take these steps:
* B1. Take the VLSNIndex mutex
* B2. Get a BIN latch for a BIN in the internal vlsn db.
*
* This difference in locking hierarchy could cause a deadlock except for
* the fact that A1 and B2 are guaranteed to be in different databases. If
* writeToDatabase() also did critical eviction, it would have a step where
* it tried to get a BIN latch on a replicated db, and we'd have a
* deadlock. [#18475]
*/
void writeToDatabase(EnvironmentImpl envImpl, Cursor cursor) {
if (!dirty) {
return;
}
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
LongBinding.longToEntry(firstVLSN.getSequence(), key);
VLSNBucketBinding bucketBinding = new VLSNBucketBinding();
bucketBinding.objectToEntry(this, data);
DbInternal.getCursorImpl(cursor).setAllowEviction(false);
OperationStatus status = cursor.put(key, data);
if (status != OperationStatus.SUCCESS) {
throw EnvironmentFailureException.unexpectedState
(envImpl, "Unable to write VLSNBucket for file " +
fileNumber + " status=" + status);
}
dirty = false;
}
/**
* Instantiate this from the database. Assumes that this bucket will not be
* used for insertion in the future.
*/
public static VLSNBucket readFromDatabase(DatabaseEntry data) {
VLSNBucketBinding mapperBinding = new VLSNBucketBinding();
VLSNBucket bucket = mapperBinding.entryToObject(data);
return bucket;
}
void fillDataEntry(DatabaseEntry data) {
VLSNBucketBinding binding = new VLSNBucketBinding();
binding.objectToEntry(this, data);
}
/**
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
return String.format("<VLSNBucket fileNum=%d(0x%x) numOffsets=%d " +
"stride=%d firstVLSN=%s lastVLSN=%s lastLsn=%s/>",
fileNumber, fileNumber,
(fileOffsets == null) ? 0 : fileOffsets.size(),
stride, firstVLSN, lastVLSN,
DbLsn.getNoFormatString(lastLsn));
}
/**
* For debugging and tracing.
*/
public void dump(PrintStream out) {
if (fileOffsets == null) {
return;
}
long vlsnVal = firstVLSN.getSequence();
int newlineCounter = 0;
for (Integer offset : fileOffsets) {
out.printf(" [%d 0x%x]", vlsnVal,
DbLsn.convertIntFileOffsetToLong(offset));
vlsnVal += stride;
if (++newlineCounter > 6) {
out.println("\n");
newlineCounter = 0;
}
}
out.printf("\n---------Last: VLSN=%s LSN=%s", lastVLSN,
DbLsn.getNoFormatString(lastLsn));
}
boolean isGhost() {
return false;
}
void writeToTupleOutput(TupleOutput to) {
to.writePackedLong(fileNumber);
to.writePackedInt(stride);
to.writePackedLong(firstVLSN.getSequence());
to.writePackedLong(lastVLSN.getSequence());
to.writePackedLong(lastLsn);
to.writePackedInt(fileOffsets.size());
for (Integer offset: fileOffsets) {
to.writeUnsignedInt(DbLsn.convertIntFileOffsetToLong(offset));
}
}
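/*
* Editor's note: together with VLSNBucketBinding below, this produces a
* record laid out as
*
*   packedInt    version (currently 1)
*   boolean      isGhost
*   packedLong   fileNumber
*   packedInt    stride
*   packedLong   firstVLSN sequence
*   packedLong   lastVLSN sequence
*   packedLong   lastLsn
*   packedInt    offset count
*   unsignedInt  offset[0] .. offset[count-1]
*/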
/**
* Marshals a VLSNBucket to a byte buffer to store in the database. The
* bucket's firstVLSN is used as the database key. Transient fields such
* as maxMappings and maxDistance are not stored.
*/
private static class VLSNBucketBinding extends TupleBinding<VLSNBucket> {
@Override
public VLSNBucket entryToObject(TupleInput ti) {
int onDiskVersion = ti.readPackedInt();
if (onDiskVersion != VLSNBucket.VERSION) {
throw EnvironmentFailureException.unexpectedState
("Don't expect version diff on_disk=" + onDiskVersion +
" source=" + VLSNBucket.VERSION);
}
boolean isGhost = ti.readBoolean();
VLSNBucket bucket = null;
if (isGhost) {
bucket = GhostBucket.makeNewInstance(ti);
} else {
bucket = new VLSNBucket(ti);
}
return bucket;
}
@Override
public void objectToEntry(VLSNBucket bucket, TupleOutput to) {
to.writePackedInt(VLSNBucket.VERSION);
to.writeBoolean(bucket.isGhost());
bucket.writeToTupleOutput(to);
}
}
/**
* Scan a specific section of log and generate a new VLSNBucket for
* this section.
*/
private static class NewBucketReader extends FileReader {
private final VLSNBucket remainderBucket;
private boolean done = false;
private final VLSN first;
private final VLSN last;
public NewBucketReader(VLSNBucket remainderBucket,
EnvironmentImpl envImpl,
int readBufferSize,
VLSN first,
VLSN last,
long startLsn) {
super(envImpl,
readBufferSize,
true, // forward
startLsn,
null, // singleFileNumber
DbLsn.NULL_LSN, // endOfFileLsn
DbLsn.NULL_LSN); // finishLsn
this.remainderBucket = remainderBucket;
this.first = first;
this.last = last;
}
/**
* Return true if this entry is replicated and its VLSN >= the
* firstVLSN and the entry is not invisible. These entries will
* be used to bring the VLSNIndex up to speed.
*/
@Override
protected boolean isTargetEntry() {
return (!currentEntryHeader.isInvisible() &&
entryIsReplicated() &&
(currentEntryHeader.getVLSN().compareTo(first) >= 0));
}
@Override
protected boolean processEntry(ByteBuffer entryBuffer) {
if (currentEntryHeader.getVLSN().compareTo(last) > 0) {
done = true;
} else {
remainderBucket.put(currentEntryHeader.getVLSN(),
getLastLsn());
}
entryBuffer.position(entryBuffer.position() +
currentEntryHeader.getItemSize());
return true;
}
boolean isDone() {
return done;
}
}
@SuppressWarnings("serial")
private static class TruncateableList<T> extends ArrayList<T> {
TruncateableList() {
super();
}
TruncateableList(int capacity) {
super(capacity);
}
TruncateableList(List<T> list) {
super(list);
}
void truncate(int fromIndex) {
removeRange(fromIndex, size());
}
}
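/*
* Editor's note: ArrayList.removeRange is protected, which is the only
* reason this subclass exists. A quick illustration (Arrays.asList would
* need a java.util.Arrays import; it is shown only for illustration):
*
*   TruncateableList<Integer> l =
*       new TruncateableList<Integer>(Arrays.asList(100, 200, 300, 400));
*   l.truncate(2); // l is now {100, 200}
*/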
}