org.elasticsearch.index.seqno.LocalCheckpointTracker Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.seqno;
import com.carrotsearch.hppc.LongObjectHashMap;
import org.elasticsearch.common.SuppressForbidden;
import java.util.concurrent.atomic.AtomicLong;
/**
* This class generates sequences numbers and keeps track of the so-called "local checkpoint" which is the highest number for which all
* previous sequence numbers have been processed (inclusive).
*/
public class LocalCheckpointTracker {
/**
* We keep a bit for each sequence number that is still pending. To optimize allocation, we do so in multiple sets allocating them on
* demand and cleaning up while completed. This constant controls the size of the sets.
*/
static final short BIT_SET_SIZE = 1024;
/**
* A collection of bit sets representing processed sequence numbers. Each sequence number is mapped to a bit set by dividing by the
* bit set size.
*/
final LongObjectHashMap processedSeqNo = new LongObjectHashMap<>();
/**
* A collection of bit sets representing durably persisted sequence numbers. Each sequence number is mapped to a bit set by dividing by
* the bit set size.
*/
final LongObjectHashMap persistedSeqNo = new LongObjectHashMap<>();
/**
* The current local checkpoint, i.e., all sequence numbers no more than this number have been processed.
*/
final AtomicLong processedCheckpoint = new AtomicLong();
/**
* The current persisted local checkpoint, i.e., all sequence numbers no more than this number have been durably persisted.
*/
final AtomicLong persistedCheckpoint = new AtomicLong();
/**
* The next available sequence number.
*/
final AtomicLong nextSeqNo = new AtomicLong();
/**
* Initialize the local checkpoint service. The {@code maxSeqNo} should be set to the last sequence number assigned, or
* {@link SequenceNumbers#NO_OPS_PERFORMED} and {@code localCheckpoint} should be set to the last known local checkpoint,
* or {@link SequenceNumbers#NO_OPS_PERFORMED}.
*
* @param maxSeqNo the last sequence number assigned, or {@link SequenceNumbers#NO_OPS_PERFORMED}
* @param localCheckpoint the last known local checkpoint, or {@link SequenceNumbers#NO_OPS_PERFORMED}
*/
public LocalCheckpointTracker(final long maxSeqNo, final long localCheckpoint) {
if (localCheckpoint < 0 && localCheckpoint != SequenceNumbers.NO_OPS_PERFORMED) {
throw new IllegalArgumentException(
"local checkpoint must be non-negative or [" + SequenceNumbers.NO_OPS_PERFORMED + "] "
+ "but was [" + localCheckpoint + "]");
}
if (maxSeqNo < 0 && maxSeqNo != SequenceNumbers.NO_OPS_PERFORMED) {
throw new IllegalArgumentException(
"max seq. no. must be non-negative or [" + SequenceNumbers.NO_OPS_PERFORMED + "] but was [" + maxSeqNo + "]");
}
nextSeqNo.set(maxSeqNo + 1);
processedCheckpoint.set(localCheckpoint);
persistedCheckpoint.set(localCheckpoint);
}
/**
* Issue the next sequence number.
*
* @return the next assigned sequence number
*/
public long generateSeqNo() {
return nextSeqNo.getAndIncrement();
}
/**
* Marks the provided sequence number as seen and updates the max_seq_no if needed.
*/
public void advanceMaxSeqNo(final long seqNo) {
nextSeqNo.accumulateAndGet(seqNo + 1, Math::max);
}
/**
* Marks the provided sequence number as processed and updates the processed checkpoint if possible.
*
* @param seqNo the sequence number to mark as processed
*/
public synchronized void markSeqNoAsProcessed(final long seqNo) {
markSeqNo(seqNo, processedCheckpoint, processedSeqNo);
}
/**
* Marks the provided sequence number as persisted and updates the checkpoint if possible.
*
* @param seqNo the sequence number to mark as persisted
*/
public synchronized void markSeqNoAsPersisted(final long seqNo) {
markSeqNo(seqNo, persistedCheckpoint, persistedSeqNo);
}
private void markSeqNo(final long seqNo, final AtomicLong checkPoint, final LongObjectHashMap bitSetMap) {
assert Thread.holdsLock(this);
// make sure we track highest seen sequence number
advanceMaxSeqNo(seqNo);
if (seqNo <= checkPoint.get()) {
// this is possible during recovery where we might replay an operation that was also replicated
return;
}
final CountedBitSet bitSet = getBitSetForSeqNo(bitSetMap, seqNo);
final int offset = seqNoToBitSetOffset(seqNo);
bitSet.set(offset);
if (seqNo == checkPoint.get() + 1) {
updateCheckpoint(checkPoint, bitSetMap);
}
}
/**
* The current checkpoint which can be advanced by {@link #markSeqNoAsProcessed(long)}.
*
* @return the current checkpoint
*/
public long getProcessedCheckpoint() {
return processedCheckpoint.get();
}
/**
* The current persisted checkpoint which can be advanced by {@link #markSeqNoAsPersisted(long)}.
*
* @return the current persisted checkpoint
*/
public long getPersistedCheckpoint() {
return persistedCheckpoint.get();
}
/**
* The maximum sequence number issued so far.
*
* @return the maximum sequence number
*/
public long getMaxSeqNo() {
return nextSeqNo.get() - 1;
}
/**
* constructs a {@link SeqNoStats} object, using local state and the supplied global checkpoint
*
* This is needed to make sure the persisted local checkpoint and max seq no are consistent
*/
public synchronized SeqNoStats getStats(final long globalCheckpoint) {
return new SeqNoStats(getMaxSeqNo(), getPersistedCheckpoint(), globalCheckpoint);
}
/**
* Waits for all operations up to the provided sequence number to complete.
*
* @param seqNo the sequence number that the checkpoint must advance to before this method returns
* @throws InterruptedException if the thread was interrupted while blocking on the condition
*/
@SuppressForbidden(reason = "Object#wait")
public synchronized void waitForProcessedOpsToComplete(final long seqNo) throws InterruptedException {
while (processedCheckpoint.get() < seqNo) {
// notified by updateCheckpoint
this.wait();
}
}
/**
* Checks if the given sequence number was marked as processed in this tracker.
*/
public boolean hasProcessed(final long seqNo) {
assert seqNo >= 0 : "invalid seq_no=" + seqNo;
if (seqNo >= nextSeqNo.get()) {
return false;
}
if (seqNo <= processedCheckpoint.get()) {
return true;
}
final long bitSetKey = getBitSetKey(seqNo);
final int bitSetOffset = seqNoToBitSetOffset(seqNo);
synchronized (this) {
// check again under lock
if (seqNo <= processedCheckpoint.get()) {
return true;
}
final CountedBitSet bitSet = processedSeqNo.get(bitSetKey);
return bitSet != null && bitSet.get(bitSetOffset);
}
}
/**
* Moves the checkpoint to the last consecutively processed sequence number. This method assumes that the sequence number
* following the current checkpoint is processed.
*/
@SuppressForbidden(reason = "Object#notifyAll")
private void updateCheckpoint(AtomicLong checkPoint, LongObjectHashMap bitSetMap) {
assert Thread.holdsLock(this);
assert getBitSetForSeqNo(bitSetMap, checkPoint.get() + 1).get(seqNoToBitSetOffset(checkPoint.get() + 1)) :
"updateCheckpoint is called but the bit following the checkpoint is not set";
try {
// keep it simple for now, get the checkpoint one by one; in the future we can optimize and read words
long bitSetKey = getBitSetKey(checkPoint.get());
CountedBitSet current = bitSetMap.get(bitSetKey);
if (current == null) {
// the bit set corresponding to the checkpoint has already been removed, set ourselves up for the next bit set
assert checkPoint.get() % BIT_SET_SIZE == BIT_SET_SIZE - 1;
current = bitSetMap.get(++bitSetKey);
}
do {
checkPoint.incrementAndGet();
/*
* The checkpoint always falls in the current bit set or we have already cleaned it; if it falls on the last bit of the
* current bit set, we can clean it.
*/
if (checkPoint.get() == lastSeqNoInBitSet(bitSetKey)) {
assert current != null;
final CountedBitSet removed = bitSetMap.remove(bitSetKey);
assert removed == current;
current = bitSetMap.get(++bitSetKey);
}
} while (current != null && current.get(seqNoToBitSetOffset(checkPoint.get() + 1)));
} finally {
// notifies waiters in waitForProcessedOpsToComplete
this.notifyAll();
}
}
private static long lastSeqNoInBitSet(final long bitSetKey) {
return (1 + bitSetKey) * BIT_SET_SIZE - 1;
}
/**
* Return the bit set for the provided sequence number, possibly allocating a new set if needed.
*
* @param seqNo the sequence number to obtain the bit set for
* @return the bit set corresponding to the provided sequence number
*/
private static long getBitSetKey(final long seqNo) {
return seqNo / BIT_SET_SIZE;
}
private CountedBitSet getBitSetForSeqNo(final LongObjectHashMap bitSetMap, final long seqNo) {
assert Thread.holdsLock(this);
final long bitSetKey = getBitSetKey(seqNo);
final int index = bitSetMap.indexOf(bitSetKey);
final CountedBitSet bitSet;
if (bitSetMap.indexExists(index)) {
bitSet = bitSetMap.indexGet(index);
} else {
bitSet = new CountedBitSet(BIT_SET_SIZE);
bitSetMap.indexInsert(index, bitSetKey, bitSet);
}
return bitSet;
}
/**
* Obtain the position in the bit set corresponding to the provided sequence number. The bit set corresponding to the sequence number
* can be obtained via {@link #getBitSetForSeqNo(LongObjectHashMap, long)}.
*
* @param seqNo the sequence number to obtain the position for
* @return the position in the bit set corresponding to the provided sequence number
*/
private static int seqNoToBitSetOffset(final long seqNo) {
return Math.toIntExact(seqNo % BIT_SET_SIZE);
}
}