com.yahoo.oak.Chunk Maven / Gradle / Ivy
/*
* Copyright 2020, Verizon Media.
* Licensed under the terms of the Apache 2.0 license.
* Please see LICENSE file in the project root for terms.
*/
package com.yahoo.oak;
import sun.misc.Unsafe;
import java.util.EmptyStackException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicMarkableReference;
import java.util.concurrent.atomic.AtomicReference;
class Chunk {
static final int NONE_NEXT = 0; // an entry with NONE_NEXT as its next pointer, points to a null entry
/*-------------- Constants --------------*/
enum State {
INFANT,
NORMAL,
FROZEN,
RELEASED
}
// used for checking if rebalance is needed
private static final double REBALANCE_PROB_PERC = 30;
private static final double SORTED_REBALANCE_RATIO = 2;
private static final double MAX_ENTRIES_FACTOR = 2;
private static final double MAX_IDLE_ENTRIES_FACTOR = 5;
private static final int INVALID_ANCHOR_INDEX = -1;
// defaults
public static final int MAX_ITEMS_DEFAULT = 4096;
/*-------------- Members --------------*/
private static final Unsafe UNSAFE = UnsafeUtils.unsafe;
KeyBuffer minKey; // minimal key that can be put in this chunk
AtomicMarkableReference> next;
OakComparator comparator;
// in split/compact process, represents parent of split (can be null!)
private final AtomicReference> creator;
// chunk can be in the following states: normal, frozen or infant(has a creator)
private final AtomicReference state;
private final AtomicReference> rebalancer;
private final EntrySet entrySet;
private final AtomicInteger pendingOps;
private final Statistics statistics;
// # of sorted items at entry-array's beginning (resulting from split)
private final AtomicInteger sortedCount;
private final int maxItems;
AtomicInteger externalSize; // for updating oak's size (reference to one global per Oak size)
/*-------------- Constructors --------------*/
/**
* This constructor is only used internally to instantiate a Chunk without a creator and a min-key.
* The caller should set the creator and min-key before returning the Chunk to the user.
*/
private Chunk(int maxItems, AtomicInteger externalSize, MemoryManager memoryManager, OakComparator comparator,
OakSerializer keySerializer, OakSerializer valueSerializer, ValueUtils valueOperator) {
this.maxItems = maxItems;
this.externalSize = externalSize;
this.comparator = comparator;
this.entrySet = new EntrySet<>(memoryManager, maxItems, keySerializer, valueSerializer, valueOperator);
// if not zero, sorted count keeps the entry index of the last
// subsequent and ordered entry in the entries array
this.sortedCount = new AtomicInteger(0);
this.minKey = new KeyBuffer();
this.creator = new AtomicReference<>(null);
this.state = new AtomicReference<>(State.NORMAL);
this.next = new AtomicMarkableReference<>(null, false);
this.pendingOps = new AtomicInteger();
this.rebalancer = new AtomicReference<>(null); // to be updated on rebalance
this.statistics = new Statistics();
}
/**
* This constructor is only used when creating the first chunk (without a creator).
*/
Chunk(K minKey, int maxItems, AtomicInteger externalSize, MemoryManager memoryManager, OakComparator comparator,
OakSerializer keySerializer, OakSerializer valueSerializer, ValueUtils valueOperator) {
this(maxItems, externalSize, memoryManager, comparator, keySerializer, valueSerializer, valueOperator);
entrySet.allocateKey(minKey, this.minKey);
}
/**
* Create a child Chunk where this Chunk object as its creator.
* The child Chunk will have the same minKey as this Chunk (without duplicating the KeyBuffer data).
*/
Chunk createFirstChild() {
Chunk child = new Chunk<>(maxItems, externalSize, entrySet.memoryManager, comparator,
entrySet.keySerializer, entrySet.valueSerializer, entrySet.valOffHeapOperator);
child.creator.set(this);
child.state.set(State.INFANT);
child.minKey.copyFrom(this.minKey);
return child;
}
/**
* Create a child Chunk where this Chunk object as its creator.
* The child Chunk will use a duplicate minKey of the input (allocates a new buffer).
*/
Chunk createNextChild(KeyBuffer minKey) {
Chunk child = new Chunk<>(maxItems, externalSize, entrySet.memoryManager, comparator,
entrySet.keySerializer, entrySet.valueSerializer, entrySet.valOffHeapOperator);
child.creator.set(this);
child.state.set(State.INFANT);
entrySet.duplicateKey(minKey, child.minKey);
return child;
}
/********************************************************************************************/
/*----------------------------- Wrappers for EntrySet methods -----------------------------*/
/**
* See {@code EntrySet.isValueRefValid(int)} for more information
*/
boolean isValueRefValid(int ei) {
return entrySet.isValueRefValid(ei);
}
/**
* See {@code EntrySet.readKey(ThreadContext)} for more information
*/
void readKeyFromEntryIndex(ThreadContext ctx) {
entrySet.readKey(ctx);
}
/**
* See {@code EntrySet.readValue(ThreadContext)} for more information
*/
void readValueFromEntryIndex(ThreadContext ctx) {
entrySet.readValue(ctx);
}
/**
* See {@code EntrySet.readKey(KeyBuffer)} for more information
*/
boolean readKeyFromEntryIndex(KeyBuffer key, int ei) {
return entrySet.readKey(key, ei);
}
/**
* See {@code EntrySet.readValue(ValueBuffer)} for more information
*/
boolean readValueFromEntryIndex(ValueBuffer value, int ei) {
return entrySet.readValue(value, ei);
}
/**
* See {@code EntrySet.readValueNoVersion(ValueBuffer)} for more information
*/
boolean readValueNoVersionFromEntryIndex(ValueBuffer value, int ei) {
return entrySet.readValueNoVersion(value, ei);
}
/**
* See {@code EntrySet.allocateEntry(ThreadContext)} for more information
*/
boolean allocateEntryAndKey(ThreadContext ctx, K key) {
return entrySet.allocateEntry(ctx, key);
}
/**
* See {@code EntrySet.writeValueStart(ThreadContext)} for more information
*/
void writeValue(ThreadContext ctx, V value, boolean writeForMove) {
entrySet.writeValueStart(ctx, value, writeForMove);
}
/**
* See {@code EntrySet.releaseKey(ThreadContext)} for more information
*/
void releaseKey(ThreadContext ctx) {
entrySet.releaseKey(ctx);
}
/**
* See {@code EntrySet.releaseNewValue(ThreadContext)} for more information
*/
void releaseNewValue(ThreadContext ctx) {
entrySet.releaseNewValue(ctx);
}
/**
* @param key a key buffer to be updated with the minimal key
* @return true if successful
*/
boolean readMinKey(KeyBuffer key) {
return entrySet.readKey(key, entrySet.getHeadNextIndex());
}
/**
* @param key a key buffer to be updated with the maximal key
* @return true if successful
*/
boolean readMaxKey(KeyBuffer key) {
return entrySet.readKey(key, getLastItemEntryIndex());
}
/**
* @return the index of the first item in the chunk
* See {@code EntrySet.getHeadNextIndex} for more information.
*/
final int getFirstItemEntryIndex() {
return entrySet.getHeadNextIndex();
}
/**
* Finds the last sorted entry.
*
* @return the last sorted entry
*/
private int getLastItemEntryIndex() {
int sortedCount = this.sortedCount.get();
int entryIndex = sortedCount == 0 ? entrySet.getHeadNextIndex() : sortedCount;
int nextEntryIndex = entrySet.getNextEntryIndex(entryIndex);
while (nextEntryIndex != NONE_NEXT) {
entryIndex = nextEntryIndex;
nextEntryIndex = entrySet.getNextEntryIndex(entryIndex);
}
return entryIndex;
}
/********************************************************************************************/
/*----------------------- Methods for looking up item in this chunk -----------------------*/
/**
* Compare a key with a serialized key that is pointed by a specific entry index
*
* @param tempKeyBuff a reusable buffer object for internal temporary usage
* As a side effect, this buffer will contain the compared
* serialized key.
* @param key the key to compare
* @param ei the entry index to compare with
* @return the comparison result
*/
int compareKeyAndEntryIndex(KeyBuffer tempKeyBuff, K key, int ei) {
boolean isAllocated = entrySet.readKey(tempKeyBuff, ei);
assert isAllocated;
return comparator.compareKeyAndSerializedKey(key, tempKeyBuff);
}
/**
* Look up a key in this chunk.
*
* @param ctx The context that follows the operation following this key look up.
* It will describe the state of the entry (key and value) associated with the input {@code key}.
* Following are the possible states of the entry:
* (1) {@code key} was not found.
* This means there is no entry with the this key in this chunk.
* In this case, {@code ctx.isKeyValid() == False} and {@code ctx.isValueValid() == False}.
* (2) {@code key} was found.
* In this case, {@code (ctx.isKeyValid() == True}
* The state of the value associated with {@code key} is described in {@code ctx.valueState}.
* It can be one of the following states:
* (1) not yet inserted, (2) in the process of being inserted, (3) valid,
* (4) in the process of being deleted, (5) deleted.
* For cases (2) and (3), {@code ctx.isValueValid() == True}.
* Otherwise, {@code ctx.isValueValid() == False}.
* This means that there is an entry with that key, but there is no value attached to this key.
* Such entry can be reused after finishing the deletion process, if needed.
* @param key the key to look up
*/
void lookUp(ThreadContext ctx, K key) {
// binary search sorted part of key array to quickly find node to start search at
// it finds previous-to-key
int curr = binaryFind(ctx.tempKey, key);
curr = (curr == NONE_NEXT) ? entrySet.getHeadNextIndex() : entrySet.getNextEntryIndex(curr);
// iterate until end of list (or key is found)
while (curr != NONE_NEXT) {
// compare current item's key to searched key
int cmp = compareKeyAndEntryIndex(ctx.key, key, curr);
// if item's key is larger - we've exceeded our key
// it's not in chunk - no need to search further
if (cmp < 0) {
// Reset entry context to be INVALID
ctx.invalidate();
return;
} else if (cmp == 0) { // if keys are equal - we've found the item
// Updates the entry's context
// ctx.key was already updated as a side effect of compareKeyAndEntryIndex()
ctx.entryIndex = curr;
entrySet.readValue(ctx);
return;
}
// otherwise- proceed to next item
curr = entrySet.getNextEntryIndex(curr);
}
// Reset entry context to be INVALID
ctx.invalidate();
}
/**
* binary search for largest-entry smaller than 'key' in sorted part of key array.
*
* @param tempKey a reusable buffer object for internal temporary usage
* @param key the key to look up
* @return the index of the entry from which to start a linear search -
* if key is found, its previous entry is returned!
* In cases when search from the head is needed, meaning:
* (1) the given key is less or equal than the smallest key in the chunk OR
* (2) entries are unsorted so there is a need to start from the beginning of the linked list
* NONE_NEXT is going to be returned
*/
private int binaryFind(KeyBuffer tempKey, K key) {
int sortedCount = this.sortedCount.get();
// if there are no sorted keys, return NONE_NEXT to indicate that a regular linear search is needed
if (sortedCount == 0) {
return NONE_NEXT;
}
// if the first item is already larger than key,
// return NONE_NEXT to indicate that a regular linear search is needed
if (compareKeyAndEntryIndex(tempKey, key, entrySet.getHeadNextIndex()) <= 0) {
return NONE_NEXT;
}
// optimization: compare with last key to avoid binary search (here sortedCount is not zero)
if (compareKeyAndEntryIndex(tempKey, key, sortedCount) > 0) {
return sortedCount;
}
int start = 0;
int end = sortedCount;
while (end - start > 1) {
int curr = start + (end - start) / 2;
if (compareKeyAndEntryIndex(tempKey, key, curr) <= 0) {
end = curr;
} else {
start = curr;
}
}
return start;
}
/********************************************************************************************/
/*---------- Methods for managing the put/remove path of the keys and values --------------*/
/**
* publish operation into thread array
* if CAS didn't succeed then this means that a rebalancer got here first and entry is frozen
*
* @return result of CAS
**/
boolean publish() {
pendingOps.incrementAndGet();
State currentState = state.get();
if (currentState == State.FROZEN || currentState == State.RELEASED) {
pendingOps.decrementAndGet();
return false;
}
return true;
}
/**
* unpublish operation from thread array
* if CAS didn't succeed then this means that a rebalancer did this already
**/
void unpublish() {
pendingOps.decrementAndGet();
}
/**
* This function completes the insertion of a value to Entry. When inserting a
* value, the value reference is CASed inside the entry first and only afterwards the version is
* CASed. Thus, there can be a time in which the value reference is valid but the version is
* INVALID_VERSION or a negative one. In this function, the version is CASed to complete the
* insertion.
*
* The version written to entry is the version written in the off-heap memory. There is no worry
* of concurrent removals since these removals will have to first call this function as well,
* and they eventually change the version as well.
*
* This method expects the value buffer to be valid, the valueState to be VALID_INSERT_NOT_FINALIZED, and the
* version to be positive.
* If the context that not match these requirements, its behavior is undefined.
*
* @param ctx The context that follows the operation since the key was found/created.
* It holds the entry to CAS, the value version written in this entry and the
* value reference from which the correct version can be read.
* @return a version is returned.
*
* Note: the value's version and state in {@code ctx} are updated in this method to be the
* updated positive version and a valid state.
*/
int completeLinking(ThreadContext ctx) {
if (ctx.valueState != EntrySet.ValueState.VALID_INSERT_NOT_FINALIZED) {
// the version written in the value is a good one!
return ctx.value.getVersion();
}
if (!publish()) {
return EntrySet.INVALID_VERSION;
}
try {
entrySet.writeValueFinish(ctx);
} finally {
unpublish();
}
return ctx.value.getVersion();
}
/**
* As written in {@code writeValueFinish(ctx)}, when changing an entry, the value reference is CASed first and
* later the value version, and the same applies when removing a value. However, there is another step before
* changing an entry to remove a value and it is marking the value off-heap (the LP). This function is used to
* first CAS the value reference to {@code INVALID_VALUE_REFERENCE} and then CAS the version to be a negative one.
* Other threads seeing a marked value call this function before they proceed (e.g., before performing a
* successful {@code putIfAbsent()}).
*
* @param ctx The context that follows the operation since the key was found/created.
* Holds the entry to change, the old value reference to CAS out, and the current value version.
* @return true if a rebalance is needed
*/
boolean finalizeDeletion(ThreadContext ctx) {
if (ctx.valueState != EntrySet.ValueState.DELETED_NOT_FINALIZED) {
return false;
}
if (!publish()) {
return true;
}
try {
if (!entrySet.deleteValueFinish(ctx)) {
return false;
}
externalSize.decrementAndGet();
statistics.decrementAddedCount();
return false;
} finally {
unpublish();
}
}
/**
* @param ctx the context that follows the operation since the key was found/created
* @param key the key to link
* @return The previous entry index if the key was already added by another thread.
* Otherwise, if successful, it will return the current entry index.
*/
int linkEntry(ThreadContext ctx, K key) {
int prev;
int curr;
int cmp;
int anchor = INVALID_ANCHOR_INDEX;
final int ei = ctx.entryIndex;
final KeyBuffer tempKeyBuff = ctx.tempKey;
while (true) {
// start iterating from quickly-found node (by binary search) in sorted part of order-array
if (anchor == INVALID_ANCHOR_INDEX) {
anchor = binaryFind(tempKeyBuff, key);
}
if (anchor == NONE_NEXT) {
prev = NONE_NEXT;
curr = entrySet.getHeadNextIndex();
} else {
prev = anchor;
curr = entrySet.getNextEntryIndex(anchor); // index of next item in list
}
//TODO: use ctx and location window inside ctx (when key wasn't found),
//TODO: so there us no need to iterate again in linkEntry
// iterate items until key's position is found
while (true) {
// if no item, done searching - add to end of list
if (curr == NONE_NEXT) {
break;
}
// compare current item's key to ours
cmp = compareKeyAndEntryIndex(tempKeyBuff, key, curr);
// if current item's key is larger, done searching - add between prev and curr
if (cmp < 0) {
break;
}
// if same key, someone else managed to add the key to the linked list
if (cmp == 0) {
return curr;
}
prev = curr;
curr = entrySet.getNextEntryIndex(prev); // index of next item in list
}
// link to list between curr and previous, first change this entry's next to point to curr
// no need for CAS since put is not even published yet
entrySet.setNextEntryIndex(ei, curr);
if (entrySet.casNextEntryIndex(prev, curr, ei)) {
// Here is the single place where we do enter a new entry to the chunk, meaning
// there is none else who can simultaneously insert the same key
// (we were the first to insert this key).
// If the new entry's index is exactly after the sorted count and
// the entry's key is greater or equal then to the previous (sorted count)
// index key. Then increase the sorted count.
int sortedCount = this.sortedCount.get();
if (sortedCount > 0) {
if (ei == (sortedCount + 1)) { // first entry has entry index 1, not 0
// the new entry's index is exactly after the sorted count
if (compareKeyAndEntryIndex(tempKeyBuff, key, sortedCount) >= 0) {
// compare with sorted count key, if inserting the "if-statement",
// the sorted count key is less or equal to the key just inserted
this.sortedCount.compareAndSet(sortedCount, (sortedCount + 1));
}
}
}
return ei;
}
// CAS didn't succeed, try again
}
}
/**
* This function does the physical CAS of the value reference, which is the LP of the insertion. It then tries to
* complete the insertion @see writeValueFinish(ctx).
* This is also the only place in which the size of Oak is updated.
*
* @param ctx The context that follows the operation since the key was found/created.
* Holds the entry to which the value reference is linked, the old and new value references and
* the old and new value versions.
* @return true if the value reference was CASed successfully.
*/
ValueUtils.ValueResult linkValue(ThreadContext ctx) {
if (entrySet.writeValueCommit(ctx) == ValueUtils.ValueResult.FALSE) {
return ValueUtils.ValueResult.FALSE;
}
// If we move a value, the statistics shouldn't change
if (!ctx.isNewValueForMove) {
statistics.incrementAddedCount();
externalSize.incrementAndGet();
}
return ValueUtils.ValueResult.TRUE;
}
/********************************************************************************************/
/*------------------------- Methods that are used for rebalance ---------------------------*/
int getMaxItems() {
return maxItems;
}
/**
* Engage the chunk to a rebalancer r.
*
* @param r -- a rebalancer to engage with
*/
void engage(Rebalancer r) {
rebalancer.compareAndSet(null, r);
}
/**
* Checks whether the chunk is engaged with a given rebalancer.
*
* @param r -- a rebalancer object. If r is null, verifies that the chunk is not engaged to any rebalancer
* @return true if the chunk is engaged with r, false otherwise
*/
boolean isEngaged(Rebalancer r) {
return rebalancer.get() == r;
}
/**
* Fetch a rebalancer engaged with the chunk.
*
* @return rebalancer object or null if not engaged.
*/
Rebalancer getRebalancer() {
return rebalancer.get();
}
boolean shouldRebalance() {
// perform actual check only in pre defined percentage of puts
if (ThreadLocalRandom.current().nextInt(100) > REBALANCE_PROB_PERC) {
return false;
}
// if another thread already runs rebalance -- skip it
if (!isEngaged(null)) {
return false;
}
int numOfEntries = entrySet.getNumOfEntries();
int numOfItems = statistics.getCompactedCount();
int sortedCount = this.sortedCount.get();
// Reasons for executing a rebalance:
// 1. There are no sorted keys and the total number of entries is above a certain threshold.
// 2. There are sorted keys, but the total number of unsorted keys is too big.
// 3. Out of the occupied entries, there are not enough actual items.
return (sortedCount == 0 && (numOfEntries * MAX_ENTRIES_FACTOR) > maxItems)
|| (sortedCount > 0 && (sortedCount * SORTED_REBALANCE_RATIO) < numOfEntries)
|| ((numOfEntries * MAX_IDLE_ENTRIES_FACTOR) > maxItems
&& (numOfItems * MAX_IDLE_ENTRIES_FACTOR) < numOfEntries);
}
/**
* Copies entries from srcChunk (starting srcEntryIdx) to this chunk,
* performing entries sorting on the fly (delete entries that are removed as well).
*
* @param tempValue a reusable buffer object for internal temporary usage
* @param srcChunk chunk to copy from
* @param srcEntryIdx start position for copying
* @param maxCapacity max number of entries "this" chunk can contain after copy
* @return entry index of next to the last copied entry (in the srcChunk),
* NONE_NEXT if all items were copied
*/
final int copyPartNoKeys(ValueBuffer tempValue, Chunk srcChunk, final int srcEntryIdx, int maxCapacity) {
if (srcEntryIdx == NONE_NEXT) {
return NONE_NEXT;
}
// use local variables and just set the atomic variables once at the end
int numOfEntries = entrySet.getNumOfEntries();
// next *free* index of this entries array
int sortedThisEntryIndex = numOfEntries + 1;
// check that we are not beyond allowed number of entries to copy from source chunk
if (numOfEntries >= maxCapacity) {
return srcEntryIdx;
}
// assuming that all chunks are bounded with same number of entries to hold
assert srcEntryIdx <= maxItems;
// set the next entry index (previous entry or head) from where we start to copy
// if sortedThisEntryIndex is one (first entry to be occupied on this chunk)
// we are exactly going to update the head (ei=0)
entrySet.setNextEntryIndex(sortedThisEntryIndex - 1, sortedThisEntryIndex);
// Here was the code that was trying to read entries from srcEntryIdx on the source chunk
// to see how much of them are subject for a copy, ordered and not deleted,
// so theoretically they can be copied with copy array. The code is removed, because anyway
// the long copy array doesn't happen since "next" needs to be updated separately.
// copy entry by entry traversing the source linked list
int curEntryIdx = srcEntryIdx;
while (entrySet.copyEntry(tempValue, srcChunk.entrySet, curEntryIdx)) {
// the source entry was either copied or disregarded as deleted
// anyway move to next source entry (according to the linked list)
curEntryIdx = srcChunk.entrySet.getNextEntryIndex(curEntryIdx);
// if entry was ignored as deleted (no change in this EntrySet num of entries), continue
if (numOfEntries == entrySet.getNumOfEntries()) {
continue;
}
// we indeed copied the entry, update the number of entries and the next pointer
numOfEntries++;
sortedThisEntryIndex++;
entrySet.setNextEntryIndex(sortedThisEntryIndex - 1, sortedThisEntryIndex);
// check that we are not beyond allowed number of entries to copy from source chunk
if (numOfEntries >= maxCapacity) {
break;
}
// is there something to copy on the source side?
if (curEntryIdx == NONE_NEXT) {
break;
}
}
// we have stopped the copy because (1) this entry set is full, OR (2) ended source entries,
// OR (3) we copied allowed number of entries
// the last next pointer was set to what is there in the source to copy, reset it to null
entrySet.setNextEntryIndex(sortedThisEntryIndex - 1, NONE_NEXT);
// sorted count keeps the number of sorted entries
sortedCount.set(numOfEntries);
statistics.updateInitialSortedCount(sortedCount.get());
return curEntryIdx; // if NONE_NEXT then we finished copying old chunk, else we reached max in new chunk
}
/********************************************************************************************/
/*----------------------- Methods for managing the chunk's state --------------------------*/
State state() {
return state.get();
}
Chunk creator() {
return creator.get();
}
private void setState(State state) {
this.state.set(state);
}
void normalize() {
state.compareAndSet(State.INFANT, State.NORMAL);
creator.set(null);
// using fence so other puts can continue working immediately on this chunk
Chunk.UNSAFE.storeFence();
}
/**
* freezes chunk so no more changes can be done to it (marks pending items as frozen)
*/
void freeze() {
setState(State.FROZEN); // prevent new puts to this chunk
while (pendingOps.get() != 0) {
assert Boolean.TRUE;
}
}
/**
* try to change the state from frozen to released
*/
void release() {
state.compareAndSet(State.FROZEN, State.RELEASED);
}
/**
* marks this chunk's next pointer so this chunk is marked as deleted
*
* @return the next chunk pointed to once marked (will not change)
*/
Chunk markAndGetNext() {
// new chunks are ready, we mark frozen chunk's next pointer so it won't change
// since next pointer can be changed by other split operations we need to do this in a loop - until we succeed
while (true) {
// if chunk is marked - that is ok and its next pointer will not be changed anymore
// return whatever chunk is set as next
if (next.isMarked()) {
return next.getReference();
} else { // otherwise try to mark it
// read chunk's current next
Chunk savedNext = next.getReference();
// try to mark next while keeping the same next chunk - using CAS
// if we succeeded then the next pointer we remembered is set and will not change - return it
if (next.compareAndSet(savedNext, savedNext, false, true)) {
return savedNext;
}
}
}
}
/*-------------- Iterators --------------*/
AscendingIter ascendingIter() {
return new AscendingIter();
}
AscendingIter ascendingIter(ThreadContext ctx, K from, boolean inclusive) {
return new AscendingIter(ctx, from, inclusive);
}
DescendingIter descendingIter(ThreadContext ctx) {
return new DescendingIter(ctx);
}
DescendingIter descendingIter(ThreadContext ctx, K from, boolean inclusive) {
return new DescendingIter(ctx, from, inclusive);
}
private int advanceNextIndex(final int entryIndex) {
int next = entryIndex;
while (next != NONE_NEXT && !entrySet.isValueRefValid(next)) {
next = entrySet.getNextEntryIndex(next);
}
return next;
}
interface ChunkIter {
boolean hasNext();
int next(ThreadContext ctx);
}
class AscendingIter implements ChunkIter {
private int next;
AscendingIter() {
next = entrySet.getHeadNextIndex();
next = advanceNextIndex(next);
}
AscendingIter(ThreadContext ctx, K from, boolean inclusive) {
KeyBuffer tempKeyBuff = ctx.tempKey;
next = binaryFind(tempKeyBuff, from);
next = (next == NONE_NEXT) ? entrySet.getHeadNextIndex() : entrySet.getNextEntryIndex(next);
int compare = -1;
if (next != NONE_NEXT) {
compare = compareKeyAndEntryIndex(tempKeyBuff, from, next);
}
while (next != NONE_NEXT &&
(compare > 0 || (compare >= 0 && !inclusive) || !entrySet.isValueRefValid(next))) {
next = entrySet.getNextEntryIndex(next);
if (next != NONE_NEXT) {
compare = compareKeyAndEntryIndex(tempKeyBuff, from, next);
}
}
}
private void advance() {
next = entrySet.getNextEntryIndex(next);
next = advanceNextIndex(next);
}
@Override
public boolean hasNext() {
return next != NONE_NEXT;
}
@Override
public int next(ThreadContext ctx) {
int toReturn = next;
advance();
return toReturn;
}
}
class DescendingIter implements ChunkIter {
private int next;
private int anchor;
private int prevAnchor;
private final IntStack stack;
private final K from;
private boolean inclusive;
static final int SKIP_ENTRIES_FOR_BIGGER_STACK = 1; // 1 is the lowest possible value
DescendingIter(ThreadContext ctx) {
KeyBuffer tempKeyBuff = ctx.tempKey;
from = null;
stack = new IntStack(entrySet.getLastEntryIndex());
int sortedCnt = sortedCount.get();
anchor = // this is the last sorted entry
(sortedCnt == 0 ? entrySet.getHeadNextIndex() : sortedCnt);
stack.push(anchor);
initNext(tempKeyBuff);
}
DescendingIter(ThreadContext ctx, K from, boolean inclusive) {
KeyBuffer tempKeyBuff = ctx.tempKey;
this.from = from;
this.inclusive = inclusive;
stack = new IntStack(entrySet.getLastEntryIndex());
anchor = binaryFind(tempKeyBuff, from);
// translate to be valid index, if anchor is head we know to stop the iteration
anchor = (anchor == NONE_NEXT) ? entrySet.getHeadNextIndex() : anchor;
stack.push(anchor);
initNext(tempKeyBuff);
}
private void initNext(KeyBuffer keyBuff) {
traverseLinkedList(keyBuff, true);
advance(keyBuff);
}
/**
* use stack to find a valid next, removed items can't be next
*/
private void findNewNextInStack() {
if (stack.empty()) {
next = NONE_NEXT;
return;
}
next = stack.pop();
while (next != NONE_NEXT && !entrySet.isValueRefValid(next)) {
if (!stack.empty()) {
next = stack.pop();
} else {
next = NONE_NEXT;
return;
}
}
}
private void pushToStack(boolean compareWithPrevAnchor) {
while (next != NONE_NEXT) {
if (!compareWithPrevAnchor) {
stack.push(next);
next = entrySet.getNextEntryIndex(next);
} else {
if (next != prevAnchor) {
stack.push(next);
next = entrySet.getNextEntryIndex(next);
} else {
break;
}
}
}
}
/**
* fill the stack
*
* @param firstTimeInvocation
*/
private void traverseLinkedList(KeyBuffer tempKeyBuff, boolean firstTimeInvocation) {
assert stack.size() == 1; // ancor is in the stack
if (prevAnchor == entrySet.getNextEntryIndex(anchor)) {
next = NONE_NEXT; // there is no next;
return;
}
next = entrySet.getNextEntryIndex(anchor);
if (from == null) {
// if this is not the first invocation, stop when reaching previous anchor
pushToStack(!firstTimeInvocation);
} else {
if (firstTimeInvocation) {
final int threshold = inclusive ? -1 : 0;
// This is equivalent to continue while:
// when inclusive: CMP >= 0
// when non-inclusive: CMP > 0
while (next != NONE_NEXT && compareKeyAndEntryIndex(tempKeyBuff, from, next) > threshold) {
stack.push(next);
next = entrySet.getNextEntryIndex(next);
}
} else {
// stop when reaching previous anchor
pushToStack(true);
}
}
}
/**
* find new valid anchor
*/
private void findNewAnchor() {
assert stack.empty();
prevAnchor = anchor;
if (anchor == entrySet.getHeadNextIndex()) {
next = NONE_NEXT; // there is no more in this chunk
return;
} else if (anchor == 1) { // cannot get below the first index
anchor = entrySet.getHeadNextIndex();
} else {
if ((anchor - SKIP_ENTRIES_FOR_BIGGER_STACK) > 1) {
// try to skip more then one backward step at a time
// if it shows better performance
anchor -= SKIP_ENTRIES_FOR_BIGGER_STACK;
} else {
anchor -= 1;
}
}
stack.push(anchor);
}
private void advance(KeyBuffer keyBuff) {
while (true) {
findNewNextInStack();
if (next != NONE_NEXT) {
return;
}
// there is no next in stack
if (anchor == entrySet.getHeadNextIndex()) {
// there is no next at all
return;
}
findNewAnchor();
traverseLinkedList(keyBuff, false);
}
}
@Override
public boolean hasNext() {
return next != NONE_NEXT;
}
@Override
public int next(ThreadContext ctx) {
int toReturn = next;
advance(ctx.tempKey);
return toReturn;
}
}
/**
* just a simple stack of int, implemented with int array
*/
static class IntStack {
private final int[] stack;
private int top;
IntStack(int size) {
stack = new int[size];
top = 0;
}
void push(int i) {
stack[top] = i;
top++;
}
int pop() {
if (empty()) {
throw new EmptyStackException();
}
top--;
return stack[top];
}
boolean empty() {
return top == 0;
}
int size() {
return top;
}
}
/*-------------- Statistics --------------*/
/**
* This class contains information about chunk utilization.
*/
static class Statistics {
private final AtomicInteger addedCount = new AtomicInteger(0);
private int initialSortedCount = 0;
/**
* Initial sorted count here is immutable after chunk re-balance
*/
void updateInitialSortedCount(int sortedCount) {
this.initialSortedCount = sortedCount;
}
/**
* @return number of items chunk will contain after compaction.
*/
int getCompactedCount() {
return initialSortedCount + getAddedCount();
}
/**
* Incremented when put a key that was removed before
*/
void incrementAddedCount() {
addedCount.incrementAndGet();
}
/**
* Decrement when remove a key that was put before
*/
void decrementAddedCount() {
addedCount.decrementAndGet();
}
int getAddedCount() {
return addedCount.get();
}
}
/**
* @return statistics object containing approximate utilization information.
*/
Statistics getStatistics() {
return statistics;
}
}