All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.druid.segment.data.FrontCodedIndexed Maven / Gradle / Ivy

There is a newer version: 31.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.segment.data;

import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;

import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * {@link Indexed} specialized for storing variable-width binary values (such as utf8 encoded strings), which must be
 * sorted and unique, using 'front coding'. Front coding is a type of delta encoding for byte arrays, where sorted
 * values are grouped into buckets. The first value of the bucket is written entirely, and remaining values are stored
 * as a pair of an integer which indicates how much of the first byte array of the bucket to use as a prefix, followed
 * by the remaining bytes after the prefix to complete the value. If using 'incremental' buckets, instead of using the
 * prefix of the first bucket value, instead the prefix is computed against the immediately preceding value in the
 * bucket.
 * 

* front coded indexed layout: * | version | bucket size | has null? | number of values | size of "offsets" + "buckets" | "offsets" | "buckets" | * | ------- | ----------- | --------- | ---------------- | ----------------------------- | --------- | --------- | * | byte | byte | byte | vbyte int | vbyte int | int[] | bucket[] | *

* "offsets" are the ending offsets of each bucket stored in order, stored as plain integers for easy random access. *

* bucket layout: * | first value | prefix length | fragment | ... | prefix length | fragment | * | ----------- | ------------- | -------- | --- | ------------- | -------- | * | blob | vbyte int | blob | ... | vbyte int | blob | *

* blob layout: * | blob length | blob bytes | * | ----------- | ---------- | * | vbyte int | byte[] | *

*

* Getting a value first picks the appropriate bucket, finds its offset in the underlying buffer, then scans the bucket * values to seek to the correct position of the value within the bucket in order to reconstruct it using the prefix * length. *

* Finding the index of a value involves binary searching the first values of each bucket to find the correct bucket, * then a linear scan within the bucket to find the matching value (or negative insertion point -1 for values that * are not present). *

* The value iterator reads an entire bucket at a time, reconstructing the values into an array to iterate within the * bucket before moving onto the next bucket as the iterator is consumed. *

* This class is not thread-safe since during operation modifies positions of a shared buffer. */ public abstract class FrontCodedIndexed implements Indexed { public static final byte V0 = 0; public static final byte V1 = 1; public static final byte DEFAULT_VERSION = V0; public static final int DEFAULT_BUCKET_SIZE = 4; public static byte validateVersion(byte version) { if (version != FrontCodedIndexed.V0 && version != FrontCodedIndexed.V1) { throw new IAE( "Unknown format version for FrontCodedIndexed [%s], must be [%s] or [%s]", version, FrontCodedIndexed.V0, FrontCodedIndexed.V1 ); } return version; } public static Supplier read(ByteBuffer buffer, ByteOrder ordering) { final ByteBuffer orderedBuffer = buffer.asReadOnlyBuffer().order(ordering); final byte version = orderedBuffer.get(); Preconditions.checkArgument(version == V0 || version == V1, "only V0 and V1 exist, encountered " + version); final int bucketSize = Byte.toUnsignedInt(orderedBuffer.get()); final boolean hasNull = NullHandling.IS_NULL_BYTE == orderedBuffer.get(); final int numValues = VByte.readInt(orderedBuffer); // size of offsets + values final int size = VByte.readInt(orderedBuffer); final int offsetsPosition = orderedBuffer.position(); // move position to end of buffer buffer.position(offsetsPosition + size); if (version == V0) { return () -> new FrontCodedV0( buffer, ordering, bucketSize, numValues, hasNull, offsetsPosition ); } else { return () -> new FrontCodedV1( buffer, ordering, bucketSize, numValues, hasNull, offsetsPosition ); } } protected final ByteBuffer buffer; protected final int adjustedNumValues; protected final int adjustIndex; protected final int bucketSize; protected final int numBuckets; protected final int div; protected final int rem; protected final int offsetsPosition; protected final int bucketsPosition; protected final boolean hasNull; protected final int lastBucketNumValues; private FrontCodedIndexed( ByteBuffer buffer, ByteOrder order, int bucketSize, int numValues, boolean hasNull, int offsetsPosition ) { if (Integer.bitCount(bucketSize) != 1) { throw new ISE("bucketSize must be a power of two but was[%,d]", bucketSize); } this.buffer = buffer.asReadOnlyBuffer().order(order); this.bucketSize = bucketSize; this.hasNull = hasNull; this.numBuckets = (int) Math.ceil((double) numValues / (double) bucketSize); this.adjustIndex = hasNull ? 1 : 0; this.adjustedNumValues = numValues + adjustIndex; this.div = Integer.numberOfTrailingZeros(bucketSize); this.rem = bucketSize - 1; this.lastBucketNumValues = (numValues & rem) == 0 ? bucketSize : numValues & rem; this.offsetsPosition = offsetsPosition; this.bucketsPosition = offsetsPosition + ((numBuckets - 1) * Integer.BYTES); } /** * Get a value from a bucket at a relative position. *

* This method modifies the position of the buffer. */ abstract ByteBuffer getFromBucket(ByteBuffer buffer, int offset); /** * Read an entire bucket from a {@link ByteBuffer}, returning an array of reconstructed value bytes. *

* This method modifies the position of the buffer. */ abstract ByteBuffer[] readBucket(ByteBuffer buffer, int numValues); /** * Finds a value in a bucket among the fragments. The first value is assumed to have been already compared against * and be smaller than the value we are looking for. This comparison is the source of the 'shared prefix', which is * the length which the value has in common with the previous value of the bucket. *

* This method uses this shared prefix length to skip more expensive byte by byte full value comparisons when * possible by comparing the shared prefix length with the prefix length of the fragment. Since the bucket is always * sorted, prefix lengths shrink as you progress to higher indexes, and we can use this to reason that a fragment * with a longer prefix length than the shared prefix will always sort before the value we are looking for, and values * which have a shorter prefix will always be greater than the value we are looking for, so we only need to do a * full comparison if the prefix length is the same *

* this method modifies the position of {@link #buffer} */ abstract int findInBucket(ByteBuffer value, int currBucketFirstValueIndex, int bucketSize, int sharedPrefixLength); @Override public int size() { return adjustedNumValues; } @Nullable @Override public ByteBuffer get(int index) { if (hasNull && index == 0) { return null; } Indexed.checkIndex(index, adjustedNumValues); // due to vbyte encoding, the null value is not actually stored in the bucket. we would typically represent it as a // length of -1, since 0 is the empty string, but VByte encoding cannot have negative values, so if the null value // is present, we adjust the index by 1 since it is always stored as position 0 due to sorting first final int adjustedIndex = index - adjustIndex; // find the bucket which contains the value with maths final int bucket = adjustedIndex >> div; final int bucketIndex = adjustedIndex & rem; final int offset = getBucketOffset(bucket); buffer.position(offset); return getFromBucket(buffer, bucketIndex); } @Override public int indexOf(@Nullable ByteBuffer value) { // performs binary search using the first values of each bucket to locate the appropriate bucket, and then does // a linear scan to find the value within the bucket if (value == null) { return hasNull ? 0 : -1; } if (numBuckets == 0) { return hasNull ? -2 : -1; } int minBucketIndex = 0; int maxBucketIndex = numBuckets - 1; while (minBucketIndex < maxBucketIndex) { int currentBucket = (minBucketIndex + maxBucketIndex) >>> 1; int currBucketFirstValueIndex = currentBucket * bucketSize; // compare against first value in "current" bucket final int offset = getBucketOffset(currentBucket); buffer.position(offset); final int firstLength = VByte.readInt(buffer); final int firstOffset = buffer.position(); int comparison = compareBucketFirstValue(buffer, firstLength, value); // save the length of the shared prefix with the first value of the bucket and the value to match so we // can use it later to skip over all values in the bucket that share a longer prefix with the first value // (the bucket is sorted, so the prefix length gets smaller as values increase) final int sharedPrefix = buffer.position() - firstOffset; if (comparison == 0) { if (firstLength == value.remaining()) { // it turns out that the first value in current bucket is what we are looking for, short circuit return currBucketFirstValueIndex + adjustIndex; } else { comparison = Integer.compare(firstLength, value.remaining()); } } // we also compare against the adjacent bucket to determine if the value is actually in this bucket or // if we need to keep searching buckets final int nextOffset = getBucketOffset(currentBucket + 1); buffer.position(nextOffset); final int nextLength = VByte.readInt(buffer); int comparisonNext = compareBucketFirstValue(buffer, nextLength, value); if (comparisonNext == 0) { if (nextLength == value.remaining()) { // it turns out that the first value in next bucket is what we are looking for, go ahead and short circuit // for that as well, even though we weren't going to scan that bucket on this iteration... return (currBucketFirstValueIndex + adjustIndex) + bucketSize; } else { comparisonNext = Integer.compare(nextLength, value.remaining()); } } if (comparison < 0 && comparisonNext > 0) { // this is exactly the right bucket // find the value in the bucket (or where it would be if it were present) buffer.position(firstOffset + firstLength); return findInBucket(value, currBucketFirstValueIndex, bucketSize, sharedPrefix); } else if (comparison < 0) { minBucketIndex = currentBucket + 1; } else { maxBucketIndex = currentBucket - 1; } } // this is where we ended up, try to find the value in the bucket final int bucketIndexBase = minBucketIndex * bucketSize; final int numValuesInBucket; if (minBucketIndex == numBuckets - 1) { numValuesInBucket = lastBucketNumValues; } else { numValuesInBucket = bucketSize; } final int offset = getBucketOffset(minBucketIndex); // like we did in the loop, except comparison being smaller the first value here is a short circuit buffer.position(offset); final int firstLength = VByte.readInt(buffer); final int firstOffset = buffer.position(); int comparison = compareBucketFirstValue(buffer, firstLength, value); final int sharedPrefix = buffer.position() - firstOffset; if (comparison == 0) { if (firstLength == value.remaining()) { // it turns out that the first value in current bucket is what we are looking for, short circuit return bucketIndexBase + adjustIndex; } else { comparison = Integer.compare(firstLength, value.remaining()); } } if (comparison > 0) { // value preceedes bucket, so bail out return ~(bucketIndexBase + adjustIndex); } buffer.position(firstOffset + firstLength); return findInBucket(value, bucketIndexBase, numValuesInBucket, sharedPrefix); } @Override public boolean isSorted() { // FrontCodedIndexed only supports sorted values return true; } @Override public Iterator iterator() { if (adjustedNumValues == 0) { return Collections.emptyIterator(); } if (hasNull && adjustedNumValues == 1) { return Collections.singletonList(null).iterator(); } ByteBuffer copy = buffer.asReadOnlyBuffer().order(buffer.order()); copy.position(bucketsPosition); final ByteBuffer[] firstBucket = readBucket(copy, numBuckets > 1 ? bucketSize : lastBucketNumValues); // iterator decodes and buffers a bucket at a time, paging through buckets as the iterator is consumed return new Iterator() { private int currIndex = 0; private int currentBucketIndex = 0; private ByteBuffer[] currentBucket = firstBucket; @Override public boolean hasNext() { return currIndex < adjustedNumValues; } @Override public ByteBuffer next() { // null is handled special if (hasNull && currIndex == 0) { currIndex++; return null; } if (!hasNext()) { throw new NoSuchElementException(); } final int adjustedCurrIndex = hasNull ? currIndex - 1 : currIndex; final int bucketNum = adjustedCurrIndex >> div; // load next bucket if needed if (bucketNum != currentBucketIndex) { final int offset = copy.getInt(offsetsPosition + ((bucketNum - 1) * Integer.BYTES)); copy.position(bucketsPosition + offset); currentBucket = readBucket( copy, bucketNum < (numBuckets - 1) ? bucketSize : lastBucketNumValues ); currentBucketIndex = bucketNum; } int offset = adjustedCurrIndex & rem; currIndex++; return currentBucket[offset]; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { inspector.visit("buffer", buffer); inspector.visit("hasNulls", hasNull); inspector.visit("bucketSize", bucketSize); } private int getBucketOffset(int bucket) { // get offset of that bucket in the value buffer, subtract 1 to get the starting position because we only store the // ending offset, so look at the ending offset of the previous bucket, or 0 if this is the first bucket return bucketsPosition + (bucket > 0 ? buffer.getInt(offsetsPosition + ((bucket - 1) * Integer.BYTES)) : 0); } /** * Performs byte-by-byte comparison of the first value in a bucket with the specified value. Note that this method * MUST be prepared before calling, as it expects the length of the first value to have already been read externally, * and the buffer position to be at the start of the first bucket value. The final buffer position will be the * 'shared prefix length' of the first value in the bucket and the value to compare. *

* Bytes are compared using {@link StringUtils#compareUtf8UsingJavaStringOrdering(byte, byte)}. Therefore, when the * values are UTF-8 encoded strings, the ordering is compatible with {@link String#compareTo(String)}. */ private static int compareBucketFirstValue(ByteBuffer bucketBuffer, int length, ByteBuffer value) { final int startOffset = bucketBuffer.position(); final int commonLength = Math.min(length, value.remaining()); // save the length of the shared prefix with the first value of the bucket and the value to match so we // can use it later to skip over all values in the bucket that share a longer prefix with the first value // (the bucket is sorted, so the prefix length gets smaller as values increase) int sharedPrefix; int comparison = 0; for (sharedPrefix = 0; sharedPrefix < commonLength; sharedPrefix++) { comparison = StringUtils.compareUtf8UsingJavaStringOrdering(bucketBuffer.get(), value.get(sharedPrefix)); if (comparison != 0) { bucketBuffer.position(startOffset + sharedPrefix); break; } } return comparison; } public static final class FrontCodedV0 extends FrontCodedIndexed { private FrontCodedV0( ByteBuffer buffer, ByteOrder order, int bucketSize, int numValues, boolean hasNull, int offsetsPosition ) { super(buffer, order, bucketSize, numValues, hasNull, offsetsPosition); } @Override ByteBuffer getFromBucket(ByteBuffer buffer, int offset) { return getValueFromBucket(buffer, offset); } public static ByteBuffer getValueFromBucket(ByteBuffer buffer, int offset) { int prefixPosition; if (offset == 0) { final int length = VByte.readInt(buffer); final ByteBuffer firstValue = buffer.asReadOnlyBuffer(); firstValue.limit(firstValue.position() + length); return firstValue; } else { final int firstLength = VByte.readInt(buffer); prefixPosition = buffer.position(); buffer.position(buffer.position() + firstLength); } int pos = 0; int prefixLength; int fragmentLength; int fragmentPosition; // scan through bucket values until we reach offset do { prefixLength = VByte.readInt(buffer); if (++pos < offset) { // not there yet, no need to read anything other than the length to skip ahead final int skipLength = VByte.readInt(buffer); buffer.position(buffer.position() + skipLength); } else { // we've reached our destination fragmentLength = VByte.readInt(buffer); fragmentPosition = buffer.position(); break; } } while (true); final int valueLength = prefixLength + fragmentLength; ByteBuffer value = ByteBuffer.allocate(valueLength); for (int i = 0; i < valueLength; i++) { if (i < prefixLength) { value.put(buffer.get(prefixPosition + i)); } else { value.put(buffer.get(fragmentPosition + i - prefixLength)); } } value.flip(); return value; } @Override ByteBuffer[] readBucket(ByteBuffer buffer, int numValues) { final int length = VByte.readInt(buffer); final byte[] prefixBytes = new byte[length]; buffer.get(prefixBytes, 0, length); final ByteBuffer[] bucketBuffers = new ByteBuffer[numValues]; bucketBuffers[0] = ByteBuffer.wrap(prefixBytes); int pos = 1; while (pos < numValues) { final int prefixLength = VByte.readInt(buffer); final int fragmentLength = VByte.readInt(buffer); final byte[] fragment = new byte[fragmentLength]; buffer.get(fragment, 0, fragmentLength); final ByteBuffer value = ByteBuffer.allocate(prefixLength + fragmentLength); value.put(prefixBytes, 0, prefixLength); value.put(fragment); value.flip(); bucketBuffers[pos++] = value; } return bucketBuffers; } @Override int findInBucket(ByteBuffer value, int currBucketFirstValueIndex, int bucketSize, int sharedPrefixLength) { int relativePosition = 0; int prefixLength; // scan through bucket values until we find match or compare numValues int insertionPoint = 1; while (++relativePosition < bucketSize) { prefixLength = VByte.readInt(buffer); if (prefixLength > sharedPrefixLength) { // this value shares more in common with the first value, so the value we are looking for comes after final int skip = VByte.readInt(buffer); buffer.position(buffer.position() + skip); insertionPoint++; } else if (prefixLength < sharedPrefixLength) { // prefix is smaller, that means this value sorts ahead of it break; } else { final int fragmentLength = VByte.readInt(buffer); final int common = Math.min(fragmentLength, value.remaining() - prefixLength); int fragmentComparison = 0; for (int i = 0; i < common; i++) { fragmentComparison = StringUtils.compareUtf8UsingJavaStringOrdering( buffer.get(buffer.position() + i), value.get(prefixLength + i) ); if (fragmentComparison != 0) { break; } } if (fragmentComparison == 0) { fragmentComparison = Integer.compare(prefixLength + fragmentLength, value.remaining()); } if (fragmentComparison == 0) { return (currBucketFirstValueIndex + adjustIndex) + relativePosition; } else if (fragmentComparison < 0) { buffer.position(buffer.position() + fragmentLength); insertionPoint++; } else { break; } } } // (-(insertion point) - 1) return -(currBucketFirstValueIndex + adjustIndex) + (~insertionPoint); } } public static final class FrontCodedV1 extends FrontCodedIndexed { private final int[] unwindPrefixLength; private final int[] unwindBufferPosition; private FrontCodedV1( ByteBuffer buffer, ByteOrder order, int bucketSize, int numValues, boolean hasNull, int offsetsPosition ) { super(buffer, order, bucketSize, numValues, hasNull, offsetsPosition); this.unwindPrefixLength = new int[bucketSize]; this.unwindBufferPosition = new int[bucketSize]; } @Override ByteBuffer getFromBucket(ByteBuffer buffer, int offset) { // first value is written whole final int length = VByte.readInt(buffer); if (offset == 0) { // return first value directly from underlying buffer since it is stored whole final ByteBuffer value = buffer.asReadOnlyBuffer(); value.limit(value.position() + length); return value; } int pos = 0; int prefixLength; int fragmentLength; unwindPrefixLength[pos] = 0; unwindBufferPosition[pos] = buffer.position(); buffer.position(buffer.position() + length); do { prefixLength = VByte.readInt(buffer); if (++pos < offset) { // not there yet, no need to read anything other than the length to skip ahead final int skipLength = VByte.readInt(buffer); unwindPrefixLength[pos] = prefixLength; unwindBufferPosition[pos] = buffer.position(); buffer.position(buffer.position() + skipLength); } else { // we've reached our destination fragmentLength = VByte.readInt(buffer); if (prefixLength == 0) { // no prefix, return it directly from the underlying buffer final ByteBuffer value = buffer.asReadOnlyBuffer(); value.limit(value.position() + fragmentLength); return value; } break; } } while (true); final int valueLength = prefixLength + fragmentLength; final byte[] valueBytes = new byte[valueLength]; buffer.get(valueBytes, prefixLength, fragmentLength); for (int i = prefixLength; i > 0;) { // previous value had a larger prefix than or the same as the value we are looking for // skip it since the fragment doesn't have anything we need if (unwindPrefixLength[--pos] >= i) { continue; } buffer.position(unwindBufferPosition[pos]); buffer.get(valueBytes, unwindPrefixLength[pos], i - unwindPrefixLength[pos]); i = unwindPrefixLength[pos]; } return ByteBuffer.wrap(valueBytes); } @Override ByteBuffer[] readBucket(ByteBuffer buffer, int numValues) { final ByteBuffer[] bucketBuffers = new ByteBuffer[numValues]; // first value is written whole final int length = VByte.readInt(buffer); byte[] prefixBytes = new byte[length]; buffer.get(prefixBytes, 0, length); bucketBuffers[0] = ByteBuffer.wrap(prefixBytes); int pos = 1; while (pos < numValues) { final int prefixLength = VByte.readInt(buffer); final int fragmentLength = VByte.readInt(buffer); byte[] nextValueBytes = new byte[prefixLength + fragmentLength]; System.arraycopy(prefixBytes, 0, nextValueBytes, 0, prefixLength); buffer.get(nextValueBytes, prefixLength, fragmentLength); final ByteBuffer value = ByteBuffer.wrap(nextValueBytes); prefixBytes = nextValueBytes; bucketBuffers[pos++] = value; } return bucketBuffers; } @Override int findInBucket(ByteBuffer value, int currBucketFirstValueIndex, int bucketSize, int sharedPrefixLength) { int relativePosition = 0; int prefixLength; // scan through bucket values until we find match or compare numValues int insertionPoint = 1; while (++relativePosition < bucketSize) { prefixLength = VByte.readInt(buffer); if (prefixLength > sharedPrefixLength) { // bucket value shares more in common with the preceding value, so the value we are looking for comes after final int skip = VByte.readInt(buffer); buffer.position(buffer.position() + skip); insertionPoint++; } else if (prefixLength < sharedPrefixLength) { // bucket value prefix is smaller, that means the value we are looking for sorts ahead of it break; } else { // value has the same shared prefix, so compare additional values to find final int fragmentLength = VByte.readInt(buffer); final int common = Math.min(fragmentLength, value.remaining() - prefixLength); int fragmentComparison = 0; boolean shortCircuit = false; for (int i = 0; i < common; i++) { fragmentComparison = StringUtils.compareUtf8UsingJavaStringOrdering( buffer.get(buffer.position() + i), value.get(prefixLength + i) ); if (fragmentComparison != 0) { sharedPrefixLength = prefixLength + i; shortCircuit = true; break; } } if (fragmentComparison == 0) { fragmentComparison = Integer.compare(prefixLength + fragmentLength, value.remaining()); } if (fragmentComparison == 0) { return (currBucketFirstValueIndex + adjustIndex) + relativePosition; } else if (fragmentComparison < 0) { // value we are looking for is longer than the current bucket value, continue on if (!shortCircuit) { sharedPrefixLength = prefixLength + common; } buffer.position(buffer.position() + fragmentLength); insertionPoint++; } else { break; } } } // (-(insertion point) - 1) return -(currBucketFirstValueIndex + adjustIndex) + (~insertionPoint); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy