org.apache.lucene.util.packed.XPacked64 Maven / Gradle / Ivy
Show all versions of opensearch Show documentation
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.apache.lucene.util.packed;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Arrays;
/**
* Forked from Lucene 8.x; removed in Lucene 9.0
*
* TODO: further investigate a better alternative
*
* Space optimized random access capable array of values with a fixed number of bits/value. Values
* are packed contiguously.
*
*
The implementation strives to perform as fast as possible under the constraint of contiguous
* bits, by avoiding expensive operations. This comes at the cost of code clarity.
*
*
Technical details: This implementation is a refinement of a non-branching version. The
* non-branching get and set methods meant that 2 or 4 atomics in the underlying array were always
* accessed, even for the cases where only 1 or 2 were needed. Even with caching, this had a
* detrimental effect on performance. Related to this issue, the old implementation used lookup
* tables for shifts and masks, which also proved to be a bit slower than calculating the shifts and
* masks on the fly. See https://issues.apache.org/jira/browse/LUCENE-4062 for details.
*/
class XPacked64 extends XPackedInts.MutableImpl {
static final int BLOCK_SIZE = 64; // 32 = int, 64 = long
static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE
static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
/** Values are stores contiguously in the blocks array. */
private final long[] blocks;
/** A right-aligned mask of width BitsPerValue used by {@link #get(int)}. */
private final long maskRight;
/** Optimization: Saves one lookup in {@link #get(int)}. */
private final int bpvMinusBlockSize;
/**
* Creates an array with the internal structures adjusted for the given limits and initialized to
* 0.
*
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
*/
public XPacked64(int valueCount, int bitsPerValue) {
super(valueCount, bitsPerValue);
final PackedInts.Format format = PackedInts.Format.PACKED;
final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue);
this.blocks = new long[longCount];
maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
/**
* Creates an array with content retrieved from the given DataInput.
*
* @param in a DataInput, positioned at the start of Packed64-content.
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
* @throws java.io.IOException if the values for the backing array could not be retrieved.
*/
public XPacked64(int packedIntsVersion, DataInput in, int valueCount, int bitsPerValue) throws IOException {
super(valueCount, bitsPerValue);
final PackedInts.Format format = PackedInts.Format.PACKED;
final long byteCount = format.byteCount(packedIntsVersion, valueCount, bitsPerValue); // to know how much to read
final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue); // to size the array
blocks = new long[longCount];
// read as many longs as we can
for (int i = 0; i < byteCount / 8; ++i) {
blocks[i] = in.readLong();
}
final int remaining = (int) (byteCount % 8);
if (remaining != 0) {
// read the last bytes
long lastLong = 0;
for (int i = 0; i < remaining; ++i) {
lastLong |= (in.readByte() & 0xFFL) << (56 - i * 8);
}
blocks[blocks.length - 1] = lastLong;
}
maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
/**
* @param index the position of the value.
* @return the value at the given index.
*/
@Override
public long get(final int index) {
// The abstract index in a bit stream
final long majorBitPos = (long) index * bitsPerValue;
// The index in the backing long-array
final int elementPos = (int) (majorBitPos >>> BLOCK_BITS);
// The number of value-bits in the second long
final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
if (endBits <= 0) { // Single block
return (blocks[elementPos] >>> -endBits) & maskRight;
}
// Two blocks
return ((blocks[elementPos] << endBits) | (blocks[elementPos + 1] >>> (BLOCK_SIZE - endBits))) & maskRight;
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
len = Math.min(len, valueCount - index);
assert off + len <= arr.length;
final int originalIndex = index;
final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
// go to the next block where the value does not span across two blocks
final int offsetInBlocks = index % decoder.longValueCount();
if (offsetInBlocks != 0) {
for (int i = offsetInBlocks; i < decoder.longValueCount() && len > 0; ++i) {
arr[off++] = get(index++);
--len;
}
if (len == 0) {
return index - originalIndex;
}
}
// bulk get
assert index % decoder.longValueCount() == 0;
int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
final int iterations = len / decoder.longValueCount();
decoder.decode(blocks, blockIndex, arr, off, iterations);
final int gotValues = iterations * decoder.longValueCount();
index += gotValues;
len -= gotValues;
assert len >= 0;
if (index > originalIndex) {
// stay at the block boundary
return index - originalIndex;
} else {
// no progress so far => already at a block boundary but no full block to get
assert index == originalIndex;
return super.get(index, arr, off, len);
}
}
@Override
public void set(final int index, final long value) {
// The abstract index in a contiguous bit stream
final long majorBitPos = (long) index * bitsPerValue;
// The index in the backing long-array
final int elementPos = (int) (majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
// The number of value-bits in the second long
final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
if (endBits <= 0) { // Single block
blocks[elementPos] = blocks[elementPos] & ~(maskRight << -endBits) | (value << -endBits);
return;
}
// Two blocks
blocks[elementPos] = blocks[elementPos] & ~(maskRight >>> endBits) | (value >>> endBits);
blocks[elementPos + 1] = blocks[elementPos + 1] & (~0L >>> endBits) | (value << (BLOCK_SIZE - endBits));
}
@Override
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
len = Math.min(len, valueCount - index);
assert off + len <= arr.length;
final int originalIndex = index;
final PackedInts.Encoder encoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
// go to the next block where the value does not span across two blocks
final int offsetInBlocks = index % encoder.longValueCount();
if (offsetInBlocks != 0) {
for (int i = offsetInBlocks; i < encoder.longValueCount() && len > 0; ++i) {
set(index++, arr[off++]);
--len;
}
if (len == 0) {
return index - originalIndex;
}
}
// bulk set
assert index % encoder.longValueCount() == 0;
int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
final int iterations = len / encoder.longValueCount();
encoder.encode(arr, off, blocks, blockIndex, iterations);
final int setValues = iterations * encoder.longValueCount();
index += setValues;
len -= setValues;
assert len >= 0;
if (index > originalIndex) {
// stay at the block boundary
return index - originalIndex;
} else {
// no progress so far => already at a block boundary but no full block to get
assert index == originalIndex;
return super.set(index, arr, off, len);
}
}
@Override
public String toString() {
return "Packed64(bitsPerValue=" + bitsPerValue + ",size=" + size() + ",blocks=" + blocks.length + ")";
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3 * Integer.BYTES // bpvMinusBlockSize,valueCount,bitsPerValue
+ Long.BYTES // maskRight
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF
) // blocks ref
+ RamUsageEstimator.sizeOf(blocks);
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
assert PackedInts.unsignedBitsRequired(val) <= getBitsPerValue();
assert fromIndex <= toIndex;
// minimum number of values that use an exact number of full blocks
final int nAlignedValues = 64 / gcd(64, bitsPerValue);
final int span = toIndex - fromIndex;
if (span <= 3 * nAlignedValues) {
// there needs be at least 2 * nAlignedValues aligned values for the
// block approach to be worth trying
super.fill(fromIndex, toIndex, val);
return;
}
// fill the first values naively until the next block start
final int fromIndexModNAlignedValues = fromIndex % nAlignedValues;
if (fromIndexModNAlignedValues != 0) {
for (int i = fromIndexModNAlignedValues; i < nAlignedValues; ++i) {
set(fromIndex++, val);
}
}
assert fromIndex % nAlignedValues == 0;
// compute the long[] blocks for nAlignedValues consecutive values and
// use them to set as many values as possible without applying any mask
// or shift
final int nAlignedBlocks = (nAlignedValues * bitsPerValue) >> 6;
final long[] nAlignedValuesBlocks;
{
XPacked64 values = new XPacked64(nAlignedValues, bitsPerValue);
for (int i = 0; i < nAlignedValues; ++i) {
values.set(i, val);
}
nAlignedValuesBlocks = values.blocks;
assert nAlignedBlocks <= nAlignedValuesBlocks.length;
}
final int startBlock = (int) (((long) fromIndex * bitsPerValue) >>> 6);
final int endBlock = (int) (((long) toIndex * bitsPerValue) >>> 6);
for (int block = startBlock; block < endBlock; ++block) {
final long blockValue = nAlignedValuesBlocks[block % nAlignedBlocks];
blocks[block] = blockValue;
}
// fill the gap
for (int i = (int) (((long) endBlock << 6) / bitsPerValue); i < toIndex; ++i) {
set(i, val);
}
}
private static int gcd(int a, int b) {
if (a < b) {
return gcd(b, a);
} else if (b == 0) {
return a;
} else {
return gcd(b, a % b);
}
}
@Override
public void clear() {
Arrays.fill(blocks, 0L);
}
}