// org.apache.solr.util.hll.BigEndianAscendingWordDeserializer -- Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.util.hll;

/**
 * A corresponding deserializer for {@link BigEndianAscendingWordSerializer}.
 */
class BigEndianAscendingWordDeserializer implements IWordDeserializer {
    // The number of bits per byte.
    private static final int BITS_PER_BYTE = 8;

    // long mask for the maximum value stored in a byte
    private static final long BYTE_MASK = (1L << BITS_PER_BYTE) - 1L;

    // ************************************************************************
    // The length in bits of the words to be read.
    private final int wordLength;

    // The byte array to which the words are serialized.
    private final byte[] bytes;

    // The number of leading padding bytes in 'bytes' to be ignored.
    private final int bytePadding;

    // The number of words that the byte array contains.
    private final int wordCount;

    // The current read state.
    private int currentWordIndex;

    // ========================================================================
    /**
     * @param wordLength the length in bits of the words to be deserialized. Must
     *        be less than or equal to 64 and greater than or equal to 1.
     * @param bytePadding the number of leading bytes that pad the serialized words.
     *        Must be greater than or equal to zero.
     * @param bytes the byte array containing the serialized words. Cannot be
     *        null.
     */
    public BigEndianAscendingWordDeserializer(final int wordLength, final int bytePadding, final byte[] bytes) {
        if((wordLength < 1) || (wordLength > 64)) {
            throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")");
        }

        if(bytePadding < 0) {
            throw new IllegalArgumentException("Byte padding must be >= zero. (was: " + bytePadding + ")");
        }

        this.wordLength = wordLength;
        this.bytes = bytes;
        this.bytePadding = bytePadding;

        final int dataBytes = (bytes.length - bytePadding);
        final long dataBits = (dataBytes * BITS_PER_BYTE);

        this.wordCount = (int)(dataBits/wordLength);

        currentWordIndex = 0;
    }

    // ========================================================================
    /* (non-Javadoc)
     * @see net.agkn.hll.serialization.IWordDeserializer#readWord()
     */
    @Override
    public long readWord() {
        final long word = readWord(currentWordIndex);
        currentWordIndex++;

        return word;
    }

    // ------------------------------------------------------------------------
    /**
     * Reads the word at the specified sequence position (zero-indexed).
     *
     * @param  position the zero-indexed position of the word to be read. This
     *         must be greater than or equal to zero.
     * @return the value of the serialized word at the specified position.
     */
    private long readWord(final int position) {
        if(position < 0) {
            throw new ArrayIndexOutOfBoundsException(position);
        }

        // First bit of the word
        final long firstBitIndex = ((long)position) * ((long)wordLength);
        final int firstByteIndex = (bytePadding + (int)(firstBitIndex / BITS_PER_BYTE));
        final int firstByteSkipBits = (int)(firstBitIndex % BITS_PER_BYTE);

        // Last bit of the word
        final long lastBitIndex = (firstBitIndex + wordLength - 1);
        final int lastByteIndex = (bytePadding + (int)(lastBitIndex / BITS_PER_BYTE));
        final int lastByteBitsToConsume;

        final int bitsAfterByteBoundary = (int)((lastBitIndex + 1) % BITS_PER_BYTE);
        // If the word terminates at the end of the last byte, consume the whole
        // last byte.
        if(bitsAfterByteBoundary == 0) {
            lastByteBitsToConsume = BITS_PER_BYTE;
        } else {
            // Otherwise, only consume what is necessary.
            lastByteBitsToConsume = bitsAfterByteBoundary;
        }

        if(lastByteIndex >= bytes.length) {
            throw new ArrayIndexOutOfBoundsException("Word out of bounds of backing array.");
        }

        // Accumulator
        long value = 0;

        // --------------------------------------------------------------------
        // First byte
        final int bitsRemainingInFirstByte = (BITS_PER_BYTE - firstByteSkipBits);
        final int bitsToConsumeInFirstByte = Math.min(bitsRemainingInFirstByte, wordLength);
        long firstByte = (long)bytes[firstByteIndex];

        // Mask off the bits to skip in the first byte.
        final long firstByteMask = ((1L << bitsRemainingInFirstByte) - 1L);
        firstByte &= firstByteMask;
        // Right-align relevant bits of first byte.
        firstByte >>>= (bitsRemainingInFirstByte - bitsToConsumeInFirstByte);

        value |= firstByte;

        // If the first byte contains the whole word, short-circuit.
        if(firstByteIndex == lastByteIndex) {
            return value;
        }

        // --------------------------------------------------------------------
        // Middle bytes
        final int middleByteCount = (lastByteIndex - firstByteIndex - 1);
        for(int i=0; i>= (BITS_PER_BYTE - lastByteBitsToConsume);
        value <<= lastByteBitsToConsume;
        value |= lastByte;
        return value;
    }

    /* (non-Javadoc)
     * @see net.agkn.hll.serialization.IWordDeserializer#totalWordCount()
     */
    @Override
    public int totalWordCount() {
        return wordCount;
    }
}