All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.util.packed.BlockPackedReaderIterator Maven / Gradle / Ivy

There is a newer version: 9.10.0
Show newest version
package org.apache.lucene.util.packed;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import static org.apache.lucene.util.BitUtil.zigZagDecode;
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.BPV_SHIFT;
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_VALUE_EQUALS_0;
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;

import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.LongsRef;

/**
 * Forward-only reader for sequences of longs written with {@link BlockPackedWriter}.
 * <p>
 * Values are consumed block by block: each block starts with a token byte that
 * carries the number of bits per value and a flag telling whether the block's
 * minimum value is zero; when it is not, the minimum follows as a variable-length
 * long. Decoded values of the current block are buffered in {@link #values}.
 * @see BlockPackedWriter
 * @lucene.internal
 */
public final class BlockPackedReaderIterator {

  /**
   * Reads a variable-length long of at most nine bytes. Same as
   * DataInput.readVLong but supports negative values: the first eight bytes
   * each contribute seven bits (the high bit signals that another byte follows)
   * and the ninth byte, when present, contributes all eight of its bits.
   */
  static long readVLong(DataInput in) throws IOException {
    byte b = in.readByte();
    if (b >= 0) {
      // single-byte value: no continuation bit set
      return b;
    }
    long result = b & 0x7FL;
    // bytes 2 through 8: seven payload bits each, stop at the first byte
    // whose continuation bit is clear
    for (int shift = 7; shift <= 49; shift += 7) {
      b = in.readByte();
      result |= (b & 0x7FL) << shift;
      if (b >= 0) {
        return result;
      }
    }
    // ninth byte: all eight bits are payload and fill bits 56..63
    b = in.readByte();
    result |= (b & 0xFFL) << 56;
    return result;
  }

  /** Extracts the bits-per-value from a block token, rejecting values above 64. */
  private static int decodeBitsPerValue(int token) throws IOException {
    final int bpv = token >>> BPV_SHIFT;
    if (bpv > 64) {
      throw new IOException("Corrupted");
    }
    return bpv;
  }

  DataInput in;
  final int packedIntsVersion;
  long valueCount;
  final int blockSize;
  final long[] values;
  final LongsRef valuesRef;
  byte[] blocks;
  int off;
  long ord;

  /** Sole constructor.
   * @param in                the stream to read values from
   * @param packedIntsVersion the packed ints version passed on to the decoder
   *                          and to the byte-count computations
   * @param blockSize         the number of values of a block, must be equal to the
   *                          block size of the {@link BlockPackedWriter} which has
   *                          been used to write the stream
   * @param valueCount        the number of values contained in the stream
   */
  public BlockPackedReaderIterator(DataInput in, int packedIntsVersion, int blockSize, long valueCount) {
    checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
    this.packedIntsVersion = packedIntsVersion;
    this.blockSize = blockSize;
    values = new long[blockSize];
    valuesRef = new LongsRef(values, 0, 0);
    reset(in, valueCount);
  }

  /** Re-targets this reader at a stream <code>in</code> holding
   * <code>valueCount</code> values. The block size remains unchanged. */
  public void reset(DataInput in, long valueCount) {
    this.in = in;
    assert valueCount >= 0;
    this.valueCount = valueCount;
    // position the cursor past the buffer so that the first read triggers a refill
    this.off = this.blockSize;
    this.ord = 0;
  }

  /** Skip exactly <code>count</code> values.
   * @throws EOFException if fewer than <code>count</code> values remain
   */
  public void skip(long count) throws IOException {
    assert count >= 0;
    final long target = ord + count;
    // target < 0 catches overflow of ord + count
    if (target < 0 || target > valueCount) {
      throw new EOFException();
    }

    // Phase 1: consume whatever is still buffered from the current block.
    final int buffered = (int) Math.min(count, blockSize - off);
    off += buffered;
    ord += buffered;
    long remaining = count - buffered;
    if (remaining == 0L) {
      return;
    }

    // Phase 2: jump over whole blocks without decoding them.
    assert off == blockSize;
    for (; remaining >= blockSize; remaining -= blockSize) {
      final int token = in.readByte() & 0xFF;
      final int bpv = decodeBitsPerValue(token);
      if ((token & MIN_VALUE_EQUALS_0) == 0) {
        // the block stores an explicit minimum value: read past it
        readVLong(in);
      }
      skipBytes(PackedInts.Format.PACKED.byteCount(packedIntsVersion, blockSize, bpv));
      ord += blockSize;
    }
    if (remaining == 0L) {
      return;
    }

    // Phase 3: decode the block containing the target and move inside it.
    assert remaining < blockSize;
    refill();
    ord += remaining;
    off += remaining;
  }

  /**
   * Advances the underlying stream by <code>count</code> bytes: seeks when the
   * stream is an {@link IndexInput}, otherwise reads the bytes into the
   * scratch buffer and discards them.
   */
  private void skipBytes(long count) throws IOException {
    if (in instanceof IndexInput) {
      final IndexInput indexInput = (IndexInput) in;
      indexInput.seek(indexInput.getFilePointer() + count);
      return;
    }

    if (blocks == null) {
      blocks = new byte[blockSize];
    }
    for (long remaining = count; remaining > 0; ) {
      final int chunk = (int) Math.min(blocks.length, remaining);
      in.readBytes(blocks, 0, chunk);
      remaining -= chunk;
    }
  }

  /** Read the next value.
   * @throws EOFException if all values have already been consumed
   */
  public long next() throws IOException {
    if (ord == valueCount) {
      throw new EOFException();
    }
    if (off == blockSize) {
      refill();
    }
    final long current = values[off++];
    ord++;
    return current;
  }

  /** Read between <code>1</code> and <code>count</code> values. The returned
   * reference is a window over this reader's internal buffer; the same
   * {@link LongsRef} instance is returned by every call.
   * @throws EOFException if all values have already been consumed
   */
  public LongsRef next(int count) throws IOException {
    assert count > 0;
    if (ord == valueCount) {
      throw new EOFException();
    }
    if (off == blockSize) {
      refill();
    }

    // never cross a block boundary nor the end of the stream
    final int leftInBlock = blockSize - off;
    final long leftInStream = valueCount - ord;
    final int length = (int) Math.min(Math.min(count, leftInBlock), leftInStream);

    valuesRef.offset = off;
    valuesRef.length = length;
    off += length;
    ord += length;
    return valuesRef;
  }

  /**
   * Decodes the next block from the stream into {@link #values} and rewinds
   * the in-block cursor to the start of the buffer.
   */
  private void refill() throws IOException {
    final int token = in.readByte() & 0xFF;
    final boolean minEquals0 = (token & MIN_VALUE_EQUALS_0) != 0;
    final int bpv = decodeBitsPerValue(token);

    final long minValue;
    if (minEquals0) {
      minValue = 0L;
    } else {
      // the stored quantity is incremented by one before being zig-zag decoded
      minValue = zigZagDecode(1L + readVLong(in));
    }
    assert minEquals0 || minValue != 0;

    if (bpv == 0) {
      // zero bits per value: every value of the block equals the minimum
      Arrays.fill(values, minValue);
      off = 0;
      return;
    }

    final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bpv);
    final int iterations = blockSize / decoder.byteValueCount();
    final int requiredBytes = iterations * decoder.byteBlockCount();
    if (blocks == null || blocks.length < requiredBytes) {
      blocks = new byte[requiredBytes];
    }

    // the last block of the stream may hold fewer than blockSize values, in
    // which case only the bytes needed for those values are read
    final int liveValues = (int) Math.min(valueCount - ord, blockSize);
    final int bytesToRead = (int) PackedInts.Format.PACKED.byteCount(packedIntsVersion, liveValues, bpv);
    in.readBytes(blocks, 0, bytesToRead);

    decoder.decode(blocks, 0, values, 0, iterations);

    if (minValue != 0) {
      // decoded values are deltas relative to the block minimum
      for (int i = 0; i < liveValues; ++i) {
        values[i] += minValue;
      }
    }
    off = 0;
  }

  /** Return the offset of the next value to read. */
  public long ord() {
    return this.ord;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy