/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.lucene80;
import java.io.DataInput;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RoaringDocIdSet;
/**
 * Disk-based implementation of a {@link DocIdSetIterator} which can return the index of the
 * current document, i.e. the ordinal of the current document among the list of documents that this
 * iterator can return. This is useful to implement sparse doc values by only having to encode
 * values for documents that actually have a value.
 *
 * <p>Implementation-wise, this {@link DocIdSetIterator} is inspired by {@link RoaringDocIdSet
 * roaring bitmaps} and encodes ranges of {@code 65536} documents independently and picks between 3
 * encodings depending on the density of the range:
 *
 * <ul>
 *   <li>{@code ALL} if the range contains 65536 documents exactly,
 *   <li>{@code DENSE} if the range contains 4096 documents or more; in that case documents are
 *       stored in a bit set,
 *   <li>{@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are stored in a {@link
 *       DataInput#readShort() short}.
 * </ul>
 *
 * <p>Only ranges that contain at least one value are encoded.
 *
 * <p>This implementation uses 6 bytes per document in the worst case, which happens when all
 * ranges contain exactly one document.
 *
 * <p>To avoid O(n) lookup time complexity, with n being the number of documents, two lookup
 * structures are used: a lookup table for block offset and index, and a rank structure for DENSE
 * block index lookups.
 *
 * <p>The lookup table is an array of {@code int}-pairs, with a pair for each block. It allows for
 * direct jumping to the block, as opposed to iterating forward from the current position one block
 * at a time.
 *
 * <p>Each int-pair entry consists of 2 logical parts:
 *
 * <p>The first 32-bit int holds the index (number of set bits in the blocks) up to just before the
 * wanted block. The maximum number of set bits is the maximum number of documents, which is less
 * than 2^31.
 *
 * <p>The next int holds the offset in bytes into the underlying slice. As there is a maximum of
 * 2^16 blocks, it follows that the maximum size of any block must not exceed 2^15 bytes to avoid
 * overflow (2^16 bytes if the int is treated as unsigned). This is currently the case, with the
 * largest block being DENSE and using 2^13 + 36 bytes.
 *
 * <p>The cache overhead is numDocs/1024 bytes.
 *
 * <p>Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). In the
 * case of non-existing blocks, the entry in the lookup table has an index equal to the previous
 * entry and an offset equal to the next non-empty block.
 *
 * <p>The block lookup table is stored at the end of the total block structure.
 *
 * <p>The rank structure for DENSE blocks is an array of byte-pairs with an entry for each
 * sub-block (default 512 bits) out of the 65536 bits in the outer DENSE block.
 *
 * <p>Each rank-entry states the number of set bits within the block up to the bit before the bit
 * positioned at the start of the sub-block. Note that the rank entry of the first sub-block is
 * always 0 and that the last entry can at most be 65536-2 = 65534 and thus will always fit into a
 * byte-pair of 16 bits.
 *
 * <p>The rank structure for a given DENSE block is stored at the beginning of the DENSE block.
 * This ensures locality and keeps logistics simple.
 *
 * @lucene.internal
 */
final class IndexedDISI extends DocIdSetIterator {
// jump-table time/space trade-offs to consider:
// The block offsets and the block indexes could be stored in more compressed form with
// two PackedInts or two MonotonicDirectReaders.
  // The DENSE ranks (default 128 shorts = 256 bytes) could likewise be compressed. But as there
  // are at least 4096 set bits in DENSE blocks, there will be at least one rank with 2^12 bits, so
  // it is doubtful if there is much to gain here.
private static final int BLOCK_SIZE =
65536; // The number of docIDs that a single block represents
private static final int DENSE_BLOCK_LONGS = BLOCK_SIZE / Long.SIZE; // 1024
public static final byte DEFAULT_DENSE_RANK_POWER = 9; // Every 512 docIDs / 8 longs
static final int MAX_ARRAY_LENGTH = (1 << 12) - 1;
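  // Illustrative per-block layout (derived from flush() below): 2 bytes block id + 2 bytes
  // (cardinality - 1), followed by either 2 bytes per docID (SPARSE), an optional rank table plus
  // 1024 longs (DENSE), or nothing (ALL). Worst case: a block holding a single doc is SPARSE and
  // costs 2 + 2 + 2 = 6 bytes, matching the "6 bytes per document" bound in the class javadoc.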
private static void flush(
int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out)
throws IOException {
assert block >= 0 && block < 65536;
out.writeShort((short) block);
assert cardinality > 0 && cardinality <= 65536;
out.writeShort((short) (cardinality - 1));
if (cardinality > MAX_ARRAY_LENGTH) {
      if (cardinality != 65536) { // ALL blocks (all 65536 docs set) need no bits beyond the header
if (denseRankPower != -1) {
final byte[] rank = createRank(buffer, denseRankPower);
out.writeBytes(rank, rank.length);
}
for (long word : buffer.getBits()) {
out.writeLong(word);
}
}
} else {
BitSetIterator it = new BitSetIterator(buffer, cardinality);
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
out.writeShort((short) doc);
}
}
}
// Creates a DENSE rank-entry (the number of set bits up to a given point) for the buffer.
// One rank-entry for every {@code 2^denseRankPower} bits, with each rank-entry using 2 bytes.
// Represented as a byte[] for fast flushing and mirroring of the retrieval representation.
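  // Illustrative arithmetic for the default denseRankPower of 9: longsPerRank = 1 << 3 = 8,
  // rankIndexShift = 2, so rank.length = 1024 >> 2 = 256 bytes, i.e. 128 two-byte entries, one
  // for every 8 longs (512 bits) of the DENSE block.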
private static byte[] createRank(FixedBitSet buffer, byte denseRankPower) {
final int longsPerRank = 1 << (denseRankPower - 6);
final int rankMark = longsPerRank - 1;
final int rankIndexShift = denseRankPower - 7; // 6 for the long (2^6) + 1 for 2 bytes/entry
final byte[] rank = new byte[DENSE_BLOCK_LONGS >> rankIndexShift];
final long[] bits = buffer.getBits();
int bitCount = 0;
for (int word = 0; word < DENSE_BLOCK_LONGS; word++) {
if ((word & rankMark) == 0) { // Every longsPerRank longs
rank[word >> rankIndexShift] = (byte) (bitCount >> 8);
rank[(word >> rankIndexShift) + 1] = (byte) (bitCount & 0xFF);
}
bitCount += Long.bitCount(bits[word]);
}
return rank;
}
/**
* Writes the docIDs from it to out, in logical blocks, one for each 65536 docIDs in monotonically
 * increasing gap-less order. DENSE blocks use {@link #DEFAULT_DENSE_RANK_POWER} of 9 (every 512
* docIDs / 8 longs). The caller must keep track of the number of jump-table entries (returned by
* this method) as well as the denseRankPower (9 for this method) and provide them when
* constructing an IndexedDISI for reading.
*
* @param it the document IDs.
* @param out destination for the blocks.
* @throws IOException if there was an error writing to out.
* @return the number of jump-table entries following the blocks, -1 for no entries. This should
* be stored in meta and used when creating an instance of IndexedDISI.
*/
static short writeBitSet(DocIdSetIterator it, IndexOutput out) throws IOException {
return writeBitSet(it, out, DEFAULT_DENSE_RANK_POWER);
}
/**
* Writes the docIDs from it to out, in logical blocks, one for each 65536 docIDs in monotonically
* increasing gap-less order. The caller must keep track of the number of jump-table entries
* (returned by this method) as well as the denseRankPower and provide them when constructing an
* IndexedDISI for reading.
*
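 * <p>Illustrative usage sketch (assuming {@code docs}, {@code in}, {@code out} and the {@code
 * offset}/{@code length}/{@code cost} bookkeeping are provided by the caller, typically via a
 * metadata file):
 *
 * <pre>{@code
 * long offset = out.getFilePointer();
 * short jumpTableEntryCount = IndexedDISI.writeBitSet(docs, out, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
 * long length = out.getFilePointer() - offset;
 * // ... later, when reading:
 * IndexedDISI disi = new IndexedDISI(in, offset, length, jumpTableEntryCount,
 *     IndexedDISI.DEFAULT_DENSE_RANK_POWER, cost);
 * }</pre>
 *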
* @param it the document IDs.
* @param out destination for the blocks.
* @param denseRankPower for {@link Method#DENSE} blocks, a rank will be written every {@code
* 2^denseRankPower} docIDs. Values < 7 (every 128 docIDs) or > 15 (every 32768 docIDs)
 *     disable DENSE rank. Recommended values are 8-12: Every 256-4096 docIDs or 4-64 longs.
* {@link #DEFAULT_DENSE_RANK_POWER} is 9: Every 512 docIDs. This should be stored in meta and
* used when creating an instance of IndexedDISI.
* @throws IOException if there was an error writing to out.
* @return the number of jump-table entries following the blocks, -1 for no entries. This should
* be stored in meta and used when creating an instance of IndexedDISI.
*/
static short writeBitSet(DocIdSetIterator it, IndexOutput out, byte denseRankPower)
throws IOException {
final long origo = out.getFilePointer(); // All jumps are relative to the origo
if ((denseRankPower < 7 || denseRankPower > 15) && denseRankPower != -1) {
throw new IllegalArgumentException(
"Acceptable values for denseRankPower are 7-15 (every 128-32768 docIDs). "
+ "The provided power was "
+ denseRankPower
+ " (every "
+ (int) Math.pow(2, denseRankPower)
+ " docIDs)");
}
int totalCardinality = 0;
int blockCardinality = 0;
final FixedBitSet buffer = new FixedBitSet(1 << 16);
int[] jumps = new int[ArrayUtil.oversize(1, Integer.BYTES * 2)];
int prevBlock = -1;
int jumpBlockIndex = 0;
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
final int block = doc >>> 16;
if (prevBlock != -1 && block != prevBlock) {
// Track offset+index from previous block up to current
jumps =
addJumps(
jumps,
out.getFilePointer() - origo,
totalCardinality,
jumpBlockIndex,
prevBlock + 1);
jumpBlockIndex = prevBlock + 1;
// Flush block
flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
// Reset for next block
buffer.clear(0, buffer.length());
totalCardinality += blockCardinality;
blockCardinality = 0;
}
buffer.set(doc & 0xFFFF);
blockCardinality++;
prevBlock = block;
}
if (blockCardinality > 0) {
jumps =
addJumps(
jumps, out.getFilePointer() - origo, totalCardinality, jumpBlockIndex, prevBlock + 1);
totalCardinality += blockCardinality;
flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
buffer.clear(0, buffer.length());
prevBlock++;
}
final int lastBlock =
prevBlock == -1 ? 0 : prevBlock; // There will always be at least 1 block (NO_MORE_DOCS)
    // The last entry is a SPARSE block with blockIndex == 32767 and the single entry 65535, which
    // becomes the docID NO_MORE_DOCS. To avoid creating 65K jump-table entries, only a single
    // entry is created, pointing to the offset of the NO_MORE_DOCS block, with the jumpBlockIndex
    // set to the logical EMPTY block after all real blocks.
jumps =
addJumps(jumps, out.getFilePointer() - origo, totalCardinality, lastBlock, lastBlock + 1);
buffer.set(DocIdSetIterator.NO_MORE_DOCS & 0xFFFF);
flush(DocIdSetIterator.NO_MORE_DOCS >>> 16, buffer, 1, denseRankPower, out);
// offset+index jump-table stored at the end
return flushBlockJumps(jumps, lastBlock + 1, out);
}
// Adds entries to the offset & index jump-table for blocks
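  // Layout note: each entry is an int-pair, jumps[block * 2] = index (number of set docIDs in all
  // previous blocks) and jumps[block * 2 + 1] = file offset of the block relative to origo.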
private static int[] addJumps(int[] jumps, long offset, int index, int startBlock, int endBlock) {
assert offset < Integer.MAX_VALUE
: "Logically the offset should not exceed 2^30 but was >= Integer.MAX_VALUE";
jumps = ArrayUtil.grow(jumps, (endBlock + 1) * 2);
for (int b = startBlock; b < endBlock; b++) {
jumps[b * 2] = index;
jumps[b * 2 + 1] = (int) offset;
}
return jumps;
}
// Flushes the offset & index jump-table for blocks. This should be the last data written to out
// This method returns the blockCount for the blocks reachable for the jump_table or -1 for no
// jump-table
private static short flushBlockJumps(int[] jumps, int blockCount, IndexOutput out)
throws IOException {
    // A jump-table with a single real entry + NO_MORE_DOCS is just wasted space, so we skip it
    if (blockCount == 2) {
      blockCount = 0;
    }
for (int i = 0; i < blockCount; i++) {
out.writeInt(jumps[i * 2]); // index
out.writeInt(jumps[i * 2 + 1]); // offset
}
// As there are at most 32k blocks, the count is a short
// The jumpTableOffset will be at lastPos - (blockCount * Long.BYTES)
return (short) blockCount;
}
// Members are pkg-private to avoid synthetic accessors when accessed from the `Method` enum
/** The slice that stores the {@link DocIdSetIterator}. */
final IndexInput slice;
final int jumpTableEntryCount;
final byte denseRankPower;
final RandomAccessInput jumpTable; // Skip blocks of 64K bits
final byte[] denseRankTable;
final long cost;
/**
* This constructor always creates a new blockSlice and a new jumpTable from in, to ensure that
* operations are independent from the caller. See {@link #IndexedDISI(IndexInput,
* RandomAccessInput, int, byte, long)} for re-use of blockSlice and jumpTable.
*
* @param in backing data.
* @param offset starting offset for blocks in the backing data.
* @param length the number of bytes holding blocks and jump-table in the backing data.
* @param jumpTableEntryCount the number of blocks covered by the jump-table. This must match the
* number returned by {@link #writeBitSet(DocIdSetIterator, IndexOutput, byte)}.
* @param denseRankPower the number of docIDs covered by each rank entry in DENSE blocks,
* expressed as {@code 2^denseRankPower}. This must match the power given in {@link
* #writeBitSet(DocIdSetIterator, IndexOutput, byte)}
* @param cost normally the number of logical docIDs.
*/
IndexedDISI(
IndexInput in,
long offset,
long length,
int jumpTableEntryCount,
byte denseRankPower,
long cost)
throws IOException {
this(
createBlockSlice(in, "docs", offset, length, jumpTableEntryCount),
createJumpTable(in, offset, length, jumpTableEntryCount),
jumpTableEntryCount,
denseRankPower,
cost);
}
/**
 * This constructor allows passing the slice and jumpTable directly, in case it helps reuse. See
 * e.g. the Lucene80 norms producer's merge instance.
*
* @param blockSlice data blocks, normally created by {@link #createBlockSlice}.
* @param jumpTable table holding jump-data for block-skips, normally created by {@link
* #createJumpTable}.
* @param jumpTableEntryCount the number of blocks covered by the jump-table. This must match the
* number returned by {@link #writeBitSet(DocIdSetIterator, IndexOutput, byte)}.
* @param denseRankPower the number of docIDs covered by each rank entry in DENSE blocks,
* expressed as {@code 2^denseRankPower}. This must match the power given in {@link
* #writeBitSet(DocIdSetIterator, IndexOutput, byte)}
* @param cost normally the number of logical docIDs.
*/
IndexedDISI(
IndexInput blockSlice,
RandomAccessInput jumpTable,
int jumpTableEntryCount,
byte denseRankPower,
long cost)
throws IOException {
if ((denseRankPower < 7 || denseRankPower > 15) && denseRankPower != -1) {
throw new IllegalArgumentException(
"Acceptable values for denseRankPower are 7-15 (every 128-32768 docIDs). "
+ "The provided power was "
+ denseRankPower
+ " (every "
+ (int) Math.pow(2, denseRankPower)
+ " docIDs). ");
}
this.slice = blockSlice;
this.jumpTable = jumpTable;
this.jumpTableEntryCount = jumpTableEntryCount;
this.denseRankPower = denseRankPower;
final int rankIndexShift = denseRankPower - 7;
this.denseRankTable =
denseRankPower == -1 ? null : new byte[DENSE_BLOCK_LONGS >> rankIndexShift];
this.cost = cost;
}
/**
* Helper method for using {@link #IndexedDISI(IndexInput, RandomAccessInput, int, byte, long)}.
* Creates a disiSlice for the IndexedDISI data blocks, without the jump-table.
*
* @param slice backing data, holding both blocks and jump-table.
* @param sliceDescription human readable slice designation.
* @param offset relative to the backing data.
* @param length full length of the IndexedDISI, including blocks and jump-table data.
* @param jumpTableEntryCount the number of blocks covered by the jump-table.
 * @return a slice covering only the IndexedDISI data blocks, excluding the jump-table.
 * @throws IOException if the block slice could not be created from slice.
*/
public static IndexInput createBlockSlice(
IndexInput slice, String sliceDescription, long offset, long length, int jumpTableEntryCount)
throws IOException {
long jumpTableBytes = jumpTableEntryCount < 0 ? 0 : jumpTableEntryCount * Integer.BYTES * 2;
return slice.slice(sliceDescription, offset, length - jumpTableBytes);
}
/**
* Helper method for using {@link #IndexedDISI(IndexInput, RandomAccessInput, int, byte, long)}.
* Creates a RandomAccessInput covering only the jump-table data or null.
*
* @param slice backing data, holding both blocks and jump-table.
* @param offset relative to the backing data.
* @param length full length of the IndexedDISI, including blocks and jump-table data.
* @param jumpTableEntryCount the number of blocks covered by the jump-table.
* @return a jumpTable containing the block jump-data or null if no such table exists.
* @throws IOException if a RandomAccessInput could not be created from slice.
*/
public static RandomAccessInput createJumpTable(
IndexInput slice, long offset, long length, int jumpTableEntryCount) throws IOException {
if (jumpTableEntryCount <= 0) {
return null;
} else {
int jumpTableBytes = jumpTableEntryCount * Integer.BYTES * 2;
return slice.randomAccessSlice(offset + length - jumpTableBytes, jumpTableBytes);
}
}
int block = -1;
long blockEnd;
long denseBitmapOffset = -1; // Only used for DENSE blocks
int nextBlockIndex = -1;
Method method;
int doc = -1;
int index = -1;
// SPARSE variables
boolean exists;
// DENSE variables
long word;
int wordIndex = -1;
// number of one bits encountered so far, including those of `word`
int numberOfOnes;
// Used with rank for jumps inside of DENSE as they are absolute instead of relative
int denseOrigoIndex;
// ALL variables
int gap;
@Override
public int docID() {
return doc;
}
@Override
public int advance(int target) throws IOException {
final int targetBlock = target & 0xFFFF0000;
if (block < targetBlock) {
advanceBlock(targetBlock);
}
if (block == targetBlock) {
if (method.advanceWithinBlock(this, target)) {
return doc;
}
readBlockHeader();
}
boolean found = method.advanceWithinBlock(this, block);
assert found;
return doc;
}
public boolean advanceExact(int target) throws IOException {
final int targetBlock = target & 0xFFFF0000;
if (block < targetBlock) {
advanceBlock(targetBlock);
}
boolean found = block == targetBlock && method.advanceExactWithinBlock(this, target);
this.doc = target;
return found;
}
private void advanceBlock(int targetBlock) throws IOException {
final int blockIndex = targetBlock >> 16;
// If the destination block is 2 blocks or more ahead, we use the jump-table.
if (jumpTable != null && blockIndex >= (block >> 16) + 2) {
// If the jumpTableEntryCount is exceeded, there are no further bits. Last entry is always
// NO_MORE_DOCS
final int inRangeBlockIndex =
blockIndex < jumpTableEntryCount ? blockIndex : jumpTableEntryCount - 1;
final int index = jumpTable.readInt(inRangeBlockIndex * Integer.BYTES * 2);
final int offset = jumpTable.readInt(inRangeBlockIndex * Integer.BYTES * 2 + Integer.BYTES);
this.nextBlockIndex = index - 1; // -1 to compensate for the always-added 1 in readBlockHeader
slice.seek(offset);
readBlockHeader();
return;
}
// Fallback to iteration of blocks
do {
slice.seek(blockEnd);
readBlockHeader();
} while (block < targetBlock);
}
private void readBlockHeader() throws IOException {
block = Short.toUnsignedInt(slice.readShort()) << 16;
assert block >= 0;
final int numValues = 1 + Short.toUnsignedInt(slice.readShort());
index = nextBlockIndex;
nextBlockIndex = index + numValues;
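    // numValues selects the decoding method: <= 4095 docIDs -> SPARSE, exactly 65536 -> ALL,
    // otherwise DENSE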
if (numValues <= MAX_ARRAY_LENGTH) {
method = Method.SPARSE;
blockEnd = slice.getFilePointer() + (numValues << 1);
} else if (numValues == 65536) {
method = Method.ALL;
blockEnd = slice.getFilePointer();
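      // In an ALL block every docID in the range exists, so index(doc) == doc - gap, where gap is
      // the block's first docID minus the number of set docIDs in all previous blocks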
gap = block - index - 1;
} else {
method = Method.DENSE;
denseBitmapOffset =
slice.getFilePointer() + (denseRankTable == null ? 0 : denseRankTable.length);
blockEnd = denseBitmapOffset + (1 << 13);
      // Performance consideration: All rank entries (default 128 * 16 bits) are loaded up front.
      // This should be fast with the reusable byte[] buffer, but it is still wasted if the DENSE
      // block is iterated in small steps. If this results in too great a performance regression,
      // a heuristic strategy might work where the rank data are loaded on the first in-block
      // advance, if said advance is > X docIDs. The hope is that a small first advance means that
      // subsequent advances will be small too. Another alternative is to maintain an extra slice
      // for DENSE rank, but IndexedDISI is already slice-heavy.
if (denseRankPower != -1) {
slice.readBytes(denseRankTable, 0, denseRankTable.length);
}
wordIndex = -1;
numberOfOnes = index + 1;
denseOrigoIndex = numberOfOnes;
}
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
public int index() {
return index;
}
@Override
public long cost() {
return cost;
}
enum Method {
SPARSE {
@Override
boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException {
final int targetInBlock = target & 0xFFFF;
// TODO: binary search
for (; disi.index < disi.nextBlockIndex; ) {
int doc = Short.toUnsignedInt(disi.slice.readShort());
disi.index++;
if (doc >= targetInBlock) {
disi.doc = disi.block | doc;
disi.exists = true;
return true;
}
}
return false;
}
@Override
boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException {
final int targetInBlock = target & 0xFFFF;
// TODO: binary search
if (target == disi.doc) {
return disi.exists;
}
for (; disi.index < disi.nextBlockIndex; ) {
int doc = Short.toUnsignedInt(disi.slice.readShort());
disi.index++;
if (doc >= targetInBlock) {
if (doc != targetInBlock) {
disi.index--;
disi.slice.seek(disi.slice.getFilePointer() - Short.BYTES);
break;
}
disi.exists = true;
return true;
}
}
disi.exists = false;
return false;
}
},
DENSE {
@Override
boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException {
final int targetInBlock = target & 0xFFFF;
final int targetWordIndex = targetInBlock >>> 6;
// If possible, skip ahead using the rank cache
// If the distance between the current position and the target is < rank-longs
// there is no sense in using rank
if (disi.denseRankPower != -1
&& targetWordIndex - disi.wordIndex >= (1 << (disi.denseRankPower - 6))) {
rankSkip(disi, targetInBlock);
}
for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
disi.word = disi.slice.readLong();
disi.numberOfOnes += Long.bitCount(disi.word);
}
disi.wordIndex = targetWordIndex;
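        // Java masks the shift count to its low 6 bits, so this shifts by the target's bit
        // position within the current word (target & 0x3F)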
long leftBits = disi.word >>> target;
if (leftBits != 0L) {
disi.doc = target + Long.numberOfTrailingZeros(leftBits);
disi.index = disi.numberOfOnes - Long.bitCount(leftBits);
return true;
}
// There were no set bits at the wanted position. Move forward until one is reached
while (++disi.wordIndex < 1024) {
        // This could use the rank cache to skip empty spaces >= 512 bits, but it seems unrealistic
        // that such blocks would be DENSE
disi.word = disi.slice.readLong();
if (disi.word != 0) {
disi.index = disi.numberOfOnes;
disi.numberOfOnes += Long.bitCount(disi.word);
disi.doc = disi.block | (disi.wordIndex << 6) | Long.numberOfTrailingZeros(disi.word);
return true;
}
}
// No set bits in the block at or after the wanted position.
return false;
}
@Override
boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException {
final int targetInBlock = target & 0xFFFF;
final int targetWordIndex = targetInBlock >>> 6;
// If possible, skip ahead using the rank cache
// If the distance between the current position and the target is < rank-longs
// there is no sense in using rank
if (disi.denseRankPower != -1
&& targetWordIndex - disi.wordIndex >= (1 << (disi.denseRankPower - 6))) {
rankSkip(disi, targetInBlock);
}
for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
disi.word = disi.slice.readLong();
disi.numberOfOnes += Long.bitCount(disi.word);
}
disi.wordIndex = targetWordIndex;
long leftBits = disi.word >>> target;
disi.index = disi.numberOfOnes - Long.bitCount(leftBits);
return (leftBits & 1L) != 0;
}
},
ALL {
@Override
boolean advanceWithinBlock(IndexedDISI disi, int target) {
disi.doc = target;
disi.index = target - disi.gap;
return true;
}
@Override
boolean advanceExactWithinBlock(IndexedDISI disi, int target) {
disi.index = target - disi.gap;
return true;
}
};
/**
* Advance to the first doc from the block that is equal to or greater than {@code target}.
* Return true if there is such a doc and false otherwise.
*/
abstract boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException;
/**
* Advance the iterator exactly to the position corresponding to the given {@code target} and
* return whether this document exists.
*/
abstract boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException;
}
/**
* If the distance between the current position and the target is > 8 words, the rank cache will
* be used to guarantee a worst-case of 1 rank-lookup and 7 word-read-and-count-bits operations.
* Note: This does not guarantee a skip up to target, only up to nearest rank boundary. It is the
* responsibility of the caller to iterate further to reach target.
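 *
 * <p>Illustrative arithmetic with the default {@code denseRankPower} of 9: for {@code
 * targetInBlock = 33000}, {@code rankIndex = 33000 >> 9 = 64}, the rank-aligned word index is
 * {@code 64 << 9 >> 6 = 512} and the target word is {@code 33000 >>> 6 = 515}, so after the rank
 * lookup only words 513-515 remain to be read and bit-counted by the caller.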
*
* @param disi standard DISI.
* @param targetInBlock lower 16 bits of the target
* @throws IOException if a DISI seek failed.
*/
private static void rankSkip(IndexedDISI disi, int targetInBlock) throws IOException {
assert disi.denseRankPower >= 0 : disi.denseRankPower;
// Resolve the rank as close to targetInBlock as possible (maximum distance is 8 longs)
    // Note: denseBitmapOffset is tracked on block open, so it is absolute (i.e. don't add origo)
final int rankIndex =
targetInBlock >> disi.denseRankPower; // Default is 9 (8 longs: 2^3 * 2^6 = 512 docIDs)
final int rank =
(disi.denseRankTable[rankIndex << 1] & 0xFF) << 8
| (disi.denseRankTable[(rankIndex << 1) + 1] & 0xFF);
// Position the counting logic just after the rank point
final int rankAlignedWordIndex = rankIndex << disi.denseRankPower >> 6;
disi.slice.seek(disi.denseBitmapOffset + rankAlignedWordIndex * Long.BYTES);
long rankWord = disi.slice.readLong();
int denseNOO = rank + Long.bitCount(rankWord);
disi.wordIndex = rankAlignedWordIndex;
disi.word = rankWord;
disi.numberOfOnes = disi.denseOrigoIndex + denseNOO;
}
}