/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.lucene50;


import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_DATA_SIZE;
import static org.apache.lucene.codecs.lucene50.ForUtil.MAX_ENCODED_SIZE;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.DOC_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.PAY_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.POS_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.VERSION_START;

/**
 * Concrete class that reads the docID list (and, when present, freqs,
 * positions, offsets and payloads) written by this postings format.
 *
 * @lucene.experimental
 */
public final class Lucene50PostingsReader extends PostingsReaderBase {

  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Lucene50PostingsReader.class);

  private final IndexInput docIn;
  private final IndexInput posIn;
  private final IndexInput payIn;

  final ForUtil forUtil;
  private int version;

  /** Sole constructor. */
  public Lucene50PostingsReader(SegmentReadState state) throws IOException {
    boolean success = false;
    IndexInput docIn = null;
    IndexInput posIn = null;
    IndexInput payIn = null;
    
    // NOTE: these data files are too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
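    // For reference, the footer validated here is the 16 bytes written by
    // CodecUtil.writeFooter: a FOOTER_MAGIC int, an algorithm-ID int
    // (currently always 0, i.e. CRC-32), and the 8-byte checksum value.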
    
    String docName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.DOC_EXTENSION);
    try {
      docIn = state.directory.openInput(docName, state.context);
      version = CodecUtil.checkIndexHeader(docIn, DOC_CODEC, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
      forUtil = new ForUtil(docIn);
      CodecUtil.retrieveChecksum(docIn);

      if (state.fieldInfos.hasProx()) {
        String proxName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.POS_EXTENSION);
        posIn = state.directory.openInput(proxName, state.context);
        CodecUtil.checkIndexHeader(posIn, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
        CodecUtil.retrieveChecksum(posIn);

        if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
          String payName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.PAY_EXTENSION);
          payIn = state.directory.openInput(payName, state.context);
          CodecUtil.checkIndexHeader(payIn, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
          CodecUtil.retrieveChecksum(payIn);
        }
      }

      this.docIn = docIn;
      this.posIn = posIn;
      this.payIn = payIn;
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(docIn, posIn, payIn);
      }
    }
  }

  @Override
  public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
    // Make sure we are talking to the matching postings writer
    CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    final int indexBlockSize = termsIn.readVInt();
    if (indexBlockSize != BLOCK_SIZE) {
      throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
    }
  }

  /**
   * Read values that have been written using variable-length encoding instead of bit-packing.
   */
  static void readVIntBlock(IndexInput docIn, int[] docBuffer,
      int[] freqBuffer, int num, boolean indexHasFreq) throws IOException {
    if (indexHasFreq) {
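      // A worked example of the encoding decoded below (matching what the
      // postings writer emits for the vInt tail block): each entry is one
      // vInt holding (docDelta << 1) | (freq == 1 ? 1 : 0). delta=5/freq=1
      // is the single vInt 11; delta=5/freq=3 is the vInt 10 followed by a
      // second vInt 3.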
      for(int i=0;i<num;i++) {
        final int code = docIn.readVInt();
        docBuffer[i] = code >>> 1;
        if ((code & 1) != 0) {
          freqBuffer[i] = 1;
        } else {
          freqBuffer[i] = docIn.readVInt();
        }
      }
    } else {
      for(int i=0;i<num;i++) {
        docBuffer[i] = docIn.readVInt();
      }
    }
  }

  @Override
  public BlockTermState newTermState() {
    return new IntBlockTermState();
  }

  @Override
  public void close() throws IOException {
    IOUtils.close(docIn, posIn, payIn);
  }

  @Override
  public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
      throws IOException {
    final IntBlockTermState termState = (IntBlockTermState) _termState;
    final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    final boolean fieldHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    final boolean fieldHasPayloads = fieldInfo.hasPayloads();

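    // What follows mirrors the per-term metadata written at index time: file
    // pointers are delta-coded from the previous term (reset when 'absolute'):
    // docStartFP delta, [posStartFP delta, [payStartFP delta]], then
    // [singletonDocID when docFreq == 1], [lastPosBlockOffset when the field
    // has positions and totalTermFreq > BLOCK_SIZE], and [skipOffset when
    // docFreq > BLOCK_SIZE].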
    if (absolute) {
      termState.docStartFP = 0;
      termState.posStartFP = 0;
      termState.payStartFP = 0;
    }

    termState.docStartFP += in.readVLong();
    if (fieldHasPositions) {
      termState.posStartFP += in.readVLong();
      if (fieldHasOffsets || fieldHasPayloads) {
        termState.payStartFP += in.readVLong();
      }
    }
    if (termState.docFreq == 1) {
      termState.singletonDocID = in.readVInt();
    } else {
      termState.singletonDocID = -1;
    }
    if (fieldHasPositions) {
      if (termState.totalTermFreq > BLOCK_SIZE) {
        termState.lastPosBlockOffset = in.readVLong();
      } else {
        termState.lastPosBlockOffset = -1;
      }
    }
    if (termState.docFreq > BLOCK_SIZE) {
      termState.skipOffset = in.readVLong();
    } else {
      termState.skipOffset = -1;
    }
  }
    
  @Override
  public PostingsEnum postings(FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags) throws IOException {
    
    boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;

    if (indexHasPositions == false || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) {
      BlockDocsEnum docsEnum;
      if (reuse instanceof BlockDocsEnum) {
        docsEnum = (BlockDocsEnum) reuse;
        if (!docsEnum.canReuse(docIn, fieldInfo)) {
          docsEnum = new BlockDocsEnum(fieldInfo);
        }
      } else {
        docsEnum = new BlockDocsEnum(fieldInfo);
      }
      return docsEnum.reset((IntBlockTermState) termState, flags);
    } else {
      EverythingEnum everythingEnum;
      if (reuse instanceof EverythingEnum) {
        everythingEnum = (EverythingEnum) reuse;
        if (!everythingEnum.canReuse(docIn, fieldInfo)) {
          everythingEnum = new EverythingEnum(fieldInfo);
        }
      } else {
        everythingEnum = new EverythingEnum(fieldInfo);
      }
      return everythingEnum.reset((IntBlockTermState) termState, flags);
    }
  }
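
  // A hypothetical caller sketch (the variable names are illustrative, not
  // from this file): the terms dictionary hands each term's BlockTermState to
  // postings(...) and iterates the returned enum:
  //
  //   PostingsEnum pe = reader.postings(fieldInfo, termState, null, PostingsEnum.FREQS);
  //   for (int d = pe.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = pe.nextDoc()) {
  //     int f = pe.freq(); // freqs are decoded lazily by BlockDocsEnum
  //   }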

  @Override
  public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
    if (state.docFreq <= BLOCK_SIZE || version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
      // no skip data
      return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
    }

    final boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    final boolean indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    final boolean indexHasPayloads = fieldInfo.hasPayloads();

    if (indexHasPositions &&
        PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) &&
        (indexHasOffsets == false || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false) &&
        (indexHasPayloads == false || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
      return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
    }

    return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags);
  }

  final class BlockDocsEnum extends PostingsEnum {
    private final byte[] encoded;
    
    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
    private final int[] freqBuffer = new int[MAX_DATA_SIZE];

    private int docBufferUpto;

    private Lucene50SkipReader skipper;
    private boolean skipped;

    final IndexInput startDocIn;

    IndexInput docIn;
    final boolean indexHasFreq;
    final boolean indexHasPos;
    final boolean indexHasOffsets;
    final boolean indexHasPayloads;

    private int docFreq;                              // number of docs in this posting list
    private long totalTermFreq;                       // sum of freqs in this posting list (or docFreq when omitted)
    private int docUpto;                              // how many docs we've read
    private int doc;                                  // doc we last read
    private int accum;                                // accumulator for doc deltas

    // Where this term's postings start in the .doc file:
    private long docTermStartFP;

    // Where this term's skip data starts (after
    // docTermStartFP) in the .doc file (or -1 if there is
    // no skip data for this term):
    private long skipOffset;

    // docID for next skip point; we won't use the skipper if
    // the target docID is not larger than this
    private int nextSkipDoc;
    
    private boolean needsFreq; // true if the caller actually needs frequencies
    // as we read freqs lazily, isFreqsRead shows whether freqs have been read for the current block;
    // always true when we don't have freqs (indexHasFreq=false) or don't need freqs (needsFreq=false)
    private boolean isFreqsRead;
    private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

    public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
      this.startDocIn = Lucene50PostingsReader.this.docIn;
      this.docIn = null;
      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      indexHasPayloads = fieldInfo.hasPayloads();
      encoded = new byte[MAX_ENCODED_SIZE];    
    }

    public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
      return docIn == startDocIn &&
        indexHasFreq == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) &&
        indexHasPos == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) &&
        indexHasPayloads == fieldInfo.hasPayloads();
    }
    
    public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
      docFreq = termState.docFreq;
      totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq;
      docTermStartFP = termState.docStartFP;
      skipOffset = termState.skipOffset;
      singletonDocID = termState.singletonDocID;
      if (docFreq > 1) {
        if (docIn == null) {
          // lazy init
          docIn = startDocIn.clone();
        }
        docIn.seek(docTermStartFP);
      }

      doc = -1;
      this.needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
      this.isFreqsRead = true;
      if (indexHasFreq == false || needsFreq == false) {
        Arrays.fill(freqBuffer, 1);
      }
      accum = 0;
      docUpto = 0;
      nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
      docBufferUpto = BLOCK_SIZE;
      skipped = false;
      return this;
    }

    @Override
    public int freq() throws IOException {
      if (isFreqsRead == false) {
        forUtil.readBlock(docIn, encoded, freqBuffer); // read freqs for this block
        isFreqsRead = true;
      }
      return freqBuffer[docBufferUpto-1];
    }

    @Override
    public int nextPosition() throws IOException {
      return -1;
    }

    @Override
    public int startOffset() throws IOException {
      return -1;
    }

    @Override
    public int endOffset() throws IOException {
      return -1;
    }

    @Override
    public BytesRef getPayload() throws IOException {
      return null;
    }

    @Override
    public int docID() {
      return doc;
    }
    
    private void refillDocs() throws IOException {
      // Check if we skipped reading the previous block of freqs, and if yes, position docIn after it
      if (isFreqsRead == false) {
        forUtil.skipBlock(docIn);
        isFreqsRead = true;
      }
      
      final int left = docFreq - docUpto;
      assert left > 0;

      if (left >= BLOCK_SIZE) {
        forUtil.readBlock(docIn, encoded, docDeltaBuffer);

        if (indexHasFreq) {
          if (needsFreq) {
            isFreqsRead = false;
          } else {
            forUtil.skipBlock(docIn); // skip over freqs if we don't need them at all
          }
        }
      } else if (docFreq == 1) {
        docDeltaBuffer[0] = singletonDocID;
        freqBuffer[0] = (int) totalTermFreq;
      } else {
        // Read vInts:
        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
      }
      docBufferUpto = 0;
    }

    @Override
    public int nextDoc() throws IOException {
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs(); // we don't need to load freqs for now (will be loaded later if necessary)
      }

      accum += docDeltaBuffer[docBufferUpto];
      docUpto++;

      doc = accum;
      docBufferUpto++;
      return doc;
    }

    @Override
    public int advance(int target) throws IOException {
      // current skip docID < docIDs generated from current buffer <= next skip docID
      // we don't need to skip if target is buffered already
      if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {

        if (skipper == null) {
          // Lazy init: first time this enum has ever been used for skipping
          skipper = new Lucene50SkipReader(version,
                                           docIn.clone(),
                                           MAX_SKIP_LEVELS,
                                           indexHasPos,
                                           indexHasOffsets,
                                           indexHasPayloads);
        }

        if (!skipped) {
          assert skipOffset != -1;
          // This is the first time this enum has skipped
          // since reset() was called; load the skip data:
          skipper.init(docTermStartFP+skipOffset, docTermStartFP, 0, 0, docFreq);
          skipped = true;
        }

        // always add one to fix the result, since the skip position in Lucene50SkipReader
        // is a little different from MultiLevelSkipListReader
        final int newDocUpto = skipper.skipTo(target) + 1; 

        if (newDocUpto > docUpto) {
          // Skipper moved
          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
          accum = skipper.getDoc();               // actually, this is just lastSkipEntry
          docIn.seek(skipper.getDocPointer());    // now point to the block we want to search
          // even if freqs were not read from the previous block, we will mark them as read,
          // as we don't need to skip the previous block freqs in refillDocs,
          // as we have already positioned docIn where it needs to be.
          isFreqsRead = true;
        }
        // next time we call advance, this is used to 
        // foresee whether skipper is necessary.
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      // Now scan... this is an inlined/pared down version
      // of nextDoc():
      while (true) {
        accum += docDeltaBuffer[docBufferUpto];
        docUpto++;

        if (accum >= target) {
          break;
        }
        docBufferUpto++;
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
      }

      docBufferUpto++;
      return doc = accum;
    }
    
    @Override
    public long cost() {
      return docFreq;
    }
  }

  // Also handles payloads + offsets
  final class EverythingEnum extends PostingsEnum {

    private final byte[] encoded;

    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];

    private final int[] payloadLengthBuffer;
    private final int[] offsetStartDeltaBuffer;
    private final int[] offsetLengthBuffer;

    private byte[] payloadBytes;
    private int payloadByteUpto;
    private int payloadLength;

    private int lastStartOffset;
    private int startOffset;
    private int endOffset;

    private int docBufferUpto;
    private int posBufferUpto;

    private Lucene50SkipReader skipper;
    private boolean skipped;

    final IndexInput startDocIn;

    IndexInput docIn;
    final IndexInput posIn;
    final IndexInput payIn;
    final BytesRef payload;

    final boolean indexHasOffsets;
    final boolean indexHasPayloads;

    private int docFreq;                              // number of docs in this posting list
    private long totalTermFreq;                       // number of positions in this posting list
    private int docUpto;                              // how many docs we've read
    private int doc;                                  // doc we last read
    private int accum;                                // accumulator for doc deltas
    private int freq;                                 // freq we last read
    private int position;                             // current position

    // how many positions "behind" we are; nextPosition must
    // skip these to "catch up":
    private int posPendingCount;

    // Lazy pos seek: if != -1 then we must seek to this FP
    // before reading positions:
    private long posPendingFP;

    // Lazy pay seek: if != -1 then we must seek to this FP
    // before reading payloads/offsets:
    private long payPendingFP;

    // Where this term's postings start in the .doc file:
    private long docTermStartFP;

    // Where this term's postings start in the .pos file:
    private long posTermStartFP;

    // Where this term's payloads/offsets start in the .pay
    // file:
    private long payTermStartFP;

    // File pointer where the last (vInt encoded) pos delta
    // block is.  We need this to know whether to bulk
    // decode vs vInt decode the block:
    private long lastPosBlockFP;

    // Where this term's skip data starts (after
    // docTermStartFP) in the .doc file (or -1 if there is
    // no skip data for this term):
    private long skipOffset;

    private int nextSkipDoc;

    private boolean needsOffsets; // true if we actually need offsets
    private boolean needsPayloads; // true if we actually need payloads
    private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

    public EverythingEnum(FieldInfo fieldInfo) throws IOException {
      indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      indexHasPayloads = fieldInfo.hasPayloads();

      this.startDocIn = Lucene50PostingsReader.this.docIn;
      this.docIn = null;
      this.posIn = Lucene50PostingsReader.this.posIn.clone();
      if (indexHasOffsets || indexHasPayloads) {
        this.payIn = Lucene50PostingsReader.this.payIn.clone();
      } else {
        this.payIn = null;
      }
      encoded = new byte[MAX_ENCODED_SIZE];
      if (indexHasOffsets) {
        offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
        offsetLengthBuffer = new int[MAX_DATA_SIZE];
      } else {
        offsetStartDeltaBuffer = null;
        offsetLengthBuffer = null;
        startOffset = -1;
        endOffset = -1;
      }

      if (indexHasPayloads) {
        payloadLengthBuffer = new int[MAX_DATA_SIZE];
        payloadBytes = new byte[128];
        payload = new BytesRef();
      } else {
        payloadLengthBuffer = null;
        payloadBytes = null;
        payload = null;
      }
    }

    public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
      return docIn == startDocIn &&
        indexHasOffsets == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) &&
        indexHasPayloads == fieldInfo.hasPayloads();
    }

    public EverythingEnum reset(IntBlockTermState termState, int flags) throws IOException {
      docFreq = termState.docFreq;
      docTermStartFP = termState.docStartFP;
      posTermStartFP = termState.posStartFP;
      payTermStartFP = termState.payStartFP;
      skipOffset = termState.skipOffset;
      totalTermFreq = termState.totalTermFreq;
      singletonDocID = termState.singletonDocID;
      if (docFreq > 1) {
        if (docIn == null) {
          // lazy init
          docIn = startDocIn.clone();
        }
        docIn.seek(docTermStartFP);
      }
      posPendingFP = posTermStartFP;
      payPendingFP = payTermStartFP;
      posPendingCount = 0;
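      // lastPosBlockFP marks where the final vInt-encoded position block
      // starts: fewer than BLOCK_SIZE positions means everything lives in
      // that vInt tail (== posTermStartFP); exactly BLOCK_SIZE means a single
      // packed block and no tail (-1); otherwise the tail begins
      // lastPosBlockOffset bytes after posTermStartFP.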
      if (termState.totalTermFreq < BLOCK_SIZE) {
        lastPosBlockFP = posTermStartFP;
      } else if (termState.totalTermFreq == BLOCK_SIZE) {
        lastPosBlockFP = -1;
      } else {
        lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
      }

      this.needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
      this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);

      doc = -1;
      accum = 0;
      docUpto = 0;
      if (docFreq > BLOCK_SIZE) {
        nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
      } else {
        nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
      }
      docBufferUpto = BLOCK_SIZE;
      skipped = false;
      return this;
    }

    @Override
    public int freq() throws IOException {
      return freq;
    }

    @Override
    public int docID() {
      return doc;
    }

    private void refillDocs() throws IOException {
      final int left = docFreq - docUpto;
      assert left > 0;

      if (left >= BLOCK_SIZE) {
        forUtil.readBlock(docIn, encoded, docDeltaBuffer);
        forUtil.readBlock(docIn, encoded, freqBuffer);
      } else if (docFreq == 1) {
        docDeltaBuffer[0] = singletonDocID;
        freqBuffer[0] = (int) totalTermFreq;
      } else {
        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
      }
      docBufferUpto = 0;
    }

    private void refillPositions() throws IOException {
      if (posIn.getFilePointer() == lastPosBlockFP) {
        final int count = (int) (totalTermFreq % BLOCK_SIZE);
        int payloadLength = 0;
        int offsetLength = 0;
        payloadByteUpto = 0;
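        // vInt tail block: with payloads each entry is (posDelta << 1) with
        // the low bit flagging a changed payload length; when set, a new
        // payloadLength vInt follows, then the payload bytes. Without
        // payloads the vInt is the raw position delta. Offset entries apply
        // the same low-bit trick to a changed offset length.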
        for(int i=0;i<count;i++) {
          int code = posIn.readVInt();
          if (indexHasPayloads) {
            if ((code & 1) != 0) {
              payloadLength = posIn.readVInt();
            }
            payloadLengthBuffer[i] = payloadLength;
            posDeltaBuffer[i] = code >>> 1;
            if (payloadLength != 0) {
              if (payloadByteUpto + payloadLength > payloadBytes.length) {
                payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
              }
              posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
              payloadByteUpto += payloadLength;
            }
          } else {
            posDeltaBuffer[i] = code;
          }

          if (indexHasOffsets) {
            int deltaCode = posIn.readVInt();
            if ((deltaCode & 1) != 0) {
              offsetLength = posIn.readVInt();
            }
            offsetStartDeltaBuffer[i] = deltaCode >>> 1;
            offsetLengthBuffer[i] = offsetLength;
          }
        }
        payloadByteUpto = 0;
      } else {
        forUtil.readBlock(posIn, encoded, posDeltaBuffer);

        if (indexHasPayloads) {
          if (needsPayloads) {
            forUtil.readBlock(payIn, encoded, payloadLengthBuffer);
            int numBytes = payIn.readVInt();

            if (numBytes > payloadBytes.length) {
              payloadBytes = ArrayUtil.grow(payloadBytes, numBytes);
            }
            payIn.readBytes(payloadBytes, 0, numBytes);
          } else {
            // this works, because when writing a vint block we always force the first length to be written
            forUtil.skipBlock(payIn); // skip over lengths
            int numBytes = payIn.readVInt(); // read length of payloadBytes
            payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
          }
          payloadByteUpto = 0;
        }

        if (indexHasOffsets) {
          if (needsOffsets) {
            forUtil.readBlock(payIn, encoded, offsetStartDeltaBuffer);
            forUtil.readBlock(payIn, encoded, offsetLengthBuffer);
          } else {
            // this works, because when writing a vint block we always force the first length to be written
            forUtil.skipBlock(payIn); // skip over starts
            forUtil.skipBlock(payIn); // skip over lengths
          }
        }
      }
    }

    @Override
    public int nextDoc() throws IOException {
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      accum += docDeltaBuffer[docBufferUpto];
      freq = freqBuffer[docBufferUpto];
      posPendingCount += freq;
      docBufferUpto++;
      docUpto++;

      doc = accum;
      position = 0;
      lastStartOffset = 0;
      return doc;
    }

    @Override
    public int advance(int target) throws IOException {
      // TODO: make frq block load lazy/skippable

      if (target > nextSkipDoc) {
        if (skipper == null) {
          // Lazy init: first time this enum has ever been used for skipping
          skipper = new Lucene50SkipReader(version,
                                        docIn.clone(),
                                        MAX_SKIP_LEVELS,
                                        true,
                                        indexHasOffsets,
                                        indexHasPayloads);
        }

        if (!skipped) {
          assert skipOffset != -1;
          // This is the first time this enum has skipped
          // since reset() was called; load the skip data:
          skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
          skipped = true;
        }

        final int newDocUpto = skipper.skipTo(target) + 1;

        if (newDocUpto > docUpto) {
          // Skipper moved
          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
          accum = skipper.getDoc();
          docIn.seek(skipper.getDocPointer());
          posPendingFP = skipper.getPosPointer();
          payPendingFP = skipper.getPayPointer();
          posPendingCount = skipper.getPosBufferUpto();
          lastStartOffset = 0; // new document
          payloadByteUpto = skipper.getPayloadByteUpto();
        }
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      // Now scan:
      while (true) {
        accum += docDeltaBuffer[docBufferUpto];
        freq = freqBuffer[docBufferUpto];
        posPendingCount += freq;
        docBufferUpto++;
        docUpto++;

        if (accum >= target) {
          break;
        }
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
      }

      position = 0;
      lastStartOffset = 0;
      return doc = accum;
    }

    // TODO: in theory we could avoid loading frq block
    // when not needed, ie, use skip data to load how far to
    // seek the pos pointer ... instead of having to load frq
    // blocks only to sum up how many positions to skip
    private void skipPositions() throws IOException {
      // Skip positions now:
      int toSkip = posPendingCount - freq;
      // if (DEBUG) {
      //   System.out.println("      FPR.skipPositions: toSkip=" + toSkip);
      // }

      final int leftInBlock = BLOCK_SIZE - posBufferUpto;
      if (toSkip < leftInBlock) {
        int end = posBufferUpto + toSkip;
        while(posBufferUpto < end) {
          if (indexHasPayloads) {
            payloadByteUpto += payloadLengthBuffer[posBufferUpto];
          }
          posBufferUpto++;
        }
      } else {
        toSkip -= leftInBlock;
        while(toSkip >= BLOCK_SIZE) {
          assert posIn.getFilePointer() != lastPosBlockFP;
          forUtil.skipBlock(posIn);

          if (indexHasPayloads) {
            // Skip payloadLength block:
            forUtil.skipBlock(payIn);

            // Skip payloadBytes block:
            int numBytes = payIn.readVInt();
            payIn.seek(payIn.getFilePointer() + numBytes);
          }

          if (indexHasOffsets) {
            forUtil.skipBlock(payIn);
            forUtil.skipBlock(payIn);
          }
          toSkip -= BLOCK_SIZE;
        }
        refillPositions();
        payloadByteUpto = 0;
        posBufferUpto = 0;
        while(posBufferUpto < toSkip) {
          if (indexHasPayloads) {
            payloadByteUpto += payloadLengthBuffer[posBufferUpto];
          }
          posBufferUpto++;
        }
      }

      position = 0;
      lastStartOffset = 0;
    }

    @Override
    public int nextPosition() throws IOException {
      assert posPendingCount > 0;

      if (posPendingFP != -1) {
        posIn.seek(posPendingFP);
        posPendingFP = -1;

        if (payPendingFP != -1 && payIn != null) {
          payIn.seek(payPendingFP);
          payPendingFP = -1;
        }

        // Force buffer refill:
        posBufferUpto = BLOCK_SIZE;
      }

      if (posPendingCount > freq) {
        skipPositions();
        posPendingCount = freq;
      }

      if (posBufferUpto == BLOCK_SIZE) {
        refillPositions();
        posBufferUpto = 0;
      }
      position += posDeltaBuffer[posBufferUpto];

      if (indexHasPayloads) {
        payloadLength = payloadLengthBuffer[posBufferUpto];
        payload.bytes = payloadBytes;
        payload.offset = payloadByteUpto;
        payload.length = payloadLength;
        payloadByteUpto += payloadLength;
      }

      if (indexHasOffsets) {
        startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto];
        endOffset = startOffset + offsetLengthBuffer[posBufferUpto];
        lastStartOffset = startOffset;
      }

      posBufferUpto++;
      posPendingCount--;
      return position;
    }

    @Override
    public int startOffset() {
      return startOffset;
    }

    @Override
    public int endOffset() {
      return endOffset;
    }

    @Override
    public BytesRef getPayload() {
      if (payloadLength == 0) {
        return null;
      } else {
        return payload;
      }
    }

    @Override
    public long cost() {
      return docFreq;
    }
  }

  final class BlockImpactsPostingsEnum extends ImpactsEnum {

    private final byte[] encoded;

    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];

    private int docBufferUpto;
    private int posBufferUpto;

    private final Lucene50ScoreSkipReader skipper;

    final IndexInput docIn;
    final IndexInput posIn;

    final boolean indexHasOffsets;
    final boolean indexHasPayloads;

    private int docFreq;                              // number of docs in this posting list
    private long totalTermFreq;                       // number of positions in this posting list
    private int docUpto;                              // how many docs we've read
    private int doc;                                  // doc we last read
    private int accum;                                // accumulator for doc deltas
    private int freq;                                 // freq we last read
    private int position;                             // current position

    // how many positions "behind" we are; nextPosition must
    // skip these to "catch up":
    private int posPendingCount;

    // Lazy pos seek: if != -1 then we must seek to this FP
    // before reading positions:
    private long posPendingFP;

    // Where this term's postings start in the .doc file:
    private long docTermStartFP;

    // Where this term's postings start in the .pos file:
    private long posTermStartFP;

    // Where this term's payloads/offsets start in the .pay
    // file:
    private long payTermStartFP;

    // File pointer where the last (vInt encoded) pos delta
    // block is.  We need this to know whether to bulk
    // decode vs vInt decode the block:
    private long lastPosBlockFP;

    private int nextSkipDoc = -1;

    private long seekTo = -1;

    public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState) throws IOException {
      indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      indexHasPayloads = fieldInfo.hasPayloads();

      this.docIn = Lucene50PostingsReader.this.docIn.clone();

      encoded = new byte[MAX_ENCODED_SIZE];

      this.posIn = Lucene50PostingsReader.this.posIn.clone();

      docFreq = termState.docFreq;
      docTermStartFP = termState.docStartFP;
      posTermStartFP = termState.posStartFP;
      payTermStartFP = termState.payStartFP;
      totalTermFreq = termState.totalTermFreq;
      docIn.seek(docTermStartFP);
      posPendingFP = posTermStartFP;
      posPendingCount = 0;
      if (termState.totalTermFreq < BLOCK_SIZE) {
        lastPosBlockFP = posTermStartFP;
      } else if (termState.totalTermFreq == BLOCK_SIZE) {
        lastPosBlockFP = -1;
      } else {
        lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
      }

      doc = -1;
      accum = 0;
      docUpto = 0;
      docBufferUpto = BLOCK_SIZE;

      skipper = new Lucene50ScoreSkipReader(version,
          docIn.clone(),
          MAX_SKIP_LEVELS,
          true,
          indexHasOffsets,
          indexHasPayloads);
      skipper.init(docTermStartFP+termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
    }

    @Override
    public int freq() throws IOException {
      return freq;
    }

    @Override
    public int docID() {
      return doc;
    }

    private void refillDocs() throws IOException {
      final int left = docFreq - docUpto;
      assert left > 0;

      if (left >= BLOCK_SIZE) {
        forUtil.readBlock(docIn, encoded, docDeltaBuffer);
        forUtil.readBlock(docIn, encoded, freqBuffer);
      } else {
        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
      }
      docBufferUpto = 0;
    }

    private void refillPositions() throws IOException {
      if (posIn.getFilePointer() == lastPosBlockFP) {
        final int count = (int) (totalTermFreq % BLOCK_SIZE);
        int payloadLength = 0;
        for(int i=0;i<count;i++) {
          int code = posIn.readVInt();
          if (indexHasPayloads) {
            if ((code & 1) != 0) {
              payloadLength = posIn.readVInt();
            }
            posDeltaBuffer[i] = code >>> 1;
            if (payloadLength != 0) {
              posIn.seek(posIn.getFilePointer() + payloadLength);
            }
          } else {
            posDeltaBuffer[i] = code;
          }
          if (indexHasOffsets) {
            if ((posIn.readVInt() & 1) != 0) {
              // offset length changed
              posIn.readVInt();
            }
          }
        }
      } else {
        forUtil.readBlock(posIn, encoded, posDeltaBuffer);
      }
    }

    @Override
    public void advanceShallow(int target) throws IOException {
      if (target > nextSkipDoc) {
        // always add one to fix the result, since the skip position in Lucene50SkipReader
        // is a little different from MultiLevelSkipListReader
        final int newDocUpto = skipper.skipTo(target) + 1;

        if (newDocUpto > docUpto) {
          // Skipper moved
          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
          accum = skipper.getDoc();
          posPendingFP = skipper.getPosPointer();
          posPendingCount = skipper.getPosBufferUpto();
          seekTo = skipper.getDocPointer();       // delay the seek
        }
        // next time we call advance, this is used to 
        // foresee whether skipper is necessary.
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      assert nextSkipDoc >= target;
    }

    @Override
    public Impacts getImpacts() throws IOException {
      advanceShallow(doc);
      return skipper.getImpacts();
    }
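
    // Hypothetical consumer sketch: a scorer may call advanceShallow(target)
    // and then getImpacts() to read per-level (freq, norm) pairs, compute a
    // score upper bound, and only call advance(target) on blocks that can
    // still compete.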

    @Override
    public int nextDoc() throws IOException {
      return advance(doc + 1);
    }

    @Override
    public int advance(int target) throws IOException {
      if (target > nextSkipDoc) {
        advanceShallow(target);
      }
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        if (seekTo >= 0) {
          docIn.seek(seekTo);
          seekTo = -1;
        }
        refillDocs();
      }

      // Now scan:
      while (true) {
        accum += docDeltaBuffer[docBufferUpto];
        freq = freqBuffer[docBufferUpto];
        posPendingCount += freq;
        docBufferUpto++;
        docUpto++;

        if (accum >= target) {
          break;
        }
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
      }
      position = 0;

      return doc = accum;
    }

    // TODO: in theory we could avoid loading frq block
    // when not needed, ie, use skip data to load how far to
    // seek the pos pointer ... instead of having to load frq
    // blocks only to sum up how many positions to skip
    private void skipPositions() throws IOException {
      // Skip positions now:
      int toSkip = posPendingCount - freq;

      final int leftInBlock = BLOCK_SIZE - posBufferUpto;
      if (toSkip < leftInBlock) {
        posBufferUpto += toSkip;
      } else {
        toSkip -= leftInBlock;
        while(toSkip >= BLOCK_SIZE) {
          assert posIn.getFilePointer() != lastPosBlockFP;
          forUtil.skipBlock(posIn);
          toSkip -= BLOCK_SIZE;
        }
        refillPositions();
        posBufferUpto = toSkip;
      }

      position = 0;
    }

    @Override
    public int nextPosition() throws IOException {
      assert posPendingCount > 0;

      if (posPendingFP != -1) {
        posIn.seek(posPendingFP);
        posPendingFP = -1;

        // Force buffer refill:
        posBufferUpto = BLOCK_SIZE;
      }

      if (posPendingCount > freq) {
        skipPositions();
        posPendingCount = freq;
      }

      if (posBufferUpto == BLOCK_SIZE) {
        refillPositions();
        posBufferUpto = 0;
      }
      position += posDeltaBuffer[posBufferUpto++];

      posPendingCount--;
      return position;
    }

    @Override
    public int startOffset() {
      return -1;
    }

    @Override
    public int endOffset() {
      return -1;
    }

    @Override
    public BytesRef getPayload() {
      return null;
    }

    @Override
    public long cost() {
      return docFreq;
    }

  }

  final class BlockImpactsEverythingEnum extends ImpactsEnum {
    
    private final byte[] encoded;

    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];

    private final int[] payloadLengthBuffer;
    private final int[] offsetStartDeltaBuffer;
    private final int[] offsetLengthBuffer;

    private byte[] payloadBytes;
    private int payloadByteUpto;
    private int payloadLength;

    private int lastStartOffset;
    private int startOffset = -1;
    private int endOffset = -1;

    private int docBufferUpto;
    private int posBufferUpto;

    private final Lucene50ScoreSkipReader skipper;

    final IndexInput docIn;
    final IndexInput posIn;
    final IndexInput payIn;
    final BytesRef payload;

    final boolean indexHasFreq;
    final boolean indexHasPos;
    final boolean indexHasOffsets;
    final boolean indexHasPayloads;

    private int docFreq;                              // number of docs in this posting list
    private long totalTermFreq;                       // number of positions in this posting list
    private int docUpto;                              // how many docs we've read
    private int posDocUpTo;                           // for how many docs we've read positions, offsets, and payloads
    private int doc;                                  // doc we last read
    private int accum;                                // accumulator for doc deltas
    private int position;                             // current position

    // how many positions "behind" we are; nextPosition must
    // skip these to "catch up":
    private int posPendingCount;

    // Lazy pos seek: if != -1 then we must seek to this FP
    // before reading positions:
    private long posPendingFP;

    // Lazy pay seek: if != -1 then we must seek to this FP
    // before reading payloads/offsets:
    private long payPendingFP;

    // Where this term's postings start in the .doc file:
    private long docTermStartFP;

    // Where this term's postings start in the .pos file:
    private long posTermStartFP;

    // Where this term's payloads/offsets start in the .pay
    // file:
    private long payTermStartFP;

    // File pointer where the last (vInt encoded) pos delta
    // block is.  We need this to know whether to bulk
    // decode vs vInt decode the block:
    private long lastPosBlockFP;

    private int nextSkipDoc = -1;
    
    private final boolean needsPositions;
    private final boolean needsOffsets; // true if we actually need offsets
    private final boolean needsPayloads; // true if we actually need payloads

    private boolean isFreqsRead; // shows if freqs for the current doc block are read into freqBuffer
    
    private long seekTo = -1;
    
    public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, int flags) throws IOException {
      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      indexHasPayloads = fieldInfo.hasPayloads();
      
      needsPositions = PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS);
      needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
      needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
      
      this.docIn = Lucene50PostingsReader.this.docIn.clone();
      
      encoded = new byte[MAX_ENCODED_SIZE];

      if (indexHasPos && needsPositions) {
        this.posIn = Lucene50PostingsReader.this.posIn.clone();
      } else {
        this.posIn = null;
      }
      
      if ((indexHasOffsets && needsOffsets) || (indexHasPayloads && needsPayloads)) {
        this.payIn = Lucene50PostingsReader.this.payIn.clone();
      } else {
        this.payIn = null;
      }
      
      if (indexHasOffsets) {
        offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
        offsetLengthBuffer = new int[MAX_DATA_SIZE];
      } else {
        offsetStartDeltaBuffer = null;
        offsetLengthBuffer = null;
        startOffset = -1;
        endOffset = -1;
      }

      if (indexHasPayloads) {
        payloadLengthBuffer = new int[MAX_DATA_SIZE];
        payloadBytes = new byte[128];
        payload = new BytesRef();
      } else {
        payloadLengthBuffer = null;
        payloadBytes = null;
        payload = null;
      }

      docFreq = termState.docFreq;
      docTermStartFP = termState.docStartFP;
      posTermStartFP = termState.posStartFP;
      payTermStartFP = termState.payStartFP;
      totalTermFreq = termState.totalTermFreq;
      docIn.seek(docTermStartFP);
      posPendingFP = posTermStartFP;
      payPendingFP = payTermStartFP;
      posPendingCount = 0;
      if (termState.totalTermFreq < BLOCK_SIZE) {
        lastPosBlockFP = posTermStartFP;
      } else if (termState.totalTermFreq == BLOCK_SIZE) {
        lastPosBlockFP = -1;
      } else {
        lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
      }

      doc = -1;
      accum = 0;
      docUpto = 0;
      posDocUpTo = 0;
      isFreqsRead = true;
      docBufferUpto = BLOCK_SIZE;

      skipper = new Lucene50ScoreSkipReader(version,
          docIn.clone(),
          MAX_SKIP_LEVELS,
          indexHasPos,
          indexHasOffsets,
          indexHasPayloads);
      skipper.init(docTermStartFP+termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);

      if (indexHasFreq == false) {
        Arrays.fill(freqBuffer, 1);
      }
    }
    
    @Override
    public int freq() throws IOException {
      if (indexHasFreq && (isFreqsRead == false)) {
        forUtil.readBlock(docIn, encoded, freqBuffer); // read freqs for this block
        isFreqsRead = true;
      }
      return freqBuffer[docBufferUpto-1];
    }

    @Override
    public int docID() {
      return doc;
    }

    private void refillDocs() throws IOException {
      if (indexHasFreq) {
        if (isFreqsRead == false) { // previous freq block was not read
          // check if we need to load the previous freq block to catch up on positions or we can skip it
          if (indexHasPos && needsPositions && (posDocUpTo < docUpto)) {
            forUtil.readBlock(docIn, encoded, freqBuffer); // load the previous freq block
          } else {
            forUtil.skipBlock(docIn); // skip it
          }
          isFreqsRead = true;
        }
        if (indexHasPos && needsPositions) {
          while (posDocUpTo < docUpto) { // catch up on positions, bringing posPendingCount up to the current doc
            posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)];
            posDocUpTo++;
          }
        }
      }

      final int left = docFreq - docUpto;
      assert left > 0;

      if (left >= BLOCK_SIZE) {
        forUtil.readBlock(docIn, encoded, docDeltaBuffer);
        if (indexHasFreq) {
          isFreqsRead = false; // freq block will be loaded lazily when necessary, we don't load it here
        }
      } else {
        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
      }
      docBufferUpto = 0;
    }
    
    private void refillPositions() throws IOException {
      if (posIn.getFilePointer() == lastPosBlockFP) {
        final int count = (int) (totalTermFreq % BLOCK_SIZE);
        int payloadLength = 0;
        int offsetLength = 0;
        payloadByteUpto = 0;
        for(int i=0;i<count;i++) {
          int code = posIn.readVInt();
          if (indexHasPayloads) {
            if ((code & 1) != 0) {
              payloadLength = posIn.readVInt();
            }
            payloadLengthBuffer[i] = payloadLength;
            posDeltaBuffer[i] = code >>> 1;
            if (payloadLength != 0) {
              if (payloadByteUpto + payloadLength > payloadBytes.length) {
                payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
              }
              posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
              payloadByteUpto += payloadLength;
            }
          } else {
            posDeltaBuffer[i] = code;
          }

          if (indexHasOffsets) {
            int deltaCode = posIn.readVInt();
            if ((deltaCode & 1) != 0) {
              offsetLength = posIn.readVInt();
            }
            offsetStartDeltaBuffer[i] = deltaCode >>> 1;
            offsetLengthBuffer[i] = offsetLength;
          }
        }
        payloadByteUpto = 0;
      } else {
        forUtil.readBlock(posIn, encoded, posDeltaBuffer);

        if (indexHasPayloads && payIn != null) {
          if (needsPayloads) {
            forUtil.readBlock(payIn, encoded, payloadLengthBuffer);
            int numBytes = payIn.readVInt();

            if (numBytes > payloadBytes.length) {
              payloadBytes = ArrayUtil.grow(payloadBytes, numBytes);
            }
            payIn.readBytes(payloadBytes, 0, numBytes);
          } else {
            // this works, because when writing a vint block we always force the first length to be written
            forUtil.skipBlock(payIn); // skip over lengths
            int numBytes = payIn.readVInt(); // read length of payloadBytes
            payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
          }
          payloadByteUpto = 0;
        }

        if (indexHasOffsets && payIn != null) {
          if (needsOffsets) {
            forUtil.readBlock(payIn, encoded, offsetStartDeltaBuffer);
            forUtil.readBlock(payIn, encoded, offsetLengthBuffer);
          } else {
            // this works, because when writing a vint block we always force the first length to be written
            forUtil.skipBlock(payIn); // skip over starts
            forUtil.skipBlock(payIn); // skip over lengths
          }
        }
      }
    }

    @Override
    public void advanceShallow(int target) throws IOException {
      if (target > nextSkipDoc) {
        // always add one to fix the result, since the skip position in Lucene50SkipReader
        // is a little different from MultiLevelSkipListReader
        final int newDocUpto = skipper.skipTo(target) + 1; 
  
        if (newDocUpto > docUpto) {
          // Skipper moved
          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
          docUpto = newDocUpto;
          posDocUpTo = docUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
          accum = skipper.getDoc();
          posPendingFP = skipper.getPosPointer();
          payPendingFP = skipper.getPayPointer();
          posPendingCount = skipper.getPosBufferUpto();
          lastStartOffset = 0; // new document
          payloadByteUpto = skipper.getPayloadByteUpto();             // actually, this is just lastSkipEntry
          seekTo = skipper.getDocPointer();       // delay the seek
        }
        // next time we call advance, this is used to 
        // foresee whether skipper is necessary.
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      assert nextSkipDoc >= target;
    }

    @Override
    public Impacts getImpacts() throws IOException {
      advanceShallow(doc);
      return skipper.getImpacts();
    }

    @Override
    public int nextDoc() throws IOException {
      return advance(doc + 1);
    }

    @Override
    public int advance(int target) throws IOException {
      if (target > nextSkipDoc) {
        advanceShallow(target);
      }
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        if (seekTo >= 0) {
          docIn.seek(seekTo);
          seekTo = -1;
          isFreqsRead = true; // reset isFreqsRead
        }
        refillDocs();
      }

      // Now scan:
      while (true) {
        accum += docDeltaBuffer[docBufferUpto];
        docBufferUpto++;
        docUpto++;

        if (accum >= target) {
          break;
        }
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
      }
      position = 0;
      lastStartOffset = 0;

      return doc = accum;
    }

    // TODO: in theory we could avoid loading frq block
    // when not needed, ie, use skip data to load how far to
    // seek the pos pointer ... instead of having to load frq
    // blocks only to sum up how many positions to skip
    private void skipPositions() throws IOException {
      // Skip positions now:
      int toSkip = posPendingCount - freqBuffer[docBufferUpto-1];
      // if (DEBUG) {
      //   System.out.println("      FPR.skipPositions: toSkip=" + toSkip);
      // }

      final int leftInBlock = BLOCK_SIZE - posBufferUpto;
      if (toSkip < leftInBlock) {
        int end = posBufferUpto + toSkip;
        while(posBufferUpto < end) {
          if (indexHasPayloads) {
            payloadByteUpto += payloadLengthBuffer[posBufferUpto];
          }
          posBufferUpto++;
        }
      } else {
        toSkip -= leftInBlock;
        while(toSkip >= BLOCK_SIZE) {
          assert posIn.getFilePointer() != lastPosBlockFP;
          forUtil.skipBlock(posIn);
  
          if (indexHasPayloads && payIn != null) {
            // Skip payloadLength block:
            forUtil.skipBlock(payIn);

            // Skip payloadBytes block:
            int numBytes = payIn.readVInt();
            payIn.seek(payIn.getFilePointer() + numBytes);
          }

          if (indexHasOffsets && payIn != null) {
            forUtil.skipBlock(payIn);
            forUtil.skipBlock(payIn);
          }
          toSkip -= BLOCK_SIZE;
        }
        refillPositions();
        payloadByteUpto = 0;
        posBufferUpto = 0;
        while(posBufferUpto < toSkip) {
          if (indexHasPayloads) {
            payloadByteUpto += payloadLengthBuffer[posBufferUpto];
          }
          posBufferUpto++;
        }
      }

      position = 0;
      lastStartOffset = 0;
    }

    @Override
    public int nextPosition() throws IOException {
      if (indexHasPos == false || needsPositions == false) {
        return -1;
      }

      if (isFreqsRead == false) {
        forUtil.readBlock(docIn, encoded, freqBuffer); // read freqs for this docs block
        isFreqsRead = true;
      }
      while (posDocUpTo < docUpto) { // bring posPendingCount up to the current doc
        posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)];
        posDocUpTo++;
      }

      assert posPendingCount > 0;
      
      if (posPendingFP != -1) {
        posIn.seek(posPendingFP);
        posPendingFP = -1;

        if (payPendingFP != -1 && payIn != null) {
          payIn.seek(payPendingFP);
          payPendingFP = -1;
        }

        // Force buffer refill:
        posBufferUpto = BLOCK_SIZE;
      }

      if (posPendingCount > freqBuffer[docBufferUpto-1]) {
        skipPositions();
        posPendingCount = freqBuffer[docBufferUpto-1];
      }

      if (posBufferUpto == BLOCK_SIZE) {
        refillPositions();
        posBufferUpto = 0;
      }
      position += posDeltaBuffer[posBufferUpto];

      if (indexHasPayloads) {
        payloadLength = payloadLengthBuffer[posBufferUpto];
        payload.bytes = payloadBytes;
        payload.offset = payloadByteUpto;
        payload.length = payloadLength;
        payloadByteUpto += payloadLength;
      }

      if (indexHasOffsets && needsOffsets) {
        startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto];
        endOffset = startOffset + offsetLengthBuffer[posBufferUpto];
        lastStartOffset = startOffset;
      }

      posBufferUpto++;
      posPendingCount--;
      return position;
    }

    @Override
    public int startOffset() {
      return startOffset;
    }
  
    @Override
    public int endOffset() {
      return endOffset;
    }
  
    @Override
    public BytesRef getPayload() {
      if (payloadLength == 0) {
        return null;
      } else {
        return payload;
      }
    }
    
    @Override
    public long cost() {
      return docFreq;
    }

  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED;
  }

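  // Unlike the cheap footer-structure check performed at open time, this
  // verifies the stored checksum against every byte of each open file.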
  @Override
  public void checkIntegrity() throws IOException {
    if (docIn != null) {
      CodecUtil.checksumEntireFile(docIn);
    }
    if (posIn != null) {
      CodecUtil.checksumEntireFile(posIn);
    }
    if (payIn != null) {
      CodecUtil.checksumEntireFile(payIn);
    }
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(positions=" + (posIn != null) + ",payloads=" + (payIn != null) +")";
  }
}