All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.codecs.lucene40.Lucene40PostingsReader Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * COPIED FROM APACHE LUCENE 4.7.2
 *
 * Git URL: git@github.com:apache/lucene.git, tag: releases/lucene-solr/4.7.2, path: lucene/core/src/java
 *
 * (see https://issues.apache.org/jira/browse/OAK-10786 for details)
 */

package org.apache.lucene.codecs.lucene40;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

/** 
 * Concrete class that reads the 4.0 frq/prox
 * postings format. 
 *  
 *  @see Lucene40PostingsFormat
 *  @deprecated Only for reading old 4.0 segments */
@Deprecated
public class Lucene40PostingsReader extends PostingsReaderBase {

  // Codec names passed to CodecUtil.checkHeader when the terms, frq and
  // prx inputs are validated (see init() and the constructor).
  final static String TERMS_CODEC = "Lucene40PostingsWriterTerms";
  final static String FRQ_CODEC = "Lucene40PostingsWriterFrq";
  final static String PRX_CODEC = "Lucene40PostingsWriterPrx";

  //private static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
  
  // Increment version to change it:
  final static int VERSION_START = 0;
  final static int VERSION_LONG_SKIP = 1;
  final static int VERSION_CURRENT = VERSION_LONG_SKIP;

  // Inputs opened by the constructor. proxIn stays null when
  // fieldInfos.hasProx() is false.
  private final IndexInput freqIn;
  private final IndexInput proxIn;
  // public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;

  // Skip-list settings; populated by init() from the terms input, in this order.
  int skipInterval;
  int maxSkipLevels;
  int skipMinimum;

  // private String segment;

  /**
   * Sole constructor. Opens the segment's frequency input and, when
   * {@code fieldInfos.hasProx()} is true, its proximity input, checking the
   * codec header of each. If anything fails, whatever was opened so far is
   * closed again before the exception propagates.
   */
  public Lucene40PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
    IndexInput frq = null;
    IndexInput prx = null;
    boolean opened = false;
    try {
      final String frqFileName =
          IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION);
      frq = dir.openInput(frqFileName, ioContext);
      CodecUtil.checkHeader(frq, FRQ_CODEC, VERSION_START, VERSION_CURRENT);

      // TODO: hasProx should (somehow!) become codec private,
      // but it's tricky because 1) FIS.hasProx is global (it
      // could be all fields that have prox are written by a
      // different codec), 2) the field may have had prox in
      // the past but all docs w/ that field were deleted.
      // Really we'd need to init prxOut lazily on write, and
      // then somewhere record that we actually wrote it so we
      // know whether to open on read:
      if (fieldInfos.hasProx()) {
        final String prxFileName =
            IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION);
        prx = dir.openInput(prxFileName, ioContext);
        CodecUtil.checkHeader(prx, PRX_CODEC, VERSION_START, VERSION_CURRENT);
      }
      // prx is still null here when no field has prox data.

      this.freqIn = frq;
      this.proxIn = prx;
      opened = true;
    } finally {
      if (!opened) {
        IOUtils.closeWhileHandlingException(frq, prx);
      }
    }
  }

  /**
   * Verifies the header of the terms input and then reads the three
   * skip-list settings (interval, max levels, minimum doc count) from it.
   */
  @Override
  public void init(IndexInput termsIn) throws IOException {
    // Reject input that was not produced by the matching (past) writer.
    CodecUtil.checkHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT);

    // The three ints follow the header in this fixed order.
    this.skipInterval = termsIn.readInt();
    this.maxSkipLevels = termsIn.readInt();
    this.skipMinimum = termsIn.readInt();
  }

  /**
   * Per-term state: the term's offsets into the frq and prx inputs plus the
   * offset of its skip data.
   */
  // Must keep final because we do non-standard clone
  private final static class StandardTermState extends BlockTermState {
    long freqOffset;
    long proxOffset;
    long skipOffset;

    /** Creates a fresh instance and copies this state into it. */
    @Override
    public StandardTermState clone() {
      final StandardTermState copy = new StandardTermState();
      copy.copyFrom(this);
      return copy;
    }

    /** Copies the base-class state and then the three offsets from {@code _other}. */
    @Override
    public void copyFrom(TermState _other) {
      super.copyFrom(_other);
      final StandardTermState source = (StandardTermState) _other;
      this.freqOffset = source.freqOffset;
      this.proxOffset = source.proxOffset;
      this.skipOffset = source.skipOffset;
    }

    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder();
      text.append(super.toString());
      text.append(" freqFP=").append(freqOffset);
      text.append(" proxFP=").append(proxOffset);
      text.append(" skipOffset=").append(skipOffset);
      return text.toString();
    }
  }

  /** Returns a new, empty {@code StandardTermState}. */
  @Override
  public BlockTermState newTermState() {
    return new StandardTermState();
  }

  /**
   * Closes the frequency input and, if it was opened, the proximity input.
   *
   * <p>Both inputs are always attempted and null inputs are ignored. If
   * closing fails, the first exception encountered is rethrown, so a failure
   * on the frequency input is not masked by a later failure on the proximity
   * input.
   *
   * @throws IOException if closing either input fails
   */
  @Override
  public void close() throws IOException {
    IOUtils.close(freqIn, proxIn);
  }

  /**
   * Decodes one term's postings metadata from {@code in} into {@code _termState}.
   *
   * <p>The frq and prx offsets are read as deltas and added to the values
   * already in the state; when {@code absolute} is true both are first reset
   * to zero. The skip offset is only read for terms whose {@code docFreq} is at
   * least {@code skipMinimum}; otherwise the state's {@code skipOffset} is left
   * untouched and must be treated as undefined.
   *
   * @param longs      not used by this implementation
   * @param in         source of the encoded metadata
   * @param fieldInfo  field the term belongs to; decides whether a prx delta is read
   * @param _termState state to update; must be a {@code StandardTermState}
   * @param absolute   whether the offsets restart from zero instead of continuing
   *                   from the state's current values
   * @throws IOException if reading from {@code in} fails
   */
  @Override
  public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
    throws IOException {
    final StandardTermState termState = (StandardTermState) _termState;
    if (absolute) {
      termState.freqOffset = 0;
      termState.proxOffset = 0;
    }

    termState.freqOffset += in.readVLong();
    assert termState.freqOffset < freqIn.length();

    if (termState.docFreq >= skipMinimum) {
      // Only terms with enough docs carry a skip offset.
      termState.skipOffset = in.readVLong();
      assert termState.freqOffset + termState.skipOffset < freqIn.length();
    }

    if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
      termState.proxOffset += in.readVLong();
    }
  }
    
  /**
   * Returns a docs enumerator positioned on the given term, recycling
   * {@code reuse} when {@link #canReuse} allows it and allocating a new
   * enumerator otherwise.
   */
  @Override
  public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
    if (!canReuse(reuse, liveDocs)) {
      return newDocsEnum(liveDocs, fieldInfo, (StandardTermState) termState);
    }
    final SegmentDocsEnumBase recycled = (SegmentDocsEnumBase) reuse;
    return recycled.reset(fieldInfo, (StandardTermState) termState);
  }
  
  /**
   * Tells whether {@code reuse} may be recycled for this reader: it must be
   * one of our enumerators, created from this reader's frq input, and bound
   * to the very same {@code liveDocs} instance.
   */
  private boolean canReuse(DocsEnum reuse, Bits liveDocs) {
    if (!(reuse instanceof SegmentDocsEnumBase)) {
      // null, or an enumerator of some other kind
      return false;
    }
    final SegmentDocsEnumBase candidate = (SegmentDocsEnumBase) reuse;
    // With ParallelReader a passed-in enum may come from another reader
    // that also uses this codec, hence the identity check on the input.
    return candidate.startFreqIn == freqIn && candidate.liveDocs == liveDocs;
  }
  
  /**
   * Allocates a docs enumerator for this reader (the live-docs aware variant
   * when {@code liveDocs} is given) and positions it on the term.
   */
  private DocsEnum newDocsEnum(Bits liveDocs, FieldInfo fieldInfo, StandardTermState termState) throws IOException {
    final SegmentDocsEnumBase created = (liveDocs != null)
        ? new LiveDocsSegmentDocsEnum(freqIn, liveDocs)
        : new AllDocsSegmentDocsEnum(freqIn);
    return created.reset(fieldInfo, termState);
  }

  /**
   * Returns a positions enumerator positioned on the given term. Fields with
   * payloads or offsets get a {@code SegmentFullPositionsEnum}, all others a
   * {@code SegmentDocsAndPositionsEnum}. {@code reuse} is recycled only if it
   * has the matching type and was created from this reader's frq input.
   */
  @Override
  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
                                               DocsAndPositionsEnum reuse, int flags)
    throws IOException {

    final boolean hasOffsets =
        fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

    // TODO: can we optimize if FLAG_PAYLOADS / FLAG_OFFSETS
    // isn't passed?

    // TODO: refactor
    if (fieldInfo.hasPayloads() || hasOffsets) {
      SegmentFullPositionsEnum fullEnum = null;
      if (reuse instanceof SegmentFullPositionsEnum) {
        final SegmentFullPositionsEnum candidate = (SegmentFullPositionsEnum) reuse;
        // With ParallelReader the passed-in enum could have come from
        // another reader also using this codec; only keep our own.
        if (candidate.startFreqIn == freqIn) {
          fullEnum = candidate;
        }
      }
      if (fullEnum == null) {
        fullEnum = new SegmentFullPositionsEnum(freqIn, proxIn);
      }
      return fullEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs);
    }

    SegmentDocsAndPositionsEnum plainEnum = null;
    if (reuse instanceof SegmentDocsAndPositionsEnum) {
      final SegmentDocsAndPositionsEnum candidate = (SegmentDocsAndPositionsEnum) reuse;
      // Same ParallelReader caveat as above.
      if (candidate.startFreqIn == freqIn) {
        plainEnum = candidate;
      }
    }
    if (plainEnum == null) {
      plainEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
    }
    return plainEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs);
  }

  // Length of the docs[] / freqs[] buffers allocated by SegmentDocsEnumBase.
  static final int BUFFERSIZE = 64;
  
  /**
   * Common base of the docs-only enumerators. It reads from its own clone of
   * the frq input, decodes doc deltas (and freqs, unless the field omits
   * them) into fixed-size buffers, and implements {@code advance} on top of
   * the buffer, the skip list and the subclass hooks {@code linearScan},
   * {@code scanTo} and {@code nextUnreadDoc}.
   */
  private abstract class SegmentDocsEnumBase extends DocsEnum {
    
    // Buffered doc IDs and their freqs (parallel arrays, filled by refill()).
    protected final int[] docs = new int[BUFFERSIZE];
    protected final int[] freqs = new int[BUFFERSIZE];
    
    final IndexInput freqIn; // reuse - private clone used for all reads
    final IndexInput startFreqIn; // reuse - input this enum was created from; compared by canReuse()
    Lucene40SkipListReader skipper; // reuse - lazy loaded
    
    protected boolean indexOmitsTF;                               // does current field omit term freq?
    protected boolean storePayloads;                        // does current field store payloads?
    protected boolean storeOffsets;                         // does current field store offsets?

    protected int limit;                                    // number of docs in this posting
    protected int ord;                                      // how many docs we've read
    protected int doc;                                 // doc we last read
    protected int accum;                                    // accumulator for doc deltas
    protected int freq;                                     // freq we last read
    protected int maxBufferedDocId;                         // last doc ID in the buffer (set by refill(), -1 after reset())
    
    protected int start;                                    // index of the current entry in docs[]; -1 when before the buffer
    protected int count;                                    // number of valid entries in docs[] / freqs[]


    protected long freqOffset;                              // frq offset of the current term
    protected long skipOffset;                              // skip-data offset of the current term, relative to freqOffset

    protected boolean skipped;                              // has the skipper been initialized since reset()?
    protected final Bits liveDocs;
    
    /** Binds this enum to a clone of {@code startFreqIn} and to {@code liveDocs} (may be null). */
    SegmentDocsEnumBase(IndexInput startFreqIn, Bits liveDocs) {
      this.startFreqIn = startFreqIn;
      this.freqIn = startFreqIn.clone();
      this.liveDocs = liveDocs;
      
    }
    
    
    /**
     * Positions this enum at the start of the postings of the given term and
     * clears all iteration state (buffer, accumulator, skip flag).
     */
    DocsEnum reset(FieldInfo fieldInfo, StandardTermState termState) throws IOException {
      indexOmitsTF = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
      storePayloads = fieldInfo.hasPayloads();
      storeOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      freqOffset = termState.freqOffset;
      skipOffset = termState.skipOffset;

      // TODO: for full enum case (eg segment merging) this
      // seek is unnecessary; maybe we can avoid in such
      // cases
      freqIn.seek(termState.freqOffset);
      limit = termState.docFreq;
      assert limit > 0;
      ord = 0;
      doc = -1;
      accum = 0;
      // if (DEBUG) System.out.println("  sde limit=" + limit + " freqFP=" + freqOffset);
      skipped = false;

      start = -1;
      count = 0;
      freq = 1;
      if (indexOmitsTF) {
        // fillDocs() never writes freqs[], so preset every slot to 1
        Arrays.fill(freqs, 1);
      }
      maxBufferedDocId = -1;
      return this;
    }
    
    @Override
    public final int freq() {
      return freq;
    }

    @Override
    public final int docID() {
      return doc;
    }
    
    /**
     * Moves to the first doc >= {@code target}. The answer is taken from the
     * buffer when the buffer still has entries and its last doc reaches the
     * target; otherwise the buffer is dropped and {@code skipTo} is used.
     */
    @Override
    public final int advance(int target) throws IOException {
      // last doc in our buffer is >= target, binary search + next()
      if (++start < count && maxBufferedDocId >= target) {
        if ((count-start) > 32) { // 32 seemed to be a sweetspot here so use binsearch if the pending results are a lot
          // binarySearch returns the slot just before the hit; nextDoc() steps onto it
          start = binarySearch(count - 1, start, target, docs);
          return nextDoc();
        } else {
          return linearScan(target);
        }
      }
      
      start = count; // buffer is consumed
      
      return doc = skipTo(target);
    }
    
    /**
     * Binary search over {@code docs[low..hi]}. Returns one less than the index
     * of the entry equal to {@code target}, or one less than the insertion
     * point if there is no such entry.
     */
    private final int binarySearch(int hi, int low, int target, int[] docs) {
      while (low <= hi) {
        int mid = (hi + low) >>> 1;
        int doc = docs[mid];
        if (doc < target) {
          low = mid + 1;
        } else if (doc > target) {
          hi = mid - 1;
        } else {
          low = mid;
          break;
        }
      }
      return low-1;
    }
    
    /**
     * Returns the freq that belongs to an already-read doc {@code code}: 1 if
     * the code's low bit is set, otherwise the next vInt of {@code freqIn}.
     */
    final int readFreq(final IndexInput freqIn, final int code)
        throws IOException {
      if ((code & 1) != 0) { // if low bit is set
        return 1; // freq is one
      } else {
        return freqIn.readVInt(); // else read freq
      }
    }
    
    /** Scans the buffer from {@code start} for a doc >= {@code scanTo}, refilling if none is found. */
    protected abstract int linearScan(int scanTo) throws IOException;
    
    /** Decodes postings straight from the input (bypassing the buffer) until a doc >= {@code target} is found. */
    protected abstract int scanTo(int target) throws IOException;

    /**
     * Fetches the next doc through {@code nextUnreadDoc()} and, unless the
     * posting is exhausted, decodes up to {@code docs.length} of the entries
     * that follow it into the buffer. The returned doc itself is not stored
     * in the buffer.
     */
    protected final int refill() throws IOException {
      final int doc = nextUnreadDoc();
      count = 0;
      start = -1;
      if (doc == NO_MORE_DOCS) {
        return NO_MORE_DOCS;
      }
      final int numDocs = Math.min(docs.length, limit - ord);
      ord += numDocs;
      if (indexOmitsTF) {
        count = fillDocs(numDocs);
      } else {
        count = fillDocsAndFreqs(numDocs);
      }
      maxBufferedDocId = count > 0 ? docs[count-1] : NO_MORE_DOCS;
      return doc;
    }
    

    /** Decodes and returns the next doc from the input, or NO_MORE_DOCS. */
    protected abstract int nextUnreadDoc() throws IOException;


    /** Decodes {@code size} doc deltas (no freqs) into {@code docs[]}; returns {@code size}. */
    private final int fillDocs(int size) throws IOException {
      final IndexInput freqIn = this.freqIn;
      final int docs[] = this.docs;
      int docAc = accum;
      for (int i = 0; i < size; i++) {
        docAc += freqIn.readVInt();
        docs[i] = docAc;
      }
      accum = docAc;
      return size;
    }
    
    /** Decodes {@code size} doc-delta/freq pairs into {@code docs[]} and {@code freqs[]}; returns {@code size}. */
    private final int fillDocsAndFreqs(int size) throws IOException {
      final IndexInput freqIn = this.freqIn;
      final int docs[] = this.docs;
      final int freqs[] = this.freqs;
      int docAc = accum;
      for (int i = 0; i < size; i++) {
        final int code = freqIn.readVInt();
        docAc += code >>> 1; // shift off low bit
        freqs[i] = readFreq(freqIn, code);
        docs[i] = docAc;
      }
      accum = docAc;
      return size;
     
    }

    /**
     * Consults the skip list (when the target is at least {@code skipInterval}
     * past {@code accum} and the posting has at least {@code skipMinimum}
     * docs), then finishes with {@code scanTo(target)}.
     */
    private final int skipTo(int target) throws IOException {
      if ((target - skipInterval) >= accum && limit >= skipMinimum) {

        // There are enough docs in the posting to have
        // skip data, and it isn't too close.

        if (skipper == null) {
          // This is the first time this enum has ever been used for skipping -- do lazy init
          skipper = new Lucene40SkipListReader(freqIn.clone(), maxSkipLevels, skipInterval);
        }

        if (!skipped) {

          // This is the first time this posting has
          // skipped since reset() was called, so now we
          // load the skip data for this posting

          skipper.init(freqOffset + skipOffset,
                       freqOffset, 0,
                       limit, storePayloads, storeOffsets);

          skipped = true;
        }

        final int newOrd = skipper.skipTo(target); 

        if (newOrd > ord) {
          // Skipper moved

          ord = newOrd;
          accum = skipper.getDoc();
          freqIn.seek(skipper.getFreqPointer());
        }
      }
      return scanTo(target);
    }
    
    /** Returns the number of docs in the current posting. */
    @Override
    public long cost() {
      return limit;
    }
  }
  
  /** Docs enumerator for the case without live docs: every decoded doc is returned. */
  private final class AllDocsSegmentDocsEnum extends SegmentDocsEnumBase {

    AllDocsSegmentDocsEnum(IndexInput startFreqIn) {
      super(startFreqIn, null);
      assert liveDocs == null;
    }

    /** Steps to the next buffered entry, refilling once the buffer is used up. */
    @Override
    public final int nextDoc() throws IOException {
      start++;
      if (start >= count) {
        doc = refill();
        return doc;
      }
      freq = freqs[start];
      doc = docs[start];
      return doc;
    }

    /** Walks the buffer from {@code start} to the first doc >= {@code scanTo}; refills if there is none. */
    @Override
    protected final int linearScan(int scanTo) throws IOException {
      final int end = count;
      int idx = start;
      while (idx < end) {
        final int candidate = docs[idx];
        if (candidate >= scanTo) {
          start = idx;
          freq = freqs[idx];
          doc = candidate;
          return candidate;
        }
        idx++;
      }
      doc = refill();
      return doc;
    }

    /** Decodes postings directly from the input until a doc >= {@code target} shows up. */
    @Override
    protected int scanTo(int target) throws IOException {
      final IndexInput in = this.freqIn;
      final boolean noFreqs = indexOmitsTF;
      final int end = limit;
      int runningDoc = accum;
      int lastFreq = 1;
      for (int read = ord; read < end; read++) {
        final int code = in.readVInt();
        if (noFreqs) {
          runningDoc += code;
        } else {
          runningDoc += code >>> 1; // low bit is the freq==1 marker
          lastFreq = readFreq(in, code);
        }
        if (runningDoc >= target) {
          freq = lastFreq;
          ord = read + 1;
          accum = runningDoc;
          return runningDoc;
        }
      }
      // ran off the end of the posting
      ord = limit;
      freq = lastFreq;
      accum = runningDoc;
      return NO_MORE_DOCS;
    }

    /** Decodes a single entry from the input; {@code ord} is bumped even when exhausted. */
    @Override
    protected final int nextUnreadDoc() throws IOException {
      final boolean exhausted = ord >= limit;
      ord++;
      if (exhausted) {
        return NO_MORE_DOCS;
      }
      final int code = this.freqIn.readVInt();
      if (indexOmitsTF) {
        accum += code;
      } else {
        accum += code >>> 1; // low bit is the freq==1 marker
        freq = readFreq(this.freqIn, code);
      }
      return accum;
    }

  }
  
  /** Docs enumerator that only returns docs whose bit is set in {@code liveDocs}. */
  private final class LiveDocsSegmentDocsEnum extends SegmentDocsEnumBase {

    LiveDocsSegmentDocsEnum(IndexInput startFreqIn, Bits liveDocs) {
      super(startFreqIn, liveDocs);
      assert liveDocs != null;
    }

    /** Returns the next live doc from the buffer, refilling once the buffer holds none. */
    @Override
    public final int nextDoc() throws IOException {
      final Bits live = this.liveDocs;
      int idx = start + 1;
      while (idx < count) {
        final int candidate = docs[idx];
        if (live.get(candidate)) {
          start = idx;
          freq = freqs[idx];
          doc = candidate;
          return candidate;
        }
        idx++;
      }
      start = count;
      doc = refill();
      return doc;
    }

    /** Walks the buffer from {@code start} to the first live doc >= {@code scanTo}; refills if there is none. */
    @Override
    protected final int linearScan(int scanTo) throws IOException {
      final Bits live = this.liveDocs;
      final int end = count;
      int idx = start;
      while (idx < end) {
        final int candidate = docs[idx];
        if (candidate >= scanTo && live.get(candidate)) {
          start = idx;
          freq = freqs[idx];
          doc = candidate;
          return candidate;
        }
        idx++;
      }
      doc = refill();
      return doc;
    }

    /** Decodes postings directly from the input until a live doc >= {@code target} shows up. */
    @Override
    protected int scanTo(int target) throws IOException {
      final IndexInput in = this.freqIn;
      final Bits live = this.liveDocs;
      final boolean noFreqs = indexOmitsTF;
      final int end = limit;
      int runningDoc = accum;
      int lastFreq = 1;
      for (int read = ord; read < end; read++) {
        final int code = in.readVInt();
        if (noFreqs) {
          runningDoc += code;
        } else {
          runningDoc += code >>> 1; // low bit is the freq==1 marker
          lastFreq = readFreq(in, code);
        }
        if (runningDoc >= target && live.get(runningDoc)) {
          freq = lastFreq;
          ord = read + 1;
          accum = runningDoc;
          return runningDoc;
        }
      }
      // ran off the end of the posting
      ord = limit;
      freq = lastFreq;
      accum = runningDoc;
      return NO_MORE_DOCS;
    }

    /** Decodes entries from the input until the first live doc, or the end of the posting. */
    @Override
    protected final int nextUnreadDoc() throws IOException {
      final IndexInput in = this.freqIn;
      final Bits live = this.liveDocs;
      final boolean noFreqs = indexOmitsTF;
      final int end = limit;
      int runningDoc = accum;
      int lastFreq = 1;
      for (int read = ord; read < end; read++) {
        final int code = in.readVInt();
        if (noFreqs) {
          runningDoc += code;
        } else {
          runningDoc += code >>> 1; // low bit is the freq==1 marker
          lastFreq = readFreq(in, code);
        }
        if (live.get(runningDoc)) {
          freq = lastFreq;
          ord = read + 1;
          accum = runningDoc;
          return runningDoc;
        }
      }
      // ran off the end of the posting
      ord = limit;
      freq = lastFreq;
      accum = runningDoc;
      return NO_MORE_DOCS;
    }
  }
  
  // TODO specialize DocsAndPosEnum too
  
  // Decodes docs & positions. Neither payloads nor offsets are present.
  private final class SegmentDocsAndPositionsEnum extends DocsAndPositionsEnum {
    final IndexInput startFreqIn;                 // input this enum was created from (checked on reuse)
    private final IndexInput freqIn;              // private clone for doc/freq reads
    private final IndexInput proxIn;              // private clone for position reads
    int limit;                                    // number of docs in this posting
    int ord;                                      // how many docs we've read
    int doc = -1;                                 // doc we last read
    int accum;                                    // accumulator for doc deltas
    int freq;                                     // freq we last read
    int position;

    Bits liveDocs;

    long freqOffset;
    long skipOffset;
    long proxOffset;

    int posPendingCount;                          // positions of iterated docs not yet consumed from proxIn

    boolean skipped;
    Lucene40SkipListReader skipper;
    private long lazyProxPointer;                 // pending proxIn seek target, -1 when none

    /** Creates an enum that reads from clones of the two given inputs. */
    public SegmentDocsAndPositionsEnum(IndexInput freqIn, IndexInput proxIn) {
      this.startFreqIn = freqIn;
      this.freqIn = freqIn.clone();
      this.proxIn = proxIn.clone();
    }

    /** Positions this enum at the start of the given term's postings and clears iteration state. */
    public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
      assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
      assert !fieldInfo.hasPayloads();

      this.liveDocs = liveDocs;

      // TODO: for full enum case (eg segment merging) this
      // seek is unnecessary; maybe we can avoid in such
      // cases
      this.freqIn.seek(termState.freqOffset);
      // the prox seek is deferred until positions are actually requested
      this.lazyProxPointer = termState.proxOffset;

      this.limit = termState.docFreq;
      assert limit > 0;

      this.freqOffset = termState.freqOffset;
      this.proxOffset = termState.proxOffset;
      this.skipOffset = termState.skipOffset;

      this.ord = 0;
      this.doc = -1;
      this.accum = 0;
      this.position = 0;
      this.posPendingCount = 0;
      this.skipped = false;

      return this;
    }

    /** Decodes doc/freq pairs until a live doc is found or the posting is exhausted. */
    @Override
    public int nextDoc() throws IOException {
      while (ord != limit) {
        ord++;

        // Decode next doc/freq pair
        final int code = freqIn.readVInt();

        accum += code >>> 1;                                // shift off low bit
        freq = ((code & 1) == 0) ? freqIn.readVInt() : 1;   // low bit set means freq is one
        posPendingCount += freq;

        if (liveDocs == null || liveDocs.get(accum)) {
          position = 0;
          doc = accum;
          return doc;
        }
      }
      doc = NO_MORE_DOCS;
      return doc;
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int freq() {
      return freq;
    }

    /** Uses the skip list when the target is far enough away, then scans linearly. */
    @Override
    public int advance(int target) throws IOException {

      // Skip data only exists for postings with enough docs, and is only
      // worth consulting when the target isn't too close.
      final boolean useSkipList = limit >= skipMinimum && (target - skipInterval) >= doc;

      if (useSkipList) {
        if (skipper == null) {
          // This is the first time this enum has ever been used for skipping -- do lazy init
          skipper = new Lucene40SkipListReader(freqIn.clone(), maxSkipLevels, skipInterval);
        }

        if (!skipped) {
          // First skip on this posting since reset(): load its skip data
          skipper.init(freqOffset + skipOffset, freqOffset, proxOffset, limit, false, false);
          skipped = true;
        }

        final int skippedOrd = skipper.skipTo(target);

        if (skippedOrd > ord) {
          // Skipper moved
          ord = skippedOrd;
          accum = skipper.getDoc();
          doc = accum;
          freqIn.seek(skipper.getFreqPointer());
          lazyProxPointer = skipper.getProxPointer();
          posPendingCount = 0;
          position = 0;
        }
      }

      // Now, linear scan for the rest:
      nextDoc();
      while (doc < target) {
        nextDoc();
      }

      return doc;
    }

    /** Returns the next position of the current doc, first skipping positions of docs that were passed over. */
    @Override
    public int nextPosition() throws IOException {

      final long pendingSeek = lazyProxPointer;
      if (pendingSeek != -1) {
        proxIn.seek(pendingSeek);
        lazyProxPointer = -1;
      }

      // scan over any docs that were iterated without their positions
      if (posPendingCount > freq) {
        position = 0;
        do {
          // a byte without the high bit ends one vInt, i.e. one position
          final boolean endsVInt = (proxIn.readByte() & 0x80) == 0;
          if (endsVInt) {
            posPendingCount--;
          }
        } while (posPendingCount != freq);
      }

      position += proxIn.readVInt();

      posPendingCount--;

      assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;

      return position;
    }

    /** Offsets are not available from this enum; always -1. */
    @Override
    public int startOffset() {
      return -1;
    }

    /** Offsets are not available from this enum; always -1. */
    @Override
    public int endOffset() {
      return -1;
    }

    /** Payloads are not available from this enum; always null. */
    @Override
    public BytesRef getPayload() throws IOException {
      return null;
    }

    /** Returns the number of docs in the current posting. */
    @Override
    public long cost() {
      return limit;
    }
  }
  
  // Decodes docs & positions & (payloads and/or offsets)
  private class SegmentFullPositionsEnum extends DocsAndPositionsEnum {
    final IndexInput startFreqIn;                 // input this enum was created from (checked on reuse)
    private final IndexInput freqIn;              // clone used for doc/freq reads
    private final IndexInput proxIn;              // clone used for position reads

    int limit;                                    // number of docs in this posting
    int ord;                                      // how many docs we've read
    int doc = -1;                                 // doc we last read
    int accum;                                    // accumulator for doc deltas
    int freq;                                     // freq we last read
    int position;

    Bits liveDocs;

    // Offsets of the current term, copied from the term state in reset().
    long freqOffset;
    long skipOffset;
    long proxOffset;

    int posPendingCount;                          // grows by freq for each doc decoded in nextDoc()
    int payloadLength;
    boolean payloadPending;

    boolean skipped;                              // has the skipper been initialized since reset()?
    Lucene40SkipListReader skipper;               // created lazily in advance()
    private BytesRef payload;                     // allocated on the first reset()
    private long lazyProxPointer;                 // pending proxIn seek target, -1 when none
    
    boolean storePayloads;                        // does current field store payloads?
    boolean storeOffsets;                         // does current field store offsets?
    
    int offsetLength;
    int startOffset;

    /**
     * Creates an enum that reads from clones of the two given inputs; the
     * original frq input is remembered so reuse checks can compare against it.
     */
    public SegmentFullPositionsEnum(IndexInput freqIn, IndexInput proxIn) {
      this.startFreqIn = freqIn;
      this.freqIn = freqIn.clone();
      this.proxIn = proxIn.clone();
    }

    /**
     * Positions this enum at the start of the given term's postings, records
     * whether the field stores payloads and/or offsets, and clears iteration
     * state. The payload scratch buffer is allocated on first use.
     */
    public SegmentFullPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
      final IndexOptions options = fieldInfo.getIndexOptions();
      this.storeOffsets = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      this.storePayloads = fieldInfo.hasPayloads();
      assert fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      assert storePayloads || storeOffsets;

      if (payload == null) {
        final BytesRef scratch = new BytesRef();
        scratch.bytes = new byte[1];
        payload = scratch;
      }

      this.liveDocs = liveDocs;

      // TODO: for full enum case (eg segment merging) this
      // seek is unnecessary; maybe we can avoid in such
      // cases
      this.freqIn.seek(termState.freqOffset);
      // the prox seek is deferred until positions are actually requested
      this.lazyProxPointer = termState.proxOffset;

      this.limit = termState.docFreq;
      this.freqOffset = termState.freqOffset;
      this.proxOffset = termState.proxOffset;
      this.skipOffset = termState.skipOffset;

      this.ord = 0;
      this.doc = -1;
      this.accum = 0;
      this.position = 0;
      this.startOffset = 0;
      this.posPendingCount = 0;
      this.payloadPending = false;
      this.skipped = false;

      return this;
    }

    /** Decodes doc/freq pairs until a live doc is found or the posting is exhausted. */
    @Override
    public int nextDoc() throws IOException {
      while (ord != limit) {
        ord++;

        // Decode next doc/freq pair
        final int code = freqIn.readVInt();

        accum += code >>> 1;                                // shift off low bit
        freq = ((code & 1) == 0) ? freqIn.readVInt() : 1;   // low bit set means freq is one
        posPendingCount += freq;

        if (liveDocs == null || liveDocs.get(accum)) {
          position = 0;
          startOffset = 0;
          doc = accum;
          return doc;
        }
      }
      doc = NO_MORE_DOCS;
      return doc;
    }

    /** Returns the doc last read: -1 after reset(), NO_MORE_DOCS once nextDoc() has exhausted the posting. */
    @Override
    public int docID() {
      return doc;
    }

    /** Returns the freq decoded for the most recently read doc/freq pair. */
    @Override
    public int freq() throws IOException {
      return freq;
    }

    /**
     * Moves to the first doc >= {@code target}: consults the skip list when
     * the posting has skip data and the target is far enough away, then scans
     * linearly with {@code nextDoc()}.
     */
    @Override
    public int advance(int target) throws IOException {

      // Skip data only exists for postings with enough docs, and is only
      // worth consulting when the target isn't too close.
      final boolean useSkipList = limit >= skipMinimum && (target - skipInterval) >= doc;

      if (useSkipList) {
        if (skipper == null) {
          // This is the first time this enum has ever been used for skipping -- do lazy init
          skipper = new Lucene40SkipListReader(freqIn.clone(), maxSkipLevels, skipInterval);
        }

        if (!skipped) {
          // First skip on this posting since reset(): load its skip data
          skipper.init(freqOffset + skipOffset, freqOffset, proxOffset, limit, storePayloads, storeOffsets);
          skipped = true;
        }

        final int skippedOrd = skipper.skipTo(target);

        if (skippedOrd > ord) {
          // Skipper moved: adopt its state and drop pending position data
          ord = skippedOrd;
          accum = skipper.getDoc();
          doc = accum;
          freqIn.seek(skipper.getFreqPointer());
          lazyProxPointer = skipper.getProxPointer();
          posPendingCount = 0;
          position = 0;
          startOffset = 0;
          payloadPending = false;
          payloadLength = skipper.getPayloadLength();
          offsetLength = skipper.getOffsetLength();
        }
      }

      // Now, linear scan for the rest:
      nextDoc();
      while (doc < target) {
        nextDoc();
      }

      return doc;
    }

    /**
     * Decodes and returns the next position of the current document.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>If a skip left a deferred prox pointer, seek {@code proxIn} to it.</li>
     *   <li>If the previous position's payload was never fetched, step over its
     *       bytes.</li>
     *   <li>Consume the positions of documents that were iterated without
     *       reading their positions ({@code posPendingCount > freq}).</li>
     *   <li>Decode one position (and, if stored, its offset) for the current
     *       document.</li>
     * </ol>
     *
     * <p>Positions and start offsets are stored as deltas and accumulated into
     * {@code position} and {@code startOffset}. When payloads are stored, the
     * low bit of the position code signals that a new payload length follows;
     * when offsets are stored, the low bit of the offset code signals that a
     * new offset length follows.
     *
     * @return the decoded position
     * @throws IOException if reading from the prox input fails
     */
    @Override
    public int nextPosition() throws IOException {

      if (lazyProxPointer != -1) {
        // advance() only recorded where to go; do the actual seek now
        proxIn.seek(lazyProxPointer);
        lazyProxPointer = -1;
      }
      
      if (payloadPending && payloadLength > 0) {
        // payload of last position was never retrieved -- skip it
        proxIn.seek(proxIn.getFilePointer() + payloadLength);
        payloadPending = false;
      }

      // scan over any docs that were iterated without their positions
      while(posPendingCount > freq) {
        final int code = proxIn.readVInt();

        if (storePayloads) {
          if ((code & 1) != 0) {
            // new payload length
            payloadLength = proxIn.readVInt();
            assert payloadLength >= 0;
          }
          assert payloadLength != -1;
        }
        
        if (storeOffsets) {
          if ((proxIn.readVInt() & 1) != 0) {
            // new offset length
            offsetLength = proxIn.readVInt();
          }
        }
        
        if (storePayloads) {
          // step over the payload bytes of the skipped position
          proxIn.seek(proxIn.getFilePointer() + payloadLength);
        }

        posPendingCount--;
        position = 0;
        startOffset = 0;
        payloadPending = false;
        //System.out.println("StandardR.D&PE skipPos");
      }

      // read next position
      // No second "skip the unread payload" check is needed here: the guard
      // before the loop clears payloadPending whenever payloadLength > 0, and
      // every iteration of the loop clears it as well, so at this point
      // (payloadPending && payloadLength > 0) can never hold.
      int code = proxIn.readVInt();
      if (storePayloads) {
        if ((code & 1) != 0) {
          // new payload length
          payloadLength = proxIn.readVInt();
          assert payloadLength >= 0;
        }
        assert payloadLength != -1;
          
        payloadPending = true;
        // the position delta sits above the payload-length flag bit
        code >>>= 1;
      }
      position += code;
      
      if (storeOffsets) {
        int offsetCode = proxIn.readVInt();
        if ((offsetCode & 1) != 0) {
          // new offset length
          offsetLength = proxIn.readVInt();
        }
        // the start-offset delta sits above the offset-length flag bit
        startOffset += offsetCode >>> 1;
      }

      posPendingCount--;

      assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;

      //System.out.println("StandardR.D&PE nextPos   return pos=" + position);
      return position;
    }

    /**
     * Returns the start offset accumulated for the current position, or
     * {@code -1} when offsets are not stored.
     */
    @Override
    public int startOffset() throws IOException {
      if (!storeOffsets) {
        return -1;
      }
      return startOffset;
    }

    /**
     * Returns the end offset of the current position, computed as the start
     * offset plus the current offset length, or {@code -1} when offsets are not
     * stored.
     */
    @Override
    public int endOffset() throws IOException {
      if (!storeOffsets) {
        return -1;
      }
      return startOffset + offsetLength;
    }

    /**
     * Returns the payload at the current position.
     *
     * <p>Yields {@code null} when payloads are not stored or when the current
     * payload length is not positive. Otherwise, if the payload bytes are still
     * pending, they are read from {@code proxIn} into the shared
     * {@code payload} buffer (grown first if it is too small) and the pending
     * flag is cleared; the same buffer instance is returned on every call.
     *
     * @return the shared payload buffer, or {@code null} if there is no payload
     * @throws IOException if reading the payload bytes fails
     */
    @Override
    public BytesRef getPayload() throws IOException {
      if (!storePayloads || payloadLength <= 0) {
        return null;
      }

      assert lazyProxPointer == -1;
      assert posPendingCount < freq;

      if (!payloadPending) {
        // Nothing left to read from proxIn; hand back the buffer as it is.
        return payload;
      }

      final int length = payloadLength;
      if (payload.bytes.length < length) {
        payload.grow(length);
      }
      proxIn.readBytes(payload.bytes, 0, length);
      payload.length = length;
      payloadPending = false;

      return payload;
    }
    
    /**
     * Returns {@code limit} as the cost of this enum.
     *
     * <p>NOTE(review): {@code limit} is presumably the number of documents in
     * this posting list (it is also what {@code advance(int)} compares against
     * {@code skipMinimum}) -- confirm against where it is assigned.
     */
    @Override
    public long cost() {
      return limit;
    }
  }

  /**
   * Reports the memory used by this reader; this implementation always
   * returns 0.
   */
  @Override
  public long ramBytesUsed() {
    return 0;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy