/*
* COPIED FROM APACHE LUCENE 4.7.2
*
* Git URL: [email protected]:apache/lucene.git, tag: releases/lucene-solr/4.7.2, path: lucene/core/src/java
*
* (see https://issues.apache.org/jira/browse/OAK-10786 for details)
*/
package org.apache.lucene.codecs.lucene40;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
* Concrete class that reads the 4.0 frq/prox
* postings format.
*
* @see Lucene40PostingsFormat
* @deprecated Only for reading old 4.0 segments */
@Deprecated
public class Lucene40PostingsReader extends PostingsReaderBase {
final static String TERMS_CODEC = "Lucene40PostingsWriterTerms";
final static String FRQ_CODEC = "Lucene40PostingsWriterFrq";
final static String PRX_CODEC = "Lucene40PostingsWriterPrx";
//private static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
// Increment version to change it:
final static int VERSION_START = 0;
final static int VERSION_LONG_SKIP = 1;
final static int VERSION_CURRENT = VERSION_LONG_SKIP;
private final IndexInput freqIn;
private final IndexInput proxIn;
// public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
int skipInterval;
int maxSkipLevels;
int skipMinimum;
// private String segment;
/** Sole constructor. */
public Lucene40PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
boolean success = false;
IndexInput freqIn = null;
IndexInput proxIn = null;
try {
freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION),
ioContext);
CodecUtil.checkHeader(freqIn, FRQ_CODEC, VERSION_START, VERSION_CURRENT);
// TODO: hasProx should (somehow!) become codec private,
// but it's tricky because 1) FIS.hasProx is global (it
// could be that all fields with prox are written by a
// different codec), 2) the field may have had prox in
// the past but all docs with that field were deleted.
// Really we'd need to init prxOut lazily on write, and
// then somewhere record that we actually wrote it so we
// know whether to open on read:
if (fieldInfos.hasProx()) {
proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION),
ioContext);
CodecUtil.checkHeader(proxIn, PRX_CODEC, VERSION_START, VERSION_CURRENT);
} else {
proxIn = null;
}
this.freqIn = freqIn;
this.proxIn = proxIn;
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(freqIn, proxIn);
}
}
}
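// init() reads the header that Lucene40PostingsWriter wrote into the terms
// dictionary: skipInterval (docs between skip-list entries), maxSkipLevels
// (depth of the multi-level skip list) and skipMinimum (the smallest docFreq
// for which skip data is written at all). These three values drive the
// skipTo()/advance() heuristics further down.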
@Override
public void init(IndexInput termsIn) throws IOException {
// Make sure we are talking to the matching past writer
CodecUtil.checkHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT);
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
skipMinimum = termsIn.readInt();
}
// Must keep final because we do non-standard clone
private final static class StandardTermState extends BlockTermState {
long freqOffset;
long proxOffset;
long skipOffset;
@Override
public StandardTermState clone() {
StandardTermState other = new StandardTermState();
other.copyFrom(this);
return other;
}
@Override
public void copyFrom(TermState _other) {
super.copyFrom(_other);
StandardTermState other = (StandardTermState) _other;
freqOffset = other.freqOffset;
proxOffset = other.proxOffset;
skipOffset = other.skipOffset;
}
@Override
public String toString() {
return super.toString() + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " skipOffset=" + skipOffset;
}
}
@Override
public BlockTermState newTermState() {
return new StandardTermState();
}
@Override
public void close() throws IOException {
try {
if (freqIn != null) {
freqIn.close();
}
} finally {
if (proxIn != null) {
proxIn.close();
}
}
}
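// decodeTerm() rebuilds a term's file pointers from the block metadata.
// Within a terms block the offsets are delta-coded against the previous
// term; when 'absolute' is true the deltas restart from zero. Illustrative
// example (made-up numbers): three consecutive terms whose freq-file
// pointers are 100, 160 and 230 are stored as the VLongs 100, 60 and 70,
// and a skipOffset VLong is interleaved only for terms with
// docFreq >= skipMinimum.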
@Override
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final StandardTermState termState = (StandardTermState) _termState;
// if (DEBUG) System.out.println("SPR: nextTerm seg=" + segment + " tbOrd=" + termState.termBlockOrd + " bytesReader.fp=" + termState.bytesReader.getPosition());
final boolean isFirstTerm = termState.termBlockOrd == 0;
if (absolute) {
termState.freqOffset = 0;
termState.proxOffset = 0;
}
termState.freqOffset += in.readVLong();
/*
if (DEBUG) {
System.out.println(" dF=" + termState.docFreq);
System.out.println(" freqFP=" + termState.freqOffset);
}
*/
assert termState.freqOffset < freqIn.length();
if (termState.docFreq >= skipMinimum) {
termState.skipOffset = in.readVLong();
// if (DEBUG) System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
assert termState.freqOffset + termState.skipOffset < freqIn.length();
} else {
// docFreq < skipMinimum: no skip data was written for this term, so skipOffset is left undefined
}
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
termState.proxOffset += in.readVLong();
// if (DEBUG) System.out.println(" proxFP=" + termState.proxOffset);
}
}
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
if (canReuse(reuse, liveDocs)) {
// if (DEBUG) System.out.println("SPR.docs ts=" + termState);
return ((SegmentDocsEnumBase) reuse).reset(fieldInfo, (StandardTermState)termState);
}
return newDocsEnum(liveDocs, fieldInfo, (StandardTermState)termState);
}
private boolean canReuse(DocsEnum reuse, Bits liveDocs) {
if (reuse != null && (reuse instanceof SegmentDocsEnumBase)) {
SegmentDocsEnumBase docsEnum = (SegmentDocsEnumBase) reuse;
// If you are using ParallelReader, and pass in a
// reused DocsEnum, it could have come from another
// reader also using standard codec
if (docsEnum.startFreqIn == freqIn) {
// we only reuse if the incoming enum has the same liveDocs as the given liveDocs
return liveDocs == docsEnum.liveDocs;
}
}
return false;
}
private DocsEnum newDocsEnum(Bits liveDocs, FieldInfo fieldInfo, StandardTermState termState) throws IOException {
if (liveDocs == null) {
return new AllDocsSegmentDocsEnum(freqIn).reset(fieldInfo, termState);
} else {
return new LiveDocsSegmentDocsEnum(freqIn, liveDocs).reset(fieldInfo, termState);
}
}
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
DocsAndPositionsEnum reuse, int flags)
throws IOException {
boolean hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
// TODO: can we optimize if FLAG_PAYLOADS / FLAG_OFFSETS
// isn't passed?
// TODO: refactor
if (fieldInfo.hasPayloads() || hasOffsets) {
SegmentFullPositionsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentFullPositionsEnum)) {
docsEnum = new SegmentFullPositionsEnum(freqIn, proxIn);
} else {
docsEnum = (SegmentFullPositionsEnum) reuse;
if (docsEnum.startFreqIn != freqIn) {
// If you are using ParallelReader, and pass in a
// reused DocsEnum, it could have come from another
// reader also using standard codec
docsEnum = new SegmentFullPositionsEnum(freqIn, proxIn);
}
}
return docsEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs);
} else {
SegmentDocsAndPositionsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) {
docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
} else {
docsEnum = (SegmentDocsAndPositionsEnum) reuse;
if (docsEnum.startFreqIn != freqIn) {
// If you are using ParallelReader, and pass in a
// reused DocsEnum, it could have come from another
// reader also using standard codec
docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
}
}
return docsEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs);
}
}
static final int BUFFERSIZE = 64;
private abstract class SegmentDocsEnumBase extends DocsEnum {
protected final int[] docs = new int[BUFFERSIZE];
protected final int[] freqs = new int[BUFFERSIZE];
final IndexInput freqIn; // reuse
final IndexInput startFreqIn; // reuse
Lucene40SkipListReader skipper; // reuse - lazy loaded
protected boolean indexOmitsTF; // does current field omit term freq?
protected boolean storePayloads; // does current field store payloads?
protected boolean storeOffsets; // does current field store offsets?
protected int limit; // number of docs in this posting
protected int ord; // how many docs we've read
protected int doc; // doc we last read
protected int accum; // accumulator for doc deltas
protected int freq; // freq we last read
protected int maxBufferedDocId;
protected int start;
protected int count;
protected long freqOffset;
protected long skipOffset;
protected boolean skipped;
protected final Bits liveDocs;
SegmentDocsEnumBase(IndexInput startFreqIn, Bits liveDocs) {
this.startFreqIn = startFreqIn;
this.freqIn = startFreqIn.clone();
this.liveDocs = liveDocs;
}
DocsEnum reset(FieldInfo fieldInfo, StandardTermState termState) throws IOException {
indexOmitsTF = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.hasPayloads();
storeOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
freqOffset = termState.freqOffset;
skipOffset = termState.skipOffset;
// TODO: for full enum case (eg segment merging) this
// seek is unnecessary; maybe we can avoid in such
// cases
freqIn.seek(termState.freqOffset);
limit = termState.docFreq;
assert limit > 0;
ord = 0;
doc = -1;
accum = 0;
// if (DEBUG) System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
skipped = false;
start = -1;
count = 0;
freq = 1;
if (indexOmitsTF) {
Arrays.fill(freqs, 1);
}
maxBufferedDocId = -1;
return this;
}
@Override
public final int freq() {
return freq;
}
@Override
public final int docID() {
return doc;
}
@Override
public final int advance(int target) throws IOException {
// last doc in our buffer is >= target, binary search + next()
if (++start < count && maxBufferedDocId >= target) {
if ((count-start) > 32) { // 32 seemed to be a sweet spot here, so use binary search when many results are pending
start = binarySearch(count - 1, start, target, docs);
return nextDoc();
} else {
return linearScan(target);
}
}
start = count; // buffer is consumed
return doc = skipTo(target);
}
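// binarySearch() returns the index just *before* the first buffered doc that
// is >= target, so the nextDoc()/linearScan() call that follows lands exactly
// on it. Worked example (illustrative): with docs = [3, 7, 12] and target = 8
// it returns 1 (the slot holding 7), and the subsequent nextDoc() surfaces 12.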
private final int binarySearch(int hi, int low, int target, int[] docs) {
while (low <= hi) {
int mid = (hi + low) >>> 1;
int doc = docs[mid];
if (doc < target) {
low = mid + 1;
} else if (doc > target) {
hi = mid - 1;
} else {
low = mid;
break;
}
}
return low-1;
}
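// The .frq stream interleaves delta-coded doc ids with frequencies: each
// entry starts with the VInt (delta << 1) | (freq == 1 ? 1 : 0), and only
// when the low bit is clear does a separate VInt carry the actual freq.
// Worked example (illustrative numbers): docs [5, 8, 11] with freqs
// [1, 3, 1] encode as the VInts 11, 6, 3, 7 -- i.e. (5<<1)|1, (3<<1), 3,
// (3<<1)|1.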
final int readFreq(final IndexInput freqIn, final int code)
throws IOException {
if ((code & 1) != 0) { // if low bit is set
return 1; // freq is one
} else {
return freqIn.readVInt(); // else read freq
}
}
protected abstract int linearScan(int scanTo) throws IOException;
protected abstract int scanTo(int target) throws IOException;
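// refill() returns the next doc read straight from the stream, then
// prefetches up to BUFFERSIZE (64) further doc/freq pairs into the parallel
// docs[]/freqs[] arrays so that subsequent nextDoc()/advance() calls can be
// served from the buffer without touching the IndexInput.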
protected final int refill() throws IOException {
final int doc = nextUnreadDoc();
count = 0;
start = -1;
if (doc == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
final int numDocs = Math.min(docs.length, limit - ord);
ord += numDocs;
if (indexOmitsTF) {
count = fillDocs(numDocs);
} else {
count = fillDocsAndFreqs(numDocs);
}
maxBufferedDocId = count > 0 ? docs[count-1] : NO_MORE_DOCS;
return doc;
}
protected abstract int nextUnreadDoc() throws IOException;
private final int fillDocs(int size) throws IOException {
final IndexInput freqIn = this.freqIn;
final int docs[] = this.docs;
int docAc = accum;
for (int i = 0; i < size; i++) {
docAc += freqIn.readVInt();
docs[i] = docAc;
}
accum = docAc;
return size;
}
private final int fillDocsAndFreqs(int size) throws IOException {
final IndexInput freqIn = this.freqIn;
final int docs[] = this.docs;
final int freqs[] = this.freqs;
int docAc = accum;
for (int i = 0; i < size; i++) {
final int code = freqIn.readVInt();
docAc += code >>> 1; // shift off low bit
freqs[i] = readFreq(freqIn, code);
docs[i] = docAc;
}
accum = docAc;
return size;
}
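// skipTo() only consults the skip list when the jump is worthwhile: the
// target must be at least skipInterval docs ahead of the current position
// (accum), and the posting must be long enough (limit >= skipMinimum) for
// skip data to have been written at all. Otherwise a plain linear scan is
// cheaper; e.g. with skipInterval = 16, a target within 16 docs of the
// current doc is reached by scanning.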
private final int skipTo(int target) throws IOException {
if ((target - skipInterval) >= accum && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close.
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
skipper = new Lucene40SkipListReader(freqIn.clone(), maxSkipLevels, skipInterval);
}
if (!skipped) {
// This is the first time this posting has
// skipped since reset() was called, so now we
// load the skip data for this posting
skipper.init(freqOffset + skipOffset,
freqOffset, 0,
limit, storePayloads, storeOffsets);
skipped = true;
}
final int newOrd = skipper.skipTo(target);
if (newOrd > ord) {
// Skipper moved
ord = newOrd;
accum = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
}
}
return scanTo(target);
}
@Override
public long cost() {
return limit;
}
}
private final class AllDocsSegmentDocsEnum extends SegmentDocsEnumBase {
AllDocsSegmentDocsEnum(IndexInput startFreqIn) {
super(startFreqIn, null);
assert liveDocs == null;
}
@Override
public final int nextDoc() throws IOException {
if (++start < count) {
freq = freqs[start];
return doc = docs[start];
}
return doc = refill();
}
@Override
protected final int linearScan(int scanTo) throws IOException {
final int[] docs = this.docs;
final int upTo = count;
for (int i = start; i < upTo; i++) {
final int d = docs[i];
if (scanTo <= d) {
start = i;
freq = freqs[i];
return doc = docs[i];
}
}
return doc = refill();
}
@Override
protected int scanTo(int target) throws IOException {
int docAcc = accum;
int frq = 1;
final IndexInput freqIn = this.freqIn;
final boolean omitTF = indexOmitsTF;
final int loopLimit = limit;
for (int i = ord; i < loopLimit; i++) {
int code = freqIn.readVInt();
if (omitTF) {
docAcc += code;
} else {
docAcc += code >>> 1; // shift off low bit
frq = readFreq(freqIn, code);
}
if (docAcc >= target) {
freq = frq;
ord = i + 1;
return accum = docAcc;
}
}
ord = limit;
freq = frq;
accum = docAcc;
return NO_MORE_DOCS;
}
@Override
protected final int nextUnreadDoc() throws IOException {
if (ord++ < limit) {
int code = freqIn.readVInt();
if (indexOmitsTF) {
accum += code;
} else {
accum += code >>> 1; // shift off low bit
freq = readFreq(freqIn, code);
}
return accum;
} else {
return NO_MORE_DOCS;
}
}
}
private final class LiveDocsSegmentDocsEnum extends SegmentDocsEnumBase {
LiveDocsSegmentDocsEnum(IndexInput startFreqIn, Bits liveDocs) {
super(startFreqIn, liveDocs);
assert liveDocs != null;
}
@Override
public final int nextDoc() throws IOException {
final Bits liveDocs = this.liveDocs;
for (int i = start+1; i < count; i++) {
int d = docs[i];
if (liveDocs.get(d)) {
start = i;
freq = freqs[i];
return doc = d;
}
}
start = count;
return doc = refill();
}
@Override
protected final int linearScan(int scanTo) throws IOException {
final int[] docs = this.docs;
final int upTo = count;
final Bits liveDocs = this.liveDocs;
for (int i = start; i < upTo; i++) {
int d = docs[i];
if (scanTo <= d && liveDocs.get(d)) {
start = i;
freq = freqs[i];
return doc = docs[i];
}
}
return doc = refill();
}
@Override
protected int scanTo(int target) throws IOException {
int docAcc = accum;
int frq = 1;
final IndexInput freqIn = this.freqIn;
final boolean omitTF = indexOmitsTF;
final int loopLimit = limit;
final Bits liveDocs = this.liveDocs;
for (int i = ord; i < loopLimit; i++) {
int code = freqIn.readVInt();
if (omitTF) {
docAcc += code;
} else {
docAcc += code >>> 1; // shift off low bit
frq = readFreq(freqIn, code);
}
if (docAcc >= target && liveDocs.get(docAcc)) {
freq = frq;
ord = i + 1;
return accum = docAcc;
}
}
ord = limit;
freq = frq;
accum = docAcc;
return NO_MORE_DOCS;
}
@Override
protected final int nextUnreadDoc() throws IOException {
int docAcc = accum;
int frq = 1;
final IndexInput freqIn = this.freqIn;
final boolean omitTF = indexOmitsTF;
final int loopLimit = limit;
final Bits liveDocs = this.liveDocs;
for (int i = ord; i < loopLimit; i++) {
int code = freqIn.readVInt();
if (omitTF) {
docAcc += code;
} else {
docAcc += code >>> 1; // shift off low bit
frq = readFreq(freqIn, code);
}
if (liveDocs.get(docAcc)) {
freq = frq;
ord = i + 1;
return accum = docAcc;
}
}
ord = limit;
freq = frq;
accum = docAcc;
return NO_MORE_DOCS;
}
}
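// The two subclasses above specialize SegmentDocsEnumBase so the hot loops
// avoid a per-document liveDocs null check: AllDocsSegmentDocsEnum assumes
// no deletions, while LiveDocsSegmentDocsEnum filters every candidate
// through the Bits instance.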
// TODO specialize DocsAndPosEnum too
// Decodes docs & positions. payloads nor offsets are present.
private final class SegmentDocsAndPositionsEnum extends DocsAndPositionsEnum {
final IndexInput startFreqIn;
private final IndexInput freqIn;
private final IndexInput proxIn;
int limit; // number of docs in this posting
int ord; // how many docs we've read
int doc = -1; // doc we last read
int accum; // accumulator for doc deltas
int freq; // freq we last read
int position;
Bits liveDocs;
long freqOffset;
long skipOffset;
long proxOffset;
int posPendingCount;
boolean skipped;
Lucene40SkipListReader skipper;
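// File pointer into the .prx stream; the seek is deferred until the first
// nextPosition() call (-1 once positioned), so consumers that never ask for
// positions never pay for positioning the prox stream.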
private long lazyProxPointer;
public SegmentDocsAndPositionsEnum(IndexInput freqIn, IndexInput proxIn) {
startFreqIn = freqIn;
this.freqIn = freqIn.clone();
this.proxIn = proxIn.clone();
}
public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
assert !fieldInfo.hasPayloads();
this.liveDocs = liveDocs;
// TODO: for full enum case (eg segment merging) this
// seek is unnecessary; maybe we can avoid in such
// cases
freqIn.seek(termState.freqOffset);
lazyProxPointer = termState.proxOffset;
limit = termState.docFreq;
assert limit > 0;
ord = 0;
doc = -1;
accum = 0;
position = 0;
skipped = false;
posPendingCount = 0;
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
// if (DEBUG) System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
return this;
}
@Override
public int nextDoc() throws IOException {
// if (DEBUG) System.out.println("SPR.nextDoc seg=" + segment + " freqIn.fp=" + freqIn.getFilePointer());
while(true) {
if (ord == limit) {
// if (DEBUG) System.out.println(" return END");
return doc = NO_MORE_DOCS;
}
ord++;
// Decode next doc/freq pair
final int code = freqIn.readVInt();
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
freq = freqIn.readVInt(); // else read freq
}
posPendingCount += freq;
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
position = 0;
// if (DEBUG) System.out.println(" return doc=" + doc);
return (doc = accum);
}
@Override
public int docID() {
return doc;
}
@Override
public int freq() {
return freq;
}
@Override
public int advance(int target) throws IOException {
//System.out.println("StandardR.D&PE advance target=" + target);
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
skipper = new Lucene40SkipListReader(freqIn.clone(), maxSkipLevels, skipInterval);
}
if (!skipped) {
// This is the first time this posting has
// skipped since reset() was called, so now we
// load the skip data for this posting
skipper.init(freqOffset+skipOffset,
freqOffset, proxOffset,
limit, false, false);
skipped = true;
}
final int newOrd = skipper.skipTo(target);
if (newOrd > ord) {
// Skipper moved
ord = newOrd;
doc = accum = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
lazyProxPointer = skipper.getProxPointer();
posPendingCount = 0;
position = 0;
}
}
// Now, linear scan for the rest:
do {
nextDoc();
} while (target > doc);
return doc;
}
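// nextPosition() first performs the deferred prox seek, then catches up over
// positions belonging to docs that were iterated without being consumed:
// because each pending position is a single VInt, whole entries are skipped
// by counting bytes whose high bit is clear (the final byte of every VInt).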
@Override
public int nextPosition() throws IOException {
if (lazyProxPointer != -1) {
proxIn.seek(lazyProxPointer);
lazyProxPointer = -1;
}
// scan over any docs that were iterated without their positions
if (posPendingCount > freq) {
position = 0;
while(posPendingCount != freq) {
if ((proxIn.readByte() & 0x80) == 0) {
posPendingCount--;
}
}
}
position += proxIn.readVInt();
posPendingCount--;
assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;
return position;
}
@Override
public int startOffset() {
return -1;
}
@Override
public int endOffset() {
return -1;
}
/** Returns the payload at this position, or null if no
* payload was indexed. */
@Override
public BytesRef getPayload() throws IOException {
return null;
}
@Override
public long cost() {
return limit;
}
}
// Decodes docs & positions & (payloads and/or offsets)
private class SegmentFullPositionsEnum extends DocsAndPositionsEnum {
final IndexInput startFreqIn;
private final IndexInput freqIn;
private final IndexInput proxIn;
int limit; // number of docs in this posting
int ord; // how many docs we've read
int doc = -1; // doc we last read
int accum; // accumulator for doc deltas
int freq; // freq we last read
int position;
Bits liveDocs;
long freqOffset;
long skipOffset;
long proxOffset;
int posPendingCount;
int payloadLength;
boolean payloadPending;
boolean skipped;
Lucene40SkipListReader skipper;
private BytesRef payload;
private long lazyProxPointer;
boolean storePayloads;
boolean storeOffsets;
int offsetLength;
int startOffset;
public SegmentFullPositionsEnum(IndexInput freqIn, IndexInput proxIn) {
startFreqIn = freqIn;
this.freqIn = freqIn.clone();
this.proxIn = proxIn.clone();
}
public SegmentFullPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
storeOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
storePayloads = fieldInfo.hasPayloads();
assert fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
assert storePayloads || storeOffsets;
if (payload == null) {
payload = new BytesRef();
payload.bytes = new byte[1];
}
this.liveDocs = liveDocs;
// TODO: for full enum case (eg segment merging) this
// seek is unnecessary; maybe we can avoid in such
// cases
freqIn.seek(termState.freqOffset);
lazyProxPointer = termState.proxOffset;
limit = termState.docFreq;
ord = 0;
doc = -1;
accum = 0;
position = 0;
startOffset = 0;
skipped = false;
posPendingCount = 0;
payloadPending = false;
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
//System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " this=" + this);
return this;
}
@Override
public int nextDoc() throws IOException {
while(true) {
if (ord == limit) {
//System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END");
return doc = NO_MORE_DOCS;
}
ord++;
// Decode next doc/freq pair
final int code = freqIn.readVInt();
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
freq = freqIn.readVInt(); // else read freq
}
posPendingCount += freq;
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
position = 0;
startOffset = 0;
//System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
return (doc = accum);
}
@Override
public int docID() {
return doc;
}
@Override
public int freq() throws IOException {
return freq;
}
@Override
public int advance(int target) throws IOException {
//System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
skipper = new Lucene40SkipListReader(freqIn.clone(), maxSkipLevels, skipInterval);
}
if (!skipped) {
// This is the first time this posting has
// skipped since reset() was called, so now we
// load the skip data for this posting
//System.out.println(" init skipper freqOffset=" + freqOffset + " skipOffset=" + skipOffset + " vs len=" + freqIn.length());
skipper.init(freqOffset+skipOffset,
freqOffset, proxOffset,
limit, storePayloads, storeOffsets);
skipped = true;
}
final int newOrd = skipper.skipTo(target);
if (newOrd > ord) {
// Skipper moved
ord = newOrd;
doc = accum = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
lazyProxPointer = skipper.getProxPointer();
posPendingCount = 0;
position = 0;
startOffset = 0;
payloadPending = false;
payloadLength = skipper.getPayloadLength();
offsetLength = skipper.getOffsetLength();
}
}
// Now, linear scan for the rest:
do {
nextDoc();
} while (target > doc);
return doc;
}
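// In the payload/offset-capable prox format each position entry starts with
// the VInt (posDelta << 1) | lengthChanged: when the low bit is set a new
// payload length follows as a VInt, otherwise the previous length is reused,
// and the payload bytes themselves trail the entry. Offsets use the same
// trick on a second VInt ((startOffsetDelta << 1) | offsetLengthChanged).
// Illustrative example: a position delta of 4 with an unchanged 3-byte
// payload is written as the single VInt 8 followed by the 3 payload bytes.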
@Override
public int nextPosition() throws IOException {
if (lazyProxPointer != -1) {
proxIn.seek(lazyProxPointer);
lazyProxPointer = -1;
}
if (payloadPending && payloadLength > 0) {
// payload of last position was never retrieved -- skip it
proxIn.seek(proxIn.getFilePointer() + payloadLength);
payloadPending = false;
}
// scan over any docs that were iterated without their positions
while(posPendingCount > freq) {
final int code = proxIn.readVInt();
if (storePayloads) {
if ((code & 1) != 0) {
// new payload length
payloadLength = proxIn.readVInt();
assert payloadLength >= 0;
}
assert payloadLength != -1;
}
if (storeOffsets) {
if ((proxIn.readVInt() & 1) != 0) {
// new offset length
offsetLength = proxIn.readVInt();
}
}
if (storePayloads) {
proxIn.seek(proxIn.getFilePointer() + payloadLength);
}
posPendingCount--;
position = 0;
startOffset = 0;
payloadPending = false;
//System.out.println("StandardR.D&PE skipPos");
}
// read next position
if (payloadPending && payloadLength > 0) {
// payload wasn't retrieved for last position
proxIn.seek(proxIn.getFilePointer()+payloadLength);
}
int code = proxIn.readVInt();
if (storePayloads) {
if ((code & 1) != 0) {
// new payload length
payloadLength = proxIn.readVInt();
assert payloadLength >= 0;
}
assert payloadLength != -1;
payloadPending = true;
code >>>= 1;
}
position += code;
if (storeOffsets) {
int offsetCode = proxIn.readVInt();
if ((offsetCode & 1) != 0) {
// new offset length
offsetLength = proxIn.readVInt();
}
startOffset += offsetCode >>> 1;
}
posPendingCount--;
assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;
//System.out.println("StandardR.D&PE nextPos return pos=" + position);
return position;
}
@Override
public int startOffset() throws IOException {
return storeOffsets ? startOffset : -1;
}
@Override
public int endOffset() throws IOException {
return storeOffsets ? startOffset + offsetLength : -1;
}
/** Returns the payload at this position, or null if no
* payload was indexed. */
@Override
public BytesRef getPayload() throws IOException {
if (storePayloads) {
if (payloadLength <= 0) {
return null;
}
assert lazyProxPointer == -1;
assert posPendingCount < freq;
if (payloadPending) {
if (payloadLength > payload.bytes.length) {
payload.grow(payloadLength);
}
proxIn.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
payloadPending = false;
}
return payload;
} else {
return null;
}
}
@Override
public long cost() {
return limit;
}
}
@Override
public long ramBytesUsed() {
return 0;
}
}