org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-backward-codecs Show documentation
Show all versions of lucene-backward-codecs Show documentation
Apache Lucene (module: backward-codecs)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.lucene99;
import static org.apache.lucene.backward_codecs.lucene99.ForUtil.BLOCK_SIZE;
import static org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat.DOC_CODEC;
import static org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat.MAX_SKIP_LEVELS;
import static org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat.PAY_CODEC;
import static org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat.POS_CODEC;
import static org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat.TERMS_CODEC;
import static org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat.VERSION_CURRENT;
import static org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat.VERSION_START;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat.IntBlockTermState;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
* Concrete class that reads docId(maybe frq,pos,offset,payloads) list with postings format.
*
* @lucene.experimental
*/
public final class Lucene99PostingsReader extends PostingsReaderBase {
/** Maximum byte size of a postings list to be fully prefetched. */
private static final int MAX_POSTINGS_SIZE_FOR_FULL_PREFETCH = 16_384;
// Input over the doc file (DOC_EXTENSION); always opened by the constructor.
private final IndexInput docIn;
// Input over the positions file; stays null when no field of the segment has proximity data.
private final IndexInput posIn;
// Input over the payloads/offsets file; stays null unless the segment has proximity data and
// at least one field has payloads or offsets.
private final IndexInput payIn;
// Version read from the doc file header; the other two files are required to carry exactly
// the same version.
private final int version;
/**
 * Sole constructor: opens the postings data files of a segment and validates their headers.
 *
 * <p>The doc file is always opened. The positions file is opened only when the segment has
 * proximity data, and the payloads/offsets file only when, in addition, some field has payloads
 * or offsets. If anything fails along the way, every input opened so far is closed before the
 * exception propagates.
 *
 * @param state read state supplying the directory, segment info, suffix and field infos
 * @throws IOException if a file cannot be opened or validated
 */
public Lucene99PostingsReader(SegmentReadState state) throws IOException {
  IndexInput docInput = null;
  IndexInput posInput = null;
  IndexInput payInput = null;
  boolean initialized = false;

  // NOTE: checksumming every byte of these data files on open would be too costly, so only the
  // structure of the checksum footer (FOOTER_MAGIC + algorithmID) is verified. That is cheap and
  // still catches some forms of corruption such as file truncation.
  final String docFileName =
      IndexFileNames.segmentFileName(
          state.segmentInfo.name, state.segmentSuffix, Lucene99PostingsFormat.DOC_EXTENSION);
  try {
    // Postings are read forward-only, so ask for ReadAdvice.NORMAL to benefit from readahead.
    docInput =
        state.directory.openInput(docFileName, state.context.withReadAdvice(ReadAdvice.NORMAL));
    version =
        CodecUtil.checkIndexHeader(
            docInput,
            DOC_CODEC,
            VERSION_START,
            VERSION_CURRENT,
            state.segmentInfo.getId(),
            state.segmentSuffix);
    CodecUtil.retrieveChecksum(docInput);

    if (state.fieldInfos.hasProx()) {
      final String posFileName =
          IndexFileNames.segmentFileName(
              state.segmentInfo.name, state.segmentSuffix, Lucene99PostingsFormat.POS_EXTENSION);
      posInput = state.directory.openInput(posFileName, state.context);
      // The positions file must have been written with the very same version as the doc file.
      CodecUtil.checkIndexHeader(
          posInput, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
      CodecUtil.retrieveChecksum(posInput);

      if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
        final String payFileName =
            IndexFileNames.segmentFileName(
                state.segmentInfo.name,
                state.segmentSuffix,
                Lucene99PostingsFormat.PAY_EXTENSION);
        payInput = state.directory.openInput(payFileName, state.context);
        CodecUtil.checkIndexHeader(
            payInput, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
        CodecUtil.retrieveChecksum(payInput);
      }
    }

    this.docIn = docInput;
    this.posIn = posInput;
    this.payIn = payInput;
    initialized = true;
  } finally {
    if (initialized == false) {
      // Do not leak file handles when opening or validation failed part-way.
      IOUtils.closeWhileHandlingException(docInput, posInput, payInput);
    }
  }
}
/**
 * Validates the terms file header and checks that the block size recorded at index time is the
 * one this reader was compiled with.
 *
 * @param termsIn terms input, positioned at its codec header
 * @param state read state of the segment being opened
 * @throws IOException if the header cannot be read or validated
 * @throws IllegalStateException if the recorded block size differs from {@code BLOCK_SIZE}
 */
@Override
public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
  // Make sure we are talking to the matching postings writer
  CodecUtil.checkIndexHeader(
      termsIn,
      TERMS_CODEC,
      VERSION_START,
      VERSION_CURRENT,
      state.segmentInfo.getId(),
      state.segmentSuffix);

  final int writtenBlockSize = termsIn.readVInt();
  if (writtenBlockSize == BLOCK_SIZE) {
    return;
  }
  throw new IllegalStateException(
      "index-time BLOCK_SIZE ("
          + writtenBlockSize
          + ") != read-time BLOCK_SIZE ("
          + BLOCK_SIZE
          + ")");
}
/**
 * Turns the first {@code count} deltas of {@code buffer} into running totals, in place, starting
 * from {@code base}.
 *
 * <p>The first slot is always updated, even when {@code count} is zero or negative.
 *
 * @param buffer deltas on entry, absolute values on exit
 * @param count number of leading entries to process
 * @param base value added to the first entry
 */
static void prefixSum(long[] buffer, int count, long base) {
  long running = buffer[0] + base;
  buffer[0] = running;
  for (int idx = 1; idx < count; idx++) {
    running += buffer[idx];
    buffer[idx] = running;
  }
}
/**
 * Scans {@code buffer} from index {@code from} (inclusive) up to {@code BLOCK_SIZE} (exclusive)
 * and returns the first index whose value is greater than <em>or equal to</em> {@code target}.
 *
 * @param buffer values to scan
 * @param target threshold to reach
 * @param from first index to look at
 * @return the matching index, or {@code BLOCK_SIZE} when no scanned entry reaches the target
 */
static int findFirstGreater(long[] buffer, int target, int from) {
  int idx = from;
  while (idx < BLOCK_SIZE && buffer[idx] < target) {
    idx++;
  }
  return idx < BLOCK_SIZE ? idx : BLOCK_SIZE;
}
/** Returns a fresh, empty {@link IntBlockTermState}. */
@Override
public BlockTermState newTermState() {
return new IntBlockTermState();
}
/** Closes the doc, positions and payloads/offsets inputs held by this reader. */
@Override
public void close() throws IOException {
IOUtils.close(docIn, posIn, payIn);
}
/**
 * Decodes the per-term metadata from {@code in} into {@code _termState}.
 *
 * <p>File pointers are delta-coded against the values already stored in the state unless
 * {@code absolute} is set, in which case they are reset to zero first. {@code docFreq} and
 * {@code totalTermFreq} of the state are only read here, never written, so they must already be
 * populated.
 *
 * @param in source of the encoded metadata
 * @param fieldInfo field the term belongs to; decides which optional values are present
 * @param _termState state to update; must be an {@link IntBlockTermState}
 * @param absolute whether file pointers are absolute rather than deltas
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void decodeTerm(
    DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
    throws IOException {
  final IntBlockTermState state = (IntBlockTermState) _termState;
  final boolean hasPositions =
      fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  final boolean hasOffsets =
      fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
          >= 0;
  final boolean hasPayloads = fieldInfo.hasPayloads();

  if (absolute) {
    state.docStartFP = 0;
    state.posStartFP = 0;
    state.payStartFP = 0;
  }

  // The low bit of the first vLong selects between the two encodings below.
  final long code = in.readVLong();
  final boolean carriesSingletonDelta = (code & 0x01) != 0;
  if (carriesSingletonDelta) {
    // Remaining bits: zig-zag encoded delta against the previous singleton doc ID.
    assert absolute == false;
    assert state.singletonDocID != -1;
    state.singletonDocID += BitUtil.zigZagDecode(code >>> 1);
  } else {
    // Remaining bits: delta of the doc file pointer; a singleton doc ID follows when docFreq is 1.
    state.docStartFP += code >>> 1;
    state.singletonDocID = state.docFreq == 1 ? in.readVInt() : -1;
  }

  if (hasPositions) {
    state.posStartFP += in.readVLong();
    if (hasOffsets || hasPayloads) {
      state.payStartFP += in.readVLong();
    }
    // Only present when the term has more than one block worth of positions.
    state.lastPosBlockOffset = state.totalTermFreq > BLOCK_SIZE ? in.readVLong() : -1;
  }

  // Skip data is only recorded for terms spanning more than one block of docs.
  state.skipOffset = state.docFreq > BLOCK_SIZE ? in.readVLong() : -1;
}
/**
 * Returns a postings enum for the given term, recycling {@code reuse} when it is of the right
 * kind and reports itself reusable for this reader and field.
 *
 * <p>A {@code BlockDocsEnum} is used unless the field indexes positions and the caller asked
 * for them, in which case an {@code EverythingEnum} is used.
 *
 * @param fieldInfo field the term belongs to
 * @param termState term metadata; must be an {@link IntBlockTermState}
 * @param reuse enum to recycle if possible, may be null
 * @param flags requested features, see {@link PostingsEnum}
 * @return an enum reset onto the term
 * @throws IOException if creating or resetting the enum fails
 */
@Override
public PostingsEnum postings(
    FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags)
    throws IOException {
  final boolean wantsPositions =
      fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0
          && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS);

  if (wantsPositions) {
    EverythingEnum positionsEnum = reuse instanceof EverythingEnum ? (EverythingEnum) reuse : null;
    if (positionsEnum == null || positionsEnum.canReuse(docIn, fieldInfo) == false) {
      positionsEnum = new EverythingEnum(fieldInfo);
    }
    return positionsEnum.reset((IntBlockTermState) termState, flags);
  }

  BlockDocsEnum docsOnlyEnum = reuse instanceof BlockDocsEnum ? (BlockDocsEnum) reuse : null;
  if (docsOnlyEnum == null || docsOnlyEnum.canReuse(docIn, fieldInfo) == false) {
    docsOnlyEnum = new BlockDocsEnum(fieldInfo);
  }
  return docsOnlyEnum.reset((IntBlockTermState) termState, flags);
}
/**
 * Returns an impacts enum for the given term.
 *
 * <p>Terms that fit in a single block have no skip data and are served by wrapping the regular
 * postings enum. Otherwise the implementation is picked from what the field indexes and what
 * the caller requested: docs only, docs plus positions, or everything.
 *
 * @param fieldInfo field the term belongs to
 * @param state term metadata; cast to {@link IntBlockTermState} on the multi-block path
 * @param flags requested features, see {@link PostingsEnum}
 * @return an impacts enum positioned before the first document
 * @throws IOException if creating the enum fails
 */
@Override
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags)
    throws IOException {
  if (state.docFreq <= BLOCK_SIZE) {
    // no skip data
    return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
  }

  final boolean hasPositions =
      fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  final boolean hasOffsets =
      fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
          >= 0;
  final boolean hasPayloads = fieldInfo.hasPayloads();

  final boolean wantsPositions =
      hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS);
  if (wantsPositions == false) {
    return new BlockImpactsDocsEnum(fieldInfo, (IntBlockTermState) state);
  }

  // From here on positions are both indexed and requested.
  final boolean wantsOffsets =
      hasOffsets && PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
  final boolean wantsPayloads =
      hasPayloads && PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
  if (wantsOffsets || wantsPayloads) {
    return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags);
  }
  return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
}
final class BlockDocsEnum extends PostingsEnum {
// Block decoders: forDeltaUtil decodes doc blocks, pforUtil decodes or skips freq blocks.
final ForUtil forUtil = new ForUtil();
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
final PForUtil pforUtil = new PForUtil(forUtil);
// One extra slot: the last element is a NO_MORE_DOCS sentinel set by the constructor.
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
private final long[] freqBuffer = new long[BLOCK_SIZE];
// Index of the next entry to consume from docBuffer; BLOCK_SIZE means "refill before reading".
private int docBufferUpto;
// Created lazily by advance() the first time skipping is needed, then kept across reset().
private Lucene99SkipReader skipper;
// Whether the skipper has been initialized for the current term since the last reset().
private boolean skipped;
// Whether prefetchSkipData was already called for the current term.
private boolean prefetchedSkipData;
// The enclosing reader's doc input; source of docIn clones and identity used by canReuse().
final IndexInput startDocIn;
// Private clone of startDocIn, created lazily by reset() for terms with more than one doc.
IndexInput docIn;
final boolean indexHasFreq;
final boolean indexHasPos;
final boolean indexHasOffsets;
final boolean indexHasPayloads;
private int docFreq; // number of docs in this posting list
private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
private int blockUpto; // number of docs in or before the current block
private int doc; // doc we last read
private long accum; // accumulator for doc deltas
// Where this term's postings start in the .doc file:
private long docTermStartFP;
// Where this term's skip data starts (after
// docTermStartFP) in the .doc file (or -1 if there is
// no skip data for this term):
private long skipOffset;
// docID for next skip point, we won't use skipper if
// target docID is not larger than this
private int nextSkipDoc;
private boolean needsFreq; // true if the caller actually needs frequencies
// as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block
// always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer
// (needsFreq=false)
private boolean isFreqsRead;
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
this.startDocIn = Lucene99PostingsReader.this.docIn;
this.docIn = null;
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos =
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets =
fieldInfo
.getIndexOptions()
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
indexHasPayloads = fieldInfo.hasPayloads();
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
// advance()
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
}
/**
 * Tells whether this enum may be reset onto a term of {@code fieldInfo} read through
 * {@code docIn}.
 *
 * <p>Reuse requires the same underlying doc input and identical freq / position / offset /
 * payload flags. The offsets flag matters even though this enum never returns offsets: the skip
 * reader is created lazily in {@code advance} from {@code indexHasPos}, {@code indexHasOffsets}
 * and {@code indexHasPayloads} and is kept across {@code reset} calls, so reusing the enum for a
 * field that differs only in offsets would leave the skip reader configured with stale flags.
 *
 * @param docIn doc input of the reader asking for reuse
 * @param fieldInfo field the enum would be reused for
 * @return true if the enum can safely be reset for that field
 */
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
  final IndexOptions options = fieldInfo.getIndexOptions();
  return docIn == startDocIn
      && indexHasFreq == (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0)
      && indexHasPos == (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
      && indexHasOffsets
          == (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0)
      && indexHasPayloads == fieldInfo.hasPayloads();
}
/**
 * Repositions this enum onto a new term.
 *
 * <p>The doc input is cloned on first need and only touched when the term has more than one
 * document; a single-document term is served from {@code singletonDocID} instead.
 *
 * @param termState metadata of the term to iterate
 * @param flags requested features; only {@link PostingsEnum#FREQS} is looked at here
 * @return this enum, positioned before the first document
 * @throws IOException if cloning or seeking the doc input fails
 */
public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
  // Snapshot the term metadata.
  docFreq = termState.docFreq;
  if (indexHasFreq) {
    totalTermFreq = termState.totalTermFreq;
  } else {
    totalTermFreq = docFreq;
  }
  docTermStartFP = termState.docStartFP;
  skipOffset = termState.skipOffset;
  singletonDocID = termState.singletonDocID;

  if (docFreq > 1) {
    if (docIn == null) {
      // lazy init
      docIn = startDocIn.clone();
    }
    seekAndPrefetchPostings(docIn, termState);
  }

  // Iteration state.
  doc = -1;
  accum = 0;
  blockUpto = 0;
  docBufferUpto = BLOCK_SIZE;
  nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
  skipped = false;
  prefetchedSkipData = false;

  // Frequency handling.
  needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
  isFreqsRead = true;
  if ((indexHasFreq && needsFreq) == false) {
    // Filling this buffer may not be cheap when doing primary key lookups, so we make sure to
    // not fill more than `docFreq` entries.
    Arrays.fill(freqBuffer, 0, Math.min(ForUtil.BLOCK_SIZE, docFreq), 1);
  }
  return this;
}
/**
 * Returns the frequency of the current document, decoding the pending freq block first when it
 * has not been read yet.
 *
 * @throws IOException if decoding the freq block fails
 */
@Override
public int freq() throws IOException {
  if (!isFreqsRead) {
    // Frequencies of this block were left undecoded by refillDocs(); decode them now.
    pforUtil.decode(docIn, freqBuffer);
    isFreqsRead = true;
  }
  // docBufferUpto already points past the current document.
  final int currentSlot = docBufferUpto - 1;
  return (int) freqBuffer[currentSlot];
}
// Always returns -1: this enum never decodes positions.
@Override
public int nextPosition() throws IOException {
return -1;
}
// Always returns -1: this enum never decodes offsets.
@Override
public int startOffset() throws IOException {
return -1;
}
// Always returns -1: this enum never decodes offsets.
@Override
public int endOffset() throws IOException {
return -1;
}
// Always returns null: this enum never decodes payloads.
@Override
public BytesRef getPayload() throws IOException {
return null;
}
// Returns the document last produced by nextDoc()/advance(); reset() sets it to -1.
@Override
public int docID() {
return doc;
}
// Loads the next run of doc IDs (and, depending on the flags, frequencies) into the buffers.
// Three cases: a full packed block, a single pulsed document, or a vInt-encoded tail block.
private void refillDocs() throws IOException {
// Check if we skipped reading the previous block of freqBuffer, and if yes, position docIn
// after it
if (isFreqsRead == false) {
pforUtil.skip(docIn);
isFreqsRead = true;
}
// Number of documents of this term not yet loaded into a block.
final int left = docFreq - blockUpto;
assert left >= 0;
if (left >= BLOCK_SIZE) {
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
if (indexHasFreq) {
if (needsFreq) {
// Defer decoding: freq() decodes on demand, otherwise the next refill skips the block.
isFreqsRead = false;
} else {
pforUtil.skip(docIn); // skip over freqBuffer if we don't need them at all
}
}
blockUpto += BLOCK_SIZE;
} else if (docFreq == 1) {
// Single-document term: served from the term state, docIn is not read.
docBuffer[0] = singletonDocID;
freqBuffer[0] = totalTermFreq;
docBuffer[1] = NO_MORE_DOCS;
blockUpto++;
} else {
// Read vInts:
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq);
prefixSum(docBuffer, left, accum);
// Terminate the partial block so scanning stops after the last real document.
docBuffer[left] = NO_MORE_DOCS;
blockUpto += left;
}
// Base for the deltas of the following block.
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0;
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
}
/**
 * Moves to the next document, refilling the doc buffer when the current block is used up.
 *
 * @return the new current document, as stored in the doc buffer
 * @throws IOException if refilling the buffer fails
 */
@Override
public int nextDoc() throws IOException {
  if (docBufferUpto == BLOCK_SIZE) {
    // Frequencies are not loaded here; freq() decodes them later if they turn out to be needed.
    refillDocs();
  }
  final int next = (int) docBuffer[docBufferUpto];
  docBufferUpto += 1;
  doc = next;
  return next;
}
// Advances to the first buffered document whose ID is >= target, consulting the skip reader
// first when the term spans more than one block and the target lies beyond nextSkipDoc.
@Override
public int advance(int target) throws IOException {
// current skip docID < docIDs generated from current buffer <= next skip docID
// we don't need to skip if target is buffered already
if (docFreq > BLOCK_SIZE) {
if (target <= nextSkipDoc) {
// We don't need skip data yet, but we have evidence that advance() is called, so let's
// prefetch skip data in the background.
if (prefetchedSkipData == false) {
prefetchSkipData(docIn, docTermStartFP, skipOffset);
prefetchedSkipData = true;
}
} else {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper =
new Lucene99SkipReader(
docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads);
}
if (!skipped) {
assert skipOffset != -1;
// This is the first time this enum has skipped
// since reset() was called; load the skip data:
skipper.init(docTermStartFP + skipOffset, docTermStartFP, 0, 0, docFreq);
skipped = true;
}
// always plus one to fix the result, since skip position in Lucene99SkipReader
// is a little different from MultiLevelSkipListReader
final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto >= blockUpto) {
// Skipper moved
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
blockUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
accum = skipper.getDoc(); // actually, this is just lastSkipEntry
docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
// even if freqBuffer were not read from the previous block, we will mark them as read,
// as we don't need to skip the previous block freqBuffer in refillDocs,
// as we have already positioned docIn where it needs to be.
isFreqsRead = true;
}
// next time we call advance, this is used to
// foresee whether skipper is necessary.
nextSkipDoc = skipper.getNextSkipDoc();
}
}
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
// Now scan... this is an inlined/pared down version
// of nextDoc():
// The loop has no bounds check: it relies on a NO_MORE_DOCS entry terminating the buffer.
long doc;
while (true) {
doc = docBuffer[docBufferUpto];
if (doc >= target) {
break;
}
++docBufferUpto;
}
// Leave docBufferUpto one past the returned document, as nextDoc() does.
docBufferUpto++;
return this.doc = (int) doc;
}
// Cost estimate: the number of documents in this posting list.
@Override
public long cost() {
return docFreq;
}
}
// Also handles payloads + offsets
final class EverythingEnum extends PostingsEnum {
// Block decoders: forDeltaUtil decodes doc blocks; pforUtil decodes or skips freq, position,
// payload-length and offset blocks.
final ForUtil forUtil = new ForUtil();
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
final PForUtil pforUtil = new PForUtil(forUtil);
// One extra slot: the last element is a NO_MORE_DOCS sentinel set by the constructor.
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
private final long[] freqBuffer = new long[BLOCK_SIZE + 1];
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
// The three buffers below are null when the field has no payloads / no offsets respectively.
private final long[] payloadLengthBuffer;
private final long[] offsetStartDeltaBuffer;
private final long[] offsetLengthBuffer;
// Payload bytes of the current position block; grown on demand by refillPositions().
private byte[] payloadBytes;
// Read offset into payloadBytes for the next position's payload.
private int payloadByteUpto;
// Length of the payload of the position last returned by nextPosition().
private int payloadLength;
// Start offset of the previous position in the current doc; base for the next offset delta.
private int lastStartOffset;
private int startOffset;
private int endOffset;
// Index of the next entry to consume from docBuffer; BLOCK_SIZE means "refill before reading".
private int docBufferUpto;
// Index of the next entry to consume from the position buffers.
private int posBufferUpto;
// Created lazily by advance() the first time skipping is needed, then kept across reset().
private Lucene99SkipReader skipper;
// Whether the skipper has been initialized for the current term since the last reset().
private boolean skipped;
// Whether prefetchSkipData was already called for the current term.
private boolean prefetchedSkipData;
// The enclosing reader's doc input; source of docIn clones and identity used by canReuse().
final IndexInput startDocIn;
// Private clone of startDocIn, created lazily by reset() for terms with more than one doc.
IndexInput docIn;
final IndexInput posIn;
// Null when the field has neither offsets nor payloads.
final IndexInput payIn;
// Reusable view over payloadBytes handed out by getPayload(); null when there are no payloads.
final BytesRef payload;
final boolean indexHasOffsets;
final boolean indexHasPayloads;
private int docFreq; // number of docs in this posting list
private long totalTermFreq; // number of positions in this posting list
private int blockUpto; // number of docs in or before the current block
private int doc; // doc we last read
private long accum; // accumulator for doc deltas
private int freq; // freq we last read
private int position; // current position
// how many positions "behind" we are; nextPosition must
// skip these to "catch up":
private int posPendingCount;
// Lazy pos seek: if != -1 then we must seek to this FP
// before reading positions:
private long posPendingFP;
// Lazy pay seek: if != -1 then we must seek to this FP
// before reading payloads/offsets:
private long payPendingFP;
// Where this term's postings start in the .doc file:
private long docTermStartFP;
// Where this term's postings start in the .pos file:
private long posTermStartFP;
// Where this term's payloads/offsets start in the .pay
// file:
private long payTermStartFP;
// File pointer where the last (vInt encoded) pos delta
// block is. We need this to know whether to bulk
// decode vs vInt decode the block:
private long lastPosBlockFP;
// Where this term's skip data starts (after
// docTermStartFP) in the .doc file (or -1 if there is
// no skip data for this term):
private long skipOffset;
// Doc ID of the next skip point; advance() only consults the skipper for larger targets.
private int nextSkipDoc;
private boolean needsOffsets; // true if we actually need offsets
private boolean needsPayloads; // true if we actually need payloads
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
/**
 * Creates an enum able to return positions, and optionally offsets and payloads, for terms of
 * {@code fieldInfo}.
 *
 * <p>The positions input is cloned eagerly, the payloads/offsets input only when the field has
 * offsets or payloads, and the doc input lazily in {@code reset}. Buffers for offsets and
 * payloads are only allocated when the field has them.
 *
 * @param fieldInfo field whose index options and payload flag are captured
 * @throws IOException declared for callers of this constructor
 */
public EverythingEnum(FieldInfo fieldInfo) throws IOException {
  indexHasOffsets =
      fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
          >= 0;
  indexHasPayloads = fieldInfo.hasPayloads();

  // Inputs.
  this.startDocIn = Lucene99PostingsReader.this.docIn;
  this.docIn = null;
  this.posIn = Lucene99PostingsReader.this.posIn.clone();
  this.payIn =
      (indexHasOffsets || indexHasPayloads) ? Lucene99PostingsReader.this.payIn.clone() : null;

  // Offset buffers; without offsets the reported offsets stay at -1.
  offsetStartDeltaBuffer = indexHasOffsets ? new long[BLOCK_SIZE] : null;
  offsetLengthBuffer = indexHasOffsets ? new long[BLOCK_SIZE] : null;
  if (indexHasOffsets == false) {
    startOffset = -1;
    endOffset = -1;
  }

  // Payload buffers.
  payloadLengthBuffer = indexHasPayloads ? new long[BLOCK_SIZE] : null;
  payloadBytes = indexHasPayloads ? new byte[128] : null;
  payload = indexHasPayloads ? new BytesRef() : null;

  // The trailing sentinel lets advance() scan without a bounds check.
  docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
}
/**
 * Tells whether this enum may be reset onto a term of {@code fieldInfo} read through
 * {@code docIn}: the doc input must be the one this enum was created for, and the offsets and
 * payloads flags of the field must match the ones captured at construction.
 *
 * @param docIn doc input of the reader asking for reuse
 * @param fieldInfo field the enum would be reused for
 * @return true if the enum can be reset for that field
 */
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
  if (docIn != startDocIn) {
    return false;
  }
  final boolean fieldHasOffsets =
      fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
          >= 0;
  if (indexHasOffsets != fieldHasOffsets) {
    return false;
  }
  return indexHasPayloads == fieldInfo.hasPayloads();
}
/**
 * Repositions this enum onto a new term.
 *
 * <p>The doc input is cloned on first need and only touched when the term has more than one
 * document. Seeks on the positions and payloads/offsets inputs are recorded as pending and
 * performed later, by the first {@code nextPosition} call.
 *
 * @param termState metadata of the term to iterate
 * @param flags requested features; {@link PostingsEnum#OFFSETS} and
 *     {@link PostingsEnum#PAYLOADS} are looked at here
 * @return this enum, positioned before the first document
 * @throws IOException if cloning or seeking the doc input fails
 */
public EverythingEnum reset(IntBlockTermState termState, int flags) throws IOException {
  // Snapshot the term metadata.
  docFreq = termState.docFreq;
  totalTermFreq = termState.totalTermFreq;
  singletonDocID = termState.singletonDocID;
  skipOffset = termState.skipOffset;
  docTermStartFP = termState.docStartFP;
  posTermStartFP = termState.posStartFP;
  payTermStartFP = termState.payStartFP;

  if (docFreq > 1) {
    if (docIn == null) {
      // lazy init
      docIn = startDocIn.clone();
    }
    seekAndPrefetchPostings(docIn, termState);
  }

  // Position state: seeks are deferred until positions are actually requested.
  posPendingCount = 0;
  posPendingFP = posTermStartFP;
  payPendingFP = payTermStartFP;
  final long positionCount = termState.totalTermFreq;
  if (positionCount > BLOCK_SIZE) {
    lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
  } else if (positionCount == BLOCK_SIZE) {
    lastPosBlockFP = -1;
  } else {
    lastPosBlockFP = posTermStartFP;
  }

  needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
  needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);

  // Doc iteration state.
  doc = -1;
  accum = 0;
  blockUpto = 0;
  docBufferUpto = BLOCK_SIZE;
  // Within the first block no skipping is needed; single-block terms never skip at all.
  nextSkipDoc = docFreq > BLOCK_SIZE ? BLOCK_SIZE - 1 : NO_MORE_DOCS;
  skipped = false;
  prefetchedSkipData = false;
  return this;
}
// Returns the frequency captured by the last nextDoc()/advance() call.
@Override
public int freq() throws IOException {
return freq;
}
// Returns the document last produced by nextDoc()/advance(); reset() sets it to -1.
@Override
public int docID() {
return doc;
}
// Loads the next run of doc IDs and frequencies into the buffers. Three cases: a full packed
// block, a single pulsed document, or a vInt-encoded tail block. Unlike the docs-only enum,
// frequencies are always decoded here.
private void refillDocs() throws IOException {
// Number of documents of this term not yet loaded into a block.
final int left = docFreq - blockUpto;
assert left >= 0;
if (left >= BLOCK_SIZE) {
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
pforUtil.decode(docIn, freqBuffer);
blockUpto += BLOCK_SIZE;
} else if (docFreq == 1) {
// Single-document term: served from the term state, docIn is not read.
docBuffer[0] = singletonDocID;
freqBuffer[0] = totalTermFreq;
docBuffer[1] = NO_MORE_DOCS;
blockUpto++;
} else {
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
prefixSum(docBuffer, left, accum);
// Terminate the partial block so scanning stops after the last real document.
docBuffer[left] = NO_MORE_DOCS;
blockUpto += left;
}
// Base for the deltas of the following block.
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0;
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
}
// Loads the next block of position deltas (plus payload lengths/bytes and offsets, when
// indexed) into the position buffers. The last block of a term is vInt-encoded inside the
// positions file; every other block is packed, with payload and offset data read from payIn.
private void refillPositions() throws IOException {
if (posIn.getFilePointer() == lastPosBlockFP) {
// Tail block: holds the positions left over after the full blocks.
final int count = (int) (totalTermFreq % BLOCK_SIZE);
// Lengths are only re-read when the low bit of the code signals a change.
int payloadLength = 0;
int offsetLength = 0;
payloadByteUpto = 0;
for (int i = 0; i < count; i++) {
int code = posIn.readVInt();
if (indexHasPayloads) {
if ((code & 1) != 0) {
payloadLength = posIn.readVInt();
}
payloadLengthBuffer[i] = payloadLength;
posDeltaBuffer[i] = code >>> 1;
if (payloadLength != 0) {
if (payloadByteUpto + payloadLength > payloadBytes.length) {
payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
}
posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
payloadByteUpto += payloadLength;
}
} else {
posDeltaBuffer[i] = code;
}
if (indexHasOffsets) {
int deltaCode = posIn.readVInt();
if ((deltaCode & 1) != 0) {
offsetLength = posIn.readVInt();
}
offsetStartDeltaBuffer[i] = deltaCode >>> 1;
offsetLengthBuffer[i] = offsetLength;
}
}
// Rewind: payloadByteUpto served as the write cursor above and is the read cursor afterwards.
payloadByteUpto = 0;
} else {
pforUtil.decode(posIn, posDeltaBuffer);
if (indexHasPayloads) {
if (needsPayloads) {
pforUtil.decode(payIn, payloadLengthBuffer);
int numBytes = payIn.readVInt();
if (numBytes > payloadBytes.length) {
payloadBytes = ArrayUtil.growNoCopy(payloadBytes, numBytes);
}
payIn.readBytes(payloadBytes, 0, numBytes);
} else {
// this works, because when writing a vint block we always force the first length to be
// written
pforUtil.skip(payIn); // skip over lengths
int numBytes = payIn.readVInt(); // read length of payloadBytes
payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
}
payloadByteUpto = 0;
}
if (indexHasOffsets) {
if (needsOffsets) {
pforUtil.decode(payIn, offsetStartDeltaBuffer);
pforUtil.decode(payIn, offsetLengthBuffer);
} else {
// this works, because when writing a vint block we always force the first length to be
// written
pforUtil.skip(payIn); // skip over starts
pforUtil.skip(payIn); // skip over lengths
}
}
}
}
/**
 * Moves to the next document and captures its frequency. The positions of the new document are
 * added to the pending count so that {@code nextPosition} can catch up lazily.
 *
 * @return the new current document, as stored in the doc buffer
 * @throws IOException if refilling the buffers fails
 */
@Override
public int nextDoc() throws IOException {
  if (docBufferUpto == BLOCK_SIZE) {
    refillDocs();
  }
  final int slot = docBufferUpto;
  doc = (int) docBuffer[slot];
  freq = (int) freqBuffer[slot];
  docBufferUpto = slot + 1;
  posPendingCount += freq;

  // Positions and offsets restart from zero for every document.
  position = 0;
  lastStartOffset = 0;
  return doc;
}
// Advances to the first buffered document whose ID is >= target, consulting the skip reader
// first when the target lies beyond nextSkipDoc. Frequencies of every document scanned over are
// added to posPendingCount so that nextPosition() knows how many positions to skip.
@Override
public int advance(int target) throws IOException {
if (target > nextSkipDoc) {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper =
new Lucene99SkipReader(
docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads);
}
if (!skipped) {
assert skipOffset != -1;
// This is the first time this enum has skipped
// since reset() was called; load the skip data:
skipper.init(
docTermStartFP + skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
skipped = true;
}
final int newDocUpto = skipper.skipTo(target) + 1;
// Compare against the number of documents already consumed from the current block.
if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) {
// Skipper moved
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
blockUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer());
// Position/payload inputs are not moved here; the seeks are recorded and performed lazily
// by nextPosition().
posPendingFP = skipper.getPosPointer();
payPendingFP = skipper.getPayPointer();
posPendingCount = skipper.getPosBufferUpto();
lastStartOffset = 0; // new document
payloadByteUpto = skipper.getPayloadByteUpto();
}
nextSkipDoc = skipper.getNextSkipDoc();
} else {
// We don't need skip data yet, but we have evidence that advance() is used, so prefetch it
// in the background.
if (prefetchedSkipData == false) {
prefetchSkipData(docIn, docTermStartFP, skipOffset);
prefetchedSkipData = true;
}
}
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
// Now scan:
// The loop has no bounds check: it relies on a NO_MORE_DOCS entry terminating the buffer.
long doc;
while (true) {
doc = docBuffer[docBufferUpto];
freq = (int) freqBuffer[docBufferUpto];
posPendingCount += freq;
docBufferUpto++;
if (doc >= target) {
break;
}
}
position = 0;
lastStartOffset = 0;
return this.doc = (int) doc;
}
// TODO: in theory we could avoid loading frq block
// when not needed, ie, use skip data to load how far to
// seek the pos pointer ... instead of having to load frq
// blocks only to sum up how many positions to skip
//
// Skips the positions of documents that were stepped over without their positions being read,
// i.e. everything pending except the positions of the current document.
private void skipPositions() throws IOException {
// Skip positions now:
int toSkip = posPendingCount - freq;
// if (DEBUG) {
// System.out.println(" FPR.skipPositions: toSkip=" + toSkip);
// }
final int leftInBlock = BLOCK_SIZE - posBufferUpto;
if (toSkip < leftInBlock) {
// Everything to skip is already buffered: just step over it, keeping the payload cursor in
// sync.
int end = posBufferUpto + toSkip;
while (posBufferUpto < end) {
if (indexHasPayloads) {
payloadByteUpto += payloadLengthBuffer[posBufferUpto];
}
posBufferUpto++;
}
} else {
toSkip -= leftInBlock;
// Skip whole blocks without decoding them.
while (toSkip >= BLOCK_SIZE) {
assert posIn.getFilePointer() != lastPosBlockFP;
pforUtil.skip(posIn);
if (indexHasPayloads) {
// Skip payloadLength block:
pforUtil.skip(payIn);
// Skip payloadBytes block:
int numBytes = payIn.readVInt();
payIn.seek(payIn.getFilePointer() + numBytes);
}
if (indexHasOffsets) {
pforUtil.skip(payIn);
pforUtil.skip(payIn);
}
toSkip -= BLOCK_SIZE;
}
// Decode the block holding the first wanted position and step over the remainder.
refillPositions();
payloadByteUpto = 0;
posBufferUpto = 0;
while (posBufferUpto < toSkip) {
if (indexHasPayloads) {
payloadByteUpto += payloadLengthBuffer[posBufferUpto];
}
posBufferUpto++;
}
}
position = 0;
lastStartOffset = 0;
}
// Returns the next position of the current document. Performs any pending seek on the
// position/payload inputs first, then skips positions belonging to earlier documents, and
// finally updates the payload and offset state for the returned position.
@Override
public int nextPosition() throws IOException {
assert posPendingCount > 0;
if (posPendingFP != -1) {
// A seek was deferred by reset() or advance(); perform it now.
posIn.seek(posPendingFP);
posPendingFP = -1;
if (payPendingFP != -1 && payIn != null) {
payIn.seek(payPendingFP);
payPendingFP = -1;
}
// Force buffer refill:
posBufferUpto = BLOCK_SIZE;
}
if (posPendingCount > freq) {
// More positions are pending than the current document owns: skip the surplus.
skipPositions();
posPendingCount = freq;
}
if (posBufferUpto == BLOCK_SIZE) {
refillPositions();
posBufferUpto = 0;
}
position += posDeltaBuffer[posBufferUpto];
if (indexHasPayloads) {
payloadLength = (int) payloadLengthBuffer[posBufferUpto];
// The BytesRef is a view into the shared payloadBytes array, not a copy.
payload.bytes = payloadBytes;
payload.offset = payloadByteUpto;
payload.length = payloadLength;
payloadByteUpto += payloadLength;
}
if (indexHasOffsets) {
startOffset = lastStartOffset + (int) offsetStartDeltaBuffer[posBufferUpto];
endOffset = startOffset + (int) offsetLengthBuffer[posBufferUpto];
lastStartOffset = startOffset;
}
posBufferUpto++;
posPendingCount--;
return position;
}
// Start offset of the position last returned by nextPosition(); the constructor sets it to -1
// when the field has no offsets.
@Override
public int startOffset() {
return startOffset;
}
// End offset of the position last returned by nextPosition(); the constructor sets it to -1
// when the field has no offsets.
@Override
public int endOffset() {
return endOffset;
}
/**
 * Returns the payload recorded for the current position, or null when the recorded payload
 * length is zero. The returned instance is shared and overwritten by later
 * {@code nextPosition} calls.
 */
@Override
public BytesRef getPayload() {
  return payloadLength == 0 ? null : payload;
}
// Cost estimate: the number of documents in this posting list.
@Override
public long cost() {
return docFreq;
}
}
final class BlockImpactsDocsEnum extends ImpactsEnum {
// Block decoders: forDeltaUtil decodes doc blocks, pforUtil decodes or skips freq blocks.
final ForUtil forUtil = new ForUtil();
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
final PForUtil pforUtil = new PForUtil(forUtil);
// One extra slot: the last element is a NO_MORE_DOCS sentinel set by the constructor.
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
private final long[] freqBuffer = new long[BLOCK_SIZE];
// Index of the next entry to consume from docBuffer; BLOCK_SIZE means "refill before reading".
private int docBufferUpto;
// Skip reader that also supplies impacts; created and initialized eagerly by the constructor.
private final Lucene99ScoreSkipReader skipper;
// Private clone of the enclosing reader's doc input.
final IndexInput docIn;
final boolean indexHasFreqs;
private int docFreq; // number of docs in this posting list
private int blockUpto; // number of documents in or before the current block
private int doc; // doc we last read
private long accum; // accumulator for doc deltas
// Starts at -1 so that the first advanceShallow() call always consults the skipper.
private int nextSkipDoc = -1;
// as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block
// always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer
// (needsFreq=false)
private boolean isFreqsRead;
/**
 * Creates a docs-only impacts enum positioned before the first document of the given term.
 *
 * <p>Unlike the plain postings enums, the doc input is cloned and the skip reader is created
 * and initialized eagerly, since impacts are served from skip data.
 *
 * @param fieldInfo field the term belongs to
 * @param termState metadata of the term to iterate
 * @throws IOException if cloning, seeking or initializing the skip reader fails
 */
public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
    throws IOException {
  final IndexOptions options = fieldInfo.getIndexOptions();
  indexHasFreqs = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
  final boolean hasPositions =
      options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  final boolean hasOffsets =
      options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
  final boolean hasPayloads = fieldInfo.hasPayloads();

  this.docIn = Lucene99PostingsReader.this.docIn.clone();
  docFreq = termState.docFreq;
  seekAndPrefetchPostings(docIn, termState);
  // Impacts almost certainly need skip data
  prefetchSkipData(docIn, termState.docStartFP, termState.skipOffset);

  // Iteration state: nothing buffered yet, so the first read triggers a refill.
  doc = -1;
  accum = 0;
  blockUpto = 0;
  docBufferUpto = BLOCK_SIZE;

  skipper =
      new Lucene99ScoreSkipReader(
          docIn.clone(), MAX_SKIP_LEVELS, hasPositions, hasOffsets, hasPayloads);
  skipper.init(
      termState.docStartFP + termState.skipOffset,
      termState.docStartFP,
      termState.posStartFP,
      termState.payStartFP,
      docFreq);

  // The trailing sentinel lets scans over the doc buffer run without a bounds check.
  docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;

  this.isFreqsRead = true;
  if (!indexHasFreqs) {
    // Without indexed frequencies every document reports a frequency of 1.
    Arrays.fill(freqBuffer, 1L);
  }
}
/**
 * Returns the frequency of the current document, decoding the pending freq block first when it
 * has not been read yet.
 *
 * @throws IOException if decoding the freq block fails
 */
@Override
public int freq() throws IOException {
  if (!isFreqsRead) {
    // Frequencies of this block were left undecoded by refillDocs(); decode them now.
    pforUtil.decode(docIn, freqBuffer);
    isFreqsRead = true;
  }
  // docBufferUpto already points past the current document.
  final int currentSlot = docBufferUpto - 1;
  return (int) freqBuffer[currentSlot];
}
/** Returns the doc ID this enum is currently positioned on (-1 before the first move). */
@Override
public int docID() {
  return this.doc;
}
/**
 * Decodes the next block of doc IDs into docBuffer: a bulk-encoded block while at least
 * BLOCK_SIZE documents remain, otherwise the vInt-encoded tail.
 */
private void refillDocs() throws IOException {
  // If the frequencies of the previous block were never decoded, docIn still sits in front of
  // them: step over that freq block first.
  if (isFreqsRead == false) {
    pforUtil.skip(docIn);
    isFreqsRead = true;
  }

  final int remaining = docFreq - blockUpto;
  assert remaining >= 0;

  if (remaining < BLOCK_SIZE) {
    // Tail: doc deltas (and freqs, when indexed) are read together.
    PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, remaining, indexHasFreqs, true);
    prefixSum(docBuffer, remaining, accum);
    docBuffer[remaining] = NO_MORE_DOCS;
    blockUpto += remaining;
  } else {
    // Full block: decode the docs now and leave the freq block for freq() to read on demand.
    forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
    if (indexHasFreqs) {
      isFreqsRead = false;
    }
    blockUpto += BLOCK_SIZE;
  }

  accum = docBuffer[BLOCK_SIZE - 1];
  docBufferUpto = 0;
  assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
}
/**
 * Moves the skip reader so that it covers {@code target}. When the skipper lands on or after
 * the current block, the enum is repositioned so that the next refill reads the block the
 * skipper points to.
 */
@Override
public void advanceShallow(int target) throws IOException {
  if (target <= nextSkipDoc) {
    // The current skip range already covers the target.
    return;
  }

  // always plus one to fix the result, since skip position in Lucene99SkipReader
  // is a little different from MultiLevelSkipListReader
  final int skippedDocUpto = skipper.skipTo(target) + 1;

  if (skippedDocUpto >= blockUpto) {
    // Skipper moved
    assert skippedDocUpto % BLOCK_SIZE == 0 : "got " + skippedDocUpto;
    blockUpto = skippedDocUpto;

    // Force to read next block
    docBufferUpto = BLOCK_SIZE;
    accum = skipper.getDoc();
    docIn.seek(skipper.getDocPointer());
    // docIn was just positioned explicitly, so refillDocs() has no pending freq block to skip.
    isFreqsRead = true;
  }

  // Remember how far this skip entry reaches, so that later calls can tell whether the
  // skipper is needed at all.
  nextSkipDoc = skipper.getNextSkipDoc();
  assert nextSkipDoc >= target;
}
/** Returns the impacts exposed by the skip reader for the range containing the current doc. */
@Override
public Impacts getImpacts() throws IOException {
  // nextDoc() never moves the skip lists, so catch them up to the current doc first; otherwise
  // the returned impacts could span a wider range of doc IDs than necessary.
  final int current = doc;
  advanceShallow(current);
  return skipper.getImpacts();
}
/** Moves to the next buffered document, refilling the buffer when it is exhausted. */
@Override
public int nextDoc() throws IOException {
  if (docBufferUpto == BLOCK_SIZE) {
    refillDocs();
  }
  final int next = (int) docBuffer[docBufferUpto];
  docBufferUpto++;
  this.doc = next;
  return next;
}
/**
 * Advances to the entry that findFirstGreater locates for {@code target} in the current
 * block, using the skip data first when the target lies beyond the current skip range.
 */
@Override
public int advance(int target) throws IOException {
  if (target > nextSkipDoc) {
    advanceShallow(target);
  }
  if (docBufferUpto == BLOCK_SIZE) {
    refillDocs();
  }

  // Search the buffered block starting at the current position.
  final int hit = findFirstGreater(docBuffer, target, docBufferUpto);
  final int found = (int) docBuffer[hit];
  docBufferUpto = hit + 1;
  this.doc = found;
  return found;
}
/** Always returns -1: this enum keeps no position state. */
@Override
public int nextPosition() throws IOException {
return -1;
}
/** Always returns -1: this enum keeps no offset state. */
@Override
public int startOffset() {
return -1;
}
/** Always returns -1: this enum keeps no offset state. */
@Override
public int endOffset() {
return -1;
}
/** Always returns null: this enum keeps no payload state. */
@Override
public BytesRef getPayload() {
return null;
}
/** Returns the number of documents in this posting list as the iteration cost. */
@Override
public long cost() {
  return this.docFreq;
}
}
/**
 * {@link ImpactsEnum} over docs, freqs and positions of a single term. Offsets and payloads are
 * never exposed: their bytes are stepped over when the vInt-encoded tail of the positions is
 * read.
 */
final class BlockImpactsPostingsEnum extends ImpactsEnum {

  final ForUtil forUtil = new ForUtil();
  final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
  final PForUtil pforUtil = new PForUtil(forUtil);

  // Decoded doc IDs, freqs and position deltas of the current blocks.
  private final long[] docBuffer = new long[BLOCK_SIZE];
  private final long[] freqBuffer = new long[BLOCK_SIZE];
  private final long[] posDeltaBuffer = new long[BLOCK_SIZE];

  // Next entry to consume in docBuffer / posDeltaBuffer.
  private int docBufferUpto;
  private int posBufferUpto;

  private final Lucene99ScoreSkipReader skipper;

  final IndexInput docIn;
  final IndexInput posIn;

  final boolean indexHasOffsets;
  final boolean indexHasPayloads;

  private int docFreq; // number of docs in this posting list
  private long totalTermFreq; // number of positions in this posting list
  private int docUpto; // how many docs we've read
  private int doc; // doc we last read
  private long accum; // accumulator for doc deltas
  private int freq; // freq we last read
  private int position; // current position

  // how many positions "behind" we are; nextPosition must
  // skip these to "catch up":
  private int posPendingCount;

  // Lazy pos seek: if != -1 then we must seek to this FP
  // before reading positions:
  private long posPendingFP;

  // Where this term's postings start in the .doc file:
  private long docTermStartFP;

  // Where this term's postings start in the .pos file:
  private long posTermStartFP;

  // Where this term's payloads/offsets start in the .pay
  // file:
  private long payTermStartFP;

  // File pointer where the last (vInt encoded) pos delta
  // block is. We need this to know whether to bulk
  // decode vs vInt decode the block:
  private long lastPosBlockFP;

  // Value of skipper.getNextSkipDoc() after the last skip; larger targets need the skipper.
  private int nextSkipDoc = -1;

  /**
   * Creates an impacts enum over docs, freqs and positions of one term.
   *
   * @param fieldInfo field the term belongs to
   * @param termState term metadata (frequencies and file pointers of the postings)
   * @throws IOException if seeking or prefetching the postings fails
   */
  public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
      throws IOException {
    indexHasOffsets =
        fieldInfo
                .getIndexOptions()
                .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
            >= 0;
    indexHasPayloads = fieldInfo.hasPayloads();

    // Term-level metadata.
    docFreq = termState.docFreq;
    totalTermFreq = termState.totalTermFreq;
    docTermStartFP = termState.docStartFP;
    posTermStartFP = termState.posStartFP;
    payTermStartFP = termState.payStartFP;

    // Locate the vInt-encoded tail of the positions (-1: there is none).
    if (totalTermFreq > BLOCK_SIZE) {
      lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
    } else if (totalTermFreq == BLOCK_SIZE) {
      lastPosBlockFP = -1;
    } else {
      lastPosBlockFP = posTermStartFP;
    }

    // Iteration state: nothing read yet; the doc buffer is marked exhausted so that the first
    // advance() refills it, and positions are sought lazily.
    doc = -1;
    accum = 0;
    docUpto = 0;
    docBufferUpto = BLOCK_SIZE;
    posPendingFP = posTermStartFP;
    posPendingCount = 0;

    // Private clones of the inputs, positioned on this term.
    this.docIn = Lucene99PostingsReader.this.docIn.clone();
    this.posIn = Lucene99PostingsReader.this.posIn.clone();
    seekAndPrefetchPostings(docIn, termState);
    prefetchSkipData(docIn, termState.docStartFP, termState.skipOffset);

    skipper =
        new Lucene99ScoreSkipReader(
            docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads);
    skipper.init(
        docTermStartFP + termState.skipOffset,
        docTermStartFP,
        posTermStartFP,
        payTermStartFP,
        docFreq);
  }

  /** Returns the frequency recorded by the last successful advance(). */
  @Override
  public int freq() throws IOException {
    return this.freq;
  }

  /** Returns the doc ID this enum is currently positioned on (-1 before the first move). */
  @Override
  public int docID() {
    return this.doc;
  }

  /**
   * Decodes the next block of docs and freqs: bulk-encoded while at least BLOCK_SIZE documents
   * remain, otherwise the vInt-encoded tail.
   */
  private void refillDocs() throws IOException {
    final int remaining = docFreq - docUpto;
    assert remaining >= 0;

    if (remaining < BLOCK_SIZE) {
      PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, remaining, true, true);
      prefixSum(docBuffer, remaining, accum);
      docBuffer[remaining] = NO_MORE_DOCS;
    } else {
      forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
      pforUtil.decode(docIn, freqBuffer);
    }

    accum = docBuffer[BLOCK_SIZE - 1];
    docBufferUpto = 0;
  }

  /**
   * Decodes the next block of position deltas. At the tail block the deltas are vInt-encoded
   * and interleaved with payload and offset data, which is read past without being kept.
   */
  private void refillPositions() throws IOException {
    if (posIn.getFilePointer() != lastPosBlockFP) {
      // Regular bulk-encoded block.
      pforUtil.decode(posIn, posDeltaBuffer);
      return;
    }

    final int tailCount = (int) (totalTermFreq % BLOCK_SIZE);
    int payloadLength = 0;
    for (int i = 0; i < tailCount; i++) {
      final int code = posIn.readVInt();
      if (indexHasPayloads == false) {
        posDeltaBuffer[i] = code;
      } else {
        // The low bit signals that a new payload length follows.
        if ((code & 1) != 0) {
          payloadLength = posIn.readVInt();
        }
        posDeltaBuffer[i] = code >>> 1;
        if (payloadLength != 0) {
          // Step over the payload bytes.
          posIn.seek(posIn.getFilePointer() + payloadLength);
        }
      }

      if (indexHasOffsets) {
        final int offsetCode = posIn.readVInt();
        if ((offsetCode & 1) != 0) {
          // offset length changed
          posIn.readVInt();
        }
      }
    }
  }

  /**
   * Moves the skip reader so that it covers {@code target}; when the skipper moves past the
   * documents read so far, doc and position state are reset to the skip entry.
   */
  @Override
  public void advanceShallow(int target) throws IOException {
    if (target <= nextSkipDoc) {
      // The current skip range already covers the target.
      return;
    }

    // always plus one to fix the result, since skip position in Lucene99SkipReader
    // is a little different from MultiLevelSkipListReader
    final int skippedDocUpto = skipper.skipTo(target) + 1;

    if (skippedDocUpto > docUpto) {
      // Skipper moved
      assert skippedDocUpto % BLOCK_SIZE == 0 : "got " + skippedDocUpto;
      docUpto = skippedDocUpto;

      // Force to read next block
      docBufferUpto = BLOCK_SIZE;
      accum = skipper.getDoc();
      posPendingFP = skipper.getPosPointer();
      posPendingCount = skipper.getPosBufferUpto();
      docIn.seek(skipper.getDocPointer());
    }

    // Remember how far this skip entry reaches, so that later calls can tell whether the
    // skipper is needed at all.
    nextSkipDoc = skipper.getNextSkipDoc();
    assert nextSkipDoc >= target;
  }

  /** Returns the impacts exposed by the skip reader for the range containing the current doc. */
  @Override
  public Impacts getImpacts() throws IOException {
    final int current = doc;
    advanceShallow(current);
    return skipper.getImpacts();
  }

  /** Moves to the next document by advancing to {@code doc + 1}. */
  @Override
  public int nextDoc() throws IOException {
    final int target = doc + 1;
    return advance(target);
  }

  /**
   * Advances to the entry that findFirstGreater locates for {@code target} in the current
   * block, and accounts for the positions of every document stepped over.
   */
  @Override
  public int advance(int target) throws IOException {
    if (target > nextSkipDoc) {
      advanceShallow(target);
    }
    if (docBufferUpto == BLOCK_SIZE) {
      refillDocs();
    }

    final int hit = findFirstGreater(docBuffer, target, docBufferUpto);
    if (hit == BLOCK_SIZE) {
      this.doc = NO_MORE_DOCS;
      return NO_MORE_DOCS;
    }
    this.doc = (int) docBuffer[hit];
    this.freq = (int) freqBuffer[hit];

    // Every document consumed here, the hit included, adds its positions to the backlog that
    // nextPosition() has to work through.
    final int consumed = hit - docBufferUpto + 1;
    for (int i = 0; i < consumed; ++i) {
      posPendingCount += freqBuffer[docBufferUpto + i];
    }
    docUpto += consumed;
    docBufferUpto = hit + 1;
    position = 0;
    return doc;
  }

  // TODO: in theory we could avoid loading frq block
  // when not needed, ie, use skip data to load how far to
  // seek the pos pointer ... instead of having to load frq
  // blocks only to sum up how many positions to skip
  /** Drops the pending positions that belong to documents before the current one. */
  private void skipPositions() throws IOException {
    // Skip positions now:
    int toSkip = posPendingCount - freq;
    final int leftInBlock = BLOCK_SIZE - posBufferUpto;

    if (toSkip >= leftInBlock) {
      // The backlog reaches beyond the buffered block: step over whole blocks, then decode the
      // block that holds the first wanted position.
      toSkip -= leftInBlock;
      for (; toSkip >= BLOCK_SIZE; toSkip -= BLOCK_SIZE) {
        assert posIn.getFilePointer() != lastPosBlockFP;
        pforUtil.skip(posIn);
      }
      refillPositions();
      posBufferUpto = toSkip;
    } else {
      posBufferUpto += toSkip;
    }

    position = 0;
  }

  /** Returns the next position of the current document. */
  @Override
  public int nextPosition() throws IOException {
    assert posPendingCount > 0;

    final long pendingFP = posPendingFP;
    if (pendingFP != -1) {
      // Perform the lazy seek requested by the constructor or by advanceShallow().
      posIn.seek(pendingFP);
      posPendingFP = -1;

      // Force buffer refill:
      posBufferUpto = BLOCK_SIZE;
    }

    if (posPendingCount > freq) {
      skipPositions();
      posPendingCount = freq;
    }

    if (posBufferUpto == BLOCK_SIZE) {
      refillPositions();
      posBufferUpto = 0;
    }

    position += posDeltaBuffer[posBufferUpto++];
    posPendingCount--;
    return position;
  }

  /** Always returns -1: offsets are not exposed by this enum. */
  @Override
  public int startOffset() {
    return -1;
  }

  /** Always returns -1: offsets are not exposed by this enum. */
  @Override
  public int endOffset() {
    return -1;
  }

  /** Always returns null: payloads are not exposed by this enum. */
  @Override
  public BytesRef getPayload() {
    return null;
  }

  /** Returns the number of documents in this posting list as the iteration cost. */
  @Override
  public long cost() {
    return this.docFreq;
  }
}
/**
 * {@link ImpactsEnum} that can expose freqs, positions, offsets and payloads. What is actually
 * decoded depends both on what the field was indexed with ({@code indexHas*}) and on what the
 * caller requested through {@code flags} ({@code needs*}).
 */
final class BlockImpactsEverythingEnum extends ImpactsEnum {

  final ForUtil forUtil = new ForUtil();
  final ForDeltaUtil forDeltaUtil = new ForDeltaUtil(forUtil);
  final PForUtil pforUtil = new PForUtil(forUtil);

  // Decoded doc IDs, freqs and position deltas of the current blocks.
  private final long[] docBuffer = new long[BLOCK_SIZE];
  private final long[] freqBuffer = new long[BLOCK_SIZE];
  private final long[] posDeltaBuffer = new long[BLOCK_SIZE];

  // Allocated only when the index has payloads / offsets, null otherwise.
  private final long[] payloadLengthBuffer;
  private final long[] offsetStartDeltaBuffer;
  private final long[] offsetLengthBuffer;

  // Payload bytes of the current position block and the read cursor into them.
  private byte[] payloadBytes;
  private int payloadByteUpto;
  private int payloadLength;

  private int lastStartOffset;
  private int startOffset = -1;
  private int endOffset = -1;

  // Next entry to consume in docBuffer / posDeltaBuffer.
  private int docBufferUpto;
  private int posBufferUpto;

  private final Lucene99ScoreSkipReader skipper;

  final IndexInput docIn;
  // null unless positions are both indexed and requested
  final IndexInput posIn;
  // null unless offsets or payloads are both indexed and requested
  final IndexInput payIn;
  // View over payloadBytes returned by getPayload(); null when the index has no payloads.
  final BytesRef payload;

  final boolean indexHasFreq;
  final boolean indexHasPos;
  final boolean indexHasOffsets;
  final boolean indexHasPayloads;

  private int docFreq; // number of docs in this posting list
  private long totalTermFreq; // number of positions in this posting list
  private int docUpto; // how many docs we've read
  private int posDocUpTo; // for how many docs we've read positions, offsets, and payloads
  private int doc; // doc we last read
  private long accum; // accumulator for doc deltas
  private int position; // current position

  // how many positions "behind" we are; nextPosition must
  // skip these to "catch up":
  private int posPendingCount;

  // Lazy pos seek: if != -1 then we must seek to this FP
  // before reading positions:
  private long posPendingFP;

  // Lazy pay seek: if != -1 then we must seek to this FP
  // before reading payloads/offsets:
  private long payPendingFP;

  // Where this term's postings start in the .doc file:
  private long docTermStartFP;

  // Where this term's postings start in the .pos file:
  private long posTermStartFP;

  // Where this term's payloads/offsets start in the .pay
  // file:
  private long payTermStartFP;

  // File pointer where the last (vInt encoded) pos delta
  // block is. We need this to know whether to bulk
  // decode vs vInt decode the block:
  private long lastPosBlockFP;

  // Value of skipper.getNextSkipDoc() after the last skip; larger targets need the skipper.
  private int nextSkipDoc = -1;

  private final boolean needsPositions; // true if we actually need positions
  private final boolean needsOffsets; // true if we actually need offsets
  private final boolean needsPayloads; // true if we actually need payloads

  // shows if the freqs of the current doc block have been read into freqBuffer
  private boolean isFreqsRead;

  // Delayed docIn seek requested by advanceShallow(); -1 when no seek is pending.
  private long seekTo = -1;

  /**
   * Creates an impacts enum over one term.
   *
   * @param fieldInfo field the term belongs to; its index options decide which data is present
   * @param termState term metadata (frequencies and file pointers of the postings)
   * @param flags {@link PostingsEnum} feature flags selecting positions, offsets and payloads
   * @throws IOException if seeking or prefetching the postings fails
   */
  public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, int flags)
      throws IOException {
    indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    indexHasPos =
        fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    indexHasOffsets =
        fieldInfo
                .getIndexOptions()
                .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
            >= 0;
    indexHasPayloads = fieldInfo.hasPayloads();

    needsPositions = PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS);
    needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
    needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);

    // Only clone the inputs that will really be read.
    this.docIn = Lucene99PostingsReader.this.docIn.clone();

    if (indexHasPos && needsPositions) {
      this.posIn = Lucene99PostingsReader.this.posIn.clone();
    } else {
      this.posIn = null;
    }

    if ((indexHasOffsets && needsOffsets) || (indexHasPayloads && needsPayloads)) {
      this.payIn = Lucene99PostingsReader.this.payIn.clone();
    } else {
      this.payIn = null;
    }

    // startOffset and endOffset keep their initial value of -1 when offsets are unavailable.
    if (indexHasOffsets) {
      offsetStartDeltaBuffer = new long[BLOCK_SIZE];
      offsetLengthBuffer = new long[BLOCK_SIZE];
    } else {
      offsetStartDeltaBuffer = null;
      offsetLengthBuffer = null;
    }

    if (indexHasPayloads) {
      payloadLengthBuffer = new long[BLOCK_SIZE];
      payloadBytes = new byte[128];
      payload = new BytesRef();
    } else {
      payloadLengthBuffer = null;
      payloadBytes = null;
      payload = null;
    }

    docFreq = termState.docFreq;
    docTermStartFP = termState.docStartFP;
    posTermStartFP = termState.posStartFP;
    payTermStartFP = termState.payStartFP;
    totalTermFreq = termState.totalTermFreq;
    seekAndPrefetchPostings(docIn, termState);
    prefetchSkipData(docIn, termState.docStartFP, termState.skipOffset);

    // Positions and payloads/offsets are sought lazily, on the first nextPosition() call.
    posPendingFP = posTermStartFP;
    payPendingFP = payTermStartFP;
    posPendingCount = 0;

    // Locate the vInt-encoded tail of the positions (-1: there is none).
    if (termState.totalTermFreq < BLOCK_SIZE) {
      lastPosBlockFP = posTermStartFP;
    } else if (termState.totalTermFreq == BLOCK_SIZE) {
      lastPosBlockFP = -1;
    } else {
      lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
    }

    doc = -1;
    accum = 0;
    docUpto = 0;
    posDocUpTo = 0;
    isFreqsRead = true;
    // Mark the (empty) buffer as exhausted so that the first advance() refills it.
    docBufferUpto = BLOCK_SIZE;

    skipper =
        new Lucene99ScoreSkipReader(
            docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads);
    skipper.init(
        docTermStartFP + termState.skipOffset,
        docTermStartFP,
        posTermStartFP,
        payTermStartFP,
        docFreq);

    if (indexHasFreq == false) {
      // Without indexed frequencies every document reports a frequency of 1.
      Arrays.fill(freqBuffer, 1L);
    }
  }

  /**
   * Returns the frequency of the current document, decoding the frequency block of the current
   * doc block on first use.
   */
  @Override
  public int freq() throws IOException {
    if (indexHasFreq && (isFreqsRead == false)) {
      pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this block
      isFreqsRead = true;
    }
    return (int) freqBuffer[docBufferUpto - 1];
  }

  /** Returns the doc ID this enum is currently positioned on (-1 before the first move). */
  @Override
  public int docID() {
    return doc;
  }

  /**
   * Decodes the next block of doc IDs. Before doing so it settles the previous block: its freq
   * block is either decoded (when positions still have to be accounted for) or skipped, and
   * posPendingCount is brought up to the documents consumed so far.
   */
  private void refillDocs() throws IOException {
    if (indexHasFreq) {
      if (isFreqsRead == false) { // previous freq block was not read
        // check if we need to load the previous freq block to catch up on positions or we can
        // skip it
        if (indexHasPos && needsPositions && (posDocUpTo < docUpto)) {
          pforUtil.decode(docIn, freqBuffer); // load the previous freq block
        } else {
          pforUtil.skip(docIn); // skip it
        }
        isFreqsRead = true;
      }
      if (indexHasPos && needsPositions) {
        // catch up on positions, bring posPendingCount up to the current doc
        while (posDocUpTo < docUpto) {
          posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)];
          posDocUpTo++;
        }
      }
    }

    final int left = docFreq - docUpto;
    assert left >= 0;

    if (left >= BLOCK_SIZE) {
      forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
      if (indexHasFreq) {
        // freq block will be loaded lazily when necessary, we don't load it here
        isFreqsRead = false;
      }
    } else {
      // vInt-encoded tail: doc deltas (and freqs, when indexed) are read together.
      PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
      prefixSum(docBuffer, left, accum);
      docBuffer[left] = NO_MORE_DOCS;
    }
    accum = docBuffer[BLOCK_SIZE - 1];
    docBufferUpto = 0;
  }

  /**
   * Decodes the next block of positions together with the payload and offset data that goes
   * with it. The tail block is vInt-encoded inside the positions input; regular blocks are
   * bulk-encoded, with payloads and offsets stored in the pay input.
   */
  private void refillPositions() throws IOException {
    if (posIn.getFilePointer() == lastPosBlockFP) {
      final int count = (int) (totalTermFreq % BLOCK_SIZE);
      int payloadLength = 0;
      int offsetLength = 0;
      // While filling, payloadByteUpto is the write cursor into payloadBytes.
      payloadByteUpto = 0;
      for (int i = 0; i < count; i++) {
        int code = posIn.readVInt();
        if (indexHasPayloads) {
          // The low bit signals that a new payload length follows.
          if ((code & 1) != 0) {
            payloadLength = posIn.readVInt();
          }
          payloadLengthBuffer[i] = payloadLength;
          posDeltaBuffer[i] = code >>> 1;
          if (payloadLength != 0) {
            if (payloadByteUpto + payloadLength > payloadBytes.length) {
              payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
            }
            posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
            payloadByteUpto += payloadLength;
          }
        } else {
          posDeltaBuffer[i] = code;
        }

        if (indexHasOffsets) {
          int deltaCode = posIn.readVInt();
          // The low bit signals that a new offset length follows.
          if ((deltaCode & 1) != 0) {
            offsetLength = posIn.readVInt();
          }
          offsetStartDeltaBuffer[i] = deltaCode >>> 1;
          offsetLengthBuffer[i] = offsetLength;
        }
      }
      // Rewind: from here on payloadByteUpto is the read cursor.
      payloadByteUpto = 0;
    } else {
      pforUtil.decode(posIn, posDeltaBuffer);

      if (indexHasPayloads && payIn != null) {
        if (needsPayloads) {
          pforUtil.decode(payIn, payloadLengthBuffer);
          int numBytes = payIn.readVInt();

          if (numBytes > payloadBytes.length) {
            payloadBytes = ArrayUtil.growNoCopy(payloadBytes, numBytes);
          }
          payIn.readBytes(payloadBytes, 0, numBytes);
        } else {
          // this works, because when writing a vint block we always force the first length to be
          // written
          pforUtil.skip(payIn); // skip over lengths
          int numBytes = payIn.readVInt(); // read length of payloadBytes
          payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
        }
        payloadByteUpto = 0;
      }

      if (indexHasOffsets && payIn != null) {
        if (needsOffsets) {
          pforUtil.decode(payIn, offsetStartDeltaBuffer);
          pforUtil.decode(payIn, offsetLengthBuffer);
        } else {
          // this works, because when writing a vint block we always force the first length to be
          // written
          pforUtil.skip(payIn); // skip over starts
          pforUtil.skip(payIn); // skip over lengths
        }
      }
    }
  }

  /**
   * Moves the skip reader so that it covers {@code target}; when the skipper moves past the
   * documents read so far, doc, position and payload state are reset to the skip entry. The
   * seek of docIn itself is postponed until advance() needs the block.
   */
  @Override
  public void advanceShallow(int target) throws IOException {
    if (target > nextSkipDoc) {
      // always plus one to fix the result, since skip position in Lucene99SkipReader
      // is a little different from MultiLevelSkipListReader
      final int newDocUpto = skipper.skipTo(target) + 1;

      if (newDocUpto > docUpto) {
        // Skipper moved
        assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
        docUpto = newDocUpto;
        posDocUpTo = docUpto;

        // Force to read next block
        docBufferUpto = BLOCK_SIZE;
        accum = skipper.getDoc();
        posPendingFP = skipper.getPosPointer();
        payPendingFP = skipper.getPayPointer();
        posPendingCount = skipper.getPosBufferUpto();
        lastStartOffset = 0; // new document
        payloadByteUpto = skipper.getPayloadByteUpto(); // actually, this is just lastSkipEntry
        seekTo = skipper.getDocPointer(); // delay the seek
      }
      // next time we call advance, this is used to
      // foresee whether skipper is necessary.
      nextSkipDoc = skipper.getNextSkipDoc();
    }
    assert nextSkipDoc >= target;
  }

  /** Returns the impacts exposed by the skip reader for the range containing the current doc. */
  @Override
  public Impacts getImpacts() throws IOException {
    advanceShallow(doc);
    return skipper.getImpacts();
  }

  /** Moves to the next document by advancing to {@code doc + 1}. */
  @Override
  public int nextDoc() throws IOException {
    return advance(doc + 1);
  }

  /**
   * Advances to the first buffered document that is greater than or equal to {@code target},
   * or to NO_MORE_DOCS when the scan reaches the end of the block without finding one.
   */
  @Override
  public int advance(int target) throws IOException {
    if (target > nextSkipDoc) {
      advanceShallow(target);
    }
    if (docBufferUpto == BLOCK_SIZE) {
      if (seekTo >= 0) {
        // Perform the seek that advanceShallow() postponed.
        docIn.seek(seekTo);
        seekTo = -1;
        isFreqsRead = true; // reset isFreqsRead
      }
      refillDocs();
    }

    // Now scan:
    long doc;
    while (true) {
      doc = docBuffer[docBufferUpto];
      docBufferUpto++;
      docUpto++;

      if (doc >= target) {
        break;
      }

      if (docBufferUpto == BLOCK_SIZE) {
        return this.doc = NO_MORE_DOCS;
      }
    }
    position = 0;
    lastStartOffset = 0;

    return this.doc = (int) doc;
  }

  // TODO: in theory we could avoid loading frq block
  // when not needed, ie, use skip data to load how far to
  // seek the pos pointer ... instead of having to load frq
  // blocks only to sum up how many positions to skip
  /**
   * Drops the pending positions that belong to documents before the current one, keeping the
   * payload read cursor in step with the positions that are passed over.
   */
  private void skipPositions() throws IOException {
    // Skip positions now:
    int toSkip = posPendingCount - (int) freqBuffer[docBufferUpto - 1];

    final int leftInBlock = BLOCK_SIZE - posBufferUpto;
    if (toSkip < leftInBlock) {
      // Everything to skip is in the buffered block.
      int end = posBufferUpto + toSkip;
      while (posBufferUpto < end) {
        if (indexHasPayloads) {
          payloadByteUpto += payloadLengthBuffer[posBufferUpto];
        }
        posBufferUpto++;
      }
    } else {
      toSkip -= leftInBlock;
      // Step over whole blocks in both the pos and the pay input.
      while (toSkip >= BLOCK_SIZE) {
        assert posIn.getFilePointer() != lastPosBlockFP;
        pforUtil.skip(posIn);

        if (indexHasPayloads && payIn != null) {
          // Skip payloadLength block:
          pforUtil.skip(payIn);

          // Skip payloadBytes block:
          int numBytes = payIn.readVInt();
          payIn.seek(payIn.getFilePointer() + numBytes);
        }

        if (indexHasOffsets && payIn != null) {
          pforUtil.skip(payIn);
          pforUtil.skip(payIn);
        }
        toSkip -= BLOCK_SIZE;
      }
      // Decode the block that holds the first wanted position and skip within it.
      refillPositions();
      payloadByteUpto = 0;
      posBufferUpto = 0;
      while (posBufferUpto < toSkip) {
        if (indexHasPayloads) {
          payloadByteUpto += payloadLengthBuffer[posBufferUpto];
        }
        posBufferUpto++;
      }
    }

    position = 0;
    lastStartOffset = 0;
  }

  /**
   * Returns the next position of the current document, or -1 when positions are not indexed or
   * were not requested. Also updates the payload and, when requested, the offsets that belong
   * to that position.
   */
  @Override
  public int nextPosition() throws IOException {
    if (indexHasPos == false || needsPositions == false) {
      return -1;
    }

    if (isFreqsRead == false) {
      pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this docs block
      isFreqsRead = true;
    }
    while (posDocUpTo < docUpto) { // bring posPendingCount upto the current doc
      posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)];
      posDocUpTo++;
    }

    assert posPendingCount > 0;

    if (posPendingFP != -1) {
      // Perform the lazy seeks requested by the constructor or by advanceShallow().
      posIn.seek(posPendingFP);
      posPendingFP = -1;

      if (payPendingFP != -1 && payIn != null) {
        payIn.seek(payPendingFP);
        payPendingFP = -1;
      }

      // Force buffer refill:
      posBufferUpto = BLOCK_SIZE;
    }

    if (posPendingCount > freqBuffer[docBufferUpto - 1]) {
      skipPositions();
      posPendingCount = (int) freqBuffer[docBufferUpto - 1];
    }

    if (posBufferUpto == BLOCK_SIZE) {
      refillPositions();
      posBufferUpto = 0;
    }
    position += posDeltaBuffer[posBufferUpto];

    if (indexHasPayloads) {
      // Point the shared payload view at the bytes of this position.
      payloadLength = (int) payloadLengthBuffer[posBufferUpto];
      payload.bytes = payloadBytes;
      payload.offset = payloadByteUpto;
      payload.length = payloadLength;
      payloadByteUpto += payloadLength;
    }

    if (indexHasOffsets && needsOffsets) {
      startOffset = lastStartOffset + (int) offsetStartDeltaBuffer[posBufferUpto];
      endOffset = startOffset + (int) offsetLengthBuffer[posBufferUpto];
      lastStartOffset = startOffset;
    }

    posBufferUpto++;
    posPendingCount--;
    return position;
  }

  /** Returns the start offset set by the last nextPosition() call; -1 if none was set. */
  @Override
  public int startOffset() {
    return startOffset;
  }

  /** Returns the end offset set by the last nextPosition() call; -1 if none was set. */
  @Override
  public int endOffset() {
    return endOffset;
  }

  /** Returns the payload of the current position, or null when its length is 0. */
  @Override
  public BytesRef getPayload() {
    if (payloadLength == 0) {
      return null;
    } else {
      return payload;
    }
  }

  /** Returns the number of documents in this posting list as the iteration cost. */
  @Override
  public long cost() {
    return docFreq;
  }
}
/**
 * Seeks {@code docIn} to the start of the term's postings and prefetches the bytes that are
 * likely to be read first. Does nothing when the input is already positioned there.
 */
private void seekAndPrefetchPostings(IndexInput docIn, IntBlockTermState state)
    throws IOException {
  if (docIn.getFilePointer() == state.docStartFP) {
    // Don't prefetch if the input is already positioned at the right offset, which suggests that
    // the caller is streaming the entire inverted index (e.g. for merging), let the read-ahead
    // logic do its work instead. Note that this heuristic doesn't work for terms that have skip
    // data, since skip data is stored after the last term, but handling all terms that have <128
    // docs is a good start already.
    return;
  }
  docIn.seek(state.docStartFP);

  // How much to prefetch:
  // - no skip data (skipOffset < 0): the postings list is very short, the page holding its
  //   first byte is enough;
  // - short list that fits on a few pages: all of it, plus one byte to make sure to include
  //   some skip data;
  // - otherwise: only the page holding the first byte of postings; skip data gets prefetched
  //   when there is evidence that it is used.
  final long skipOffset = state.skipOffset;
  final boolean prefetchWholeList =
      skipOffset >= 0 && skipOffset <= MAX_POSTINGS_SIZE_FOR_FULL_PREFETCH;
  final long prefetchLength = prefetchWholeList ? skipOffset + 1 : 1;
  docIn.prefetch(state.docStartFP, prefetchLength);
  // Note: we don't prefetch positions or offsets, which are less likely to be needed.
}
/**
 * Prefetches the first byte of the skip data, unless the postings list is small enough for
 * the skip data to have been prefetched together with the postings.
 */
private void prefetchSkipData(IndexInput docIn, long docStartFP, long skipOffset)
    throws IOException {
  if (skipOffset <= MAX_POSTINGS_SIZE_FOR_FULL_PREFETCH) {
    // If skipOffset is not greater than this value, skip data was already prefetched when
    // doing #seekAndPrefetchPostings
    return;
  }
  final long skipDataFP = docStartFP + skipOffset;
  docIn.prefetch(skipDataFP, 1);
}
/** Verifies the checksum of every postings file that is open (doc, then pos, then pay). */
@Override
public void checkIntegrity() throws IOException {
  final IndexInput[] inputs = {docIn, posIn, payIn};
  for (IndexInput input : inputs) {
    if (input != null) {
      CodecUtil.checksumEntireFile(input);
    }
  }
}
/** Describes this reader by class name and by whether positions and payloads inputs are open. */
@Override
public String toString() {
  final boolean hasPositions = posIn != null;
  final boolean hasPayloads = payIn != null;
  final String name = getClass().getSimpleName();
  return name + "(positions=" + hasPositions + ",payloads=" + hasPayloads + ")";
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy