org.apache.lucene.backward_codecs.lucene99.Lucene99SkipWriter Maven / Gradle / Ivy
Show all versions of lucene-backward-codecs Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.lucene99;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
import org.apache.lucene.index.Impact;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
/**
* Write skip lists with multiple levels, and support skip within block ints.
*
* Assume that docFreq = 28, skipInterval = blockSize = 12
*
*
* | block#0 | | block#1 | |vInts|
* d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
* ^ ^ (level 0 skip point)
*
*
* Note that skipWriter will ignore first document in block#0, since it is useless as a skip
* point. Also, we'll never skip into the vInts block, only record skip data at the start its start
* point(if it exist).
*
*
For each skip point, we will record: 1. docID in former position, i.e. for position 12, record
* docID[11], etc. 2. its related file points(position, payload), 3. related numbers or
* uptos(position, payload). 4. start offset.
*/
public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
private final int[] lastSkipDoc;
private final long[] lastSkipDocPointer;
private final long[] lastSkipPosPointer;
private final long[] lastSkipPayPointer;
private final IndexOutput docOut;
private final IndexOutput posOut;
private final IndexOutput payOut;
private int curDoc;
private long curDocPointer;
private long curPosPointer;
private long curPayPointer;
private int curPosBufferUpto;
private int curPayloadByteUpto;
private final CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
private boolean fieldHasPositions;
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
/** Sole constructor. */
public Lucene99SkipWriter(
int maxSkipLevels,
int blockSize,
int docCount,
IndexOutput docOut,
IndexOutput posOut,
IndexOutput payOut) {
super(blockSize, 8, maxSkipLevels, docCount);
this.docOut = docOut;
this.posOut = posOut;
this.payOut = payOut;
lastSkipDoc = new int[maxSkipLevels];
lastSkipDocPointer = new long[maxSkipLevels];
if (posOut != null) {
lastSkipPosPointer = new long[maxSkipLevels];
if (payOut != null) {
lastSkipPayPointer = new long[maxSkipLevels];
} else {
lastSkipPayPointer = null;
}
} else {
lastSkipPosPointer = null;
lastSkipPayPointer = null;
}
curCompetitiveFreqNorms = new CompetitiveImpactAccumulator[maxSkipLevels];
for (int i = 0; i < maxSkipLevels; ++i) {
curCompetitiveFreqNorms[i] = new CompetitiveImpactAccumulator();
}
}
/** Reset state for the given index options. */
public void setField(
boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
this.fieldHasPositions = fieldHasPositions;
this.fieldHasOffsets = fieldHasOffsets;
this.fieldHasPayloads = fieldHasPayloads;
}
// tricky: we only skip data for blocks (terms with more than 128 docs), but re-init'ing the
// skipper
// is pretty slow for rare terms in large segments as we have to fill O(log #docs in segment) of
// junk.
// this is the vast majority of terms (worst case: ID field or similar). so in resetSkip() we
// save
// away the previous pointers, and lazy-init only if we need to buffer skip data for the term.
private boolean initialized;
long lastDocFP;
long lastPosFP;
long lastPayFP;
@Override
public void resetSkip() {
lastDocFP = docOut.getFilePointer();
if (fieldHasPositions) {
lastPosFP = posOut.getFilePointer();
if (fieldHasOffsets || fieldHasPayloads) {
lastPayFP = payOut.getFilePointer();
}
}
if (initialized) {
for (CompetitiveImpactAccumulator acc : curCompetitiveFreqNorms) {
acc.clear();
}
}
initialized = false;
}
private void initSkip() {
if (!initialized) {
super.resetSkip();
Arrays.fill(lastSkipDoc, 0);
Arrays.fill(lastSkipDocPointer, lastDocFP);
if (fieldHasPositions) {
Arrays.fill(lastSkipPosPointer, lastPosFP);
if (fieldHasOffsets || fieldHasPayloads) {
Arrays.fill(lastSkipPayPointer, lastPayFP);
}
}
// sets of competitive freq,norm pairs should be empty at this point
assert Arrays.stream(curCompetitiveFreqNorms)
.map(CompetitiveImpactAccumulator::getCompetitiveFreqNormPairs)
.mapToInt(Collection::size)
.sum()
== 0;
initialized = true;
}
}
/** Sets the values for the current skip data. */
public void bufferSkip(
int doc,
CompetitiveImpactAccumulator competitiveFreqNorms,
int numDocs,
long posFP,
long payFP,
int posBufferUpto,
int payloadByteUpto)
throws IOException {
initSkip();
this.curDoc = doc;
this.curDocPointer = docOut.getFilePointer();
this.curPosPointer = posFP;
this.curPayPointer = payFP;
this.curPosBufferUpto = posBufferUpto;
this.curPayloadByteUpto = payloadByteUpto;
this.curCompetitiveFreqNorms[0].copy(competitiveFreqNorms);
bufferSkip(numDocs);
}
private final ByteBuffersDataOutput freqNormOut = ByteBuffersDataOutput.newResettableInstance();
@Override
protected void writeSkipData(int level, DataOutput skipBuffer) throws IOException {
int delta = curDoc - lastSkipDoc[level];
skipBuffer.writeVInt(delta);
lastSkipDoc[level] = curDoc;
skipBuffer.writeVLong(curDocPointer - lastSkipDocPointer[level]);
lastSkipDocPointer[level] = curDocPointer;
if (fieldHasPositions) {
skipBuffer.writeVLong(curPosPointer - lastSkipPosPointer[level]);
lastSkipPosPointer[level] = curPosPointer;
skipBuffer.writeVInt(curPosBufferUpto);
if (fieldHasPayloads) {
skipBuffer.writeVInt(curPayloadByteUpto);
}
if (fieldHasOffsets || fieldHasPayloads) {
skipBuffer.writeVLong(curPayPointer - lastSkipPayPointer[level]);
lastSkipPayPointer[level] = curPayPointer;
}
}
CompetitiveImpactAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0;
if (level + 1 < numberOfSkipLevels) {
curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms);
}
writeImpacts(competitiveFreqNorms, freqNormOut);
skipBuffer.writeVInt(Math.toIntExact(freqNormOut.size()));
freqNormOut.copyTo(skipBuffer);
freqNormOut.reset();
competitiveFreqNorms.clear();
}
/** Write impacts to the given output. */
public static void writeImpacts(CompetitiveImpactAccumulator acc, DataOutput out)
throws IOException {
Collection impacts = acc.getCompetitiveFreqNormPairs();
Impact previous = new Impact(0, 0);
for (Impact impact : impacts) {
assert impact.freq > previous.freq;
assert Long.compareUnsigned(impact.norm, previous.norm) > 0;
int freqDelta = impact.freq - previous.freq - 1;
long normDelta = impact.norm - previous.norm - 1;
if (normDelta == 0) {
// most of time, norm only increases by 1, so we can fold everything in a single byte
out.writeVInt(freqDelta << 1);
} else {
out.writeVInt((freqDelta << 1) | 1);
out.writeZLong(normDelta);
}
previous = impact;
}
}
}