All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.codecs.uniformsplit.DeltaBaseTermStateSerializer Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.codecs.uniformsplit;

import static org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat.BLOCK_SIZE;

import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat.IntBlockTermState;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsReader;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * {@link TermState} serializer which encodes each file pointer as a delta relative to a base file
 * pointer. It differs from {@link Lucene101PostingsWriter#encodeTerm} which encodes each file
 * pointer as a delta relative to the previous file pointer.
 *
 * 

It automatically sets the base file pointer to the first valid file pointer for doc start FP, * pos start FP, pay start FP. These base file pointers have to be {@link #resetBaseStartFP() reset} * by the caller before starting to write a new block. * * @lucene.experimental */ public class DeltaBaseTermStateSerializer implements Accountable { private static final long RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(DeltaBaseTermStateSerializer.class); private static final long INT_BLOCK_TERM_STATE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(IntBlockTermState.class); protected long baseDocStartFP; protected long basePosStartFP; protected long basePayStartFP; public DeltaBaseTermStateSerializer() { resetBaseStartFP(); } /** * Resets the base file pointers to 0. This method has to be called before starting to write a new * block. */ public void resetBaseStartFP() { this.baseDocStartFP = 0; this.basePosStartFP = 0; this.basePayStartFP = 0; } /** * @return The base doc start file pointer. It is the file pointer of the first {@link TermState} * written after {@link #resetBaseStartFP()} is called. */ public long getBaseDocStartFP() { return baseDocStartFP; } /** * @return The base position start file pointer. It is the file pointer of the first {@link * TermState} written after {@link #resetBaseStartFP()} is called. */ public long getBasePosStartFP() { return basePosStartFP; } /** * @return The base payload start file pointer. It is the file pointer of the first {@link * TermState} written after {@link #resetBaseStartFP()} is called. */ public long getBasePayStartFP() { return basePayStartFP; } /** * Writes a {@link BlockTermState} to the provided {@link DataOutput}. * *

Simpler variant of {@link Lucene101PostingsWriter#encodeTerm(DataOutput, FieldInfo, * BlockTermState, boolean)}. */ public void writeTermState( DataOutput termStatesOutput, FieldInfo fieldInfo, BlockTermState termState) throws IOException { IndexOptions indexOptions = fieldInfo.getIndexOptions(); boolean hasFreqs = indexOptions != IndexOptions.DOCS; boolean hasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; boolean hasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; boolean hasPayloads = fieldInfo.hasPayloads(); IntBlockTermState intTermState = (IntBlockTermState) termState; termStatesOutput.writeVInt(intTermState.docFreq); if (hasFreqs) { assert intTermState.totalTermFreq >= intTermState.docFreq; termStatesOutput.writeVLong(intTermState.totalTermFreq - intTermState.docFreq); } if (intTermState.singletonDocID != -1) { termStatesOutput.writeVInt(intTermState.singletonDocID); } else { if (baseDocStartFP == 0) { baseDocStartFP = intTermState.docStartFP; } termStatesOutput.writeVLong(intTermState.docStartFP - baseDocStartFP); } if (hasPositions) { if (basePosStartFP == 0) { basePosStartFP = intTermState.posStartFP; } termStatesOutput.writeVLong(intTermState.posStartFP - basePosStartFP); if (hasPayloads || hasOffsets) { if (basePayStartFP == 0) { basePayStartFP = intTermState.payStartFP; } termStatesOutput.writeVLong(intTermState.payStartFP - basePayStartFP); } if (intTermState.lastPosBlockOffset != -1) { termStatesOutput.writeVLong(intTermState.lastPosBlockOffset); } } } /** * Reads a {@link BlockTermState} from the provided {@link DataInput}. * *

Simpler variant of {@link Lucene101PostingsReader#decodeTerm(DataInput, FieldInfo, * BlockTermState, boolean)}. * * @param reuse {@link BlockTermState} to reuse; or null to create a new one. */ public BlockTermState readTermState( long baseDocStartFP, long basePosStartFP, long basePayStartFP, DataInput termStatesInput, FieldInfo fieldInfo, BlockTermState reuse) throws IOException { IndexOptions indexOptions = fieldInfo.getIndexOptions(); boolean hasFreqs = indexOptions != IndexOptions.DOCS; boolean hasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; IntBlockTermState intTermState = reuse != null ? reset((IntBlockTermState) reuse) : new IntBlockTermState(); intTermState.docFreq = termStatesInput.readVInt(); intTermState.totalTermFreq = hasFreqs ? intTermState.docFreq + termStatesInput.readVLong() : intTermState.docFreq; assert intTermState.totalTermFreq >= intTermState.docFreq; if (intTermState.docFreq == 1) { intTermState.singletonDocID = termStatesInput.readVInt(); } else { intTermState.docStartFP = baseDocStartFP + termStatesInput.readVLong(); } if (hasPositions) { intTermState.posStartFP = basePosStartFP + termStatesInput.readVLong(); boolean hasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; if (hasOffsets || fieldInfo.hasPayloads()) { intTermState.payStartFP = basePayStartFP + termStatesInput.readVLong(); } if (intTermState.totalTermFreq > BLOCK_SIZE) { intTermState.lastPosBlockOffset = termStatesInput.readVLong(); } } return intTermState; } protected IntBlockTermState reset(IntBlockTermState termState) { // OrdTermState. termState.ord = 0; // BlockTermState. termState.docFreq = 0; termState.totalTermFreq = 0; termState.termBlockOrd = 0; termState.blockFilePointer = 0; // IntBlockTermState. termState.docStartFP = 0; termState.posStartFP = 0; termState.payStartFP = 0; termState.lastPosBlockOffset = -1; termState.singletonDocID = -1; return termState; } @Override public long ramBytesUsed() { return RAM_USAGE; } /** * @return The estimated RAM usage of the given {@link TermState}. */ public static long ramBytesUsed(TermState termState) { return termState instanceof IntBlockTermState ? INT_BLOCK_TERM_STATE_RAM_USAGE : RamUsageEstimator.shallowSizeOf(termState); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy