All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.codecs.uniformsplit.FSTDictionary Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.codecs.uniformsplit;

import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.OffHeapFSTStore;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

/**
 * Immutable stateless {@link FST}-based index dictionary kept in memory.
 *
 * 

Use {@link IndexDictionary.Builder} to build the {@link IndexDictionary}. * *

Create a stateful {@link IndexDictionary.Browser} to seek a term in this {@link * IndexDictionary} and get its corresponding block file pointer to the terms block file. * *

Its greatest advantage is to be very compact in memory thanks to both the compaction of the * {@link FST} as a byte array, and the incremental encoding of the leaves block pointer values, * which are long integers in increasing order, with {@link PositiveIntOutputs}.
* With a compact dictionary in memory we can increase the number of blocks. This allows us to * reduce the average block size, which means faster scan inside a block. * * @lucene.experimental */ public class FSTDictionary implements IndexDictionary { protected final FST fst; protected FSTDictionary(FST fst) { this.fst = fst; } @Override public void write(DataOutput output, BlockEncoder blockEncoder) throws IOException { if (blockEncoder == null) { fst.save(output, output); } else { ByteBuffersDataOutput bytesDataOutput = ByteBuffersDataOutput.newResettableInstance(); fst.save(bytesDataOutput, bytesDataOutput); BlockEncoder.WritableBytes encodedBytes = blockEncoder.encode(bytesDataOutput.toDataInput(), bytesDataOutput.size()); output.writeVLong(encodedBytes.size()); encodedBytes.writeTo(output); } } /** * Reads a {@link FSTDictionary} from the provided input. * * @param blockDecoder The {@link BlockDecoder} to use for specific decoding; or null if none. */ protected static FSTDictionary read( DataInput input, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException { DataInput fstDataInput; if (blockDecoder == null) { fstDataInput = input; } else { long numBytes = input.readVLong(); BytesRef decodedBytes = blockDecoder.decode(input, numBytes); fstDataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length); // OffHeapFSTStore.init() requires a DataInput which is an instance of IndexInput. // When the block is decoded we must load the FST on heap. isFSTOnHeap = true; } PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton(); FST.FSTMetadata metadata = FST.readMetadata(fstDataInput, fstOutputs); FST fst; if (isFSTOnHeap) { fst = new FST<>(metadata, fstDataInput); } else { final IndexInput indexInput = (IndexInput) fstDataInput; fst = FST.fromFSTReader( metadata, new OffHeapFSTStore(indexInput, indexInput.getFilePointer(), metadata)); } return new FSTDictionary(fst); } @Override public Browser browser() { return new Browser(); } /** * Stateful {@link Browser} to seek a term in this {@link FSTDictionary} and get its corresponding * block file pointer in the block file. */ protected class Browser implements IndexDictionary.Browser { protected final BytesRefFSTEnum fstEnum = new BytesRefFSTEnum<>(fst); @Override public long seekBlock(BytesRef term) throws IOException { BytesRefFSTEnum.InputOutput seekFloor = fstEnum.seekFloor(term); return seekFloor == null ? -1 : seekFloor.output; } } /** * Provides stateful {@link Browser} to seek in the {@link FSTDictionary}. * * @lucene.experimental */ public static class BrowserSupplier implements IndexDictionary.BrowserSupplier { protected final IndexInput dictionaryInput; protected final BlockDecoder blockDecoder; protected final boolean isFSTOnHeap; /** * Lazy loaded immutable index dictionary FST. The FST is either kept off-heap, or hold in RAM * on-heap. */ protected IndexDictionary dictionary; public BrowserSupplier( IndexInput dictionaryInput, long dictionaryStartFP, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException { this.dictionaryInput = dictionaryInput.clone(); this.dictionaryInput.seek(dictionaryStartFP); this.blockDecoder = blockDecoder; this.isFSTOnHeap = isFSTOnHeap; } @Override public IndexDictionary.Browser get() throws IOException { // This double-check idiom does not require the dictionary to be volatile // because it is immutable. See section "Double-Checked Locking Immutable Objects" // of https://www.cs.umd.edu/~pugh/java/memoryModel/DoubleCheckedLocking.html. if (dictionary == null) { synchronized (this) { if (dictionary == null) { dictionary = read(dictionaryInput, blockDecoder, isFSTOnHeap); } } } return dictionary.browser(); } } /** * Builds an immutable {@link FSTDictionary}. * * @lucene.experimental */ public static class Builder implements IndexDictionary.Builder { protected final FSTCompiler fstCompiler; protected final IntsRefBuilder scratchInts; public Builder() throws IOException { PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build(); scratchInts = new IntsRefBuilder(); } @Override public void add(BytesRef blockKey, long blockFilePointer) throws IOException { fstCompiler.add(Util.toIntsRef(blockKey, scratchInts), blockFilePointer); } @Override public FSTDictionary build() throws IOException { return new FSTDictionary( FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader())); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy