All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.codecs.uniformsplit.IndexDictionary Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.codecs.uniformsplit;

import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOSupplier;

/**
 * Immutable stateless index dictionary kept in RAM.
 *
 * <p>Implementations must be immutable.
 *
 * <p>Use {@link IndexDictionary.Builder} to build the {@link IndexDictionary}.
 *
 * <p>Create a stateful {@link IndexDictionary.Browser} to seek a term in this {@link
 * IndexDictionary} and get its corresponding block file pointer to the terms block file.
 *
 * <p>There is a single implementation of this interface, {@link FSTDictionary}. However this
 * interface allows you to plug easily a new kind of index dictionary to experiment and improve the
 * existing one.
 *
 * @lucene.experimental
 */
public interface IndexDictionary {

  /**
   * Writes this dictionary to the provided output.
   *
   * @param output The output this dictionary is written to.
   * @param blockEncoder The {@link BlockEncoder} for specific encoding of this index dictionary; or
   *     {@code null} if none.
   */
  void write(DataOutput output, BlockEncoder blockEncoder) throws IOException;

  /** Creates a new {@link IndexDictionary.Browser}. */
  Browser browser() throws IOException;

  /** Builds an immutable {@link IndexDictionary}. */
  interface Builder {

    /**
     * Adds a [block key - block file pointer] entry to the dictionary.
     *
     * <p>The Uniform Split technique adds block keys in the dictionary. See {@link BlockReader} and
     * {@link TermBytes} for more info about block key and minimal distinguishing prefix (MDP).
     *
     * <p>All block keys are added in strictly increasing order of the block file pointers, this
     * allows long encoding optimizations such as with {@link
     * org.apache.lucene.util.fst.PositiveIntOutputs} for {@link org.apache.lucene.util.fst.FST}.
     *
     * @param blockKey The block key which is the minimal distinguishing prefix (MDP) of the first
     *     term of a block.
     * @param blockFilePointer Non-negative file pointer to the start of the block in the block
     *     file.
     */
    void add(BytesRef blockKey, long blockFilePointer) throws IOException;

    /** Builds the immutable {@link IndexDictionary} for the added entries. */
    IndexDictionary build() throws IOException;
  }

  /**
   * Stateful {@link IndexDictionary.Browser} to seek a term in this {@link IndexDictionary} and get
   * its corresponding block file pointer in the block file.
   */
  interface Browser {

    /**
     * Seeks the given term in the {@link IndexDictionary} and returns its corresponding block file
     * pointer.
     *
     * @param term The term to seek.
     * @return The block file pointer corresponding to the term if it matches exactly a block key in
     *     the dictionary. Otherwise the floor block key, which is the greatest block key present in
     *     the dictionary that is alphabetically preceding the searched term. Otherwise {@code -1}
     *     if there is no floor block key because the searched term precedes alphabetically the
     *     first block key of the dictionary.
     */
    long seekBlock(BytesRef term) throws IOException;
  }

  /**
   * Supplier for a new stateful {@link Browser} created on the immutable {@link IndexDictionary}.
   *
   * <p>The immutable {@link IndexDictionary} is lazy loaded thread safely. This lazy loading allows
   * us to load it only when {@link org.apache.lucene.index.TermsEnum#seekCeil} or {@link
   * org.apache.lucene.index.TermsEnum#seekExact} are called (it is not loaded for a direct
   * all-terms enumeration).
   *
   * <p>The supplier is parameterized with {@link Browser} rather than left as a raw type, so the
   * supplied value is statically typed and callers need no unchecked cast.
   */
  interface BrowserSupplier extends IOSupplier<IndexDictionary.Browser> {}
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy