All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.index.TermsEnum Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOBooleanSupplier;

/**
 * Iterator to seek ({@link #seekCeil(BytesRef)}, {@link #seekExact(BytesRef)}) or step through
 * ({@link #next} terms to obtain frequency information ({@link #docFreq}), {@link PostingsEnum} or
 * {@link PostingsEnum} for the current term ({@link #postings}.
 *
 * 

Term enumerations are always ordered by BytesRef.compareTo, which is Unicode sort order if the * terms are UTF-8 bytes. Each term in the enumeration is greater than the one before it. * *

The TermsEnum is unpositioned when you first obtain it and you must first successfully call * {@link #next} or one of the seek methods. * * @lucene.experimental */ public abstract class TermsEnum implements BytesRefIterator { /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */ protected TermsEnum() {} /** Returns the related attributes. */ public abstract AttributeSource attributes(); /** Represents returned result from {@link #seekCeil}. */ public enum SeekStatus { /** The term was not found, and the end of iteration was hit. */ END, /** The precise term was found. */ FOUND, /** A different term was found after the requested term */ NOT_FOUND }; /** * Attempts to seek to the exact term, returning true if the term is found. If this returns false, * the enum is unpositioned. For some codecs, seekExact may be substantially faster than {@link * #seekCeil}. * * @return true if the term is found; return false if the enum is unpositioned. */ public abstract boolean seekExact(BytesRef text) throws IOException; /** * Two-phase {@link #seekExact}. The first phase typically calls {@link IndexInput#prefetch} on * the right range of bytes under the hood, while the second phase {@link IOBooleanSupplier#get()} * actually seeks the term within these bytes. This can be used to parallelize I/O across multiple * terms by calling {@link #prepareSeekExact} on multiple terms enums before calling {@link * IOBooleanSupplier#get()}. * *

NOTE: It is illegal to call other methods on this {@link TermsEnum} after calling * this method until {@link IOBooleanSupplier#get()} is called. * *

NOTE: This may return {@code null} if this {@link TermsEnum} can identify that the * term may not exist without performing any I/O. * *

NOTE: The returned {@link IOBooleanSupplier} must be consumed in the same thread. */ public abstract IOBooleanSupplier prepareSeekExact(BytesRef text) throws IOException; /** * Seeks to the specified term, if it exists, or to the next (ceiling) term. Returns SeekStatus to * indicate whether exact term was found, a different term was found, or EOF was hit. The target * term may be before or after the current term. If this returns SeekStatus.END, the enum is * unpositioned. */ public abstract SeekStatus seekCeil(BytesRef text) throws IOException; /** * Seeks to the specified term by ordinal (position) as previously returned by {@link #ord}. The * target ord may be before or after the current ord, and must be within bounds. */ public abstract void seekExact(long ord) throws IOException; /** * Expert: Seeks a specific position by {@link TermState} previously obtained from {@link * #termState()}. Callers should maintain the {@link TermState} to use this method. Low-level * implementations may position the TermsEnum without re-seeking the term dictionary. * *

Seeking by {@link TermState} should only be used iff the state was obtained from the same * {@link TermsEnum} instance. * *

NOTE: Using this method with an incompatible {@link TermState} might leave this {@link * TermsEnum} in undefined state. On a segment level {@link TermState} instances are compatible * only iff the source and the target {@link TermsEnum} operate on the same field. If operating on * segment level, TermState instances must not be used across segments. * *

NOTE: A seek by {@link TermState} might not restore the {@link AttributeSource}'s state. * {@link AttributeSource} states must be maintained separately if this method is used. * * @param term the term the TermState corresponds to * @param state the {@link TermState} */ public abstract void seekExact(BytesRef term, TermState state) throws IOException; /** Returns current term. Do not call this when the enum is unpositioned. */ public abstract BytesRef term() throws IOException; /** * Returns ordinal position for current term. This is an optional method (the codec may throw * {@link UnsupportedOperationException}). Do not call this when the enum is unpositioned. */ public abstract long ord() throws IOException; /** * Returns the number of documents containing the current term. Do not call this when the enum is * unpositioned. {@link SeekStatus#END}. */ public abstract int docFreq() throws IOException; /** * Returns the total number of occurrences of this term across all documents (the sum of the * freq() for each doc that has this term). Note that, like other term measures, this measure does * not take deleted documents into account. */ public abstract long totalTermFreq() throws IOException; /** * Get {@link PostingsEnum} for the current term. Do not call this when the enum is unpositioned. * This method will not return null. * *

NOTE: the returned iterator may return deleted documents, so deleted documents have * to be checked on top of the {@link PostingsEnum}. * *

Use this method if you only require documents and frequencies, and do not need any proximity * data. This method is equivalent to {@link #postings(PostingsEnum, int) postings(reuse, * PostingsEnum.FREQS)} * * @param reuse pass a prior PostingsEnum for possible reuse * @see #postings(PostingsEnum, int) */ public final PostingsEnum postings(PostingsEnum reuse) throws IOException { return postings(reuse, PostingsEnum.FREQS); } /** * Get {@link PostingsEnum} for the current term, with control over whether freqs, positions, * offsets or payloads are required. Do not call this when the enum is unpositioned. This method * will not return null. * *

NOTE: the returned iterator may return deleted documents, so deleted documents have * to be checked on top of the {@link PostingsEnum}. * * @param reuse pass a prior PostingsEnum for possible reuse * @param flags specifies which optional per-document values you require; see {@link * PostingsEnum#FREQS} */ public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException; /** * Return a {@link ImpactsEnum}. * * @see #postings(PostingsEnum, int) */ public abstract ImpactsEnum impacts(int flags) throws IOException; /** * Expert: Returns the TermsEnums internal state to position the TermsEnum without re-seeking the * term dictionary. * *

NOTE: A seek by {@link TermState} might not capture the {@link AttributeSource}'s state. * Callers must maintain the {@link AttributeSource} states separately * * @see TermState * @see #seekExact(BytesRef, TermState) */ public abstract TermState termState() throws IOException; /** * An empty TermsEnum for quickly returning an empty instance e.g. in {@link * org.apache.lucene.search.MultiTermQuery} * *

Please note: This enum should be unmodifiable, but it is currently possible to add * Attributes to it. This should not be a problem, as the enum is always empty and the existence * of unused Attributes does not matter. */ public static final TermsEnum EMPTY = new BaseTermsEnum() { @Override public SeekStatus seekCeil(BytesRef term) { return SeekStatus.END; } @Override public void seekExact(long ord) {} @Override public BytesRef term() { throw new IllegalStateException("this method should never be called"); } @Override public int docFreq() { throw new IllegalStateException("this method should never be called"); } @Override public long totalTermFreq() { throw new IllegalStateException("this method should never be called"); } @Override public long ord() { throw new IllegalStateException("this method should never be called"); } @Override public PostingsEnum postings(PostingsEnum reuse, int flags) { throw new IllegalStateException("this method should never be called"); } @Override public ImpactsEnum impacts(int flags) throws IOException { throw new IllegalStateException("this method should never be called"); } @Override public BytesRef next() { return null; } @Override public TermState termState() { throw new IllegalStateException("this method should never be called"); } @Override public void seekExact(BytesRef term, TermState state) { throw new IllegalStateException("this method should never be called"); } }; }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy