All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.index.IndexReader Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.WeakHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.store.AlreadyClosedException;

/**
 * IndexReader is an abstract class, providing an interface for accessing a point-in-time view of an
 * index. Any changes made to the index via {@link IndexWriter} will not be visible until a new
 * {@code IndexReader} is opened. It's best to use {@link DirectoryReader#open(IndexWriter)} to
 * obtain an {@code IndexReader}, if your {@link IndexWriter} is in-process. When you need to
 * re-open to see changes to the index, it's best to use {@link
 * DirectoryReader#openIfChanged(DirectoryReader)} since the new reader will share resources with
 * the previous one when possible. Search of an index is done entirely through this abstract
 * interface, so that any subclass which implements it is searchable.
 *
 * 

There are two different types of IndexReaders: * *

    *
  • {@link LeafReader}: These indexes do not consist of several sub-readers, they are atomic. * They support retrieval of stored fields, doc values, terms, and postings. *
  • {@link CompositeReader}: Instances (like {@link DirectoryReader}) of this reader can only * be used to get stored fields from the underlying LeafReaders, but it is not possible to * directly retrieve postings. To do that, get the sub-readers via {@link * CompositeReader#getSequentialSubReaders}. *
* *

IndexReader instances for indexes on disk are usually constructed with a call to one of the * static DirectoryReader.open() methods, e.g. {@link * DirectoryReader#open(org.apache.lucene.store.Directory)}. {@link DirectoryReader} implements the * {@link CompositeReader} interface, it is not possible to directly get postings. * *

For efficiency, in this API documents are often referred to via document numbers, * non-negative integers which each name a unique document in the index. These document numbers are * ephemeral -- they may change as documents are added to and deleted from an index. Clients should * thus not rely on a given document having the same number between sessions. * *

* *

NOTE: {@link IndexReader} instances are completely thread safe, meaning multiple * threads can call any of its methods, concurrently. If your application requires external * synchronization, you should not synchronize on the IndexReader instance; use * your own (non-Lucene) objects instead. */ public abstract sealed class IndexReader implements Closeable permits CompositeReader, LeafReader { private boolean closed = false; private boolean closedByChild = false; private final AtomicInteger refCount = new AtomicInteger(1); IndexReader() {} /** * A utility class that gives hooks in order to help build a cache based on the data that is * contained in this index. * *

Example: cache the number of documents that match a query per reader. * *

   * public class QueryCountCache {
   *
   *   private final Query query;
   *   private final Map<IndexReader.CacheKey, Integer> counts = new ConcurrentHashMap<>();
   *
   *   // Create a cache of query counts for the given query
   *   public QueryCountCache(Query query) {
   *     this.query = query;
   *   }
   *
   *   // Count the number of matches of the query on the given IndexSearcher
   *   public int count(IndexSearcher searcher) throws IOException {
   *     IndexReader.CacheHelper cacheHelper = searcher.getIndexReader().getReaderCacheHelper();
   *     if (cacheHelper == null) {
   *       // reader doesn't support caching
   *       return searcher.count(query);
   *     } else {
   *       // make sure the cache entry is cleared when the reader is closed
   *       cacheHelper.addClosedListener(counts::remove);
   *       return counts.computeIfAbsent(cacheHelper.getKey(), cacheKey -> {
   *         try {
   *           return searcher.count(query);
   *         } catch (IOException e) {
   *           throw new UncheckedIOException(e);
   *         }
   *       });
   *     }
   *   }
   *
   * }
   * 
* * @lucene.experimental */ public interface CacheHelper { /** * Get a key that the resource can be cached on. The given entry can be compared using identity, * ie. {@link Object#equals} is implemented as {@code ==} and {@link Object#hashCode} is * implemented as {@link System#identityHashCode}. */ CacheKey getKey(); /** * Add a {@link ClosedListener} which will be called when the resource guarded by {@link * #getKey()} is closed. */ void addClosedListener(ClosedListener listener); } /** A cache key identifying a resource that is being cached on. */ public static final class CacheKey { CacheKey() {} // only instantiable by core impls } /** * A listener that is called when a resource gets closed. * * @lucene.experimental */ @FunctionalInterface public interface ClosedListener { /** * Invoked when the resource (segment core, or index reader) that is being cached on is closed. */ void onClose(CacheKey key) throws IOException; } private final Set parentReaders = Collections.synchronizedSet( Collections.newSetFromMap(new WeakHashMap())); /** * Expert: This method is called by {@code IndexReader}s which wrap other readers (e.g. {@link * CompositeReader} or {@link FilterLeafReader}) to register the parent at the child (this reader) * on construction of the parent. When this reader is closed, it will mark all registered parents * as closed, too. The references to parent readers are weak only, so they can be GCed once they * are no longer in use. * * @lucene.experimental */ public final void registerParentReader(IndexReader reader) { ensureOpen(); parentReaders.add(reader); } /** * For test framework use only. * * @lucene.internal */ protected void notifyReaderClosedListeners() throws IOException { // nothing to notify in the base impl } private void reportCloseToParentReaders() throws IOException { synchronized (parentReaders) { for (IndexReader parent : parentReaders) { parent.closedByChild = true; // cross memory barrier by a fake write: parent.refCount.addAndGet(0); // recurse: parent.reportCloseToParentReaders(); } } } /** Expert: returns the current refCount for this reader */ public final int getRefCount() { // NOTE: don't ensureOpen, so that callers can see // refCount is 0 (reader is closed) return refCount.get(); } /** * Expert: increments the refCount of this IndexReader instance. RefCounts are used to determine * when a reader can be closed safely, i.e. as soon as there are no more references. Be sure to * always call a corresponding {@link #decRef}, in a finally clause; otherwise the reader may * never be closed. Note that {@link #close} simply calls decRef(), which means that the * IndexReader will not really be closed until {@link #decRef} has been called for all outstanding * references. * * @see #decRef * @see #tryIncRef */ public final void incRef() { if (!tryIncRef()) { ensureOpen(); } } /** * Expert: increments the refCount of this IndexReader instance only if the IndexReader has not * been closed yet and returns true iff the refCount was successfully incremented, * otherwise false. If this method returns false the reader is either * already closed or is currently being closed. Either way this reader instance shouldn't be used * by an application unless true is returned. * *

RefCounts are used to determine when a reader can be closed safely, i.e. as soon as there * are no more references. Be sure to always call a corresponding {@link #decRef}, in a finally * clause; otherwise the reader may never be closed. Note that {@link #close} simply calls * decRef(), which means that the IndexReader will not really be closed until {@link #decRef} has * been called for all outstanding references. * * @see #decRef * @see #incRef */ public final boolean tryIncRef() { int count; while ((count = refCount.get()) > 0) { if (refCount.compareAndSet(count, count + 1)) { return true; } } return false; } /** * Expert: decreases the refCount of this IndexReader instance. If the refCount drops to 0, then * this reader is closed. If an exception is hit, the refCount is unchanged. * * @throws IOException in case an IOException occurs in doClose() * @see #incRef */ @SuppressWarnings("try") public final void decRef() throws IOException { // only check refcount here (don't call ensureOpen()), so we can // still close the reader if it was made invalid by a child: if (refCount.get() <= 0) { throw new AlreadyClosedException("this IndexReader is closed"); } final int rc = refCount.decrementAndGet(); if (rc == 0) { closed = true; try (Closeable finalizer = this::reportCloseToParentReaders; Closeable finalizer1 = this::notifyReaderClosedListeners) { doClose(); } } else if (rc < 0) { throw new IllegalStateException( "too many decRef calls: refCount is " + rc + " after decrement"); } } /** * Throws AlreadyClosedException if this IndexReader or any of its child readers is closed, * otherwise returns. */ protected final void ensureOpen() throws AlreadyClosedException { if (refCount.get() <= 0) { throw new AlreadyClosedException("this IndexReader is closed"); } // the happens before rule on reading the refCount, which must be after the fake write, // ensures that we see the value: if (closedByChild) { throw new AlreadyClosedException( "this IndexReader cannot be used anymore as one of its child readers was closed"); } } /** * {@inheritDoc} * *

{@code IndexReader} subclasses are not allowed to implement equals/hashCode, so methods are * declared final. */ @Override public final boolean equals(Object obj) { return (this == obj); } /** * {@inheritDoc} * *

{@code IndexReader} subclasses are not allowed to implement equals/hashCode, so methods are * declared final. */ @Override public final int hashCode() { return System.identityHashCode(this); } /** * Returns a {@link TermVectors} reader for the term vectors of this index. * *

This call never returns {@code null}, even if no term vectors were indexed. The returned * instance should only be used by a single thread. * *

Example: * *

   * TopDocs hits = searcher.search(query, 10);
   * TermVectors termVectors = reader.termVectors();
   * for (ScoreDoc hit : hits.scoreDocs) {
   *   Fields vector = termVectors.get(hit.doc);
   * }
   * 
* * @throws IOException If there is a low-level IO error */ public abstract TermVectors termVectors() throws IOException; /** * Returns the number of documents in this index. * *

NOTE: This operation may run in O(maxDoc). Implementations that can't return this * number in constant-time should cache it. */ public abstract int numDocs(); /** * Returns one greater than the largest possible document number. This may be used to, e.g., * determine how big to allocate an array which will have an element for every document number in * an index. */ public abstract int maxDoc(); /** * Returns the number of deleted documents. * *

NOTE: This operation may run in O(maxDoc). */ public final int numDeletedDocs() { return maxDoc() - numDocs(); } /** * Returns a {@link StoredFields} reader for the stored fields of this index. * *

This call never returns {@code null}, even if no stored fields were indexed. The returned * instance should only be used by a single thread. * *

Example: * *

   * TopDocs hits = searcher.search(query, 10);
   * StoredFields storedFields = reader.storedFields();
   * for (ScoreDoc hit : hits.scoreDocs) {
   *   Document doc = storedFields.document(hit.doc);
   * }
   * 
* * @throws IOException If there is a low-level IO error */ public abstract StoredFields storedFields() throws IOException; /** * Returns true if any documents have been deleted. Implementers should consider overriding this * method if {@link #maxDoc()} or {@link #numDocs()} are not constant-time operations. */ public boolean hasDeletions() { return numDeletedDocs() > 0; } /** * Closes files associated with this index. Also saves any new deletions to disk. No other methods * should be called after this has been called. * * @throws IOException if there is a low-level IO error */ @Override public final synchronized void close() throws IOException { if (!closed) { decRef(); closed = true; } } /** Implements close. */ protected abstract void doClose() throws IOException; /** * Expert: Returns the root {@link IndexReaderContext} for this {@link IndexReader}'s sub-reader * tree. * *

Iff this reader is composed of sub readers, i.e. this reader being a composite reader, this * method returns a {@link CompositeReaderContext} holding the reader's direct children as well as * a view of the reader tree's atomic leaf contexts. All sub- {@link IndexReaderContext} instances * referenced from this readers top-level context are private to this reader and are not shared * with another context tree. For example, IndexSearcher uses this API to drive searching by one * atomic leaf reader at a time. If this reader is not composed of child readers, this method * returns an {@link LeafReaderContext}. * *

Note: Any of the sub-{@link CompositeReaderContext} instances referenced from this top-level * context do not support {@link CompositeReaderContext#leaves()}. Only the top-level context * maintains the convenience leaf-view for performance reasons. */ public abstract IndexReaderContext getContext(); /** * Returns the reader's leaves, or itself if this reader is atomic. This is a convenience method * calling {@code this.getContext().leaves()}. * * @see IndexReaderContext#leaves() */ public final List leaves() { return getContext().leaves(); } /** * Optional method: Return a {@link CacheHelper} that can be used to cache based on the content of * this reader. Two readers that have different data or different sets of deleted documents will * be considered different. * *

A return value of {@code null} indicates that this reader is not suited for caching, which * is typically the case for short-lived wrappers that alter the content of the wrapped reader. * * @lucene.experimental */ public abstract CacheHelper getReaderCacheHelper(); /** * Returns the number of documents containing the term. This method returns 0 if the * term or field does not exists. This method does not take into account deleted documents that * have not yet been merged away. * * @see TermsEnum#docFreq() */ public abstract int docFreq(Term term) throws IOException; /** * Returns the total number of occurrences of {@code term} across all documents (the sum of the * freq() for each doc that has this term). Note that, like other term measures, this measure does * not take deleted documents into account. */ public abstract long totalTermFreq(Term term) throws IOException; /** * Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field. Note that, just * like other term measures, this measure does not take deleted documents into account. * * @see Terms#getSumDocFreq() */ public abstract long getSumDocFreq(String field) throws IOException; /** * Returns the number of documents that have at least one term for this field. Note that, just * like other term measures, this measure does not take deleted documents into account. * * @see Terms#getDocCount() */ public abstract int getDocCount(String field) throws IOException; /** * Returns the sum of {@link TermsEnum#totalTermFreq} for all terms in this field. Note that, just * like other term measures, this measure does not take deleted documents into account. * * @see Terms#getSumTotalTermFreq() */ public abstract long getSumTotalTermFreq(String field) throws IOException; }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy