All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.lucene.search.QueryCachingPolicy Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;


import java.io.IOException;

import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.TieredMergePolicy;

/**
 * A policy defining which queries should be cached.
 *
 * Implementations of this class must be thread-safe.
 *
 * @see UsageTrackingQueryCachingPolicy
 * @see LRUQueryCache
 * @lucene.experimental
 */
// TODO: add APIs for integration with IndexWriter.IndexReaderWarmer
public interface QueryCachingPolicy {

  /** A simple policy that caches all the provided queries on all segments. */
  public static final QueryCachingPolicy ALWAYS_CACHE = new QueryCachingPolicy() {

    @Override
    public void onUse(Query query) {
      // Nothing to track: this policy's decision never depends on usage.
    }

    @Override
    public boolean shouldCache(Query query, LeafReaderContext context) throws IOException {
      return true;
    }

  };

  /** A simple policy that only caches on the largest segments of an index.
   *  The reasoning is that these segments likely account for most of the
   *  execution time of queries and are also more likely to stay around longer
   *  than small segments, which makes them more interesting for caching.
   */
  public static class CacheOnLargeSegments implements QueryCachingPolicy {

    /** {@link CacheOnLargeSegments} instance that only caches on segments that
     *  account for at least 3% of the total index size, and only when the
     *  index holds at least 10000 documents. This should guarantee
     *  that all segments from the upper {@link TieredMergePolicy tier} will be
     *  cached while ensuring that at most 33 segments can make it to
     *  the cache (given that some implementations such as {@link LRUQueryCache}
     *  perform better when the number of cached segments is low). */
    public static final CacheOnLargeSegments DEFAULT = new CacheOnLargeSegments(10000, 0.03f);

    private final int minIndexSize;
    private final float minSizeRatio;

    /**
     * Create a {@link CacheOnLargeSegments} instance that only caches on a
     * given segment if the total number of documents in the index is greater
     * than or equal to {@code minIndexSize} and the number of documents in the
     * segment divided by the total number of documents in the index is greater
     * than or equal to {@code minSizeRatio}.
     *
     * @param minIndexSize minimum {@code maxDoc} of the top-level reader for
     *                     caching to be considered at all
     * @param minSizeRatio minimum share of the index that a segment must
     *                     represent to be cached, strictly between 0 and 1
     * @throws IllegalArgumentException if {@code minSizeRatio} is not strictly
     *                                  between 0 and 1 (this includes NaN)
     */
    public CacheOnLargeSegments(int minIndexSize, float minSizeRatio) {
      // Expressed as a negated conjunction on purpose: every ordered comparison
      // involving NaN is false, so "ratio <= 0 || ratio >= 1" would silently
      // accept NaN and yield a policy that can never cache anything.
      if (!(minSizeRatio > 0 && minSizeRatio < 1)) {
        throw new IllegalArgumentException("minSizeRatio must be in ]0, 1[, got " + minSizeRatio);
      }
      this.minIndexSize = minIndexSize;
      this.minSizeRatio = minSizeRatio;
    }

    @Override
    public void onUse(Query query) {
      // Nothing to track: the decision only depends on segment sizes.
    }

    /**
     * Returns {@code true} when the top-level reader has at least
     * {@code minIndexSize} documents and the given leaf accounts for at least
     * {@code minSizeRatio} of them. The {@code query} itself is not consulted.
     */
    @Override
    public boolean shouldCache(Query query, LeafReaderContext context) throws IOException {
      final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
      final int indexMaxDoc = topLevelContext.reader().maxDoc();
      if (indexMaxDoc < minIndexSize) {
        return false;
      }
      // Float division: an empty index (0 / 0) produces NaN, which compares
      // false below, so no exception is raised and nothing is cached.
      final float sizeRatio = (float) context.reader().maxDoc() / indexMaxDoc;
      return sizeRatio >= minSizeRatio;
    }

  }

  /** Callback that is called every time that a cached query is used.
   *  This is typically useful if the policy wants to track usage statistics
   *  in order to make decisions.
   *
   *  @param query the query that has been used
   */
  void onUse(Query query);

  /** Whether the given {@link Query} should be cached on a given segment.
   *  This method will be called on each leaf context to know if the query
   *  should be cached on this particular leaf. The query cache will first
   *  attempt to load a {@link DocIdSet} from the cache. If it is not cached
   *  yet and this method returns true then a cache entry will be
   *  generated. Otherwise an uncached set will be returned.
   *
   *  @param query   the query that is a candidate for caching
   *  @param context the leaf (segment) on which caching is being considered
   *  @return {@code true} if a cache entry should be generated for this leaf
   *  @throws IOException if the policy fails to read index data
   */
  boolean shouldCache(Query query, LeafReaderContext context) throws IOException;

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy