org.apache.lucene.search.UsageTrackingQueryCachingPolicy Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.FrequencyTrackingRingBuffer;
/**
* A {@link QueryCachingPolicy} that tracks usage statistics of recently-used
* filters in order to decide on which filters are worth caching.
*
* It also uses some heuristics on segments, filters and the doc id sets that
* they produce in order to cache more aggressively when the execution cost
* significantly outweighs the caching overhead.
*
* @lucene.experimental
*/
public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy {
// the hash code that we use as a sentinel in the ring buffer.
private static final int SENTINEL = Integer.MIN_VALUE;
static boolean isCostly(Query query) {
// This does not measure the cost of iterating over the filter (for this we
// already have the DocIdSetIterator#cost API) but the cost to build the
// DocIdSet in the first place
return query instanceof MultiTermQuery ||
query instanceof MultiTermQueryConstantScoreWrapper;
}
static boolean isCheap(Query query) {
// same for cheap queries
// these queries are so cheap that they usually do not need caching
return query instanceof TermQuery;
}
private final QueryCachingPolicy.CacheOnLargeSegments segmentPolicy;
private final FrequencyTrackingRingBuffer recentlyUsedFilters;
/**
* Create a new instance.
*
* @param minIndexSize the minimum size of the top-level index
* @param minSizeRatio the minimum size ratio for segments to be cached, see {@link QueryCachingPolicy.CacheOnLargeSegments}
* @param historySize the number of recently used filters to track
*/
public UsageTrackingQueryCachingPolicy(
int minIndexSize,
float minSizeRatio,
int historySize) {
this(new QueryCachingPolicy.CacheOnLargeSegments(minIndexSize, minSizeRatio), historySize);
}
/** Create a new instance with an history size of 256. */
public UsageTrackingQueryCachingPolicy() {
this(QueryCachingPolicy.CacheOnLargeSegments.DEFAULT, 256);
}
private UsageTrackingQueryCachingPolicy(
QueryCachingPolicy.CacheOnLargeSegments segmentPolicy,
int historySize) {
this.segmentPolicy = segmentPolicy;
this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL);
}
/**
* For a given query, return how many times it should appear in the history
* before being cached.
*/
protected int minFrequencyToCache(Query query) {
if (isCostly(query)) {
return 2;
} else if (isCheap(query)) {
return 20;
} else {
// default: cache after the filter has been seen 5 times
return 5;
}
}
@Override
public void onUse(Query query) {
assert query instanceof BoostQuery == false;
assert query instanceof ConstantScoreQuery == false;
// call hashCode outside of sync block
// in case it's somewhat expensive:
int hashCode = query.hashCode();
// we only track hash codes to avoid holding references to possible
// large queries; this may cause rare false positives, but at worse
// this just means we cache a query that was not in fact used enough:
synchronized (this) {
recentlyUsedFilters.add(hashCode);
}
}
int frequency(Query query) {
assert query instanceof BoostQuery == false;
assert query instanceof ConstantScoreQuery == false;
// call hashCode outside of sync block
// in case it's somewhat expensive:
int hashCode = query.hashCode();
synchronized (this) {
return recentlyUsedFilters.frequency(hashCode);
}
}
@Override
public boolean shouldCache(Query query, LeafReaderContext context) throws IOException {
if (query instanceof MatchAllDocsQuery
// MatchNoDocsQuery currently rewrites to a BooleanQuery,
// but who knows, it might get its own Weight one day
|| query instanceof MatchNoDocsQuery) {
return false;
}
if (query instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) query;
if (bq.clauses().isEmpty()) {
return false;
}
}
if (query instanceof DisjunctionMaxQuery) {
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
if (dmq.getDisjuncts().isEmpty()) {
return false;
}
}
if (segmentPolicy.shouldCache(query, context) == false) {
return false;
}
final int frequency = frequency(query);
final int minFrequency = minFrequencyToCache(query);
return frequency >= minFrequency;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy