All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.solr.search.stats.LRUStatsCache Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.stats;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.CaffeineCache;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Unlike {@link ExactStatsCache} this implementation preserves term stats across queries in a set
 * of LRU caches (with the same life-cycle as SolrIndexSearcher), and based on surface features of a
 * query it determines the need to send additional requests to retrieve local term and collection
 * statistics from shards. As a result the additional requests may be needed much less frequently.
 *
 * 

Query terms, their stats and field stats are maintained in LRU caches, with the size by * default {@link #DEFAULT_MAX_SIZE}, one cache per shard. These caches are updated as needed (when * term or field statistics are missing). Each instance of the component keeps also a global stats * cache, which is aggregated from per-shard caches. * *

Cache entries expire after a max idle time, by default {@link #DEFAULT_MAX_IDLE_TIME}. */ public class LRUStatsCache extends ExactStatsCache { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final int DEFAULT_MAX_SIZE = 200; public static final int DEFAULT_MAX_IDLE_TIME = 60; // local stats obtained from shard servers // map of > private final Map> perShardTermStats = new ConcurrentHashMap<>(); // map of > private final Map> perShardColStats = new ConcurrentHashMap<>(); // global stats synchronized from the leader // cache of private final CaffeineCache currentGlobalTermStats = new CaffeineCache<>(); // cache of private final CaffeineCache currentGlobalColStats = new CaffeineCache<>(); // missing stats to be fetched with the next request private Set missingColStats = ConcurrentHashMap.newKeySet(); private Set missingTermStats = ConcurrentHashMap.newKeySet(); private final Map lruCacheInitArgs = new HashMap<>(); private final StatsCacheMetrics ignorableMetrics = new StatsCacheMetrics(); @Override protected StatsSource doGet(SolrQueryRequest req) { if (log.isDebugEnabled()) { log.debug("## GET total={}, cache {}", currentGlobalColStats, currentGlobalTermStats.size()); } return new LRUStatsSource(statsCacheMetrics); } @Override public void clear() { super.clear(); perShardTermStats.clear(); perShardColStats.clear(); currentGlobalTermStats.clear(); currentGlobalColStats.clear(); ignorableMetrics.clear(); } @Override public void init(PluginInfo info) { super.init(info); if (info != null && info.attributes != null) { lruCacheInitArgs.putAll(info.attributes); } lruCacheInitArgs.computeIfAbsent(SolrCache.SIZE_PARAM, s -> String.valueOf(DEFAULT_MAX_SIZE)); lruCacheInitArgs.computeIfAbsent( SolrCache.MAX_IDLE_TIME_PARAM, t -> String.valueOf(DEFAULT_MAX_IDLE_TIME)); Map map = new HashMap<>(lruCacheInitArgs); map.put(CommonParams.NAME, "globalTermStats"); currentGlobalTermStats.init(lruCacheInitArgs, null, null); 
currentGlobalTermStats.setState(SolrCache.State.LIVE); map = new HashMap<>(lruCacheInitArgs); map.put(CommonParams.NAME, "globalColStats"); currentGlobalColStats.init(lruCacheInitArgs, null, null); currentGlobalColStats.setState(SolrCache.State.LIVE); } @Override protected ShardRequest doRetrieveStatsRequest(ResponseBuilder rb) { // check approximately what terms are needed. // NOTE: query rewrite only expands to terms that are present in the local index // so it's possible that the result will contain less terms than present in all shards. // HOWEVER: the absence of these terms is recorded by LRUStatsSource, and they will be // force-fetched on next request and cached. // check for missing stats from previous requests if (!missingColStats.isEmpty() || !missingTermStats.isEmpty()) { // needs to fetch anyway, so get the full query stats + the missing stats for caching ShardRequest sreq = super.doRetrieveStatsRequest(rb); if (!missingColStats.isEmpty()) { Set requestColStats = missingColStats; // there's a small window when new items may be added before // creating the request and clearing, so don't clear - instead replace the instance missingColStats = ConcurrentHashMap.newKeySet(); sreq.params.add(FIELDS_KEY, StatsUtil.fieldsToString(requestColStats)); } if (!missingTermStats.isEmpty()) { Set requestTermStats = missingTermStats; missingTermStats = ConcurrentHashMap.newKeySet(); sreq.params.add(TERMS_KEY, StatsUtil.termsToEncodedString(requestTermStats)); } return sreq; } // rewrite locally to see if there are any missing terms. See the note above for caveats. 
LongAdder missing = new LongAdder(); try { // use ignorableMetrics to avoid counting this checking as real misses approxCheckMissingStats( rb, new LRUStatsSource(ignorableMetrics), t -> missing.increment(), f -> missing.increment()); if (missing.sum() == 0) { // it should be (approximately) ok to skip the fetching // since we already incremented the stats decrement it here statsCacheMetrics.retrieveStats.decrement(); statsCacheMetrics.useCachedGlobalStats.increment(); return null; } else { return super.doRetrieveStatsRequest(rb); } } catch (IOException e) { log.warn( "Exception checking missing stats for query {}, forcing retrieving stats", rb.getQuery(), e); // retrieve anyway return super.doRetrieveStatsRequest(rb); } } @Override protected void addToGlobalTermStats(SolrQueryRequest req, Entry e) { currentGlobalTermStats.put(e.getKey(), e.getValue()); } @Override protected void addToPerShardColStats( SolrQueryRequest req, String shard, Map colStats) { perShardColStats.put(shard, colStats); } @Override protected Map getPerShardColStats(ResponseBuilder rb, String shard) { return perShardColStats.get(shard); } @Override protected void addToPerShardTermStats( SolrQueryRequest req, String shard, String termStatsString) { Map termStats = StatsUtil.termStatsMapFromString(termStatsString); if (termStats != null) { SolrCache cache = perShardTermStats.computeIfAbsent( shard, s -> { CaffeineCache c = new CaffeineCache<>(); Map map = new HashMap<>(lruCacheInitArgs); map.put(CommonParams.NAME, s); c.init(map, null, null); c.setState(SolrCache.State.LIVE); return c; }); for (Entry e : termStats.entrySet()) { cache.put(e.getKey(), e.getValue()); } } } @Override protected TermStats getPerShardTermStats(SolrQueryRequest req, String t, String shard) { SolrCache cache = perShardTermStats.get(shard); return (cache != null) ? 
cache.get(t) : null; // Term doesn't exist in shard } @Override protected void addToGlobalColStats(SolrQueryRequest req, Entry e) { currentGlobalColStats.put(e.getKey(), e.getValue()); } @Override protected void printStats(SolrQueryRequest req) { log.debug( "## MERGED: perShardColStats={}, perShardTermStats={}", perShardColStats, perShardTermStats); } class LRUStatsSource extends StatsSource { private final StatsCacheMetrics metrics; LRUStatsSource(StatsCacheMetrics metrics) { this.metrics = metrics; } @Override public TermStatistics termStatistics( SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq) throws IOException { TermStats termStats = currentGlobalTermStats.get(term.toString()); if (termStats == null) { log.debug("## Missing global termStats info: {}, using local", term); missingTermStats.add(term); metrics.missingGlobalTermStats.increment(); return localSearcher != null ? localSearcher.localTermStatistics(term, docFreq, totalTermFreq) : null; } else { return termStats.toTermStatistics(); } } @Override public CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field) throws IOException { CollectionStats colStats = currentGlobalColStats.get(field); if (colStats == null) { log.debug("## Missing global colStats info: {}, using local", field); missingColStats.add(field); metrics.missingGlobalFieldStats.increment(); return localSearcher != null ? localSearcher.localCollectionStatistics(field) : null; } else { return colStats.toCollectionStatistics(); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy