org.apache.solr.request.SimpleFacets Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Show all versions of solr-core Show documentation
Apache Solr (module: core)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.RunnableFuture;
import java.util.concurrent.Semaphore;
import java.util.function.Predicate;
import java.util.stream.Stream;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiPostingsEnum;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.grouping.AllGroupHeadsCollector;
import org.apache.lucene.search.grouping.AllGroupsCollector;
import org.apache.lucene.search.grouping.TermGroupFacetCollector;
import org.apache.lucene.search.grouping.TermGroupSelector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.RequiredSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SpatialHeatmapFacets;
import org.apache.solr.request.IntervalFacets.FacetInterval;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.Filter;
import org.apache.solr.search.Grouping;
import org.apache.solr.search.HashDocSet;
import org.apache.solr.search.Insanity;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortedIntDocSet;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.facet.FacetDebugInfo;
import org.apache.solr.search.facet.FacetRequest;
import org.apache.solr.search.grouping.GroupingSpecification;
import org.apache.solr.util.BoundedTreeSet;
import org.apache.solr.util.RTimer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.common.params.CommonParams.SORT;
/**
* A class that generates simple Facet information for a request.
*
* More advanced facet implementations may compose or subclass this class
* to leverage any of its functionality.
*/
public class SimpleFacets {
/** Logger for this class. */
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/** The main set of documents all facet counts should be relative to */
protected DocSet docsOrig;
/** Configuration params behavior should be driven by */
protected final SolrParams global;
/** Searcher to use for all calculations */
protected final SolrIndexSearcher searcher;
/** The request facets are computed for; the searcher is obtained from it. */
protected final SolrQueryRequest req;
/** Response builder of the request; null when the three-argument constructor is used. */
protected final ResponseBuilder rb;
/** Parent debug info, assigned through {@link #setFacetDebugInfo(FacetDebugInfo)}. */
protected FacetDebugInfo fdebugParent;
/**
 * Debug info for term counting; when non-null, the requested and applied facet method,
 * the input doc set size and the field name are recorded into it.
 */
protected FacetDebugInfo fdebug;
// per-facet values
/**
 * Immutable holder for the values parsed out of a single facet command
 * (one facet parameter value together with its optional local params).
 */
protected static final class ParsedParams {
  /** localParams on this particular facet command; null when the command has none */
  public final SolrParams localParams;
  /** local params layered over the original request params (local+original) */
  public final SolrParams params;
  /** required version of {@link #params} */
  public final SolrParams required;
  /** the field or query to facet on (minus local params) */
  public final String facetValue;
  /** the base docset for this particular facet */
  public final DocSet docs;
  /** what name the results should be stored under */
  public final String key;
  /** the tags applied to this facet value */
  public final List<String> tags;
  /** the thread count requested for this facet command, as supplied by the caller */
  public final int threads;

  /**
   * @param localParams localParams on this particular facet command
   * @param params local+original params
   * @param required required version of params
   * @param facetValue the field or query to facet on (minus local params)
   * @param docs the base docset for this particular facet
   * @param key what name the results should be stored under
   * @param tags the tags applied to this facet value
   * @param threads the thread count requested for this facet command
   */
  public ParsedParams(final SolrParams localParams,
                      final SolrParams params,
                      final SolrParams required,
                      final String facetValue,
                      final DocSet docs,
                      final String key,
                      final List<String> tags,
                      final int threads) {
    this.localParams = localParams;
    this.params = params;
    this.required = required;
    this.facetValue = facetValue;
    this.docs = docs;
    this.key = key;
    this.tags = tags;
    this.threads = threads;
  }

  /**
   * Returns a copy of this instance that differs only in its base docset.
   *
   * @param docs the base docset the copy should use
   * @return a new ParsedParams sharing every other value with this one
   */
  public ParsedParams withDocs(final DocSet docs) {
    return new ParsedParams(localParams, params, required, facetValue, docs, key, tags, threads);
  }
}
/**
 * Creates a facet calculator that has no {@link ResponseBuilder}; delegates to the
 * four-argument constructor with a null builder.
 *
 * @param req the request facets are computed for
 * @param docs the main set of documents all facet counts should be relative to
 * @param params the params that drive faceting behavior
 */
public SimpleFacets(SolrQueryRequest req, DocSet docs, SolrParams params) {
  this(req, docs, params, null);
}
/**
 * Creates a facet calculator for the given request.
 *
 * @param req the request facets are computed for; its searcher is used for all calculations
 * @param docs the main set of documents all facet counts should be relative to
 * @param params the params that drive faceting behavior
 * @param rb the response builder of the request, or null when there is none
 */
public SimpleFacets(SolrQueryRequest req, DocSet docs, SolrParams params, ResponseBuilder rb) {
  this.req = req;
  this.searcher = req.getSearcher();
  this.rb = rb;
  this.global = params;
  this.docsOrig = docs;
  // faceting work is submitted to the update executor of the core container
  this.facetExecutor = req.getCore()
      .getCoreContainer()
      .getUpdateShardHandler()
      .getUpdateExecutor();
}
/**
 * Stores the parent debug info object for this facet calculator.
 *
 * @param fdebugParent the parent debug info; kept in the {@code fdebugParent} field
 */
public void setFacetDebugInfo(final FacetDebugInfo fdebugParent) {
  this.fdebugParent = fdebugParent;
}
/**
 * Parses one facet command (a single facet parameter value, optionally prefixed with
 * local params) into a {@link ParsedParams}.
 *
 * <p>Without local params the raw value is used as both the facet value and the key, the
 * global params and the original docset are used, there are no tags, and threads is -1.
 * With local params, the output key, tags, thread count and excluded filter tags are read
 * from them; when exclusions are given the base docset is recomputed.
 *
 * @param type the facet parameter name the value belongs to, e.g. {@link FacetParams#FACET_QUERY}
 * @param param the raw parameter value, possibly starting with local params
 * @return the parsed representation of the facet command
 * @throws SyntaxError if the local params or an excluded query cannot be parsed
 * @throws IOException if recomputing the base docset fails
 * @throws NumberFormatException if the threads local param is not an integer
 */
protected ParsedParams parseParams(String type, String param) throws SyntaxError, IOException {
  final SolrParams localParams = QueryParsing.getLocalParams(param, req.getParams());

  if (localParams == null) {
    // plain value: everything comes from the global params and the original docset
    final SolrParams required = new RequiredSolrParams(global);
    return new ParsedParams(localParams, global, required, param, docsOrig, param,
        Collections.<String>emptyList(), -1);
  }

  final SolrParams params = SolrParams.wrapDefaults(localParams, global);
  final SolrParams required = new RequiredSolrParams(params);

  // remove local params unless it's a query; compare by value rather than by reference so
  // that callers are not required to pass the very same String instance as the constant
  // TODO Cut over to an Enum here
  final String facetValue = FacetParams.FACET_QUERY.equals(type)
      ? param
      : localParams.get(CommonParams.VALUE);

  // the key defaults to the facet value (local params removed) unless explicitly set
  final String key = localParams.get(CommonParams.OUTPUT_KEY, facetValue);

  final String tagStr = localParams.get(CommonParams.TAG);
  final List<String> tags = tagStr == null
      ? Collections.<String>emptyList()
      : StrUtils.splitSmart(tagStr, ',');

  final String threadStr = localParams.get(CommonParams.THREADS);
  final int threads = threadStr == null ? -1 : Integer.parseInt(threadStr);

  // figure out if we need a new base DocSet
  final String excludeStr = localParams.get(CommonParams.EXCLUDE);
  DocSet docs = docsOrig;
  if (excludeStr != null) {
    final List<String> excludeTagList = StrUtils.splitSmart(excludeStr, ',');
    docs = computeDocSet(docs, excludeTagList);
  }
  return new ParsedParams(localParams, params, required, facetValue, docs, key, tags, threads);
}
/**
 * Computes the base docset to facet against when some tagged queries/filters must be
 * excluded.
 *
 * <p>The queries of all {@link QParser}s registered in the request context under the given
 * tags are collected, then the main query and filters of the response builder are re-run
 * without them. If grouping with truncation is active, the result is further reduced to
 * the group heads of the first grouping field or, failing that, the first grouping function.
 *
 * @param baseDocSet the docset to fall back to when nothing has to be excluded
 * @param excludeTagList the tags whose queries should be left out
 * @return {@code baseDocSet} when there is no tag map, no response builder, or no tagged
 *         query matched; otherwise the newly computed docset
 * @throws SyntaxError if a tagged query cannot be parsed
 * @throws IOException if searching fails
 */
protected DocSet computeDocSet(DocSet baseDocSet, List<String> excludeTagList) throws SyntaxError, IOException {
  final Map<?,?> tagMap = (Map<?,?>) req.getContext().get("tags");
  // rb can be null if facets are being calculated from a RequestHandler e.g. MoreLikeThisHandler
  if (tagMap == null || rb == null) {
    return baseDocSet;
  }

  // keyed by identity: a query/filter is excluded only if it is the same instance
  // as the one produced by a tagged parser
  final IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
  for (String excludeTag : excludeTagList) {
    final Object olst = tagMap.get(excludeTag);
    // tag entries are expected to be collections of QParser, but that is subject to change
    if (!(olst instanceof Collection)) {
      continue;
    }
    for (Object o : (Collection<?>) olst) {
      if (!(o instanceof QParser)) {
        continue;
      }
      excludeSet.put(((QParser) o).getQuery(), Boolean.TRUE);
    }
  }
  if (excludeSet.isEmpty()) {
    return baseDocSet;
  }

  final List<Query> qlist = new ArrayList<>();

  // add the base query
  if (!excludeSet.containsKey(rb.getQuery())) {
    qlist.add(rb.getQuery());
  }

  // add the filters
  if (rb.getFilters() != null) {
    for (Query q : rb.getFilters()) {
      if (!excludeSet.containsKey(q)) {
        qlist.add(q);
      }
    }
  }

  // get the new base docset for this facet
  final DocSet base = searcher.getDocSet(qlist);
  if (!(rb.grouping() && rb.getGroupingSpec().isTruncateGroups())) {
    return base;
  }

  // truncated grouping: only the head document of each group may contribute to the counts
  final Grouping grouping = new Grouping(searcher, null, rb.createQueryCommand(), false, 0, false);
  grouping.setWithinGroupSort(rb.getGroupingSpec().getWithinGroupSortSpec().getSort());
  if (rb.getGroupingSpec().getFields().length > 0) {
    grouping.addFieldCommand(rb.getGroupingSpec().getFields()[0], req);
  } else if (rb.getGroupingSpec().getFunctions().length > 0) {
    grouping.addFunctionCommand(rb.getGroupingSpec().getFunctions()[0], req);
  } else {
    return base;
  }
  final AllGroupHeadsCollector<?> allGroupHeadsCollector = grouping.getCommands().get(0).createAllGroupCollector();
  searcher.search(base.getTopFilter(), allGroupHeadsCollector);
  return new BitDocSet(allGroupHeadsCollector.retrieveGroupHeads(searcher.maxDoc()));
}
/**
 * Computes the count of every facet query specified in the params.
 *
 * @return one entry per facet query, added in the order the parameter values are returned;
 *         empty when no facet query is present
 * @see FacetParams#FACET_QUERY
 */
public NamedList getFacetQueryCounts() throws IOException,SyntaxError {
  final NamedList counts = new SimpleOrderedMap<>();

  /* CommonParams.DF is ignored on purpose: an init param facet.query may have been
   * written assuming the schema default, with the query param DF intended to only
   * affect Q. Users who do not want the schema default for facet.query should be
   * explicit.
   */
  final String[] queries = global.getParams(FacetParams.FACET_QUERY);
  if (queries == null) {
    return counts;
  }
  for (int i = 0; i < queries.length; i++) {
    final ParsedParams command = parseParams(FacetParams.FACET_QUERY, queries[i]);
    getFacetQueryCount(command, counts);
  }
  return counts;
}
/**
 * Parses the facet query held by {@code parsed} and adds its count to {@code res} under
 * {@code parsed.key}. A query that parses to null is counted as 0; when group faceting is
 * enabled the grouped count is used, otherwise the number of matching documents in
 * {@code parsed.docs}.
 *
 * @param parsed the parsed facet command
 * @param res the list the count is appended to
 */
public void getFacetQueryCount(ParsedParams parsed, NamedList res) throws SyntaxError, IOException {
  // TODO: slight optimization would prevent double-parsing of any localParams
  // TODO: SOLR-7753
  final Query facetQuery = QParser.getParser(parsed.facetValue, req).getQuery();

  final int count;
  if (facetQuery == null) {
    count = 0;
  } else if (parsed.params.getBool(GroupParams.GROUP_FACET, false)) {
    count = getGroupedFacetQueryCount(facetQuery, parsed.docs);
  } else {
    count = searcher.numDocs(facetQuery, parsed.docs);
  }
  res.add(parsed.key, count);
}
/**
 * Counts the groups (by the global group.field) that contain at least one document
 * matching both the facet query and the given docset.
 *
 * @param facetQuery the facet query to count groups for
 * @param docSet the documents the count is restricted to
 * @return the number of groups collected
 * @throws SolrException with BAD_REQUEST when no group.field is specified
 * @see FacetParams#FACET_QUERY
 */
public int getGroupedFacetQueryCount(Query facetQuery, DocSet docSet) throws IOException {
  // group.field can safely be read from the global params because it is never a local param
  final String groupField = global.get(GroupParams.GROUP_FIELD);
  if (null == groupField) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "Specify the group.field as parameter or local parameter");
  }

  final AllGroupsCollector<?> groupCollector = new AllGroupsCollector<>(new TermGroupSelector(groupField));

  // this filter only matches documents matching the q param and fq params
  final Filter docSetFilter = docSet.getTopFilter();

  final BooleanQuery.Builder restricted = new BooleanQuery.Builder();
  restricted.add(facetQuery, Occur.MUST);
  restricted.add(docSetFilter, Occur.FILTER);

  searcher.search(restricted.build(), groupCollector);
  return groupCollector.getGroupCount();
}
/**
 * The faceting methods that can be requested through the facet.method parameter
 * (its enum, fc, fcs and uif values respectively).
 */
enum FacetMethod {
  ENUM,
  FC,
  FCS,
  UIF
}
/**
 * Create a new bytes ref filter for excluding facet terms.
 *
 * This method by default uses the {@link FacetParams#FACET_EXCLUDETERMS} parameter
 * but custom SimpleFacets classes could use a different implementation.
 *
 * The parameter value is split on commas; a term passes the returned predicate only if
 * its UTF-8 decoded string form is not one of the resulting entries.
 *
 * @param field the field to check for facet term filters
 * @param params the request parameter object
 * @return A predicate for filtering terms or null if no filters are applicable.
 */
protected Predicate<BytesRef> newExcludeBytesRefFilter(String field, SolrParams params) {
  final String exclude = params.getFieldParam(field, FacetParams.FACET_EXCLUDETERMS);
  if (exclude == null) {
    return null;
  }

  // a hash set keeps the per-term membership test cheap
  final Set<String> excludeTerms = new HashSet<>(StrUtils.splitSmart(exclude, ",", true));
  return bytesRef -> !excludeTerms.contains(bytesRef.utf8ToString());
}
/**
 * Builds the predicate used to filter facet terms of a field. The substring filter
 * ({@link FacetParams#FACET_CONTAINS}), the regex filter ({@link FacetParams#FACET_MATCHES})
 * and the exclusion filter from {@link #newExcludeBytesRefFilter(String, SolrParams)} are
 * each consulted; those that apply are chained with {@link Predicate#and(Predicate)}.
 *
 * @param field the field to check for facet term filters
 * @param params the request parameter object
 * @return A predicate for filtering terms or null if no filters are applicable.
 */
protected Predicate newBytesRefFilter(String field, SolrParams params) {
  Predicate combined = null;

  // substring match, optionally case-insensitive
  final String substring = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
  if (substring != null) {
    final boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);
    combined = new SubstringBytesRefFilter(substring, ignoreCase);
  }

  // regular expression match
  final String pattern = params.getFieldParam(field, FacetParams.FACET_MATCHES);
  if (pattern != null) {
    final RegexBytesRefFilter patternFilter = new RegexBytesRefFilter(pattern);
    if (combined == null) {
      combined = patternFilter;
    } else {
      combined = combined.and(patternFilter);
    }
  }

  // excluded terms
  final Predicate exclusion = newExcludeBytesRefFilter(field, params);
  if (exclusion != null) {
    if (combined == null) {
      combined = exclusion;
    } else {
      combined = combined.and(exclusion);
    }
  }

  return combined;
}
/**
 * Term counts for use in pivot faceting that respect the pivot-specific mincount,
 * which defaults to 1 when not set for the field.
 *
 * @param field the field to compute term counts for
 * @param parsed the parsed facet command supplying params and base docset
 * @see FacetParams#FACET_PIVOT_MINCOUNT
 */
public NamedList getTermCountsForPivots(String field, ParsedParams parsed) throws IOException {
  final Integer pivotMinCount =
      parsed.params.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
  return getTermCounts(field, pivotMinCount, parsed);
}
/**
 * Term counts for use in field faceting that respect the field's configured mincount.
 * The mincount is passed on as-is, so it may be null when the parameter is not set.
 *
 * @param field the field to compute term counts for
 * @param parsed the parsed facet command supplying params and base docset
 * @see FacetParams#FACET_MINCOUNT
 */
public NamedList getTermCounts(String field, ParsedParams parsed) throws IOException {
  final Integer configuredMinCount =
      parsed.params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
  return getTermCounts(field, configuredMinCount, parsed);
}
/**
* Term counts for use in field faceting that respects the specified mincount -
* if mincount is null, the "zeros" param is consulted for the appropriate backcompat
* default
*
* @see FacetParams#FACET_ZEROS
*/
private NamedList getTermCounts(String field, Integer mincount, ParsedParams parsed) throws IOException {
final SolrParams params = parsed.params;
final DocSet docs = parsed.docs;
final int threads = parsed.threads;
int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
// when limit=0 and missing=false then return empty list
if (limit == 0 && !missing) return new NamedList<>();
if (mincount==null) {
Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
// mincount = (zeros!=null && zeros) ? 0 : 1;
mincount = (zeros!=null && !zeros) ? 1 : 0;
// current default is to include zeros.
}
// default to sorting if there is a limit.
String sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
final Predicate termFilter = newBytesRefFilter(field, params);
boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
NamedList counts;
SchemaField sf = searcher.getSchema().getField(field);
if (sf.getType().isPointField() && !sf.hasDocValues()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Can't facet on a PointField without docValues");
}
FieldType ft = sf.getType();
// determine what type of faceting method to use
final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
final FacetMethod requestedMethod;
if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
requestedMethod = FacetMethod.ENUM;
} else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
requestedMethod = FacetMethod.FCS;
} else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
requestedMethod = FacetMethod.FC;
} else if(FacetParams.FACET_METHOD_uif.equals(methodStr)) {
requestedMethod = FacetMethod.UIF;
} else {
requestedMethod=null;
}
final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
FacetMethod appliedFacetMethod = selectFacetMethod(field,
sf, requestedMethod, mincount,
exists);
RTimer timer = null;
if (fdebug != null) {
fdebug.putInfoItem("requestedMethod", requestedMethod==null?"not specified":requestedMethod.name());
fdebug.putInfoItem("appliedMethod", appliedFacetMethod.name());
fdebug.putInfoItem("inputDocSetSize", docs.size());
fdebug.putInfoItem("field", field);
timer = new RTimer();
}
if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
counts = getGroupedCounts(searcher, docs, field, multiToken, offset,limit, mincount, missing, sort, prefix, termFilter);
} else {
assert appliedFacetMethod != null;
switch (appliedFacetMethod) {
case ENUM:
assert TrieField.getMainValuePrefix(ft) == null;
counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, termFilter, exists);
break;
case FCS:
assert ft.isPointField() || !multiToken;
if (ft.isPointField() || (ft.getNumberType() != null && !sf.multiValued())) {
if (prefix != null) {
throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
}
if (termFilter != null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "BytesRef term filters ("
+ FacetParams.FACET_MATCHES + ", "
+ FacetParams.FACET_CONTAINS + ", "
+ FacetParams.FACET_EXCLUDETERMS + ") are not supported on numeric types");
}
if (ft.isPointField() && mincount <= 0) { // default is mincount=0. See SOLR-10033 & SOLR-11174.
String warningMessage
= "Raising facet.mincount from " + mincount + " to 1, because field " + field + " is Points-based.";
log.warn(warningMessage);
List warnings = (List)rb.rsp.getResponseHeader().get("warnings");
if (null == warnings) {
warnings = new ArrayList<>();
rb.rsp.getResponseHeader().add("warnings", warnings);
}
warnings.add(warningMessage);
mincount = 1;
}
counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
} else {
PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter);
Executor executor = threads == 0 ? directExecutor : facetExecutor;
ps.setNumThreads(threads);
counts = ps.getFacetCounts(executor);
}
break;
case UIF:
//Emulate the JSON Faceting structure so we can use the same parsing classes
Map jsonFacet = new HashMap<>(13);
jsonFacet.put("type", "terms");
jsonFacet.put("field", field);
jsonFacet.put("offset", offset);
jsonFacet.put("limit", limit);
jsonFacet.put("mincount", mincount);
jsonFacet.put("missing", missing);
jsonFacet.put("prefix", prefix);
jsonFacet.put("numBuckets", params.getFieldBool(field, "numBuckets", false));
jsonFacet.put("allBuckets", params.getFieldBool(field, "allBuckets", false));
jsonFacet.put("method", "uif");
jsonFacet.put("cacheDf", 0);
jsonFacet.put("perSeg", false);
final String sortVal;
switch(sort){
case FacetParams.FACET_SORT_COUNT_LEGACY:
sortVal = FacetParams.FACET_SORT_COUNT;
break;
case FacetParams.FACET_SORT_INDEX_LEGACY:
sortVal = FacetParams.FACET_SORT_INDEX;
break;
default:
sortVal = sort;
}
jsonFacet.put(SORT, sortVal );
//TODO do we handle debug? Should probably already be handled by the legacy code
Object resObj = FacetRequest.parseOneFacetReq(req, jsonFacet).process(req, docs);
//Go through the response to build the expected output for SimpleFacets
counts = new NamedList<>();
if(resObj != null) {
NamedList