org.apache.solr.request.SimpleFacets Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr (module: core)
There is a newer version: 9.7.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.request;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.RunnableFuture;
import java.util.concurrent.Semaphore;
import java.util.function.Predicate;
import java.util.stream.Stream;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiPostingsEnum;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.grouping.AllGroupHeadsCollector;
import org.apache.lucene.search.grouping.AllGroupsCollector;
import org.apache.lucene.search.grouping.TermGroupFacetCollector;
import org.apache.lucene.search.grouping.TermGroupSelector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.RequiredSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SpatialHeatmapFacets;
import org.apache.solr.request.IntervalFacets.FacetInterval;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.Filter;
import org.apache.solr.search.Grouping;
import org.apache.solr.search.HashDocSet;
import org.apache.solr.search.Insanity;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortedIntDocSet;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.facet.FacetDebugInfo;
import org.apache.solr.search.facet.FacetRequest;
import org.apache.solr.search.grouping.GroupingSpecification;
import org.apache.solr.util.BoundedTreeSet;
import org.apache.solr.util.RTimer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.solr.common.params.CommonParams.SORT;

/**
 * A class that generates simple Facet information for a request.
 *
 * More advanced facet implementations may compose or subclass this class 
 * to leverage any of its functionality.
 */
public class SimpleFacets {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  
  /** The main set of documents all facet counts should be relative to */
  protected DocSet docsOrig;
  /** Configuration params behavior should be driven by */
  protected final SolrParams global;
  /** Searcher to use for all calculations */
  protected final SolrIndexSearcher searcher;
  protected final SolrQueryRequest req;
  protected final ResponseBuilder rb;

  protected FacetDebugInfo fdebugParent;
  protected FacetDebugInfo fdebug;

  // per-facet values
  protected final static class ParsedParams {
    final public SolrParams localParams; // localParams on this particular facet command
    final public SolrParams params;      // local+original
    final public SolrParams required;    // required version of params
    final public String facetValue;      // the field to or query to facet on (minus local params)
    final public DocSet docs;            // the base docset for this particular facet
    final public String key;             // what name should the results be stored under
    final public List tags;      // the tags applied to this facet value
    final public int threads;
    
    public ParsedParams(final SolrParams localParams, // localParams on this particular facet command
                        final SolrParams params,      // local+original
                        final SolrParams required,    // required version of params
                        final String facetValue,      // the field to or query to facet on (minus local params)
                        final DocSet docs,            // the base docset for this particular facet
                        final String key,             // what name should the results be stored under
                        final List tags,
                        final int threads) {
      this.localParams = localParams;
      this.params = params;
      this.required = required;
      this.facetValue = facetValue;
      this.docs = docs;
      this.key = key;
      this.tags = tags;
      this.threads = threads;
    }
    
    public ParsedParams withDocs(final DocSet docs) {
      return new ParsedParams(localParams, params, required, facetValue, docs, key, tags, threads);
    }
  }

  public SimpleFacets(SolrQueryRequest req,
                      DocSet docs,
                      SolrParams params) {
    this(req,docs,params,null);
  }

  public SimpleFacets(SolrQueryRequest req,
                      DocSet docs,
                      SolrParams params,
                      ResponseBuilder rb) {
    this.req = req;
    this.searcher = req.getSearcher();
    this.docsOrig = docs;
    this.global = params;
    this.rb = rb;
    this.facetExecutor = req.getCore().getCoreContainer().getUpdateShardHandler().getUpdateExecutor();
  }

  public void setFacetDebugInfo(FacetDebugInfo fdebugParent) {
    this.fdebugParent = fdebugParent;
  }

  protected ParsedParams parseParams(String type, String param) throws SyntaxError, IOException {
    SolrParams localParams = QueryParsing.getLocalParams(param, req.getParams());
    DocSet docs = docsOrig;
    String facetValue = param;
    String key = param;
    List tags = Collections.emptyList();
    int threads = -1;

    if (localParams == null) {
      SolrParams params = global;
      SolrParams required = new RequiredSolrParams(params);
      return new ParsedParams(localParams, params, required, facetValue, docs, key, tags, threads);
    }
    
    SolrParams params = SolrParams.wrapDefaults(localParams, global);
    SolrParams required = new RequiredSolrParams(params);

    // remove local params unless it's a query
    if (type != FacetParams.FACET_QUERY) { // TODO Cut over to an Enum here
      facetValue = localParams.get(CommonParams.VALUE);
    }

    // reset set the default key now that localParams have been removed
    key = facetValue;

    // allow explicit set of the key
    key = localParams.get(CommonParams.OUTPUT_KEY, key);

    String tagStr = localParams.get(CommonParams.TAG);
    tags = tagStr == null ? Collections.emptyList() : StrUtils.splitSmart(tagStr,',');

    String threadStr = localParams.get(CommonParams.THREADS);
    if (threadStr != null) {
      threads = Integer.parseInt(threadStr);
    }

    // figure out if we need a new base DocSet
    String excludeStr = localParams.get(CommonParams.EXCLUDE);
    if (excludeStr == null) return new ParsedParams(localParams, params, required, facetValue, docs, key, tags, threads);

    List excludeTagList = StrUtils.splitSmart(excludeStr,',');
    docs = computeDocSet(docs, excludeTagList);
    return new ParsedParams(localParams, params, required, facetValue, docs, key, tags, threads);
  }

  protected DocSet computeDocSet(DocSet baseDocSet, List excludeTagList) throws SyntaxError, IOException {
    Map tagMap = (Map)req.getContext().get("tags");
    // rb can be null if facets are being calculated from a RequestHandler e.g. MoreLikeThisHandler
    if (tagMap == null || rb == null) {
      return baseDocSet;
    }

    IdentityHashMap excludeSet = new IdentityHashMap<>();
    for (String excludeTag : excludeTagList) {
      Object olst = tagMap.get(excludeTag);
      // tagMap has entries of List>, but subject to change in the future
      if (!(olst instanceof Collection)) continue;
      for (Object o : (Collection)olst) {
        if (!(o instanceof QParser)) continue;
        QParser qp = (QParser)o;
        excludeSet.put(qp.getQuery(), Boolean.TRUE);
      }
    }
    if (excludeSet.size() == 0) return baseDocSet;

    List qlist = new ArrayList<>();

    // add the base query
    if (!excludeSet.containsKey(rb.getQuery())) {
      qlist.add(rb.getQuery());
    }

    // add the filters
    if (rb.getFilters() != null) {
      for (Query q : rb.getFilters()) {
        if (!excludeSet.containsKey(q)) {
          qlist.add(q);
        }
      }
    }

    // get the new base docset for this facet
    DocSet base = searcher.getDocSet(qlist);
    if (rb.grouping() && rb.getGroupingSpec().isTruncateGroups()) {
      Grouping grouping = new Grouping(searcher, null, rb.createQueryCommand(), false, 0, false);
      grouping.setWithinGroupSort(rb.getGroupingSpec().getWithinGroupSortSpec().getSort());
      if (rb.getGroupingSpec().getFields().length > 0) {
        grouping.addFieldCommand(rb.getGroupingSpec().getFields()[0], req);
      } else if (rb.getGroupingSpec().getFunctions().length > 0) {
        grouping.addFunctionCommand(rb.getGroupingSpec().getFunctions()[0], req);
      } else {
        return base;
      }
      AllGroupHeadsCollector allGroupHeadsCollector = grouping.getCommands().get(0).createAllGroupCollector();
      searcher.search(base.getTopFilter(), allGroupHeadsCollector);
      return new BitDocSet(allGroupHeadsCollector.retrieveGroupHeads(searcher.maxDoc()));
    } else {
      return base;
    }
  }

  /**
   * Returns a list of facet counts for each of the facet queries
   * specified in the params
   *
   * @see FacetParams#FACET_QUERY
   */
  public NamedList getFacetQueryCounts() throws IOException,SyntaxError {

    NamedList res = new SimpleOrderedMap<>();

    /* Ignore CommonParams.DF - could have init param facet.query assuming
     * the schema default with query param DF intented to only affect Q.
     * If user doesn't want schema default for facet.query, they should be
     * explicit.
     */
    // SolrQueryParser qp = searcher.getSchema().getSolrQueryParser(null);

    String[] facetQs = global.getParams(FacetParams.FACET_QUERY);

    if (null != facetQs && 0 != facetQs.length) {
      for (String q : facetQs) {
        final ParsedParams parsed = parseParams(FacetParams.FACET_QUERY, q);
        getFacetQueryCount(parsed, res);
      }
    }

    return res;
  }

  public void getFacetQueryCount(ParsedParams parsed, NamedList res) throws SyntaxError, IOException {
    // TODO: slight optimization would prevent double-parsing of any localParams
    // TODO: SOLR-7753
    Query qobj = QParser.getParser(parsed.facetValue, req).getQuery();

    if (qobj == null) {
      res.add(parsed.key, 0);
    } else if (parsed.params.getBool(GroupParams.GROUP_FACET, false)) {
      res.add(parsed.key, getGroupedFacetQueryCount(qobj, parsed.docs));
    } else {
      res.add(parsed.key, searcher.numDocs(qobj, parsed.docs));
    }
  }

  /**
   * Returns a grouped facet count for the facet query
   *
   * @see FacetParams#FACET_QUERY
   */
  public int getGroupedFacetQueryCount(Query facetQuery, DocSet docSet) throws IOException {
    // It is okay to retrieve group.field from global because it is never a local param
    String groupField = global.get(GroupParams.GROUP_FIELD);
    if (groupField == null) {
      throw new SolrException (
          SolrException.ErrorCode.BAD_REQUEST,
          "Specify the group.field as parameter or local parameter"
      );
    }

    AllGroupsCollector collector = new AllGroupsCollector<>(new TermGroupSelector(groupField));
    Filter mainQueryFilter = docSet.getTopFilter(); // This returns a filter that only matches documents matching with q param and fq params
    Query filteredFacetQuery = new BooleanQuery.Builder()
        .add(facetQuery, Occur.MUST)
        .add(mainQueryFilter, Occur.FILTER)
        .build();
    searcher.search(filteredFacetQuery, collector);
    return collector.getGroupCount();
  }

  enum FacetMethod {
    ENUM, FC, FCS, UIF;
  }

  /**
   * Create a new bytes ref filter for excluding facet terms.
   *
   * This method by default uses the {@link FacetParams#FACET_EXCLUDETERMS} parameter
   * but custom SimpleFacets classes could use a different implementation.
   *
   * @param field the field to check for facet term filters
   * @param params the request parameter object
   * @return A predicate for filtering terms or null if no filters are applicable.
   */
  protected Predicate newExcludeBytesRefFilter(String field, SolrParams params) {
    final String exclude = params.getFieldParam(field, FacetParams.FACET_EXCLUDETERMS);
    if (exclude == null) {
      return null;
    }

    final Set excludeTerms = new HashSet<>(StrUtils.splitSmart(exclude, ",", true));

    return new Predicate() {
      @Override
      public boolean test(BytesRef bytesRef) {
        return !excludeTerms.contains(bytesRef.utf8ToString());
      }
    };
  }

  /**
   * Create a new bytes ref filter for filtering facet terms. If more than one filter is
   * applicable the applicable filters will be returned as an {@link Predicate#and(Predicate)}
   * of all such filters.
   *
   * @param field the field to check for facet term filters
   * @param params the request parameter object
   * @return A predicate for filtering terms or null if no filters are applicable.
   */
  protected Predicate newBytesRefFilter(String field, SolrParams params) {
    final String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);

    Predicate finalFilter = null;

    if (contains != null) {
      final boolean containsIgnoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);
      finalFilter = new SubstringBytesRefFilter(contains, containsIgnoreCase);
    }

    final String regex = params.getFieldParam(field, FacetParams.FACET_MATCHES);
    if (regex != null) {
      final RegexBytesRefFilter regexBytesRefFilter = new RegexBytesRefFilter(regex);
      finalFilter = (finalFilter == null) ? regexBytesRefFilter : finalFilter.and(regexBytesRefFilter);
    }

    final Predicate excludeFilter = newExcludeBytesRefFilter(field, params);
    if (excludeFilter != null) {
      finalFilter = (finalFilter == null) ? excludeFilter : finalFilter.and(excludeFilter);
    }

    return finalFilter;
  }

  /**
   * Term counts for use in pivot faceting that resepcts the appropriate mincount
   * @see FacetParams#FACET_PIVOT_MINCOUNT
   */
  public NamedList getTermCountsForPivots(String field, ParsedParams parsed) throws IOException {
    Integer mincount = parsed.params.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
    return getTermCounts(field, mincount, parsed);
  }

  /**
   * Term counts for use in field faceting that resepects the appropriate mincount
   *
   * @see FacetParams#FACET_MINCOUNT
   */
  public NamedList getTermCounts(String field, ParsedParams parsed) throws IOException {
    Integer mincount = parsed.params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
    return getTermCounts(field, mincount, parsed);
  }

  /**
   * Term counts for use in field faceting that resepcts the specified mincount - 
   * if mincount is null, the "zeros" param is consulted for the appropriate backcompat 
   * default
   *
   * @see FacetParams#FACET_ZEROS
   */
  private NamedList getTermCounts(String field, Integer mincount, ParsedParams parsed) throws IOException {
    final SolrParams params = parsed.params;
    final DocSet docs = parsed.docs;
    final int threads = parsed.threads;
    int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
    int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
    boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);

    // when limit=0 and missing=false then return empty list
    if (limit == 0 && !missing) return new NamedList<>();

    if (mincount==null) {
      Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
      // mincount = (zeros!=null && zeros) ? 0 : 1;
      mincount = (zeros!=null && !zeros) ? 1 : 0;
      // current default is to include zeros.
    }

    // default to sorting if there is a limit.
    String sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
    String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);

    final Predicate termFilter = newBytesRefFilter(field, params);

    boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
    
    NamedList counts;
    SchemaField sf = searcher.getSchema().getField(field);
    if (sf.getType().isPointField() && !sf.hasDocValues()) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
          "Can't facet on a PointField without docValues");
    }
    FieldType ft = sf.getType();

    // determine what type of faceting method to use
    final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
    final FacetMethod requestedMethod;
    if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
      requestedMethod = FacetMethod.ENUM;
    } else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
      requestedMethod = FacetMethod.FCS;
    } else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
      requestedMethod = FacetMethod.FC;
    } else if(FacetParams.FACET_METHOD_uif.equals(methodStr)) {
      requestedMethod = FacetMethod.UIF;
    } else {
      requestedMethod=null;
    }

    final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();

    FacetMethod appliedFacetMethod = selectFacetMethod(field,
                                sf, requestedMethod, mincount,
                                exists);

    RTimer timer = null;
    if (fdebug != null) {
       fdebug.putInfoItem("requestedMethod", requestedMethod==null?"not specified":requestedMethod.name());
       fdebug.putInfoItem("appliedMethod", appliedFacetMethod.name());
       fdebug.putInfoItem("inputDocSetSize", docs.size());
       fdebug.putInfoItem("field", field);
       timer = new RTimer();
    }

    if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
      counts = getGroupedCounts(searcher, docs, field, multiToken, offset,limit, mincount, missing, sort, prefix, termFilter);
    } else {
      assert appliedFacetMethod != null;
      switch (appliedFacetMethod) {
        case ENUM:
          assert TrieField.getMainValuePrefix(ft) == null;
          counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, termFilter, exists);
          break;
        case FCS:
          assert ft.isPointField() || !multiToken;
          if (ft.isPointField() || (ft.getNumberType() != null && !sf.multiValued())) {
            if (prefix != null) {
              throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
            }
            if (termFilter != null) {
              throw new SolrException(ErrorCode.BAD_REQUEST, "BytesRef term filters ("
                      + FacetParams.FACET_MATCHES + ", "
                      + FacetParams.FACET_CONTAINS + ", "
                      + FacetParams.FACET_EXCLUDETERMS + ") are not supported on numeric types");
            }
            if (ft.isPointField() && mincount <= 0) { // default is mincount=0.  See SOLR-10033 & SOLR-11174.
              String warningMessage 
                  = "Raising facet.mincount from " + mincount + " to 1, because field " + field + " is Points-based.";
              log.warn(warningMessage);
              List warnings = (List)rb.rsp.getResponseHeader().get("warnings");
              if (null == warnings) {
                warnings = new ArrayList<>();
                rb.rsp.getResponseHeader().add("warnings", warnings);
              }
              warnings.add(warningMessage);

              mincount = 1;
            }
            counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
          } else {
            PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter);
            Executor executor = threads == 0 ? directExecutor : facetExecutor;
            ps.setNumThreads(threads);
            counts = ps.getFacetCounts(executor);
          }
          break;
        case UIF:
            //Emulate the JSON Faceting structure so we can use the same parsing classes
            Map jsonFacet = new HashMap<>(13);
            jsonFacet.put("type", "terms");
            jsonFacet.put("field", field);
            jsonFacet.put("offset", offset);
            jsonFacet.put("limit", limit);
            jsonFacet.put("mincount", mincount);
            jsonFacet.put("missing", missing);
            jsonFacet.put("prefix", prefix);
            jsonFacet.put("numBuckets", params.getFieldBool(field, "numBuckets", false));
            jsonFacet.put("allBuckets", params.getFieldBool(field, "allBuckets", false));
            jsonFacet.put("method", "uif");
            jsonFacet.put("cacheDf", 0);
            jsonFacet.put("perSeg", false);
            
            final String sortVal;
            switch(sort){
              case FacetParams.FACET_SORT_COUNT_LEGACY:
                sortVal = FacetParams.FACET_SORT_COUNT;
              break;
              case FacetParams.FACET_SORT_INDEX_LEGACY:
                sortVal = FacetParams.FACET_SORT_INDEX;
              break;
              default:
                sortVal = sort;
            }
            jsonFacet.put(SORT, sortVal );

            //TODO do we handle debug?  Should probably already be handled by the legacy code

            Object resObj = FacetRequest.parseOneFacetReq(req, jsonFacet).process(req, docs);
            //Go through the response to build the expected output for SimpleFacets
            counts = new NamedList<>();
            if(resObj != null) {
              NamedList