/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search.facet;

import static org.apache.solr.search.facet.FacetContext.SKIP_FACET;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.function.IntFunction;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.facet.SlotAcc.SlotContext;
import org.apache.solr.search.facet.SlotAcc.SweepableSlotAcc;
import org.apache.solr.search.facet.SlotAcc.SweepingCountSlotAcc;

/**
 * Facet processing based on field values. (not range nor by query)
 *
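 * <p>For example, a JSON Facet request such as the following (field and facet names are
 * illustrative, not from this file) would be handled by a FacetFieldProcessor subclass:
 *
 * <pre>{@code
 * { "categories": { "type": "terms", "field": "cat", "limit": 10, "sort": "count desc" } }
 * }</pre>
 *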
 * @see FacetField
 */
abstract class FacetFieldProcessor extends FacetProcessor {
  SchemaField sf;
  SlotAcc indexOrderAcc;
  int effectiveMincount;
  final boolean singlePassSlotAccCollection;
  // never null (may be the user's requested sort, or the prelim_sort)
  final FacetRequest.FacetSort sort;
  final FacetRequest.FacetSort resort; // typically null (unless the user specified a prelim_sort)

  final Map<String, AggValueSource> deferredAggs = new HashMap<>();

  // TODO: push any of this down to base class?

  //
  // For sort="x desc", collectAcc would point to "x", and sortAcc would also point to "x".
  // collectAcc would be used to accumulate all buckets, and sortAcc would be used to sort those
  // buckets.
  //

  // Accumulator to collect across entire domain (in addition to the countAcc). May be null.
  SlotAcc collectAcc;
  // Accumulator to use for sorting *only* (i.e. not used for collection). May be an alias of
  // countAcc, collectAcc, or indexOrderAcc
  SlotAcc sortAcc;
  // Accumulators that do not need to be calculated across all buckets.
  SlotAcc[] otherAccs;

  // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on
  // otherAccs directly if they exist.
  SpecialSlotAcc allBucketsAcc;

  FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
    super(fcontext, freq);
    this.sf = sf;
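    // On a shard, cap the effective mincount at 1: candidate buckets with any count at all must
    // be returned, since they may cross the requested mincount once merged across shards.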
    this.effectiveMincount =
        (int) (fcontext.isShard() ? Math.min(1, freq.mincount) : freq.mincount);
    this.singlePassSlotAccCollection = (freq.limit == -1 && freq.subFacets.size() == 0);

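    // Illustrative example: with {"sort":"x desc", "prelim_sort":"count desc"} on a non-shard,
    // multi-pass request, this.sort becomes "count desc" (the cheap initial ranking) and
    // this.resort becomes "x desc" (applied later to the reduced candidate set).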
    if (null == freq.prelim_sort) {
      // If the user has not specified any preliminary sort, then things are very simple.
      // Just use the "sort" as is w/o needing any re-sorting
      this.sort = freq.sort;
      this.resort = null;
    } else {
      assert null != freq.prelim_sort;

      if (fcontext.isShard()) {
        // for a shard request, we can ignore the user's requested "sort" and focus solely on the
        // prelim_sort; the merger will worry about the final sorting -- we don't need to resort
        // anything...
        this.sort = freq.prelim_sort;
        this.resort = null;

      } else { // non shard...
        if (singlePassSlotAccCollection) { // special case situation...
          // when we can do a single pass SlotAcc collection on non-shard request, there is
          // no point re-sorting. Ignore the freq.prelim_sort and use the freq.sort option as is...
          this.sort = freq.sort;
          this.resort = null;
        } else {
          // for a non-shard request, we will use the prelim_sort as our initial sort option if it
          // exists then later we will re-sort on the final desired sort...
          this.sort = freq.prelim_sort;
          this.resort = freq.sort;
        }
      }
    }
    assert null != this.sort;
  }

  /** This is used to create accs for the second phase (or to create accs for all aggs) */
  @Override
  protected void createAccs(long docCount, int slotCount) throws IOException {
    if (accMap == null) {
      accMap = new LinkedHashMap<>();
    }

    // allow a custom count acc to be used
    if (countAcc == null) {
      countAcc = new SlotAcc.CountSlotArrAcc(fcontext, slotCount);
    }

    if (accs != null) {
      // reuse these accs, but reset them first and resize since size could be different
      for (SlotAcc acc : accs) {
        acc.reset();
        acc.resize(new FlatteningResizer(slotCount));
      }
      return;
    } else {
      accs = new SlotAcc[freq.getFacetStats().size()];
    }

    int accIdx = 0;
    for (Map.Entry<String, AggValueSource> entry : freq.getFacetStats().entrySet()) {
      SlotAcc acc = null;
      if (slotCount == 1) {
        acc = accMap.get(entry.getKey());
        if (acc != null) {
          acc.reset();
        }
      }
      if (acc == null) {
        acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
        acc.key = entry.getKey();
        accMap.put(acc.key, acc);
      }
      accs[accIdx++] = acc;
    }
  }

  /**
   * Simple helper for checking if a {@link FacetRequest.FacetSort} is on "count" or "index" and
   * picking the existing SlotAcc
   *
   * @return an existing SlotAcc for sorting, else null if it should be built from the Aggs
   */
  private SlotAcc getTrivialSortingSlotAcc(FacetRequest.FacetSort fsort) {
    if ("count".equals(fsort.sortVariable)) {
      assert null != countAcc;
      return countAcc;
    } else if ("index".equals(fsort.sortVariable)) {
      // allow subclass to set indexOrderAcc first
      if (indexOrderAcc == null) {
        // This sorting accumulator just goes by the slot number, so does not need to be collected
        // and hence does not need to find its way into the accMap or accs array.
        indexOrderAcc = new SlotAcc.SortSlotAcc(fcontext);
      }
      return indexOrderAcc;
    }
    return null;
  }

  void createCollectAcc(int numDocs, int numSlots) throws IOException {
    accMap = new LinkedHashMap<>();

    // start with the assumption that we're going to defer the computation of all stats
    deferredAggs.putAll(freq.getFacetStats());

    // we always count...
    // allow a subclass to set a custom counter.
    if (countAcc == null) {
      countAcc = new SlotAcc.CountSlotArrAcc(fcontext, numSlots);
    }

    sortAcc = getTrivialSortingSlotAcc(this.sort);

    if (this.singlePassSlotAccCollection) {
      // If we are going to return all buckets, and if there are no subfacets (that would need a
      // domain), then don't defer any aggregation calculations to a second phase.
      // This way we can avoid calculating domains for each bucket, which can be expensive.

      // TODO: BEGIN: why can't we just call createAccs here ?
      accs = new SlotAcc[freq.getFacetStats().size()];
      int otherAccIdx = 0;
      for (Map.Entry<String, AggValueSource> entry : freq.getFacetStats().entrySet()) {
        AggValueSource agg = entry.getValue();
        SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
        acc.key = entry.getKey();
        accMap.put(acc.key, acc);
        accs[otherAccIdx++] = acc;
      }
      // TODO: END: why can't we just call createAccs here ?
      if (accs.length == 1) {
        collectAcc = accs[0];
      } else {
        collectAcc = new MultiAcc(fcontext, accs);
      }

      if (sortAcc == null) {
        sortAcc = accMap.get(sort.sortVariable);
        assert sortAcc != null;
      }

      deferredAggs.clear();
    }

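    // If the sort is on one of the requested stats, that one stat must be collected across the
    // whole domain in the first phase (collectAcc) so we can sort by it; sortAcc aliases it and
    // the stat is no longer deferred. All other stats remain deferred to the second phase.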
    if (sortAcc == null) {
      AggValueSource sortAgg = freq.getFacetStats().get(sort.sortVariable);
      if (sortAgg != null) {
        collectAcc = sortAgg.createSlotAcc(fcontext, numDocs, numSlots);
        collectAcc.key = sort.sortVariable; // TODO: improve this
      }
      sortAcc = collectAcc;
      deferredAggs.remove(sort.sortVariable);
    }

    boolean needOtherAccs = freq.allBuckets; // TODO: use for missing too...

    if (sortAcc == null) {
      // as sort is already validated, in what case would sortAcc be null?
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "Invalid sort '" + sort + "' for field '" + sf.getName() + "'");
    }

    if (!needOtherAccs) {
      // we may need them later, but we don't want to create them now
      // otherwise we won't know if we need to call setNextReader on them.
      return;
    }

    // create the deferred aggs up front for use by allBuckets
    createOtherAccs(numDocs, 1);
  }

  private void createOtherAccs(int numDocs, int numSlots) throws IOException {
    if (otherAccs != null) {
      // reuse existing accumulators
      for (SlotAcc acc : otherAccs) {
        acc.reset(); // todo - make reset take numDocs and numSlots?
      }
      return;
    }

    final int numDeferred = deferredAggs.size();
    if (numDeferred <= 0) return;

    otherAccs = new SlotAcc[numDeferred];

    int otherAccIdx = 0;
    for (Map.Entry<String, AggValueSource> entry : deferredAggs.entrySet()) {
      AggValueSource agg = entry.getValue();
      SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
      acc.key = entry.getKey();
      accMap.put(acc.key, acc);
      otherAccs[otherAccIdx++] = acc;
    }

    if (numDeferred == freq.getFacetStats().size()) {
      // accs and otherAccs are the same...
      accs = otherAccs;
    }
  }

  int collectFirstPhase(DocSet docs, int slot, IntFunction<SlotContext> slotContext)
      throws IOException {
    int num = -1;
    if (collectAcc != null) {
      num = collectAcc.collect(docs, slot, slotContext);
    }
    if (allBucketsAcc != null) {
      num = allBucketsAcc.collect(docs, slot, slotContext);
    }
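    // a negative num means no accumulator reported a count; fall back to the domain size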
    return num >= 0 ? num : docs.size();
  }

  void collectFirstPhase(int segDoc, int slot, IntFunction<SlotContext> slotContext)
      throws IOException {
    if (collectAcc != null) {
      collectAcc.collect(segDoc, slot, slotContext);
    }
    if (allBucketsAcc != null) {
      allBucketsAcc.collect(segDoc, slot, slotContext);
    }
  }

  private static long applyDefaultOverrequest(long offset, long limit) {
    // NOTE: consider modifying the below heuristic; see SOLR-15760
    // add over-request if this is a shard request and if we have a small offset (large offsets will
    // already be gathering many more buckets than needed)
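    // e.g. limit=10 with a small offset: (long) (10 * 1.1 + 4) = 15 buckets requested per shard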
    if (offset < 10) {
      return (long)
          (limit * 1.1 + 4); // default: add 10% plus 4 (to overrequest for very small limits)
    }
    return limit;
  }

  /**
   * Processes the collected data to find the top slots, and composes them into the response
   * NamedList.
   */
  SimpleOrderedMap<Object> findTopSlots(
      final int numSlots,
      final int slotCardinality,
      @SuppressWarnings("rawtypes") IntFunction<Comparable> bucketValFromSlotNumFunc,
      @SuppressWarnings("rawtypes") Function<Comparable, String> fieldQueryValFunc)
      throws IOException {
    assert this.sortAcc != null;
    long numBuckets = 0;

    final int off = fcontext.isShard() ? 0 : (int) freq.offset;

    long effectiveLimit = Integer.MAX_VALUE; // use max-int instead of max-long to avoid overflow
    if (freq.limit >= 0) {
      effectiveLimit = freq.limit;
      if (fcontext.isShard()) {
        if (freq.overrequest > 0) {
          // NOTE: although _default_ distrib overrequest is disabled for the "index sort" case (see
          // below), we _do_ want to respect an _explicit_ `overrequest` value, if present.
          // Overrequest is always relevant (regardless of prelim sort) for the `resort` case; but
          // even in the case of "index sort, no resort", overrequest can be relevant in some edge
          // cases of the "shard" case, where it can affect the behavior of `isBucketComplete()`
          // (see SOLR-14595).
          effectiveLimit += freq.overrequest;
        } else {
          switch (freq.overrequest) {
            case 0:
              // no-op (overrequest explicitly disabled)
              break;
            case -1:
              // default
              if (!"index".equals(this.sort.sortVariable)) {
                // NOTE: even for distrib requests, `overrequest` is not directly relevant for
                // "index" sort, hence there is no default/implicit overrequest for "index sort"
                // (even if `resort` is also specified -- overrequest that is exclusively for
                // `resort` must be explicit, even in a distrib context)
                effectiveLimit = applyDefaultOverrequest(freq.offset, effectiveLimit);
              }
              break;
            default:
              // other negative values are not supported
              throw new IllegalArgumentException(
                  "Illegal `overrequest` specified: " + freq.overrequest);
          }
        }
      } else if (null != resort && 0 < freq.overrequest) {
        // in non-shard situations, if we have a 'resort' we check for explicit overrequest > 0
        effectiveLimit += freq.overrequest;
      }
    }

    final int sortMul = sort.sortDirection.getMultiplier();

    int maxTopVals =
        (int)
            (effectiveLimit >= 0
                ? Math.min(freq.offset + effectiveLimit, Integer.MAX_VALUE - 1)
                : Integer.MAX_VALUE - 1);
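    // never size the queue larger than the number of slots that could actually hold values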
    maxTopVals = Math.min(maxTopVals, slotCardinality);
    final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc;
    final BiPredicate<Slot, Slot> orderPredicate;
    if (indexOrderAcc != null && indexOrderAcc != sortAcc) {
      orderPredicate =
          (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
          };
    } else {
      orderPredicate =
          (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? b.slot < a.slot : cmp < 0;
          };
    }
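    // Bounded priority queue of the top maxTopVals slots; lessThan delegates to orderPredicate,
    // so the "worst" retained slot sits at the top where it can be cheaply replaced.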
    final PriorityQueue<Slot> queue =
        new PriorityQueue<>(maxTopVals) {
          @Override
          protected boolean lessThan(Slot a, Slot b) {
            return orderPredicate.test(a, b);
          }
        };

    // note: We avoid object allocation by having a Slot and re-using the 'bottom'.
    Slot bottom = null;
    Slot scratchSlot = new Slot();
    boolean shardHasMoreBuckets = false; // This shard has more buckets than were returned
    for (int slotNum = 0; slotNum < numSlots; slotNum++) {

      // screen out buckets not matching mincount
      if (effectiveMincount > 0) {
        long count = countAcc.getCount(slotNum);
        if (count < effectiveMincount) {
          if (count > 0) {
            // Still increment numBuckets as long as we have some count. This is for
            // consistency between distrib and non-distrib mode.
            numBuckets++;
          }
          continue;
        }
      }

      numBuckets++;

      if (bottom != null) {
        shardHasMoreBuckets = true;
        // scratchSlot is only used to hold this slotNum for the following line
        scratchSlot.slot = slotNum;
        if (orderPredicate.test(bottom, scratchSlot)) {
          bottom.slot = slotNum;
          bottom = queue.updateTop();
        }
      } else if (effectiveLimit > 0) {
        // queue not full
        Slot s = new Slot();
        s.slot = slotNum;
        queue.add(s);
        if (queue.size() >= maxTopVals) {
          bottom = queue.top();
        }
      }
    }

    assert queue.size() <= numBuckets;

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
      if (!fcontext.isShard()) {
        res.add("numBuckets", numBuckets);
      } else {
        calculateNumBuckets(res);
      }
    }

    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null) fdebug.putInfoItem("numBuckets", numBuckets);

    if (freq.allBuckets) {
      SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
      // countAcc.setValues(allBuckets, allBucketsSlot);
      allBuckets.add("count", allBucketsAcc.getSpecialCount());
      allBucketsAcc.setValues(allBuckets, -1); // -1 slotNum is unused for SpecialSlotAcc
      // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
      res.add("allBuckets", allBuckets);
    }

    SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
    if (freq.missing) {
      res.add("missing", missingBucket);
      // moved missing fillBucket after we fill facet since it will reset all the accumulators.
    }

    final boolean needFilter = (!deferredAggs.isEmpty()) || freq.getSubFacets().size() > 0;
    if (needFilter) {
      createOtherAccs(-1, 1);
    }

    // if we are deep paging, we don't have to order the highest "offset" counts...
    // ...unless we need to resort.
    int collectCount = Math.max(0, queue.size() - (null == this.resort ? off : 0));
    //
    assert collectCount <= maxTopVals;
    Slot[] sortedSlots = new Slot[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
      Slot slot = sortedSlots[i] = queue.pop();
      // At this point we know we're either returning this Slot as a Bucket, or resorting it,
      // so definitely fill in the bucket value -- we'll need it either way
      slot.bucketVal = bucketValFromSlotNumFunc.apply(slot.slot);

      if (needFilter || null != this.resort) {
        slot.bucketFilter = makeBucketQuery(fieldQueryValFunc.apply(slot.bucketVal));
      }
    }

    final SlotAcc resortAccForFill = resortSlots(sortedSlots); // No-Op if not needed

    if (null != this.resort) {
      // now that we've completely resorted, throw away extra docs from possible
      // offset/overrequest...
      final int endOffset =
          (int)
              Math.min(
                  (long) sortedSlots.length,
                  // NOTE: freq.limit is long, so no risk of overflow here
                  off + (freq.limit < 0 ? Integer.MAX_VALUE : freq.limit));
      if (0 < off || endOffset < sortedSlots.length) {
        sortedSlots = Arrays.copyOfRange(sortedSlots, off, endOffset);
      }
    }
    List<SimpleOrderedMap<Object>> bucketList = new ArrayList<>(sortedSlots.length);

    for (Slot slot : sortedSlots) {
      SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
      bucket.add("val", slot.bucketVal);

      fillBucketFromSlot(bucket, slot, resortAccForFill);

      bucketList.add(bucket);
    }

    res.add("buckets", bucketList);

    if (fcontext.isShard() && shardHasMoreBuckets) {
      // Currently, "more" is an internal implementation detail and only returned for distributed
      // sub-requests
      res.add("more", true);
    }

    if (freq.missing) {
      // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
      fillBucket(
          missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false, null);
    }

    return res;
  }

  /** Trivial helper method for building up a bucket query given the (Stringified) bucket value */
  protected Query makeBucketQuery(final String bucketValue) {
    return sf.getType().getFieldTermQuery(null, sf, bucketValue);
  }

  private void calculateNumBuckets(SimpleOrderedMap<Object> target) throws IOException {
    DocSet domain = fcontext.base;
    if (freq.prefix != null) {
      Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix);
      domain = fcontext.searcher.getDocSet(prefixFilter, domain);
    }

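    // HLLAgg uses a HyperLogLog sketch, so "numBuckets" on a shard is an estimate of the
    // distinct value count for the domain, collected into a single slot.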
    HLLAgg agg = new HLLAgg(freq.field);
    SlotAcc acc = agg.createSlotAcc(fcontext, domain.size(), 1);
    acc.collect(domain, 0, null); // we know HLL doesn't care about the bucket query
    acc.key = "numBuckets";
    acc.setValues(target, 0);
  }

  private static class Slot {
    /** The Slot number used during collection */
    int slot;

    /** filled in only once we know the bucket will either be involved in resorting, or returned */
    @SuppressWarnings({"rawtypes"})
    Comparable bucketVal;

    /** Filled in if and only if needed for resorting, deferred stats, or subfacets */
    Query bucketFilter;

    // TODO: we could potentially store the bucket's (DocSet) subDomain as well,
    // but that's a much bigger object to hang onto for every slot at the same time.
    // Probably best to just trust the filterCache to do its job.

    /** The Slot number used during resorting */
    int resortSlotNum;
  }

  /** Helper method used solely when looping over buckets to be returned in findTopSlots */
  private void fillBucketFromSlot(SimpleOrderedMap<Object> target, Slot slot, SlotAcc resortAcc)
      throws IOException {
    final int slotOrd = slot.slot;
    countAcc.setValues(target, slotOrd);
    if (countAcc.getCount(slotOrd) <= 0 && !freq.processEmpty) return;

    if (slotOrd >= 0 && collectAcc != null) {
      collectAcc.setValues(target, slotOrd);
    }

    if (otherAccs == null && freq.subFacets.isEmpty()) return;

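    // Deferred stats and sub-facets need this bucket's domain: intersect the bucket's term
    // filter with the base domain (typically cheap thanks to the filterCache).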
    assert null != slot.bucketFilter;
    final Query filter = slot.bucketFilter;
    final DocSet subDomain = fcontext.searcher.getDocSet(filter, fcontext.base);

    // if no subFacets, we only need a DocSet
    // otherwise we need more?
    // TODO: save something generic like "slotNum" in the context and use that to implement things
    // like filter exclusion if necessary?
    // Hmmm, but we need to look up some stuff anyway (for the label?)
    // have a method like "DocSet applyConstraint(facet context, DocSet parent)"
    // that's needed for domain changing things like joins anyway???

    if (otherAccs != null) {
      // do acc at a time (traversing domain each time) or do all accs for each doc?
      for (SlotAcc acc : otherAccs) {
        if (acc == resortAcc) {
          // already collected, just need to get the value from the correct slot
          acc.setValues(target, slot.resortSlotNum);
        } else {
          acc.reset(); // TODO: only needed if we previously used for allBuckets or missing
          acc.collect(
              subDomain,
              0,
              s -> {
                return new SlotContext(filter);
              });
          acc.setValues(target, 0);
        }
      }
    }

    processSubs(target, filter, subDomain, false, null);
  }

  /**
   * Helper method that resorts the slots (if needed).
   *
   * @return a SlotAcc that should be used to {@link SlotAcc#setValues} on the final buckets via
   *     {@link Slot#resortSlotNum}, or null if no special SlotAcc was needed (ie: no resorting, or
   *     resorting on something already known/collected)
   */
  private SlotAcc resortSlots(Slot[] slots) throws IOException {
    if (null == this.resort) {
      return null; // Nothing to do.
    }
    assert !fcontext.isShard();

    // NOTE: getMultiplier() is confusing and weird and meant for use in PriorityQueue.lessThan,
    // so it's backwards from what you'd expect in a Comparator...
    final int resortMul = -1 * resort.sortDirection.getMultiplier();

    SlotAcc resortAcc = getTrivialSortingSlotAcc(this.resort);
    if (null != resortAcc) {
      // resorting on count or index is rare (and not particularly useful) but if someone chooses to
      // do either of these we don't need to re-collect ... instead just re-sort the slots based on
      // the previously collected values using the originally collected slot numbers...
      if (resortAcc.equals(countAcc)) {
        final Comparator<Slot> comparator =
            null != indexOrderAcc
                ? (new Comparator<Slot>() {
                  @Override
                  public int compare(Slot x, Slot y) {
                    final int cmp = resortMul * countAcc.compare(x.slot, y.slot);
                    return cmp != 0 ? cmp : indexOrderAcc.compare(x.slot, y.slot);
                  }
                })
                : (new Comparator<Slot>() {
                  @Override
                  public int compare(Slot x, Slot y) {
                    final int cmp = resortMul * countAcc.compare(x.slot, y.slot);
                    return cmp != 0 ? cmp : Integer.compare(x.slot, y.slot);
                  }
                });
        Arrays.sort(slots, comparator);
        return null;
      }
      if (resortAcc.equals(indexOrderAcc)) {
        // obviously indexOrderAcc is not null, and no need for a fancy tie breaker...
        Arrays.sort(
            slots,
            new Comparator<Slot>() {
              @Override
              public int compare(Slot x, Slot y) {
                return resortMul * indexOrderAcc.compare(x.slot, y.slot);
              }
            });
        return null;
      }
      // nothing else should be possible
      assert false : "trivial resort isn't count or index: " + this.resort;
    }

    assert null == resortAcc;
    for (SlotAcc acc : otherAccs) {
      if (acc.key.equals(this.resort.sortVariable)) {
        resortAcc = acc;
        break;
      }
    }
    // TODO: what if resortAcc is still null, ie: bad input? ... throw an error?  (see SOLR-13022)
    // looks like the equivalent sort code path silently ignores sorting if sortVariable isn't in
    // accMap...
    // ...and we get a deferred NPE when trying to collect.
    assert null != resortAcc;

    final SlotAcc acc = resortAcc;

    // reset resortAcc to be (just) big enough for all the slots we care about...
    acc.reset();
    acc.resize(new FlatteningResizer(slots.length));

    // give each existing Slot a new resortSlotNum and let the resortAcc collect it...
    for (int slotNum = 0; slotNum < slots.length; slotNum++) {
      Slot slot = slots[slotNum];
      slot.resortSlotNum = slotNum;

      assert null != slot.bucketFilter : "null filter for slot=" + slot.bucketVal;

      final DocSet subDomain = fcontext.searcher.getDocSet(slot.bucketFilter, fcontext.base);
      acc.collect(
          subDomain,
          slotNum,
          s -> {
            return new SlotContext(slot.bucketFilter);
          });
    }

    // now resort all the Slots according to the new collected values...
    final Comparator<Slot> comparator =
        null != indexOrderAcc
            ? (new Comparator<Slot>() {
              @Override
              public int compare(Slot x, Slot y) {
                final int cmp = resortMul * acc.compare(x.resortSlotNum, y.resortSlotNum);
                return cmp != 0 ? cmp : indexOrderAcc.compare(x.slot, y.slot);
              }
            })
            : (new Comparator<Slot>() {
              @Override
              public int compare(Slot x, Slot y) {
                final int cmp = resortMul * acc.compare(x.resortSlotNum, y.resortSlotNum);
                return cmp != 0 ? cmp : Integer.compare(x.slot, y.slot);
              }
            });
    Arrays.sort(slots, comparator);
    return acc;
  }

  @Override
  protected void processStats(
      SimpleOrderedMap<Object> bucket, Query bucketQ, DocSet docs, long docCount)
      throws IOException {
    if ((docCount == 0 && !freq.processEmpty) || freq.getFacetStats().size() == 0) {
      bucket.add("count", docCount);
      return;
    }
    createAccs(docCount, 1);
    assert null != bucketQ;
    long collected =
        collect(
            docs,
            0,
            slotNum -> {
              return new SlotContext(bucketQ);
            });

    // countAcc.incrementCount(0, collected);  // should we set the count on the acc instead of
    // just passing it?

    assert collected == docCount;
    addStats(bucket, collected, 0);
  }

  // overrides but with different signature!
  private void addStats(SimpleOrderedMap<Object> target, long count, int slotNum)
      throws IOException {
    target.add("count", count);
    if (count > 0 || freq.processEmpty) {
      for (SlotAcc acc : accs) {
        acc.setValues(target, slotNum);
      }
    }
  }

  @Override
  void setNextReader(LeafReaderContext ctx) throws IOException {
    // base class calls this (for missing bucket...) ...  go over accs[] in that case
    super.setNextReader(ctx);
  }

  void setNextReaderFirstPhase(LeafReaderContext ctx) throws IOException {
    if (collectAcc != null) {
      collectAcc.setNextReader(ctx);
    }
    if (otherAccs != null) {
      for (SlotAcc acc : otherAccs) {
        acc.setNextReader(ctx);
      }
    }
  }

  static class MultiAcc extends SlotAcc implements SweepableSlotAcc<SlotAcc> {
    final SlotAcc[] subAccs;

    MultiAcc(FacetContext fcontext, SlotAcc[] subAccs) {
      super(fcontext);
      this.subAccs = subAccs;
    }

    @Override
    public void setNextReader(LeafReaderContext ctx) throws IOException {
      for (SlotAcc acc : subAccs) {
        acc.setNextReader(ctx);
      }
    }

    @Override
    public void collect(int doc, int slot, IntFunction<SlotContext> slotContext)
        throws IOException {
      for (SlotAcc acc : subAccs) {
        acc.collect(doc, slot, slotContext);
      }
    }

    @Override
    public int compare(int slotA, int slotB) {
      throw new UnsupportedOperationException();
    }

    @Override
    public Object getValue(int slotNum) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public void reset() throws IOException {
      for (SlotAcc acc : subAccs) {
        acc.reset();
      }
    }

    @Override
    public void resize(Resizer resizer) {
      for (SlotAcc acc : subAccs) {
        acc.resize(resizer);
      }
    }

    @Override
    public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
      for (SlotAcc acc : subAccs) {
        acc.setValues(bucket, slotNum);
      }
    }

    @Override
    public SlotAcc registerSweepingAccs(SweepingCountSlotAcc baseSweepingAcc) {
      final FacetFieldProcessor p = (FacetFieldProcessor) fcontext.processor;
      int j = 0;
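      // Compact subAccs in place: index j tracks accs that survive registration. An acc whose
      // registration returns null is dropped (the sweeping count acc now handles it); otherwise
      // the (possibly replaced) acc is kept.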
      for (int i = 0; i < subAccs.length; i++) {
        final SlotAcc acc = subAccs[i];
        if (acc instanceof SweepableSlotAcc) {
          SlotAcc replacement =
              ((SweepableSlotAcc<?>) acc).registerSweepingAccs(baseSweepingAcc);
          if (replacement == null) {
            // drop acc, do not increment j
            continue;
          } else if (replacement != acc || j < i) {
            subAccs[j] = replacement;
          }
        } else if (j < i) {
          subAccs[j] = acc;
        }
        j++;
      }
      switch (j) {
        case 0:
          return null;
        case 1:
          return subAccs[0];
        default:
          if (j == subAccs.length) {
            return this;
          } else {
            // must resize final field subAccs
            return new MultiAcc(fcontext, ArrayUtil.copyOfSubArray(subAccs, 0, j));
          }
      }
    }
  }

  /**
   * Helper method that subclasses can use to indicate they wish to use sweeping. If {@link
   * #countAcc} and {@link #collectAcc} support sweeping, then this method will:
   *
   * <ul>
   *   <li>replace {@link #collectAcc} with its sweeping equivalent
   *   <li>update {@link #allBucketsAcc}'s reference to {@link #collectAcc} (if it exists)
   * </ul>
   *
   * @return true if the above actions were taken
   * @see SweepableSlotAcc
   * @see SweepingCountSlotAcc
   */
  protected boolean registerSweepingAccIfSupportedByCollectAcc() {
    if (countAcc instanceof SweepingCountSlotAcc && collectAcc instanceof SweepableSlotAcc) {
      final SweepingCountSlotAcc sweepingCountAcc = (SweepingCountSlotAcc) countAcc;
      collectAcc = ((SweepableSlotAcc<?>) collectAcc).registerSweepingAccs(sweepingCountAcc);
      if (allBucketsAcc != null) {
        allBucketsAcc.collectAcc = collectAcc;
        allBucketsAcc.sweepingCountAcc = sweepingCountAcc;
      }
      return true;
    }
    return false;
  }

  private static final SlotContext ALL_BUCKETS_SLOT_CONTEXT =
      new SlotContext(null) {
        @Override
        public Query getSlotQuery() {
          throw new IllegalStateException(
              "getSlotQuery() is mutually exclusive with isAllBuckets==true");
        }

        @Override
        public boolean isAllBuckets() {
          return true;
        }
      };

  private static final IntFunction<SlotContext> ALL_BUCKETS_SLOT_FUNCTION =
      new IntFunction<SlotContext>() {
        @Override
        public SlotContext apply(int value) {
          return ALL_BUCKETS_SLOT_CONTEXT;
        }
      };

  static class SpecialSlotAcc extends SlotAcc {
    SlotAcc collectAcc;
    SlotAcc[] otherAccs;
    int collectAccSlot;
    int otherAccsSlot;
    long count;
    SweepingCountSlotAcc sweepingCountAcc; // null unless/until sweeping is initialized

    SpecialSlotAcc(
        FacetContext fcontext,
        SlotAcc collectAcc,
        int collectAccSlot,
        SlotAcc[] otherAccs,
        int otherAccsSlot) {
      super(fcontext);
      this.collectAcc = collectAcc;
      this.collectAccSlot = collectAccSlot;
      this.otherAccs = otherAccs;
      this.otherAccsSlot = otherAccsSlot;
    }

    public int getCollectAccSlot() {
      return collectAccSlot;
    }

    public int getOtherAccSlot() {
      return otherAccsSlot;
    }

    long getSpecialCount() {
      return count;
    }

    @Override
    public void collect(int doc, int slot, IntFunction<SlotContext> slotContext)
        throws IOException {
      assert slot != collectAccSlot || slot < 0;
      count++;
      if (collectAcc != null) {
        collectAcc.collect(doc, collectAccSlot, ALL_BUCKETS_SLOT_FUNCTION);
      }
      if (otherAccs != null) {
        for (SlotAcc otherAcc : otherAccs) {
          otherAcc.collect(doc, otherAccsSlot, ALL_BUCKETS_SLOT_FUNCTION);
        }
      }
    }

    @Override
    public void setNextReader(LeafReaderContext readerContext) throws IOException {
      // collectAcc and otherAccs will normally have setNextReader called directly on them.
      // This, however, will be used when the collect(DocSet,slot) variant is used on this Acc.
      if (collectAcc != null) {
        collectAcc.setNextReader(readerContext);
      }
      if (otherAccs != null) {
        for (SlotAcc otherAcc : otherAccs) {
          otherAcc.setNextReader(readerContext);
        }
      }
    }

    @Override
    public int compare(int slotA, int slotB) {
      throw new UnsupportedOperationException();
    }

    @Override
    public Object getValue(int slotNum) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
      if (sweepingCountAcc != null) {
        sweepingCountAcc.setSweepValues(bucket, collectAccSlot);
      }
      if (collectAcc != null) {
        collectAcc.setValues(bucket, collectAccSlot);
      }
      if (otherAccs != null) {
        for (SlotAcc otherAcc : otherAccs) {
          otherAcc.setValues(bucket, otherAccsSlot);
        }
      }
    }

    @Override
    public void reset() {
      // reset should be called on underlying accs
      // TODO: but in case something does need to be done here, should we require this method to
      // be called but do nothing for now?
      throw new UnsupportedOperationException();
    }

    @Override
    public void resize(Resizer resizer) {
      // someone else will call resize on collectAcc directly
      if (collectAccSlot >= 0) {
        collectAccSlot = resizer.getNewSlot(collectAccSlot);
      }
    }
  }

  /*
   "qfacet":{"cat2":{"_l":["A"]}},
   "all":{"_s":[[
     "all",
     {"cat3":{"_l":["A"]}}]]},
   "cat1":{"_l":["A"]}}}
  */

  @SuppressWarnings({"unchecked"})
  static <T> List<T> asList(Object list) {
    return list != null ? (List<T>) list : Collections.emptyList();
  }

  @SuppressWarnings({"rawtypes", "unchecked"})
  protected SimpleOrderedMap<Object> refineFacets() throws IOException {
    boolean skipThisFacet = (fcontext.flags & SKIP_FACET) != 0;

    // We have not seen this bucket: do full faceting for this bucket, including all sub-facets
    List<Object> leaves = asList(fcontext.facetInfo.get("_l"));
    // We have seen this bucket, so skip stats on it, and skip sub-facets except for the
    // specified sub-facets that should calculate specified buckets.
    List<List<Object>> skip = asList(fcontext.facetInfo.get("_s"));
    // We have not seen this bucket, do full faceting for this bucket, and most sub-facets... but
    // some sub-facets are partial and should only visit specified buckets.
    List<List<Object>> partial = asList(fcontext.facetInfo.get("_p"));

    // For leaf refinements, we do full faceting for each leaf bucket. Any sub-facets of these
    // buckets will be fully evaluated. Because of this, we should never encounter leaf
    // refinements that have sub-facets that return partial results.

    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    List<SimpleOrderedMap<Object>> bucketList =
        new ArrayList<>(leaves.size() + skip.size() + partial.size());
    res.add("buckets", bucketList);

    // TODO: an alternate implementation could fill all accs at once
    createAccs(-1, 1);

    for (Object bucketVal : leaves) {
      bucketList.add(refineBucket(bucketVal, false, null));
    }

    for (List<Object> bucketAndFacetInfo : skip) {
      assert bucketAndFacetInfo.size() == 2;
      Object bucketVal = bucketAndFacetInfo.get(0);
      Map<String, Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);

      bucketList.add(refineBucket(bucketVal, true, facetInfo));
    }

    // The only difference between skip and partial is the value of "skip" passed to refineBucket
    for (List<Object> bucketAndFacetInfo : partial) {
      assert bucketAndFacetInfo.size() == 2;
      Object bucketVal = bucketAndFacetInfo.get(0);
      Map<String, Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);

      bucketList.add(refineBucket(bucketVal, false, facetInfo));
    }

    if (freq.missing) {
      Map<String, Object> bucketFacetInfo =
          (Map<String, Object>) fcontext.facetInfo.get("missing");

      if (bucketFacetInfo != null || !skipThisFacet) {
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(
            missingBucket,
            getFieldMissingQuery(fcontext.searcher, freq.field),
            null,
            skipThisFacet,
            bucketFacetInfo);
        res.add("missing", missingBucket);
      }
    }

    if (freq.numBuckets && !skipThisFacet) {
      calculateNumBuckets(res);
    }

    // If there are just a couple of leaves, and if the domain is large, then going by term is
    // likely the most efficient? If the domain is small, or if the number of leaves is large,
    // then doing the normal collection method may be best.

    return res;
  }

  private SimpleOrderedMap<Object> refineBucket(
      Object bucketVal, boolean skip, Map<String, Object> facetInfo) throws IOException {
    SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
    FieldType ft = sf.getType();
    // refinement info passed in as JSON will cause int->long and float->double
    bucketVal = ft.toNativeType(bucketVal);
    bucket.add("val", bucketVal);

    // fieldQuery currently relies on a string input of the value...
    String bucketStr =
        bucketVal instanceof Date
            ? ((Date) bucketVal).toInstant().toString()
            : bucketVal.toString();
    Query domainQ = ft.getFieldTermQuery(null, sf, bucketStr);

    fillBucket(bucket, domainQ, null, skip, facetInfo);

    return bucket;
  }

  /** Resizes to the specified size, remapping all existing slots to slot 0 */
  private static final class FlatteningResizer extends SlotAcc.Resizer {
    private final int slotCount;

    public FlatteningResizer(int slotCount) {
      this.slotCount = slotCount;
    }

    @Override
    public int getNewSize() {
      return slotCount;
    }

    @Override
    public int getNewSlot(int oldSlot) {
      return 0;
    }
  }
}