org.apache.solr.search.facet.FacetFieldProcessor Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr Core
There is a newer version: 9.6.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search.facet;

import java.io.IOException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.function.IntFunction;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.facet.SlotAcc.SlotContext;

import static org.apache.solr.search.facet.FacetContext.SKIP_FACET;

/**
 * Facet processing based on field values. (not range nor by query)
 * @see FacetField
 */
abstract class FacetFieldProcessor extends FacetProcessor {
  SchemaField sf; // the schema field handed to the constructor
  SlotAcc indexOrderAcc; // sort-only acc for sort="index"; created on demand unless a subclass has set it first
  int effectiveMincount; // mincount to apply for this request; never greater than 1 on shard requests
  final boolean singlePassSlotAccCollection; // true when limit == -1 and there are no sub-facets
  final FacetRequest.FacetSort sort; // never null (may be the user's requested sort, or the prelim_sort)
  final FacetRequest.FacetSort resort; // typically null (unless the user specified a prelim_sort)

  // Stats whose accumulators are not (yet) part of the first collection phase, keyed the same way as
  // freq.getFacetStats(). Filled by createCollectAcc, which then removes whatever it decides to
  // collect up front.
  final Map<String, AggValueSource> deferredAggs = new HashMap<>();

  // TODO: push any of this down to base class?

  //
  // For sort="x desc", collectAcc would point to "x", and sortAcc would also point to "x".
  // collectAcc would be used to accumulate all buckets, and sortAcc would be used to sort those buckets.
  //
  SlotAcc collectAcc;  // Accumulator to collect across entire domain (in addition to the countAcc).  May be null.
  SlotAcc sortAcc;     // Accumulator to use for sorting *only* (i.e. not used for collection). May be an alias of countAcc, collectAcc, or indexOrderAcc
  SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets.

  SpecialSlotAcc allBucketsAcc;  // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.

  FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
    super(fcontext, freq);
    this.sf = sf;
    this.effectiveMincount = (int)(fcontext.isShard() ? Math.min(1 , freq.mincount) : freq.mincount);
    this.singlePassSlotAccCollection = (freq.limit == -1 && freq.subFacets.size() == 0);

    if ( null == freq.prelim_sort ) {
      // If the user has not specified any preliminary sort, then things are very simple.
      // Just use the "sort" as is w/o needing any re-sorting
      this.sort = freq.sort;
      this.resort = null;
    } else {
      assert null != freq.prelim_sort;
      
      if ( fcontext.isShard() ) {
        // for a shard request, we can ignore the users requested "sort" and focus solely on the prelim_sort
        // the merger will worry about the final sorting -- we don't need to resort anything...
        this.sort = freq.prelim_sort;
        this.resort = null;
        
      } else { // non shard...
        if ( singlePassSlotAccCollection ) { // special case situation...
          // when we can do a single pass SlotAcc collection on non-shard request, there is
          // no point re-sorting. Ignore the freq.prelim_sort and use the freq.sort option as is...
          this.sort = freq.sort;
          this.resort = null;
        } else {
          // for a non-shard request, we will use the prelim_sort as our initial sort option if it exists
          // then later we will re-sort on the final desired sort...
          this.sort = freq.prelim_sort;
          this.resort = freq.sort;
        }
      }
    }
    assert null != this.sort;
  }

  /**
   * This is used to create accs for second phase (or to create accs for all aggs).
   *
   * <p>Makes sure {@code accMap} and {@code countAcc} exist. If {@code accs} has already been
   * built, every acc in it is reset and resized for {@code slotCount} and nothing new is created.
   * Otherwise one acc is created per entry of {@code freq.getFacetStats()}; when
   * {@code slotCount == 1} an acc already registered in {@code accMap} under the same key is reset
   * and reused instead of creating a new one.
   *
   * @param docCount passed through to {@code AggValueSource.createSlotAcc}
   * @param slotCount number of slots the accs must be able to hold
   */
  @Override
  protected void createAccs(int docCount, int slotCount) throws IOException {
    if (accMap == null) {
      accMap = new LinkedHashMap<>();
    }

    // allow a custom count acc to be used
    if (countAcc == null) {
      countAcc = new CountSlotArrAcc(fcontext, slotCount);
      countAcc.key = "count";
    }

    if (accs != null) {
      // reuse these accs, but reset them first and resize since size could be different
      for (SlotAcc acc : accs) {
        acc.reset();
        acc.resize(new FlatteningResizer(slotCount));
      }
      return;
    }

    final Map<String, AggValueSource> stats = freq.getFacetStats();
    accs = new SlotAcc[stats.size()];

    int accIdx = 0;
    for (Map.Entry<String, AggValueSource> entry : stats.entrySet()) {
      SlotAcc acc = null;
      if (slotCount == 1) {
        // a single-slot acc registered earlier under this key can be recycled
        acc = accMap.get(entry.getKey());
        if (acc != null) {
          acc.reset();
        }
      }
      if (acc == null) {
        acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
        acc.key = entry.getKey();
        accMap.put(acc.key, acc);
      }
      accs[accIdx++] = acc;
    }
  }

  /**
   * Resolves a {@link FacetRequest.FacetSort} on "count" or "index" to a SlotAcc that does not
   * have to be built from the Aggs.
   *
   * @param fsort the sort to inspect
   * @return {@code countAcc} for a "count" sort, {@code indexOrderAcc} (created here if nobody has
   *         set it yet) for an "index" sort, else null if the sorting acc should be built from
   *         the Aggs
   */
  private SlotAcc getTrivialSortingSlotAcc(FacetRequest.FacetSort fsort) {
    if ("count".equals(fsort.sortVariable)) {
      assert null != countAcc;
      return countAcc;
    }

    if (!"index".equals(fsort.sortVariable)) {
      // neither of the two trivial cases
      return null;
    }

    // A subclass may already have installed its own indexOrderAcc; only fall back to the default
    // when it has not. The default just goes by the slot number, so it never needs to be collected
    // and hence never needs to find its way into the accMap or accs array.
    if (indexOrderAcc == null) {
      indexOrderAcc = new SortSlotAcc(fcontext);
    }
    return indexOrderAcc;
  }
  
  /**
   * Sets up the accumulators used while collecting over the whole domain.
   *
   * <p>Starts a fresh {@code accMap}, marks every stat of the request as deferred, makes sure a
   * {@code countAcc} exists and resolves {@code sortAcc} for "count" / "index" sorts. Then:
   * <ul>
   *   <li>if {@code singlePassSlotAccCollection} is set, an acc is created for every stat right
   *       away, {@code collectAcc} covers all of them and nothing stays deferred;</li>
   *   <li>otherwise, if the sort is on a stat, only that stat gets an acc up front (it becomes both
   *       {@code collectAcc} and {@code sortAcc}) and is taken out of the deferred set;</li>
   *   <li>finally, when {@code freq.allBuckets} is set, accs for the still-deferred stats are
   *       created immediately with a single slot.</li>
   * </ul>
   *
   * @param numDocs passed through to {@code AggValueSource.createSlotAcc}
   * @param numSlots number of slots the collecting accs must be able to hold
   */
  void createCollectAcc(int numDocs, int numSlots) throws IOException {
    accMap = new LinkedHashMap<>();

    // start with the assumption that we're going to defer the computation of all stats
    deferredAggs.putAll(freq.getFacetStats());

    // we always count...
    // allow a subclass to set a custom counter.
    if (countAcc == null) {
      countAcc = new CountSlotArrAcc(fcontext, numSlots);
    }

    sortAcc = getTrivialSortingSlotAcc(this.sort);

    if (this.singlePassSlotAccCollection) {
      // If we are going to return all buckets, and if there are no subfacets (that would need a domain),
      // then don't defer any aggregation calculations to a second phase.
      // This way we can avoid calculating domains for each bucket, which can be expensive.

      // TODO: BEGIN: why can't we just call createAccs here ?
      accs = new SlotAcc[ freq.getFacetStats().size() ];
      int accIdx = 0;
      for (Map.Entry<String, AggValueSource> entry : freq.getFacetStats().entrySet()) {
        AggValueSource agg = entry.getValue();
        SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
        acc.key = entry.getKey();
        accMap.put(acc.key, acc);
        accs[accIdx++] = acc;
      }
      // TODO: END: why can't we just call createAccs here ?
      if (accs.length == 1) {
        collectAcc = accs[0];
      } else {
        collectAcc = new MultiAcc(fcontext, accs);
      }

      if (sortAcc == null) {
        // not a "count" / "index" sort, so it has to be one of the stats created just above
        sortAcc = accMap.get(sort.sortVariable);
        assert sortAcc != null;
      }

      deferredAggs.clear();
    }

    if (sortAcc == null) {
      // sorting on a stat: collect just that one stat across all buckets
      AggValueSource sortAgg = freq.getFacetStats().get(sort.sortVariable);
      if (sortAgg != null) {
        collectAcc = sortAgg.createSlotAcc(fcontext, numDocs, numSlots);
        collectAcc.key = sort.sortVariable; // TODO: improve this
      }
      sortAcc = collectAcc;
      deferredAggs.remove(sort.sortVariable);
    }

    boolean needOtherAccs = freq.allBuckets;  // TODO: use for missing too...

    if (!needOtherAccs) {
      // we may need them later, but we don't want to create them now
      // otherwise we won't know if we need to call setNextReader on them.
      return;
    }

    // create the deferred aggs up front for use by allBuckets
    createOtherAccs(numDocs, 1);
  }

  /**
   * Creates (or recycles) the accumulators for the stats that are still in {@code deferredAggs}.
   *
   * <p>If {@code otherAccs} already exists, each acc in it is reset and nothing else happens.
   * Otherwise one acc per deferred stat is created and registered in {@code accMap}; when every
   * stat of the request is deferred, {@code accs} is pointed at the same array.
   *
   * @param numDocs passed through to {@code AggValueSource.createSlotAcc}
   * @param numSlots number of slots the new accs must be able to hold
   */
  private void createOtherAccs(int numDocs, int numSlots) throws IOException {
    if (otherAccs != null) {
      // reuse existing accumulators
      for (SlotAcc acc : otherAccs) {
        acc.reset();  // todo - make reset take numDocs and numSlots?
      }
      return;
    }

    // Typed view of deferredAggs so that entries can be read without per-element casts; its only
    // writers visible in this class copy from / remove keys of freq.getFacetStats().
    @SuppressWarnings("unchecked")
    final Map<String, AggValueSource> pending = deferredAggs;

    final int numDeferred = pending.size();
    if (numDeferred <= 0) return;

    otherAccs = new SlotAcc[ numDeferred ];

    int otherAccIdx = 0;
    for (Map.Entry<String, AggValueSource> entry : pending.entrySet()) {
      AggValueSource agg = entry.getValue();
      SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
      acc.key = entry.getKey();
      accMap.put(acc.key, acc);
      otherAccs[otherAccIdx++] = acc;
    }

    if (numDeferred == freq.getFacetStats().size()) {
      // accs and otherAccs are the same...
      accs = otherAccs;
    }
  }

  /**
   * First-phase collection of a whole DocSet into one slot.
   *
   * <p>Passes the docs to {@code collectAcc} and then to {@code allBucketsAcc}, skipping whichever
   * of the two is null.
   *
   * @return the value returned by the last acc that ran, or {@code docs.size()} when no acc ran or
   *         that value is negative
   */
  int collectFirstPhase(DocSet docs, int slot, IntFunction slotContext) throws IOException {
    int collected = -1;

    if (collectAcc != null) {
      collected = collectAcc.collect(docs, slot, slotContext);
    }

    if (allBucketsAcc != null) {
      // deliberately overwrites the result of collectAcc, if any
      collected = allBucketsAcc.collect(docs, slot, slotContext);
    }

    if (collected < 0) {
      return docs.size();
    }
    return collected;
  }

  /**
   * First-phase collection of a single document into one slot: the doc is handed to
   * {@code collectAcc} and then to {@code allBucketsAcc}, skipping whichever of the two is null.
   */
  void collectFirstPhase(int segDoc, int slot, IntFunction slotContext) throws IOException {
    final boolean hasCollectAcc = collectAcc != null;
    if (hasCollectAcc) {
      collectAcc.collect(segDoc, slot, slotContext);
    }

    final boolean hasAllBucketsAcc = allBucketsAcc != null;
    if (hasAllBucketsAcc) {
      allBucketsAcc.collect(segDoc, slot, slotContext);
    }
  }

  /** Processes the collected data to finds the top slots, and composes it in the response NamedList. */
  SimpleOrderedMap