All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.solr.handler.component.PivotFacetField Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.component;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;


/**
 * Models a single field somewhere in a hierarchy of fields as part of a pivot facet.  
 * This pivot field contains {@link PivotFacetValue}s which may each contain a nested
 * {@link PivotFacetField} child.  This PivotFacetField may itself 
 * be a child of a {@link PivotFacetValue} parent.
 *
 * @see PivotFacetValue
 * @see PivotFacetFieldValueCollection
 */
@SuppressWarnings("rawtypes")
public class PivotFacetField {
  
  public final String field;

  // null if this is a top level pivot, 
  // otherwise the value of the parent pivot we are nested under
  public final PivotFacetValue parentValue;

  public final PivotFacetFieldValueCollection valueCollection;
  
  // Facet parameters relating to this field
  private final int facetFieldLimit;
  private final int facetFieldMinimumCount;
  private final int facetFieldOffset;  
  private final String facetFieldSort;

  private final Map numberOfValuesContributedByShard = new HashMap<>();
  private final Map shardLowestCount = new HashMap<>();

  private boolean needRefinementAtThisLevel = true;
    
  private PivotFacetField(ResponseBuilder rb, PivotFacetValue parent, String fieldName) {
    
    field = fieldName;
    parentValue = parent;
    
    // facet params
    SolrParams parameters = rb.req.getParams();
    facetFieldMinimumCount = parameters.getFieldInt(field, FacetParams.FACET_PIVOT_MINCOUNT, 1);
    facetFieldOffset = parameters.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
    facetFieldLimit = parameters.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
    String defaultSort = (facetFieldLimit > 0) ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
    facetFieldSort = parameters.getFieldParam(field, FacetParams.FACET_SORT, defaultSort);

    valueCollection = new PivotFacetFieldValueCollection(facetFieldMinimumCount, facetFieldOffset, facetFieldLimit, facetFieldSort);
    
    if ( (facetFieldLimit < 0) || 
         // TODO: possible refinement issue if limit=0 & mincount=0 & missing=true
         // (ie: we only want the missing count for this field)
         (facetFieldLimit <= 0 && facetFieldMinimumCount == 0) ||
         (facetFieldSort.equals(FacetParams.FACET_SORT_INDEX) && facetFieldMinimumCount <= 0) 
         ) {
      // in any of these cases, there's no need to refine this level of the pivot
      needRefinementAtThisLevel = false;
    }
  }

  /** 
   * A recursive method that walks up the tree of pivot fields/values to build 
   * a list of String representations of the values that lead down to this 
   * PivotFacetField.
   *
   * @return A mutable List of the pivot values leading down to this pivot field, 
   *      will never be null but may contain nulls and may be empty if this is a top 
   *      level pivot field
   * @see PivotFacetValue#getValuePath
   */
  public List getValuePath() {
    if (null != parentValue) {
      return parentValue.getValuePath();
    }
    return new ArrayList(3);
  }

  /**
   * A recursive method to construct a new PivotFacetField object from 
   * the contents of the {@link NamedList}s provided by the specified shard, relative 
   * to a parent value (if this is not the top field in the pivot hierarchy)
   *
   * The associated child {@link PivotFacetValue}s will be recursively built as well.
   *
   * @see PivotFacetValue#createFromNamedList
   * @param shardNumber the id of the shard that provided this data
   * @param rb The response builder of the current request
   * @param owner the parent value in the current pivot (may be null)
   * @param pivotValues the data from the specified shard for this pivot field, may be null or empty
   * @return the new PivotFacetField, null if pivotValues is null or empty.
   */
  public static PivotFacetField createFromListOfNamedLists(int shardNumber, ResponseBuilder rb, PivotFacetValue owner, List> pivotValues) {
    
    if (null == pivotValues || pivotValues.size() <= 0) return null;
    
    NamedList firstValue = pivotValues.get(0);
    PivotFacetField createdPivotFacetField 
      = new PivotFacetField(rb, owner, PivotFacetHelper.getField(firstValue));
    
    int lowestCount = Integer.MAX_VALUE;
    
    for (NamedList pivotValue : pivotValues) {
      
      lowestCount = Math.min(lowestCount, PivotFacetHelper.getCount(pivotValue));
      
      PivotFacetValue newValue = PivotFacetValue.createFromNamedList
        (shardNumber, rb, createdPivotFacetField, pivotValue);
      createdPivotFacetField.valueCollection.add(newValue);
    }
      
    createdPivotFacetField.shardLowestCount.put(shardNumber,  lowestCount);
    createdPivotFacetField.numberOfValuesContributedByShard.put(shardNumber, pivotValues.size());

    return createdPivotFacetField;
  }
  
  /**
   * Destructive method that recursively prunes values from the data structure 
   * based on the counts for those values and the effective sort, mincount, limit, 
   * and offset being used for each field.
   * 

* This method should only be called after all refinement is completed just prior * calling {@link #convertToListOfNamedLists} *

* * @see PivotFacet#getTrimmedPivotsAsListOfNamedLists * @see PivotFacetFieldValueCollection#trim */ public void trim() { // SOLR-6331... // // we can probably optimize the memory usage by trimming each level of the pivot once // we know we've fully refined the values at that level // (ie: fold this logic into refineNextLevelOfFacets) this.valueCollection.trim(); } /** * Recursively sorts the collection of values associated with this field, and * any sub-pivots those values have. * * @see FacetParams#FACET_SORT * @see PivotFacetFieldValueCollection#sort */ public void sort() { this.valueCollection.sort(); } /** * A recursive method for generating NamedLists from this field * suitable for including in a pivot facet response to the original distributed request. */ public List> convertToListOfNamedLists() { List> convertedPivotList = null; if (valueCollection.size() > 0) { convertedPivotList = new LinkedList<>(); for (PivotFacetValue pivot : valueCollection) convertedPivotList.add(pivot.convertToNamedList()); } return convertedPivotList; } /** * A recursive method for determining which {@link PivotFacetValue}s need to be * refined for this pivot. * * @see PivotFacet#queuePivotRefinementRequests */ public void queuePivotRefinementRequests(PivotFacet pf) { if (needRefinementAtThisLevel) { if (0 < facetFieldMinimumCount) { // missing is always a candidate for refinement if at least one shard met the minimum PivotFacetValue missing = valueCollection.getMissingValue(); if (null != missing) { processDefiniteCandidateElement(pf, valueCollection.getMissingValue()); } } if (! 
valueCollection.getExplicitValuesList().isEmpty()) { if (FacetParams.FACET_SORT_COUNT.equals(facetFieldSort)) { // we only need to things that are currently in our limit, // or might be in our limit if we get increased counts from shards that // didn't include this value the first time final int indexOfCountThreshold = Math.min(valueCollection.getExplicitValuesListSize(), facetFieldOffset + facetFieldLimit) - 1; final int countThreshold = valueCollection.getAt(indexOfCountThreshold).getCount(); int positionInResults = 0; for (PivotFacetValue value : valueCollection.getExplicitValuesList()) { if (positionInResults <= indexOfCountThreshold) { // This element is within the top results, so we need to get information // from all of the shards. processDefiniteCandidateElement(pf, value); } else { // This element is not within the top results, but may still need to be refined. processPossibleCandidateElement(pf, value, countThreshold); } positionInResults++; } } else { // FACET_SORT_INDEX // everything needs refined to see what the per-shard mincount excluded for (PivotFacetValue value : valueCollection.getExplicitValuesList()) { processDefiniteCandidateElement(pf, value); } } } needRefinementAtThisLevel = false; } if ( pf.isRefinementsRequired() ) { // if any refinements are needed, then we need to stop and wait to // see how the picture may change before drilling down to child pivot fields return; } else { // Since outstanding requests have been filled, then we can drill down // to the next deeper level and check it. refineNextLevelOfFacets(pf); } } /** * Adds refinement requests for the value for each shard that has not already contributed * a count for this value. */ private void processDefiniteCandidateElement(PivotFacet pf, PivotFacetValue value) { for (int shard = pf.knownShards.nextSetBit(0); 0 <= shard; shard = pf.knownShards.nextSetBit(shard+1)) { if ( ! 
value.shardHasContributed(shard) ) { if ( // if we're doing index order, we need to refine anything // (mincount may have excluded from a shard) FacetParams.FACET_SORT_INDEX.equals(facetFieldSort) || (// 'missing' value isn't affected by limit, needs refined if shard didn't provide null == value.getValue() || // if we are doing count order, we need to refine if the limit was hit // (if not, the shard doesn't have the value or it would have returned already) numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(shard))) { pf.addRefinement(shard, value); } } } } private boolean numberOfValuesContributedByShardWasLimitedByFacetFieldLimit(int shardNumber) { return facetFieldLimit <= numberOfValuesContributedByShard(shardNumber); } private int numberOfValuesContributedByShard(final int shardNumber) { return numberOfValuesContributedByShard.containsKey(shardNumber) ? numberOfValuesContributedByShard.get(shardNumber) : 0; } /** * Checks the {@link #lowestCountContributedbyShard} for each shard, combined with the * counts we already know, to see if this value is a viable candidate -- * Does not make sense when using {@link FacetParams#FACET_SORT_INDEX} * * @see #processDefiniteCandidateElement */ private void processPossibleCandidateElement(PivotFacet pf, PivotFacetValue value, final int refinementThreshold) { assert FacetParams.FACET_SORT_COUNT.equals(facetFieldSort) : "Method only makes sense when sorting by count"; int maxPossibleCountAfterRefinement = value.getCount(); for (int shard = pf.knownShards.nextSetBit(0); 0 <= shard; shard = pf.knownShards.nextSetBit(shard+1)) { if ( ! value.shardHasContributed(shard) ) { maxPossibleCountAfterRefinement += lowestCountContributedbyShard(shard); } } if (refinementThreshold <= maxPossibleCountAfterRefinement) { processDefiniteCandidateElement(pf, value); } } private int lowestCountContributedbyShard(int shardNumber) { return (shardLowestCount.containsKey(shardNumber)) ? 
shardLowestCount.get(shardNumber) : 0; } private void refineNextLevelOfFacets(PivotFacet pf) { List explicitValsToRefine = valueCollection.getNextLevelValuesToRefine(); for (PivotFacetValue value : explicitValsToRefine) { if (null != value.getChildPivot()) { value.getChildPivot().queuePivotRefinementRequests(pf); } } PivotFacetValue missing = this.valueCollection.getMissingValue(); if(null != missing && null != missing.getChildPivot()) { missing.getChildPivot().queuePivotRefinementRequests(pf); } } private void incrementShardValueCount(int shardNumber) { if (!numberOfValuesContributedByShard.containsKey(shardNumber)) { numberOfValuesContributedByShard.put(shardNumber, 1); } else { numberOfValuesContributedByShard.put(shardNumber, numberOfValuesContributedByShard.get(shardNumber)+1); } } private void contributeValueFromShard(int shardNumber, ResponseBuilder rb, NamedList shardValue) { incrementShardValueCount(shardNumber); Comparable value = PivotFacetHelper.getValue(shardValue); int count = PivotFacetHelper.getCount(shardValue); // We're changing values so we most mark the collection as dirty valueCollection.markDirty(); if ( ( !shardLowestCount.containsKey(shardNumber) ) || shardLowestCount.get(shardNumber) > count) { shardLowestCount.put(shardNumber, count); } PivotFacetValue facetValue = valueCollection.get(value); if (null == facetValue) { // never seen before, we need to create it from scratch facetValue = PivotFacetValue.createFromNamedList(shardNumber, rb, this, shardValue); this.valueCollection.add(facetValue); } else { facetValue.mergeContributionFromShard(shardNumber, rb, shardValue); } } /** * Recursively merges the contributions from the specified shard for each * {@link PivotFacetValue} represended in the response. 
* * @see PivotFacetValue#mergeContributionFromShard * @param shardNumber the id of the shard that provided this data * @param rb The response builder of the current request * @param response the data from the specified shard for this pivot field, may be null */ public void contributeFromShard(int shardNumber, ResponseBuilder rb, List> response) { if (null == response) return; for (NamedList responseValue : response) { contributeValueFromShard(shardNumber, rb, responseValue); } } public String toString(){ return String.format(Locale.ROOT, "P:%s F:%s V:%s", parentValue, field, valueCollection); } }